intel/brw: Allow using performance analysis pass pre-register allocation.
Mainly this involves changing 'struct state' so that the dep_ready array is allocated with a dynamic size based on the number of VGRFs of the program instead of assuming a fixed XE3_MAX_GRF count of GRF dependencies. VGRF register dependencies are then handled by using one dep_ready entry per VGRF allocation instead of one per hardware register. The ability to use the performance analysis pass pre-regalloc will mostly be useful on xe3+, but this also has the side effect of saving some memory on xe2 and earlier platforms since we no longer need to allocate XE3_MAX_GRF dep_ready entries for them. Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36618>
This commit is contained in:
committed by
Marge Bot
parent
3936a43496
commit
dfc2a89d96
@@ -65,10 +65,8 @@ namespace {
|
||||
* potentially depend on.
|
||||
*/
|
||||
enum intel_eu_dependency_id {
|
||||
/* Register part of the GRF. */
|
||||
EU_DEPENDENCY_ID_GRF0 = 0,
|
||||
/* Address register part of the ARF. */
|
||||
EU_DEPENDENCY_ID_ADDR0 = EU_DEPENDENCY_ID_GRF0 + XE3_MAX_GRF,
|
||||
EU_DEPENDENCY_ID_ADDR0 = 0,
|
||||
/* Accumulator register part of the ARF. */
|
||||
EU_DEPENDENCY_ID_ACCUM0 = EU_DEPENDENCY_ID_ADDR0 + 1,
|
||||
/* Flag register part of the ARF. */
|
||||
@@ -77,15 +75,33 @@ namespace {
|
||||
EU_DEPENDENCY_ID_SBID_WR0 = EU_DEPENDENCY_ID_FLAG0 + 16,
|
||||
/* SBID token read completion. Only used on Gfx12+. */
|
||||
EU_DEPENDENCY_ID_SBID_RD0 = EU_DEPENDENCY_ID_SBID_WR0 + 32,
|
||||
/* Number of computation dependencies currently tracked. */
|
||||
EU_NUM_DEPENDENCY_IDS = EU_DEPENDENCY_ID_SBID_RD0 + 32
|
||||
/* Register part of the GRF. */
|
||||
EU_DEPENDENCY_ID_GRF0 = EU_DEPENDENCY_ID_SBID_RD0 + 32,
|
||||
EU_DEPENDENCY_ID_INVALID = ~0u
|
||||
};
|
||||
|
||||
unsigned
|
||||
num_grf_dependency_ids(const brw_shader *s) {
|
||||
return (!s->grf_used ? s->alloc.count :
|
||||
s->devinfo->ver >= 30 ? XE3_MAX_GRF :
|
||||
s->devinfo->ver >= 20 ? XE2_MAX_GRF :
|
||||
BRW_MAX_GRF);
|
||||
}
|
||||
|
||||
/**
|
||||
* State of our modeling of the program execution.
|
||||
*/
|
||||
struct state {
|
||||
state() : unit_ready(), dep_ready(), unit_busy(), weight(1.0) {}
|
||||
state(const brw_shader *s) :
|
||||
unit_ready(), dep_ready(), num_dependency_ids(EU_DEPENDENCY_ID_GRF0 + num_grf_dependency_ids(s)), unit_busy(), weight(1.0)
|
||||
{
|
||||
dep_ready = new unsigned[num_dependency_ids]();
|
||||
}
|
||||
|
||||
~state() {
|
||||
delete[] dep_ready;
|
||||
}
|
||||
|
||||
/**
|
||||
* Time at which a given unit will be ready to execute the next
|
||||
* computation, in clock units.
|
||||
@@ -95,7 +111,9 @@ namespace {
|
||||
* Time at which an instruction dependent on a given dependency ID will
|
||||
* be ready to execute, in clock units.
|
||||
*/
|
||||
unsigned dep_ready[EU_NUM_DEPENDENCY_IDS];
|
||||
unsigned *dep_ready;
|
||||
unsigned num_dependency_ids;
|
||||
|
||||
/**
|
||||
* Aggregated utilization of a given unit excluding idle cycles,
|
||||
* in clock units.
|
||||
@@ -736,7 +754,7 @@ namespace {
|
||||
void
|
||||
stall_on_dependency(state &st, enum intel_eu_dependency_id id)
|
||||
{
|
||||
if (id < ARRAY_SIZE(st.dep_ready))
|
||||
if (id < st.num_dependency_ids)
|
||||
st.unit_ready[EU_UNIT_FE] = MAX2(st.unit_ready[EU_UNIT_FE],
|
||||
st.dep_ready[id]);
|
||||
}
|
||||
@@ -775,7 +793,7 @@ namespace {
|
||||
mark_read_dependency(state &st, const perf_desc &perf,
|
||||
enum intel_eu_dependency_id id)
|
||||
{
|
||||
if (id < ARRAY_SIZE(st.dep_ready))
|
||||
if (id < st.num_dependency_ids)
|
||||
st.dep_ready[id] = st.unit_ready[EU_UNIT_FE] + perf.ls;
|
||||
}
|
||||
|
||||
@@ -791,7 +809,7 @@ namespace {
|
||||
st.dep_ready[id] = st.unit_ready[EU_UNIT_FE] + perf.la;
|
||||
else if (id >= EU_DEPENDENCY_ID_FLAG0 && id < EU_DEPENDENCY_ID_SBID_WR0)
|
||||
st.dep_ready[id] = st.unit_ready[EU_UNIT_FE] + perf.lf;
|
||||
else if (id < ARRAY_SIZE(st.dep_ready))
|
||||
else if (id < st.num_dependency_ids)
|
||||
st.dep_ready[id] = st.unit_ready[EU_UNIT_FE] + perf.ld;
|
||||
}
|
||||
|
||||
@@ -803,13 +821,12 @@ namespace {
|
||||
const int delta)
|
||||
{
|
||||
if (r.file == VGRF) {
|
||||
const unsigned i = r.nr + r.offset / REG_SIZE + delta;
|
||||
assert(i < EU_DEPENDENCY_ID_ADDR0 - EU_DEPENDENCY_ID_GRF0);
|
||||
const unsigned i = r.nr;
|
||||
return intel_eu_dependency_id(EU_DEPENDENCY_ID_GRF0 + i);
|
||||
|
||||
} else if (r.file == FIXED_GRF) {
|
||||
const unsigned i = r.nr + delta;
|
||||
assert(i < EU_DEPENDENCY_ID_ADDR0 - EU_DEPENDENCY_ID_GRF0);
|
||||
assert(i < XE3_MAX_GRF);
|
||||
return intel_eu_dependency_id(EU_DEPENDENCY_ID_GRF0 + i);
|
||||
|
||||
} else if (r.file == ARF && r.nr >= BRW_ARF_ADDRESS &&
|
||||
@@ -824,7 +841,7 @@ namespace {
|
||||
return intel_eu_dependency_id(EU_DEPENDENCY_ID_ACCUM0 + i);
|
||||
|
||||
} else {
|
||||
return EU_NUM_DEPENDENCY_IDS;
|
||||
return EU_DEPENDENCY_ID_INVALID;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -846,11 +863,10 @@ namespace {
|
||||
tgl_swsb_rd_dependency_id(tgl_swsb swsb)
|
||||
{
|
||||
if (swsb.mode) {
|
||||
assert(swsb.sbid <
|
||||
EU_NUM_DEPENDENCY_IDS - EU_DEPENDENCY_ID_SBID_RD0);
|
||||
assert(swsb.sbid < EU_DEPENDENCY_ID_GRF0 - EU_DEPENDENCY_ID_SBID_RD0);
|
||||
return intel_eu_dependency_id(EU_DEPENDENCY_ID_SBID_RD0 + swsb.sbid);
|
||||
} else {
|
||||
return EU_NUM_DEPENDENCY_IDS;
|
||||
return EU_DEPENDENCY_ID_INVALID;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -866,7 +882,7 @@ namespace {
|
||||
EU_DEPENDENCY_ID_SBID_RD0 - EU_DEPENDENCY_ID_SBID_WR0);
|
||||
return intel_eu_dependency_id(EU_DEPENDENCY_ID_SBID_WR0 + swsb.sbid);
|
||||
} else {
|
||||
return EU_NUM_DEPENDENCY_IDS;
|
||||
return EU_DEPENDENCY_ID_INVALID;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1075,7 +1091,7 @@ namespace {
|
||||
const float loop_weight = 10;
|
||||
unsigned halt_count = 0;
|
||||
unsigned elapsed = 0;
|
||||
state st;
|
||||
state st { s };
|
||||
|
||||
foreach_block(block, s->cfg) {
|
||||
const unsigned elapsed0 = elapsed;
|
||||
|
||||
Reference in New Issue
Block a user