pan: add actual register usage to the shaderdb stats

Add the number of registers actually used by the shader (including the
uniforms used) to the stats. This is calculated by the stats gathering
code rather than taken from register allocation, because the scheduler
and scoreboard passes run after register allocation and can sometimes
change the results.

Reviewed-by: Christoph Pillmayer <christoph.pillmayer@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38961>
This commit is contained in:
Eric R. Smith
2025-11-23 10:57:23 -04:00
committed by Marge Bot
parent 2fdd6eb09e
commit 20e17ca864
6 changed files with 50 additions and 8 deletions

View File

@@ -108,8 +108,8 @@ bi_choose_scoreboard_slot(bi_instr *message)
return 0;
}
static uint64_t
bi_read_mask(bi_instr *I, bool staging_only)
uint64_t
bi_instr_read_mask(bi_instr *I, bool staging_only)
{
uint64_t mask = 0;
@@ -131,8 +131,8 @@ bi_read_mask(bi_instr *I, bool staging_only)
return mask;
}
static uint64_t
bi_write_mask(bi_instr *I)
uint64_t
bi_instr_write_mask(bi_instr *I)
{
uint64_t mask = 0;
@@ -177,10 +177,10 @@ bi_push_clause(struct bi_scoreboard_state *st, bi_clause *clause)
if (!I)
return;
st->read[slot] |= bi_read_mask(I, true);
st->read[slot] |= bi_instr_read_mask(I, true);
if (bi_get_opcode_props(I)->sr_write)
st->write[slot] |= bi_write_mask(I);
st->write[slot] |= bi_instr_write_mask(I);
}
/* Adds a dependency on each slot writing any specified register */
@@ -220,8 +220,8 @@ bi_set_dependencies(bi_block *block, bi_clause *clause,
struct bi_scoreboard_state *st)
{
bi_foreach_instr_in_clause(block, clause, I) {
uint64_t read = bi_read_mask(I, false);
uint64_t written = bi_write_mask(I);
uint64_t read = bi_instr_read_mask(I, false);
uint64_t written = bi_instr_write_mask(I);
/* Read-after-write; write-after-write */
bi_depend_on_writers(clause, st, read | written);

View File

@@ -5226,11 +5226,18 @@ emit_cf_list(bi_context *ctx, struct exec_list *list)
/* Counters accumulated while walking the shader to gather statistics. */
struct bi_stats {
/* Clause / tuple / instruction totals; nr_ins is bumped once for each
 * occupied FMA and ADD slot of a tuple in bi_count_tuple_stats(). */
unsigned nr_clauses, nr_tuples, nr_ins;
/* Per-category counts (arithmetic, texture, varying, load/store). */
unsigned nr_arith, nr_texture, nr_varying, nr_ldst;
/* Running maximum maintained by bi_count_tuple_stats(): for a tuple whose
 * fau_idx has bit 0x80 set it is raised to 2 * (1 + (fau_idx & 0x7f)).
 * NOTE(review): presumably the factor of 2 reflects two uniform words per
 * FAU slot -- confirm. */
unsigned nr_fau_uniforms;
/* Bit mask of registers touched. NOTE(review): presumably one bit per
 * register, with the population count reported as "registers_used" by the
 * gather functions -- confirm against the code that fills it in. */
uint64_t reg_mask;
};
static void
bi_count_tuple_stats(bi_clause *clause, bi_tuple *tuple, struct bi_stats *stats)
{
/* check for FAU access */
if (tuple->fau_idx & 0x80) {
unsigned ureg = 1 + (tuple->fau_idx & 0x7f);
stats->nr_fau_uniforms = MAX2(stats->nr_fau_uniforms, 2*ureg);
}
/* Count instructions */
stats->nr_ins += (tuple->fma ? 1 : 0) + (tuple->add ? 1 : 0);
@@ -5366,6 +5373,8 @@ bi_gather_stats(bi_context *ctx, unsigned size, struct bifrost_stats *out)
.spills = ctx->spills,
.fills = ctx->fills,
.spill_cost = ctx->spill_cost,
.registers_used = util_bitcount64(counts.reg_mask),
.uniforms_used = counts.nr_fau_uniforms,
};
out->cycles = MAX2(out->arith, MAX3(out->t, out->v, out->ldst));
@@ -5406,6 +5415,8 @@ va_gather_stats(bi_context *ctx, unsigned size, struct valhall_stats *out)
.spills = ctx->spills,
.fills = ctx->fills,
.spill_cost = ctx->spill_cost,
.registers_used = util_bitcount64(counts.reg_mask),
.uniforms_used = counts.nr_fau_uniforms,
};
struct valhall_stats stats = stats_abs;
stats.fma /= model->rates.fma;

View File

@@ -1446,6 +1446,8 @@ void bi_opt_push_ubo(bi_context *ctx);
void bi_opt_reorder_push(bi_context *ctx);
void bi_lower_swizzle(bi_context *ctx);
void bi_lower_fau(bi_context *ctx);
uint64_t bi_instr_read_mask(bi_instr *I, bool staging_only);
uint64_t bi_instr_write_mask(bi_instr *I);
void bi_assign_scoreboard(bi_context *ctx);
void bi_register_allocate(bi_context *ctx);
void va_optimize(bi_context *ctx);

View File

@@ -91,6 +91,10 @@ va_select_fau_page(const bi_instr *I)
/* Statistics accumulated per instruction by va_count_instr_stats(). */
struct va_stats {
/** Counts per pipe */
unsigned fma, cvt, sfu, v, ls, t;
/** Mask of registers used: va_count_instr_stats() ORs in the written
 * registers of each register destination and the read registers of each
 * register source, shifted to the register's position */
uint64_t reg_mask;
/** number of uniform registers used: running maximum of (uniform index + 1)
 * over the FAU uniform sources seen by va_count_instr_stats() */
unsigned nr_fau_uniforms;
};
void va_count_instr_stats(bi_instr *I, struct va_stats *stats);

View File

@@ -32,6 +32,27 @@ va_count_instr_stats(bi_instr *I, struct va_stats *stats)
/* Adjusted for 64-bit arithmetic */
unsigned words = bi_count_write_registers(I, 0);
bi_foreach_dest(I, d) {
if (I->dest[d].type == BI_INDEX_REGISTER)
stats->reg_mask |= (uint64_t)bi_writemask(I, d) << I->dest[d].value;
}
bi_foreach_src(I, s) {
if (I->src[s].type == BI_INDEX_REGISTER) {
unsigned pos = I->src[s].offset + I->src[s].value;
unsigned count = bi_count_read_registers(I, s);
stats->reg_mask |= ((uint64_t)BITFIELD_MASK(count)) << pos;
}
if (I->src[s].type == BI_INDEX_FAU) {
bi_index index = I->src[s];
unsigned val = index.value;
if (val >= BIR_FAU_UNIFORM) {
val = val & ~BIR_FAU_UNIFORM;
if (val < BIR_FAU_UNIFORM) {
stats->nr_fau_uniforms = MAX2(stats->nr_fau_uniforms, val+1);
}
}
}
}
switch (valhall_opcodes[I->op].unit) {
/* Arithmetic is 2x slower for 64-bit than 32-bit */
case VA_UNIT_FMA:

View File

@@ -67,6 +67,8 @@
<stat name="Spills">Number of spill instructions</stat>
<stat name="Fills">Number of fill instructions</stat>
<stat name="Spill cost">Cost of spill and fill instructions</stat>
<stat name="Registers used">Number of registers used</stat>
<stat name="Uniforms used">Uniform registers used</stat>
</isa>
<isa name="Valhall">
@@ -84,6 +86,8 @@
<stat name="Spills">Number of spill instructions</stat>
<stat name="Fills">Number of fill instructions</stat>
<stat name="Spill cost">Cost of spill and fill instructions</stat>
<stat name="Registers used">Number of registers used</stat>
<stat name="Uniforms used">Uniform registers used</stat>
</isa>
</family>