From 20e17ca864a97061123e5086236dd1559dbd0df8 Mon Sep 17 00:00:00 2001 From: "Eric R. Smith" Date: Sun, 23 Nov 2025 10:57:23 -0400 Subject: [PATCH] pan: add actual register usage to the shaderdb stats Add the actual registers used (including uniforms used) by the shader to stats. This is calculated by the stats gathering code, because the scheduler and scoreboard passes run after register allocation and can sometimes change the results. Reviewed-by: Christoph Pillmayer Part-of: --- .../compiler/bifrost/bifrost/bi_scoreboard.c | 16 +++++++------- .../compiler/bifrost/bifrost_compile.c | 11 ++++++++++ src/panfrost/compiler/bifrost/compiler.h | 2 ++ .../compiler/bifrost/valhall/va_compiler.h | 4 ++++ .../compiler/bifrost/valhall/va_perf.c | 21 +++++++++++++++++++ src/util/shader_stats.xml | 4 ++++ 6 files changed, 50 insertions(+), 8 deletions(-) diff --git a/src/panfrost/compiler/bifrost/bifrost/bi_scoreboard.c b/src/panfrost/compiler/bifrost/bifrost/bi_scoreboard.c index a1edd4222e5..26c01158880 100644 --- a/src/panfrost/compiler/bifrost/bifrost/bi_scoreboard.c +++ b/src/panfrost/compiler/bifrost/bifrost/bi_scoreboard.c @@ -108,8 +108,8 @@ bi_choose_scoreboard_slot(bi_instr *message) return 0; } -static uint64_t -bi_read_mask(bi_instr *I, bool staging_only) +uint64_t +bi_instr_read_mask(bi_instr *I, bool staging_only) { uint64_t mask = 0; @@ -131,8 +131,8 @@ bi_read_mask(bi_instr *I, bool staging_only) return mask; } -static uint64_t -bi_write_mask(bi_instr *I) +uint64_t +bi_instr_write_mask(bi_instr *I) { uint64_t mask = 0; @@ -177,10 +177,10 @@ bi_push_clause(struct bi_scoreboard_state *st, bi_clause *clause) if (!I) return; - st->read[slot] |= bi_read_mask(I, true); + st->read[slot] |= bi_instr_read_mask(I, true); if (bi_get_opcode_props(I)->sr_write) - st->write[slot] |= bi_write_mask(I); + st->write[slot] |= bi_instr_write_mask(I); } /* Adds a dependency on each slot writing any specified register */ @@ -220,8 +220,8 @@ bi_set_dependencies(bi_block *block, bi_clause *clause, struct bi_scoreboard_state *st) { bi_foreach_instr_in_clause(block, clause, I) { - uint64_t read = bi_read_mask(I, false); - uint64_t written = bi_write_mask(I); + uint64_t read = bi_instr_read_mask(I, false); + uint64_t written = bi_instr_write_mask(I); /* Read-after-write; write-after-write */ bi_depend_on_writers(clause, st, read | written); diff --git a/src/panfrost/compiler/bifrost/bifrost_compile.c b/src/panfrost/compiler/bifrost/bifrost_compile.c index 558ee51abf1..ae539eccdbb 100644 --- a/src/panfrost/compiler/bifrost/bifrost_compile.c +++ b/src/panfrost/compiler/bifrost/bifrost_compile.c @@ -5226,11 +5226,18 @@ emit_cf_list(bi_context *ctx, struct exec_list *list) struct bi_stats { unsigned nr_clauses, nr_tuples, nr_ins; unsigned nr_arith, nr_texture, nr_varying, nr_ldst; + unsigned nr_fau_uniforms; + uint64_t reg_mask; }; static void bi_count_tuple_stats(bi_clause *clause, bi_tuple *tuple, struct bi_stats *stats) { + /* check for FAU access */ + if (tuple->fau_idx & 0x80) { + unsigned ureg = 1 + (tuple->fau_idx & 0x7f); + stats->nr_fau_uniforms = MAX2(stats->nr_fau_uniforms, 2*ureg); + } /* Count instructions */ stats->nr_ins += (tuple->fma ? 1 : 0) + (tuple->add ? 1 : 0); @@ -5366,6 +5373,8 @@ bi_gather_stats(bi_context *ctx, unsigned size, struct bifrost_stats *out) .spills = ctx->spills, .fills = ctx->fills, .spill_cost = ctx->spill_cost, + .registers_used = util_bitcount64(counts.reg_mask), + .uniforms_used = counts.nr_fau_uniforms, }; out->cycles = MAX2(out->arith, MAX3(out->t, out->v, out->ldst)); @@ -5406,6 +5415,8 @@ va_gather_stats(bi_context *ctx, unsigned size, struct valhall_stats *out) .spills = ctx->spills, .fills = ctx->fills, .spill_cost = ctx->spill_cost, + .registers_used = util_bitcount64(counts.reg_mask), + .uniforms_used = counts.nr_fau_uniforms, }; struct valhall_stats stats = stats_abs; stats.fma /= model->rates.fma; diff --git a/src/panfrost/compiler/bifrost/compiler.h b/src/panfrost/compiler/bifrost/compiler.h index 6985d9f2191..891937f6799 100644 --- a/src/panfrost/compiler/bifrost/compiler.h +++ b/src/panfrost/compiler/bifrost/compiler.h @@ -1446,6 +1446,8 @@ void bi_opt_push_ubo(bi_context *ctx); void bi_opt_reorder_push(bi_context *ctx); void bi_lower_swizzle(bi_context *ctx); void bi_lower_fau(bi_context *ctx); +uint64_t bi_instr_read_mask(bi_instr *I, bool staging_only); +uint64_t bi_instr_write_mask(bi_instr *I); void bi_assign_scoreboard(bi_context *ctx); void bi_register_allocate(bi_context *ctx); void va_optimize(bi_context *ctx); diff --git a/src/panfrost/compiler/bifrost/valhall/va_compiler.h b/src/panfrost/compiler/bifrost/valhall/va_compiler.h index 00650110539..e80c2f9a479 100644 --- a/src/panfrost/compiler/bifrost/valhall/va_compiler.h +++ b/src/panfrost/compiler/bifrost/valhall/va_compiler.h @@ -91,6 +91,10 @@ va_select_fau_page(const bi_instr *I) struct va_stats { /** Counts per pipe */ unsigned fma, cvt, sfu, v, ls, t; + /** Mask of registers used */ + uint64_t reg_mask; + /** number of uniform registers used */ + unsigned nr_fau_uniforms; }; void va_count_instr_stats(bi_instr *I, struct va_stats *stats); diff --git a/src/panfrost/compiler/bifrost/valhall/va_perf.c b/src/panfrost/compiler/bifrost/valhall/va_perf.c index efa7c7b4a1d..d59a509652f 100644 --- a/src/panfrost/compiler/bifrost/valhall/va_perf.c +++ b/src/panfrost/compiler/bifrost/valhall/va_perf.c @@ -32,6 +32,27 @@ va_count_instr_stats(bi_instr *I, struct va_stats *stats) /* Adjusted for 64-bit arithmetic */ unsigned words = bi_count_write_registers(I, 0); + bi_foreach_dest(I, d) { + if (I->dest[d].type == BI_INDEX_REGISTER) + stats->reg_mask |= (uint64_t)bi_writemask(I, d) << I->dest[d].value; + } + bi_foreach_src(I, s) { + if (I->src[s].type == BI_INDEX_REGISTER) { + unsigned pos = I->src[s].offset + I->src[s].value; + unsigned count = bi_count_read_registers(I, s); + stats->reg_mask |= ((uint64_t)BITFIELD_MASK(count)) << pos; + } + if (I->src[s].type == BI_INDEX_FAU) { + bi_index index = I->src[s]; + unsigned val = index.value; + if (val >= BIR_FAU_UNIFORM) { + val = val & ~BIR_FAU_UNIFORM; + if (val < BIR_FAU_UNIFORM) { + stats->nr_fau_uniforms = MAX2(stats->nr_fau_uniforms, val+1); + } + } + } + } switch (valhall_opcodes[I->op].unit) { /* Arithmetic is 2x slower for 64-bit than 32-bit */ case VA_UNIT_FMA: diff --git a/src/util/shader_stats.xml b/src/util/shader_stats.xml index 36f870b9060..7628275bded 100644 --- a/src/util/shader_stats.xml +++ b/src/util/shader_stats.xml @@ -67,6 +67,8 @@ Number of spill instructions Number of fill instructions Cost of spill and fill instructions + Number of registers used + Uniform registers used @@ -84,6 +86,8 @@ Number of spill instructions Number of fill instructions Cost of spill and fill instructions + Number of registers used + Uniform registers used