pan: move pan_shader_update_info call for bifrost

Doing it inside bifrost_compile lets us access the info
structure for more detailed shaderdb stats.

Reviewed-by: Christoph Pillmayer <christoph.pillmayer@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38961>
This commit is contained in:
Eric R. Smith
2025-12-08 10:00:06 -04:00
committed by Marge Bot
parent 158be3dc1d
commit 94093488eb
3 changed files with 184 additions and 39 deletions

View File

@@ -46,7 +46,7 @@
#include "bifrost_nir.h"
#include "compiler.h"
static void pan_stats_verbose(FILE *f, const char *prefix, const bi_context *ctx,
static void pan_stats_verbose(FILE *f, const char *prefix, bi_context *ctx,
const struct pan_stats *stats,
const struct pan_shader_info *info);
@@ -5411,17 +5411,10 @@ bi_gather_stats(bi_context *ctx, unsigned size, struct bifrost_stats *out)
}
static void
va_gather_stats(bi_context *ctx, unsigned size, struct valhall_stats *out)
va_count_stats(bi_context *ctx, unsigned nr_ins, unsigned size,
const struct va_stats *counts,
struct valhall_stats *out)
{
unsigned nr_ins = 0;
struct va_stats counts = {0};
/* Count instructions */
bi_foreach_instr_global(ctx, I) {
nr_ins++;
va_count_instr_stats(I, &counts);
}
const struct pan_model *model =
pan_get_model(ctx->inputs->gpu_id, ctx->inputs->gpu_variant);
@@ -5434,19 +5427,19 @@ va_gather_stats(bi_context *ctx, unsigned size, struct valhall_stats *out)
struct valhall_stats stats_abs = {
.instrs = nr_ins,
.code_size = size,
.fma = ((float)counts.fma),
.cvt = ((float)counts.cvt),
.sfu = ((float)counts.sfu),
.v = ((float)counts.v),
.t = ((float)counts.t),
.ls = ((float)counts.ls),
.fma = ((float)counts->fma),
.cvt = ((float)counts->cvt),
.sfu = ((float)counts->sfu),
.v = ((float)counts->v),
.t = ((float)counts->t),
.ls = ((float)counts->ls),
.threads = (ctx->info.work_reg_count <= 32) ? 2 : 1,
.loops = ctx->loop_count,
.spills = ctx->spills,
.fills = ctx->fills,
.spill_cost = ctx->spill_cost,
.registers_used = util_bitcount64(counts.reg_mask),
.uniforms_used = counts.nr_fau_uniforms,
.registers_used = util_bitcount64(counts->reg_mask),
.uniforms_used = counts->nr_fau_uniforms,
};
struct valhall_stats stats = stats_abs;
stats.fma /= model->rates.fma;
@@ -5464,6 +5457,118 @@ va_gather_stats(bi_context *ctx, unsigned size, struct valhall_stats *out)
MAX3(stats.v, stats.t, stats.ls));
}
/*
 * Accumulate the per-instruction statistics of every instruction in
 * `block` into `counts`, and return how many instructions were seen.
 */
static unsigned
va_gather_stats_block(bi_block *block, struct va_stats *counts)
{
   unsigned instr_count = 0;

   bi_foreach_instr_in_block(block, ins) {
      va_count_instr_stats(ins, counts);
      ++instr_count;
   }

   return instr_count;
}
/*
 * Gather stats for a minimum length path through the shader.
 *
 * Recursively evaluates each successor of `block`, keeps the statistics of
 * the successor path with the fewest instructions, then adds the
 * instructions of `block` itself on top.
 *
 * `counts` is both input and output: on entry it holds the statistics
 * accumulated so far, on return it additionally contains the cheapest
 * successor path plus this block.
 *
 * Returns the number of instructions on the chosen path, including `block`.
 */
static unsigned
va_gather_min_path_stats(bi_block *block, struct va_stats *counts)
{
   /* Snapshot of the incoming counters, restored before each successor is
    * explored so that the candidate paths do not contaminate each other. */
   const struct va_stats entry_counts = *counts;

   /* Best candidate so far. Initialised to the incoming state so that it is
    * well defined even when no successor is followed. */
   struct va_stats min_counts = entry_counts;
   unsigned min_ins = 0;

   /* Explicit "have we recorded a candidate" flag. Using min_ins == 0 as
    * the sentinel would make a genuinely empty successor path (0
    * instructions) indistinguishable from "nothing seen yet", and a longer
    * sibling path would then wrongly replace it as the minimum. */
   bool have_path = false;

   bi_foreach_successor(block, next) {
      /* if following a path leads to a loop, do not do it */
      if (bi_block_dominates(next, block))
         continue;

      unsigned nr_ins = va_gather_min_path_stats(next, counts);
      if (!have_path || nr_ins < min_ins) {
         have_path = true;
         min_ins = nr_ins;
         min_counts = *counts;
      }

      *counts = entry_counts;
   }

   /* Equal to entry_counts when no successor was followed. */
   *counts = min_counts;

   return min_ins + va_gather_stats_block(block, counts);
}
/*
 * Gather stats for a maximum length path through the shader.
 *
 * Loops should be counted, but at most once, so every block entered is
 * marked in `visited` and successors that are already marked are skipped.
 * The marks are never cleared during the walk, so a block reached through
 * one path is not revisited through another one.
 *
 * `counts` is both input and output: on return it holds the incoming
 * statistics plus those of the longest successor path found plus those of
 * `block`. `visited` must have room for every block index.
 *
 * Returns the number of instructions on the chosen path, including `block`.
 */
static unsigned
va_gather_max_path_stats(bi_block *block, struct va_stats *counts, BITSET_WORD *visited)
{
   const struct va_stats entry_counts = *counts;
   struct va_stats best_counts = entry_counts;
   unsigned best_len = 0;

   BITSET_SET(visited, block->index);

   bi_foreach_successor(block, succ) {
      /* Only descend into blocks we have not entered before. */
      if (!BITSET_TEST(visited, succ->index)) {
         unsigned path_len = va_gather_max_path_stats(succ, counts, visited);

         if (path_len > best_len) {
            best_len = path_len;
            best_counts = *counts;
         }

         /* Start the next candidate from the same baseline. */
         *counts = entry_counts;
      }
   }

   /* best_counts still equals the baseline if no longer path was found. */
   *counts = best_counts;

   return best_len + va_gather_stats_block(block, counts);
}
/* Selects which instructions va_gather_stats() takes into account. */
enum gather_stats_mode {
   /* Every instruction in the shader. */
   GATHER_STATS_FULL = 0,
   /* Instructions along a minimum length path. */
   GATHER_STATS_MIN = 1,
   /* Instructions along a maximum length path. */
   GATHER_STATS_MAX = 2,
};
/*
 * Count instructions according to `mode` and convert the raw counters into
 * Valhall statistics written to `out`.
 *
 * GATHER_STATS_FULL counts every instruction in the shader, while
 * GATHER_STATS_MIN / GATHER_STATS_MAX count only the instructions along a
 * minimum / maximum length path starting at the first block. `size` is
 * passed through to va_count_stats() unchanged.
 */
static void
va_gather_stats(bi_context *ctx, unsigned size, struct valhall_stats *out,
                enum gather_stats_mode mode)
{
   struct va_stats totals = {0};
   unsigned instr_count = 0;
   bi_block *entry = bi_start_block(&ctx->blocks);

   /* Count instructions */
   if (mode == GATHER_STATS_FULL) {
      bi_foreach_instr_global(ctx, I) {
         instr_count++;
         va_count_instr_stats(I, &totals);
      }
   } else if (mode == GATHER_STATS_MIN) {
      instr_count = va_gather_min_path_stats(entry, &totals);
   } else if (mode == GATHER_STATS_MAX) {
      /* Zero-initialised bitset with one bit per block, used only for the
       * duration of this walk. */
      BITSET_WORD *visited = BITSET_RZALLOC(NULL, ctx->num_blocks);

      instr_count = va_gather_max_path_stats(entry, &totals, visited);
      ralloc_free(visited);
   }

   /* convert to stats */
   va_count_stats(ctx, instr_count, size, &totals, out);
}
/*
* Some operations are only available as 32-bit instructions. 64-bit floats are
* unsupported and ints are lowered with nir_lower_int64. Certain 8-bit and
@@ -6843,14 +6948,18 @@ bi_compile_variant_nir(nir_shader *nir,
fflush(stdout);
}
/* gather instruction statistics */
if (ctx->arch >= 9) {
stats->isa = PAN_STAT_VALHALL;
va_gather_stats(ctx, binary->size - offset, &stats->valhall);
va_gather_stats(ctx, binary->size - offset, &stats->valhall, GATHER_STATS_FULL);
} else {
stats->isa = PAN_STAT_BIFROST;
bi_gather_stats(ctx, binary->size - offset, &stats->bifrost);
}
/* update info struct */
pan_shader_update_info(pinfo, ctx->nir, inputs);
if ((bifrost_debug & (BIFROST_DBG_SHADERDB|BIFROST_DBG_STATSFULL))
&& !skip_internal) {
const char *prefix = bi_shader_stage_name(ctx);
@@ -7167,12 +7276,24 @@ do_report_pipes(FILE *f, const char *prefix, unsigned n, const char *statname[],
fprintf(f, " %6.3f", statval[i]);
if (statval[i] > limit_val) {
limit_idx = i;
limit_val = statval[i];
}
}
fprintf(f, " %6s\n", statname[limit_idx]);
/* print the first thing that matches the bound */
char bound_str[256];
unsigned max_str = sizeof(bound_str) - 1; /* leave room for trailing 0 */
strncpy(bound_str, statname[limit_idx], max_str);
/* now print any others that match */
for (unsigned i = limit_idx + 1; i < n; i++) {
if (statval[i] == limit_val) {
strncat(bound_str, ", ", max_str);
strncat(bound_str, statname[i], max_str);
}
}
fprintf(f, " %6s\n", bound_str);
}
static void
bifrost_stats_verbose(FILE *f, const bi_context *ctx, const struct bifrost_stats *stats,
bifrost_stats_verbose(FILE *f, bi_context *ctx, const struct bifrost_stats *stats,
const struct pan_shader_info *info)
{
report_regs(f, stats->registers_used, stats->uniforms_used);
@@ -7182,13 +7303,15 @@ bifrost_stats_verbose(FILE *f, const bi_context *ctx, const struct bifrost_stats
fprintf(f, "Stack size: %u bytes\n", ctx->info.tls_size);
/* now print instruction statistics */
unsigned n = 4;
static const char *statname[4] = {
static const char *statname[] = {
"A", "LS", "V", "T"
};
float statval[4] = {
float statval[] = {
stats->arith, stats->ldst, stats->v, stats->t,
};
unsigned n = ARRAY_SIZE(statname);
assert(n == ARRAY_SIZE(statval));
/* special case, empty prefix prints column headings */
do_report_pipes(f, "", n, statname, statval);
do_report_pipes(f, "Total instruction cycles:", n, statname, statval);
@@ -7196,27 +7319,47 @@ bifrost_stats_verbose(FILE *f, const bi_context *ctx, const struct bifrost_stats
}
static void
valhall_stats_verbose(FILE *f, const bi_context *ctx, const struct valhall_stats *stats,
valhall_stats_verbose(FILE *f, bi_context *ctx, const struct valhall_stats *stats,
const struct pan_shader_info *info)
{
struct valhall_stats min_stats = { 0 };
struct valhall_stats max_stats = { 0 };
report_regs(f, stats->registers_used, stats->uniforms_used);
fprintf(f, "Code size: %u bytes\n", stats->code_size);
fprintf(f, "Loops: %u\n", stats->loops);
fprintf(f, "Spills/fills: %u/%u\n", stats->spills, stats->fills);
fprintf(f, "Stack size: %u bytes\n", ctx->info.tls_size);
va_gather_stats(ctx, stats->code_size, &min_stats, GATHER_STATS_MIN);
va_gather_stats(ctx, stats->code_size, &max_stats, GATHER_STATS_MAX);
/* now print instruction statistics */
float arith = MAX3(stats->fma, stats->sfu, stats->cvt);
unsigned n = 4;
static const char *statname[4] = {
"A", "LS", "V", "T"
float min_arith = MAX3(min_stats.fma, min_stats.sfu, min_stats.cvt);
float max_arith = MAX3(max_stats.fma, max_stats.sfu, max_stats.cvt);
static const char *statname[] = {
"A", "FMA", "CVT", "SFU", "LS", "V", "T"
};
float statval[4] = {
arith, stats->ls, stats->v, stats->t,
float statval[] = {
arith, stats->fma, stats->cvt, stats->sfu, stats->ls, stats->v, stats->t,
};
float min_statval[] = {
min_arith, min_stats.fma, min_stats.cvt, min_stats.sfu,
min_stats.ls, min_stats.v, min_stats.t,
};
float max_statval[] = {
max_arith, max_stats.fma, max_stats.cvt, max_stats.sfu,
max_stats.ls, max_stats.v, max_stats.t,
};
unsigned n = ARRAY_SIZE(statval);
assert(n == ARRAY_SIZE(statname));
do_report_pipes(f, "", n, statname, statval);
do_report_pipes(f, "Total instruction cycles:", n, statname, statval);
fprintf(f, "\nA = Arithmetic, LS = Load/Store, V = Varying, T = Texture\n");
do_report_pipes(f, "Shortest path cycles:", n, statname, min_statval);
do_report_pipes(f, "Longest path cycles:", n, statname, max_statval);
fprintf(f, "\nA = Arithmetic, FMA = Arith FMA, CVT = Arith CVT, SFU = Arith SFU\n");
fprintf(f, "LS = Load/Store, V = Varying, T = Texture\n");
}
static const char *bool_str(bool x) {
@@ -7224,7 +7367,7 @@ static const char *bool_str(bool x) {
}
static void
pan_stats_verbose(FILE *f, const char *prefix, const bi_context *ctx, const struct pan_stats *stats,
pan_stats_verbose(FILE *f, const char *prefix, bi_context *ctx, const struct pan_stats *stats,
const struct pan_shader_info *info)
{
const struct pan_model *model = pan_get_model(ctx->inputs->gpu_id, ctx->inputs->gpu_variant);
@@ -7260,6 +7403,7 @@ pan_stats_verbose(FILE *f, const char *prefix, const bi_context *ctx, const stru
case MESA_SHADER_FRAGMENT:
fprintf(f, "Has side-effects: %s\n", bool_str(info->fs.sidefx));
fprintf(f, "Modifies coverage: %s\n", bool_str(info->fs.writes_coverage));
fprintf(f, "Reads color buffer: %s\n", bool_str(info->fs.outputs_read != 0));
break;
default:
break;

View File

@@ -164,7 +164,7 @@ pan_lookup_pushed_ubo(struct pan_ubo_push *push, unsigned ubo, unsigned offs)
void
pan_shader_update_info(struct pan_shader_info *info, nir_shader *s,
struct pan_compile_inputs *inputs)
const struct pan_compile_inputs *inputs)
{
unsigned arch = pan_arch(inputs->gpu_id);
@@ -306,12 +306,13 @@ pan_shader_compile(nir_shader *s, struct pan_compile_inputs *inputs,
NIR_PASS(_, s, nir_inline_sysval, nir_intrinsic_load_printf_buffer_size,
PAN_PRINTF_BUFFER_SIZE - 8);
if (arch >= 6)
if (arch >= 6) {
bifrost_compile_shader_nir(s, inputs, binary, info);
else
/* pan_shader_update_info done in the compile */
} else {
midgard_compile_shader_nir(s, inputs, binary, info);
pan_shader_update_info(info, s, inputs);
pan_shader_update_info(info, s, inputs);
}
}
void

View File

@@ -367,7 +367,7 @@ struct pan_shader_info {
};
void pan_shader_update_info(struct pan_shader_info *info, nir_shader *s,
struct pan_compile_inputs *inputs);
const struct pan_compile_inputs *inputs);
void pan_shader_compile(nir_shader *nir, struct pan_compile_inputs *inputs,
struct util_dynarray *binary,