From 2158211eebbddb022dfc1e2e6b847d570f94bbf0 Mon Sep 17 00:00:00 2001
From: Job Noorman
Date: Tue, 19 Aug 2025 15:36:53 +0200
Subject: [PATCH] ir3: allow shared srcs for ldc.k

This works just fine and opens up a lot more opportunities for early
preamble.

Note that I haven't seen actual cases where the index is large enough to
need a register but verified in computerator that it works.

Totals:
MaxWaves: 2377648 -> 2377666 (+0.00%)
Instrs: 48207402 -> 48219491 (+0.03%); split: -0.01%, +0.03%
CodeSize: 101907026 -> 101929790 (+0.02%); split: -0.01%, +0.03%
NOPs: 8386320 -> 8392647 (+0.08%); split: -0.03%, +0.10%
MOVs: 1468853 -> 1474439 (+0.38%); split: -0.19%, +0.57%
Full: 1716708 -> 1716655 (-0.00%)
(ss): 1113167 -> 1115183 (+0.18%); split: -0.05%, +0.23%
(sy): 552317 -> 552334 (+0.00%); split: -0.10%, +0.10%
(ss)-stall: 4013046 -> 4011814 (-0.03%); split: -0.10%, +0.06%
(sy)-stall: 16741190 -> 16738674 (-0.02%); split: -0.20%, +0.19%
Preamble Instrs: 11506988 -> 11422360 (-0.74%); split: -0.79%, +0.06%
Early Preamble: 121339 -> 123955 (+2.16%)
Last helper: 11686328 -> 11688700 (+0.02%); split: -0.01%, +0.03%
Cat0: 9241457 -> 9248390 (+0.08%); split: -0.02%, +0.10%
Cat1: 2353411 -> 2359061 (+0.24%); split: -0.12%, +0.36%
Cat7: 1637795 -> 1637301 (-0.03%); split: -0.18%, +0.14%

Totals from 5370 (3.26% of 164705) affected shaders:
MaxWaves: 66838 -> 66856 (+0.03%)
Instrs: 4127945 -> 4140034 (+0.29%); split: -0.08%, +0.37%
CodeSize: 8376584 -> 8399348 (+0.27%); split: -0.08%, +0.35%
NOPs: 892650 -> 898977 (+0.71%); split: -0.24%, +0.95%
MOVs: 199423 -> 205009 (+2.80%); split: -1.42%, +4.22%
Full: 76648 -> 76595 (-0.07%)
(ss): 106018 -> 108034 (+1.90%); split: -0.56%, +2.46%
(sy): 48427 -> 48444 (+0.04%); split: -1.10%, +1.13%
(ss)-stall: 479348 -> 478116 (-0.26%); split: -0.80%, +0.54%
(sy)-stall: 1880900 -> 1878384 (-0.13%); split: -1.81%, +1.68%
Preamble Instrs: 1096452 -> 1011824 (-7.72%); split: -8.34%, +0.62%
Early Preamble: 0 -> 2616 (+inf%)
Last helper: 1313193 -> 1315565 (+0.18%); split: -0.10%, +0.29%
Cat0: 992161 -> 999094 (+0.70%); split: -0.23%, +0.93%
Cat1: 234329 -> 239979 (+2.41%); split: -1.21%, +3.62%
Cat7: 118722 -> 118228 (-0.42%); split: -2.42%, +2.00%

The regressions in NOPs/MOVs seem to be cases of bad luck in
RA/scheduling. I looked at a couple of cases and the main shader is
essentially the same before RA. It's a bit unfortunate the differences
in the preamble can have such an impact on the main shader...

Signed-off-by: Job Noorman
Part-of:
---
 src/freedreno/ir3/ir3_compiler_nir.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index eef0c7d7fa5..c3a6ff79ae8 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -1212,8 +1212,11 @@ emit_intrinsic_copy_ubo_to_uniform(struct ir3_context *ctx,
 
    struct ir3_instruction *addr1 = ir3_create_addr1(&ctx->build, base);
 
-   struct ir3_instruction *offset = ir3_get_src(ctx, &intr->src[1])[0];
-   struct ir3_instruction *idx = ir3_get_src(ctx, &intr->src[0])[0];
+   bool use_shared = ctx->compiler->has_scalar_alu;
+   struct ir3_instruction *offset =
+      ir3_get_src_shared(ctx, &intr->src[1], use_shared)[0];
+   struct ir3_instruction *idx =
+      ir3_get_src_shared(ctx, &intr->src[0], use_shared)[0];
    struct ir3_instruction *ldc = ir3_LDC_K(b, idx, 0, offset, 0);
    ldc->cat6.iim_val = size;
    ldc->barrier_class = ldc->barrier_conflict = IR3_BARRIER_CONST_W;