From 062d4d83e1570583fe24537d488f607db7781cce Mon Sep 17 00:00:00 2001 From: Emma Anholt Date: Mon, 3 Oct 2022 16:49:15 -0700 Subject: [PATCH] ir3: Re-fuse ffmas after nir_lower_imul cleanup breaks them. The nir_opt_algebraic() call to clean up nir_lower_imul's split up mul operations (stuff like "the top 16 bits were 0, no need to mul and add that part") would trigger the options->fuse_ffma_* early ffma splitting, so you need to call nir_opt_algebraic_late() again after that (which, in turn, requires a DCE). Gets us a lot more ffmas in Aztec Ruins high under zink/angle, but doesn't seem to change perf. shader-db highlights: total instructions in shared programs: 11574843 -> 10999629 (-4.97%) instructions in affected programs: 3308870 -> 2733656 (-17.38%) total dwords in shared programs: 24344722 -> 23230122 (-4.58%) dwords in affected programs: 6569568 -> 5454968 (-16.97%) total full in shared programs: 762616 -> 762224 (-0.05%) full in affected programs: 15505 -> 15113 (-2.53%) total stp in shared programs: 4046 -> 4050 (0.10%) stp in affected programs: 3372 -> 3376 (0.12%) total ldp in shared programs: 2166 -> 2170 (0.18%) ldp in affected programs: 1716 -> 1720 (0.23%) total (ss) in shared programs: 219541 -> 216261 (-1.49%) (ss) in affected programs: 23227 -> 19947 (-14.12%) total (sy) in shared programs: 101633 -> 101927 (0.29%) (sy) in affected programs: 8611 -> 8905 (3.41%) total waves in shared programs: 1501942 -> 1501772 (-0.01%) waves in affected programs: 1880 -> 1710 (-9.04%) Part-of: --- src/freedreno/ir3/ir3_context.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/freedreno/ir3/ir3_context.c b/src/freedreno/ir3/ir3_context.c index cacb43e51aa..46e771c58d6 100644 --- a/src/freedreno/ir3/ir3_context.c +++ b/src/freedreno/ir3/ir3_context.c @@ -84,6 +84,7 @@ ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader *shader, * in ir3_optimize_nir(): */ bool progress = false; + bool needs_late_alg = false; NIR_PASS(progress, ctx->s, nir_lower_locals_to_regs); /* we could need cleanup after lower_locals_to_regs */ @@ -91,6 +92,7 @@ ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader *shader, progress = false; NIR_PASS(progress, ctx->s, nir_opt_algebraic); NIR_PASS(progress, ctx->s, nir_opt_constant_folding); + needs_late_alg = true; } /* We want to lower nir_op_imul as late as possible, to catch also @@ -107,6 +109,13 @@ ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader *shader, NIR_PASS(progress, ctx->s, nir_opt_dead_write_vars); NIR_PASS(progress, ctx->s, nir_opt_dce); NIR_PASS(progress, ctx->s, nir_opt_constant_folding); + needs_late_alg = true; + } + + /* nir_opt_algebraic() above would have unfused our ffmas, re-fuse them. */ + if (needs_late_alg) { + NIR_PASS(progress, ctx->s, nir_opt_algebraic_late); + NIR_PASS(progress, ctx->s, nir_opt_dce); } /* Enable the texture pre-fetch feature only a4xx onwards. But