From fd77cc7c32ad8ce5763e2b12823a9fccb9b998bc Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Wed, 5 Feb 2025 19:44:44 +0100 Subject: [PATCH] ac/nir/lower_ps: move exports after packing alu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If ACO's wqm section ends just before the first export, this mixing of alu and exports means the alu in question can't be reordered as much by the ILP scheduler. Foz-DB Navi31: Totals from 8959 (11.31% of 79188) affected shaders: Instrs: 5977212 -> 5978494 (+0.02%); split: -0.02%, +0.04% CodeSize: 32982732 -> 32987876 (+0.02%); split: -0.01%, +0.03% Latency: 35218073 -> 35216277 (-0.01%); split: -0.02%, +0.02% InvThroughput: 5149751 -> 5149696 (-0.00%); split: -0.00%, +0.00% SClause: 220552 -> 220551 (-0.00%); split: -0.01%, +0.01% PreVGPRs: 313203 -> 313069 (-0.04%); split: -0.06%, +0.01% Foz-DB Navi21: Totals from 8895 (11.21% of 79377) affected shaders: MaxWaves: 219280 -> 219272 (-0.00%); split: +0.00%, -0.01% Instrs: 5393330 -> 5393366 (+0.00%); split: -0.00%, +0.00% CodeSize: 29921900 -> 29922024 (+0.00%); split: -0.00%, +0.00% VGPRs: 406664 -> 406688 (+0.01%); split: -0.00%, +0.01% Latency: 35653975 -> 35652220 (-0.00%); split: -0.02%, +0.02% InvThroughput: 7992134 -> 7992032 (-0.00%); split: -0.00%, +0.00% SClause: 223784 -> 223786 (+0.00%) Copies: 370984 -> 370983 (-0.00%) PreVGPRs: 314323 -> 314330 (+0.00%); split: -0.01%, +0.01% VALU: 3800023 -> 3800022 (-0.00%) Reviewed-by: Marek Olšák Part-of: --- src/amd/common/nir/ac_nir_lower_ps_late.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/amd/common/nir/ac_nir_lower_ps_late.c b/src/amd/common/nir/ac_nir_lower_ps_late.c index 70bedabef2a..0c633ddc40d 100644 --- a/src/amd/common/nir/ac_nir_lower_ps_late.c +++ b/src/amd/common/nir/ac_nir_lower_ps_late.c @@ -650,6 +650,10 @@ export_ps_outputs(nir_builder *b, lower_ps_state *s) } if (s->exp_num) { + /* Move exports to the end to avoid mixing alu and exports. 
*/ + for (unsigned i = 0; i < s->exp_num; i++) + nir_instr_move(nir_after_impl(b->impl), &s->exp[i]->instr); + if (s->options->dual_src_blend_swizzle) { emit_ps_dual_src_blend_swizzle(b, s, first_color_export); /* Skip last export flag setting because they have been replaced by