From b1d730982e50e3c029037cd5d94960319bf4e831 Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Fri, 14 Nov 2025 10:43:11 +0100
Subject: [PATCH] aco/insert_NOPs: remove redundant VALUMaskWriteHazard waits
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This removes a lot of VALU->SALU waits.

Foz-DB Navi31:
Totals from 8908 (10.84% of 82179) affected shaders:
Instrs: 17118986 -> 17084870 (-0.20%)
CodeSize: 91057212 -> 90919300 (-0.15%); split: -0.15%, +0.00%
Latency: 154044128 -> 154036848 (-0.00%); split: -0.00%, +0.00%
InvThroughput: 26608698 -> 26607933 (-0.00%); split: -0.00%, +0.00%

Reviewed-by: Daniel Schürmann
Part-of:
---
 src/amd/compiler/aco_insert_NOPs.cpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp
index 9f8ac442da2..cf7e136183c 100644
--- a/src/amd/compiler/aco_insert_NOPs.cpp
+++ b/src/amd/compiler/aco_insert_NOPs.cpp
@@ -1496,7 +1496,7 @@ handle_instruction_gfx11(State& state, NOP_ctx_gfx11& ctx, aco_ptr&
             unsigned reg = op.physReg() + i;
 
             /* s_waitcnt_depctr on sa_sdst */
-            if (ctx.sgpr_read_by_valu_as_lanemask_then_wr_by_salu[reg]) {
+            if (ctx.sgpr_read_by_valu_as_lanemask_then_wr_by_salu[reg] && wait.sa_sdst > 0) {
                imm &= 0xfffe;
                wait.sa_sdst = 0;
             }
@@ -1504,11 +1504,13 @@ handle_instruction_gfx11(State& state, NOP_ctx_gfx11& ctx, aco_ptr&
             /* s_waitcnt_depctr on va_sdst (if non-VCC SGPR) or va_vcc (if VCC SGPR) */
             if (ctx.sgpr_read_by_valu_as_lanemask_then_wr_by_valu[reg]) {
                bool is_vcc = reg == vcc || reg == vcc_hi;
-               imm &= is_vcc ? 0xfffd : 0xf1ff;
-               if (is_vcc)
+               if (is_vcc && wait.va_vcc > 0) {
+                  imm &= 0xfffd;
                   wait.va_vcc = 0;
-               else
+               } else if (!is_vcc && wait.va_sdst > 0) {
+                  imm &= 0xf1ff;
                   wait.va_sdst = 0;
+               }
             }
          }
       }