diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp index 9f8ac442da2..cf7e136183c 100644 --- a/src/amd/compiler/aco_insert_NOPs.cpp +++ b/src/amd/compiler/aco_insert_NOPs.cpp @@ -1496,7 +1496,7 @@ handle_instruction_gfx11(State& state, NOP_ctx_gfx11& ctx, aco_ptr& unsigned reg = op.physReg() + i; /* s_waitcnt_depctr on sa_sdst */ - if (ctx.sgpr_read_by_valu_as_lanemask_then_wr_by_salu[reg]) { + if (ctx.sgpr_read_by_valu_as_lanemask_then_wr_by_salu[reg] && wait.sa_sdst > 0) { imm &= 0xfffe; wait.sa_sdst = 0; } @@ -1504,11 +1504,13 @@ handle_instruction_gfx11(State& state, NOP_ctx_gfx11& ctx, aco_ptr& /* s_waitcnt_depctr on va_sdst (if non-VCC SGPR) or va_vcc (if VCC SGPR) */ if (ctx.sgpr_read_by_valu_as_lanemask_then_wr_by_valu[reg]) { bool is_vcc = reg == vcc || reg == vcc_hi; - imm &= is_vcc ? 0xfffd : 0xf1ff; - if (is_vcc) + if (is_vcc && wait.va_vcc > 0) { + imm &= 0xfffd; wait.va_vcc = 0; - else + } else if (!is_vcc && wait.va_sdst > 0) { + imm &= 0xf1ff; wait.va_sdst = 0; + } } } }