From 0f32b573a4457bcac7762c8a5e85b3a033479408 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Tue, 2 Sep 2025 15:40:03 +0100 Subject: [PATCH] aco/gfx10: skip waitcnts or use vm_vsrc(0) for workgroup lds barriers fossil-db (navi21): Totals from 36594 (45.84% of 79825) affected shaders: Instrs: 19922581 -> 19922563 (-0.00%) CodeSize: 103616980 -> 103616956 (-0.00%) Latency: 69862064 -> 69053273 (-1.16%) InvThroughput: 14607708 -> 14606308 (-0.01%); split: -0.01%, +0.00% fossil-db (navi31): Totals from 1641 (2.06% of 79825) affected shaders: Instrs: 1247591 -> 1247875 (+0.02%); split: -0.00%, +0.03% CodeSize: 6259516 -> 6260612 (+0.02%); split: -0.00%, +0.02% Latency: 7657224 -> 7577299 (-1.04%); split: -1.05%, +0.00% InvThroughput: 1150669 -> 1148171 (-0.22%); split: -0.22%, +0.00% Signed-off-by: Rhys Perry Reviewed-by: Georg Lehmann Part-of: --- src/amd/compiler/aco_insert_waitcnt.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp index f3ec2402d4c..97cf9229255 100644 --- a/src/amd/compiler/aco_insert_waitcnt.cpp +++ b/src/amd/compiler/aco_insert_waitcnt.cpp @@ -487,6 +487,7 @@ finish_barrier_internal(wait_ctx& ctx, wait_imm& imm, depctr_wait& depctr, Instr if (info->scope[storage_idx] <= scope_workgroup) { bool is_vmem = instr->isVMEM() || (instr->isFlatLike() && !instr->flatlike().may_use_lds); + bool is_lds = instr->isDS() && !instr->ds().gds; bool is_barrier = instr->isBarrier(); /* This is only called for control barriers. */ /* In non-WGP, the L1 (L0 on GFX10+) cache keeps all memory operations in-order for the same @@ -496,6 +497,13 @@ finish_barrier_internal(wait_ctx& ctx, wait_imm& imm, depctr_wait& depctr, Instr events &= ~(event_vmem | event_vmem_store); vm_vsrc |= is_barrier && ctx.gfx_level >= GFX10; } + + /* Similar for LDS. */ + if ((events & event_lds) && + (is_lds || (is_barrier && ctx.gfx_level >= GFX10 && !ctx.program->wgp_mode))) { + events &= ~event_lds; + vm_vsrc |= is_barrier; + } } if (events)