From fd19ff0b9eb93d7bc4a5e0b2d2ae8db5a816ff56 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Wed, 27 Nov 2024 14:51:32 +0000 Subject: [PATCH] aco: force linear for event_vmem_sample and event_vmem_bvh I don't know if this issue affects GFX12, but work around it anyway to be safe. fossil-db (gfx1200): Totals from 3463 (4.36% of 79395) affected shaders: Instrs: 9794280 -> 9833253 (+0.40%); split: -0.00%, +0.40% CodeSize: 52306040 -> 52457988 (+0.29%); split: -0.01%, +0.30% Latency: 90549385 -> 93617517 (+3.39%); split: -0.00%, +3.39% InvThroughput: 13189030 -> 13602942 (+3.14%); split: -0.00%, +3.14% Signed-off-by: Rhys Perry Reviewed-by: Georg Lehmann Part-of: --- src/amd/compiler/aco_insert_waitcnt.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp index 523da86fb3a..c4365597b84 100644 --- a/src/amd/compiler/aco_insert_waitcnt.cpp +++ b/src/amd/compiler/aco_insert_waitcnt.cpp @@ -562,7 +562,8 @@ insert_wait_entry(wait_ctx& ctx, Definition def, wait_event event, uint8_t vmem_ /* We can't safely write to unwritten destination VGPR lanes with DS/VMEM on GFX11 without * waiting for the load to finish. */ - uint32_t ds_vmem_events = event_lds | event_gds | event_vmem | event_flat; + uint32_t ds_vmem_events = + event_lds | event_gds | event_vmem | event_vmem_sample | event_vmem_bvh | event_flat; bool force_linear = ctx.gfx_level >= GFX11 && (event & ds_vmem_events); insert_wait_entry(ctx, def.physReg(), def.regClass(), event, true, vmem_types, force_linear);