diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp index a8bba344c6d..396373e23d8 100644 --- a/src/amd/compiler/aco_insert_waitcnt.cpp +++ b/src/amd/compiler/aco_insert_waitcnt.cpp @@ -1049,6 +1049,11 @@ insert_wait_states(Program* program) std::stack> loop_header_indices; unsigned loop_progress = 0; + if (program->pending_lds_access) { + update_barrier_imm(in_ctx[0], get_counters_for_event(event_lds), event_lds, + memory_sync_info(storage_shared)); + } + for (Definition def : program->args_pending_vmem) { update_counters(in_ctx[0], event_vmem); insert_wait_entry(in_ctx[0], def, event_vmem); diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index 716d019432f..09b1da18503 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -2145,6 +2145,9 @@ public: std::vector args_pending_vmem; + /* True for a shader part whose previous shader part has LDS access; insert_wait_states() then records an event_lds barrier for storage_shared in in_ctx[0]. */ + bool pending_lds_access = false; + struct { FILE* output = stderr; bool shorten_messages = false;