diff --git a/src/amd/compiler/aco_scheduler.cpp b/src/amd/compiler/aco_scheduler.cpp index 21597766df5..d6cf9b48852 100644 --- a/src/amd/compiler/aco_scheduler.cpp +++ b/src/amd/compiler/aco_scheduler.cpp @@ -920,12 +920,6 @@ schedule_VMEM(sched_ctx& ctx, Block* block, Instruction* current, int idx) if (candidate->opcode == aco_opcode::p_logical_start) break; - /* break if we'd make the previous SMEM instruction stall */ - bool can_stall_prev_smem = - idx <= ctx.last_SMEM_dep_idx && candidate_idx < ctx.last_SMEM_dep_idx; - if (can_stall_prev_smem && ctx.last_SMEM_stall >= 0) - break; - if (should_form_clause(current, candidate.get())) { /* We can't easily tell how much this will decrease the def-to-use * distances, so just use how far it will be moved as a heuristic. */ @@ -938,6 +932,12 @@ schedule_VMEM(sched_ctx& ctx, Block* block, Instruction* current, int idx) break; } + /* Break if we'd make the previous SMEM instruction stall. */ + bool can_stall_prev_smem = + idx <= ctx.last_SMEM_dep_idx && candidate_idx < ctx.last_SMEM_dep_idx; + if (!part_of_clause && can_stall_prev_smem && ctx.last_SMEM_stall >= 0) + break; + /* if current depends on candidate, add additional dependencies and continue */ bool can_move_down = !is_vmem || part_of_clause || candidate->definitions.empty(); if (only_clauses) { @@ -985,10 +985,9 @@ schedule_VMEM(sched_ctx& ctx, Block* block, Instruction* current, int idx) continue; } k++; + if (candidate_idx < ctx.last_SMEM_dep_idx) + ctx.last_SMEM_stall++; } - - if (candidate_idx < ctx.last_SMEM_dep_idx) - ctx.last_SMEM_stall++; } /* find the first instruction depending on current or find another VMEM */