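aco/scheduler: ignore potential SMEM stalls when forming clauses

In schedule_VMEM(), the check that stops the search when moving a candidate
could stall the previous SMEM instruction now runs after the clause check and
is skipped when the candidate is part of a clause (`part_of_clause`).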

Totals from 4190 (5.25% of 79839) affected shaders: (Navi48)

MaxWaves: 117020 -> 117014 (-0.01%)
Instrs: 4801892 -> 4801547 (-0.01%); split: -0.06%, +0.05%
CodeSize: 25327632 -> 25325500 (-0.01%); split: -0.05%, +0.04%
VGPRs: 236452 -> 236488 (+0.02%)
Latency: 30569070 -> 30539464 (-0.10%); split: -0.13%, +0.04%
InvThroughput: 4891650 -> 4891062 (-0.01%); split: -0.03%, +0.01%
VClause: 119615 -> 118763 (-0.71%); split: -1.02%, +0.31%
SClause: 100482 -> 100297 (-0.18%); split: -0.44%, +0.26%
Copies: 326644 -> 326756 (+0.03%); split: -0.19%, +0.22%
Branches: 98982 -> 98980 (-0.00%)
VALU: 2712397 -> 2712534 (+0.01%); split: -0.02%, +0.03%
SALU: 591836 -> 591817 (-0.00%); split: -0.00%, +0.00%
VOPD: 993 -> 987 (-0.60%); split: +0.20%, -0.81%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36599>
This commit is contained in:
Daniel Schürmann
2025-08-05 11:11:22 +02:00
committed by Marge Bot
parent d3a0f268b9
commit 70f0c065e8
+8 -9
View File
@@ -920,12 +920,6 @@ schedule_VMEM(sched_ctx& ctx, Block* block, Instruction* current, int idx)
if (candidate->opcode == aco_opcode::p_logical_start)
break;
/* break if we'd make the previous SMEM instruction stall */
bool can_stall_prev_smem =
idx <= ctx.last_SMEM_dep_idx && candidate_idx < ctx.last_SMEM_dep_idx;
if (can_stall_prev_smem && ctx.last_SMEM_stall >= 0)
break;
if (should_form_clause(current, candidate.get())) {
/* We can't easily tell how much this will decrease the def-to-use
* distances, so just use how far it will be moved as a heuristic. */
@@ -938,6 +932,12 @@ schedule_VMEM(sched_ctx& ctx, Block* block, Instruction* current, int idx)
break;
}
/* Break if we'd make the previous SMEM instruction stall. */
bool can_stall_prev_smem =
idx <= ctx.last_SMEM_dep_idx && candidate_idx < ctx.last_SMEM_dep_idx;
if (!part_of_clause && can_stall_prev_smem && ctx.last_SMEM_stall >= 0)
break;
/* if current depends on candidate, add additional dependencies and continue */
bool can_move_down = !is_vmem || part_of_clause || candidate->definitions.empty();
if (only_clauses) {
@@ -985,10 +985,9 @@ schedule_VMEM(sched_ctx& ctx, Block* block, Instruction* current, int idx)
continue;
}
k++;
if (candidate_idx < ctx.last_SMEM_dep_idx)
ctx.last_SMEM_stall++;
}
if (candidate_idx < ctx.last_SMEM_dep_idx)
ctx.last_SMEM_stall++;
}
/* find the first instruction depending on current or find another VMEM */