From 70f0c065e847dc6fbf4b697d60ec6a860cb78245 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Tue, 5 Aug 2025 11:11:22 +0200 Subject: [PATCH] aco/scheduler: ignore potential SMEM stalls when forming clauses Totals from 4190 (5.25% of 79839) affected shaders: (Navi48) MaxWaves: 117020 -> 117014 (-0.01%) Instrs: 4801892 -> 4801547 (-0.01%); split: -0.06%, +0.05% CodeSize: 25327632 -> 25325500 (-0.01%); split: -0.05%, +0.04% VGPRs: 236452 -> 236488 (+0.02%) Latency: 30569070 -> 30539464 (-0.10%); split: -0.13%, +0.04% InvThroughput: 4891650 -> 4891062 (-0.01%); split: -0.03%, +0.01% VClause: 119615 -> 118763 (-0.71%); split: -1.02%, +0.31% SClause: 100482 -> 100297 (-0.18%); split: -0.44%, +0.26% Copies: 326644 -> 326756 (+0.03%); split: -0.19%, +0.22% Branches: 98982 -> 98980 (-0.00%) VALU: 2712397 -> 2712534 (+0.01%); split: -0.02%, +0.03% SALU: 591836 -> 591817 (-0.00%); split: -0.00%, +0.00% VOPD: 993 -> 987 (-0.60%); split: +0.20%, -0.81% Part-of: --- src/amd/compiler/aco_scheduler.cpp | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/amd/compiler/aco_scheduler.cpp b/src/amd/compiler/aco_scheduler.cpp index 21597766df5..d6cf9b48852 100644 --- a/src/amd/compiler/aco_scheduler.cpp +++ b/src/amd/compiler/aco_scheduler.cpp @@ -920,12 +920,6 @@ schedule_VMEM(sched_ctx& ctx, Block* block, Instruction* current, int idx) if (candidate->opcode == aco_opcode::p_logical_start) break; - /* break if we'd make the previous SMEM instruction stall */ - bool can_stall_prev_smem = - idx <= ctx.last_SMEM_dep_idx && candidate_idx < ctx.last_SMEM_dep_idx; - if (can_stall_prev_smem && ctx.last_SMEM_stall >= 0) - break; - if (should_form_clause(current, candidate.get())) { /* We can't easily tell how much this will decrease the def-to-use * distances, so just use how far it will be moved as a heuristic. */ @@ -938,6 +932,12 @@ schedule_VMEM(sched_ctx& ctx, Block* block, Instruction* current, int idx) break; } + /* Break if we'd make the previous SMEM instruction stall. */ + bool can_stall_prev_smem = + idx <= ctx.last_SMEM_dep_idx && candidate_idx < ctx.last_SMEM_dep_idx; + if (!part_of_clause && can_stall_prev_smem && ctx.last_SMEM_stall >= 0) + break; + /* if current depends on candidate, add additional dependencies and continue */ bool can_move_down = !is_vmem || part_of_clause || candidate->definitions.empty(); if (only_clauses) { @@ -985,10 +985,9 @@ schedule_VMEM(sched_ctx& ctx, Block* block, Instruction* current, int idx) continue; } k++; + if (candidate_idx < ctx.last_SMEM_dep_idx) + ctx.last_SMEM_stall++; } - - if (candidate_idx < ctx.last_SMEM_dep_idx) - ctx.last_SMEM_stall++; } /* find the first instruction depending on current or find another VMEM */