From 0bc8a9be67407382da25cef7e11c2a56ae1f72c1 Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Mon, 15 Apr 2024 11:20:48 +0100
Subject: [PATCH] aco: make store clauses more aggressively
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Apparently this significantly improves performance of a radeonsi resolve shader.

fossil-db (navi31):
Totals from 2372 (2.99% of 79395) affected shaders:
MaxWaves: 59903 -> 59863 (-0.07%)
Instrs: 3508838 -> 3506178 (-0.08%); split: -0.10%, +0.02%
CodeSize: 18516272 -> 18505956 (-0.06%); split: -0.07%, +0.02%
VGPRs: 152708 -> 154604 (+1.24%)
Latency: 27881253 -> 27861445 (-0.07%); split: -0.07%, +0.00%
InvThroughput: 4076649 -> 4076220 (-0.01%); split: -0.03%, +0.02%
VClause: 92696 -> 89409 (-3.55%); split: -3.55%, +0.01%
Copies: 310787 -> 311697 (+0.29%); split: -0.03%, +0.32%
VALU: 1891048 -> 1891933 (+0.05%); split: -0.01%, +0.05%
VOPD: 2534 -> 2559 (+0.99%); split: +1.07%, -0.08%

Signed-off-by: Rhys Perry
Reviewed-by: Daniel Schürmann
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/11014
Part-of:
---
 src/amd/compiler/aco_scheduler.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/amd/compiler/aco_scheduler.cpp b/src/amd/compiler/aco_scheduler.cpp
index 20a08ac7636..11bb77720e2 100644
--- a/src/amd/compiler/aco_scheduler.cpp
+++ b/src/amd/compiler/aco_scheduler.cpp
@@ -20,6 +20,7 @@
 #define VMEM_MAX_MOVES (256 - ctx.num_waves * 16)
 /* creating clauses decreases def-use distances, so make it less aggressive the lower num_waves is */
 #define VMEM_CLAUSE_MAX_GRAB_DIST (ctx.num_waves * 2)
+#define VMEM_STORE_CLAUSE_MAX_GRAB_DIST (ctx.num_waves * 4)
 #define POS_EXP_MAX_MOVES 512
 
 namespace aco {
@@ -1035,7 +1036,7 @@ schedule_VMEM_store(sched_ctx& ctx, Block* block, std::vector<RegisterDemand>& r
    DownwardsCursor cursor = ctx.mv.downwards_init(idx, true, true);
    int skip = 0;
 
-   for (int i = 0; (i - skip) < VMEM_CLAUSE_MAX_GRAB_DIST; i++) {
+   for (int i = 0; (i - skip) < VMEM_STORE_CLAUSE_MAX_GRAB_DIST; i++) {
       aco_ptr<Instruction>& candidate = block->instructions[cursor.source_idx];
       if (candidate->opcode == aco_opcode::p_logical_start)
          break;