From d64f5a3f9da19a6de5969dcb6f3f32f8cec538af Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Fri, 27 Nov 2020 16:37:07 +0000 Subject: [PATCH] aco: move VMEM instructions below descriptor loads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is to prevent sequences like: a = descriptor_load() vmem(a) b = descriptor_load() vmem(b) and instead create: a = descriptor_load() b = descriptor_load() vmem(a) vmem(b) fossil-db (GFX10.3): Totals from 114521 (78.30% of 146267) affected shaders: VGPRs: 4540352 -> 4540216 (-0.00%); split: -0.03%, +0.02% CodeSize: 289864228 -> 289114652 (-0.26%); split: -0.29%, +0.03% MaxWaves: 2940234 -> 2940338 (+0.00%); split: +0.00%, -0.00% Instrs: 55112418 -> 54919910 (-0.35%); split: -0.38%, +0.03% Latency: 956528393 -> 954682011 (-0.19%); split: -0.24%, +0.05% InvThroughput: 229280830 -> 229238107 (-0.02%); split: -0.04%, +0.02% VClause: 1141832 -> 1139002 (-0.25%); split: -0.63%, +0.38% SClause: 2357840 -> 2225008 (-5.63%); split: -6.01%, +0.38% Copies: 3316040 -> 3331519 (+0.47%); split: -0.31%, +0.77% Branches: 1187212 -> 1186919 (-0.02%); split: -0.03%, +0.01% Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_scheduler.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/amd/compiler/aco_scheduler.cpp b/src/amd/compiler/aco_scheduler.cpp index 89c68baa81a..ff88c0d0bc3 100644 --- a/src/amd/compiler/aco_scheduler.cpp +++ b/src/amd/compiler/aco_scheduler.cpp @@ -645,7 +645,8 @@ void schedule_SMEM(sched_ctx& ctx, Block* block, /* break when encountering another MEM instruction, logical_start or barriers */ if (candidate->opcode == aco_opcode::p_logical_start) break; - if (candidate->isVMEM()) + /* only move VMEM instructions below descriptor loads. be more aggressive at higher num_waves to help create more vmem clauses */ + if (candidate->isVMEM() && (cursor.insert_idx - cursor.source_idx > (ctx.num_waves * 4) || current->operands[0].size() == 4)) break; /* don't move descriptor loads below buffer loads */ if (candidate->format == Format::SMEM && current->operands[0].size() == 4 && candidate->operands[0].size() == 2)