aco: make store clauses more aggressively
Apparently this significantly improves performance of a radeonsi resolve shader.

fossil-db (navi31):
Totals from 2372 (2.99% of 79395) affected shaders:
MaxWaves: 59903 -> 59863 (-0.07%)
Instrs: 3508838 -> 3506178 (-0.08%); split: -0.10%, +0.02%
CodeSize: 18516272 -> 18505956 (-0.06%); split: -0.07%, +0.02%
VGPRs: 152708 -> 154604 (+1.24%)
Latency: 27881253 -> 27861445 (-0.07%); split: -0.07%, +0.00%
InvThroughput: 4076649 -> 4076220 (-0.01%); split: -0.03%, +0.02%
VClause: 92696 -> 89409 (-3.55%); split: -3.55%, +0.01%
Copies: 310787 -> 311697 (+0.29%); split: -0.03%, +0.32%
VALU: 1891048 -> 1891933 (+0.05%); split: -0.01%, +0.05%
VOPD: 2534 -> 2559 (+0.99%); split: +1.07%, -0.08%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/11014
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28763>
This commit is contained in:
@@ -20,6 +20,7 @@
|
||||
#define VMEM_MAX_MOVES (256 - ctx.num_waves * 16)
|
||||
/* creating clauses decreases def-use distances, so make it less aggressive the lower num_waves is */
|
||||
#define VMEM_CLAUSE_MAX_GRAB_DIST (ctx.num_waves * 2)
|
||||
#define VMEM_STORE_CLAUSE_MAX_GRAB_DIST (ctx.num_waves * 4)
|
||||
#define POS_EXP_MAX_MOVES 512
|
||||
|
||||
namespace aco {
|
||||
@@ -1035,7 +1036,7 @@ schedule_VMEM_store(sched_ctx& ctx, Block* block, std::vector<RegisterDemand>& r
|
||||
DownwardsCursor cursor = ctx.mv.downwards_init(idx, true, true);
|
||||
int skip = 0;
|
||||
|
||||
for (int i = 0; (i - skip) < VMEM_CLAUSE_MAX_GRAB_DIST; i++) {
|
||||
for (int i = 0; (i - skip) < VMEM_STORE_CLAUSE_MAX_GRAB_DIST; i++) {
|
||||
aco_ptr<Instruction>& candidate = block->instructions[cursor.source_idx];
|
||||
if (candidate->opcode == aco_opcode::p_logical_start)
|
||||
break;
|
||||
|
||||
Reference in New Issue
Block a user