From 09b99f1b7c73eaeda60c14c4ded779ead6edfde8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Tue, 26 Jan 2021 18:49:58 +0100 Subject: [PATCH] aco/ra: refactor affinity coalescing Also adds v_interp_p2_f32 to the list of affinity-related instructions. Totals from 68 (0.05% of 149839) affected shaders (GFX10.3): CodeSize: 792928 -> 792056 (-0.11%) Instrs: 152843 -> 152625 (-0.14%) Latency: 1235353 -> 1235278 (-0.01%) InvThroughput: 224087 -> 224049 (-0.02%) Copies: 9218 -> 9000 (-2.36%) Reviewed-by: Tony Wasserka Part-of: --- src/amd/compiler/aco_register_allocation.cpp | 31 ++++++++++++++++---- 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index 736c3e786c6..4d519cd884b 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -2095,14 +2095,33 @@ void get_affinities(ra_ctx& ctx, std::vector& live_out_per_block) phi_ressources[it->second][0] = def.getTemp(); /* try to coalesce phi affinities with parallelcopies */ Operand op = Operand(); - if (!def.isFixed() && instr->opcode == aco_opcode::p_parallelcopy) + switch (instr->opcode) { + case aco_opcode::p_parallelcopy: op = instr->operands[i]; - else if ((instr->opcode == aco_opcode::v_mad_f32 || - (instr->opcode == aco_opcode::v_fma_f32 && ctx.program->chip_class >= GFX10) || - instr->opcode == aco_opcode::v_mad_f16 || - instr->opcode == aco_opcode::v_mad_legacy_f16 || - (instr->opcode == aco_opcode::v_fma_f16 && ctx.program->chip_class >= GFX10)) && !instr->usesModifiers()) + break; + + case aco_opcode::v_interp_p2_f32: + case aco_opcode::v_writelane_b32: + case aco_opcode::v_writelane_b32_e64: op = instr->operands[2]; + break; + + case aco_opcode::v_fma_f32: + case aco_opcode::v_fma_f16: + case aco_opcode::v_pk_fma_f16: + if (ctx.program->chip_class < GFX10) + continue; + FALLTHROUGH; + case aco_opcode::v_mad_f32: + case aco_opcode::v_mad_f16: + if (instr->usesModifiers()) + continue; + op = instr->operands[2]; + break; + + default: + continue; + } if (op.isTemp() && op.isFirstKillBeforeDef() && def.regClass() == op.regClass()) { phi_ressources[it->second].emplace_back(op.getTemp());