From a9645fdd89251b33ffeb157694382167ed17feb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Wed, 2 Apr 2025 10:59:32 +0200 Subject: [PATCH] aco: introduce concept of vector-aligned Operands Operand::isVectorAligned indicates that the Operand is part of a vector consisting of multiple operands. Therefore, it must reside in a register aligned with the next Operand. Part-of: --- src/amd/compiler/aco_ir.h | 7 +++++ src/amd/compiler/aco_live_var_analysis.cpp | 35 ++++++++++++++++++---- 2 files changed, 37 insertions(+), 5 deletions(-) diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index 3e8396eb338..099a446c036 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -856,6 +856,12 @@ public: constexpr bool isFirstKillBeforeDef() const noexcept { return isFirstKill() && !isLateKill(); } + /* Indicates that the Operand is part of a vector consisting of multiple operands. + * Therefore, it must reside in a register aligned with the next Operand. + */ + constexpr void setVectorAligned(bool flag) noexcept { isVectorAligned_ = flag; } + constexpr bool isVectorAligned() const noexcept { return isVectorAligned_; } + constexpr bool operator==(Operand other) const noexcept { if (other.bytes() != bytes()) @@ -906,6 +912,7 @@ private: uint8_t isLateKill_ : 1; uint8_t isClobbered_ : 1; uint8_t isCopyKill_ : 1; + uint8_t isVectorAligned_ : 1; uint8_t is16bit_ : 1; uint8_t is24bit_ : 1; uint8_t signext : 1; diff --git a/src/amd/compiler/aco_live_var_analysis.cpp b/src/amd/compiler/aco_live_var_analysis.cpp index c2dbdf1e442..bf3d66cde66 100644 --- a/src/amd/compiler/aco_live_var_analysis.cpp +++ b/src/amd/compiler/aco_live_var_analysis.cpp @@ -216,17 +216,23 @@ process_live_temps_per_block(live_ctx& ctx, Block* block) /* we need to do this in a separate loop because the next one can * setKill() for several operands at once and we don't want to * overwrite that in a later iteration */ + bool is_vector_op = false; for (Operand& op : insn->operands) { op.setKill(false); /* Linear vgprs must be late kill: this is to ensure linear VGPR operands and * normal VGPR definitions don't try to use the same register, which is problematic * because of assignment restrictions. */ - if (op.hasRegClass() && op.regClass().is_linear_vgpr() && !op.isUndefined() && - has_vgpr_def) - op.setLateKill(true); - else - op.setLateKill(false); + bool lateKill = + op.hasRegClass() && op.regClass().is_linear_vgpr() && !op.isUndefined() && has_vgpr_def; + + /* If this Operand is part of a vector which is only partially killed by the instruction, + * a definition might not fit into the gaps that get created. Mitigate by using lateKill. + */ + // TODO: is it beneficial to skip that if the vector is fully killed? + lateKill |= is_vector_op || op.isVectorAligned(); + op.setLateKill(lateKill); + is_vector_op = op.isVectorAligned(); } if (ctx.program->gfx_level >= GFX10 && insn->isVALU() && @@ -305,6 +311,25 @@ process_live_temps_per_block(live_ctx& ctx, Block* block) } } } + /* If this operand is part of a vector, check if the temporary needs to be duplicated. */ + if (is_vector_op || operand.isVectorAligned()) { + /* Set copyKill if any other vector-operand uses the same temporary. If a scalar operand + * uses the same temporary, assume that it can share the register. This ignores other + * register constraints like tied definitions or precolored registers. + */ + bool other_is_vector_op = false; + for (unsigned j = 0; j < i; j++) { + if ((other_is_vector_op || insn->operands[j].isVectorAligned()) && + insn->operands[j].getTemp() == temp) { + operand_demand += temp; + insn->register_demand += temp; /* Because of lateKill */ + operand.setCopyKill(true); + break; + } + other_is_vector_op = insn->operands[j].isVectorAligned(); + } + } + is_vector_op = operand.isVectorAligned(); if (operand.isLateKill()) { /* Make sure that same temporaries have same lateKill flags. */