aco/ra: rewrite handling of tied definitions
The old version worked by precoloring both the operand and definition to whatever register the operand was at the time. This didn't allow moving the operand/definition after precoloring them, or two tied operands with the same temporary. The new version works by temporarily making the operands late kill and creating a copy if the temporary is live-through or used as another tied operand. Then we can simply later make the operands early kill again and assign the definitions to the operand's register. This way, we can move the operand to make space and the new location will not intersect with any other definition and won't cause the operand and definition registers to mismatch. fossil-db (gfx1201): Totals from 2253 (2.84% of 79377) affected shaders: Instrs: 1634747 -> 1630799 (-0.24%); split: -0.27%, +0.03% CodeSize: 8688148 -> 8672348 (-0.18%); split: -0.20%, +0.02% VGPRs: 106500 -> 106512 (+0.01%) Latency: 11385480 -> 11382965 (-0.02%); split: -0.04%, +0.01% InvThroughput: 1754430 -> 1754326 (-0.01%); split: -0.01%, +0.00% SClause: 38954 -> 38964 (+0.03%); split: -0.01%, +0.04% Copies: 110772 -> 110800 (+0.03%); split: -0.02%, +0.04% Branches: 29093 -> 29092 (-0.00%) VALU: 902011 -> 902008 (-0.00%) SALU: 260175 -> 260203 (+0.01%); split: -0.01%, +0.02% fossil-db (navi31): Totals from 2 (0.00% of 79377) affected shaders: Latency: 1766 -> 1765 (-0.06%) InvThroughput: 3219 -> 3215 (-0.12%) fossil-db (navi21): Totals from 14 (0.02% of 79377) affected shaders: (no affected stats) Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34700>
This commit is contained in:
@@ -1824,6 +1824,10 @@ get_reg(ra_ctx& ctx, const RegisterFile& reg_file, Temp temp,
|
||||
std::vector<parallelcopy>& parallelcopies, aco_ptr<Instruction>& instr,
|
||||
int operand_index = -1)
|
||||
{
|
||||
/* Note that "temp" might already be filled in the register file if we're making a copy of the
|
||||
* temporary.
|
||||
*/
|
||||
|
||||
auto split_vec = ctx.split_vectors.find(temp.id());
|
||||
if (split_vec != ctx.split_vectors.end()) {
|
||||
unsigned offset = 0;
|
||||
@@ -3138,6 +3142,68 @@ undo_renames(ra_ctx& ctx, std::vector<parallelcopy>& parallelcopies,
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
handle_operands_tied_to_definitions(ra_ctx& ctx, std::vector<parallelcopy>& parallelcopies,
|
||||
aco_ptr<Instruction>& instr, RegisterFile& reg_file,
|
||||
aco::small_vec<uint32_t, 2> tied_defs)
|
||||
{
|
||||
for (uint32_t op_idx : tied_defs) {
|
||||
Operand& op = instr->operands[op_idx];
|
||||
assert((!op.isLateKill() || op.isCopyKill()) && !op.isPrecolored());
|
||||
|
||||
/* If the operand is copy-kill, then there's an earlier operand which is also tied to a
|
||||
* definition but uses the same temporary as this one.
|
||||
*
|
||||
* We also need to copy if there is different definition which is precolored and intersects
|
||||
* with this operand, but we don't bother since it shouldn't happen.
|
||||
*/
|
||||
if (!op.isKill() || op.isCopyKill()) {
|
||||
PhysReg reg = get_reg(ctx, reg_file, op.getTemp(), parallelcopies, instr, op_idx);
|
||||
|
||||
/* update_renames() in case we moved this operand. */
|
||||
update_renames(ctx, reg_file, parallelcopies, instr);
|
||||
|
||||
Operand pc_op(op.getTemp());
|
||||
pc_op.setFixed(ctx.assignments[op.tempId()].reg);
|
||||
Definition pc_def(reg, op.regClass());
|
||||
parallelcopies.emplace_back(pc_op, pc_def, op_idx);
|
||||
|
||||
update_renames(ctx, reg_file, parallelcopies, instr);
|
||||
}
|
||||
|
||||
/* Flag the operand's temporary as lateKill. This serves as placeholder
|
||||
* for the tied definition until the instruction is fully handled.
|
||||
*/
|
||||
for (Operand& other_op : instr->operands) {
|
||||
if (other_op.isTemp() && other_op.getTemp() == op.getTemp())
|
||||
other_op.setLateKill(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
assign_tied_definitions(ra_ctx& ctx, aco_ptr<Instruction>& instr, RegisterFile& reg_file,
|
||||
aco::small_vec<uint32_t, 2> tied_defs)
|
||||
{
|
||||
unsigned fixed_def_idx = 0;
|
||||
for (auto op_idx : tied_defs) {
|
||||
Definition& def = instr->definitions[fixed_def_idx++];
|
||||
Operand& op = instr->operands[op_idx];
|
||||
assert(op.isKill());
|
||||
assert(def.regClass().type() == op.regClass().type() && def.size() <= op.size());
|
||||
|
||||
def.setFixed(op.physReg());
|
||||
ctx.assignments[def.tempId()].set(def);
|
||||
reg_file.clear(op);
|
||||
reg_file.fill(def);
|
||||
|
||||
for (Operand& other_op : instr->operands) {
|
||||
if (other_op.isTemp() && other_op.getTemp() == op.getTemp())
|
||||
other_op.setLateKill(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
emit_parallel_copy_internal(ra_ctx& ctx, std::vector<parallelcopy>& parallelcopy,
|
||||
aco_ptr<Instruction>& instr,
|
||||
@@ -3321,30 +3387,23 @@ register_allocation(Program* program, ra_test_policy policy)
|
||||
|
||||
optimize_encoding(ctx, register_file, instr);
|
||||
|
||||
auto tied_defs = get_tied_defs(instr.get());
|
||||
handle_operands_tied_to_definitions(ctx, parallelcopy, instr, register_file, tied_defs);
|
||||
|
||||
/* remove dead vars from register file */
|
||||
for (const Operand& op : instr->operands) {
|
||||
if (op.isTemp() && op.isFirstKillBeforeDef())
|
||||
register_file.clear(op);
|
||||
}
|
||||
|
||||
/* Handle definitions which must have the same register as an operand.
|
||||
* We expect that the definition has the same size as the operand, otherwise the new
|
||||
* location for the operand (if it's not killed) might intersect with the old one.
|
||||
* We can't read from the old location because it's corrupted, and we can't write the new
|
||||
* location because that's used by a live-through operand.
|
||||
*/
|
||||
unsigned fixed_def_idx = 0;
|
||||
for (auto op_idx : get_tied_defs(instr.get())) {
|
||||
instr->definitions[fixed_def_idx++].setPrecolored(instr->operands[op_idx].physReg());
|
||||
instr->operands[op_idx].setPrecolored(instr->operands[op_idx].physReg());
|
||||
}
|
||||
|
||||
/* handle fixed definitions first */
|
||||
for (unsigned i = 0; i < instr->definitions.size(); ++i) {
|
||||
auto& definition = instr->definitions[i];
|
||||
if (!definition.isFixed())
|
||||
continue;
|
||||
|
||||
assert(i >= tied_defs.size());
|
||||
|
||||
adjust_max_used_regs(ctx, definition.regClass(), definition.physReg());
|
||||
/* check if the target register is blocked */
|
||||
if (register_file.test(definition.physReg(), definition.bytes())) {
|
||||
@@ -3371,11 +3430,11 @@ register_allocation(Program* program, ra_test_policy policy)
|
||||
register_file.fill(definition);
|
||||
}
|
||||
|
||||
/* handle all other definitions */
|
||||
/* handle normal definitions */
|
||||
for (unsigned i = 0; i < instr->definitions.size(); ++i) {
|
||||
Definition* definition = &instr->definitions[i];
|
||||
|
||||
if (definition->isFixed() || !definition->isTemp())
|
||||
if (definition->isFixed() || !definition->isTemp() || i < tied_defs.size())
|
||||
continue;
|
||||
|
||||
/* find free reg */
|
||||
@@ -3450,8 +3509,11 @@ register_allocation(Program* program, ra_test_policy policy)
|
||||
register_file.fill(*definition);
|
||||
}
|
||||
|
||||
/* This avoids renaming tied operands because they are both killed and late-kill. */
|
||||
undo_renames(ctx, parallelcopy, instr);
|
||||
|
||||
assign_tied_definitions(ctx, instr, register_file, tied_defs);
|
||||
|
||||
handle_pseudo(ctx, register_file, instr.get());
|
||||
|
||||
/* kill definitions and late-kill operands and ensure that sub-dword operands can actually
|
||||
|
||||
Reference in New Issue
Block a user