aco: remove val_and_copy_adjacent
If this is true, then the only instruction the loops visit is p_logical_end and the loops are no-ops. Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Georg Lehmann <dadschoorse@gmail.com> Reviewed-by: Timur Kristóf <timur.kristof@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18077>
This commit is contained in:
@@ -370,8 +370,6 @@ try_optimize_branching_sequence(ssa_elimination_ctx& ctx, Block& block, const in
|
||||
*/
|
||||
const bool can_remove_copy =
|
||||
exec_copy->operands[0].isKill() || (vopc && ctx.program->gfx_level < GFX10);
|
||||
/* Whether exec_val and exec_copy are adjacent (with p_logical_end inbetween). */
|
||||
const bool val_and_copy_adjacent = exec_val_idx == exec_copy_idx - 2;
|
||||
|
||||
/* Always allow reassigning when the value is written by (usable) VOPC.
|
||||
* Note, VOPC implicitly contains "& exec" because it yields zero on inactive lanes.
|
||||
@@ -390,31 +388,29 @@ try_optimize_branching_sequence(ssa_elimination_ctx& ctx, Block& block, const in
|
||||
const Definition exec_wr_def = exec_val->definitions[0];
|
||||
const Definition exec_copy_def = exec_copy->definitions[0];
|
||||
|
||||
if (!val_and_copy_adjacent) {
|
||||
/* When exec_val and exec_copy are non-adjacent, check whether there are any
|
||||
* instructions inbetween (besides p_logical_end) which may inhibit the optimization.
|
||||
*/
|
||||
for (int idx = exec_val_idx + 1; idx < exec_copy_idx; ++idx) {
|
||||
aco_ptr<Instruction>& instr = block.instructions[idx];
|
||||
/* When exec_val and exec_copy are non-adjacent, check whether there are any
|
||||
* instructions inbetween (besides p_logical_end) which may inhibit the optimization.
|
||||
*/
|
||||
for (int idx = exec_val_idx + 1; idx < exec_copy_idx; ++idx) {
|
||||
aco_ptr<Instruction>& instr = block.instructions[idx];
|
||||
|
||||
if (save_original_exec) {
|
||||
/* Check if the instruction uses the exec_copy_def register, in which case we can't
|
||||
* optimize. */
|
||||
for (const Operand& op : instr->operands)
|
||||
if (regs_intersect(exec_copy_def, op))
|
||||
return;
|
||||
for (const Definition& def : instr->definitions)
|
||||
if (regs_intersect(exec_copy_def, def))
|
||||
return;
|
||||
}
|
||||
|
||||
/* Check if the instruction may implicitly read VCC, eg. v_cndmask or add with carry.
|
||||
* Some of these may be fine to convert to VOP3 but there are edge cases, eg. SDWA.
|
||||
* Better leave these instructions alone.
|
||||
*/
|
||||
if (instr->isVALU() && instr->operands.size() >= 3 && !instr->isVOP3())
|
||||
return;
|
||||
if (save_original_exec) {
|
||||
/* Check if the instruction uses the exec_copy_def register, in which case we can't
|
||||
* optimize. */
|
||||
for (const Operand& op : instr->operands)
|
||||
if (regs_intersect(exec_copy_def, op))
|
||||
return;
|
||||
for (const Definition& def : instr->definitions)
|
||||
if (regs_intersect(exec_copy_def, def))
|
||||
return;
|
||||
}
|
||||
|
||||
/* Check if the instruction may implicitly read VCC, eg. v_cndmask or add with carry.
|
||||
* Some of these may be fine to convert to VOP3 but there are edge cases, eg. SDWA.
|
||||
* Better leave these instructions alone.
|
||||
*/
|
||||
if (instr->isVALU() && instr->operands.size() >= 3 && !instr->isVOP3())
|
||||
return;
|
||||
}
|
||||
|
||||
if (save_original_exec) {
|
||||
@@ -471,19 +467,17 @@ try_optimize_branching_sequence(ssa_elimination_ctx& ctx, Block& block, const in
|
||||
exec_val->definitions[0] = Definition(exec, ctx.program->lane_mask);
|
||||
}
|
||||
|
||||
if (!val_and_copy_adjacent) {
|
||||
/* If there are other instructions (besides p_logical_end) between
|
||||
* writing the value and copying it to exec, reassign uses
|
||||
* of the old definition.
|
||||
*/
|
||||
for (int idx = exec_val_idx + 1; idx < exec_copy_idx; ++idx) {
|
||||
aco_ptr<Instruction>& instr = block.instructions[idx];
|
||||
for (Operand& op : instr->operands) {
|
||||
if (op.physReg() == exec_wr_def.physReg())
|
||||
op = Operand(exec, op.regClass());
|
||||
if (exec_wr_def.size() == 2 && op.physReg() == exec_wr_def.physReg().advance(4))
|
||||
op = Operand(exec_hi, op.regClass());
|
||||
}
|
||||
/* If there are other instructions (besides p_logical_end) between
|
||||
* writing the value and copying it to exec, reassign uses
|
||||
* of the old definition.
|
||||
*/
|
||||
for (int idx = exec_val_idx + 1; idx < exec_copy_idx; ++idx) {
|
||||
aco_ptr<Instruction>& instr = block.instructions[idx];
|
||||
for (Operand& op : instr->operands) {
|
||||
if (op.physReg() == exec_wr_def.physReg())
|
||||
op = Operand(exec, op.regClass());
|
||||
if (exec_wr_def.size() == 2 && op.physReg() == exec_wr_def.physReg().advance(4))
|
||||
op = Operand(exec_hi, op.regClass());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user