diff --git a/src/amd/compiler/aco_ssa_elimination.cpp b/src/amd/compiler/aco_ssa_elimination.cpp index 9d14092aa3e..b4fa696b429 100644 --- a/src/amd/compiler/aco_ssa_elimination.cpp +++ b/src/amd/compiler/aco_ssa_elimination.cpp @@ -370,8 +370,6 @@ try_optimize_branching_sequence(ssa_elimination_ctx& ctx, Block& block, const in */ const bool can_remove_copy = exec_copy->operands[0].isKill() || (vopc && ctx.program->gfx_level < GFX10); - /* Whether exec_val and exec_copy are adjacent (with p_logical_end inbetween). */ - const bool val_and_copy_adjacent = exec_val_idx == exec_copy_idx - 2; /* Always allow reassigning when the value is written by (usable) VOPC. * Note, VOPC implicitly contains "& exec" because it yields zero on inactive lanes. @@ -390,31 +388,29 @@ try_optimize_branching_sequence(ssa_elimination_ctx& ctx, Block& block, const in const Definition exec_wr_def = exec_val->definitions[0]; const Definition exec_copy_def = exec_copy->definitions[0]; - if (!val_and_copy_adjacent) { - /* When exec_val and exec_copy are non-adjacent, check whether there are any - * instructions inbetween (besides p_logical_end) which may inhibit the optimization. - */ - for (int idx = exec_val_idx + 1; idx < exec_copy_idx; ++idx) { - aco_ptr& instr = block.instructions[idx]; + /* When exec_val and exec_copy are non-adjacent, check whether there are any + * instructions inbetween (besides p_logical_end) which may inhibit the optimization. + */ + for (int idx = exec_val_idx + 1; idx < exec_copy_idx; ++idx) { + aco_ptr& instr = block.instructions[idx]; - if (save_original_exec) { - /* Check if the instruction uses the exec_copy_def register, in which case we can't - * optimize. */ - for (const Operand& op : instr->operands) - if (regs_intersect(exec_copy_def, op)) - return; - for (const Definition& def : instr->definitions) - if (regs_intersect(exec_copy_def, def)) - return; - } - - /* Check if the instruction may implicitly read VCC, eg. v_cndmask or add with carry. - * Some of these may be fine to convert to VOP3 but there are edge cases, eg. SDWA. - * Better leave these instructions alone. - */ - if (instr->isVALU() && instr->operands.size() >= 3 && !instr->isVOP3()) - return; + if (save_original_exec) { + /* Check if the instruction uses the exec_copy_def register, in which case we can't + * optimize. */ + for (const Operand& op : instr->operands) + if (regs_intersect(exec_copy_def, op)) + return; + for (const Definition& def : instr->definitions) + if (regs_intersect(exec_copy_def, def)) + return; } + + /* Check if the instruction may implicitly read VCC, eg. v_cndmask or add with carry. + * Some of these may be fine to convert to VOP3 but there are edge cases, eg. SDWA. + * Better leave these instructions alone. + */ + if (instr->isVALU() && instr->operands.size() >= 3 && !instr->isVOP3()) + return; } if (save_original_exec) { @@ -471,19 +467,17 @@ try_optimize_branching_sequence(ssa_elimination_ctx& ctx, Block& block, const in exec_val->definitions[0] = Definition(exec, ctx.program->lane_mask); } - if (!val_and_copy_adjacent) { - /* If there are other instructions (besides p_logical_end) between - * writing the value and copying it to exec, reassign uses - * of the old definition. - */ - for (int idx = exec_val_idx + 1; idx < exec_copy_idx; ++idx) { - aco_ptr& instr = block.instructions[idx]; - for (Operand& op : instr->operands) { - if (op.physReg() == exec_wr_def.physReg()) - op = Operand(exec, op.regClass()); - if (exec_wr_def.size() == 2 && op.physReg() == exec_wr_def.physReg().advance(4)) - op = Operand(exec_hi, op.regClass()); - } + /* If there are other instructions (besides p_logical_end) between + * writing the value and copying it to exec, reassign uses + * of the old definition. + */ + for (int idx = exec_val_idx + 1; idx < exec_copy_idx; ++idx) { + aco_ptr& instr = block.instructions[idx]; + for (Operand& op : instr->operands) { + if (op.physReg() == exec_wr_def.physReg()) + op = Operand(exec, op.regClass()); + if (exec_wr_def.size() == 2 && op.physReg() == exec_wr_def.physReg().advance(4)) + op = Operand(exec_hi, op.regClass()); } }