diff --git a/src/amd/compiler/aco_ssa_elimination.cpp b/src/amd/compiler/aco_ssa_elimination.cpp index fa66232befc..dd04f0ac178 100644 --- a/src/amd/compiler/aco_ssa_elimination.cpp +++ b/src/amd/compiler/aco_ssa_elimination.cpp @@ -295,6 +295,27 @@ regs_intersect(const T& a, const U& b) return a_hi > b_lo && b_hi > a_lo; } +template +bool +instr_accesses(Instruction* instr, const T& a, bool ignore_reads) +{ + if (!ignore_reads) { + for (const Operand& op : instr->operands) + if (regs_intersect(a, op)) + return true; + } + + for (const Definition& def : instr->definitions) + if (regs_intersect(a, def)) + return true; + + if (instr->isPseudo() && instr->pseudo().needs_scratch_reg && + regs_intersect(a, Definition(instr->pseudo().scratch_sgpr, s1))) + return true; + + return false; +} + void try_optimize_branching_sequence(ssa_elimination_ctx& ctx, Block& block, const int exec_val_idx, const int exec_copy_idx) @@ -394,24 +415,6 @@ try_optimize_branching_sequence(ssa_elimination_ctx& ctx, Block& block, const in /* When exec_val and exec_copy are non-adjacent, check whether there are any * instructions inbetween (besides p_logical_end) which may inhibit the optimization. */ - for (int idx = exec_val_idx + 1; idx < exec_copy_idx; ++idx) { - aco_ptr& instr = block.instructions[idx]; - - if (save_original_exec) { - /* Check if the instruction uses the exec_copy_def register, in which case we can't - * optimize. */ - for (const Operand& op : instr->operands) - if (regs_intersect(exec_copy_def, op)) - return; - for (const Definition& def : instr->definitions) - if (regs_intersect(exec_copy_def, def)) - return; - if (instr->isPseudo() && instr->pseudo().needs_scratch_reg && - regs_intersect(exec_copy_def, Definition(instr->pseudo().scratch_sgpr, s1))) - return; - } - } - if (save_original_exec) { /* We insert the exec copy before exec_val, so exec_val can't use those registers. */ for (const Operand& op : exec_val->operands) @@ -421,6 +424,15 @@ try_optimize_branching_sequence(ssa_elimination_ctx& ctx, Block& block, const in if (((vopc && !vcmpx_exec_only) || !can_remove_copy) && regs_intersect(exec_copy_def, exec_wr_def)) return; + + for (int idx = exec_val_idx + 1; idx < exec_copy_idx; ++idx) { + Instruction* instr = block.instructions[idx].get(); + + /* Check if the instruction uses the exec_copy_def register, in which case we can't + * optimize. */ + if (instr_accesses(instr, exec_copy_def, false)) + return; + } } if (vopc) {