diff --git a/src/amd/compiler/aco_ssa_elimination.cpp b/src/amd/compiler/aco_ssa_elimination.cpp index f9260026242..043221ae044 100644 --- a/src/amd/compiler/aco_ssa_elimination.cpp +++ b/src/amd/compiler/aco_ssa_elimination.cpp @@ -547,30 +547,25 @@ eliminate_useless_exec_writes_in_block(ssa_elimination_ctx& ctx, Block& block) { /* Check if any successor needs the outgoing exec mask from the current block. */ - bool exec_write_used; + bool copy_to_exec = false; + bool copy_from_exec = false; - if (!ctx.logical_phi_info[block.index].empty()) { - exec_write_used = true; - } else { - bool copy_to_exec = false; - bool copy_from_exec = false; - - for (const auto& successor_phi_info : ctx.linear_phi_info[block.index]) { - copy_to_exec |= successor_phi_info.def.physReg() == exec; - copy_from_exec |= successor_phi_info.op.physReg() == exec; - } - - if (copy_from_exec) - exec_write_used = true; - else if (copy_to_exec) - exec_write_used = false; - else - /* blocks_incoming_exec_used is initialized to true, so this is correct even for loops. */ - exec_write_used = - std::any_of(block.linear_succs.begin(), block.linear_succs.end(), - [&ctx](int succ_idx) { return ctx.blocks_incoming_exec_used[succ_idx]; }); + for (const auto& successor_phi_info : ctx.linear_phi_info[block.index]) { + copy_to_exec |= successor_phi_info.def.physReg() == exec; + copy_from_exec |= successor_phi_info.op.physReg() == exec; } + bool exec_write_used; + if (copy_from_exec) + exec_write_used = true; + else if (copy_to_exec) + exec_write_used = false; + else + /* blocks_incoming_exec_used is initialized to true, so this is correct even for loops. */ + exec_write_used = + std::any_of(block.linear_succs.begin(), block.linear_succs.end(), + [&ctx](int succ_idx) { return ctx.blocks_incoming_exec_used[succ_idx]; }); + /* Collect information about the branching sequence. */ bool logical_end_found = false; @@ -590,7 +585,9 @@ eliminate_useless_exec_writes_in_block(ssa_elimination_ctx& ctx, Block& block) break; /* See if the current instruction needs or writes exec. */ - bool needs_exec = needs_exec_mask(instr.get()); + bool needs_exec = + needs_exec_mask(instr.get()) || + (instr->opcode == aco_opcode::p_logical_end && !ctx.logical_phi_info[block.index].empty()); bool writes_exec = instr_writes_exec(instr.get()); logical_end_found |= instr->opcode == aco_opcode::p_logical_end;