aco/opt_postRA: allow v_cmpx to clobber exec before nop split/create vector

Kind of ugly, but I really hate seeing this in every rt traversal loop:

image_bvh64_intersect_ray v[56:59], [v40, v41, v42, v47, v48, v49, v50, v51, v52, v53, v54, v55], s[44:47]
v_cmp_class_f32_e64 s57, 0xff800000, v12
s_and_b32 exec_lo, s57, exec_lo
s_cbranch_execz BB219

Foz-DB Navi21:
Totals from 3394 (3.48% of 97591) affected shaders:
Instrs: 9536259 -> 9533592 (-0.03%)
CodeSize: 51657072 -> 51640120 (-0.03%); split: -0.03%, +0.00%
Latency: 109493553 -> 109513317 (+0.02%); split: -0.01%, +0.02%
InvThroughput: 29125525 -> 29131876 (+0.02%); split: -0.00%, +0.02%
Copies: 815888 -> 818219 (+0.29%); split: -0.01%, +0.30%
Branches: 277451 -> 277449 (-0.00%)
SALU: 1217642 -> 1214976 (-0.22%)

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38697>
This commit is contained in:
Georg Lehmann
2025-11-27 13:49:40 +01:00
committed by Marge Bot
parent 1f2d129bfa
commit 39a61502e5

View File

@@ -952,6 +952,30 @@ fixup_reg_writes(pr_opt_ctx& ctx, unsigned start)
ctx.current_instr_idx = current_idx;
}
/**
 * Checks whether a p_split_vector / p_create_vector leaves every value in
 * the register it already occupies.
 *
 * - p_split_vector: starting at the physical register of operand 0, each
 *   definition must sit exactly at the running register position, which is
 *   advanced by that definition's byte size after every match.
 * - p_create_vector: starting at the physical register of definition 0, each
 *   operand must sit exactly at the running register position, which is
 *   advanced by that operand's byte size after every match.
 *
 * Any other opcode yields false.
 */
bool
is_nop_copy(Instruction* instr)
{
   switch (instr->opcode) {
   case aco_opcode::p_split_vector: {
      /* Walk the source vector piece by piece alongside the definitions. */
      PhysReg expected = instr->operands[0].physReg();
      for (const Definition& piece : instr->definitions) {
         if (piece.physReg() != expected)
            return false;
         expected = expected.advance(piece.bytes());
      }
      return true;
   }
   case aco_opcode::p_create_vector: {
      /* Walk the destination vector piece by piece alongside the operands. */
      PhysReg expected = instr->definitions[0].physReg();
      for (const Operand& piece : instr->operands) {
         if (piece.physReg() != expected)
            return false;
         expected = expected.advance(piece.bytes());
      }
      return true;
   }
   default:
      return false;
   }
}
bool
try_optimize_branching_sequence(pr_opt_ctx& ctx, aco_ptr<Instruction>& exec_copy)
{
@@ -1076,7 +1100,7 @@ try_optimize_branching_sequence(pr_opt_ctx& ctx, aco_ptr<Instruction>& exec_copy
/* Ensure that nothing needs a previous exec between exec_val_idx and the current exec write. */
for (unsigned i = exec_val_idx.instr + 1; i < ctx.current_instr_idx; i++) {
Instruction* instr = ctx.current_block->instructions[i].get();
if (instr && needs_exec_mask(instr))
if (instr && needs_exec_mask(instr) && !is_nop_copy(instr))
return false;
/* If the successor has phis, copies might have to be inserted at p_logical_end. */