aco/opt_postRA: allow v_cmpx to clobber exec before nop split/create vector
Kind of ugly, but I really hate seeing this in every rt traversal loop:

    image_bvh64_intersect_ray v[56:59], [v40, v41, v42, v47, v48, v49, v50, v51, v52, v53, v54, v55], s[44:47]
    v_cmp_class_f32_e64 s57, 0xff800000, v12
    s_and_b32 exec_lo, s57, exec_lo
    s_cbranch_execz BB219

Foz-DB Navi21:
Totals from 3394 (3.48% of 97591) affected shaders:
Instrs: 9536259 -> 9533592 (-0.03%)
CodeSize: 51657072 -> 51640120 (-0.03%); split: -0.03%, +0.00%
Latency: 109493553 -> 109513317 (+0.02%); split: -0.01%, +0.02%
InvThroughput: 29125525 -> 29131876 (+0.02%); split: -0.00%, +0.02%
Copies: 815888 -> 818219 (+0.29%); split: -0.01%, +0.30%
Branches: 277451 -> 277449 (-0.00%)
SALU: 1217642 -> 1214976 (-0.22%)

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38697>
This commit is contained in:
@@ -952,6 +952,30 @@ fixup_reg_writes(pr_opt_ctx& ctx, unsigned start)
|
||||
ctx.current_instr_idx = current_idx;
|
||||
}
|
||||
|
||||
bool
|
||||
is_nop_copy(Instruction* instr)
|
||||
{
|
||||
if (instr->opcode == aco_opcode::p_split_vector) {
|
||||
PhysReg op_reg = instr->operands[0].physReg();
|
||||
for (const Definition& def : instr->definitions) {
|
||||
if (def.physReg() != op_reg)
|
||||
return false;
|
||||
op_reg = op_reg.advance(def.bytes());
|
||||
}
|
||||
return true;
|
||||
} else if (instr->opcode == aco_opcode::p_create_vector) {
|
||||
PhysReg def_reg = instr->definitions[0].physReg();
|
||||
for (const Operand& op : instr->operands) {
|
||||
if (op.physReg() != def_reg)
|
||||
return false;
|
||||
def_reg = def_reg.advance(op.bytes());
|
||||
}
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
try_optimize_branching_sequence(pr_opt_ctx& ctx, aco_ptr<Instruction>& exec_copy)
|
||||
{
|
||||
@@ -1076,7 +1100,7 @@ try_optimize_branching_sequence(pr_opt_ctx& ctx, aco_ptr<Instruction>& exec_copy
|
||||
/* Ensure that nothing needs a previous exec between exec_val_idx and the current exec write. */
|
||||
for (unsigned i = exec_val_idx.instr + 1; i < ctx.current_instr_idx; i++) {
|
||||
Instruction* instr = ctx.current_block->instructions[i].get();
|
||||
if (instr && needs_exec_mask(instr))
|
||||
if (instr && needs_exec_mask(instr) && !is_nop_copy(instr))
|
||||
return false;
|
||||
|
||||
/* If the successor has phis, copies might have to be inserted at p_logical_end. */
|
||||
|
||||
Reference in New Issue
Block a user