aco: combine a & ~b to bfi(b, 0, a)

Foz-DB Navi21:
Totals from 905 (0.68% of 132657) affected shaders:
Instrs: 1223583 -> 1221016 (-0.21%); split: -0.22%, +0.01%
CodeSize: 6567272 -> 6567064 (-0.00%); split: -0.04%, +0.03%
SpillSGPRs: 1231 -> 1223 (-0.65%)
SpillVGPRs: 829 -> 823 (-0.72%); split: -1.45%, +0.72%
Latency: 40952209 -> 40946230 (-0.01%); split: -0.02%, +0.01%
InvThroughput: 9411929 -> 9397932 (-0.15%); split: -0.17%, +0.02%
VClause: 29108 -> 29112 (+0.01%); split: -0.04%, +0.05%
Copies: 105272 -> 105221 (-0.05%); split: -0.28%, +0.23%
Branches: 29330 -> 29329 (-0.00%)

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24383>
This commit is contained in:
Georg Lehmann
2023-07-29 18:47:04 +02:00
committed by Marge Bot
parent c9b2f83a2f
commit 1659d982c3
+42 -1
View File
@@ -3588,6 +3588,45 @@ combine_and_subbrev(opt_ctx& ctx, aco_ptr<Instruction>& instr)
return false;
}
/* v_and(a, not(b)) -> v_bfi_b32(b, 0, a)
 *
 * Tries to fold a bitwise NOT that feeds one source of the v_and_b32 in
 * `instr` into a single v_bfi_b32. On success `instr` is replaced by the new
 * instruction and true is returned; otherwise `instr` is left alone and false
 * is returned.
 */
bool
combine_v_and_not(opt_ctx& ctx, aco_ptr<Instruction>& instr)
{
   /* The and itself must not use any modifiers. */
   if (instr->usesModifiers())
      return false;

   /* Either source of the and may be the inverted one, so try both. */
   for (unsigned not_idx = 0; not_idx < 2; not_idx++) {
      const unsigned other_idx = 1 - not_idx;

      Instruction* not_instr = follow_operand(ctx, instr->operands[not_idx], true);
      if (!not_instr || not_instr->usesModifiers())
         continue;

      const bool is_not = not_instr->opcode == aco_opcode::v_not_b32 ||
                          not_instr->opcode == aco_opcode::s_not_b32;
      if (!is_not)
         continue;

      /* Operand layout the v_bfi_b32 would get: (b, 0, a). */
      Operand candidate_ops[3] = {
         not_instr->operands[0],
         Operand::zero(),
         instr->operands[other_idx],
      };

      /* Give up on this source if check_vop3_operands() rejects the combination. */
      if (!check_vop3_operands(ctx, 3, candidate_ops))
         continue;

      Instruction* bfi =
         create_instruction<VALU_instruction>(aco_opcode::v_bfi_b32, Format::VOP3, 3, 1);

      /* The source of the not is taken over via copy_operand(); this happens
       * before decrease_uses() is called on the not below.
       */
      bfi->operands[0] = copy_operand(ctx, not_instr->operands[0]);
      bfi->operands[1] = Operand::zero();
      bfi->operands[2] = instr->operands[other_idx];

      /* Keep the result definition and pass_flags of the replaced and. */
      bfi->definitions[0] = instr->definitions[0];
      bfi->pass_flags = instr->pass_flags;
      instr.reset(bfi);

      /* The and no longer reads the result of the not. */
      decrease_uses(ctx, not_instr);

      /* Reset the label stored for the result temporary. */
      ctx.info[instr->definitions[0].tempId()].label = 0;
      return true;
   }

   return false;
}
/* v_add_co(c, s_lshl(a, b)) -> v_mad_u32_u24(a, 1<<b, c)
* v_add_co(c, v_lshlrev(a, b)) -> v_mad_u32_u24(b, 1<<a, c)
* v_sub(c, s_lshl(a, b)) -> v_mad_i32_i24(a, -(1<<b), c)
@@ -4456,7 +4495,9 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
} else if (instr->opcode == aco_opcode::s_abs_i32) {
combine_sabsdiff(ctx, instr);
} else if (instr->opcode == aco_opcode::v_and_b32) {
combine_and_subbrev(ctx, instr);
if (combine_and_subbrev(ctx, instr)) {
} else if (combine_v_and_not(ctx, instr)) {
}
} else if (instr->opcode == aco_opcode::v_fma_f32 || instr->opcode == aco_opcode::v_fma_f16) {
/* set existing v_fma_f32 with label_mad so we can create v_fmamk_f32/v_fmaak_f32.
* since ctx.uses[mad_info::mul_temp_id] is always 0, we don't have to worry about