aco/optimizer: check recursively if we can eliminate s_and exec
Totals from 2860 (2.12% of 134913) affected shaders: (GFX10.3)
CodeSize: 5990728 -> 5979164 (-0.19%); split: -0.20%, +0.01%
Instrs: 1094562 -> 1091653 (-0.27%); split: -0.28%, +0.01%
Latency: 8689841 -> 8684523 (-0.06%); split: -0.07%, +0.00%
InvThroughput: 1840533 -> 1840527 (-0.00%); split: -0.00%, +0.00%
SClause: 51437 -> 51439 (+0.00%)
Copies: 82461 -> 82472 (+0.01%)
PreSGPRs: 83136 -> 83172 (+0.04%)

Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15675>
This commit is contained in:
@@ -1149,6 +1149,27 @@ can_eliminate_fcanonicalize(opt_ctx& ctx, aco_ptr<Instruction>& instr, Temp tmp)
|
||||
return instr_info.can_use_input_modifiers[(int)op] && does_fp_op_flush_denorms(ctx, op);
|
||||
}
|
||||
|
||||
bool
|
||||
can_eliminate_and_exec(opt_ctx& ctx, Temp tmp, unsigned pass_flags)
|
||||
{
|
||||
if (ctx.info[tmp.id()].is_vopc()) {
|
||||
Instruction* vopc_instr = ctx.info[tmp.id()].instr;
|
||||
/* Remove superfluous s_and when the VOPC instruction uses the same exec and thus
|
||||
* already produces the same result */
|
||||
return vopc_instr->pass_flags == pass_flags;
|
||||
}
|
||||
if (ctx.info[tmp.id()].is_bitwise()) {
|
||||
Instruction* instr = ctx.info[tmp.id()].instr;
|
||||
if (instr->pass_flags != pass_flags)
|
||||
return false;
|
||||
return std::all_of(
|
||||
instr->operands.begin(), instr->operands.end(),
|
||||
[&](const Operand& op)
|
||||
{ return op.isTemp() && can_eliminate_and_exec(ctx, op.getTemp(), pass_flags); });
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
is_copy_label(opt_ctx& ctx, aco_ptr<Instruction>& instr, ssa_info& info)
|
||||
{
|
||||
@@ -1802,16 +1823,9 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
||||
* s_and is unnecessary. */
|
||||
ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp());
|
||||
break;
|
||||
} else if (ctx.info[instr->operands[0].tempId()].is_vopc()) {
|
||||
Instruction* vopc_instr = ctx.info[instr->operands[0].tempId()].instr;
|
||||
/* Remove superfluous s_and when the VOPC instruction uses the same exec and thus
|
||||
* already produces the same result */
|
||||
if (vopc_instr->pass_flags == instr->pass_flags) {
|
||||
assert(instr->pass_flags > 0);
|
||||
ctx.info[instr->definitions[0].tempId()].set_temp(
|
||||
vopc_instr->definitions[0].getTemp());
|
||||
break;
|
||||
}
|
||||
} else if (can_eliminate_and_exec(ctx, instr->operands[0].getTemp(), instr->pass_flags)) {
|
||||
ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp());
|
||||
break;
|
||||
}
|
||||
}
|
||||
FALLTHROUGH;
|
||||
@@ -1827,14 +1841,15 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
||||
})) {
|
||||
ctx.info[instr->definitions[0].tempId()].set_uniform_bitwise();
|
||||
}
|
||||
FALLTHROUGH;
|
||||
ctx.info[instr->definitions[0].tempId()].set_bitwise(instr.get());
|
||||
break;
|
||||
case aco_opcode::s_lshl_b32:
|
||||
case aco_opcode::v_or_b32:
|
||||
case aco_opcode::v_lshlrev_b32:
|
||||
case aco_opcode::v_bcnt_u32_b32:
|
||||
case aco_opcode::v_and_b32:
|
||||
case aco_opcode::v_xor_b32:
|
||||
ctx.info[instr->definitions[0].tempId()].set_bitwise(instr.get());
|
||||
ctx.info[instr->definitions[0].tempId()].set_usedef(instr.get());
|
||||
break;
|
||||
case aco_opcode::v_min_f32:
|
||||
case aco_opcode::v_min_f16:
|
||||
|
||||
Reference in New Issue
Block a user