From 6d383159d405b0cf9c9c03eaeea4ed46aa2b1d29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Wed, 30 Mar 2022 18:01:45 +0200 Subject: [PATCH] aco/optimizer: check recursively if we can eliminate s_and exec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Totals from 2860 (2.12% of 134913) affected shaders: (GFX10.3) CodeSize: 5990728 -> 5979164 (-0.19%); split: -0.20%, +0.01% Instrs: 1094562 -> 1091653 (-0.27%); split: -0.28%, +0.01% Latency: 8689841 -> 8684523 (-0.06%); split: -0.07%, +0.00% InvThroughput: 1840533 -> 1840527 (-0.00%); split: -0.00%, +0.00% SClause: 51437 -> 51439 (+0.00%) Copies: 82461 -> 82472 (+0.01%) PreSGPRs: 83136 -> 83172 (+0.04%) Reviewed-by: Timur Kristóf Part-of: --- src/amd/compiler/aco_optimizer.cpp | 39 +++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index 56af25a11f7..5ac2ebc7335 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -1149,6 +1149,27 @@ can_eliminate_fcanonicalize(opt_ctx& ctx, aco_ptr& instr, Temp tmp) return instr_info.can_use_input_modifiers[(int)op] && does_fp_op_flush_denorms(ctx, op); } +bool +can_eliminate_and_exec(opt_ctx& ctx, Temp tmp, unsigned pass_flags) +{ + if (ctx.info[tmp.id()].is_vopc()) { + Instruction* vopc_instr = ctx.info[tmp.id()].instr; + /* The s_and with exec is superfluous when the VOPC instruction uses the same exec and + * thus already produces the same result */ + return vopc_instr->pass_flags == pass_flags; + } + if (ctx.info[tmp.id()].is_bitwise()) { + Instruction* instr = ctx.info[tmp.id()].instr; + if (instr->pass_flags != pass_flags) + return false; + return std::all_of( + instr->operands.begin(), instr->operands.end(), + [&](const Operand& op) + { return op.isTemp() && can_eliminate_and_exec(ctx, op.getTemp(), pass_flags); }); + } + return false; +} + bool 
is_copy_label(opt_ctx& ctx, aco_ptr& instr, ssa_info& info) { @@ -1802,16 +1823,9 @@ label_instruction(opt_ctx& ctx, aco_ptr& instr) * s_and is unnecessary. */ ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp()); break; - } else if (ctx.info[instr->operands[0].tempId()].is_vopc()) { - Instruction* vopc_instr = ctx.info[instr->operands[0].tempId()].instr; - /* Remove superfluous s_and when the VOPC instruction uses the same exec and thus - * already produces the same result */ - if (vopc_instr->pass_flags == instr->pass_flags) { - assert(instr->pass_flags > 0); - ctx.info[instr->definitions[0].tempId()].set_temp( - vopc_instr->definitions[0].getTemp()); - break; - } + } else if (can_eliminate_and_exec(ctx, instr->operands[0].getTemp(), instr->pass_flags)) { + ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp()); + break; } } FALLTHROUGH; @@ -1827,14 +1841,15 @@ label_instruction(opt_ctx& ctx, aco_ptr& instr) })) { ctx.info[instr->definitions[0].tempId()].set_uniform_bitwise(); } - FALLTHROUGH; + ctx.info[instr->definitions[0].tempId()].set_bitwise(instr.get()); + break; case aco_opcode::s_lshl_b32: case aco_opcode::v_or_b32: case aco_opcode::v_lshlrev_b32: case aco_opcode::v_bcnt_u32_b32: case aco_opcode::v_and_b32: case aco_opcode::v_xor_b32: - ctx.info[instr->definitions[0].tempId()].set_bitwise(instr.get()); + ctx.info[instr->definitions[0].tempId()].set_usedef(instr.get()); break; case aco_opcode::v_min_f32: case aco_opcode::v_min_f16: