From 7aa94efe827fa2b99490e04ce2f5c1a4d737871c Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Tue, 18 Oct 2022 15:48:21 +0200
Subject: [PATCH] aco: Combine constant bit test to s_bitcmp.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Foz-DB Navi21:
Totals from 73988 (54.84% of 134913) affected shaders:
VGPRs: 2959768 -> 2959752 (-0.00%)
SpillSGPRs: 10250 -> 10697 (+4.36%); split: -0.64%, +5.00%
SpillVGPRs: 2326 -> 2291 (-1.50%); split: -2.24%, +0.73%
CodeSize: 261339476 -> 261045912 (-0.11%); split: -0.12%, +0.00%
Scratch: 239616 -> 238592 (-0.43%)
Instrs: 49214044 -> 49188242 (-0.05%); split: -0.06%, +0.00%
Latency: 413214139 -> 413296229 (+0.02%); split: -0.03%, +0.05%
InvThroughput: 71741622 -> 71786300 (+0.06%); split: -0.07%, +0.13%
VClause: 856838 -> 856973 (+0.02%); split: -0.01%, +0.02%
SClause: 1504502 -> 1504567 (+0.00%); split: -0.01%, +0.02%
Copies: 4058433 -> 4060424 (+0.05%); split: -0.03%, +0.08%
Branches: 1502953 -> 1502945 (-0.00%); split: -0.00%, +0.00%
PreSGPRs: 3081927 -> 3081531 (-0.01%); split: -0.02%, +0.01%
PreVGPRs: 2513990 -> 2513992 (+0.00%)

The vast majority of instruction count regressions are caused by parallel-rdp.

Signed-off-by: Georg Lehmann
Reviewed-by: Daniel Schürmann
Part-of:
---
 src/amd/compiler/aco_optimizer.cpp | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index 6832dbf78f6..822ea9967fb 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -2839,12 +2839,18 @@ combine_s_bitcmp(opt_ctx& ctx, aco_ptr& instr) if (!lshl_instr || lshl_instr->opcode != s_lshl || !lshl_instr->operands[0].constantEquals(1) || (lshl_instr->operands[1].isLiteral() && and_instr->operands[!and_idx].isLiteral())) + lshl_instr = nullptr; + + uint64_t constant; + if (!lshl_instr && + (!is_operand_constant(ctx, and_instr->operands[and_idx], b64 ? 
64 : 32, &constant) || + !util_is_power_of_two_or_zero64(constant) || constant == 0)) continue; bool test1 = false; if (instr->operands[!cmp_idx].constantEquals(0)) { test1 = lg; - } else if (instr->operands[!cmp_idx].isTemp() && + } else if (lshl_instr && instr->operands[!cmp_idx].isTemp() && instr->operands[!cmp_idx].tempId() == lshl_instr->definitions[0].tempId()) { test1 = !lg; ctx.uses[lshl_instr->definitions[0].tempId()]--; @@ -2862,9 +2868,13 @@ combine_s_bitcmp(opt_ctx& ctx, aco_ptr& instr) instr->opcode = aco_opcode::s_bitcmp0_b32; instr->operands[0] = copy_operand(ctx, and_instr->operands[!and_idx]); - instr->operands[1] = copy_operand(ctx, lshl_instr->operands[1]); decrease_uses(ctx, and_instr); - decrease_op_uses_if_dead(ctx, lshl_instr); + if (lshl_instr) { + instr->operands[1] = copy_operand(ctx, lshl_instr->operands[1]); + decrease_op_uses_if_dead(ctx, lshl_instr); + } else { + instr->operands[1] = Operand::c32(ffsll(constant) - 1); + } return true; } }