From c538f47f031a52861e81d161ce9a6ef5380883cd Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Sat, 8 Mar 2025 11:35:19 +0100
Subject: [PATCH] aco/optimizer: create ff0/bcnt0
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Foz-DB Navi21:
Totals from 1 (0.00% of 82387) affected shaders:
Instrs: 350 -> 347 (-0.86%)
CodeSize: 1800 -> 1788 (-0.67%)
Latency: 2427 -> 2421 (-0.25%)
SALU: 80 -> 77 (-3.75%)

Reviewed-by: Daniel Schürmann
Part-of:
---
 src/amd/compiler/aco_optimizer.cpp | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index 67333a85fb8..851d2f59fd1 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -4531,6 +4531,18 @@ combine_instruction(opt_ctx& ctx, aco_ptr& instr)
       add_opt(s_not_b32, s_xnor_b32, 0x3, "01");
    } else if (info.opcode == aco_opcode::s_xor_b64) {
       add_opt(s_not_b64, s_xnor_b64, 0x3, "01");
+   } else if ((info.opcode == aco_opcode::s_sub_u32 || info.opcode == aco_opcode::s_sub_i32) &&
+              !ctx.uses[info.defs[1].tempId()]) {
+      add_opt(s_bcnt1_i32_b32, s_bcnt0_i32_b32, 0x2, "10", remove_const_cb<32>);
+      add_opt(s_bcnt1_i32_b64, s_bcnt0_i32_b64, 0x2, "10", remove_const_cb<64>);
+   } else if (info.opcode == aco_opcode::s_bcnt1_i32_b32) {
+      add_opt(s_not_b32, s_bcnt0_i32_b32, 0x1, "0");
+   } else if (info.opcode == aco_opcode::s_bcnt1_i32_b64) {
+      add_opt(s_not_b64, s_bcnt0_i32_b64, 0x1, "0");
+   } else if (info.opcode == aco_opcode::s_ff1_i32_b32 && ctx.program->gfx_level < GFX11) {
+      add_opt(s_not_b32, s_ff0_i32_b32, 0x1, "0");
+   } else if (info.opcode == aco_opcode::s_ff1_i32_b64 && ctx.program->gfx_level < GFX11) {
+      add_opt(s_not_b64, s_ff0_i32_b64, 0x1, "0");
    }
 
    if (match_and_apply_patterns(ctx, info, patterns)) {