From fec10ea3ead275488bdd6f7637926a8ce69966f1 Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Sat, 14 Dec 2024 18:10:39 +0100 Subject: [PATCH] aco/optimizer: use new helpers for add16 opts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Foz-DB Navi48: Totals from 164 (0.20% of 82419) affected shaders: Instrs: 145304 -> 145335 (+0.02%); split: -0.00%, +0.02% CodeSize: 794156 -> 794280 (+0.02%); split: -0.00%, +0.02% Latency: 1884349 -> 1884227 (-0.01%); split: -0.01%, +0.00% InvThroughput: 350403 -> 350393 (-0.00%) Foz-DB Navi21: Totals from 164 (0.20% of 82387) affected shaders: Instrs: 117416 -> 117414 (-0.00%) CodeSize: 673328 -> 673312 (-0.00%) Latency: 1896952 -> 1897094 (+0.01%); split: -0.00%, +0.01% InvThroughput: 638536 -> 638556 (+0.00%); split: -0.01%, +0.01% Copies: 14579 -> 14577 (-0.01%) VALU: 65895 -> 65893 (-0.00%) Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_optimizer.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index ae786481238..6cfa4becefc 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -4788,14 +4788,6 @@ combine_instruction(opt_ctx& ctx, aco_ptr& instr) } } else if (instr->opcode == aco_opcode::v_not_b32 && ctx.program->gfx_level >= GFX10) { combine_not_xor(ctx, instr); - } else if (instr->opcode == aco_opcode::v_add_u16 && !instr->valu().clamp) { - combine_three_valu_op( - ctx, instr, aco_opcode::v_mul_lo_u16, - ctx.program->gfx_level == GFX8 ? aco_opcode::v_mad_legacy_u16 : aco_opcode::v_mad_u16, - "120", 1 | 2); - } else if (instr->opcode == aco_opcode::v_add_u16_e64 && !instr->valu().clamp) { - combine_three_valu_op(ctx, instr, aco_opcode::v_mul_lo_u16_e64, aco_opcode::v_mad_u16, "120", - 1 | 2); } else if (instr->opcode == aco_opcode::v_add_u32 && !instr->usesModifiers()) { if (combine_add_sub_b2i(ctx, instr, aco_opcode::v_addc_co_u32, 1 | 2)) { } else if (combine_add_bcnt(ctx, instr)) { @@ -5008,6 +5000,13 @@ combine_instruction(opt_ctx& ctx, aco_ptr& instr) /* v_mul_f32(a, v_cndmask_b32(0, 1.0, cond)) -> v_cndmask_b32(0, a, cond) */ add_opt(v_cndmask_b32, v_cndmask_b32, 0x3, "1032", and_cb, remove_const_cb<0x3f800000>>, true); + } else if (info.opcode == aco_opcode::v_add_u16 && !info.clamp) { + if (ctx.program->gfx_level < GFX9) + add_opt(v_mul_lo_u16, v_mad_legacy_u16, 0x3, "120"); + else + add_opt(v_mul_lo_u16, v_mad_u16, 0x3, "120"); + } else if (info.opcode == aco_opcode::v_add_u16_e64 && !info.clamp) { + add_opt(v_mul_lo_u16_e64, v_mad_u16, 0x3, "120"); } if (match_and_apply_patterns(ctx, info, patterns)) {