From 7b4f0e714c2d4f3b8f227e29ef1bb38a9bf404c8 Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Thu, 14 Sep 2023 13:25:07 +0200 Subject: [PATCH] aco/gfx11: support vinterp as fma_mix Totals from 718 (0.94% of 76572) affected shaders: Instrs: 657897 -> 654219 (-0.56%) CodeSize: 3471668 -> 3457352 (-0.41%); split: -0.41%, +0.00% VGPRs: 34200 -> 34164 (-0.11%) Latency: 11687698 -> 11677030 (-0.09%); split: -0.10%, +0.00% InvThroughput: 1455371 -> 1451537 (-0.26%); split: -0.26%, +0.00% VClause: 7598 -> 7600 (+0.03%) SClause: 18293 -> 18241 (-0.28%); split: -0.44%, +0.15% Copies: 34641 -> 34644 (+0.01%); split: -0.05%, +0.06% Reviewed-by: Rhys Perry Part-of: --- src/amd/compiler/aco_optimizer.cpp | 11 +++++++---- src/amd/compiler/tests/test_optimizer.cpp | 6 ++++++ 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index 5bb52baf177..311b8ad4d04 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -3993,15 +3993,18 @@ combine_output_conversion(opt_ctx& ctx, aco_ptr<Instruction>& instr) return false; Instruction* conv = def_info.instr; - if (!can_use_mad_mix(ctx, instr) || ctx.uses[instr->definitions[0].tempId()] != 1) - return false; - - if (!ctx.uses[conv->definitions[0].tempId()]) + if (!ctx.uses[conv->definitions[0].tempId()] || ctx.uses[instr->definitions[0].tempId()] != 1) return false; if (conv->usesModifiers()) return false; + if (instr->opcode == aco_opcode::v_interp_p2_f32_inreg) + interp_p2_f32_inreg_to_fma_dpp(instr); + + if (!can_use_mad_mix(ctx, instr)) + return false; + if (!instr->isVOP3P()) to_mad_mix(ctx, instr); diff --git a/src/amd/compiler/tests/test_optimizer.cpp b/src/amd/compiler/tests/test_optimizer.cpp index 04a710d6bfa..f09b94cc43b 100644 --- a/src/amd/compiler/tests/test_optimizer.cpp +++ b/src/amd/compiler/tests/test_optimizer.cpp @@ -2225,5 +2225,11 @@ BEGIN_TEST(optimize.vinterp_inreg_output_modifiers) tmp = 
bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand::c16(0x4000u), tmp); writeout(3, tmp); + //! v2b: %res4 = v_fma_mixlo_f16 %c, %b, %a quad_perm:[2,2,2,2] fi + //! p_unit_test 4, %res4 + tmp = bld.vinterp_inreg(aco_opcode::v_interp_p2_f32_inreg, bld.def(v1), inputs[2], inputs[1], + inputs[0]); + writeout(4, f2f16(tmp)); + finish_opt_test(); END_TEST