aco: combine extracts into s_pack_ll_b32_b16

fossil-db (navi21):
Totals from 3 (0.00% of 79395) affected shaders:
Instrs: 45941 -> 45924 (-0.04%)
CodeSize: 241768 -> 241756 (-0.00%)
Latency: 176501 -> 176491 (-0.01%)
Copies: 6884 -> 6882 (-0.03%)
SALU: 6101 -> 6088 (-0.21%)

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29912>
This commit is contained in:
Rhys Perry
2024-06-06 15:25:13 +01:00
committed by Marge Bot
parent 98cb50297b
commit ca161a96d1
2 changed files with 67 additions and 1 deletions
+50
View File
@@ -1977,3 +1977,53 @@ BEGIN_TEST(optimize.vinterp_inreg_output_modifiers)
finish_opt_test();
END_TEST
BEGIN_TEST(optimize.s_pack)
//>> s1: %a, s1: %b, s1: %c = p_startpgm
if (!setup_cs("s1 s1 s1", GFX11))
return;
Temp lo = bld.pseudo(aco_opcode::p_extract, bld.def(s1), bld.def(s1, scc), inputs[1],
Operand::c32(0), Operand::c32(16u), Operand::c32(false));
Temp hi = bld.pseudo(aco_opcode::p_extract, bld.def(s1), bld.def(s1, scc), inputs[2],
Operand::c32(1), Operand::c32(16u), Operand::c32(false));
//! s1: %res0 = s_pack_lh_b32_b16 %b, %c
//! p_unit_test 0, %res0
writeout(0, bld.sop2(aco_opcode::s_pack_ll_b32_b16, bld.def(s1), lo, hi));
//! s1: %res1 = s_pack_ll_b32_b16 %b, %b
//! p_unit_test 1, %res1
writeout(1, bld.sop2(aco_opcode::s_pack_ll_b32_b16, bld.def(s1), lo, lo));
//! s1: %res2 = s_pack_hl_b32_b16 %c, %b
//! p_unit_test 2, %res2
writeout(2, bld.sop2(aco_opcode::s_pack_ll_b32_b16, bld.def(s1), hi, lo));
//! s1: %res3 = s_pack_hh_b32_b16 %c, %c
//! p_unit_test 3, %res3
writeout(3, bld.sop2(aco_opcode::s_pack_ll_b32_b16, bld.def(s1), hi, hi));
lo = bld.pseudo(aco_opcode::p_extract, bld.def(s1), bld.def(s1, scc), inputs[1], Operand::c32(0),
Operand::c32(16u), Operand::c32(false));
hi = bld.pseudo(aco_opcode::p_extract, bld.def(s1), bld.def(s1, scc), inputs[2], Operand::c32(1),
Operand::c32(16u), Operand::c32(false));
//! s1: %res4 = s_pack_ll_b32_b16 %a, %b
//! p_unit_test 4, %res4
writeout(4, bld.sop2(aco_opcode::s_pack_ll_b32_b16, bld.def(s1), inputs[0], lo));
//! s1: %res5 = s_pack_lh_b32_b16 %a, %c
//! p_unit_test 5, %res5
writeout(5, bld.sop2(aco_opcode::s_pack_ll_b32_b16, bld.def(s1), inputs[0], hi));
//! s1: %res6 = s_pack_ll_b32_b16 %b, %a
//! p_unit_test 6, %res6
writeout(6, bld.sop2(aco_opcode::s_pack_ll_b32_b16, bld.def(s1), lo, inputs[0]));
//! s1: %res7 = s_pack_hl_b32_b16 %c, %a
//! p_unit_test 7, %res7
writeout(7, bld.sop2(aco_opcode::s_pack_ll_b32_b16, bld.def(s1), hi, inputs[0]));
finish_opt_test();
END_TEST