diff --git a/src/amd/compiler/tests/test_optimizer.cpp b/src/amd/compiler/tests/test_optimizer.cpp
index 54a11d399f6..e1953b0ec31 100644
--- a/src/amd/compiler/tests/test_optimizer.cpp
+++ b/src/amd/compiler/tests/test_optimizer.cpp
@@ -1020,3 +1020,82 @@ BEGIN_TEST(optimize.denorm_propagation)
       }
    }
 END_TEST
+/* Optimizer test: folding a DPP v_mov_b32 into the VOP2 instruction that consumes its result. */
+BEGIN_TEST(optimizer.dpp)
+   //>> v1: %a, v1: %b, s2: %c = p_startpgm
+   if (!setup_cs("v1 v1 s2", GFX10_3))
+      return;
+   /* NOTE(review): comments marked '>>' or '!' look like expected-IR patterns for the harness. */
+   Operand a(inputs[0]);
+   Operand b(inputs[1]);
+   Operand c(inputs[2]);
+
+   /* basic optimization: the DPP mov is expected to fold into the add, which keeps row_mirror */
+   //! v1: %res0 = v_add_f32 %a, %b row_mirror bound_ctrl:1
+   //! p_unit_test 0, %res0
+   Temp tmp0 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
+   Temp res0 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), tmp0, b);
+   writeout(0, res0);
+
+   /* operand swapping: mov is the second source, so v_sub_f32 is expected to become v_subrev */
+   //! v1: %res1 = v_subrev_f32 %a, %b row_mirror bound_ctrl:1
+   //! p_unit_test 1, %res1
+   Temp tmp1 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
+   Temp res1 = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1), b, tmp1);
+   writeout(1, res1);
+   /* consumer already carries its own DPP control: both instructions are expected to remain */
+   //! v1: %tmp2 = v_mov_b32 %a row_mirror bound_ctrl:1
+   //! v1: %res2 = v_sub_f32 %b, %tmp2 row_half_mirror bound_ctrl:1
+   //! p_unit_test 2, %res2
+   Temp tmp2 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
+   Temp res2 = bld.vop2_dpp(aco_opcode::v_sub_f32, bld.def(v1), b, tmp2, dpp_row_half_mirror);
+   writeout(2, res2);
+
+   /* modifiers: neg set on the DPP mov is expected to carry over to the combined add */
+   //! v1: %res3 = v_add_f32 -%a, %b row_mirror bound_ctrl:1
+   //! p_unit_test 3, %res3
+   auto tmp3 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
+   tmp3.instr->dpp().neg[0] = true;
+   Temp res3 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), tmp3, b);
+   writeout(3, res3);
+   /* neg set on the consuming VOP3 add is expected to survive the combination */
+   //! v1: %res4 = v_add_f32 -%a, %b row_mirror bound_ctrl:1
+   //! p_unit_test 4, %res4
+   Temp tmp4 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
+   auto res4 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1), tmp4, b);
+   res4.instr->vop3().neg[0] = true;
+   writeout(4, res4);
+   /* clamp on the consumer: no combination expected, the mov and the clamped add both remain */
+   //! v1: %tmp5 = v_mov_b32 %a row_mirror bound_ctrl:1
+   //! v1: %res5 = v_add_f32 %tmp5, %b clamp
+   //! p_unit_test 5, %res5
+   Temp tmp5 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
+   auto res5 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1), tmp5, b);
+   res5.instr->vop3().clamp = true;
+   writeout(5, res5);
+   /* abs on the consumer applied over neg on the mov: expected to collapse to |%a| */
+   //! v1: %res6 = v_add_f32 |%a|, %b row_mirror bound_ctrl:1
+   //! p_unit_test 6, %res6
+   auto tmp6 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
+   tmp6.instr->dpp().neg[0] = true;
+   auto res6 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1), tmp6, b);
+   res6.instr->vop3().abs[0] = true;
+   writeout(6, res6);
+   /* abs on the non-DPP operand: expected to stay attached to %b when the operands are swapped */
+   //! v1: %res7 = v_subrev_f32 %a, |%b| row_mirror bound_ctrl:1
+   //! p_unit_test 7, %res7
+   Temp tmp7 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
+   auto res7 = bld.vop2_e64(aco_opcode::v_sub_f32, bld.def(v1), b, tmp7);
+   res7.instr->vop3().abs[0] = true;
+   writeout(7, res7);
+
+   /* vcc: v_cndmask_b32 is combined too; the expected output shows the s2 condition as %c:vcc */
+   //! v1: %res8 = v_cndmask_b32 %a, %b, %c:vcc row_mirror bound_ctrl:1
+   //! p_unit_test 8, %res8
+   Temp tmp8 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
+   Temp res8 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), tmp8, b, c);
+   writeout(8, res8);
+
+   finish_opt_test();
+END_TEST
+