aco: add fetch_inactive field to DPP instructions
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25525>
This commit is contained in:
@@ -59,7 +59,8 @@ BEGIN_TEST(optimize.neg)
|
||||
Temp neg_abs_a = fneg(abs_a);
|
||||
writeout(4, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), neg_abs_a, inputs[1]));
|
||||
|
||||
//! v1: %res5 = v_mul_f32 -%a, %b row_shl:1 bound_ctrl:1
|
||||
//~gfx9! v1: %res5 = v_mul_f32 -%a, %b row_shl:1 bound_ctrl:1
|
||||
//~gfx10! v1: %res5 = v_mul_f32 -%a, %b row_shl:1 bound_ctrl:1 fi
|
||||
//! p_unit_test 5, %res5
|
||||
writeout(5,
|
||||
bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), neg_a, inputs[1], dpp_row_sl(1)));
|
||||
@@ -999,42 +1000,42 @@ BEGIN_TEST(optimizer.dpp)
|
||||
Operand d(inputs[3]);
|
||||
|
||||
/* basic optimization */
|
||||
//! v1: %res0 = v_add_f32 %a, %b row_mirror bound_ctrl:1
|
||||
//! v1: %res0 = v_add_f32 %a, %b row_mirror bound_ctrl:1 fi
|
||||
//! p_unit_test 0, %res0
|
||||
Temp tmp0 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
|
||||
Temp res0 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), tmp0, b);
|
||||
writeout(0, res0);
|
||||
|
||||
/* operand swapping */
|
||||
//! v1: %res1 = v_subrev_f32 %a, %b row_mirror bound_ctrl:1
|
||||
//! v1: %res1 = v_subrev_f32 %a, %b row_mirror bound_ctrl:1 fi
|
||||
//! p_unit_test 1, %res1
|
||||
Temp tmp1 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
|
||||
Temp res1 = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1), b, tmp1);
|
||||
writeout(1, res1);
|
||||
|
||||
//! v1: %tmp2 = v_mov_b32 %a row_mirror bound_ctrl:1
|
||||
//! v1: %res2 = v_sub_f32 %b, %tmp2 row_half_mirror bound_ctrl:1
|
||||
//! v1: %tmp2 = v_mov_b32 %a row_mirror bound_ctrl:1 fi
|
||||
//! v1: %res2 = v_sub_f32 %b, %tmp2 row_half_mirror bound_ctrl:1 fi
|
||||
//! p_unit_test 2, %res2
|
||||
Temp tmp2 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
|
||||
Temp res2 = bld.vop2_dpp(aco_opcode::v_sub_f32, bld.def(v1), b, tmp2, dpp_row_half_mirror);
|
||||
writeout(2, res2);
|
||||
|
||||
/* modifiers */
|
||||
//! v1: %res3 = v_add_f32 -%a, %b row_mirror bound_ctrl:1
|
||||
//! v1: %res3 = v_add_f32 -%a, %b row_mirror bound_ctrl:1 fi
|
||||
//! p_unit_test 3, %res3
|
||||
auto tmp3 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
|
||||
tmp3->dpp16().neg[0] = true;
|
||||
Temp res3 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), tmp3, b);
|
||||
writeout(3, res3);
|
||||
|
||||
//! v1: %res4 = v_add_f32 -%a, %b row_mirror bound_ctrl:1
|
||||
//! v1: %res4 = v_add_f32 -%a, %b row_mirror bound_ctrl:1 fi
|
||||
//! p_unit_test 4, %res4
|
||||
Temp tmp4 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
|
||||
auto res4 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1), tmp4, b);
|
||||
res4->valu().neg[0] = true;
|
||||
writeout(4, res4);
|
||||
|
||||
//! v1: %tmp5 = v_mov_b32 %a row_mirror bound_ctrl:1
|
||||
//! v1: %tmp5 = v_mov_b32 %a row_mirror bound_ctrl:1 fi
|
||||
//! v1: %res5 = v_add_f32 %tmp5, %b clamp
|
||||
//! p_unit_test 5, %res5
|
||||
Temp tmp5 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
|
||||
@@ -1042,7 +1043,7 @@ BEGIN_TEST(optimizer.dpp)
|
||||
res5->valu().clamp = true;
|
||||
writeout(5, res5);
|
||||
|
||||
//! v1: %res6 = v_add_f32 |%a|, %b row_mirror bound_ctrl:1
|
||||
//! v1: %res6 = v_add_f32 |%a|, %b row_mirror bound_ctrl:1 fi
|
||||
//! p_unit_test 6, %res6
|
||||
auto tmp6 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
|
||||
tmp6->dpp16().neg[0] = true;
|
||||
@@ -1050,14 +1051,14 @@ BEGIN_TEST(optimizer.dpp)
|
||||
res6->valu().abs[0] = true;
|
||||
writeout(6, res6);
|
||||
|
||||
//! v1: %res7 = v_subrev_f32 %a, |%b| row_mirror bound_ctrl:1
|
||||
//! v1: %res7 = v_subrev_f32 %a, |%b| row_mirror bound_ctrl:1 fi
|
||||
//! p_unit_test 7, %res7
|
||||
Temp tmp7 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
|
||||
auto res7 = bld.vop2_e64(aco_opcode::v_sub_f32, bld.def(v1), b, tmp7);
|
||||
res7->valu().abs[0] = true;
|
||||
writeout(7, res7);
|
||||
|
||||
//! v1: %tmp11 = v_mov_b32 -%a row_mirror bound_ctrl:1
|
||||
//! v1: %tmp11 = v_mov_b32 -%a row_mirror bound_ctrl:1 fi
|
||||
//! v1: %res11 = v_add_u32 %tmp11, %b
|
||||
//! p_unit_test 11, %res11
|
||||
auto tmp11 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
|
||||
@@ -1065,7 +1066,7 @@ BEGIN_TEST(optimizer.dpp)
|
||||
Temp res11 = bld.vop2(aco_opcode::v_add_u32, bld.def(v1), tmp11, b);
|
||||
writeout(11, res11);
|
||||
|
||||
//! v1: %tmp12 = v_mov_b32 -%a row_mirror bound_ctrl:1
|
||||
//! v1: %tmp12 = v_mov_b32 -%a row_mirror bound_ctrl:1 fi
|
||||
//! v1: %res12 = v_add_f16 %tmp12, %b
|
||||
//! p_unit_test 12, %res12
|
||||
auto tmp12 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
|
||||
@@ -1074,21 +1075,21 @@ BEGIN_TEST(optimizer.dpp)
|
||||
writeout(12, res12);
|
||||
|
||||
/* vcc */
|
||||
//! v1: %res8 = v_cndmask_b32 %a, %b, %c:vcc row_mirror bound_ctrl:1
|
||||
//! v1: %res8 = v_cndmask_b32 %a, %b, %c:vcc row_mirror bound_ctrl:1 fi
|
||||
//! p_unit_test 8, %res8
|
||||
Temp tmp8 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
|
||||
Temp res8 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), tmp8, b, c);
|
||||
writeout(8, res8);
|
||||
|
||||
/* sgprs */
|
||||
//! v1: %tmp9 = v_mov_b32 %a row_mirror bound_ctrl:1
|
||||
//! v1: %tmp9 = v_mov_b32 %a row_mirror bound_ctrl:1 fi
|
||||
//! v1: %res9 = v_add_f32 %tmp9, %d
|
||||
//! p_unit_test 9, %res9
|
||||
Temp tmp9 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
|
||||
Temp res9 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1), tmp9, d);
|
||||
writeout(9, res9);
|
||||
|
||||
//! v1: %tmp10 = v_mov_b32 %a row_mirror bound_ctrl:1
|
||||
//! v1: %tmp10 = v_mov_b32 %a row_mirror bound_ctrl:1 fi
|
||||
//! v1: %res10 = v_add_f32 %d, %tmp10
|
||||
//! p_unit_test 10, %res10
|
||||
Temp tmp10 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
|
||||
@@ -1109,7 +1110,7 @@ BEGIN_TEST(optimize.dpp_prop)
|
||||
Temp one = bld.copy(bld.def(v1), Operand::c32(1));
|
||||
writeout(0, bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), one, inputs[0], dpp_row_sl(1)));
|
||||
|
||||
//! v1: %res1 = v_mul_f32 %a, %one row_shl:1 bound_ctrl:1
|
||||
//! v1: %res1 = v_mul_f32 %a, %one row_shl:1 bound_ctrl:1 fi
|
||||
//! p_unit_test 1, %res1
|
||||
writeout(1, bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], one, dpp_row_sl(1)));
|
||||
|
||||
@@ -1120,7 +1121,7 @@ BEGIN_TEST(optimize.dpp_prop)
|
||||
bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), literal1, inputs[0], dpp_row_sl(1)));
|
||||
|
||||
//! v1: %literal2 = p_parallelcopy 0x12345679
|
||||
//! v1: %res3 = v_mul_f32 %a, %literal row_shl:1 bound_ctrl:1
|
||||
//! v1: %res3 = v_mul_f32 %a, %literal row_shl:1 bound_ctrl:1 fi
|
||||
//! p_unit_test 3, %res3
|
||||
Temp literal2 = bld.copy(bld.def(v1), Operand::c32(0x12345679u));
|
||||
writeout(3,
|
||||
@@ -1132,7 +1133,7 @@ BEGIN_TEST(optimize.dpp_prop)
|
||||
Temp b_v = bld.copy(bld.def(v1), inputs[1]);
|
||||
writeout(4, bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), b_v, inputs[0], dpp_row_sl(1)));
|
||||
|
||||
//! v1: %res5 = v_mul_f32 %a, %b_v row_shl:1 bound_ctrl:1
|
||||
//! v1: %res5 = v_mul_f32 %a, %b_v row_shl:1 bound_ctrl:1 fi
|
||||
//! p_unit_test 5, %res5
|
||||
writeout(5, bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], b_v, dpp_row_sl(1)));
|
||||
|
||||
@@ -2006,11 +2007,11 @@ BEGIN_TEST(optimize.dpp_opsel)
|
||||
Temp b_hi = bld.pseudo(aco_opcode::p_extract_vector, bld.def(v2b), b, Operand::c32(1));
|
||||
Temp b_lo = bld.pseudo(aco_opcode::p_extract_vector, bld.def(v2b), b, Operand::c32(0));
|
||||
|
||||
//! v2b: %res0 = v_add_f16 hi(%a), hi(%b) row_mirror bound_ctrl:1
|
||||
//! v2b: %res0 = v_add_f16 hi(%a), hi(%b) row_mirror bound_ctrl:1 fi
|
||||
//! p_unit_test 0, %res0
|
||||
writeout(0, fadd(dpp16_hi, b_hi));
|
||||
|
||||
//! v2b: %res1 = v_add_f16 hi(%a), %b dpp8:[0,0,0,0,0,0,0,0]
|
||||
//! v2b: %res1 = v_add_f16 hi(%a), %b dpp8:[0,0,0,0,0,0,0,0] fi
|
||||
//! p_unit_test 1, %res1
|
||||
writeout(1, fadd(b_lo, dpp8_hi));
|
||||
|
||||
|
||||
Reference in New Issue
Block a user