aco/optimizer: fix signed extract of sub dword temps with SDWA
If an instruction didn't already use SDWA, convert_to_SDWA in apply_extract
will add ubyte0/uword0 selections for v1b/v2b operands. This loses the
information that the instruction doesn't care about the high bits and makes
the subsequent apply_extract_twice fail.
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Fixes: 6cb9d39bc2 ("aco: combine extracts with sub-dword definitions")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32803>
This commit is contained in:
@@ -1149,9 +1149,13 @@ apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_info&
|
||||
instr.reset(mad);
|
||||
} else if (can_use_SDWA(ctx.program->gfx_level, instr, true) &&
|
||||
(tmp.type() == RegType::vgpr || ctx.program->gfx_level >= GFX9)) {
|
||||
convert_to_SDWA(ctx.program->gfx_level, instr);
|
||||
instr->sdwa().sel[idx] = apply_extract_twice(sel, instr->operands[idx].getTemp(),
|
||||
instr->sdwa().sel[idx], Temp(0, v1));
|
||||
if (instr->isSDWA()) {
|
||||
instr->sdwa().sel[idx] = apply_extract_twice(sel, instr->operands[idx].getTemp(),
|
||||
instr->sdwa().sel[idx], Temp(0, v1));
|
||||
} else {
|
||||
convert_to_SDWA(ctx.program->gfx_level, instr);
|
||||
instr->sdwa().sel[idx] = sel;
|
||||
}
|
||||
} else if (instr->isVALU()) {
|
||||
if (sel.offset()) {
|
||||
instr->valu().opsel[idx] = true;
|
||||
|
||||
@@ -689,7 +689,7 @@ BEGIN_TEST(optimize.sdwa.subdword_extract)
|
||||
Operand::c32(8), Operand::c32(0)),
|
||||
inputs[2]));
|
||||
|
||||
//! v1b: %res3 = v_or_b32 %a, %b dst_sel:ubyte0 dst_preserve src0_sel:ubyte0 src1_sel:ubyte2
|
||||
//! v1b: %res3 = v_or_b32 %a, %b dst_sel:ubyte0 dst_preserve src0_sel:uword0 src1_sel:ubyte2
|
||||
//! p_unit_test 3, %res3
|
||||
writeout(3, bld.vop2(aco_opcode::v_or_b32, bld.def(v1b),
|
||||
bld.pseudo(aco_opcode::p_extract, bld.def(v1b), a, Operand::c32(0),
|
||||
@@ -697,6 +697,22 @@ BEGIN_TEST(optimize.sdwa.subdword_extract)
|
||||
bld.pseudo(aco_opcode::p_extract, bld.def(v1b), b, Operand::c32(1),
|
||||
Operand::c32(16), Operand::c32(0))));
|
||||
|
||||
//! v2b: %res4 = v_cvt_f16_i16 %a dst_sel:uword0 dst_preserve src0_sel:sbyte0
|
||||
//! p_unit_test 4, %res4
|
||||
writeout(4, bld.vop1(aco_opcode::v_cvt_f16_i16, bld.def(v2b),
|
||||
bld.pseudo(aco_opcode::p_extract, bld.def(v2b), a, Operand::c32(0),
|
||||
Operand::c32(8), Operand::c32(1))));
|
||||
|
||||
/* TODO incremental conversion to sdwa loses information if zero extend is actually necessary */
|
||||
//! v2b: %tmp5 = p_extract %b, 1, 8, 1
|
||||
//! v2b: %res5 = v_or_b32 %a, %tmp5 dst_sel:uword0 dst_preserve src0_sel:sbyte0 src1_sel:uword0
|
||||
//! p_unit_test 5, %res5
|
||||
writeout(5, bld.vop2(aco_opcode::v_or_b32, bld.def(v2b),
|
||||
bld.pseudo(aco_opcode::p_extract, bld.def(v2b), a, Operand::c32(0),
|
||||
Operand::c32(8), Operand::c32(1)),
|
||||
bld.pseudo(aco_opcode::p_extract, bld.def(v2b), b, Operand::c32(1),
|
||||
Operand::c32(8), Operand::c32(1))));
|
||||
|
||||
finish_opt_test();
|
||||
END_TEST
|
||||
|
||||
|
||||
Reference in New Issue
Block a user