aco: fix sgpr ubfe/ibfe if the offset is too large

If the offset is large enough, its upper bits overlap the width field of the
packed operand ((width << 16) | offset) and could affect the width. I'm also
not sure if the hardware masks the offset by 0x1f.

Found by inspection. No fossil-db changes.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6424>
This commit is contained in:
Rhys Perry
2020-08-21 13:12:38 +01:00
committed by Marge Bot
parent 454bc595d1
commit 14d748eb28
+14 -13
View File
@@ -2765,24 +2765,25 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
unreachable("Unsupported BFE bit size");
if (dst.type() == RegType::sgpr) {
Operand extract;
nir_const_value* const_offset = nir_src_as_const_value(instr->src[1].src);
nir_const_value* const_bits = nir_src_as_const_value(instr->src[2].src);
if (const_offset && const_bits) {
uint32_t const_extract = (const_bits->u32 << 16) | const_offset->u32;
extract = Operand(const_extract);
uint32_t extract = (const_bits->u32 << 16) | (const_offset->u32 & 0x1f);
aco_opcode opcode = instr->op == nir_op_ubfe ? aco_opcode::s_bfe_u32 : aco_opcode::s_bfe_i32;
bld.sop2(opcode, Definition(dst), bld.def(s1, scc), base, Operand(extract));
} else if (instr->op == nir_op_ubfe) {
Temp mask = bld.sop2(aco_opcode::s_bfm_b32, bld.def(s1), bits, offset);
Temp masked = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), base, mask);
bld.sop2(aco_opcode::s_lshr_b32, Definition(dst), bld.def(s1, scc), masked, offset);
} else {
Operand width;
if (const_bits) {
width = Operand(const_bits->u32 << 16);
} else {
width = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), bits, Operand(16u));
}
extract = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), offset, width);
}
Operand bits_op = const_bits ? Operand(const_bits->u32 << 16) :
bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), bits, Operand(16u));
Operand offset_op = const_offset ? Operand(const_offset->u32 & 0x1fu) :
bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), offset, Operand(0x1fu));
aco_opcode opcode = instr->op == nir_op_ubfe ? aco_opcode::s_bfe_u32 : aco_opcode::s_bfe_i32;
bld.sop2(opcode, Definition(dst), bld.def(s1, scc), base, extract);
Temp extract = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), bits_op, offset_op);
bld.sop2(aco_opcode::s_bfe_i32, Definition(dst), bld.def(s1, scc), base, extract);
}
} else {
aco_opcode opcode = instr->op == nir_op_ubfe ? aco_opcode::v_bfe_u32 : aco_opcode::v_bfe_i32;