aco: fix sgpr ubfe/ibfe if the offset is too large
If the offset is large enough, it could affect the width. I'm also not sure if the hardware masks the offset by 0x1f. Found by inspection. No fossil-db changes. Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6424>
This commit is contained in:
@@ -2765,24 +2765,25 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
|
||||
unreachable("Unsupported BFE bit size");
|
||||
|
||||
if (dst.type() == RegType::sgpr) {
|
||||
Operand extract;
|
||||
nir_const_value* const_offset = nir_src_as_const_value(instr->src[1].src);
|
||||
nir_const_value* const_bits = nir_src_as_const_value(instr->src[2].src);
|
||||
if (const_offset && const_bits) {
|
||||
uint32_t const_extract = (const_bits->u32 << 16) | const_offset->u32;
|
||||
extract = Operand(const_extract);
|
||||
uint32_t extract = (const_bits->u32 << 16) | (const_offset->u32 & 0x1f);
|
||||
aco_opcode opcode = instr->op == nir_op_ubfe ? aco_opcode::s_bfe_u32 : aco_opcode::s_bfe_i32;
|
||||
bld.sop2(opcode, Definition(dst), bld.def(s1, scc), base, Operand(extract));
|
||||
} else if (instr->op == nir_op_ubfe) {
|
||||
Temp mask = bld.sop2(aco_opcode::s_bfm_b32, bld.def(s1), bits, offset);
|
||||
Temp masked = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), base, mask);
|
||||
bld.sop2(aco_opcode::s_lshr_b32, Definition(dst), bld.def(s1, scc), masked, offset);
|
||||
} else {
|
||||
Operand width;
|
||||
if (const_bits) {
|
||||
width = Operand(const_bits->u32 << 16);
|
||||
} else {
|
||||
width = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), bits, Operand(16u));
|
||||
}
|
||||
extract = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), offset, width);
|
||||
}
|
||||
Operand bits_op = const_bits ? Operand(const_bits->u32 << 16) :
|
||||
bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), bits, Operand(16u));
|
||||
Operand offset_op = const_offset ? Operand(const_offset->u32 & 0x1fu) :
|
||||
bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), offset, Operand(0x1fu));
|
||||
|
||||
aco_opcode opcode = instr->op == nir_op_ubfe ? aco_opcode::s_bfe_u32 : aco_opcode::s_bfe_i32;
|
||||
bld.sop2(opcode, Definition(dst), bld.def(s1, scc), base, extract);
|
||||
Temp extract = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), bits_op, offset_op);
|
||||
bld.sop2(aco_opcode::s_bfe_i32, Definition(dst), bld.def(s1, scc), base, extract);
|
||||
}
|
||||
|
||||
} else {
|
||||
aco_opcode opcode = instr->op == nir_op_ubfe ? aco_opcode::v_bfe_u32 : aco_opcode::v_bfe_i32;
|
||||
|
||||
Reference in New Issue
Block a user