nak/sm50: add support for brev

Fixes dEQP-VK.spirv_assembly.type.vec3.u32.bit_reverse_comp

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26723>
This commit is contained in:
Daniel Almeida
2023-12-18 09:20:37 -06:00
committed by Marge Bot
parent 84a7e94f31
commit b18472c718
5 changed files with 115 additions and 4 deletions
+1 -1
View File
@@ -87,7 +87,7 @@ fn nir_options(dev: &nv_device_info) -> nir_shader_compiler_options {
op.lower_flrp16 = true;
op.lower_flrp32 = true;
op.lower_flrp64 = true;
op.lower_bitfield_extract = true;
op.lower_bitfield_extract = dev.sm >= 70;
op.lower_bitfield_insert = true;
op.lower_pack_half_2x16 = true;
op.lower_pack_unorm_2x16 = true;
+32
View File
@@ -2067,6 +2067,37 @@ impl SM50Instr {
self.set_dst(op.dst);
}
/// Encodes a bitfield-extract (`OpBfe`) instruction.
///
/// The opcode is chosen from the kind of source that carries `op.range`
/// (register/zero, constant buffer, or 32-bit immediate). After the range
/// operand is written, the `signed` and `reverse` modifiers are applied,
/// followed by the `base` register source and the destination.
///
/// # Panics
///
/// Panics if `op.range` is any other kind of source.
fn encode_bfe(&mut self, op: &OpBfe) {
    // The range operand decides both the opcode and how bits 20.. are filled.
    match &op.range.src_ref {
        SrcRef::Zero | SrcRef::Reg(_) => {
            self.set_opcode(0x5c00);
            self.set_reg_src(20..28, op.range);
        }
        SrcRef::CBuf(cbuf) => {
            self.set_opcode(0x4c00);
            self.set_src_cb(20..39, cbuf);
        }
        SrcRef::Imm32(imm32) => {
            self.set_opcode(0x3800);
            // Only the low 16 bits of the immediate are encoded; the upper
            // half is masked off before it is written.
            self.set_src_imm_i20(20..39, 56, *imm32 & 0xffff);
        }
        src => panic!("Unsupported src type for BFE: {src}"),
    }

    // Modifier bits are only ever set, never explicitly cleared.
    for (bit, enabled) in [(48, op.signed), (40, op.reverse)] {
        if enabled {
            self.set_bit(bit, true);
        }
    }

    self.set_reg_src(8..16, op.base);
    self.set_dst(op.dst);
}
pub fn encode(
instr: &Instr,
sm: u8,
@@ -2139,6 +2170,7 @@ impl SM50Instr {
Op::Nop(_) => si.encode_nop(),
Op::Isberd(op) => si.encode_isberd(&op),
Op::Out(op) => si.encode_out(&op),
Op::Bfe(op) => si.encode_bfe(&op),
_ => panic!("Unhandled instruction {}", instr.op),
}
+32 -2
View File
@@ -543,9 +543,39 @@ impl<'a> ShaderFromNir<'a> {
}
nir_op_bitfield_reverse => {
let dst = b.alloc_ssa(RegFile::GPR, 1);
b.push_op(OpBRev {
if self.info.sm >= 70 {
b.push_op(OpBRev {
dst: dst.into(),
src: srcs[0],
});
} else {
// No BREV in Maxwell
b.push_op(OpBfe {
dst: dst.into(),
base: srcs[0],
signed: false,
range: Src::new_imm_u32(0x2000),
reverse: true,
});
}
dst
}
nir_op_ibitfield_extract | nir_op_ubitfield_extract => {
let range = b.alloc_ssa(RegFile::GPR, 1);
b.push_op(OpPrmt {
dst: range.into(),
srcs: [srcs[1], srcs[2]],
sel: 0x0040.into(),
mode: PrmtMode::Index,
});
let dst = b.alloc_ssa(RegFile::GPR, 1);
b.push_op(OpBfe {
dst: dst.into(),
src: srcs[0],
base: srcs[0],
signed: !matches!(alu.op, nir_op_ubitfield_extract),
range: range.into(),
reverse: false,
});
dst
}
+47 -1
View File
@@ -2640,6 +2640,50 @@ impl DisplayOp for OpBRev {
}
impl_display_for_op!(OpBRev);
/// Bitfield extract. Extracts a contiguous range of bits from `base` into
/// `dst`. The range (first bit and bit count) is supplied by `range`, and
/// the result may optionally be treated as signed and/or bit-reversed.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpBfe {
/// Where to insert the bits.
pub dst: Dst,
/// The source of bits to extract.
#[src_type(ALU)]
pub base: Src,
/// The range of bits to extract. This source is interpreted as four
/// separate bytes, [b0, b1, b2, b3].
///
/// b0 and b1: unused
/// b2: the number of bits to extract.
/// b3: the offset of the first bit to extract.
///
/// This matches the way the hardware works.
///
/// NOTE(review): the bytes above appear to be listed most-significant
/// first, i.e. b3 is bits 0..8 and b2 is bits 8..16 of the 32-bit value.
/// A range of `0x2000` would then mean "32 bits starting at bit 0" --
/// confirm against the hardware documentation.
#[src_type(ALU)]
pub range: Src,
/// Whether the output is signed
// NOTE(review): presumably this sign-extends the extracted field -- confirm.
pub signed: bool,
/// Whether to reverse the bits before inserting them into `dst`.
pub reverse: bool,
}
impl DisplayOp for OpBfe {
    /// Writes the mnemonic `bfe`, followed by `.s` when the op is signed and
    /// `.rev` when it is bit-reversing, then the `base` and `range` sources
    /// separated by single spaces.
    fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let sign_suffix = if self.signed { ".s" } else { "" };
        let rev_suffix = if self.reverse { ".rev" } else { "" };
        write!(
            f,
            "bfe{sign_suffix}{rev_suffix} {} {}",
            self.base, self.range
        )
    }
}
impl_display_for_op!(OpBfe);
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpFlo {
@@ -4824,6 +4868,7 @@ pub enum Op {
DSetP(OpDSetP),
BMsk(OpBMsk),
BRev(OpBRev),
Bfe(OpBfe),
Flo(OpFlo),
IAbs(OpIAbs),
INeg(OpINeg),
@@ -5277,7 +5322,8 @@ impl Instr {
| Op::Lop3(_)
| Op::Shf(_)
| Op::Shl(_)
| Op::Shr(_) => true,
| Op::Shr(_)
| Op::Bfe(_) => true,
// Conversions are variable latency?!?
Op::F2F(_) | Op::F2I(_) | Op::I2F(_) | Op::I2I(_) | Op::FRnd(_) => {
+3
View File
@@ -359,6 +359,9 @@ fn legalize_sm50_instr(
copy_alu_src_if_not_reg(b, &mut op.handle, SrcType::GPR);
copy_alu_src_if_i20_overflow(b, &mut op.stream, SrcType::ALU);
}
Op::Bfe(op) => {
copy_alu_src_if_not_reg(b, &mut op.base, SrcType::ALU);
}
_ => {
let src_types = instr.src_types();
for (i, src) in instr.srcs_mut().iter_mut().enumerate() {