From b18472c718b04ba415b23dbb7ca5c554ba33aaf8 Mon Sep 17 00:00:00 2001 From: Daniel Almeida Date: Mon, 18 Dec 2023 09:20:37 -0600 Subject: [PATCH] nak/sm50: add support for brev Fixes dEQP-VK.spirv_assembly.type.vec3.u32.bit_reverse_comp Part-of: --- src/nouveau/compiler/nak/api.rs | 2 +- src/nouveau/compiler/nak/encode_sm50.rs | 32 +++++++++++++++++ src/nouveau/compiler/nak/from_nir.rs | 34 ++++++++++++++++-- src/nouveau/compiler/nak/ir.rs | 48 ++++++++++++++++++++++++- src/nouveau/compiler/nak/legalize.rs | 3 ++ 5 files changed, 115 insertions(+), 4 deletions(-) diff --git a/src/nouveau/compiler/nak/api.rs b/src/nouveau/compiler/nak/api.rs index 0b50adf28f9..41888da7021 100644 --- a/src/nouveau/compiler/nak/api.rs +++ b/src/nouveau/compiler/nak/api.rs @@ -87,7 +87,7 @@ fn nir_options(dev: &nv_device_info) -> nir_shader_compiler_options { op.lower_flrp16 = true; op.lower_flrp32 = true; op.lower_flrp64 = true; - op.lower_bitfield_extract = true; + op.lower_bitfield_extract = dev.sm >= 70; op.lower_bitfield_insert = true; op.lower_pack_half_2x16 = true; op.lower_pack_unorm_2x16 = true; diff --git a/src/nouveau/compiler/nak/encode_sm50.rs b/src/nouveau/compiler/nak/encode_sm50.rs index 180e3f072cb..cdabdd44183 100644 --- a/src/nouveau/compiler/nak/encode_sm50.rs +++ b/src/nouveau/compiler/nak/encode_sm50.rs @@ -2067,6 +2067,37 @@ impl SM50Instr { self.set_dst(op.dst); } + fn encode_bfe(&mut self, op: &OpBfe) { + match &op.range.src_ref { + SrcRef::Imm32(imm32) => { + self.set_opcode(0x3800); + // We guarantee that imm32 is 16bits, as it's a result of a PRMT + // instruction that only fills the bottom two bytes. 
+ self.set_src_imm_i20(20..39, 56, *imm32 & 0xffff); + } + SrcRef::CBuf(cbuf) => { + self.set_opcode(0x4c00); + self.set_src_cb(20..39, cbuf); + } + SrcRef::Zero | SrcRef::Reg(_) => { + self.set_opcode(0x5c00); + self.set_reg_src(20..28, op.range); + } + src => panic!("Unsupported src type for BFE: {src}"), + } + + if op.signed { + self.set_bit(48, true); + } + + if op.reverse { + self.set_bit(40, true); + } + + self.set_reg_src(8..16, op.base); + self.set_dst(op.dst); + } + pub fn encode( instr: &Instr, sm: u8, @@ -2139,6 +2170,7 @@ impl SM50Instr { Op::Nop(_) => si.encode_nop(), Op::Isberd(op) => si.encode_isberd(&op), Op::Out(op) => si.encode_out(&op), + Op::Bfe(op) => si.encode_bfe(&op), _ => panic!("Unhandled instruction {}", instr.op), } diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs index 34209ba421a..f89f2842bcc 100644 --- a/src/nouveau/compiler/nak/from_nir.rs +++ b/src/nouveau/compiler/nak/from_nir.rs @@ -543,9 +543,39 @@ impl<'a> ShaderFromNir<'a> { } nir_op_bitfield_reverse => { let dst = b.alloc_ssa(RegFile::GPR, 1); - b.push_op(OpBRev { + if self.info.sm >= 70 { + b.push_op(OpBRev { + dst: dst.into(), + src: srcs[0], + }); + } else { + // No BREV in Maxwell + b.push_op(OpBfe { + dst: dst.into(), + base: srcs[0], + signed: false, + range: Src::new_imm_u32(0x2000), + reverse: true, + }); + } + dst + } + nir_op_ibitfield_extract | nir_op_ubitfield_extract => { + let range = b.alloc_ssa(RegFile::GPR, 1); + b.push_op(OpPrmt { + dst: range.into(), + srcs: [srcs[1], srcs[2]], + sel: 0x0040.into(), + mode: PrmtMode::Index, + }); + + let dst = b.alloc_ssa(RegFile::GPR, 1); + b.push_op(OpBfe { dst: dst.into(), - src: srcs[0], + base: srcs[0], + signed: !matches!(alu.op, nir_op_ubitfield_extract), + range: range.into(), + reverse: false, }); dst } diff --git a/src/nouveau/compiler/nak/ir.rs b/src/nouveau/compiler/nak/ir.rs index 5cb133f12c9..d41ef0bd329 100644 --- a/src/nouveau/compiler/nak/ir.rs +++ 
b/src/nouveau/compiler/nak/ir.rs @@ -2640,6 +2640,50 @@ impl DisplayOp for OpBRev { } impl_display_for_op!(OpBRev); +/// Bitfield extract. Extracts the bits of `base` selected by `range` into +/// `dst`. +#[repr(C)] +#[derive(SrcsAsSlice, DstsAsSlice)] +pub struct OpBfe { + /// Where the extracted bits are written. + pub dst: Dst, + + /// The source of bits to extract. + #[src_type(ALU)] + pub base: Src, + + /// The range of bits to extract. This source is interpreted as four + /// separate bytes, [b0, b1, b2, b3], most-significant byte first. + /// + /// b0 and b1: unused. + /// b2 (bits 15:8): the number of bits to extract. + /// b3 (bits 7:0): the offset of the first bit to extract. + /// + /// This matches the way the hardware works. + #[src_type(ALU)] + pub range: Src, + + /// Whether the output is signed. + pub signed: bool, + + /// Whether to reverse the bits before inserting them into `dst`. + pub reverse: bool, +} + +impl DisplayOp for OpBfe { + fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "bfe")?; + if self.signed { + write!(f, ".s")?; + } + if self.reverse { + write!(f, ".rev")?; + } + write!(f, " {} {}", self.base, self.range,) + } +} +impl_display_for_op!(OpBfe); + #[repr(C)] #[derive(SrcsAsSlice, DstsAsSlice)] pub struct OpFlo { @@ -4824,6 +4868,7 @@ pub enum Op { DSetP(OpDSetP), BMsk(OpBMsk), BRev(OpBRev), + Bfe(OpBfe), Flo(OpFlo), IAbs(OpIAbs), INeg(OpINeg), @@ -5277,7 +5322,8 @@ impl Instr { | Op::Lop3(_) | Op::Shf(_) | Op::Shl(_) - | Op::Shr(_) => true, + | Op::Shr(_) + | Op::Bfe(_) => true, // Conversions are variable latency?!? 
Op::F2F(_) | Op::F2I(_) | Op::I2F(_) | Op::I2I(_) | Op::FRnd(_) => { diff --git a/src/nouveau/compiler/nak/legalize.rs b/src/nouveau/compiler/nak/legalize.rs index 74ef70dbafe..caa76cc1b6c 100644 --- a/src/nouveau/compiler/nak/legalize.rs +++ b/src/nouveau/compiler/nak/legalize.rs @@ -359,6 +359,9 @@ fn legalize_sm50_instr( copy_alu_src_if_not_reg(b, &mut op.handle, SrcType::GPR); copy_alu_src_if_i20_overflow(b, &mut op.stream, SrcType::ALU); } + Op::Bfe(op) => { + copy_alu_src_if_not_reg(b, &mut op.base, SrcType::ALU); + } _ => { let src_types = instr.src_types(); for (i, src) in instr.srcs_mut().iter_mut().enumerate() {