nak/sm50: add support for brev
Fixes: dEQP-VK.spirv_assembly.type.vec3.u32.bit_reverse_comp
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26723>
This commit is contained in:
committed by
Marge Bot
parent
84a7e94f31
commit
b18472c718
@@ -87,7 +87,7 @@ fn nir_options(dev: &nv_device_info) -> nir_shader_compiler_options {
|
||||
op.lower_flrp16 = true;
|
||||
op.lower_flrp32 = true;
|
||||
op.lower_flrp64 = true;
|
||||
op.lower_bitfield_extract = true;
|
||||
op.lower_bitfield_extract = dev.sm >= 70;
|
||||
op.lower_bitfield_insert = true;
|
||||
op.lower_pack_half_2x16 = true;
|
||||
op.lower_pack_unorm_2x16 = true;
|
||||
|
||||
@@ -2067,6 +2067,37 @@ impl SM50Instr {
|
||||
self.set_dst(op.dst);
|
||||
}
|
||||
|
||||
/// Encodes an `OpBfe` (bitfield extract) into this SM50 instruction.
///
/// The opcode depends on the kind of the `range` source: 0x3800 for a 32-bit
/// immediate, 0x4c00 for a constant-buffer reference and 0x5c00 for zero or
/// a register.
///
/// # Panics
///
/// Panics if `op.range` is any other kind of source.
fn encode_bfe(&mut self, op: &OpBfe) {
|
||||
match &op.range.src_ref {
|
||||
SrcRef::Imm32(imm32) => {
|
||||
self.set_opcode(0x3800);
|
||||
// Only the low 16 bits of the immediate are encoded; anything above
|
||||
// them is dropped by the mask below.
// NOTE(review): the original rationale was that the value comes from
// a PRMT that only fills the bottom two bytes; the bit-reverse
// lowering passes the literal 0x2000 instead, which also fits.
|
||||
self.set_src_imm_i20(20..39, 56, *imm32 & 0xffff);
|
||||
}
|
||||
SrcRef::CBuf(cbuf) => {
|
||||
self.set_opcode(0x4c00);
|
||||
self.set_src_cb(20..39, cbuf);
|
||||
}
|
||||
SrcRef::Zero | SrcRef::Reg(_) => {
|
||||
self.set_opcode(0x5c00);
|
||||
self.set_reg_src(20..28, op.range);
|
||||
}
|
||||
src => panic!("Unsupported src type for BFE: {src}"),
|
||||
}
|
||||
|
||||
// Bit 48 is set for a signed extract.
if op.signed {
|
||||
self.set_bit(48, true);
|
||||
}
|
||||
|
||||
// Bit 40 is set when the bit-reverse flag is requested.
if op.reverse {
|
||||
self.set_bit(40, true);
|
||||
}
|
||||
|
||||
// The value to extract from goes in bits 8..16, whatever form `range` took.
self.set_reg_src(8..16, op.base);
|
||||
self.set_dst(op.dst);
|
||||
}
|
||||
|
||||
pub fn encode(
|
||||
instr: &Instr,
|
||||
sm: u8,
|
||||
@@ -2139,6 +2170,7 @@ impl SM50Instr {
|
||||
Op::Nop(_) => si.encode_nop(),
|
||||
Op::Isberd(op) => si.encode_isberd(&op),
|
||||
Op::Out(op) => si.encode_out(&op),
|
||||
Op::Bfe(op) => si.encode_bfe(&op),
|
||||
_ => panic!("Unhandled instruction {}", instr.op),
|
||||
}
|
||||
|
||||
|
||||
@@ -543,9 +543,39 @@ impl<'a> ShaderFromNir<'a> {
|
||||
}
|
||||
nir_op_bitfield_reverse => {
|
||||
let dst = b.alloc_ssa(RegFile::GPR, 1);
|
||||
b.push_op(OpBRev {
|
||||
if self.info.sm >= 70 {
|
||||
b.push_op(OpBRev {
|
||||
dst: dst.into(),
|
||||
src: srcs[0],
|
||||
});
|
||||
} else {
|
||||
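// No BREV in Maxwell (this branch is taken when sm < 70). Use BFE with
// the reverse flag over the whole word instead: range 0x2000 has 0x20
// (32) in its second byte and 0 in its lowest byte, which reads as a
// 32-bit field at offset 0 under OpBfe's `range` layout.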
|
||||
b.push_op(OpBfe {
|
||||
dst: dst.into(),
|
||||
base: srcs[0],
|
||||
signed: false,
|
||||
range: Src::new_imm_u32(0x2000),
|
||||
reverse: true,
|
||||
});
|
||||
}
|
||||
dst
|
||||
}
|
||||
nir_op_ibitfield_extract | nir_op_ubitfield_extract => {
|
||||
let range = b.alloc_ssa(RegFile::GPR, 1);
|
||||
b.push_op(OpPrmt {
|
||||
dst: range.into(),
|
||||
srcs: [srcs[1], srcs[2]],
|
||||
sel: 0x0040.into(),
|
||||
mode: PrmtMode::Index,
|
||||
});
|
||||
|
||||
let dst = b.alloc_ssa(RegFile::GPR, 1);
|
||||
b.push_op(OpBfe {
|
||||
dst: dst.into(),
|
||||
src: srcs[0],
|
||||
base: srcs[0],
|
||||
signed: !matches!(alu.op, nir_op_ubitfield_extract),
|
||||
range: range.into(),
|
||||
reverse: false,
|
||||
});
|
||||
dst
|
||||
}
|
||||
|
||||
@@ -2640,6 +2640,50 @@ impl DisplayOp for OpBRev {
|
||||
}
|
||||
impl_display_for_op!(OpBRev);
|
||||
|
||||
/// Bitfield extract. Extracts the bits of `base` selected by `range` into
|
||||
/// `dst`, optionally sign-extending and/or bit-reversing them.
|
||||
#[repr(C)]
|
||||
#[derive(SrcsAsSlice, DstsAsSlice)]
|
||||
pub struct OpBfe {
|
||||
/// Where to insert the bits.
|
||||
pub dst: Dst,
|
||||
|
||||
/// The source of bits to extract.
|
||||
#[src_type(ALU)]
|
||||
pub base: Src,
|
||||
|
||||
/// The range of bits to extract. This source is interpreted as four
|
||||
/// separate bytes, [b0, b1, b2, b3], listed most-significant byte first.
|
||||
///
|
||||
/// b0 and b1: unused (the SM50 immediate encoding keeps only the low 16 bits)
|
||||
/// b2: the number of bits to extract.
|
||||
/// b3: the offset of the first bit to extract.
|
||||
///
/// For example, 0x2000 selects 0x20 (32) bits starting at offset 0.
///
|
||||
/// This matches the way the hardware works.
|
||||
#[src_type(ALU)]
|
||||
pub range: Src,
|
||||
|
||||
/// Whether the output is signed
|
||||
pub signed: bool,
|
||||
|
||||
/// Whether to reverse the bits before inserting them into `dst`.
|
||||
pub reverse: bool,
|
||||
}
|
||||
|
||||
/// Textual form of `OpBfe`: the mnemonic `bfe`, an optional `.s` suffix when
/// `signed` is set, an optional `.rev` suffix when `reverse` is set, then the
/// `base` and `range` sources separated by spaces.
impl DisplayOp for OpBfe {
|
||||
fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "bfe")?;
|
||||
// Modifier suffixes are emitted in a fixed order: `.s` before `.rev`.
if self.signed {
|
||||
write!(f, ".s")?;
|
||||
}
|
||||
if self.reverse {
|
||||
write!(f, ".rev")?;
|
||||
}
|
||||
// Only the two sources are written here; `dst` is not part of this output.
write!(f, " {} {}", self.base, self.range,)
|
||||
}
|
||||
}
|
||||
impl_display_for_op!(OpBfe);
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(SrcsAsSlice, DstsAsSlice)]
|
||||
pub struct OpFlo {
|
||||
@@ -4824,6 +4868,7 @@ pub enum Op {
|
||||
DSetP(OpDSetP),
|
||||
BMsk(OpBMsk),
|
||||
BRev(OpBRev),
|
||||
Bfe(OpBfe),
|
||||
Flo(OpFlo),
|
||||
IAbs(OpIAbs),
|
||||
INeg(OpINeg),
|
||||
@@ -5277,7 +5322,8 @@ impl Instr {
|
||||
| Op::Lop3(_)
|
||||
| Op::Shf(_)
|
||||
| Op::Shl(_)
|
||||
| Op::Shr(_) => true,
|
||||
| Op::Shr(_)
|
||||
| Op::Bfe(_) => true,
|
||||
|
||||
// Conversions are variable latency?!?
|
||||
Op::F2F(_) | Op::F2I(_) | Op::I2F(_) | Op::I2I(_) | Op::FRnd(_) => {
|
||||
|
||||
@@ -359,6 +359,9 @@ fn legalize_sm50_instr(
|
||||
copy_alu_src_if_not_reg(b, &mut op.handle, SrcType::GPR);
|
||||
copy_alu_src_if_i20_overflow(b, &mut op.stream, SrcType::ALU);
|
||||
}
|
||||
Op::Bfe(op) => {
|
||||
copy_alu_src_if_not_reg(b, &mut op.base, SrcType::ALU);
|
||||
}
|
||||
_ => {
|
||||
let src_types = instr.src_types();
|
||||
for (i, src) in instr.srcs_mut().iter_mut().enumerate() {
|
||||
|
||||
Reference in New Issue
Block a user