From b96d2d4351a91308e6697b7e089a52cc2df2dc7f Mon Sep 17 00:00:00 2001 From: Faith Ekstrand <faith.ekstrand@collabora.com> Date: Tue, 16 Jul 2024 16:05:03 -0500 Subject: [PATCH] nak: Add some helpers for working with OpPrmt selectors We had some helpers for this at one point but the old ones were super clunky and didn't really do what we wanted so they were removed. However, we have a lot of manual bit banging in opt_copy_prop and we're about to add more. These new helpers will make it all a lot safer. Part-of: --- src/nouveau/compiler/nak/ir.rs | 76 +++++++++++++++++++++++ src/nouveau/compiler/nak/opt_copy_prop.rs | 67 +++++--------------- 2 files changed, 92 insertions(+), 51 deletions(-) diff --git a/src/nouveau/compiler/nak/ir.rs b/src/nouveau/compiler/nak/ir.rs index d2df2c60af5..1d823773849 100644 --- a/src/nouveau/compiler/nak/ir.rs +++ b/src/nouveau/compiler/nak/ir.rs @@ -903,6 +903,12 @@ impl From for SrcRef { } } +impl From<PrmtSel> for SrcRef { + fn from(sel: PrmtSel) -> SrcRef { + u32::from(sel.0).into() + } +} + impl From<CBufRef> for SrcRef { fn from(cb: CBufRef) -> SrcRef { SrcRef::CBuf(cb) @@ -3659,6 +3665,41 @@ impl DisplayOp for OpMov { } impl_display_for_op!(OpMov); +#[derive(Copy, Clone)] +pub struct PrmtSelByte(u8); + +impl PrmtSelByte { + pub fn src(&self) -> usize { + ((self.0 >> 2) & 0x1).into() + } + + pub fn byte(&self) -> usize { + (self.0 & 0x3).into() + } + + pub fn msb(&self) -> bool { + (self.0 & 0x8) != 0 + } + + pub fn fold_u32(&self, u: u32) -> u8 { + let mut sb = (u >> (self.byte() * 8)) as u8; + if self.msb() { + sb = ((sb as i8) >> 7) as u8; + } + sb + } +} + +#[derive(Clone, Copy, Eq, Hash, PartialEq)] +pub struct PrmtSel(pub u16); + +impl PrmtSel { + pub fn get(&self, byte_idx: usize) -> PrmtSelByte { + assert!(byte_idx < 4); + PrmtSelByte(((self.0 >> (byte_idx * 4)) & 0xf) as u8) + } +} + #[allow(dead_code)] #[derive(Clone, Copy, Eq, Hash, PartialEq)] pub enum PrmtMode { @@ -3700,6 +3741,41 @@ pub struct OpPrmt { pub mode: PrmtMode, } +impl OpPrmt { + pub fn get_sel(&self) -> 
Option<PrmtSel> { + // TODO: We could construct a PrmtSel for the other modes but we don't + // use them right now because they're kinda pointless. + if self.mode != PrmtMode::Index { + return None; + } + + if let Some(sel) = self.sel.as_u32() { + // The top 16 bits are ignored + Some(PrmtSel(sel as u16)) + } else { + None + } + } + + pub fn as_u32(&self) -> Option<u32> { + let Some(sel) = self.get_sel() else { + return None; + }; + + let mut imm = 0_u32; + for b in 0..4 { + let sel_byte = sel.get(b); + let Some(src_u32) = self.srcs[sel_byte.src()].as_u32() else { + return None; + }; + + let sb = sel_byte.fold_u32(src_u32); + imm |= u32::from(sb) << (b * 8); + } + Some(imm) + } +} + impl DisplayOp for OpPrmt { fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( diff --git a/src/nouveau/compiler/nak/opt_copy_prop.rs b/src/nouveau/compiler/nak/opt_copy_prop.rs index e9d9292a874..aaae7bd8581 100644 --- a/src/nouveau/compiler/nak/opt_copy_prop.rs +++ b/src/nouveau/compiler/nak/opt_copy_prop.rs @@ -37,8 +37,8 @@ struct CopyEntry { struct PrmtEntry { bi: usize, + sel: PrmtSel, srcs: [Src; 2], - selection: u16, } enum CopyPropEntry { @@ -73,21 +73,15 @@ impl CopyPropPass { &mut self, bi: usize, dst: SSAValue, + sel: PrmtSel, srcs: [Src; 2], - selection: u16, ) { assert!( srcs[0].src_ref.get_reg().is_none() && srcs[1].src_ref.get_reg().is_none() ); - self.ssa_map.insert( - dst, - CopyPropEntry::Prmt(PrmtEntry { - bi, - srcs, - selection, - }), - ); + self.ssa_map + .insert(dst, CopyPropEntry::Prmt(PrmtEntry { bi, sel, srcs })); } fn add_fp64_copy(&mut self, bi: usize, dst: &SSARef, src: Src) { @@ -277,25 +271,22 @@ let mut combined = [0_u8; 4]; for i in 0..4 { - let val = ((entry.selection >> (swizzle_prmt[i] * 4)) - & 0xF) as u8; + let prmt_byte = entry.sel.get(swizzle_prmt[i].into()); // If we have a sign extension, we cannot simplify it. 
- if val & 8 != 0 { + if prmt_byte.msb() { return; } - let target_src_idx = val / 4; - // Ensure we are using the same source, we cannot // combine multiple sources. if entry_src_idx.is_none() { - entry_src_idx = Some(target_src_idx); - } else if entry_src_idx != Some(target_src_idx) { + entry_src_idx = Some(prmt_byte.src()); + } else if entry_src_idx != Some(prmt_byte.src()) { return; } - combined[i] = val & 0x3; + combined[i] = prmt_byte.byte().try_into().unwrap(); } let entry_src_idx = usize::from(entry_src_idx.unwrap()); @@ -597,41 +588,15 @@ impl CopyPropPass { Op::Prmt(prmt) => { let dst = prmt.dst.as_ssa().unwrap(); assert!(dst.comps() == 1); - if prmt.mode != PrmtMode::Index { - return; - } - let Some(sel) = prmt.sel.as_u32() else { - return; - }; - - // The top 16 bits are ignored - let sel = sel as u16; - - if sel == 0x3210 { - self.add_copy(bi, dst[0], SrcType::GPR, prmt.srcs[0]); - } else if sel == 0x7654 { - self.add_copy(bi, dst[0], SrcType::GPR, prmt.srcs[1]); - } else { - let mut is_imm = true; - let mut imm = 0_u32; - for d in 0..4 { - let s = ((sel >> d * 4) & 0x7) as usize; - let sign = (sel >> d * 4) & 0x8 != 0; - if let Some(u) = prmt.srcs[s / 4].as_u32() { - let mut sb = (u >> (s * 8)) as u8; - if sign { - sb = ((sb as i8) >> 7) as u8; - } - imm |= (sb as u32) << (d * 8); - } else { - is_imm = false; - break; - } - } - if is_imm { + if let Some(sel) = prmt.get_sel() { + if let Some(imm) = prmt.as_u32() { self.add_copy(bi, dst[0], SrcType::GPR, imm.into()); + } else if sel == PrmtSel(0x3210) { + self.add_copy(bi, dst[0], SrcType::GPR, prmt.srcs[0]); + } else if sel == PrmtSel(0x7654) { + self.add_copy(bi, dst[0], SrcType::GPR, prmt.srcs[1]); } else { - self.add_prmt(bi, dst[0], prmt.srcs, sel); + self.add_prmt(bi, dst[0], sel, prmt.srcs); } } }