From ec2c866a77662f7ab33c7cf368ecef122a2ee1af Mon Sep 17 00:00:00 2001
From: Faith Ekstrand
Date: Wed, 25 Oct 2023 08:37:43 -0500
Subject: [PATCH] nak: Emit CCtl in barriers with acq/rel semantics

Part-of:
---
Reviewer note (differs from the upstream commit): fmt::Display for OpCCtl
also prints self.op after the memory space, so that ops such as WBALL and
IVALL are distinguishable in the printed form.  Everything else is the
original change with its line structure restored.

 src/nouveau/compiler/nak_encode_sm75.rs | 24 ++++++++
 src/nouveau/compiler/nak_from_nir.rs    | 22 ++++++++
 src/nouveau/compiler/nak_ir.rs          | 74 +++++++++++++++++++++++++
 3 files changed, 120 insertions(+)

diff --git a/src/nouveau/compiler/nak_encode_sm75.rs b/src/nouveau/compiler/nak_encode_sm75.rs
index b4905c30dff..c84ffce8800 100644
--- a/src/nouveau/compiler/nak_encode_sm75.rs
+++ b/src/nouveau/compiler/nak_encode_sm75.rs
@@ -1552,6 +1552,29 @@ impl SM75Instr {
         self.set_pred_dst(81..84, Dst::None);
     }
 
+    fn encode_cctl(&mut self, op: &OpCCtl) {
+        assert!(op.mem_space == MemSpace::Global);
+        self.set_opcode(0x98f);
+
+        self.set_reg_src(24..32, op.addr);
+        self.set_field(32..64, op.addr_offset);
+
+        self.set_field(
+            87..91,
+            match op.op {
+                CCtlOp::PF1 => 0_u8,
+                CCtlOp::PF2 => 1_u8,
+                CCtlOp::WB => 2_u8,
+                CCtlOp::IV => 3_u8,
+                CCtlOp::IVAll => 4_u8,
+                CCtlOp::RS => 5_u8,
+                CCtlOp::IVAllP => 6_u8,
+                CCtlOp::WBAll => 7_u8,
+                CCtlOp::WBAllP => 8_u8,
+            },
+        );
+    }
+
     fn encode_membar(&mut self, op: &OpMemBar) {
         self.set_opcode(0x992);
 
@@ -1845,6 +1868,7 @@ impl SM75Instr {
             Op::ALd(op) => si.encode_ald(&op),
             Op::ASt(op) => si.encode_ast(&op),
             Op::Ipa(op) => si.encode_ipa(&op),
+            Op::CCtl(op) => si.encode_cctl(&op),
             Op::MemBar(op) => si.encode_membar(&op),
             Op::BMov(op) => si.encode_bmov(&op),
             Op::Break(op) => si.encode_break(&op),
diff --git a/src/nouveau/compiler/nak_from_nir.rs b/src/nouveau/compiler/nak_from_nir.rs
index 1fa052551ab..4d5d4ab5e32 100644
--- a/src/nouveau/compiler/nak_from_nir.rs
+++ b/src/nouveau/compiler/nak_from_nir.rs
@@ -1850,6 +1850,18 @@ impl<'a> ShaderFromNir<'a> {
                 self.set_dst(&intrin.def, dst);
             }
             nir_intrinsic_barrier => {
+                let modes = intrin.memory_modes();
+                let semantics = intrin.memory_semantics();
+                if (modes & nir_var_mem_global) != 0
+                    && (semantics & NIR_MEMORY_RELEASE) != 0
+                {
+                    b.push_op(OpCCtl {
+                        op: CCtlOp::WBAll,
+                        mem_space: MemSpace::Global,
+                        addr: 0.into(),
+                        addr_offset: 0,
+                    });
+                }
                 if intrin.memory_scope() != SCOPE_NONE {
                     let mem_scope = match intrin.memory_scope() {
                         SCOPE_INVOCATION | SCOPE_SUBGROUP => MemScope::CTA,
@@ -1869,6 +1881,16 @@ impl<'a> ShaderFromNir<'a> {
                     }
                     _ => panic!("Unhandled execution scope"),
                 }
+                if (modes & nir_var_mem_global) != 0
+                    && (semantics & NIR_MEMORY_ACQUIRE) != 0
+                {
+                    b.push_op(OpCCtl {
+                        op: CCtlOp::IVAll,
+                        mem_space: MemSpace::Global,
+                        addr: 0.into(),
+                        addr_offset: 0,
+                    });
+                }
             }
             nir_intrinsic_read_invocation
             | nir_intrinsic_shuffle
diff --git a/src/nouveau/compiler/nak_ir.rs b/src/nouveau/compiler/nak_ir.rs
index e7f6051dc98..2bf87606b34 100644
--- a/src/nouveau/compiler/nak_ir.rs
+++ b/src/nouveau/compiler/nak_ir.rs
@@ -3440,6 +3440,77 @@ impl fmt::Display for OpIpa {
     }
 }
 
+#[allow(dead_code)]
+pub enum CCtlOp {
+    PF1,
+    PF2,
+    WB,
+    IV,
+    IVAll,
+    RS,
+    IVAllP,
+    WBAll,
+    WBAllP,
+}
+
+impl CCtlOp {
+    pub fn is_all(&self) -> bool {
+        match self {
+            CCtlOp::PF1
+            | CCtlOp::PF2
+            | CCtlOp::WB
+            | CCtlOp::IV
+            | CCtlOp::RS => false,
+            CCtlOp::IVAll | CCtlOp::IVAllP | CCtlOp::WBAll | CCtlOp::WBAllP => {
+                true
+            }
+        }
+    }
+}
+
+impl fmt::Display for CCtlOp {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            CCtlOp::PF1 => write!(f, "PF1"),
+            CCtlOp::PF2 => write!(f, "PF2"),
+            CCtlOp::WB => write!(f, "WB"),
+            CCtlOp::IV => write!(f, "IV"),
+            CCtlOp::IVAll => write!(f, "IVALL"),
+            CCtlOp::RS => write!(f, "RS"),
+            CCtlOp::IVAllP => write!(f, "IVALLP"),
+            CCtlOp::WBAll => write!(f, "WBALL"),
+            CCtlOp::WBAllP => write!(f, "WBALLP"),
+        }
+    }
+}
+
+#[repr(C)]
+#[derive(SrcsAsSlice, DstsAsSlice)]
+pub struct OpCCtl {
+    pub op: CCtlOp,
+
+    pub mem_space: MemSpace,
+
+    #[src_type(GPR)]
+    pub addr: Src,
+
+    pub addr_offset: i32,
+}
+
+impl fmt::Display for OpCCtl {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "CCTL.{}.{}", self.mem_space, self.op)?;
+        if !self.op.is_all() {
+            write!(f, " [{}", self.addr)?;
+            if self.addr_offset > 0 {
+                write!(f, "+{:#x}", self.addr_offset)?;
+            }
+            write!(f, "]")?;
+        }
+        Ok(())
+    }
+}
+
 #[repr(C)]
 #[derive(SrcsAsSlice, DstsAsSlice)]
 pub struct OpMemBar {
@@ -4159,6 +4230,7 @@ pub enum Op {
     ALd(OpALd),
     ASt(OpASt),
     Ipa(OpIpa),
+    CCtl(OpCCtl),
     MemBar(OpMemBar),
     BMov(OpBMov),
     Break(OpBreak),
@@ -4515,6 +4587,7 @@ impl Instr {
             | Op::St(_)
             | Op::Atom(_)
             | Op::AtomCas(_)
+            | Op::CCtl(_)
             | Op::MemBar(_)
             | Op::Kill(_)
             | Op::Break(_)
@@ -4594,6 +4667,7 @@ impl Instr {
             | Op::ALd(_)
             | Op::ASt(_)
             | Op::Ipa(_)
+            | Op::CCtl(_)
             | Op::MemBar(_) => false,
 
             // Control-flow ops