diff --git a/src/nouveau/compiler/nak_encode_sm75.rs b/src/nouveau/compiler/nak_encode_sm75.rs
index 5fd00d88fb7..95345643db9 100644
--- a/src/nouveau/compiler/nak_encode_sm75.rs
+++ b/src/nouveau/compiler/nak_encode_sm75.rs
@@ -1210,6 +1210,30 @@ impl SM75Instr {
         self.set_atom_op(87..91, op.atom_op);
     }
 
+    /// Encodes an `OpAtomCas` on global memory (opcode 0x38b).
+    fn encode_atomg_cas(&mut self, op: &OpAtomCas) {
+        self.set_opcode(0x38b);
+
+        self.set_dst(op.dst);
+        self.set_pred_dst(81..84, Dst::None);
+
+        self.set_reg_src(24..32, op.addr);
+        self.set_reg_src(32..40, op.cmpr);
+        self.set_field(40..64, op.addr_offset);
+        self.set_reg_src(64..72, op.data);
+
+        self.set_field(
+            72..73,
+            match op.addr_type {
+                MemAddrType::A32 => 0_u8,
+                MemAddrType::A64 => 1_u8,
+            },
+        );
+
+        self.set_atom_type(73..76, op.atom_type);
+        self.set_mem_order_scope(&op.mem_order, &op.mem_scope);
+    }
+
     fn encode_atoms(&mut self, op: &OpAtom) {
         self.set_opcode(0x38c);
 
@@ -1227,6 +1251,25 @@ impl SM75Instr {
         self.set_atom_op(87..91, op.atom_op);
     }
 
+    /// Encodes an `OpAtomCas` on shared memory (opcode 0x38d).
+    fn encode_atoms_cas(&mut self, op: &OpAtomCas) {
+        self.set_opcode(0x38d);
+
+        self.set_dst(op.dst);
+
+        self.set_reg_src(24..32, op.addr);
+        self.set_reg_src(32..40, op.cmpr);
+        self.set_field(40..64, op.addr_offset);
+        self.set_reg_src(64..72, op.data);
+
+        // Only A32 addressing with Strong ordering at CTA scope is accepted.
+        assert!(op.addr_type == MemAddrType::A32);
+        assert!(op.mem_order == MemOrder::Strong);
+        assert!(op.mem_scope == MemScope::CTA);
+
+        self.set_atom_type(73..76, op.atom_type);
+    }
+
     fn encode_atom(&mut self, op: &OpAtom) {
         match op.mem_space {
             MemSpace::Global => self.encode_atomg(op),
@@ -1235,6 +1278,15 @@ impl SM75Instr {
         }
     }
 
+    /// Dispatches an `OpAtomCas` to the encoder for its memory space.
+    fn encode_atom_cas(&mut self, op: &OpAtomCas) {
+        match op.mem_space {
+            MemSpace::Global => self.encode_atomg_cas(op),
+            MemSpace::Local => panic!("Atomics do not support local"),
+            MemSpace::Shared => self.encode_atoms_cas(op),
+        }
+    }
+
     fn encode_ald(&mut self, op: &OpALd) {
         self.set_opcode(0x321);
 
@@ -1418,6 +1470,7 @@ impl SM75Instr {
             Op::Ld(op) => si.encode_ld(&op),
             Op::St(op) => si.encode_st(&op),
             Op::Atom(op) => si.encode_atom(&op),
+            Op::AtomCas(op) => si.encode_atom_cas(&op),
             Op::ALd(op) => si.encode_ald(&op),
             Op::ASt(op) => si.encode_ast(&op),
             Op::Ipa(op) => si.encode_ipa(&op),
diff --git a/src/nouveau/compiler/nak_from_nir.rs b/src/nouveau/compiler/nak_from_nir.rs
index 464f895cf04..c67fad8a82c 100644
--- a/src/nouveau/compiler/nak_from_nir.rs
+++ b/src/nouveau/compiler/nak_from_nir.rs
@@ -914,6 +914,29 @@ impl<'a> ShaderFromNir<'a> {
                 };
                 self.instrs.push(atom.into());
             }
+            nir_intrinsic_global_atomic_swap => {
+                assert!(intrin.atomic_op() == nir_atomic_op_cmpxchg);
+                let bit_size = intrin.def.bit_size();
+                let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
+                let cmpr = self.get_src(&srcs[1]);
+                let data = self.get_src(&srcs[2]);
+                let atom_type = AtomType::U(bit_size);
+                let dst = self.get_dst(&intrin.def);
+
+                let atom = OpAtomCas {
+                    dst: dst,
+                    addr: addr,
+                    cmpr: cmpr,
+                    data: data,
+                    atom_type: atom_type,
+                    addr_type: MemAddrType::A64,
+                    addr_offset: offset,
+                    mem_space: MemSpace::Global,
+                    mem_order: MemOrder::Strong,
+                    mem_scope: MemScope::System,
+                };
+                self.instrs.push(atom.into());
+            }
             nir_intrinsic_load_barycentric_centroid => (),
             nir_intrinsic_load_barycentric_pixel => (),
             nir_intrinsic_load_barycentric_sample => (),
@@ -1063,6 +1086,29 @@ impl<'a> ShaderFromNir<'a> {
                 };
                 self.instrs.push(atom.into());
             }
+            nir_intrinsic_shared_atomic_swap => {
+                assert!(intrin.atomic_op() == nir_atomic_op_cmpxchg);
+                let bit_size = intrin.def.bit_size();
+                let (addr, offset) = self.get_io_addr_offset(&srcs[0], 24);
+                let cmpr = self.get_src(&srcs[1]);
+                let data = self.get_src(&srcs[2]);
+                let atom_type = AtomType::U(bit_size);
+                let dst = self.get_dst(&intrin.def);
+
+                let atom = OpAtomCas {
+                    dst: dst,
+                    addr: addr,
+                    cmpr: cmpr,
+                    data: data,
+                    atom_type: atom_type,
+                    addr_type: MemAddrType::A32,
+                    addr_offset: offset,
+                    mem_space: MemSpace::Shared,
+                    mem_order: MemOrder::Strong,
+                    mem_scope: MemScope::CTA,
+                };
+                self.instrs.push(atom.into());
+            }
             nir_intrinsic_store_global => {
                 let data = self.get_src(&srcs[0]);
                 let size_B =
diff --git a/src/nouveau/compiler/nak_ir.rs b/src/nouveau/compiler/nak_ir.rs
index 2769d20fc7b..99684613275 100644
--- a/src/nouveau/compiler/nak_ir.rs
+++ b/src/nouveau/compiler/nak_ir.rs
@@ -2400,6 +2400,54 @@ impl fmt::Display for OpAtom {
     }
 }
 
+/// Atomic compare-and-swap operation (printed as `ATOM.CAS`).
+#[repr(C)]
+#[derive(SrcsAsSlice, DstsAsSlice)]
+pub struct OpAtomCas {
+    pub dst: Dst,
+
+    #[src_type(GPR)]
+    pub addr: Src,
+
+    #[src_type(SSA)]
+    pub cmpr: Src,
+
+    #[src_type(SSA)]
+    pub data: Src,
+
+    pub atom_type: AtomType,
+
+    pub addr_type: MemAddrType,
+    pub addr_offset: i32,
+
+    pub mem_space: MemSpace,
+    pub mem_order: MemOrder,
+    pub mem_scope: MemScope,
+}
+
+impl fmt::Display for OpAtomCas {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(
+            f,
+            "ATOM.CAS.{}.{}.{} {}",
+            self.atom_type, self.mem_order, self.mem_scope, self.dst
+        )?;
+        write!(f, " [")?;
+        if !self.addr.is_zero() {
+            write!(f, "{}", self.addr)?;
+        }
+        // Only a positive offset is printed; it is joined to a non-zero base
+        // address with a single "+".
+        if self.addr_offset > 0 {
+            if !self.addr.is_zero() {
+                write!(f, "+")?;
+            }
+            write!(f, "{:#x}", self.addr_offset)?;
+        }
+        write!(f, "] {} {}", self.cmpr, self.data)
+    }
+}
+
 #[repr(C)]
 #[derive(SrcsAsSlice, DstsAsSlice)]
 pub struct OpALd {
@@ -2881,6 +2929,7 @@ pub enum Op {
     Ld(OpLd),
     St(OpSt),
     Atom(OpAtom),
+    AtomCas(OpAtomCas),
     ALd(OpALd),
     ASt(OpASt),
     Ipa(OpIpa),
@@ -3349,6 +3398,7 @@ impl Instr {
             | Op::SuSt(_)
             | Op::St(_)
             | Op::Atom(_)
+            | Op::AtomCas(_)
             | Op::MemBar(_)
             | Op::Bra(_)
             | Op::Exit(_)
@@ -3394,6 +3444,7 @@ impl Instr {
             Op::Ld(_) => None,
             Op::St(_) => None,
             Op::Atom(_) => None,
+            Op::AtomCas(_) => None,
             Op::MemBar(_) => None,
             Op::Bar(_) => None,
             Op::Bra(_) | Op::Exit(_) => Some(15),