From 00fe8e45a0dbbdc2fe6f7c5cab9a3a0f50313f2f Mon Sep 17 00:00:00 2001
From: Mel Henning
Date: Thu, 26 Jun 2025 15:50:15 -0400
Subject: [PATCH] nak: Add OpMatch

Reviewed-by: Faith Ekstrand
Part-of:
---
 src/nouveau/compiler/nak/ir.rs             | 44 ++++++++++++++++++-
 src/nouveau/compiler/nak/nvdisasm_tests.rs | 31 +++++++++++++
 .../compiler/nak/opt_instr_sched_common.rs |  5 ++-
 src/nouveau/compiler/nak/sm70_encode.rs    | 28 ++++++++++++
 .../compiler/nak/sm75_instr_latencies.rs   |  1 +
 .../compiler/nak/sm80_instr_latencies.rs   |  2 +
 6 files changed, 108 insertions(+), 3 deletions(-)

diff --git a/src/nouveau/compiler/nak/ir.rs b/src/nouveau/compiler/nak/ir.rs
index 5326ea17f60..dc19b7ecb23 100644
--- a/src/nouveau/compiler/nak/ir.rs
+++ b/src/nouveau/compiler/nak/ir.rs
@@ -7052,6 +7052,46 @@ impl DisplayOp for OpVote {
 }
 impl_display_for_op!(OpVote);
 
+#[allow(dead_code)]
+#[derive(Copy, Clone)]
+pub enum MatchOp {
+    All,
+    Any,
+}
+
+impl fmt::Display for MatchOp {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            MatchOp::All => write!(f, ".all"),
+            MatchOp::Any => write!(f, ".any"),
+        }
+    }
+}
+
+#[repr(C)]
+#[derive(SrcsAsSlice, DstsAsSlice)]
+pub struct OpMatch {
+    #[dst_type(Pred)]
+    pub pred: Dst,
+
+    #[dst_type(GPR)]
+    pub mask: Dst,
+
+    #[src_type(GPR)]
+    pub src: Src,
+
+    pub op: MatchOp,
+    pub u64: bool,
+}
+
+impl DisplayOp for OpMatch {
+    fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let u64_str = if self.u64 { ".u64" } else { "" };
+        write!(f, "match{}{} {}", self.op, u64_str, self.src)
+    }
+}
+impl_display_for_op!(OpMatch);
+
 #[repr(C)]
 #[derive(SrcsAsSlice, DstsAsSlice)]
 pub struct OpUndef {
@@ -7689,6 +7729,7 @@ pub enum Op {
     PixLd(OpPixLd),
     S2R(OpS2R),
     Vote(OpVote),
+    Match(OpMatch),
     Undef(OpUndef),
     SrcBar(OpSrcBar),
     PhiSrcs(OpPhiSrcs),
@@ -7866,7 +7907,8 @@ impl Op {
             | Op::ViLd(_)
             | Op::Kill(_)
             | Op::PixLd(_)
-            | Op::S2R(_) => false,
+            | Op::S2R(_)
+            | Op::Match(_) => false,
             Op::Nop(_) | Op::Vote(_) => true,
 
             // Virtual ops
diff --git a/src/nouveau/compiler/nak/nvdisasm_tests.rs b/src/nouveau/compiler/nak/nvdisasm_tests.rs
index 866b45cd7e8..d707cf92cfa 100644
--- a/src/nouveau/compiler/nak/nvdisasm_tests.rs
+++ b/src/nouveau/compiler/nak/nvdisasm_tests.rs
@@ -723,3 +723,34 @@ pub fn test_redux() {
         c.check(sm);
     }
 }
+
+#[test]
+pub fn test_match() {
+    let r3 = RegRef::new(RegFile::GPR, 3, 1);
+    let p1 = RegRef::new(RegFile::Pred, 1, 1);
+
+    for sm in SM_LIST {
+        let mut c = DisasmCheck::new();
+
+        for (op, pred, pred_str) in [
+            (MatchOp::All, Dst::Reg(p1), "p1, "),
+            (MatchOp::Any, Dst::None, ""),
+        ] {
+            for (src_comps, u64_str) in [(1, ""), (2, ".u64")] {
+                let src = RegRef::new(RegFile::GPR, 4, src_comps);
+                let instr = OpMatch {
+                    pred: pred.clone(),
+                    mask: Dst::Reg(r3),
+
+                    src: SrcRef::Reg(src).into(),
+                    op,
+                    u64: src_comps == 2,
+                };
+                let disasm = format!("match{op}{u64_str} {pred_str}r3, r4;");
+                c.push(instr, disasm);
+            }
+        }
+
+        c.check(sm);
+    }
+}
diff --git a/src/nouveau/compiler/nak/opt_instr_sched_common.rs b/src/nouveau/compiler/nak/opt_instr_sched_common.rs
index a4457b3a646..0b3a8a8dd7f 100644
--- a/src/nouveau/compiler/nak/opt_instr_sched_common.rs
+++ b/src/nouveau/compiler/nak/opt_instr_sched_common.rs
@@ -211,7 +211,7 @@ pub fn side_effect_type(op: &Op) -> SideEffect {
         | Op::ViLd(_)
         | Op::Kill(_)
         | Op::S2R(_) => SideEffect::Barrier,
-        Op::PixLd(_) | Op::Vote(_) => SideEffect::None,
+        Op::PixLd(_) | Op::Vote(_) | Op::Match(_) => SideEffect::None,
         Op::Nop(OpNop { label, .. }) => {
             if label.is_none() {
                 SideEffect::None
@@ -316,7 +316,8 @@ pub fn estimate_variable_latency(sm: u8, op: &Op) -> u32 {
         | Op::ViLd(_)
         | Op::Kill(_)
         | Op::PixLd(_)
-        | Op::S2R(_) => 16,
+        | Op::S2R(_)
+        | Op::Match(_) => 16,
 
         _ => panic!("Unknown variable latency op {op}"),
     }
diff --git a/src/nouveau/compiler/nak/sm70_encode.rs b/src/nouveau/compiler/nak/sm70_encode.rs
index 2c6032a8a72..bd9abdf9a4a 100644
--- a/src/nouveau/compiler/nak/sm70_encode.rs
+++ b/src/nouveau/compiler/nak/sm70_encode.rs
@@ -3791,6 +3791,33 @@ impl SM70Op for OpVote {
     }
 }
 
+impl SM70Op for OpMatch {
+    fn legalize(&mut self, b: &mut LegalizeBuilder) {
+        legalize_ext_instr(self, b);
+    }
+
+    fn encode(&self, e: &mut SM70Encoder<'_>) {
+        e.set_opcode(0x3a1);
+
+        e.set_dst(&self.mask);
+        e.set_reg_src(24..32, &self.src);
+        e.set_bit(73, self.u64);
+
+        e.set_bit(
+            79,
+            match self.op {
+                MatchOp::Any => {
+                    assert!(matches!(self.pred, Dst::None));
+                    true
+                }
+                MatchOp::All => false,
+            },
+        );
+
+        e.set_pred_dst(81..84, &self.pred);
+    }
+}
+
 macro_rules! as_sm70_op_match {
     ($op: expr) => {
         match $op {
@@ -3878,6 +3905,7 @@ macro_rules! as_sm70_op_match {
             Op::Out(op) => op,
             Op::OutFinal(op) => op,
             Op::Vote(op) => op,
+            Op::Match(op) => op,
             _ => panic!("Unsupported op: {}", $op),
         }
     };
diff --git a/src/nouveau/compiler/nak/sm75_instr_latencies.rs b/src/nouveau/compiler/nak/sm75_instr_latencies.rs
index c1552a6ba56..0e297a6af29 100644
--- a/src/nouveau/compiler/nak/sm75_instr_latencies.rs
+++ b/src/nouveau/compiler/nak/sm75_instr_latencies.rs
@@ -134,6 +134,7 @@ impl RegLatencySM75 {
             Op::Prmt(_) => CoupledAlu,
             Op::Nop(_) => CoupledDisp,
             Op::Vote(_) => CoupledDisp,
+            Op::Match(_) => Decoupled,
             Op::S2R(_) => Decoupled,
             // S2UR => Decoupled,
             Op::R2UR(_) => {
diff --git a/src/nouveau/compiler/nak/sm80_instr_latencies.rs b/src/nouveau/compiler/nak/sm80_instr_latencies.rs
index 67c10087c27..335fb0723d8 100644
--- a/src/nouveau/compiler/nak/sm80_instr_latencies.rs
+++ b/src/nouveau/compiler/nak/sm80_instr_latencies.rs
@@ -173,6 +173,7 @@ impl RegLatencySM80 {
             Op::Prmt(_) => CoupledAlu,
             Op::Nop(_) => CoupledDisp64,
             Op::Vote(_) => CoupledAlu,
+            Op::Match(_) => Decoupled,
             Op::S2R(_) => Decoupled,
             // S2UR => Decoupled,
             Op::R2UR(_) | Op::Redux(_) => {
@@ -837,6 +838,7 @@ impl PredLatencySM80 {
             Op::Txq(_) => PredLatencySM80::Decoupled,
 
             Op::Vote(_) => PredLatencySM80::Disp_Alu,
+            Op::Match(_) => PredLatencySM80::Decoupled,
             _ => {
                 panic!("Illegal op in sm80 pred latency {}", op);
             }