nak: Add OpMatch

Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35778>
This commit is contained in:
Mel Henning
2025-06-26 15:50:15 -04:00
committed by Marge Bot
parent ef6273c43c
commit 00fe8e45a0
6 changed files with 108 additions and 3 deletions
+43 -1
View File
@@ -7052,6 +7052,46 @@ impl DisplayOp for OpVote {
}
impl_display_for_op!(OpVote);
/// Variant selector for [`OpMatch`], rendered as a suffix on the `match`
/// mnemonic.
#[allow(dead_code)]
#[derive(Copy, Clone)]
pub enum MatchOp {
    /// Displayed as `.all`.
    All,
    /// Displayed as `.any`.
    Any,
}

impl fmt::Display for MatchOp {
    /// Writes the mnemonic suffix for this variant, leading dot included.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let suffix = match self {
            MatchOp::All => ".all",
            MatchOp::Any => ".any",
        };
        f.write_str(suffix)
    }
}
/// IR op for the `match` instruction.
///
/// It carries two destinations (a predicate and a GPR) and one GPR source.
/// Its `DisplayOp` impl prints it as `match<op>[.u64] <src>`.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpMatch {
// Predicate destination. The SM70 encoder asserts that this is `Dst::None`
// when `op` is `MatchOp::Any`.
#[dst_type(Pred)]
pub pred: Dst,
// GPR destination.
// NOTE(review): presumably the mask of lanes whose value matched -- confirm
// against the hardware documentation.
#[dst_type(GPR)]
pub mask: Dst,
// GPR source holding the value to be matched.
#[src_type(GPR)]
pub src: Src,
// Selects the `.all` or `.any` form of the instruction.
pub op: MatchOp,
// When set, `.u64` is appended to the mnemonic. The disassembly test sets
// this exactly when the source register has two components.
pub u64: bool,
}
impl DisplayOp for OpMatch {
    /// Prints the mnemonic (`match`, the op suffix, then `.u64` when the
    /// `u64` flag is set) followed by the source operand. Destinations are
    /// not written by this method.
    fn fmt_op(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "match{}", self.op)?;
        if self.u64 {
            write!(f, ".u64")?;
        }
        write!(f, " {}", self.src)
    }
}
impl_display_for_op!(OpMatch);
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpUndef {
@@ -7689,6 +7729,7 @@ pub enum Op {
PixLd(OpPixLd),
S2R(OpS2R),
Vote(OpVote),
Match(OpMatch),
Undef(OpUndef),
SrcBar(OpSrcBar),
PhiSrcs(OpPhiSrcs),
@@ -7866,7 +7907,8 @@ impl Op {
| Op::ViLd(_)
| Op::Kill(_)
| Op::PixLd(_)
| Op::S2R(_) => false,
| Op::S2R(_)
| Op::Match(_) => false,
Op::Nop(_) | Op::Vote(_) => true,
// Virtual ops
@@ -723,3 +723,34 @@ pub fn test_redux() {
c.check(sm);
}
}
/// Checks the encoding of `OpMatch` on every SM in `SM_LIST` by comparing it
/// with the expected disassembly text, covering `.all` (with a predicate
/// destination) and `.any` (without one), each in 32-bit and `.u64` form.
#[test]
pub fn test_match() {
    let mask_reg = RegRef::new(RegFile::GPR, 3, 1);
    let pred_reg = RegRef::new(RegFile::Pred, 1, 1);

    for sm in SM_LIST {
        let mut checker = DisasmCheck::new();

        // Each op is paired with the predicate destination it is tested with
        // and with how that destination shows up in the disassembly.
        let op_cases = [
            (MatchOp::All, Dst::Reg(pred_reg), "p1, "),
            (MatchOp::Any, Dst::None, ""),
        ];
        for (op, pred, pred_str) in op_cases {
            // The `.u64` form reads a two-component source register.
            for (u64, comps) in [(false, 1), (true, 2)] {
                let suffix = if u64 { ".u64" } else { "" };
                let src_reg = RegRef::new(RegFile::GPR, 4, comps);
                let instr = OpMatch {
                    pred: pred.clone(),
                    mask: Dst::Reg(mask_reg),
                    src: SrcRef::Reg(src_reg).into(),
                    op,
                    u64,
                };
                let expected =
                    format!("match{op}{suffix} {pred_str}r3, r4;");
                checker.push(instr, expected);
            }
        }

        checker.check(sm);
    }
}
@@ -211,7 +211,7 @@ pub fn side_effect_type(op: &Op) -> SideEffect {
| Op::ViLd(_)
| Op::Kill(_)
| Op::S2R(_) => SideEffect::Barrier,
Op::PixLd(_) | Op::Vote(_) => SideEffect::None,
Op::PixLd(_) | Op::Vote(_) | Op::Match(_) => SideEffect::None,
Op::Nop(OpNop { label, .. }) => {
if label.is_none() {
SideEffect::None
@@ -316,7 +316,8 @@ pub fn estimate_variable_latency(sm: u8, op: &Op) -> u32 {
| Op::ViLd(_)
| Op::Kill(_)
| Op::PixLd(_)
| Op::S2R(_) => 16,
| Op::S2R(_)
| Op::Match(_) => 16,
_ => panic!("Unknown variable latency op {op}"),
}
+28
View File
@@ -3791,6 +3791,33 @@ impl SM70Op for OpVote {
}
}
impl SM70Op for OpMatch {
    /// Delegates legalization to `legalize_ext_instr`.
    fn legalize(&mut self, b: &mut LegalizeBuilder) {
        legalize_ext_instr(self, b);
    }

    /// Encodes the op as opcode `0x3a1`.
    ///
    /// Field layout used here: source register in bits 24..32, the `u64`
    /// flag in bit 73, the any/all selector in bit 79 and the predicate
    /// destination in bits 81..84.
    ///
    /// # Panics
    ///
    /// Panics if `op` is `MatchOp::Any` and `pred` is not `Dst::None`.
    fn encode(&self, e: &mut SM70Encoder<'_>) {
        e.set_opcode(0x3a1);
        e.set_dst(&self.mask);
        e.set_reg_src(24..32, &self.src);
        e.set_bit(73, self.u64);

        // Bit 79 is set for `.any` and clear for `.all`.
        let is_any = match self.op {
            MatchOp::Any => true,
            MatchOp::All => false,
        };
        if is_any {
            // The `.any` form is only encoded without a predicate result.
            assert!(matches!(self.pred, Dst::None));
        }
        e.set_bit(79, is_any);

        e.set_pred_dst(81..84, &self.pred);
    }
}
macro_rules! as_sm70_op_match {
($op: expr) => {
match $op {
@@ -3878,6 +3905,7 @@ macro_rules! as_sm70_op_match {
Op::Out(op) => op,
Op::OutFinal(op) => op,
Op::Vote(op) => op,
Op::Match(op) => op,
_ => panic!("Unsupported op: {}", $op),
}
};
@@ -134,6 +134,7 @@ impl RegLatencySM75 {
Op::Prmt(_) => CoupledAlu,
Op::Nop(_) => CoupledDisp,
Op::Vote(_) => CoupledDisp,
Op::Match(_) => Decoupled,
Op::S2R(_) => Decoupled,
// S2UR => Decoupled,
Op::R2UR(_) => {
@@ -173,6 +173,7 @@ impl RegLatencySM80 {
Op::Prmt(_) => CoupledAlu,
Op::Nop(_) => CoupledDisp64,
Op::Vote(_) => CoupledAlu,
Op::Match(_) => Decoupled,
Op::S2R(_) => Decoupled,
// S2UR => Decoupled,
Op::R2UR(_) | Op::Redux(_) => {
@@ -837,6 +838,7 @@ impl PredLatencySM80 {
Op::Txq(_) => PredLatencySM80::Decoupled,
Op::Vote(_) => PredLatencySM80::Disp_Alu,
Op::Match(_) => PredLatencySM80::Decoupled,
_ => {
panic!("Illegal op in sm80 pred latency {}", op);
}