From 4c798afb25600dcc0784402c6854f7793fe75bb5 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Mon, 28 Aug 2023 13:39:41 -0500 Subject: [PATCH] nak: Add a new OpCopy instruction for parallel copy lowering This is different from OpMov because OpMov is an actual hardware instruction and we want OpCopy to be able to do magic. Part-of: --- src/nouveau/compiler/nak.rs | 4 +- src/nouveau/compiler/nak_builder.rs | 4 + src/nouveau/compiler/nak_ir.rs | 102 +++----------- src/nouveau/compiler/nak_legalize.rs | 3 +- src/nouveau/compiler/nak_lower_copy_swap.rs | 135 +++++++++++++++++++ src/nouveau/compiler/nak_lower_par_copies.rs | 2 +- src/nouveau/compiler/nak_opt_copy_prop.rs | 5 + 7 files changed, 164 insertions(+), 91 deletions(-) create mode 100644 src/nouveau/compiler/nak_lower_copy_swap.rs diff --git a/src/nouveau/compiler/nak.rs b/src/nouveau/compiler/nak.rs index b2ccc13251c..3e4789715f0 100644 --- a/src/nouveau/compiler/nak.rs +++ b/src/nouveau/compiler/nak.rs @@ -14,6 +14,7 @@ mod nak_from_nir; mod nak_ir; mod nak_legalize; mod nak_liveness; +mod nak_lower_copy_swap; mod nak_lower_par_copies; mod nak_opt_copy_prop; mod nak_opt_dce; @@ -462,8 +463,7 @@ pub extern "C" fn nak_compile_shader( s.lower_vec_split(); s.lower_par_copies(); - s.lower_swap(); - s.lower_mov_predicate(); + s.lower_copy_swap(); s.calc_instr_deps(); if DEBUG.print() { diff --git a/src/nouveau/compiler/nak_builder.rs b/src/nouveau/compiler/nak_builder.rs index a4862f267fa..5b78f2a9110 100644 --- a/src/nouveau/compiler/nak_builder.rs +++ b/src/nouveau/compiler/nak_builder.rs @@ -57,6 +57,10 @@ pub trait Builder { }); } + fn copy_to(&mut self, dst: Dst, src: Src) { + self.push_op(OpCopy { dst: dst, src: src }); + } + fn swap(&mut self, x: RegRef, y: RegRef) { assert!(x.file() == y.file()); self.push_op(OpSwap { diff --git a/src/nouveau/compiler/nak_ir.rs b/src/nouveau/compiler/nak_ir.rs index 9d7d169fe3d..9235104875f 100644 --- a/src/nouveau/compiler/nak_ir.rs +++ b/src/nouveau/compiler/nak_ir.rs 
@@ -3179,6 +3179,19 @@ impl fmt::Display for OpPhiDsts { } } +#[repr(C)] +#[derive(SrcsAsSlice, DstsAsSlice)] +pub struct OpCopy { + pub dst: Dst, + pub src: Src, +} + +impl fmt::Display for OpCopy { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "COPY {} {}", self.dst, self.src) + } +} + #[repr(C)] #[derive(SrcsAsSlice, DstsAsSlice)] pub struct OpSwap { @@ -3511,6 +3524,7 @@ pub enum Op { Undef(OpUndef), PhiSrcs(OpPhiSrcs), PhiDsts(OpPhiDsts), + Copy(OpCopy), Swap(OpSwap), ParCopy(OpParCopy), FSOut(OpFSOut), @@ -3891,6 +3905,7 @@ impl Instr { Op::Undef(_) | Op::PhiSrcs(_) | Op::PhiDsts(_) + | Op::Copy(_) | Op::Swap(_) | Op::ParCopy(_) | Op::FSOut(_) => { @@ -4111,93 +4126,6 @@ impl Shader { } }) } - - pub fn lower_swap(&mut self) { - self.map_instrs(|instr: Box<Instr>, _| -> MappedInstrs { - match instr.op { - Op::Swap(swap) => { - let x = *swap.dsts[0].as_reg().unwrap(); - let y = *swap.dsts[1].as_reg().unwrap(); - - assert!(x.file() == y.file()); - assert!(x.comps() == 1 && y.comps() == 1); - assert!(swap.srcs[0].src_mod.is_none()); - assert!(*swap.srcs[0].src_ref.as_reg().unwrap() == y); - assert!(swap.srcs[1].src_mod.is_none()); - assert!(*swap.srcs[1].src_ref.as_reg().unwrap() == x); - - let mut b = InstrBuilder::new(); - if x == y { - /* Nothing to do */ - } else if x.is_predicate() { - b.push_op(OpPLop3 { - dsts: [x.into(), y.into()], - srcs: [x.into(), y.into(), Src::new_imm_bool(true)], - ops: [ - LogicOp::new_lut(&|_, y, _| y), - LogicOp::new_lut(&|x, _, _| x), - ], - }) - } else { - let xor = LogicOp::new_lut(&|x, y, _| x ^ y); - b.lop2_to(x.into(), xor, x.into(), y.into()); - b.lop2_to(y.into(), xor, x.into(), y.into()); - b.lop2_to(x.into(), xor, x.into(), y.into()); - } - b.as_mapped_instrs() - } - _ => MappedInstrs::One(instr), - } - }) - } - - pub fn lower_mov_predicate(&mut self) { - self.map_instrs(|instr: Box<Instr>, _| -> MappedInstrs { - match &instr.op { - Op::Mov(mov) => { - assert!(mov.src.src_mod.is_none()); - match 
mov.src.src_ref { - SrcRef::True => { - let mut b = InstrBuilder::new(); - b.lop2_to( - mov.dst, - LogicOp::new_const(true), - Src::new_imm_bool(true), - Src::new_imm_bool(true), - ); - b.as_mapped_instrs() - } - SrcRef::False => { - let mut b = InstrBuilder::new(); - b.lop2_to( - mov.dst, - LogicOp::new_const(false), - Src::new_imm_bool(true), - Src::new_imm_bool(true), - ); - b.as_mapped_instrs() - } - SrcRef::Reg(reg) => { - if reg.is_predicate() { - let mut b = InstrBuilder::new(); - b.lop2_to( - mov.dst, - LogicOp::new_lut(&|x, _, _| x), - mov.src, - Src::new_imm_bool(true), - ); - b.as_mapped_instrs() - } else { - MappedInstrs::One(instr) - } - } - _ => MappedInstrs::One(instr), - } - } - _ => MappedInstrs::One(instr), - } - }) - } } impl fmt::Display for Shader { diff --git a/src/nouveau/compiler/nak_legalize.rs b/src/nouveau/compiler/nak_legalize.rs index c47f8856c67..a99ad0a529f 100644 --- a/src/nouveau/compiler/nak_legalize.rs +++ b/src/nouveau/compiler/nak_legalize.rs @@ -233,7 +233,8 @@ impl<'a> LegalizeInstr<'a> { self.mov_src_if_not_reg(src0, RegFile::GPR); self.mov_src_if_not_reg(src2, RegFile::GPR); } - Op::Ldc(_) => (), /* Nothing to do */ + Op::Ldc(_) => (), // Nothing to do + Op::Copy(_) => (), // Nothing to do _ => { let src_types = instr.src_types(); for (i, src) in instr.srcs_mut().iter_mut().enumerate() { diff --git a/src/nouveau/compiler/nak_lower_copy_swap.rs b/src/nouveau/compiler/nak_lower_copy_swap.rs new file mode 100644 index 00000000000..52cedcdf29a --- /dev/null +++ b/src/nouveau/compiler/nak_lower_copy_swap.rs @@ -0,0 +1,135 @@ +// Copyright © 2022 Collabora, Ltd. 
+// SPDX-License-Identifier: MIT + +use crate::nak_ir::*; + +struct LowerCopySwap {} + +impl LowerCopySwap { + fn new() -> Self { + Self {} + } + + fn lower_copy(&mut self, b: &mut impl Builder, copy: OpCopy) { + let dst_reg = copy.dst.as_reg().unwrap(); + assert!(dst_reg.comps() == 1); + assert!(copy.src.src_mod.is_none()); + + match dst_reg.file() { + RegFile::GPR => match copy.src.src_ref { + SrcRef::Zero | SrcRef::Imm32(_) | SrcRef::CBuf(_) => { + b.push_op(OpMov { + dst: copy.dst, + src: copy.src, + quad_lanes: 0xf, + }); + } + SrcRef::True | SrcRef::False => { + panic!("Cannot copy to GPR"); + } + SrcRef::Reg(src_reg) => match src_reg.file() { + RegFile::GPR => { + b.push_op(OpMov { + dst: copy.dst, + src: copy.src, + quad_lanes: 0xf, + }); + } + _ => panic!("Cannot copy to GPR"), + }, + SrcRef::SSA(_) => panic!("Should be run after RA"), + }, + RegFile::Pred => match copy.src.src_ref { + SrcRef::Zero | SrcRef::Imm32(_) | SrcRef::CBuf(_) => { + panic!("Cannot copy to Pred"); + } + SrcRef::True => { + b.lop2_to( + copy.dst, + LogicOp::new_const(true), + Src::new_imm_bool(true), + Src::new_imm_bool(true), + ); + } + SrcRef::False => { + b.lop2_to( + copy.dst, + LogicOp::new_const(false), + Src::new_imm_bool(true), + Src::new_imm_bool(true), + ); + } + SrcRef::Reg(src_reg) => match src_reg.file() { + RegFile::Pred => { + b.lop2_to( + copy.dst, + LogicOp::new_lut(&|x, _, _| x), + copy.src, + Src::new_imm_bool(true), + ); + } + _ => panic!("Cannot copy to Pred"), + }, + SrcRef::SSA(_) => panic!("Should be run after RA"), + }, + _ => panic!("Unhandled register file"), + } + } + + fn lower_swap(&mut self, b: &mut impl Builder, swap: OpSwap) { + let x = *swap.dsts[0].as_reg().unwrap(); + let y = *swap.dsts[1].as_reg().unwrap(); + + assert!(x.file() == y.file()); + assert!(x.comps() == 1 && y.comps() == 1); + assert!(swap.srcs[0].src_mod.is_none()); + assert!(*swap.srcs[0].src_ref.as_reg().unwrap() == y); + assert!(swap.srcs[1].src_mod.is_none()); + 
assert!(*swap.srcs[1].src_ref.as_reg().unwrap() == x); + + if x == y { + /* Nothing to do */ + } else if x.is_predicate() { + b.push_op(OpPLop3 { + dsts: [x.into(), y.into()], + srcs: [x.into(), y.into(), Src::new_imm_bool(true)], + ops: [ + LogicOp::new_lut(&|_, y, _| y), + LogicOp::new_lut(&|x, _, _| x), + ], + }) + } else { + let xor = LogicOp::new_lut(&|x, y, _| x ^ y); + b.lop2_to(x.into(), xor, x.into(), y.into()); + b.lop2_to(y.into(), xor, x.into(), y.into()); + b.lop2_to(x.into(), xor, x.into(), y.into()); + } + } + + fn run(&mut self, s: &mut Shader) { + s.map_instrs(|instr: Box<Instr>, _| -> MappedInstrs { + match instr.op { + Op::Copy(copy) => { + debug_assert!(instr.pred.is_true()); + let mut b = InstrBuilder::new(); + self.lower_copy(&mut b, copy); + b.as_mapped_instrs() + } + Op::Swap(swap) => { + debug_assert!(instr.pred.is_true()); + let mut b = InstrBuilder::new(); + self.lower_swap(&mut b, swap); + b.as_mapped_instrs() + } + _ => MappedInstrs::One(instr), + } + }); + } +} + +impl Shader { + pub fn lower_copy_swap(&mut self) { + let mut pass = LowerCopySwap::new(); + pass.run(self); + } +} diff --git a/src/nouveau/compiler/nak_lower_par_copies.rs b/src/nouveau/compiler/nak_lower_par_copies.rs index 95db4bbde42..ddaf43436de 100644 --- a/src/nouveau/compiler/nak_lower_par_copies.rs +++ b/src/nouveau/compiler/nak_lower_par_copies.rs @@ -123,7 +123,7 @@ fn lower_par_copy(pc: OpParCopy) -> MappedInstrs { if let Some(src_idx) = graph.src(dst_idx) { let dst = *vals[dst_idx].as_reg().unwrap(); let src = vals[src_idx]; - b.mov_to(dst.into(), src.into()); + b.copy_to(dst.into(), src.into()); if graph.del_edge(dst_idx, src_idx) { ready.push(src_idx); } diff --git a/src/nouveau/compiler/nak_opt_copy_prop.rs b/src/nouveau/compiler/nak_opt_copy_prop.rs index 4914e26edf2..284b1c0aec1 100644 --- a/src/nouveau/compiler/nak_opt_copy_prop.rs +++ b/src/nouveau/compiler/nak_opt_copy_prop.rs @@ -414,6 +414,11 @@ impl CopyPropPass { assert!(dst.comps() == 1); 
self.add_copy(dst[0], SrcType::I32, neg.src.ineg()); } + Op::Copy(copy) => { + let dst = copy.dst.as_ssa().unwrap(); + assert!(dst.comps() == 1); + self.add_copy(dst[0], SrcType::GPR, copy.src); + } Op::ParCopy(pcopy) => { for (dst, src) in pcopy.dsts_srcs.iter() { let dst = dst.as_ssa().unwrap();