nak: Add a new OpCopy instruction for parallel copy lowering

This is different from OpMov because OpMov is an actual hardware
instruction, whereas OpCopy is a virtual instruction that a later pass
lowers to whatever real instruction the destination register file needs.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24998>
This commit is contained in:
Faith Ekstrand
2023-08-28 13:39:41 -05:00
committed by Marge Bot
parent b78ccebf4b
commit 4c798afb25
7 changed files with 164 additions and 91 deletions
+2 -2
View File
@@ -14,6 +14,7 @@ mod nak_from_nir;
mod nak_ir;
mod nak_legalize;
mod nak_liveness;
mod nak_lower_copy_swap;
mod nak_lower_par_copies;
mod nak_opt_copy_prop;
mod nak_opt_dce;
@@ -462,8 +463,7 @@ pub extern "C" fn nak_compile_shader(
s.lower_vec_split();
s.lower_par_copies();
s.lower_swap();
s.lower_mov_predicate();
s.lower_copy_swap();
s.calc_instr_deps();
if DEBUG.print() {
+4
View File
@@ -57,6 +57,10 @@ pub trait Builder {
});
}
/// Appends an `OpCopy` that copies `src` into `dst`.
fn copy_to(&mut self, dst: Dst, src: Src) {
    let copy = OpCopy { dst, src };
    self.push_op(copy);
}
fn swap(&mut self, x: RegRef, y: RegRef) {
assert!(x.file() == y.file());
self.push_op(OpSwap {
+15 -87
View File
@@ -3179,6 +3179,19 @@ impl fmt::Display for OpPhiDsts {
}
}
/// A copy of one source into one destination, printed as `COPY <dst> <src>`.
///
/// The copy/swap lowering pass turns this into an `OpMov` when the
/// destination is a GPR, or into a predicate `lop2` when the destination is
/// a predicate register.
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpCopy {
/// Destination written by the copy.
pub dst: Dst,
/// Source value that is copied.
pub src: Src,
}
impl fmt::Display for OpCopy {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "COPY {} {}", self.dst, self.src)
}
}
#[repr(C)]
#[derive(SrcsAsSlice, DstsAsSlice)]
pub struct OpSwap {
@@ -3511,6 +3524,7 @@ pub enum Op {
Undef(OpUndef),
PhiSrcs(OpPhiSrcs),
PhiDsts(OpPhiDsts),
Copy(OpCopy),
Swap(OpSwap),
ParCopy(OpParCopy),
FSOut(OpFSOut),
@@ -3891,6 +3905,7 @@ impl Instr {
Op::Undef(_)
| Op::PhiSrcs(_)
| Op::PhiDsts(_)
| Op::Copy(_)
| Op::Swap(_)
| Op::ParCopy(_)
| Op::FSOut(_) => {
@@ -4111,93 +4126,6 @@ impl Shader {
}
})
}
/// Replaces every `OpSwap` in the shader with other instructions.
///
/// Each swap must exchange two single-component registers of the same file,
/// with unmodified sources that mirror the destinations; these conditions
/// are asserted. A swap of a register with itself produces no instructions.
pub fn lower_swap(&mut self) {
    self.map_instrs(|instr: Box<Instr>, _| -> MappedInstrs {
        match instr.op {
            Op::Swap(swap) => {
                let x = *swap.dsts[0].as_reg().unwrap();
                let y = *swap.dsts[1].as_reg().unwrap();

                // Check the shape of the swap before emitting anything.
                assert!(x.file() == y.file());
                assert!(x.comps() == 1 && y.comps() == 1);
                assert!(swap.srcs[0].src_mod.is_none());
                assert!(*swap.srcs[0].src_ref.as_reg().unwrap() == y);
                assert!(swap.srcs[1].src_mod.is_none());
                assert!(*swap.srcs[1].src_ref.as_reg().unwrap() == x);

                let mut b = InstrBuilder::new();
                if x != y {
                    if x.is_predicate() {
                        // One OpPLop3 writes both predicates: the first op
                        // selects the second source, the second op selects
                        // the first source.
                        b.push_op(OpPLop3 {
                            dsts: [x.into(), y.into()],
                            srcs: [x.into(), y.into(), Src::new_imm_bool(true)],
                            ops: [
                                LogicOp::new_lut(&|_, second, _| second),
                                LogicOp::new_lut(&|first, _, _| first),
                            ],
                        });
                    } else {
                        // XOR swap: x ^= y; y ^= x; x ^= y.
                        let xor = LogicOp::new_lut(&|lhs, rhs, _| lhs ^ rhs);
                        b.lop2_to(x.into(), xor, x.into(), y.into());
                        b.lop2_to(y.into(), xor, x.into(), y.into());
                        b.lop2_to(x.into(), xor, x.into(), y.into());
                    }
                }
                b.as_mapped_instrs()
            }
            _ => MappedInstrs::One(instr),
        }
    })
}
/// Rewrites `OpMov` instructions that move a predicate value into `lop2`
/// instructions.
///
/// A `True`/`False` source becomes a constant logic op; a predicate
/// register source becomes a logic op that passes its first input through.
/// Every other instruction, including moves from other sources, is kept
/// unchanged. The move's source must carry no modifier (asserted).
pub fn lower_mov_predicate(&mut self) {
    self.map_instrs(|instr: Box<Instr>, _| -> MappedInstrs {
        match &instr.op {
            Op::Mov(mov) => {
                assert!(mov.src.src_mod.is_none());
                match mov.src.src_ref {
                    SrcRef::True | SrcRef::False => {
                        // The constant to materialize is the source itself.
                        let value = matches!(mov.src.src_ref, SrcRef::True);
                        let mut b = InstrBuilder::new();
                        b.lop2_to(
                            mov.dst,
                            LogicOp::new_const(value),
                            Src::new_imm_bool(true),
                            Src::new_imm_bool(true),
                        );
                        b.as_mapped_instrs()
                    }
                    SrcRef::Reg(reg) if reg.is_predicate() => {
                        let mut b = InstrBuilder::new();
                        b.lop2_to(
                            mov.dst,
                            LogicOp::new_lut(&|first, _, _| first),
                            mov.src,
                            Src::new_imm_bool(true),
                        );
                        b.as_mapped_instrs()
                    }
                    // Non-predicate registers and all other sources stay
                    // as a plain move.
                    _ => MappedInstrs::One(instr),
                }
            }
            _ => MappedInstrs::One(instr),
        }
    })
}
}
impl fmt::Display for Shader {
+2 -1
View File
@@ -233,7 +233,8 @@ impl<'a> LegalizeInstr<'a> {
self.mov_src_if_not_reg(src0, RegFile::GPR);
self.mov_src_if_not_reg(src2, RegFile::GPR);
}
Op::Ldc(_) => (), /* Nothing to do */
Op::Ldc(_) => (), // Nothing to do
Op::Copy(_) => (), // Nothing to do
_ => {
let src_types = instr.src_types();
for (i, src) in instr.srcs_mut().iter_mut().enumerate() {
+135
View File
@@ -0,0 +1,135 @@
// Copyright © 2022 Collabora, Ltd.
// SPDX-License-Identifier: MIT
use crate::nak_ir::*;
/// Pass object that lowers `OpCopy` and `OpSwap` into other instructions.
/// It has no fields and therefore carries no state between instructions.
struct LowerCopySwap {}
impl LowerCopySwap {
fn new() -> Self {
Self {}
}
fn lower_copy(&mut self, b: &mut impl Builder, copy: OpCopy) {
let dst_reg = copy.dst.as_reg().unwrap();
assert!(dst_reg.comps() == 1);
assert!(copy.src.src_mod.is_none());
match dst_reg.file() {
RegFile::GPR => match copy.src.src_ref {
SrcRef::Zero | SrcRef::Imm32(_) | SrcRef::CBuf(_) => {
b.push_op(OpMov {
dst: copy.dst,
src: copy.src,
quad_lanes: 0xf,
});
}
SrcRef::True | SrcRef::False => {
panic!("Cannot copy to GPR");
}
SrcRef::Reg(src_reg) => match src_reg.file() {
RegFile::GPR => {
b.push_op(OpMov {
dst: copy.dst,
src: copy.src,
quad_lanes: 0xf,
});
}
_ => panic!("Cannot copy to GPR"),
},
SrcRef::SSA(_) => panic!("Should be run after RA"),
},
RegFile::Pred => match copy.src.src_ref {
SrcRef::Zero | SrcRef::Imm32(_) | SrcRef::CBuf(_) => {
panic!("Cannot copy to Pred");
}
SrcRef::True => {
b.lop2_to(
copy.dst,
LogicOp::new_const(true),
Src::new_imm_bool(true),
Src::new_imm_bool(true),
);
}
SrcRef::False => {
b.lop2_to(
copy.dst,
LogicOp::new_const(false),
Src::new_imm_bool(true),
Src::new_imm_bool(true),
);
}
SrcRef::Reg(src_reg) => match src_reg.file() {
RegFile::Pred => {
b.lop2_to(
copy.dst,
LogicOp::new_lut(&|x, _, _| x),
copy.src,
Src::new_imm_bool(true),
);
}
_ => panic!("Cannot copy to Pred"),
},
SrcRef::SSA(_) => panic!("Should be run after RA"),
},
_ => panic!("Unhandled register file"),
}
}
fn lower_swap(&mut self, b: &mut impl Builder, swap: OpSwap) {
let x = *swap.dsts[0].as_reg().unwrap();
let y = *swap.dsts[1].as_reg().unwrap();
assert!(x.file() == y.file());
assert!(x.comps() == 1 && y.comps() == 1);
assert!(swap.srcs[0].src_mod.is_none());
assert!(*swap.srcs[0].src_ref.as_reg().unwrap() == y);
assert!(swap.srcs[1].src_mod.is_none());
assert!(*swap.srcs[1].src_ref.as_reg().unwrap() == x);
if x == y {
/* Nothing to do */
} else if x.is_predicate() {
b.push_op(OpPLop3 {
dsts: [x.into(), y.into()],
srcs: [x.into(), y.into(), Src::new_imm_bool(true)],
ops: [
LogicOp::new_lut(&|_, y, _| y),
LogicOp::new_lut(&|x, _, _| x),
],
})
} else {
let xor = LogicOp::new_lut(&|x, y, _| x ^ y);
b.lop2_to(x.into(), xor, x.into(), y.into());
b.lop2_to(y.into(), xor, x.into(), y.into());
b.lop2_to(x.into(), xor, x.into(), y.into());
}
}
fn run(&mut self, s: &mut Shader) {
s.map_instrs(|instr: Box<Instr>, _| -> MappedInstrs {
match instr.op {
Op::Copy(copy) => {
debug_assert!(instr.pred.is_true());
let mut b = InstrBuilder::new();
self.lower_copy(&mut b, copy);
b.as_mapped_instrs()
}
Op::Swap(swap) => {
debug_assert!(instr.pred.is_true());
let mut b = InstrBuilder::new();
self.lower_swap(&mut b, swap);
b.as_mapped_instrs()
}
_ => MappedInstrs::One(instr),
}
});
}
}
impl Shader {
    /// Runs the copy/swap lowering pass over this shader, replacing every
    /// `OpCopy` and `OpSwap` with its lowered instructions.
    pub fn lower_copy_swap(&mut self) {
        LowerCopySwap::new().run(self);
    }
}
+1 -1
View File
@@ -123,7 +123,7 @@ fn lower_par_copy(pc: OpParCopy) -> MappedInstrs {
if let Some(src_idx) = graph.src(dst_idx) {
let dst = *vals[dst_idx].as_reg().unwrap();
let src = vals[src_idx];
b.mov_to(dst.into(), src.into());
b.copy_to(dst.into(), src.into());
if graph.del_edge(dst_idx, src_idx) {
ready.push(src_idx);
}
@@ -414,6 +414,11 @@ impl CopyPropPass {
assert!(dst.comps() == 1);
self.add_copy(dst[0], SrcType::I32, neg.src.ineg());
}
Op::Copy(copy) => {
let dst = copy.dst.as_ssa().unwrap();
assert!(dst.comps() == 1);
self.add_copy(dst[0], SrcType::GPR, copy.src);
}
Op::ParCopy(pcopy) => {
for (dst, src) in pcopy.dsts_srcs.iter() {
let dst = dst.as_ssa().unwrap();