nak: Optimize OpLop3 and OpPLop3

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24998>
This commit is contained in:
Faith Ekstrand
2023-04-27 17:29:19 -05:00
committed by Marge Bot
parent 59f6d657f3
commit 818ec3242b
4 changed files with 376 additions and 3 deletions
+6
View File
@@ -15,6 +15,7 @@ mod nak_liveness;
mod nak_lower_par_copies;
mod nak_opt_copy_prop;
mod nak_opt_dce;
mod nak_opt_lop;
mod nir;
mod union_find;
mod util;
@@ -433,6 +434,11 @@ pub extern "C" fn nak_compile_shader(
println!("NAK IR:\n{}", &s);
}
s.opt_lop();
if DEBUG.print() {
println!("NAK IR:\n{}", &s);
}
s.opt_dce();
if DEBUG.print() {
println!("NAK IR:\n{}", &s);
+36 -3
View File
@@ -418,13 +418,13 @@ pub enum CBuf {
BindlessGPR(RegRef),
}
#[derive(Clone, Copy)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub struct CBufRef {
pub buf: CBuf,
pub offset: u16,
}
#[derive(Clone, Copy)]
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub enum SrcRef {
Zero,
True,
@@ -971,15 +971,22 @@ impl fmt::Display for IntCmpType {
}
}
/// A three-input boolean function stored as an 8-bit look-up table.
///
/// Bit `n` of `lut` is the result for one combination of the three inputs.
/// Judging by `SRC_MASKS` (0xf0, 0xcc, 0xaa), source 0 selects bit 2 of the
/// table index, source 1 selects bit 1 and source 2 selects bit 0.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub struct LogicOp {
/// The truth table itself, one result bit per input combination.
pub lut: u8,
}
impl LogicOp {
/// For each source index, the set of LUT bit positions at which that source
/// is true.  Evaluating a bitwise expression on these three bytes yields the
/// LUT of that expression (see `new_lut`).
pub const SRC_MASKS: [u8; 3] = [0xf0, 0xcc, 0xaa];
/// Builds a `LogicOp` from a bitwise function of three inputs.
///
/// `f` is called once with the three `SRC_MASKS` bytes; the byte it returns
/// is the look-up table of the function it computes.  The masks come from
/// `SRC_MASKS` rather than repeated literals so that every user of the
/// table layout agrees on it.
#[inline]
pub fn new_lut<F: Fn(u8, u8, u8) -> u8>(f: &F) -> LogicOp {
    let [x, y, z] = LogicOp::SRC_MASKS;
    LogicOp { lut: f(x, y, z) }
}
@@ -989,6 +996,32 @@ impl LogicOp {
}
}
/// Returns true when the result of this LUT depends on source `src_idx`.
///
/// The table is split into the half where the source is false and the half
/// where it is true; the source matters only if those halves differ.
pub fn src_used(&self, src_idx: usize) -> bool {
    let src_mask = LogicOp::SRC_MASKS[src_idx];
    let shift = src_mask.trailing_zeros();
    let when_false = self.lut & !src_mask;
    let when_true = (self.lut >> shift) & !src_mask;
    when_false != when_true
}
/// Specializes the LUT for source `src_idx` being the constant `val`.
///
/// The half of the table matching `val` is kept and mirrored onto the other
/// half, so afterwards the result no longer depends on that source.
pub fn fix_src(&mut self, src_idx: usize, val: bool) {
    let src_mask = LogicOp::SRC_MASKS[src_idx];
    let shift = src_mask.trailing_zeros();
    self.lut = if val {
        let kept = self.lut & src_mask;
        kept | (kept >> shift)
    } else {
        let kept = self.lut & !src_mask;
        (kept << shift) | kept
    };
}
/// Rewrites the LUT so that it expects the logical inverse of source
/// `src_idx`, by swapping the "source true" and "source false" halves of
/// the table.
pub fn invert_src(&mut self, src_idx: usize) {
    let src_mask = LogicOp::SRC_MASKS[src_idx];
    let shift = src_mask.trailing_zeros();
    let from_true_half = (self.lut & src_mask) >> shift;
    let from_false_half = (self.lut & !src_mask) << shift;
    self.lut = from_false_half | from_true_half;
}
pub fn eval<
T: BitAnd<Output = T> + BitOr<Output = T> + Copy + Not<Output = T>,
>(
+72
View File
@@ -310,6 +310,36 @@ impl CopyPropPass {
for b in &mut f.blocks {
for instr in &mut b.instrs {
match &instr.op {
Op::Lop3(lop) => {
    // A Lop3 whose table is constant, or equal to one of its sources,
    // is really just a copy; record it so later uses can be rewritten.
    let ssa_dst = lop.dst.as_ssa().unwrap();
    assert!(ssa_dst.comps() == 1);
    let dst = ssa_dst[0];
    let lut = lop.op.lut;

    match lut {
        // Always false: the result is zero.
        0x00 => {
            self.add_copy(dst, SrcType::ALU, SrcRef::Zero.into());
        }
        // Always true: the result is all ones.
        0xff => {
            self.add_copy(
                dst,
                SrcType::ALU,
                SrcRef::Imm32(u32::MAX).into(),
            );
        }
        // Pass-through of exactly one source.
        _ => {
            let candidates =
                LogicOp::SRC_MASKS.iter().zip(lop.srcs.iter());
            for (mask, src) in candidates {
                if lut == *mask {
                    self.add_copy(dst, SrcType::ALU, *src);
                }
            }
        }
    }
}
Op::Mov(mov) => {
let dst = mov.dst.as_ssa().unwrap();
assert!(dst.comps() == 1);
@@ -317,6 +347,48 @@ impl CopyPropPass {
self.add_copy(dst[0], SrcType::GPR, mov.src);
}
}
Op::PLop3(lop) => {
    // PLop3 has two destinations, each with its own LUT over the
    // same three sources.  Any destination whose LUT is constant, a
    // plain source, or an inverted source is recorded as a copy.
    for i in 0..2 {
        let dst = match lop.dsts[i] {
            Dst::SSA(vec) => {
                assert!(vec.comps() == 1);
                vec[0]
            }
            _ => continue,
        };

        let op = lop.ops[i];
        if op.lut == 0 {
            self.add_copy(
                dst,
                SrcType::Pred,
                SrcRef::False.into(),
            );
        } else if op.lut == !0 {
            self.add_copy(
                dst,
                SrcType::Pred,
                SrcRef::True.into(),
            );
        } else {
            for s in 0..3 {
                // The copied source must be the one whose mask
                // matched (index `s`), not the destination index `i`.
                if op.lut == LogicOp::SRC_MASKS[s] {
                    self.add_copy(
                        dst,
                        SrcType::Pred,
                        lop.srcs[s],
                    );
                } else if op.lut == !LogicOp::SRC_MASKS[s] {
                    self.add_copy(
                        dst,
                        SrcType::Pred,
                        lop.srcs[s].bnot(),
                    );
                }
            }
        }
    }
}
Op::FMov(mov) => {
let dst = mov.dst.as_ssa().unwrap();
assert!(dst.comps() == 1);
+262
View File
@@ -0,0 +1,262 @@
/*
* Copyright © 2022 Collabora, Ltd.
* SPDX-License-Identifier: MIT
*/
use crate::nak_ir::*;
use std::collections::HashMap;
use std::slice;
/// What the pass remembers about an SSA value defined by a Lop3/PLop3: the
/// logic function and the sources it was computed from.
struct LopEntry {
/// The look-up table that produced the value.
op: LogicOp,
/// Bit `i` is set when `op` actually depends on `srcs[i]`.
srcs_used: u8,
/// The three sources of the defining instruction, as they were when the
/// entry was recorded.
srcs: [Src; 3],
}
/// State of the logic-op optimization pass for one function.
struct LopPass {
/// Number of times each SSA value is read as a predicate or a source,
/// counted once up front in `new`.
use_counts: HashMap<SSAValue, u32>,
/// For every SSA value defined by an already-visited Lop3/PLop3, the logic
/// op that defines it.
ssa_lop: HashMap<SSAValue, LopEntry>,
}
/// Interprets a source as a boolean constant when possible.
///
/// Zero, False and an all-zero immediate count as `false`; True and an
/// all-ones immediate count as `true`.  Anything else yields `None`.
/// Panics if the source still carries a modifier.
fn src_as_bool(src: &Src) -> Option<bool> {
    assert!(src.src_mod.is_none());

    let src_ref = src.src_ref;
    if matches!(src_ref, SrcRef::Zero | SrcRef::False | SrcRef::Imm32(0)) {
        Some(false)
    } else if matches!(src_ref, SrcRef::True | SrcRef::Imm32(u32::MAX)) {
        Some(true)
    } else {
        None
    }
}
impl LopPass {
fn new(f: &Function) -> LopPass {
let mut use_counts = HashMap::new();
for b in &f.blocks {
for instr in &b.instrs {
if let Pred::SSA(ssa) = instr.pred {
use_counts.entry(ssa).and_modify(|e| *e += 1).or_insert(1);
}
for src in instr.srcs() {
if let SrcRef::SSA(vec) = src.src_ref {
for ssa in vec.iter() {
use_counts
.entry(*ssa)
.and_modify(|e| *e += 1)
.or_insert(1);
}
}
}
}
}
LopPass {
use_counts: use_counts,
ssa_lop: HashMap::new(),
}
}
/* Records that `ssa` is computed by `op` applied to `srcs`, so later
 * instructions reading `ssa` can have this logic op folded into them.
 * Every source the op depends on must be a non-constant.
 */
fn add_lop(&mut self, ssa: SSAValue, op: LogicOp, srcs: [Src; 3]) {
    let mut used_mask = 0_u8;
    for (idx, src) in srcs.iter().enumerate() {
        if !op.src_used(idx) {
            continue;
        }
        /* Constants are expected to have been folded into the LUT */
        assert!(src_as_bool(src).is_none());
        used_mask |= 1 << idx;
    }

    self.ssa_lop.insert(
        ssa,
        LopEntry {
            op,
            srcs_used: used_mask,
            srcs,
        },
    );
}
/* Rewrites `op` so that, whenever two sources refer to the same value,
 * only the lower-indexed one is read.  The higher-indexed duplicate then
 * becomes unused and can be dropped by the caller.
 *
 * The sources are only compared by `src_ref`, so a logical-not modifier
 * has to be taken into account separately: if exactly one source of the
 * pair carries `bnot`, the duplicate is the inverse of the other source,
 * not equal to it, and the LUT must be evaluated with the inverted input.
 */
fn dedup_srcs(&self, op: &mut LogicOp, srcs: &[Src; 3]) {
    for &(keep, dup) in &[(0, 1), (0, 2), (1, 2)] {
        if srcs[keep].src_ref != srcs[dup].src_ref {
            continue;
        }

        let inverted =
            srcs[keep].src_mod.is_bnot() != srcs[dup].src_mod.is_bnot();
        let old = *op;
        *op = LogicOp::new_lut(&|x, y, z| {
            let mut s = [x, y, z];
            s[dup] = if inverted { !s[keep] } else { s[keep] };
            old.eval(s[0], s[1], s[2])
        });
    }
}
/* Tries to fold the logic op that defines srcs[src_idx] into the
 * instruction being optimized.
 *
 * `ops` holds every LUT of the instruction (one for Lop3, two for PLop3);
 * they all read the same `srcs`.  When srcs[src_idx] is a single-component
 * SSA value with a recorded LopEntry, the entry's sources are placed into
 * `srcs` and each LUT in `ops` is replaced by its composition with the
 * entry's LUT.  This repeats until the source in that slot is no longer
 * foldable.  Nothing is modified in an iteration that bails out.
 */
fn try_prop_to_src(
&self,
ops: &mut [LogicOp],
srcs: &mut [Src; 3],
src_idx: usize,
) {
loop {
assert!(srcs[src_idx].src_mod.is_none());
/* Only single-component SSA sources can have a recorded definition */
let ssa = match srcs[src_idx].src_ref {
SrcRef::SSA(vec) => {
assert!(vec.comps() == 1);
vec[0]
}
_ => return,
};
let entry = match self.ssa_lop.get(&ssa) {
Some(e) => e,
None => return,
};
/* Don't fold a multi-source op into more than one user.  An op that
 * depends on at most one source is always folded.
 */
let entry_use_count = *self.use_counts.get(&ssa).unwrap();
if entry.srcs_used.count_ones() > 1 && entry_use_count > 1 {
return;
}
/* entry_srcs[i] is the slot in `srcs` that will hold entry.srcs[i], or
 * usize::MAX if the entry does not use that source.  next_src scans for
 * free slots, left to right.
 */
let mut entry_srcs = [usize::MAX; 3];
let mut next_src = 0_usize;
for i in 0..3 {
if entry.srcs_used & (1 << i) == 0 {
continue;
}
/* Re-use a slot that already holds the same value, if any */
let mut found = false;
for j in 0..3 {
if entry.srcs[i].src_ref == srcs[j].src_ref {
entry_srcs[i] = j;
found = true;
break;
}
}
if found {
continue;
}
/* Otherwise claim the next free slot; give up (before touching
 * anything) if there is none left.
 */
loop {
if next_src >= srcs.len() {
return;
}
/* All callers of this function need to ensure that
* constant sources are already folded so we know we
* can always re-use them.
*/
if next_src == src_idx
|| src_as_bool(&srcs[next_src]).is_some()
{
entry_srcs[i] = next_src;
next_src += 1;
break;
}
next_src += 1;
}
}
/* Clear out the propagated source. What we put here doesn't matter
* since it's no longer used. It may be overwritten by one of the
* entry sources but there is no guarantee of this.
*/
srcs[src_idx] = match ssa.file() {
RegFile::GPR | RegFile::UGPR => SrcRef::Zero.into(),
RegFile::Pred | RegFile::UPred => SrcRef::True.into(),
};
/* Install the entry's sources in their assigned slots */
for i in 0..3 {
if entry_srcs[i] != usize::MAX {
srcs[entry_srcs[i]] = entry.srcs[i];
}
}
/* Compose every LUT with the entry's LUT: evaluate the entry on the
 * slots its sources now live in, then feed that result to the old op in
 * place of the propagated source.  The entry inputs are read from `s`
 * before s[src_idx] is overwritten, so an entry source placed in slot
 * src_idx still sees that slot's input.
 */
for op in ops.iter_mut() {
*op = LogicOp::new_lut(&|x, y, z| {
let mut s = [x, y, z];
let mut es = [0; 3];
for i in 0..3 {
if entry_srcs[i] != usize::MAX {
es[i] = s[entry_srcs[i]];
}
}
let e = entry.op.eval(es[0], es[1], es[2]);
s[src_idx] = e;
op.eval(s[0], s[1], s[2])
});
}
}
}
/* Simplifies one Lop3 in place and records its result for later folding.
 *
 * Steps: merge duplicated sources, fold constant sources into the LUT,
 * replace sources the LUT no longer reads with RZ, try to fold the
 * defining logic op of each source into this instruction, and finally
 * remember the resulting op for the destination SSA value.
 */
fn opt_lop3(&mut self, op: &mut OpLop3) {
    self.dedup_srcs(&mut op.op, &op.srcs);

    /* Fold every constant first.  Fixing one source can make another
     * one irrelevant (e.g. `a & 0`), so the unused-source cleanup has to
     * wait until all constants have been folded.
     */
    for (i, src) in op.srcs.iter().enumerate() {
        assert!(src.src_mod.is_none());
        if let Some(b) = src_as_bool(src) {
            op.op.fix_src(i, b);
        }
    }

    /* Replace unused sources with RZ */
    for (i, src) in op.srcs.iter_mut().enumerate() {
        if !op.op.src_used(i) {
            *src = SrcRef::Zero.into();
        }
    }

    for i in 0..3 {
        self.try_prop_to_src(slice::from_mut(&mut op.op), &mut op.srcs, i);
    }

    if let Dst::SSA(ssa) = op.dst {
        assert!(ssa.comps() == 1);
        self.add_lop(ssa[0], op.op, op.srcs);
    }
}
/* Simplifies one PLop3 in place and records its results for later
 * folding.  Both LUTs of the instruction share the same three sources, so
 * every source rewrite is applied to both of them.
 */
fn opt_plop3(&mut self, op: &mut OpPLop3) {
    /* Fold logical-not modifiers into the LUTs before anything else.
     * Afterwards all sources are modifier-free, so two sources with the
     * same reference really are the same value when de-duplicating.
     */
    for (i, src) in op.srcs.iter_mut().enumerate() {
        if src.src_mod.is_bnot() {
            op.ops[0].invert_src(i);
            op.ops[1].invert_src(i);
            src.src_mod = SrcMod::None;
        }
    }

    self.dedup_srcs(&mut op.ops[0], &op.srcs);
    self.dedup_srcs(&mut op.ops[1], &op.srcs);

    /* Fold every constant before looking for unused sources: fixing one
     * source can make another one irrelevant.
     */
    for (i, src) in op.srcs.iter().enumerate() {
        if let Some(b) = src_as_bool(src) {
            op.ops[0].fix_src(i, b);
            op.ops[1].fix_src(i, b);
        }
    }

    /* Replace unused sources with PT */
    for (i, src) in op.srcs.iter_mut().enumerate() {
        if !op.ops[0].src_used(i) && !op.ops[1].src_used(i) {
            *src = SrcRef::True.into();
        }
    }

    for i in 0..3 {
        self.try_prop_to_src(&mut op.ops, &mut op.srcs, i);
    }

    for i in 0..2 {
        if let Dst::SSA(ssa) = op.dsts[i] {
            assert!(ssa.comps() == 1);
            self.add_lop(ssa[0], op.ops[i], op.srcs);
        }
    }
}
/* Visits every instruction of `f` in block order and optimizes the Lop3
 * and PLop3 instructions; everything else is left alone.
 */
fn run(&mut self, f: &mut Function) {
    for block in &mut f.blocks {
        for instr in &mut block.instrs {
            if let Op::Lop3(lop) = &mut instr.op {
                self.opt_lop3(lop);
            } else if let Op::PLop3(plop) = &mut instr.op {
                self.opt_plop3(plop);
            }
        }
    }
}
}
impl Shader {
    /// Runs the logic-op optimization on every function of the shader,
    /// using a fresh pass state for each function.
    pub fn opt_lop(&mut self) {
        for func in &mut self.functions {
            LopPass::new(func).run(func);
        }
    }
}