nak: Optimize OpLop3 and OpPLop3
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24998>
This commit is contained in:
committed by
Marge Bot
parent
59f6d657f3
commit
818ec3242b
@@ -15,6 +15,7 @@ mod nak_liveness;
|
||||
mod nak_lower_par_copies;
|
||||
mod nak_opt_copy_prop;
|
||||
mod nak_opt_dce;
|
||||
mod nak_opt_lop;
|
||||
mod nir;
|
||||
mod union_find;
|
||||
mod util;
|
||||
@@ -433,6 +434,11 @@ pub extern "C" fn nak_compile_shader(
|
||||
println!("NAK IR:\n{}", &s);
|
||||
}
|
||||
|
||||
s.opt_lop();
|
||||
if DEBUG.print() {
|
||||
println!("NAK IR:\n{}", &s);
|
||||
}
|
||||
|
||||
s.opt_dce();
|
||||
if DEBUG.print() {
|
||||
println!("NAK IR:\n{}", &s);
|
||||
|
||||
@@ -418,13 +418,13 @@ pub enum CBuf {
|
||||
BindlessGPR(RegRef),
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
|
||||
pub struct CBufRef {
|
||||
pub buf: CBuf,
|
||||
pub offset: u16,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
|
||||
pub enum SrcRef {
|
||||
Zero,
|
||||
True,
|
||||
@@ -971,15 +971,22 @@ impl fmt::Display for IntCmpType {
|
||||
}
|
||||
}
|
||||
|
||||
/// A three-input bitwise logic function stored as an 8-bit look-up table.
#[derive(Clone, Copy, Eq, Hash, PartialEq)]
pub struct LogicOp {
    /// The look-up table: the byte obtained by applying the function to the
    /// three canonical source patterns `0xf0`, `0xcc` and `0xaa`.
    pub lut: u8,
}
|
||||
|
||||
impl LogicOp {
|
||||
/// Canonical bit pattern of each of the three sources (0xf0, 0xcc, 0xaa):
/// bit `n` of `SRC_MASKS[i]` is the value source `i` takes in LUT entry `n`.
pub const SRC_MASKS: [u8; 3] = [0b1111_0000, 0b1100_1100, 0b1010_1010];
|
||||
|
||||
#[inline]
|
||||
pub fn new_lut<F: Fn(u8, u8, u8) -> u8>(f: &F) -> LogicOp {
|
||||
LogicOp {
|
||||
lut: f(0xf0, 0xcc, 0xaa),
|
||||
lut: f(
|
||||
LogicOp::SRC_MASKS[0],
|
||||
LogicOp::SRC_MASKS[1],
|
||||
LogicOp::SRC_MASKS[2],
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -989,6 +996,32 @@ impl LogicOp {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn src_used(&self, src_idx: usize) -> bool {
|
||||
let mask = LogicOp::SRC_MASKS[src_idx];
|
||||
let shift = LogicOp::SRC_MASKS[src_idx].trailing_zeros();
|
||||
self.lut & !mask != (self.lut >> shift) & !mask
|
||||
}
|
||||
|
||||
pub fn fix_src(&mut self, src_idx: usize, val: bool) {
|
||||
let mask = LogicOp::SRC_MASKS[src_idx];
|
||||
let shift = LogicOp::SRC_MASKS[src_idx].trailing_zeros();
|
||||
if val {
|
||||
let t_bits = self.lut & mask;
|
||||
self.lut = t_bits | (t_bits >> shift)
|
||||
} else {
|
||||
let f_bits = self.lut & !mask;
|
||||
self.lut = (f_bits << shift) | f_bits
|
||||
};
|
||||
}
|
||||
|
||||
pub fn invert_src(&mut self, src_idx: usize) {
|
||||
let mask = LogicOp::SRC_MASKS[src_idx];
|
||||
let shift = LogicOp::SRC_MASKS[src_idx].trailing_zeros();
|
||||
let t_bits = self.lut & mask;
|
||||
let f_bits = self.lut & !mask;
|
||||
self.lut = (f_bits << shift) | (t_bits >> shift);
|
||||
}
|
||||
|
||||
pub fn eval<
|
||||
T: BitAnd<Output = T> + BitOr<Output = T> + Copy + Not<Output = T>,
|
||||
>(
|
||||
|
||||
@@ -310,6 +310,36 @@ impl CopyPropPass {
|
||||
for b in &mut f.blocks {
|
||||
for instr in &mut b.instrs {
|
||||
match &instr.op {
|
||||
Op::Lop3(lop) => {
|
||||
let dst = lop.dst.as_ssa().unwrap();
|
||||
assert!(dst.comps() == 1);
|
||||
let dst = dst[0];
|
||||
|
||||
let op = lop.op;
|
||||
if op.lut == 0 {
|
||||
self.add_copy(
|
||||
dst,
|
||||
SrcType::ALU,
|
||||
SrcRef::Zero.into(),
|
||||
);
|
||||
} else if op.lut == !0 {
|
||||
self.add_copy(
|
||||
dst,
|
||||
SrcType::ALU,
|
||||
SrcRef::Imm32(u32::MAX).into(),
|
||||
);
|
||||
} else {
|
||||
for s in 0..3 {
|
||||
if op.lut == LogicOp::SRC_MASKS[s] {
|
||||
self.add_copy(
|
||||
dst,
|
||||
SrcType::ALU,
|
||||
lop.srcs[s],
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Op::Mov(mov) => {
|
||||
let dst = mov.dst.as_ssa().unwrap();
|
||||
assert!(dst.comps() == 1);
|
||||
@@ -317,6 +347,48 @@ impl CopyPropPass {
|
||||
self.add_copy(dst[0], SrcType::GPR, mov.src);
|
||||
}
|
||||
}
|
||||
Op::PLop3(lop) => {
    /* A PLop3 has two destinations, each with its own LUT over the
     * same three sources.  Record a copy for every SSA destination
     * whose LUT is a constant or a (possibly negated) pass-through
     * of a single source.
     */
    for i in 0..2 {
        let dst = match lop.dsts[i] {
            Dst::SSA(vec) => {
                assert!(vec.comps() == 1);
                vec[0]
            }
            _ => continue,
        };

        let op = lop.ops[i];
        if op.lut == 0 {
            /* Always false */
            self.add_copy(dst, SrcType::Pred, SrcRef::False.into());
        } else if op.lut == !0 {
            /* Always true */
            self.add_copy(dst, SrcType::Pred, SrcRef::True.into());
        } else {
            for s in 0..3 {
                /* The forwarded source is the one whose mask the LUT
                 * matches, i.e. index `s`.  `i` is the destination
                 * index and must not be used to index `srcs`.
                 */
                if op.lut == LogicOp::SRC_MASKS[s] {
                    self.add_copy(dst, SrcType::Pred, lop.srcs[s]);
                } else if op.lut == !LogicOp::SRC_MASKS[s] {
                    self.add_copy(
                        dst,
                        SrcType::Pred,
                        lop.srcs[s].bnot(),
                    );
                }
            }
        }
    }
}
|
||||
Op::FMov(mov) => {
|
||||
let dst = mov.dst.as_ssa().unwrap();
|
||||
assert!(dst.comps() == 1);
|
||||
|
||||
@@ -0,0 +1,262 @@
|
||||
/*
|
||||
* Copyright © 2022 Collabora, Ltd.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
use crate::nak_ir::*;
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::slice;
|
||||
|
||||
struct LopEntry {
|
||||
op: LogicOp,
|
||||
srcs_used: u8,
|
||||
srcs: [Src; 3],
|
||||
}
|
||||
|
||||
struct LopPass {
|
||||
use_counts: HashMap<SSAValue, u32>,
|
||||
ssa_lop: HashMap<SSAValue, LopEntry>,
|
||||
}
|
||||
|
||||
fn src_as_bool(src: &Src) -> Option<bool> {
|
||||
assert!(src.src_mod.is_none());
|
||||
match src.src_ref {
|
||||
SrcRef::Zero | SrcRef::False | SrcRef::Imm32(0) => Some(false),
|
||||
SrcRef::True | SrcRef::Imm32(u32::MAX) => Some(true),
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
|
||||
impl LopPass {
|
||||
fn new(f: &Function) -> LopPass {
|
||||
let mut use_counts = HashMap::new();
|
||||
for b in &f.blocks {
|
||||
for instr in &b.instrs {
|
||||
if let Pred::SSA(ssa) = instr.pred {
|
||||
use_counts.entry(ssa).and_modify(|e| *e += 1).or_insert(1);
|
||||
}
|
||||
|
||||
for src in instr.srcs() {
|
||||
if let SrcRef::SSA(vec) = src.src_ref {
|
||||
for ssa in vec.iter() {
|
||||
use_counts
|
||||
.entry(*ssa)
|
||||
.and_modify(|e| *e += 1)
|
||||
.or_insert(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
LopPass {
|
||||
use_counts: use_counts,
|
||||
ssa_lop: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn add_lop(&mut self, ssa: SSAValue, op: LogicOp, srcs: [Src; 3]) {
|
||||
let mut srcs_used = 0;
|
||||
for i in 0..3 {
|
||||
if op.src_used(i) {
|
||||
srcs_used |= 1 << i;
|
||||
assert!(src_as_bool(&srcs[i]).is_none());
|
||||
}
|
||||
}
|
||||
let entry = LopEntry {
|
||||
op: op,
|
||||
srcs_used: srcs_used,
|
||||
srcs: srcs,
|
||||
};
|
||||
self.ssa_lop.insert(ssa, entry);
|
||||
}
|
||||
|
||||
fn dedup_srcs(&self, op: &mut LogicOp, srcs: &[Src; 3]) {
|
||||
if srcs[0].src_ref == srcs[1].src_ref {
|
||||
*op = LogicOp::new_lut(&|x, _, z| op.eval(x, x, z))
|
||||
}
|
||||
if srcs[0].src_ref == srcs[2].src_ref {
|
||||
*op = LogicOp::new_lut(&|x, y, _| op.eval(x, y, x))
|
||||
}
|
||||
if srcs[1].src_ref == srcs[2].src_ref {
|
||||
*op = LogicOp::new_lut(&|x, y, _| op.eval(x, y, y))
|
||||
}
|
||||
}
|
||||
|
||||
fn try_prop_to_src(
|
||||
&self,
|
||||
ops: &mut [LogicOp],
|
||||
srcs: &mut [Src; 3],
|
||||
src_idx: usize,
|
||||
) {
|
||||
loop {
|
||||
assert!(srcs[src_idx].src_mod.is_none());
|
||||
let ssa = match srcs[src_idx].src_ref {
|
||||
SrcRef::SSA(vec) => {
|
||||
assert!(vec.comps() == 1);
|
||||
vec[0]
|
||||
}
|
||||
_ => return,
|
||||
};
|
||||
|
||||
let entry = match self.ssa_lop.get(&ssa) {
|
||||
Some(e) => e,
|
||||
None => return,
|
||||
};
|
||||
|
||||
let entry_use_count = *self.use_counts.get(&ssa).unwrap();
|
||||
if entry.srcs_used.count_ones() > 1 && entry_use_count > 1 {
|
||||
return;
|
||||
}
|
||||
|
||||
let mut entry_srcs = [usize::MAX; 3];
|
||||
let mut next_src = 0_usize;
|
||||
for i in 0..3 {
|
||||
if entry.srcs_used & (1 << i) == 0 {
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut found = false;
|
||||
for j in 0..3 {
|
||||
if entry.srcs[i].src_ref == srcs[j].src_ref {
|
||||
entry_srcs[i] = j;
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if found {
|
||||
continue;
|
||||
}
|
||||
|
||||
loop {
|
||||
if next_src >= srcs.len() {
|
||||
return;
|
||||
}
|
||||
|
||||
/* All callers of this function need to ensure that
|
||||
* constant sources are already folded so we know we
|
||||
* can always re-use them.
|
||||
*/
|
||||
if next_src == src_idx
|
||||
|| src_as_bool(&srcs[next_src]).is_some()
|
||||
{
|
||||
entry_srcs[i] = next_src;
|
||||
next_src += 1;
|
||||
break;
|
||||
}
|
||||
next_src += 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Clear out the propagated source. What we put here doesn't matter
|
||||
* since it's no longer used. It may be overwritten by one of the
|
||||
* entry sources but there is no guarantee of this.
|
||||
*/
|
||||
srcs[src_idx] = match ssa.file() {
|
||||
RegFile::GPR | RegFile::UGPR => SrcRef::Zero.into(),
|
||||
RegFile::Pred | RegFile::UPred => SrcRef::True.into(),
|
||||
};
|
||||
|
||||
for i in 0..3 {
|
||||
if entry_srcs[i] != usize::MAX {
|
||||
srcs[entry_srcs[i]] = entry.srcs[i];
|
||||
}
|
||||
}
|
||||
for op in ops.iter_mut() {
|
||||
*op = LogicOp::new_lut(&|x, y, z| {
|
||||
let mut s = [x, y, z];
|
||||
let mut es = [0; 3];
|
||||
for i in 0..3 {
|
||||
if entry_srcs[i] != usize::MAX {
|
||||
es[i] = s[entry_srcs[i]];
|
||||
}
|
||||
}
|
||||
let e = entry.op.eval(es[0], es[1], es[2]);
|
||||
s[src_idx] = e;
|
||||
op.eval(s[0], s[1], s[2])
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn opt_lop3(&mut self, op: &mut OpLop3) {
|
||||
self.dedup_srcs(&mut op.op, &op.srcs);
|
||||
|
||||
for (i, src) in op.srcs.iter_mut().enumerate() {
|
||||
assert!(src.src_mod.is_none());
|
||||
|
||||
if let Some(b) = src_as_bool(src) {
|
||||
op.op.fix_src(i, b);
|
||||
}
|
||||
|
||||
if !op.op.src_used(i) {
|
||||
/* Replace unused sources with RZ */
|
||||
*src = SrcRef::Zero.into();
|
||||
}
|
||||
}
|
||||
|
||||
for i in 0..3 {
|
||||
self.try_prop_to_src(slice::from_mut(&mut op.op), &mut op.srcs, i);
|
||||
}
|
||||
|
||||
if let Dst::SSA(ssa) = op.dst {
|
||||
assert!(ssa.comps() == 1);
|
||||
self.add_lop(ssa[0], op.op, op.srcs);
|
||||
}
|
||||
}
|
||||
|
||||
fn opt_plop3(&mut self, op: &mut OpPLop3) {
|
||||
self.dedup_srcs(&mut op.ops[0], &op.srcs);
|
||||
self.dedup_srcs(&mut op.ops[1], &op.srcs);
|
||||
|
||||
/* Replace unused sources with PT */
|
||||
for (i, src) in op.srcs.iter_mut().enumerate() {
|
||||
if src.src_mod.is_bnot() {
|
||||
op.ops[0].invert_src(i);
|
||||
op.ops[1].invert_src(i);
|
||||
src.src_mod = SrcMod::None;
|
||||
}
|
||||
|
||||
if let Some(b) = src_as_bool(src) {
|
||||
op.ops[0].fix_src(i, b);
|
||||
op.ops[1].fix_src(i, b);
|
||||
}
|
||||
|
||||
if !op.ops[0].src_used(i) && !op.ops[1].src_used(i) {
|
||||
*src = SrcRef::True.into();
|
||||
}
|
||||
}
|
||||
|
||||
for i in 0..3 {
|
||||
self.try_prop_to_src(&mut op.ops, &mut op.srcs, i);
|
||||
}
|
||||
|
||||
for i in 0..2 {
|
||||
if let Dst::SSA(ssa) = op.dsts[i] {
|
||||
assert!(ssa.comps() == 1);
|
||||
self.add_lop(ssa[0], op.ops[i], op.srcs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn run(&mut self, f: &mut Function) {
|
||||
for b in &mut f.blocks {
|
||||
for instr in &mut b.instrs {
|
||||
match &mut instr.op {
|
||||
Op::Lop3(op) => self.opt_lop3(op),
|
||||
Op::PLop3(op) => self.opt_plop3(op),
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Shader {
|
||||
pub fn opt_lop(&mut self) {
|
||||
for f in &mut self.functions {
|
||||
let mut pass = LopPass::new(f);
|
||||
pass.run(f);
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user