diff --git a/src/nouveau/compiler/nak.rs b/src/nouveau/compiler/nak.rs
index 3e4789715f0..f0a652bfa00 100644
--- a/src/nouveau/compiler/nak.rs
+++ b/src/nouveau/compiler/nak.rs
@@ -19,6 +19,7 @@ mod nak_lower_par_copies;
mod nak_opt_copy_prop;
mod nak_opt_dce;
mod nak_opt_lop;
+mod nak_to_cssa;
mod nir;
mod util;
diff --git a/src/nouveau/compiler/nak_ir.rs b/src/nouveau/compiler/nak_ir.rs
index 73f31e250cd..699c0fac083 100644
--- a/src/nouveau/compiler/nak_ir.rs
+++ b/src/nouveau/compiler/nak_ir.rs
@@ -3045,6 +3045,13 @@ impl VecPair {
self.a.iter().zip(self.b.iter())
}
+ pub fn iter_mut(
+ &mut self,
+ ) -> Zip, slice::IterMut<'_, B>> {
+ debug_assert!(self.a.len() == self.b.len());
+ self.a.iter_mut().zip(self.b.iter_mut())
+ }
+
pub fn len(&self) -> usize {
debug_assert!(self.a.len() == self.b.len());
self.a.len()
diff --git a/src/nouveau/compiler/nak_to_cssa.rs b/src/nouveau/compiler/nak_to_cssa.rs
new file mode 100644
index 00000000000..d1d2ad08a2f
--- /dev/null
+++ b/src/nouveau/compiler/nak_to_cssa.rs
@@ -0,0 +1,373 @@
+// Copyright © 2023 Collabora, Ltd.
+// SPDX-License-Identifier: MIT
+
+// Implements conversion to CSSA as described in "Revisiting Out-of-SSA
+// Translation for Correctness, Code Quality, and Efficiency" by Boissinot et.
+// al.
+//
+// The primary difference between this algorithm and that of the Boissinot
+// paper is that we don't actually insert parallel copies and remove redundant
+// entries. Instead, we treat OpPhiSrcs and OpPhiDsts as as the parallel
+// copies with the phi index standing in for all of the SSA values used
+// directly by the phi. This lets us avoid adding and removing parallel copies
+// and can instead add the parallel copies at the end.
+
+use crate::nak_cfg::CFG;
+use crate::nak_ir::*;
+use crate::nak_liveness::{BlockLiveness, Liveness, SimpleLiveness};
+
+use std::collections::HashMap;
+use std::iter::Peekable;
+
+struct MergedIter {
+ a: Peekable,
+ b: Peekable,
+}
+
+impl MergedIter {
+ fn new(a: I, b: I) -> Self {
+ Self {
+ a: a.peekable(),
+ b: b.peekable(),
+ }
+ }
+}
+
+impl Iterator for MergedIter
+where
+ ::Item: Ord,
+{
+ type Item = ::Item;
+
+ fn next(&mut self) -> Option<::Item> {
+ if let Some(a) = self.a.peek() {
+ if let Some(b) = self.b.peek() {
+ if a <= b {
+ self.a.next()
+ } else {
+ self.b.next()
+ }
+ } else {
+ self.a.next()
+ }
+ } else {
+ self.b.next()
+ }
+ }
+
+ fn size_hint(&self) -> (usize, Option) {
+ let (a_max, a_size) = self.a.size_hint();
+ let (b_max, b_size) = self.b.size_hint();
+ (a_max + b_max, a_size.zip(b_size).map(|(a, b)| a + b))
+ }
+}
+
+enum CoalesceItem {
+ SSA(SSAValue),
+ Phi(u32),
+}
+
+struct CoalesceNode {
+ set: usize,
+ block: usize,
+ ip_1: usize,
+ item: CoalesceItem,
+}
+
+struct CoalesceSet {
+ nodes: Vec,
+}
+
+struct CoalesceGraph<'a> {
+ live: &'a SimpleLiveness,
+ nodes: Vec,
+ sets: Vec,
+ ssa_node: HashMap,
+ phi_node_file: HashMap,
+}
+
+impl<'a> CoalesceGraph<'a> {
+ fn new(live: &'a SimpleLiveness) -> Self {
+ Self {
+ live: live,
+ nodes: Vec::new(),
+ sets: Vec::new(),
+ ssa_node: HashMap::new(),
+ phi_node_file: HashMap::new(),
+ }
+ }
+
+ fn add_ssa(&mut self, ssa: SSAValue) {
+ debug_assert!(self.sets.is_empty());
+
+ // Set it to usize::MAX for now. We'll update later
+ if self.ssa_node.insert(ssa, usize::MAX).is_none() {
+ let (block, ip) = self.live.def_block_ip(&ssa);
+ self.nodes.push(CoalesceNode {
+ set: usize::MAX,
+ block: block,
+ ip_1: ip + 1,
+ item: CoalesceItem::SSA(ssa),
+ });
+ }
+ }
+
+ fn add_phi_dst(&mut self, phi: u32, file: RegFile, block: usize) {
+ debug_assert!(self.sets.is_empty());
+
+ // Record the register file now. We'll set the node later
+ let old = self.phi_node_file.insert(phi, (usize::MAX, file));
+ debug_assert!(old.is_none());
+
+ self.nodes.push(CoalesceNode {
+ set: usize::MAX,
+ block: block,
+ ip_1: 0,
+ item: CoalesceItem::Phi(phi),
+ });
+ }
+
+ fn add_phi_src(&mut self, phi: u32, block: usize) {
+ debug_assert!(self.sets.is_empty());
+
+ self.nodes.push(CoalesceNode {
+ set: usize::MAX,
+ block: block,
+ ip_1: usize::MAX,
+ item: CoalesceItem::Phi(phi),
+ });
+ }
+
+ fn init_sets(&mut self, cfg: &CFG) {
+ // Sort the nodes by dom_dfs_pre_index followed by ip+1. Stash the
+ // dom_dfs_pre_index in the set for now. We don't actually fill out
+ // the set field until later.
+ for n in self.nodes.iter_mut() {
+ n.set = cfg.dom_dfs_pre_index(n.block);
+ }
+ self.nodes
+ .sort_by(|a, b| a.set.cmp(&b.set).then(a.ip_1.cmp(&b.ip_1)));
+
+ for ni in 0..self.nodes.len() {
+ match &self.nodes[ni].item {
+ CoalesceItem::SSA(ssa) => {
+ let old = self.ssa_node.insert(*ssa, ni);
+ debug_assert!(old == Some(usize::MAX));
+
+ self.nodes[ni].set = self.sets.len();
+ self.sets.push(CoalesceSet { nodes: vec![ni] });
+ }
+ CoalesceItem::Phi(phi) => {
+ let (pn, _) = self.phi_node_file.get_mut(phi).unwrap();
+
+ // We only want one set per phi and phi_node contains the
+ // index to any one of the nodes.
+ if *pn == usize::MAX {
+ self.nodes[ni].set = self.sets.len();
+ self.sets.push(CoalesceSet { nodes: vec![ni] });
+ *pn = ni;
+ } else {
+ let s = self.nodes[*pn].set;
+ self.nodes[ni].set = s;
+ }
+ }
+ }
+ }
+ }
+
+ fn node_dominates(&self, p: usize, c: usize, cfg: &CFG) -> bool {
+ if self.nodes[p].block == self.nodes[c].block {
+ self.nodes[p].ip_1 <= self.nodes[c].ip_1
+ } else {
+ cfg.dominates(self.nodes[p].block, self.nodes[c].block)
+ }
+ }
+
+ fn phi_ssa_interferes(&self, phi: &CoalesceNode, ssa: &SSAValue) -> bool {
+ if phi.ip_1 == 0 {
+ self.live.block_live(phi.block).is_live_in(ssa)
+ } else {
+ debug_assert!(phi.ip_1 == usize::MAX);
+ self.live.block_live(phi.block).is_live_out(ssa)
+ }
+ }
+
+ fn nodes_interfere(&self, a: usize, b: usize) -> bool {
+ let a = &self.nodes[a];
+ let b = &self.nodes[b];
+
+ match &a.item {
+ CoalesceItem::SSA(a_ssa) => match &b.item {
+ CoalesceItem::SSA(b_ssa) => self.live.interferes(a_ssa, b_ssa),
+ CoalesceItem::Phi(_) => self.phi_ssa_interferes(b, a_ssa),
+ },
+ CoalesceItem::Phi(_) => match &b.item {
+ CoalesceItem::SSA(b_ssa) => self.phi_ssa_interferes(a, b_ssa),
+ CoalesceItem::Phi(_) => {
+ // Phi nodes represent the temporary SSA value made between
+ // the parallel copy and the phi in the Boissinot algorithm
+ // so they interfere if and only if they're in the same
+ // block and both at the start or both at the end.
+ a.block == b.block && a.ip_1 == b.ip_1
+ }
+ },
+ }
+ }
+
+ pub fn sets_interfere(&self, a: usize, b: usize, cfg: &CFG) -> bool {
+ let a = &self.sets[a];
+ let b = &self.sets[b];
+
+ // Stack of nodes which dominate the current node
+ let mut dom = Vec::new();
+
+ for n in MergedIter::new(a.nodes.iter(), b.nodes.iter()) {
+ loop {
+ if let Some(p) = dom.last() {
+ if self.node_dominates(*p, *n, cfg) {
+ dom.pop();
+ } else {
+ break;
+ }
+ } else {
+ break;
+ }
+ }
+
+ if let Some(p) = dom.last() {
+ if self.nodes_interfere(*n, *p) {
+ return true;
+ }
+ }
+
+ dom.push(*n);
+ }
+
+ false
+ }
+
+ pub fn sets_merge(&mut self, a: usize, b: usize) -> usize {
+ let a_nodes = std::mem::replace(&mut self.sets[a].nodes, Vec::new());
+ let b_nodes = std::mem::replace(&mut self.sets[b].nodes, Vec::new());
+ let nodes = MergedIter::new(a_nodes.into_iter(), b_nodes.into_iter());
+
+ self.sets[a].nodes = nodes
+ .map(|n| {
+ self.nodes[n].set = a;
+ n
+ })
+ .collect();
+
+ a
+ }
+
+ pub fn ssa_set(&self, ssa: &SSAValue) -> usize {
+ self.nodes[*self.ssa_node.get(ssa).unwrap()].set
+ }
+
+ pub fn phi_set_file(&self, phi: &u32) -> (usize, RegFile) {
+ let (n, file) = self.phi_node_file.get(phi).unwrap();
+ (self.nodes[*n].set, *file)
+ }
+}
+
+impl Function {
+ pub fn to_cssa(&mut self) {
+ let live = SimpleLiveness::for_function(self);
+
+ let mut cg = CoalesceGraph::new(&live);
+ for (bi, b) in self.blocks.iter().enumerate() {
+ if let Some(phi) = b.phi_dsts() {
+ for (idx, dst) in phi.dsts.iter() {
+ let vec = dst.as_ssa().unwrap();
+ debug_assert!(vec.comps() == 1);
+ cg.add_ssa(vec[0]);
+ cg.add_phi_dst(*idx, vec[0].file(), bi);
+ }
+ }
+
+ if let Some(phi) = b.phi_srcs() {
+ for (idx, src) in phi.srcs.iter() {
+ if let SrcRef::SSA(vec) = src.src_ref {
+ debug_assert!(vec.comps() == 1);
+ cg.add_ssa(vec[0]);
+ }
+ cg.add_phi_src(*idx, bi);
+ }
+ }
+ }
+ cg.init_sets(&self.blocks);
+
+ for bi in 0..self.blocks.len() {
+ let block_instrs =
+ std::mem::replace(&mut self.blocks[bi].instrs, Vec::new());
+
+ let mut instrs = Vec::new();
+ for mut instr in block_instrs.into_iter() {
+ match &mut instr.op {
+ Op::PhiDsts(phi) => {
+ let mut pcopy = OpParCopy::new();
+ for (idx, dst) in phi.dsts.iter_mut() {
+ let (ps, file) = cg.phi_set_file(idx);
+
+ let vec = dst.as_ssa().unwrap();
+ debug_assert!(vec.comps() == 1);
+ debug_assert!(vec[0].file() == file);
+ let ds = cg.ssa_set(&vec[0]);
+
+ if !cg.sets_interfere(ps, ds, &self.blocks) {
+ cg.sets_merge(ps, ds);
+ continue;
+ }
+
+ let tmp = self.ssa_alloc.alloc(file);
+ pcopy.push(*dst, tmp.into());
+ *dst = tmp.into();
+ }
+
+ instrs.push(instr);
+ if !pcopy.is_empty() {
+ instrs.push(Instr::new_boxed(pcopy));
+ }
+ }
+ Op::PhiSrcs(phi) => {
+ let mut pcopy = OpParCopy::new();
+ for (idx, src) in phi.srcs.iter_mut() {
+ let (ps, file) = cg.phi_set_file(idx);
+
+ debug_assert!(src.src_mod.is_none());
+ if let SrcRef::SSA(vec) = &src.src_ref {
+ debug_assert!(vec.comps() == 1);
+ let ss = cg.ssa_set(&vec[0]);
+ if cg.sets_interfere(ps, ss, &self.blocks) {
+ let tmp = self.ssa_alloc.alloc(file);
+ pcopy.push(tmp.into(), *src);
+ *src = tmp.into();
+ } else {
+ cg.sets_merge(ps, ss);
+ }
+ } else {
+ // Non-SSA sources get an actual Mov instruction
+ // and are not considered part of the parallel
+ // copy.
+ let tmp = self.ssa_alloc.alloc(file);
+ instrs.push(Instr::new_boxed(OpCopy {
+ dst: tmp.into(),
+ src: *src,
+ }));
+ *src = tmp.into();
+ }
+ }
+
+ if !pcopy.is_empty() {
+ instrs.push(Instr::new_boxed(pcopy));
+ }
+ instrs.push(instr);
+ }
+ _ => instrs.push(instr),
+ }
+ }
+ self.blocks[bi].instrs = instrs;
+ }
+ }
+}