diff --git a/src/nouveau/compiler/nak_liveness.rs b/src/nouveau/compiler/nak_liveness.rs index 2011f7f3cf2..ac27a51b8a2 100644 --- a/src/nouveau/compiler/nak_liveness.rs +++ b/src/nouveau/compiler/nak_liveness.rs @@ -124,8 +124,13 @@ impl FromIterator for LiveSet { } pub trait BlockLiveness { + /// Returns true if @val is still live after @ip fn is_live_after_ip(&self, val: &SSAValue, ip: usize) -> bool; + + /// Returns true if @val is live-in to this block fn is_live_in(&self, val: &SSAValue) -> bool; + + /// Returns true if @val is live-out of this block fn is_live_out(&self, val: &SSAValue) -> bool; fn get_instr_pressure(&self, ip: usize, instr: &Instr) -> PerRegFile { @@ -415,6 +420,7 @@ impl NextUseBlockLiveness { self.entry_mut(ssa).add_in_block_use(ip); } + /// Returns an iterator over all the values which are live-in to this block pub fn iter_live_in<'a>(&'a self) -> impl Iterator { self.ssa_map.iter().filter_map(|(ssa, entry)| { if entry.defined || entry.uses.is_empty() { @@ -425,6 +431,12 @@ impl NextUseBlockLiveness { }) } + /// Returns the IP of the first use of @val + /// + /// The returned IP is relative to the start of this block. If the next use + /// is in some successor block, the returned IP is relative to the start of + /// this block. If @val is not used in this block and is not live-out, None + /// is returned. pub fn first_use(&self, val: &SSAValue) -> Option { if let Some(entry) = self.ssa_map.get(val) { entry.uses.first().cloned() @@ -433,6 +445,11 @@ impl NextUseBlockLiveness { } } + /// Returns the IP of the first use of @val which is greater than or equal + /// to @ip + /// + /// All IPs are relative to the start of the block. If the next use is in a + /// successor block, the returned IP is relative to the start of this block. 
pub fn next_use_after_or_at_ip( &self, val: &SSAValue, @@ -485,6 +502,12 @@ impl BlockLiveness for NextUseBlockLiveness { } } +/// An implementation of Liveness that tracks next-use IPs for each SSAValue +/// +/// Along with the usual liveness information, this tracks next-use IPs for each +/// SSAValue. Cross-block next-use IPs are computed as per the global next-use +/// distance algorithm described in "Register Spilling and Live-Range Splitting +/// for SSA-Form Programs" by Braun and Hack. pub struct NextUseLiveness { blocks: Vec, } diff --git a/src/nouveau/compiler/nak_repair_ssa.rs b/src/nouveau/compiler/nak_repair_ssa.rs index 4363e8a499d..2918ed529e0 100644 --- a/src/nouveau/compiler/nak_repair_ssa.rs +++ b/src/nouveau/compiler/nak_repair_ssa.rs @@ -50,7 +50,7 @@ fn get_ssa_or_phi( } if all_same { - let pred_ssa = pred_ssa.expect("Unreachable block"); + let pred_ssa = pred_ssa.expect("Undefined value"); b_defs.insert(ssa, pred_ssa); pred_ssa } else { @@ -119,6 +119,22 @@ fn get_or_insert_phi_srcs<'a>(bb: &'a mut BasicBlock) -> &'a mut OpPhiSrcs { } impl Function { + /// Repairs SSA form + /// + /// Certain passes such as register spilling may produce a program that is + /// no longer in SSA form. This pass is able to repair SSA by inserting + /// phis as needed. Even though we do not require dominance or that each + /// value be defined once, we do require that, for every use of an SSAValue + /// and for every path from the start of the program to that use, there must + /// be some definition of the value along that path. + /// + /// The algorithm implemented here is based on the one in "Simple and + /// Efficient Construction of Static Single Assignment Form" by Braun et + /// al. The primary difference between our implementation and the paper is + /// that we can't rewrite the IR on-the-fly. 
Instead, we store everything + /// in hash tables and handle removing redundant phis with back-edges as a + /// separate pass between figuring out where phis are needed and actually + /// constructing the phi instructions. pub fn repair_ssa(&mut self) { // First, count the number of defs for each SSA value. This will allow // us to skip any SSA values which only have a single definition in diff --git a/src/nouveau/compiler/nak_spill_values.rs b/src/nouveau/compiler/nak_spill_values.rs index f9b364200e2..a0a464ba926 100644 --- a/src/nouveau/compiler/nak_spill_values.rs +++ b/src/nouveau/compiler/nak_spill_values.rs @@ -801,6 +801,55 @@ fn spill_values( } impl Function { + /// Spill values from @file to fit within @limit registers + /// + /// This pass assumes that the function is already in CSSA form. See + /// @to_cssa for more details. + /// + /// The algorithm implemented here is roughly based on "Register Spilling + /// and Live-Range Splitting for SSA-Form Programs" by Braun and Hack. The + /// primary contributions of the Braun and Hack paper are the global + /// next-use distances which are implemented by @NextUseLiveness and a + /// heuristic for computing spill sets at block boundaries. The paper + /// describes two sets: + /// + /// - W, the set of variables currently resident + /// + /// - S, the set of variables which have been spilled + /// + /// These sets are tracked as we walk instructions and [un]spill values to + /// satisfy the given limit. When spills are required we spill the value + /// with the highest next-use IP. At block boundaries, Braun and Hack + /// describe a heuristic for determining the starting W and S sets based on + /// the W and S from the end of each of the forward edge predecessor blocks. + /// + /// What Braun and Hack do not describe is how to handle phis and parallel + /// copies. Because we assume the function is already in CSSA form, we can + /// use a fairly simple algorithm. 
On the first pass, we ignore phi sources + /// and assign phi destinations based on W at the start of the block. If + /// the phi destination is in W, we leave it alone. If it is not in W, then + /// we allocate a new spill value and assign it to the phi destination. In + /// a second pass, we handle phi sources based on the destination. If the + /// destination is in W, we leave it alone. If the destination is spilled, + /// we read from the spill value corresponding to the source, spilling first + /// if needed. In the second pass, we also handle spilling across blocks as + /// needed for values that do not pass through a phi. + /// + /// A special case is also required for parallel copies because they can + /// have an unbounded number of destinations. For any source values not in + /// W, we allocate a spill value for the destination and copy in the spill + /// register file. For any sources which are in W, we try to leave as much + /// in W as possible. However, since source values may not be killed by the + /// copy and because one source value may be copied to arbitrarily many + /// destinations, that is not always possible. Whenever we need to spill + /// values, we spill according to the highest next-use of the destination + /// and we spill the source first and then parallel copy the source into a + /// spilled destination value. + /// + /// This all assumes that it's better to copy in spill space than to unspill + /// just for the sake of a parallel copy. While this may not be true in + /// general, especially not when spilling to memory, the register allocator + /// is good at eliding unnecessary copies. pub fn spill_values(&mut self, file: RegFile, limit: u32) { match file { RegFile::GPR => { diff --git a/src/nouveau/compiler/nak_to_cssa.rs b/src/nouveau/compiler/nak_to_cssa.rs index d1d2ad08a2f..7faa804455c 100644 --- a/src/nouveau/compiler/nak_to_cssa.rs +++ b/src/nouveau/compiler/nak_to_cssa.rs @@ -1,17 +1,6 @@ // Copyright © 2023 Collabora, Ltd. 
// SPDX-License-Identifier: MIT -// Implements conversion to CSSA as described in "Revisiting Out-of-SSA -// Translation for Correctness, Code Quality, and Efficiency" by Boissinot et. -// al. -// -// The primary difference between this algorithm and that of the Boissinot -// paper is that we don't actually insert parallel copies and remove redundant -// entries. Instead, we treat OpPhiSrcs and OpPhiDsts as as the parallel -// copies with the phi index standing in for all of the SSA values used -// directly by the phi. This lets us avoid adding and removing parallel copies -// and can instead add the parallel copies at the end. - use crate::nak_cfg::CFG; use crate::nak_ir::*; use crate::nak_liveness::{BlockLiveness, Liveness, SimpleLiveness}; @@ -272,6 +261,33 @@ impl<'a> CoalesceGraph<'a> { } impl Function { + /// Convert a function to CSSA (Conventional SSA) form + /// + /// In "Translating Out of Static Single Assignment Form" by Sreedhar et + /// al., they define CSSA form via what they call the Phi Congruence + /// Property: + /// + /// > The occurrences of all resources which belong to the same phi + /// > congruence class in a program can be replaced by a representative + /// > resource. After the replacement, the phi instruction can be + /// > eliminated without violating the semantics of the original program. + /// + /// A more compiler-theoretic definition of CSSA form is a version of SSA + /// form in which, for each phi, none of the SSA values involved in the phi + /// (either as a source or destination) interfere. While most of the papers + /// discussing CSSA form do so in the context of out-of-SSA, this property + /// is also useful for SSA-based spilling and register allocation. + /// + /// Our implementation is based on the algorithm described in "Revisiting + /// Out-of-SSA Translation for Correctness, Code Quality, and Efficiency" + /// by Boissinot et al. 
The primary difference between this algorithm and + /// the one in that paper is that we don't actually insert parallel copies + /// and remove redundant entries. Instead, we treat OpPhiSrcs and OpPhiDsts + /// as the parallel copies with the phi index standing in for all of the + /// SSA values used directly by the phi. Then, instead of removing copies + /// where the source and destination don't interfere, we insert copies + /// whenever the source or destination and phi index do interfere. This + /// lets us avoid inserting pointless instructions. pub fn to_cssa(&mut self) { let live = SimpleLiveness::for_function(self);