nak: Document spilling and RA
This isn't full documentation but it at least sprinkles paper citations all over as well as descriptions of where we diverge from said papers. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24998>
This commit is contained in:
committed by
Marge Bot
parent
a0bf406057
commit
52acb12f75
@@ -124,8 +124,13 @@ impl FromIterator<SSAValue> for LiveSet {
|
||||
}
|
||||
|
||||
pub trait BlockLiveness {
|
||||
/// Returns true if @val is still live after @ip
|
||||
fn is_live_after_ip(&self, val: &SSAValue, ip: usize) -> bool;
|
||||
|
||||
/// Returns true if @val is live-in to this block
|
||||
fn is_live_in(&self, val: &SSAValue) -> bool;
|
||||
|
||||
/// Returns true if @val is live-out of this block
|
||||
fn is_live_out(&self, val: &SSAValue) -> bool;
|
||||
|
||||
fn get_instr_pressure(&self, ip: usize, instr: &Instr) -> PerRegFile<u8> {
|
||||
@@ -415,6 +420,7 @@ impl NextUseBlockLiveness {
|
||||
self.entry_mut(ssa).add_in_block_use(ip);
|
||||
}
|
||||
|
||||
/// Returns an iterator over all the values which are live-in to this block
|
||||
pub fn iter_live_in<'a>(&'a self) -> impl Iterator<Item = &'a SSAValue> {
|
||||
self.ssa_map.iter().filter_map(|(ssa, entry)| {
|
||||
if entry.defined || entry.uses.is_empty() {
|
||||
@@ -425,6 +431,12 @@ impl NextUseBlockLiveness {
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns the IP of the first use of @val
|
||||
///
|
||||
/// The returned IP is relative to the start of this block. If the next use
|
||||
/// is in some successor block, the returned IP is still relative to the start of
|
||||
/// this block. If @val is not used in this block and is not live-out, None
|
||||
/// is returned.
|
||||
pub fn first_use(&self, val: &SSAValue) -> Option<usize> {
|
||||
if let Some(entry) = self.ssa_map.get(val) {
|
||||
entry.uses.first().cloned()
|
||||
@@ -433,6 +445,11 @@ impl NextUseBlockLiveness {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the IP of the first use of @val which is greater than or equal
|
||||
/// to @ip
|
||||
///
|
||||
/// All IPs are relative to the start of the block. If the next use is in some
|
||||
/// successor block, the returned IP is still relative to the start of this block.
|
||||
pub fn next_use_after_or_at_ip(
|
||||
&self,
|
||||
val: &SSAValue,
|
||||
@@ -485,6 +502,12 @@ impl BlockLiveness for NextUseBlockLiveness {
|
||||
}
|
||||
}
|
||||
|
||||
/// An implementation of Liveness that tracks next-use IPs for each SSAValue
|
||||
///
|
||||
/// Along with the usual liveness information, this tracks next-use IPs for each
|
||||
/// SSAValue. Cross-block next-use IPs are computed as per the global next-use
|
||||
/// distance algorithm described in "Register Spilling and Live-Range Splitting
|
||||
/// for SSA-Form Programs" by Braun and Hack.
|
||||
pub struct NextUseLiveness {
|
||||
blocks: Vec<NextUseBlockLiveness>,
|
||||
}
|
||||
|
||||
@@ -50,7 +50,7 @@ fn get_ssa_or_phi(
|
||||
}
|
||||
|
||||
if all_same {
|
||||
let pred_ssa = pred_ssa.expect("Unreachable block");
|
||||
let pred_ssa = pred_ssa.expect("Undefined value");
|
||||
b_defs.insert(ssa, pred_ssa);
|
||||
pred_ssa
|
||||
} else {
|
||||
@@ -119,6 +119,22 @@ fn get_or_insert_phi_srcs<'a>(bb: &'a mut BasicBlock) -> &'a mut OpPhiSrcs {
|
||||
}
|
||||
|
||||
impl Function {
|
||||
/// Repairs SSA form
|
||||
///
|
||||
/// Certain passes such as register spilling may produce a program that is
|
||||
/// no longer in SSA form. This pass is able to repair SSA by inserting
|
||||
/// phis as needed. Even though we do not require dominance or that each
|
||||
/// value be defined once, we do require that, for every use of an SSAValue
|
||||
/// and for every path from the start of the program to that use, there must
|
||||
/// be some definition of the value along that path.
|
||||
///
|
||||
/// The algorithm implemented here is based on the one in "Simple and
|
||||
/// Efficient Construction of Static Single Assignment Form" by Braun et
|
||||
/// al. The primary difference between our implementation and the paper is
|
||||
/// that we can't rewrite the IR on-the-fly. Instead, we store everything
|
||||
/// in hash tables and handle removing redundant phis with back-edges as a
|
||||
/// separate pass between figuring out where phis are needed and actually
|
||||
/// constructing the phi instructions.
|
||||
pub fn repair_ssa(&mut self) {
|
||||
// First, count the number of defs for each SSA value. This will allow
|
||||
// us to skip any SSA values which only have a single definition in
|
||||
|
||||
@@ -801,6 +801,55 @@ fn spill_values<S: Spill>(
|
||||
}
|
||||
|
||||
impl Function {
|
||||
/// Spill values from @file to fit within @limit registers
|
||||
///
|
||||
/// This pass assumes that the function is already in CSSA form. See
|
||||
/// @to_cssa for more details.
|
||||
///
|
||||
/// The algorithm implemented here is roughly based on "Register Spilling
|
||||
/// and Live-Range Splitting for SSA-Form Programs" by Braun and Hack. The
|
||||
/// primary contributions of the Braun and Hack paper are the global
|
||||
/// next-use distances which are implemented by @NextUseLiveness and a
|
||||
/// heuristic for computing spill sets at block boundaries. The paper
|
||||
/// describes two sets:
|
||||
///
|
||||
/// - W, the set of variables currently resident
|
||||
///
|
||||
/// - S, the set of variables which have been spilled
|
||||
///
|
||||
/// These sets are tracked as we walk instructions and [un]spill values to
|
||||
/// satisfy the given limit. When spills are required we spill the value
|
||||
/// with the highest next-use IP. At block boundaries, Braun and Hack
|
||||
/// describe a heuristic for determining the starting W and S sets based on
|
||||
/// the W and S from the end of each of the forward edge predecessor blocks.
|
||||
///
|
||||
/// What Braun and Hack do not describe is how to handle phis and parallel
|
||||
/// copies. Because we assume the function is already in CSSA form, we can
|
||||
/// use a fairly simple algorithm. On the first pass, we ignore phi sources
|
||||
/// and assign phi destinations based on W at the start of the block. If
|
||||
/// the phi destination is in W, we leave it alone. If it is not in W, then
|
||||
/// we allocate a new spill value and assign it to the phi destination. In
|
||||
/// a second pass, we handle phi sources based on the destination. If the
|
||||
/// destination is in W, we leave it alone. If the destination is spilled,
|
||||
/// we read from the spill value corresponding to the source, spilling first
|
||||
/// if needed. In the second pass, we also handle spilling across blocks as
|
||||
/// needed for values that do not pass through a phi.
|
||||
///
|
||||
/// A special case is also required for parallel copies because they can
|
||||
/// have an unbounded number of destinations. For any source values not in
|
||||
/// W, we allocate a spill value for the destination and copy in the spill
|
||||
/// register file. For any sources which are in W, we try to leave as much
|
||||
/// in W as possible. However, since source values may not be killed by the
|
||||
/// copy and because one source value may be copied to arbitrarily many
|
||||
/// destinations, that is not always possible. Whenever we need to spill
|
||||
/// values, we spill according to the highest next-use of the destination
|
||||
/// and we spill the source first and then parallel copy the source into a
|
||||
/// spilled destination value.
|
||||
///
|
||||
/// This all assumes that it's better to copy in spill space than to unspill
|
||||
/// just for the sake of a parallel copy. While this may not be true in
|
||||
/// general, especially not when spilling to memory, the register allocator
|
||||
/// is good at eliding unnecessary copies.
|
||||
pub fn spill_values(&mut self, file: RegFile, limit: u32) {
|
||||
match file {
|
||||
RegFile::GPR => {
|
||||
|
||||
@@ -1,17 +1,6 @@
|
||||
// Copyright © 2023 Collabora, Ltd.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
// Implements conversion to CSSA as described in "Revisiting Out-of-SSA
|
||||
// Translation for Correctness, Code Quality, and Efficiency" by Boissinot et
|
||||
// al.
|
||||
//
|
||||
// The primary difference between this algorithm and that of the Boissinot
|
||||
// paper is that we don't actually insert parallel copies and remove redundant
|
||||
// entries. Instead, we treat OpPhiSrcs and OpPhiDsts as the parallel
|
||||
// copies with the phi index standing in for all of the SSA values used
|
||||
// directly by the phi. This lets us avoid adding and removing parallel copies
|
||||
// and can instead add the parallel copies at the end.
|
||||
|
||||
use crate::nak_cfg::CFG;
|
||||
use crate::nak_ir::*;
|
||||
use crate::nak_liveness::{BlockLiveness, Liveness, SimpleLiveness};
|
||||
@@ -272,6 +261,33 @@ impl<'a> CoalesceGraph<'a> {
|
||||
}
|
||||
|
||||
impl Function {
|
||||
/// Convert a function to CSSA (Conventional SSA) form
|
||||
///
|
||||
/// In "Translating Out of Static Single Assignment Form" by Sreedhar et
|
||||
/// al., they define CSSA form via what they call the Phi Congruence
|
||||
/// Property:
|
||||
///
|
||||
/// > The occurrences of all resources which belong to the same phi
|
||||
/// > congruence class in a program can be replaced by a representative
|
||||
/// > resource. After the replacement, the phi instruction can be
|
||||
/// > eliminated without violating the semantics of the original program.
|
||||
///
|
||||
/// A more compiler-theoretic definition of CSSA form is a version of SSA
|
||||
/// form in which, for each phi, none of the SSA values involved in the phi
|
||||
/// (either as a source or destination) interfere. While most of the papers
|
||||
/// discussing CSSA form do so in the context of out-of-SSA, this property
|
||||
/// is also useful for SSA-based spilling and register allocation.
|
||||
///
|
||||
/// Our implementation is based on the algorithm described in "Revisiting
|
||||
/// Out-of-SSA Translation for Correctness, Code Quality, and Efficiency" by
|
||||
/// Boissinot et al. The primary difference between this algorithm and
|
||||
/// the one in that paper is that we don't actually insert parallel copies
|
||||
/// and remove redundant entries. Instead, we treat OpPhiSrcs and OpPhiDsts
|
||||
/// as the parallel copies with the phi index standing in for all of the
|
||||
/// SSA values used directly by the phi. Then, instead of removing copies
|
||||
/// where the source and destination don't interfere, we insert copies
|
||||
/// whenever the source or destination and phi index do interfere. This
|
||||
/// lets us avoid inserting pointless instructions.
|
||||
pub fn to_cssa(&mut self) {
|
||||
let live = SimpleLiveness::for_function(self);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user