From 49261faac8449ee603f1c7ee4ba8bbe8c273fdb5 Mon Sep 17 00:00:00 2001 From: Mary Guillemard Date: Mon, 16 Dec 2024 09:27:48 +0100 Subject: [PATCH] pan/util: Move lcra to midgard This is only used by midgard; bifrost has its own custom version of this. Signed-off-by: Mary Guillemard Acked-by: Boris Brezillon Acked-by: Erik Faye-Lund Acked-by: Eric R. Smith Part-of: --- src/panfrost/midgard/compiler.h | 1 - src/panfrost/midgard/midgard_ra.c | 269 ++++++++++++++++++++++++++++++ src/panfrost/util/lcra.c | 256 ---------------------------- src/panfrost/util/lcra.h | 101 ----------- src/panfrost/util/meson.build | 2 - 5 files changed, 269 insertions(+), 360 deletions(-) delete mode 100644 src/panfrost/util/lcra.c delete mode 100644 src/panfrost/util/lcra.h diff --git a/src/panfrost/midgard/compiler.h b/src/panfrost/midgard/compiler.h index 9fb94e9b023..a3a79977d70 100644 --- a/src/panfrost/midgard/compiler.h +++ b/src/panfrost/midgard/compiler.h @@ -38,7 +38,6 @@ #include "compiler/glsl_types.h" #include "compiler/nir/nir.h" -#include "panfrost/util/lcra.h" #include "panfrost/util/pan_ir.h" /* Forward declare */ diff --git a/src/panfrost/midgard/midgard_ra.c b/src/panfrost/midgard/midgard_ra.c index 51ba6f7207e..f73d43c548e 100644 --- a/src/panfrost/midgard/midgard_ra.c +++ b/src/panfrost/midgard/midgard_ra.c @@ -28,6 +28,275 @@ #include "midgard_ops.h" #include "midgard_quirks.h" +struct lcra_state { + unsigned node_count; + + /* Alignment for node in log2(bytes)+1. Since alignment must be + * non-negative power-of-two, the elements are strictly positive + * integers. Zero is the sentinel for a missing node. In upper word, + * bound. */ + unsigned *alignment; + + /* Linear constraints imposed. Nested array sized upfront, organized as + * linear[node_left][node_right]. That is, calculate indices as: + * linear[(node_left * node_count) + node_right]. + * Each element is itself a bit field denoting whether (c_j - c_i) bias + * is present or not, including negative biases. 
+ * + * Note for Midgard, there are 16 components, so the bias is in range + * [-15, 15] and is encoded by a 32-bit field. */ + + uint32_t *linear; + + /* Per node max modulus constraints */ + uint8_t *modulus; + + /* Classes allow nodes to be partitioned with a starting register. + * Classes cannot interfere; that is, they are true partitions in the + * usual sense of the word. class_count is the number of classes. + * class[] is indexed by a node to get the mapped class. class_start is + * added as a bias to all solutions in the class. */ + + unsigned class_count; + unsigned *class; + unsigned *class_start; + unsigned *class_size; + bool *class_disjoint; + + /* Before solving, forced registers; after solving, solutions. */ + unsigned *solutions; + + /* For register spilling, the costs to spill nodes (as set by the user) + * are in spill_cost[], negative if a node is unspillable. Internally, + * spill_class specifies which class to spill (whichever class failed + * to allocate) */ + + signed *spill_cost; + unsigned spill_class; +}; + +/* This module is the reference implementation of "Linearly Constrained + * Register Allocation". 
The paper is available in PDF form + * (https://people.collabora.com/~alyssa/LCRA.pdf) as well as Markdown+LaTeX + * (https://gitlab.freedesktop.org/alyssa/lcra/blob/master/LCRA.md) + */ + +static struct lcra_state * +lcra_alloc_equations(unsigned node_count, unsigned class_count) +{ + struct lcra_state *l = calloc(1, sizeof(*l)); + + l->node_count = node_count; + l->class_count = class_count; + + l->alignment = calloc(sizeof(l->alignment[0]), node_count); + l->linear = calloc(sizeof(l->linear[0]), node_count * node_count); + l->modulus = calloc(sizeof(l->modulus[0]), node_count); + l->class = calloc(sizeof(l->class[0]), node_count); + l->class_start = calloc(sizeof(l->class_start[0]), class_count); + l->class_disjoint = + calloc(sizeof(l->class_disjoint[0]), class_count * class_count); + l->class_size = calloc(sizeof(l->class_size[0]), class_count); + l->spill_cost = calloc(sizeof(l->spill_cost[0]), node_count); + l->solutions = calloc(sizeof(l->solutions[0]), node_count); + + memset(l->solutions, ~0, sizeof(l->solutions[0]) * node_count); + + return l; +} + +static void +lcra_free(struct lcra_state *l) +{ + if (!l) + return; + + free(l->alignment); + free(l->linear); + free(l->modulus); + free(l->class); + free(l->class_start); + free(l->class_disjoint); + free(l->class_size); + free(l->spill_cost); + free(l->solutions); + + free(l); +} + +static void +lcra_set_alignment(struct lcra_state *l, unsigned node, unsigned align_log2, + unsigned bound) +{ + l->alignment[node] = (align_log2 + 1) | (bound << 16); +} + +static void +lcra_set_disjoint_class(struct lcra_state *l, unsigned c1, unsigned c2) +{ + l->class_disjoint[(c1 * l->class_count) + c2] = true; + l->class_disjoint[(c2 * l->class_count) + c1] = true; +} + +static void +lcra_restrict_range(struct lcra_state *l, unsigned node, unsigned len) +{ + if (node < l->node_count && l->alignment[node]) { + unsigned BA = l->alignment[node]; + unsigned alignment = (BA & 0xffff) - 1; + unsigned bound = BA >> 16; + 
l->modulus[node] = DIV_ROUND_UP(bound - len + 1, 1 << alignment); + } +} + +static void +lcra_add_node_interference(struct lcra_state *l, unsigned i, unsigned cmask_i, + unsigned j, unsigned cmask_j) +{ + if (i == j) + return; + + if (l->class_disjoint[(l->class[i] * l->class_count) + l->class[j]]) + return; + + uint32_t constraint_fw = 0; + uint32_t constraint_bw = 0; + + for (unsigned D = 0; D < 16; ++D) { + if (cmask_i & (cmask_j << D)) { + constraint_bw |= (1 << (15 + D)); + constraint_fw |= (1 << (15 - D)); + } + + if (cmask_i & (cmask_j >> D)) { + constraint_fw |= (1 << (15 + D)); + constraint_bw |= (1 << (15 - D)); + } + } + + l->linear[j * l->node_count + i] |= constraint_fw; + l->linear[i * l->node_count + j] |= constraint_bw; +} + +static bool +lcra_test_linear(struct lcra_state *l, unsigned *solutions, unsigned i) +{ + unsigned *row = &l->linear[i * l->node_count]; + signed constant = solutions[i]; + + for (unsigned j = 0; j < l->node_count; ++j) { + if (solutions[j] == ~0) + continue; + + signed lhs = solutions[j] - constant; + + if (lhs < -15 || lhs > 15) + continue; + + if (row[j] & (1 << (lhs + 15))) + return false; + } + + return true; +} + +static bool +lcra_solve(struct lcra_state *l) +{ + for (unsigned step = 0; step < l->node_count; ++step) { + if (l->solutions[step] != ~0) + continue; + if (l->alignment[step] == 0) + continue; + + unsigned _class = l->class[step]; + unsigned class_start = l->class_start[_class]; + + unsigned BA = l->alignment[step]; + unsigned shift = (BA & 0xffff) - 1; + unsigned bound = BA >> 16; + + unsigned P = bound >> shift; + unsigned Q = l->modulus[step]; + unsigned r_max = l->class_size[_class]; + unsigned k_max = r_max >> shift; + unsigned m_max = k_max / P; + bool succ = false; + + for (unsigned m = 0; m < m_max; ++m) { + for (unsigned n = 0; n < Q; ++n) { + l->solutions[step] = ((m * P + n) << shift) + class_start; + succ = lcra_test_linear(l, l->solutions, step); + + if (succ) + break; + } + + if (succ) + break; + 
} + + /* Out of registers - prepare to spill */ + if (!succ) { + l->spill_class = l->class[step]; + return false; + } + } + + return true; +} + +/* Register spilling is implemented with a cost-benefit system. Costs are set + * by the user. Benefits are calculated from the constraints. */ + +static void +lcra_set_node_spill_cost(struct lcra_state *l, unsigned node, signed cost) +{ + if (node < l->node_count) + l->spill_cost[node] = cost; +} + +static unsigned +lcra_count_constraints(struct lcra_state *l, unsigned i) +{ + unsigned count = 0; + unsigned *constraints = &l->linear[i * l->node_count]; + + for (unsigned j = 0; j < l->node_count; ++j) + count += util_bitcount(constraints[j]); + + return count; +} + +static signed +lcra_get_best_spill_node(struct lcra_state *l) +{ + /* If there are no constraints on a node, do not pick it to spill under + * any circumstance, or else we would hang rather than fail RA */ + float best_benefit = 0.0; + signed best_node = -1; + + for (unsigned i = 0; i < l->node_count; ++i) { + /* Find spillable nodes */ + if (l->class[i] != l->spill_class) + continue; + if (l->spill_cost[i] < 0) + continue; + + /* Adapted from Chaitin's heuristic */ + float constraints = lcra_count_constraints(l, i); + float cost = (l->spill_cost[i] + 1); + float benefit = constraints / cost; + + if (benefit > best_benefit) { + best_benefit = benefit; + best_node = i; + } + } + + return best_node; +} + struct phys_reg { /* Physical register: 0-31 */ unsigned reg; diff --git a/src/panfrost/util/lcra.c b/src/panfrost/util/lcra.c deleted file mode 100644 index 00585c646a6..00000000000 --- a/src/panfrost/util/lcra.c +++ /dev/null @@ -1,256 +0,0 @@ -/* - * Copyright (C) 2019 Collabora, Ltd. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors (Collabora): - * Alyssa Rosenzweig - */ - -#include "lcra.h" -#include -#include -#include -#include -#include -#include "util/macros.h" -#include "util/u_math.h" - -/* This module is the reference implementation of "Linearly Constrained - * Register Allocation". 
The paper is available in PDF form - * (https://people.collabora.com/~alyssa/LCRA.pdf) as well as Markdown+LaTeX - * (https://gitlab.freedesktop.org/alyssa/lcra/blob/master/LCRA.md) - */ - -struct lcra_state * -lcra_alloc_equations(unsigned node_count, unsigned class_count) -{ - struct lcra_state *l = calloc(1, sizeof(*l)); - - l->node_count = node_count; - l->class_count = class_count; - - l->alignment = calloc(sizeof(l->alignment[0]), node_count); - l->linear = calloc(sizeof(l->linear[0]), node_count * node_count); - l->modulus = calloc(sizeof(l->modulus[0]), node_count); - l->class = calloc(sizeof(l->class[0]), node_count); - l->class_start = calloc(sizeof(l->class_start[0]), class_count); - l->class_disjoint = - calloc(sizeof(l->class_disjoint[0]), class_count * class_count); - l->class_size = calloc(sizeof(l->class_size[0]), class_count); - l->spill_cost = calloc(sizeof(l->spill_cost[0]), node_count); - l->solutions = calloc(sizeof(l->solutions[0]), node_count); - - memset(l->solutions, ~0, sizeof(l->solutions[0]) * node_count); - - return l; -} - -void -lcra_free(struct lcra_state *l) -{ - if (!l) - return; - - free(l->alignment); - free(l->linear); - free(l->modulus); - free(l->class); - free(l->class_start); - free(l->class_disjoint); - free(l->class_size); - free(l->spill_cost); - free(l->solutions); - - free(l); -} - -void -lcra_set_alignment(struct lcra_state *l, unsigned node, unsigned align_log2, - unsigned bound) -{ - l->alignment[node] = (align_log2 + 1) | (bound << 16); -} - -void -lcra_set_disjoint_class(struct lcra_state *l, unsigned c1, unsigned c2) -{ - l->class_disjoint[(c1 * l->class_count) + c2] = true; - l->class_disjoint[(c2 * l->class_count) + c1] = true; -} - -void -lcra_restrict_range(struct lcra_state *l, unsigned node, unsigned len) -{ - if (node < l->node_count && l->alignment[node]) { - unsigned BA = l->alignment[node]; - unsigned alignment = (BA & 0xffff) - 1; - unsigned bound = BA >> 16; - l->modulus[node] = DIV_ROUND_UP(bound - 
len + 1, 1 << alignment); - } -} - -void -lcra_add_node_interference(struct lcra_state *l, unsigned i, unsigned cmask_i, - unsigned j, unsigned cmask_j) -{ - if (i == j) - return; - - if (l->class_disjoint[(l->class[i] * l -> class_count) + l->class[j]]) - return; - - uint32_t constraint_fw = 0; - uint32_t constraint_bw = 0; - - for (unsigned D = 0; D < 16; ++D) { - if (cmask_i & (cmask_j << D)) { - constraint_bw |= (1 << (15 + D)); - constraint_fw |= (1 << (15 - D)); - } - - if (cmask_i & (cmask_j >> D)) { - constraint_fw |= (1 << (15 + D)); - constraint_bw |= (1 << (15 - D)); - } - } - - l->linear[j * l->node_count + i] |= constraint_fw; - l->linear[i * l->node_count + j] |= constraint_bw; -} - -static bool -lcra_test_linear(struct lcra_state *l, unsigned *solutions, unsigned i) -{ - unsigned *row = &l->linear[i * l->node_count]; - signed constant = solutions[i]; - - for (unsigned j = 0; j < l->node_count; ++j) { - if (solutions[j] == ~0) - continue; - - signed lhs = solutions[j] - constant; - - if (lhs < -15 || lhs > 15) - continue; - - if (row[j] & (1 << (lhs + 15))) - return false; - } - - return true; -} - -bool -lcra_solve(struct lcra_state *l) -{ - for (unsigned step = 0; step < l->node_count; ++step) { - if (l->solutions[step] != ~0) - continue; - if (l->alignment[step] == 0) - continue; - - unsigned _class = l->class[step]; - unsigned class_start = l->class_start[_class]; - - unsigned BA = l->alignment[step]; - unsigned shift = (BA & 0xffff) - 1; - unsigned bound = BA >> 16; - - unsigned P = bound >> shift; - unsigned Q = l->modulus[step]; - unsigned r_max = l->class_size[_class]; - unsigned k_max = r_max >> shift; - unsigned m_max = k_max / P; - bool succ = false; - - for (unsigned m = 0; m < m_max; ++m) { - for (unsigned n = 0; n < Q; ++n) { - l->solutions[step] = ((m * P + n) << shift) + class_start; - succ = lcra_test_linear(l, l->solutions, step); - - if (succ) - break; - } - - if (succ) - break; - } - - /* Out of registers - prepare to spill */ - if 
(!succ) { - l->spill_class = l->class[step]; - return false; - } - } - - return true; -} - -/* Register spilling is implemented with a cost-benefit system. Costs are set - * by the user. Benefits are calculated from the constraints. */ - -void -lcra_set_node_spill_cost(struct lcra_state *l, unsigned node, signed cost) -{ - if (node < l->node_count) - l->spill_cost[node] = cost; -} - -static unsigned -lcra_count_constraints(struct lcra_state *l, unsigned i) -{ - unsigned count = 0; - unsigned *constraints = &l->linear[i * l->node_count]; - - for (unsigned j = 0; j < l->node_count; ++j) - count += util_bitcount(constraints[j]); - - return count; -} - -signed -lcra_get_best_spill_node(struct lcra_state *l) -{ - /* If there are no constraints on a node, do not pick it to spill under - * any circumstance, or else we would hang rather than fail RA */ - float best_benefit = 0.0; - signed best_node = -1; - - for (unsigned i = 0; i < l->node_count; ++i) { - /* Find spillable nodes */ - if (l->class[i] != l->spill_class) - continue; - if (l->spill_cost[i] < 0) - continue; - - /* Adapted from Chaitin's heuristic */ - float constraints = lcra_count_constraints(l, i); - float cost = (l->spill_cost[i] + 1); - float benefit = constraints / cost; - - if (benefit > best_benefit) { - best_benefit = benefit; - best_node = i; - } - } - - return best_node; -} diff --git a/src/panfrost/util/lcra.h b/src/panfrost/util/lcra.h deleted file mode 100644 index 0b1ed13400f..00000000000 --- a/src/panfrost/util/lcra.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (C) 2019 Collabora, Ltd. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors (Collabora): - * Alyssa Rosenzweig - */ - -#ifndef __LCRA_H -#define __LCRA_H - -#include -#include - -struct lcra_state { - unsigned node_count; - - /* Alignment for node in log2(bytes)+1. Since alignment must be - * non-negative power-of-two, the elements are strictly positive - * integers. Zero is the sentinel for a missing node. In upper word, - * bound. */ - unsigned *alignment; - - /* Linear constraints imposed. Nested array sized upfront, organized as - * linear[node_left][node_right]. That is, calculate indices as: - * - * Each element is itself a bit field denoting whether (c_j - c_i) bias - * is present or not, including negative biases. - * - * Note for Midgard, there are 16 components so the bias is in range - * [-15, 15] so encoded by 32-bit field. 
*/ - - uint32_t *linear; - - /* Per node max modulus constraints */ - uint8_t *modulus; - - /* Classes allow nodes to be partitioned with a starting register. - * Classes cannot interfere; that is, they are true partitions in the - * usual sense of the word. class_count is the number of classes. - * class[] is indexed by a node to get the mapped class. class_start is - * biased to all solutions in the class. */ - - unsigned class_count; - unsigned *class; - unsigned *class_start; - unsigned *class_size; - bool *class_disjoint; - - /* Before solving, forced registers; after solving, solutions. */ - unsigned *solutions; - - /* For register spilling, the costs to spill nodes (as set by the user) - * are in spill_cost[], negative if a node is unspillable. Internally, - * spill_class specifies which class to spill (whichever class failed - * to allocate) */ - - signed *spill_cost; - unsigned spill_class; -}; - -struct lcra_state *lcra_alloc_equations(unsigned node_count, - unsigned class_count); - -void lcra_free(struct lcra_state *l); - -void lcra_set_disjoint_class(struct lcra_state *l, unsigned c1, unsigned c2); - -void lcra_set_alignment(struct lcra_state *l, unsigned node, - unsigned align_log2, unsigned bound); - -void lcra_restrict_range(struct lcra_state *l, unsigned node, unsigned len); - -void lcra_add_node_interference(struct lcra_state *l, unsigned i, - unsigned cmask_i, unsigned j, unsigned cmask_j); - -bool lcra_solve(struct lcra_state *l); - -void lcra_set_node_spill_cost(struct lcra_state *l, unsigned node, signed cost); - -signed lcra_get_best_spill_node(struct lcra_state *l); - -#endif diff --git a/src/panfrost/util/meson.build b/src/panfrost/util/meson.build index 300206603e5..1f0034af1e8 100644 --- a/src/panfrost/util/meson.build +++ b/src/panfrost/util/meson.build @@ -3,8 +3,6 @@ # SPDX-License-Identifier: MIT libpanfrost_util_files = files( - 'lcra.c', - 'lcra.h', 'pan_collect_varyings.c', 'pan_ir.c', 'pan_ir.h',