pan/util: Move lcra to midgard

This is only used by midgard, bifrost have its own custom version of this. Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com> Acked-by: Boris Brezillon <boris.brezillon@collabora.com> Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com> Acked-by: Eric R. Smith <eric.smith@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32650>
2024-12-16 09:27:48 +01:00
parent 22f70f20c7
commit 49261faac8
5 changed files with 269 additions and 360 deletions
@@ -38,7 +38,6 @@

 #include "compiler/glsl_types.h"
 #include "compiler/nir/nir.h"
-#include "panfrost/util/lcra.h"
 #include "panfrost/util/pan_ir.h"

 /* Forward declare */
@@ -28,6 +28,275 @@
 #include "midgard_ops.h"
 #include "midgard_quirks.h"

+struct lcra_state {
+   unsigned node_count;
+
+   /* Alignment for node in log2(bytes)+1. Since alignment must be
+    * non-negative power-of-two, the elements are strictly positive
+    * integers. Zero is the sentinel for a missing node. In upper word,
+    * bound. */
+   unsigned *alignment;
+
+   /* Linear constraints imposed. Nested array sized upfront, organized as
+    * linear[node_left][node_right]. That is, calculate indices as:
+    *
+    * Each element is itself a bit field denoting whether (c_j - c_i) bias
+    * is present or not, including negative biases.
+    *
+    * Note for Midgard, there are 16 components so the bias is in range
+    * [-15, 15] so encoded by 32-bit field. */
+
+   uint32_t *linear;
+
+   /* Per node max modulus constraints */
+   uint8_t *modulus;
+
+   /* Classes allow nodes to be partitioned with a starting register.
+    * Classes cannot interfere; that is, they are true partitions in the
+    * usual sense of the word. class_count is the number of classes.
+    * class[] is indexed by a node to get the mapped class. class_start is
+    * biased to all solutions in the class. */
+
+   unsigned class_count;
+   unsigned *class;
+   unsigned *class_start;
+   unsigned *class_size;
+   bool *class_disjoint;
+
+   /* Before solving, forced registers; after solving, solutions. */
+   unsigned *solutions;
+
+   /* For register spilling, the costs to spill nodes (as set by the user)
+    * are in spill_cost[], negative if a node is unspillable. Internally,
+    * spill_class specifies which class to spill (whichever class failed
+    * to allocate) */
+
+   signed *spill_cost;
+   unsigned spill_class;
+};
+
+/* This module is the reference implementation of "Linearly Constrained
+ * Register Allocation". The paper is available in PDF form
+ * (https://people.collabora.com/~alyssa/LCRA.pdf) as well as Markdown+LaTeX
+ * (https://gitlab.freedesktop.org/alyssa/lcra/blob/master/LCRA.md)
+ */
+
+static struct lcra_state *
+lcra_alloc_equations(unsigned node_count, unsigned class_count)
+{
+   struct lcra_state *l = calloc(1, sizeof(*l));
+
+   l->node_count = node_count;
+   l->class_count = class_count;
+
+   l->alignment = calloc(sizeof(l->alignment[0]), node_count);
+   l->linear = calloc(sizeof(l->linear[0]), node_count * node_count);
+   l->modulus = calloc(sizeof(l->modulus[0]), node_count);
+   l->class = calloc(sizeof(l->class[0]), node_count);
+   l->class_start = calloc(sizeof(l->class_start[0]), class_count);
+   l->class_disjoint =
+      calloc(sizeof(l->class_disjoint[0]), class_count * class_count);
+   l->class_size = calloc(sizeof(l->class_size[0]), class_count);
+   l->spill_cost = calloc(sizeof(l->spill_cost[0]), node_count);
+   l->solutions = calloc(sizeof(l->solutions[0]), node_count);
+
+   memset(l->solutions, ~0, sizeof(l->solutions[0]) * node_count);
+
+   return l;
+}
+
+static void
+lcra_free(struct lcra_state *l)
+{
+   if (!l)
+      return;
+
+   free(l->alignment);
+   free(l->linear);
+   free(l->modulus);
+   free(l->class);
+   free(l->class_start);
+   free(l->class_disjoint);
+   free(l->class_size);
+   free(l->spill_cost);
+   free(l->solutions);
+
+   free(l);
+}
+
+static void
+lcra_set_alignment(struct lcra_state *l, unsigned node, unsigned align_log2,
+                   unsigned bound)
+{
+   l->alignment[node] = (align_log2 + 1) | (bound << 16);
+}
+
+static void
+lcra_set_disjoint_class(struct lcra_state *l, unsigned c1, unsigned c2)
+{
+   l->class_disjoint[(c1 * l->class_count) + c2] = true;
+   l->class_disjoint[(c2 * l->class_count) + c1] = true;
+}
+
+static void
+lcra_restrict_range(struct lcra_state *l, unsigned node, unsigned len)
+{
+   if (node < l->node_count && l->alignment[node]) {
+      unsigned BA = l->alignment[node];
+      unsigned alignment = (BA & 0xffff) - 1;
+      unsigned bound = BA >> 16;
+      l->modulus[node] = DIV_ROUND_UP(bound - len + 1, 1 << alignment);
+   }
+}
+
+static void
+lcra_add_node_interference(struct lcra_state *l, unsigned i, unsigned cmask_i,
+                           unsigned j, unsigned cmask_j)
+{
+   if (i == j)
+      return;
+
+   if (l->class_disjoint[(l->class[i] * l->class_count) + l->class[j]])
+      return;
+
+   uint32_t constraint_fw = 0;
+   uint32_t constraint_bw = 0;
+
+   for (unsigned D = 0; D < 16; ++D) {
+      if (cmask_i & (cmask_j << D)) {
+         constraint_bw |= (1 << (15 + D));
+         constraint_fw |= (1 << (15 - D));
+      }
+
+      if (cmask_i & (cmask_j >> D)) {
+         constraint_fw |= (1 << (15 + D));
+         constraint_bw |= (1 << (15 - D));
+      }
+   }
+
+   l->linear[j * l->node_count + i] |= constraint_fw;
+   l->linear[i * l->node_count + j] |= constraint_bw;
+}
+
+static bool
+lcra_test_linear(struct lcra_state *l, unsigned *solutions, unsigned i)
+{
+   unsigned *row = &l->linear[i * l->node_count];
+   signed constant = solutions[i];
+
+   for (unsigned j = 0; j < l->node_count; ++j) {
+      if (solutions[j] == ~0)
+         continue;
+
+      signed lhs = solutions[j] - constant;
+
+      if (lhs < -15 || lhs > 15)
+         continue;
+
+      if (row[j] & (1 << (lhs + 15)))
+         return false;
+   }
+
+   return true;
+}
+
+static bool
+lcra_solve(struct lcra_state *l)
+{
+   for (unsigned step = 0; step < l->node_count; ++step) {
+      if (l->solutions[step] != ~0)
+         continue;
+      if (l->alignment[step] == 0)
+         continue;
+
+      unsigned _class = l->class[step];
+      unsigned class_start = l->class_start[_class];
+
+      unsigned BA = l->alignment[step];
+      unsigned shift = (BA & 0xffff) - 1;
+      unsigned bound = BA >> 16;
+
+      unsigned P = bound >> shift;
+      unsigned Q = l->modulus[step];
+      unsigned r_max = l->class_size[_class];
+      unsigned k_max = r_max >> shift;
+      unsigned m_max = k_max / P;
+      bool succ = false;
+
+      for (unsigned m = 0; m < m_max; ++m) {
+         for (unsigned n = 0; n < Q; ++n) {
+            l->solutions[step] = ((m * P + n) << shift) + class_start;
+            succ = lcra_test_linear(l, l->solutions, step);
+
+            if (succ)
+               break;
+         }
+
+         if (succ)
+            break;
+      }
+
+      /* Out of registers - prepare to spill */
+      if (!succ) {
+         l->spill_class = l->class[step];
+         return false;
+      }
+   }
+
+   return true;
+}
+
+/* Register spilling is implemented with a cost-benefit system. Costs are set
+ * by the user. Benefits are calculated from the constraints. */
+
+static void
+lcra_set_node_spill_cost(struct lcra_state *l, unsigned node, signed cost)
+{
+   if (node < l->node_count)
+      l->spill_cost[node] = cost;
+}
+
+static unsigned
+lcra_count_constraints(struct lcra_state *l, unsigned i)
+{
+   unsigned count = 0;
+   unsigned *constraints = &l->linear[i * l->node_count];
+
+   for (unsigned j = 0; j < l->node_count; ++j)
+      count += util_bitcount(constraints[j]);
+
+   return count;
+}
+
+static signed
+lcra_get_best_spill_node(struct lcra_state *l)
+{
+   /* If there are no constraints on a node, do not pick it to spill under
+    * any circumstance, or else we would hang rather than fail RA */
+   float best_benefit = 0.0;
+   signed best_node = -1;
+
+   for (unsigned i = 0; i < l->node_count; ++i) {
+      /* Find spillable nodes */
+      if (l->class[i] != l->spill_class)
+         continue;
+      if (l->spill_cost[i] < 0)
+         continue;
+
+      /* Adapted from Chaitin's heuristic */
+      float constraints = lcra_count_constraints(l, i);
+      float cost = (l->spill_cost[i] + 1);
+      float benefit = constraints / cost;
+
+      if (benefit > best_benefit) {
+         best_benefit = benefit;
+         best_node = i;
+      }
+   }
+
+   return best_node;
+}
+
 struct phys_reg {
   /* Physical register: 0-31 */
   unsigned reg;
@@ -1,256 +0,0 @@
-/*
- * Copyright (C) 2019 Collabora, Ltd.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Authors (Collabora):
- *      Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
- */
-
-#include "lcra.h"
-#include <assert.h>
-#include <limits.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include "util/macros.h"
-#include "util/u_math.h"
-
-/* This module is the reference implementation of "Linearly Constrained
- * Register Allocation". The paper is available in PDF form
- * (https://people.collabora.com/~alyssa/LCRA.pdf) as well as Markdown+LaTeX
- * (https://gitlab.freedesktop.org/alyssa/lcra/blob/master/LCRA.md)
- */
-
-struct lcra_state *
-lcra_alloc_equations(unsigned node_count, unsigned class_count)
-{
-   struct lcra_state *l = calloc(1, sizeof(*l));
-
-   l->node_count = node_count;
-   l->class_count = class_count;
-
-   l->alignment = calloc(sizeof(l->alignment[0]), node_count);
-   l->linear = calloc(sizeof(l->linear[0]), node_count * node_count);
-   l->modulus = calloc(sizeof(l->modulus[0]), node_count);
-   l->class = calloc(sizeof(l->class[0]), node_count);
-   l->class_start = calloc(sizeof(l->class_start[0]), class_count);
-   l->class_disjoint =
-      calloc(sizeof(l->class_disjoint[0]), class_count * class_count);
-   l->class_size = calloc(sizeof(l->class_size[0]), class_count);
-   l->spill_cost = calloc(sizeof(l->spill_cost[0]), node_count);
-   l->solutions = calloc(sizeof(l->solutions[0]), node_count);
-
-   memset(l->solutions, ~0, sizeof(l->solutions[0]) * node_count);
-
-   return l;
-}
-
-void
-lcra_free(struct lcra_state *l)
-{
-   if (!l)
-      return;
-
-   free(l->alignment);
-   free(l->linear);
-   free(l->modulus);
-   free(l->class);
-   free(l->class_start);
-   free(l->class_disjoint);
-   free(l->class_size);
-   free(l->spill_cost);
-   free(l->solutions);
-
-   free(l);
-}
-
-void
-lcra_set_alignment(struct lcra_state *l, unsigned node, unsigned align_log2,
-                   unsigned bound)
-{
-   l->alignment[node] = (align_log2 + 1) | (bound << 16);
-}
-
-void
-lcra_set_disjoint_class(struct lcra_state *l, unsigned c1, unsigned c2)
-{
-   l->class_disjoint[(c1 * l->class_count) + c2] = true;
-   l->class_disjoint[(c2 * l->class_count) + c1] = true;
-}
-
-void
-lcra_restrict_range(struct lcra_state *l, unsigned node, unsigned len)
-{
-   if (node < l->node_count && l->alignment[node]) {
-      unsigned BA = l->alignment[node];
-      unsigned alignment = (BA & 0xffff) - 1;
-      unsigned bound = BA >> 16;
-      l->modulus[node] = DIV_ROUND_UP(bound - len + 1, 1 << alignment);
-   }
-}
-
-void
-lcra_add_node_interference(struct lcra_state *l, unsigned i, unsigned cmask_i,
-                           unsigned j, unsigned cmask_j)
-{
-   if (i == j)
-      return;
-
-   if (l->class_disjoint[(l->class[i] * l -> class_count) + l->class[j]])
-      return;
-
-   uint32_t constraint_fw = 0;
-   uint32_t constraint_bw = 0;
-
-   for (unsigned D = 0; D < 16; ++D) {
-      if (cmask_i & (cmask_j << D)) {
-         constraint_bw |= (1 << (15 + D));
-         constraint_fw |= (1 << (15 - D));
-      }
-
-      if (cmask_i & (cmask_j >> D)) {
-         constraint_fw |= (1 << (15 + D));
-         constraint_bw |= (1 << (15 - D));
-      }
-   }
-
-   l->linear[j * l->node_count + i] |= constraint_fw;
-   l->linear[i * l->node_count + j] |= constraint_bw;
-}
-
-static bool
-lcra_test_linear(struct lcra_state *l, unsigned *solutions, unsigned i)
-{
-   unsigned *row = &l->linear[i * l->node_count];
-   signed constant = solutions[i];
-
-   for (unsigned j = 0; j < l->node_count; ++j) {
-      if (solutions[j] == ~0)
-         continue;
-
-      signed lhs = solutions[j] - constant;
-
-      if (lhs < -15 || lhs > 15)
-         continue;
-
-      if (row[j] & (1 << (lhs + 15)))
-         return false;
-   }
-
-   return true;
-}
-
-bool
-lcra_solve(struct lcra_state *l)
-{
-   for (unsigned step = 0; step < l->node_count; ++step) {
-      if (l->solutions[step] != ~0)
-         continue;
-      if (l->alignment[step] == 0)
-         continue;
-
-      unsigned _class = l->class[step];
-      unsigned class_start = l->class_start[_class];
-
-      unsigned BA = l->alignment[step];
-      unsigned shift = (BA & 0xffff) - 1;
-      unsigned bound = BA >> 16;
-
-      unsigned P = bound >> shift;
-      unsigned Q = l->modulus[step];
-      unsigned r_max = l->class_size[_class];
-      unsigned k_max = r_max >> shift;
-      unsigned m_max = k_max / P;
-      bool succ = false;
-
-      for (unsigned m = 0; m < m_max; ++m) {
-         for (unsigned n = 0; n < Q; ++n) {
-            l->solutions[step] = ((m * P + n) << shift) + class_start;
-            succ = lcra_test_linear(l, l->solutions, step);
-
-            if (succ)
-               break;
-         }
-
-         if (succ)
-            break;
-      }
-
-      /* Out of registers - prepare to spill */
-      if (!succ) {
-         l->spill_class = l->class[step];
-         return false;
-      }
-   }
-
-   return true;
-}
-
-/* Register spilling is implemented with a cost-benefit system. Costs are set
- * by the user. Benefits are calculated from the constraints. */
-
-void
-lcra_set_node_spill_cost(struct lcra_state *l, unsigned node, signed cost)
-{
-   if (node < l->node_count)
-      l->spill_cost[node] = cost;
-}
-
-static unsigned
-lcra_count_constraints(struct lcra_state *l, unsigned i)
-{
-   unsigned count = 0;
-   unsigned *constraints = &l->linear[i * l->node_count];
-
-   for (unsigned j = 0; j < l->node_count; ++j)
-      count += util_bitcount(constraints[j]);
-
-   return count;
-}
-
-signed
-lcra_get_best_spill_node(struct lcra_state *l)
-{
-   /* If there are no constraints on a node, do not pick it to spill under
-    * any circumstance, or else we would hang rather than fail RA */
-   float best_benefit = 0.0;
-   signed best_node = -1;
-
-   for (unsigned i = 0; i < l->node_count; ++i) {
-      /* Find spillable nodes */
-      if (l->class[i] != l->spill_class)
-         continue;
-      if (l->spill_cost[i] < 0)
-         continue;
-
-      /* Adapted from Chaitin's heuristic */
-      float constraints = lcra_count_constraints(l, i);
-      float cost = (l->spill_cost[i] + 1);
-      float benefit = constraints / cost;
-
-      if (benefit > best_benefit) {
-         best_benefit = benefit;
-         best_node = i;
-      }
-   }
-
-   return best_node;
-}
@@ -1,101 +0,0 @@
-/*
- * Copyright (C) 2019 Collabora, Ltd.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * Authors (Collabora):
- *      Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
- */
-
-#ifndef __LCRA_H
-#define __LCRA_H
-
-#include <stdbool.h>
-#include <stdint.h>
-
-struct lcra_state {
-   unsigned node_count;
-
-   /* Alignment for node in log2(bytes)+1. Since alignment must be
-    * non-negative power-of-two, the elements are strictly positive
-    * integers. Zero is the sentinel for a missing node. In upper word,
-    * bound. */
-   unsigned *alignment;
-
-   /* Linear constraints imposed. Nested array sized upfront, organized as
-    * linear[node_left][node_right]. That is, calculate indices as:
-    *
-    * Each element is itself a bit field denoting whether (c_j - c_i) bias
-    * is present or not, including negative biases.
-    *
-    * Note for Midgard, there are 16 components so the bias is in range
-    * [-15, 15] so encoded by 32-bit field. */
-
-   uint32_t *linear;
-
-   /* Per node max modulus constraints */
-   uint8_t *modulus;
-
-   /* Classes allow nodes to be partitioned with a starting register.
-    * Classes cannot interfere; that is, they are true partitions in the
-    * usual sense of the word. class_count is the number of classes.
-    * class[] is indexed by a node to get the mapped class. class_start is
-    * biased to all solutions in the class. */
-
-   unsigned class_count;
-   unsigned *class;
-   unsigned *class_start;
-   unsigned *class_size;
-   bool *class_disjoint;
-
-   /* Before solving, forced registers; after solving, solutions. */
-   unsigned *solutions;
-
-   /* For register spilling, the costs to spill nodes (as set by the user)
-    * are in spill_cost[], negative if a node is unspillable. Internally,
-    * spill_class specifies which class to spill (whichever class failed
-    * to allocate) */
-
-   signed *spill_cost;
-   unsigned spill_class;
-};
-
-struct lcra_state *lcra_alloc_equations(unsigned node_count,
-                                        unsigned class_count);
-
-void lcra_free(struct lcra_state *l);
-
-void lcra_set_disjoint_class(struct lcra_state *l, unsigned c1, unsigned c2);
-
-void lcra_set_alignment(struct lcra_state *l, unsigned node,
-                        unsigned align_log2, unsigned bound);
-
-void lcra_restrict_range(struct lcra_state *l, unsigned node, unsigned len);
-
-void lcra_add_node_interference(struct lcra_state *l, unsigned i,
-                                unsigned cmask_i, unsigned j, unsigned cmask_j);
-
-bool lcra_solve(struct lcra_state *l);
-
-void lcra_set_node_spill_cost(struct lcra_state *l, unsigned node, signed cost);
-
-signed lcra_get_best_spill_node(struct lcra_state *l);
-
-#endif
@@ -3,8 +3,6 @@
 # SPDX-License-Identifier: MIT

 libpanfrost_util_files = files(
-  'lcra.c',
-  'lcra.h',
  'pan_collect_varyings.c',
  'pan_ir.c',
  'pan_ir.h',