panfrost/midgard: Allocate registers once (per-screen)
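This should save a lot of per-compile time by using the register allocator (RA) the way it's actually supposed to be used: register sets are built once, cached, and reused across shader compiles instead of being rebuilt for every compile. Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>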
This commit is contained in:
@@ -67,7 +67,7 @@ panfrost_shader_compile(struct panfrost_context *ctx, struct mali_shader_meta *m
|
||||
.alpha_ref = state->alpha_state.ref_value
|
||||
};
|
||||
|
||||
midgard_compile_shader_nir(s, &program, false);
|
||||
midgard_compile_shader_nir(&ctx->compiler, s, &program, false);
|
||||
|
||||
/* Prepare the compiled binary for upload */
|
||||
int size = program.compiled.size;
|
||||
|
||||
@@ -170,7 +170,7 @@ panfrost_compile_blend_shader(
|
||||
/* Compile the built shader */
|
||||
|
||||
midgard_program program;
|
||||
midgard_compile_shader_nir(shader, &program, true);
|
||||
midgard_compile_shader_nir(&ctx->compiler, shader, &program, true);
|
||||
|
||||
/* Upload the shader */
|
||||
|
||||
|
||||
@@ -91,6 +91,9 @@ struct panfrost_context {
|
||||
/* Gallium context */
|
||||
struct pipe_context base;
|
||||
|
||||
/* Compiler context */
|
||||
struct midgard_screen compiler;
|
||||
|
||||
/* Bound job and map of panfrost_job_key to jobs */
|
||||
struct panfrost_job *job;
|
||||
struct hash_table *jobs;
|
||||
|
||||
@@ -188,6 +188,9 @@ typedef struct compiler_context {
|
||||
nir_shader *nir;
|
||||
gl_shader_stage stage;
|
||||
|
||||
/* The screen we correspond to */
|
||||
struct midgard_screen *screen;
|
||||
|
||||
/* Is internally a blend shader? Depends on stage == FRAGMENT */
|
||||
bool is_blend;
|
||||
|
||||
|
||||
@@ -2395,7 +2395,7 @@ midgard_get_first_tag_from_block(compiler_context *ctx, unsigned block_idx)
|
||||
}
|
||||
|
||||
int
|
||||
midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_blend)
|
||||
midgard_compile_shader_nir(struct midgard_screen *screen, nir_shader *nir, midgard_program *program, bool is_blend)
|
||||
{
|
||||
struct util_dynarray *compiled = &program->compiled;
|
||||
|
||||
@@ -2403,6 +2403,7 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl
|
||||
|
||||
compiler_context ictx = {
|
||||
.nir = nir,
|
||||
.screen = screen,
|
||||
.stage = nir->info.stage,
|
||||
|
||||
.is_blend = is_blend,
|
||||
|
||||
@@ -26,6 +26,24 @@
|
||||
|
||||
#include "compiler/nir/nir.h"
|
||||
#include "util/u_dynarray.h"
|
||||
#include "util/register_allocate.h"
|
||||
|
||||
/* Dimensions of the register set cache kept in struct midgard_screen. A
 * register set exists for each possible count of work registers, from 8
 * through 16 inclusive, and each set carries one register class per vector
 * width (vec1 through vec4). */

enum {
        MIDGARD_MIN_WORK_REGS = 8,
        MIDGARD_MAX_WORK_REGS = 16,
        MIDGARD_WORK_REG_SET_COUNT = MIDGARD_MAX_WORK_REGS - MIDGARD_MIN_WORK_REGS + 1,
        MIDGARD_REG_CLASS_COUNT = 4
};

/* To be shoved inside panfrost_screen for the Gallium driver, or somewhere
 * else for Vulkan/standalone. The single compiler "screen" to be shared across
 * all shader compiles, used to store complex initialization (for instance,
 * related to register allocation) */

struct midgard_screen {
        /* Precomputed register allocation sets for varying numbers of work
         * registers. The zeroth entry corresponds to 8 work registers. The
         * eighth entry corresponds to 16 work registers. NULL if this set has
         * not been allocated yet. */

        struct ra_regs *regs[MIDGARD_WORK_REG_SET_COUNT];

        /* Work register classes corresponding to the above register sets,
         * indexed the same way as regs */

        unsigned reg_classes[MIDGARD_WORK_REG_SET_COUNT][MIDGARD_REG_CLASS_COUNT];
};
|
||||
|
||||
/* Define the general compiler entry point */
|
||||
|
||||
@@ -92,7 +110,7 @@ typedef struct {
|
||||
} midgard_program;
|
||||
|
||||
int
|
||||
midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_blend);
|
||||
midgard_compile_shader_nir(struct midgard_screen *screen, nir_shader *nir, midgard_program *program, bool is_blend);
|
||||
|
||||
/* NIR options are shared between the standalone compiler and the online
|
||||
* compiler. Defining it here is the simplest, though maybe not the Right
|
||||
|
||||
@@ -157,17 +157,12 @@ index_to_reg(compiler_context *ctx, struct ra_graph *g, int reg)
|
||||
return r;
|
||||
}
|
||||
|
||||
/* This routine performs the actual register allocation. It should be succeeded
|
||||
* by install_registers */
|
||||
/* This routine creates a register set. Should be called infrequently since
|
||||
* it's slow and can be cached */
|
||||
|
||||
struct ra_graph *
|
||||
allocate_registers(compiler_context *ctx, bool *spilled)
|
||||
static struct ra_regs *
|
||||
create_register_set(unsigned work_count, unsigned *classes)
|
||||
{
|
||||
/* The number of vec4 work registers available depends on when the
|
||||
* uniforms start, so compute that first */
|
||||
|
||||
int work_count = 16 - MAX2((ctx->uniform_cutoff - 8), 0);
|
||||
|
||||
int virtual_count = work_count * WORK_STRIDE;
|
||||
|
||||
/* First, initialize the RA */
|
||||
@@ -178,12 +173,10 @@ allocate_registers(compiler_context *ctx, bool *spilled)
|
||||
int work_vec2 = ra_alloc_reg_class(regs);
|
||||
int work_vec1 = ra_alloc_reg_class(regs);
|
||||
|
||||
unsigned classes[4] = {
|
||||
work_vec1,
|
||||
work_vec2,
|
||||
work_vec3,
|
||||
work_vec4
|
||||
};
|
||||
classes[0] = work_vec1;
|
||||
classes[1] = work_vec2;
|
||||
classes[2] = work_vec3;
|
||||
classes[3] = work_vec4;
|
||||
|
||||
/* Add the full set of work registers */
|
||||
for (unsigned i = 0; i < work_count; ++i) {
|
||||
@@ -217,6 +210,55 @@ allocate_registers(compiler_context *ctx, bool *spilled)
|
||||
/* We're done setting up */
|
||||
ra_set_finalize(regs, NULL);
|
||||
|
||||
return regs;
|
||||
}
|
||||
|
||||
/* This routine gets a precomputed register set off the screen if it's able, or otherwise it computes one on the fly */
|
||||
|
||||
static struct ra_regs *
|
||||
get_register_set(struct midgard_screen *screen, unsigned work_count, unsigned **classes)
|
||||
{
|
||||
/* Bounds check */
|
||||
assert(work_count >= 8);
|
||||
assert(work_count <= 16);
|
||||
|
||||
/* Compute index */
|
||||
unsigned index = work_count - 8;
|
||||
|
||||
/* Find the reg set */
|
||||
struct ra_regs *cached = screen->regs[index];
|
||||
|
||||
if (cached) {
|
||||
assert(screen->reg_classes[index]);
|
||||
*classes = screen->reg_classes[index];
|
||||
return cached;
|
||||
}
|
||||
|
||||
/* Otherwise, create one */
|
||||
struct ra_regs *created = create_register_set(work_count, screen->reg_classes[index]);
|
||||
|
||||
/* Cache it and use it */
|
||||
screen->regs[index] = created;
|
||||
|
||||
*classes = screen->reg_classes[index];
|
||||
return created;
|
||||
}
|
||||
|
||||
/* This routine performs the actual register allocation. It should be succeeded
|
||||
* by install_registers */
|
||||
|
||||
struct ra_graph *
|
||||
allocate_registers(compiler_context *ctx, bool *spilled)
|
||||
{
|
||||
/* The number of vec4 work registers available depends on when the
|
||||
* uniforms start, so compute that first */
|
||||
int work_count = 16 - MAX2((ctx->uniform_cutoff - 8), 0);
|
||||
unsigned *classes = NULL;
|
||||
struct ra_regs *regs = get_register_set(ctx->screen, work_count, &classes);
|
||||
|
||||
assert(regs != NULL);
|
||||
assert(classes != NULL);
|
||||
|
||||
/* No register allocation to do with no SSA */
|
||||
|
||||
if (!ctx->temp_count)
|
||||
|
||||
Reference in New Issue
Block a user