panfrost/midgard: Allocate registers once (per-screen)

This should save a lot of per-compile time by using the RA the way it's
actually supposed to be used.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
This commit is contained in:
Alyssa Rosenzweig
2019-07-23 07:59:00 -07:00
parent 772a5f9814
commit 840b806d64
7 changed files with 86 additions and 19 deletions
+1 -1
View File
@@ -67,7 +67,7 @@ panfrost_shader_compile(struct panfrost_context *ctx, struct mali_shader_meta *m
.alpha_ref = state->alpha_state.ref_value
};
midgard_compile_shader_nir(s, &program, false);
midgard_compile_shader_nir(&ctx->compiler, s, &program, false);
/* Prepare the compiled binary for upload */
int size = program.compiled.size;
@@ -170,7 +170,7 @@ panfrost_compile_blend_shader(
/* Compile the built shader */
midgard_program program;
midgard_compile_shader_nir(shader, &program, true);
midgard_compile_shader_nir(&ctx->compiler, shader, &program, true);
/* Upload the shader */
@@ -91,6 +91,9 @@ struct panfrost_context {
/* Gallium context */
struct pipe_context base;
/* Compiler context */
struct midgard_screen compiler;
/* Bound job and map of panfrost_job_key to jobs */
struct panfrost_job *job;
struct hash_table *jobs;
+3
View File
@@ -188,6 +188,9 @@ typedef struct compiler_context {
nir_shader *nir;
gl_shader_stage stage;
/* The screen we correspond to */
struct midgard_screen *screen;
/* Is internally a blend shader? Depends on stage == FRAGMENT */
bool is_blend;
+2 -1
View File
@@ -2395,7 +2395,7 @@ midgard_get_first_tag_from_block(compiler_context *ctx, unsigned block_idx)
}
int
midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_blend)
midgard_compile_shader_nir(struct midgard_screen *screen, nir_shader *nir, midgard_program *program, bool is_blend)
{
struct util_dynarray *compiled = &program->compiled;
@@ -2403,6 +2403,7 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl
compiler_context ictx = {
.nir = nir,
.screen = screen,
.stage = nir->info.stage,
.is_blend = is_blend,
+19 -1
View File
@@ -26,6 +26,24 @@
#include "compiler/nir/nir.h"
#include "util/u_dynarray.h"
#include "util/register_allocate.h"
/* To be shoved inside panfrost_screen for the Gallium driver, or somewhere
 * else for Vulkan/standalone. The single compiler "screen" to be shared across
 * all shader compiles, used to store complex initialization (for instance,
 * related to register allocation), so that the expensive setup is done once
 * and then reused by every later compile.
 *
 * NOTE(review): entries are tested against NULL before use, so this struct
 * presumably must start out zero-initialized -- confirm at the allocation
 * site. */
struct midgard_screen {
/* Precomputed register allocation sets for varying numbers of work
 * registers, indexed by (work register count - 8). The zeroth entry
 * corresponds to 8 work registers and the eighth entry corresponds to
 * 16 work registers. An entry is NULL if that set has not been created
 * yet; sets are created on first use and then cached here. */
struct ra_regs *regs[9];
/* Work register classes corresponding to the register sets above, using
 * the same first index. The second index selects the class by vector
 * size: [0] is vec1, [1] is vec2, [2] is vec3 and [3] is vec4. Only
 * meaningful once the matching regs[] entry has been created. */
unsigned reg_classes[9][4];
};
/* Define the general compiler entry point */
@@ -92,7 +110,7 @@ typedef struct {
} midgard_program;
int
midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_blend);
midgard_compile_shader_nir(struct midgard_screen *screen, nir_shader *nir, midgard_program *program, bool is_blend);
/* NIR options are shared between the standalone compiler and the online
* compiler. Defining it here is the simplest, though maybe not the Right
+57 -15
View File
@@ -157,17 +157,12 @@ index_to_reg(compiler_context *ctx, struct ra_graph *g, int reg)
return r;
}
/* This routine performs the actual register allocation. It should be succeeded
* by install_registers */
/* This routine creates a register set. Should be called infrequently since
* it's slow and can be cached */
struct ra_graph *
allocate_registers(compiler_context *ctx, bool *spilled)
static struct ra_regs *
create_register_set(unsigned work_count, unsigned *classes)
{
/* The number of vec4 work registers available depends on when the
* uniforms start, so compute that first */
int work_count = 16 - MAX2((ctx->uniform_cutoff - 8), 0);
int virtual_count = work_count * WORK_STRIDE;
/* First, initialize the RA */
@@ -178,12 +173,10 @@ allocate_registers(compiler_context *ctx, bool *spilled)
int work_vec2 = ra_alloc_reg_class(regs);
int work_vec1 = ra_alloc_reg_class(regs);
unsigned classes[4] = {
work_vec1,
work_vec2,
work_vec3,
work_vec4
};
classes[0] = work_vec1;
classes[1] = work_vec2;
classes[2] = work_vec3;
classes[3] = work_vec4;
/* Add the full set of work registers */
for (unsigned i = 0; i < work_count; ++i) {
@@ -217,6 +210,55 @@ allocate_registers(compiler_context *ctx, bool *spilled)
/* We're done setting up */
ra_set_finalize(regs, NULL);
return regs;
}
/* This routine returns the register set for the given number of work
 * registers. Sets are cached on the screen: the first request for a given
 * work_count creates the set (slow), later requests reuse it.
 *
 * screen:     compiler screen holding the cache (regs / reg_classes)
 * work_count: number of work registers; must be within [8, 16]
 * classes:    out-parameter, pointed at the screen's register class array
 *             belonging to the returned set
 *
 * Returns the (possibly freshly created) register set; the screen keeps
 * ownership of both the set and the class array. */

static struct ra_regs *
get_register_set(struct midgard_screen *screen, unsigned work_count, unsigned **classes)
{
        /* Bounds check: the cache has one slot per work_count in [8, 16] */
        assert(work_count >= 8);
        assert(work_count <= 16);

        /* Slot 0 corresponds to 8 work registers */
        unsigned index = work_count - 8;

        /* On a miss, create the set and cache it. create_register_set
         * writes the classes for this slot directly into the screen. */
        if (!screen->regs[index])
                screen->regs[index] = create_register_set(work_count, screen->reg_classes[index]);

        /* reg_classes[index] is an array embedded in the screen, so its
         * address can never be NULL and there is nothing to assert on */
        *classes = screen->reg_classes[index];

        return screen->regs[index];
}
/* This routine performs the actual register allocation. It should be succeeded
* by install_registers */
struct ra_graph *
allocate_registers(compiler_context *ctx, bool *spilled)
{
/* The number of vec4 work registers available depends on when the
* uniforms start, so compute that first */
int work_count = 16 - MAX2((ctx->uniform_cutoff - 8), 0);
unsigned *classes = NULL;
struct ra_regs *regs = get_register_set(ctx->screen, work_count, &classes);
assert(regs != NULL);
assert(classes != NULL);
/* No register allocation to do with no SSA */
if (!ctx->temp_count)