diff --git a/src/panfrost/midgard/compiler.h b/src/panfrost/midgard/compiler.h index 1cbebdbef2e..edf0c105a19 100644 --- a/src/panfrost/midgard/compiler.h +++ b/src/panfrost/midgard/compiler.h @@ -184,6 +184,14 @@ typedef struct midgard_block { * boolean for passes to use as they see fit, provided they * clean up later */ bool visited; + + /* In liveness analysis, these are live masks (per-component) for + * indices for the block. Scalar compilers have the luxury of using + * simple bit fields, but for us, liveness is a vector idea. We use + * 8-bit to allow finegrained tracking up to vec8. If you're + * implementing vec16 on Panfrost... I'm sorry. */ + uint8_t *live_in; + uint8_t *live_out; } midgard_block; typedef struct midgard_bundle { diff --git a/src/panfrost/midgard/midgard_ra.c b/src/panfrost/midgard/midgard_ra.c index ebd085cd9a3..9d38054f691 100644 --- a/src/panfrost/midgard/midgard_ra.c +++ b/src/panfrost/midgard/midgard_ra.c @@ -26,6 +26,7 @@ #include "midgard_ops.h" #include "util/register_allocate.h" #include "util/u_math.h" +#include "util/u_memory.h" /* For work registers, we can subdivide in various ways. So we create * classes for the various sizes and conflict accordingly, keeping in @@ -516,6 +517,165 @@ mir_lower_special_reads(compiler_context *ctx) free(texw); } +/* Routines for liveness analysis */ + +static void +liveness_gen(uint8_t *live, unsigned node, unsigned max, unsigned mask) +{ + if ((node < 0) || (node >= max)) + return; + + live[node] |= mask; +} + +static void +liveness_kill(uint8_t *live, unsigned node, unsigned max, unsigned mask) +{ + if ((node < 0) || (node >= max)) + return; + + live[node] &= ~mask; +} + +/* Updates live_in for a single instruction */ + +static void +liveness_ins_update(uint8_t *live, midgard_instruction *ins, unsigned max) +{ + /* live_in[s] = GEN[s] + (live_out[s] - KILL[s]) */ + + liveness_kill(live, ins->ssa_args.dest, max, ins->mask); + + mir_foreach_src(ins, src) { + unsigned node = ins->ssa_args.src[src]; + unsigned mask = mir_mask_of_read_components(ins, node); + + liveness_gen(live, node, max, mask); + } +} + +/* live_out[s] = sum { p in succ[s] } ( live_in[p] ) */ + +static void +liveness_block_live_out(compiler_context *ctx, midgard_block *blk) +{ + mir_foreach_successor(blk, succ) { + for (unsigned i = 0; i < ctx->temp_count; ++i) + blk->live_out[i] |= succ->live_in[i]; + } +} + +/* Liveness analysis is a backwards-may dataflow analysis pass. Within a block, + * we compute live_out from live_in. The intrablock pass is linear-time. It + * returns whether progress was made. */ + +static bool +liveness_block_update(compiler_context *ctx, midgard_block *blk) +{ + bool progress = false; + + liveness_block_live_out(ctx, blk); + + uint8_t *live = mem_dup(blk->live_out, ctx->temp_count); + + mir_foreach_instr_in_block_rev(blk, ins) + liveness_ins_update(live, ins, ctx->temp_count); + + /* To figure out progress, diff live_in */ + + for (unsigned i = 0; (i < ctx->temp_count) && !progress; ++i) + progress |= (blk->live_in[i] != live[i]); + + free(blk->live_in); + blk->live_in = live; + + return progress; +} + +/* Globally, liveness analysis uses a fixed-point algorithm based on a + * worklist. We initialize a work list with the exit block. We iterate the work + * list to compute live_in from live_out for each block on the work list, + * adding the predecessors of the block to the work list if we made progress. + */ + +static void +mir_compute_liveness( + compiler_context *ctx, + struct ra_graph *g) +{ + /* List of midgard_block */ + struct set *work_list; + + work_list = _mesa_set_create(ctx, + _mesa_hash_pointer, + _mesa_key_pointer_equal); + + /* Allocate */ + + mir_foreach_block(ctx, block) { + block->live_in = calloc(ctx->temp_count, 1); + block->live_out = calloc(ctx->temp_count, 1); + } + + /* Initialize the work list with the exit block */ + struct set_entry *cur; + + midgard_block *exit = mir_exit_block(ctx); + cur = _mesa_set_add(work_list, exit); + + /* Iterate the work list */ + + do { + /* Pop off a block */ + midgard_block *blk = (struct midgard_block *) cur->key; + _mesa_set_remove(work_list, cur); + + /* Update its liveness information */ + bool progress = liveness_block_update(ctx, blk); + + /* If we made progress, we need to process the predecessors */ + + if (progress || (blk == exit)) { + mir_foreach_predecessor(blk, pred) + _mesa_set_add(work_list, pred); + } + } while((cur = _mesa_set_next_entry(work_list, NULL)) != NULL); + + /* Now that every block has live_in/live_out computed, we can determine + * interference by walking each block linearly. Take live_out at the + * end of each block and walk the block backwards. */ + + mir_foreach_block(ctx, blk) { + uint8_t *live = calloc(ctx->temp_count, 1); + + mir_foreach_successor(blk, succ) { + for (unsigned i = 0; i < ctx->temp_count; ++i) + live[i] |= succ->live_in[i]; + } + + mir_foreach_instr_in_block_rev(blk, ins) { + /* Mark all registers live after the instruction as + * interfering with the destination */ + + unsigned dest = ins->ssa_args.dest; + + if (dest >= 0 && dest < ctx->temp_count) { + for (unsigned i = 0; i < ctx->temp_count; ++i) + if (live[i]) + ra_add_node_interference(g, dest, i); + } + + /* Update live_in */ + liveness_ins_update(live, ins, ctx->temp_count); + } + } + + mir_foreach_block(ctx, blk) { + free(blk->live_in); + free(blk->live_out); + } +} + /* This routine performs the actual register allocation. It should be succeeded * by install_registers */ @@ -605,76 +765,7 @@ allocate_registers(compiler_context *ctx, bool *spilled) ra_set_node_class(g, i, classes[class]); } - /* Determine liveness */ - - int *live_start = malloc(nodes * sizeof(int)); - int *live_end = malloc(nodes * sizeof(int)); - - /* Initialize as non-existent */ - - for (int i = 0; i < nodes; ++i) { - live_start[i] = live_end[i] = -1; - } - - int d = 0; - - mir_foreach_block(ctx, block) { - mir_foreach_instr_in_block(block, ins) { - if (ins->ssa_args.dest < SSA_FIXED_MINIMUM) { - /* If this destination is not yet live, it is - * now since we just wrote it */ - - int dest = ins->ssa_args.dest; - - if (dest >= 0 && live_start[dest] == -1) - live_start[dest] = d; - } - - /* Since we just used a source, the source might be - * dead now. Scan the rest of the block for - * invocations, and if there are none, the source dies - * */ - - for (int src = 0; src < ARRAY_SIZE(ins->ssa_args.src); ++src) { - int s = ins->ssa_args.src[src]; - - if (s < 0) continue; - - if (s >= SSA_FIXED_MINIMUM) continue; - - if (!mir_is_live_after(ctx, block, ins, s)) { - live_end[s] = d; - } - } - - ++d; - } - } - - /* If a node still hasn't been killed, kill it now */ - - for (int i = 0; i < nodes; ++i) { - /* live_start == -1 most likely indicates a pinned output */ - - if (live_end[i] == -1) - live_end[i] = d; - } - - /* Setup interference between nodes that are live at the same time */ - - for (int i = 0; i < nodes; ++i) { - for (int j = i + 1; j < nodes; ++j) { - bool j_overlaps_i = live_start[j] < live_end[i]; - bool i_overlaps_j = live_end[j] < live_start[i]; - - if (i_overlaps_j || j_overlaps_i) - ra_add_node_interference(g, i, j); - } - } - - /* Cleanup */ - free(live_start); - free(live_end); + mir_compute_liveness(ctx, g); if (!ra_allocate(g)) { *spilled = true;