From 93547d45ceb0a59f429f6029b339c044f8aaabaa Mon Sep 17 00:00:00 2001
From: Mark Collins
Date: Tue, 15 Apr 2025 13:01:56 +0200
Subject: [PATCH] ir3/a7xx: Add post-RA pass to track liveness and insert (last)

Introduces a backwards dataflow analysis pass to determine when a
certain register is always written to prior to being read in a similar
manner to SSA liveness but performed after RA which we can use to
determine when we can insert (last) on src regs on A7XX.

Passing VK-CTS: dEQP-VK.pipeline.*

Signed-off-by: Mark Collins
Co-Authored-by: Job Noorman
Part-of:
---
 src/freedreno/ir3/ir3.h          |  9 +++-
 src/freedreno/ir3/ir3_legalize.c | 90 ++++++++++++++++++++++++++++++++
 src/freedreno/ir3/ir3_print.c    |  2 +
 src/freedreno/ir3/ir3_validate.c |  3 +-
 4 files changed, 101 insertions(+), 3 deletions(-)

diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index 1f05a2ab276..311e43b8603 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -148,8 +148,13 @@ typedef enum ir3_register_flags {
    IR3_REG_EARLY_CLOBBER = BIT(17),
 
    /* If this is the last usage of a specific value in the register, the
-    * register cannot be read without being written to first after this. 
-    * Note: This effectively has the same semantics as IR3_REG_KILL.
+    * register cannot be read without being written to first after this.
+    * This maps to the "(last)" attribute on source GPRs in shader
+    * instructions which was introduced in A7XX.
+    *
+    * Note: This effectively has the same semantics as IR3_REG_KILL but
+    * is tracked after register assignment. Additionally, this doesn't
+    * cover any const or shared registers.
     */
    IR3_REG_LAST_USE = BIT(18),
 
diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c
index 996fef913ee..21d1e16e78e 100644
--- a/src/freedreno/ir3/ir3_legalize.c
+++ b/src/freedreno/ir3/ir3_legalize.c
@@ -1720,6 +1720,89 @@ helper_sched(struct ir3_legalize_ctx *ctx, struct ir3 *ir,
    }
 }
 
+struct ir3_last_block_data {
+   /* Whether a read will be done on a register at a later point, it is
+    * considered safe to set (last) when this is false for a particular
+    * register.
+    */
+   regmask_t will_read;
+};
+
+/* Use a backwards dataflow analysis to determine when a certain register is
+ * always written to prior to being read in a similar manner to SSA liveness
+ * which we can use to determine when we can insert (last) on src regs.
+ */
+static void
+track_last(struct ir3_legalize_ctx *ctx, struct ir3 *ir,
+           struct ir3_shader_variant *so)
+{
+   foreach_block (block, &ir->block_list) {
+      struct ir3_last_block_data *bd = rzalloc(ctx, struct ir3_last_block_data);
+
+      regmask_init(&bd->will_read, so->mergedregs);
+
+      block->data = bd;
+   }
+
+   bool progress;
+   do {
+      progress = false;
+      regmask_t will_read;
+      regmask_init(&will_read, so->mergedregs);
+
+      foreach_block_rev (block, &ir->block_list) {
+         struct ir3_last_block_data *bd = block->data;
+
+         for (unsigned i = 0; i < ARRAY_SIZE(block->successors); i++) {
+            struct ir3_block *succ = block->successors[i];
+            if (!succ)
+               continue;
+
+            struct ir3_last_block_data *succ_bd = succ->data;
+            regmask_or(&will_read, &will_read, &succ_bd->will_read);
+         }
+
+         foreach_instr_rev (instr, &block->instr_list) {
+            for (unsigned i = 0; i < instr->dsts_count; i++) {
+               struct ir3_register *dst = instr->dsts[i];
+               if (dst->flags & (IR3_REG_IMMED | IR3_REG_CONST |
+                                 IR3_REG_SHARED | IR3_REG_RT)) {
+                  continue;
+               }
+
+               regmask_clear(&will_read, dst);
+            }
+
+            for (unsigned i = 0; i < instr->srcs_count; i++) {
+               struct ir3_register *src = instr->srcs[i];
+               if (src->flags &
+                   (IR3_REG_IMMED | IR3_REG_CONST | IR3_REG_SHARED)) {
+                  continue;
+               }
+
+               if (!regmask_get(&will_read, src)) {
+                  if (!(src->flags & IR3_REG_LAST_USE)) {
+                     progress = true;
+                     src->flags |= IR3_REG_LAST_USE;
+                  }
+               } else if (src->flags & IR3_REG_LAST_USE) {
+                  progress = true;
+                  src->flags &= ~IR3_REG_LAST_USE;
+               }
+
+               /* Setting will_read immediately ensures that only the first src
+                * of multiple identical srcs will have (last) set. This matches
+                * the blob's behavior.
+                */
+               regmask_set(&will_read, src);
+            }
+         }
+
+         bd->will_read = will_read;
+      }
+   } while (progress);
+}
+
 bool
 ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary)
 {
@@ -1893,6 +1976,13 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary)
       progress |= expand_dummy_dests(block);
    }
 
+   /* Note: insert (last) before alias.tex to have the sources that are actually
+    * read by instructions (as opposed to alias registers) more easily
+    * available.
+    */
+   if (so->compiler->gen >= 7)
+      track_last(ctx, ir, so);
+
    ir3_insert_alias_tex(ir);
    ir3_count_instructions(ir);
    resolve_jumps(ir);
diff --git a/src/freedreno/ir3/ir3_print.c b/src/freedreno/ir3/ir3_print.c
index 867b931d430..5e6f5d5a587 100644
--- a/src/freedreno/ir3/ir3_print.c
+++ b/src/freedreno/ir3/ir3_print.c
@@ -327,6 +327,8 @@ print_reg_name(struct log_stream *stream, struct ir3_instruction *instr,
 
    if (reg->flags & IR3_REG_FIRST_KILL)
       mesa_log_stream_printf(stream, "(kill)");
+   if (reg->flags & IR3_REG_LAST_USE)
+      mesa_log_stream_printf(stream, "(last)");
    if (reg->flags & IR3_REG_UNUSED)
       mesa_log_stream_printf(stream, "(unused)");
 
diff --git a/src/freedreno/ir3/ir3_validate.c b/src/freedreno/ir3/ir3_validate.c
index 8b730d9dc9d..3db61a8cfc6 100644
--- a/src/freedreno/ir3/ir3_validate.c
+++ b/src/freedreno/ir3/ir3_validate.c
@@ -75,7 +75,8 @@ validate_src(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr,
    if (reg->flags & IR3_REG_ALIAS) {
       unsigned valid_flags = IR3_REG_ALIAS | IR3_REG_FIRST_ALIAS |
                              IR3_REG_HALF | IR3_REG_CONST | IR3_REG_IMMED |
-                             IR3_REG_SSA | IR3_REG_KILL | IR3_REG_FIRST_KILL;
+                             IR3_REG_SSA | IR3_REG_KILL | IR3_REG_FIRST_KILL |
+                             IR3_REG_LAST_USE;
       validate_assert(ctx, !(reg->flags & ~valid_flags));
    }
 