From 233b77878df4d721150046c353bbdc56dc8d83f8 Mon Sep 17 00:00:00 2001 From: Job Noorman Date: Fri, 21 Nov 2025 10:51:38 +0100 Subject: [PATCH] ir3/ra: try to allocate overlapping regs for shared subreg movs This was implemented for vector RA but not for shared RA yet. Totals from 1361 (0.77% of 176279) affected shaders: Instrs: 1175437 -> 1170238 (-0.44%); split: -0.45%, +0.01% CodeSize: 2300656 -> 2290258 (-0.45%) NOPs: 221042 -> 220527 (-0.23%); split: -0.48%, +0.25% MOVs: 30645 -> 30643 (-0.01%); split: -0.01%, +0.00% COVs: 47425 -> 47016 (-0.86%) (ss): 35953 -> 35890 (-0.18%); split: -0.21%, +0.03% (sy): 20174 -> 20168 (-0.03%) (ss)-stall: 124094 -> 123625 (-0.38%); split: -0.38%, +0.00% (sy)-stall: 806166 -> 805832 (-0.04%); split: -0.06%, +0.02% Preamble Instrs: 173151 -> 171299 (-1.07%) Cat0: 250836 -> 250321 (-0.21%); split: -0.43%, +0.22% Cat1: 78738 -> 78327 (-0.52%); split: -0.52%, +0.00% Cat2: 386528 -> 382255 (-1.11%) Signed-off-by: Job Noorman Part-of: --- src/freedreno/ir3/ir3_shared_ra.c | 37 +++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/src/freedreno/ir3/ir3_shared_ra.c b/src/freedreno/ir3/ir3_shared_ra.c index 17d92bbbb32..9515eb250f0 100644 --- a/src/freedreno/ir3/ir3_shared_ra.c +++ b/src/freedreno/ir3/ir3_shared_ra.c @@ -667,9 +667,46 @@ free_space(struct ra_ctx *ctx, physreg_t start, unsigned size) } } +/* Try to give reg the physreg that already holds the half of srcs[0] selected by subreg_move. Returns that physreg when it fits inside the register file and get_reg_specified() accepts it; returns ~0 in every other case, which get_reg treats as no result and then continues with its regular allocation. subreg_move must not be IR3_SUBREG_MOVE_NONE. */ static physreg_t +try_allocate_src_subreg(struct ra_ctx *ctx, struct ir3_register *reg, + enum ir3_subreg_move subreg_move) +{ + assert(subreg_move != IR3_SUBREG_MOVE_NONE); + + /* Subreg moves always write a half register. */ + assert(reg_elem_size(reg) == 1); + + struct ir3_register *src = reg->instr->srcs[0]; + /* Give up unless srcs[0] passes ra_reg_is_src() and carries IR3_REG_SHARED. */ if (!ra_reg_is_src(src) || !(src->flags & IR3_REG_SHARED)) + return ~0; + + /* IR3_SUBREG_MOVE_LOWER keeps the first physreg of the src; any other move kind takes the next one. Presumably physregs are counted in half-register units - TODO confirm. */ unsigned offset = subreg_move == IR3_SUBREG_MOVE_LOWER ? 
0 : 1; + struct ra_interval *src_interval = ra_interval_get(ctx, src->def); + /* Candidate register: the physreg of the src interval shifted by offset. NOTE(review): assumes the interval of src->def still holds a valid physreg at this point - confirm. */ physreg_t src_physreg = ra_interval_get_physreg(src_interval) + offset; + unsigned file_size = reg_file_size(reg); + unsigned size = reg_size(reg); + + /* Use the candidate only if reg_size(reg) registers starting there stay within reg_file_size(reg) and get_reg_specified() returns true for it. */ if (src_physreg + size <= file_size && + get_reg_specified(ctx, reg, src_physreg)) { + return src_physreg; + } + + /* No usable candidate; get_reg compares the result against (physreg_t)~0. */ return ~0; +} + static physreg_t get_reg(struct ra_ctx *ctx, struct ir3_register *reg, bool src) { + /* For subreg moves (see ir3_is_subreg_move), try to allocate half of their + * full src for their dst. If this succeeds, the instruction can be removed. + */ + enum ir3_subreg_move subreg_move = ir3_is_subreg_move(reg->instr); + if (subreg_move != IR3_SUBREG_MOVE_NONE) { + physreg_t src_reg = try_allocate_src_subreg(ctx, reg, subreg_move); + if (src_reg != (physreg_t)~0) + return src_reg; + } + if (reg->merge_set && reg->merge_set->preferred_reg != (physreg_t)~0) { physreg_t preferred_reg = reg->merge_set->preferred_reg + reg->merge_set_offset;