diff --git a/src/asahi/compiler/agx_optimizer.c b/src/asahi/compiler/agx_optimizer.c index ae40262810a..079f7161d90 100644 --- a/src/asahi/compiler/agx_optimizer.c +++ b/src/asahi/compiler/agx_optimizer.c @@ -3,8 +3,10 @@ * SPDX-License-Identifier: MIT */ +#include "agx_builder.h" #include "agx_compiler.h" #include "agx_minifloat.h" +#include "agx_opcodes.h" /* AGX peephole optimizer responsible for instruction combining. It operates in * a forward direction and a backward direction, in each case traversing in @@ -209,7 +211,7 @@ agx_optimizer_fmov_rev(agx_instr *I, agx_instr *use) } static void -agx_optimizer_copyprop(agx_instr **defs, agx_instr *I) +agx_optimizer_copyprop(agx_context *ctx, agx_instr **defs, agx_instr *I) { agx_foreach_ssa_src(I, s) { agx_index src = I->src[s]; @@ -227,6 +229,28 @@ agx_optimizer_copyprop(agx_instr **defs, agx_instr *I) if (def->src[0].size != src.size) continue; + /* Optimize split(64-bit uniform) so we can get better copyprop of the + * 32-bit uniform parts. This helps reduce moves with 64-bit uniforms. The split is rewritten as two 32-bit moves, inserted before it, that read the low and high halves of the uniform; both moves are recorded in defs as the new definitions of the split's destinations, and the split itself is then removed. 
+ */ + if (I->op == AGX_OPCODE_SPLIT && def->src[0].type == AGX_INDEX_UNIFORM && + src.size == AGX_SIZE_64 && I->dest[0].size == AGX_SIZE_32) { + + assert(I->nr_dests == 2 && "decomposing a 64-bit scalar"); + agx_builder b = agx_init_builder(ctx, agx_before_instr(I)); + + agx_index lo = def->src[0]; + lo.size = AGX_SIZE_32; + + agx_index hi = lo; + hi.value += 2 /* high half: half of 64-bits = 32-bits = 2 x 16-bits (assumes the uniform index counts 16-bit units; TODO confirm) */; + + defs[I->dest[0].value] = agx_mov_to(&b, I->dest[0], lo); + defs[I->dest[1].value] = agx_mov_to(&b, I->dest[1], hi); + + agx_remove_instruction(I); + continue; /* NOTE(review): I was just removed, yet the source loop over I continues; presumably fine because a split has a single source, but confirm */ + } + /* Immediate inlining happens elsewhere */ if (def->src[0].type == AGX_INDEX_IMMEDIATE) continue; @@ -326,7 +350,7 @@ agx_optimizer_forward(agx_context *ctx) { agx_instr **defs = calloc(ctx->alloc, sizeof(*defs)); - agx_foreach_instr_global(ctx, I) { + agx_foreach_instr_global_safe(ctx, I) { struct agx_opcode_info info = agx_opcodes_info[I->op]; agx_foreach_ssa_dest(I, d) { @@ -334,7 +358,7 @@ agx_optimizer_forward(agx_context *ctx) } /* Optimize moves */ - agx_optimizer_copyprop(defs, I); + agx_optimizer_copyprop(ctx, defs, I); /* Propagate fmov down */ if (info.is_float || I->op == AGX_OPCODE_FCMPSEL ||