From e20ae14978305b3bf738265a6041fc4ffc33c012 Mon Sep 17 00:00:00 2001 From: Iago Toral Quiroga Date: Fri, 19 Feb 2021 11:08:36 +0100 Subject: [PATCH] broadcom/compiler: fix ldunif optimization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we look back for a previous uniform definition we want to start looking from the current position of the cursor, not the end of the current block. The latter only works when translating from NIR, since in that case both always match, but any optimization pass may rewrite code and emit uniforms at any place in the middle of the program. Also, ntq_store_dest expects result to be written by the last instruction to handle the case where it is stored to a NIR register. That won't be the case if the result comes from an optimized uniform, so in that case we need to insert a MOV, like we do in non-uniform control flow. v2: fix ntq_store_dest for optimized uniforms. Fixes: 14af7b3085 ('broadcom/compiler: don't emit redundant ldunif') Reviewed-by: Alejandro Piñeiro Acked-by: Eric Anholt Part-of: --- src/broadcom/compiler/nir_to_vir.c | 19 +++++++++++++++---- src/broadcom/compiler/vir.c | 3 ++- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index 30b018de23f..d27b1845408 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -655,6 +655,12 @@ is_ld_signal(const struct v3d_qpu_sig *sig) sig->ldtlbu); } +static inline bool +is_ldunif_signal(const struct v3d_qpu_sig *sig) +{ + return sig->ldunif || sig->ldunifrf; +} + /** * This function is responsible for getting VIR results into the associated * storage for a NIR instruction. 
@@ -678,8 +684,12 @@ ntq_store_dest(struct v3d_compile *c, nir_dest *dest, int chan, if (!list_is_empty(&c->cur_block->instructions)) last_inst = (struct qinst *)c->cur_block->instructions.prev; - assert((result.file == QFILE_TEMP && - last_inst && last_inst == c->defs[result.index])); + bool is_reused_uniform = + is_ldunif_signal(&c->defs[result.index]->qpu.sig) && + last_inst != c->defs[result.index]; + + assert(result.file == QFILE_TEMP && last_inst && + (last_inst == c->defs[result.index] || is_reused_uniform)); if (dest->is_ssa) { assert(chan < dest->ssa.num_components); @@ -706,8 +716,9 @@ ntq_store_dest(struct v3d_compile *c, nir_dest *dest, int chan, * the store into the nir_register, then emit a MOV * that can be. */ - if (vir_in_nonuniform_control_flow(c) && - is_ld_signal(&c->defs[last_inst->dst.index]->qpu.sig)) { + if (is_reused_uniform || + (vir_in_nonuniform_control_flow(c) && + is_ld_signal(&c->defs[last_inst->dst.index]->qpu.sig))) { result = vir_MOV(c, result); last_inst = c->defs[result.index]; } diff --git a/src/broadcom/compiler/vir.c b/src/broadcom/compiler/vir.c index 11cd22170ae..3d816cbb2b5 100644 --- a/src/broadcom/compiler/vir.c +++ b/src/broadcom/compiler/vir.c @@ -1416,7 +1416,8 @@ try_opt_ldunif(struct v3d_compile *c, uint32_t index, struct qreg *unif) { uint32_t count = 20; struct qinst *prev_inst = NULL; - vir_for_each_inst_rev(inst, c->cur_block) { + list_for_each_entry_from_rev(struct qinst, inst, c->cursor.link->prev, + &c->cur_block->instructions, link) { if ((inst->qpu.sig.ldunif || inst->qpu.sig.ldunifrf) && inst->uniform == index) { prev_inst = inst;