From c43d0e4e4fb0d7faa11f6fe3cd2e90f0805b6f28 Mon Sep 17 00:00:00 2001 From: Job Noorman Date: Thu, 1 Feb 2024 14:51:23 +0100 Subject: [PATCH] ir3: insert predicate conversions after their source Instead of creating a cmps.s.ne for every use of a predicate, create just one and insert it after the instruction whose def is tested. This reduces the number of compares or, when they are folded into bitwise operations, those operations. It also decreases register pressure on GPRs by increasing pressure on predicate registers. This should be preferred in general since at worst, the predicate register will be spilled to a GPR again. Signed-off-by: Job Noorman Part-of: --- src/freedreno/ir3/ir3.c | 15 +++++++++++++++ src/freedreno/ir3/ir3.h | 2 ++ src/freedreno/ir3/ir3_context.c | 17 ++++++++++++++++- src/freedreno/ir3/ir3_context.h | 1 + 4 files changed, 34 insertions(+), 1 deletion(-) diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c index 6a89f9d9f96..fd0f18c3703 100644 --- a/src/freedreno/ir3/ir3.c +++ b/src/freedreno/ir3/ir3.c @@ -560,6 +560,21 @@ ir3_block_get_last_non_terminator(struct ir3_block *block) return NULL; } +struct ir3_instruction * +ir3_block_get_last_phi(struct ir3_block *block) +{ + struct ir3_instruction *last_phi = NULL; + + foreach_instr (instr, &block->instr_list) { + if (instr->opc != OPC_META_PHI) + break; + + last_phi = instr; + } + + return last_phi; +} + void ir3_block_add_predecessor(struct ir3_block *block, struct ir3_block *pred) { diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 9e5d6c02e83..3120dde2fd3 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -703,6 +703,8 @@ struct ir3_instruction *ir3_block_take_terminator(struct ir3_block *block); struct ir3_instruction * ir3_block_get_last_non_terminator(struct ir3_block *block); +struct ir3_instruction *ir3_block_get_last_phi(struct ir3_block *block); + static inline struct ir3_block * ir3_after_preamble(struct ir3 *ir) { diff --git a/src/freedreno/ir3/ir3_context.c b/src/freedreno/ir3/ir3_context.c index 5618188f03a..ecde7e6f033 100644 --- a/src/freedreno/ir3/ir3_context.c +++ b/src/freedreno/ir3/ir3_context.c @@ -73,6 +73,7 @@ ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader *shader, _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal); ctx->sel_cond_conversions = _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal); + ctx->predicate_conversions = _mesa_pointer_hash_table_create(ctx); /* TODO: maybe generate some sort of bitmask of what key * lowers vs what shader has (ie. no need to lower @@ -460,7 +461,12 @@ ir3_get_addr1(struct ir3_context *ctx, unsigned const_val) struct ir3_instruction * ir3_get_predicate(struct ir3_context *ctx, struct ir3_instruction *src) { - struct ir3_block *b = ctx->block; + struct hash_entry *src_entry = + _mesa_hash_table_search(ctx->predicate_conversions, src); + if (src_entry) + return src_entry->data; + + struct ir3_block *b = src->block; struct ir3_instruction *cond; /* NOTE: we use cpms.s.ne x, 0 to move x into a predicate register */ @@ -472,6 +478,15 @@ ir3_get_predicate(struct ir3_context *ctx, struct ir3_instruction *src) /* condition always goes in predicate register: */ cond->dsts[0]->flags |= IR3_REG_PREDICATE; + /* phi's should stay first in a block */ + if (src->opc == OPC_META_PHI) + ir3_instr_move_after(zero, ir3_block_get_last_phi(src->block)); + else + ir3_instr_move_after(zero, src); + + ir3_instr_move_after(cond, zero); + + _mesa_hash_table_insert(ctx->predicate_conversions, src, cond); return cond; } diff --git a/src/freedreno/ir3/ir3_context.h b/src/freedreno/ir3/ir3_context.h index 322b5de9de3..adb4403aecd 100644 --- a/src/freedreno/ir3/ir3_context.h +++ b/src/freedreno/ir3/ir3_context.h @@ -128,6 +128,7 @@ struct ir3_context { struct hash_table_u64 *addr1_ht; struct hash_table *sel_cond_conversions; + struct hash_table *predicate_conversions; /* last dst array, for indirect we need to insert a var-store. */