From 7036d0fcf73ea0591685f95b8b09591b139d5394 Mon Sep 17 00:00:00 2001 From: Job Noorman Date: Fri, 16 Aug 2024 08:19:26 +0200 Subject: [PATCH] ir3: don't modify const state for the binning variant in ir3_cp ir3_cp uses the const state to lower immediates. It doesn't take the binning variant into account so in theory, it could add immediates to the const state for the binning variant, modifying the state after its layout had already been established for the non-binning variant. In practice, this probably works fine as the immediates are the last section of the const state so the layout wouldn't be changed by ir3_cp. However, there is very little benefit in doing this (the chances for an immediate being necessary in the binning variant but not in the non-binning variant are small) so it's better to stick to the global invariant of not changing the const state in the binning variant. This commit makes sure that immediates are never added by ir3_cp for the binning variant. Some refactoring was necessary, however, since the lookup and modification of the const state were a bit intertwined. More specifically, currently, the immediate storage in the const state would always be enlarged by ir3_cp whenever its full, even when it might actually contain the immediate. To fix this, the logic is split in two functions: ir3_const_find_imm() for lookup (allowed to be called by the binning variant), and ir3_const_add_imm for adding immediates (not allowed to be called by the binning variant). Signed-off-by: Job Noorman Part-of: --- src/freedreno/ir3/instr-a3xx.h | 2 ++ src/freedreno/ir3/ir3_cp.c | 39 +++++-------------------- src/freedreno/ir3/ir3_shader.c | 53 ++++++++++++++++++++++++++++++++++ src/freedreno/ir3/ir3_shader.h | 3 ++ 4 files changed, 66 insertions(+), 31 deletions(-) diff --git a/src/freedreno/ir3/instr-a3xx.h b/src/freedreno/ir3/instr-a3xx.h index 39471c1f4dd..5032fa10b28 100644 --- a/src/freedreno/ir3/instr-a3xx.h +++ b/src/freedreno/ir3/instr-a3xx.h @@ -524,6 +524,8 @@ regid(int num, int comp) #define REG_P0 62 /* predicate register */ #define REG_P0_X regid(REG_P0, 0) /* p0.x */ +#define INVALID_CONST_REG UINT16_MAX + /* With is_bindless_s2en = 1, this determines whether bindless is enabled and * if so, how to get the (base, index) pair for both sampler and texture. * There is a single base embedded in the instruction, which is always used diff --git a/src/freedreno/ir3/ir3_cp.c b/src/freedreno/ir3/ir3_cp.c index 4c60c397786..1fecb24f487 100644 --- a/src/freedreno/ir3/ir3_cp.c +++ b/src/freedreno/ir3/ir3_cp.c @@ -187,43 +187,20 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, unsigned n, new_flags &= ~IR3_REG_FNEG; } - /* Reallocate for 4 more elements whenever it's necessary. Note that ir3 - * printing relies on having groups of 4 dwords, so we fill the unused - * slots with a dummy value. - */ - struct ir3_const_state *const_state = ir3_const_state(ctx->so); - if (const_state->immediates_count == const_state->immediates_size) { - const_state->immediates = rerzalloc( - const_state, const_state->immediates, - __typeof__(const_state->immediates[0]), const_state->immediates_size, - const_state->immediates_size + 4); - const_state->immediates_size += 4; + reg->num = ir3_const_find_imm(ctx->so, reg->uim_val); - for (int i = const_state->immediates_count; - i < const_state->immediates_size; i++) - const_state->immediates[i] = 0xd0d0d0d0; - } - - int i; - for (i = 0; i < const_state->immediates_count; i++) { - if (const_state->immediates[i] == reg->uim_val) - break; - } - - if (i == const_state->immediates_count) { - /* Add on a new immediate to be pushed, if we have space left in the - * constbuf. - */ - if (const_state->offsets.immediate + const_state->immediates_count / 4 >= - ir3_max_const(ctx->so)) + if (reg->num == INVALID_CONST_REG) { + /* Don't modify the const state for the binning variant. */ + if (ctx->so->binning_pass) return false; - const_state->immediates[i] = reg->uim_val; - const_state->immediates_count++; + reg->num = ir3_const_add_imm(ctx->so, reg->uim_val); + + if (reg->num == INVALID_CONST_REG) + return false; } reg->flags = new_flags; - reg->num = i + (4 * const_state->offsets.immediate); instr->srcs[n] = reg; diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c index 80e99a87be1..d4164dec445 100644 --- a/src/freedreno/ir3/ir3_shader.c +++ b/src/freedreno/ir3/ir3_shader.c @@ -43,6 +43,59 @@ #include "disasm.h" +static uint16_t +const_imm_index_to_reg(const struct ir3_const_state *const_state, unsigned i) +{ + return i + (4 * const_state->offsets.immediate); +} + +uint16_t +ir3_const_find_imm(struct ir3_shader_variant *v, uint32_t imm) +{ + const struct ir3_const_state *const_state = ir3_const_state(v); + + for (unsigned i = 0; i < const_state->immediates_count; i++) { + if (const_state->immediates[i] == imm) + return const_imm_index_to_reg(const_state, i); + } + + return INVALID_CONST_REG; +} + +uint16_t +ir3_const_add_imm(struct ir3_shader_variant *v, uint32_t imm) +{ + struct ir3_const_state *const_state = ir3_const_state(v); + + /* Reallocate for 4 more elements whenever it's necessary. Note that ir3 + * printing relies on having groups of 4 dwords, so we fill the unused + * slots with a dummy value. + */ + if (const_state->immediates_count == const_state->immediates_size) { + const_state->immediates = rerzalloc( + const_state, const_state->immediates, + __typeof__(const_state->immediates[0]), const_state->immediates_size, + const_state->immediates_size + 4); + const_state->immediates_size += 4; + + for (int i = const_state->immediates_count; + i < const_state->immediates_size; i++) { + const_state->immediates[i] = 0xd0d0d0d0; + } + } + + /* Add on a new immediate to be pushed, if we have space left in the + * constbuf. + */ + if (const_state->offsets.immediate + const_state->immediates_count / 4 >= + ir3_max_const(v)) { + return INVALID_CONST_REG; + } + + const_state->immediates[const_state->immediates_count] = imm; + return const_imm_index_to_reg(const_state, const_state->immediates_count++); +} + int ir3_glsl_type_size(const struct glsl_type *type, bool bindless) { diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index 4a403ddb333..faa587069b1 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -986,6 +986,9 @@ ir3_max_const(const struct ir3_shader_variant *v) return _ir3_max_const(v, v->key.safe_constlen); } +uint16_t ir3_const_find_imm(struct ir3_shader_variant *v, uint32_t imm); +uint16_t ir3_const_add_imm(struct ir3_shader_variant *v, uint32_t imm); + /* Return true if a variant may need to be recompiled due to exceeding the * maximum "safe" constlen. */