diff --git a/src/freedreno/ir3/instr-a3xx.h b/src/freedreno/ir3/instr-a3xx.h index 39471c1f4dd..5032fa10b28 100644 --- a/src/freedreno/ir3/instr-a3xx.h +++ b/src/freedreno/ir3/instr-a3xx.h @@ -524,6 +524,8 @@ regid(int num, int comp) #define REG_P0 62 /* predicate register */ #define REG_P0_X regid(REG_P0, 0) /* p0.x */ +#define INVALID_CONST_REG UINT16_MAX + /* With is_bindless_s2en = 1, this determines whether bindless is enabled and * if so, how to get the (base, index) pair for both sampler and texture. * There is a single base embedded in the instruction, which is always used diff --git a/src/freedreno/ir3/ir3_cp.c b/src/freedreno/ir3/ir3_cp.c index 4c60c397786..1fecb24f487 100644 --- a/src/freedreno/ir3/ir3_cp.c +++ b/src/freedreno/ir3/ir3_cp.c @@ -187,43 +187,20 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, unsigned n, new_flags &= ~IR3_REG_FNEG; } - /* Reallocate for 4 more elements whenever it's necessary. Note that ir3 - * printing relies on having groups of 4 dwords, so we fill the unused - * slots with a dummy value. - */ - struct ir3_const_state *const_state = ir3_const_state(ctx->so); - if (const_state->immediates_count == const_state->immediates_size) { - const_state->immediates = rerzalloc( - const_state, const_state->immediates, - __typeof__(const_state->immediates[0]), const_state->immediates_size, - const_state->immediates_size + 4); - const_state->immediates_size += 4; + reg->num = ir3_const_find_imm(ctx->so, reg->uim_val); - for (int i = const_state->immediates_count; - i < const_state->immediates_size; i++) - const_state->immediates[i] = 0xd0d0d0d0; - } - - int i; - for (i = 0; i < const_state->immediates_count; i++) { - if (const_state->immediates[i] == reg->uim_val) - break; - } - - if (i == const_state->immediates_count) { - /* Add on a new immediate to be pushed, if we have space left in the - * constbuf. - */ - if (const_state->offsets.immediate + const_state->immediates_count / 4 >= - ir3_max_const(ctx->so)) + if (reg->num == INVALID_CONST_REG) { + /* Don't modify the const state for the binning variant. */ + if (ctx->so->binning_pass) return false; - const_state->immediates[i] = reg->uim_val; - const_state->immediates_count++; + reg->num = ir3_const_add_imm(ctx->so, reg->uim_val); + + if (reg->num == INVALID_CONST_REG) + return false; } reg->flags = new_flags; - reg->num = i + (4 * const_state->offsets.immediate); instr->srcs[n] = reg; diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c index 80e99a87be1..d4164dec445 100644 --- a/src/freedreno/ir3/ir3_shader.c +++ b/src/freedreno/ir3/ir3_shader.c @@ -43,6 +43,59 @@ #include "disasm.h" +static uint16_t +const_imm_index_to_reg(const struct ir3_const_state *const_state, unsigned i) +{ + return i + (4 * const_state->offsets.immediate); +} + +uint16_t +ir3_const_find_imm(struct ir3_shader_variant *v, uint32_t imm) +{ + const struct ir3_const_state *const_state = ir3_const_state(v); + + for (unsigned i = 0; i < const_state->immediates_count; i++) { + if (const_state->immediates[i] == imm) + return const_imm_index_to_reg(const_state, i); + } + + return INVALID_CONST_REG; +} + +uint16_t +ir3_const_add_imm(struct ir3_shader_variant *v, uint32_t imm) +{ + struct ir3_const_state *const_state = ir3_const_state(v); + + /* Reallocate for 4 more elements whenever it's necessary. Note that ir3 + * printing relies on having groups of 4 dwords, so we fill the unused + * slots with a dummy value. + */ + if (const_state->immediates_count == const_state->immediates_size) { + const_state->immediates = rerzalloc( + const_state, const_state->immediates, + __typeof__(const_state->immediates[0]), const_state->immediates_size, + const_state->immediates_size + 4); + const_state->immediates_size += 4; + + for (int i = const_state->immediates_count; + i < const_state->immediates_size; i++) { + const_state->immediates[i] = 0xd0d0d0d0; + } + } + + /* Add on a new immediate to be pushed, if we have space left in the + * constbuf. + */ + if (const_state->offsets.immediate + const_state->immediates_count / 4 >= + ir3_max_const(v)) { + return INVALID_CONST_REG; + } + + const_state->immediates[const_state->immediates_count] = imm; + return const_imm_index_to_reg(const_state, const_state->immediates_count++); +} + int ir3_glsl_type_size(const struct glsl_type *type, bool bindless) { diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index 4a403ddb333..faa587069b1 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -986,6 +986,9 @@ ir3_max_const(const struct ir3_shader_variant *v) return _ir3_max_const(v, v->key.safe_constlen); } +uint16_t ir3_const_find_imm(struct ir3_shader_variant *v, uint32_t imm); +uint16_t ir3_const_add_imm(struct ir3_shader_variant *v, uint32_t imm); + /* Return true if a variant may need to be recompiled due to exceeding the * maximum "safe" constlen. */