diff --git a/src/freedreno/common/freedreno_dev_info.h b/src/freedreno/common/freedreno_dev_info.h index 5d08c41df2e..a426c0b3d82 100644 --- a/src/freedreno/common/freedreno_dev_info.h +++ b/src/freedreno/common/freedreno_dev_info.h @@ -187,6 +187,8 @@ struct fd_dev_info { /* See ir3_compiler::has_scalar_alu. */ bool has_scalar_alu; + /* See ir3_compiler::has_scalar_predicates. */ + bool has_scalar_predicates; /* See ir3_compiler::has_early_preamble. */ bool has_early_preamble; diff --git a/src/freedreno/common/freedreno_devices.py b/src/freedreno/common/freedreno_devices.py index 72df7ae1eb9..8fc040c1e43 100644 --- a/src/freedreno/common/freedreno_devices.py +++ b/src/freedreno/common/freedreno_devices.py @@ -911,6 +911,7 @@ a7xx_base = A6XXProps( line_width_min = 1.0, line_width_max = 127.5, has_scalar_alu = True, + has_scalar_predicates = True, has_coherent_ubwc_flag_caches = True, has_isam_v = True, has_ssbo_imm_offsets = True, diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c index 04895c37af3..144e82dc5c5 100644 --- a/src/freedreno/ir3/ir3.c +++ b/src/freedreno/ir3/ir3.c @@ -1367,7 +1367,8 @@ is_scalar_alu(struct ir3_instruction *instr, instr->opc != OPC_SCAN_CLUSTERS_MACRO && instr->opc != OPC_SCAN_MACRO && instr->opc != OPC_MOVS && - is_alu(instr) && (instr->dsts[0]->flags & IR3_REG_SHARED) && + is_alu(instr) && + (instr->dsts[0]->flags & (IR3_REG_SHARED | IR3_REG_UNIFORM)) && /* scalar->scalar mov instructions (but NOT cov) were supported before the * scalar ALU was supported, but they still required (ss) whereas on GPUs * that have a scalar ALU they are executed on it and do not require (ss). @@ -1748,7 +1749,7 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags) return true; /* cat2/cat3 scalar ALU instructions must not have regular sources. */ - if (instr->dsts[0]->flags & IR3_REG_SHARED) { + if (instr->dsts[0]->flags & (IR3_REG_SHARED | IR3_REG_UNIFORM)) { if (!(flags & (IR3_REG_SHARED | IR3_REG_IMMED | IR3_REG_CONST))) return false; } diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 035b2387034..569bb844001 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -191,6 +191,12 @@ typedef enum ir3_register_flags { * dummy src and dst of prefetch sam/ldc/resinfo. */ IR3_REG_DUMMY = BIT(23), + + /* Used to mark predicate registers as uniform. Uniform predicate registers + * can be written by the scalar ALU but can only be read as a vector, needing + * (ss) to synchronize like any scalar ALU result. + */ + IR3_REG_UNIFORM = BIT(24), } ir3_register_flags; struct ir3_register { @@ -2189,7 +2195,7 @@ static inline bool is_ss_producer(struct ir3_instruction *instr) { foreach_dst (dst, instr) { - if (dst->flags & IR3_REG_SHARED) + if (dst->flags & (IR3_REG_SHARED | IR3_REG_UNIFORM)) return true; } diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c index 29db0b81b71..4c72ae96683 100644 --- a/src/freedreno/ir3/ir3_compiler.c +++ b/src/freedreno/ir3/ir3_compiler.c @@ -246,6 +246,7 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id, compiler->predtf_nop_quirk = dev_info->a6xx.predtf_nop_quirk; compiler->prede_nop_quirk = dev_info->a6xx.prede_nop_quirk; compiler->has_scalar_alu = dev_info->a6xx.has_scalar_alu; + compiler->has_scalar_predicates = dev_info->a6xx.has_scalar_predicates; compiler->has_isam_v = dev_info->a6xx.has_isam_v; compiler->has_ssbo_imm_offsets = dev_info->a6xx.has_ssbo_imm_offsets; compiler->fs_must_have_non_zero_constlen_quirk = dev_info->a7xx.fs_must_have_non_zero_constlen_quirk; diff --git a/src/freedreno/ir3/ir3_compiler.h b/src/freedreno/ir3/ir3_compiler.h index 226073c2a3c..878839f847a 100644 --- a/src/freedreno/ir3/ir3_compiler.h +++ b/src/freedreno/ir3/ir3_compiler.h @@ -284,6 +284,11 @@ struct ir3_compiler { */ bool has_scalar_alu; + /* True if cat2 instructions can write predicate registers from the scalar + * ALU. + */ + bool has_scalar_predicates; + bool fs_must_have_non_zero_constlen_quirk; /* On all generations that support scalar ALU, there is also a copy of the diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c index 2fb6ef19b41..c6cc5c1f847 100644 --- a/src/freedreno/ir3/ir3_legalize.c +++ b/src/freedreno/ir3/ir3_legalize.c @@ -235,6 +235,8 @@ sync_update(struct ir3_legalize_state *state, struct ir3_compiler *compiler, } } else if (reg_is_addr1(dst) && n->block->in_early_preamble) { regmask_set(&state->needs_ss, dst); + } else if (dst->flags & IR3_REG_UNIFORM) { + regmask_set(&state->needs_ss, dst); } } @@ -553,7 +555,7 @@ delay_update(struct ir3_compiler *compiler, if (dst->flags & IR3_REG_RELATIV) dst_cycle += instr->repeat; - if (dst->flags & IR3_REG_SHARED) + if (dst->flags & (IR3_REG_SHARED | IR3_REG_UNIFORM)) continue; for (unsigned elem = 0; elem < elems; elem++, num++) { @@ -2198,7 +2200,8 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary) } } - if (in_preamble && writes_pred(instr)) { + if (in_preamble && writes_pred(instr) && + !(instr->dsts[0]->flags & IR3_REG_UNIFORM)) { pred_in_preamble = true; } } diff --git a/src/freedreno/ir3/ir3_print.c b/src/freedreno/ir3/ir3_print.c index d79dbd2caf4..1c5b3c93244 100644 --- a/src/freedreno/ir3/ir3_print.c +++ b/src/freedreno/ir3/ir3_print.c @@ -355,6 +355,8 @@ print_reg_name(struct log_stream *stream, struct ir3_instruction *instr, mesa_log_stream_printf(stream, "!"); } + if (reg->flags & IR3_REG_UNIFORM) + mesa_log_stream_printf(stream, "u"); if (reg->flags & IR3_REG_SHARED) mesa_log_stream_printf(stream, "s"); if (reg->flags & IR3_REG_HALF) diff --git a/src/freedreno/ir3/ir3_shared_ra.c b/src/freedreno/ir3/ir3_shared_ra.c index e0c6593a1f7..7f98b3a4452 100644 --- a/src/freedreno/ir3/ir3_shared_ra.c +++ b/src/freedreno/ir3/ir3_shared_ra.c @@ -619,7 +619,7 @@ try_demote_instruction(struct ra_ctx *ctx, struct ir3_instruction *instr) * skipped reloading and just demoted sources directly, so we should never * get here. */ - assert(instr->dsts[0]->flags & IR3_REG_SHARED); + assert(instr->dsts[0]->flags & (IR3_REG_SHARED | IR3_REG_UNIFORM)); /* Now we actually demote the instruction */ ra_foreach_src (src, instr) { @@ -637,6 +637,13 @@ try_demote_instruction(struct ra_ctx *ctx, struct ir3_instruction *instr) } } + if (instr->dsts[0]->flags & IR3_REG_UNIFORM) { + instr->dsts[0]->flags &= ~IR3_REG_UNIFORM; + + /* Uniform registers are always predicates which we don't handle here. */ + return true; + } + struct ra_interval *dst_interval = ra_interval_get(ctx, instr->dsts[0]); instr->dsts[0]->flags &= ~IR3_REG_SHARED; ra_interval_init(dst_interval, instr->dsts[0]); @@ -812,7 +819,7 @@ can_demote_src(struct ir3_instruction *instr) full_type(instr->cat1.dst_type) == TYPE_S32))); default: return (!is_alu(instr) && !is_sfu(instr)) || - !(instr->dsts[0]->flags & IR3_REG_SHARED); + !(instr->dsts[0]->flags & (IR3_REG_SHARED | IR3_REG_UNIFORM)); } } diff --git a/src/freedreno/ir3/ir3_validate.c b/src/freedreno/ir3/ir3_validate.c index 0a14a6b6c56..3f4d5760e8b 100644 --- a/src/freedreno/ir3/ir3_validate.c +++ b/src/freedreno/ir3/ir3_validate.c @@ -60,6 +60,12 @@ validate_reg(struct ir3_validate_ctx *ctx, struct ir3_register *reg) validate_assert(ctx, reg->num >= SHARED_REG_START); validate_assert(ctx, reg->num - SHARED_REG_START < SHARED_REG_SIZE); } + + if (reg->flags & IR3_REG_UNIFORM) { + validate_assert(ctx, ctx->ir->compiler->has_scalar_predicates); + validate_assert(ctx, reg->flags & IR3_REG_PREDICATE); + } + } static void @@ -190,6 +196,9 @@ validate_dst(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr, if (reg->flags & IR3_REG_RELATIV) validate_assert(ctx, instr->address); + if (reg->flags & IR3_REG_UNIFORM) + validate_assert(ctx, opc_cat(instr->opc) == 2); + validate_reg(ctx, reg); }