From 25ab37ae5bcc98bbcd3959aefb48223e49f78560 Mon Sep 17 00:00:00 2001 From: Job Noorman Date: Tue, 19 Aug 2025 08:35:37 +0200 Subject: [PATCH] ir3: make backend aware of scalar predicates Predicate registers can be written from the scalar ALU by using a special cat2 encoding: if the dst is encoded as a0.c, the instruction will execute on the scalar ALU and write to p0.c. This commit makes the ir3 backend aware of scalar predicates. A new register flag (IR3_REG_UNIFORM) is added that can be used to mark predicate dsts as being written by the scalar ALU. For such dsts, the same synchronization rules apply as for shared registers written by the scalar ALU (e.g., (ss) is needed to read them from the vector ALU). Scalar predicates can be used in the early preamble, which makes control flow available there. In many ways, the backend treats IR3_REG_UNIFORM the same as IR3_REG_SHARED. A new flag was added because IR3_REG_SHARED is mainly used to denote a separate register file, not as a flag to indicate usage by the scalar ALU. Scalar predicates still use the normal predicate register file but allow it to be written from the scalar ALU. Signed-off-by: Job Noorman Part-of: --- src/freedreno/common/freedreno_dev_info.h | 2 ++ src/freedreno/common/freedreno_devices.py | 1 + src/freedreno/ir3/ir3.c | 5 +++-- src/freedreno/ir3/ir3.h | 8 +++++++- src/freedreno/ir3/ir3_compiler.c | 1 + src/freedreno/ir3/ir3_compiler.h | 5 +++++ src/freedreno/ir3/ir3_legalize.c | 7 +++++-- src/freedreno/ir3/ir3_print.c | 2 ++ src/freedreno/ir3/ir3_shared_ra.c | 11 +++++++++-- src/freedreno/ir3/ir3_validate.c | 9 +++++++++ 10 files changed, 44 insertions(+), 7 deletions(-) diff --git a/src/freedreno/common/freedreno_dev_info.h b/src/freedreno/common/freedreno_dev_info.h index 5d08c41df2e..a426c0b3d82 100644 --- a/src/freedreno/common/freedreno_dev_info.h +++ b/src/freedreno/common/freedreno_dev_info.h @@ -187,6 +187,8 @@ struct fd_dev_info { /* See ir3_compiler::has_scalar_alu. */ bool has_scalar_alu; + /* See ir3_compiler::has_scalar_predicates. */ + bool has_scalar_predicates; /* See ir3_compiler::has_early_preamble. */ bool has_early_preamble; diff --git a/src/freedreno/common/freedreno_devices.py b/src/freedreno/common/freedreno_devices.py index 72df7ae1eb9..8fc040c1e43 100644 --- a/src/freedreno/common/freedreno_devices.py +++ b/src/freedreno/common/freedreno_devices.py @@ -911,6 +911,7 @@ a7xx_base = A6XXProps( line_width_min = 1.0, line_width_max = 127.5, has_scalar_alu = True, + has_scalar_predicates = True, has_coherent_ubwc_flag_caches = True, has_isam_v = True, has_ssbo_imm_offsets = True, diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c index 04895c37af3..144e82dc5c5 100644 --- a/src/freedreno/ir3/ir3.c +++ b/src/freedreno/ir3/ir3.c @@ -1367,7 +1367,8 @@ is_scalar_alu(struct ir3_instruction *instr, instr->opc != OPC_SCAN_CLUSTERS_MACRO && instr->opc != OPC_SCAN_MACRO && instr->opc != OPC_MOVS && - is_alu(instr) && (instr->dsts[0]->flags & IR3_REG_SHARED) && + is_alu(instr) && + (instr->dsts[0]->flags & (IR3_REG_SHARED | IR3_REG_UNIFORM)) && /* scalar->scalar mov instructions (but NOT cov) were supported before the * scalar ALU was supported, but they still required (ss) whereas on GPUs * that have a scalar ALU they are executed on it and do not require (ss). @@ -1748,7 +1749,7 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags) return true; /* cat2/cat3 scalar ALU instructions must not have regular sources. */ - if (instr->dsts[0]->flags & IR3_REG_SHARED) { + if (instr->dsts[0]->flags & (IR3_REG_SHARED | IR3_REG_UNIFORM)) { if (!(flags & (IR3_REG_SHARED | IR3_REG_IMMED | IR3_REG_CONST))) return false; } diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 035b2387034..569bb844001 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -191,6 +191,12 @@ typedef enum ir3_register_flags { * dummy src and dst of prefetch sam/ldc/resinfo. */ IR3_REG_DUMMY = BIT(23), + + /* Used to mark predicate registers as uniform. Uniform predicate registers + * can be written by the scalar ALU but can only be read as a vector, needing + * (ss) to synchronize like any scalar ALU result. + */ + IR3_REG_UNIFORM = BIT(24), } ir3_register_flags; struct ir3_register { @@ -2189,7 +2195,7 @@ static inline bool is_ss_producer(struct ir3_instruction *instr) { foreach_dst (dst, instr) { - if (dst->flags & IR3_REG_SHARED) + if (dst->flags & (IR3_REG_SHARED | IR3_REG_UNIFORM)) return true; } diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c index 29db0b81b71..4c72ae96683 100644 --- a/src/freedreno/ir3/ir3_compiler.c +++ b/src/freedreno/ir3/ir3_compiler.c @@ -246,6 +246,7 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id, compiler->predtf_nop_quirk = dev_info->a6xx.predtf_nop_quirk; compiler->prede_nop_quirk = dev_info->a6xx.prede_nop_quirk; compiler->has_scalar_alu = dev_info->a6xx.has_scalar_alu; + compiler->has_scalar_predicates = dev_info->a6xx.has_scalar_predicates; compiler->has_isam_v = dev_info->a6xx.has_isam_v; compiler->has_ssbo_imm_offsets = dev_info->a6xx.has_ssbo_imm_offsets; compiler->fs_must_have_non_zero_constlen_quirk = dev_info->a7xx.fs_must_have_non_zero_constlen_quirk; diff --git a/src/freedreno/ir3/ir3_compiler.h b/src/freedreno/ir3/ir3_compiler.h index 226073c2a3c..878839f847a 100644 --- a/src/freedreno/ir3/ir3_compiler.h +++ b/src/freedreno/ir3/ir3_compiler.h @@ -284,6 +284,11 @@ struct ir3_compiler { */ bool has_scalar_alu; + /* True if cat2 instructions can write predicate registers from the scalar + * ALU. + */ + bool has_scalar_predicates; + bool fs_must_have_non_zero_constlen_quirk; /* On all generations that support scalar ALU, there is also a copy of the diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c index 2fb6ef19b41..c6cc5c1f847 100644 --- a/src/freedreno/ir3/ir3_legalize.c +++ b/src/freedreno/ir3/ir3_legalize.c @@ -235,6 +235,8 @@ sync_update(struct ir3_legalize_state *state, struct ir3_compiler *compiler, } } else if (reg_is_addr1(dst) && n->block->in_early_preamble) { regmask_set(&state->needs_ss, dst); + } else if (dst->flags & IR3_REG_UNIFORM) { + regmask_set(&state->needs_ss, dst); } } @@ -553,7 +555,7 @@ delay_update(struct ir3_compiler *compiler, if (dst->flags & IR3_REG_RELATIV) dst_cycle += instr->repeat; - if (dst->flags & IR3_REG_SHARED) + if (dst->flags & (IR3_REG_SHARED | IR3_REG_UNIFORM)) continue; for (unsigned elem = 0; elem < elems; elem++, num++) { @@ -2198,7 +2200,8 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary) } } - if (in_preamble && writes_pred(instr)) { + if (in_preamble && writes_pred(instr) && + !(instr->dsts[0]->flags & IR3_REG_UNIFORM)) { pred_in_preamble = true; } } diff --git a/src/freedreno/ir3/ir3_print.c b/src/freedreno/ir3/ir3_print.c index d79dbd2caf4..1c5b3c93244 100644 --- a/src/freedreno/ir3/ir3_print.c +++ b/src/freedreno/ir3/ir3_print.c @@ -355,6 +355,8 @@ print_reg_name(struct log_stream *stream, struct ir3_instruction *instr, mesa_log_stream_printf(stream, "!"); } + if (reg->flags & IR3_REG_UNIFORM) + mesa_log_stream_printf(stream, "u"); if (reg->flags & IR3_REG_SHARED) mesa_log_stream_printf(stream, "s"); if (reg->flags & IR3_REG_HALF) diff --git a/src/freedreno/ir3/ir3_shared_ra.c b/src/freedreno/ir3/ir3_shared_ra.c index e0c6593a1f7..7f98b3a4452 100644 --- a/src/freedreno/ir3/ir3_shared_ra.c +++ b/src/freedreno/ir3/ir3_shared_ra.c @@ -619,7 +619,7 @@ try_demote_instruction(struct ra_ctx *ctx, struct ir3_instruction *instr) * skipped reloading and just demoted sources directly, so we should never * get here. */ - assert(instr->dsts[0]->flags & IR3_REG_SHARED); + assert(instr->dsts[0]->flags & (IR3_REG_SHARED | IR3_REG_UNIFORM)); /* Now we actually demote the instruction */ ra_foreach_src (src, instr) { @@ -637,6 +637,13 @@ try_demote_instruction(struct ra_ctx *ctx, struct ir3_instruction *instr) } } + if (instr->dsts[0]->flags & IR3_REG_UNIFORM) { + instr->dsts[0]->flags &= ~IR3_REG_UNIFORM; + + /* Uniform registers are always predicates which we don't handle here. */ + return true; + } + struct ra_interval *dst_interval = ra_interval_get(ctx, instr->dsts[0]); instr->dsts[0]->flags &= ~IR3_REG_SHARED; ra_interval_init(dst_interval, instr->dsts[0]); @@ -812,7 +819,7 @@ can_demote_src(struct ir3_instruction *instr) full_type(instr->cat1.dst_type) == TYPE_S32))); default: return (!is_alu(instr) && !is_sfu(instr)) || - !(instr->dsts[0]->flags & IR3_REG_SHARED); + !(instr->dsts[0]->flags & (IR3_REG_SHARED | IR3_REG_UNIFORM)); } } diff --git a/src/freedreno/ir3/ir3_validate.c b/src/freedreno/ir3/ir3_validate.c index 0a14a6b6c56..3f4d5760e8b 100644 --- a/src/freedreno/ir3/ir3_validate.c +++ b/src/freedreno/ir3/ir3_validate.c @@ -60,6 +60,12 @@ validate_reg(struct ir3_validate_ctx *ctx, struct ir3_register *reg) validate_assert(ctx, reg->num >= SHARED_REG_START); validate_assert(ctx, reg->num - SHARED_REG_START < SHARED_REG_SIZE); } + + if (reg->flags & IR3_REG_UNIFORM) { + validate_assert(ctx, ctx->ir->compiler->has_scalar_predicates); + validate_assert(ctx, reg->flags & IR3_REG_PREDICATE); + } + } static void @@ -190,6 +196,9 @@ validate_dst(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr, if (reg->flags & IR3_REG_RELATIV) validate_assert(ctx, instr->address); + if (reg->flags & IR3_REG_UNIFORM) + validate_assert(ctx, opc_cat(instr->opc) == 2); + validate_reg(ctx, reg); }