ir3: make backend aware of scalar predicates

Predicate registers can be written from the scalar ALU by using a
special cat2 encoding: if the dst is encoded as a0.c, the instruction
will execute on the scalar ALU and write to p0.c.

This commit makes the ir3 backend aware of scalar predicates. A new
register flag (IR3_REG_UNIFORM) is added that can be used to mark
predicate dsts as being written by the scalar ALU. For such dsts, the
same synchronization rules apply as for shared registers written by the
scalar ALU (e.g., (ss) is needed to read them from the vector ALU).
Scalar predicates can be used in the early preamble, which makes control
flow available there.

In many ways, the backend treats IR3_REG_UNIFORM the same as
IR3_REG_SHARED. A new flag was added because IR3_REG_SHARED is mainly
used to denote a separate register file, not as a flag to indicate usage
by the scalar ALU. Scalar predicates still use the normal predicate
register file but allow it to be written from the scalar ALU.

Signed-off-by: Job Noorman <jnoorman@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36614>
This commit is contained in:
Job Noorman
2025-08-19 08:35:37 +02:00
committed by Marge Bot
parent bd28a40bd4
commit 25ab37ae5b
10 changed files with 44 additions and 7 deletions
@@ -187,6 +187,8 @@ struct fd_dev_info {
/* See ir3_compiler::has_scalar_alu. */
bool has_scalar_alu;
/* See ir3_compiler::has_scalar_predicates. */
bool has_scalar_predicates;
/* See ir3_compiler::has_early_preamble. */
bool has_early_preamble;
@@ -911,6 +911,7 @@ a7xx_base = A6XXProps(
line_width_min = 1.0,
line_width_max = 127.5,
has_scalar_alu = True,
has_scalar_predicates = True,
has_coherent_ubwc_flag_caches = True,
has_isam_v = True,
has_ssbo_imm_offsets = True,
+3 -2
View File
@@ -1367,7 +1367,8 @@ is_scalar_alu(struct ir3_instruction *instr,
instr->opc != OPC_SCAN_CLUSTERS_MACRO &&
instr->opc != OPC_SCAN_MACRO &&
instr->opc != OPC_MOVS &&
is_alu(instr) && (instr->dsts[0]->flags & IR3_REG_SHARED) &&
is_alu(instr) &&
(instr->dsts[0]->flags & (IR3_REG_SHARED | IR3_REG_UNIFORM)) &&
/* scalar->scalar mov instructions (but NOT cov) were supported before the
* scalar ALU was supported, but they still required (ss) whereas on GPUs
* that have a scalar ALU they are executed on it and do not require (ss).
@@ -1748,7 +1749,7 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags)
return true;
/* cat2/cat3 scalar ALU instructions must not have regular sources. */
if (instr->dsts[0]->flags & IR3_REG_SHARED) {
if (instr->dsts[0]->flags & (IR3_REG_SHARED | IR3_REG_UNIFORM)) {
if (!(flags & (IR3_REG_SHARED | IR3_REG_IMMED | IR3_REG_CONST)))
return false;
}
+7 -1
View File
@@ -191,6 +191,12 @@ typedef enum ir3_register_flags {
* dummy src and dst of prefetch sam/ldc/resinfo.
*/
IR3_REG_DUMMY = BIT(23),
/* Used to mark predicate registers as uniform. Uniform predicate registers
* can be written by the scalar ALU but can only be read by the vector ALU,
* which needs (ss) to synchronize, as with any scalar ALU result.
*/
IR3_REG_UNIFORM = BIT(24),
} ir3_register_flags;
struct ir3_register {
@@ -2189,7 +2195,7 @@ static inline bool
is_ss_producer(struct ir3_instruction *instr)
{
foreach_dst (dst, instr) {
if (dst->flags & IR3_REG_SHARED)
if (dst->flags & (IR3_REG_SHARED | IR3_REG_UNIFORM))
return true;
}
+1
View File
@@ -246,6 +246,7 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,
compiler->predtf_nop_quirk = dev_info->a6xx.predtf_nop_quirk;
compiler->prede_nop_quirk = dev_info->a6xx.prede_nop_quirk;
compiler->has_scalar_alu = dev_info->a6xx.has_scalar_alu;
compiler->has_scalar_predicates = dev_info->a6xx.has_scalar_predicates;
compiler->has_isam_v = dev_info->a6xx.has_isam_v;
compiler->has_ssbo_imm_offsets = dev_info->a6xx.has_ssbo_imm_offsets;
compiler->fs_must_have_non_zero_constlen_quirk = dev_info->a7xx.fs_must_have_non_zero_constlen_quirk;
+5
View File
@@ -284,6 +284,11 @@ struct ir3_compiler {
*/
bool has_scalar_alu;
/* True if cat2 instructions can write predicate registers from the scalar
* ALU.
*/
bool has_scalar_predicates;
bool fs_must_have_non_zero_constlen_quirk;
/* On all generations that support scalar ALU, there is also a copy of the
+5 -2
View File
@@ -235,6 +235,8 @@ sync_update(struct ir3_legalize_state *state, struct ir3_compiler *compiler,
}
} else if (reg_is_addr1(dst) && n->block->in_early_preamble) {
regmask_set(&state->needs_ss, dst);
} else if (dst->flags & IR3_REG_UNIFORM) {
regmask_set(&state->needs_ss, dst);
}
}
@@ -553,7 +555,7 @@ delay_update(struct ir3_compiler *compiler,
if (dst->flags & IR3_REG_RELATIV)
dst_cycle += instr->repeat;
if (dst->flags & IR3_REG_SHARED)
if (dst->flags & (IR3_REG_SHARED | IR3_REG_UNIFORM))
continue;
for (unsigned elem = 0; elem < elems; elem++, num++) {
@@ -2198,7 +2200,8 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary)
}
}
if (in_preamble && writes_pred(instr)) {
if (in_preamble && writes_pred(instr) &&
!(instr->dsts[0]->flags & IR3_REG_UNIFORM)) {
pred_in_preamble = true;
}
}
+2
View File
@@ -355,6 +355,8 @@ print_reg_name(struct log_stream *stream, struct ir3_instruction *instr,
mesa_log_stream_printf(stream, "!");
}
if (reg->flags & IR3_REG_UNIFORM)
mesa_log_stream_printf(stream, "u");
if (reg->flags & IR3_REG_SHARED)
mesa_log_stream_printf(stream, "s");
if (reg->flags & IR3_REG_HALF)
+9 -2
View File
@@ -619,7 +619,7 @@ try_demote_instruction(struct ra_ctx *ctx, struct ir3_instruction *instr)
* skipped reloading and just demoted sources directly, so we should never
* get here.
*/
assert(instr->dsts[0]->flags & IR3_REG_SHARED);
assert(instr->dsts[0]->flags & (IR3_REG_SHARED | IR3_REG_UNIFORM));
/* Now we actually demote the instruction */
ra_foreach_src (src, instr) {
@@ -637,6 +637,13 @@ try_demote_instruction(struct ra_ctx *ctx, struct ir3_instruction *instr)
}
}
if (instr->dsts[0]->flags & IR3_REG_UNIFORM) {
instr->dsts[0]->flags &= ~IR3_REG_UNIFORM;
/* Uniform registers are always predicates, which we don't handle here. */
return true;
}
struct ra_interval *dst_interval = ra_interval_get(ctx, instr->dsts[0]);
instr->dsts[0]->flags &= ~IR3_REG_SHARED;
ra_interval_init(dst_interval, instr->dsts[0]);
@@ -812,7 +819,7 @@ can_demote_src(struct ir3_instruction *instr)
full_type(instr->cat1.dst_type) == TYPE_S32)));
default:
return (!is_alu(instr) && !is_sfu(instr)) ||
!(instr->dsts[0]->flags & IR3_REG_SHARED);
!(instr->dsts[0]->flags & (IR3_REG_SHARED | IR3_REG_UNIFORM));
}
}
+9
View File
@@ -60,6 +60,12 @@ validate_reg(struct ir3_validate_ctx *ctx, struct ir3_register *reg)
validate_assert(ctx, reg->num >= SHARED_REG_START);
validate_assert(ctx, reg->num - SHARED_REG_START < SHARED_REG_SIZE);
}
if (reg->flags & IR3_REG_UNIFORM) {
validate_assert(ctx, ctx->ir->compiler->has_scalar_predicates);
validate_assert(ctx, reg->flags & IR3_REG_PREDICATE);
}
}
static void
@@ -190,6 +196,9 @@ validate_dst(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr,
if (reg->flags & IR3_REG_RELATIV)
validate_assert(ctx, instr->address);
if (reg->flags & IR3_REG_UNIFORM)
validate_assert(ctx, opc_cat(instr->opc) == 2);
validate_reg(ctx, reg);
}