ir3: make backend aware of scalar predicates
Predicate registers can be written from the scalar ALU by using a special cat2 encoding: if the dst is encoded as a0.c, the instruction will execute on the scalar ALU and write to p0.c.

This commit makes the ir3 backend aware of scalar predicates. A new register flag (IR3_REG_UNIFORM) is added that can be used to mark predicate dsts as being written by the scalar ALU. For such dsts, the same synchronization rules apply as for shared registers written by the scalar ALU (e.g., (ss) is needed to read them from the vector ALU). Scalar predicates can be used in the early preamble, which makes control flow available there.

In many ways, the backend treats IR3_REG_UNIFORM the same as IR3_REG_SHARED. A new flag was added because IR3_REG_SHARED is mainly used to denote a separate register file, not as a flag to indicate usage by the scalar ALU. Scalar predicates still use the normal predicate register file but allow it to be written from the scalar ALU.

Signed-off-by: Job Noorman <jnoorman@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36614>
This commit is contained in:
@@ -187,6 +187,8 @@ struct fd_dev_info {
|
||||
|
||||
/* See ir3_compiler::has_scalar_alu. */
|
||||
bool has_scalar_alu;
|
||||
/* See ir3_compiler::has_scalar_predicates. */
|
||||
bool has_scalar_predicates;
|
||||
/* See ir3_compiler::has_early_preamble. */
|
||||
bool has_early_preamble;
|
||||
|
||||
|
||||
@@ -911,6 +911,7 @@ a7xx_base = A6XXProps(
|
||||
line_width_min = 1.0,
|
||||
line_width_max = 127.5,
|
||||
has_scalar_alu = True,
|
||||
has_scalar_predicates = True,
|
||||
has_coherent_ubwc_flag_caches = True,
|
||||
has_isam_v = True,
|
||||
has_ssbo_imm_offsets = True,
|
||||
|
||||
@@ -1367,7 +1367,8 @@ is_scalar_alu(struct ir3_instruction *instr,
|
||||
instr->opc != OPC_SCAN_CLUSTERS_MACRO &&
|
||||
instr->opc != OPC_SCAN_MACRO &&
|
||||
instr->opc != OPC_MOVS &&
|
||||
is_alu(instr) && (instr->dsts[0]->flags & IR3_REG_SHARED) &&
|
||||
is_alu(instr) &&
|
||||
(instr->dsts[0]->flags & (IR3_REG_SHARED | IR3_REG_UNIFORM)) &&
|
||||
/* scalar->scalar mov instructions (but NOT cov) were supported before the
|
||||
* scalar ALU was supported, but they still required (ss) whereas on GPUs
|
||||
* that have a scalar ALU they are executed on it and do not require (ss).
|
||||
@@ -1748,7 +1749,7 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags)
|
||||
return true;
|
||||
|
||||
/* cat2/cat3 scalar ALU instructions must not have regular sources. */
|
||||
if (instr->dsts[0]->flags & IR3_REG_SHARED) {
|
||||
if (instr->dsts[0]->flags & (IR3_REG_SHARED | IR3_REG_UNIFORM)) {
|
||||
if (!(flags & (IR3_REG_SHARED | IR3_REG_IMMED | IR3_REG_CONST)))
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -191,6 +191,12 @@ typedef enum ir3_register_flags {
|
||||
* dummy src and dst of prefetch sam/ldc/resinfo.
|
||||
*/
|
||||
IR3_REG_DUMMY = BIT(23),
|
||||
|
||||
/* Used to mark predicate registers as uniform. Uniform predicate registers
|
||||
* can be written by the scalar ALU but can only be read as a vector, needing
|
||||
* (ss) to synchronize like any scalar ALU result.
|
||||
*/
|
||||
IR3_REG_UNIFORM = BIT(24),
|
||||
} ir3_register_flags;
|
||||
|
||||
struct ir3_register {
|
||||
@@ -2189,7 +2195,7 @@ static inline bool
|
||||
is_ss_producer(struct ir3_instruction *instr)
|
||||
{
|
||||
foreach_dst (dst, instr) {
|
||||
if (dst->flags & IR3_REG_SHARED)
|
||||
if (dst->flags & (IR3_REG_SHARED | IR3_REG_UNIFORM))
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -246,6 +246,7 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,
|
||||
compiler->predtf_nop_quirk = dev_info->a6xx.predtf_nop_quirk;
|
||||
compiler->prede_nop_quirk = dev_info->a6xx.prede_nop_quirk;
|
||||
compiler->has_scalar_alu = dev_info->a6xx.has_scalar_alu;
|
||||
compiler->has_scalar_predicates = dev_info->a6xx.has_scalar_predicates;
|
||||
compiler->has_isam_v = dev_info->a6xx.has_isam_v;
|
||||
compiler->has_ssbo_imm_offsets = dev_info->a6xx.has_ssbo_imm_offsets;
|
||||
compiler->fs_must_have_non_zero_constlen_quirk = dev_info->a7xx.fs_must_have_non_zero_constlen_quirk;
|
||||
|
||||
@@ -284,6 +284,11 @@ struct ir3_compiler {
|
||||
*/
|
||||
bool has_scalar_alu;
|
||||
|
||||
/* True if cat2 instructions can write predicate registers from the scalar
|
||||
* ALU.
|
||||
*/
|
||||
bool has_scalar_predicates;
|
||||
|
||||
bool fs_must_have_non_zero_constlen_quirk;
|
||||
|
||||
/* On all generations that support scalar ALU, there is also a copy of the
|
||||
|
||||
@@ -235,6 +235,8 @@ sync_update(struct ir3_legalize_state *state, struct ir3_compiler *compiler,
|
||||
}
|
||||
} else if (reg_is_addr1(dst) && n->block->in_early_preamble) {
|
||||
regmask_set(&state->needs_ss, dst);
|
||||
} else if (dst->flags & IR3_REG_UNIFORM) {
|
||||
regmask_set(&state->needs_ss, dst);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -553,7 +555,7 @@ delay_update(struct ir3_compiler *compiler,
|
||||
if (dst->flags & IR3_REG_RELATIV)
|
||||
dst_cycle += instr->repeat;
|
||||
|
||||
if (dst->flags & IR3_REG_SHARED)
|
||||
if (dst->flags & (IR3_REG_SHARED | IR3_REG_UNIFORM))
|
||||
continue;
|
||||
|
||||
for (unsigned elem = 0; elem < elems; elem++, num++) {
|
||||
@@ -2198,7 +2200,8 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary)
|
||||
}
|
||||
}
|
||||
|
||||
if (in_preamble && writes_pred(instr)) {
|
||||
if (in_preamble && writes_pred(instr) &&
|
||||
!(instr->dsts[0]->flags & IR3_REG_UNIFORM)) {
|
||||
pred_in_preamble = true;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -355,6 +355,8 @@ print_reg_name(struct log_stream *stream, struct ir3_instruction *instr,
|
||||
mesa_log_stream_printf(stream, "!");
|
||||
}
|
||||
|
||||
if (reg->flags & IR3_REG_UNIFORM)
|
||||
mesa_log_stream_printf(stream, "u");
|
||||
if (reg->flags & IR3_REG_SHARED)
|
||||
mesa_log_stream_printf(stream, "s");
|
||||
if (reg->flags & IR3_REG_HALF)
|
||||
|
||||
@@ -619,7 +619,7 @@ try_demote_instruction(struct ra_ctx *ctx, struct ir3_instruction *instr)
|
||||
* skipped reloading and just demoted sources directly, so we should never
|
||||
* get here.
|
||||
*/
|
||||
assert(instr->dsts[0]->flags & IR3_REG_SHARED);
|
||||
assert(instr->dsts[0]->flags & (IR3_REG_SHARED | IR3_REG_UNIFORM));
|
||||
|
||||
/* Now we actually demote the instruction */
|
||||
ra_foreach_src (src, instr) {
|
||||
@@ -637,6 +637,13 @@ try_demote_instruction(struct ra_ctx *ctx, struct ir3_instruction *instr)
|
||||
}
|
||||
}
|
||||
|
||||
if (instr->dsts[0]->flags & IR3_REG_UNIFORM) {
|
||||
instr->dsts[0]->flags &= ~IR3_REG_UNIFORM;
|
||||
|
||||
/* Uniform registers are always predicates, which we don't handle here. */
|
||||
return true;
|
||||
}
|
||||
|
||||
struct ra_interval *dst_interval = ra_interval_get(ctx, instr->dsts[0]);
|
||||
instr->dsts[0]->flags &= ~IR3_REG_SHARED;
|
||||
ra_interval_init(dst_interval, instr->dsts[0]);
|
||||
@@ -812,7 +819,7 @@ can_demote_src(struct ir3_instruction *instr)
|
||||
full_type(instr->cat1.dst_type) == TYPE_S32)));
|
||||
default:
|
||||
return (!is_alu(instr) && !is_sfu(instr)) ||
|
||||
!(instr->dsts[0]->flags & IR3_REG_SHARED);
|
||||
!(instr->dsts[0]->flags & (IR3_REG_SHARED | IR3_REG_UNIFORM));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -60,6 +60,12 @@ validate_reg(struct ir3_validate_ctx *ctx, struct ir3_register *reg)
|
||||
validate_assert(ctx, reg->num >= SHARED_REG_START);
|
||||
validate_assert(ctx, reg->num - SHARED_REG_START < SHARED_REG_SIZE);
|
||||
}
|
||||
|
||||
if (reg->flags & IR3_REG_UNIFORM) {
|
||||
validate_assert(ctx, ctx->ir->compiler->has_scalar_predicates);
|
||||
validate_assert(ctx, reg->flags & IR3_REG_PREDICATE);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -190,6 +196,9 @@ validate_dst(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr,
|
||||
if (reg->flags & IR3_REG_RELATIV)
|
||||
validate_assert(ctx, instr->address);
|
||||
|
||||
if (reg->flags & IR3_REG_UNIFORM)
|
||||
validate_assert(ctx, opc_cat(instr->opc) == 2);
|
||||
|
||||
validate_reg(ctx, reg);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user