ir3: use generic lowering for 64b scan/reduce

Now that we use shfl for lowering shuffle operations, the generic
lowering of scan/reduce to shuffles results in faster code than our
custom loop for 64b operations.

Note that this was measured using a micro benchmark on full subgroups.
The generic lowering might be slower when not all invocations are active,
but that should be a rare case.

Signed-off-by: Job Noorman <jnoorman@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31731>
This commit is contained in:
Job Noorman
2024-11-29 17:09:15 +01:00
committed by Marge Bot
parent 60e1615ced
commit 5e9cf354ca
3 changed files with 33 additions and 9 deletions
+30
View File
@@ -591,6 +591,36 @@ ir3_nir_opt_subgroups(nir_shader *nir, struct ir3_shader_variant *v)
lower_scan_reduce, NULL);
}
/* Filter callback for nir_lower_subgroups: decides which subgroup
 * intrinsics should be handed to the generic NIR lowering.
 *
 * Returns true (lower generically) for every intrinsic except
 * scalar scan/reduce operations that the backend handles natively.
 * 64-bit mul/min/max scans/reduces and any vector scan/reduce are
 * still lowered generically.
 */
bool
ir3_nir_lower_subgroups_filter(const nir_instr *instr, const void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

   /* Anything other than a scan/reduce is always lowered generically. */
   if (intrin->intrinsic != nir_intrinsic_reduce &&
       intrin->intrinsic != nir_intrinsic_inclusive_scan &&
       intrin->intrinsic != nir_intrinsic_exclusive_scan)
      return true;

   /* Ops without native 64-bit support must be lowered when 64b. */
   switch (nir_intrinsic_reduction_op(intrin)) {
   case nir_op_imul:
   case nir_op_imin:
   case nir_op_imax:
   case nir_op_umin:
   case nir_op_umax:
      if (intrin->def.bit_size == 64)
         return true;
      break;
   default:
      break;
   }

   /* Scalar scan/reduce is handled by the backend; vectors are not. */
   return intrin->def.num_components > 1;
}
static bool
filter_64b_scan_reduce(const nir_instr *instr, const void *data)
{
+2 -9
View File
@@ -852,6 +852,8 @@ ir3_nir_post_finalize(struct ir3_shader *shader)
.lower_relative_shuffle = !compiler->has_shfl,
.lower_rotate_to_shuffle = !compiler->has_shfl,
.lower_inverse_ballot = true,
.lower_reduce = true,
.filter = ir3_nir_lower_subgroups_filter,
};
if (!((s->info.stage == MESA_SHADER_COMPUTE) ||
@@ -863,15 +865,6 @@ ir3_nir_post_finalize(struct ir3_shader *shader)
OPT(s, nir_lower_subgroups, &options);
OPT(s, ir3_nir_lower_shuffle, shader);
/* We want to run the 64b lowering after nir_lower_subgroups so that the
* operations have been scalarized. However, the 64b lowering will insert
* some intrinsics (e.g., nir_ballot_find_msb) that need to be lowered
* again.
*/
if (OPT(s, ir3_nir_lower_64b_subgroups)) {
OPT(s, nir_lower_subgroups, &options);
}
}
if ((s->info.stage == MESA_SHADER_COMPUTE) ||
+1
View File
@@ -91,6 +91,7 @@ nir_def *ir3_nir_try_propagate_bit_shift(nir_builder *b,
nir_def *offset,
int32_t shift);
bool ir3_nir_lower_subgroups_filter(const nir_instr *instr, const void *data);
bool ir3_nir_lower_64b_subgroups(nir_shader *nir);
bool ir3_nir_lower_shuffle(nir_shader *nir, struct ir3_shader *shader);
bool ir3_nir_opt_subgroups(nir_shader *nir, struct ir3_shader_variant *v);