ir3: use generic lowering for 64b scan/reduce
Now that we use shfl for lowering shuffle operations, the generic lowering of scan/reduce to shuffles results in faster code than our custom loop for 64b operations. Note that this was measured using a microbenchmark on full subgroups. The generic lowering might be slower when not all invocations are active, but this should be a rare case. Signed-off-by: Job Noorman <jnoorman@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31731>
This commit is contained in:
@@ -591,6 +591,36 @@ ir3_nir_opt_subgroups(nir_shader *nir, struct ir3_shader_variant *v)
|
||||
lower_scan_reduce, NULL);
|
||||
}
|
||||
|
||||
bool
|
||||
ir3_nir_lower_subgroups_filter(const nir_instr *instr, const void *data)
|
||||
{
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
return false;
|
||||
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
|
||||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_reduce:
|
||||
case nir_intrinsic_inclusive_scan:
|
||||
case nir_intrinsic_exclusive_scan:
|
||||
switch (nir_intrinsic_reduction_op(intrin)) {
|
||||
case nir_op_imul:
|
||||
case nir_op_imin:
|
||||
case nir_op_imax:
|
||||
case nir_op_umin:
|
||||
case nir_op_umax:
|
||||
if (intrin->def.bit_size == 64) {
|
||||
return true;
|
||||
}
|
||||
FALLTHROUGH;
|
||||
default:
|
||||
return intrin->def.num_components > 1;
|
||||
}
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
filter_64b_scan_reduce(const nir_instr *instr, const void *data)
|
||||
{
|
||||
|
||||
@@ -852,6 +852,8 @@ ir3_nir_post_finalize(struct ir3_shader *shader)
|
||||
.lower_relative_shuffle = !compiler->has_shfl,
|
||||
.lower_rotate_to_shuffle = !compiler->has_shfl,
|
||||
.lower_inverse_ballot = true,
|
||||
.lower_reduce = true,
|
||||
.filter = ir3_nir_lower_subgroups_filter,
|
||||
};
|
||||
|
||||
if (!((s->info.stage == MESA_SHADER_COMPUTE) ||
|
||||
@@ -863,15 +865,6 @@ ir3_nir_post_finalize(struct ir3_shader *shader)
|
||||
|
||||
OPT(s, nir_lower_subgroups, &options);
|
||||
OPT(s, ir3_nir_lower_shuffle, shader);
|
||||
|
||||
/* We want to run the 64b lowering after nir_lower_subgroups so that the
|
||||
* operations have been scalarized. However, the 64b lowering will insert
|
||||
* some intrinsics (e.g., nir_ballot_find_msb) that need to be lowered
|
||||
* again.
|
||||
*/
|
||||
if (OPT(s, ir3_nir_lower_64b_subgroups)) {
|
||||
OPT(s, nir_lower_subgroups, &options);
|
||||
}
|
||||
}
|
||||
|
||||
if ((s->info.stage == MESA_SHADER_COMPUTE) ||
|
||||
|
||||
@@ -91,6 +91,7 @@ nir_def *ir3_nir_try_propagate_bit_shift(nir_builder *b,
|
||||
nir_def *offset,
|
||||
int32_t shift);
|
||||
|
||||
bool ir3_nir_lower_subgroups_filter(const nir_instr *instr, const void *data);
|
||||
bool ir3_nir_lower_64b_subgroups(nir_shader *nir);
|
||||
bool ir3_nir_lower_shuffle(nir_shader *nir, struct ir3_shader *shader);
|
||||
bool ir3_nir_opt_subgroups(nir_shader *nir, struct ir3_shader_variant *v);
|
||||
|
||||
Reference in New Issue
Block a user