diff --git a/src/freedreno/ir3/ir3_lower_subgroups.c b/src/freedreno/ir3/ir3_lower_subgroups.c index d33e665ac87..fea9eb3be18 100644 --- a/src/freedreno/ir3/ir3_lower_subgroups.c +++ b/src/freedreno/ir3/ir3_lower_subgroups.c @@ -591,6 +591,36 @@ ir3_nir_opt_subgroups(nir_shader *nir, struct ir3_shader_variant *v) lower_scan_reduce, NULL); } +bool +ir3_nir_lower_subgroups_filter(const nir_instr *instr, const void *data) +{ /* Filter callback installed as options.filter for nir_lower_subgroups in ir3_nir_post_finalize(). Returns false for any non-intrinsic instruction. For reduce / inclusive_scan / exclusive_scan it returns true when the reduction op is imul, imin, imax, umin or umax and the def is 64 bits wide; otherwise it returns true only when the def has more than one component. Every other intrinsic returns true. `data` is not read. NOTE(review): presumably "true" means nir_lower_subgroups should lower the instruction - confirm against the nir_lower_subgroups filter contract. */ + if (instr->type != nir_instr_type_intrinsic) + return false; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + + switch (intrin->intrinsic) { + case nir_intrinsic_reduce: + case nir_intrinsic_inclusive_scan: + case nir_intrinsic_exclusive_scan: + switch (nir_intrinsic_reduction_op(intrin)) { + case nir_op_imul: + case nir_op_imin: + case nir_op_imax: + case nir_op_umin: + case nir_op_umax: + if (intrin->def.bit_size == 64) { + return true; + } + FALLTHROUGH; /* non-64-bit defs of these ops take the same component-count test as all other reduction ops */ + default: + return intrin->def.num_components > 1; + } + default: + return true; + } +} + static bool filter_64b_scan_reduce(const nir_instr *instr, const void *data) { diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c index 0c2b51d5b77..4afc86bf69d 100644 --- a/src/freedreno/ir3/ir3_nir.c +++ b/src/freedreno/ir3/ir3_nir.c @@ -852,6 +852,8 @@ ir3_nir_post_finalize(struct ir3_shader *shader) .lower_relative_shuffle = !compiler->has_shfl, .lower_rotate_to_shuffle = !compiler->has_shfl, .lower_inverse_ballot = true, + .lower_reduce = true, + .filter = ir3_nir_lower_subgroups_filter, }; if (!((s->info.stage == MESA_SHADER_COMPUTE) || @@ -863,15 +865,6 @@ ir3_nir_post_finalize(struct ir3_shader *shader) OPT(s, nir_lower_subgroups, &options); OPT(s, ir3_nir_lower_shuffle, shader); - - /* We want to run the 64b lowering after nir_lower_subgroups so that the - * operations have been scalarized. However, the 64b lowering will insert - * some intrinsics (e.g., nir_ballot_find_msb) that need to be lowered again. 
- */ - if (OPT(s, ir3_nir_lower_64b_subgroups)) { - OPT(s, nir_lower_subgroups, &options); - } } if ((s->info.stage == MESA_SHADER_COMPUTE) || diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h index e9f9b617530..4bf947399bf 100644 --- a/src/freedreno/ir3/ir3_nir.h +++ b/src/freedreno/ir3/ir3_nir.h @@ -91,6 +91,7 @@ nir_def *ir3_nir_try_propagate_bit_shift(nir_builder *b, nir_def *offset, int32_t shift); +bool ir3_nir_lower_subgroups_filter(const nir_instr *instr, const void *data); /* instruction filter passed as options.filter to nir_lower_subgroups in ir3_nir_post_finalize() */ bool ir3_nir_lower_64b_subgroups(nir_shader *nir); bool ir3_nir_lower_shuffle(nir_shader *nir, struct ir3_shader *shader); bool ir3_nir_opt_subgroups(nir_shader *nir, struct ir3_shader_variant *v);