ir3: enable nir_opt_uub

Enable nir_opt_uub in ir3_optimize_loop. To make sure we don't interfere
with nir_opt_load_store_vectorize, nir_opt_uub's lowering of imul to
umul_16x16 is only enabled after vectorizing.

Totals from 140908 (9.33% of 1510605) affected shaders:
MaxWaves: 1687210 -> 1713516 (+1.56%); split: +1.59%, -0.03%
Instrs: 118073810 -> 116316350 (-1.49%); split: -1.57%, +0.09%
CodeSize: 252147038 -> 247992436 (-1.65%); split: -1.77%, +0.13%
NOPs: 22177569 -> 22101617 (-0.34%); split: -1.29%, +0.95%
MOVs: 5361215 -> 5246163 (-2.15%); split: -2.86%, +0.72%
COVs: 1728869 -> 1693953 (-2.02%); split: -2.26%, +0.24%
Full: 2083701 -> 2058689 (-1.20%); split: -1.24%, +0.04%
(ss): 3013912 -> 2993026 (-0.69%); split: -1.54%, +0.85%
(sy): 1746154 -> 1711155 (-2.00%); split: -2.45%, +0.45%
(ss)-stall: 10509576 -> 10514455 (+0.05%); split: -0.79%, +0.83%
(sy)-stall: 47895875 -> 47061446 (-1.74%); split: -2.53%, +0.79%
STPs: 213699 -> 213523 (-0.08%); split: -0.12%, +0.03%
LDPs: 77629 -> 77469 (-0.21%); split: -0.32%, +0.11%
Preamble Instrs: 33860856 -> 33320325 (-1.60%); split: -2.03%, +0.43%
Early Preamble: 62136 -> 62115 (-0.03%); split: +0.02%, -0.05%
Constlen: 8306896 -> 8295976 (-0.13%); split: -0.17%, +0.04%
Last helper: 48512847 -> 48446850 (-0.14%); split: -0.34%, +0.20%
Last baryf: 1457776 -> 1454490 (-0.23%); split: -0.51%, +0.29%
Subgroup size: 12116544 -> 12118400 (+0.02%); split: +0.02%, -0.00%
Cat0: 24687449 -> 24577585 (-0.45%); split: -1.27%, +0.82%
Cat1: 7154983 -> 7004889 (-2.10%); split: -2.65%, +0.55%
Cat2: 47291859 -> 46934527 (-0.76%); split: -0.80%, +0.05%
Cat3: 27659651 -> 26640290 (-3.69%); split: -3.69%, +0.00%
Cat5: 3278715 -> 3278703 (-0.00%); split: -0.00%, +0.00%
Cat6: 1672689 -> 1551384 (-7.25%); split: -7.25%, +0.00%
Cat7: 3047494 -> 3048002 (+0.02%); split: -0.44%, +0.45%

Signed-off-by: Job Noorman <jnoorman@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37869>
2025-10-28 07:38:37 +01:00
parent c17ec6eeb0
commit fda0490784
2 changed files with 16 additions and 2 deletions
@@ -374,6 +374,7 @@ ir3_optimize_loop(struct ir3_compiler *compiler,
      progress |= OPT(s, nir_lower_pack);
      progress |= OPT(s, nir_lower_bit_size, ir3_lower_bit_size, NULL);
      progress |= OPT(s, nir_opt_constant_folding);
+      progress |= OPT(s, nir_opt_uub, &options->opt_uub_options);

      /* Remove unused components from IO loads. */
      progress |= OPT(s, nir_opt_shrink_vectors, true);
@@ -1249,7 +1250,18 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so,
      ir3_setup_const_state(s, so, ir3_const_state_mut(so));
   }

-   struct ir3_optimize_options optimize_options = {};
+   /* At this point nir_opt_load_store_vectorize has run so it's safe to
+    * optimize imul to umul_16x16. Also call nir_opt_uub manually once to give
+    * it a chance to optimize imul, even if the previous passes didn't make any
+    * progress.
+    */
+   struct ir3_optimize_options optimize_options = {
+      .opt_uub_options = {
+         .opt_imul = true,
+      },
+   };
+
+   progress |= OPT(s, nir_opt_uub, &optimize_options.opt_uub_options);

   /* Cleanup code leftover from lowering passes before opt_preamble */
   if (progress) {
@@ -64,7 +64,9 @@ nir_mem_access_size_align ir3_mem_access_size_align(
 bool ir3_nir_opt_branch_and_or_not(nir_shader *nir);
 bool ir3_nir_opt_triops_bitwise(nir_shader *nir);

-struct ir3_optimize_options {};
+struct ir3_optimize_options {
+   nir_opt_uub_options opt_uub_options;
+};
 bool ir3_optimize_loop(struct ir3_compiler *compiler,
                       struct ir3_optimize_options *options, nir_shader *s);
 void ir3_nir_lower_io_vars_to_temporaries(nir_shader *s);