diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index 624f726fcfd..7e5c1adb920 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -374,6 +374,7 @@ ir3_optimize_loop(struct ir3_compiler *compiler,
       progress |= OPT(s, nir_lower_pack);
       progress |= OPT(s, nir_lower_bit_size, ir3_lower_bit_size, NULL);
       progress |= OPT(s, nir_opt_constant_folding);
+      progress |= OPT(s, nir_opt_uub, &options->opt_uub_options);
 
       /* Remove unused components from IO loads. */
       progress |= OPT(s, nir_opt_shrink_vectors, true);
@@ -1249,7 +1250,18 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so,
       ir3_setup_const_state(s, so, ir3_const_state_mut(so));
    }
 
-   struct ir3_optimize_options optimize_options = {};
+   /* At this point nir_opt_load_store_vectorize has run so it's safe to
+    * optimize imul to umul_16x16. Also call nir_opt_uub manually once to give
+    * it a chance to optimize imul, even if the previous passes didn't make any
+    * progress.
+    */
+   struct ir3_optimize_options optimize_options = {
+      .opt_uub_options = {
+         .opt_imul = true,
+      },
+   };
+
+   progress |= OPT(s, nir_opt_uub, &optimize_options.opt_uub_options);
 
    /* Cleanup code leftover from lowering passes before opt_preamble */
    if (progress) {
diff --git a/src/freedreno/ir3/ir3_nir.h b/src/freedreno/ir3/ir3_nir.h
index e6d5d9690b6..b1b8761b083 100644
--- a/src/freedreno/ir3/ir3_nir.h
+++ b/src/freedreno/ir3/ir3_nir.h
@@ -64,7 +64,9 @@ nir_mem_access_size_align ir3_mem_access_size_align(
 bool ir3_nir_opt_branch_and_or_not(nir_shader *nir);
 bool ir3_nir_opt_triops_bitwise(nir_shader *nir);
 
-struct ir3_optimize_options {};
+struct ir3_optimize_options {
+   nir_opt_uub_options opt_uub_options; /* Options passed to nir_opt_uub. */
+};
 bool ir3_optimize_loop(struct ir3_compiler *compiler,
                        struct ir3_optimize_options *options, nir_shader *s);
 void ir3_nir_lower_io_vars_to_temporaries(nir_shader *s);