From e3e7dad82db882859fc5044d92f9fad6bdba86b4 Mon Sep 17 00:00:00 2001
From: Faith Ekstrand <faith.ekstrand@collabora.com>
Date: Tue, 6 May 2025 17:11:44 -0400
Subject: [PATCH] nak: Stop relying on nir_lower_pack

We represent vectors as packed anyway so all these ops are just data
motion that we already know how to do.  Calling into NIR for these
doesn't really help.  Dropping nir_lower_pack also avoids potential
optimization loops in NIR where pack op lowering conflicts with itself.
It's simpler just to handle it all in the back-end and trust our prmt
optimization and copy propagation to clean it all up.

Reviewed-by: Mel Henning <drawoc@darkrefraction.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34849>
---
 src/nouveau/compiler/nak/api.rs      |  1 -
 src/nouveau/compiler/nak/from_nir.rs | 15 +++++++++++++--
 src/nouveau/compiler/nak_nir.c       |  1 -
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/src/nouveau/compiler/nak/api.rs b/src/nouveau/compiler/nak/api.rs
index b3e2a993224..fe8ea3fb2ad 100644
--- a/src/nouveau/compiler/nak/api.rs
+++ b/src/nouveau/compiler/nak/api.rs
@@ -120,7 +120,6 @@ fn nir_options(dev: &nv_device_info) -> nir_shader_compiler_options {
     op.lower_fsqrt = dev.sm < 52;
     op.lower_bitfield_extract = dev.sm >= 70;
     op.lower_bitfield_insert = true;
-    op.lower_pack_64_4x16 = true;
     op.lower_pack_half_2x16 = true;
     op.lower_pack_unorm_2x16 = true;
     op.lower_pack_snorm_2x16 = true;
diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs
index b44dc93bb87..6609f340c51 100644
--- a/src/nouveau/compiler/nak/from_nir.rs
+++ b/src/nouveau/compiler/nak/from_nir.rs
@@ -514,9 +514,13 @@ impl<'a> ShaderFromNir<'a> {
         // scattered across multiple dwords
         match alu.op {
             nir_op_mov
+            | nir_op_pack_32_4x8
             | nir_op_pack_32_4x8_split
+            | nir_op_pack_32_2x16
             | nir_op_pack_32_2x16_split
+            | nir_op_pack_64_2x32
             | nir_op_pack_64_2x32_split
+            | nir_op_pack_64_4x16
             | nir_op_vec2
             | nir_op_vec3
             | nir_op_vec4
@@ -531,9 +535,9 @@ impl<'a> ShaderFromNir<'a> {
                 // value in the vec.  This implicitly makes 64-bit sources look
                 // like two 32-bit values
                 let mut srcs = Vec::new();
-                if alu.op == nir_op_mov {
+                if alu.info().num_inputs == 1 {
                     let src = alu.get_src(0);
-                    for c in 0..alu.def.num_components {
+                    for c in 0..alu.src_components(0) {
                         let s = src.swizzle[usize::from(c)];
                         let (src, byte) =
                             self.get_ssa_comp(src.src.as_def(), s);
@@ -1694,12 +1698,19 @@ impl<'a> ShaderFromNir<'a> {
                     b.sel(ovf_lo.into(), sum_lo.into(), 0.into()).into()
                 }
             }
+            nir_op_unpack_32_2x16 | nir_op_unpack_32_4x8 => {
+                b.copy(srcs(0)).into()
+            }
             nir_op_unpack_32_2x16_split_x => {
                 b.prmt(srcs(0), 0.into(), [0, 1, 4, 4]).into()
             }
             nir_op_unpack_32_2x16_split_y => {
                 b.prmt(srcs(0), 0.into(), [2, 3, 4, 4]).into()
             }
+            nir_op_unpack_64_2x32 | nir_op_unpack_64_4x16 => {
+                let src0 = srcs(0).to_ssa();
+                [b.copy(src0[0].into()), b.copy(src0[1].into())].into()
+            }
             nir_op_unpack_64_2x32_split_x => {
                 let src0_x = srcs(0).as_ssa().unwrap()[0];
                 b.copy(src0_x.into()).into()
diff --git a/src/nouveau/compiler/nak_nir.c b/src/nouveau/compiler/nak_nir.c
index 88aed928b54..4e583e8dab9 100644
--- a/src/nouveau/compiler/nak_nir.c
+++ b/src/nouveau/compiler/nak_nir.c
@@ -171,7 +171,6 @@ optimize_nir(nir_shader *nir, const struct nak_compiler *nak, bool allow_copies)
       OPT(nir, nir_opt_remove_phis);
       OPT(nir, nir_opt_gcm, false);
       OPT(nir, nir_opt_undef);
-      OPT(nir, nir_lower_pack);
    } while (progress);
 
    OPT(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);