nak: Stop relying on nir_lower_pack

We represent vectors as packed anyway so all these ops are just data
motion that we already know how to do.  Calling into NIR for these
doesn't really help.  Handling these ops in the back-end also avoids potential
optimization loops in NIR where pack op lowering conflicts with itself.  It's simpler just to
handle it all in the back-end and trust our prmt optimization and copy
propagation to clean it all up.

Reviewed-by: Mel Henning <drawoc@darkrefraction.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34849>
This commit is contained in:
Faith Ekstrand
2025-05-06 17:11:44 -04:00
committed by Marge Bot
parent efd1cddbe9
commit e3e7dad82d
3 changed files with 13 additions and 4 deletions

View File

@@ -120,7 +120,6 @@ fn nir_options(dev: &nv_device_info) -> nir_shader_compiler_options {
op.lower_fsqrt = dev.sm < 52;
op.lower_bitfield_extract = dev.sm >= 70;
op.lower_bitfield_insert = true;
op.lower_pack_64_4x16 = true;
op.lower_pack_half_2x16 = true;
op.lower_pack_unorm_2x16 = true;
op.lower_pack_snorm_2x16 = true;

View File

@@ -514,9 +514,13 @@ impl<'a> ShaderFromNir<'a> {
// scattered across multiple dwords
match alu.op {
nir_op_mov
| nir_op_pack_32_4x8
| nir_op_pack_32_4x8_split
| nir_op_pack_32_2x16
| nir_op_pack_32_2x16_split
| nir_op_pack_64_2x32
| nir_op_pack_64_2x32_split
| nir_op_pack_64_4x16
| nir_op_vec2
| nir_op_vec3
| nir_op_vec4
@@ -531,9 +535,9 @@ impl<'a> ShaderFromNir<'a> {
// value in the vec. This implicitly makes 64-bit sources look
// like two 32-bit values
let mut srcs = Vec::new();
if alu.op == nir_op_mov {
if alu.info().num_inputs == 1 {
let src = alu.get_src(0);
for c in 0..alu.def.num_components {
for c in 0..alu.src_components(0) {
let s = src.swizzle[usize::from(c)];
let (src, byte) =
self.get_ssa_comp(src.src.as_def(), s);
@@ -1694,12 +1698,19 @@ impl<'a> ShaderFromNir<'a> {
b.sel(ovf_lo.into(), sum_lo.into(), 0.into()).into()
}
}
nir_op_unpack_32_2x16 | nir_op_unpack_32_4x8 => {
b.copy(srcs(0)).into()
}
nir_op_unpack_32_2x16_split_x => {
b.prmt(srcs(0), 0.into(), [0, 1, 4, 4]).into()
}
nir_op_unpack_32_2x16_split_y => {
b.prmt(srcs(0), 0.into(), [2, 3, 4, 4]).into()
}
nir_op_unpack_64_2x32 | nir_op_unpack_64_4x16 => {
let src0 = srcs(0).to_ssa();
[b.copy(src0[0].into()), b.copy(src0[1].into())].into()
}
nir_op_unpack_64_2x32_split_x => {
let src0_x = srcs(0).as_ssa().unwrap()[0];
b.copy(src0_x.into()).into()

View File

@@ -171,7 +171,6 @@ optimize_nir(nir_shader *nir, const struct nak_compiler *nak, bool allow_copies)
OPT(nir, nir_opt_remove_phis);
OPT(nir, nir_opt_gcm, false);
OPT(nir, nir_opt_undef);
OPT(nir, nir_lower_pack);
} while (progress);
OPT(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);