From e3e7dad82db882859fc5044d92f9fad6bdba86b4 Mon Sep 17 00:00:00 2001 From: Faith Ekstrand Date: Tue, 6 May 2025 17:11:44 -0400 Subject: [PATCH] nak: Stop relying on nir_lower_pack We represent vectors as packed anyway so all these ops are just data motion that we already know how to do. Calling into NIR for these doesn't really help. It also avoids potential optimization loops in NIR where pack op lowering conflicts with itself. It's simpler just to handle it all in the back-end and trust our prmt optimization and copy propagation to clean it all up. Reviewed-by: Mel Henning Part-of: --- src/nouveau/compiler/nak/api.rs | 1 - src/nouveau/compiler/nak/from_nir.rs | 15 +++++++++++++-- src/nouveau/compiler/nak_nir.c | 1 - 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/nouveau/compiler/nak/api.rs b/src/nouveau/compiler/nak/api.rs index b3e2a993224..fe8ea3fb2ad 100644 --- a/src/nouveau/compiler/nak/api.rs +++ b/src/nouveau/compiler/nak/api.rs @@ -120,7 +120,6 @@ fn nir_options(dev: &nv_device_info) -> nir_shader_compiler_options { op.lower_fsqrt = dev.sm < 52; op.lower_bitfield_extract = dev.sm >= 70; op.lower_bitfield_insert = true; - op.lower_pack_64_4x16 = true; op.lower_pack_half_2x16 = true; op.lower_pack_unorm_2x16 = true; op.lower_pack_snorm_2x16 = true; diff --git a/src/nouveau/compiler/nak/from_nir.rs b/src/nouveau/compiler/nak/from_nir.rs index b44dc93bb87..6609f340c51 100644 --- a/src/nouveau/compiler/nak/from_nir.rs +++ b/src/nouveau/compiler/nak/from_nir.rs @@ -514,9 +514,13 @@ impl<'a> ShaderFromNir<'a> { // scattered across multiple dwords match alu.op { nir_op_mov + | nir_op_pack_32_4x8 | nir_op_pack_32_4x8_split + | nir_op_pack_32_2x16 | nir_op_pack_32_2x16_split + | nir_op_pack_64_2x32 | nir_op_pack_64_2x32_split + | nir_op_pack_64_4x16 | nir_op_vec2 | nir_op_vec3 | nir_op_vec4 @@ -531,9 +535,9 @@ impl<'a> ShaderFromNir<'a> { // value in the vec. This implicitly makes 64-bit sources look // like two 32-bit values let mut srcs = Vec::new(); - if alu.op == nir_op_mov { + if alu.info().num_inputs == 1 { let src = alu.get_src(0); - for c in 0..alu.def.num_components { + for c in 0..alu.src_components(0) { let s = src.swizzle[usize::from(c)]; let (src, byte) = self.get_ssa_comp(src.src.as_def(), s); @@ -1694,12 +1698,19 @@ impl<'a> ShaderFromNir<'a> { b.sel(ovf_lo.into(), sum_lo.into(), 0.into()).into() } } + nir_op_unpack_32_2x16 | nir_op_unpack_32_4x8 => { + b.copy(srcs(0)).into() + } nir_op_unpack_32_2x16_split_x => { b.prmt(srcs(0), 0.into(), [0, 1, 4, 4]).into() } nir_op_unpack_32_2x16_split_y => { b.prmt(srcs(0), 0.into(), [2, 3, 4, 4]).into() } + nir_op_unpack_64_2x32 | nir_op_unpack_64_4x16 => { + let src0 = srcs(0).to_ssa(); + [b.copy(src0[0].into()), b.copy(src0[1].into())].into() + } nir_op_unpack_64_2x32_split_x => { let src0_x = srcs(0).as_ssa().unwrap()[0]; b.copy(src0_x.into()).into() diff --git a/src/nouveau/compiler/nak_nir.c b/src/nouveau/compiler/nak_nir.c index 88aed928b54..4e583e8dab9 100644 --- a/src/nouveau/compiler/nak_nir.c +++ b/src/nouveau/compiler/nak_nir.c @@ -171,7 +171,6 @@ optimize_nir(nir_shader *nir, const struct nak_compiler *nak, bool allow_copies) OPT(nir, nir_opt_remove_phis); OPT(nir, nir_opt_gcm, false); OPT(nir, nir_opt_undef); - OPT(nir, nir_lower_pack); } while (progress); OPT(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);