nak: Stop relying on nir_lower_pack

We represent vectors as packed anyway so all these ops are just data
motion that we already know how to do.  Calling into NIR for these
doesn't really help.  Handling these ops in the back-end also avoids potential
optimization loops in NIR where pack op lowering conflicts with itself.  It's simpler just to
handle it all in the back-end and trust our prmt optimization and copy
propagation to clean it all up.

Reviewed-by: Mel Henning <drawoc@darkrefraction.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34849>
This commit is contained in:
Faith Ekstrand
2025-05-06 17:11:44 -04:00
committed by Marge Bot
parent efd1cddbe9
commit e3e7dad82d
3 changed files with 13 additions and 4 deletions

View File

@@ -120,7 +120,6 @@ fn nir_options(dev: &nv_device_info) -> nir_shader_compiler_options {
op.lower_fsqrt = dev.sm < 52;
op.lower_bitfield_extract = dev.sm >= 70;
op.lower_bitfield_insert = true;
op.lower_pack_64_4x16 = true;
op.lower_pack_half_2x16 = true;
op.lower_pack_unorm_2x16 = true;
op.lower_pack_snorm_2x16 = true;

View File

@@ -514,9 +514,13 @@ impl<'a> ShaderFromNir<'a> {
// scattered across multiple dwords
match alu.op {
nir_op_mov
| nir_op_pack_32_4x8
| nir_op_pack_32_4x8_split
| nir_op_pack_32_2x16
| nir_op_pack_32_2x16_split
| nir_op_pack_64_2x32
| nir_op_pack_64_2x32_split
| nir_op_pack_64_4x16
| nir_op_vec2
| nir_op_vec3
| nir_op_vec4
@@ -531,9 +535,9 @@ impl<'a> ShaderFromNir<'a> {
// value in the vec. This implicitly makes 64-bit sources look
// like two 32-bit values
let mut srcs = Vec::new();
if alu.op == nir_op_mov {
if alu.info().num_inputs == 1 {
let src = alu.get_src(0);
for c in 0..alu.def.num_components {
for c in 0..alu.src_components(0) {
let s = src.swizzle[usize::from(c)];
let (src, byte) =
self.get_ssa_comp(src.src.as_def(), s);
@@ -1694,12 +1698,19 @@ impl<'a> ShaderFromNir<'a> {
b.sel(ovf_lo.into(), sum_lo.into(), 0.into()).into()
}
}
nir_op_unpack_32_2x16 | nir_op_unpack_32_4x8 => {
b.copy(srcs(0)).into()
}
nir_op_unpack_32_2x16_split_x => {
b.prmt(srcs(0), 0.into(), [0, 1, 4, 4]).into()
}
nir_op_unpack_32_2x16_split_y => {
b.prmt(srcs(0), 0.into(), [2, 3, 4, 4]).into()
}
nir_op_unpack_64_2x32 | nir_op_unpack_64_4x16 => {
let src0 = srcs(0).to_ssa();
[b.copy(src0[0].into()), b.copy(src0[1].into())].into()
}
nir_op_unpack_64_2x32_split_x => {
let src0_x = srcs(0).as_ssa().unwrap()[0];
b.copy(src0_x.into()).into()

View File

@@ -171,7 +171,6 @@ optimize_nir(nir_shader *nir, const struct nak_compiler *nak, bool allow_copies)
OPT(nir, nir_opt_remove_phis);
OPT(nir, nir_opt_gcm, false);
OPT(nir, nir_opt_undef);
OPT(nir, nir_lower_pack);
} while (progress);
OPT(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);