nak: Stop relying on nir_lower_pack
We represent vectors as packed anyway, so all these ops are just data motion that we already know how to do. Calling into NIR for these doesn't really help. Handling them in the back-end also avoids potential optimization loops in NIR where pack op lowering conflicts with itself. It's simpler just to handle it all in the back-end and trust our prmt optimization and copy propagation to clean it all up.

Reviewed-by: Mel Henning <drawoc@darkrefraction.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34849>
This commit is contained in:
committed by
Marge Bot
parent
efd1cddbe9
commit
e3e7dad82d
@@ -120,7 +120,6 @@ fn nir_options(dev: &nv_device_info) -> nir_shader_compiler_options {
|
||||
op.lower_fsqrt = dev.sm < 52;
|
||||
op.lower_bitfield_extract = dev.sm >= 70;
|
||||
op.lower_bitfield_insert = true;
|
||||
op.lower_pack_64_4x16 = true;
|
||||
op.lower_pack_half_2x16 = true;
|
||||
op.lower_pack_unorm_2x16 = true;
|
||||
op.lower_pack_snorm_2x16 = true;
|
||||
|
||||
@@ -514,9 +514,13 @@ impl<'a> ShaderFromNir<'a> {
|
||||
// scattered across multiple dwords
|
||||
match alu.op {
|
||||
nir_op_mov
|
||||
| nir_op_pack_32_4x8
|
||||
| nir_op_pack_32_4x8_split
|
||||
| nir_op_pack_32_2x16
|
||||
| nir_op_pack_32_2x16_split
|
||||
| nir_op_pack_64_2x32
|
||||
| nir_op_pack_64_2x32_split
|
||||
| nir_op_pack_64_4x16
|
||||
| nir_op_vec2
|
||||
| nir_op_vec3
|
||||
| nir_op_vec4
|
||||
@@ -531,9 +535,9 @@ impl<'a> ShaderFromNir<'a> {
|
||||
// value in the vec. This implicitly makes 64-bit sources look
|
||||
// like two 32-bit values
|
||||
let mut srcs = Vec::new();
|
||||
if alu.op == nir_op_mov {
|
||||
if alu.info().num_inputs == 1 {
|
||||
let src = alu.get_src(0);
|
||||
for c in 0..alu.def.num_components {
|
||||
for c in 0..alu.src_components(0) {
|
||||
let s = src.swizzle[usize::from(c)];
|
||||
let (src, byte) =
|
||||
self.get_ssa_comp(src.src.as_def(), s);
|
||||
@@ -1694,12 +1698,19 @@ impl<'a> ShaderFromNir<'a> {
|
||||
b.sel(ovf_lo.into(), sum_lo.into(), 0.into()).into()
|
||||
}
|
||||
}
|
||||
nir_op_unpack_32_2x16 | nir_op_unpack_32_4x8 => {
|
||||
b.copy(srcs(0)).into()
|
||||
}
|
||||
nir_op_unpack_32_2x16_split_x => {
|
||||
b.prmt(srcs(0), 0.into(), [0, 1, 4, 4]).into()
|
||||
}
|
||||
nir_op_unpack_32_2x16_split_y => {
|
||||
b.prmt(srcs(0), 0.into(), [2, 3, 4, 4]).into()
|
||||
}
|
||||
nir_op_unpack_64_2x32 | nir_op_unpack_64_4x16 => {
|
||||
let src0 = srcs(0).to_ssa();
|
||||
[b.copy(src0[0].into()), b.copy(src0[1].into())].into()
|
||||
}
|
||||
nir_op_unpack_64_2x32_split_x => {
|
||||
let src0_x = srcs(0).as_ssa().unwrap()[0];
|
||||
b.copy(src0_x.into()).into()
|
||||
|
||||
@@ -171,7 +171,6 @@ optimize_nir(nir_shader *nir, const struct nak_compiler *nak, bool allow_copies)
|
||||
OPT(nir, nir_opt_remove_phis);
|
||||
OPT(nir, nir_opt_gcm, false);
|
||||
OPT(nir, nir_opt_undef);
|
||||
OPT(nir, nir_lower_pack);
|
||||
} while (progress);
|
||||
|
||||
OPT(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
|
||||
|
||||
Reference in New Issue
Block a user