broadcom/compiler: lower packing after vectorization

The vectorization pass can inject 32_2x16 (un)packing opcodes upon successful vectorization of 16-bit operations into 32-bit counterparts, so make sure we lower these to something our backend can handle.

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14648>
2022-01-19 09:48:49 +01:00
parent 4b24373137
commit 2a420bdf92
3 changed files with 14 additions and 1 deletions
@@ -1901,7 +1901,14 @@ v3d_optimize_nir(struct v3d_compile *c, struct nir_shader *s)
                        .callback = mem_vectorize_callback,
                        .robust_modes = 0,
                };
-                NIR_PASS(progress, s, nir_opt_load_store_vectorize, &vectorize_opts);
+                bool vectorize_progress = false;
+                NIR_PASS(vectorize_progress, s, nir_opt_load_store_vectorize,
+                         &vectorize_opts);
+                if (vectorize_progress) {
+                        NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL);
+                        NIR_PASS(progress, s, nir_lower_pack);
+                        progress = true;
+                }

                if (lower_flrp != 0) {
                        bool lower_flrp_progress = false;
@@ -215,6 +215,9 @@ const nir_shader_compiler_options v3dv_nir_options = {
   .lower_unpack_snorm_4x8 = true,
   .lower_pack_half_2x16 = true,
   .lower_unpack_half_2x16 = true,
+   .lower_pack_32_2x16 = true,
+   .lower_pack_32_2x16_split = true,
+   .lower_unpack_32_2x16_split = true,
   /* FIXME: see if we can avoid the uadd_carry and usub_borrow lowering and
    * get the tests to pass since it might produce slightly better code.
    */
@@ -705,6 +705,9 @@ static const nir_shader_compiler_options v3d_nir_options = {
        .lower_unpack_snorm_4x8 = true,
        .lower_pack_half_2x16 = true,
        .lower_unpack_half_2x16 = true,
+        .lower_pack_32_2x16 = true,
+        .lower_pack_32_2x16_split = true,
+        .lower_unpack_32_2x16_split = true,
        .lower_fdiv = true,
        .lower_find_lsb = true,
        .lower_ffma16 = true,