amd: enable load/store_shared2_amd for GFX6

Totals from 1509 (2.43% of 62200) affected shaders: (Pitcairn)
MaxWaves: 8078 -> 8057 (-0.26%); split: +0.09%, -0.35%
Instrs: 977182 -> 951746 (-2.60%); split: -2.62%, +0.02%
CodeSize: 4951468 -> 4758192 (-3.90%); split: -3.92%, +0.01%
SGPRs: 76704 -> 76696 (-0.01%)
VGPRs: 81092 -> 81068 (-0.03%); split: -0.34%, +0.31%
Latency: 11663237 -> 11526070 (-1.18%); split: -1.19%, +0.01%
InvThroughput: 6198904 -> 6114851 (-1.36%); split: -1.43%, +0.07%
VClause: 26656 -> 26655 (-0.00%); split: -0.05%, +0.05%
SClause: 22304 -> 22307 (+0.01%); split: -0.03%, +0.04%
Copies: 107503 -> 109564 (+1.92%); split: -0.23%, +2.15%
Branches: 22917 -> 22918 (+0.00%)
PreSGPRs: 42246 -> 42242 (-0.01%); split: -0.01%, +0.00%
PreVGPRs: 64561 -> 64761 (+0.31%); split: -0.01%, +0.32%
VALU: 600285 -> 601139 (+0.14%); split: -0.26%, +0.40%
SALU: 130622 -> 130851 (+0.18%); split: -0.16%, +0.33%

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37682>
2025-10-02 19:28:45 +02:00
parent 9abbcbc00e
commit 5682e39e6b
3 changed files with 2 additions and 10 deletions
--- a/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp
+++ b/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp
@@ -3216,8 +3216,6 @@ visit_access_shared2_amd(isel_context* ctx, nir_intrinsic_instr* instr)
   Temp address = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[is_store].ssa));
   Builder bld(ctx->program, ctx->block);

-   assert(bld.program->gfx_level >= GFX7);
-
   bool is64bit = (is_store ? instr->src[0].ssa->bit_size : instr->def.bit_size) == 64;
   uint8_t offset0 = nir_intrinsic_offset0(instr);
   uint8_t offset1 = nir_intrinsic_offset1(instr);
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -583,10 +583,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
         .callback = ac_nir_mem_vectorize_callback,
         .cb_data = &(struct ac_nir_config){gfx_level, !use_llvm},
         .robust_modes = 0,
-         /* On GFX6, read2/write2 is out-of-bounds if the offset register is negative, even if
-          * the final offset is not.
-          */
-         .has_shared2_amd = gfx_level >= GFX7,
+         .has_shared2_amd = true,
      };
      NIR_PASS(_, stage->nir, nir_opt_load_store_vectorize, &late_vectorize_opts);
   }
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -960,10 +960,7 @@ static void run_late_optimization_and_lowering_passes(struct si_nir_shader_ctx *
                        nir_var_shader_temp,
               .callback = ac_nir_mem_vectorize_callback,
               .cb_data = &(struct ac_nir_config){sel->screen->info.gfx_level, sel->info.base.use_aco_amd},
-               /* On GFX6, read2/write2 is out-of-bounds if the offset register is negative, even if
-                * the final offset is not.
-                */
-               .has_shared2_amd = sel->screen->info.gfx_level >= GFX7,
+               .has_shared2_amd = true,
            });

   /* This must be done again if 8-bit or 16-bit buffer stores were vectorized. */