From 5682e39e6b0e78fd67eafb9e532590983d244e52 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?=
Date: Thu, 2 Oct 2025 19:28:45 +0200
Subject: [PATCH] amd: enable load/store_shared2_amd for GFX6

Totals from 1509 (2.43% of 62200) affected shaders: (Pitcairn)
MaxWaves: 8078 -> 8057 (-0.26%); split: +0.09%, -0.35%
Instrs: 977182 -> 951746 (-2.60%); split: -2.62%, +0.02%
CodeSize: 4951468 -> 4758192 (-3.90%); split: -3.92%, +0.01%
SGPRs: 76704 -> 76696 (-0.01%)
VGPRs: 81092 -> 81068 (-0.03%); split: -0.34%, +0.31%
Latency: 11663237 -> 11526070 (-1.18%); split: -1.19%, +0.01%
InvThroughput: 6198904 -> 6114851 (-1.36%); split: -1.43%, +0.07%
VClause: 26656 -> 26655 (-0.00%); split: -0.05%, +0.05%
SClause: 22304 -> 22307 (+0.01%); split: -0.03%, +0.04%
Copies: 107503 -> 109564 (+1.92%); split: -0.23%, +2.15%
Branches: 22917 -> 22918 (+0.00%)
PreSGPRs: 42246 -> 42242 (-0.01%); split: -0.01%, +0.00%
PreVGPRs: 64561 -> 64761 (+0.31%); split: -0.01%, +0.32%
VALU: 600285 -> 601139 (+0.14%); split: -0.26%, +0.40%
SALU: 130622 -> 130851 (+0.18%); split: -0.16%, +0.33%

Part-of:
---
 .../instruction_selection/aco_select_nir_intrinsics.cpp | 2 --
 src/amd/vulkan/radv_pipeline.c                          | 5 +----
 src/gallium/drivers/radeonsi/si_shader.c                | 5 +----
 3 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp b/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp
index 8417fd74714..132dbe5cb1c 100644
--- a/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp
+++ b/src/amd/compiler/instruction_selection/aco_select_nir_intrinsics.cpp
@@ -3216,8 +3216,6 @@ visit_access_shared2_amd(isel_context* ctx, nir_intrinsic_instr* instr)
    Temp address = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[is_store].ssa));
    Builder bld(ctx->program, ctx->block);
 
-   assert(bld.program->gfx_level >= GFX7);
-
    bool is64bit = (is_store ? instr->src[0].ssa->bit_size : instr->def.bit_size) == 64;
    uint8_t offset0 = nir_intrinsic_offset0(instr);
    uint8_t offset1 = nir_intrinsic_offset1(instr);
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 35e204ffea3..4d23189f692 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -583,10 +583,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
          .callback = ac_nir_mem_vectorize_callback,
          .cb_data = &(struct ac_nir_config){gfx_level, !use_llvm},
          .robust_modes = 0,
-         /* On GFX6, read2/write2 is out-of-bounds if the offset register is negative, even if
-          * the final offset is not.
-          */
-         .has_shared2_amd = gfx_level >= GFX7,
+         .has_shared2_amd = true,
       };
       NIR_PASS(_, stage->nir, nir_opt_load_store_vectorize, &late_vectorize_opts);
    }
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 292d4428a41..60e79da6a36 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -960,10 +960,7 @@ static void run_late_optimization_and_lowering_passes(struct si_nir_shader_ctx *
                   nir_var_shader_temp,
          .callback = ac_nir_mem_vectorize_callback,
          .cb_data = &(struct ac_nir_config){sel->screen->info.gfx_level, sel->info.base.use_aco_amd},
-         /* On GFX6, read2/write2 is out-of-bounds if the offset register is negative, even if
-          * the final offset is not.
-          */
-         .has_shared2_amd = sel->screen->info.gfx_level >= GFX7,
+         .has_shared2_amd = true,
       });
 
       /* This must be done again if 8-bit or 16-bit buffer stores were vectorized. */