amd: enable load/store_shared2_amd for GFX6

Totals from 1509 (2.43% of 62200) affected shaders: (Pitcairn)

MaxWaves: 8078 -> 8057 (-0.26%); split: +0.09%, -0.35%
Instrs: 977182 -> 951746 (-2.60%); split: -2.62%, +0.02%
CodeSize: 4951468 -> 4758192 (-3.90%); split: -3.92%, +0.01%
SGPRs: 76704 -> 76696 (-0.01%)
VGPRs: 81092 -> 81068 (-0.03%); split: -0.34%, +0.31%
Latency: 11663237 -> 11526070 (-1.18%); split: -1.19%, +0.01%
InvThroughput: 6198904 -> 6114851 (-1.36%); split: -1.43%, +0.07%
VClause: 26656 -> 26655 (-0.00%); split: -0.05%, +0.05%
SClause: 22304 -> 22307 (+0.01%); split: -0.03%, +0.04%
Copies: 107503 -> 109564 (+1.92%); split: -0.23%, +2.15%
Branches: 22917 -> 22918 (+0.00%)
PreSGPRs: 42246 -> 42242 (-0.01%); split: -0.01%, +0.00%
PreVGPRs: 64561 -> 64761 (+0.31%); split: -0.01%, +0.32%
VALU: 600285 -> 601139 (+0.14%); split: -0.26%, +0.40%
SALU: 130622 -> 130851 (+0.18%); split: -0.16%, +0.33%
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37682>
This commit is contained in:
Daniel Schürmann
2025-10-02 19:28:45 +02:00
committed by Marge Bot
parent 9abbcbc00e
commit 5682e39e6b
3 changed files with 2 additions and 10 deletions

View File

@@ -3216,8 +3216,6 @@ visit_access_shared2_amd(isel_context* ctx, nir_intrinsic_instr* instr)
Temp address = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[is_store].ssa));
Builder bld(ctx->program, ctx->block);
- assert(bld.program->gfx_level >= GFX7);
bool is64bit = (is_store ? instr->src[0].ssa->bit_size : instr->def.bit_size) == 64;
uint8_t offset0 = nir_intrinsic_offset0(instr);
uint8_t offset1 = nir_intrinsic_offset1(instr);

View File

@@ -583,10 +583,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
.callback = ac_nir_mem_vectorize_callback,
.cb_data = &(struct ac_nir_config){gfx_level, !use_llvm},
.robust_modes = 0,
- /* On GFX6, read2/write2 is out-of-bounds if the offset register is negative, even if
- * the final offset is not.
- */
- .has_shared2_amd = gfx_level >= GFX7,
+ .has_shared2_amd = true,
};
NIR_PASS(_, stage->nir, nir_opt_load_store_vectorize, &late_vectorize_opts);
}

View File

@@ -960,10 +960,7 @@ static void run_late_optimization_and_lowering_passes(struct si_nir_shader_ctx *
nir_var_shader_temp,
.callback = ac_nir_mem_vectorize_callback,
.cb_data = &(struct ac_nir_config){sel->screen->info.gfx_level, sel->info.base.use_aco_amd},
- /* On GFX6, read2/write2 is out-of-bounds if the offset register is negative, even if
- * the final offset is not.
- */
- .has_shared2_amd = sel->screen->info.gfx_level >= GFX7,
+ .has_shared2_amd = true,
});
/* This must be done again if 8-bit or 16-bit buffer stores were vectorized. */