amd: enable load/store_shared2_amd for GFX6
Totals from 1509 (2.43% of 62200) affected shaders: (Pitcairn)

MaxWaves: 8078 -> 8057 (-0.26%); split: +0.09%, -0.35%
Instrs: 977182 -> 951746 (-2.60%); split: -2.62%, +0.02%
CodeSize: 4951468 -> 4758192 (-3.90%); split: -3.92%, +0.01%
SGPRs: 76704 -> 76696 (-0.01%)
VGPRs: 81092 -> 81068 (-0.03%); split: -0.34%, +0.31%
Latency: 11663237 -> 11526070 (-1.18%); split: -1.19%, +0.01%
InvThroughput: 6198904 -> 6114851 (-1.36%); split: -1.43%, +0.07%
VClause: 26656 -> 26655 (-0.00%); split: -0.05%, +0.05%
SClause: 22304 -> 22307 (+0.01%); split: -0.03%, +0.04%
Copies: 107503 -> 109564 (+1.92%); split: -0.23%, +2.15%
Branches: 22917 -> 22918 (+0.00%)
PreSGPRs: 42246 -> 42242 (-0.01%); split: -0.01%, +0.00%
PreVGPRs: 64561 -> 64761 (+0.31%); split: -0.01%, +0.32%
VALU: 600285 -> 601139 (+0.14%); split: -0.26%, +0.40%
SALU: 130622 -> 130851 (+0.18%); split: -0.16%, +0.33%

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37682>
This commit is contained in:
committed by
Marge Bot
parent
9abbcbc00e
commit
5682e39e6b
@@ -3216,8 +3216,6 @@ visit_access_shared2_amd(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
Temp address = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[is_store].ssa));
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
|
||||
assert(bld.program->gfx_level >= GFX7);
|
||||
|
||||
bool is64bit = (is_store ? instr->src[0].ssa->bit_size : instr->def.bit_size) == 64;
|
||||
uint8_t offset0 = nir_intrinsic_offset0(instr);
|
||||
uint8_t offset1 = nir_intrinsic_offset1(instr);
|
||||
|
||||
@@ -583,10 +583,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
|
||||
.callback = ac_nir_mem_vectorize_callback,
|
||||
.cb_data = &(struct ac_nir_config){gfx_level, !use_llvm},
|
||||
.robust_modes = 0,
|
||||
/* On GFX6, read2/write2 is out-of-bounds if the offset register is negative, even if
|
||||
* the final offset is not.
|
||||
*/
|
||||
.has_shared2_amd = gfx_level >= GFX7,
|
||||
.has_shared2_amd = true,
|
||||
};
|
||||
NIR_PASS(_, stage->nir, nir_opt_load_store_vectorize, &late_vectorize_opts);
|
||||
}
|
||||
|
||||
@@ -960,10 +960,7 @@ static void run_late_optimization_and_lowering_passes(struct si_nir_shader_ctx *
|
||||
nir_var_shader_temp,
|
||||
.callback = ac_nir_mem_vectorize_callback,
|
||||
.cb_data = &(struct ac_nir_config){sel->screen->info.gfx_level, sel->info.base.use_aco_amd},
|
||||
/* On GFX6, read2/write2 is out-of-bounds if the offset register is negative, even if
|
||||
* the final offset is not.
|
||||
*/
|
||||
.has_shared2_amd = sel->screen->info.gfx_level >= GFX7,
|
||||
.has_shared2_amd = true,
|
||||
});
|
||||
|
||||
/* This must be done again if 8-bit or 16-bit buffer stores were vectorized. */
|
||||
|
||||
Reference in New Issue
Block a user