radv: vectorize lowered shader IO

fossil-db (navi31):
Totals from 2329 (2.93% of 79377) affected shaders:
MaxWaves: 72152 -> 72102 (-0.07%)
Instrs: 1048791 -> 1041920 (-0.66%); split: -0.72%, +0.07%
CodeSize: 5331832 -> 5285572 (-0.87%); split: -0.90%, +0.03%
VGPRs: 113844 -> 113820 (-0.02%); split: -0.14%, +0.12%
Latency: 4349524 -> 4346374 (-0.07%); split: -0.35%, +0.28%
InvThroughput: 609449 -> 609235 (-0.04%); split: -0.27%, +0.24%
VClause: 22613 -> 22451 (-0.72%); split: -1.03%, +0.31%
SClause: 21197 -> 21177 (-0.09%); split: -0.45%, +0.35%
Copies: 81900 -> 82446 (+0.67%); split: -1.51%, +2.18%
PreSGPRs: 94697 -> 93596 (-1.16%); split: -1.23%, +0.07%
PreVGPRs: 69962 -> 70080 (+0.17%); split: -0.01%, +0.18%
VALU: 625247 -> 625390 (+0.02%); split: -0.23%, +0.25%
SALU: 101692 -> 101555 (-0.13%); split: -0.24%, +0.11%
VMEM: 46459 -> 44845 (-3.47%)

fossil-db (navi21):
Totals from 17522 (22.07% of 79377) affected shaders:
MaxWaves: 425698 -> 425460 (-0.06%); split: +0.00%, -0.06%
Instrs: 11444215 -> 11428321 (-0.14%); split: -0.14%, +0.00%
CodeSize: 59227492 -> 59019376 (-0.35%); split: -0.35%, +0.00%
VGPRs: 780920 -> 781208 (+0.04%); split: -0.00%, +0.04%
Latency: 44965072 -> 44926529 (-0.09%); split: -0.12%, +0.03%
InvThroughput: 9718148 -> 9728793 (+0.11%); split: -0.01%, +0.12%
VClause: 225732 -> 225605 (-0.06%); split: -0.10%, +0.04%
SClause: 217196 -> 217160 (-0.02%); split: -0.03%, +0.01%
Copies: 1050351 -> 1065263 (+1.42%); split: -0.03%, +1.45%
PreSGPRs: 747538 -> 747223 (-0.04%); split: -0.05%, +0.01%
PreVGPRs: 626702 -> 626748 (+0.01%); split: -0.00%, +0.01%
VALU: 6629403 -> 6643822 (+0.22%); split: -0.01%, +0.23%
SALU: 1898492 -> 1898452 (-0.00%); split: -0.00%, +0.00%
VMEM: 529942 -> 528361 (-0.30%)

fossil-db (vega10):
Totals from 1791 (2.84% of 62962) affected shaders:
MaxWaves: 12270 -> 12253 (-0.14%); split: +0.01%, -0.15%
Instrs: 602026 -> 597473 (-0.76%); split: -0.83%, +0.08%
CodeSize: 3109872 -> 3071664 (-1.23%); split: -1.26%, +0.03%
SGPRs: 137826 -> 137938 (+0.08%); split: -0.10%, +0.19%
VGPRs: 70364 -> 70520 (+0.22%); split: -0.03%, +0.26%
Latency: 4757850 -> 4781905 (+0.51%); split: -0.35%, +0.86%
InvThroughput: 2296941 -> 2310685 (+0.60%); split: -0.14%, +0.74%
VClause: 14161 -> 14050 (-0.78%); split: -1.23%, +0.44%
SClause: 14058 -> 14077 (+0.14%); split: -0.57%, +0.70%
Copies: 40954 -> 42191 (+3.02%); split: -1.69%, +4.71%
PreSGPRs: 64314 -> 63214 (-1.71%); split: -1.81%, +0.10%
PreVGPRs: 53558 -> 53894 (+0.63%); split: -0.01%, +0.64%
VALU: 449920 -> 450830 (+0.20%); split: -0.19%, +0.39%
SALU: 32973 -> 32839 (-0.41%); split: -0.76%, +0.35%
VMEM: 28796 -> 25151 (-12.66%)

fossil-db (polaris10):
Totals from 1769 (2.86% of 61794) affected shaders:
MaxWaves: 12024 -> 12021 (-0.02%)
Instrs: 474761 -> 470760 (-0.84%); split: -0.94%, +0.10%
CodeSize: 2447964 -> 2420712 (-1.11%); split: -1.15%, +0.04%
SGPRs: 129664 -> 129728 (+0.05%); split: -0.14%, +0.19%
VGPRs: 65216 -> 65560 (+0.53%); split: -0.05%, +0.58%
Latency: 4304734 -> 4318319 (+0.32%); split: -0.41%, +0.72%
InvThroughput: 2114950 -> 2122580 (+0.36%); split: -0.18%, +0.54%
VClause: 10933 -> 10808 (-1.14%); split: -1.42%, +0.27%
SClause: 11430 -> 11446 (+0.14%); split: -0.70%, +0.84%
Copies: 32290 -> 31891 (-1.24%); split: -2.80%, +1.56%
PreSGPRs: 58184 -> 57096 (-1.87%); split: -1.98%, +0.11%
PreVGPRs: 48757 -> 48874 (+0.24%); split: -0.02%, +0.26%
VALU: 359097 -> 358582 (-0.14%); split: -0.25%, +0.11%
SALU: 26279 -> 25934 (-1.31%); split: -1.75%, +0.43%
VMEM: 18825 -> 17247 (-8.38%)

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29242>
This commit is contained in:
Rhys Perry
2025-01-28 16:39:40 +00:00
committed by Marge Bot
parent 953faac23e
commit c3d27906d8
2 changed files with 11 additions and 6 deletions

View File

@@ -462,10 +462,12 @@ ac_nir_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigne
low->intrinsic == nir_intrinsic_load_smem_amd ||
low->intrinsic == nir_intrinsic_load_push_constant;
bool is_store = !nir_intrinsic_infos[low->intrinsic].has_dest;
bool is_scratch = low->intrinsic == nir_intrinsic_load_stack ||
low->intrinsic == nir_intrinsic_store_stack ||
low->intrinsic == nir_intrinsic_load_scratch ||
low->intrinsic == nir_intrinsic_store_scratch;
bool swizzled = low->intrinsic == nir_intrinsic_load_stack ||
low->intrinsic == nir_intrinsic_store_stack ||
low->intrinsic == nir_intrinsic_load_scratch ||
low->intrinsic == nir_intrinsic_store_scratch ||
(nir_intrinsic_has_access(low) &&
nir_intrinsic_access(low) & ACCESS_IS_SWIZZLED_AMD);
bool is_shared = low->intrinsic == nir_intrinsic_load_shared ||
low->intrinsic == nir_intrinsic_store_shared ||
low->intrinsic == nir_intrinsic_load_deref ||
@@ -500,6 +502,8 @@ ac_nir_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigne
case nir_intrinsic_store_deref:
case nir_intrinsic_load_shared:
case nir_intrinsic_store_shared:
case nir_intrinsic_load_buffer_amd:
case nir_intrinsic_store_buffer_amd:
break;
default:
return false;
@@ -521,7 +525,7 @@ ac_nir_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigne
return false;
/* GFX6-8 only support 32-bit scratch loads/stores. */
if (config->gfx_level <= GFX8 && is_scratch && aligned_new_size > 32)
if (config->gfx_level <= GFX8 && swizzled && aligned_new_size > 32)
return false;
}

View File

@@ -523,7 +523,8 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
NIR_PASS(_, stage->nir, nir_opt_cse);
nir_load_store_vectorize_options late_vectorize_opts = {
.modes = nir_var_mem_global, /* for descriptor loads */
.modes =
nir_var_mem_global | nir_var_mem_shared | nir_var_shader_out | nir_var_mem_task_payload | nir_var_shader_in,
.callback = ac_nir_mem_vectorize_callback,
.cb_data = &(struct ac_nir_config){gfx_level, !use_llvm},
.robust_modes = 0,