radv: vectorize lowered shader IO
fossil-db (navi31):
Totals from 2329 (2.93% of 79377) affected shaders:
MaxWaves: 72152 -> 72102 (-0.07%)
Instrs: 1048791 -> 1041920 (-0.66%); split: -0.72%, +0.07%
CodeSize: 5331832 -> 5285572 (-0.87%); split: -0.90%, +0.03%
VGPRs: 113844 -> 113820 (-0.02%); split: -0.14%, +0.12%
Latency: 4349524 -> 4346374 (-0.07%); split: -0.35%, +0.28%
InvThroughput: 609449 -> 609235 (-0.04%); split: -0.27%, +0.24%
VClause: 22613 -> 22451 (-0.72%); split: -1.03%, +0.31%
SClause: 21197 -> 21177 (-0.09%); split: -0.45%, +0.35%
Copies: 81900 -> 82446 (+0.67%); split: -1.51%, +2.18%
PreSGPRs: 94697 -> 93596 (-1.16%); split: -1.23%, +0.07%
PreVGPRs: 69962 -> 70080 (+0.17%); split: -0.01%, +0.18%
VALU: 625247 -> 625390 (+0.02%); split: -0.23%, +0.25%
SALU: 101692 -> 101555 (-0.13%); split: -0.24%, +0.11%
VMEM: 46459 -> 44845 (-3.47%)

fossil-db (navi21):
Totals from 17522 (22.07% of 79377) affected shaders:
MaxWaves: 425698 -> 425460 (-0.06%); split: +0.00%, -0.06%
Instrs: 11444215 -> 11428321 (-0.14%); split: -0.14%, +0.00%
CodeSize: 59227492 -> 59019376 (-0.35%); split: -0.35%, +0.00%
VGPRs: 780920 -> 781208 (+0.04%); split: -0.00%, +0.04%
Latency: 44965072 -> 44926529 (-0.09%); split: -0.12%, +0.03%
InvThroughput: 9718148 -> 9728793 (+0.11%); split: -0.01%, +0.12%
VClause: 225732 -> 225605 (-0.06%); split: -0.10%, +0.04%
SClause: 217196 -> 217160 (-0.02%); split: -0.03%, +0.01%
Copies: 1050351 -> 1065263 (+1.42%); split: -0.03%, +1.45%
PreSGPRs: 747538 -> 747223 (-0.04%); split: -0.05%, +0.01%
PreVGPRs: 626702 -> 626748 (+0.01%); split: -0.00%, +0.01%
VALU: 6629403 -> 6643822 (+0.22%); split: -0.01%, +0.23%
SALU: 1898492 -> 1898452 (-0.00%); split: -0.00%, +0.00%
VMEM: 529942 -> 528361 (-0.30%)

fossil-db (vega10):
Totals from 1791 (2.84% of 62962) affected shaders:
MaxWaves: 12270 -> 12253 (-0.14%); split: +0.01%, -0.15%
Instrs: 602026 -> 597473 (-0.76%); split: -0.83%, +0.08%
CodeSize: 3109872 -> 3071664 (-1.23%); split: -1.26%, +0.03%
SGPRs: 137826 -> 137938 (+0.08%); split: -0.10%, +0.19%
VGPRs: 70364 -> 70520 (+0.22%); split: -0.03%, +0.26%
Latency: 4757850 -> 4781905 (+0.51%); split: -0.35%, +0.86%
InvThroughput: 2296941 -> 2310685 (+0.60%); split: -0.14%, +0.74%
VClause: 14161 -> 14050 (-0.78%); split: -1.23%, +0.44%
SClause: 14058 -> 14077 (+0.14%); split: -0.57%, +0.70%
Copies: 40954 -> 42191 (+3.02%); split: -1.69%, +4.71%
PreSGPRs: 64314 -> 63214 (-1.71%); split: -1.81%, +0.10%
PreVGPRs: 53558 -> 53894 (+0.63%); split: -0.01%, +0.64%
VALU: 449920 -> 450830 (+0.20%); split: -0.19%, +0.39%
SALU: 32973 -> 32839 (-0.41%); split: -0.76%, +0.35%
VMEM: 28796 -> 25151 (-12.66%)

fossil-db (polaris10):
Totals from 1769 (2.86% of 61794) affected shaders:
MaxWaves: 12024 -> 12021 (-0.02%)
Instrs: 474761 -> 470760 (-0.84%); split: -0.94%, +0.10%
CodeSize: 2447964 -> 2420712 (-1.11%); split: -1.15%, +0.04%
SGPRs: 129664 -> 129728 (+0.05%); split: -0.14%, +0.19%
VGPRs: 65216 -> 65560 (+0.53%); split: -0.05%, +0.58%
Latency: 4304734 -> 4318319 (+0.32%); split: -0.41%, +0.72%
InvThroughput: 2114950 -> 2122580 (+0.36%); split: -0.18%, +0.54%
VClause: 10933 -> 10808 (-1.14%); split: -1.42%, +0.27%
SClause: 11430 -> 11446 (+0.14%); split: -0.70%, +0.84%
Copies: 32290 -> 31891 (-1.24%); split: -2.80%, +1.56%
PreSGPRs: 58184 -> 57096 (-1.87%); split: -1.98%, +0.11%
PreVGPRs: 48757 -> 48874 (+0.24%); split: -0.02%, +0.26%
VALU: 359097 -> 358582 (-0.14%); split: -0.25%, +0.11%
SALU: 26279 -> 25934 (-1.31%); split: -1.75%, +0.43%
VMEM: 18825 -> 17247 (-8.38%)

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29242>
This commit is contained in:
@@ -462,10 +462,12 @@ ac_nir_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigne
|
||||
low->intrinsic == nir_intrinsic_load_smem_amd ||
|
||||
low->intrinsic == nir_intrinsic_load_push_constant;
|
||||
bool is_store = !nir_intrinsic_infos[low->intrinsic].has_dest;
|
||||
- bool is_scratch = low->intrinsic == nir_intrinsic_load_stack ||
|
||||
- low->intrinsic == nir_intrinsic_store_stack ||
|
||||
- low->intrinsic == nir_intrinsic_load_scratch ||
|
||||
- low->intrinsic == nir_intrinsic_store_scratch;
|
||||
+ bool swizzled = low->intrinsic == nir_intrinsic_load_stack ||
|
||||
+ low->intrinsic == nir_intrinsic_store_stack ||
|
||||
+ low->intrinsic == nir_intrinsic_load_scratch ||
|
||||
+ low->intrinsic == nir_intrinsic_store_scratch ||
|
||||
+ (nir_intrinsic_has_access(low) &&
|
||||
+ nir_intrinsic_access(low) & ACCESS_IS_SWIZZLED_AMD);
|
||||
bool is_shared = low->intrinsic == nir_intrinsic_load_shared ||
|
||||
low->intrinsic == nir_intrinsic_store_shared ||
|
||||
low->intrinsic == nir_intrinsic_load_deref ||
|
||||
@@ -500,6 +502,8 @@ ac_nir_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigne
|
||||
case nir_intrinsic_store_deref:
|
||||
case nir_intrinsic_load_shared:
|
||||
case nir_intrinsic_store_shared:
|
||||
+ case nir_intrinsic_load_buffer_amd:
|
||||
+ case nir_intrinsic_store_buffer_amd:
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
@@ -521,7 +525,7 @@ ac_nir_mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigne
|
||||
return false;
|
||||
|
||||
/* GFX6-8 only support 32-bit scratch loads/stores. */
|
||||
- if (config->gfx_level <= GFX8 && is_scratch && aligned_new_size > 32)
|
||||
+ if (config->gfx_level <= GFX8 && swizzled && aligned_new_size > 32)
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@@ -523,7 +523,8 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
|
||||
NIR_PASS(_, stage->nir, nir_opt_cse);
|
||||
|
||||
nir_load_store_vectorize_options late_vectorize_opts = {
|
||||
- .modes = nir_var_mem_global, /* for descriptor loads */
|
||||
+ .modes =
|
||||
+ nir_var_mem_global | nir_var_mem_shared | nir_var_shader_out | nir_var_mem_task_payload | nir_var_shader_in,
|
||||
.callback = ac_nir_mem_vectorize_callback,
|
||||
.cb_data = &(struct ac_nir_config){gfx_level, !use_llvm},
|
||||
.robust_modes = 0,
|
||||
|
||||
Reference in New Issue
Block a user