nir: switch indirect IO load lowering to nir_lower_io_indirect_loads for GLSL

This reduces GLSL compile times with the gallium noop driver by 0.6%.

This might decrease register usage and cause less code reordering because
nir_lower_io_vars_to_temporaries is no longer called for inputs; that pass
moved most input loads to the top.

radeonsi+ACO shader-db results are noise.
More uniforms are identified as inlinable.

TOTALS FROM ALL SHADERS (58138):
  VGPRs: 2152680 -> 2158032 (0.25 %)
  Code Size: 71008908 -> 71064812 (0.08 %) bytes
  Max Waves: 916943 -> 916924 (-0.00 %)
  Inline Uniforms: 6395 -> 6414 (0.30 %)

Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36018>
This commit is contained in:
Marek Olšák
2025-06-30 02:59:52 -04:00
committed by Marge Bot
parent 3684b93723
commit 2ba2a61101
2 changed files with 21 additions and 7 deletions
+13
View File
@@ -1512,6 +1512,19 @@ gl_nir_lower_optimize_varyings(const struct gl_constants *consts,
for (unsigned i = 0; i < num_shaders; i++) {
nir_shader *nir = shaders[i];
/* Inter-shader code motion in nir_opt_varyings requires that each input
* is loaded only once when possible. Move all input loads to the entry
* block so that CSE can deduplicate them.
*
* We only do that for FS. Moving input loads to the beginning could
* increase register usage for other shaders too much.
*/
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
NIR_PASS(_, nir, nir_opt_move_to_top,
nir_move_to_entry_block_only |
nir_move_to_top_input_loads);
}
/* nir_opt_varyings requires scalar IO. Scalarize all varyings (not just
* the ones we optimize) because we want to re-vectorize everything to
* get better vectorization and other goodies from nir_opt_vectorize_io.
+8 -7
View File
@@ -1050,7 +1050,7 @@ type_size_vec4(const struct glsl_type *type, bool bindless)
void
nir_lower_io_passes(nir_shader *nir, bool renumber_vs_inputs)
{
if (nir->info.stage == MESA_SHADER_COMPUTE)
if (gl_shader_stage_is_compute(nir->info.stage))
return;
bool lower_indirect_inputs =
@@ -1086,10 +1086,9 @@ nir_lower_io_passes(nir_shader *nir, bool renumber_vs_inputs)
(nir->info.stage != MESA_SHADER_FRAGMENT ? nir_var_shader_out : 0);
nir_sort_variables_by_location(nir, varying_var_mask);
if (lower_indirect_inputs || lower_indirect_outputs) {
if (lower_indirect_outputs) {
NIR_PASS(_, nir, nir_lower_io_vars_to_temporaries,
nir_shader_get_entrypoint(nir), lower_indirect_outputs,
lower_indirect_inputs);
nir_shader_get_entrypoint(nir), true, false);
/* We need to lower all the copy_deref's introduced by lower_io_to-
* _temporaries before calling nir_lower_io.
@@ -1102,9 +1101,7 @@ nir_lower_io_passes(nir_shader *nir, bool renumber_vs_inputs)
* The problem is that nir_lower_io_vars_to_temporaries doesn't handle TCS.
*/
if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
NIR_PASS(_, nir, nir_lower_indirect_derefs,
(lower_indirect_inputs ? nir_var_shader_in : 0) |
(lower_indirect_outputs ? nir_var_shader_out : 0),
NIR_PASS(_, nir, nir_lower_indirect_derefs, nir_var_shader_out,
UINT32_MAX);
}
}
@@ -1122,6 +1119,10 @@ nir_lower_io_passes(nir_shader *nir, bool renumber_vs_inputs)
NIR_PASS(_, nir, nir_opt_constant_folding);
NIR_PASS(_, nir, nir_io_add_const_offset_to_base, nir_var_shader_in | nir_var_shader_out);
/* This must be called after nir_io_add_const_offset_to_base. */
if (lower_indirect_inputs)
NIR_PASS(_, nir, nir_lower_io_indirect_loads, nir_var_shader_in);
/* Lower and remove dead derefs and variables to clean up the IR. */
NIR_PASS(_, nir, nir_lower_vars_to_ssa);
NIR_PASS(_, nir, nir_opt_dce);