From 2ba2a61101b35d7549edb7324e509943e4be2edd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 30 Jun 2025 02:59:52 -0400 Subject: [PATCH] nir: switch indirect IO load lowering to nir_lower_io_indirect_loads for GLSL This reduces GLSL compile times with the gallium noop driver by 0.6%. This might decrease register usage and do less code reordering because nir_lower_io_vars_to_temporaries is no longer called for inputs, which moved most input loads to the top. radeonsi+ACO shader-db results are noise. More uniforms are identified as inlinable. TOTALS FROM ALL SHADERS (58138): VGPRs: 2152680 -> 2158032 (0.25 %) Code Size: 71008908 -> 71064812 (0.08 %) bytes Max Waves: 916943 -> 916924 (-0.00 %) Inline Uniforms: 6395 -> 6414 (0.30 %) Reviewed-by: Alyssa Rosenzweig Part-of: --- src/compiler/glsl/gl_nir_linker.c | 13 +++++++++++++ src/compiler/nir/nir_lower_io.c | 15 ++++++++------- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/src/compiler/glsl/gl_nir_linker.c b/src/compiler/glsl/gl_nir_linker.c index 9443d75453b..fd51bf5a43f 100644 --- a/src/compiler/glsl/gl_nir_linker.c +++ b/src/compiler/glsl/gl_nir_linker.c @@ -1512,6 +1512,19 @@ gl_nir_lower_optimize_varyings(const struct gl_constants *consts, for (unsigned i = 0; i < num_shaders; i++) { nir_shader *nir = shaders[i]; + /* Inter-shader code motion in nir_opt_varyings requires that each input + * load is loaded only once when possible, so move all input loads + * to the entry block, so that CSE can deduplicate them. + * + * We only do that for FS. Moving input loads to the beginning could + * increase register usage for other shaders too much. + */ + if (nir->info.stage == MESA_SHADER_FRAGMENT) { + NIR_PASS(_, nir, nir_opt_move_to_top, + nir_move_to_entry_block_only | + nir_move_to_top_input_loads); + } + /* nir_opt_varyings requires scalar IO. Scalarize all varyings (not just * the ones we optimize) because we want to re-vectorize everything to * get better vectorization and other goodies from nir_opt_vectorize_io. diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c index 10e8684a4ef..8dbf9bfd0b9 100644 --- a/src/compiler/nir/nir_lower_io.c +++ b/src/compiler/nir/nir_lower_io.c @@ -1050,7 +1050,7 @@ type_size_vec4(const struct glsl_type *type, bool bindless) void nir_lower_io_passes(nir_shader *nir, bool renumber_vs_inputs) { - if (nir->info.stage == MESA_SHADER_COMPUTE) + if (gl_shader_stage_is_compute(nir->info.stage)) return; bool lower_indirect_inputs = @@ -1086,10 +1086,9 @@ nir_lower_io_passes(nir_shader *nir, bool renumber_vs_inputs) (nir->info.stage != MESA_SHADER_FRAGMENT ? nir_var_shader_out : 0); nir_sort_variables_by_location(nir, varying_var_mask); - if (lower_indirect_inputs || lower_indirect_outputs) { + if (lower_indirect_outputs) { NIR_PASS(_, nir, nir_lower_io_vars_to_temporaries, - nir_shader_get_entrypoint(nir), lower_indirect_outputs, - lower_indirect_inputs); + nir_shader_get_entrypoint(nir), true, false); /* We need to lower all the copy_deref's introduced by lower_io_to- * _temporaries before calling nir_lower_io. @@ -1102,9 +1101,7 @@ nir_lower_io_passes(nir_shader *nir, bool renumber_vs_inputs) * The problem is that nir_lower_io_vars_to_temporaries doesn't handle TCS. */ if (nir->info.stage == MESA_SHADER_TESS_CTRL) { - NIR_PASS(_, nir, nir_lower_indirect_derefs, - (lower_indirect_inputs ? nir_var_shader_in : 0) | - (lower_indirect_outputs ? nir_var_shader_out : 0), + NIR_PASS(_, nir, nir_lower_indirect_derefs, nir_var_shader_out, UINT32_MAX); } } @@ -1122,6 +1119,10 @@ nir_lower_io_passes(nir_shader *nir, bool renumber_vs_inputs) NIR_PASS(_, nir, nir_opt_constant_folding); NIR_PASS(_, nir, nir_io_add_const_offset_to_base, nir_var_shader_in | nir_var_shader_out); + /* This must be called after nir_io_add_const_offset_to_base. */ + if (lower_indirect_inputs) + NIR_PASS(_, nir, nir_lower_io_indirect_loads, nir_var_shader_in); + /* Lower and remove dead derefs and variables to clean up the IR. */ NIR_PASS(_, nir, nir_lower_vars_to_ssa); NIR_PASS(_, nir, nir_opt_dce);