nir: switch indirect IO load lowering to nir_lower_io_indirect_loads for GLSL
This reduces GLSL compile times with the gallium noop driver by 0.6%.

This might decrease register usage and cause less code reordering, because nir_lower_io_vars_to_temporaries, which moved most input loads to the top, is no longer called for inputs.

radeonsi+ACO shader-db results are noise. More uniforms are identified as inlinable.

TOTALS FROM ALL SHADERS (58138):
  VGPRs: 2152680 -> 2158032 (0.25 %)
  Code Size: 71008908 -> 71064812 (0.08 %) bytes
  Max Waves: 916943 -> 916924 (-0.00 %)
  Inline Uniforms: 6395 -> 6414 (0.30 %)

Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36018>
This commit is contained in:
@@ -1512,6 +1512,19 @@ gl_nir_lower_optimize_varyings(const struct gl_constants *consts,
|
||||
for (unsigned i = 0; i < num_shaders; i++) {
|
||||
nir_shader *nir = shaders[i];
|
||||
|
||||
/* Inter-shader code motion in nir_opt_varyings requires that each input
|
||||
* load is loaded only once when possible, so move all input loads
|
||||
* to the entry block, so that CSE can deduplicate them.
|
||||
*
|
||||
* We only do that for FS. Moving input loads to the beginning could
|
||||
* increase register usage for other shaders too much.
|
||||
*/
|
||||
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
NIR_PASS(_, nir, nir_opt_move_to_top,
|
||||
nir_move_to_entry_block_only |
|
||||
nir_move_to_top_input_loads);
|
||||
}
|
||||
|
||||
/* nir_opt_varyings requires scalar IO. Scalarize all varyings (not just
|
||||
* the ones we optimize) because we want to re-vectorize everything to
|
||||
* get better vectorization and other goodies from nir_opt_vectorize_io.
|
||||
|
||||
@@ -1050,7 +1050,7 @@ type_size_vec4(const struct glsl_type *type, bool bindless)
|
||||
void
|
||||
nir_lower_io_passes(nir_shader *nir, bool renumber_vs_inputs)
|
||||
{
|
||||
if (nir->info.stage == MESA_SHADER_COMPUTE)
|
||||
if (gl_shader_stage_is_compute(nir->info.stage))
|
||||
return;
|
||||
|
||||
bool lower_indirect_inputs =
|
||||
@@ -1086,10 +1086,9 @@ nir_lower_io_passes(nir_shader *nir, bool renumber_vs_inputs)
|
||||
(nir->info.stage != MESA_SHADER_FRAGMENT ? nir_var_shader_out : 0);
|
||||
nir_sort_variables_by_location(nir, varying_var_mask);
|
||||
|
||||
if (lower_indirect_inputs || lower_indirect_outputs) {
|
||||
if (lower_indirect_outputs) {
|
||||
NIR_PASS(_, nir, nir_lower_io_vars_to_temporaries,
|
||||
nir_shader_get_entrypoint(nir), lower_indirect_outputs,
|
||||
lower_indirect_inputs);
|
||||
nir_shader_get_entrypoint(nir), true, false);
|
||||
|
||||
/* We need to lower all the copy_deref's introduced by lower_io_to-
|
||||
* _temporaries before calling nir_lower_io.
|
||||
@@ -1102,9 +1101,7 @@ nir_lower_io_passes(nir_shader *nir, bool renumber_vs_inputs)
|
||||
* The problem is that nir_lower_io_vars_to_temporaries doesn't handle TCS.
|
||||
*/
|
||||
if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
|
||||
NIR_PASS(_, nir, nir_lower_indirect_derefs,
|
||||
(lower_indirect_inputs ? nir_var_shader_in : 0) |
|
||||
(lower_indirect_outputs ? nir_var_shader_out : 0),
|
||||
NIR_PASS(_, nir, nir_lower_indirect_derefs, nir_var_shader_out,
|
||||
UINT32_MAX);
|
||||
}
|
||||
}
|
||||
@@ -1122,6 +1119,10 @@ nir_lower_io_passes(nir_shader *nir, bool renumber_vs_inputs)
|
||||
NIR_PASS(_, nir, nir_opt_constant_folding);
|
||||
NIR_PASS(_, nir, nir_io_add_const_offset_to_base, nir_var_shader_in | nir_var_shader_out);
|
||||
|
||||
/* This must be called after nir_io_add_const_offset_to_base. */
|
||||
if (lower_indirect_inputs)
|
||||
NIR_PASS(_, nir, nir_lower_io_indirect_loads, nir_var_shader_in);
|
||||
|
||||
/* Lower and remove dead derefs and variables to clean up the IR. */
|
||||
NIR_PASS(_, nir, nir_lower_vars_to_ssa);
|
||||
NIR_PASS(_, nir, nir_opt_dce);
|
||||
|
||||
Reference in New Issue
Block a user