radeonsi: track NIR progress properly for optimizations in si_get_nir_shader

Just a small code size decrease in 12 shaders.

TOTALS FROM AFFECTED SHADERS (12/58918)
  SGPRS: 600.00 -> 600.00 (0.00 %)
  VGPRS: 528.00 -> 520.00 (-1.52 %)
  Spilled SGPRs: 0.00 -> 0.00 (0.00 %)
  Spilled VGPRs: 0.00 -> 0.00 (0.00 %)
  Private memory VGPRs: 0.00 -> 0.00 (0.00 %)
  Scratch size: 0.00 -> 0.00 (0.00 %) dwords per thread
  Code Size: 39772.00 -> 39688.00 (-0.21 %) bytes
  Max Waves: 180.00 -> 180.00 (0.00 %)
  Outputs: 0.00 -> 0.00 (0.00 %)
  Patch Outputs: 0.00 -> 0.00 (0.00 %)

Reviewed-by: Qiang Yu <yuq825@gmail.com>
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26307>
This commit is contained in:
Marek Olšák
2023-11-25 10:49:31 -05:00
committed by Marge Bot
parent e1e35112c8
commit 4f2b794e98
+42 -37
View File
@@ -2050,8 +2050,9 @@ static bool lower_ps_load_color_intrinsic(nir_builder *b, nir_instr *instr, void
return true;
}
static void si_nir_lower_ps_color_input(nir_shader *nir, struct si_shader *shader)
static bool si_nir_lower_ps_color_input(nir_shader *nir, struct si_shader *shader)
{
bool progress = false;
nir_function_impl *impl = nir_shader_get_entrypoint(nir);
nir_builder builder = nir_builder_at(nir_before_impl(impl));
@@ -2123,12 +2124,14 @@ static void si_nir_lower_ps_color_input(nir_shader *nir, struct si_shader *shade
nir_def *is_front_face = nir_load_front_face(b, 1);
colors[i] = nir_bcsel(b, is_front_face, colors[i], back_color);
}
progress = true;
}
/* lower nir_load_color0/1 to use the color value. */
nir_shader_instructions_pass(nir, lower_ps_load_color_intrinsic,
nir_metadata_block_index | nir_metadata_dominance,
colors);
return nir_shader_instructions_pass(nir, lower_ps_load_color_intrinsic,
nir_metadata_block_index | nir_metadata_dominance,
colors) || progress;
}
static void si_nir_emit_polygon_stipple(nir_shader *nir, struct si_shader_args *args)
@@ -2181,6 +2184,7 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader,
}
bool progress = false;
bool late_opts = false;
const char *original_name = NULL;
if (unlikely(should_print_nir(nir))) {
@@ -2197,12 +2201,11 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader,
if (sel->stage <= MESA_SHADER_GEOMETRY)
NIR_PASS(progress, nir, si_nir_kill_outputs, key);
NIR_PASS(
_, nir, ac_nir_lower_tex,
&(ac_nir_lower_tex_options){
.gfx_level = sel->screen->info.gfx_level,
.lower_array_layer_round_even = !sel->screen->info.conformant_trunc_coord,
});
NIR_PASS(progress, nir, ac_nir_lower_tex,
&(ac_nir_lower_tex_options){
.gfx_level = sel->screen->info.gfx_level,
.lower_array_layer_round_even = !sel->screen->info.conformant_trunc_coord,
});
if (nir->info.uses_resource_info_query)
NIR_PASS(progress, nir, ac_nir_lower_resinfo, sel->screen->info.gfx_level);
@@ -2253,10 +2256,8 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader,
* TODO: The driver uses a linear search to find a shader variant. This
* can be really slow if we get too many variants due to uniform inlining.
*/
NIR_PASS_V(nir, nir_inline_uniforms,
nir->info.num_inlinable_uniforms,
inlined_uniform_values,
nir->info.inlinable_uniform_dw_offsets);
NIR_PASS_V(nir, nir_inline_uniforms, nir->info.num_inlinable_uniforms,
inlined_uniform_values, nir->info.inlinable_uniform_dw_offsets);
progress = true;
}
@@ -2292,8 +2293,11 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader,
if (is_last_vgt_stage || is_legacy_gs)
NIR_PASS(progress, nir, si_nir_clamp_vertex_color);
if (progress)
if (progress) {
si_nir_opts(sel->screen, nir, true);
late_opts = true;
progress = false;
}
/* Lower large variables that are always constant with load_constant intrinsics, which
* get turned into PC-relative loads from a data section next to the shader.
@@ -2304,19 +2308,18 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader,
* The pass crashes if there are dead temps of lowered IO interface types, so remove
* them first.
*/
bool progress2 = false;
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
NIR_PASS(progress2, nir, nir_opt_large_constants, glsl_get_natural_size_align_bytes, 16);
NIR_PASS(progress, nir, nir_opt_large_constants, glsl_get_natural_size_align_bytes, 16);
/* Loop unrolling caused by uniform inlining can help eliminate indirect indexing, so
* this should be done after that.
*/
progress2 |= ac_nir_lower_indirect_derefs(nir, sel->screen->info.gfx_level);
progress |= ac_nir_lower_indirect_derefs(nir, sel->screen->info.gfx_level);
if (sel->stage == MESA_SHADER_VERTEX)
progress2 |= si_nir_lower_vs_inputs(nir, shader, args);
NIR_PASS(progress, nir, si_nir_lower_vs_inputs, shader, args);
bool opt_offsets = si_lower_io_to_mem(shader, nir, tcs_vgpr_only_inputs);
progress |= si_lower_io_to_mem(shader, nir, tcs_vgpr_only_inputs);
if (is_last_vgt_stage) {
/* Assign param export indices. */
@@ -2328,7 +2331,6 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader,
if (key->ge.as_ngg) {
/* Lower last VGT NGG shader stage. */
si_lower_ngg(shader, nir);
opt_offsets = true;
} else if (sel->stage == MESA_SHADER_VERTEX || sel->stage == MESA_SHADER_TESS_EVAL) {
/* Lower last VGT none-NGG VS/TES shader stage. */
unsigned clip_cull_mask =
@@ -2346,12 +2348,14 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader,
key->ge.opt.kill_layer,
sel->screen->options.vrs2x2);
}
progress = true;
} else if (is_legacy_gs) {
NIR_PASS_V(nir, ac_nir_lower_legacy_gs, false, sel->screen->use_ngg, output_info);
progress = true;
} else if (sel->stage == MESA_SHADER_FRAGMENT && shader->is_monolithic) {
/* two-side color selection and interpolation */
if (sel->info.colors_read)
NIR_PASS_V(nir, si_nir_lower_ps_color_input, shader);
NIR_PASS(progress, nir, si_nir_lower_ps_color_input, shader);
ac_nir_lower_ps_options options = {
.gfx_level = sel->screen->info.gfx_level,
@@ -2383,33 +2387,34 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader,
if (key->ps.part.prolog.poly_stipple)
NIR_PASS_V(nir, si_nir_emit_polygon_stipple, args);
progress2 = true;
progress = true;
}
NIR_PASS(progress2, nir, nir_opt_idiv_const, 8);
NIR_PASS(progress2, nir, nir_lower_idiv,
NIR_PASS(progress, nir, nir_opt_idiv_const, 8);
NIR_PASS(progress, nir, nir_lower_idiv,
&(nir_lower_idiv_options){
.allow_fp16 = sel->screen->info.gfx_level >= GFX9,
});
NIR_PASS(progress2, nir, ac_nir_lower_intrinsics_to_args, sel->screen->info.gfx_level,
NIR_PASS(progress, nir, ac_nir_lower_intrinsics_to_args, sel->screen->info.gfx_level,
si_select_hw_stage(nir->info.stage, key, sel->screen->info.gfx_level),
&args->ac);
NIR_PASS(progress2, nir, si_nir_lower_abi, shader, args);
NIR_PASS(progress, nir, si_nir_lower_abi, shader, args);
if (progress2 || opt_offsets)
if (progress) {
si_nir_opts(sel->screen, nir, false);
if (opt_offsets) {
static const nir_opt_offsets_options offset_options = {
.uniform_max = 0,
.buffer_max = ~0,
.shared_max = ~0,
};
NIR_PASS_V(nir, nir_opt_offsets, &offset_options);
progress = false;
late_opts = true;
}
if (progress || progress2 || opt_offsets)
static const nir_opt_offsets_options offset_options = {
.uniform_max = 0,
.buffer_max = ~0,
.shared_max = ~0,
};
NIR_PASS_V(nir, nir_opt_offsets, &offset_options);
if (late_opts)
si_nir_late_opts(nir);
/* aco only accept scalar const, must be done after si_nir_late_opts()