radeonsi: remove streamout code from shaders if no streamout buffers are bound

This is an optimization using asynchronous shader compilation.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16885>
This commit is contained in:
Marek Olšák
2022-06-06 05:27:14 -04:00
committed by Marge Bot
parent dbbbe73d05
commit dfa8dcf80e
6 changed files with 24 additions and 9 deletions
@@ -615,7 +615,7 @@ static unsigned ngg_nogs_vertex_size(struct si_shader *shader)
/* The edgeflag is always stored in the last element that's also
* used for padding to reduce LDS bank conflicts. */
if (shader->selector->info.enabled_streamout_buffer_mask)
if (si_shader_uses_streamout(shader))
lds_vertex_size = 4 * shader->selector->info.num_outputs + 1;
if (gfx10_ngg_writes_user_edgeflags(shader))
lds_vertex_size = MAX2(lds_vertex_size, 1);
@@ -2248,7 +2248,7 @@ unsigned gfx10_ngg_get_scratch_dw_size(struct si_shader *shader)
{
const struct si_shader_selector *sel = shader->selector;
if (sel->stage == MESA_SHADER_GEOMETRY && sel->info.enabled_streamout_buffer_mask)
if (sel->stage == MESA_SHADER_GEOMETRY && si_shader_uses_streamout(shader))
return 44;
return 8;
+2 -2
View File
@@ -1760,7 +1760,7 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
}
struct pipe_stream_output_info so = {};
if (sel->info.enabled_streamout_buffer_mask)
if (si_shader_uses_streamout(shader))
nir_gather_stream_output_info(nir, &so);
/* Dump NIR before doing NIR->LLVM conversion in case the
@@ -2501,7 +2501,7 @@ bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler
shader->uses_gs_state_outprim = sscreen->use_ngg &&
/* Only used by streamout in vertex shaders. */
sel->stage == MESA_SHADER_VERTEX &&
sel->info.enabled_streamout_buffer_mask;
si_shader_uses_streamout(shader);
if (sel->stage == MESA_SHADER_VERTEX) {
shader->uses_base_instance = sel->info.uses_base_instance ||
+8
View File
@@ -698,6 +698,7 @@ struct si_shader_key_ge {
uint64_t kill_outputs; /* "get_unique_index" bits */
unsigned kill_clip_distances : 8;
unsigned kill_pointsize : 1;
unsigned remove_streamout : 1;
/* For NGG VS and TES. */
unsigned ngg_culling : 13; /* SI_NGG_CULL_* */
@@ -1045,6 +1046,13 @@ static inline bool gfx10_ngg_writes_user_edgeflags(struct si_shader *shader)
shader->selector->info.writes_edgeflag;
}
static inline bool si_shader_uses_streamout(struct si_shader *shader)
{
return shader->selector->stage <= MESA_SHADER_GEOMETRY &&
shader->selector->info.enabled_streamout_buffer_mask &&
!shader->key.ge.opt.remove_streamout;
}
#ifdef __cplusplus
}
#endif
@@ -189,7 +189,7 @@ void si_llvm_create_func(struct si_shader_context *ctx, const char *name, LLVMTy
}
if (ctx->stage <= MESA_SHADER_GEOMETRY && ctx->shader->key.ge.as_ngg &&
ctx->shader->selector->info.enabled_streamout_buffer_mask)
si_shader_uses_streamout(ctx->shader))
ac_llvm_add_target_dep_function_attr(ctx->main_fn, "amdgpu-gds-size", 256);
ac_llvm_set_workgroup_size(ctx->main_fn, max_workgroup_size);
@@ -1554,7 +1554,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
}
shader->ctx_reg.ngg.vgt_stages.u.ngg = 1;
shader->ctx_reg.ngg.vgt_stages.u.streamout = !!gs_sel->info.enabled_streamout_buffer_mask;
shader->ctx_reg.ngg.vgt_stages.u.streamout = si_shader_uses_streamout(shader);
shader->ctx_reg.ngg.vgt_stages.u.ngg_passthrough = gfx10_is_ngg_passthrough(shader);
shader->ctx_reg.ngg.vgt_stages.u.gs_wave32 = shader->wave_size == 32;
}
@@ -1745,12 +1745,12 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
if (sscreen->info.gfx_level <= GFX9)
rsrc1 |= S_00B128_SGPRS((shader->config.num_sgprs - 1) / 8);
if (!sscreen->use_ngg_streamout) {
if (!sscreen->use_ngg_streamout && si_shader_uses_streamout(shader)) {
rsrc2 |= S_00B12C_SO_BASE0_EN(!!shader->selector->info.base.xfb_stride[0]) |
S_00B12C_SO_BASE1_EN(!!shader->selector->info.base.xfb_stride[1]) |
S_00B12C_SO_BASE2_EN(!!shader->selector->info.base.xfb_stride[2]) |
S_00B12C_SO_BASE3_EN(!!shader->selector->info.base.xfb_stride[3]) |
S_00B12C_SO_EN(!!info->enabled_streamout_buffer_mask);
S_00B12C_SO_EN(1);
}
si_pm4_set_reg(pm4, R_00B128_SPI_SHADER_PGM_RSRC1_VS, rsrc1);
@@ -2216,6 +2216,8 @@ static void si_get_vs_key_outputs(struct si_context *sctx, struct si_shader_sele
key->ge.opt.kill_pointsize = vs->info.writes_psize &&
sctx->current_rast_prim != PIPE_PRIM_POINTS &&
!sctx->queued.named.rasterizer->polygon_mode_is_points;
key->ge.opt.remove_streamout = vs->info.enabled_streamout_buffer_mask &&
!sctx->streamout.enabled_mask;
}
static void si_clear_vs_key_outputs(struct si_context *sctx, struct si_shader_selector *vs,
@@ -2223,6 +2225,7 @@ static void si_clear_vs_key_outputs(struct si_context *sctx, struct si_shader_se
{
key->ge.opt.kill_clip_distances = 0;
key->ge.opt.kill_outputs = 0;
key->ge.opt.remove_streamout = 0;
key->ge.opt.ngg_culling = 0;
key->ge.mono.u.vs_export_prim_id = 0;
key->ge.opt.kill_pointsize = 0;
@@ -169,7 +169,11 @@ static void si_set_streamout_targets(struct pipe_context *ctx, unsigned num_targ
for (; i < sctx->streamout.num_targets; i++)
si_so_target_reference(&sctx->streamout.targets[i], NULL);
sctx->streamout.enabled_mask = enabled_mask;
if (!!sctx->streamout.enabled_mask != !!enabled_mask) {
sctx->streamout.enabled_mask = enabled_mask;
sctx->do_update_shaders = true; /* to keep/remove streamout shader code as an optimization */
}
sctx->streamout.num_targets = num_targets;
sctx->streamout.append_bitmask = append_bitmask;