radeonsi: remove streamout code from shaders if no streamout buffers are bound
This is an optimization using asynchronous shader compilation. Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16885>
This commit is contained in:
@@ -615,7 +615,7 @@ static unsigned ngg_nogs_vertex_size(struct si_shader *shader)
|
||||
|
||||
/* The edgeflag is always stored in the last element that's also
|
||||
* used for padding to reduce LDS bank conflicts. */
|
||||
if (shader->selector->info.enabled_streamout_buffer_mask)
|
||||
if (si_shader_uses_streamout(shader))
|
||||
lds_vertex_size = 4 * shader->selector->info.num_outputs + 1;
|
||||
if (gfx10_ngg_writes_user_edgeflags(shader))
|
||||
lds_vertex_size = MAX2(lds_vertex_size, 1);
|
||||
@@ -2248,7 +2248,7 @@ unsigned gfx10_ngg_get_scratch_dw_size(struct si_shader *shader)
|
||||
{
|
||||
const struct si_shader_selector *sel = shader->selector;
|
||||
|
||||
if (sel->stage == MESA_SHADER_GEOMETRY && sel->info.enabled_streamout_buffer_mask)
|
||||
if (sel->stage == MESA_SHADER_GEOMETRY && si_shader_uses_streamout(shader))
|
||||
return 44;
|
||||
|
||||
return 8;
|
||||
|
||||
@@ -1760,7 +1760,7 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
|
||||
}
|
||||
|
||||
struct pipe_stream_output_info so = {};
|
||||
if (sel->info.enabled_streamout_buffer_mask)
|
||||
if (si_shader_uses_streamout(shader))
|
||||
nir_gather_stream_output_info(nir, &so);
|
||||
|
||||
/* Dump NIR before doing NIR->LLVM conversion in case the
|
||||
@@ -2501,7 +2501,7 @@ bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler
|
||||
shader->uses_gs_state_outprim = sscreen->use_ngg &&
|
||||
/* Only used by streamout in vertex shaders. */
|
||||
sel->stage == MESA_SHADER_VERTEX &&
|
||||
sel->info.enabled_streamout_buffer_mask;
|
||||
si_shader_uses_streamout(shader);
|
||||
|
||||
if (sel->stage == MESA_SHADER_VERTEX) {
|
||||
shader->uses_base_instance = sel->info.uses_base_instance ||
|
||||
|
||||
@@ -698,6 +698,7 @@ struct si_shader_key_ge {
|
||||
uint64_t kill_outputs; /* "get_unique_index" bits */
|
||||
unsigned kill_clip_distances : 8;
|
||||
unsigned kill_pointsize : 1;
|
||||
unsigned remove_streamout : 1;
|
||||
|
||||
/* For NGG VS and TES. */
|
||||
unsigned ngg_culling : 13; /* SI_NGG_CULL_* */
|
||||
@@ -1045,6 +1046,13 @@ static inline bool gfx10_ngg_writes_user_edgeflags(struct si_shader *shader)
|
||||
shader->selector->info.writes_edgeflag;
|
||||
}
|
||||
|
||||
static inline bool si_shader_uses_streamout(struct si_shader *shader)
|
||||
{
|
||||
return shader->selector->stage <= MESA_SHADER_GEOMETRY &&
|
||||
shader->selector->info.enabled_streamout_buffer_mask &&
|
||||
!shader->key.ge.opt.remove_streamout;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -189,7 +189,7 @@ void si_llvm_create_func(struct si_shader_context *ctx, const char *name, LLVMTy
|
||||
}
|
||||
|
||||
if (ctx->stage <= MESA_SHADER_GEOMETRY && ctx->shader->key.ge.as_ngg &&
|
||||
ctx->shader->selector->info.enabled_streamout_buffer_mask)
|
||||
si_shader_uses_streamout(ctx->shader))
|
||||
ac_llvm_add_target_dep_function_attr(ctx->main_fn, "amdgpu-gds-size", 256);
|
||||
|
||||
ac_llvm_set_workgroup_size(ctx->main_fn, max_workgroup_size);
|
||||
|
||||
@@ -1554,7 +1554,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
|
||||
}
|
||||
|
||||
shader->ctx_reg.ngg.vgt_stages.u.ngg = 1;
|
||||
shader->ctx_reg.ngg.vgt_stages.u.streamout = !!gs_sel->info.enabled_streamout_buffer_mask;
|
||||
shader->ctx_reg.ngg.vgt_stages.u.streamout = si_shader_uses_streamout(shader);
|
||||
shader->ctx_reg.ngg.vgt_stages.u.ngg_passthrough = gfx10_is_ngg_passthrough(shader);
|
||||
shader->ctx_reg.ngg.vgt_stages.u.gs_wave32 = shader->wave_size == 32;
|
||||
}
|
||||
@@ -1745,12 +1745,12 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
|
||||
if (sscreen->info.gfx_level <= GFX9)
|
||||
rsrc1 |= S_00B128_SGPRS((shader->config.num_sgprs - 1) / 8);
|
||||
|
||||
if (!sscreen->use_ngg_streamout) {
|
||||
if (!sscreen->use_ngg_streamout && si_shader_uses_streamout(shader)) {
|
||||
rsrc2 |= S_00B12C_SO_BASE0_EN(!!shader->selector->info.base.xfb_stride[0]) |
|
||||
S_00B12C_SO_BASE1_EN(!!shader->selector->info.base.xfb_stride[1]) |
|
||||
S_00B12C_SO_BASE2_EN(!!shader->selector->info.base.xfb_stride[2]) |
|
||||
S_00B12C_SO_BASE3_EN(!!shader->selector->info.base.xfb_stride[3]) |
|
||||
S_00B12C_SO_EN(!!info->enabled_streamout_buffer_mask);
|
||||
S_00B12C_SO_EN(1);
|
||||
}
|
||||
|
||||
si_pm4_set_reg(pm4, R_00B128_SPI_SHADER_PGM_RSRC1_VS, rsrc1);
|
||||
@@ -2216,6 +2216,8 @@ static void si_get_vs_key_outputs(struct si_context *sctx, struct si_shader_sele
|
||||
key->ge.opt.kill_pointsize = vs->info.writes_psize &&
|
||||
sctx->current_rast_prim != PIPE_PRIM_POINTS &&
|
||||
!sctx->queued.named.rasterizer->polygon_mode_is_points;
|
||||
key->ge.opt.remove_streamout = vs->info.enabled_streamout_buffer_mask &&
|
||||
!sctx->streamout.enabled_mask;
|
||||
}
|
||||
|
||||
static void si_clear_vs_key_outputs(struct si_context *sctx, struct si_shader_selector *vs,
|
||||
@@ -2223,6 +2225,7 @@ static void si_clear_vs_key_outputs(struct si_context *sctx, struct si_shader_se
|
||||
{
|
||||
key->ge.opt.kill_clip_distances = 0;
|
||||
key->ge.opt.kill_outputs = 0;
|
||||
key->ge.opt.remove_streamout = 0;
|
||||
key->ge.opt.ngg_culling = 0;
|
||||
key->ge.mono.u.vs_export_prim_id = 0;
|
||||
key->ge.opt.kill_pointsize = 0;
|
||||
|
||||
@@ -169,7 +169,11 @@ static void si_set_streamout_targets(struct pipe_context *ctx, unsigned num_targ
|
||||
for (; i < sctx->streamout.num_targets; i++)
|
||||
si_so_target_reference(&sctx->streamout.targets[i], NULL);
|
||||
|
||||
sctx->streamout.enabled_mask = enabled_mask;
|
||||
if (!!sctx->streamout.enabled_mask != !!enabled_mask) {
|
||||
sctx->streamout.enabled_mask = enabled_mask;
|
||||
sctx->do_update_shaders = true; /* to keep/remove streamout shader code as an optimization */
|
||||
}
|
||||
|
||||
sctx->streamout.num_targets = num_targets;
|
||||
sctx->streamout.append_bitmask = append_bitmask;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user