aco: Refactor select_program to smaller functions.

This prepares for allowing to compile 1 shader at a time
for merged shader stages.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23874>
This commit is contained in:
Timur Kristóf
2023-06-20 14:03:34 +02:00
committed by Marge Bot
parent 623d704de8
commit be11fee2a7
+121 -90
View File
@@ -11231,6 +11231,123 @@ pops_await_overlapped_waves(isel_context* ctx)
bld.reset(ctx->block);
}
void
select_shader(isel_context& ctx, nir_shader* nir, const bool need_startpgm, const bool need_barrier,
if_context* ic_merged_wave_info, const bool check_merged_wave_info,
const bool endif_merged_wave_info)
{
init_context(&ctx, nir);
setup_fp_mode(&ctx, nir);
Program* program = ctx.program;
if (need_startpgm) {
/* Needs to be after init_context() for FS. */
Pseudo_instruction* startpgm = add_startpgm(&ctx);
append_logical_start(ctx.block);
if (unlikely(ctx.options->has_ls_vgpr_init_bug && ctx.stage == vertex_tess_control_hs))
fix_ls_vgpr_init_bug(&ctx, startpgm);
split_arguments(&ctx, startpgm);
if (!program->info.vs.has_prolog &&
(program->stage.has(SWStage::VS) || program->stage.has(SWStage::TES))) {
Builder(ctx.program, ctx.block).sopp(aco_opcode::s_setprio, -1u, 0x3u);
}
}
if (program->gfx_level == GFX10 && program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER &&
!program->stage.has(SWStage::GS)) {
/* Workaround for Navi1x HW bug to ensure that all NGG waves launch before
* s_sendmsg(GS_ALLOC_REQ).
*/
Builder(ctx.program, ctx.block).sopp(aco_opcode::s_barrier, -1u, 0u);
}
if (check_merged_wave_info) {
const unsigned i =
nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL ? 0 : 1;
const Temp cond = merged_wave_info_to_mask(&ctx, i);
begin_divergent_if_then(&ctx, ic_merged_wave_info, cond);
}
if (need_barrier) {
const sync_scope scope = ctx.stage == vertex_tess_control_hs && ctx.tcs_in_out_eq &&
program->wave_size % nir->info.tess.tcs_vertices_out == 0
? scope_subgroup
: scope_workgroup;
Builder(ctx.program, ctx.block)
.barrier(aco_opcode::p_barrier, memory_sync_info(storage_shared, semantic_acqrel, scope),
scope);
}
nir_function_impl* func = nir_shader_get_entrypoint(nir);
visit_cf_list(&ctx, &func->body);
if (ctx.stage == fragment_fs && ctx.program->info.ps.has_epilog) {
create_fs_jump_to_epilog(&ctx);
/* FS epilogs always have at least one color/null export. */
ctx.program->has_color_exports = true;
ctx.block->kind |= block_kind_export_end;
}
if (endif_merged_wave_info) {
begin_divergent_if_else(&ctx, ic_merged_wave_info);
end_divergent_if(&ctx, ic_merged_wave_info);
}
cleanup_context(&ctx);
}
void
select_program_merged(isel_context& ctx, const unsigned shader_count, nir_shader* const* shaders)
{
if_context ic_merged_wave_info;
const bool ngg_gs = ctx.stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER && ctx.stage.has(SWStage::GS);
for (unsigned i = 0; i < shader_count; i++) {
nir_shader* nir = shaders[i];
/* We always need to insert p_startpgm at the beginning of the first shader. */
const bool need_startpgm = i == 0;
/* In a merged VS+TCS HS, the VS implementation can be completely empty. */
nir_function_impl* func = nir_shader_get_entrypoint(nir);
const bool empty_shader =
nir_cf_list_is_empty_block(&func->body) &&
((nir->info.stage == MESA_SHADER_VERTEX &&
(ctx.stage == vertex_tess_control_hs || ctx.stage == vertex_geometry_gs)) ||
(nir->info.stage == MESA_SHADER_TESS_EVAL && ctx.stage == tess_eval_geometry_gs));
/* See if we need to emit a check of the merged wave info SGPR. */
const bool check_merged_wave_info =
ctx.tcs_in_out_eq ? i == 0 : (shader_count >= 2 && !empty_shader && !(ngg_gs && i == 1));
const bool endif_merged_wave_info =
ctx.tcs_in_out_eq ? i == 1 : (check_merged_wave_info && !(ngg_gs && i == 1));
/* Skip s_barrier from TCS when VS outputs are not stored in the LDS. */
const bool tcs_skip_barrier =
ctx.stage == vertex_tess_control_hs && ctx.tcs_temp_only_inputs == nir->info.inputs_read;
/* A barrier is usually needed at the beginning of the second shader, with exceptions. */
const bool need_barrier = i != 0 && !ngg_gs && !tcs_skip_barrier;
select_shader(ctx, nir, need_startpgm, need_barrier, &ic_merged_wave_info,
check_merged_wave_info, endif_merged_wave_info);
if (i == 0 && ctx.stage == vertex_tess_control_hs && ctx.tcs_in_out_eq) {
/* Special handling when TCS input and output patch size is the same.
* Outputs of the previous stage are inputs to the next stage.
*/
ctx.inputs = ctx.outputs;
ctx.outputs = shader_io_state();
}
}
}
} /* end namespace */
void
@@ -11244,96 +11361,10 @@ select_program(Program* program, unsigned shader_count, struct nir_shader* const
if (ctx.stage == raytracing_cs)
return select_program_rt(ctx, shader_count, shaders, args);
if_context ic_merged_wave_info;
bool ngg_gs = ctx.stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER && ctx.stage.has(SWStage::GS);
for (unsigned i = 0; i < shader_count; i++) {
nir_shader* nir = shaders[i];
init_context(&ctx, nir);
setup_fp_mode(&ctx, nir);
if (!i) {
/* needs to be after init_context() for FS */
Pseudo_instruction* startpgm = add_startpgm(&ctx);
append_logical_start(ctx.block);
if (unlikely(ctx.options->has_ls_vgpr_init_bug && ctx.stage == vertex_tess_control_hs))
fix_ls_vgpr_init_bug(&ctx, startpgm);
split_arguments(&ctx, startpgm);
if (!info->vs.has_prolog &&
(program->stage.has(SWStage::VS) || program->stage.has(SWStage::TES))) {
Builder(ctx.program, ctx.block).sopp(aco_opcode::s_setprio, -1u, 0x3u);
}
}
/* In a merged VS+TCS HS, the VS implementation can be completely empty. */
nir_function_impl* func = nir_shader_get_entrypoint(nir);
bool empty_shader =
nir_cf_list_is_empty_block(&func->body) &&
((nir->info.stage == MESA_SHADER_VERTEX &&
(ctx.stage == vertex_tess_control_hs || ctx.stage == vertex_geometry_gs)) ||
(nir->info.stage == MESA_SHADER_TESS_EVAL && ctx.stage == tess_eval_geometry_gs));
bool check_merged_wave_info =
ctx.tcs_in_out_eq ? i == 0 : (shader_count >= 2 && !empty_shader && !(ngg_gs && i == 1));
bool endif_merged_wave_info =
ctx.tcs_in_out_eq ? i == 1 : (check_merged_wave_info && !(ngg_gs && i == 1));
if (program->gfx_level == GFX10 && program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER &&
program->stage.num_sw_stages() == 1) {
/* Workaround for Navi1x HW bug to ensure that all NGG waves launch before
* s_sendmsg(GS_ALLOC_REQ). */
Builder(ctx.program, ctx.block).sopp(aco_opcode::s_barrier, -1u, 0u);
}
if (check_merged_wave_info) {
Temp cond = merged_wave_info_to_mask(&ctx, i);
begin_divergent_if_then(&ctx, &ic_merged_wave_info, cond);
}
if (i) {
Builder bld(ctx.program, ctx.block);
/* Skip s_barrier from TCS when VS outputs are not stored in the LDS. */
bool tcs_skip_barrier = ctx.stage == vertex_tess_control_hs &&
ctx.tcs_temp_only_inputs == nir->info.inputs_read;
if (!ngg_gs && !tcs_skip_barrier) {
sync_scope scope = ctx.stage == vertex_tess_control_hs && ctx.tcs_in_out_eq &&
program->wave_size % nir->info.tess.tcs_vertices_out == 0
? scope_subgroup
: scope_workgroup;
bld.barrier(aco_opcode::p_barrier,
memory_sync_info(storage_shared, semantic_acqrel, scope), scope);
}
}
visit_cf_list(&ctx, &func->body);
if (ctx.stage == fragment_fs && ctx.program->info.ps.has_epilog) {
create_fs_jump_to_epilog(&ctx);
/* FS epilogs always have at least one color/null export. */
ctx.program->has_color_exports = true;
ctx.block->kind |= block_kind_export_end;
}
if (endif_merged_wave_info) {
begin_divergent_if_else(&ctx, &ic_merged_wave_info);
end_divergent_if(&ctx, &ic_merged_wave_info);
}
if (i == 0 && ctx.stage == vertex_tess_control_hs && ctx.tcs_in_out_eq) {
/* Outputs of the previous stage are inputs to the next stage */
ctx.inputs = ctx.outputs;
ctx.outputs = shader_io_state();
}
cleanup_context(&ctx);
if (shader_count >= 2) {
select_program_merged(ctx, shader_count, shaders);
} else {
select_shader(ctx, shaders[0], true, false, NULL, false, false);
}
program->config->float_mode = program->blocks[0].fp_mode.val;