aco: Refactor select_program to smaller functions.

This prepares for allowing to compile 1 shader at a time for merged shader stages. Signed-off-by: Timur Kristóf <timur.kristof@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23874>
2023-06-20 14:03:34 +02:00
parent 623d704de8
commit be11fee2a7
1 changed files with 121 additions and 90 deletions
@@ -11231,6 +11231,123 @@ pops_await_overlapped_waves(isel_context* ctx)
   bld.reset(ctx->block);
 }

+void
+select_shader(isel_context& ctx, nir_shader* nir, const bool need_startpgm, const bool need_barrier,
+              if_context* ic_merged_wave_info, const bool check_merged_wave_info,
+              const bool endif_merged_wave_info)
+{
+   init_context(&ctx, nir);
+   setup_fp_mode(&ctx, nir);
+
+   Program* program = ctx.program;
+
+   if (need_startpgm) {
+      /* Needs to be after init_context() for FS. */
+      Pseudo_instruction* startpgm = add_startpgm(&ctx);
+      append_logical_start(ctx.block);
+
+      if (unlikely(ctx.options->has_ls_vgpr_init_bug && ctx.stage == vertex_tess_control_hs))
+         fix_ls_vgpr_init_bug(&ctx, startpgm);
+
+      split_arguments(&ctx, startpgm);
+
+      if (!program->info.vs.has_prolog &&
+          (program->stage.has(SWStage::VS) || program->stage.has(SWStage::TES))) {
+         Builder(ctx.program, ctx.block).sopp(aco_opcode::s_setprio, -1u, 0x3u);
+      }
+   }
+
+   if (program->gfx_level == GFX10 && program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER &&
+       !program->stage.has(SWStage::GS)) {
+      /* Workaround for Navi1x HW bug to ensure that all NGG waves launch before
+       * s_sendmsg(GS_ALLOC_REQ).
+       */
+      Builder(ctx.program, ctx.block).sopp(aco_opcode::s_barrier, -1u, 0u);
+   }
+
+   if (check_merged_wave_info) {
+      const unsigned i =
+         nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL ? 0 : 1;
+      const Temp cond = merged_wave_info_to_mask(&ctx, i);
+      begin_divergent_if_then(&ctx, ic_merged_wave_info, cond);
+   }
+
+   if (need_barrier) {
+      const sync_scope scope = ctx.stage == vertex_tess_control_hs && ctx.tcs_in_out_eq &&
+                                     program->wave_size % nir->info.tess.tcs_vertices_out == 0
+                                  ? scope_subgroup
+                                  : scope_workgroup;
+
+      Builder(ctx.program, ctx.block)
+         .barrier(aco_opcode::p_barrier, memory_sync_info(storage_shared, semantic_acqrel, scope),
+                  scope);
+   }
+
+   nir_function_impl* func = nir_shader_get_entrypoint(nir);
+   visit_cf_list(&ctx, &func->body);
+
+   if (ctx.stage == fragment_fs && ctx.program->info.ps.has_epilog) {
+      create_fs_jump_to_epilog(&ctx);
+
+      /* FS epilogs always have at least one color/null export. */
+      ctx.program->has_color_exports = true;
+      ctx.block->kind |= block_kind_export_end;
+   }
+
+   if (endif_merged_wave_info) {
+      begin_divergent_if_else(&ctx, ic_merged_wave_info);
+      end_divergent_if(&ctx, ic_merged_wave_info);
+   }
+
+   cleanup_context(&ctx);
+}
+
+void
+select_program_merged(isel_context& ctx, const unsigned shader_count, nir_shader* const* shaders)
+{
+   if_context ic_merged_wave_info;
+   const bool ngg_gs = ctx.stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER && ctx.stage.has(SWStage::GS);
+
+   for (unsigned i = 0; i < shader_count; i++) {
+      nir_shader* nir = shaders[i];
+
+      /* We always need to insert p_startpgm at the beginning of the first shader.  */
+      const bool need_startpgm = i == 0;
+
+      /* In a merged VS+TCS HS, the VS implementation can be completely empty. */
+      nir_function_impl* func = nir_shader_get_entrypoint(nir);
+      const bool empty_shader =
+         nir_cf_list_is_empty_block(&func->body) &&
+         ((nir->info.stage == MESA_SHADER_VERTEX &&
+           (ctx.stage == vertex_tess_control_hs || ctx.stage == vertex_geometry_gs)) ||
+          (nir->info.stage == MESA_SHADER_TESS_EVAL && ctx.stage == tess_eval_geometry_gs));
+
+      /* See if we need to emit a check of the merged wave info SGPR. */
+      const bool check_merged_wave_info =
+         ctx.tcs_in_out_eq ? i == 0 : (shader_count >= 2 && !empty_shader && !(ngg_gs && i == 1));
+      const bool endif_merged_wave_info =
+         ctx.tcs_in_out_eq ? i == 1 : (check_merged_wave_info && !(ngg_gs && i == 1));
+
+      /* Skip s_barrier from TCS when VS outputs are not stored in the LDS. */
+      const bool tcs_skip_barrier =
+         ctx.stage == vertex_tess_control_hs && ctx.tcs_temp_only_inputs == nir->info.inputs_read;
+
+      /* A barrier is usually needed at the beginning of the second shader, with exceptions. */
+      const bool need_barrier = i != 0 && !ngg_gs && !tcs_skip_barrier;
+
+      select_shader(ctx, nir, need_startpgm, need_barrier, &ic_merged_wave_info,
+                    check_merged_wave_info, endif_merged_wave_info);
+
+      if (i == 0 && ctx.stage == vertex_tess_control_hs && ctx.tcs_in_out_eq) {
+         /* Special handling when TCS input and output patch size is the same.
+          * Outputs of the previous stage are inputs to the next stage.
+          */
+         ctx.inputs = ctx.outputs;
+         ctx.outputs = shader_io_state();
+      }
+   }
+}
+
 } /* end namespace */

 void
@@ -11244,96 +11361,10 @@ select_program(Program* program, unsigned shader_count, struct nir_shader* const
   if (ctx.stage == raytracing_cs)
      return select_program_rt(ctx, shader_count, shaders, args);

-   if_context ic_merged_wave_info;
-   bool ngg_gs = ctx.stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER && ctx.stage.has(SWStage::GS);
-
-   for (unsigned i = 0; i < shader_count; i++) {
-      nir_shader* nir = shaders[i];
-      init_context(&ctx, nir);
-
-      setup_fp_mode(&ctx, nir);
-
-      if (!i) {
-         /* needs to be after init_context() for FS */
-         Pseudo_instruction* startpgm = add_startpgm(&ctx);
-         append_logical_start(ctx.block);
-
-         if (unlikely(ctx.options->has_ls_vgpr_init_bug && ctx.stage == vertex_tess_control_hs))
-            fix_ls_vgpr_init_bug(&ctx, startpgm);
-
-         split_arguments(&ctx, startpgm);
-
-         if (!info->vs.has_prolog &&
-             (program->stage.has(SWStage::VS) || program->stage.has(SWStage::TES))) {
-            Builder(ctx.program, ctx.block).sopp(aco_opcode::s_setprio, -1u, 0x3u);
-         }
-      }
-
-      /* In a merged VS+TCS HS, the VS implementation can be completely empty. */
-      nir_function_impl* func = nir_shader_get_entrypoint(nir);
-      bool empty_shader =
-         nir_cf_list_is_empty_block(&func->body) &&
-         ((nir->info.stage == MESA_SHADER_VERTEX &&
-           (ctx.stage == vertex_tess_control_hs || ctx.stage == vertex_geometry_gs)) ||
-          (nir->info.stage == MESA_SHADER_TESS_EVAL && ctx.stage == tess_eval_geometry_gs));
-
-      bool check_merged_wave_info =
-         ctx.tcs_in_out_eq ? i == 0 : (shader_count >= 2 && !empty_shader && !(ngg_gs && i == 1));
-      bool endif_merged_wave_info =
-         ctx.tcs_in_out_eq ? i == 1 : (check_merged_wave_info && !(ngg_gs && i == 1));
-
-      if (program->gfx_level == GFX10 && program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER &&
-          program->stage.num_sw_stages() == 1) {
-         /* Workaround for Navi1x HW bug to ensure that all NGG waves launch before
-          * s_sendmsg(GS_ALLOC_REQ). */
-         Builder(ctx.program, ctx.block).sopp(aco_opcode::s_barrier, -1u, 0u);
-      }
-
-      if (check_merged_wave_info) {
-         Temp cond = merged_wave_info_to_mask(&ctx, i);
-         begin_divergent_if_then(&ctx, &ic_merged_wave_info, cond);
-      }
-
-      if (i) {
-         Builder bld(ctx.program, ctx.block);
-
-         /* Skip s_barrier from TCS when VS outputs are not stored in the LDS. */
-         bool tcs_skip_barrier = ctx.stage == vertex_tess_control_hs &&
-                                 ctx.tcs_temp_only_inputs == nir->info.inputs_read;
-
-         if (!ngg_gs && !tcs_skip_barrier) {
-            sync_scope scope = ctx.stage == vertex_tess_control_hs && ctx.tcs_in_out_eq &&
-                                     program->wave_size % nir->info.tess.tcs_vertices_out == 0
-                                  ? scope_subgroup
-                                  : scope_workgroup;
-            bld.barrier(aco_opcode::p_barrier,
-                        memory_sync_info(storage_shared, semantic_acqrel, scope), scope);
-         }
-      }
-
-      visit_cf_list(&ctx, &func->body);
-
-      if (ctx.stage == fragment_fs && ctx.program->info.ps.has_epilog) {
-         create_fs_jump_to_epilog(&ctx);
-
-         /* FS epilogs always have at least one color/null export. */
-         ctx.program->has_color_exports = true;
-
-         ctx.block->kind |= block_kind_export_end;
-      }
-
-      if (endif_merged_wave_info) {
-         begin_divergent_if_else(&ctx, &ic_merged_wave_info);
-         end_divergent_if(&ctx, &ic_merged_wave_info);
-      }
-
-      if (i == 0 && ctx.stage == vertex_tess_control_hs && ctx.tcs_in_out_eq) {
-         /* Outputs of the previous stage are inputs to the next stage */
-         ctx.inputs = ctx.outputs;
-         ctx.outputs = shader_io_state();
-      }
-
-      cleanup_context(&ctx);
+   if (shader_count >= 2) {
+      select_program_merged(ctx, shader_count, shaders);
+   } else {
+      select_shader(ctx, shaders[0], true, false, NULL, false, false);
   }

   program->config->float_mode = program->blocks[0].fp_mode.val;