intel/brw: Move and reduce scope of run_*() functions

Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30169>
2024-07-12 14:20:57 -07:00
parent c92b8a802e
commit fdb029fe1b
10 changed files with 389 additions and 417 deletions
@@ -2709,36 +2709,6 @@ fs_visitor::allocate_registers(bool allow_spilling)
   brw_fs_lower_scoreboard(*this);
 }

-bool
-fs_visitor::run_vs()
-{
-   assert(stage == MESA_SHADER_VERTEX);
-
-   payload_ = new vs_thread_payload(*this);
-
-   nir_to_brw(this);
-
-   if (failed)
-      return false;
-
-   emit_urb_writes();
-
-   calculate_cfg();
-
-   brw_fs_optimize(*this);
-
-   assign_curb_setup();
-   assign_vs_urb_setup();
-
-   brw_fs_lower_3src_null_dest(*this);
-   brw_fs_workaround_memory_fence_before_eot(*this);
-   brw_fs_workaround_emit_dummy_mov_instruction(*this);
-
-   allocate_registers(true /* allow_spilling */);
-
-   return !failed;
-}
-
 void
 fs_visitor::set_tcs_invocation_id()
 {
@@ -2811,370 +2781,6 @@ fs_visitor::emit_tcs_thread_end()
   inst->eot = true;
 }

-bool
-fs_visitor::run_tcs()
-{
-   assert(stage == MESA_SHADER_TESS_CTRL);
-
-   struct brw_vue_prog_data *vue_prog_data = brw_vue_prog_data(prog_data);
-   const fs_builder bld = fs_builder(this).at_end();
-
-   assert(vue_prog_data->dispatch_mode == INTEL_DISPATCH_MODE_TCS_SINGLE_PATCH ||
-          vue_prog_data->dispatch_mode == INTEL_DISPATCH_MODE_TCS_MULTI_PATCH);
-
-   payload_ = new tcs_thread_payload(*this);
-
-   /* Initialize gl_InvocationID */
-   set_tcs_invocation_id();
-
-   const bool fix_dispatch_mask =
-      vue_prog_data->dispatch_mode == INTEL_DISPATCH_MODE_TCS_SINGLE_PATCH &&
-      (nir->info.tess.tcs_vertices_out % 8) != 0;
-
-   /* Fix the disptach mask */
-   if (fix_dispatch_mask) {
-      bld.CMP(bld.null_reg_ud(), invocation_id,
-              brw_imm_ud(nir->info.tess.tcs_vertices_out), BRW_CONDITIONAL_L);
-      bld.IF(BRW_PREDICATE_NORMAL);
-   }
-
-   nir_to_brw(this);
-
-   if (fix_dispatch_mask) {
-      bld.emit(BRW_OPCODE_ENDIF);
-   }
-
-   emit_tcs_thread_end();
-
-   if (failed)
-      return false;
-
-   calculate_cfg();
-
-   brw_fs_optimize(*this);
-
-   assign_curb_setup();
-   assign_tcs_urb_setup();
-
-   brw_fs_lower_3src_null_dest(*this);
-   brw_fs_workaround_memory_fence_before_eot(*this);
-   brw_fs_workaround_emit_dummy_mov_instruction(*this);
-
-   allocate_registers(true /* allow_spilling */);
-
-   return !failed;
-}
-
-bool
-fs_visitor::run_tes()
-{
-   assert(stage == MESA_SHADER_TESS_EVAL);
-
-   payload_ = new tes_thread_payload(*this);
-
-   nir_to_brw(this);
-
-   if (failed)
-      return false;
-
-   emit_urb_writes();
-
-   calculate_cfg();
-
-   brw_fs_optimize(*this);
-
-   assign_curb_setup();
-   assign_tes_urb_setup();
-
-   brw_fs_lower_3src_null_dest(*this);
-   brw_fs_workaround_memory_fence_before_eot(*this);
-   brw_fs_workaround_emit_dummy_mov_instruction(*this);
-
-   allocate_registers(true /* allow_spilling */);
-
-   return !failed;
-}
-
-bool
-fs_visitor::run_gs()
-{
-   assert(stage == MESA_SHADER_GEOMETRY);
-
-   payload_ = new gs_thread_payload(*this);
-
-   const fs_builder bld = fs_builder(this).at_end();
-
-   this->final_gs_vertex_count = bld.vgrf(BRW_TYPE_UD);
-
-   if (gs_compile->control_data_header_size_bits > 0) {
-      /* Create a VGRF to store accumulated control data bits. */
-      this->control_data_bits = bld.vgrf(BRW_TYPE_UD);
-
-      /* If we're outputting more than 32 control data bits, then EmitVertex()
-       * will set control_data_bits to 0 after emitting the first vertex.
-       * Otherwise, we need to initialize it to 0 here.
-       */
-      if (gs_compile->control_data_header_size_bits <= 32) {
-         const fs_builder abld = bld.annotate("initialize control data bits");
-         abld.MOV(this->control_data_bits, brw_imm_ud(0u));
-      }
-   }
-
-   nir_to_brw(this);
-
-   emit_gs_thread_end();
-
-   if (failed)
-      return false;
-
-   calculate_cfg();
-
-   brw_fs_optimize(*this);
-
-   assign_curb_setup();
-   assign_gs_urb_setup();
-
-   brw_fs_lower_3src_null_dest(*this);
-   brw_fs_workaround_memory_fence_before_eot(*this);
-   brw_fs_workaround_emit_dummy_mov_instruction(*this);
-
-   allocate_registers(true /* allow_spilling */);
-
-   return !failed;
-}
-
-/* From the SKL PRM, Volume 16, Workarounds:
- *
- *   0877  3D   Pixel Shader Hang possible when pixel shader dispatched with
- *              only header phases (R0-R2)
- *
- *   WA: Enable a non-header phase (e.g. push constant) when dispatch would
- *       have been header only.
- *
- * Instead of enabling push constants one can alternatively enable one of the
- * inputs. Here one simply chooses "layer" which shouldn't impose much
- * overhead.
- */
-static void
-gfx9_ps_header_only_workaround(struct brw_wm_prog_data *wm_prog_data)
-{
-   if (wm_prog_data->num_varying_inputs)
-      return;
-
-   if (wm_prog_data->base.curb_read_length)
-      return;
-
-   wm_prog_data->urb_setup[VARYING_SLOT_LAYER] = 0;
-   wm_prog_data->num_varying_inputs = 1;
-
-   brw_compute_urb_setup_index(wm_prog_data);
-}
-
-bool
-fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
-{
-   struct brw_wm_prog_data *wm_prog_data = brw_wm_prog_data(this->prog_data);
-   brw_wm_prog_key *wm_key = (brw_wm_prog_key *) this->key;
-   const fs_builder bld = fs_builder(this).at_end();
-
-   assert(stage == MESA_SHADER_FRAGMENT);
-
-   payload_ = new fs_thread_payload(*this, source_depth_to_render_target);
-
-   if (nir->info.ray_queries > 0)
-      limit_dispatch_width(16, "SIMD32 not supported with ray queries.\n");
-
-   if (do_rep_send) {
-      assert(dispatch_width == 16);
-      emit_repclear_shader();
-   } else {
-      if (nir->info.inputs_read > 0 ||
-          BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) ||
-          (nir->info.outputs_read > 0 && !wm_key->coherent_fb_fetch)) {
-         emit_interpolation_setup();
-      }
-
-      /* We handle discards by keeping track of the still-live pixels in f0.1.
-       * Initialize it with the dispatched pixels.
-       */
-      if (devinfo->ver >= 20 || wm_prog_data->uses_kill) {
-         const unsigned lower_width = MIN2(dispatch_width, 16);
-         for (unsigned i = 0; i < dispatch_width / lower_width; i++) {
-            /* According to the "PS Thread Payload for Normal
-             * Dispatch" pages on the BSpec, the dispatch mask is
-             * stored in R0.15/R1.15 on gfx20+ and in R1.7/R2.7 on
-             * gfx6+.
-             */
-            const brw_reg dispatch_mask =
-               devinfo->ver >= 20 ? xe2_vec1_grf(i, 15) :
-                                    brw_vec1_grf(i + 1, 7);
-            bld.exec_all().group(1, 0)
-               .MOV(brw_sample_mask_reg(bld.group(lower_width, i)),
-                    retype(dispatch_mask, BRW_TYPE_UW));
-         }
-      }
-
-      if (nir->info.writes_memory)
-         wm_prog_data->has_side_effects = true;
-
-      nir_to_brw(this);
-
-      if (failed)
-	 return false;
-
-      emit_fb_writes();
-
-      calculate_cfg();
-
-      brw_fs_optimize(*this);
-
-      assign_curb_setup();
-
-      if (devinfo->ver == 9)
-         gfx9_ps_header_only_workaround(wm_prog_data);
-
-      assign_urb_setup();
-
-      brw_fs_lower_3src_null_dest(*this);
-      brw_fs_workaround_memory_fence_before_eot(*this);
-      brw_fs_workaround_emit_dummy_mov_instruction(*this);
-
-      allocate_registers(allow_spilling);
-   }
-
-   return !failed;
-}
-
-bool
-fs_visitor::run_cs(bool allow_spilling)
-{
-   assert(gl_shader_stage_is_compute(stage));
-   const fs_builder bld = fs_builder(this).at_end();
-
-   payload_ = new cs_thread_payload(*this);
-
-   if (devinfo->platform == INTEL_PLATFORM_HSW && prog_data->total_shared > 0) {
-      /* Move SLM index from g0.0[27:24] to sr0.1[11:8] */
-      const fs_builder abld = bld.exec_all().group(1, 0);
-      abld.MOV(retype(brw_sr0_reg(1), BRW_TYPE_UW),
-               suboffset(retype(brw_vec1_grf(0, 0), BRW_TYPE_UW), 1));
-   }
-
-   nir_to_brw(this);
-
-   if (failed)
-      return false;
-
-   emit_cs_terminate();
-
-   calculate_cfg();
-
-   brw_fs_optimize(*this);
-
-   assign_curb_setup();
-
-   brw_fs_lower_3src_null_dest(*this);
-   brw_fs_workaround_memory_fence_before_eot(*this);
-   brw_fs_workaround_emit_dummy_mov_instruction(*this);
-
-   allocate_registers(allow_spilling);
-
-   return !failed;
-}
-
-bool
-fs_visitor::run_bs(bool allow_spilling)
-{
-   assert(stage >= MESA_SHADER_RAYGEN && stage <= MESA_SHADER_CALLABLE);
-
-   payload_ = new bs_thread_payload(*this);
-
-   nir_to_brw(this);
-
-   if (failed)
-      return false;
-
-   /* TODO(RT): Perhaps rename this? */
-   emit_cs_terminate();
-
-   calculate_cfg();
-
-   brw_fs_optimize(*this);
-
-   assign_curb_setup();
-
-   brw_fs_lower_3src_null_dest(*this);
-   brw_fs_workaround_memory_fence_before_eot(*this);
-   brw_fs_workaround_emit_dummy_mov_instruction(*this);
-
-   allocate_registers(allow_spilling);
-
-   return !failed;
-}
-
-bool
-fs_visitor::run_task(bool allow_spilling)
-{
-   assert(stage == MESA_SHADER_TASK);
-
-   payload_ = new task_mesh_thread_payload(*this);
-
-   nir_to_brw(this);
-
-   if (failed)
-      return false;
-
-   emit_urb_fence();
-
-   emit_cs_terminate();
-
-   calculate_cfg();
-
-   brw_fs_optimize(*this);
-
-   assign_curb_setup();
-
-   brw_fs_lower_3src_null_dest(*this);
-   brw_fs_workaround_memory_fence_before_eot(*this);
-   brw_fs_workaround_emit_dummy_mov_instruction(*this);
-
-   allocate_registers(allow_spilling);
-
-   return !failed;
-}
-
-bool
-fs_visitor::run_mesh(bool allow_spilling)
-{
-   assert(stage == MESA_SHADER_MESH);
-
-   payload_ = new task_mesh_thread_payload(*this);
-
-   nir_to_brw(this);
-
-   if (failed)
-      return false;
-
-   emit_urb_fence();
-
-   emit_cs_terminate();
-
-   calculate_cfg();
-
-   brw_fs_optimize(*this);
-
-   assign_curb_setup();
-
-   brw_fs_lower_3src_null_dest(*this);
-   brw_fs_workaround_memory_fence_before_eot(*this);
-   brw_fs_workaround_emit_dummy_mov_instruction(*this);
-
-   allocate_registers(allow_spilling);
-
-   return !failed;
-}
-
-
 /**
 * Move load_interpolated_input with simple (payload-based) barycentric modes
 * to the top of the program so we don't emit multiple PLNs for the same input.