aco: Use ac_hw_stage instead of aco-specific HWStage.

The new ac_hw_stage is going to be used by drivers as well. Signed-off-by: Timur Kristóf <timur.kristof@gmail.com> Reviewed-by: Qiang Yu <yuq825@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23597>
2023-05-13 17:55:54 +02:00
parent cc2307008a
commit 05928f4200
8 changed files with 77 additions and 88 deletions
@@ -994,7 +994,8 @@ fix_exports(asm_context& ctx, std::vector<uint32_t>& out, Program* program)
      while (it != block.instructions.rend()) {
         if ((*it)->isEXP()) {
            Export_instruction& exp = (*it)->exp();
-            if (program->stage.hw == HWStage::VS || program->stage.hw == HWStage::NGG) {
+            if (program->stage.hw == AC_HW_VERTEX_SHADER ||
+                program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER) {
               if (exp.dest >= V_008DFC_SQ_EXP_POS && exp.dest <= (V_008DFC_SQ_EXP_POS + 3)) {
                  exp.done = true;
                  exported = true;
@@ -1014,19 +1015,19 @@ fix_exports(asm_context& ctx, std::vector<uint32_t>& out, Program* program)
            /* Do not abort if the main FS has an epilog because it only
             * exports MRTZ (if present) and the epilog exports colors.
             */
-            exported |= program->stage.hw == HWStage::FS && program->info.ps.has_epilog;
+            exported |= program->stage.hw == AC_HW_PIXEL_SHADER && program->info.ps.has_epilog;
         }
         ++it;
      }
   }

   /* GFX10+ FS may not export anything if no discard is used. */
-   bool may_skip_export = program->stage.hw == HWStage::FS && program->gfx_level >= GFX10;
+   bool may_skip_export = program->stage.hw == AC_HW_PIXEL_SHADER && program->gfx_level >= GFX10;

   if (!exported && !may_skip_export) {
      /* Abort in order to avoid a GPU hang. */
-      bool is_vertex_or_ngg =
-         (program->stage.hw == HWStage::VS || program->stage.hw == HWStage::NGG);
+      bool is_vertex_or_ngg = (program->stage.hw == AC_HW_VERTEX_SHADER ||
+                               program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER);
      aco_err(program,
              "Missing export in %s shader:", is_vertex_or_ngg ? "vertex or NGG" : "fragment");
      aco_print_program(program, stderr);
@@ -1221,8 +1222,8 @@ emit_program(Program* program, std::vector<uint32_t>& code, std::vector<struct a
 {
   asm_context ctx(program, symbols);

-   if (program->stage.hw == HWStage::VS || program->stage.hw == HWStage::FS ||
-       program->stage.hw == HWStage::NGG)
+   if (program->stage.hw == AC_HW_VERTEX_SHADER || program->stage.hw == AC_HW_PIXEL_SHADER ||
+       program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER)
      fix_exports(ctx, code, program);

   for (Block& block : program->blocks) {
@@ -261,7 +261,8 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instruction>>
      Operand start_exec(bld.lm);

      /* exec seems to need to be manually initialized with combined shaders */
-      if (ctx.program->stage.num_sw_stages() > 1 || ctx.program->stage.hw == HWStage::NGG) {
+      if (ctx.program->stage.num_sw_stages() > 1 ||
+          ctx.program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER) {
         start_exec = Operand::c32_or_c64(-1u, bld.lm == s2);
         bld.copy(Definition(exec, bld.lm), start_exec);
      }
@@ -7239,10 +7239,11 @@ emit_scoped_barrier(isel_context* ctx, nir_intrinsic_instr* instr)
    * - when GS is used on GFX9+, VS->GS and TES->GS I/O is lowered to shared memory
    * - additionally, when NGG is used on GFX10+, shared memory is used for certain features
    */
-   bool shared_storage_used = ctx->stage.hw == HWStage::CS || ctx->stage.hw == HWStage::LS ||
-                              ctx->stage.hw == HWStage::HS ||
-                              (ctx->stage.hw == HWStage::GS && ctx->program->gfx_level >= GFX9) ||
-                              ctx->stage.hw == HWStage::NGG;
+   bool shared_storage_used =
+      ctx->stage.hw == AC_HW_COMPUTE_SHADER || ctx->stage.hw == AC_HW_LOCAL_SHADER ||
+      ctx->stage.hw == AC_HW_HULL_SHADER ||
+      (ctx->stage.hw == AC_HW_LEGACY_GEOMETRY_SHADER && ctx->program->gfx_level >= GFX9) ||
+      ctx->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER;

   if (shared_storage_used)
      storage_allowed |= storage_shared;
@@ -7252,15 +7253,16 @@ emit_scoped_barrier(isel_context* ctx, nir_intrinsic_instr* instr)
      storage_allowed |= storage_task_payload;

   /* Allow VMEM output for all stages that can have outputs. */
-   if ((ctx->stage.hw != HWStage::CS && ctx->stage.hw != HWStage::FS) ||
+   if ((ctx->stage.hw != AC_HW_COMPUTE_SHADER && ctx->stage.hw != AC_HW_PIXEL_SHADER) ||
       ctx->stage.has(SWStage::TS))
      storage_allowed |= storage_vmem_output;

   /* Workgroup barriers can hang merged shaders that can potentially have 0 threads in either half.
    * They are allowed in CS, TCS, and in any NGG shader.
    */
-   ASSERTED bool workgroup_scope_allowed =
-      ctx->stage.hw == HWStage::CS || ctx->stage.hw == HWStage::HS || ctx->stage.hw == HWStage::NGG;
+   ASSERTED bool workgroup_scope_allowed = ctx->stage.hw == AC_HW_COMPUTE_SHADER ||
+                                           ctx->stage.hw == AC_HW_HULL_SHADER ||
+                                           ctx->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER;

   unsigned nir_storage = nir_intrinsic_memory_modes(instr);
   unsigned storage = aco_storage_mode_from_nir_mem_mode(nir_storage);
@@ -7510,7 +7512,7 @@ get_scratch_resource(isel_context* ctx)
      Temp addr_hi =
         bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_hi));
      scratch_addr = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), addr_lo, addr_hi);
-   } else if (ctx->stage.hw != HWStage::CS) {
+   } else if (ctx->stage.hw != AC_HW_COMPUTE_SHADER) {
      scratch_addr =
         bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), scratch_addr, Operand::zero());
   }
@@ -8284,7 +8286,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
   }
   case nir_intrinsic_load_workgroup_id: {
      Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
-      if (ctx->stage.hw == HWStage::CS) {
+      if (ctx->stage.hw == AC_HW_COMPUTE_SHADER) {
         const struct ac_arg* ids = ctx->args->workgroup_ids;
         bld.pseudo(aco_opcode::p_create_vector, Definition(dst),
                    ids[0].used ? Operand(get_arg(ctx, ids[0])) : Operand::zero(),
@@ -8297,7 +8299,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
      break;
   }
   case nir_intrinsic_load_local_invocation_index: {
-      if (ctx->stage.hw == HWStage::LS || ctx->stage.hw == HWStage::HS) {
+      if (ctx->stage.hw == AC_HW_LOCAL_SHADER || ctx->stage.hw == AC_HW_HULL_SHADER) {
         if (ctx->options->gfx_level >= GFX11) {
            /* On GFX11, RelAutoIndex is WaveID * WaveSize + ThreadID. */
            Temp wave_id =
@@ -8312,7 +8314,8 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
                     get_arg(ctx, ctx->args->vs_rel_patch_id));
         }
         break;
-      } else if (ctx->stage.hw == HWStage::GS || ctx->stage.hw == HWStage::NGG) {
+      } else if (ctx->stage.hw == AC_HW_LEGACY_GEOMETRY_SHADER ||
+                 ctx->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER) {
         bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), thread_id_in_threadgroup(ctx));
         break;
      } else if (ctx->program->workgroup_size <= ctx->program->wave_size) {
@@ -8343,11 +8346,11 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
      break;
   }
   case nir_intrinsic_load_subgroup_id: {
-      if (ctx->stage.hw == HWStage::CS) {
+      if (ctx->stage.hw == AC_HW_COMPUTE_SHADER) {
         bld.sop2(aco_opcode::s_bfe_u32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)),
                  bld.def(s1, scc), get_arg(ctx, ctx->args->tg_size),
                  Operand::c32(0x6u | (0x6u << 16)));
-      } else if (ctx->stage.hw == HWStage::NGG) {
+      } else if (ctx->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER) {
         /* Get the id of the current wave within the threadgroup (workgroup) */
         bld.sop2(aco_opcode::s_bfe_u32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)),
                  bld.def(s1, scc), get_arg(ctx, ctx->args->merged_wave_info),
@@ -8362,10 +8365,10 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
      break;
   }
   case nir_intrinsic_load_num_subgroups: {
-      if (ctx->stage.hw == HWStage::CS)
+      if (ctx->stage.hw == AC_HW_COMPUTE_SHADER)
         bld.sop2(aco_opcode::s_and_b32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)),
                  bld.def(s1, scc), Operand::c32(0x3fu), get_arg(ctx, ctx->args->tg_size));
-      else if (ctx->stage.hw == HWStage::NGG)
+      else if (ctx->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER)
         bld.sop2(aco_opcode::s_bfe_u32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)),
                  bld.def(s1, scc), get_arg(ctx, ctx->args->merged_wave_info),
                  Operand::c32(28u | (4u << 16)));
@@ -8922,7 +8925,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
         bld.copy(Definition(dst), get_arg(ctx, ctx->args->tes_patch_id));
         break;
      default:
-         if (ctx->stage.hw == HWStage::NGG && !ctx->stage.has(SWStage::GS)) {
+         if (ctx->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER && !ctx->stage.has(SWStage::GS)) {
            /* In case of NGG, the GS threads always have the primitive ID
             * even if there is no SW GS. */
            bld.copy(Definition(dst), get_arg(ctx, ctx->args->gs_prim_id));
@@ -11249,7 +11252,7 @@ select_program(Program* program, unsigned shader_count, struct nir_shader* const
      return select_program_rt(ctx, shader_count, shaders, args);

   if_context ic_merged_wave_info;
-   bool ngg_gs = ctx.stage.hw == HWStage::NGG && ctx.stage.has(SWStage::GS);
+   bool ngg_gs = ctx.stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER && ctx.stage.has(SWStage::GS);

   for (unsigned i = 0; i < shader_count; i++) {
      nir_shader* nir = shaders[i];
@@ -11286,7 +11289,7 @@ select_program(Program* program, unsigned shader_count, struct nir_shader* const
      bool endif_merged_wave_info =
         ctx.tcs_in_out_eq ? i == 1 : (check_merged_wave_info && !(ngg_gs && i == 1));

-      if (program->gfx_level == GFX10 && program->stage.hw == HWStage::NGG &&
+      if (program->gfx_level == GFX10 && program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER &&
          program->stage.num_sw_stages() == 1) {
         /* Workaround for Navi1x HW bug to ensure that all NGG waves launch before
          * s_sendmsg(GS_ALLOC_REQ). */
@@ -691,45 +691,45 @@ setup_isel_context(Program* program, unsigned shader_count, struct nir_shader* c

   bool gfx9_plus = options->gfx_level >= GFX9;
   bool ngg = info->is_ngg && options->gfx_level >= GFX10;
-   HWStage hw_stage{};
+   ac_hw_stage hw_stage;
   if (sw_stage == SWStage::VS && info->vs.as_es && !ngg)
-      hw_stage = HWStage::ES;
+      hw_stage = AC_HW_EXPORT_SHADER;
   else if (sw_stage == SWStage::VS && !info->vs.as_ls && !ngg)
-      hw_stage = HWStage::VS;
+      hw_stage = AC_HW_VERTEX_SHADER;
   else if (sw_stage == SWStage::VS && ngg)
-      hw_stage = HWStage::NGG; /* GFX10/NGG: VS without GS uses the HW GS stage */
+      hw_stage = AC_HW_NEXT_GEN_GEOMETRY_SHADER;
   else if (sw_stage == SWStage::GS)
-      hw_stage = HWStage::GS;
+      hw_stage = AC_HW_LEGACY_GEOMETRY_SHADER;
   else if (sw_stage == SWStage::FS)
-      hw_stage = HWStage::FS;
+      hw_stage = AC_HW_PIXEL_SHADER;
   else if (sw_stage == SWStage::CS)
-      hw_stage = HWStage::CS;
+      hw_stage = AC_HW_COMPUTE_SHADER;
   else if (sw_stage == SWStage::TS)
-      hw_stage = HWStage::CS; /* Task shaders are implemented with compute shaders. */
+      hw_stage = AC_HW_COMPUTE_SHADER;
   else if (sw_stage == SWStage::MS)
-      hw_stage = HWStage::NGG; /* Mesh shaders only work on NGG and on GFX10.3+. */
+      hw_stage = AC_HW_NEXT_GEN_GEOMETRY_SHADER;
   else if (sw_stage == SWStage::VS_GS && gfx9_plus && !ngg)
-      hw_stage = HWStage::GS; /* GFX6-9: VS+GS merged into a GS (and GFX10/legacy) */
+      hw_stage = AC_HW_LEGACY_GEOMETRY_SHADER;
   else if (sw_stage == SWStage::VS_GS && ngg)
-      hw_stage = HWStage::NGG; /* GFX10+: VS+GS merged into an NGG GS */
+      hw_stage = AC_HW_NEXT_GEN_GEOMETRY_SHADER;
   else if (sw_stage == SWStage::VS && info->vs.as_ls)
-      hw_stage = HWStage::LS; /* GFX6-8: VS is a Local Shader, when tessellation is used */
+      hw_stage = AC_HW_LOCAL_SHADER;
   else if (sw_stage == SWStage::TCS)
-      hw_stage = HWStage::HS; /* GFX6-8: TCS is a Hull Shader */
+      hw_stage = AC_HW_HULL_SHADER;
   else if (sw_stage == SWStage::VS_TCS)
-      hw_stage = HWStage::HS; /* GFX9-10: VS+TCS merged into a Hull Shader */
+      hw_stage = AC_HW_HULL_SHADER;
   else if (sw_stage == SWStage::TES && !info->tes.as_es && !ngg)
-      hw_stage = HWStage::VS; /* GFX6-9: TES without GS uses the HW VS stage (and GFX10/legacy) */
+      hw_stage = AC_HW_VERTEX_SHADER;
   else if (sw_stage == SWStage::TES && !info->tes.as_es && ngg)
-      hw_stage = HWStage::NGG; /* GFX10/NGG: TES without GS */
+      hw_stage = AC_HW_NEXT_GEN_GEOMETRY_SHADER;
   else if (sw_stage == SWStage::TES && info->tes.as_es && !ngg)
-      hw_stage = HWStage::ES; /* GFX6-8: TES is an Export Shader */
+      hw_stage = AC_HW_EXPORT_SHADER;
   else if (sw_stage == SWStage::TES_GS && gfx9_plus && !ngg)
-      hw_stage = HWStage::GS; /* GFX9: TES+GS merged into a GS (and GFX10/legacy) */
+      hw_stage = AC_HW_LEGACY_GEOMETRY_SHADER;
   else if (sw_stage == SWStage::TES_GS && ngg)
-      hw_stage = HWStage::NGG; /* GFX10+: TES+GS merged into an NGG GS */
+      hw_stage = AC_HW_NEXT_GEN_GEOMETRY_SHADER;
   else if (sw_stage == SWStage::RT)
-      hw_stage = HWStage::CS; /* Raytracing shaders run as CS */
+      hw_stage = AC_HW_COMPUTE_SHADER;
   else
      unreachable("Shader stage not implemented");

@@ -32,6 +32,7 @@
 #include "util/compiler.h"

 #include "ac_binary.h"
+#include "ac_shader_util.h"
 #include "amd_family.h"
 #include <algorithm>
 #include <bitset>
@@ -2003,24 +2004,6 @@ operator|(SWStage a, SWStage b)
   return static_cast<SWStage>(static_cast<uint16_t>(a) | static_cast<uint16_t>(b));
 }

-/*
- * Shader stages as running on the AMD GPU.
- *
- * The relation between HWStages and SWStages is not a one-to-one mapping:
- * Some SWStages are merged by ACO to run on a single HWStage.
- * See README.md for details.
- */
-enum class HWStage : uint8_t {
-   VS,
-   ES, /* Export shader: pre-GS (VS or TES) on GFX6-8. Combined into GS on GFX9 (and GFX10/legacy). */
-   GS,  /* Geometry shader on GFX10/legacy and GFX6-9. */
-   NGG, /* Primitive shader, used to implement VS, TES, GS. */
-   LS,  /* Local shader: pre-TCS (VS) on GFX6-8. Combined into HS on GFX9 (and GFX10/legacy). */
-   HS,  /* Hull shader: TCS on GFX6-8. Merged VS and TCS on GFX9-10. */
-   FS,
-   CS,
-};
-
 /*
 * Set of SWStages to be merged into a single shader paired with the
 * HWStage it will run on.
@@ -2028,7 +2011,7 @@ enum class HWStage : uint8_t {
 struct Stage {
   constexpr Stage() = default;

-   explicit constexpr Stage(HWStage hw_, SWStage sw_) : sw(sw_), hw(hw_) {}
+   explicit constexpr Stage(ac_hw_stage hw_, SWStage sw_) : sw(sw_), hw(hw_) {}

   /* Check if the given SWStage is included */
   constexpr bool has(SWStage stage) const
@@ -2046,35 +2029,36 @@ struct Stage {
   SWStage sw = SWStage::None;

   /* Active hardware stage */
-   HWStage hw{};
+   ac_hw_stage hw{};
 };

 /* possible settings of Program::stage */
-static constexpr Stage vertex_vs(HWStage::VS, SWStage::VS);
-static constexpr Stage fragment_fs(HWStage::FS, SWStage::FS);
-static constexpr Stage compute_cs(HWStage::CS, SWStage::CS);
-static constexpr Stage tess_eval_vs(HWStage::VS, SWStage::TES);
+static constexpr Stage vertex_vs(AC_HW_VERTEX_SHADER, SWStage::VS);
+static constexpr Stage fragment_fs(AC_HW_PIXEL_SHADER, SWStage::FS);
+static constexpr Stage compute_cs(AC_HW_COMPUTE_SHADER, SWStage::CS);
+static constexpr Stage tess_eval_vs(AC_HW_VERTEX_SHADER, SWStage::TES);
 /* Mesh shading pipeline */
-static constexpr Stage task_cs(HWStage::CS, SWStage::TS);
-static constexpr Stage mesh_ngg(HWStage::NGG, SWStage::MS);
+static constexpr Stage task_cs(AC_HW_COMPUTE_SHADER, SWStage::TS);
+static constexpr Stage mesh_ngg(AC_HW_NEXT_GEN_GEOMETRY_SHADER, SWStage::MS);
 /* GFX10/NGG */
-static constexpr Stage vertex_ngg(HWStage::NGG, SWStage::VS);
-static constexpr Stage vertex_geometry_ngg(HWStage::NGG, SWStage::VS_GS);
-static constexpr Stage tess_eval_ngg(HWStage::NGG, SWStage::TES);
-static constexpr Stage tess_eval_geometry_ngg(HWStage::NGG, SWStage::TES_GS);
+static constexpr Stage vertex_ngg(AC_HW_NEXT_GEN_GEOMETRY_SHADER, SWStage::VS);
+static constexpr Stage vertex_geometry_ngg(AC_HW_NEXT_GEN_GEOMETRY_SHADER, SWStage::VS_GS);
+static constexpr Stage tess_eval_ngg(AC_HW_NEXT_GEN_GEOMETRY_SHADER, SWStage::TES);
+static constexpr Stage tess_eval_geometry_ngg(AC_HW_NEXT_GEN_GEOMETRY_SHADER, SWStage::TES_GS);
 /* GFX9 (and GFX10 if NGG isn't used) */
-static constexpr Stage vertex_geometry_gs(HWStage::GS, SWStage::VS_GS);
-static constexpr Stage vertex_tess_control_hs(HWStage::HS, SWStage::VS_TCS);
-static constexpr Stage tess_eval_geometry_gs(HWStage::GS, SWStage::TES_GS);
+static constexpr Stage vertex_geometry_gs(AC_HW_LEGACY_GEOMETRY_SHADER, SWStage::VS_GS);
+static constexpr Stage vertex_tess_control_hs(AC_HW_HULL_SHADER, SWStage::VS_TCS);
+static constexpr Stage tess_eval_geometry_gs(AC_HW_LEGACY_GEOMETRY_SHADER, SWStage::TES_GS);
 /* pre-GFX9 */
-static constexpr Stage vertex_ls(HWStage::LS, SWStage::VS); /* vertex before tessellation control */
-static constexpr Stage vertex_es(HWStage::ES, SWStage::VS); /* vertex before geometry */
-static constexpr Stage tess_control_hs(HWStage::HS, SWStage::TCS);
-static constexpr Stage tess_eval_es(HWStage::ES,
+static constexpr Stage vertex_ls(AC_HW_LOCAL_SHADER,
+                                 SWStage::VS); /* vertex before tessellation control */
+static constexpr Stage vertex_es(AC_HW_EXPORT_SHADER, SWStage::VS); /* vertex before geometry */
+static constexpr Stage tess_control_hs(AC_HW_HULL_SHADER, SWStage::TCS);
+static constexpr Stage tess_eval_es(AC_HW_EXPORT_SHADER,
                                    SWStage::TES); /* tessellation evaluation before geometry */
-static constexpr Stage geometry_gs(HWStage::GS, SWStage::GS);
+static constexpr Stage geometry_gs(AC_HW_LEGACY_GEOMETRY_SHADER, SWStage::GS);
 /* Raytracing */
-static constexpr Stage raytracing_cs(HWStage::CS, SWStage::RT);
+static constexpr Stage raytracing_cs(AC_HW_COMPUTE_SHADER, SWStage::RT);

 struct DeviceInfo {
   uint16_t lds_encoding_granule;
@@ -2595,7 +2595,7 @@ lower_to_hw_instr(Program* program)
                  bld.sop1(aco_opcode::p_load_symbol, Definition(reg.advance(4), s1),
                           Operand::c32(aco_symbol_scratch_addr_hi));
                  scratch_addr.setFixed(reg);
-               } else if (program->stage.hw != HWStage::CS) {
+               } else if (program->stage.hw != AC_HW_COMPUTE_SHADER) {
                  bld.smem(aco_opcode::s_load_dwordx2, instr->definitions[0], scratch_addr,
                           Operand::zero());
                  scratch_addr.setFixed(instr->definitions[0].physReg());
@@ -2005,7 +2005,7 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
               ctx.info[instr->operands[0].tempId()].instr->definitions[1].getTemp());
            break;
         } else if ((ctx.program->stage.num_sw_stages() > 1 ||
-                     ctx.program->stage.hw == HWStage::NGG) &&
+                     ctx.program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER) &&
                    instr->pass_flags == 1) {
            /* In case of merged shaders, pass_flags=1 means that all lanes are active (exec=-1), so
             * s_and is unnecessary. */
@@ -1431,7 +1431,7 @@ load_scratch_resource(spill_ctx& ctx, Temp& scratch_offset, Block& block,
         bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_hi));
      private_segment_buffer =
         bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), addr_lo, addr_hi);
-   } else if (ctx.program->stage.hw != HWStage::CS) {
+   } else if (ctx.program->stage.hw != AC_HW_COMPUTE_SHADER) {
      private_segment_buffer =
         bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), private_segment_buffer, Operand::zero());
   }