From 05928f42004a136ea951580e77d952149a4035b1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timur=20Krist=C3=B3f?= <timur.kristof@gmail.com>
Date: Sat, 13 May 2023 17:55:54 +0200
Subject: [PATCH] aco: Use ac_hw_stage instead of aco-specific HWStage.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The new ac_hw_stage is going to be used by drivers as well.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Qiang Yu <yuq825@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23597>
---
 src/amd/compiler/aco_assembler.cpp            | 15 ++---
 src/amd/compiler/aco_insert_exec_mask.cpp     |  3 +-
 .../compiler/aco_instruction_selection.cpp    | 39 ++++++------
 .../aco_instruction_selection_setup.cpp       | 40 ++++++------
 src/amd/compiler/aco_ir.h                     | 62 +++++++------------
 src/amd/compiler/aco_lower_to_hw_instr.cpp    |  2 +-
 src/amd/compiler/aco_optimizer.cpp            |  2 +-
 src/amd/compiler/aco_spill.cpp                |  2 +-
 8 files changed, 77 insertions(+), 88 deletions(-)
diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp
index 300ebdfbab9..45065b1471d 100644
--- a/src/amd/compiler/aco_assembler.cpp
+++ b/src/amd/compiler/aco_assembler.cpp
@@ -994,7 +994,8 @@ fix_exports(asm_context& ctx, std::vector<uint32_t>& out, Program* program)
       while (it != block.instructions.rend()) {
          if ((*it)->isEXP()) {
             Export_instruction& exp = (*it)->exp();
-            if (program->stage.hw == HWStage::VS || program->stage.hw == HWStage::NGG) {
+            if (program->stage.hw == AC_HW_VERTEX_SHADER ||
+                program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER) {
                if (exp.dest >= V_008DFC_SQ_EXP_POS && exp.dest <= (V_008DFC_SQ_EXP_POS + 3)) {
                   exp.done = true;
                   exported = true;
@@ -1014,19 +1015,19 @@ fix_exports(asm_context& ctx, std::vector<uint32_t>& out, Program* program)
             /* Do not abort if the main FS has an epilog because it only
              * exports MRTZ (if present) and the epilog exports colors.
              */
-            exported |= program->stage.hw == HWStage::FS && program->info.ps.has_epilog;
+            exported |= program->stage.hw == AC_HW_PIXEL_SHADER && program->info.ps.has_epilog;
          }
          ++it;
       }
    }
 
    /* GFX10+ FS may not export anything if no discard is used. */
-   bool may_skip_export = program->stage.hw == HWStage::FS && program->gfx_level >= GFX10;
+   bool may_skip_export = program->stage.hw == AC_HW_PIXEL_SHADER && program->gfx_level >= GFX10;
 
    if (!exported && !may_skip_export) {
       /* Abort in order to avoid a GPU hang. */
-      bool is_vertex_or_ngg =
-         (program->stage.hw == HWStage::VS || program->stage.hw == HWStage::NGG);
+      bool is_vertex_or_ngg = (program->stage.hw == AC_HW_VERTEX_SHADER ||
+                               program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER);
       aco_err(program,
               "Missing export in %s shader:", is_vertex_or_ngg ? "vertex or NGG" : "fragment");
       aco_print_program(program, stderr);
@@ -1221,8 +1222,8 @@ emit_program(Program* program, std::vector<uint32_t>& code, std::vector<struct a
 {
    asm_context ctx(program, symbols);
 
-   if (program->stage.hw == HWStage::VS || program->stage.hw == HWStage::FS ||
-       program->stage.hw == HWStage::NGG)
+   if (program->stage.hw == AC_HW_VERTEX_SHADER || program->stage.hw == AC_HW_PIXEL_SHADER ||
+       program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER)
       fix_exports(ctx, code, program);
 
    for (Block& block : program->blocks) {
diff --git a/src/amd/compiler/aco_insert_exec_mask.cpp b/src/amd/compiler/aco_insert_exec_mask.cpp
index 2008270a2d6..859a3cd04d1 100644
--- a/src/amd/compiler/aco_insert_exec_mask.cpp
+++ b/src/amd/compiler/aco_insert_exec_mask.cpp
@@ -261,7 +261,8 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instruction>>
       Operand start_exec(bld.lm);
 
       /* exec seems to need to be manually initialized with combined shaders */
-      if (ctx.program->stage.num_sw_stages() > 1 || ctx.program->stage.hw == HWStage::NGG) {
+      if (ctx.program->stage.num_sw_stages() > 1 ||
+          ctx.program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER) {
          start_exec = Operand::c32_or_c64(-1u, bld.lm == s2);
          bld.copy(Definition(exec, bld.lm), start_exec);
       }
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index c3d3a97667f..90374803266 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -7239,10 +7239,11 @@ emit_scoped_barrier(isel_context* ctx, nir_intrinsic_instr* instr)
     * - when GS is used on GFX9+, VS->GS and TES->GS I/O is lowered to shared memory
     * - additionally, when NGG is used on GFX10+, shared memory is used for certain features
     */
-   bool shared_storage_used = ctx->stage.hw == HWStage::CS || ctx->stage.hw == HWStage::LS ||
-                              ctx->stage.hw == HWStage::HS ||
-                              (ctx->stage.hw == HWStage::GS && ctx->program->gfx_level >= GFX9) ||
-                              ctx->stage.hw == HWStage::NGG;
+   bool shared_storage_used =
+      ctx->stage.hw == AC_HW_COMPUTE_SHADER || ctx->stage.hw == AC_HW_LOCAL_SHADER ||
+      ctx->stage.hw == AC_HW_HULL_SHADER ||
+      (ctx->stage.hw == AC_HW_LEGACY_GEOMETRY_SHADER && ctx->program->gfx_level >= GFX9) ||
+      ctx->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER;
 
    if (shared_storage_used)
       storage_allowed |= storage_shared;
@@ -7252,15 +7253,16 @@ emit_scoped_barrier(isel_context* ctx, nir_intrinsic_instr* instr)
       storage_allowed |= storage_task_payload;
 
    /* Allow VMEM output for all stages that can have outputs. */
-   if ((ctx->stage.hw != HWStage::CS && ctx->stage.hw != HWStage::FS) ||
+   if ((ctx->stage.hw != AC_HW_COMPUTE_SHADER && ctx->stage.hw != AC_HW_PIXEL_SHADER) ||
        ctx->stage.has(SWStage::TS))
       storage_allowed |= storage_vmem_output;
 
    /* Workgroup barriers can hang merged shaders that can potentially have 0 threads in either half.
     * They are allowed in CS, TCS, and in any NGG shader.
     */
-   ASSERTED bool workgroup_scope_allowed =
-      ctx->stage.hw == HWStage::CS || ctx->stage.hw == HWStage::HS || ctx->stage.hw == HWStage::NGG;
+   ASSERTED bool workgroup_scope_allowed = ctx->stage.hw == AC_HW_COMPUTE_SHADER ||
+                                           ctx->stage.hw == AC_HW_HULL_SHADER ||
+                                           ctx->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER;
 
    unsigned nir_storage = nir_intrinsic_memory_modes(instr);
    unsigned storage = aco_storage_mode_from_nir_mem_mode(nir_storage);
@@ -7510,7 +7512,7 @@ get_scratch_resource(isel_context* ctx)
       Temp addr_hi =
          bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_hi));
       scratch_addr = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), addr_lo, addr_hi);
-   } else if (ctx->stage.hw != HWStage::CS) {
+   } else if (ctx->stage.hw != AC_HW_COMPUTE_SHADER) {
       scratch_addr =
          bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), scratch_addr, Operand::zero());
    }
@@ -8284,7 +8286,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
    }
    case nir_intrinsic_load_workgroup_id: {
       Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
-      if (ctx->stage.hw == HWStage::CS) {
+      if (ctx->stage.hw == AC_HW_COMPUTE_SHADER) {
          const struct ac_arg* ids = ctx->args->workgroup_ids;
          bld.pseudo(aco_opcode::p_create_vector, Definition(dst),
                     ids[0].used ? Operand(get_arg(ctx, ids[0])) : Operand::zero(),
@@ -8297,7 +8299,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
       break;
    }
    case nir_intrinsic_load_local_invocation_index: {
-      if (ctx->stage.hw == HWStage::LS || ctx->stage.hw == HWStage::HS) {
+      if (ctx->stage.hw == AC_HW_LOCAL_SHADER || ctx->stage.hw == AC_HW_HULL_SHADER) {
          if (ctx->options->gfx_level >= GFX11) {
             /* On GFX11, RelAutoIndex is WaveID * WaveSize + ThreadID. */
             Temp wave_id =
@@ -8312,7 +8314,8 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
                      get_arg(ctx, ctx->args->vs_rel_patch_id));
          }
          break;
-      } else if (ctx->stage.hw == HWStage::GS || ctx->stage.hw == HWStage::NGG) {
+      } else if (ctx->stage.hw == AC_HW_LEGACY_GEOMETRY_SHADER ||
+                 ctx->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER) {
          bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), thread_id_in_threadgroup(ctx));
          break;
       } else if (ctx->program->workgroup_size <= ctx->program->wave_size) {
@@ -8343,11 +8346,11 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
       break;
    }
    case nir_intrinsic_load_subgroup_id: {
-      if (ctx->stage.hw == HWStage::CS) {
+      if (ctx->stage.hw == AC_HW_COMPUTE_SHADER) {
          bld.sop2(aco_opcode::s_bfe_u32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)),
                   bld.def(s1, scc), get_arg(ctx, ctx->args->tg_size),
                   Operand::c32(0x6u | (0x6u << 16)));
-      } else if (ctx->stage.hw == HWStage::NGG) {
+      } else if (ctx->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER) {
          /* Get the id of the current wave within the threadgroup (workgroup) */
          bld.sop2(aco_opcode::s_bfe_u32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)),
                   bld.def(s1, scc), get_arg(ctx, ctx->args->merged_wave_info),
@@ -8362,10 +8365,10 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
       break;
    }
    case nir_intrinsic_load_num_subgroups: {
-      if (ctx->stage.hw == HWStage::CS)
+      if (ctx->stage.hw == AC_HW_COMPUTE_SHADER)
          bld.sop2(aco_opcode::s_and_b32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)),
                   bld.def(s1, scc), Operand::c32(0x3fu), get_arg(ctx, ctx->args->tg_size));
-      else if (ctx->stage.hw == HWStage::NGG)
+      else if (ctx->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER)
          bld.sop2(aco_opcode::s_bfe_u32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)),
                   bld.def(s1, scc), get_arg(ctx, ctx->args->merged_wave_info),
                   Operand::c32(28u | (4u << 16)));
@@ -8922,7 +8925,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
          bld.copy(Definition(dst), get_arg(ctx, ctx->args->tes_patch_id));
          break;
       default:
-         if (ctx->stage.hw == HWStage::NGG && !ctx->stage.has(SWStage::GS)) {
+         if (ctx->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER && !ctx->stage.has(SWStage::GS)) {
             /* In case of NGG, the GS threads always have the primitive ID
              * even if there is no SW GS. */
             bld.copy(Definition(dst), get_arg(ctx, ctx->args->gs_prim_id));
@@ -11249,7 +11252,7 @@ select_program(Program* program, unsigned shader_count, struct nir_shader* const
       return select_program_rt(ctx, shader_count, shaders, args);
 
    if_context ic_merged_wave_info;
-   bool ngg_gs = ctx.stage.hw == HWStage::NGG && ctx.stage.has(SWStage::GS);
+   bool ngg_gs = ctx.stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER && ctx.stage.has(SWStage::GS);
 
    for (unsigned i = 0; i < shader_count; i++) {
       nir_shader* nir = shaders[i];
@@ -11286,7 +11289,7 @@ select_program(Program* program, unsigned shader_count, struct nir_shader* const
       bool endif_merged_wave_info =
          ctx.tcs_in_out_eq ? i == 1 : (check_merged_wave_info && !(ngg_gs && i == 1));
 
-      if (program->gfx_level == GFX10 && program->stage.hw == HWStage::NGG &&
+      if (program->gfx_level == GFX10 && program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER &&
           program->stage.num_sw_stages() == 1) {
          /* Workaround for Navi1x HW bug to ensure that all NGG waves launch before
           * s_sendmsg(GS_ALLOC_REQ). */
diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp
index 0108b2c8398..a707bc39010 100644
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -691,45 +691,45 @@ setup_isel_context(Program* program, unsigned shader_count, struct nir_shader* c
 
    bool gfx9_plus = options->gfx_level >= GFX9;
    bool ngg = info->is_ngg && options->gfx_level >= GFX10;
-   HWStage hw_stage{};
+   ac_hw_stage hw_stage;
    if (sw_stage == SWStage::VS && info->vs.as_es && !ngg)
-      hw_stage = HWStage::ES;
+      hw_stage = AC_HW_EXPORT_SHADER;
    else if (sw_stage == SWStage::VS && !info->vs.as_ls && !ngg)
-      hw_stage = HWStage::VS;
+      hw_stage = AC_HW_VERTEX_SHADER;
    else if (sw_stage == SWStage::VS && ngg)
-      hw_stage = HWStage::NGG; /* GFX10/NGG: VS without GS uses the HW GS stage */
+      hw_stage = AC_HW_NEXT_GEN_GEOMETRY_SHADER;
    else if (sw_stage == SWStage::GS)
-      hw_stage = HWStage::GS;
+      hw_stage = AC_HW_LEGACY_GEOMETRY_SHADER;
    else if (sw_stage == SWStage::FS)
-      hw_stage = HWStage::FS;
+      hw_stage = AC_HW_PIXEL_SHADER;
    else if (sw_stage == SWStage::CS)
-      hw_stage = HWStage::CS;
+      hw_stage = AC_HW_COMPUTE_SHADER;
    else if (sw_stage == SWStage::TS)
-      hw_stage = HWStage::CS; /* Task shaders are implemented with compute shaders. */
+      hw_stage = AC_HW_COMPUTE_SHADER;
    else if (sw_stage == SWStage::MS)
-      hw_stage = HWStage::NGG; /* Mesh shaders only work on NGG and on GFX10.3+. */
+      hw_stage = AC_HW_NEXT_GEN_GEOMETRY_SHADER;
    else if (sw_stage == SWStage::VS_GS && gfx9_plus && !ngg)
-      hw_stage = HWStage::GS; /* GFX6-9: VS+GS merged into a GS (and GFX10/legacy) */
+      hw_stage = AC_HW_LEGACY_GEOMETRY_SHADER;
    else if (sw_stage == SWStage::VS_GS && ngg)
-      hw_stage = HWStage::NGG; /* GFX10+: VS+GS merged into an NGG GS */
+      hw_stage = AC_HW_NEXT_GEN_GEOMETRY_SHADER;
    else if (sw_stage == SWStage::VS && info->vs.as_ls)
-      hw_stage = HWStage::LS; /* GFX6-8: VS is a Local Shader, when tessellation is used */
+      hw_stage = AC_HW_LOCAL_SHADER;
    else if (sw_stage == SWStage::TCS)
-      hw_stage = HWStage::HS; /* GFX6-8: TCS is a Hull Shader */
+      hw_stage = AC_HW_HULL_SHADER;
    else if (sw_stage == SWStage::VS_TCS)
-      hw_stage = HWStage::HS; /* GFX9-10: VS+TCS merged into a Hull Shader */
+      hw_stage = AC_HW_HULL_SHADER;
    else if (sw_stage == SWStage::TES && !info->tes.as_es && !ngg)
-      hw_stage = HWStage::VS; /* GFX6-9: TES without GS uses the HW VS stage (and GFX10/legacy) */
+      hw_stage = AC_HW_VERTEX_SHADER;
    else if (sw_stage == SWStage::TES && !info->tes.as_es && ngg)
-      hw_stage = HWStage::NGG; /* GFX10/NGG: TES without GS */
+      hw_stage = AC_HW_NEXT_GEN_GEOMETRY_SHADER;
    else if (sw_stage == SWStage::TES && info->tes.as_es && !ngg)
-      hw_stage = HWStage::ES; /* GFX6-8: TES is an Export Shader */
+      hw_stage = AC_HW_EXPORT_SHADER;
    else if (sw_stage == SWStage::TES_GS && gfx9_plus && !ngg)
-      hw_stage = HWStage::GS; /* GFX9: TES+GS merged into a GS (and GFX10/legacy) */
+      hw_stage = AC_HW_LEGACY_GEOMETRY_SHADER;
    else if (sw_stage == SWStage::TES_GS && ngg)
-      hw_stage = HWStage::NGG; /* GFX10+: TES+GS merged into an NGG GS */
+      hw_stage = AC_HW_NEXT_GEN_GEOMETRY_SHADER;
    else if (sw_stage == SWStage::RT)
-      hw_stage = HWStage::CS; /* Raytracing shaders run as CS */
+      hw_stage = AC_HW_COMPUTE_SHADER;
    else
       unreachable("Shader stage not implemented");
 
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index 08a9a2a2017..e04fcae44e7 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -32,6 +32,7 @@
 #include "util/compiler.h"
 
 #include "ac_binary.h"
+#include "ac_shader_util.h"
 #include "amd_family.h"
 #include <algorithm>
 #include <bitset>
@@ -2003,24 +2004,6 @@ operator|(SWStage a, SWStage b)
    return static_cast<SWStage>(static_cast<uint16_t>(a) | static_cast<uint16_t>(b));
 }
 
-/*
- * Shader stages as running on the AMD GPU.
- *
- * The relation between HWStages and SWStages is not a one-to-one mapping:
- * Some SWStages are merged by ACO to run on a single HWStage.
- * See README.md for details.
- */
-enum class HWStage : uint8_t {
-   VS,
-   ES, /* Export shader: pre-GS (VS or TES) on GFX6-8. Combined into GS on GFX9 (and GFX10/legacy). */
-   GS,  /* Geometry shader on GFX10/legacy and GFX6-9. */
-   NGG, /* Primitive shader, used to implement VS, TES, GS. */
-   LS,  /* Local shader: pre-TCS (VS) on GFX6-8. Combined into HS on GFX9 (and GFX10/legacy). */
-   HS,  /* Hull shader: TCS on GFX6-8. Merged VS and TCS on GFX9-10. */
-   FS,
-   CS,
-};
-
 /*
  * Set of SWStages to be merged into a single shader paired with the
  * HWStage it will run on.
@@ -2028,7 +2011,7 @@ enum class HWStage : uint8_t {
 struct Stage {
    constexpr Stage() = default;
 
-   explicit constexpr Stage(HWStage hw_, SWStage sw_) : sw(sw_), hw(hw_) {}
+   explicit constexpr Stage(ac_hw_stage hw_, SWStage sw_) : sw(sw_), hw(hw_) {}
 
    /* Check if the given SWStage is included */
    constexpr bool has(SWStage stage) const
@@ -2046,35 +2029,36 @@ struct Stage {
    SWStage sw = SWStage::None;
 
    /* Active hardware stage */
-   HWStage hw{};
+   ac_hw_stage hw{};
 };
 
 /* possible settings of Program::stage */
-static constexpr Stage vertex_vs(HWStage::VS, SWStage::VS);
-static constexpr Stage fragment_fs(HWStage::FS, SWStage::FS);
-static constexpr Stage compute_cs(HWStage::CS, SWStage::CS);
-static constexpr Stage tess_eval_vs(HWStage::VS, SWStage::TES);
+static constexpr Stage vertex_vs(AC_HW_VERTEX_SHADER, SWStage::VS);
+static constexpr Stage fragment_fs(AC_HW_PIXEL_SHADER, SWStage::FS);
+static constexpr Stage compute_cs(AC_HW_COMPUTE_SHADER, SWStage::CS);
+static constexpr Stage tess_eval_vs(AC_HW_VERTEX_SHADER, SWStage::TES);
 /* Mesh shading pipeline */
-static constexpr Stage task_cs(HWStage::CS, SWStage::TS);
-static constexpr Stage mesh_ngg(HWStage::NGG, SWStage::MS);
+static constexpr Stage task_cs(AC_HW_COMPUTE_SHADER, SWStage::TS);
+static constexpr Stage mesh_ngg(AC_HW_NEXT_GEN_GEOMETRY_SHADER, SWStage::MS);
 /* GFX10/NGG */
-static constexpr Stage vertex_ngg(HWStage::NGG, SWStage::VS);
-static constexpr Stage vertex_geometry_ngg(HWStage::NGG, SWStage::VS_GS);
-static constexpr Stage tess_eval_ngg(HWStage::NGG, SWStage::TES);
-static constexpr Stage tess_eval_geometry_ngg(HWStage::NGG, SWStage::TES_GS);
+static constexpr Stage vertex_ngg(AC_HW_NEXT_GEN_GEOMETRY_SHADER, SWStage::VS);
+static constexpr Stage vertex_geometry_ngg(AC_HW_NEXT_GEN_GEOMETRY_SHADER, SWStage::VS_GS);
+static constexpr Stage tess_eval_ngg(AC_HW_NEXT_GEN_GEOMETRY_SHADER, SWStage::TES);
+static constexpr Stage tess_eval_geometry_ngg(AC_HW_NEXT_GEN_GEOMETRY_SHADER, SWStage::TES_GS);
 /* GFX9 (and GFX10 if NGG isn't used) */
-static constexpr Stage vertex_geometry_gs(HWStage::GS, SWStage::VS_GS);
-static constexpr Stage vertex_tess_control_hs(HWStage::HS, SWStage::VS_TCS);
-static constexpr Stage tess_eval_geometry_gs(HWStage::GS, SWStage::TES_GS);
+static constexpr Stage vertex_geometry_gs(AC_HW_LEGACY_GEOMETRY_SHADER, SWStage::VS_GS);
+static constexpr Stage vertex_tess_control_hs(AC_HW_HULL_SHADER, SWStage::VS_TCS);
+static constexpr Stage tess_eval_geometry_gs(AC_HW_LEGACY_GEOMETRY_SHADER, SWStage::TES_GS);
 /* pre-GFX9 */
-static constexpr Stage vertex_ls(HWStage::LS, SWStage::VS); /* vertex before tessellation control */
-static constexpr Stage vertex_es(HWStage::ES, SWStage::VS); /* vertex before geometry */
-static constexpr Stage tess_control_hs(HWStage::HS, SWStage::TCS);
-static constexpr Stage tess_eval_es(HWStage::ES,
+static constexpr Stage vertex_ls(AC_HW_LOCAL_SHADER,
+                                 SWStage::VS); /* vertex before tessellation control */
+static constexpr Stage vertex_es(AC_HW_EXPORT_SHADER, SWStage::VS); /* vertex before geometry */
+static constexpr Stage tess_control_hs(AC_HW_HULL_SHADER, SWStage::TCS);
+static constexpr Stage tess_eval_es(AC_HW_EXPORT_SHADER,
                                     SWStage::TES); /* tessellation evaluation before geometry */
-static constexpr Stage geometry_gs(HWStage::GS, SWStage::GS);
+static constexpr Stage geometry_gs(AC_HW_LEGACY_GEOMETRY_SHADER, SWStage::GS);
 /* Raytracing */
-static constexpr Stage raytracing_cs(HWStage::CS, SWStage::RT);
+static constexpr Stage raytracing_cs(AC_HW_COMPUTE_SHADER, SWStage::RT);
 
 struct DeviceInfo {
    uint16_t lds_encoding_granule;
diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp
index a971e45551a..8ddb2c75f69 100644
--- a/src/amd/compiler/aco_lower_to_hw_instr.cpp
+++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp
@@ -2595,7 +2595,7 @@ lower_to_hw_instr(Program* program)
                   bld.sop1(aco_opcode::p_load_symbol, Definition(reg.advance(4), s1),
                            Operand::c32(aco_symbol_scratch_addr_hi));
                   scratch_addr.setFixed(reg);
-               } else if (program->stage.hw != HWStage::CS) {
+               } else if (program->stage.hw != AC_HW_COMPUTE_SHADER) {
                   bld.smem(aco_opcode::s_load_dwordx2, instr->definitions[0], scratch_addr,
                            Operand::zero());
                   scratch_addr.setFixed(instr->definitions[0].physReg());
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index ad7dc02a8bd..c60b6817d5d 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -2005,7 +2005,7 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
                ctx.info[instr->operands[0].tempId()].instr->definitions[1].getTemp());
             break;
          } else if ((ctx.program->stage.num_sw_stages() > 1 ||
-                     ctx.program->stage.hw == HWStage::NGG) &&
+                     ctx.program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER) &&
                     instr->pass_flags == 1) {
             /* In case of merged shaders, pass_flags=1 means that all lanes are active (exec=-1), so
              * s_and is unnecessary. */
diff --git a/src/amd/compiler/aco_spill.cpp b/src/amd/compiler/aco_spill.cpp
index 1b64d601f29..93b7c975252 100644
--- a/src/amd/compiler/aco_spill.cpp
+++ b/src/amd/compiler/aco_spill.cpp
@@ -1431,7 +1431,7 @@ load_scratch_resource(spill_ctx& ctx, Temp& scratch_offset, Block& block,
          bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_hi));
       private_segment_buffer =
          bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), addr_lo, addr_hi);
-   } else if (ctx.program->stage.hw != HWStage::CS) {
+   } else if (ctx.program->stage.hw != AC_HW_COMPUTE_SHADER) {
       private_segment_buffer =
          bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), private_segment_buffer, Operand::zero());
    }