aco: Use ac_hw_stage instead of aco-specific HWStage.
The new ac_hw_stage is going to be used by drivers as well. Signed-off-by: Timur Kristóf <timur.kristof@gmail.com> Reviewed-by: Qiang Yu <yuq825@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23597>
This commit is contained in:
@@ -994,7 +994,8 @@ fix_exports(asm_context& ctx, std::vector<uint32_t>& out, Program* program)
|
||||
while (it != block.instructions.rend()) {
|
||||
if ((*it)->isEXP()) {
|
||||
Export_instruction& exp = (*it)->exp();
|
||||
if (program->stage.hw == HWStage::VS || program->stage.hw == HWStage::NGG) {
|
||||
if (program->stage.hw == AC_HW_VERTEX_SHADER ||
|
||||
program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER) {
|
||||
if (exp.dest >= V_008DFC_SQ_EXP_POS && exp.dest <= (V_008DFC_SQ_EXP_POS + 3)) {
|
||||
exp.done = true;
|
||||
exported = true;
|
||||
@@ -1014,19 +1015,19 @@ fix_exports(asm_context& ctx, std::vector<uint32_t>& out, Program* program)
|
||||
/* Do not abort if the main FS has an epilog because it only
|
||||
* exports MRTZ (if present) and the epilog exports colors.
|
||||
*/
|
||||
exported |= program->stage.hw == HWStage::FS && program->info.ps.has_epilog;
|
||||
exported |= program->stage.hw == AC_HW_PIXEL_SHADER && program->info.ps.has_epilog;
|
||||
}
|
||||
++it;
|
||||
}
|
||||
}
|
||||
|
||||
/* GFX10+ FS may not export anything if no discard is used. */
|
||||
bool may_skip_export = program->stage.hw == HWStage::FS && program->gfx_level >= GFX10;
|
||||
bool may_skip_export = program->stage.hw == AC_HW_PIXEL_SHADER && program->gfx_level >= GFX10;
|
||||
|
||||
if (!exported && !may_skip_export) {
|
||||
/* Abort in order to avoid a GPU hang. */
|
||||
bool is_vertex_or_ngg =
|
||||
(program->stage.hw == HWStage::VS || program->stage.hw == HWStage::NGG);
|
||||
bool is_vertex_or_ngg = (program->stage.hw == AC_HW_VERTEX_SHADER ||
|
||||
program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER);
|
||||
aco_err(program,
|
||||
"Missing export in %s shader:", is_vertex_or_ngg ? "vertex or NGG" : "fragment");
|
||||
aco_print_program(program, stderr);
|
||||
@@ -1221,8 +1222,8 @@ emit_program(Program* program, std::vector<uint32_t>& code, std::vector<struct a
|
||||
{
|
||||
asm_context ctx(program, symbols);
|
||||
|
||||
if (program->stage.hw == HWStage::VS || program->stage.hw == HWStage::FS ||
|
||||
program->stage.hw == HWStage::NGG)
|
||||
if (program->stage.hw == AC_HW_VERTEX_SHADER || program->stage.hw == AC_HW_PIXEL_SHADER ||
|
||||
program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER)
|
||||
fix_exports(ctx, code, program);
|
||||
|
||||
for (Block& block : program->blocks) {
|
||||
|
||||
@@ -261,7 +261,8 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instruction>>
|
||||
Operand start_exec(bld.lm);
|
||||
|
||||
/* exec seems to need to be manually initialized with combined shaders */
|
||||
if (ctx.program->stage.num_sw_stages() > 1 || ctx.program->stage.hw == HWStage::NGG) {
|
||||
if (ctx.program->stage.num_sw_stages() > 1 ||
|
||||
ctx.program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER) {
|
||||
start_exec = Operand::c32_or_c64(-1u, bld.lm == s2);
|
||||
bld.copy(Definition(exec, bld.lm), start_exec);
|
||||
}
|
||||
|
||||
@@ -7239,10 +7239,11 @@ emit_scoped_barrier(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
* - when GS is used on GFX9+, VS->GS and TES->GS I/O is lowered to shared memory
|
||||
* - additionally, when NGG is used on GFX10+, shared memory is used for certain features
|
||||
*/
|
||||
bool shared_storage_used = ctx->stage.hw == HWStage::CS || ctx->stage.hw == HWStage::LS ||
|
||||
ctx->stage.hw == HWStage::HS ||
|
||||
(ctx->stage.hw == HWStage::GS && ctx->program->gfx_level >= GFX9) ||
|
||||
ctx->stage.hw == HWStage::NGG;
|
||||
bool shared_storage_used =
|
||||
ctx->stage.hw == AC_HW_COMPUTE_SHADER || ctx->stage.hw == AC_HW_LOCAL_SHADER ||
|
||||
ctx->stage.hw == AC_HW_HULL_SHADER ||
|
||||
(ctx->stage.hw == AC_HW_LEGACY_GEOMETRY_SHADER && ctx->program->gfx_level >= GFX9) ||
|
||||
ctx->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER;
|
||||
|
||||
if (shared_storage_used)
|
||||
storage_allowed |= storage_shared;
|
||||
@@ -7252,15 +7253,16 @@ emit_scoped_barrier(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
storage_allowed |= storage_task_payload;
|
||||
|
||||
/* Allow VMEM output for all stages that can have outputs. */
|
||||
if ((ctx->stage.hw != HWStage::CS && ctx->stage.hw != HWStage::FS) ||
|
||||
if ((ctx->stage.hw != AC_HW_COMPUTE_SHADER && ctx->stage.hw != AC_HW_PIXEL_SHADER) ||
|
||||
ctx->stage.has(SWStage::TS))
|
||||
storage_allowed |= storage_vmem_output;
|
||||
|
||||
/* Workgroup barriers can hang merged shaders that can potentially have 0 threads in either half.
|
||||
* They are allowed in CS, TCS, and in any NGG shader.
|
||||
*/
|
||||
ASSERTED bool workgroup_scope_allowed =
|
||||
ctx->stage.hw == HWStage::CS || ctx->stage.hw == HWStage::HS || ctx->stage.hw == HWStage::NGG;
|
||||
ASSERTED bool workgroup_scope_allowed = ctx->stage.hw == AC_HW_COMPUTE_SHADER ||
|
||||
ctx->stage.hw == AC_HW_HULL_SHADER ||
|
||||
ctx->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER;
|
||||
|
||||
unsigned nir_storage = nir_intrinsic_memory_modes(instr);
|
||||
unsigned storage = aco_storage_mode_from_nir_mem_mode(nir_storage);
|
||||
@@ -7510,7 +7512,7 @@ get_scratch_resource(isel_context* ctx)
|
||||
Temp addr_hi =
|
||||
bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_hi));
|
||||
scratch_addr = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), addr_lo, addr_hi);
|
||||
} else if (ctx->stage.hw != HWStage::CS) {
|
||||
} else if (ctx->stage.hw != AC_HW_COMPUTE_SHADER) {
|
||||
scratch_addr =
|
||||
bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), scratch_addr, Operand::zero());
|
||||
}
|
||||
@@ -8284,7 +8286,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
}
|
||||
case nir_intrinsic_load_workgroup_id: {
|
||||
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
|
||||
if (ctx->stage.hw == HWStage::CS) {
|
||||
if (ctx->stage.hw == AC_HW_COMPUTE_SHADER) {
|
||||
const struct ac_arg* ids = ctx->args->workgroup_ids;
|
||||
bld.pseudo(aco_opcode::p_create_vector, Definition(dst),
|
||||
ids[0].used ? Operand(get_arg(ctx, ids[0])) : Operand::zero(),
|
||||
@@ -8297,7 +8299,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_local_invocation_index: {
|
||||
if (ctx->stage.hw == HWStage::LS || ctx->stage.hw == HWStage::HS) {
|
||||
if (ctx->stage.hw == AC_HW_LOCAL_SHADER || ctx->stage.hw == AC_HW_HULL_SHADER) {
|
||||
if (ctx->options->gfx_level >= GFX11) {
|
||||
/* On GFX11, RelAutoIndex is WaveID * WaveSize + ThreadID. */
|
||||
Temp wave_id =
|
||||
@@ -8312,7 +8314,8 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
get_arg(ctx, ctx->args->vs_rel_patch_id));
|
||||
}
|
||||
break;
|
||||
} else if (ctx->stage.hw == HWStage::GS || ctx->stage.hw == HWStage::NGG) {
|
||||
} else if (ctx->stage.hw == AC_HW_LEGACY_GEOMETRY_SHADER ||
|
||||
ctx->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER) {
|
||||
bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), thread_id_in_threadgroup(ctx));
|
||||
break;
|
||||
} else if (ctx->program->workgroup_size <= ctx->program->wave_size) {
|
||||
@@ -8343,11 +8346,11 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_subgroup_id: {
|
||||
if (ctx->stage.hw == HWStage::CS) {
|
||||
if (ctx->stage.hw == AC_HW_COMPUTE_SHADER) {
|
||||
bld.sop2(aco_opcode::s_bfe_u32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)),
|
||||
bld.def(s1, scc), get_arg(ctx, ctx->args->tg_size),
|
||||
Operand::c32(0x6u | (0x6u << 16)));
|
||||
} else if (ctx->stage.hw == HWStage::NGG) {
|
||||
} else if (ctx->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER) {
|
||||
/* Get the id of the current wave within the threadgroup (workgroup) */
|
||||
bld.sop2(aco_opcode::s_bfe_u32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)),
|
||||
bld.def(s1, scc), get_arg(ctx, ctx->args->merged_wave_info),
|
||||
@@ -8362,10 +8365,10 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_num_subgroups: {
|
||||
if (ctx->stage.hw == HWStage::CS)
|
||||
if (ctx->stage.hw == AC_HW_COMPUTE_SHADER)
|
||||
bld.sop2(aco_opcode::s_and_b32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)),
|
||||
bld.def(s1, scc), Operand::c32(0x3fu), get_arg(ctx, ctx->args->tg_size));
|
||||
else if (ctx->stage.hw == HWStage::NGG)
|
||||
else if (ctx->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER)
|
||||
bld.sop2(aco_opcode::s_bfe_u32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)),
|
||||
bld.def(s1, scc), get_arg(ctx, ctx->args->merged_wave_info),
|
||||
Operand::c32(28u | (4u << 16)));
|
||||
@@ -8922,7 +8925,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
bld.copy(Definition(dst), get_arg(ctx, ctx->args->tes_patch_id));
|
||||
break;
|
||||
default:
|
||||
if (ctx->stage.hw == HWStage::NGG && !ctx->stage.has(SWStage::GS)) {
|
||||
if (ctx->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER && !ctx->stage.has(SWStage::GS)) {
|
||||
/* In case of NGG, the GS threads always have the primitive ID
|
||||
* even if there is no SW GS. */
|
||||
bld.copy(Definition(dst), get_arg(ctx, ctx->args->gs_prim_id));
|
||||
@@ -11249,7 +11252,7 @@ select_program(Program* program, unsigned shader_count, struct nir_shader* const
|
||||
return select_program_rt(ctx, shader_count, shaders, args);
|
||||
|
||||
if_context ic_merged_wave_info;
|
||||
bool ngg_gs = ctx.stage.hw == HWStage::NGG && ctx.stage.has(SWStage::GS);
|
||||
bool ngg_gs = ctx.stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER && ctx.stage.has(SWStage::GS);
|
||||
|
||||
for (unsigned i = 0; i < shader_count; i++) {
|
||||
nir_shader* nir = shaders[i];
|
||||
@@ -11286,7 +11289,7 @@ select_program(Program* program, unsigned shader_count, struct nir_shader* const
|
||||
bool endif_merged_wave_info =
|
||||
ctx.tcs_in_out_eq ? i == 1 : (check_merged_wave_info && !(ngg_gs && i == 1));
|
||||
|
||||
if (program->gfx_level == GFX10 && program->stage.hw == HWStage::NGG &&
|
||||
if (program->gfx_level == GFX10 && program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER &&
|
||||
program->stage.num_sw_stages() == 1) {
|
||||
/* Workaround for Navi1x HW bug to ensure that all NGG waves launch before
|
||||
* s_sendmsg(GS_ALLOC_REQ). */
|
||||
|
||||
@@ -691,45 +691,45 @@ setup_isel_context(Program* program, unsigned shader_count, struct nir_shader* c
|
||||
|
||||
bool gfx9_plus = options->gfx_level >= GFX9;
|
||||
bool ngg = info->is_ngg && options->gfx_level >= GFX10;
|
||||
HWStage hw_stage{};
|
||||
ac_hw_stage hw_stage;
|
||||
if (sw_stage == SWStage::VS && info->vs.as_es && !ngg)
|
||||
hw_stage = HWStage::ES;
|
||||
hw_stage = AC_HW_EXPORT_SHADER;
|
||||
else if (sw_stage == SWStage::VS && !info->vs.as_ls && !ngg)
|
||||
hw_stage = HWStage::VS;
|
||||
hw_stage = AC_HW_VERTEX_SHADER;
|
||||
else if (sw_stage == SWStage::VS && ngg)
|
||||
hw_stage = HWStage::NGG; /* GFX10/NGG: VS without GS uses the HW GS stage */
|
||||
hw_stage = AC_HW_NEXT_GEN_GEOMETRY_SHADER;
|
||||
else if (sw_stage == SWStage::GS)
|
||||
hw_stage = HWStage::GS;
|
||||
hw_stage = AC_HW_LEGACY_GEOMETRY_SHADER;
|
||||
else if (sw_stage == SWStage::FS)
|
||||
hw_stage = HWStage::FS;
|
||||
hw_stage = AC_HW_PIXEL_SHADER;
|
||||
else if (sw_stage == SWStage::CS)
|
||||
hw_stage = HWStage::CS;
|
||||
hw_stage = AC_HW_COMPUTE_SHADER;
|
||||
else if (sw_stage == SWStage::TS)
|
||||
hw_stage = HWStage::CS; /* Task shaders are implemented with compute shaders. */
|
||||
hw_stage = AC_HW_COMPUTE_SHADER;
|
||||
else if (sw_stage == SWStage::MS)
|
||||
hw_stage = HWStage::NGG; /* Mesh shaders only work on NGG and on GFX10.3+. */
|
||||
hw_stage = AC_HW_NEXT_GEN_GEOMETRY_SHADER;
|
||||
else if (sw_stage == SWStage::VS_GS && gfx9_plus && !ngg)
|
||||
hw_stage = HWStage::GS; /* GFX6-9: VS+GS merged into a GS (and GFX10/legacy) */
|
||||
hw_stage = AC_HW_LEGACY_GEOMETRY_SHADER;
|
||||
else if (sw_stage == SWStage::VS_GS && ngg)
|
||||
hw_stage = HWStage::NGG; /* GFX10+: VS+GS merged into an NGG GS */
|
||||
hw_stage = AC_HW_NEXT_GEN_GEOMETRY_SHADER;
|
||||
else if (sw_stage == SWStage::VS && info->vs.as_ls)
|
||||
hw_stage = HWStage::LS; /* GFX6-8: VS is a Local Shader, when tessellation is used */
|
||||
hw_stage = AC_HW_LOCAL_SHADER;
|
||||
else if (sw_stage == SWStage::TCS)
|
||||
hw_stage = HWStage::HS; /* GFX6-8: TCS is a Hull Shader */
|
||||
hw_stage = AC_HW_HULL_SHADER;
|
||||
else if (sw_stage == SWStage::VS_TCS)
|
||||
hw_stage = HWStage::HS; /* GFX9-10: VS+TCS merged into a Hull Shader */
|
||||
hw_stage = AC_HW_HULL_SHADER;
|
||||
else if (sw_stage == SWStage::TES && !info->tes.as_es && !ngg)
|
||||
hw_stage = HWStage::VS; /* GFX6-9: TES without GS uses the HW VS stage (and GFX10/legacy) */
|
||||
hw_stage = AC_HW_VERTEX_SHADER;
|
||||
else if (sw_stage == SWStage::TES && !info->tes.as_es && ngg)
|
||||
hw_stage = HWStage::NGG; /* GFX10/NGG: TES without GS */
|
||||
hw_stage = AC_HW_NEXT_GEN_GEOMETRY_SHADER;
|
||||
else if (sw_stage == SWStage::TES && info->tes.as_es && !ngg)
|
||||
hw_stage = HWStage::ES; /* GFX6-8: TES is an Export Shader */
|
||||
hw_stage = AC_HW_EXPORT_SHADER;
|
||||
else if (sw_stage == SWStage::TES_GS && gfx9_plus && !ngg)
|
||||
hw_stage = HWStage::GS; /* GFX9: TES+GS merged into a GS (and GFX10/legacy) */
|
||||
hw_stage = AC_HW_LEGACY_GEOMETRY_SHADER;
|
||||
else if (sw_stage == SWStage::TES_GS && ngg)
|
||||
hw_stage = HWStage::NGG; /* GFX10+: TES+GS merged into an NGG GS */
|
||||
hw_stage = AC_HW_NEXT_GEN_GEOMETRY_SHADER;
|
||||
else if (sw_stage == SWStage::RT)
|
||||
hw_stage = HWStage::CS; /* Raytracing shaders run as CS */
|
||||
hw_stage = AC_HW_COMPUTE_SHADER;
|
||||
else
|
||||
unreachable("Shader stage not implemented");
|
||||
|
||||
|
||||
+23
-39
@@ -32,6 +32,7 @@
|
||||
#include "util/compiler.h"
|
||||
|
||||
#include "ac_binary.h"
|
||||
#include "ac_shader_util.h"
|
||||
#include "amd_family.h"
|
||||
#include <algorithm>
|
||||
#include <bitset>
|
||||
@@ -2003,24 +2004,6 @@ operator|(SWStage a, SWStage b)
|
||||
return static_cast<SWStage>(static_cast<uint16_t>(a) | static_cast<uint16_t>(b));
|
||||
}
|
||||
|
||||
/*
|
||||
* Shader stages as running on the AMD GPU.
|
||||
*
|
||||
* The relation between HWStages and SWStages is not a one-to-one mapping:
|
||||
* Some SWStages are merged by ACO to run on a single HWStage.
|
||||
* See README.md for details.
|
||||
*/
|
||||
enum class HWStage : uint8_t {
|
||||
VS,
|
||||
ES, /* Export shader: pre-GS (VS or TES) on GFX6-8. Combined into GS on GFX9 (and GFX10/legacy). */
|
||||
GS, /* Geometry shader on GFX10/legacy and GFX6-9. */
|
||||
NGG, /* Primitive shader, used to implement VS, TES, GS. */
|
||||
LS, /* Local shader: pre-TCS (VS) on GFX6-8. Combined into HS on GFX9 (and GFX10/legacy). */
|
||||
HS, /* Hull shader: TCS on GFX6-8. Merged VS and TCS on GFX9-10. */
|
||||
FS,
|
||||
CS,
|
||||
};
|
||||
|
||||
/*
|
||||
* Set of SWStages to be merged into a single shader paired with the
|
||||
* HWStage it will run on.
|
||||
@@ -2028,7 +2011,7 @@ enum class HWStage : uint8_t {
|
||||
struct Stage {
|
||||
constexpr Stage() = default;
|
||||
|
||||
explicit constexpr Stage(HWStage hw_, SWStage sw_) : sw(sw_), hw(hw_) {}
|
||||
explicit constexpr Stage(ac_hw_stage hw_, SWStage sw_) : sw(sw_), hw(hw_) {}
|
||||
|
||||
/* Check if the given SWStage is included */
|
||||
constexpr bool has(SWStage stage) const
|
||||
@@ -2046,35 +2029,36 @@ struct Stage {
|
||||
SWStage sw = SWStage::None;
|
||||
|
||||
/* Active hardware stage */
|
||||
HWStage hw{};
|
||||
ac_hw_stage hw{};
|
||||
};
|
||||
|
||||
/* possible settings of Program::stage */
|
||||
static constexpr Stage vertex_vs(HWStage::VS, SWStage::VS);
|
||||
static constexpr Stage fragment_fs(HWStage::FS, SWStage::FS);
|
||||
static constexpr Stage compute_cs(HWStage::CS, SWStage::CS);
|
||||
static constexpr Stage tess_eval_vs(HWStage::VS, SWStage::TES);
|
||||
static constexpr Stage vertex_vs(AC_HW_VERTEX_SHADER, SWStage::VS);
|
||||
static constexpr Stage fragment_fs(AC_HW_PIXEL_SHADER, SWStage::FS);
|
||||
static constexpr Stage compute_cs(AC_HW_COMPUTE_SHADER, SWStage::CS);
|
||||
static constexpr Stage tess_eval_vs(AC_HW_VERTEX_SHADER, SWStage::TES);
|
||||
/* Mesh shading pipeline */
|
||||
static constexpr Stage task_cs(HWStage::CS, SWStage::TS);
|
||||
static constexpr Stage mesh_ngg(HWStage::NGG, SWStage::MS);
|
||||
static constexpr Stage task_cs(AC_HW_COMPUTE_SHADER, SWStage::TS);
|
||||
static constexpr Stage mesh_ngg(AC_HW_NEXT_GEN_GEOMETRY_SHADER, SWStage::MS);
|
||||
/* GFX10/NGG */
|
||||
static constexpr Stage vertex_ngg(HWStage::NGG, SWStage::VS);
|
||||
static constexpr Stage vertex_geometry_ngg(HWStage::NGG, SWStage::VS_GS);
|
||||
static constexpr Stage tess_eval_ngg(HWStage::NGG, SWStage::TES);
|
||||
static constexpr Stage tess_eval_geometry_ngg(HWStage::NGG, SWStage::TES_GS);
|
||||
static constexpr Stage vertex_ngg(AC_HW_NEXT_GEN_GEOMETRY_SHADER, SWStage::VS);
|
||||
static constexpr Stage vertex_geometry_ngg(AC_HW_NEXT_GEN_GEOMETRY_SHADER, SWStage::VS_GS);
|
||||
static constexpr Stage tess_eval_ngg(AC_HW_NEXT_GEN_GEOMETRY_SHADER, SWStage::TES);
|
||||
static constexpr Stage tess_eval_geometry_ngg(AC_HW_NEXT_GEN_GEOMETRY_SHADER, SWStage::TES_GS);
|
||||
/* GFX9 (and GFX10 if NGG isn't used) */
|
||||
static constexpr Stage vertex_geometry_gs(HWStage::GS, SWStage::VS_GS);
|
||||
static constexpr Stage vertex_tess_control_hs(HWStage::HS, SWStage::VS_TCS);
|
||||
static constexpr Stage tess_eval_geometry_gs(HWStage::GS, SWStage::TES_GS);
|
||||
static constexpr Stage vertex_geometry_gs(AC_HW_LEGACY_GEOMETRY_SHADER, SWStage::VS_GS);
|
||||
static constexpr Stage vertex_tess_control_hs(AC_HW_HULL_SHADER, SWStage::VS_TCS);
|
||||
static constexpr Stage tess_eval_geometry_gs(AC_HW_LEGACY_GEOMETRY_SHADER, SWStage::TES_GS);
|
||||
/* pre-GFX9 */
|
||||
static constexpr Stage vertex_ls(HWStage::LS, SWStage::VS); /* vertex before tessellation control */
|
||||
static constexpr Stage vertex_es(HWStage::ES, SWStage::VS); /* vertex before geometry */
|
||||
static constexpr Stage tess_control_hs(HWStage::HS, SWStage::TCS);
|
||||
static constexpr Stage tess_eval_es(HWStage::ES,
|
||||
static constexpr Stage vertex_ls(AC_HW_LOCAL_SHADER,
|
||||
SWStage::VS); /* vertex before tessellation control */
|
||||
static constexpr Stage vertex_es(AC_HW_EXPORT_SHADER, SWStage::VS); /* vertex before geometry */
|
||||
static constexpr Stage tess_control_hs(AC_HW_HULL_SHADER, SWStage::TCS);
|
||||
static constexpr Stage tess_eval_es(AC_HW_EXPORT_SHADER,
|
||||
SWStage::TES); /* tessellation evaluation before geometry */
|
||||
static constexpr Stage geometry_gs(HWStage::GS, SWStage::GS);
|
||||
static constexpr Stage geometry_gs(AC_HW_LEGACY_GEOMETRY_SHADER, SWStage::GS);
|
||||
/* Raytracing */
|
||||
static constexpr Stage raytracing_cs(HWStage::CS, SWStage::RT);
|
||||
static constexpr Stage raytracing_cs(AC_HW_COMPUTE_SHADER, SWStage::RT);
|
||||
|
||||
struct DeviceInfo {
|
||||
uint16_t lds_encoding_granule;
|
||||
|
||||
@@ -2595,7 +2595,7 @@ lower_to_hw_instr(Program* program)
|
||||
bld.sop1(aco_opcode::p_load_symbol, Definition(reg.advance(4), s1),
|
||||
Operand::c32(aco_symbol_scratch_addr_hi));
|
||||
scratch_addr.setFixed(reg);
|
||||
} else if (program->stage.hw != HWStage::CS) {
|
||||
} else if (program->stage.hw != AC_HW_COMPUTE_SHADER) {
|
||||
bld.smem(aco_opcode::s_load_dwordx2, instr->definitions[0], scratch_addr,
|
||||
Operand::zero());
|
||||
scratch_addr.setFixed(instr->definitions[0].physReg());
|
||||
|
||||
@@ -2005,7 +2005,7 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
||||
ctx.info[instr->operands[0].tempId()].instr->definitions[1].getTemp());
|
||||
break;
|
||||
} else if ((ctx.program->stage.num_sw_stages() > 1 ||
|
||||
ctx.program->stage.hw == HWStage::NGG) &&
|
||||
ctx.program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER) &&
|
||||
instr->pass_flags == 1) {
|
||||
/* In case of merged shaders, pass_flags=1 means that all lanes are active (exec=-1), so
|
||||
* s_and is unnecessary. */
|
||||
|
||||
@@ -1431,7 +1431,7 @@ load_scratch_resource(spill_ctx& ctx, Temp& scratch_offset, Block& block,
|
||||
bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_hi));
|
||||
private_segment_buffer =
|
||||
bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), addr_lo, addr_hi);
|
||||
} else if (ctx.program->stage.hw != HWStage::CS) {
|
||||
} else if (ctx.program->stage.hw != AC_HW_COMPUTE_SHADER) {
|
||||
private_segment_buffer =
|
||||
bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), private_segment_buffer, Operand::zero());
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user