radeonsi: split outputs_written_before_tes_gs into ls_es_* and tcs_* masks

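The two new masks (ls_es_outputs_written and tcs_outputs_written) are currently set from the same bits, but they will have different values later.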

Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32171>
This commit is contained in:
Marek Olšák
2024-10-02 23:37:01 -04:00
parent 1d16d88e1e
commit 8deb32ac2e
6 changed files with 19 additions and 17 deletions
@@ -371,7 +371,7 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s
nir_def *per_vtx_out_patch_size = NULL;
if (stage == MESA_SHADER_TESS_CTRL) {
const unsigned num_hs_out = util_last_bit64(sel->info.outputs_written_before_tes_gs);
const unsigned num_hs_out = util_last_bit64(sel->info.tcs_outputs_written);
const unsigned out_vtx_size = num_hs_out * 16;
const unsigned out_vtx_per_patch = sel->info.base.tess.tcs_vertices_out;
per_vtx_out_patch_size = nir_imm_int(b, out_vtx_size * out_vtx_per_patch);
+3 -3
View File
@@ -460,7 +460,7 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args)
/* VS outputs passed via VGPRs to TCS. */
if (shader->key.ge.opt.same_patch_vertices && !sel->info.base.use_aco_amd) {
unsigned num_outputs = util_last_bit64(shader->selector->info.outputs_written_before_tes_gs);
unsigned num_outputs = util_last_bit64(shader->selector->info.ls_es_outputs_written);
for (i = 0; i < num_outputs * 4; i++)
ac_add_return(&args->ac, AC_ARG_VGPR);
}
@@ -468,7 +468,7 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args)
} else {
/* TCS inputs are passed via VGPRs from VS. */
if (shader->key.ge.opt.same_patch_vertices && !sel->info.base.use_aco_amd) {
unsigned num_inputs = util_last_bit64(shader->previous_stage_sel->info.outputs_written_before_tes_gs);
unsigned num_inputs = util_last_bit64(shader->previous_stage_sel->info.ls_es_outputs_written);
for (i = 0; i < num_inputs * 4; i++)
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL);
}
@@ -1313,7 +1313,7 @@ void si_shader_dump_stats_for_shader_db(struct si_screen *screen, struct si_shad
if (shader->key.ge.as_ls)
num_ls_outputs = si_shader_lshs_vertex_stride(shader) / 16;
else if (shader->selector->stage == MESA_SHADER_TESS_CTRL)
num_hs_outputs = util_last_bit64(shader->selector->info.outputs_written_before_tes_gs);
num_hs_outputs = util_last_bit64(shader->selector->info.tcs_outputs_written);
else if (shader->key.ge.as_es)
num_es_outputs = shader->selector->info.esgs_vertex_stride / 16;
else if (shader->gs_copy_shader)
+3 -1
View File
@@ -479,7 +479,9 @@ struct si_shader_info {
uint64_t inputs_read; /* "get_unique_index" bits */
uint64_t tcs_vgpr_only_inputs; /* TCS inputs that are only in VGPRs, not LDS. */
uint64_t outputs_written_before_tes_gs; /* "get_unique_index" bits */
/* For VS before {TCS, TES, GS} and TES before GS. */
uint64_t ls_es_outputs_written; /* "get_unique_index" bits */
uint64_t tcs_outputs_written; /* "get_unique_index" bits */
uint64_t outputs_written_before_ps; /* "get_unique_index" bits */
uint32_t patch_outputs_written; /* "get_unique_index_patch" bits */
@@ -239,20 +239,20 @@ static void scan_io_usage(const nir_shader *nir, struct si_shader_info *info,
} else if ((slot_semantic <= VARYING_SLOT_VAR31 ||
slot_semantic >= VARYING_SLOT_VAR0_16BIT) &&
slot_semantic != VARYING_SLOT_EDGE) {
uint64_t bit = BITFIELD64_BIT(si_shader_io_get_unique_index(slot_semantic));
/* Ignore outputs that are not passed from VS to PS. */
if (slot_semantic != VARYING_SLOT_POS &&
slot_semantic != VARYING_SLOT_PSIZ &&
slot_semantic != VARYING_SLOT_CLIP_VERTEX &&
slot_semantic != VARYING_SLOT_LAYER) {
info->outputs_written_before_ps |=
BITFIELD64_BIT(si_shader_io_get_unique_index(slot_semantic));
}
slot_semantic != VARYING_SLOT_LAYER)
info->outputs_written_before_ps |= bit;
/* LAYER and VIEWPORT have no effect if they don't feed the rasterizer. */
if (slot_semantic != VARYING_SLOT_LAYER &&
slot_semantic != VARYING_SLOT_VIEWPORT) {
info->outputs_written_before_tes_gs |=
BITFIELD64_BIT(si_shader_io_get_unique_index(slot_semantic));
info->ls_es_outputs_written |= bit;
info->tcs_outputs_written |= bit;
}
}
}
@@ -681,7 +681,7 @@ void si_nir_scan_shader(struct si_screen *sscreen, const struct nir_shader *nir,
nir->info.stage == MESA_SHADER_TESS_CTRL ||
nir->info.stage == MESA_SHADER_TESS_EVAL) {
info->esgs_vertex_stride =
util_last_bit64(info->outputs_written_before_tes_gs) * 16;
util_last_bit64(info->ls_es_outputs_written) * 16;
/* For the ESGS ring in LDS, add 1 dword to reduce LDS bank
* conflicts, i.e. each vertex will start on a different bank.
@@ -87,7 +87,7 @@ void si_llvm_ls_build_end(struct si_shader_context *ctx)
unsigned semantic = info->output_semantic[i];
int param = si_shader_io_get_unique_index(semantic);
if (!(info->outputs_written_before_tes_gs & BITFIELD64_BIT(param)))
if (!(info->ls_es_outputs_written & BITFIELD64_BIT(param)))
continue;
for (unsigned chan = 0; chan < 4; chan++) {
@@ -4527,7 +4527,7 @@ bool si_set_tcs_to_fixed_func_shader(struct si_context *sctx)
}
struct si_fixed_func_tcs_shader_key key;
key.outputs_written = sctx->shader.vs.cso->info.outputs_written_before_tes_gs;
key.outputs_written = sctx->shader.vs.cso->info.ls_es_outputs_written;
key.vertices_out = sctx->patch_vertices;
struct hash_entry *entry = _mesa_hash_table_search(
@@ -4606,7 +4606,7 @@ unsigned si_shader_lshs_vertex_stride(struct si_shader *ls)
if (ls->selector->stage == MESA_SHADER_VERTEX && !ls->next_shader) {
assert(ls->key.ge.as_ls);
assert(ls->selector->screen->info.gfx_level <= GFX8 || !ls->is_monolithic);
num_slots = util_last_bit64(ls->selector->info.outputs_written_before_tes_gs);
num_slots = util_last_bit64(ls->selector->info.ls_es_outputs_written);
} else {
struct si_shader *tcs = ls->next_shader ? ls->next_shader : ls;
@@ -4623,7 +4623,7 @@ unsigned si_shader_lshs_vertex_stride(struct si_shader *ls)
/* NIR lowering passes pack LS outputs/HS inputs if the usage masks of both are known. */
num_slots = util_bitcount64(lds_inputs_read);
} else {
num_slots = util_last_bit64(tcs->previous_stage_sel->info.outputs_written_before_tes_gs);
num_slots = util_last_bit64(tcs->previous_stage_sel->info.ls_es_outputs_written);
}
}
@@ -4683,7 +4683,7 @@ void si_update_tess_io_layout_state(struct si_context *sctx)
/* This calculates how shader inputs and outputs among VS, TCS, and TES
* are laid out in LDS. */
unsigned num_tcs_outputs = util_last_bit64(tcs->info.outputs_written_before_tes_gs);
unsigned num_tcs_outputs = util_last_bit64(tcs->info.tcs_outputs_written);
unsigned num_tcs_output_cp = tcs->info.base.tess.tcs_vertices_out;
unsigned num_tcs_patch_outputs = util_last_bit64(tcs->info.patch_outputs_written);