From 8deb32ac2e20a6b1aedda8ff29962045cc3f960c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 2 Oct 2024 23:37:01 -0400 Subject: [PATCH] radeonsi: split outputs_written_before_tes_gs into ls_es_* and tcs_* masks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit these will have different values later Reviewed-by: Timur Kristóf Part-of: --- src/gallium/drivers/radeonsi/si_nir_lower_abi.c | 2 +- src/gallium/drivers/radeonsi/si_shader.c | 6 +++--- src/gallium/drivers/radeonsi/si_shader.h | 4 +++- src/gallium/drivers/radeonsi/si_shader_info.c | 14 +++++++------- src/gallium/drivers/radeonsi/si_shader_llvm_tess.c | 2 +- src/gallium/drivers/radeonsi/si_state_shaders.cpp | 8 ++++---- 6 files changed, 19 insertions(+), 17 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c index be9e6ccb38d..14319a7d28d 100644 --- a/src/gallium/drivers/radeonsi/si_nir_lower_abi.c +++ b/src/gallium/drivers/radeonsi/si_nir_lower_abi.c @@ -371,7 +371,7 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s nir_def *per_vtx_out_patch_size = NULL; if (stage == MESA_SHADER_TESS_CTRL) { - const unsigned num_hs_out = util_last_bit64(sel->info.outputs_written_before_tes_gs); + const unsigned num_hs_out = util_last_bit64(sel->info.tcs_outputs_written); const unsigned out_vtx_size = num_hs_out * 16; const unsigned out_vtx_per_patch = sel->info.base.tess.tcs_vertices_out; per_vtx_out_patch_size = nir_imm_int(b, out_vtx_size * out_vtx_per_patch); diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 67c4de11d0a..724787571b4 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -460,7 +460,7 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args) /* VS outputs passed via VGPRs to TCS. */ if (shader->key.ge.opt.same_patch_vertices && !sel->info.base.use_aco_amd) { - unsigned num_outputs = util_last_bit64(shader->selector->info.outputs_written_before_tes_gs); + unsigned num_outputs = util_last_bit64(shader->selector->info.ls_es_outputs_written); for (i = 0; i < num_outputs * 4; i++) ac_add_return(&args->ac, AC_ARG_VGPR); } @@ -468,7 +468,7 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args) } else { /* TCS inputs are passed via VGPRs from VS. */ if (shader->key.ge.opt.same_patch_vertices && !sel->info.base.use_aco_amd) { - unsigned num_inputs = util_last_bit64(shader->previous_stage_sel->info.outputs_written_before_tes_gs); + unsigned num_inputs = util_last_bit64(shader->previous_stage_sel->info.ls_es_outputs_written); for (i = 0; i < num_inputs * 4; i++) ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL); } @@ -1313,7 +1313,7 @@ void si_shader_dump_stats_for_shader_db(struct si_screen *screen, struct si_shad if (shader->key.ge.as_ls) num_ls_outputs = si_shader_lshs_vertex_stride(shader) / 16; else if (shader->selector->stage == MESA_SHADER_TESS_CTRL) - num_hs_outputs = util_last_bit64(shader->selector->info.outputs_written_before_tes_gs); + num_hs_outputs = util_last_bit64(shader->selector->info.tcs_outputs_written); else if (shader->key.ge.as_es) num_es_outputs = shader->selector->info.esgs_vertex_stride / 16; else if (shader->gs_copy_shader) diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 136fac2d827..5f51093ed6d 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -479,7 +479,9 @@ struct si_shader_info { uint64_t inputs_read; /* "get_unique_index" bits */ uint64_t tcs_vgpr_only_inputs; /* TCS inputs that are only in VGPRs, not LDS. */ - uint64_t outputs_written_before_tes_gs; /* "get_unique_index" bits */ + /* For VS before {TCS, TES, GS} and TES before GS. */ + uint64_t ls_es_outputs_written; /* "get_unique_index" bits */ + uint64_t tcs_outputs_written; /* "get_unique_index" bits */ uint64_t outputs_written_before_ps; /* "get_unique_index" bits */ uint32_t patch_outputs_written; /* "get_unique_index_patch" bits */ diff --git a/src/gallium/drivers/radeonsi/si_shader_info.c b/src/gallium/drivers/radeonsi/si_shader_info.c index aa6c0988d3f..ff335827226 100644 --- a/src/gallium/drivers/radeonsi/si_shader_info.c +++ b/src/gallium/drivers/radeonsi/si_shader_info.c @@ -239,20 +239,20 @@ static void scan_io_usage(const nir_shader *nir, struct si_shader_info *info, } else if ((slot_semantic <= VARYING_SLOT_VAR31 || slot_semantic >= VARYING_SLOT_VAR0_16BIT) && slot_semantic != VARYING_SLOT_EDGE) { + uint64_t bit = BITFIELD64_BIT(si_shader_io_get_unique_index(slot_semantic)); + /* Ignore outputs that are not passed from VS to PS. */ if (slot_semantic != VARYING_SLOT_POS && slot_semantic != VARYING_SLOT_PSIZ && slot_semantic != VARYING_SLOT_CLIP_VERTEX && - slot_semantic != VARYING_SLOT_LAYER) { - info->outputs_written_before_ps |= - BITFIELD64_BIT(si_shader_io_get_unique_index(slot_semantic)); - } + slot_semantic != VARYING_SLOT_LAYER) + info->outputs_written_before_ps |= bit; /* LAYER and VIEWPORT have no effect if they don't feed the rasterizer. */ if (slot_semantic != VARYING_SLOT_LAYER && slot_semantic != VARYING_SLOT_VIEWPORT) { - info->outputs_written_before_tes_gs |= - BITFIELD64_BIT(si_shader_io_get_unique_index(slot_semantic)); + info->ls_es_outputs_written |= bit; + info->tcs_outputs_written |= bit; } } } @@ -681,7 +681,7 @@ void si_nir_scan_shader(struct si_screen *sscreen, const struct nir_shader *nir, nir->info.stage == MESA_SHADER_TESS_CTRL || nir->info.stage == MESA_SHADER_TESS_EVAL) { info->esgs_vertex_stride = - util_last_bit64(info->outputs_written_before_tes_gs) * 16; + util_last_bit64(info->ls_es_outputs_written) * 16; /* For the ESGS ring in LDS, add 1 dword to reduce LDS bank * conflicts, i.e. each vertex will start on a different bank. diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c index 007d7050ae6..4d9dc6837ad 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c @@ -87,7 +87,7 @@ void si_llvm_ls_build_end(struct si_shader_context *ctx) unsigned semantic = info->output_semantic[i]; int param = si_shader_io_get_unique_index(semantic); - if (!(info->outputs_written_before_tes_gs & BITFIELD64_BIT(param))) + if (!(info->ls_es_outputs_written & BITFIELD64_BIT(param))) continue; for (unsigned chan = 0; chan < 4; chan++) { diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index f31f856905e..13cc047872b 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -4527,7 +4527,7 @@ bool si_set_tcs_to_fixed_func_shader(struct si_context *sctx) } struct si_fixed_func_tcs_shader_key key; - key.outputs_written = sctx->shader.vs.cso->info.outputs_written_before_tes_gs; + key.outputs_written = sctx->shader.vs.cso->info.ls_es_outputs_written; key.vertices_out = sctx->patch_vertices; struct hash_entry *entry = _mesa_hash_table_search( @@ -4606,7 +4606,7 @@ unsigned si_shader_lshs_vertex_stride(struct si_shader *ls) if (ls->selector->stage == MESA_SHADER_VERTEX && !ls->next_shader) { assert(ls->key.ge.as_ls); assert(ls->selector->screen->info.gfx_level <= GFX8 || !ls->is_monolithic); - num_slots = util_last_bit64(ls->selector->info.outputs_written_before_tes_gs); + num_slots = util_last_bit64(ls->selector->info.ls_es_outputs_written); } else { struct si_shader *tcs = ls->next_shader ? ls->next_shader : ls; @@ -4623,7 +4623,7 @@ unsigned si_shader_lshs_vertex_stride(struct si_shader *ls) /* NIR lowering passes pack LS outputs/HS inputs if the usage masks of both are known. */ num_slots = util_bitcount64(lds_inputs_read); } else { - num_slots = util_last_bit64(tcs->previous_stage_sel->info.outputs_written_before_tes_gs); + num_slots = util_last_bit64(tcs->previous_stage_sel->info.ls_es_outputs_written); } } @@ -4683,7 +4683,7 @@ void si_update_tess_io_layout_state(struct si_context *sctx) /* This calculates how shader inputs and outputs among VS, TCS, and TES * are laid out in LDS. */ - unsigned num_tcs_outputs = util_last_bit64(tcs->info.outputs_written_before_tes_gs); + unsigned num_tcs_outputs = util_last_bit64(tcs->info.tcs_outputs_written); unsigned num_tcs_output_cp = tcs->info.base.tess.tcs_vertices_out; unsigned num_tcs_patch_outputs = util_last_bit64(tcs->info.patch_outputs_written);