/*
 * Reviewer commentary (leading text ahead of the first "diff --git" header;
 * the patch text that follows is unchanged).
 *
 * Files touched, all under src/gallium/drivers/radeonsi:
 *   si_pipe.h, si_shader.c, si_shader.h, si_state_draw.cpp
 *
 * Summary of what the hunks do:
 *
 * - si_shader.h: removes the bools uses_vs_state_provoking_vertex and
 *   uses_gs_state_outprim from struct si_shader, and adds the 1-bit fields
 *   uses_gs_state_provoking_vtx_first and uses_gs_state_outprim to
 *   struct si_shader_binary_info (next to uses_vs_state_indexed).
 *
 * - si_shader.c, si_get_shader_variant_info(): no longer sets
 *   info.uses_vs_state_indexed when it sees nir_intrinsic_load_base_vertex.
 *
 * - si_shader.c, new static si_get_late_shader_variant_info(shader, args, nir):
 *   returns early unless the NIR stage is a vertex shader without
 *   vs.blit_sgprs_amd, a tess-eval shader, or a geometry shader with
 *   key.ge.as_ngg set. Otherwise it walks every instruction of the entrypoint
 *   and, for each load_scalar_arg_amd intrinsic whose base equals
 *   args->vs_state_bits.arg_index, asserts args->vs_state_bits.used, calls
 *   nir_def_bits_used() on the loaded def and tests the result against
 *   ENCODE_FIELD(<field>, ~0) masks:
 *     VS_STATE_INDEXED             -> info.uses_vs_state_indexed
 *                                     (vertex stage only)
 *     GS_STATE_PROVOKING_VTX_FIRST -> info.uses_gs_state_provoking_vtx_first
 *     GS_STATE_OUTPRIM             -> info.uses_gs_state_outprim
 *   The two GS_STATE flags are only set when !key.ge.as_es && key.ge.as_ngg.
 *   Flags are only ever set to true here, never cleared.
 *
 * - si_shader.c, get_nir_shaders(): calls the new function for each linked
 *   shader right after run_late_optimization_and_lowering_passes().
 *
 * - si_shader.c, si_create_shader_variant(): ORs the two new info flags in
 *   from previous_stage->info, alongside the existing uses_base_instance,
 *   uses_draw_id and uses_vs_state_indexed propagation, and deletes the old
 *   computation of uses_vs_state_provoking_vertex / uses_gs_state_outprim
 *   that was based on sscreen->use_ngg, the selector stage, the GS output
 *   primitive, key.ge.mono.u.vs_export_prim_id and
 *   si_shader_uses_streamout().
 *
 * - si_pipe.h and si_state_draw.cpp: readers switch from the removed
 *   struct si_shader members to hw_vs->info.uses_gs_state_provoking_vtx_first
 *   and hw_vs->info.uses_gs_state_outprim.
 *
 * - si_shader.c also gains #include "nir_range_analysis.h"; presumably this
 *   is where nir_def_bits_used() is declared -- TODO confirm.
 *
 * NOTE(review): bits_used is declared uint32_t; the return type of
 * nir_def_bits_used() is not visible in this patch -- confirm that any
 * narrowing is intended.
 * NOTE(review): the local old_uses_gs_state_provoking_vertex in
 * si_update_shaders() does not match the new field name
 * (..._provoking_vtx_first); cosmetic only.
 * NOTE(review): the flags are now derived from the lowered NIR rather than
 * from screen/stage/key state, so whether a flag is set depends on what the
 * late lowering passes actually emit -- verify against the callers that the
 * SGPR is still updated in every case the removed code covered.
 */
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 8c8944dd5ac..a4dd88aed54 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -2154,7 +2154,7 @@ static inline void si_set_clip_discard_distance(struct si_context *sctx, float d static inline void si_update_ngg_sgpr_state_provoking_vtx(struct si_context *sctx, struct si_shader *hw_vs, bool ngg) { - if (ngg && hw_vs && hw_vs->uses_vs_state_provoking_vertex) { + if (ngg && hw_vs && hw_vs->info.uses_gs_state_provoking_vtx_first) { SET_FIELD(sctx->current_gs_state, GS_STATE_PROVOKING_VTX_FIRST, sctx->queued.named.rasterizer->flatshade_first); } @@ -2163,7 +2163,7 @@ si_update_ngg_sgpr_state_provoking_vtx(struct si_context *sctx, struct si_shader static inline void si_update_ngg_sgpr_state_out_prim(struct si_context *sctx, struct si_shader *hw_vs, bool ngg) { - if (ngg && hw_vs && hw_vs->uses_gs_state_outprim) + if (ngg && hw_vs && hw_vs->info.uses_gs_state_outprim) SET_FIELD(sctx->current_gs_state, GS_STATE_OUTPRIM, sctx->gs_out_prim); } diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index d1d71a595b4..2cb8af8e24b 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -9,6 +9,7 @@ #include "ac_rtld.h" #include "nir.h" #include "nir_builder.h" +#include "nir_range_analysis.h" #include "nir_serialize.h" #include "nir_tcs_info.h" #include "nir_xfb_info.h" @@ -2765,9 +2766,6 @@ si_get_shader_variant_info(struct si_shader *shader, nir_shader *nir) case nir_intrinsic_load_instance_id: shader->info.uses_instance_id = true; break; - case nir_intrinsic_load_base_vertex: - shader->info.uses_vs_state_indexed = true; - break; case nir_intrinsic_load_base_instance: shader->info.uses_base_instance = true; break; @@ -2963,6 +2961,42 @@ si_get_shader_variant_info(struct si_shader *shader, nir_shader *nir) } } +/* Late shader variant info for
AMD-specific intrinsics. */ +static void +si_get_late_shader_variant_info(struct si_shader *shader, struct si_shader_args *args, + nir_shader *nir) +{ + if ((nir->info.stage != MESA_SHADER_VERTEX || nir->info.vs.blit_sgprs_amd) && + nir->info.stage != MESA_SHADER_TESS_EVAL && + (nir->info.stage != MESA_SHADER_GEOMETRY || !shader->key.ge.as_ngg)) + return; + + nir_foreach_block(block, nir_shader_get_entrypoint(nir)) { + nir_foreach_instr(instr, block) { + if (instr->type == nir_instr_type_intrinsic && + nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_load_scalar_arg_amd && + nir_intrinsic_base(nir_instr_as_intrinsic(instr)) == args->vs_state_bits.arg_index) { + assert(args->vs_state_bits.used); + + /* Gather which VS_STATE and GS_STATE user SGPR bits are used. */ + uint32_t bits_used = nir_def_bits_used(nir_instr_def(instr)); + + if (nir->info.stage == MESA_SHADER_VERTEX && + bits_used & ENCODE_FIELD(VS_STATE_INDEXED, ~0)) + shader->info.uses_vs_state_indexed = true; + + if (!shader->key.ge.as_es && shader->key.ge.as_ngg) { + if (bits_used & ENCODE_FIELD(GS_STATE_PROVOKING_VTX_FIRST, ~0)) + shader->info.uses_gs_state_provoking_vtx_first = true; + + if (bits_used & ENCODE_FIELD(GS_STATE_OUTPRIM, ~0)) + shader->info.uses_gs_state_outprim = true; + } + } + } + } +} + static void get_nir_shaders(struct si_shader *shader, struct si_linked_shaders *linked) { memset(linked, 0, sizeof(*linked)); @@ -2990,6 +3024,7 @@ static void get_nir_shaders(struct si_shader *shader, struct si_linked_shaders * if (linked->shader[i].nir) { si_get_shader_variant_info(shader, linked->shader[i].nir); run_late_optimization_and_lowering_passes(&linked->shader[i]); + si_get_late_shader_variant_info(shader, &linked->shader[i].args, linked->shader[i].nir); } } } @@ -3760,6 +3795,8 @@ bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler shader->info.uses_base_instance |= shader->previous_stage->info.uses_base_instance; shader->info.uses_draw_id |=
shader->previous_stage->info.uses_draw_id; shader->info.uses_vs_state_indexed |= shader->previous_stage->info.uses_vs_state_indexed; + shader->info.uses_gs_state_provoking_vtx_first |= shader->previous_stage->info.uses_gs_state_provoking_vtx_first; + shader->info.uses_gs_state_outprim |= shader->previous_stage->info.uses_gs_state_outprim; } if (shader->epilog) { shader->config.num_sgprs = @@ -3780,22 +3817,6 @@ bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler gfx9_get_gs_info(shader->previous_stage_sel, sel, &shader->gs_info); } - shader->uses_vs_state_provoking_vertex = - sscreen->use_ngg && - /* Used to convert triangle strips from GS to triangles. */ - ((sel->stage == MESA_SHADER_GEOMETRY && - util_rast_prim_is_triangles(sel->info.base.gs.output_primitive)) || - (sel->stage == MESA_SHADER_VERTEX && - /* Used to export PrimitiveID from the correct vertex. */ - shader->key.ge.mono.u.vs_export_prim_id)); - - shader->uses_gs_state_outprim = sscreen->use_ngg && - /* Only used by streamout and the PrimID export in vertex - * shaders. */ - sel->stage == MESA_SHADER_VERTEX && - (si_shader_uses_streamout(shader) || - shader->uses_vs_state_provoking_vertex); - si_fix_resource_usage(sscreen, shader); /* Upload.
*/ diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index eeda76fac17..5b1ce23e9c9 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -876,6 +876,8 @@ struct si_shader_binary_info { bool uses_base_instance : 1; bool uses_draw_id : 1; bool uses_vs_state_indexed : 1; /* VS_STATE_INDEXED */ + bool uses_gs_state_provoking_vtx_first : 1; + bool uses_gs_state_outprim : 1; uint8_t nr_pos_exports; uint8_t nr_param_exports; unsigned private_mem_vgprs; @@ -952,10 +954,6 @@ struct si_shader { struct ac_shader_config config; struct si_shader_binary_info info; - /* SI_SGPR_VS_STATE_BITS */ - bool uses_vs_state_provoking_vertex; - bool uses_gs_state_outprim; - /* Shader key + LLVM IR + disassembly + statistics. * Generated for debug contexts only. */ diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp index 6d25ced2e7e..2406e8966b0 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.cpp +++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp @@ -54,8 +54,8 @@ static bool si_update_shaders(struct si_context *sctx) struct pipe_context *ctx = (struct pipe_context *)sctx; struct si_shader *old_vs = si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current; unsigned old_pa_cl_vs_out_cntl = old_vs ? old_vs->pa_cl_vs_out_cntl : 0; - bool old_uses_vs_state_provoking_vertex = old_vs ? old_vs->uses_vs_state_provoking_vertex : false; - bool old_uses_gs_state_outprim = old_vs ? old_vs->uses_gs_state_outprim : false; + bool old_uses_gs_state_provoking_vertex = old_vs ? old_vs->info.uses_gs_state_provoking_vtx_first : false; + bool old_uses_gs_state_outprim = old_vs ? old_vs->info.uses_gs_state_outprim : false; struct si_shader *old_ps = sctx->shader.ps.current; unsigned old_spi_shader_col_format = old_ps ?
old_ps->key.ps.part.epilog.spi_shader_col_format : 0; @@ -268,8 +268,8 @@ static bool si_update_shaders(struct si_context *sctx) si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_regs); /* If we start to use any of these, we need to update the SGPR. */ - if ((hw_vs->uses_vs_state_provoking_vertex && !old_uses_vs_state_provoking_vertex) || - (hw_vs->uses_gs_state_outprim && !old_uses_gs_state_outprim)) { + if ((hw_vs->info.uses_gs_state_provoking_vtx_first && !old_uses_gs_state_provoking_vertex) || + (hw_vs->info.uses_gs_state_outprim && !old_uses_gs_state_outprim)) { si_update_ngg_sgpr_state_out_prim(sctx, hw_vs, NGG); si_update_ngg_sgpr_state_provoking_vtx(sctx, hw_vs, NGG); }