radv: Configure implicit VS primitive ID to be per-primitive.

This is beneficial to applications that rely on
the implicit primitive ID from VS, for the following reasons:

- We don't have to disable provoking vertex reuse,
  which results in more efficient vertex processing.
- There is no LDS access needed to export the primitive ID,
  because it is already available to GS threads.
- As a consequence of not needing LDS, we can use this
  together with NGG passthrough mode.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32270>
This commit is contained in:
Timur Kristóf
2024-11-19 23:48:43 +01:00
committed by Marge Bot
parent 95ac0f8d76
commit deab81fb0d
2 changed files with 25 additions and 3 deletions
+7 -2
View File
@@ -783,6 +783,7 @@ radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage,
options.early_prim_export = info->has_ngg_early_prim_export;
options.passthrough = info->is_ngg_passthrough;
options.export_primitive_id = info->outinfo.export_prim_id;
options.export_primitive_id_per_prim = info->outinfo.export_prim_id_per_primitive;
options.instance_rate_inputs = gfx_state->vi.instance_rate_inputs << VERT_ATTRIB_GENERIC0;
NIR_PASS_V(nir, ac_nir_lower_ngg_nogs, &options);
@@ -2162,7 +2163,9 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi
if (pdev->info.gfx_level >= GFX12) {
if (info->gs.vertices_in >= 4) {
gs_vgpr_comp_cnt = 2; /* VGPR2 contains offsets 3-5 */
} else if (info->uses_prim_id || (es_stage == MESA_SHADER_VERTEX && info->outinfo.export_prim_id)) {
} else if (info->uses_prim_id ||
(es_stage == MESA_SHADER_VERTEX &&
(info->outinfo.export_prim_id || info->outinfo.export_prim_id_per_primitive))) {
gs_vgpr_comp_cnt = 1; /* VGPR1 contains PrimitiveID. */
} else {
gs_vgpr_comp_cnt = 0; /* VGPR0 contains offsets 0-2, GS invocation ID. */
@@ -2184,7 +2187,9 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi
if (info->uses_invocation_id) {
gs_vgpr_comp_cnt = 3; /* VGPR3 contains InvocationID. */
} else if (info->uses_prim_id || (es_stage == MESA_SHADER_VERTEX && info->outinfo.export_prim_id)) {
} else if (info->uses_prim_id ||
(es_stage == MESA_SHADER_VERTEX &&
(info->outinfo.export_prim_id || info->outinfo.export_prim_id_per_primitive))) {
gs_vgpr_comp_cnt = 2; /* VGPR2 contains PrimitiveID. */
} else if (need_gs_vtx_offset2) {
gs_vgpr_comp_cnt = 1; /* VGPR1 contains offsets 2, 3 */
+18 -1
View File
@@ -435,12 +435,21 @@ radv_set_vs_output_param(struct radv_device *device, const struct nir_shader *ni
memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED, sizeof(outinfo->vs_output_param_offset));
/* Implicit primitive ID for VS and TES is added by ac_nir_lower_legacy_vs / ac_nir_lower_ngg.
* It can be configured as either a per-vertex or a per-primitive output, depending on the GPU.
*/
const bool implicit_prim_id_per_prim =
export_prim_id && info->is_ngg && pdev->info.gfx_level >= GFX10_3 && nir->info.stage == MESA_SHADER_VERTEX;
const bool implicit_prim_id_per_vertex =
export_prim_id && !implicit_prim_id_per_prim &&
(nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL);
unsigned total_param_exports = 0;
/* Per-vertex outputs */
assign_outinfo_params(outinfo, per_vtx_mask, &total_param_exports, 0);
if (export_prim_id && (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL)) {
if (implicit_prim_id_per_vertex) {
/* Mark the primitive ID as output when it's implicitly exported by VS or TES. */
if (outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] == AC_EXP_PARAM_UNDEFINED)
outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = total_param_exports++;
@@ -462,6 +471,14 @@ radv_set_vs_output_param(struct radv_device *device, const struct nir_shader *ni
*/
const unsigned extra_offset = !!(total_param_exports == 0 && pdev->info.gfx_level >= GFX11);
if (implicit_prim_id_per_prim) {
/* Mark the primitive ID as output when it's implicitly exported by VS. */
if (outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] == AC_EXP_PARAM_UNDEFINED)
outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = extra_offset + total_param_exports++;
outinfo->export_prim_id_per_primitive = true;
}
/* Per-primitive outputs: the HW needs these to be last. */
assign_outinfo_params(outinfo, per_prim_mask, &total_param_exports, extra_offset);