radv: Configure implicit VS primitive ID to be per-primitive.
This is beneficial to applications that rely on the implicit primitive ID from VS.

- We don't have to disable provoking vertex reuse, which results in more efficient vertex processing.
- There is no LDS access needed to export the primitive ID, because it is already available to GS threads.
- As a consequence of not needing LDS, we can use this together with NGG passthrough mode.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32270>
This commit is contained in:
@@ -783,6 +783,7 @@ radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage,
|
||||
options.early_prim_export = info->has_ngg_early_prim_export;
|
||||
options.passthrough = info->is_ngg_passthrough;
|
||||
options.export_primitive_id = info->outinfo.export_prim_id;
|
||||
options.export_primitive_id_per_prim = info->outinfo.export_prim_id_per_primitive;
|
||||
options.instance_rate_inputs = gfx_state->vi.instance_rate_inputs << VERT_ATTRIB_GENERIC0;
|
||||
|
||||
NIR_PASS_V(nir, ac_nir_lower_ngg_nogs, &options);
|
||||
@@ -2162,7 +2163,9 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi
|
||||
if (pdev->info.gfx_level >= GFX12) {
|
||||
if (info->gs.vertices_in >= 4) {
|
||||
gs_vgpr_comp_cnt = 2; /* VGPR2 contains offsets 3-5 */
|
||||
} else if (info->uses_prim_id || (es_stage == MESA_SHADER_VERTEX && info->outinfo.export_prim_id)) {
|
||||
} else if (info->uses_prim_id ||
|
||||
(es_stage == MESA_SHADER_VERTEX &&
|
||||
(info->outinfo.export_prim_id || info->outinfo.export_prim_id_per_primitive))) {
|
||||
gs_vgpr_comp_cnt = 1; /* VGPR1 contains PrimitiveID. */
|
||||
} else {
|
||||
gs_vgpr_comp_cnt = 0; /* VGPR0 contains offsets 0-2, GS invocation ID. */
|
||||
@@ -2184,7 +2187,9 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi
|
||||
|
||||
if (info->uses_invocation_id) {
|
||||
gs_vgpr_comp_cnt = 3; /* VGPR3 contains InvocationID. */
|
||||
} else if (info->uses_prim_id || (es_stage == MESA_SHADER_VERTEX && info->outinfo.export_prim_id)) {
|
||||
} else if (info->uses_prim_id ||
|
||||
(es_stage == MESA_SHADER_VERTEX &&
|
||||
(info->outinfo.export_prim_id || info->outinfo.export_prim_id_per_primitive))) {
|
||||
gs_vgpr_comp_cnt = 2; /* VGPR2 contains PrimitiveID. */
|
||||
} else if (need_gs_vtx_offset2) {
|
||||
gs_vgpr_comp_cnt = 1; /* VGPR1 contains offsets 2, 3 */
|
||||
|
||||
@@ -435,12 +435,21 @@ radv_set_vs_output_param(struct radv_device *device, const struct nir_shader *ni
|
||||
|
||||
memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED, sizeof(outinfo->vs_output_param_offset));
|
||||
|
||||
/* Implicit primitive ID for VS and TES is added by ac_nir_lower_legacy_vs / ac_nir_lower_ngg,
|
||||
* it can be configured as either a per-vertex or per-primitive output depending on the GPU.
|
||||
*/
|
||||
const bool implicit_prim_id_per_prim =
|
||||
export_prim_id && info->is_ngg && pdev->info.gfx_level >= GFX10_3 && nir->info.stage == MESA_SHADER_VERTEX;
|
||||
const bool implicit_prim_id_per_vertex =
|
||||
export_prim_id && !implicit_prim_id_per_prim &&
|
||||
(nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL);
|
||||
|
||||
unsigned total_param_exports = 0;
|
||||
|
||||
/* Per-vertex outputs */
|
||||
assign_outinfo_params(outinfo, per_vtx_mask, &total_param_exports, 0);
|
||||
|
||||
if (export_prim_id && (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL)) {
|
||||
if (implicit_prim_id_per_vertex) {
|
||||
/* Mark the primitive ID as output when it's implicitly exported by VS or TES. */
|
||||
if (outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] == AC_EXP_PARAM_UNDEFINED)
|
||||
outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = total_param_exports++;
|
||||
@@ -462,6 +471,14 @@ radv_set_vs_output_param(struct radv_device *device, const struct nir_shader *ni
|
||||
*/
|
||||
const unsigned extra_offset = !!(total_param_exports == 0 && pdev->info.gfx_level >= GFX11);
|
||||
|
||||
if (implicit_prim_id_per_prim) {
|
||||
/* Mark the primitive ID as output when it's implicitly exported by VS. */
|
||||
if (outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] == AC_EXP_PARAM_UNDEFINED)
|
||||
outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = extra_offset + total_param_exports++;
|
||||
|
||||
outinfo->export_prim_id_per_primitive = true;
|
||||
}
|
||||
|
||||
/* Per-primitive outputs: the HW needs these to be last. */
|
||||
assign_outinfo_params(outinfo, per_prim_mask, &total_param_exports, extra_offset);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user