From 3626bc2daa7e22743a1e5edc70f037ee38ab4dbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 15 Mar 2023 03:54:48 -0400 Subject: [PATCH] ac/nir: don't emit duplicated parameter exports Can you spot the problem? exp param0 v6, v5, v5, v5 exp param1 v7, off, off, off exp param1 v7, off, off, off radeonsi uses ac_nir_optimize_outputs to eliminate output stores with identical SSA defs (i.e. duplicated), which then causes 2 outputs to map to the same parameter export. This is a regression. The old LLVM code was correctly emitting each export only once. vs_output_param_mask was supposed to be used for this instead of vs_output_param_offset. Fixes: 80506be31bf3 - ac/nir/ngg,radv,radeonsi: nogs use ac_nir_export_(position|parameter) Reviewed-by: Pierre-Eric Pelloux-Prayer Reviewed-by: Qiang Yu Part-of: --- src/amd/common/ac_nir.c | 18 ++++++++++++++++++ src/amd/common/ac_nir_lower_ngg.c | 14 +++++++++++++- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/src/amd/common/ac_nir.c b/src/amd/common/ac_nir.c index 3bdf9c7675f..34e14633e1b 100644 --- a/src/amd/common/ac_nir.c +++ b/src/amd/common/ac_nir.c @@ -259,6 +259,8 @@ ac_nir_export_parameter(nir_builder *b, nir_ssa_def *(*outputs_16bit_lo)[4], nir_ssa_def *(*outputs_16bit_hi)[4]) { + uint32_t exported_params = 0; + u_foreach_bit64 (slot, outputs_written) { unsigned offset = param_offsets[slot]; if (offset > AC_EXP_PARAM_OFFSET_31) @@ -274,10 +276,18 @@ ac_nir_export_parameter(nir_builder *b, if (!write_mask) continue; + /* Since param_offsets[] can map multiple varying slots to the same + * param export index (that's radeonsi-specific behavior), we need to + * do this so as not to emit duplicated exports. + */ + if (exported_params & BITFIELD_BIT(offset)) + continue; + nir_export_amd( b, get_export_output(b, outputs[slot]), .base = V_008DFC_SQ_EXP_PARAM + offset, .write_mask = write_mask); + exported_params |= BITFIELD_BIT(offset); } u_foreach_bit (slot, outputs_written_16bit) { @@ -295,6 +305,13 @@ ac_nir_export_parameter(nir_builder *b, if (!write_mask) continue; + /* Since param_offsets[] can map multiple varying slots to the same + * param export index (that's radeonsi-specific behavior), we need to + * do this so as not to emit duplicated exports. + */ + if (exported_params & BITFIELD_BIT(offset)) + continue; + nir_ssa_def *vec[4]; nir_ssa_def *undef = nir_ssa_undef(b, 1, 16); for (int i = 0; i < 4; i++) { @@ -307,6 +324,7 @@ ac_nir_export_parameter(nir_builder *b, b, nir_vec(b, vec, 4), .base = V_008DFC_SQ_EXP_PARAM + offset, .write_mask = write_mask); + exported_params |= BITFIELD_BIT(offset); } } diff --git a/src/amd/common/ac_nir_lower_ngg.c b/src/amd/common/ac_nir_lower_ngg.c index 4b20018a5d9..9cf6fb75959 100644 --- a/src/amd/common/ac_nir_lower_ngg.c +++ b/src/amd/common/ac_nir_lower_ngg.c @@ -2185,9 +2185,20 @@ export_vertex_params_gfx11(nir_builder *b, nir_ssa_def *export_tid, nir_ssa_def nir_ssa_def *voffset = nir_imm_int(b, 0); nir_ssa_def *undef = nir_ssa_undef(b, 1, 32); + uint32_t exported_params = 0; + for (unsigned i = 0; i < num_outputs; i++) { gl_varying_slot slot = outputs[i].slot; - nir_ssa_def *soffset = nir_iadd_imm(b, attr_offset, vs_output_param_offset[slot] * 16 * 32); + unsigned offset = vs_output_param_offset[slot]; + + /* Since vs_output_param_offset[] can map multiple varying slots to + * the same param export index (that's radeonsi-specific behavior), + * we need to do this so as not to emit duplicated exports. + */ + if (exported_params & BITFIELD_BIT(offset)) + continue; + + nir_ssa_def *soffset = nir_iadd_imm(b, attr_offset, offset * 16 * 32); nir_ssa_def *comp[4]; for (unsigned j = 0; j < 4; j++) @@ -2195,6 +2206,7 @@ export_vertex_params_gfx11(nir_builder *b, nir_ssa_def *export_tid, nir_ssa_def nir_store_buffer_amd(b, nir_vec(b, comp, 4), attr_rsrc, voffset, soffset, vindex, .memory_modes = nir_var_shader_out, .access = ACCESS_COHERENT | ACCESS_IS_SWIZZLED_AMD); + exported_params |= BITFIELD_BIT(offset); } nir_pop_if(b, NULL);