diff --git a/src/amd/common/ac_nir.c b/src/amd/common/ac_nir.c index 3bdf9c7675f..34e14633e1b 100644 --- a/src/amd/common/ac_nir.c +++ b/src/amd/common/ac_nir.c @@ -259,6 +259,8 @@ ac_nir_export_parameter(nir_builder *b, nir_ssa_def *(*outputs_16bit_lo)[4], nir_ssa_def *(*outputs_16bit_hi)[4]) { + uint32_t exported_params = 0; + u_foreach_bit64 (slot, outputs_written) { unsigned offset = param_offsets[slot]; if (offset > AC_EXP_PARAM_OFFSET_31) @@ -274,10 +276,18 @@ ac_nir_export_parameter(nir_builder *b, if (!write_mask) continue; + /* Since param_offsets[] can map multiple varying slots to the same + * param export index (that's radeonsi-specific behavior), we need to + * do this so as not to emit duplicated exports. + */ + if (exported_params & BITFIELD_BIT(offset)) + continue; + nir_export_amd( b, get_export_output(b, outputs[slot]), .base = V_008DFC_SQ_EXP_PARAM + offset, .write_mask = write_mask); + exported_params |= BITFIELD_BIT(offset); } u_foreach_bit (slot, outputs_written_16bit) { @@ -295,6 +305,13 @@ ac_nir_export_parameter(nir_builder *b, if (!write_mask) continue; + /* Since param_offsets[] can map multiple varying slots to the same + * param export index (that's radeonsi-specific behavior), we need to + * do this so as not to emit duplicated exports. + */ + if (exported_params & BITFIELD_BIT(offset)) + continue; + nir_ssa_def *vec[4]; nir_ssa_def *undef = nir_ssa_undef(b, 1, 16); for (int i = 0; i < 4; i++) { @@ -307,6 +324,7 @@ ac_nir_export_parameter(nir_builder *b, b, nir_vec(b, vec, 4), .base = V_008DFC_SQ_EXP_PARAM + offset, .write_mask = write_mask); + exported_params |= BITFIELD_BIT(offset); } } diff --git a/src/amd/common/ac_nir_lower_ngg.c b/src/amd/common/ac_nir_lower_ngg.c index 4b20018a5d9..9cf6fb75959 100644 --- a/src/amd/common/ac_nir_lower_ngg.c +++ b/src/amd/common/ac_nir_lower_ngg.c @@ -2185,9 +2185,20 @@ export_vertex_params_gfx11(nir_builder *b, nir_ssa_def *export_tid, nir_ssa_def nir_ssa_def *voffset = nir_imm_int(b, 0); nir_ssa_def *undef = nir_ssa_undef(b, 1, 32); + uint32_t exported_params = 0; + for (unsigned i = 0; i < num_outputs; i++) { gl_varying_slot slot = outputs[i].slot; - nir_ssa_def *soffset = nir_iadd_imm(b, attr_offset, vs_output_param_offset[slot] * 16 * 32); + unsigned offset = vs_output_param_offset[slot]; + + /* Since vs_output_param_offset[] can map multiple varying slots to + * the same param export index (that's radeonsi-specific behavior), + * we need to do this so as not to emit duplicated exports. + */ + if (exported_params & BITFIELD_BIT(offset)) + continue; + + nir_ssa_def *soffset = nir_iadd_imm(b, attr_offset, offset * 16 * 32); nir_ssa_def *comp[4]; for (unsigned j = 0; j < 4; j++) @@ -2195,6 +2206,7 @@ export_vertex_params_gfx11(nir_builder *b, nir_ssa_def *export_tid, nir_ssa_def nir_store_buffer_amd(b, nir_vec(b, comp, 4), attr_rsrc, voffset, soffset, vindex, .memory_modes = nir_var_shader_out, .access = ACCESS_COHERENT | ACCESS_IS_SWIZZLED_AMD); + exported_params |= BITFIELD_BIT(offset); } nir_pop_if(b, NULL);