ac/nir: don't emit duplicated parameter exports
Can you spot the problem?
exp param0 v6, v5, v5, v5
exp param1 v7, off, off, off
exp param1 v7, off, off, off
radeonsi uses ac_nir_optimize_outputs to eliminate output stores with
identical SSA defs (i.e. duplicated), which then causes 2 outputs to
map to the same parameter export.
This is a regression. The old LLVM code was correctly emitting each
export only once. vs_output_param_mask was supposed to be used for
this instead of vs_output_param_offset.
Fixes: 80506be31b - ac/nir/ngg,radv,radeonsi: nogs use ac_nir_export_(position|parameter)
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21920>
This commit is contained in:
@@ -259,6 +259,8 @@ ac_nir_export_parameter(nir_builder *b,
|
||||
nir_ssa_def *(*outputs_16bit_lo)[4],
|
||||
nir_ssa_def *(*outputs_16bit_hi)[4])
|
||||
{
|
||||
uint32_t exported_params = 0;
|
||||
|
||||
u_foreach_bit64 (slot, outputs_written) {
|
||||
unsigned offset = param_offsets[slot];
|
||||
if (offset > AC_EXP_PARAM_OFFSET_31)
|
||||
@@ -274,10 +276,18 @@ ac_nir_export_parameter(nir_builder *b,
|
||||
if (!write_mask)
|
||||
continue;
|
||||
|
||||
/* Since param_offsets[] can map multiple varying slots to the same
|
||||
* param export index (that's radeonsi-specific behavior), we need to
|
||||
* do this so as not to emit duplicated exports.
|
||||
*/
|
||||
if (exported_params & BITFIELD_BIT(offset))
|
||||
continue;
|
||||
|
||||
nir_export_amd(
|
||||
b, get_export_output(b, outputs[slot]),
|
||||
.base = V_008DFC_SQ_EXP_PARAM + offset,
|
||||
.write_mask = write_mask);
|
||||
exported_params |= BITFIELD_BIT(offset);
|
||||
}
|
||||
|
||||
u_foreach_bit (slot, outputs_written_16bit) {
|
||||
@@ -295,6 +305,13 @@ ac_nir_export_parameter(nir_builder *b,
|
||||
if (!write_mask)
|
||||
continue;
|
||||
|
||||
/* Since param_offsets[] can map multiple varying slots to the same
|
||||
* param export index (that's radeonsi-specific behavior), we need to
|
||||
* do this so as not to emit duplicated exports.
|
||||
*/
|
||||
if (exported_params & BITFIELD_BIT(offset))
|
||||
continue;
|
||||
|
||||
nir_ssa_def *vec[4];
|
||||
nir_ssa_def *undef = nir_ssa_undef(b, 1, 16);
|
||||
for (int i = 0; i < 4; i++) {
|
||||
@@ -307,6 +324,7 @@ ac_nir_export_parameter(nir_builder *b,
|
||||
b, nir_vec(b, vec, 4),
|
||||
.base = V_008DFC_SQ_EXP_PARAM + offset,
|
||||
.write_mask = write_mask);
|
||||
exported_params |= BITFIELD_BIT(offset);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -2185,9 +2185,20 @@ export_vertex_params_gfx11(nir_builder *b, nir_ssa_def *export_tid, nir_ssa_def
|
||||
nir_ssa_def *voffset = nir_imm_int(b, 0);
|
||||
nir_ssa_def *undef = nir_ssa_undef(b, 1, 32);
|
||||
|
||||
uint32_t exported_params = 0;
|
||||
|
||||
for (unsigned i = 0; i < num_outputs; i++) {
|
||||
gl_varying_slot slot = outputs[i].slot;
|
||||
nir_ssa_def *soffset = nir_iadd_imm(b, attr_offset, vs_output_param_offset[slot] * 16 * 32);
|
||||
unsigned offset = vs_output_param_offset[slot];
|
||||
|
||||
/* Since vs_output_param_offset[] can map multiple varying slots to
|
||||
* the same param export index (that's radeonsi-specific behavior),
|
||||
* we need to do this so as not to emit duplicated exports.
|
||||
*/
|
||||
if (exported_params & BITFIELD_BIT(offset))
|
||||
continue;
|
||||
|
||||
nir_ssa_def *soffset = nir_iadd_imm(b, attr_offset, offset * 16 * 32);
|
||||
|
||||
nir_ssa_def *comp[4];
|
||||
for (unsigned j = 0; j < 4; j++)
|
||||
@@ -2195,6 +2206,7 @@ export_vertex_params_gfx11(nir_builder *b, nir_ssa_def *export_tid, nir_ssa_def
|
||||
nir_store_buffer_amd(b, nir_vec(b, comp, 4), attr_rsrc, voffset, soffset, vindex,
|
||||
.memory_modes = nir_var_shader_out,
|
||||
.access = ACCESS_COHERENT | ACCESS_IS_SWIZZLED_AMD);
|
||||
exported_params |= BITFIELD_BIT(offset);
|
||||
}
|
||||
|
||||
nir_pop_if(b, NULL);
|
||||
|
||||
Reference in New Issue
Block a user