ac/nir: don't emit duplicated parameter exports

Can you spot the problem? (param1 is exported twice.)
    exp param0 v6, v5, v5, v5
    exp param1 v7, off, off, off
    exp param1 v7, off, off, off

radeonsi uses ac_nir_optimize_outputs to eliminate output stores with
identical SSA defs (i.e. duplicated), which then causes 2 outputs to
map to the same parameter export.

This is a regression. The old LLVM code was correctly emitting each
export only once. vs_output_param_mask was supposed to be used for
this instead of vs_output_param_offset.

Fixes: 80506be31b - ac/nir/ngg,radv,radeonsi: nogs use ac_nir_export_(position|parameter)

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21920>
This commit is contained in:
Marek Olšák
2023-03-15 03:54:48 -04:00
committed by Marge Bot
parent c86d6c8af8
commit 3626bc2daa
2 changed files with 31 additions and 1 deletions
+18
View File
@@ -259,6 +259,8 @@ ac_nir_export_parameter(nir_builder *b,
nir_ssa_def *(*outputs_16bit_lo)[4],
nir_ssa_def *(*outputs_16bit_hi)[4])
{
uint32_t exported_params = 0;
u_foreach_bit64 (slot, outputs_written) {
unsigned offset = param_offsets[slot];
if (offset > AC_EXP_PARAM_OFFSET_31)
@@ -274,10 +276,18 @@ ac_nir_export_parameter(nir_builder *b,
if (!write_mask)
continue;
/* Since param_offsets[] can map multiple varying slots to the same
* param export index (that's radeonsi-specific behavior), we need to
* do this so as not to emit duplicated exports.
*/
if (exported_params & BITFIELD_BIT(offset))
continue;
nir_export_amd(
b, get_export_output(b, outputs[slot]),
.base = V_008DFC_SQ_EXP_PARAM + offset,
.write_mask = write_mask);
exported_params |= BITFIELD_BIT(offset);
}
u_foreach_bit (slot, outputs_written_16bit) {
@@ -295,6 +305,13 @@ ac_nir_export_parameter(nir_builder *b,
if (!write_mask)
continue;
/* Since param_offsets[] can map multiple varying slots to the same
* param export index (that's radeonsi-specific behavior), we need to
* do this so as not to emit duplicated exports.
*/
if (exported_params & BITFIELD_BIT(offset))
continue;
nir_ssa_def *vec[4];
nir_ssa_def *undef = nir_ssa_undef(b, 1, 16);
for (int i = 0; i < 4; i++) {
@@ -307,6 +324,7 @@ ac_nir_export_parameter(nir_builder *b,
b, nir_vec(b, vec, 4),
.base = V_008DFC_SQ_EXP_PARAM + offset,
.write_mask = write_mask);
exported_params |= BITFIELD_BIT(offset);
}
}
+13 -1
View File
@@ -2185,9 +2185,20 @@ export_vertex_params_gfx11(nir_builder *b, nir_ssa_def *export_tid, nir_ssa_def
nir_ssa_def *voffset = nir_imm_int(b, 0);
nir_ssa_def *undef = nir_ssa_undef(b, 1, 32);
uint32_t exported_params = 0;
for (unsigned i = 0; i < num_outputs; i++) {
gl_varying_slot slot = outputs[i].slot;
nir_ssa_def *soffset = nir_iadd_imm(b, attr_offset, vs_output_param_offset[slot] * 16 * 32);
unsigned offset = vs_output_param_offset[slot];
/* Since vs_output_param_offset[] can map multiple varying slots to
* the same param export index (that's radeonsi-specific behavior),
* we need to do this so as not to emit duplicated exports.
*/
if (exported_params & BITFIELD_BIT(offset))
continue;
nir_ssa_def *soffset = nir_iadd_imm(b, attr_offset, offset * 16 * 32);
nir_ssa_def *comp[4];
for (unsigned j = 0; j < 4; j++)
@@ -2195,6 +2206,7 @@ export_vertex_params_gfx11(nir_builder *b, nir_ssa_def *export_tid, nir_ssa_def
nir_store_buffer_amd(b, nir_vec(b, comp, 4), attr_rsrc, voffset, soffset, vindex,
.memory_modes = nir_var_shader_out,
.access = ACCESS_COHERENT | ACCESS_IS_SWIZZLED_AMD);
exported_params |= BITFIELD_BIT(offset);
}
nir_pop_if(b, NULL);