ac/nir: Introduce ac_nir_store_parameters_to_attr_ring.

This function is going to be used for storing parameter outputs
to the attribute ring, instead of the current implementation.

It will be put to use in the following commits.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Acked-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32640>
This commit is contained in:
Timur Kristóf
2024-12-20 12:15:57 -06:00
parent c4b45f1ec8
commit f38680aa1c
2 changed files with 85 additions and 0 deletions
+77
View File
@@ -592,6 +592,83 @@ ac_nir_export_parameters(nir_builder *b,
}
}
/**
 * Emit NIR that stores parameter outputs to the attribute ring.
 *
 * One vec4 is stored per parameter offset, at byte offset (offset * 16).
 * A parameter offset is stored at most once, even if several output slots
 * map to it; slots whose offset is above AC_EXP_PARAM_OFFSET_31 are skipped.
 * Components that have no value in \p out are filled with undef.
 *
 * \param b                      NIR builder used to emit the instructions.
 * \param param_offsets          Parameter offset of each output slot, indexed by slot
 *                               (16-bit slots are indexed from VARYING_SLOT_VAR0_16BIT).
 * \param outputs_written        Bitmask of output slots to consider.
 * \param outputs_written_16bit  Bitmask of 16-bit output slots to consider.
 * \param out                    Source of the per-slot, per-component output values.
 * \param export_tid             Thread index that is compared against the (aligned) thread
 *                               count, or NULL to use nir_is_subgroup_invocation_lt_amd.
 * \param num_export_threads     Number of threads that have outputs to store; it is rounded
 *                               up to a multiple of 8 before use.
 */
void
ac_nir_store_parameters_to_attr_ring(nir_builder *b,
                                     const uint8_t *param_offsets,
                                     const uint64_t outputs_written,
                                     const uint16_t outputs_written_16bit,
                                     ac_nir_prerast_out *out,
                                     nir_def *export_tid, nir_def *num_export_threads)
{
   nir_def *attr_rsrc = nir_load_ring_attr_amd(b);

   /* We should always store full vec4s in groups of 8 lanes for the best performance even if
    * some of them are garbage or have unused components, so align the number of export threads
    * to 8.
    */
   num_export_threads = nir_iand_imm(b, nir_iadd_imm(b, num_export_threads, 7), ~7);

   /* Only the first num_export_threads threads execute the stores below. */
   if (!export_tid)
      nir_push_if(b, nir_is_subgroup_invocation_lt_amd(b, num_export_threads));
   else
      nir_push_if(b, nir_ult(b, export_tid, num_export_threads));

   nir_def *attr_offset = nir_load_ring_attr_offset_amd(b);
   nir_def *vindex = nir_load_local_invocation_index(b);
   nir_def *voffset = nir_imm_int(b, 0);
   nir_def *undef = nir_undef(b, 1, 32);

   /* nir_pack_32_2x16_split takes 16-bit sources, so missing 16-bit components need a
    * 16-bit undef rather than the 32-bit one above. Only create it when it can be used.
    */
   nir_def *undef_16bit = outputs_written_16bit ? nir_undef(b, 1, 16) : NULL;

   /* Bitmask of parameter offsets that already have a store emitted. */
   uint32_t exported_params = 0;

   u_foreach_bit64 (slot, outputs_written) {
      const unsigned offset = param_offsets[slot];

      /* Skip slots that don't have a real parameter offset. */
      if (offset > AC_EXP_PARAM_OFFSET_31)
         continue;

      /* Skip parameter offsets that were already stored. */
      if (exported_params & BITFIELD_BIT(offset))
         continue;

      nir_def *comp[4];
      for (unsigned j = 0; j < 4; j++) {
         comp[j] = out->outputs[slot][j] ? out->outputs[slot][j] : undef;
      }

      nir_store_buffer_amd(b, nir_vec(b, comp, 4), attr_rsrc, voffset, attr_offset, vindex,
                           .base = offset * 16,
                           .memory_modes = nir_var_shader_out,
                           .access = ACCESS_COHERENT | ACCESS_IS_SWIZZLED_AMD);

      exported_params |= BITFIELD_BIT(offset);
   }

   u_foreach_bit (i, outputs_written_16bit) {
      const unsigned offset = param_offsets[VARYING_SLOT_VAR0_16BIT + i];

      if (offset > AC_EXP_PARAM_OFFSET_31)
         continue;

      if (exported_params & BITFIELD_BIT(offset))
         continue;

      /* Pack the low and high 16-bit halves of each component into one 32-bit value. */
      nir_def *comp[4];
      for (unsigned j = 0; j < 4; j++) {
         nir_def *lo = out->outputs_16bit_lo[i][j] ? out->outputs_16bit_lo[i][j] : undef_16bit;
         nir_def *hi = out->outputs_16bit_hi[i][j] ? out->outputs_16bit_hi[i][j] : undef_16bit;
         comp[j] = nir_pack_32_2x16_split(b, lo, hi);
      }

      nir_store_buffer_amd(b, nir_vec(b, comp, 4), attr_rsrc, voffset, attr_offset, vindex,
                           .base = offset * 16,
                           .memory_modes = nir_var_shader_out,
                           .access = ACCESS_COHERENT | ACCESS_IS_SWIZZLED_AMD);

      exported_params |= BITFIELD_BIT(offset);
   }

   nir_pop_if(b, NULL);
}
unsigned
ac_nir_map_io_location(unsigned location,
uint64_t mask,
+8
View File
@@ -109,6 +109,14 @@ ac_nir_export_parameters(nir_builder *b,
uint16_t outputs_written_16bit,
ac_nir_prerast_out *out);
/* Emit NIR that stores parameter outputs to the attribute ring.
 *
 * param_offsets is indexed by output slot; slots whose offset is above
 * AC_EXP_PARAM_OFFSET_31 are skipped, and each parameter offset is stored at most once.
 * outputs_written and outputs_written_16bit are bitmasks of the slots to consider,
 * and out provides the per-component values (missing components are stored as undef).
 * export_tid may be NULL, in which case the subgroup invocation index is compared
 * against num_export_threads instead. num_export_threads is rounded up to a multiple of 8.
 */
void
ac_nir_store_parameters_to_attr_ring(nir_builder *b,
const uint8_t *param_offsets,
const uint64_t outputs_written,
const uint16_t outputs_written_16bit,
ac_nir_prerast_out *out,
nir_def *export_tid, nir_def *num_export_threads);
nir_def *
ac_nir_calc_io_off(nir_builder *b,
nir_intrinsic_instr *intrin,