From 1c005e72f456b222ed28790731df5db8dbbfd7e7 Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Fri, 30 Sep 2022 19:29:43 +0100
Subject: [PATCH] ac/nir: add legacy streamout and GS copy shader helpers

Signed-off-by: Rhys Perry
Reviewed-by: Qiang Yu
Part-of:
---
 src/amd/common/ac_nir.c | 257 ++++++++++++++++++++++++++++++++++++++++
 src/amd/common/ac_nir.h |  13 ++
 2 files changed, 270 insertions(+)

diff --git a/src/amd/common/ac_nir.c b/src/amd/common/ac_nir.c
index 49134cf33df..f939640242c 100644
--- a/src/amd/common/ac_nir.c
+++ b/src/amd/common/ac_nir.c
@@ -108,3 +108,260 @@ ac_nir_lower_indirect_derefs(nir_shader *shader,
    NIR_PASS(progress, shader, nir_lower_indirect_derefs, indirect_mask, UINT32_MAX);
    return progress;
 }
+
+/* Emit the legacy streamout stores for one vertex stream.
+ *
+ * outputs[slot][component] holds the SSA value of each output component, or NULL if that
+ * component is not available; slots are indexed by pipe_stream_output::register_index.
+ * Only subgroup invocations whose index is below the count held in bits [22:16] of the
+ * streamout config store anything.
+ */
+static void
+emit_streamout(nir_builder *b, const struct pipe_stream_output_info *info, unsigned stream,
+               nir_ssa_def *const outputs[64][4])
+{
+   nir_ssa_def *so_vtx_count = nir_ubfe_imm(b, nir_load_streamout_config_amd(b), 16, 7);
+   nir_ssa_def *tid = nir_load_subgroup_invocation(b);
+
+   nir_push_if(b, nir_ilt(b, tid, so_vtx_count));
+   nir_ssa_def *so_write_index = nir_load_streamout_write_index_amd(b);
+
+   /* Buffers with a zero stride are not set up and keep NULL entries. */
+   nir_ssa_def *so_buffers[PIPE_MAX_SO_BUFFERS] = {0};
+   nir_ssa_def *so_write_offset[PIPE_MAX_SO_BUFFERS] = {0};
+   for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
+      uint16_t stride = info->stride[i];
+      if (!stride)
+         continue;
+
+      so_buffers[i] = nir_load_streamout_buffer_amd(b, i);
+
+      /* (write_index + tid) * stride + offset; "* 4" presumably converts dwords to bytes. */
+      nir_ssa_def *offset = nir_load_streamout_offset_amd(b, i);
+      offset = nir_iadd(b, nir_imul_imm(b, nir_iadd(b, so_write_index, tid), stride * 4),
+                        nir_imul_imm(b, offset, 4));
+      so_write_offset[i] = offset;
+   }
+
+   nir_ssa_def *undef = nir_ssa_undef(b, 1, 32);
+   for (unsigned i = 0; i < info->num_outputs; i++) {
+      const struct pipe_stream_output *output = &info->output[i];
+      if (stream != output->stream)
+         continue;
+
+      nir_ssa_def *vec[4] = {undef, undef, undef, undef};
+      uint8_t mask = 0;
+      for (unsigned j = 0; j < output->num_components; j++) {
+         if (outputs[output->register_index][output->start_component + j]) {
+            vec[j] = outputs[output->register_index][output->start_component + j];
+            mask |= 1 << j;
+         }
+      }
+
+      if (!mask)
+         continue;
+
+      unsigned buffer = output->output_buffer;
+      /* The target buffer must have been set up above, i.e. have a non-zero stride. */
+      assert(buffer < PIPE_MAX_SO_BUFFERS && so_buffers[buffer]);
+
+      nir_ssa_def *data = nir_vec(b, vec, output->num_components);
+      nir_ssa_def *zero = nir_imm_int(b, 0);
+      nir_store_buffer_amd(b, data, so_buffers[buffer], so_write_offset[buffer], zero, zero,
+                           .base = output->dst_offset * 4, .slc_amd = true, .write_mask = mask,
+                           .access = ACCESS_COHERENT);
+   }
+
+   nir_pop_if(b, NULL);
+}
+
+/* Build the GS copy shader: a vertex shader that loads the geometry shader's outputs from
+ * the GSVS ring, emits streamout stores for them and exports the vertex.
+ *
+ * gs_nir: the geometry shader; its compiler options, output variables, vertices_out and
+ *    clip/cull distance array sizes are used.
+ * so_info: streamout info, must not be NULL; streamout is only emitted if it has outputs.
+ * num_outputs: number of entries in the per-output arrays below, at most 64.
+ * output_usage_mask: per output, the mask of components to load.
+ * output_streams: per output, the 2-bit stream index of each component.
+ * output_semantics: per output, the varying slot; if NULL, the output index is used.
+ * num_stream_output_components: per stream; streams 1-3 are only processed when streamout
+ *    is enabled and their entry is non-zero.
+ *
+ * Returns the new shader. Only stream 0 is stored to outputs and exported; the other
+ * streams are only loaded for streamout.
+ */
+nir_shader *
+ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
+                             const struct pipe_stream_output_info *so_info, size_t num_outputs,
+                             const uint8_t *output_usage_mask, const uint8_t *output_streams,
+                             const uint8_t *output_semantics,
+                             const uint8_t num_stream_output_components[4])
+{
+   assert(num_outputs <= 64);
+
+   nir_builder b = nir_builder_init_simple_shader(
+      MESA_SHADER_VERTEX, gs_nir->options, "gs_copy");
+
+   nir_foreach_shader_out_variable(var, gs_nir)
+      nir_shader_add_variable(b.shader, nir_variable_clone(var, b.shader));
+
+   nir_ssa_def *gsvs_ring = nir_load_ring_gsvs_amd(&b);
+
+   nir_ssa_def *stream_id = NULL;
+   if (so_info->num_outputs)
+      stream_id = nir_ubfe_imm(&b, nir_load_streamout_config_amd(&b), 24, 2);
+
+   nir_ssa_def *vtx_offset = nir_imul_imm(&b, nir_load_vertex_id_zero_base(&b), 4);
+   nir_ssa_def *undef = nir_ssa_undef(&b, 1, 32);
+   nir_ssa_def *zero = nir_imm_zero(&b, 1, 32);
+
+   for (unsigned stream = 0; stream < 4; stream++) {
+      if (stream > 0 && (!stream_id || !num_stream_output_components[stream]))
+         continue;
+
+      if (stream_id)
+         nir_push_if(&b, nir_ieq_imm(&b, stream_id, stream));
+
+      /* The ring offset restarts at 0 for every stream. */
+      uint32_t offset = 0;
+      uint64_t output_mask = 0;
+      nir_ssa_def *outputs[64][4] = {{0}};
+      for (unsigned i = 0; i < num_outputs; i++) {
+         unsigned mask = output_usage_mask[i];
+         if (!mask)
+            continue;
+
+         u_foreach_bit (j, mask) {
+            if (((output_streams[i] >> (j * 2)) & 0x3) != stream)
+               continue;
+
+            outputs[i][j] = nir_load_buffer_amd(&b, 1, 32, gsvs_ring, vtx_offset, zero, zero,
+                                                .base = offset, .is_swizzled = false,
+                                                .slc_amd = true, .access = ACCESS_COHERENT);
+
+            offset += gs_nir->info.gs.vertices_out * 16 * 4;
+         }
+
+         output_mask |= 1ull << i;
+      }
+
+      if (stream_id)
+         emit_streamout(&b, so_info, stream, outputs);
+
+      if (stream == 0) {
+         u_foreach_bit64 (i, output_mask) {
+            uint8_t mask = 0;
+            nir_ssa_def *vec[4];
+            for (unsigned j = 0; j < 4; j++) {
+               vec[j] = outputs[i][j] ? outputs[i][j] : undef;
+               mask |= (outputs[i][j] ? 1 : 0) << j;
+            }
+
+            gl_varying_slot location = output_semantics ? output_semantics[i] : i;
+            nir_store_output(&b, nir_vec(&b, vec, 4), zero, .base = i, .write_mask = mask,
+                             .src_type = nir_type_uint32,
+                             .io_semantics = {.location = location, .num_slots = 1});
+         }
+
+         nir_export_vertex_amd(&b);
+      }
+
+      /* Put the next stream's code into the else branch. The ifs are never popped: no
+       * instructions are emitted after this loop, so the cursor can stay where it is.
+       */
+      if (stream_id)
+         nir_push_else(&b, NULL);
+   }
+
+   b.shader->info.clip_distance_array_size = gs_nir->info.clip_distance_array_size;
+   b.shader->info.cull_distance_array_size = gs_nir->info.cull_distance_array_size;
+
+   return b.shader;
+}
+
+/* Record the value of every component written by a store_output in impl as
+ * outputs[base][component]. The channel extracts are inserted at the cursor of b.
+ */
+static void
+gather_outputs(nir_builder *b, nir_function_impl *impl, nir_ssa_def *outputs[64][4])
+{
+   /* Assume:
+    * - the shader used nir_lower_io_to_temporaries
+    * - 64-bit outputs are lowered
+    * - no indirect indexing is present
+    */
+   nir_foreach_block(block, impl) {
+      nir_foreach_instr (instr, block) {
+         if (instr->type != nir_instr_type_intrinsic)
+            continue;
+
+         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+         if (intrin->intrinsic != nir_intrinsic_store_output)
+            continue;
+
+         assert(nir_src_is_const(intrin->src[1]) && !nir_src_as_uint(intrin->src[1]));
+
+         unsigned slot = nir_intrinsic_base(intrin);
+         assert(slot < 64);
+
+         u_foreach_bit (i, nir_intrinsic_write_mask(intrin)) {
+            unsigned comp = nir_intrinsic_component(intrin) + i;
+            assert(comp < 4);
+            outputs[slot][comp] = nir_channel(b, intrin->src[0].ssa, i);
+         }
+      }
+   }
+}
+
+/* Finish a legacy VS: optionally store the primitive ID as an output, emit the streamout
+ * stores for stream 0 and append the vertex export at the end of the entrypoint.
+ *
+ * primitive_id_location: output base for the primitive ID, or a negative value to skip it.
+ * so_info: streamout info; may be NULL.
+ */
+void
+ac_nir_lower_legacy_vs(nir_shader *nir, int primitive_id_location,
+                       const struct pipe_stream_output_info *so_info)
+{
+   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
+   nir_metadata preserved = nir_metadata_block_index | nir_metadata_dominance;
+
+   nir_builder b;
+   nir_builder_init(&b, impl);
+   b.cursor = nir_after_cf_list(&impl->body);
+
+   if (primitive_id_location >= 0) {
+      /* When the primitive ID is read by FS, we must ensure that it's exported by the previous
+       * vertex stage because it's implicit for VS or TES (but required by the Vulkan spec for GS
+       * or MS).
+       */
+      nir_variable *var = nir_variable_create(nir, nir_var_shader_out, glsl_int_type(), NULL);
+      var->data.location = VARYING_SLOT_PRIMITIVE_ID;
+      var->data.interpolation = INTERP_MODE_NONE;
+      var->data.driver_location = primitive_id_location;
+
+      nir_store_output(
+         &b, nir_load_primitive_id(&b), nir_imm_int(&b, 0), .base = primitive_id_location,
+         .src_type = nir_type_int32,
+         .io_semantics = (nir_io_semantics){.location = var->data.location, .num_slots = 1});
+
+      /* Update outputs_written to reflect that the pass added a new output. */
+      nir->info.outputs_written |= BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_ID);
+   }
+
+   if (so_info && so_info->num_outputs) {
+      /* 26.1. Transform Feedback of Vulkan 1.3.229 spec:
+       * > The size of each component of an output variable must be at least 32-bits.
+       * We lower 64-bit outputs.
+       */
+      nir_ssa_def *outputs[64][4] = {{0}};
+      gather_outputs(&b, impl, outputs);
+
+      emit_streamout(&b, so_info, 0, outputs);
+      /* emit_streamout adds an if, which invalidates block indices and dominance. */
+      preserved = nir_metadata_none;
+   }
+
+   nir_export_vertex_amd(&b);
+   nir_metadata_preserve(impl, preserved);
+}
diff --git a/src/amd/common/ac_nir.h b/src/amd/common/ac_nir.h
index 81771ae0376..5845a61b764 100644
--- a/src/amd/common/ac_nir.h
+++ b/src/amd/common/ac_nir.h
@@ -27,9 +27,11 @@
 #define AC_NIR_H
 
 #include "nir.h"
+#include "nir_builder.h"
 #include "ac_shader_args.h"
 #include "ac_shader_util.h"
 #include "amd_family.h"
+#include "pipe/p_state.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -179,6 +181,17 @@ ac_nir_lower_global_access(nir_shader *shader);
 bool
 ac_nir_lower_resinfo(nir_shader *nir, enum amd_gfx_level gfx_level);
 
+nir_shader *
+ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
+                             const struct pipe_stream_output_info *so_info, size_t num_outputs,
+                             const uint8_t *output_usage_mask, const uint8_t *output_streams,
+                             const uint8_t *output_semantics,
+                             const uint8_t num_stream_output_components[4]);
+
+void
+ac_nir_lower_legacy_vs(nir_shader *nir, int primitive_id_location,
+                       const struct pipe_stream_output_info *so_info);
+
 #ifdef __cplusplus
 }
 #endif