ac/nir/legacy: Use new pre-rasterization output info helper.

For legacy VS/TES and GS.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28936>
This commit is contained in:
Timur Kristóf
2024-04-25 17:29:51 +02:00
parent 4ac0727f87
commit 72a73a6f8a
+33 -84
View File
@@ -658,17 +658,8 @@ ac_nir_lower_indirect_derefs(nir_shader *shader,
return progress;
}
struct shader_outputs {
nir_def *data[VARYING_SLOT_MAX][4];
nir_def *data_16bit_lo[16][4];
nir_def *data_16bit_hi[16][4];
nir_alu_type (*type_16bit_lo)[4];
nir_alu_type (*type_16bit_hi)[4];
};
static nir_def **
get_output_and_type(struct shader_outputs *outputs, unsigned slot, bool high_16bits,
get_output_and_type(ac_nir_prerast_out *out, unsigned slot, bool high_16bits,
nir_alu_type **types)
{
nir_def **data;
@@ -678,17 +669,17 @@ get_output_and_type(struct shader_outputs *outputs, unsigned slot, bool high_16b
* to 32bit. Vulkan is not allowed to streamout output less than 32bit.
*/
if (slot < VARYING_SLOT_VAR0_16BIT) {
data = outputs->data[slot];
data = out->outputs[slot];
type = NULL;
} else {
unsigned index = slot - VARYING_SLOT_VAR0_16BIT;
if (high_16bits) {
data = outputs->data_16bit_hi[index];
type = outputs->type_16bit_hi[index];
data = out->outputs_16bit_hi[index];
type = out->types_16bit_hi[index];
} else {
data = outputs->data_16bit_lo[index];
type = outputs->type_16bit_lo[index];
data = out->outputs[index];
type = out->types_16bit_lo[index];
}
}
@@ -697,8 +688,7 @@ get_output_and_type(struct shader_outputs *outputs, unsigned slot, bool high_16b
}
static void
emit_streamout(nir_builder *b, unsigned stream, nir_xfb_info *info,
struct shader_outputs *outputs)
emit_streamout(nir_builder *b, unsigned stream, nir_xfb_info *info, ac_nir_prerast_out *out)
{
nir_def *so_vtx_count = nir_ubfe_imm(b, nir_load_streamout_config_amd(b), 16, 7);
nir_def *tid = nir_load_subgroup_invocation(b);
@@ -726,7 +716,7 @@ emit_streamout(nir_builder *b, unsigned stream, nir_xfb_info *info,
nir_alu_type *output_type;
nir_def **output_data =
get_output_and_type(outputs, output->location, output->high_16bits, &output_type);
get_output_and_type(out, output->location, output->high_16bits, &output_type);
nir_def *vec[4] = {undef, undef, undef, undef};
uint8_t mask = 0;
@@ -801,17 +791,18 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
nir_push_if(&b, nir_ieq_imm(&b, stream_id, stream));
uint32_t offset = 0;
struct shader_outputs outputs = {
.type_16bit_lo = output_info->types_16bit_lo,
.type_16bit_hi = output_info->types_16bit_hi,
};
ac_nir_prerast_out out = {0};
if (output_info->types_16bit_lo)
memcpy(&out.types_16bit_lo, output_info->types_16bit_lo, sizeof(out.types_16bit_lo));
if (output_info->types_16bit_hi)
memcpy(&out.types_16bit_hi, output_info->types_16bit_hi, sizeof(out.types_16bit_hi));
u_foreach_bit64 (i, gs_nir->info.outputs_written) {
u_foreach_bit (j, output_info->usage_mask[i]) {
if (((output_info->streams[i] >> (j * 2)) & 0x3) != stream)
continue;
outputs.data[i][j] =
out.outputs[i][j] =
nir_load_buffer_amd(&b, 1, 32, gsvs_ring, vtx_offset, zero, zero,
.base = offset,
.access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL);
@@ -819,9 +810,9 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
/* clamp legacy color output */
if (i == VARYING_SLOT_COL0 || i == VARYING_SLOT_COL1 ||
i == VARYING_SLOT_BFC0 || i == VARYING_SLOT_BFC1) {
nir_def *color = outputs.data[i][j];
nir_def *color = out.outputs[i][j];
nir_def *clamp = nir_load_clamp_vertex_color_amd(&b);
outputs.data[i][j] = nir_bcsel(&b, clamp, nir_fsat(&b, color), color);
out.outputs[i][j] = nir_bcsel(&b, clamp, nir_fsat(&b, color), color);
}
offset += gs_nir->info.gs.vertices_out * 16 * 4;
@@ -843,17 +834,17 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
.access = ACCESS_COHERENT | ACCESS_NON_TEMPORAL);
if (has_lo_16bit)
outputs.data_16bit_lo[i][j] = nir_unpack_32_2x16_split_x(&b, data);
out.outputs_16bit_lo[i][j] = nir_unpack_32_2x16_split_x(&b, data);
if (has_hi_16bit)
outputs.data_16bit_hi[i][j] = nir_unpack_32_2x16_split_y(&b, data);
out.outputs_16bit_hi[i][j] = nir_unpack_32_2x16_split_y(&b, data);
offset += gs_nir->info.gs.vertices_out * 16 * 4;
}
}
if (stream_id)
emit_streamout(&b, stream, info, &outputs);
emit_streamout(&b, stream, info, &out);
if (stream == 0) {
uint64_t export_outputs = b.shader->info.outputs_written | VARYING_BIT_POS;
@@ -863,15 +854,15 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
export_outputs &= ~VARYING_BIT_LAYER;
ac_nir_export_position(&b, gfx_level, clip_cull_mask, !has_param_exports,
force_vrs, true, export_outputs, outputs.data, NULL);
force_vrs, true, export_outputs, out.outputs, NULL);
if (has_param_exports) {
ac_nir_export_parameters(&b, param_offsets,
b.shader->info.outputs_written,
b.shader->info.outputs_written_16bit,
outputs.data,
outputs.data_16bit_lo,
outputs.data_16bit_hi);
out.outputs,
out.outputs_16bit_lo,
out.outputs_16bit_hi);
}
}
@@ -886,7 +877,7 @@ ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
}
static void
gather_outputs(nir_builder *b, nir_function_impl *impl, struct shader_outputs *outputs)
gather_outputs(nir_builder *b, nir_function_impl *impl, ac_nir_prerast_out *out)
{
/* Assume:
* - the shader used nir_lower_io_to_temporaries
@@ -902,44 +893,7 @@ gather_outputs(nir_builder *b, nir_function_impl *impl, struct shader_outputs *o
if (intrin->intrinsic != nir_intrinsic_store_output)
continue;
assert(nir_src_is_const(intrin->src[1]) && !nir_src_as_uint(intrin->src[1]));
nir_alu_type type = nir_intrinsic_src_type(intrin);
nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
nir_alu_type *output_type;
nir_def **output_data =
get_output_and_type(outputs, sem.location, sem.high_16bits, &output_type);
b->cursor = nir_after_instr(instr);
nir_def *store_val = intrin->src[0].ssa;
/* 16-bit output stored in a normal varying slot that isn't a dedicated 16-bit slot. */
const bool non_dedicated_16bit = sem.location < VARYING_SLOT_VAR0_16BIT && store_val->bit_size == 16;
u_foreach_bit (i, nir_intrinsic_write_mask(intrin)) {
unsigned comp = nir_intrinsic_component(intrin) + i;
nir_def *store_component = nir_channel(b, store_val, i);
if (non_dedicated_16bit) {
if (sem.high_16bits) {
nir_def *lo = output_data[comp] ? nir_unpack_32_2x16_split_x(b, output_data[comp]) : nir_imm_intN_t(b, 0, 16);
output_data[comp] = nir_pack_32_2x16_split(b, lo, store_component);
} else {
nir_def *hi = output_data[comp] ? nir_unpack_32_2x16_split_y(b, output_data[comp]) : nir_imm_intN_t(b, 0, 16);
output_data[comp] = nir_pack_32_2x16_split(b, store_component, hi);
}
if (output_type)
output_type[comp] = nir_type_uint32;
} else {
output_data[comp] = store_component;
if (output_type)
output_type[comp] = type;
}
}
/* remove all store output instruction */
ac_nir_gather_prerast_store_output_info(b, intrin, out);
nir_instr_remove(instr);
}
}
@@ -962,13 +916,8 @@ ac_nir_lower_legacy_vs(nir_shader *nir,
nir_builder b = nir_builder_at(nir_after_impl(impl));
nir_alu_type output_types_16bit_lo[16][4];
nir_alu_type output_types_16bit_hi[16][4];
struct shader_outputs outputs = {
.type_16bit_lo = output_types_16bit_lo,
.type_16bit_hi = output_types_16bit_hi,
};
gather_outputs(&b, impl, &outputs);
ac_nir_prerast_out out = {0};
gather_outputs(&b, impl, &out);
b.cursor = nir_after_impl(impl);
if (export_primitive_id) {
@@ -976,14 +925,14 @@ ac_nir_lower_legacy_vs(nir_shader *nir,
* vertex stage because it's implicit for VS or TES (but required by the Vulkan spec for GS
* or MS).
*/
outputs.data[VARYING_SLOT_PRIMITIVE_ID][0] = nir_load_primitive_id(&b);
out.outputs[VARYING_SLOT_PRIMITIVE_ID][0] = nir_load_primitive_id(&b);
/* Update outputs_written to reflect that the pass added a new output. */
nir->info.outputs_written |= BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_ID);
}
if (!disable_streamout && nir->xfb_info) {
emit_streamout(&b, 0, nir->xfb_info, &outputs);
emit_streamout(&b, 0, nir->xfb_info, &out);
preserved = nir_metadata_none;
}
@@ -994,15 +943,15 @@ ac_nir_lower_legacy_vs(nir_shader *nir,
export_outputs &= ~VARYING_BIT_LAYER;
ac_nir_export_position(&b, gfx_level, clip_cull_mask, !has_param_exports,
force_vrs, true, export_outputs, outputs.data, NULL);
force_vrs, true, export_outputs, out.outputs, NULL);
if (has_param_exports) {
ac_nir_export_parameters(&b, param_offsets,
nir->info.outputs_written,
nir->info.outputs_written_16bit,
outputs.data,
outputs.data_16bit_lo,
outputs.data_16bit_hi);
out.outputs,
out.outputs_16bit_lo,
out.outputs_16bit_hi);
}
nir_metadata_preserve(impl, preserved);