diff --git a/src/asahi/lib/agx_nir_lower_msaa.c b/src/asahi/lib/agx_nir_lower_msaa.c
new file mode 100644
index 00000000000..d72595a47b7
--- /dev/null
+++ b/src/asahi/lib/agx_nir_lower_msaa.c
@@ -0,0 +1,278 @@
+/*
+ * Copyright 2023 Alyssa Rosenzweig
+ * Copyright 2021 Intel Corporation
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "agx_tilebuffer.h"
+#include "nir.h"
+#include "nir_builder.h"
+
+/* Mask used as the "every sample" target of sample_mask_agx */
+#define ALL_SAMPLES (0xFF)
+
+/*
+ * Instruction callback run once the shader body has been wrapped in the
+ * per-sample loop. `data` is the SSA value of the loop counter, which acts as
+ * the sample ID for the current iteration:
+ *
+ *    - load_sample_id is replaced by the loop counter.
+ *    - The sample mask source of fragment I/O intrinsics is ANDed with
+ *      (1 << sample ID) so each iteration only affects its own sample.
+ *
+ * Returns true if the instruction was rewritten.
+ */
+static bool
+lower_wrapped(nir_builder *b, nir_instr *instr, void *data)
+{
+   nir_ssa_def *sample_id = data;
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+   b->cursor = nir_before_instr(instr);
+
+   switch (intr->intrinsic) {
+   case nir_intrinsic_load_sample_id: {
+      /* Resize the loop counter to the bit size of the destination */
+      unsigned size = nir_dest_bit_size(intr->dest);
+      nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_u2uN(b, sample_id, size));
+      nir_instr_remove(instr);
+      return true;
+   }
+
+   case nir_intrinsic_load_local_pixel_agx:
+   case nir_intrinsic_store_local_pixel_agx:
+   case nir_intrinsic_store_zs_agx:
+   case nir_intrinsic_sample_mask_agx: {
+      /* Fragment I/O inside the loop should only affect one sample. The mask
+       * is source 1 for store_local_pixel_agx and source 0 for the others.
+       */
+      unsigned mask_index =
+         (intr->intrinsic == nir_intrinsic_store_local_pixel_agx) ? 1 : 0;
+
+      nir_ssa_def *mask = intr->src[mask_index].ssa;
+      nir_ssa_def *id_mask = nir_ishl(b, nir_imm_intN_t(b, 1, mask->bit_size),
+                                      nir_u2u32(b, sample_id));
+      nir_src_rewrite_ssa(&intr->src[mask_index], nir_iand(b, mask, id_mask));
+      return true;
+   }
+
+   default:
+      return false;
+   }
+}
+
+/*
+ * In a monolithic pixel shader, we wrap the fragment shader in a loop over
+ * each sample, and then let optimizations (like loop unrolling) go to town.
+ * This lowering is not compatible with fragment epilogues, which require
+ * something similar at the binary level since the NIR is long gone by then.
+ *
+ * nr_samples must be greater than 1. Always returns true.
+ */
+static bool
+agx_nir_wrap_per_sample_loop(nir_shader *shader, uint8_t nr_samples)
+{
+   assert(nr_samples > 1);
+
+   /* Get the original function */
+   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
+
+   /* Pull the whole body out so it can be reinserted inside the loop */
+   nir_cf_list list;
+   nir_cf_extract(&list, nir_before_block(nir_start_block(impl)),
+                  nir_after_block(nir_impl_last_block(impl)));
+
+   /* Create a builder for the wrapped function */
+   nir_builder b;
+   nir_builder_init(&b, impl);
+   b.cursor = nir_after_block(nir_start_block(impl));
+
+   /* 16-bit loop counter, starting at sample 0 */
+   nir_variable *i =
+      nir_local_variable_create(impl, glsl_uintN_t_type(16), NULL);
+   nir_store_var(&b, i, nir_imm_intN_t(&b, 0, 16), ~0);
+   nir_ssa_def *index = NULL;
+
+   /* Create a loop in the wrapped function */
+   nir_loop *loop = nir_push_loop(&b);
+   {
+      /* Break once every sample has been shaded */
+      index = nir_load_var(&b, i);
+      nir_push_if(&b, nir_uge(&b, index, nir_imm_intN_t(&b, nr_samples, 16)));
+      {
+         nir_jump(&b, nir_jump_break);
+      }
+      nir_pop_if(&b, NULL);
+
+      /* Original shader body, followed by the counter increment */
+      b.cursor = nir_cf_reinsert(&list, b.cursor);
+      nir_store_var(&b, i, nir_iadd_imm(&b, index, 1), ~0);
+   }
+   nir_pop_loop(&b, loop);
+
+   /* We've mucked about with control flow */
+   nir_metadata_preserve(impl, nir_metadata_none);
+
+   /* Use the loop counter as the sample ID each iteration */
+   nir_shader_instructions_pass(
+      shader, lower_wrapped, nir_metadata_block_index | nir_metadata_dominance,
+      index);
+   return true;
+}
+
+/*
+ * Lower sample mask writes to sample_mask_agx targeting every sample. Handles
+ * existing sample_mask_agx instructions as well as store_output to
+ * FRAG_RESULT_SAMPLE_MASK. `data` is the struct agx_msaa_state; if its
+ * api_sample_mask is set, the API sample mask is ANDed into the written mask.
+ *
+ * Returns true if the instruction was rewritten or removed.
+ */
+static bool
+lower_sample_mask_write(nir_builder *b, nir_instr *instr, void *data)
+{
+   struct agx_msaa_state *state = data;
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+   b->cursor = nir_before_instr(instr);
+
+   nir_ssa_def *mask;
+   if (intr->intrinsic == nir_intrinsic_sample_mask_agx) {
+      /* For alpha-to-coverage */
+      assert(nir_src_as_uint(intr->src[0]) == ALL_SAMPLES && "not wrapped");
+      mask = intr->src[1].ssa;
+   } else if (intr->intrinsic == nir_intrinsic_store_output) {
+      nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
+      if (sem.location != FRAG_RESULT_SAMPLE_MASK)
+         return false;
+
+      /* Sample mask writes are ignored unless multisampling is used. */
+      if (state->nr_samples == 1) {
+         nir_instr_remove(instr);
+         return true;
+      }
+
+      mask = nir_u2u16(b, intr->src[0].ssa);
+   } else {
+      return false;
+   }
+
+   /* The Vulkan spec says:
+    *
+    *    If sample shading is enabled, bits written to SampleMask
+    *    corresponding to samples that are not being shaded by the fragment
+    *    shader invocation are ignored.
+    *
+    * That will be satisfied by outputting gl_SampleMask for the whole pixel
+    * and then lowering sample shading after (splitting up sample_mask
+    * targets).
+    */
+   if (state->api_sample_mask)
+      mask = nir_iand(b, mask, nir_load_api_sample_mask_agx(b));
+
+   nir_sample_mask_agx(b, nir_imm_intN_t(b, ALL_SAMPLES, 16), mask);
+   nir_instr_remove(instr);
+   return true;
+}
+
+/*
+ * Apply API sample mask to sample mask inputs, lowering:
+ *
+ *     sample_mask_in --> sample_mask_in & api_sample_mask
+ */
+static bool
+lower_sample_mask_read(nir_builder *b, nir_instr *instr, UNUSED void *_)
+{
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+   /* Build after the load, since the AND consumes its result */
+   b->cursor = nir_after_instr(instr);
+
+   if (intr->intrinsic != nir_intrinsic_load_sample_mask_in)
+      return false;
+
+   nir_ssa_def *old = &intr->dest.ssa;
+   nir_ssa_def *lowered = nir_iand(
+      b, old, nir_u2uN(b, nir_load_api_sample_mask_agx(b), old->bit_size));
+
+   /* Only rewrite uses after the AND, so the AND keeps reading the load */
+   nir_ssa_def_rewrite_uses_after(old, lowered, lowered->parent_instr);
+   return true;
+}
+
+/* glSampleMask(x) --> gl_SampleMask = x */
+static void
+insert_sample_mask_write(nir_shader *s)
+{
+   /* The mask is applied at the very start of the shader */
+   nir_function_impl *impl = nir_shader_get_entrypoint(s);
+
+   nir_builder b;
+   nir_builder_init(&b, impl);
+   b.cursor = nir_before_block(nir_start_block(impl));
+
+   /* Load the desired API sample mask */
+   nir_ssa_def *api_sample_mask = nir_load_api_sample_mask_agx(&b);
+
+   /* Kill samples that are not covered by the mask using the AGX instruction */
+   nir_ssa_def *all_samples = nir_imm_intN_t(&b, ALL_SAMPLES, 16);
+   nir_sample_mask_agx(&b, all_samples, api_sample_mask);
+
+   /* Record that the shader now writes the sample mask */
+   s->info.outputs_written |= BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
+}
+
+/*
+ * Lower a fragment shader into a monolithic pixel shader, with static sample
+ * count, blend state, and tilebuffer formats in the shader key. For dynamic,
+ * epilogs must be used, which have separate lowerings.
+ *
+ * state->nr_samples must be 1, 2, or 4. Always returns true.
+ */
+bool
+agx_nir_lower_monolithic_msaa(nir_shader *shader, struct agx_msaa_state *state)
+{
+   assert(shader->info.stage == MESA_SHADER_FRAGMENT);
+   assert(state->nr_samples == 1 || state->nr_samples == 2 ||
+          state->nr_samples == 4);
+
+   if (shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
+      /* Sample mask writes need to be lowered. This includes an API sample mask
+       * lowering.
+       */
+      nir_shader_instructions_pass(
+         shader, lower_sample_mask_write,
+         nir_metadata_block_index | nir_metadata_dominance, state);
+   } else if ((state->nr_samples > 1) && state->api_sample_mask) {
+      /* If there's no sample mask write, we need to add one of our own for the
+       * API-level sample masking to work.
+       */
+      insert_sample_mask_write(shader);
+   }
+
+   /* Additionally, sample_mask_in needs to account for the API-level mask.
+    * Only do so when API sample mask lowering is enabled and multisampling is
+    * used, the same condition under which a mask write is inserted above.
+    */
+   if ((state->nr_samples > 1) && state->api_sample_mask) {
+      nir_shader_instructions_pass(
+         shader, lower_sample_mask_read,
+         nir_metadata_block_index | nir_metadata_dominance, NULL);
+   }
+
+   /* In single sampled programs, interpolateAtSample needs to return the
+    * center pixel. TODO: Generalize for dynamic sample count.
+    */
+   if (state->nr_samples == 1)
+      nir_lower_single_sampled(shader);
+   else if (shader->info.fs.uses_sample_shading)
+      agx_nir_wrap_per_sample_loop(shader, state->nr_samples);
+
+   return true;
+}
diff --git a/src/asahi/lib/agx_tilebuffer.h b/src/asahi/lib/agx_tilebuffer.h
index 4e57e265ca6..238abb88d7f 100644
--- a/src/asahi/lib/agx_tilebuffer.h
+++ b/src/asahi/lib/agx_tilebuffer.h
@@ -50,6 +50,17 @@ bool agx_nir_lower_tilebuffer(struct nir_shader *shader,
                               struct agx_tilebuffer_layout *tib,
                               uint8_t *colormasks, bool *translucent);
 
+struct agx_msaa_state {
+   /* Static sample count: 1, 2, or 4 */
+   uint8_t nr_samples;
+
+   /* Enable API sample mask lowering (e.g. glSampleMask) */
+   bool api_sample_mask;
+};
+
+bool agx_nir_lower_monolithic_msaa(struct nir_shader *shader,
+                                   struct agx_msaa_state *state);
+
 void agx_usc_tilebuffer(struct agx_usc_builder *b,
                         struct agx_tilebuffer_layout *tib);
 
diff --git a/src/asahi/lib/meson.build b/src/asahi/lib/meson.build
index 131e8ec3907..c63b2840051 100644
--- a/src/asahi/lib/meson.build
+++ b/src/asahi/lib/meson.build
@@ -11,6 +11,7 @@ libasahi_lib_files = files(
   'agx_formats.c',
   'agx_meta.c',
   'agx_tilebuffer.c',
+  'agx_nir_lower_msaa.c',
   'agx_nir_lower_tilebuffer.c',
   'agx_nir_lower_vbo.c',
   'agx_ppp.h',