asahi: Add passes to lower MSAA
Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23480>
This commit is contained in:
committed by
Marge Bot
parent
70b8babe3c
commit
f28962e29a
@@ -0,0 +1,238 @@
|
||||
/*
|
||||
* Copyright 2023 Alyssa Rosenzweig
|
||||
* Copyright 2021 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "agx_tilebuffer.h"
|
||||
#include "nir.h"
|
||||
#include "nir_builder.h"
|
||||
|
||||
/* Sample mask with the low eight bits set. Passed as the first operand of
 * sample_mask_agx so the instruction addresses every sample; the per-sample
 * loop later narrows that operand to a single sample.
 */
#define ALL_SAMPLES (0xFF)
|
||||
|
||||
static bool
|
||||
lower_wrapped(nir_builder *b, nir_instr *instr, void *data)
|
||||
{
|
||||
nir_ssa_def *sample_id = data;
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
return false;
|
||||
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
b->cursor = nir_before_instr(instr);
|
||||
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_load_sample_id: {
|
||||
unsigned size = nir_dest_bit_size(intr->dest);
|
||||
nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_u2uN(b, sample_id, size));
|
||||
nir_instr_remove(instr);
|
||||
return true;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_local_pixel_agx:
|
||||
case nir_intrinsic_store_local_pixel_agx:
|
||||
case nir_intrinsic_store_zs_agx:
|
||||
case nir_intrinsic_sample_mask_agx: {
|
||||
/* Fragment I/O inside the loop should only affect one sample. */
|
||||
unsigned mask_index =
|
||||
(intr->intrinsic == nir_intrinsic_store_local_pixel_agx) ? 1 : 0;
|
||||
|
||||
nir_ssa_def *mask = intr->src[mask_index].ssa;
|
||||
nir_ssa_def *id_mask = nir_ishl(b, nir_imm_intN_t(b, 1, mask->bit_size),
|
||||
nir_u2u32(b, sample_id));
|
||||
nir_src_rewrite_ssa(&intr->src[mask_index], nir_iand(b, mask, id_mask));
|
||||
return true;
|
||||
}
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* In a monolithic pixel shader, we wrap the fragment shader in a loop over
|
||||
* each sample, and then let optimizations (like loop unrolling) go to town.
|
||||
* This lowering is not compatible with fragment epilogues, which require
|
||||
* something similar at the binary level since the NIR is long gone by then.
|
||||
*/
|
||||
static bool
|
||||
agx_nir_wrap_per_sample_loop(nir_shader *shader, uint8_t nr_samples)
|
||||
{
|
||||
assert(nr_samples > 1);
|
||||
|
||||
/* Get the original function */
|
||||
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
|
||||
|
||||
nir_cf_list list;
|
||||
nir_cf_extract(&list, nir_before_block(nir_start_block(impl)),
|
||||
nir_after_block(nir_impl_last_block(impl)));
|
||||
|
||||
/* Create a builder for the wrapped function */
|
||||
nir_builder b;
|
||||
nir_builder_init(&b, impl);
|
||||
b.cursor = nir_after_block(nir_start_block(impl));
|
||||
|
||||
nir_variable *i =
|
||||
nir_local_variable_create(impl, glsl_uintN_t_type(16), NULL);
|
||||
nir_store_var(&b, i, nir_imm_intN_t(&b, 0, 16), ~0);
|
||||
nir_ssa_def *index = NULL;
|
||||
|
||||
/* Create a loop in the wrapped function */
|
||||
nir_loop *loop = nir_push_loop(&b);
|
||||
{
|
||||
index = nir_load_var(&b, i);
|
||||
nir_push_if(&b, nir_uge(&b, index, nir_imm_intN_t(&b, nr_samples, 16)));
|
||||
{
|
||||
nir_jump(&b, nir_jump_break);
|
||||
}
|
||||
nir_pop_if(&b, NULL);
|
||||
|
||||
b.cursor = nir_cf_reinsert(&list, b.cursor);
|
||||
nir_store_var(&b, i, nir_iadd_imm(&b, index, 1), ~0);
|
||||
}
|
||||
nir_pop_loop(&b, loop);
|
||||
|
||||
/* We've mucked about with control flow */
|
||||
nir_metadata_preserve(impl, nir_metadata_none);
|
||||
|
||||
/* Use the loop counter as the sample ID each iteration */
|
||||
nir_shader_instructions_pass(
|
||||
shader, lower_wrapped, nir_metadata_block_index | nir_metadata_dominance,
|
||||
index);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_sample_mask_write(nir_builder *b, nir_instr *instr, void *data)
|
||||
{
|
||||
struct agx_msaa_state *state = data;
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
return false;
|
||||
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
b->cursor = nir_before_instr(instr);
|
||||
|
||||
nir_ssa_def *mask;
|
||||
if (intr->intrinsic == nir_intrinsic_sample_mask_agx) {
|
||||
/* For alpha-to-coverage */
|
||||
assert(nir_src_as_uint(intr->src[0]) == ALL_SAMPLES && "not wrapped");
|
||||
mask = intr->src[1].ssa;
|
||||
} else if (intr->intrinsic == nir_intrinsic_store_output) {
|
||||
nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
|
||||
if (sem.location != FRAG_RESULT_SAMPLE_MASK)
|
||||
return false;
|
||||
|
||||
/* Sample mask writes are ignored unless multisampling is used. */
|
||||
if (state->nr_samples == 1) {
|
||||
nir_instr_remove(instr);
|
||||
return true;
|
||||
}
|
||||
|
||||
mask = nir_u2u16(b, intr->src[0].ssa);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* The Vulkan spec says:
|
||||
*
|
||||
* If sample shading is enabled, bits written to SampleMask
|
||||
* corresponding to samples that are not being shaded by the fragment
|
||||
* shader invocation are ignored.
|
||||
*
|
||||
* That will be satisfied by outputting gl_SampleMask for the whole pixel
|
||||
* and then lowering sample shading after (splitting up sample_mask
|
||||
* targets).
|
||||
*/
|
||||
if (state->api_sample_mask)
|
||||
mask = nir_iand(b, mask, nir_load_api_sample_mask_agx(b));
|
||||
|
||||
nir_sample_mask_agx(b, nir_imm_intN_t(b, ALL_SAMPLES, 16), mask);
|
||||
nir_instr_remove(instr);
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Apply API sample mask to sample mask inputs, lowering:
|
||||
*
|
||||
* sample_mask_in --> sample_mask_in & api_sample_mask
|
||||
*/
|
||||
static bool
|
||||
lower_sample_mask_read(nir_builder *b, nir_instr *instr, UNUSED void *_)
|
||||
{
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
return false;
|
||||
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
b->cursor = nir_after_instr(instr);
|
||||
|
||||
if (intr->intrinsic != nir_intrinsic_load_sample_mask_in)
|
||||
return false;
|
||||
|
||||
nir_ssa_def *old = &intr->dest.ssa;
|
||||
nir_ssa_def *lowered = nir_iand(
|
||||
b, old, nir_u2uN(b, nir_load_api_sample_mask_agx(b), old->bit_size));
|
||||
|
||||
nir_ssa_def_rewrite_uses_after(old, lowered, lowered->parent_instr);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* glSampleMask(x) --> gl_SampleMask = x */
|
||||
static void
|
||||
insert_sample_mask_write(nir_shader *s)
|
||||
{
|
||||
/* nir_lower_io_to_temporaries ensures that stores are in the last block */
|
||||
nir_function_impl *impl = nir_shader_get_entrypoint(s);
|
||||
|
||||
nir_builder b;
|
||||
nir_builder_init(&b, impl);
|
||||
b.cursor = nir_before_block(nir_start_block(impl));
|
||||
|
||||
/* Load the desired API sample mask */
|
||||
nir_ssa_def *api_sample_mask = nir_load_api_sample_mask_agx(&b);
|
||||
|
||||
/* Kill samples that are not covered by the mask using the AGX instruction */
|
||||
nir_ssa_def *all_samples = nir_imm_intN_t(&b, ALL_SAMPLES, 16);
|
||||
nir_sample_mask_agx(&b, all_samples, api_sample_mask);
|
||||
s->info.outputs_written |= BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
|
||||
}
|
||||
|
||||
/*
|
||||
* Lower a fragment shader into a monolithic pixel shader, with static sample
|
||||
* count, blend state, and tilebuffer formats in the shader key. For dynamic,
|
||||
* epilogs must be used, which have separate lowerings.
|
||||
*/
|
||||
bool
|
||||
agx_nir_lower_monolithic_msaa(nir_shader *shader, struct agx_msaa_state *state)
|
||||
{
|
||||
assert(shader->info.stage == MESA_SHADER_FRAGMENT);
|
||||
assert(state->nr_samples == 1 || state->nr_samples == 2 ||
|
||||
state->nr_samples == 4);
|
||||
|
||||
if (shader->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
|
||||
/* Sample mask writes need to be lowered. This includes an API sample mask
|
||||
* lowering.
|
||||
*/
|
||||
nir_shader_instructions_pass(
|
||||
shader, lower_sample_mask_write,
|
||||
nir_metadata_block_index | nir_metadata_dominance, state);
|
||||
} else if ((state->nr_samples > 1) && state->api_sample_mask) {
|
||||
/* If there's no sample mask write, we need to add one of our own for the
|
||||
* API-level sample masking to work.
|
||||
*/
|
||||
insert_sample_mask_write(shader);
|
||||
}
|
||||
|
||||
/* Additional, sample_mask_in needs to account for the API-level mask */
|
||||
nir_shader_instructions_pass(
|
||||
shader, lower_sample_mask_read,
|
||||
nir_metadata_block_index | nir_metadata_dominance, &state->nr_samples);
|
||||
|
||||
/* In single sampled programs, interpolateAtSample needs to return the
|
||||
* center pixel. TODO: Generalize for dynamic sample count.
|
||||
*/
|
||||
if (state->nr_samples == 1)
|
||||
nir_lower_single_sampled(shader);
|
||||
else if (shader->info.fs.uses_sample_shading)
|
||||
agx_nir_wrap_per_sample_loop(shader, state->nr_samples);
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -50,6 +50,16 @@ bool agx_nir_lower_tilebuffer(struct nir_shader *shader,
|
||||
struct agx_tilebuffer_layout *tib,
|
||||
uint8_t *colormasks, bool *translucent);
|
||||
|
||||
/* Multisampling state consumed by agx_nir_lower_monolithic_msaa() */
struct agx_msaa_state {
   /* Number of samples the shader is lowered for. The lowering asserts that
    * this is 1, 2 or 4.
    */
   uint8_t nr_samples;

   /* Enable API sample mask lowering (e.g. glSampleMask) */
   bool api_sample_mask;
};

/* Lower a fragment shader's multisampling operations for the static state
 * described by `state`. Returns true.
 */
bool agx_nir_lower_monolithic_msaa(struct nir_shader *shader,
                                   struct agx_msaa_state *state);
|
||||
|
||||
void agx_usc_tilebuffer(struct agx_usc_builder *b,
|
||||
struct agx_tilebuffer_layout *tib);
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@ libasahi_lib_files = files(
|
||||
'agx_formats.c',
|
||||
'agx_meta.c',
|
||||
'agx_tilebuffer.c',
|
||||
'agx_nir_lower_msaa.c',
|
||||
'agx_nir_lower_tilebuffer.c',
|
||||
'agx_nir_lower_vbo.c',
|
||||
'agx_ppp.h',
|
||||
|
||||
Reference in New Issue
Block a user