nir/tcs_info: gather for all patch outputs whether they're written by all invocs

This substantially rewrites the pass. It also makes it easier to read.
Tested thoroughly by a shader test.

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35195>
This commit is contained in:
Marek Olšák
2025-04-21 08:16:33 -04:00
committed by Marge Bot
parent 6b265d9d7d
commit a3632d7d88
2 changed files with 173 additions and 92 deletions
+153 -81
View File
@@ -17,20 +17,6 @@ get_tess_level_component(nir_intrinsic_instr *intr)
nir_intrinsic_component(intr);
}
/* Return the tess level channels written by "intr".
 *
 * The result is the store's write mask shifted left by
 * get_tess_level_component(intr). It is 0 for anything that isn't a
 * store_output to VARYING_SLOT_TESS_LEVEL_OUTER or
 * VARYING_SLOT_TESS_LEVEL_INNER.
 */
static unsigned
get_inst_tesslevel_writemask(nir_intrinsic_instr *intr)
{
   /* Only output stores can contribute to the mask. */
   if (intr->intrinsic == nir_intrinsic_store_output) {
      switch (nir_intrinsic_io_semantics(intr).location) {
      case VARYING_SLOT_TESS_LEVEL_OUTER:
      case VARYING_SLOT_TESS_LEVEL_INNER:
         return nir_intrinsic_write_mask(intr) <<
                get_tess_level_component(intr);
      default:
         /* Some other output; it's not a tess level. */
         break;
      }
   }

   return 0;
}
static bool
is_tcs_output_barrier(nir_intrinsic_instr *intr)
{
@@ -40,78 +26,143 @@ is_tcs_output_barrier(nir_intrinsic_instr *intr)
nir_intrinsic_execution_scope(intr) >= SCOPE_WORKGROUP;
}
/* 32 patch outputs + 2 tess level outputs with 8 channels per output.
* The last 4 channels are for high 16 bits of the first 4 channels.
*/
#define NUM_OUTPUTS 34
#define NUM_BITS (NUM_OUTPUTS * 8)
/* Per-channel write tracking for the analyzed outputs.
 *
 * The bitset holds NUM_BITS bits, i.e. 8 channel bits for each of the
 * NUM_OUTPUTS outputs. The scan sets bit (index * 8 + channel) when it sees
 * a store to that channel, where "index" is the patch output number for
 * VARYING_SLOT_PATCHn and starts at 32 for the tess level outputs.
 */
struct writemasks {
BITSET_DECLARE(chan_mask, NUM_BITS);
};
static void
scan_tess_levels(struct exec_list *cf_list, unsigned *upper_block_tl_writemask,
unsigned *cond_block_tl_writemask,
bool *all_invocs_define_tess_levels, bool is_nested_cf)
accum_result_defined_by_all_invocs(struct writemasks *outer_block_writemasks,
                                   struct writemasks *cond_block_writemasks,
                                   uint64_t *result_mask)
{
   /* Fold the write masks of one code segment into the result.
    *
    * A channel that is written in conditional code but not by the outer
    * block (cond_block_writemasks & ~outer_block_writemasks) can't be proven
    * to be written by all invocations, so the output that owns such
    * a channel is removed from *result_mask.
    */
   unsigned chan;

   BITSET_FOREACH_SET(chan, cond_block_writemasks->chan_mask, NUM_BITS) {
      /* Channels also written by the outer block are written by everybody. */
      if (BITSET_TEST(outer_block_writemasks->chan_mask, chan))
         continue;

      /* There are 8 channel bits per output, so chan / 8 is the index of
       * the output in *result_mask.
       */
      *result_mask &= ~BITFIELD64_BIT(chan / 8);
   }
}
static void
scan_cf_list_defined_by_all_invocs(struct exec_list *cf_list,
struct writemasks *outer_block_writemasks,
struct writemasks *cond_block_writemasks,
uint64_t *result_mask, bool is_nested_cf)
{
foreach_list_typed(nir_cf_node, cf_node, node, cf_list) {
switch (cf_node->type) {
case nir_cf_node_block: {
nir_block *block = nir_cf_node_as_block(cf_node);
nir_foreach_instr(instr, block) {
case nir_cf_node_block:
nir_foreach_instr(instr, nir_cf_node_as_block(cf_node)) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
if (!is_tcs_output_barrier(intrin)) {
*upper_block_tl_writemask |= get_inst_tesslevel_writemask(intrin);
if (is_tcs_output_barrier(intrin)) {
/* This is a barrier. If it's in nested control flow, put this
* in the too hard basket. In GLSL this is not possible but it is
* in SPIR-V.
*/
if (is_nested_cf) {
*result_mask = 0;
return;
}
/* The following case must be prevented:
* gl_TessLevelInner = ...;
* barrier();
* if (gl_InvocationID == 1)
* gl_TessLevelInner = ...;
*
* If you consider disjoint code segments separated by barriers,
* each such segment that writes patch output channels should write
* the same channels in all codepaths within that segment.
*/
if (!BITSET_IS_EMPTY(outer_block_writemasks->chan_mask) ||
!BITSET_IS_EMPTY(cond_block_writemasks->chan_mask)) {
accum_result_defined_by_all_invocs(outer_block_writemasks,
cond_block_writemasks,
result_mask);
/* Analyze the next code segment from scratch. */
BITSET_ZERO(outer_block_writemasks->chan_mask);
BITSET_ZERO(cond_block_writemasks->chan_mask);
}
continue;
}
/* This is a barrier. If it's in nested control flow, put this
* in the too hard basket. In GLSL this is not possible but it is
* in SPIR-V.
*/
if (is_nested_cf) {
*all_invocs_define_tess_levels = false;
return;
}
if (intrin->intrinsic == nir_intrinsic_store_output) {
nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
/* The following case must be prevented:
* gl_TessLevelInner = ...;
* barrier();
* if (gl_InvocationID == 1)
* gl_TessLevelInner = ...;
*
* If you consider disjoint code segments separated by barriers,
* each such segment that writes tess level channels should write
* the same channels in all codepaths within that segment.
*/
if (*upper_block_tl_writemask || *cond_block_tl_writemask) {
/* Accumulate the result: */
*all_invocs_define_tess_levels &=
!(*cond_block_tl_writemask & ~(*upper_block_tl_writemask));
if (sem.location == VARYING_SLOT_TESS_LEVEL_OUTER ||
sem.location == VARYING_SLOT_TESS_LEVEL_INNER ||
(sem.location >= VARYING_SLOT_PATCH0 &&
sem.location <= VARYING_SLOT_PATCH31)) {
unsigned index = sem.location >= VARYING_SLOT_PATCH0 ?
sem.location - VARYING_SLOT_PATCH0 :
(32 + sem.location - VARYING_SLOT_TESS_LEVEL_OUTER);
unsigned writemask = nir_intrinsic_write_mask(intrin) <<
(nir_intrinsic_component(intrin) +
sem.high_16bits * 4);
/* Analyze the next code segment from scratch. */
*upper_block_tl_writemask = 0;
*cond_block_tl_writemask = 0;
u_foreach_bit(i, writemask) {
BITSET_SET(outer_block_writemasks->chan_mask, index * 8 + i);
}
}
}
}
break;
}
case nir_cf_node_if: {
unsigned then_tesslevel_writemask = 0;
unsigned else_tesslevel_writemask = 0;
struct writemasks then_writemasks = {0};
struct writemasks else_writemasks = {0};
nir_if *if_stmt = nir_cf_node_as_if(cf_node);
scan_tess_levels(&if_stmt->then_list, &then_tesslevel_writemask,
cond_block_tl_writemask,
all_invocs_define_tess_levels, true);
scan_cf_list_defined_by_all_invocs(&if_stmt->then_list, &then_writemasks,
cond_block_writemasks, result_mask,
true);
scan_tess_levels(&if_stmt->else_list, &else_tesslevel_writemask,
cond_block_tl_writemask,
all_invocs_define_tess_levels, true);
scan_cf_list_defined_by_all_invocs(&if_stmt->else_list, &else_writemasks,
cond_block_writemasks, result_mask,
true);
if (then_tesslevel_writemask || else_tesslevel_writemask) {
if (!BITSET_IS_EMPTY(then_writemasks.chan_mask) ||
!BITSET_IS_EMPTY(else_writemasks.chan_mask)) {
/* If both branches write the same output channels,
* we can say that the upper block writes them too.
* we can say that the outer block writes them too.
*/
*upper_block_tl_writemask |= then_tesslevel_writemask &
else_tesslevel_writemask;
*cond_block_tl_writemask |= then_tesslevel_writemask |
else_tesslevel_writemask;
struct writemasks tmp;
/* outer_block_writemasks |= then_writemasks & else_writemasks */
BITSET_AND(tmp.chan_mask,
then_writemasks.chan_mask, else_writemasks.chan_mask);
BITSET_OR(outer_block_writemasks->chan_mask,
outer_block_writemasks->chan_mask, tmp.chan_mask);
/* cond_block_writemasks |= then_writemasks | else_writemasks */
BITSET_OR(tmp.chan_mask,
then_writemasks.chan_mask, else_writemasks.chan_mask);
BITSET_OR(cond_block_writemasks->chan_mask,
cond_block_writemasks->chan_mask, tmp.chan_mask);
}
break;
}
@@ -119,9 +170,9 @@ scan_tess_levels(struct exec_list *cf_list, unsigned *upper_block_tl_writemask,
nir_loop *loop = nir_cf_node_as_loop(cf_node);
assert(!nir_loop_has_continue_construct(loop));
scan_tess_levels(&loop->body, cond_block_tl_writemask,
cond_block_tl_writemask,
all_invocs_define_tess_levels, true);
scan_cf_list_defined_by_all_invocs(&loop->body, cond_block_writemasks,
cond_block_writemasks, result_mask,
true);
break;
}
default:
@@ -130,40 +181,62 @@ scan_tess_levels(struct exec_list *cf_list, unsigned *upper_block_tl_writemask,
}
}
static bool
all_invocations_define_tess_levels(const struct nir_shader *nir)
static void
analyze_patch_outputs(const struct nir_shader *nir, nir_tcs_info *info)
{
assert(nir->info.stage == MESA_SHADER_TESS_CTRL);
unsigned tess_levels_written =
(nir->info.outputs_written & VARYING_BIT_TESS_LEVEL_OUTER ? 0x1 : 0) |
(nir->info.outputs_written & VARYING_BIT_TESS_LEVEL_INNER ? 0x2 : 0);
/* Trivial case, nothing to do. */
if (nir->info.tess.tcs_vertices_out == 1) {
info->patch_outputs_defined_by_all_invoc = nir->info.patch_outputs_written;
info->all_invocations_define_tess_levels = true;
info->tess_levels_defined_by_all_invoc = tess_levels_written;
return;
}
/* The pass works as follows:
*
* If all codepaths write tess levels, we can say that all invocations
* define tess level values. Whether a tess level value is defined is
* If all codepaths write patch outputs, we can say that all invocations
* define patch output values. Whether a patch output value is defined is
* determined for each component separately.
*/
unsigned main_block_tl_writemask = 0; /* if main block writes tess levels */
unsigned cond_block_tl_writemask = 0; /* if cond block writes tess levels */
struct writemasks main_block_writemasks = {0}; /* if main block writes per-patch outputs */
struct writemasks cond_block_writemasks = {0}; /* if cond block writes per-patch outputs */
/* Initial value = true. Here the pass will accumulate results from
* multiple segments surrounded by barriers. If tess levels aren't
* multiple segments surrounded by barriers. If patch outputs aren't
* written at all, it's a shader bug and we don't care if this will be
* true.
*/
bool result = true;
uint64_t result_mask = BITFIELD64_MASK(NUM_OUTPUTS);
nir_foreach_function_impl(impl, nir) {
scan_tess_levels(&impl->body, &main_block_tl_writemask,
&cond_block_tl_writemask,
&result, false);
scan_cf_list_defined_by_all_invocs(&impl->body, &main_block_writemasks,
&cond_block_writemasks, &result_mask,
false);
}
/* Accumulate the result for the last code segment separated by a
* barrier.
*/
if (main_block_tl_writemask || cond_block_tl_writemask)
result &= !(cond_block_tl_writemask & ~main_block_tl_writemask);
if (!BITSET_IS_EMPTY(main_block_writemasks.chan_mask) ||
!BITSET_IS_EMPTY(cond_block_writemasks.chan_mask)) {
accum_result_defined_by_all_invocs(&main_block_writemasks,
&cond_block_writemasks, &result_mask);
}
return result;
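/* Bits of outputs that are never written are still set in result_mask
 * because bits are only cleared for outputs that can't be proven to be
 * written by all invocations, so mask the result with the outputs that
 * are actually written.
 */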
info->patch_outputs_defined_by_all_invoc =
result_mask & nir->info.patch_outputs_written;
info->tess_levels_defined_by_all_invoc =
(result_mask >> 32) & tess_levels_written;
info->all_invocations_define_tess_levels =
info->tess_levels_defined_by_all_invoc == tess_levels_written;
}
/* It's OK to pass UNSPECIFIED to prim and spacing. */
@@ -173,8 +246,7 @@ nir_gather_tcs_info(const nir_shader *nir, nir_tcs_info *info,
enum gl_tess_spacing spacing)
{
memset(info, 0, sizeof(*info));
info->all_invocations_define_tess_levels =
all_invocations_define_tess_levels(nir);
analyze_patch_outputs(nir, info);
unsigned tess_level_writes_le_zero = 0;
unsigned tess_level_writes_le_one = 0;
+20 -11
View File
@@ -36,36 +36,45 @@ extern "C" {
#endif
typedef struct nir_tcs_info {
/* Whether all invocations write tess level outputs.
*
* This is useful when a pass wants to read tess level values at the end
* of the shader. If this is true, the pass doesn't have to insert a barrier
* and use output loads, it can just use the SSA defs that are being stored
* (or phis thereof) to get the tess level output values.
/* The bitmask of patch outputs that are always written by all invocations
* in all execution paths.
*
* This is useful when a pass wants to read patch output values at the end
* of the shader. If the bit of an output is set, the pass doesn't have to
* insert a barrier and use output loads for that output; it can just use
* the SSA defs that are being stored (or phis thereof) to get its value.
*/
uint32_t patch_outputs_defined_by_all_invoc;
/* The bitmask of tess level outputs that are written by all invocations.
* Bit 0 is outer levels, bit 1 is inner levels.
*/
bool all_invocations_define_tess_levels;
uint8_t tess_levels_defined_by_all_invoc : 2;
/* Whether all tess levels that are written are written by all invocations. */
bool all_invocations_define_tess_levels : 1;
/* Whether any of the outer tess level components is effectively 0, meaning
* that the shader discards the patch. NaNs and negative values are included
* in this. If the patch is discarded, inner tess levels have no effect.
*/
bool all_tess_levels_are_effectively_zero;
bool all_tess_levels_are_effectively_zero : 1;
/* Whether all tess levels are effectively 1, meaning that the tessellator
* behaves as if they were 1. There is a range of values that lead to that
* behavior depending on the tessellation spacing.
*/
bool all_tess_levels_are_effectively_one;
bool all_tess_levels_are_effectively_one : 1;
/* Whether the shader uses a barrier synchronizing TCS output stores.
* For example, passes that write an output at the beginning of the shader
* and load it at the end can use this to determine whether they have to
* insert a barrier or whether the shader already contains a barrier.
*/
bool always_executes_barrier;
bool always_executes_barrier : 1;
/* Whether outer tess levels <= 0 are written anywhere in the shader. */
bool discards_patches;
bool discards_patches : 1;
} nir_tcs_info;
void