nir/tcs_info: gather for all patch outputs whether they're written by all invocs
This substantially rewrites the pass. It also makes it easier to read. Tested thoroughly by a shader test. Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35195>
This commit is contained in:
@@ -17,20 +17,6 @@ get_tess_level_component(nir_intrinsic_instr *intr)
|
||||
nir_intrinsic_component(intr);
|
||||
}
|
||||
|
||||
static unsigned
|
||||
get_inst_tesslevel_writemask(nir_intrinsic_instr *intr)
|
||||
{
|
||||
if (intr->intrinsic != nir_intrinsic_store_output)
|
||||
return 0;
|
||||
|
||||
unsigned location = nir_intrinsic_io_semantics(intr).location;
|
||||
if (location != VARYING_SLOT_TESS_LEVEL_OUTER &&
|
||||
location != VARYING_SLOT_TESS_LEVEL_INNER)
|
||||
return 0;
|
||||
|
||||
return nir_intrinsic_write_mask(intr) << get_tess_level_component(intr);
|
||||
}
|
||||
|
||||
static bool
|
||||
is_tcs_output_barrier(nir_intrinsic_instr *intr)
|
||||
{
|
||||
@@ -40,78 +26,143 @@ is_tcs_output_barrier(nir_intrinsic_instr *intr)
|
||||
nir_intrinsic_execution_scope(intr) >= SCOPE_WORKGROUP;
|
||||
}
|
||||
|
||||
/* 32 patch outputs + 2 tess level outputs with 8 channels per output.
|
||||
* The last 4 channels are for high 16 bits of the first 4 channels.
|
||||
*/
|
||||
#define NUM_OUTPUTS 34
|
||||
#define NUM_BITS (NUM_OUTPUTS * 8)
|
||||
|
||||
struct writemasks {
|
||||
BITSET_DECLARE(chan_mask, NUM_BITS);
|
||||
};
|
||||
|
||||
static void
|
||||
scan_tess_levels(struct exec_list *cf_list, unsigned *upper_block_tl_writemask,
|
||||
unsigned *cond_block_tl_writemask,
|
||||
bool *all_invocs_define_tess_levels, bool is_nested_cf)
|
||||
accum_result_defined_by_all_invocs(struct writemasks *outer_block_writemasks,
|
||||
struct writemasks *cond_block_writemasks,
|
||||
uint64_t *result_mask)
|
||||
{
|
||||
struct writemasks tmp;
|
||||
|
||||
/* tmp contains those channels that are only written conditionally.
|
||||
* Such channels can't be proven to be written by all invocations.
|
||||
*
|
||||
* tmp = cond_block_writemasks & ~outer_block_writemasks
|
||||
*/
|
||||
BITSET_COPY(tmp.chan_mask, outer_block_writemasks->chan_mask);
|
||||
BITSET_NOT(tmp.chan_mask);
|
||||
BITSET_AND(tmp.chan_mask,
|
||||
cond_block_writemasks->chan_mask, tmp.chan_mask);
|
||||
|
||||
/* Mark outputs as not written by all invocations if they are written
|
||||
* conditionally.
|
||||
*/
|
||||
unsigned i;
|
||||
BITSET_FOREACH_SET(i, tmp.chan_mask, NUM_BITS) {
|
||||
*result_mask &= ~BITFIELD64_BIT(i / 8);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
scan_cf_list_defined_by_all_invocs(struct exec_list *cf_list,
|
||||
struct writemasks *outer_block_writemasks,
|
||||
struct writemasks *cond_block_writemasks,
|
||||
uint64_t *result_mask, bool is_nested_cf)
|
||||
{
|
||||
foreach_list_typed(nir_cf_node, cf_node, node, cf_list) {
|
||||
switch (cf_node->type) {
|
||||
case nir_cf_node_block: {
|
||||
nir_block *block = nir_cf_node_as_block(cf_node);
|
||||
nir_foreach_instr(instr, block) {
|
||||
case nir_cf_node_block:
|
||||
nir_foreach_instr(instr, nir_cf_node_as_block(cf_node)) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
|
||||
if (!is_tcs_output_barrier(intrin)) {
|
||||
*upper_block_tl_writemask |= get_inst_tesslevel_writemask(intrin);
|
||||
if (is_tcs_output_barrier(intrin)) {
|
||||
/* This is a barrier. If it's in nested control flow, put this
|
||||
* in the too hard basket. In GLSL this is not possible but it is
|
||||
* in SPIR-V.
|
||||
*/
|
||||
if (is_nested_cf) {
|
||||
*result_mask = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
/* The following case must be prevented:
|
||||
* gl_TessLevelInner = ...;
|
||||
* barrier();
|
||||
* if (gl_InvocationID == 1)
|
||||
* gl_TessLevelInner = ...;
|
||||
*
|
||||
* If you consider disjoint code segments separated by barriers,
|
||||
* each such segment that writes patch output channels should write
|
||||
* the same channels in all codepaths within that segment.
|
||||
*/
|
||||
if (!BITSET_IS_EMPTY(outer_block_writemasks->chan_mask) ||
|
||||
!BITSET_IS_EMPTY(cond_block_writemasks->chan_mask)) {
|
||||
accum_result_defined_by_all_invocs(outer_block_writemasks,
|
||||
cond_block_writemasks,
|
||||
result_mask);
|
||||
|
||||
/* Analyze the next code segment from scratch. */
|
||||
BITSET_ZERO(outer_block_writemasks->chan_mask);
|
||||
BITSET_ZERO(cond_block_writemasks->chan_mask);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
/* This is a barrier. If it's in nested control flow, put this
|
||||
* in the too hard basket. In GLSL this is not possible but it is
|
||||
* in SPIR-V.
|
||||
*/
|
||||
if (is_nested_cf) {
|
||||
*all_invocs_define_tess_levels = false;
|
||||
return;
|
||||
}
|
||||
if (intrin->intrinsic == nir_intrinsic_store_output) {
|
||||
nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
|
||||
|
||||
/* The following case must be prevented:
|
||||
* gl_TessLevelInner = ...;
|
||||
* barrier();
|
||||
* if (gl_InvocationID == 1)
|
||||
* gl_TessLevelInner = ...;
|
||||
*
|
||||
* If you consider disjoint code segments separated by barriers,
|
||||
* each such segment that writes tess level channels should write
|
||||
* the same channels in all codepaths within that segment.
|
||||
*/
|
||||
if (*upper_block_tl_writemask || *cond_block_tl_writemask) {
|
||||
/* Accumulate the result: */
|
||||
*all_invocs_define_tess_levels &=
|
||||
!(*cond_block_tl_writemask & ~(*upper_block_tl_writemask));
|
||||
if (sem.location == VARYING_SLOT_TESS_LEVEL_OUTER ||
|
||||
sem.location == VARYING_SLOT_TESS_LEVEL_INNER ||
|
||||
(sem.location >= VARYING_SLOT_PATCH0 &&
|
||||
sem.location <= VARYING_SLOT_PATCH31)) {
|
||||
unsigned index = sem.location >= VARYING_SLOT_PATCH0 ?
|
||||
sem.location - VARYING_SLOT_PATCH0 :
|
||||
(32 + sem.location - VARYING_SLOT_TESS_LEVEL_OUTER);
|
||||
unsigned writemask = nir_intrinsic_write_mask(intrin) <<
|
||||
(nir_intrinsic_component(intrin) +
|
||||
sem.high_16bits * 4);
|
||||
|
||||
/* Analyze the next code segment from scratch. */
|
||||
*upper_block_tl_writemask = 0;
|
||||
*cond_block_tl_writemask = 0;
|
||||
u_foreach_bit(i, writemask) {
|
||||
BITSET_SET(outer_block_writemasks->chan_mask, index * 8 + i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_cf_node_if: {
|
||||
unsigned then_tesslevel_writemask = 0;
|
||||
unsigned else_tesslevel_writemask = 0;
|
||||
struct writemasks then_writemasks = {0};
|
||||
struct writemasks else_writemasks = {0};
|
||||
nir_if *if_stmt = nir_cf_node_as_if(cf_node);
|
||||
|
||||
scan_tess_levels(&if_stmt->then_list, &then_tesslevel_writemask,
|
||||
cond_block_tl_writemask,
|
||||
all_invocs_define_tess_levels, true);
|
||||
scan_cf_list_defined_by_all_invocs(&if_stmt->then_list, &then_writemasks,
|
||||
cond_block_writemasks, result_mask,
|
||||
true);
|
||||
|
||||
scan_tess_levels(&if_stmt->else_list, &else_tesslevel_writemask,
|
||||
cond_block_tl_writemask,
|
||||
all_invocs_define_tess_levels, true);
|
||||
scan_cf_list_defined_by_all_invocs(&if_stmt->else_list, &else_writemasks,
|
||||
cond_block_writemasks, result_mask,
|
||||
true);
|
||||
|
||||
if (then_tesslevel_writemask || else_tesslevel_writemask) {
|
||||
if (!BITSET_IS_EMPTY(then_writemasks.chan_mask) ||
|
||||
!BITSET_IS_EMPTY(else_writemasks.chan_mask)) {
|
||||
/* If both statements write the same tess level channels,
|
||||
* we can say that the upper block writes them too.
|
||||
* we can say that the outer block writes them too.
|
||||
*/
|
||||
*upper_block_tl_writemask |= then_tesslevel_writemask &
|
||||
else_tesslevel_writemask;
|
||||
*cond_block_tl_writemask |= then_tesslevel_writemask |
|
||||
else_tesslevel_writemask;
|
||||
struct writemasks tmp;
|
||||
|
||||
/* outer_block_writemasks |= then_writemasks & else_writemasks */
|
||||
BITSET_AND(tmp.chan_mask,
|
||||
then_writemasks.chan_mask, else_writemasks.chan_mask);
|
||||
BITSET_OR(outer_block_writemasks->chan_mask,
|
||||
outer_block_writemasks->chan_mask, tmp.chan_mask);
|
||||
|
||||
/* cond_block_writemasks |= then_writemasks | else_writemasks */
|
||||
BITSET_OR(tmp.chan_mask,
|
||||
then_writemasks.chan_mask, else_writemasks.chan_mask);
|
||||
BITSET_OR(cond_block_writemasks->chan_mask,
|
||||
cond_block_writemasks->chan_mask, tmp.chan_mask);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -119,9 +170,9 @@ scan_tess_levels(struct exec_list *cf_list, unsigned *upper_block_tl_writemask,
|
||||
nir_loop *loop = nir_cf_node_as_loop(cf_node);
|
||||
assert(!nir_loop_has_continue_construct(loop));
|
||||
|
||||
scan_tess_levels(&loop->body, cond_block_tl_writemask,
|
||||
cond_block_tl_writemask,
|
||||
all_invocs_define_tess_levels, true);
|
||||
scan_cf_list_defined_by_all_invocs(&loop->body, cond_block_writemasks,
|
||||
cond_block_writemasks, result_mask,
|
||||
true);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
@@ -130,40 +181,62 @@ scan_tess_levels(struct exec_list *cf_list, unsigned *upper_block_tl_writemask,
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
all_invocations_define_tess_levels(const struct nir_shader *nir)
|
||||
static void
|
||||
analyze_patch_outputs(const struct nir_shader *nir, nir_tcs_info *info)
|
||||
{
|
||||
assert(nir->info.stage == MESA_SHADER_TESS_CTRL);
|
||||
unsigned tess_levels_written =
|
||||
(nir->info.outputs_written & VARYING_BIT_TESS_LEVEL_OUTER ? 0x1 : 0) |
|
||||
(nir->info.outputs_written & VARYING_BIT_TESS_LEVEL_INNER ? 0x2 : 0);
|
||||
|
||||
/* Trivial case, nothing to do. */
|
||||
if (nir->info.tess.tcs_vertices_out == 1) {
|
||||
info->patch_outputs_defined_by_all_invoc = nir->info.patch_outputs_written;
|
||||
info->all_invocations_define_tess_levels = true;
|
||||
info->tess_levels_defined_by_all_invoc = tess_levels_written;
|
||||
return;
|
||||
}
|
||||
|
||||
/* The pass works as follows:
|
||||
*
|
||||
* If all codepaths write tess levels, we can say that all invocations
|
||||
* define tess level values. Whether a tess level value is defined is
|
||||
* If all codepaths write patch outputs, we can say that all invocations
|
||||
* define patch output values. Whether a patch output value is defined is
|
||||
* determined for each component separately.
|
||||
*/
|
||||
unsigned main_block_tl_writemask = 0; /* if main block writes tess levels */
|
||||
unsigned cond_block_tl_writemask = 0; /* if cond block writes tess levels */
|
||||
struct writemasks main_block_writemasks = {0}; /* if main block writes per-patch outputs */
|
||||
struct writemasks cond_block_writemasks = {0}; /* if cond block writes per-patch outputs */
|
||||
|
||||
/* Initial value = true. Here the pass will accumulate results from
|
||||
* multiple segments surrounded by barriers. If tess levels aren't
|
||||
* multiple segments surrounded by barriers. If patch outputs aren't
|
||||
* written at all, it's a shader bug and we don't care if this will be
|
||||
* true.
|
||||
*/
|
||||
bool result = true;
|
||||
uint64_t result_mask = BITFIELD64_MASK(NUM_OUTPUTS);
|
||||
|
||||
nir_foreach_function_impl(impl, nir) {
|
||||
scan_tess_levels(&impl->body, &main_block_tl_writemask,
|
||||
&cond_block_tl_writemask,
|
||||
&result, false);
|
||||
scan_cf_list_defined_by_all_invocs(&impl->body, &main_block_writemasks,
|
||||
&cond_block_writemasks, &result_mask,
|
||||
false);
|
||||
}
|
||||
|
||||
/* Accumulate the result for the last code segment separated by a
|
||||
* barrier.
|
||||
*/
|
||||
if (main_block_tl_writemask || cond_block_tl_writemask)
|
||||
result &= !(cond_block_tl_writemask & ~main_block_tl_writemask);
|
||||
if (!BITSET_IS_EMPTY(main_block_writemasks.chan_mask) ||
|
||||
!BITSET_IS_EMPTY(cond_block_writemasks.chan_mask)) {
|
||||
accum_result_defined_by_all_invocs(&main_block_writemasks,
|
||||
&cond_block_writemasks, &result_mask);
|
||||
}
|
||||
|
||||
return result;
|
||||
/* Bits of unwritten outputs are still set in result_mask at this point;
|
||||
* bits are cleared only for outputs with conditionally written channels.
|
||||
*/
|
||||
info->patch_outputs_defined_by_all_invoc =
|
||||
result_mask & nir->info.patch_outputs_written;
|
||||
info->tess_levels_defined_by_all_invoc =
|
||||
(result_mask >> 32) & tess_levels_written;
|
||||
info->all_invocations_define_tess_levels =
|
||||
info->tess_levels_defined_by_all_invoc == tess_levels_written;
|
||||
}
|
||||
|
||||
/* It's OK to pass UNSPECIFIED to prim and spacing. */
|
||||
@@ -173,8 +246,7 @@ nir_gather_tcs_info(const nir_shader *nir, nir_tcs_info *info,
|
||||
enum gl_tess_spacing spacing)
|
||||
{
|
||||
memset(info, 0, sizeof(*info));
|
||||
info->all_invocations_define_tess_levels =
|
||||
all_invocations_define_tess_levels(nir);
|
||||
analyze_patch_outputs(nir, info);
|
||||
|
||||
unsigned tess_level_writes_le_zero = 0;
|
||||
unsigned tess_level_writes_le_one = 0;
|
||||
|
||||
@@ -36,36 +36,45 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct nir_tcs_info {
|
||||
/* Whether all invocations write tess level outputs.
|
||||
*
|
||||
* This is useful when a pass wants to read tess level values at the end
|
||||
* of the shader. If this is true, the pass doesn't have to insert a barrier
|
||||
* and use output loads, it can just use the SSA defs that are being stored
|
||||
* (or phis thereof) to get the tess level output values.
|
||||
/* The bitmask of patch outputs that are always written by all invocations
|
||||
* in all execution paths.
|
||||
*
|
||||
* This is useful when a pass wants to read patch output values at the end
|
||||
* of the shader. If an output's bit is set, the pass doesn't have to insert a barrier
|
||||
* and use output loads, it can just use the SSA defs that are being stored
|
||||
* (or phis thereof) to get the patch output values.
|
||||
*/
|
||||
uint32_t patch_outputs_defined_by_all_invoc;
|
||||
|
||||
/* The bitmask of tess level outputs that are written by all invocations.
|
||||
* Bit 0 is outer levels, bit 1 is inner levels.
|
||||
*/
|
||||
bool all_invocations_define_tess_levels;
|
||||
uint8_t tess_levels_defined_by_all_invoc : 2;
|
||||
|
||||
/* Whether every tess level output that is written is written by all invocations. */
|
||||
bool all_invocations_define_tess_levels : 1;
|
||||
|
||||
/* Whether any of the outer tess level components is effectively 0, meaning
|
||||
* that the shader discards the patch. NaNs and negative values are included
|
||||
* in this. If the patch is discarded, inner tess levels have no effect.
|
||||
*/
|
||||
bool all_tess_levels_are_effectively_zero;
|
||||
bool all_tess_levels_are_effectively_zero : 1;
|
||||
|
||||
/* Whether all tess levels are effectively 1, meaning that the tessellator
|
||||
* behaves as if they were 1. There is a range of values that lead to that
|
||||
* behavior depending on the tessellation spacing.
|
||||
*/
|
||||
bool all_tess_levels_are_effectively_one;
|
||||
bool all_tess_levels_are_effectively_one : 1;
|
||||
|
||||
/* Whether the shader uses a barrier synchronizing TCS output stores.
|
||||
* For example, passes that write an output at the beginning of the shader
|
||||
* and load it at the end can use this to determine whether they have to
|
||||
* insert a barrier or whether the shader already contains a barrier.
|
||||
*/
|
||||
bool always_executes_barrier;
|
||||
bool always_executes_barrier : 1;
|
||||
|
||||
/* Whether outer tess levels <= 0 are written anywhere in the shader. */
|
||||
bool discards_patches;
|
||||
bool discards_patches : 1;
|
||||
} nir_tcs_info;
|
||||
|
||||
void
|
||||
|
||||
Reference in New Issue
Block a user