diff --git a/src/compiler/nir/nir_gather_tcs_info.c b/src/compiler/nir/nir_gather_tcs_info.c index b23a3aa859d..543bf7e96ca 100644 --- a/src/compiler/nir/nir_gather_tcs_info.c +++ b/src/compiler/nir/nir_gather_tcs_info.c @@ -17,20 +17,6 @@ get_tess_level_component(nir_intrinsic_instr *intr) nir_intrinsic_component(intr); } -static unsigned -get_inst_tesslevel_writemask(nir_intrinsic_instr *intr) -{ - if (intr->intrinsic != nir_intrinsic_store_output) - return 0; - - unsigned location = nir_intrinsic_io_semantics(intr).location; - if (location != VARYING_SLOT_TESS_LEVEL_OUTER && - location != VARYING_SLOT_TESS_LEVEL_INNER) - return 0; - - return nir_intrinsic_write_mask(intr) << get_tess_level_component(intr); -} - static bool is_tcs_output_barrier(nir_intrinsic_instr *intr) { @@ -40,78 +26,143 @@ is_tcs_output_barrier(nir_intrinsic_instr *intr) nir_intrinsic_execution_scope(intr) >= SCOPE_WORKGROUP; } +/* 32 patch outputs + 2 tess level outputs with 8 channels per output. + * The last 4 channels are for high 16 bits of the first 4 channels. + */ +#define NUM_OUTPUTS 34 +#define NUM_BITS (NUM_OUTPUTS * 8) + +struct writemasks { + BITSET_DECLARE(chan_mask, NUM_BITS); +}; + static void -scan_tess_levels(struct exec_list *cf_list, unsigned *upper_block_tl_writemask, - unsigned *cond_block_tl_writemask, - bool *all_invocs_define_tess_levels, bool is_nested_cf) +accum_result_defined_by_all_invocs(struct writemasks *outer_block_writemasks, + struct writemasks *cond_block_writemasks, + uint64_t *result_mask) +{ + struct writemasks tmp; + + /* tmp contains those channels that are only written conditionally. + * Such channels can't be proven to be written by all invocations. 
+ * + * tmp = cond_block_writemasks & ~outer_block_writemasks + */ + BITSET_COPY(tmp.chan_mask, outer_block_writemasks->chan_mask); + BITSET_NOT(tmp.chan_mask); + BITSET_AND(tmp.chan_mask, + cond_block_writemasks->chan_mask, tmp.chan_mask); + + /* Mark outputs as not written by all invocations if they are written + * conditionally. + */ + unsigned i; + BITSET_FOREACH_SET(i, tmp.chan_mask, NUM_BITS) { + *result_mask &= ~BITFIELD64_BIT(i / 8); + } +} + +static void +scan_cf_list_defined_by_all_invocs(struct exec_list *cf_list, + struct writemasks *outer_block_writemasks, + struct writemasks *cond_block_writemasks, + uint64_t *result_mask, bool is_nested_cf) { foreach_list_typed(nir_cf_node, cf_node, node, cf_list) { switch (cf_node->type) { - case nir_cf_node_block: { - nir_block *block = nir_cf_node_as_block(cf_node); - nir_foreach_instr(instr, block) { + case nir_cf_node_block: + nir_foreach_instr(instr, nir_cf_node_as_block(cf_node)) { if (instr->type != nir_instr_type_intrinsic) continue; nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - if (!is_tcs_output_barrier(intrin)) { - *upper_block_tl_writemask |= get_inst_tesslevel_writemask(intrin); + if (is_tcs_output_barrier(intrin)) { + /* This is a barrier. If it's in nested control flow, put this + * in the too hard basket. In GLSL this is not possible but it is + * in SPIR-V. + */ + if (is_nested_cf) { + *result_mask = 0; + return; + } + + /* The following case must be prevented: + * gl_TessLevelInner = ...; + * barrier(); + * if (gl_InvocationID == 1) + * gl_TessLevelInner = ...; + * + * If you consider disjoint code segments separated by barriers, + * each such segment that writes patch output channels should write + * the same channels in all codepaths within that segment. 
+ */ + if (!BITSET_IS_EMPTY(outer_block_writemasks->chan_mask) || + !BITSET_IS_EMPTY(cond_block_writemasks->chan_mask)) { + accum_result_defined_by_all_invocs(outer_block_writemasks, + cond_block_writemasks, + result_mask); + + /* Analyze the next code segment from scratch. */ + BITSET_ZERO(outer_block_writemasks->chan_mask); + BITSET_ZERO(cond_block_writemasks->chan_mask); + } continue; } - /* This is a barrier. If it's in nested control flow, put this - * in the too hard basket. In GLSL this is not possible but it is - * in SPIR-V. - */ - if (is_nested_cf) { - *all_invocs_define_tess_levels = false; - return; - } + if (intrin->intrinsic == nir_intrinsic_store_output) { + nir_io_semantics sem = nir_intrinsic_io_semantics(intrin); - /* The following case must be prevented: - * gl_TessLevelInner = ...; - * barrier(); - * if (gl_InvocationID == 1) - * gl_TessLevelInner = ...; - * - * If you consider disjoint code segments separated by barriers, - * each such segment that writes tess level channels should write - * the same channels in all codepaths within that segment. - */ - if (*upper_block_tl_writemask || *cond_block_tl_writemask) { - /* Accumulate the result: */ - *all_invocs_define_tess_levels &= - !(*cond_block_tl_writemask & ~(*upper_block_tl_writemask)); + if (sem.location == VARYING_SLOT_TESS_LEVEL_OUTER || + sem.location == VARYING_SLOT_TESS_LEVEL_INNER || + (sem.location >= VARYING_SLOT_PATCH0 && + sem.location <= VARYING_SLOT_PATCH31)) { + unsigned index = sem.location >= VARYING_SLOT_PATCH0 ? + sem.location - VARYING_SLOT_PATCH0 : + (32 + sem.location - VARYING_SLOT_TESS_LEVEL_OUTER); + unsigned writemask = nir_intrinsic_write_mask(intrin) << + (nir_intrinsic_component(intrin) + + sem.high_16bits * 4); - /* Analyze the next code segment from scratch. 
*/ - *upper_block_tl_writemask = 0; - *cond_block_tl_writemask = 0; + u_foreach_bit(i, writemask) { + BITSET_SET(outer_block_writemasks->chan_mask, index * 8 + i); + } + } } } break; - } + case nir_cf_node_if: { - unsigned then_tesslevel_writemask = 0; - unsigned else_tesslevel_writemask = 0; + struct writemasks then_writemasks = {0}; + struct writemasks else_writemasks = {0}; nir_if *if_stmt = nir_cf_node_as_if(cf_node); - scan_tess_levels(&if_stmt->then_list, &then_tesslevel_writemask, - cond_block_tl_writemask, - all_invocs_define_tess_levels, true); + scan_cf_list_defined_by_all_invocs(&if_stmt->then_list, &then_writemasks, + cond_block_writemasks, result_mask, + true); - scan_tess_levels(&if_stmt->else_list, &else_tesslevel_writemask, - cond_block_tl_writemask, - all_invocs_define_tess_levels, true); + scan_cf_list_defined_by_all_invocs(&if_stmt->else_list, &else_writemasks, + cond_block_writemasks, result_mask, + true); - if (then_tesslevel_writemask || else_tesslevel_writemask) { + if (!BITSET_IS_EMPTY(then_writemasks.chan_mask) || + !BITSET_IS_EMPTY(else_writemasks.chan_mask)) { /* If both statements write the same tess level channels, - * we can say that the upper block writes them too. + * we can say that the outer block writes them too. 
*/ - *upper_block_tl_writemask |= then_tesslevel_writemask & - else_tesslevel_writemask; - *cond_block_tl_writemask |= then_tesslevel_writemask | - else_tesslevel_writemask; + struct writemasks tmp; + + /* outer_block_writemasks |= then_writemasks & else_writemasks */ + BITSET_AND(tmp.chan_mask, + then_writemasks.chan_mask, else_writemasks.chan_mask); + BITSET_OR(outer_block_writemasks->chan_mask, + outer_block_writemasks->chan_mask, tmp.chan_mask); + + /* cond_block_writemasks |= then_writemasks | else_writemasks */ + BITSET_OR(tmp.chan_mask, + then_writemasks.chan_mask, else_writemasks.chan_mask); + BITSET_OR(cond_block_writemasks->chan_mask, + cond_block_writemasks->chan_mask, tmp.chan_mask); } break; } @@ -119,9 +170,9 @@ scan_tess_levels(struct exec_list *cf_list, unsigned *upper_block_tl_writemask, nir_loop *loop = nir_cf_node_as_loop(cf_node); assert(!nir_loop_has_continue_construct(loop)); - scan_tess_levels(&loop->body, cond_block_tl_writemask, - cond_block_tl_writemask, - all_invocs_define_tess_levels, true); + scan_cf_list_defined_by_all_invocs(&loop->body, cond_block_writemasks, + cond_block_writemasks, result_mask, + true); break; } default: @@ -130,40 +181,62 @@ scan_tess_levels(struct exec_list *cf_list, unsigned *upper_block_tl_writemask, } } -static bool -all_invocations_define_tess_levels(const struct nir_shader *nir) +static void +analyze_patch_outputs(const struct nir_shader *nir, nir_tcs_info *info) { assert(nir->info.stage == MESA_SHADER_TESS_CTRL); + unsigned tess_levels_written = + (nir->info.outputs_written & VARYING_BIT_TESS_LEVEL_OUTER ? 0x1 : 0) | + (nir->info.outputs_written & VARYING_BIT_TESS_LEVEL_INNER ? 0x2 : 0); + + /* Trivial case, nothing to do. 
*/ + if (nir->info.tess.tcs_vertices_out == 1) { + info->patch_outputs_defined_by_all_invoc = nir->info.patch_outputs_written; + info->all_invocations_define_tess_levels = true; + info->tess_levels_defined_by_all_invoc = tess_levels_written; + return; + } /* The pass works as follows: * - * If all codepaths write tess levels, we can say that all invocations - * define tess level values. Whether a tess level value is defined is + * If all codepaths write patch outputs, we can say that all invocations + * define patch output values. Whether a patch output value is defined is * determined for each component separately. */ - unsigned main_block_tl_writemask = 0; /* if main block writes tess levels */ - unsigned cond_block_tl_writemask = 0; /* if cond block writes tess levels */ + struct writemasks main_block_writemasks = {0}; /* if main block writes per-patch outputs */ + struct writemasks cond_block_writemasks = {0}; /* if cond block writes per-patch outputs */ /* Initial value = true. Here the pass will accumulate results from - * multiple segments surrounded by barriers. If tess levels aren't + * multiple segments surrounded by barriers. If patch outputs aren't * written at all, it's a shader bug and we don't care if this will be * true. */ - bool result = true; + uint64_t result_mask = BITFIELD64_MASK(NUM_OUTPUTS); nir_foreach_function_impl(impl, nir) { - scan_tess_levels(&impl->body, &main_block_tl_writemask, - &cond_block_tl_writemask, - &result, false); + scan_cf_list_defined_by_all_invocs(&impl->body, &main_block_writemasks, + &cond_block_writemasks, &result_mask, + false); } /* Accumulate the result for the last code segment separated by a * barrier. 
*/ - if (main_block_tl_writemask || cond_block_tl_writemask) - result &= !(cond_block_tl_writemask & ~main_block_tl_writemask); + if (!BITSET_IS_EMPTY(main_block_writemasks.chan_mask) || + !BITSET_IS_EMPTY(cond_block_writemasks.chan_mask)) { + accum_result_defined_by_all_invocs(&main_block_writemasks, + &cond_block_writemasks, &result_mask); + } - return result; + /* The scan only clears bits; it never checks that an output is written at + * all, so restrict the result to the outputs that are actually written. + */ + info->patch_outputs_defined_by_all_invoc = + result_mask & nir->info.patch_outputs_written; + info->tess_levels_defined_by_all_invoc = + (result_mask >> 32) & tess_levels_written; + info->all_invocations_define_tess_levels = + info->tess_levels_defined_by_all_invoc == tess_levels_written; } /* It's OK to pass UNSPECIFIED to prim and spacing. */ @@ -173,8 +246,7 @@ nir_gather_tcs_info(const nir_shader *nir, nir_tcs_info *info, enum gl_tess_spacing spacing) { memset(info, 0, sizeof(*info)); - info->all_invocations_define_tess_levels = - all_invocations_define_tess_levels(nir); + analyze_patch_outputs(nir, info); unsigned tess_level_writes_le_zero = 0; unsigned tess_level_writes_le_one = 0; diff --git a/src/compiler/nir/nir_tcs_info.h b/src/compiler/nir/nir_tcs_info.h index 1056c127d58..45eed646066 100644 --- a/src/compiler/nir/nir_tcs_info.h +++ b/src/compiler/nir/nir_tcs_info.h @@ -36,36 +36,45 @@ extern "C" { #endif typedef struct nir_tcs_info { - /* Whether all invocations write tess level outputs. - * - * This is useful when a pass wants to read tess level values at the end - * of the shader. If this is true, the pass doesn't have to insert a barrier - * and use output loads, it can just use the SSA defs that are being stored - * (or phis thereof) to get the tess level output values. + /* The bitmask of patch outputs that are always written by all invocations + * in all execution paths. + * + * This is useful when a pass wants to read patch output values at the end + * of the shader. 
If an output's bit is set, a pass doesn't have to insert a + * barrier and use output loads for it, it can just use the SSA defs that are + * being stored (or phis thereof) to get the patch output values. + */ + uint32_t patch_outputs_defined_by_all_invoc; + + /* The bitmask of tess level outputs that are written by all invocations. + * Bit 0 is outer levels, bit 1 is inner levels. */ - bool all_invocations_define_tess_levels; + uint8_t tess_levels_defined_by_all_invoc : 2; + + /* Whether all tess levels that are written are written by all invocations. */ + bool all_invocations_define_tess_levels : 1; /* Whether any of the outer tess level components is effectively 0, meaning * that the shader discards the patch. NaNs and negative values are included * in this. If the patch is discarded, inner tess levels have no effect. */ - bool all_tess_levels_are_effectively_zero; + bool all_tess_levels_are_effectively_zero : 1; /* Whether all tess levels are effectively 1, meaning that the tessellator * behaves as if they were 1. There is a range of values that lead to that * behavior depending on the tessellation spacing. */ - bool all_tess_levels_are_effectively_one; + bool all_tess_levels_are_effectively_one : 1; /* Whether the shader uses a barrier synchronizing TCS output stores. * For example, passes that write an output at the beginning of the shader * and load it at the end can use this to determine whether they have to * insert a barrier or whether the shader already contains a barrier. */ - bool always_executes_barrier; + bool always_executes_barrier : 1; /* Whether outer tess levels <= 0 are written anywhere in the shader. */ - bool discards_patches; + bool discards_patches : 1; } nir_tcs_info; void