Files
mesa/src/compiler/nir/nir_opt_constant_folding.c
T
Antonio Ospite ddf2aa3a4d build: avoid redefining unreachable() which is standard in C23
In the C23 standard unreachable() is now a predefined function-like
macro in <stddef.h>

See https://android.googlesource.com/platform/bionic/+/HEAD/docs/c23.md#is-now-a-predefined-function_like-macro-in

And this causes build errors when building for C23:

-----------------------------------------------------------------------
In file included from ../src/util/log.h:30,
                 from ../src/util/log.c:30:
../src/util/macros.h:123:9: warning: "unreachable" redefined
  123 | #define unreachable(str)    \
      |         ^~~~~~~~~~~
In file included from ../src/util/macros.h:31:
/usr/lib/gcc/x86_64-linux-gnu/14/include/stddef.h:456:9: note: this is the location of the previous definition
  456 | #define unreachable() (__builtin_unreachable ())
      |         ^~~~~~~~~~~
-----------------------------------------------------------------------

So don't redefine it with the same name, but use the name UNREACHABLE()
to also signify it's a macro.

Using a different name also makes sense because the behavior of the
macro was extending the one of __builtin_unreachable() anyway, and it
also had a different signature, accepting one argument, compared to the
standard unreachable() with no arguments.

This change improves the chances of building mesa with the C23 standard,
which for instance is the default in recent AOSP versions.

All the instances of the macro, including the definition, were updated
with the following command line:

  git grep -l '[^_]unreachable(' -- "src/**" | sort | uniq | \
  while read file; \
  do \
    sed -e 's/\([^_]\)unreachable(/\1UNREACHABLE(/g' -i "$file"; \
  done && \
  sed -e 's/#undef unreachable/#undef UNREACHABLE/g' -i src/intel/isl/isl_aux_info.c

Reviewed-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36437>
2025-07-31 17:49:42 +00:00

452 lines
14 KiB
C

/*
* Copyright © 2014 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <math.h>
#include "nir.h"
#include "nir_builder.h"
#include "nir_constant_expressions.h"
#include "nir_deref.h"
/*
* Implements SSA-based constant folding.
*/
struct constant_fold_state {
bool has_load_constant;
bool has_indirect_load_const;
};
static bool
try_fold_alu(nir_builder *b, nir_alu_instr *alu)
{
nir_const_value src[NIR_ALU_MAX_INPUTS][NIR_MAX_VEC_COMPONENTS];
/* In the case that any outputs/inputs have unsized types, then we need to
* guess the bit-size. In this case, the validator ensures that all
* bit-sizes match so we can just take the bit-size from first
* output/input with an unsized type. If all the outputs/inputs are sized
* then we don't need to guess the bit-size at all because the code we
* generate for constant opcodes in this case already knows the sizes of
* the types involved and does not need the provided bit-size for anything
* (although it still requires to receive a valid bit-size).
*/
unsigned bit_size = 0;
if (!nir_alu_type_get_type_size(nir_op_infos[alu->op].output_type))
bit_size = alu->def.bit_size;
for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
if (bit_size == 0 &&
!nir_alu_type_get_type_size(nir_op_infos[alu->op].input_types[i]))
bit_size = alu->src[i].src.ssa->bit_size;
nir_instr *src_instr = alu->src[i].src.ssa->parent_instr;
if (src_instr->type != nir_instr_type_load_const)
return false;
nir_load_const_instr *load_const = nir_instr_as_load_const(src_instr);
for (unsigned j = 0; j < nir_ssa_alu_instr_src_components(alu, i);
j++) {
src[i][j] = load_const->value[alu->src[i].swizzle[j]];
}
}
if (bit_size == 0)
bit_size = 32;
nir_const_value dest[NIR_MAX_VEC_COMPONENTS];
nir_const_value *srcs[NIR_ALU_MAX_INPUTS];
memset(dest, 0, sizeof(dest));
for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; ++i)
srcs[i] = src[i];
nir_eval_const_opcode(alu->op, dest, alu->def.num_components,
bit_size, srcs,
b->shader->info.float_controls_execution_mode);
b->cursor = nir_before_instr(&alu->instr);
nir_def *imm = nir_build_imm(b, alu->def.num_components,
alu->def.bit_size,
dest);
nir_def_replace(&alu->def, imm);
nir_instr_free(&alu->instr);
return true;
}
static nir_const_value *
const_value_for_deref(nir_deref_instr *deref)
{
if (!nir_deref_mode_is(deref, nir_var_mem_constant))
return NULL;
nir_deref_path path;
nir_deref_path_init(&path, deref, NULL);
if (path.path[0]->deref_type != nir_deref_type_var)
goto fail;
nir_variable *var = path.path[0]->var;
assert(var->data.mode == nir_var_mem_constant);
if (var->constant_initializer == NULL)
goto fail;
if (var->constant_initializer->is_null_constant) {
/* Doesn't matter what casts are in the way, it's all zeros */
nir_deref_path_finish(&path);
return var->constant_initializer->values;
}
nir_constant *c = var->constant_initializer;
nir_const_value *v = NULL; /* Vector value for array-deref-of-vec */
for (unsigned i = 1; path.path[i] != NULL; i++) {
nir_deref_instr *p = path.path[i];
switch (p->deref_type) {
case nir_deref_type_var:
UNREACHABLE("Deref paths can only start with a var deref");
case nir_deref_type_array: {
assert(v == NULL);
if (!nir_src_is_const(p->arr.index))
goto fail;
uint64_t idx = nir_src_as_uint(p->arr.index);
if (c->num_elements > 0) {
assert(glsl_type_is_array(path.path[i - 1]->type));
if (idx >= c->num_elements)
goto fail;
c = c->elements[idx];
} else {
assert(glsl_type_is_vector(path.path[i - 1]->type));
assert(glsl_type_is_scalar(p->type));
if (idx >= NIR_MAX_VEC_COMPONENTS)
goto fail;
v = &c->values[idx];
}
break;
}
case nir_deref_type_struct:
assert(glsl_type_is_struct(path.path[i - 1]->type));
assert(v == NULL && c->num_elements > 0);
if (p->strct.index >= c->num_elements)
goto fail;
c = c->elements[p->strct.index];
break;
default:
goto fail;
}
}
/* We have to have ended at a vector */
assert(c->num_elements == 0);
nir_deref_path_finish(&path);
return v ? v : c->values;
fail:
nir_deref_path_finish(&path);
return NULL;
}
static bool
try_fold_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
struct constant_fold_state *state)
{
switch (intrin->intrinsic) {
case nir_intrinsic_demote_if:
case nir_intrinsic_terminate_if:
if (nir_src_is_const(intrin->src[0])) {
if (nir_src_as_bool(intrin->src[0])) {
b->cursor = nir_before_instr(&intrin->instr);
nir_intrinsic_op op;
switch (intrin->intrinsic) {
case nir_intrinsic_demote_if:
op = nir_intrinsic_demote;
break;
case nir_intrinsic_terminate_if:
op = nir_intrinsic_terminate;
break;
default:
UNREACHABLE("invalid intrinsic");
}
nir_intrinsic_instr *new_instr =
nir_intrinsic_instr_create(b->shader, op);
nir_builder_instr_insert(b, &new_instr->instr);
}
nir_instr_remove(&intrin->instr);
return true;
}
return false;
case nir_intrinsic_load_deref: {
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
nir_const_value *v = const_value_for_deref(deref);
if (v) {
b->cursor = nir_before_instr(&intrin->instr);
nir_def *val = nir_build_imm(b, intrin->def.num_components,
intrin->def.bit_size, v);
nir_def_replace(&intrin->def, val);
return true;
}
return false;
}
case nir_intrinsic_load_constant: {
state->has_load_constant = true;
if (!nir_src_is_const(intrin->src[0])) {
state->has_indirect_load_const = true;
return false;
}
unsigned offset = nir_src_as_uint(intrin->src[0]);
unsigned base = nir_intrinsic_base(intrin);
unsigned range = nir_intrinsic_range(intrin);
assert(base + range <= b->shader->constant_data_size);
b->cursor = nir_before_instr(&intrin->instr);
nir_def *val;
if (offset >= range) {
val = nir_undef(b, intrin->def.num_components,
intrin->def.bit_size);
} else {
nir_const_value imm[NIR_MAX_VEC_COMPONENTS];
memset(imm, 0, sizeof(imm));
uint8_t *data = (uint8_t *)b->shader->constant_data + base;
for (unsigned i = 0; i < intrin->num_components; i++) {
unsigned bytes = intrin->def.bit_size / 8;
bytes = MIN2(bytes, range - offset);
memcpy(&imm[i].u64, data + offset, bytes);
offset += bytes;
}
val = nir_build_imm(b, intrin->def.num_components,
intrin->def.bit_size, imm);
}
nir_def_replace(&intrin->def, val);
return true;
}
case nir_intrinsic_ddx:
case nir_intrinsic_ddx_fine:
case nir_intrinsic_ddx_coarse:
case nir_intrinsic_ddy:
case nir_intrinsic_ddy_fine:
case nir_intrinsic_ddy_coarse: {
if (!nir_src_is_const(intrin->src[0]))
return false;
/* Derivative of a constant is zero, except for NaNs and Infs */
nir_const_value imm[NIR_MAX_VEC_COMPONENTS];
unsigned sz = intrin->def.bit_size;
b->cursor = nir_before_instr(&intrin->instr);
for (unsigned i = 0; i < intrin->def.num_components; i++) {
bool finite = isfinite(nir_src_comp_as_float(intrin->src[0], i));
imm[i] = nir_const_value_for_float(finite ? 0 : NAN, sz);
}
nir_def_replace(&intrin->def,
nir_build_imm(b, intrin->def.num_components, sz, imm));
return true;
}
case nir_intrinsic_vote_any:
case nir_intrinsic_vote_all:
case nir_intrinsic_read_invocation:
case nir_intrinsic_read_first_invocation:
case nir_intrinsic_as_uniform:
case nir_intrinsic_shuffle:
case nir_intrinsic_shuffle_xor:
case nir_intrinsic_shuffle_up:
case nir_intrinsic_shuffle_down:
case nir_intrinsic_quad_broadcast:
case nir_intrinsic_quad_swap_horizontal:
case nir_intrinsic_quad_swap_vertical:
case nir_intrinsic_quad_swap_diagonal:
case nir_intrinsic_quad_swizzle_amd:
case nir_intrinsic_masked_swizzle_amd:
/* All of these have the data payload in the first source. They may
* have a second source with a shuffle index but that doesn't matter if
* the data is constant.
*/
if (nir_src_is_const(intrin->src[0])) {
nir_def_replace(&intrin->def, intrin->src[0].ssa);
return true;
}
return false;
case nir_intrinsic_vote_feq:
case nir_intrinsic_vote_ieq:
if (nir_src_is_const(intrin->src[0])) {
b->cursor = nir_before_instr(&intrin->instr);
nir_def_replace(&intrin->def, nir_imm_true(b));
return true;
}
return false;
case nir_intrinsic_inverse_ballot: {
if (!nir_src_is_const(intrin->src[0]))
return false;
bool constant_true = true;
bool constant_false = true;
for (unsigned i = 0; i < nir_src_num_components(intrin->src[0]); i++) {
int64_t value = nir_src_comp_as_int(intrin->src[0], i);
constant_true &= value == -1;
constant_false &= value == 0;
}
if (!constant_true && !constant_false)
return false;
b->cursor = nir_before_instr(&intrin->instr);
nir_def_replace(&intrin->def, nir_imm_bool(b, constant_true));
return true;
}
default:
return false;
}
}
static bool
try_fold_txb_to_tex(nir_builder *b, nir_tex_instr *tex)
{
assert(tex->op == nir_texop_txb);
const int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias);
/* nir_to_tgsi_lower_tex mangles many kinds of texture instructions,
* including txb, into invalid states. It removes the special
* parameters and appends the values to the texture coordinate.
*/
if (bias_idx < 0)
return false;
if (nir_src_is_const(tex->src[bias_idx].src) &&
nir_src_as_float(tex->src[bias_idx].src) == 0.0) {
nir_tex_instr_remove_src(tex, bias_idx);
tex->op = nir_texop_tex;
return true;
}
return false;
}
static bool
try_fold_tex_offset(nir_tex_instr *tex, unsigned *index,
nir_tex_src_type src_type)
{
const int src_idx = nir_tex_instr_src_index(tex, src_type);
if (src_idx < 0)
return false;
if (!nir_src_is_const(tex->src[src_idx].src))
return false;
*index += nir_src_as_uint(tex->src[src_idx].src);
nir_tex_instr_remove_src(tex, src_idx);
return true;
}
static bool
try_fold_texel_offset_src(nir_tex_instr *tex)
{
int offset_src = nir_tex_instr_src_index(tex, nir_tex_src_offset);
if (offset_src < 0)
return false;
unsigned size = nir_tex_instr_src_size(tex, offset_src);
nir_tex_src *src = &tex->src[offset_src];
for (unsigned i = 0; i < size; i++) {
nir_scalar comp = nir_scalar_resolved(src->src.ssa, i);
if (!nir_scalar_is_const(comp) || nir_scalar_as_uint(comp) != 0)
return false;
}
nir_tex_instr_remove_src(tex, offset_src);
return true;
}
static bool
try_fold_tex(nir_builder *b, nir_tex_instr *tex)
{
bool progress = false;
progress |= try_fold_tex_offset(tex, &tex->texture_index,
nir_tex_src_texture_offset);
progress |= try_fold_tex_offset(tex, &tex->sampler_index,
nir_tex_src_sampler_offset);
/* txb with a bias of constant zero is just tex. */
if (tex->op == nir_texop_txb)
progress |= try_fold_txb_to_tex(b, tex);
/* tex with a zero offset is just tex. */
progress |= try_fold_texel_offset_src(tex);
return progress;
}
static bool
try_fold_instr(nir_builder *b, nir_instr *instr, void *_state)
{
switch (instr->type) {
case nir_instr_type_alu:
return try_fold_alu(b, nir_instr_as_alu(instr));
case nir_instr_type_intrinsic:
return try_fold_intrinsic(b, nir_instr_as_intrinsic(instr), _state);
case nir_instr_type_tex:
return try_fold_tex(b, nir_instr_as_tex(instr));
default:
/* Don't know how to constant fold */
return false;
}
}
bool
nir_opt_constant_folding(nir_shader *shader)
{
struct constant_fold_state state;
state.has_load_constant = false;
state.has_indirect_load_const = false;
bool progress = nir_shader_instructions_pass(shader, try_fold_instr,
nir_metadata_control_flow,
&state);
/* This doesn't free the constant data if there are no constant loads because
* the data might still be used but the loads have been lowered to load_ubo
*/
if (state.has_load_constant && !state.has_indirect_load_const &&
shader->constant_data_size) {
ralloc_free(shader->constant_data);
shader->constant_data = NULL;
shader->constant_data_size = 0;
}
return progress;
}