intel/elk: Use common code in intel/compiler
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27563>
This commit is contained in:
@@ -36,7 +36,7 @@
|
||||
#include "brw_eu_defines.h"
|
||||
#include "brw_eu.h"
|
||||
#include "brw_shader.h"
|
||||
#include "intel_gfx_ver_enum.h"
|
||||
#include "../intel_gfx_ver_enum.h"
|
||||
#include "dev/intel_debug.h"
|
||||
|
||||
#include "util/u_debug.h"
|
||||
|
||||
@@ -37,7 +37,7 @@
|
||||
#include "brw_cfg.h"
|
||||
#include "brw_dead_control_flow.h"
|
||||
#include "brw_private.h"
|
||||
#include "intel_nir.h"
|
||||
#include "../intel_nir.h"
|
||||
#include "shader_enums.h"
|
||||
#include "dev/intel_debug.h"
|
||||
#include "dev/intel_wa.h"
|
||||
|
||||
@@ -21,7 +21,7 @@
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "intel_nir.h"
|
||||
#include "../intel_nir.h"
|
||||
#include "brw_nir.h"
|
||||
#include "brw_shader.h"
|
||||
#include "dev/intel_debug.h"
|
||||
|
||||
@@ -27,7 +27,7 @@
|
||||
* Tessellaton control shader specific code derived from the vec4_visitor class.
|
||||
*/
|
||||
|
||||
#include "intel_nir.h"
|
||||
#include "../intel_nir.h"
|
||||
#include "brw_nir.h"
|
||||
#include "brw_vec4_tcs.h"
|
||||
#include "brw_fs.h"
|
||||
|
||||
@@ -1,72 +0,0 @@
|
||||
/*
|
||||
* Copyright © 2015 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef INTEL_GFX_VER_ENUM_H
|
||||
#define INTEL_GFX_VER_ENUM_H
|
||||
|
||||
#include "util/macros.h"
|
||||
#include "dev/intel_device_info.h"
|
||||
|
||||
enum gfx_ver {
|
||||
GFX4 = (1 << 0),
|
||||
GFX45 = (1 << 1),
|
||||
GFX5 = (1 << 2),
|
||||
GFX6 = (1 << 3),
|
||||
GFX7 = (1 << 4),
|
||||
GFX75 = (1 << 5),
|
||||
GFX8 = (1 << 6),
|
||||
GFX9 = (1 << 7),
|
||||
GFX10 = (1 << 8),
|
||||
GFX11 = (1 << 9),
|
||||
GFX12 = (1 << 10),
|
||||
GFX125 = (1 << 11),
|
||||
XE2 = (1 << 12),
|
||||
GFX_ALL = ~0
|
||||
};
|
||||
|
||||
#define GFX_LT(ver) ((ver) - 1)
|
||||
#define GFX_GE(ver) (~GFX_LT(ver))
|
||||
#define GFX_LE(ver) (GFX_LT(ver) | (ver))
|
||||
|
||||
static inline enum gfx_ver
|
||||
gfx_ver_from_devinfo(const struct intel_device_info *devinfo)
|
||||
{
|
||||
switch (devinfo->verx10) {
|
||||
case 40: return GFX4;
|
||||
case 45: return GFX45;
|
||||
case 50: return GFX5;
|
||||
case 60: return GFX6;
|
||||
case 70: return GFX7;
|
||||
case 75: return GFX75;
|
||||
case 80: return GFX8;
|
||||
case 90: return GFX9;
|
||||
case 110: return GFX11;
|
||||
case 120: return GFX12;
|
||||
case 125: return GFX125;
|
||||
case 200: return XE2;
|
||||
default:
|
||||
unreachable("not reached");
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,28 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2014-2023 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "intel_nir.h"
|
||||
|
||||
bool
|
||||
intel_nir_pulls_at_sample(nir_shader *shader)
|
||||
{
|
||||
nir_foreach_function_impl(impl, shader) {
|
||||
nir_foreach_block(block, impl) {
|
||||
nir_foreach_instr(instr, block) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
|
||||
if (intrin->intrinsic == nir_intrinsic_load_barycentric_at_sample)
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,46 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2023 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#ifndef INTEL_NIR_H
|
||||
#define INTEL_NIR_H
|
||||
|
||||
#include "nir.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct intel_device_info;
|
||||
|
||||
void intel_nir_apply_tcs_quads_workaround(nir_shader *nir);
|
||||
bool intel_nir_blockify_uniform_loads(nir_shader *shader,
|
||||
const struct intel_device_info *devinfo);
|
||||
bool intel_nir_clamp_image_1d_2d_array_sizes(nir_shader *shader);
|
||||
bool intel_nir_clamp_per_vertex_loads(nir_shader *shader);
|
||||
bool intel_nir_cleanup_resource_intel(nir_shader *shader);
|
||||
|
||||
bool intel_nir_lower_conversions(nir_shader *nir);
|
||||
bool intel_nir_lower_non_uniform_barycentric_at_sample(nir_shader *nir);
|
||||
bool intel_nir_lower_non_uniform_resource_intel(nir_shader *shader);
|
||||
bool intel_nir_lower_patch_vertices_in(nir_shader *shader, unsigned input_vertices);
|
||||
bool intel_nir_lower_shading_rate_output(nir_shader *nir);
|
||||
bool intel_nir_lower_sparse_intrinsics(nir_shader *nir);
|
||||
|
||||
struct intel_nir_lower_texture_opts {
|
||||
bool combined_lod_and_array_index;
|
||||
};
|
||||
bool intel_nir_lower_texture(nir_shader *nir,
|
||||
const struct intel_nir_lower_texture_opts *opts);
|
||||
|
||||
bool intel_nir_opt_peephole_ffma(nir_shader *shader);
|
||||
bool intel_nir_opt_peephole_imul32x16(nir_shader *shader);
|
||||
|
||||
bool intel_nir_pulls_at_sample(nir_shader *shader);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* INTEL_NIR_H */
|
||||
@@ -1,116 +0,0 @@
|
||||
/*
|
||||
* Copyright © 2018 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "dev/intel_device_info.h"
|
||||
#include "intel_nir.h"
|
||||
#include "isl/isl.h"
|
||||
#include "nir_builder.h"
|
||||
|
||||
static bool
|
||||
intel_nir_blockify_uniform_loads_instr(nir_builder *b,
|
||||
nir_instr *instr,
|
||||
void *cb_data)
|
||||
{
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
return false;
|
||||
|
||||
const struct intel_device_info *devinfo = cb_data;
|
||||
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_load_ubo:
|
||||
case nir_intrinsic_load_ssbo:
|
||||
/* BDW PRMs, Volume 7: 3D-Media-GPGPU: OWord Block ReadWrite:
|
||||
*
|
||||
* "The surface base address must be OWord-aligned."
|
||||
*
|
||||
* We can't make that guarantee with SSBOs where the alignment is
|
||||
* 4bytes.
|
||||
*/
|
||||
if (devinfo->ver < 9)
|
||||
return false;
|
||||
|
||||
if (nir_src_is_divergent(intrin->src[1]))
|
||||
return false;
|
||||
|
||||
if (intrin->def.bit_size != 32)
|
||||
return false;
|
||||
|
||||
/* Without the LSC, we can only do block loads of at least 4dwords (1
|
||||
* oword).
|
||||
*/
|
||||
if (!devinfo->has_lsc && intrin->def.num_components < 4)
|
||||
return false;
|
||||
|
||||
intrin->intrinsic =
|
||||
intrin->intrinsic == nir_intrinsic_load_ubo ?
|
||||
nir_intrinsic_load_ubo_uniform_block_intel :
|
||||
nir_intrinsic_load_ssbo_uniform_block_intel;
|
||||
return true;
|
||||
|
||||
case nir_intrinsic_load_shared:
|
||||
/* Block loads on shared memory are not supported before the LSC. */
|
||||
if (!devinfo->has_lsc)
|
||||
return false;
|
||||
|
||||
if (nir_src_is_divergent(intrin->src[0]))
|
||||
return false;
|
||||
|
||||
if (intrin->def.bit_size != 32)
|
||||
return false;
|
||||
|
||||
intrin->intrinsic = nir_intrinsic_load_shared_uniform_block_intel;
|
||||
return true;
|
||||
|
||||
case nir_intrinsic_load_global_constant:
|
||||
if (nir_src_is_divergent(intrin->src[0]))
|
||||
return false;
|
||||
|
||||
if (intrin->def.bit_size != 32)
|
||||
return false;
|
||||
|
||||
/* Without the LSC, we can only do block loads of at least 4dwords (1
|
||||
* oword).
|
||||
*/
|
||||
if (!devinfo->has_lsc && intrin->def.num_components < 4)
|
||||
return false;
|
||||
|
||||
intrin->intrinsic = nir_intrinsic_load_global_constant_uniform_block_intel;
|
||||
return true;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
intel_nir_blockify_uniform_loads(nir_shader *shader,
|
||||
const struct intel_device_info *devinfo)
|
||||
{
|
||||
return nir_shader_instructions_pass(shader,
|
||||
intel_nir_blockify_uniform_loads_instr,
|
||||
nir_metadata_block_index |
|
||||
nir_metadata_dominance |
|
||||
nir_metadata_live_defs,
|
||||
(void *) devinfo);
|
||||
}
|
||||
@@ -1,144 +0,0 @@
|
||||
/*
|
||||
* Copyright © 2020 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
#include "intel_nir.h"
|
||||
|
||||
/**
|
||||
* Wa_1806565034:
|
||||
*
|
||||
* Gfx12+ allows to set RENDER_SURFACE_STATE::SurfaceArray to 1 only if
|
||||
* array_len > 1. Setting RENDER_SURFACE_STATE::SurfaceArray to 0 results in
|
||||
* the HW RESINFO message to report an array size of 0 which breaks texture
|
||||
* array size queries.
|
||||
*
|
||||
* This NIR pass works around this by patching the array size with a
|
||||
* MAX(array_size, 1) for array textures.
|
||||
*/
|
||||
|
||||
static bool
|
||||
intel_nir_clamp_image_1d_2d_array_sizes_instr(nir_builder *b,
|
||||
nir_instr *instr,
|
||||
UNUSED void *cb_data)
|
||||
{
|
||||
nir_def *image_size = NULL;
|
||||
|
||||
switch (instr->type) {
|
||||
case nir_instr_type_intrinsic: {
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_image_size:
|
||||
case nir_intrinsic_bindless_image_size:
|
||||
if (!nir_intrinsic_image_array(intr))
|
||||
break;
|
||||
|
||||
image_size = &intr->def;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_image_deref_size: {
|
||||
nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
|
||||
|
||||
assert(glsl_type_is_image(deref->type));
|
||||
|
||||
if (!glsl_sampler_type_is_array(deref->type))
|
||||
break;
|
||||
|
||||
image_size = &intr->def;
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_instr_type_tex: {
|
||||
nir_tex_instr *tex_instr = nir_instr_as_tex(instr);
|
||||
if (tex_instr->op != nir_texop_txs)
|
||||
break;
|
||||
|
||||
if (!tex_instr->is_array)
|
||||
break;
|
||||
|
||||
image_size = &tex_instr->def;
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (!image_size)
|
||||
return false;
|
||||
|
||||
b->cursor = nir_after_instr(instr);
|
||||
|
||||
nir_def *components[4];
|
||||
/* OR all the sizes for all components but the last. */
|
||||
nir_def *or_components = nir_imm_int(b, 0);
|
||||
for (int i = 0; i < image_size->num_components; i++) {
|
||||
if (i == (image_size->num_components - 1)) {
|
||||
nir_def *null_or_size[2] = {
|
||||
nir_imm_int(b, 0),
|
||||
nir_imax(b, nir_channel(b, image_size, i),
|
||||
nir_imm_int(b, 1)),
|
||||
};
|
||||
nir_def *vec2_null_or_size = nir_vec(b, null_or_size, 2);
|
||||
|
||||
/* Using the ORed sizes select either the element 0 or 1
|
||||
* from this vec2. For NULL textures which have a size of
|
||||
* 0x0x0, we'll select the first element which is 0 and for
|
||||
* the rest MAX(depth, 1).
|
||||
*/
|
||||
components[i] =
|
||||
nir_vector_extract(b, vec2_null_or_size,
|
||||
nir_imin(b, or_components,
|
||||
nir_imm_int(b, 1)));
|
||||
} else {
|
||||
components[i] = nir_channel(b, image_size, i);
|
||||
or_components = nir_ior(b, components[i], or_components);
|
||||
}
|
||||
}
|
||||
nir_def *image_size_replacement =
|
||||
nir_vec(b, components, image_size->num_components);
|
||||
|
||||
b->cursor = nir_after_instr(instr);
|
||||
|
||||
nir_def_rewrite_uses_after(image_size,
|
||||
image_size_replacement,
|
||||
image_size_replacement->parent_instr);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
intel_nir_clamp_image_1d_2d_array_sizes(nir_shader *shader)
|
||||
{
|
||||
return nir_shader_instructions_pass(shader,
|
||||
intel_nir_clamp_image_1d_2d_array_sizes_instr,
|
||||
nir_metadata_block_index |
|
||||
nir_metadata_dominance,
|
||||
NULL);
|
||||
}
|
||||
@@ -1,108 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2022 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Limit input per vertex input accesses. This is useful for the tesselation stages.
|
||||
* On Gfx12.5+ out of bound accesses generate hangs.
|
||||
*
|
||||
* This pass operates on derefs, it must be called before shader inputs are
|
||||
* lowered.
|
||||
*/
|
||||
|
||||
#include "intel_nir.h"
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
#include "compiler/nir/nir_deref.h"
|
||||
|
||||
static bool
|
||||
clamp_per_vertex_loads_instr(nir_builder *b, nir_intrinsic_instr *intrin,
|
||||
void *cb_data)
|
||||
{
|
||||
if (intrin->intrinsic != nir_intrinsic_load_deref)
|
||||
return false;
|
||||
|
||||
nir_deref_instr *deref = nir_instr_as_deref(intrin->src[0].ssa->parent_instr);
|
||||
nir_variable *var = nir_deref_instr_get_variable(deref);
|
||||
if (var == NULL || (var->data.mode & nir_var_shader_in) == 0)
|
||||
return false;
|
||||
|
||||
nir_deref_path path;
|
||||
nir_deref_path_init(&path, deref, cb_data);
|
||||
|
||||
bool progress = false;
|
||||
for (unsigned i = 0; path.path[i]; i++) {
|
||||
if (path.path[i]->deref_type != nir_deref_type_array)
|
||||
continue;
|
||||
|
||||
b->cursor = nir_before_instr(&path.path[i]->instr);
|
||||
|
||||
nir_src_rewrite(&path.path[i]->arr.index,
|
||||
nir_umin(b, path.path[i]->arr.index.ssa, nir_iadd_imm(b, nir_load_patch_vertices_in(b), -1)));
|
||||
|
||||
progress = true;
|
||||
break;
|
||||
}
|
||||
|
||||
nir_deref_path_finish(&path);
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
bool
|
||||
intel_nir_clamp_per_vertex_loads(nir_shader *shader)
|
||||
{
|
||||
void *mem_ctx = ralloc_context(NULL);
|
||||
|
||||
bool ret = nir_shader_intrinsics_pass(shader, clamp_per_vertex_loads_instr,
|
||||
nir_metadata_block_index |
|
||||
nir_metadata_dominance,
|
||||
mem_ctx);
|
||||
|
||||
ralloc_free(mem_ctx);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_patch_vertices_instr(nir_builder *b, nir_intrinsic_instr *intrin,
|
||||
void *cb_data)
|
||||
{
|
||||
if (intrin->intrinsic != nir_intrinsic_load_patch_vertices_in)
|
||||
return false;
|
||||
|
||||
unsigned *input_vertices = cb_data;
|
||||
|
||||
b->cursor = nir_before_instr(&intrin->instr);
|
||||
|
||||
nir_def_rewrite_uses(&intrin->def, nir_imm_int(b, *input_vertices));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
intel_nir_lower_patch_vertices_in(nir_shader *shader, unsigned input_vertices)
|
||||
{
|
||||
return nir_shader_intrinsics_pass(shader, lower_patch_vertices_instr,
|
||||
nir_metadata_block_index |
|
||||
nir_metadata_dominance,
|
||||
&input_vertices);
|
||||
}
|
||||
@@ -1,115 +0,0 @@
|
||||
/*
|
||||
* Copyright © 2018 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "intel_nir.h"
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
|
||||
static void
|
||||
split_conversion(nir_builder *b, nir_alu_instr *alu, nir_alu_type src_type,
|
||||
nir_alu_type tmp_type, nir_alu_type dst_type)
|
||||
{
|
||||
b->cursor = nir_before_instr(&alu->instr);
|
||||
nir_def *src = nir_ssa_for_alu_src(b, alu, 0);
|
||||
nir_def *tmp = nir_type_convert(b, src, src_type, tmp_type, nir_rounding_mode_undef);
|
||||
nir_def *res = nir_type_convert(b, tmp, tmp_type, dst_type, nir_rounding_mode_undef);
|
||||
nir_def_rewrite_uses(&alu->def, res);
|
||||
nir_instr_remove(&alu->instr);
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_alu_instr(nir_builder *b, nir_alu_instr *alu)
|
||||
{
|
||||
unsigned src_bit_size = nir_src_bit_size(alu->src[0].src);
|
||||
nir_alu_type src_type = nir_op_infos[alu->op].input_types[0];
|
||||
nir_alu_type src_full_type = (nir_alu_type) (src_type | src_bit_size);
|
||||
|
||||
unsigned dst_bit_size = alu->def.bit_size;
|
||||
nir_alu_type dst_full_type = nir_op_infos[alu->op].output_type;
|
||||
nir_alu_type dst_type = nir_alu_type_get_base_type(dst_full_type);
|
||||
|
||||
/* BDW PRM, vol02, Command Reference Instructions, mov - MOVE:
|
||||
*
|
||||
* "There is no direct conversion from HF to DF or DF to HF.
|
||||
* Use two instructions and F (Float) as an intermediate type.
|
||||
*
|
||||
* There is no direct conversion from HF to Q/UQ or Q/UQ to HF.
|
||||
* Use two instructions and F (Float) or a word integer type
|
||||
* or a DWord integer type as an intermediate type."
|
||||
*
|
||||
* It is important that the intermediate conversion happens through a
|
||||
* 32-bit float type so we don't lose range when we convert from
|
||||
* a 64-bit integer.
|
||||
*/
|
||||
unsigned int64_types = nir_type_int64 | nir_type_uint64;
|
||||
if ((src_full_type == nir_type_float16 && (dst_full_type & int64_types)) ||
|
||||
((src_full_type & int64_types) && dst_full_type == nir_type_float16)) {
|
||||
split_conversion(b, alu, src_type, nir_type_float | 32,
|
||||
dst_type | dst_bit_size);
|
||||
return true;
|
||||
}
|
||||
|
||||
/* SKL PRM, vol 02a, Command Reference: Instructions, Move:
|
||||
*
|
||||
* "There is no direct conversion from B/UB to DF or DF to B/UB. Use
|
||||
* two instructions and a word or DWord intermediate type."
|
||||
*
|
||||
* "There is no direct conversion from B/UB to Q/UQ or Q/UQ to B/UB.
|
||||
* Use two instructions and a word or DWord intermediate integer
|
||||
* type."
|
||||
*
|
||||
* It is important that we use a 32-bit integer matching the sign of the
|
||||
* destination as the intermediate type so we avoid any chance of rtne
|
||||
* rounding happening before the conversion to integer (which is expected
|
||||
* to round towards zero) in double to byte conversions.
|
||||
*/
|
||||
if ((src_bit_size == 8 && dst_bit_size == 64) ||
|
||||
(src_bit_size == 64 && dst_bit_size == 8)) {
|
||||
split_conversion(b, alu, src_type, dst_type | 32, dst_type | dst_bit_size);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_instr(nir_builder *b, nir_instr *instr, UNUSED void *cb_data)
|
||||
{
|
||||
if (instr->type != nir_instr_type_alu)
|
||||
return false;
|
||||
|
||||
nir_alu_instr *alu = nir_instr_as_alu(instr);
|
||||
|
||||
if (!nir_op_infos[alu->op].is_conversion)
|
||||
return false;
|
||||
|
||||
return lower_alu_instr(b, alu);
|
||||
}
|
||||
|
||||
bool
|
||||
intel_nir_lower_conversions(nir_shader *shader)
|
||||
{
|
||||
return nir_shader_instructions_pass(shader, lower_instr,
|
||||
nir_metadata_block_index |
|
||||
nir_metadata_dominance,
|
||||
NULL);
|
||||
}
|
||||
@@ -1,307 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "intel_nir.h"
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
|
||||
struct lower_intrinsics_state {
|
||||
nir_shader *nir;
|
||||
nir_function_impl *impl;
|
||||
bool progress;
|
||||
nir_builder builder;
|
||||
};
|
||||
|
||||
static void
|
||||
compute_local_index_id(nir_builder *b,
|
||||
nir_shader *nir,
|
||||
nir_def **local_index,
|
||||
nir_def **local_id)
|
||||
{
|
||||
nir_def *subgroup_id = nir_load_subgroup_id(b);
|
||||
|
||||
nir_def *thread_local_id =
|
||||
nir_imul(b, subgroup_id, nir_load_simd_width_intel(b));
|
||||
nir_def *channel = nir_load_subgroup_invocation(b);
|
||||
nir_def *linear = nir_iadd(b, channel, thread_local_id);
|
||||
|
||||
nir_def *size_x;
|
||||
nir_def *size_y;
|
||||
if (nir->info.workgroup_size_variable) {
|
||||
nir_def *size_xyz = nir_load_workgroup_size(b);
|
||||
size_x = nir_channel(b, size_xyz, 0);
|
||||
size_y = nir_channel(b, size_xyz, 1);
|
||||
} else {
|
||||
size_x = nir_imm_int(b, nir->info.workgroup_size[0]);
|
||||
size_y = nir_imm_int(b, nir->info.workgroup_size[1]);
|
||||
}
|
||||
nir_def *size_xy = nir_imul(b, size_x, size_y);
|
||||
|
||||
/* The local invocation index and ID must respect the following
|
||||
*
|
||||
* gl_LocalInvocationID.x =
|
||||
* gl_LocalInvocationIndex % gl_WorkGroupSize.x;
|
||||
* gl_LocalInvocationID.y =
|
||||
* (gl_LocalInvocationIndex / gl_WorkGroupSize.x) %
|
||||
* gl_WorkGroupSize.y;
|
||||
* gl_LocalInvocationID.z =
|
||||
* (gl_LocalInvocationIndex /
|
||||
* (gl_WorkGroupSize.x * gl_WorkGroupSize.y)) %
|
||||
* gl_WorkGroupSize.z;
|
||||
*
|
||||
* However, the final % gl_WorkGroupSize.z does nothing unless we
|
||||
* accidentally end up with a gl_LocalInvocationIndex that is too
|
||||
* large so it can safely be omitted.
|
||||
*/
|
||||
|
||||
nir_def *id_x, *id_y, *id_z;
|
||||
switch (nir->info.cs.derivative_group) {
|
||||
case DERIVATIVE_GROUP_NONE:
|
||||
if (nir->info.num_images == 0 &&
|
||||
nir->info.num_textures == 0) {
|
||||
/* X-major lid order. Optimal for linear accesses only,
|
||||
* which are usually buffers. X,Y ordering will look like:
|
||||
* (0,0) (1,0) (2,0) ... (size_x-1,0) (0,1) (1,1) ...
|
||||
*/
|
||||
id_x = nir_umod(b, linear, size_x);
|
||||
id_y = nir_umod(b, nir_udiv(b, linear, size_x), size_y);
|
||||
*local_index = linear;
|
||||
} else if (!nir->info.workgroup_size_variable &&
|
||||
nir->info.workgroup_size[1] % 4 == 0) {
|
||||
/* 1x4 block X-major lid order. Same as X-major except increments in
|
||||
* blocks of width=1 height=4. Always optimal for tileY and usually
|
||||
* optimal for linear accesses.
|
||||
* x = (linear / 4) % size_x
|
||||
* y = ((linear % 4) + (linear / 4 / size_x) * 4) % size_y
|
||||
* X,Y ordering will look like: (0,0) (0,1) (0,2) (0,3) (1,0) (1,1)
|
||||
* (1,2) (1,3) (2,0) ... (size_x-1,3) (0,4) (0,5) (0,6) (0,7) (1,4) ...
|
||||
*/
|
||||
const unsigned height = 4;
|
||||
nir_def *block = nir_udiv_imm(b, linear, height);
|
||||
id_x = nir_umod(b, block, size_x);
|
||||
id_y = nir_umod(b,
|
||||
nir_iadd(b,
|
||||
nir_umod_imm(b, linear, height),
|
||||
nir_imul_imm(b,
|
||||
nir_udiv(b, block, size_x),
|
||||
height)),
|
||||
size_y);
|
||||
} else {
|
||||
/* Y-major lid order. Optimal for tileY accesses only,
|
||||
* which are usually images. X,Y ordering will look like:
|
||||
* (0,0) (0,1) (0,2) ... (0,size_y-1) (1,0) (1,1) ...
|
||||
*/
|
||||
id_y = nir_umod(b, linear, size_y);
|
||||
id_x = nir_umod(b, nir_udiv(b, linear, size_y), size_x);
|
||||
}
|
||||
|
||||
id_z = nir_udiv(b, linear, size_xy);
|
||||
*local_id = nir_vec3(b, id_x, id_y, id_z);
|
||||
if (!*local_index) {
|
||||
*local_index = nir_iadd(b, nir_iadd(b, id_x,
|
||||
nir_imul(b, id_y, size_x)),
|
||||
nir_imul(b, id_z, size_xy));
|
||||
}
|
||||
break;
|
||||
case DERIVATIVE_GROUP_LINEAR:
|
||||
/* For linear, just set the local invocation index linearly,
|
||||
* and calculate local invocation ID from that.
|
||||
*/
|
||||
id_x = nir_umod(b, linear, size_x);
|
||||
id_y = nir_umod(b, nir_udiv(b, linear, size_x), size_y);
|
||||
id_z = nir_udiv(b, linear, size_xy);
|
||||
*local_id = nir_vec3(b, id_x, id_y, id_z);
|
||||
*local_index = linear;
|
||||
break;
|
||||
case DERIVATIVE_GROUP_QUADS: {
|
||||
/* For quads, first we figure out the 2x2 grid the invocation
|
||||
* belongs to -- treating extra Z layers as just more rows.
|
||||
* Then map that into local invocation ID (trivial) and local
|
||||
* invocation index. Skipping Z simplify index calculation.
|
||||
*/
|
||||
|
||||
nir_def *one = nir_imm_int(b, 1);
|
||||
nir_def *double_size_x = nir_ishl(b, size_x, one);
|
||||
|
||||
/* ID within a pair of rows, where each group of 4 is 2x2 quad. */
|
||||
nir_def *row_pair_id = nir_umod(b, linear, double_size_x);
|
||||
nir_def *y_row_pairs = nir_udiv(b, linear, double_size_x);
|
||||
|
||||
nir_def *x =
|
||||
nir_ior(b,
|
||||
nir_iand(b, row_pair_id, one),
|
||||
nir_iand(b, nir_ishr(b, row_pair_id, one),
|
||||
nir_imm_int(b, 0xfffffffe)));
|
||||
nir_def *y =
|
||||
nir_ior(b,
|
||||
nir_ishl(b, y_row_pairs, one),
|
||||
nir_iand(b, nir_ishr(b, row_pair_id, one), one));
|
||||
|
||||
*local_id = nir_vec3(b, x,
|
||||
nir_umod(b, y, size_y),
|
||||
nir_udiv(b, y, size_y));
|
||||
*local_index = nir_iadd(b, x, nir_imul(b, y, size_x));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
unreachable("invalid derivative group");
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_cs_intrinsics_convert_block(struct lower_intrinsics_state *state,
|
||||
nir_block *block)
|
||||
{
|
||||
bool progress = false;
|
||||
nir_builder *b = &state->builder;
|
||||
nir_shader *nir = state->nir;
|
||||
|
||||
/* Reuse calculated values inside the block. */
|
||||
nir_def *local_index = NULL;
|
||||
nir_def *local_id = NULL;
|
||||
|
||||
nir_foreach_instr_safe(instr, block) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
|
||||
nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr);
|
||||
|
||||
b->cursor = nir_after_instr(&intrinsic->instr);
|
||||
|
||||
nir_def *sysval;
|
||||
switch (intrinsic->intrinsic) {
|
||||
case nir_intrinsic_load_local_invocation_index:
|
||||
case nir_intrinsic_load_local_invocation_id: {
|
||||
if (!local_index && !nir->info.workgroup_size_variable) {
|
||||
const uint16_t *ws = nir->info.workgroup_size;
|
||||
if (ws[0] * ws[1] * ws[2] == 1) {
|
||||
nir_def *zero = nir_imm_int(b, 0);
|
||||
local_index = zero;
|
||||
local_id = nir_replicate(b, zero, 3);
|
||||
}
|
||||
}
|
||||
|
||||
if (!local_index) {
|
||||
if (nir->info.stage == MESA_SHADER_TASK ||
|
||||
nir->info.stage == MESA_SHADER_MESH) {
|
||||
/* Will be lowered by nir_emit_task_mesh_intrinsic() using
|
||||
* information from the payload.
|
||||
*/
|
||||
continue;
|
||||
}
|
||||
|
||||
/* First time we are using those, so let's calculate them. */
|
||||
assert(!local_id);
|
||||
compute_local_index_id(b, nir, &local_index, &local_id);
|
||||
}
|
||||
|
||||
assert(local_id);
|
||||
assert(local_index);
|
||||
if (intrinsic->intrinsic == nir_intrinsic_load_local_invocation_id)
|
||||
sysval = local_id;
|
||||
else
|
||||
sysval = local_index;
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_num_subgroups: {
|
||||
nir_def *size;
|
||||
if (state->nir->info.workgroup_size_variable) {
|
||||
nir_def *size_xyz = nir_load_workgroup_size(b);
|
||||
nir_def *size_x = nir_channel(b, size_xyz, 0);
|
||||
nir_def *size_y = nir_channel(b, size_xyz, 1);
|
||||
nir_def *size_z = nir_channel(b, size_xyz, 2);
|
||||
size = nir_imul(b, nir_imul(b, size_x, size_y), size_z);
|
||||
} else {
|
||||
size = nir_imm_int(b, nir->info.workgroup_size[0] *
|
||||
nir->info.workgroup_size[1] *
|
||||
nir->info.workgroup_size[2]);
|
||||
}
|
||||
|
||||
/* Calculate the equivalent of DIV_ROUND_UP. */
|
||||
nir_def *simd_width = nir_load_simd_width_intel(b);
|
||||
sysval =
|
||||
nir_udiv(b, nir_iadd_imm(b, nir_iadd(b, size, simd_width), -1),
|
||||
simd_width);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
continue;
|
||||
}
|
||||
|
||||
if (intrinsic->def.bit_size == 64)
|
||||
sysval = nir_u2u64(b, sysval);
|
||||
|
||||
nir_def_rewrite_uses(&intrinsic->def, sysval);
|
||||
nir_instr_remove(&intrinsic->instr);
|
||||
|
||||
state->progress = true;
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
/* Run the per-block intrinsic lowering over every block of one function
 * implementation, then declare which metadata survived the rewrite.
 */
static void
lower_cs_intrinsics_convert_impl(struct lower_intrinsics_state *state)
{
   state->builder = nir_builder_create(state->impl);

   nir_foreach_block(block, state->impl) {
      lower_cs_intrinsics_convert_block(state, block);
   }

   /* Only straight-line instructions were added/removed, so block indices
    * and dominance are still valid.
    */
   nir_metadata_preserve(state->impl,
                         nir_metadata_block_index | nir_metadata_dominance);
}
|
||||
|
||||
/* Entry point: lower workgroup-related system-value intrinsics
 * (local invocation index/id, num_subgroups) for all function impls of
 * the shader.  Returns true if anything was lowered.
 */
bool
intel_nir_lower_cs_intrinsics(nir_shader *nir)
{
   /* Only stages that actually have a workgroup may use these intrinsics. */
   assert(gl_shader_stage_uses_workgroup(nir->info.stage));

   struct lower_intrinsics_state state = {
      .nir = nir,
   };

   /* Constraints from NV_compute_shader_derivatives. */
   if (gl_shader_stage_is_compute(nir->info.stage) &&
       !nir->info.workgroup_size_variable) {
      if (nir->info.cs.derivative_group == DERIVATIVE_GROUP_QUADS) {
         /* 2x2 quads require even X and Y workgroup dimensions. */
         assert(nir->info.workgroup_size[0] % 2 == 0);
         assert(nir->info.workgroup_size[1] % 2 == 0);
      } else if (nir->info.cs.derivative_group == DERIVATIVE_GROUP_LINEAR) {
         /* Linear derivative groups come in sets of 4 invocations. */
         ASSERTED unsigned workgroup_size =
            nir->info.workgroup_size[0] *
            nir->info.workgroup_size[1] *
            nir->info.workgroup_size[2];
         assert(workgroup_size % 4 == 0);
      }
   }

   nir_foreach_function_impl(impl, nir) {
      state.impl = impl;
      lower_cs_intrinsics_convert_impl(&state);
   }

   return state.progress;
}
|
||||
@@ -1,80 +0,0 @@
|
||||
/*
|
||||
* Copyright © 2023 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Lower non uniform at sample messages to the interpolator.
|
||||
*
|
||||
* This is pretty much identical to what nir_lower_non_uniform_access() does.
|
||||
* We do it here because otherwise GCM would undo this optimization. Also we
|
||||
* can assume divergence analysis here.
|
||||
*/
|
||||
|
||||
#include "intel_nir.h"
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
|
||||
/* Lower a divergent load_barycentric_at_sample by wrapping it in a loop that
 * processes one uniform sample id per iteration (waterfall loop): each
 * iteration reads the first active lane's sample id and lets the matching
 * lanes execute the intrinsic with that now-uniform value, then break.
 */
static bool
intel_nir_lower_non_uniform_barycentric_at_sample_instr(nir_builder *b,
                                                        nir_instr *instr,
                                                        void *cb_data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
   if (intrin->intrinsic != nir_intrinsic_load_barycentric_at_sample)
      return false;

   /* Nothing to do when the sample id is already uniform / non-divergent. */
   if (nir_src_is_always_uniform(intrin->src[0]) ||
       !nir_src_is_divergent(intrin->src[0]))
      return false;

   nir_def *sample_id = intrin->src[0].ssa;

   /* Detach the intrinsic; it is re-inserted inside the loop below. */
   b->cursor = nir_instr_remove(&intrin->instr);

   nir_push_loop(b);
   {
      nir_def *first_sample_id = nir_read_first_invocation(b, sample_id);

      nir_push_if(b, nir_ieq(b, sample_id, first_sample_id));
      {
         /* Re-insert the original intrinsic with the uniform sample id. */
         nir_builder_instr_insert(b, &intrin->instr);

         nir_src_rewrite(&intrin->src[0], first_sample_id);

         nir_jump(b, nir_jump_break);
      }
   }

   return true;
}
|
||||
|
||||
bool
|
||||
intel_nir_lower_non_uniform_barycentric_at_sample(nir_shader *nir)
|
||||
{
|
||||
return nir_shader_instructions_pass(
|
||||
nir,
|
||||
intel_nir_lower_non_uniform_barycentric_at_sample_instr,
|
||||
nir_metadata_none,
|
||||
NULL);
|
||||
}
|
||||
@@ -1,319 +0,0 @@
|
||||
/*
|
||||
* Copyright © 2023 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
#include "util/u_dynarray.h"
|
||||
|
||||
#include "intel_nir.h"
|
||||
|
||||
static bool
|
||||
nir_instr_is_resource_intel(nir_instr *instr)
|
||||
{
|
||||
return instr->type == nir_instr_type_intrinsic &&
|
||||
nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_resource_intel;
|
||||
}
|
||||
|
||||
static bool
|
||||
add_src_instr(nir_src *src, void *state)
|
||||
{
|
||||
struct util_dynarray *inst_array = state;
|
||||
util_dynarray_foreach(inst_array, nir_instr *, instr_ptr) {
|
||||
if (*instr_ptr == src->ssa->parent_instr)
|
||||
return true;
|
||||
}
|
||||
|
||||
util_dynarray_append(inst_array, nir_instr *, src->ssa->parent_instr);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Walk backwards through the SSA graph feeding `def` (breadth-first via a
 * worklist in inst_array) and return the first resource_intel intrinsic
 * found, or NULL when there is none — or when one already immediately
 * produces `def` (nothing to rematerialize in that case).
 */
static nir_intrinsic_instr *
find_resource_intel(struct util_dynarray *inst_array,
                    nir_def *def)
{
   /* If resource_intel is already directly in front of the instruction, there
    * is nothing to do.
    */
   if (nir_instr_is_resource_intel(def->parent_instr))
      return NULL;

   util_dynarray_append(inst_array, nir_instr *, def->parent_instr);

   /* idx walks the worklist; scan_index only ever moves forward so each
    * appended instruction is checked for resource_intel exactly once.
    */
   unsigned idx = 0, scan_index = 0;
   while (idx < util_dynarray_num_elements(inst_array, nir_instr *)) {
      nir_instr *instr = *util_dynarray_element(inst_array, nir_instr *, idx++);

      for (; scan_index < util_dynarray_num_elements(inst_array, nir_instr *); scan_index++) {
         nir_instr *scan_instr = *util_dynarray_element(inst_array, nir_instr *, scan_index);
         if (nir_instr_is_resource_intel(scan_instr))
            return nir_instr_as_intrinsic(scan_instr);
      }

      /* Enqueue this instruction's own sources for scanning. */
      nir_foreach_src(instr, add_src_instr, inst_array);
   }

   return NULL;
}
|
||||
|
||||
/* Rematerialize the resource_intel feeding a resource-using intrinsic right
 * before that intrinsic (see the pass-level comment on
 * intel_nir_lower_non_uniform_resource_intel).  Returns true on change.
 */
static bool
intel_nir_lower_non_uniform_intrinsic(nir_builder *b,
                                      nir_intrinsic_instr *intrin,
                                      struct util_dynarray *inst_array)
{
   /* Index of the source operand that carries the resource handle. */
   unsigned source;
   switch (intrin->intrinsic) {
   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_get_ssbo_size:
   case nir_intrinsic_ssbo_atomic:
   case nir_intrinsic_ssbo_atomic_swap:
   case nir_intrinsic_load_ssbo_block_intel:
   case nir_intrinsic_store_ssbo_block_intel:
   case nir_intrinsic_load_ubo_uniform_block_intel:
   case nir_intrinsic_load_ssbo_uniform_block_intel:
   case nir_intrinsic_image_load_raw_intel:
   case nir_intrinsic_image_store_raw_intel:
   case nir_intrinsic_image_load:
   case nir_intrinsic_image_store:
   case nir_intrinsic_image_atomic:
   case nir_intrinsic_image_atomic_swap:
   case nir_intrinsic_bindless_image_load:
   case nir_intrinsic_bindless_image_store:
   case nir_intrinsic_bindless_image_atomic:
   case nir_intrinsic_bindless_image_atomic_swap:
   case nir_intrinsic_image_size:
   case nir_intrinsic_bindless_image_size:
      source = 0;
      break;

   case nir_intrinsic_store_ssbo:
      /* For SSBO stores src[0] is the value; the resource is src[1]. */
      source = 1;
      break;

   default:
      return false;
   }

   b->cursor = nir_before_instr(&intrin->instr);

   /* inst_array is reused across calls; start the graph walk fresh. */
   util_dynarray_clear(inst_array);

   nir_intrinsic_instr *old_resource_intel =
      find_resource_intel(inst_array, intrin->src[source].ssa);
   if (old_resource_intel == NULL)
      return false;

   /* Clone the found resource_intel just before the consuming intrinsic. */
   nir_instr *new_instr =
      nir_instr_clone(b->shader, &old_resource_intel->instr);

   nir_instr_insert(b->cursor, new_instr);

   nir_intrinsic_instr *new_resource_intel =
      nir_instr_as_intrinsic(new_instr);

   /* Feed the intrinsic's current handle into the clone's block-index
    * source (src[1]), then make the intrinsic consume the clone's result.
    */
   nir_src_rewrite(&new_resource_intel->src[1], intrin->src[source].ssa);
   nir_src_rewrite(&intrin->src[source], &new_resource_intel->def);

   return true;
}
|
||||
|
||||
/* Same rematerialization as intel_nir_lower_non_uniform_intrinsic(), but for
 * texture instructions: every texture/sampler handle source gets a fresh
 * resource_intel clone placed right before the tex instruction.
 */
static bool
intel_nir_lower_non_uniform_tex(nir_builder *b,
                                nir_tex_instr *tex,
                                struct util_dynarray *inst_array)
{
   b->cursor = nir_before_instr(&tex->instr);

   bool progress = false;
   for (unsigned s = 0; s < tex->num_srcs; s++) {
      /* Only handle sources are backed by resource_intel. */
      if (tex->src[s].src_type != nir_tex_src_texture_handle &&
          tex->src[s].src_type != nir_tex_src_sampler_handle)
         continue;

      /* inst_array is reused; start each handle's graph walk fresh. */
      util_dynarray_clear(inst_array);

      nir_intrinsic_instr *old_resource_intel =
         find_resource_intel(inst_array, tex->src[s].src.ssa);
      if (old_resource_intel == NULL)
         continue;

      nir_instr *new_instr =
         nir_instr_clone(b->shader, &old_resource_intel->instr);

      nir_instr_insert(b->cursor, new_instr);

      nir_intrinsic_instr *new_resource_intel =
         nir_instr_as_intrinsic(new_instr);

      /* Route the current handle through the clone (src[1] is the clone's
       * block-index input), then consume the clone's result.
       */
      nir_src_rewrite(&new_resource_intel->src[1], tex->src[s].src.ssa);
      nir_src_rewrite(&tex->src[s].src, &new_resource_intel->def);

      progress = true;
   }

   return progress;
}
|
||||
|
||||
static bool
|
||||
intel_nir_lower_non_uniform_instr(nir_builder *b,
|
||||
nir_instr *instr,
|
||||
void *cb_data)
|
||||
{
|
||||
struct util_dynarray *inst_array = cb_data;
|
||||
|
||||
switch (instr->type) {
|
||||
case nir_instr_type_intrinsic:
|
||||
return intel_nir_lower_non_uniform_intrinsic(b,
|
||||
nir_instr_as_intrinsic(instr),
|
||||
inst_array);
|
||||
|
||||
case nir_instr_type_tex:
|
||||
return intel_nir_lower_non_uniform_tex(b,
|
||||
nir_instr_as_tex(instr),
|
||||
inst_array);
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/** This pass rematerializes resource_intel intrinsics closer to their use.
|
||||
*
|
||||
* For example will turn this :
|
||||
* ssa_1 = iadd ...
|
||||
* ssa_2 = resource_intel ..., ssa_1, ...
|
||||
* ssa_3 = read_first_invocation ssa_2
|
||||
* ssa_4 = load_ssbo ssa_3, ...
|
||||
*
|
||||
* into this :
|
||||
* ssa_1 = iadd ...
|
||||
* ssa_3 = read_first_invocation ssa_1
|
||||
* ssa_5 = resource_intel ..., ssa_3, ...
|
||||
* ssa_4 = load_ssbo ssa_5, ...
|
||||
*
|
||||
* The goal is to have the resource_intel immediately before its use so that
|
||||
* the backend compiler can know how the load_ssbo should be compiled (binding
|
||||
* table or bindless access, etc...).
|
||||
*/
|
||||
/* Entry point for the rematerialization pass documented above.  The dynarray
 * is a scratch worklist shared by all per-instruction callbacks; it lives on
 * a throwaway ralloc context freed once the pass is done.
 */
bool
intel_nir_lower_non_uniform_resource_intel(nir_shader *shader)
{
   void *mem_ctx = ralloc_context(NULL);

   struct util_dynarray inst_array;
   util_dynarray_init(&inst_array, mem_ctx);

   bool ret = nir_shader_instructions_pass(shader,
                                           intel_nir_lower_non_uniform_instr,
                                           nir_metadata_block_index |
                                           nir_metadata_dominance,
                                           &inst_array);

   ralloc_free(mem_ctx);

   return ret;
}
|
||||
|
||||
/* Should this user of a resource_intel result keep consuming it?  True for
 * texture instructions and the resource-using intrinsics; for every other
 * user the cleanup pass bypasses the resource_intel (see
 * intel_nir_cleanup_resource_intel_instr).
 */
static bool
skip_resource_intel_cleanup(nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_tex:
      return true;

   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intrin =
         nir_instr_as_intrinsic(instr);
      switch (intrin->intrinsic) {
      case nir_intrinsic_load_ubo:
      case nir_intrinsic_load_ssbo:
      case nir_intrinsic_store_ssbo:
      case nir_intrinsic_get_ssbo_size:
      case nir_intrinsic_ssbo_atomic:
      case nir_intrinsic_ssbo_atomic_swap:
      case nir_intrinsic_load_ssbo_block_intel:
      case nir_intrinsic_store_ssbo_block_intel:
      case nir_intrinsic_load_ssbo_uniform_block_intel:
      case nir_intrinsic_image_load_raw_intel:
      case nir_intrinsic_image_store_raw_intel:
      case nir_intrinsic_image_load:
      case nir_intrinsic_image_store:
      case nir_intrinsic_image_atomic:
      case nir_intrinsic_image_atomic_swap:
      case nir_intrinsic_bindless_image_load:
      case nir_intrinsic_bindless_image_store:
      case nir_intrinsic_bindless_image_atomic:
      case nir_intrinsic_bindless_image_atomic_swap:
      case nir_intrinsic_image_size:
      case nir_intrinsic_bindless_image_size:
         return true;

      default:
         return false;
      }
   }

   default:
      return false;
   }
}
|
||||
|
||||
/* For each resource_intel, rewrite any use that does not need the wrapper
 * (per skip_resource_intel_cleanup) to consume the raw block-index value
 * (src[1]) directly.  If-condition uses are always rewritten.  Returns true
 * when at least one use was redirected.
 */
static bool
intel_nir_cleanup_resource_intel_instr(nir_builder *b,
                                       nir_intrinsic_instr *intrin,
                                       void *cb_data)
{
   if (intrin->intrinsic != nir_intrinsic_resource_intel)
      return false;

   bool progress = false;
   nir_foreach_use_safe(src, &intrin->def) {
      if (!nir_src_is_if(src) && skip_resource_intel_cleanup(nir_src_parent_instr(src)))
         continue;

      progress = true;
      nir_src_rewrite(src, intrin->src[1].ssa);
   }

   return progress;
}
|
||||
|
||||
/** This pass removes unnecessary resource_intel intrinsics
|
||||
*
|
||||
* This pass must not be run before intel_nir_lower_non_uniform_resource_intel.
|
||||
*/
|
||||
bool
|
||||
intel_nir_cleanup_resource_intel(nir_shader *shader)
|
||||
{
|
||||
void *mem_ctx = ralloc_context(NULL);
|
||||
|
||||
bool ret = nir_shader_intrinsics_pass(shader,
|
||||
intel_nir_cleanup_resource_intel_instr,
|
||||
nir_metadata_block_index |
|
||||
nir_metadata_dominance,
|
||||
NULL);
|
||||
|
||||
ralloc_free(mem_ctx);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -1,108 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2021 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Lower the shading rate output from the bit field format described in the
|
||||
* SPIRV spec :
|
||||
*
|
||||
* bit | name | description
|
||||
* 0 | Vertical2Pixels | Fragment invocation covers 2 pixels vertically
|
||||
* 1 | Vertical4Pixels | Fragment invocation covers 4 pixels vertically
|
||||
* 2 | Horizontal2Pixels | Fragment invocation covers 2 pixels horizontally
|
||||
* 3 | Horizontal4Pixels | Fragment invocation covers 4 pixels horizontally
|
||||
*
|
||||
* into a single dword composed of 2 fp16 to be stored in the dword 0 of the
|
||||
* VUE header.
|
||||
*
|
||||
* When no horizontal/vertical bits are set, the size in pixel size in that
|
||||
* dimension is assumed to be 1.
|
||||
*
|
||||
* According to the specification, the shading rate output can be read &
|
||||
* written. A read after a write should report a different value if the
|
||||
* implementation decides on different primitive shading rate for some reason.
|
||||
* This is never the case in our implementation.
|
||||
*/
|
||||
|
||||
#include "intel_nir.h"
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
|
||||
/* Translate VARYING_SLOT_PRIMITIVE_SHADING_RATE accesses between the SPIR-V
 * bit-field encoding (bits 0-1: vertical 2/4 pixels, bits 2-3: horizontal
 * 2/4 pixels) and the packed pair of fp16 pixel sizes stored in VUE header
 * dword 0.  Stores are converted bit-field -> fp16 pair before the store;
 * loads are converted fp16 pair -> bit-field after the load.
 */
static bool
lower_shading_rate_output_instr(nir_builder *b, nir_intrinsic_instr *intrin,
                                UNUSED void *_state)
{
   nir_intrinsic_op op = intrin->intrinsic;

   if (op != nir_intrinsic_load_output &&
       op != nir_intrinsic_store_output &&
       op != nir_intrinsic_load_per_primitive_output &&
       op != nir_intrinsic_store_per_primitive_output)
      return false;

   struct nir_io_semantics io = nir_intrinsic_io_semantics(intrin);
   if (io.location != VARYING_SLOT_PRIMITIVE_SHADING_RATE)
      return false;

   bool is_store = op == nir_intrinsic_store_output ||
                   op == nir_intrinsic_store_per_primitive_output;

   /* Stores are rewritten before the instruction (we replace its source);
    * loads after it (we post-process its result).
    */
   b->cursor = is_store ? nir_before_instr(&intrin->instr) : nir_after_instr(&intrin->instr);

   if (is_store) {
      nir_def *bit_field = intrin->src[0].ssa;
      /* Pixel size in each dimension is 1 << (2-bit field value). */
      nir_def *fp16_x =
         nir_i2f16(b,
                   nir_ishl(b, nir_imm_int(b, 1),
                            nir_ishr_imm(b, bit_field, 2)));
      nir_def *fp16_y =
         nir_i2f16(b,
                   nir_ishl(b, nir_imm_int(b, 1),
                            nir_iand_imm(b, bit_field, 0x3)));
      nir_def *packed_fp16_xy = nir_pack_32_2x16_split(b, fp16_x, fp16_y);

      nir_src_rewrite(&intrin->src[0], packed_fp16_xy);
   } else {
      nir_def *packed_fp16_xy = &intrin->def;

      nir_def *u32_x =
         nir_i2i32(b, nir_unpack_32_2x16_split_x(b, packed_fp16_xy));
      nir_def *u32_y =
         nir_i2i32(b, nir_unpack_32_2x16_split_y(b, packed_fp16_xy));

      /* log2 of the pixel sizes (for values 1/2/4: size >> 1 gives 0/1/2)
       * recombined into the SPIR-V bit-field layout.
       */
      nir_def *bit_field =
         nir_ior(b, nir_ishl_imm(b, nir_ushr_imm(b, u32_x, 1), 2),
                 nir_ushr_imm(b, u32_y, 1));

      /* Only uses after the conversion see the bit-field form. */
      nir_def_rewrite_uses_after(packed_fp16_xy, bit_field,
                                 bit_field->parent_instr);
   }

   return true;
}
|
||||
|
||||
bool
|
||||
intel_nir_lower_shading_rate_output(nir_shader *nir)
|
||||
{
|
||||
return nir_shader_intrinsics_pass(nir, lower_shading_rate_output_instr,
|
||||
nir_metadata_block_index |
|
||||
nir_metadata_dominance, NULL);
|
||||
}
|
||||
@@ -1,247 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2023 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "intel_nir.h"
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
|
||||
/*
|
||||
* This pass lowers a few of the sparse instructions to something HW can
|
||||
* handle.
|
||||
*
|
||||
* The image_*_sparse_load intrinsics are lowered into 2 instructions, a
|
||||
* regular image_*_load intrinsic and a sparse texture txf operation and
|
||||
* reconstructs the sparse vector of the original intrinsic using the 2 new
|
||||
* values. We need to do this because our backend implements image load/store
|
||||
* using the dataport and the dataport unit doesn't provide residency
|
||||
* information. We need to use the sampler for residency.
|
||||
*
|
||||
* The is_sparse_texels_resident intrinsic is lowered to a bit checking
|
||||
* operation as the data reported by the sampler is a single bit per lane in
|
||||
* the first component.
|
||||
*
|
||||
* The tex_* instructions with a compare value need to be lower into 2
|
||||
* instructions due to a HW limitation :
|
||||
*
|
||||
* SKL PRMs, Volume 7: 3D-Media-GPGPU, Messages, SIMD Payloads :
|
||||
*
|
||||
* "The Pixel Null Mask field, when enabled via the Pixel Null Mask Enable
|
||||
 * will be incorrect for sample_c when applied to a surface with 64-bit per
|
||||
* texel format such as R16G16BA16_UNORM. Pixel Null mask Enable may
|
||||
* incorrectly report pixels as referencing a Null surface."
|
||||
*/
|
||||
|
||||
static void
|
||||
lower_is_sparse_texels_resident(nir_builder *b, nir_intrinsic_instr *intrin)
|
||||
{
|
||||
b->cursor = nir_instr_remove(&intrin->instr);
|
||||
|
||||
nir_def_rewrite_uses(
|
||||
&intrin->def,
|
||||
nir_i2b(b, nir_iand(b, intrin->src[0].ssa,
|
||||
nir_ishl(b, nir_imm_int(b, 1),
|
||||
nir_load_subgroup_invocation(b)))));
|
||||
}
|
||||
|
||||
static void
|
||||
lower_sparse_residency_code_and(nir_builder *b, nir_intrinsic_instr *intrin)
|
||||
{
|
||||
b->cursor = nir_instr_remove(&intrin->instr);
|
||||
|
||||
nir_def_rewrite_uses(
|
||||
&intrin->def,
|
||||
nir_iand(b, intrin->src[0].ssa, intrin->src[1].ssa));
|
||||
}
|
||||
|
||||
/* Split an image_*_sparse_load into a regular image load (dataport) plus a
 * sparse txf (sampler) that only supplies the residency component, then
 * rebuild the original sparse result vector from the two.  The dataport
 * cannot report residency, hence the extra sampler access.
 */
static void
lower_sparse_image_load(nir_builder *b, nir_intrinsic_instr *intrin)
{
   b->cursor = nir_instr_remove(&intrin->instr);

   /* Emit the non-sparse equivalent load with one fewer component
    * (the residency component is dropped here and re-added at the end).
    */
   nir_def *img_load;
   nir_intrinsic_instr *new_intrin;
   if (intrin->intrinsic == nir_intrinsic_image_sparse_load) {
      img_load = nir_image_load(b,
                                intrin->num_components - 1,
                                intrin->def.bit_size,
                                intrin->src[0].ssa,
                                intrin->src[1].ssa,
                                intrin->src[2].ssa,
                                intrin->src[3].ssa);
      new_intrin = nir_instr_as_intrinsic(img_load->parent_instr);
      nir_intrinsic_set_range_base(new_intrin, nir_intrinsic_range_base(intrin));
   } else {
      img_load = nir_bindless_image_load(b,
                                         intrin->num_components - 1,
                                         intrin->def.bit_size,
                                         intrin->src[0].ssa,
                                         intrin->src[1].ssa,
                                         intrin->src[2].ssa,
                                         intrin->src[3].ssa);
      new_intrin = nir_instr_as_intrinsic(img_load->parent_instr);
   }

   /* Carry over the image indices/flags from the original intrinsic. */
   nir_intrinsic_set_image_array(new_intrin, nir_intrinsic_image_array(intrin));
   nir_intrinsic_set_image_dim(new_intrin, nir_intrinsic_image_dim(intrin));
   nir_intrinsic_set_format(new_intrin, nir_intrinsic_format(intrin));
   nir_intrinsic_set_access(new_intrin, nir_intrinsic_access(intrin));
   nir_intrinsic_set_dest_type(new_intrin, nir_intrinsic_dest_type(intrin));

   nir_def *dests[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < intrin->num_components - 1; i++) {
      dests[i] = nir_channel(b, img_load, i);
   }

   /* Use texture instruction to compute residency */
   nir_tex_instr *tex = nir_tex_instr_create(b->shader, 3);

   tex->op = nir_texop_txf;
   /* We don't care about the dest type since we're not using any of that
    * data.
    */
   tex->dest_type = nir_type_float32;
   tex->is_array = nir_intrinsic_image_array(intrin);
   tex->is_shadow = false;
   tex->sampler_index = 0;
   tex->is_sparse = true;

   tex->src[0].src_type = intrin->intrinsic == nir_intrinsic_image_sparse_load ?
                          nir_tex_src_texture_offset :
                          nir_tex_src_texture_handle;
   tex->src[0].src = nir_src_for_ssa(intrin->src[0].ssa);

   tex->coord_components = nir_image_intrinsic_coord_components(intrin);
   nir_def *coord;
   if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE &&
       nir_intrinsic_image_array(intrin)) {
      /* Image coords address cube arrays by flat layer; the sampler wants
       * separate face (layer % 6) and slice (layer / 6) coordinates.
       */
      tex->coord_components++;

      nir_def *img_layer = nir_channel(b, intrin->src[1].ssa, 2);
      nir_def *tex_slice = nir_idiv(b, img_layer, nir_imm_int(b, 6));
      /* face = layer - slice * 6, i.e. layer % 6 */
      nir_def *tex_face =
         nir_iadd(b, img_layer, nir_ineg(b, nir_imul_imm(b, tex_slice, 6)));
      nir_def *comps[4] = {
         nir_channel(b, intrin->src[1].ssa, 0),
         nir_channel(b, intrin->src[1].ssa, 1),
         tex_face,
         tex_slice
      };
      coord = nir_vec(b, comps, 4);
   } else {
      coord = nir_channels(b, intrin->src[1].ssa,
                           nir_component_mask(tex->coord_components));
   }
   tex->src[1].src_type = nir_tex_src_coord;
   tex->src[1].src = nir_src_for_ssa(coord);

   tex->src[2].src_type = nir_tex_src_lod;
   tex->src[2].src = nir_src_for_ssa(nir_imm_int(b, 0));

   /* 4 data components (ignored) + 1 residency component. */
   nir_def_init(&tex->instr, &tex->def, 5,
                intrin->def.bit_size);

   nir_builder_instr_insert(b, &tex->instr);

   /* The residency code is the sparse component of the txf result. */
   dests[intrin->num_components - 1] = nir_channel(b, &tex->def, 4);

   nir_def_rewrite_uses(
      &intrin->def,
      nir_vec(b, dests, intrin->num_components));
}
|
||||
|
||||
/* Work around broken Pixel Null Mask with sample_c on 64-bit texel formats
 * (see the file comment): split a sparse compare tex into the original tex
 * without residency plus a cloned sparse tex without the compare source,
 * and take the residency component from the clone.
 */
static void
lower_tex_compare(nir_builder *b, nir_tex_instr *tex, int compare_idx)
{
   b->cursor = nir_after_instr(&tex->instr);

   /* Clone the original instruction */
   nir_tex_instr *sparse_tex = nir_instr_as_tex(nir_instr_clone(b->shader, &tex->instr));
   nir_def_init(&sparse_tex->instr, &sparse_tex->def,
                tex->def.num_components, tex->def.bit_size);
   nir_builder_instr_insert(b, &sparse_tex->instr);

   /* Drop the compare source on the cloned instruction */
   nir_tex_instr_remove_src(sparse_tex, compare_idx);

   /* Drop the residency query on the original tex instruction */
   tex->is_sparse = false;
   tex->def.num_components = tex->def.num_components - 1;

   /* Stitch together: data components from the original, residency (last
    * component) from the sparse clone.
    */
   nir_def *new_comps[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < tex->def.num_components; i++)
      new_comps[i] = nir_channel(b, &tex->def, i);
   new_comps[tex->def.num_components] =
      nir_channel(b, &sparse_tex->def, tex->def.num_components);

   nir_def *new_vec = nir_vec(b, new_comps, sparse_tex->def.num_components);

   /* Only uses after the rebuild see the stitched vector. */
   nir_def_rewrite_uses_after(&tex->def, new_vec, new_vec->parent_instr);
}
|
||||
|
||||
/* Per-instruction callback dispatching to the sparse lowerings above.
 * Returns true when the instruction was lowered.
 */
static bool
lower_sparse_intrinsics(nir_builder *b, nir_instr *instr, void *cb_data)
{
   switch (instr->type) {
   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
      switch (intrin->intrinsic) {
      case nir_intrinsic_image_sparse_load:
      case nir_intrinsic_bindless_image_sparse_load:
         lower_sparse_image_load(b, intrin);
         return true;

      case nir_intrinsic_is_sparse_texels_resident:
         lower_is_sparse_texels_resident(b, intrin);
         return true;

      case nir_intrinsic_sparse_residency_code_and:
         lower_sparse_residency_code_and(b, intrin);
         return true;

      default:
         return false;
      }
   }

   case nir_instr_type_tex: {
      /* Sparse compare tex needs the HW workaround split. */
      nir_tex_instr *tex = nir_instr_as_tex(instr);
      int comp_idx = nir_tex_instr_src_index(tex, nir_tex_src_comparator);
      if (comp_idx != -1 && tex->is_sparse) {
         lower_tex_compare(b, tex, comp_idx);
         return true;
      }
      return false;
   }

   default:
      return false;
   }
}
|
||||
|
||||
bool
|
||||
intel_nir_lower_sparse_intrinsics(nir_shader *nir)
|
||||
{
|
||||
return nir_shader_instructions_pass(nir, lower_sparse_intrinsics,
|
||||
nir_metadata_block_index |
|
||||
nir_metadata_dominance,
|
||||
NULL);
|
||||
}
|
||||
@@ -1,133 +0,0 @@
|
||||
/*
|
||||
* Copyright © 2024 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
#include "intel_nir.h"
|
||||
|
||||
/**
 * Pack either the explicit LOD or LOD bias and the array index together.
 *
 * Rewrites a cube-array txl/txb so that the LOD (or bias) and the array
 * index share one 32-bit value in a nir_tex_src_backend1 source, and drops
 * the array component from the coordinate.  Returns true if the instruction
 * was rewritten.
 */
static bool
pack_lod_and_array_index(nir_builder *b, nir_tex_instr *tex)
{
   /* If 32-bit texture coordinates are used, pack either the explicit LOD or
    * LOD bias and the array index into a single (32-bit) value.
    */
   int lod_index = nir_tex_instr_src_index(tex, nir_tex_src_lod);
   if (lod_index < 0) {
      lod_index = nir_tex_instr_src_index(tex, nir_tex_src_bias);

      /* The explicit LOD or LOD bias may not be found if this lowering has
       * already occurred.  The explicit LOD may also not be found in some
       * cases where it is zero.
       */
      if (lod_index < 0)
         return false;
   }

   assert(nir_tex_instr_src_type(tex, lod_index) == nir_type_float);

   /* Also do not perform this packing if the explicit LOD is zero. */
   if (tex->op == nir_texop_txl &&
       nir_src_is_const(tex->src[lod_index].src) &&
       nir_src_as_float(tex->src[lod_index].src) == 0.0) {
      return false;
   }

   const int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   assert(coord_index >= 0);

   nir_def *lod = tex->src[lod_index].src.ssa;
   nir_def *coord = tex->src[coord_index].src.ssa;

   assert(nir_tex_instr_src_type(tex, coord_index) == nir_type_float);

   /* Only 32-bit coordinates can hold the packed encoding. */
   if (coord->bit_size < 32)
      return false;

   b->cursor = nir_before_instr(&tex->instr);

   /* First, combine the two values. The packing format is a little weird.
    * The explicit LOD / LOD bias is stored as float, as normal. However, the
    * array index is converted to an integer and smashed into the low 9 bits.
    */
   const unsigned array_index = tex->coord_components - 1;

   /* Round the array index to the nearest integer and clamp it to the
    * 9-bit field (maximum 511).
    */
   nir_def *clamped_ai =
      nir_umin(b,
               nir_f2u32(b, nir_fround_even(b, nir_channel(b, coord,
                                                           array_index))),
               nir_imm_int(b, 511));

   nir_def *lod_ai = nir_ior(b, nir_iand_imm(b, lod, 0xfffffe00), clamped_ai);

   /* Second, replace the coordinate with a new value that has one fewer
    * component (i.e., drop the array index).
    */
   nir_def *reduced_coord = nir_trim_vector(b, coord, 2);
   tex->coord_components--;

   /* Finally, remove the old sources and add the new. */
   nir_src_rewrite(&tex->src[coord_index].src, reduced_coord);

   nir_tex_instr_remove_src(tex, lod_index);
   nir_tex_instr_add_src(tex, nir_tex_src_backend1, lod_ai);

   return true;
}
|
||||
|
||||
static bool
|
||||
intel_nir_lower_texture_instr(nir_builder *b, nir_instr *instr, void *cb_data)
|
||||
{
|
||||
if (instr->type != nir_instr_type_tex)
|
||||
return false;
|
||||
|
||||
const struct intel_nir_lower_texture_opts *opts = cb_data;
|
||||
nir_tex_instr *tex = nir_instr_as_tex(instr);
|
||||
|
||||
switch (tex->op) {
|
||||
case nir_texop_txl:
|
||||
case nir_texop_txb:
|
||||
if (tex->is_array &&
|
||||
tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
|
||||
opts->combined_lod_and_array_index) {
|
||||
return pack_lod_and_array_index(b, tex);
|
||||
}
|
||||
return false;
|
||||
default:
|
||||
/* Nothing to do */
|
||||
return false;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
intel_nir_lower_texture(nir_shader *shader,
|
||||
const struct intel_nir_lower_texture_opts *opts)
|
||||
{
|
||||
return nir_shader_instructions_pass(shader,
|
||||
intel_nir_lower_texture_instr,
|
||||
nir_metadata_none,
|
||||
(void *)opts);
|
||||
}
|
||||
@@ -1,253 +0,0 @@
|
||||
/*
|
||||
* Copyright © 2014 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "intel_nir.h"
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
|
||||
/*
|
||||
* Implements a small peephole optimization that looks for a multiply that
|
||||
* is only ever used in an add and replaces both with an fma.
|
||||
*/
|
||||
|
||||
/* Returns true if every (transitive) use of `def` eventually feeds an fadd.
 * mov/fneg/fabs are looked through, since those get folded into the ffma's
 * source modifiers.
 */
static inline bool
are_all_uses_fadd(nir_def *def)
{
   nir_foreach_use_including_if(use_src, def) {
      /* A use as an if-condition can never be fused into an ffma. */
      if (nir_src_is_if(use_src))
         return false;

      nir_instr *use_instr = nir_src_parent_instr(use_src);
      if (use_instr->type != nir_instr_type_alu)
         return false;

      nir_alu_instr *use_alu = nir_instr_as_alu(use_instr);
      switch (use_alu->op) {
      case nir_op_fadd:
         break; /* This one's ok */

      case nir_op_mov:
      case nir_op_fneg:
      case nir_op_fabs:
         /* These get absorbed into the fused multiply's sources, so
          * recurse into their uses.
          */
         if (!are_all_uses_fadd(&use_alu->def))
            return false;
         break;

      default:
         return false;
      }
   }

   return true;
}
|
||||
|
||||
/* Chase `src` upward through mov/fneg/fabs chains looking for an fmul that
 * can be fused into an ffma.  The net swizzle is accumulated into `swizzle`
 * and the net source modifiers into `negate`/`abs`.  Returns the fmul, or
 * NULL if no fusable multiply was found.
 */
static nir_alu_instr *
get_mul_for_src(nir_alu_src *src, unsigned num_components,
                uint8_t *swizzle, bool *negate, bool *abs)
{
   uint8_t swizzle_tmp[NIR_MAX_VEC_COMPONENTS];

   nir_instr *instr = src->src.ssa->parent_instr;
   if (instr->type != nir_instr_type_alu)
      return NULL;

   nir_alu_instr *alu = nir_instr_as_alu(instr);

   /* We want to bail if any of the other ALU operations involved is labeled
    * exact.  One reason for this is that, while the value that is changing is
    * actually the result of the add and not the multiply, the intention of
    * the user when they specify an exact multiply is that they want *that*
    * value and what they don't care about is the add.  Another reason is that
    * SPIR-V explicitly requires this behaviour.
    */
   if (alu->exact)
      return NULL;

   switch (alu->op) {
   case nir_op_mov:
      alu = get_mul_for_src(&alu->src[0], alu->def.num_components,
                            swizzle, negate, abs);
      break;

   case nir_op_fneg:
      alu = get_mul_for_src(&alu->src[0], alu->def.num_components,
                            swizzle, negate, abs);
      /* Two stacked negations cancel out. */
      *negate = !*negate;
      break;

   case nir_op_fabs:
      alu = get_mul_for_src(&alu->src[0], alu->def.num_components,
                            swizzle, negate, abs);
      /* Absolute value discards any negation applied below it. */
      *negate = false;
      *abs = true;
      break;

   case nir_op_fmul:
      /* Only absorb a fmul into a ffma if the fmul is only used in fadd
       * operations.  This prevents us from being too aggressive with our
       * fusing which can actually lead to more instructions.
       */
      if (!are_all_uses_fadd(&alu->def))
         return NULL;
      break;

   default:
      return NULL;
   }

   /* The recursive calls above may have failed to find a multiply. */
   if (!alu)
      return NULL;

   /* Copy swizzle data before overwriting it to avoid setting a wrong swizzle.
    *
    * Example:
    * Former swizzle[] = xyzw
    * src->swizzle[] = zyxx
    *
    * Expected output swizzle = zyxx
    * If we reuse swizzle in the loop, then output swizzle would be zyzz.
    */
   memcpy(swizzle_tmp, swizzle, NIR_MAX_VEC_COMPONENTS*sizeof(uint8_t));
   for (int i = 0; i < num_components; i++)
      swizzle[i] = swizzle_tmp[src->swizzle[i]];

   return alu;
}
|
||||
|
||||
/**
|
||||
* Given a list of (at least two) nir_alu_src's, tells if any of them is a
|
||||
* constant value and is used only once.
|
||||
*/
|
||||
static bool
|
||||
any_alu_src_is_a_constant(nir_alu_src srcs[])
|
||||
{
|
||||
for (unsigned i = 0; i < 2; i++) {
|
||||
if (srcs[i].src.ssa->parent_instr->type == nir_instr_type_load_const) {
|
||||
nir_load_const_instr *load_const =
|
||||
nir_instr_as_load_const (srcs[i].src.ssa->parent_instr);
|
||||
|
||||
if (list_is_singular(&load_const->def.uses))
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Try to fuse an fadd with one of its multiply sources into a single ffma.
 * Returns true if the fadd was replaced.
 */
static bool
intel_nir_opt_peephole_ffma_instr(nir_builder *b,
                                  nir_instr *instr,
                                  UNUSED void *cb_data)
{
   if (instr->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *add = nir_instr_as_alu(instr);
   if (add->op != nir_op_fadd)
      return false;

   /* An exact add must not be reassociated into an ffma. */
   if (add->exact)
      return false;

   /* This is the case a + a.  We would rather handle this with an
    * algebraic reduction than fuse it.  Also, we want to only fuse
    * things where the multiply is used only once and, in this case,
    * it would be used twice by the same instruction.
    */
   if (add->src[0].src.ssa == add->src[1].src.ssa)
      return false;

   nir_alu_instr *mul;
   uint8_t add_mul_src, swizzle[NIR_MAX_VEC_COMPONENTS];
   bool negate, abs;
   for (add_mul_src = 0; add_mul_src < 2; add_mul_src++) {
      /* Reset to the identity swizzle and no modifiers for each source. */
      for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++)
         swizzle[i] = i;

      negate = false;
      abs = false;

      mul = get_mul_for_src(&add->src[add_mul_src],
                            add->def.num_components,
                            swizzle, &negate, &abs);

      if (mul != NULL)
         break;
   }

   if (mul == NULL)
      return false;

   unsigned bit_size = add->def.bit_size;

   nir_def *mul_src[2];
   mul_src[0] = mul->src[0].src.ssa;
   mul_src[1] = mul->src[1].src.ssa;

   /* If any of the operands of the fmul and any of the fadd is a constant,
    * we bypass because it will be more efficient as the constants will be
    * propagated as operands, potentially saving two load_const instructions.
    */
   if (any_alu_src_is_a_constant(mul->src) &&
       any_alu_src_is_a_constant(add->src)) {
      return false;
   }

   b->cursor = nir_before_instr(&add->instr);

   /* Materialize the modifiers accumulated while chasing the multiply. */
   if (abs) {
      for (unsigned i = 0; i < 2; i++)
         mul_src[i] = nir_fabs(b, mul_src[i]);
   }

   /* A single negation on either multiply operand negates the product. */
   if (negate)
      mul_src[0] = nir_fneg(b, mul_src[0]);

   nir_alu_instr *ffma = nir_alu_instr_create(b->shader, nir_op_ffma);

   /* Sources 0 and 1 come from the multiply, with the multiply's swizzle
    * composed with the one accumulated during the chase; source 2 is the
    * other fadd operand.
    */
   for (unsigned i = 0; i < 2; i++) {
      ffma->src[i].src = nir_src_for_ssa(mul_src[i]);
      for (unsigned j = 0; j < add->def.num_components; j++)
         ffma->src[i].swizzle[j] = mul->src[i].swizzle[swizzle[j]];
   }
   nir_alu_src_copy(&ffma->src[2], &add->src[1 - add_mul_src]);

   nir_def_init(&ffma->instr, &ffma->def,
                add->def.num_components, bit_size);
   nir_def_rewrite_uses(&add->def, &ffma->def);

   nir_builder_instr_insert(b, &ffma->instr);
   assert(list_is_empty(&add->def.uses));
   nir_instr_remove(&add->instr);

   return true;
}
|
||||
|
||||
bool
|
||||
intel_nir_opt_peephole_ffma(nir_shader *shader)
|
||||
{
|
||||
return nir_shader_instructions_pass(shader, intel_nir_opt_peephole_ffma_instr,
|
||||
nir_metadata_block_index |
|
||||
nir_metadata_dominance,
|
||||
NULL);
|
||||
}
|
||||
@@ -1,319 +0,0 @@
|
||||
/*
|
||||
* Copyright © 2022 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "intel_nir.h"
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
|
||||
/**
|
||||
* Implement a peephole pass to convert integer multiplications to imul32x16.
|
||||
*/
|
||||
|
||||
/* Per-shader state threaded through the instruction callback. */
struct pass_data {
   /* Cache for nir_unsigned_upper_bound() results. */
   struct hash_table *range_ht;
};
|
||||
|
||||
/* Replace `imul` with a 32x16 multiply (`new_opcode` is imul_32x16 or
 * umul_32x16).  `small_val` selects which of the two original sources is
 * known to fit in 16 bits; it becomes the new instruction's second source.
 */
static void
replace_imul_instr(nir_builder *b, nir_alu_instr *imul, unsigned small_val,
                   nir_op new_opcode)
{
   assert(small_val == 0 || small_val == 1);

   b->cursor = nir_before_instr(&imul->instr);

   nir_alu_instr *imul_32x16 = nir_alu_instr_create(b->shader, new_opcode);

   /* The wide (32-bit) source goes first; the 16-bit-representable one
    * second.
    */
   nir_alu_src_copy(&imul_32x16->src[0], &imul->src[1 - small_val]);
   nir_alu_src_copy(&imul_32x16->src[1], &imul->src[small_val]);

   nir_def_init(&imul_32x16->instr, &imul_32x16->def,
                imul->def.num_components, 32);

   nir_def_rewrite_uses(&imul->def,
                        &imul_32x16->def);

   nir_builder_instr_insert(b, &imul_32x16->instr);

   nir_instr_remove(&imul->instr);
   nir_instr_free(&imul->instr);
}
|
||||
|
||||
/* Which unary operations (ineg/iabs) appear at the root of an expression
 * tree during range analysis.  Used to prefer the multiply source without a
 * source modifier when both operands would fit in 16 bits.
 */
enum root_operation {
   non_unary = 0,
   integer_neg = 1 << 0,
   integer_abs = 1 << 1,
   integer_neg_abs = integer_neg | integer_abs,
   /* Sentinel: no candidate source has been chosen yet. */
   invalid_root = 255
};
|
||||
|
||||
/* Compute a conservative signed range [*lo, *hi] for `scalar`, recursing
 * through iabs/ineg/imax/imin and falling back to
 * nir_unsigned_upper_bound() for everything else.  Returns which unary
 * operations were seen at the root of the expression.
 */
static enum root_operation
signed_integer_range_analysis(nir_shader *shader, struct hash_table *range_ht,
                              nir_scalar scalar, int *lo, int *hi)
{
   if (nir_scalar_is_const(scalar)) {
      *lo = nir_scalar_as_int(scalar);
      *hi = *lo;
      return non_unary;
   }

   if (nir_scalar_is_alu(scalar)) {
      switch (nir_scalar_alu_op(scalar)) {
      case nir_op_iabs:
         signed_integer_range_analysis(shader, range_ht,
                                       nir_scalar_chase_alu_src(scalar, 0),
                                       lo, hi);

         /* abs(INT32_MIN) is not representable; conservatively widen. */
         if (*lo == INT32_MIN) {
            *hi = INT32_MAX;
         } else {
            const int32_t a = abs(*lo);
            const int32_t b = abs(*hi);

            *lo = MIN2(a, b);
            *hi = MAX2(a, b);
         }

         /* Absolute value wipes out any inner negations, and it is redundant
          * with any inner absolute values.
          */
         return integer_abs;

      case nir_op_ineg: {
         const enum root_operation root =
            signed_integer_range_analysis(shader, range_ht,
                                          nir_scalar_chase_alu_src(scalar, 0),
                                          lo, hi);

         /* -INT32_MIN is not representable; conservatively widen. */
         if (*lo == INT32_MIN) {
            *hi = INT32_MAX;
         } else {
            const int32_t a = -(*lo);
            const int32_t b = -(*hi);

            *lo = MIN2(a, b);
            *hi = MAX2(a, b);
         }

         /* Negation of a negation cancels out, but negation of absolute value
          * must preserve the integer_abs bit.
          */
         return root ^ integer_neg;
      }

      case nir_op_imax: {
         int src0_lo, src0_hi;
         int src1_lo, src1_hi;

         signed_integer_range_analysis(shader, range_ht,
                                       nir_scalar_chase_alu_src(scalar, 0),
                                       &src0_lo, &src0_hi);
         signed_integer_range_analysis(shader, range_ht,
                                       nir_scalar_chase_alu_src(scalar, 1),
                                       &src1_lo, &src1_hi);

         *lo = MAX2(src0_lo, src1_lo);
         *hi = MAX2(src0_hi, src1_hi);

         return non_unary;
      }

      case nir_op_imin: {
         int src0_lo, src0_hi;
         int src1_lo, src1_hi;

         signed_integer_range_analysis(shader, range_ht,
                                       nir_scalar_chase_alu_src(scalar, 0),
                                       &src0_lo, &src0_hi);
         signed_integer_range_analysis(shader, range_ht,
                                       nir_scalar_chase_alu_src(scalar, 1),
                                       &src1_lo, &src1_hi);

         *lo = MIN2(src0_lo, src1_lo);
         *hi = MIN2(src0_hi, src1_hi);

         return non_unary;
      }

      default:
         break;
      }
   }

   /* Any value with the sign-bit set is problematic.  Consider the case when
    * bound is 0x80000000.  As an unsigned value, this means the value must be
    * in the range [0, 0x80000000].  As a signed value, it means the value must
    * be in the range [0, INT_MAX] or it must be INT_MIN.
    *
    * If bound is -2, it means the value is either in the range [INT_MIN, -2]
    * or it is in the range [0, INT_MAX].
    *
    * This function only returns a single, contiguous range.  The union of the
    * two ranges for any value of bound with the sign-bit set is [INT_MIN,
    * INT_MAX].
    */
   const int32_t bound = nir_unsigned_upper_bound(shader, range_ht,
                                                  scalar, NULL);
   if (bound < 0) {
      *lo = INT32_MIN;
      *hi = INT32_MAX;
   } else {
      *lo = 0;
      *hi = bound;
   }

   return non_unary;
}
|
||||
|
||||
/* Convert a 32-bit nir_op_imul to imul_32x16 / umul_32x16 when one source
 * is known to fit in 16 bits.  Constant sources are checked directly; for
 * scalar multiplies, non-constant sources are checked via range analysis.
 * Returns true if the instruction was replaced.
 */
static bool
intel_nir_opt_peephole_imul32x16_instr(nir_builder *b,
                                       nir_instr *instr,
                                       void *cb_data)
{
   struct pass_data *d = (struct pass_data *) cb_data;
   struct hash_table *range_ht = d->range_ht;

   if (instr->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *imul = nir_instr_as_alu(instr);
   if (imul->op != nir_op_imul)
      return false;

   if (imul->def.bit_size != 32)
      return false;

   /* nir_num_opcodes doubles as "no replacement found". */
   nir_op new_opcode = nir_num_opcodes;

   /* Phase 1: look for a constant source whose every component fits in a
    * signed or unsigned 16-bit value.
    */
   unsigned i;
   for (i = 0; i < 2; i++) {
      if (!nir_src_is_const(imul->src[i].src))
         continue;

      int64_t lo = INT64_MAX;
      int64_t hi = INT64_MIN;

      for (unsigned comp = 0; comp < imul->def.num_components; comp++) {
         int64_t v = nir_src_comp_as_int(imul->src[i].src, comp);

         if (v < lo)
            lo = v;

         if (v > hi)
            hi = v;
      }

      if (lo >= INT16_MIN && hi <= INT16_MAX) {
         new_opcode = nir_op_imul_32x16;
         break;
      } else if (lo >= 0 && hi <= UINT16_MAX) {
         new_opcode = nir_op_umul_32x16;
         break;
      }
   }

   if (new_opcode != nir_num_opcodes) {
      replace_imul_instr(b, imul, i, new_opcode);
      return true;
   }

   /* Phase 2 only handles scalar multiplies. */
   if (imul->def.num_components > 1)
      return false;

   const nir_scalar imul_scalar = { &imul->def, 0 };
   int idx = -1;
   enum root_operation prev_root = invalid_root;

   for (i = 0; i < 2; i++) {
      /* All constants were previously processed. There is nothing more to
       * learn from a constant here.
       */
      if (imul->src[i].src.ssa->parent_instr->type == nir_instr_type_load_const)
         continue;

      nir_scalar scalar = nir_scalar_chase_alu_src(imul_scalar, i);
      int lo = INT32_MIN;
      int hi = INT32_MAX;

      const enum root_operation root =
         signed_integer_range_analysis(b->shader, range_ht, scalar, &lo, &hi);

      /* Copy propagation (in the backend) has trouble handling cases like
       *
       *    mov(8)   g60<1>D   -g59<8,8,1>D
       *    mul(8)   g61<1>D   g63<8,8,1>D   g60<16,8,2>W
       *
       * If g59 had absolute value instead of negation, even improved copy
       * propagation would not be able to make progress.
       *
       * In cases where both sources to the integer multiplication can fit in
       * 16-bits, choose the source that does not have a source modifier.
       */
      if (root < prev_root) {
         if (lo >= INT16_MIN && hi <= INT16_MAX) {
            new_opcode = nir_op_imul_32x16;
            idx = i;
            prev_root = root;

            /* non_unary (no modifier) is the best choice; stop early. */
            if (root == non_unary)
               break;
         } else if (lo >= 0 && hi <= UINT16_MAX) {
            new_opcode = nir_op_umul_32x16;
            idx = i;
            prev_root = root;

            if (root == non_unary)
               break;
         }
      }
   }

   if (new_opcode == nir_num_opcodes) {
      assert(idx == -1);
      assert(prev_root == invalid_root);
      return false;
   }

   assert(idx != -1);
   assert(prev_root != invalid_root);

   replace_imul_instr(b, imul, idx, new_opcode);
   return true;
}
|
||||
|
||||
bool
|
||||
intel_nir_opt_peephole_imul32x16(nir_shader *shader)
|
||||
{
|
||||
struct pass_data cb_data;
|
||||
|
||||
cb_data.range_ht = _mesa_pointer_hash_table_create(NULL);
|
||||
|
||||
bool progress = nir_shader_instructions_pass(shader,
|
||||
intel_nir_opt_peephole_imul32x16_instr,
|
||||
nir_metadata_block_index |
|
||||
nir_metadata_dominance,
|
||||
&cb_data);
|
||||
|
||||
_mesa_hash_table_destroy(cb_data.range_ht, NULL);
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
@@ -1,134 +0,0 @@
|
||||
/*
|
||||
* Copyright © 2016 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
#include "intel_nir.h"
|
||||
|
||||
/**
|
||||
* Implements the WaPreventHSTessLevelsInterference workaround (for Gfx7-8).
|
||||
*
|
||||
* From the Broadwell PRM, Volume 7 (3D-Media-GPGPU), Page 494 (below the
|
||||
* definition of the patch header layouts):
|
||||
*
|
||||
* "HW Bug: The Tessellation stage will incorrectly add domain points
|
||||
* along patch edges under the following conditions, which may result
|
||||
* in conformance failures and/or cracking artifacts:
|
||||
*
|
||||
* * QUAD domain
|
||||
* * INTEGER partitioning
|
||||
* * All three TessFactors in a given U or V direction (e.g., V
|
||||
* direction: UEQ0, InsideV, UEQ1) are all exactly 1.0
|
||||
* * All three TessFactors in the other direction are > 1.0 and all
|
||||
* round up to the same integer value (e.g, U direction:
|
||||
* VEQ0 = 3.1, InsideU = 3.7, VEQ1 = 3.4)
|
||||
*
|
||||
* The suggested workaround (to be implemented as part of the postamble
|
||||
* to the HS shader in the HS kernel) is:
|
||||
*
|
||||
* if (
|
||||
* (TF[UEQ0] > 1.0) ||
|
||||
* (TF[VEQ0] > 1.0) ||
|
||||
* (TF[UEQ1] > 1.0) ||
|
||||
* (TF[VEQ1] > 1.0) ||
|
||||
* (TF[INSIDE_U] > 1.0) ||
|
||||
* (TF[INSIDE_V] > 1.0) )
|
||||
* {
|
||||
* TF[INSIDE_U] = (TF[INSIDE_U] == 1.0) ? 2.0 : TF[INSIDE_U];
|
||||
* TF[INSIDE_V] = (TF[INSIDE_V] == 1.0) ? 2.0 : TF[INSIDE_V];
|
||||
* }"
|
||||
*
|
||||
* There's a subtlety here. Intel internal HSD-ES bug 1208668495 notes
|
||||
* that the above workaround fails to fix certain GL/ES CTS tests which
|
||||
* have inside tessellation factors of -1.0. This can be explained by
|
||||
* a quote from the ARB_tessellation_shader specification:
|
||||
*
|
||||
* "If "equal_spacing" is used, the floating-point tessellation level is
|
||||
* first clamped to the range [1,<max>], where <max> is implementation-
|
||||
* dependent maximum tessellation level (MAX_TESS_GEN_LEVEL)."
|
||||
*
|
||||
* In other words, the actual inner tessellation factor used is
|
||||
* clamp(TF[INSIDE_*], 1.0, 64.0). So we want to compare the clamped
|
||||
* value against 1.0. To accomplish this, we change the comparison from
|
||||
* (TF[INSIDE_*] == 1.0) to (TF[INSIDE_*] <= 1.0).
|
||||
*/
|
||||
|
||||
static inline nir_def *
|
||||
load_output(nir_builder *b, int num_components, int offset, int component)
|
||||
{
|
||||
return nir_load_output(b, num_components, 32, nir_imm_int(b, 0),
|
||||
.base = offset,
|
||||
.component = component);
|
||||
}
|
||||
|
||||
/* Append the tess-factor fixup to `block` (a predecessor of the end block,
 * i.e. a shader exit path): if any tess factor is > 1.0, snap inner factors
 * that are <= 1.0 up to 2.0.
 */
static void
emit_quads_workaround(nir_builder *b, nir_block *block)
{
   b->cursor = nir_after_block_before_jump(block);

   /* Inner factors: 2 components at base 0 starting at component 2;
    * outer factors: 4 components at base 1.
    */
   nir_def *inner = load_output(b, 2, 0, 2);
   nir_def *outer = load_output(b, 4, 1, 0);

   nir_def *any_greater_than_1 =
      nir_ior(b, nir_bany(b, nir_fgt_imm(b, outer, 1.0f)),
              nir_bany(b, nir_fgt_imm(b, inner, 1.0f)));

   nir_push_if(b, any_greater_than_1);

   /* Compare <= 1.0 rather than == 1.0 to account for the API-mandated
    * clamp(TF, 1.0, MAX) -- see the comment at the top of this file.
    */
   inner = nir_bcsel(b, nir_fle_imm(b, inner, 1.0f),
                     nir_imm_float(b, 2.0f), inner);

   nir_store_output(b, inner, nir_imm_int(b, 0),
                    .component = 2,
                    .write_mask = WRITEMASK_XY);

   nir_pop_if(b, NULL);
}
|
||||
|
||||
/* Apply the WaPreventHSTessLevelsInterference workaround (see the file
 * header comment) by emitting the tess-factor fixup on every exit path of
 * the tessellation control shader.
 */
void
intel_nir_apply_tcs_quads_workaround(nir_shader *nir)
{
   assert(nir->info.stage == MESA_SHADER_TESS_CTRL);

   nir_function_impl *impl = nir_shader_get_entrypoint(nir);

   nir_builder b = nir_builder_create(impl);

   /* emit_quads_workaround() inserts an if statement into each block,
    * which splits it in two.  This changes the set of predecessors of
    * the end block.  We want to process the original set, so to be safe,
    * save it off to an array first.
    *
    * NOTE(review): this is a VLA sized by the predecessor count --
    * presumably small for real shaders, but worth confirming it cannot
    * grow unbounded.
    */
   const unsigned num_end_preds = impl->end_block->predecessors->entries;
   nir_block *end_preds[num_end_preds];
   unsigned i = 0;

   set_foreach(impl->end_block->predecessors, entry) {
      end_preds[i++] = (nir_block *) entry->key;
   }

   for (i = 0; i < num_end_preds; i++) {
      emit_quads_workaround(&b, end_preds[i]);
   }

   /* Control flow was restructured; invalidate all metadata. */
   nir_metadata_preserve(impl, nir_metadata_none);
}
|
||||
@@ -1,185 +0,0 @@
|
||||
/*
|
||||
* Copyright 2024 Intel Corporation
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#ifndef INTEL_SHADER_ENUMS_H
|
||||
#define INTEL_SHADER_ENUMS_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "compiler/shader_enums.h"
|
||||
#include "util/enum_operators.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Bitfield of dynamic-MSAA state passed to fragment shaders. */
enum intel_msaa_flags {
   /** Must be set whenever any dynamic MSAA is used
    *
    * This flag mostly exists to let us assert that the driver understands
    * dynamic MSAA so we don't run into trouble with drivers that don't.
    */
   INTEL_MSAA_FLAG_ENABLE_DYNAMIC = (1 << 0),

   /** True if the framebuffer is multisampled */
   INTEL_MSAA_FLAG_MULTISAMPLE_FBO = (1 << 1),

   /** True if this shader has been dispatched per-sample */
   INTEL_MSAA_FLAG_PERSAMPLE_DISPATCH = (1 << 2),

   /** True if inputs should be interpolated per-sample by default */
   INTEL_MSAA_FLAG_PERSAMPLE_INTERP = (1 << 3),

   /** True if this shader has been dispatched with alpha-to-coverage */
   INTEL_MSAA_FLAG_ALPHA_TO_COVERAGE = (1 << 4),

   /** True if this shader has been dispatched coarse
    *
    * This is intentionally chosen to be bit 15 to correspond to the coarse
    * bit in the pixel interpolator messages.
    */
   INTEL_MSAA_FLAG_COARSE_PI_MSG = (1 << 15),

   /** True if this shader has been dispatched coarse
    *
    * This is intentionally chosen to be bit 18 to correspond to the coarse
    * bit in the render target messages.
    */
   INTEL_MSAA_FLAG_COARSE_RT_WRITES = (1 << 18),
};
MESA_DEFINE_CPP_ENUM_BITFIELD_OPERATORS(intel_msaa_flags)
|
||||
|
||||
/**
|
||||
* @defgroup Tessellator parameter enumerations.
|
||||
*
|
||||
* These correspond to the hardware values in 3DSTATE_TE, and are provided
|
||||
* as part of the tessellation evaluation shader.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
/* Tessellator partitioning mode; values match the 3DSTATE_TE hardware
 * encoding (see the group comment above).
 */
enum intel_tess_partitioning {
   INTEL_TESS_PARTITIONING_INTEGER         = 0,
   INTEL_TESS_PARTITIONING_ODD_FRACTIONAL  = 1,
   INTEL_TESS_PARTITIONING_EVEN_FRACTIONAL = 2,
};
|
||||
|
||||
/* Tessellator output primitive topology; values match the 3DSTATE_TE
 * hardware encoding (see the group comment above).
 */
enum intel_tess_output_topology {
   INTEL_TESS_OUTPUT_TOPOLOGY_POINT   = 0,
   INTEL_TESS_OUTPUT_TOPOLOGY_LINE    = 1,
   INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CW  = 2,
   INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CCW = 3,
};
|
||||
|
||||
/* Tessellation domain; values match the 3DSTATE_TE hardware encoding
 * (see the group comment above).
 */
enum intel_tess_domain {
   INTEL_TESS_DOMAIN_QUAD    = 0,
   INTEL_TESS_DOMAIN_TRI     = 1,
   INTEL_TESS_DOMAIN_ISOLINE = 2,
};
|
||||
/** @} */
|
||||
|
||||
/* Hardware shader dispatch modes.  Note the TCS values deliberately alias
 * the vec4 encodings numerically: SINGLE_PATCH == 4X1_SINGLE (0) and
 * MULTI_PATCH == 4X2_DUAL_OBJECT (2).
 */
enum intel_shader_dispatch_mode {
   INTEL_DISPATCH_MODE_4X1_SINGLE = 0,
   INTEL_DISPATCH_MODE_4X2_DUAL_INSTANCE = 1,
   INTEL_DISPATCH_MODE_4X2_DUAL_OBJECT = 2,
   INTEL_DISPATCH_MODE_SIMD8 = 3,

   INTEL_DISPATCH_MODE_TCS_SINGLE_PATCH = 0,
   INTEL_DISPATCH_MODE_TCS_MULTI_PATCH = 2,
};
|
||||
|
||||
/**
|
||||
* Data structure recording the relationship between the gl_varying_slot enum
|
||||
* and "slots" within the vertex URB entry (VUE). A "slot" is defined as a
|
||||
* single octaword within the VUE (128 bits).
|
||||
*
|
||||
* Note that each BRW register contains 256 bits (2 octawords), so when
|
||||
* accessing the VUE in URB_NOSWIZZLE mode, each register corresponds to two
|
||||
* consecutive VUE slots. When accessing the VUE in URB_INTERLEAVED mode (as
|
||||
* in a vertex shader), each register corresponds to a single VUE slot, since
|
||||
* it contains data for two separate vertices.
|
||||
*/
|
||||
struct intel_vue_map {
   /**
    * Bitfield representing all varying slots that are (a) stored in this VUE
    * map, and (b) actually written by the shader.  Does not include any of
    * the additional varying slots defined in brw_varying_slot.
    */
   uint64_t slots_valid;

   /**
    * Is this VUE map for a separate shader pipeline?
    *
    * Separable programs (GL_ARB_separate_shader_objects) can be mixed and
    * matched without the linker having a chance to dead code eliminate
    * unused varyings.
    *
    * This means that we have to use a fixed slot layout, based on the
    * output's location field, rather than assigning slots in a compact
    * contiguous block.
    */
   bool separate;

   /**
    * Map from gl_varying_slot value to VUE slot.  For gl_varying_slots that
    * are not stored in a slot (because they are not written, or because
    * additional processing is applied before storing them in the VUE), the
    * value is -1.
    */
   signed char varying_to_slot[VARYING_SLOT_TESS_MAX];

   /**
    * Map from VUE slot to gl_varying_slot value.  For slots that do not
    * directly correspond to a gl_varying_slot, the value comes from
    * brw_varying_slot.
    *
    * For slots that are not in use, the value is BRW_VARYING_SLOT_PAD.
    */
   signed char slot_to_varying[VARYING_SLOT_TESS_MAX];

   /**
    * Total number of VUE slots in use.
    */
   int num_slots;

   /**
    * Number of position VUE slots.  If num_pos_slots > 1, primitive
    * replication is being used.
    */
   int num_pos_slots;

   /**
    * Number of per-patch VUE slots.  Only valid for tessellation control
    * shader outputs and tessellation evaluation shader inputs.
    */
   int num_per_patch_slots;

   /**
    * Number of per-vertex VUE slots.  Only valid for tessellation control
    * shader outputs and tessellation evaluation shader inputs.
    */
   int num_per_vertex_slots;
};
|
||||
|
||||
/**
 * Dispatch parameters for a compute shader invocation.
 */
struct intel_cs_dispatch_info {
   /** Number of invocations in a workgroup. */
   uint32_t group_size;
   /** SIMD width the shader was compiled for. */
   uint32_t simd_size;
   /** Number of hardware threads needed per workgroup. */
   uint32_t threads;

   /* RightExecutionMask field used in GPGPU_WALKER. */
   uint32_t right_mask;
};
|
||||
|
||||
/**
 * Walk order for compute workgroup dispatch: the order in which the X, Y,
 * and Z dimensions are traversed when walking the thread-group grid.
 *
 * NOTE(review): relies on the project-wide PACKED macro (declared
 * elsewhere) to minimize the enum's storage size.
 */
enum PACKED intel_compute_walk_order {
   INTEL_WALK_ORDER_XYZ = 0,
   INTEL_WALK_ORDER_XZY = 1,
   INTEL_WALK_ORDER_YXZ = 2,
   INTEL_WALK_ORDER_YZX = 3,
   INTEL_WALK_ORDER_ZXY = 4,
   INTEL_WALK_ORDER_ZYX = 5,
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
#endif /* INTEL_SHADER_ENUMS_H */
|
||||
Reference in New Issue
Block a user