From 8a57012ff487d077a86478bc764697f2f27b8ff3 Mon Sep 17 00:00:00 2001 From: Caio Oliveira Date: Fri, 19 Jan 2024 13:02:48 -0800 Subject: [PATCH] intel/elk: Use common code in intel/compiler Reviewed-by: Ian Romanick Reviewed-by: Kenneth Graunke Part-of: --- src/intel/compiler/elk/brw_eu.c | 2 +- src/intel/compiler/elk/brw_fs.cpp | 2 +- src/intel/compiler/elk/brw_nir.c | 2 +- src/intel/compiler/elk/brw_vec4_tcs.cpp | 2 +- src/intel/compiler/elk/intel_gfx_ver_enum.h | 72 ---- src/intel/compiler/elk/intel_nir.c | 28 -- src/intel/compiler/elk/intel_nir.h | 46 --- .../elk/intel_nir_blockify_uniform_loads.c | 116 ------- .../intel_nir_clamp_image_1d_2d_array_sizes.c | 144 -------- .../elk/intel_nir_clamp_per_vertex_loads.c | 108 ------ .../elk/intel_nir_lower_conversions.c | 115 ------- .../elk/intel_nir_lower_cs_intrinsics.c | 307 ----------------- ..._lower_non_uniform_barycentric_at_sample.c | 80 ----- ...tel_nir_lower_non_uniform_resource_intel.c | 319 ------------------ .../elk/intel_nir_lower_shading_rate_output.c | 108 ------ .../compiler/elk/intel_nir_lower_sparse.c | 247 -------------- .../compiler/elk/intel_nir_lower_texture.c | 133 -------- .../elk/intel_nir_opt_peephole_ffma.c | 253 -------------- .../elk/intel_nir_opt_peephole_imul32x16.c | 319 ------------------ .../compiler/elk/intel_nir_tcs_workarounds.c | 134 -------- src/intel/compiler/elk/intel_shader_enums.h | 185 ---------- 21 files changed, 4 insertions(+), 2718 deletions(-) delete mode 100644 src/intel/compiler/elk/intel_gfx_ver_enum.h delete mode 100644 src/intel/compiler/elk/intel_nir.c delete mode 100644 src/intel/compiler/elk/intel_nir.h delete mode 100644 src/intel/compiler/elk/intel_nir_blockify_uniform_loads.c delete mode 100644 src/intel/compiler/elk/intel_nir_clamp_image_1d_2d_array_sizes.c delete mode 100644 src/intel/compiler/elk/intel_nir_clamp_per_vertex_loads.c delete mode 100644 src/intel/compiler/elk/intel_nir_lower_conversions.c delete mode 100644 src/intel/compiler/elk/intel_nir_lower_cs_intrinsics.c delete mode 100644 src/intel/compiler/elk/intel_nir_lower_non_uniform_barycentric_at_sample.c delete mode 100644 src/intel/compiler/elk/intel_nir_lower_non_uniform_resource_intel.c delete mode 100644 src/intel/compiler/elk/intel_nir_lower_shading_rate_output.c delete mode 100644 src/intel/compiler/elk/intel_nir_lower_sparse.c delete mode 100644 src/intel/compiler/elk/intel_nir_lower_texture.c delete mode 100644 src/intel/compiler/elk/intel_nir_opt_peephole_ffma.c delete mode 100644 src/intel/compiler/elk/intel_nir_opt_peephole_imul32x16.c delete mode 100644 src/intel/compiler/elk/intel_nir_tcs_workarounds.c delete mode 100644 src/intel/compiler/elk/intel_shader_enums.h diff --git a/src/intel/compiler/elk/brw_eu.c b/src/intel/compiler/elk/brw_eu.c index d6b94f3441d..1e7ca88493e 100644 --- a/src/intel/compiler/elk/brw_eu.c +++ b/src/intel/compiler/elk/brw_eu.c @@ -36,7 +36,7 @@ #include "brw_eu_defines.h" #include "brw_eu.h" #include "brw_shader.h" -#include "intel_gfx_ver_enum.h" +#include "../intel_gfx_ver_enum.h" #include "dev/intel_debug.h" #include "util/u_debug.h" diff --git a/src/intel/compiler/elk/brw_fs.cpp b/src/intel/compiler/elk/brw_fs.cpp index fcd81fe03c1..55002057c37 100644 --- a/src/intel/compiler/elk/brw_fs.cpp +++ b/src/intel/compiler/elk/brw_fs.cpp @@ -37,7 +37,7 @@ #include "brw_cfg.h" #include "brw_dead_control_flow.h" #include "brw_private.h" -#include "intel_nir.h" +#include "../intel_nir.h" #include "shader_enums.h" #include "dev/intel_debug.h" #include "dev/intel_wa.h" diff --git a/src/intel/compiler/elk/brw_nir.c b/src/intel/compiler/elk/brw_nir.c index ff2bbbc239a..12a567db616 100644 --- a/src/intel/compiler/elk/brw_nir.c +++ b/src/intel/compiler/elk/brw_nir.c @@ -21,7 +21,7 @@ * IN THE SOFTWARE. */ -#include "intel_nir.h" +#include "../intel_nir.h" #include "brw_nir.h" #include "brw_shader.h" #include "dev/intel_debug.h" diff --git a/src/intel/compiler/elk/brw_vec4_tcs.cpp b/src/intel/compiler/elk/brw_vec4_tcs.cpp index 827bba3c59d..0441996e857 100644 --- a/src/intel/compiler/elk/brw_vec4_tcs.cpp +++ b/src/intel/compiler/elk/brw_vec4_tcs.cpp @@ -27,7 +27,7 @@ * Tessellaton control shader specific code derived from the vec4_visitor class. */ -#include "intel_nir.h" +#include "../intel_nir.h" #include "brw_nir.h" #include "brw_vec4_tcs.h" #include "brw_fs.h" diff --git a/src/intel/compiler/elk/intel_gfx_ver_enum.h b/src/intel/compiler/elk/intel_gfx_ver_enum.h deleted file mode 100644 index 6ca55abe8a1..00000000000 --- a/src/intel/compiler/elk/intel_gfx_ver_enum.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#ifndef INTEL_GFX_VER_ENUM_H -#define INTEL_GFX_VER_ENUM_H - -#include "util/macros.h" -#include "dev/intel_device_info.h" - -enum gfx_ver { - GFX4 = (1 << 0), - GFX45 = (1 << 1), - GFX5 = (1 << 2), - GFX6 = (1 << 3), - GFX7 = (1 << 4), - GFX75 = (1 << 5), - GFX8 = (1 << 6), - GFX9 = (1 << 7), - GFX10 = (1 << 8), - GFX11 = (1 << 9), - GFX12 = (1 << 10), - GFX125 = (1 << 11), - XE2 = (1 << 12), - GFX_ALL = ~0 -}; - -#define GFX_LT(ver) ((ver) - 1) -#define GFX_GE(ver) (~GFX_LT(ver)) -#define GFX_LE(ver) (GFX_LT(ver) | (ver)) - -static inline enum gfx_ver -gfx_ver_from_devinfo(const struct intel_device_info *devinfo) -{ - switch (devinfo->verx10) { - case 40: return GFX4; - case 45: return GFX45; - case 50: return GFX5; - case 60: return GFX6; - case 70: return GFX7; - case 75: return GFX75; - case 80: return GFX8; - case 90: return GFX9; - case 110: return GFX11; - case 120: return GFX12; - case 125: return GFX125; - case 200: return XE2; - default: - unreachable("not reached"); - } -} - -#endif diff --git a/src/intel/compiler/elk/intel_nir.c b/src/intel/compiler/elk/intel_nir.c deleted file mode 100644 index de71f56a513..00000000000 --- a/src/intel/compiler/elk/intel_nir.c +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2014-2023 Intel Corporation - * SPDX-License-Identifier: MIT - */ - -#include "intel_nir.h" - -bool -intel_nir_pulls_at_sample(nir_shader *shader) -{ - nir_foreach_function_impl(impl, shader) { - nir_foreach_block(block, impl) { - nir_foreach_instr(instr, block) { - if (instr->type != nir_instr_type_intrinsic) - continue; - - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - - if (intrin->intrinsic == nir_intrinsic_load_barycentric_at_sample) - return true; - } - } - } - - return false; -} - - diff --git a/src/intel/compiler/elk/intel_nir.h b/src/intel/compiler/elk/intel_nir.h deleted file mode 100644 index 29f83011a94..00000000000 --- a/src/intel/compiler/elk/intel_nir.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2015-2023 Intel Corporation - * SPDX-License-Identifier: MIT - */ - -#ifndef INTEL_NIR_H -#define INTEL_NIR_H - -#include "nir.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct intel_device_info; - -void intel_nir_apply_tcs_quads_workaround(nir_shader *nir); -bool intel_nir_blockify_uniform_loads(nir_shader *shader, - const struct intel_device_info *devinfo); -bool intel_nir_clamp_image_1d_2d_array_sizes(nir_shader *shader); -bool intel_nir_clamp_per_vertex_loads(nir_shader *shader); -bool intel_nir_cleanup_resource_intel(nir_shader *shader); - -bool intel_nir_lower_conversions(nir_shader *nir); -bool intel_nir_lower_non_uniform_barycentric_at_sample(nir_shader *nir); -bool intel_nir_lower_non_uniform_resource_intel(nir_shader *shader); -bool intel_nir_lower_patch_vertices_in(nir_shader *shader, unsigned input_vertices); -bool intel_nir_lower_shading_rate_output(nir_shader *nir); -bool intel_nir_lower_sparse_intrinsics(nir_shader *nir); - -struct intel_nir_lower_texture_opts { - bool combined_lod_and_array_index; -}; -bool intel_nir_lower_texture(nir_shader *nir, - const struct intel_nir_lower_texture_opts *opts); - -bool intel_nir_opt_peephole_ffma(nir_shader *shader); -bool intel_nir_opt_peephole_imul32x16(nir_shader *shader); - -bool intel_nir_pulls_at_sample(nir_shader *shader); - -#ifdef __cplusplus -} -#endif - -#endif /* INTEL_NIR_H */ diff --git a/src/intel/compiler/elk/intel_nir_blockify_uniform_loads.c b/src/intel/compiler/elk/intel_nir_blockify_uniform_loads.c deleted file mode 100644 index 2ad0a117a34..00000000000 --- a/src/intel/compiler/elk/intel_nir_blockify_uniform_loads.c +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright © 2018 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "dev/intel_device_info.h" -#include "intel_nir.h" -#include "isl/isl.h" -#include "nir_builder.h" - -static bool -intel_nir_blockify_uniform_loads_instr(nir_builder *b, - nir_instr *instr, - void *cb_data) -{ - if (instr->type != nir_instr_type_intrinsic) - return false; - - const struct intel_device_info *devinfo = cb_data; - - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - switch (intrin->intrinsic) { - case nir_intrinsic_load_ubo: - case nir_intrinsic_load_ssbo: - /* BDW PRMs, Volume 7: 3D-Media-GPGPU: OWord Block ReadWrite: - * - * "The surface base address must be OWord-aligned." - * - * We can't make that guarantee with SSBOs where the alignment is - * 4bytes. - */ - if (devinfo->ver < 9) - return false; - - if (nir_src_is_divergent(intrin->src[1])) - return false; - - if (intrin->def.bit_size != 32) - return false; - - /* Without the LSC, we can only do block loads of at least 4dwords (1 - * oword). - */ - if (!devinfo->has_lsc && intrin->def.num_components < 4) - return false; - - intrin->intrinsic = - intrin->intrinsic == nir_intrinsic_load_ubo ? - nir_intrinsic_load_ubo_uniform_block_intel : - nir_intrinsic_load_ssbo_uniform_block_intel; - return true; - - case nir_intrinsic_load_shared: - /* Block loads on shared memory are not supported before the LSC. */ - if (!devinfo->has_lsc) - return false; - - if (nir_src_is_divergent(intrin->src[0])) - return false; - - if (intrin->def.bit_size != 32) - return false; - - intrin->intrinsic = nir_intrinsic_load_shared_uniform_block_intel; - return true; - - case nir_intrinsic_load_global_constant: - if (nir_src_is_divergent(intrin->src[0])) - return false; - - if (intrin->def.bit_size != 32) - return false; - - /* Without the LSC, we can only do block loads of at least 4dwords (1 - * oword). - */ - if (!devinfo->has_lsc && intrin->def.num_components < 4) - return false; - - intrin->intrinsic = nir_intrinsic_load_global_constant_uniform_block_intel; - return true; - - default: - return false; - } -} - -bool -intel_nir_blockify_uniform_loads(nir_shader *shader, - const struct intel_device_info *devinfo) -{ - return nir_shader_instructions_pass(shader, - intel_nir_blockify_uniform_loads_instr, - nir_metadata_block_index | - nir_metadata_dominance | - nir_metadata_live_defs, - (void *) devinfo); -} diff --git a/src/intel/compiler/elk/intel_nir_clamp_image_1d_2d_array_sizes.c b/src/intel/compiler/elk/intel_nir_clamp_image_1d_2d_array_sizes.c deleted file mode 100644 index 2f2f907c5d1..00000000000 --- a/src/intel/compiler/elk/intel_nir_clamp_image_1d_2d_array_sizes.c +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright © 2020 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "compiler/nir/nir_builder.h" -#include "intel_nir.h" - -/** - * Wa_1806565034: - * - * Gfx12+ allows to set RENDER_SURFACE_STATE::SurfaceArray to 1 only if - * array_len > 1. Setting RENDER_SURFACE_STATE::SurfaceArray to 0 results in - * the HW RESINFO message to report an array size of 0 which breaks texture - * array size queries. - * - * This NIR pass works around this by patching the array size with a - * MAX(array_size, 1) for array textures. - */ - -static bool -intel_nir_clamp_image_1d_2d_array_sizes_instr(nir_builder *b, - nir_instr *instr, - UNUSED void *cb_data) -{ - nir_def *image_size = NULL; - - switch (instr->type) { - case nir_instr_type_intrinsic: { - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - - switch (intr->intrinsic) { - case nir_intrinsic_image_size: - case nir_intrinsic_bindless_image_size: - if (!nir_intrinsic_image_array(intr)) - break; - - image_size = &intr->def; - break; - - case nir_intrinsic_image_deref_size: { - nir_deref_instr *deref = nir_src_as_deref(intr->src[0]); - - assert(glsl_type_is_image(deref->type)); - - if (!glsl_sampler_type_is_array(deref->type)) - break; - - image_size = &intr->def; - break; - } - - default: - break; - } - break; - } - - case nir_instr_type_tex: { - nir_tex_instr *tex_instr = nir_instr_as_tex(instr); - if (tex_instr->op != nir_texop_txs) - break; - - if (!tex_instr->is_array) - break; - - image_size = &tex_instr->def; - break; - } - - default: - break; - } - - if (!image_size) - return false; - - b->cursor = nir_after_instr(instr); - - nir_def *components[4]; - /* OR all the sizes for all components but the last. */ - nir_def *or_components = nir_imm_int(b, 0); - for (int i = 0; i < image_size->num_components; i++) { - if (i == (image_size->num_components - 1)) { - nir_def *null_or_size[2] = { - nir_imm_int(b, 0), - nir_imax(b, nir_channel(b, image_size, i), - nir_imm_int(b, 1)), - }; - nir_def *vec2_null_or_size = nir_vec(b, null_or_size, 2); - - /* Using the ORed sizes select either the element 0 or 1 - * from this vec2. For NULL textures which have a size of - * 0x0x0, we'll select the first element which is 0 and for - * the rest MAX(depth, 1). - */ - components[i] = - nir_vector_extract(b, vec2_null_or_size, - nir_imin(b, or_components, - nir_imm_int(b, 1))); - } else { - components[i] = nir_channel(b, image_size, i); - or_components = nir_ior(b, components[i], or_components); - } - } - nir_def *image_size_replacement = - nir_vec(b, components, image_size->num_components); - - b->cursor = nir_after_instr(instr); - - nir_def_rewrite_uses_after(image_size, - image_size_replacement, - image_size_replacement->parent_instr); - - return true; -} - -bool -intel_nir_clamp_image_1d_2d_array_sizes(nir_shader *shader) -{ - return nir_shader_instructions_pass(shader, - intel_nir_clamp_image_1d_2d_array_sizes_instr, - nir_metadata_block_index | - nir_metadata_dominance, - NULL); -} diff --git a/src/intel/compiler/elk/intel_nir_clamp_per_vertex_loads.c b/src/intel/compiler/elk/intel_nir_clamp_per_vertex_loads.c deleted file mode 100644 index b9fafa82f56..00000000000 --- a/src/intel/compiler/elk/intel_nir_clamp_per_vertex_loads.c +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2022 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/* - * Limit input per vertex input accesses. This is useful for the tesselation stages. - * On Gfx12.5+ out of bound accesses generate hangs. - * - * This pass operates on derefs, it must be called before shader inputs are - * lowered. - */ - -#include "intel_nir.h" -#include "compiler/nir/nir_builder.h" -#include "compiler/nir/nir_deref.h" - -static bool -clamp_per_vertex_loads_instr(nir_builder *b, nir_intrinsic_instr *intrin, - void *cb_data) -{ - if (intrin->intrinsic != nir_intrinsic_load_deref) - return false; - - nir_deref_instr *deref = nir_instr_as_deref(intrin->src[0].ssa->parent_instr); - nir_variable *var = nir_deref_instr_get_variable(deref); - if (var == NULL || (var->data.mode & nir_var_shader_in) == 0) - return false; - - nir_deref_path path; - nir_deref_path_init(&path, deref, cb_data); - - bool progress = false; - for (unsigned i = 0; path.path[i]; i++) { - if (path.path[i]->deref_type != nir_deref_type_array) - continue; - - b->cursor = nir_before_instr(&path.path[i]->instr); - - nir_src_rewrite(&path.path[i]->arr.index, - nir_umin(b, path.path[i]->arr.index.ssa, nir_iadd_imm(b, nir_load_patch_vertices_in(b), -1))); - - progress = true; - break; - } - - nir_deref_path_finish(&path); - - return progress; -} - -bool -intel_nir_clamp_per_vertex_loads(nir_shader *shader) -{ - void *mem_ctx = ralloc_context(NULL); - - bool ret = nir_shader_intrinsics_pass(shader, clamp_per_vertex_loads_instr, - nir_metadata_block_index | - nir_metadata_dominance, - mem_ctx); - - ralloc_free(mem_ctx); - - return ret; -} - -static bool -lower_patch_vertices_instr(nir_builder *b, nir_intrinsic_instr *intrin, - void *cb_data) -{ - if (intrin->intrinsic != nir_intrinsic_load_patch_vertices_in) - return false; - - unsigned *input_vertices = cb_data; - - b->cursor = nir_before_instr(&intrin->instr); - - nir_def_rewrite_uses(&intrin->def, nir_imm_int(b, *input_vertices)); - - return true; -} - -bool -intel_nir_lower_patch_vertices_in(nir_shader *shader, unsigned input_vertices) -{ - return nir_shader_intrinsics_pass(shader, lower_patch_vertices_instr, - nir_metadata_block_index | - nir_metadata_dominance, - &input_vertices); -} diff --git a/src/intel/compiler/elk/intel_nir_lower_conversions.c b/src/intel/compiler/elk/intel_nir_lower_conversions.c deleted file mode 100644 index e0dde853349..00000000000 --- a/src/intel/compiler/elk/intel_nir_lower_conversions.c +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright © 2018 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "intel_nir.h" -#include "compiler/nir/nir_builder.h" - -static void -split_conversion(nir_builder *b, nir_alu_instr *alu, nir_alu_type src_type, - nir_alu_type tmp_type, nir_alu_type dst_type) -{ - b->cursor = nir_before_instr(&alu->instr); - nir_def *src = nir_ssa_for_alu_src(b, alu, 0); - nir_def *tmp = nir_type_convert(b, src, src_type, tmp_type, nir_rounding_mode_undef); - nir_def *res = nir_type_convert(b, tmp, tmp_type, dst_type, nir_rounding_mode_undef); - nir_def_rewrite_uses(&alu->def, res); - nir_instr_remove(&alu->instr); -} - -static bool -lower_alu_instr(nir_builder *b, nir_alu_instr *alu) -{ - unsigned src_bit_size = nir_src_bit_size(alu->src[0].src); - nir_alu_type src_type = nir_op_infos[alu->op].input_types[0]; - nir_alu_type src_full_type = (nir_alu_type) (src_type | src_bit_size); - - unsigned dst_bit_size = alu->def.bit_size; - nir_alu_type dst_full_type = nir_op_infos[alu->op].output_type; - nir_alu_type dst_type = nir_alu_type_get_base_type(dst_full_type); - - /* BDW PRM, vol02, Command Reference Instructions, mov - MOVE: - * - * "There is no direct conversion from HF to DF or DF to HF. - * Use two instructions and F (Float) as an intermediate type. - * - * There is no direct conversion from HF to Q/UQ or Q/UQ to HF. - * Use two instructions and F (Float) or a word integer type - * or a DWord integer type as an intermediate type." - * - * It is important that the intermediate conversion happens through a - * 32-bit float type so we don't lose range when we convert from - * a 64-bit integer. - */ - unsigned int64_types = nir_type_int64 | nir_type_uint64; - if ((src_full_type == nir_type_float16 && (dst_full_type & int64_types)) || - ((src_full_type & int64_types) && dst_full_type == nir_type_float16)) { - split_conversion(b, alu, src_type, nir_type_float | 32, - dst_type | dst_bit_size); - return true; - } - - /* SKL PRM, vol 02a, Command Reference: Instructions, Move: - * - * "There is no direct conversion from B/UB to DF or DF to B/UB. Use - * two instructions and a word or DWord intermediate type." - * - * "There is no direct conversion from B/UB to Q/UQ or Q/UQ to B/UB. - * Use two instructions and a word or DWord intermediate integer - * type." - * - * It is important that we use a 32-bit integer matching the sign of the - * destination as the intermediate type so we avoid any chance of rtne - * rounding happening before the conversion to integer (which is expected - * to round towards zero) in double to byte conversions. - */ - if ((src_bit_size == 8 && dst_bit_size == 64) || - (src_bit_size == 64 && dst_bit_size == 8)) { - split_conversion(b, alu, src_type, dst_type | 32, dst_type | dst_bit_size); - return true; - } - - return false; -} - -static bool -lower_instr(nir_builder *b, nir_instr *instr, UNUSED void *cb_data) -{ - if (instr->type != nir_instr_type_alu) - return false; - - nir_alu_instr *alu = nir_instr_as_alu(instr); - - if (!nir_op_infos[alu->op].is_conversion) - return false; - - return lower_alu_instr(b, alu); -} - -bool -intel_nir_lower_conversions(nir_shader *shader) -{ - return nir_shader_instructions_pass(shader, lower_instr, - nir_metadata_block_index | - nir_metadata_dominance, - NULL); -} diff --git a/src/intel/compiler/elk/intel_nir_lower_cs_intrinsics.c b/src/intel/compiler/elk/intel_nir_lower_cs_intrinsics.c deleted file mode 100644 index 2ec364a9ebb..00000000000 --- a/src/intel/compiler/elk/intel_nir_lower_cs_intrinsics.c +++ /dev/null @@ -1,307 +0,0 @@ -/* - * Copyright (c) 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "intel_nir.h" -#include "compiler/nir/nir_builder.h" - -struct lower_intrinsics_state { - nir_shader *nir; - nir_function_impl *impl; - bool progress; - nir_builder builder; -}; - -static void -compute_local_index_id(nir_builder *b, - nir_shader *nir, - nir_def **local_index, - nir_def **local_id) -{ - nir_def *subgroup_id = nir_load_subgroup_id(b); - - nir_def *thread_local_id = - nir_imul(b, subgroup_id, nir_load_simd_width_intel(b)); - nir_def *channel = nir_load_subgroup_invocation(b); - nir_def *linear = nir_iadd(b, channel, thread_local_id); - - nir_def *size_x; - nir_def *size_y; - if (nir->info.workgroup_size_variable) { - nir_def *size_xyz = nir_load_workgroup_size(b); - size_x = nir_channel(b, size_xyz, 0); - size_y = nir_channel(b, size_xyz, 1); - } else { - size_x = nir_imm_int(b, nir->info.workgroup_size[0]); - size_y = nir_imm_int(b, nir->info.workgroup_size[1]); - } - nir_def *size_xy = nir_imul(b, size_x, size_y); - - /* The local invocation index and ID must respect the following - * - * gl_LocalInvocationID.x = - * gl_LocalInvocationIndex % gl_WorkGroupSize.x; - * gl_LocalInvocationID.y = - * (gl_LocalInvocationIndex / gl_WorkGroupSize.x) % - * gl_WorkGroupSize.y; - * gl_LocalInvocationID.z = - * (gl_LocalInvocationIndex / - * (gl_WorkGroupSize.x * gl_WorkGroupSize.y)) % - * gl_WorkGroupSize.z; - * - * However, the final % gl_WorkGroupSize.z does nothing unless we - * accidentally end up with a gl_LocalInvocationIndex that is too - * large so it can safely be omitted. - */ - - nir_def *id_x, *id_y, *id_z; - switch (nir->info.cs.derivative_group) { - case DERIVATIVE_GROUP_NONE: - if (nir->info.num_images == 0 && - nir->info.num_textures == 0) { - /* X-major lid order. Optimal for linear accesses only, - * which are usually buffers. X,Y ordering will look like: - * (0,0) (1,0) (2,0) ... (size_x-1,0) (0,1) (1,1) ... - */ - id_x = nir_umod(b, linear, size_x); - id_y = nir_umod(b, nir_udiv(b, linear, size_x), size_y); - *local_index = linear; - } else if (!nir->info.workgroup_size_variable && - nir->info.workgroup_size[1] % 4 == 0) { - /* 1x4 block X-major lid order. Same as X-major except increments in - * blocks of width=1 height=4. Always optimal for tileY and usually - * optimal for linear accesses. - * x = (linear / 4) % size_x - * y = ((linear % 4) + (linear / 4 / size_x) * 4) % size_y - * X,Y ordering will look like: (0,0) (0,1) (0,2) (0,3) (1,0) (1,1) - * (1,2) (1,3) (2,0) ... (size_x-1,3) (0,4) (0,5) (0,6) (0,7) (1,4) ... - */ - const unsigned height = 4; - nir_def *block = nir_udiv_imm(b, linear, height); - id_x = nir_umod(b, block, size_x); - id_y = nir_umod(b, - nir_iadd(b, - nir_umod_imm(b, linear, height), - nir_imul_imm(b, - nir_udiv(b, block, size_x), - height)), - size_y); - } else { - /* Y-major lid order. Optimal for tileY accesses only, - * which are usually images. X,Y ordering will look like: - * (0,0) (0,1) (0,2) ... (0,size_y-1) (1,0) (1,1) ... - */ - id_y = nir_umod(b, linear, size_y); - id_x = nir_umod(b, nir_udiv(b, linear, size_y), size_x); - } - - id_z = nir_udiv(b, linear, size_xy); - *local_id = nir_vec3(b, id_x, id_y, id_z); - if (!*local_index) { - *local_index = nir_iadd(b, nir_iadd(b, id_x, - nir_imul(b, id_y, size_x)), - nir_imul(b, id_z, size_xy)); - } - break; - case DERIVATIVE_GROUP_LINEAR: - /* For linear, just set the local invocation index linearly, - * and calculate local invocation ID from that. - */ - id_x = nir_umod(b, linear, size_x); - id_y = nir_umod(b, nir_udiv(b, linear, size_x), size_y); - id_z = nir_udiv(b, linear, size_xy); - *local_id = nir_vec3(b, id_x, id_y, id_z); - *local_index = linear; - break; - case DERIVATIVE_GROUP_QUADS: { - /* For quads, first we figure out the 2x2 grid the invocation - * belongs to -- treating extra Z layers as just more rows. - * Then map that into local invocation ID (trivial) and local - * invocation index. Skipping Z simplify index calculation. - */ - - nir_def *one = nir_imm_int(b, 1); - nir_def *double_size_x = nir_ishl(b, size_x, one); - - /* ID within a pair of rows, where each group of 4 is 2x2 quad. */ - nir_def *row_pair_id = nir_umod(b, linear, double_size_x); - nir_def *y_row_pairs = nir_udiv(b, linear, double_size_x); - - nir_def *x = - nir_ior(b, - nir_iand(b, row_pair_id, one), - nir_iand(b, nir_ishr(b, row_pair_id, one), - nir_imm_int(b, 0xfffffffe))); - nir_def *y = - nir_ior(b, - nir_ishl(b, y_row_pairs, one), - nir_iand(b, nir_ishr(b, row_pair_id, one), one)); - - *local_id = nir_vec3(b, x, - nir_umod(b, y, size_y), - nir_udiv(b, y, size_y)); - *local_index = nir_iadd(b, x, nir_imul(b, y, size_x)); - break; - } - default: - unreachable("invalid derivative group"); - } -} - -static bool -lower_cs_intrinsics_convert_block(struct lower_intrinsics_state *state, - nir_block *block) -{ - bool progress = false; - nir_builder *b = &state->builder; - nir_shader *nir = state->nir; - - /* Reuse calculated values inside the block. */ - nir_def *local_index = NULL; - nir_def *local_id = NULL; - - nir_foreach_instr_safe(instr, block) { - if (instr->type != nir_instr_type_intrinsic) - continue; - - nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr); - - b->cursor = nir_after_instr(&intrinsic->instr); - - nir_def *sysval; - switch (intrinsic->intrinsic) { - case nir_intrinsic_load_local_invocation_index: - case nir_intrinsic_load_local_invocation_id: { - if (!local_index && !nir->info.workgroup_size_variable) { - const uint16_t *ws = nir->info.workgroup_size; - if (ws[0] * ws[1] * ws[2] == 1) { - nir_def *zero = nir_imm_int(b, 0); - local_index = zero; - local_id = nir_replicate(b, zero, 3); - } - } - - if (!local_index) { - if (nir->info.stage == MESA_SHADER_TASK || - nir->info.stage == MESA_SHADER_MESH) { - /* Will be lowered by nir_emit_task_mesh_intrinsic() using - * information from the payload. - */ - continue; - } - - /* First time we are using those, so let's calculate them. */ - assert(!local_id); - compute_local_index_id(b, nir, &local_index, &local_id); - } - - assert(local_id); - assert(local_index); - if (intrinsic->intrinsic == nir_intrinsic_load_local_invocation_id) - sysval = local_id; - else - sysval = local_index; - break; - } - - case nir_intrinsic_load_num_subgroups: { - nir_def *size; - if (state->nir->info.workgroup_size_variable) { - nir_def *size_xyz = nir_load_workgroup_size(b); - nir_def *size_x = nir_channel(b, size_xyz, 0); - nir_def *size_y = nir_channel(b, size_xyz, 1); - nir_def *size_z = nir_channel(b, size_xyz, 2); - size = nir_imul(b, nir_imul(b, size_x, size_y), size_z); - } else { - size = nir_imm_int(b, nir->info.workgroup_size[0] * - nir->info.workgroup_size[1] * - nir->info.workgroup_size[2]); - } - - /* Calculate the equivalent of DIV_ROUND_UP. */ - nir_def *simd_width = nir_load_simd_width_intel(b); - sysval = - nir_udiv(b, nir_iadd_imm(b, nir_iadd(b, size, simd_width), -1), - simd_width); - break; - } - - default: - continue; - } - - if (intrinsic->def.bit_size == 64) - sysval = nir_u2u64(b, sysval); - - nir_def_rewrite_uses(&intrinsic->def, sysval); - nir_instr_remove(&intrinsic->instr); - - state->progress = true; - } - - return progress; -} - -static void -lower_cs_intrinsics_convert_impl(struct lower_intrinsics_state *state) -{ - state->builder = nir_builder_create(state->impl); - - nir_foreach_block(block, state->impl) { - lower_cs_intrinsics_convert_block(state, block); - } - - nir_metadata_preserve(state->impl, - nir_metadata_block_index | nir_metadata_dominance); -} - -bool -intel_nir_lower_cs_intrinsics(nir_shader *nir) -{ - assert(gl_shader_stage_uses_workgroup(nir->info.stage)); - - struct lower_intrinsics_state state = { - .nir = nir, - }; - - /* Constraints from NV_compute_shader_derivatives. */ - if (gl_shader_stage_is_compute(nir->info.stage) && - !nir->info.workgroup_size_variable) { - if (nir->info.cs.derivative_group == DERIVATIVE_GROUP_QUADS) { - assert(nir->info.workgroup_size[0] % 2 == 0); - assert(nir->info.workgroup_size[1] % 2 == 0); - } else if (nir->info.cs.derivative_group == DERIVATIVE_GROUP_LINEAR) { - ASSERTED unsigned workgroup_size = - nir->info.workgroup_size[0] * - nir->info.workgroup_size[1] * - nir->info.workgroup_size[2]; - assert(workgroup_size % 4 == 0); - } - } - - nir_foreach_function_impl(impl, nir) { - state.impl = impl; - lower_cs_intrinsics_convert_impl(&state); - } - - return state.progress; -} diff --git a/src/intel/compiler/elk/intel_nir_lower_non_uniform_barycentric_at_sample.c b/src/intel/compiler/elk/intel_nir_lower_non_uniform_barycentric_at_sample.c deleted file mode 100644 index 49fd1b44add..00000000000 --- a/src/intel/compiler/elk/intel_nir_lower_non_uniform_barycentric_at_sample.c +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright © 2023 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/* - * Lower non uniform at sample messages to the interpolator. - * - * This is pretty much identical to what nir_lower_non_uniform_access() does. - * We do it here because otherwise GCM would undo this optimization. Also we - * can assume divergence analysis here. - */ - -#include "intel_nir.h" -#include "compiler/nir/nir_builder.h" - -static bool -intel_nir_lower_non_uniform_barycentric_at_sample_instr(nir_builder *b, - nir_instr *instr, - void *cb_data) -{ - if (instr->type != nir_instr_type_intrinsic) - return false; - - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - if (intrin->intrinsic != nir_intrinsic_load_barycentric_at_sample) - return false; - - if (nir_src_is_always_uniform(intrin->src[0]) || - !nir_src_is_divergent(intrin->src[0])) - return false; - - nir_def *sample_id = intrin->src[0].ssa; - - b->cursor = nir_instr_remove(&intrin->instr); - - nir_push_loop(b); - { - nir_def *first_sample_id = nir_read_first_invocation(b, sample_id); - - nir_push_if(b, nir_ieq(b, sample_id, first_sample_id)); - { - nir_builder_instr_insert(b, &intrin->instr); - - nir_src_rewrite(&intrin->src[0], first_sample_id); - - nir_jump(b, nir_jump_break); - } - } - - return true; -} - -bool -intel_nir_lower_non_uniform_barycentric_at_sample(nir_shader *nir) -{ - return nir_shader_instructions_pass( - nir, - intel_nir_lower_non_uniform_barycentric_at_sample_instr, - nir_metadata_none, - NULL); -} diff --git a/src/intel/compiler/elk/intel_nir_lower_non_uniform_resource_intel.c b/src/intel/compiler/elk/intel_nir_lower_non_uniform_resource_intel.c deleted file mode 100644 index 78314897d82..00000000000 --- a/src/intel/compiler/elk/intel_nir_lower_non_uniform_resource_intel.c +++ /dev/null @@ -1,319 +0,0 @@ -/* - * Copyright © 2023 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "compiler/nir/nir_builder.h" -#include "util/u_dynarray.h" - -#include "intel_nir.h" - -static bool -nir_instr_is_resource_intel(nir_instr *instr) -{ - return instr->type == nir_instr_type_intrinsic && - nir_instr_as_intrinsic(instr)->intrinsic == nir_intrinsic_resource_intel; -} - -static bool -add_src_instr(nir_src *src, void *state) -{ - struct util_dynarray *inst_array = state; - util_dynarray_foreach(inst_array, nir_instr *, instr_ptr) { - if (*instr_ptr == src->ssa->parent_instr) - return true; - } - - util_dynarray_append(inst_array, nir_instr *, src->ssa->parent_instr); - - return true; -} - -static nir_intrinsic_instr * -find_resource_intel(struct util_dynarray *inst_array, - nir_def *def) -{ - /* If resouce_intel is already directly in front of the instruction, there - * is nothing to do. - */ - if (nir_instr_is_resource_intel(def->parent_instr)) - return NULL; - - util_dynarray_append(inst_array, nir_instr *, def->parent_instr); - - unsigned idx = 0, scan_index = 0; - while (idx < util_dynarray_num_elements(inst_array, nir_instr *)) { - nir_instr *instr = *util_dynarray_element(inst_array, nir_instr *, idx++); - - for (; scan_index < util_dynarray_num_elements(inst_array, nir_instr *); scan_index++) { - nir_instr *scan_instr = *util_dynarray_element(inst_array, nir_instr *, scan_index); - if (nir_instr_is_resource_intel(scan_instr)) - return nir_instr_as_intrinsic(scan_instr); - } - - nir_foreach_src(instr, add_src_instr, inst_array); - } - - return NULL; -} - -static bool -intel_nir_lower_non_uniform_intrinsic(nir_builder *b, - nir_intrinsic_instr *intrin, - struct util_dynarray *inst_array) -{ - unsigned source; - switch (intrin->intrinsic) { - case nir_intrinsic_load_ubo: - case nir_intrinsic_load_ssbo: - case nir_intrinsic_get_ssbo_size: - case nir_intrinsic_ssbo_atomic: - case nir_intrinsic_ssbo_atomic_swap: - case nir_intrinsic_load_ssbo_block_intel: - case nir_intrinsic_store_ssbo_block_intel: - case nir_intrinsic_load_ubo_uniform_block_intel: - case nir_intrinsic_load_ssbo_uniform_block_intel: - case nir_intrinsic_image_load_raw_intel: - case nir_intrinsic_image_store_raw_intel: - case nir_intrinsic_image_load: - case nir_intrinsic_image_store: - case nir_intrinsic_image_atomic: - case nir_intrinsic_image_atomic_swap: - case nir_intrinsic_bindless_image_load: - case nir_intrinsic_bindless_image_store: - case nir_intrinsic_bindless_image_atomic: - case nir_intrinsic_bindless_image_atomic_swap: - case nir_intrinsic_image_size: - case nir_intrinsic_bindless_image_size: - source = 0; - break; - - case nir_intrinsic_store_ssbo: - source = 1; - break; - - default: - return false; - } - - b->cursor = nir_before_instr(&intrin->instr); - - util_dynarray_clear(inst_array); - - nir_intrinsic_instr *old_resource_intel = - find_resource_intel(inst_array, intrin->src[source].ssa); - if (old_resource_intel == NULL) - return false; - - nir_instr *new_instr = - nir_instr_clone(b->shader, &old_resource_intel->instr); - - nir_instr_insert(b->cursor, new_instr); - - nir_intrinsic_instr *new_resource_intel = - nir_instr_as_intrinsic(new_instr); - - nir_src_rewrite(&new_resource_intel->src[1], intrin->src[source].ssa); - nir_src_rewrite(&intrin->src[source], &new_resource_intel->def); - - return true; -} - -static bool -intel_nir_lower_non_uniform_tex(nir_builder *b, - nir_tex_instr *tex, - struct util_dynarray *inst_array) -{ - b->cursor = nir_before_instr(&tex->instr); - - bool progress = false; - for (unsigned s = 0; s < tex->num_srcs; s++) { - if (tex->src[s].src_type != nir_tex_src_texture_handle && - tex->src[s].src_type != nir_tex_src_sampler_handle) - continue; - - util_dynarray_clear(inst_array); - - nir_intrinsic_instr *old_resource_intel = - find_resource_intel(inst_array, tex->src[s].src.ssa); - if (old_resource_intel == NULL) - continue; - - nir_instr *new_instr = - nir_instr_clone(b->shader, &old_resource_intel->instr); - - nir_instr_insert(b->cursor, new_instr); - - nir_intrinsic_instr *new_resource_intel = - nir_instr_as_intrinsic(new_instr); - - nir_src_rewrite(&new_resource_intel->src[1], tex->src[s].src.ssa); - nir_src_rewrite(&tex->src[s].src, &new_resource_intel->def); - - progress = true; - } - - return progress; -} - -static bool -intel_nir_lower_non_uniform_instr(nir_builder *b, - nir_instr *instr, - void *cb_data) -{ - struct util_dynarray *inst_array = cb_data; - - switch (instr->type) { - case nir_instr_type_intrinsic: - return intel_nir_lower_non_uniform_intrinsic(b, - nir_instr_as_intrinsic(instr), - inst_array); - - case nir_instr_type_tex: - return intel_nir_lower_non_uniform_tex(b, - nir_instr_as_tex(instr), - inst_array); - - default: - return false; - } -} - -/** This pass rematerializes resource_intel intrinsics closer to their use. - * - * For example will turn this : - * ssa_1 = iadd ... - * ssa_2 = resource_intel ..., ssa_1, ... - * ssa_3 = read_first_invocation ssa_2 - * ssa_4 = load_ssbo ssa_3, ... - * - * into this : - * ssa_1 = iadd ... - * ssa_3 = read_first_invocation ssa_1 - * ssa_5 = resource_intel ..., ssa_3, ... - * ssa_4 = load_ssbo ssa_5, ... - * - * The goal is to have the resource_intel immediately before its use so that - * the backend compiler can know how the load_ssbo should be compiled (binding - * table or bindless access, etc...). - */ -bool -intel_nir_lower_non_uniform_resource_intel(nir_shader *shader) -{ - void *mem_ctx = ralloc_context(NULL); - - struct util_dynarray inst_array; - util_dynarray_init(&inst_array, mem_ctx); - - bool ret = nir_shader_instructions_pass(shader, - intel_nir_lower_non_uniform_instr, - nir_metadata_block_index | - nir_metadata_dominance, - &inst_array); - - ralloc_free(mem_ctx); - - return ret; -} - -static bool -skip_resource_intel_cleanup(nir_instr *instr) -{ - switch (instr->type) { - case nir_instr_type_tex: - return true; - - case nir_instr_type_intrinsic: { - nir_intrinsic_instr *intrin = - nir_instr_as_intrinsic(instr); - switch (intrin->intrinsic) { - case nir_intrinsic_load_ubo: - case nir_intrinsic_load_ssbo: - case nir_intrinsic_store_ssbo: - case nir_intrinsic_get_ssbo_size: - case nir_intrinsic_ssbo_atomic: - case nir_intrinsic_ssbo_atomic_swap: - case nir_intrinsic_load_ssbo_block_intel: - case nir_intrinsic_store_ssbo_block_intel: - case nir_intrinsic_load_ssbo_uniform_block_intel: - case nir_intrinsic_image_load_raw_intel: - case nir_intrinsic_image_store_raw_intel: - case nir_intrinsic_image_load: - case nir_intrinsic_image_store: - case nir_intrinsic_image_atomic: - case nir_intrinsic_image_atomic_swap: - case nir_intrinsic_bindless_image_load: - case nir_intrinsic_bindless_image_store: - case nir_intrinsic_bindless_image_atomic: - case nir_intrinsic_bindless_image_atomic_swap: - case nir_intrinsic_image_size: - case nir_intrinsic_bindless_image_size: - return true; - - default: - return false; - } - } - - default: - return false; - } -} - -static bool -intel_nir_cleanup_resource_intel_instr(nir_builder *b, - nir_intrinsic_instr *intrin, - void *cb_data) -{ - if (intrin->intrinsic != nir_intrinsic_resource_intel) - return false; - - bool progress = false; - nir_foreach_use_safe(src, &intrin->def) { - if (!nir_src_is_if(src) && skip_resource_intel_cleanup(nir_src_parent_instr(src))) - continue; - - progress = true; - nir_src_rewrite(src, intrin->src[1].ssa); - } - - return progress; -} - -/** This pass removes unnecessary resource_intel intrinsics - * - * This pass must not be run before intel_nir_lower_non_uniform_resource_intel. - */ -bool -intel_nir_cleanup_resource_intel(nir_shader *shader) -{ - void *mem_ctx = ralloc_context(NULL); - - bool ret = nir_shader_intrinsics_pass(shader, - intel_nir_cleanup_resource_intel_instr, - nir_metadata_block_index | - nir_metadata_dominance, - NULL); - - ralloc_free(mem_ctx); - - return ret; -} diff --git a/src/intel/compiler/elk/intel_nir_lower_shading_rate_output.c b/src/intel/compiler/elk/intel_nir_lower_shading_rate_output.c deleted file mode 100644 index 18c89f8bea0..00000000000 --- a/src/intel/compiler/elk/intel_nir_lower_shading_rate_output.c +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2021 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/* - * Lower the shading rate output from the bit field format described in the - * SPIRV spec : - * - * bit | name | description - * 0 | Vertical2Pixels | Fragment invocation covers 2 pixels vertically - * 1 | Vertical4Pixels | Fragment invocation covers 4 pixels vertically - * 2 | Horizontal2Pixels | Fragment invocation covers 2 pixels horizontally - * 3 | Horizontal4Pixels | Fragment invocation covers 4 pixels horizontally - * - * into a single dword composed of 2 fp16 to be stored in the dword 0 of the - * VUE header. - * - * When no horizontal/vertical bits are set, the size in pixel size in that - * dimension is assumed to be 1. - * - * According to the specification, the shading rate output can be read & - * written. A read after a write should report a different value if the - * implementation decides on different primitive shading rate for some reason. - * This is never the case in our implementation. - */ - -#include "intel_nir.h" -#include "compiler/nir/nir_builder.h" - -static bool -lower_shading_rate_output_instr(nir_builder *b, nir_intrinsic_instr *intrin, - UNUSED void *_state) -{ - nir_intrinsic_op op = intrin->intrinsic; - - if (op != nir_intrinsic_load_output && - op != nir_intrinsic_store_output && - op != nir_intrinsic_load_per_primitive_output && - op != nir_intrinsic_store_per_primitive_output) - return false; - - struct nir_io_semantics io = nir_intrinsic_io_semantics(intrin); - if (io.location != VARYING_SLOT_PRIMITIVE_SHADING_RATE) - return false; - - bool is_store = op == nir_intrinsic_store_output || - op == nir_intrinsic_store_per_primitive_output; - - b->cursor = is_store ? nir_before_instr(&intrin->instr) : nir_after_instr(&intrin->instr); - - if (is_store) { - nir_def *bit_field = intrin->src[0].ssa; - nir_def *fp16_x = - nir_i2f16(b, - nir_ishl(b, nir_imm_int(b, 1), - nir_ishr_imm(b, bit_field, 2))); - nir_def *fp16_y = - nir_i2f16(b, - nir_ishl(b, nir_imm_int(b, 1), - nir_iand_imm(b, bit_field, 0x3))); - nir_def *packed_fp16_xy = nir_pack_32_2x16_split(b, fp16_x, fp16_y); - - nir_src_rewrite(&intrin->src[0], packed_fp16_xy); - } else { - nir_def *packed_fp16_xy = &intrin->def; - - nir_def *u32_x = - nir_i2i32(b, nir_unpack_32_2x16_split_x(b, packed_fp16_xy)); - nir_def *u32_y = - nir_i2i32(b, nir_unpack_32_2x16_split_y(b, packed_fp16_xy)); - - nir_def *bit_field = - nir_ior(b, nir_ishl_imm(b, nir_ushr_imm(b, u32_x, 1), 2), - nir_ushr_imm(b, u32_y, 1)); - - nir_def_rewrite_uses_after(packed_fp16_xy, bit_field, - bit_field->parent_instr); - } - - return true; -} - -bool -intel_nir_lower_shading_rate_output(nir_shader *nir) -{ - return nir_shader_intrinsics_pass(nir, lower_shading_rate_output_instr, - nir_metadata_block_index | - nir_metadata_dominance, NULL); -} diff --git a/src/intel/compiler/elk/intel_nir_lower_sparse.c b/src/intel/compiler/elk/intel_nir_lower_sparse.c deleted file mode 100644 index f7625c2dd0d..00000000000 --- a/src/intel/compiler/elk/intel_nir_lower_sparse.c +++ /dev/null @@ -1,247 +0,0 @@ -/* - * Copyright (c) 2023 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "intel_nir.h" -#include "compiler/nir/nir_builder.h" - -/* - * This pass lowers a few of the sparse instructions to something HW can - * handle. - * - * The image_*_sparse_load intrinsics are lowered into 2 instructions, a - * regular image_*_load intrinsic and a sparse texture txf operation and - * reconstructs the sparse vector of the original intrinsic using the 2 new - * values. We need to do this because our backend implements image load/store - * using the dataport and the dataport unit doesn't provide residency - * information. We need to use the sampler for residency. - * - * The is_sparse_texels_resident intrinsic is lowered to a bit checking - * operation as the data reported by the sampler is a single bit per lane in - * the first component. - * - * The tex_* instructions with a compare value need to be lower into 2 - * instructions due to a HW limitation : - * - * SKL PRMs, Volume 7: 3D-Media-GPGPU, Messages, SIMD Payloads : - * - * "The Pixel Null Mask field, when enabled via the Pixel Null Mask Enable - * will be incorect for sample_c when applied to a surface with 64-bit per - * texel format such as R16G16BA16_UNORM. Pixel Null mask Enable may - * incorrectly report pixels as referencing a Null surface." - */ - -static void -lower_is_sparse_texels_resident(nir_builder *b, nir_intrinsic_instr *intrin) -{ - b->cursor = nir_instr_remove(&intrin->instr); - - nir_def_rewrite_uses( - &intrin->def, - nir_i2b(b, nir_iand(b, intrin->src[0].ssa, - nir_ishl(b, nir_imm_int(b, 1), - nir_load_subgroup_invocation(b))))); -} - -static void -lower_sparse_residency_code_and(nir_builder *b, nir_intrinsic_instr *intrin) -{ - b->cursor = nir_instr_remove(&intrin->instr); - - nir_def_rewrite_uses( - &intrin->def, - nir_iand(b, intrin->src[0].ssa, intrin->src[1].ssa)); -} - -static void -lower_sparse_image_load(nir_builder *b, nir_intrinsic_instr *intrin) -{ - b->cursor = nir_instr_remove(&intrin->instr); - - nir_def *img_load; - nir_intrinsic_instr *new_intrin; - if (intrin->intrinsic == nir_intrinsic_image_sparse_load) { - img_load = nir_image_load(b, - intrin->num_components - 1, - intrin->def.bit_size, - intrin->src[0].ssa, - intrin->src[1].ssa, - intrin->src[2].ssa, - intrin->src[3].ssa); - new_intrin = nir_instr_as_intrinsic(img_load->parent_instr); - nir_intrinsic_set_range_base(new_intrin, nir_intrinsic_range_base(intrin)); - } else { - img_load = nir_bindless_image_load(b, - intrin->num_components - 1, - intrin->def.bit_size, - intrin->src[0].ssa, - intrin->src[1].ssa, - intrin->src[2].ssa, - intrin->src[3].ssa); - new_intrin = nir_instr_as_intrinsic(img_load->parent_instr); - } - - nir_intrinsic_set_image_array(new_intrin, nir_intrinsic_image_array(intrin)); - nir_intrinsic_set_image_dim(new_intrin, nir_intrinsic_image_dim(intrin)); - nir_intrinsic_set_format(new_intrin, nir_intrinsic_format(intrin)); - nir_intrinsic_set_access(new_intrin, nir_intrinsic_access(intrin)); - nir_intrinsic_set_dest_type(new_intrin, nir_intrinsic_dest_type(intrin)); - - nir_def *dests[NIR_MAX_VEC_COMPONENTS]; - for (unsigned i = 0; i < intrin->num_components - 1; i++) { - dests[i] = nir_channel(b, img_load, i); - } - - /* Use texture instruction to compute residency */ - nir_tex_instr *tex = nir_tex_instr_create(b->shader, 3); - - tex->op = nir_texop_txf; - /* We don't care about the dest type since we're not using any of that - * data. - */ - tex->dest_type = nir_type_float32; - tex->is_array = nir_intrinsic_image_array(intrin); - tex->is_shadow = false; - tex->sampler_index = 0; - tex->is_sparse = true; - - tex->src[0].src_type = intrin->intrinsic == nir_intrinsic_image_sparse_load ? - nir_tex_src_texture_offset : - nir_tex_src_texture_handle; - tex->src[0].src = nir_src_for_ssa(intrin->src[0].ssa); - - tex->coord_components = nir_image_intrinsic_coord_components(intrin); - nir_def *coord; - if (nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE && - nir_intrinsic_image_array(intrin)) { - tex->coord_components++; - - nir_def *img_layer = nir_channel(b, intrin->src[1].ssa, 2); - nir_def *tex_slice = nir_idiv(b, img_layer, nir_imm_int(b, 6)); - nir_def *tex_face = - nir_iadd(b, img_layer, nir_ineg(b, nir_imul_imm(b, tex_slice, 6))); - nir_def *comps[4] = { - nir_channel(b, intrin->src[1].ssa, 0), - nir_channel(b, intrin->src[1].ssa, 1), - tex_face, - tex_slice - }; - coord = nir_vec(b, comps, 4); - } else { - coord = nir_channels(b, intrin->src[1].ssa, - nir_component_mask(tex->coord_components)); - } - tex->src[1].src_type = nir_tex_src_coord; - tex->src[1].src = nir_src_for_ssa(coord); - - tex->src[2].src_type = nir_tex_src_lod; - tex->src[2].src = nir_src_for_ssa(nir_imm_int(b, 0)); - - nir_def_init(&tex->instr, &tex->def, 5, - intrin->def.bit_size); - - nir_builder_instr_insert(b, &tex->instr); - - dests[intrin->num_components - 1] = nir_channel(b, &tex->def, 4); - - nir_def_rewrite_uses( - &intrin->def, - nir_vec(b, dests, intrin->num_components)); -} - -static void -lower_tex_compare(nir_builder *b, nir_tex_instr *tex, int compare_idx) -{ - b->cursor = nir_after_instr(&tex->instr); - - /* Clone the original instruction */ - nir_tex_instr *sparse_tex = nir_instr_as_tex(nir_instr_clone(b->shader, &tex->instr)); - nir_def_init(&sparse_tex->instr, &sparse_tex->def, - tex->def.num_components, tex->def.bit_size); - nir_builder_instr_insert(b, &sparse_tex->instr); - - /* Drop the compare source on the cloned instruction */ - nir_tex_instr_remove_src(sparse_tex, compare_idx); - - /* Drop the residency query on the original tex instruction */ - tex->is_sparse = false; - tex->def.num_components = tex->def.num_components - 1; - - nir_def *new_comps[NIR_MAX_VEC_COMPONENTS]; - for (unsigned i = 0; i < tex->def.num_components; i++) - new_comps[i] = nir_channel(b, &tex->def, i); - new_comps[tex->def.num_components] = - nir_channel(b, &sparse_tex->def, tex->def.num_components); - - nir_def *new_vec = nir_vec(b, new_comps, sparse_tex->def.num_components); - - nir_def_rewrite_uses_after(&tex->def, new_vec, new_vec->parent_instr); -} - -static bool -lower_sparse_intrinsics(nir_builder *b, nir_instr *instr, void *cb_data) -{ - switch (instr->type) { - case nir_instr_type_intrinsic: { - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - switch (intrin->intrinsic) { - case nir_intrinsic_image_sparse_load: - case nir_intrinsic_bindless_image_sparse_load: - lower_sparse_image_load(b, intrin); - return true; - - case nir_intrinsic_is_sparse_texels_resident: - lower_is_sparse_texels_resident(b, intrin); - return true; - - case nir_intrinsic_sparse_residency_code_and: - lower_sparse_residency_code_and(b, intrin); - return true; - - default: - return false; - } - } - - case nir_instr_type_tex: { - nir_tex_instr *tex = nir_instr_as_tex(instr); - int comp_idx = nir_tex_instr_src_index(tex, nir_tex_src_comparator); - if (comp_idx != -1 && tex->is_sparse) { - lower_tex_compare(b, tex, comp_idx); - return true; - } - return false; - } - - default: - return false; - } -} - -bool -intel_nir_lower_sparse_intrinsics(nir_shader *nir) -{ - return nir_shader_instructions_pass(nir, lower_sparse_intrinsics, - nir_metadata_block_index | - nir_metadata_dominance, - NULL); -} diff --git a/src/intel/compiler/elk/intel_nir_lower_texture.c b/src/intel/compiler/elk/intel_nir_lower_texture.c deleted file mode 100644 index d1b34022024..00000000000 --- a/src/intel/compiler/elk/intel_nir_lower_texture.c +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright © 2024 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "compiler/nir/nir_builder.h" -#include "intel_nir.h" - -/** - * Pack either the explicit LOD or LOD bias and the array index together. - */ -static bool -pack_lod_and_array_index(nir_builder *b, nir_tex_instr *tex) -{ - /* If 32-bit texture coordinates are used, pack either the explicit LOD or - * LOD bias and the array index into a single (32-bit) value. - */ - int lod_index = nir_tex_instr_src_index(tex, nir_tex_src_lod); - if (lod_index < 0) { - lod_index = nir_tex_instr_src_index(tex, nir_tex_src_bias); - - /* The explicit LOD or LOD bias may not be found if this lowering has - * already occured. The explicit LOD may also not be found in some - * cases where it is zero. - */ - if (lod_index < 0) - return false; - } - - assert(nir_tex_instr_src_type(tex, lod_index) == nir_type_float); - - /* Also do not perform this packing if the explicit LOD is zero. */ - if (tex->op == nir_texop_txl && - nir_src_is_const(tex->src[lod_index].src) && - nir_src_as_float(tex->src[lod_index].src) == 0.0) { - return false; - } - - const int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord); - assert(coord_index >= 0); - - nir_def *lod = tex->src[lod_index].src.ssa; - nir_def *coord = tex->src[coord_index].src.ssa; - - assert(nir_tex_instr_src_type(tex, coord_index) == nir_type_float); - - if (coord->bit_size < 32) - return false; - - b->cursor = nir_before_instr(&tex->instr); - - /* First, combine the two values. The packing format is a little weird. - * The explicit LOD / LOD bias is stored as float, as normal. However, the - * array index is converted to an integer and smashed into the low 9 bits. - */ - const unsigned array_index = tex->coord_components - 1; - - nir_def *clamped_ai = - nir_umin(b, - nir_f2u32(b, nir_fround_even(b, nir_channel(b, coord, - array_index))), - nir_imm_int(b, 511)); - - nir_def *lod_ai = nir_ior(b, nir_iand_imm(b, lod, 0xfffffe00), clamped_ai); - - /* Second, replace the coordinate with a new value that has one fewer - * component (i.e., drop the array index). - */ - nir_def *reduced_coord = nir_trim_vector(b, coord, 2); - tex->coord_components--; - - /* Finally, remove the old sources and add the new. */ - nir_src_rewrite(&tex->src[coord_index].src, reduced_coord); - - nir_tex_instr_remove_src(tex, lod_index); - nir_tex_instr_add_src(tex, nir_tex_src_backend1, lod_ai); - - return true; -} - -static bool -intel_nir_lower_texture_instr(nir_builder *b, nir_instr *instr, void *cb_data) -{ - if (instr->type != nir_instr_type_tex) - return false; - - const struct intel_nir_lower_texture_opts *opts = cb_data; - nir_tex_instr *tex = nir_instr_as_tex(instr); - - switch (tex->op) { - case nir_texop_txl: - case nir_texop_txb: - if (tex->is_array && - tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && - opts->combined_lod_and_array_index) { - return pack_lod_and_array_index(b, tex); - } - return false; - default: - /* Nothing to do */ - return false; - } - - return false; -} - -bool -intel_nir_lower_texture(nir_shader *shader, - const struct intel_nir_lower_texture_opts *opts) -{ - return nir_shader_instructions_pass(shader, - intel_nir_lower_texture_instr, - nir_metadata_none, - (void *)opts); -} diff --git a/src/intel/compiler/elk/intel_nir_opt_peephole_ffma.c b/src/intel/compiler/elk/intel_nir_opt_peephole_ffma.c deleted file mode 100644 index 6b19f7eb65c..00000000000 --- a/src/intel/compiler/elk/intel_nir_opt_peephole_ffma.c +++ /dev/null @@ -1,253 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "intel_nir.h" -#include "compiler/nir/nir_builder.h" - -/* - * Implements a small peephole optimization that looks for a multiply that - * is only ever used in an add and replaces both with an fma. - */ - -static inline bool -are_all_uses_fadd(nir_def *def) -{ - nir_foreach_use_including_if(use_src, def) { - if (nir_src_is_if(use_src)) - return false; - - nir_instr *use_instr = nir_src_parent_instr(use_src); - if (use_instr->type != nir_instr_type_alu) - return false; - - nir_alu_instr *use_alu = nir_instr_as_alu(use_instr); - switch (use_alu->op) { - case nir_op_fadd: - break; /* This one's ok */ - - case nir_op_mov: - case nir_op_fneg: - case nir_op_fabs: - if (!are_all_uses_fadd(&use_alu->def)) - return false; - break; - - default: - return false; - } - } - - return true; -} - -static nir_alu_instr * -get_mul_for_src(nir_alu_src *src, unsigned num_components, - uint8_t *swizzle, bool *negate, bool *abs) -{ - uint8_t swizzle_tmp[NIR_MAX_VEC_COMPONENTS]; - - nir_instr *instr = src->src.ssa->parent_instr; - if (instr->type != nir_instr_type_alu) - return NULL; - - nir_alu_instr *alu = nir_instr_as_alu(instr); - - /* We want to bail if any of the other ALU operations involved is labeled - * exact. One reason for this is that, while the value that is changing is - * actually the result of the add and not the multiply, the intention of - * the user when they specify an exact multiply is that they want *that* - * value and what they don't care about is the add. Another reason is that - * SPIR-V explicitly requires this behaviour. - */ - if (alu->exact) - return NULL; - - switch (alu->op) { - case nir_op_mov: - alu = get_mul_for_src(&alu->src[0], alu->def.num_components, - swizzle, negate, abs); - break; - - case nir_op_fneg: - alu = get_mul_for_src(&alu->src[0], alu->def.num_components, - swizzle, negate, abs); - *negate = !*negate; - break; - - case nir_op_fabs: - alu = get_mul_for_src(&alu->src[0], alu->def.num_components, - swizzle, negate, abs); - *negate = false; - *abs = true; - break; - - case nir_op_fmul: - /* Only absorb a fmul into a ffma if the fmul is only used in fadd - * operations. This prevents us from being too aggressive with our - * fusing which can actually lead to more instructions. - */ - if (!are_all_uses_fadd(&alu->def)) - return NULL; - break; - - default: - return NULL; - } - - if (!alu) - return NULL; - - /* Copy swizzle data before overwriting it to avoid setting a wrong swizzle. - * - * Example: - * Former swizzle[] = xyzw - * src->swizzle[] = zyxx - * - * Expected output swizzle = zyxx - * If we reuse swizzle in the loop, then output swizzle would be zyzz. - */ - memcpy(swizzle_tmp, swizzle, NIR_MAX_VEC_COMPONENTS*sizeof(uint8_t)); - for (int i = 0; i < num_components; i++) - swizzle[i] = swizzle_tmp[src->swizzle[i]]; - - return alu; -} - -/** - * Given a list of (at least two) nir_alu_src's, tells if any of them is a - * constant value and is used only once. - */ -static bool -any_alu_src_is_a_constant(nir_alu_src srcs[]) -{ - for (unsigned i = 0; i < 2; i++) { - if (srcs[i].src.ssa->parent_instr->type == nir_instr_type_load_const) { - nir_load_const_instr *load_const = - nir_instr_as_load_const (srcs[i].src.ssa->parent_instr); - - if (list_is_singular(&load_const->def.uses)) - return true; - } - } - - return false; -} - -static bool -intel_nir_opt_peephole_ffma_instr(nir_builder *b, - nir_instr *instr, - UNUSED void *cb_data) -{ - if (instr->type != nir_instr_type_alu) - return false; - - nir_alu_instr *add = nir_instr_as_alu(instr); - if (add->op != nir_op_fadd) - return false; - - if (add->exact) - return false; - - - /* This, is the case a + a. We would rather handle this with an - * algebraic reduction than fuse it. Also, we want to only fuse - * things where the multiply is used only once and, in this case, - * it would be used twice by the same instruction. - */ - if (add->src[0].src.ssa == add->src[1].src.ssa) - return false; - - nir_alu_instr *mul; - uint8_t add_mul_src, swizzle[NIR_MAX_VEC_COMPONENTS]; - bool negate, abs; - for (add_mul_src = 0; add_mul_src < 2; add_mul_src++) { - for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) - swizzle[i] = i; - - negate = false; - abs = false; - - mul = get_mul_for_src(&add->src[add_mul_src], - add->def.num_components, - swizzle, &negate, &abs); - - if (mul != NULL) - break; - } - - if (mul == NULL) - return false; - - unsigned bit_size = add->def.bit_size; - - nir_def *mul_src[2]; - mul_src[0] = mul->src[0].src.ssa; - mul_src[1] = mul->src[1].src.ssa; - - /* If any of the operands of the fmul and any of the fadd is a constant, - * we bypass because it will be more efficient as the constants will be - * propagated as operands, potentially saving two load_const instructions. - */ - if (any_alu_src_is_a_constant(mul->src) && - any_alu_src_is_a_constant(add->src)) { - return false; - } - - b->cursor = nir_before_instr(&add->instr); - - if (abs) { - for (unsigned i = 0; i < 2; i++) - mul_src[i] = nir_fabs(b, mul_src[i]); - } - - if (negate) - mul_src[0] = nir_fneg(b, mul_src[0]); - - nir_alu_instr *ffma = nir_alu_instr_create(b->shader, nir_op_ffma); - - for (unsigned i = 0; i < 2; i++) { - ffma->src[i].src = nir_src_for_ssa(mul_src[i]); - for (unsigned j = 0; j < add->def.num_components; j++) - ffma->src[i].swizzle[j] = mul->src[i].swizzle[swizzle[j]]; - } - nir_alu_src_copy(&ffma->src[2], &add->src[1 - add_mul_src]); - - nir_def_init(&ffma->instr, &ffma->def, - add->def.num_components, bit_size); - nir_def_rewrite_uses(&add->def, &ffma->def); - - nir_builder_instr_insert(b, &ffma->instr); - assert(list_is_empty(&add->def.uses)); - nir_instr_remove(&add->instr); - - return true; -} - -bool -intel_nir_opt_peephole_ffma(nir_shader *shader) -{ - return nir_shader_instructions_pass(shader, intel_nir_opt_peephole_ffma_instr, - nir_metadata_block_index | - nir_metadata_dominance, - NULL); -} diff --git a/src/intel/compiler/elk/intel_nir_opt_peephole_imul32x16.c b/src/intel/compiler/elk/intel_nir_opt_peephole_imul32x16.c deleted file mode 100644 index c42cc5a8b14..00000000000 --- a/src/intel/compiler/elk/intel_nir_opt_peephole_imul32x16.c +++ /dev/null @@ -1,319 +0,0 @@ -/* - * Copyright © 2022 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "intel_nir.h" -#include "compiler/nir/nir_builder.h" - -/** - * Implement a peephole pass to convert integer multiplications to imul32x16. - */ - -struct pass_data { - struct hash_table *range_ht; -}; - -static void -replace_imul_instr(nir_builder *b, nir_alu_instr *imul, unsigned small_val, - nir_op new_opcode) -{ - assert(small_val == 0 || small_val == 1); - - b->cursor = nir_before_instr(&imul->instr); - - nir_alu_instr *imul_32x16 = nir_alu_instr_create(b->shader, new_opcode); - - nir_alu_src_copy(&imul_32x16->src[0], &imul->src[1 - small_val]); - nir_alu_src_copy(&imul_32x16->src[1], &imul->src[small_val]); - - nir_def_init(&imul_32x16->instr, &imul_32x16->def, - imul->def.num_components, 32); - - nir_def_rewrite_uses(&imul->def, - &imul_32x16->def); - - nir_builder_instr_insert(b, &imul_32x16->instr); - - nir_instr_remove(&imul->instr); - nir_instr_free(&imul->instr); -} - -enum root_operation { - non_unary = 0, - integer_neg = 1 << 0, - integer_abs = 1 << 1, - integer_neg_abs = integer_neg | integer_abs, - invalid_root = 255 -}; - -static enum root_operation -signed_integer_range_analysis(nir_shader *shader, struct hash_table *range_ht, - nir_scalar scalar, int *lo, int *hi) -{ - if (nir_scalar_is_const(scalar)) { - *lo = nir_scalar_as_int(scalar); - *hi = *lo; - return non_unary; - } - - if (nir_scalar_is_alu(scalar)) { - switch (nir_scalar_alu_op(scalar)) { - case nir_op_iabs: - signed_integer_range_analysis(shader, range_ht, - nir_scalar_chase_alu_src(scalar, 0), - lo, hi); - - if (*lo == INT32_MIN) { - *hi = INT32_MAX; - } else { - const int32_t a = abs(*lo); - const int32_t b = abs(*hi); - - *lo = MIN2(a, b); - *hi = MAX2(a, b); - } - - /* Absolute value wipes out any inner negations, and it is redundant - * with any inner absolute values. - */ - return integer_abs; - - case nir_op_ineg: { - const enum root_operation root = - signed_integer_range_analysis(shader, range_ht, - nir_scalar_chase_alu_src(scalar, 0), - lo, hi); - - if (*lo == INT32_MIN) { - *hi = INT32_MAX; - } else { - const int32_t a = -(*lo); - const int32_t b = -(*hi); - - *lo = MIN2(a, b); - *hi = MAX2(a, b); - } - - /* Negation of a negation cancels out, but negation of absolute value - * must preserve the integer_abs bit. - */ - return root ^ integer_neg; - } - - case nir_op_imax: { - int src0_lo, src0_hi; - int src1_lo, src1_hi; - - signed_integer_range_analysis(shader, range_ht, - nir_scalar_chase_alu_src(scalar, 0), - &src0_lo, &src0_hi); - signed_integer_range_analysis(shader, range_ht, - nir_scalar_chase_alu_src(scalar, 1), - &src1_lo, &src1_hi); - - *lo = MAX2(src0_lo, src1_lo); - *hi = MAX2(src0_hi, src1_hi); - - return non_unary; - } - - case nir_op_imin: { - int src0_lo, src0_hi; - int src1_lo, src1_hi; - - signed_integer_range_analysis(shader, range_ht, - nir_scalar_chase_alu_src(scalar, 0), - &src0_lo, &src0_hi); - signed_integer_range_analysis(shader, range_ht, - nir_scalar_chase_alu_src(scalar, 1), - &src1_lo, &src1_hi); - - *lo = MIN2(src0_lo, src1_lo); - *hi = MIN2(src0_hi, src1_hi); - - return non_unary; - } - - default: - break; - } - } - - /* Any value with the sign-bit set is problematic. Consider the case when - * bound is 0x80000000. As an unsigned value, this means the value must be - * in the range [0, 0x80000000]. As a signed value, it means the value must - * be in the range [0, INT_MAX] or it must be INT_MIN. - * - * If bound is -2, it means the value is either in the range [INT_MIN, -2] - * or it is in the range [0, INT_MAX]. - * - * This function only returns a single, contiguous range. The union of the - * two ranges for any value of bound with the sign-bit set is [INT_MIN, - * INT_MAX]. - */ - const int32_t bound = nir_unsigned_upper_bound(shader, range_ht, - scalar, NULL); - if (bound < 0) { - *lo = INT32_MIN; - *hi = INT32_MAX; - } else { - *lo = 0; - *hi = bound; - } - - return non_unary; -} - -static bool -intel_nir_opt_peephole_imul32x16_instr(nir_builder *b, - nir_instr *instr, - void *cb_data) -{ - struct pass_data *d = (struct pass_data *) cb_data; - struct hash_table *range_ht = d->range_ht; - - if (instr->type != nir_instr_type_alu) - return false; - - nir_alu_instr *imul = nir_instr_as_alu(instr); - if (imul->op != nir_op_imul) - return false; - - if (imul->def.bit_size != 32) - return false; - - nir_op new_opcode = nir_num_opcodes; - - unsigned i; - for (i = 0; i < 2; i++) { - if (!nir_src_is_const(imul->src[i].src)) - continue; - - int64_t lo = INT64_MAX; - int64_t hi = INT64_MIN; - - for (unsigned comp = 0; comp < imul->def.num_components; comp++) { - int64_t v = nir_src_comp_as_int(imul->src[i].src, comp); - - if (v < lo) - lo = v; - - if (v > hi) - hi = v; - } - - if (lo >= INT16_MIN && hi <= INT16_MAX) { - new_opcode = nir_op_imul_32x16; - break; - } else if (lo >= 0 && hi <= UINT16_MAX) { - new_opcode = nir_op_umul_32x16; - break; - } - } - - if (new_opcode != nir_num_opcodes) { - replace_imul_instr(b, imul, i, new_opcode); - return true; - } - - if (imul->def.num_components > 1) - return false; - - const nir_scalar imul_scalar = { &imul->def, 0 }; - int idx = -1; - enum root_operation prev_root = invalid_root; - - for (i = 0; i < 2; i++) { - /* All constants were previously processed. There is nothing more to - * learn from a constant here. - */ - if (imul->src[i].src.ssa->parent_instr->type == nir_instr_type_load_const) - continue; - - nir_scalar scalar = nir_scalar_chase_alu_src(imul_scalar, i); - int lo = INT32_MIN; - int hi = INT32_MAX; - - const enum root_operation root = - signed_integer_range_analysis(b->shader, range_ht, scalar, &lo, &hi); - - /* Copy propagation (in the backend) has trouble handling cases like - * - * mov(8) g60<1>D -g59<8,8,1>D - * mul(8) g61<1>D g63<8,8,1>D g60<16,8,2>W - * - * If g59 had absolute value instead of negation, even improved copy - * propagation would not be able to make progress. - * - * In cases where both sources to the integer multiplication can fit in - * 16-bits, choose the source that does not have a source modifier. - */ - if (root < prev_root) { - if (lo >= INT16_MIN && hi <= INT16_MAX) { - new_opcode = nir_op_imul_32x16; - idx = i; - prev_root = root; - - if (root == non_unary) - break; - } else if (lo >= 0 && hi <= UINT16_MAX) { - new_opcode = nir_op_umul_32x16; - idx = i; - prev_root = root; - - if (root == non_unary) - break; - } - } - } - - if (new_opcode == nir_num_opcodes) { - assert(idx == -1); - assert(prev_root == invalid_root); - return false; - } - - assert(idx != -1); - assert(prev_root != invalid_root); - - replace_imul_instr(b, imul, idx, new_opcode); - return true; -} - -bool -intel_nir_opt_peephole_imul32x16(nir_shader *shader) -{ - struct pass_data cb_data; - - cb_data.range_ht = _mesa_pointer_hash_table_create(NULL); - - bool progress = nir_shader_instructions_pass(shader, - intel_nir_opt_peephole_imul32x16_instr, - nir_metadata_block_index | - nir_metadata_dominance, - &cb_data); - - _mesa_hash_table_destroy(cb_data.range_ht, NULL); - - return progress; -} - diff --git a/src/intel/compiler/elk/intel_nir_tcs_workarounds.c b/src/intel/compiler/elk/intel_nir_tcs_workarounds.c deleted file mode 100644 index 269259ff312..00000000000 --- a/src/intel/compiler/elk/intel_nir_tcs_workarounds.c +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "compiler/nir/nir_builder.h" -#include "intel_nir.h" - -/** - * Implements the WaPreventHSTessLevelsInterference workaround (for Gfx7-8). - * - * From the Broadwell PRM, Volume 7 (3D-Media-GPGPU), Page 494 (below the - * definition of the patch header layouts): - * - * "HW Bug: The Tessellation stage will incorrectly add domain points - * along patch edges under the following conditions, which may result - * in conformance failures and/or cracking artifacts: - * - * * QUAD domain - * * INTEGER partitioning - * * All three TessFactors in a given U or V direction (e.g., V - * direction: UEQ0, InsideV, UEQ1) are all exactly 1.0 - * * All three TessFactors in the other direction are > 1.0 and all - * round up to the same integer value (e.g, U direction: - * VEQ0 = 3.1, InsideU = 3.7, VEQ1 = 3.4) - * - * The suggested workaround (to be implemented as part of the postamble - * to the HS shader in the HS kernel) is: - * - * if ( - * (TF[UEQ0] > 1.0) || - * (TF[VEQ0] > 1.0) || - * (TF[UEQ1] > 1.0) || - * (TF[VEQ1] > 1.0) || - * (TF[INSIDE_U] > 1.0) || - * (TF[INSIDE_V] > 1.0) ) - * { - * TF[INSIDE_U] = (TF[INSIDE_U] == 1.0) ? 2.0 : TF[INSIDE_U]; - * TF[INSIDE_V] = (TF[INSIDE_V] == 1.0) ? 2.0 : TF[INSIDE_V]; - * }" - * - * There's a subtlety here. Intel internal HSD-ES bug 1208668495 notes - * that the above workaround fails to fix certain GL/ES CTS tests which - * have inside tessellation factors of -1.0. This can be explained by - * a quote from the ARB_tessellation_shader specification: - * - * "If "equal_spacing" is used, the floating-point tessellation level is - * first clamped to the range [1,], where is implementation- - * dependent maximum tessellation level (MAX_TESS_GEN_LEVEL)." - * - * In other words, the actual inner tessellation factor used is - * clamp(TF[INSIDE_*], 1.0, 64.0). So we want to compare the clamped - * value against 1.0. To accomplish this, we change the comparison from - * (TF[INSIDE_*] == 1.0) to (TF[INSIDE_*] <= 1.0). - */ - -static inline nir_def * -load_output(nir_builder *b, int num_components, int offset, int component) -{ - return nir_load_output(b, num_components, 32, nir_imm_int(b, 0), - .base = offset, - .component = component); -} - -static void -emit_quads_workaround(nir_builder *b, nir_block *block) -{ - b->cursor = nir_after_block_before_jump(block); - - nir_def *inner = load_output(b, 2, 0, 2); - nir_def *outer = load_output(b, 4, 1, 0); - - nir_def *any_greater_than_1 = - nir_ior(b, nir_bany(b, nir_fgt_imm(b, outer, 1.0f)), - nir_bany(b, nir_fgt_imm(b, inner, 1.0f))); - - nir_push_if(b, any_greater_than_1); - - inner = nir_bcsel(b, nir_fle_imm(b, inner, 1.0f), - nir_imm_float(b, 2.0f), inner); - - nir_store_output(b, inner, nir_imm_int(b, 0), - .component = 2, - .write_mask = WRITEMASK_XY); - - nir_pop_if(b, NULL); -} - -void -intel_nir_apply_tcs_quads_workaround(nir_shader *nir) -{ - assert(nir->info.stage == MESA_SHADER_TESS_CTRL); - - nir_function_impl *impl = nir_shader_get_entrypoint(nir); - - nir_builder b = nir_builder_create(impl); - - /* emit_quads_workaround() inserts an if statement into each block, - * which splits it in two. This changes the set of predecessors of - * the end block. We want to process the original set, so to be safe, - * save it off to an array first. - */ - const unsigned num_end_preds = impl->end_block->predecessors->entries; - nir_block *end_preds[num_end_preds]; - unsigned i = 0; - - set_foreach(impl->end_block->predecessors, entry) { - end_preds[i++] = (nir_block *) entry->key; - } - - for (i = 0; i < num_end_preds; i++) { - emit_quads_workaround(&b, end_preds[i]); - } - - nir_metadata_preserve(impl, nir_metadata_none); -} diff --git a/src/intel/compiler/elk/intel_shader_enums.h b/src/intel/compiler/elk/intel_shader_enums.h deleted file mode 100644 index 98ea9f25b0d..00000000000 --- a/src/intel/compiler/elk/intel_shader_enums.h +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Copyright 2024 Intel Corporation - * SPDX-License-Identifier: MIT - */ - -#ifndef INTEL_SHADER_ENUMS_H -#define INTEL_SHADER_ENUMS_H - -#include - -#include "compiler/shader_enums.h" -#include "util/enum_operators.h" - -#ifdef __cplusplus -extern "C" { -#endif - -enum intel_msaa_flags { - /** Must be set whenever any dynamic MSAA is used - * - * This flag mostly exists to let us assert that the driver understands - * dynamic MSAA so we don't run into trouble with drivers that don't. - */ - INTEL_MSAA_FLAG_ENABLE_DYNAMIC = (1 << 0), - - /** True if the framebuffer is multisampled */ - INTEL_MSAA_FLAG_MULTISAMPLE_FBO = (1 << 1), - - /** True if this shader has been dispatched per-sample */ - INTEL_MSAA_FLAG_PERSAMPLE_DISPATCH = (1 << 2), - - /** True if inputs should be interpolated per-sample by default */ - INTEL_MSAA_FLAG_PERSAMPLE_INTERP = (1 << 3), - - /** True if this shader has been dispatched with alpha-to-coverage */ - INTEL_MSAA_FLAG_ALPHA_TO_COVERAGE = (1 << 4), - - /** True if this shader has been dispatched coarse - * - * This is intentionally chose to be bit 15 to correspond to the coarse bit - * in the pixel interpolator messages. - */ - INTEL_MSAA_FLAG_COARSE_PI_MSG = (1 << 15), - - /** True if this shader has been dispatched coarse - * - * This is intentionally chose to be bit 18 to correspond to the coarse bit - * in the render target messages. - */ - INTEL_MSAA_FLAG_COARSE_RT_WRITES = (1 << 18), -}; -MESA_DEFINE_CPP_ENUM_BITFIELD_OPERATORS(intel_msaa_flags) - -/** - * @defgroup Tessellator parameter enumerations. - * - * These correspond to the hardware values in 3DSTATE_TE, and are provided - * as part of the tessellation evaluation shader. - * - * @{ - */ -enum intel_tess_partitioning { - INTEL_TESS_PARTITIONING_INTEGER = 0, - INTEL_TESS_PARTITIONING_ODD_FRACTIONAL = 1, - INTEL_TESS_PARTITIONING_EVEN_FRACTIONAL = 2, -}; - -enum intel_tess_output_topology { - INTEL_TESS_OUTPUT_TOPOLOGY_POINT = 0, - INTEL_TESS_OUTPUT_TOPOLOGY_LINE = 1, - INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CW = 2, - INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CCW = 3, -}; - -enum intel_tess_domain { - INTEL_TESS_DOMAIN_QUAD = 0, - INTEL_TESS_DOMAIN_TRI = 1, - INTEL_TESS_DOMAIN_ISOLINE = 2, -}; -/** @} */ - -enum intel_shader_dispatch_mode { - INTEL_DISPATCH_MODE_4X1_SINGLE = 0, - INTEL_DISPATCH_MODE_4X2_DUAL_INSTANCE = 1, - INTEL_DISPATCH_MODE_4X2_DUAL_OBJECT = 2, - INTEL_DISPATCH_MODE_SIMD8 = 3, - - INTEL_DISPATCH_MODE_TCS_SINGLE_PATCH = 0, - INTEL_DISPATCH_MODE_TCS_MULTI_PATCH = 2, -}; - -/** - * Data structure recording the relationship between the gl_varying_slot enum - * and "slots" within the vertex URB entry (VUE). A "slot" is defined as a - * single octaword within the VUE (128 bits). - * - * Note that each BRW register contains 256 bits (2 octawords), so when - * accessing the VUE in URB_NOSWIZZLE mode, each register corresponds to two - * consecutive VUE slots. When accessing the VUE in URB_INTERLEAVED mode (as - * in a vertex shader), each register corresponds to a single VUE slot, since - * it contains data for two separate vertices. - */ -struct intel_vue_map { - /** - * Bitfield representing all varying slots that are (a) stored in this VUE - * map, and (b) actually written by the shader. Does not include any of - * the additional varying slots defined in brw_varying_slot. - */ - uint64_t slots_valid; - - /** - * Is this VUE map for a separate shader pipeline? - * - * Separable programs (GL_ARB_separate_shader_objects) can be mixed and matched - * without the linker having a chance to dead code eliminate unused varyings. - * - * This means that we have to use a fixed slot layout, based on the output's - * location field, rather than assigning slots in a compact contiguous block. - */ - bool separate; - - /** - * Map from gl_varying_slot value to VUE slot. For gl_varying_slots that are - * not stored in a slot (because they are not written, or because - * additional processing is applied before storing them in the VUE), the - * value is -1. - */ - signed char varying_to_slot[VARYING_SLOT_TESS_MAX]; - - /** - * Map from VUE slot to gl_varying_slot value. For slots that do not - * directly correspond to a gl_varying_slot, the value comes from - * brw_varying_slot. - * - * For slots that are not in use, the value is BRW_VARYING_SLOT_PAD. - */ - signed char slot_to_varying[VARYING_SLOT_TESS_MAX]; - - /** - * Total number of VUE slots in use - */ - int num_slots; - - /** - * Number of position VUE slots. If num_pos_slots > 1, primitive - * replication is being used. - */ - int num_pos_slots; - - /** - * Number of per-patch VUE slots. Only valid for tessellation control - * shader outputs and tessellation evaluation shader inputs. - */ - int num_per_patch_slots; - - /** - * Number of per-vertex VUE slots. Only valid for tessellation control - * shader outputs and tessellation evaluation shader inputs. - */ - int num_per_vertex_slots; -}; - -struct intel_cs_dispatch_info { - uint32_t group_size; - uint32_t simd_size; - uint32_t threads; - - /* RightExecutionMask field used in GPGPU_WALKER. */ - uint32_t right_mask; -}; - -enum PACKED intel_compute_walk_order { - INTEL_WALK_ORDER_XYZ = 0, - INTEL_WALK_ORDER_XZY = 1, - INTEL_WALK_ORDER_YXZ = 2, - INTEL_WALK_ORDER_YZX = 3, - INTEL_WALK_ORDER_ZXY = 4, - INTEL_WALK_ORDER_ZYX = 5, -}; - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* INTEL_SHADER_ENUMS_H */