From 92beca9aa5337f0a515e2bbebabf753a7c7c9934 Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Wed, 24 Sep 2025 16:00:42 +0100
Subject: [PATCH] nir/lower_tex: optimize txd(coord, ddx/ddy(coord))

fossil-db (gfx1201):
Totals from 73 (0.09% of 79839) affected shaders:
MaxWaves: 1668 -> 1670 (+0.12%)
Instrs: 352537 -> 347991 (-1.29%); split: -1.29%, +0.00%
CodeSize: 1924140 -> 1887660 (-1.90%); split: -1.90%, +0.00%
VGPRs: 6360 -> 6324 (-0.57%)
Latency: 3891330 -> 3888192 (-0.08%); split: -0.10%, +0.02%
InvThroughput: 789998 -> 783583 (-0.81%); split: -0.84%, +0.03%
VClause: 6409 -> 6408 (-0.02%); split: -0.06%, +0.05%
SClause: 4071 -> 4102 (+0.76%); split: -0.10%, +0.86%
Copies: 16756 -> 16316 (-2.63%); split: -2.94%, +0.32%
PreVGPRs: 5456 -> 5432 (-0.44%); split: -0.57%, +0.13%
VALU: 232982 -> 228117 (-2.09%)
SALU: 32853 -> 32848 (-0.02%); split: -0.05%, +0.03%
VMEM: 9234 -> 9237 (+0.03%)

Signed-off-by: Rhys Perry
Reviewed-by: Georg Lehmann
Part-of:
---
 src/compiler/nir/nir.h                        |  16 ++
 src/compiler/nir/nir_lower_tex.c              | 116 +++++++++++++++++-
 .../nir/nir_shader_compiler_options.h         |   6 +
 3 files changed, 135 insertions(+), 3 deletions(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 3dab326bd04..7666f68cad1 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -5843,12 +5843,28 @@ typedef struct nir_lower_tex_options {
     */
    bool lower_index_to_offset;
 
+   /* Optimize txd(coord, ddxy_coarse(coord)) to tex(coord). */
+   bool optimize_txd;
+
    /**
     * Payload data to be sent to callback / filter functions.
     */
    void *callback_data;
 } nir_lower_tex_options;
 
+/**
+ * Matches a txd whose ddx/ddy sources are, per component, ddx_coarse/ddy_coarse
+ * of the matching coordinate component (plain ddx/ddy also match when the
+ * coarse_ddx compiler option is set). Textures or samplers marked non-uniform
+ * never match.
+ *
+ * ddxy_instrs[i * 2 + 0] and ddxy_instrs[i * 2 + 1] receive the ddx and ddy
+ * instructions of component i, so it must hold two entries per component.
+ *
+ * Returns the number of derivative components, or 0 if there is no match.
+ */
+unsigned nir_tex_parse_txd_coords(nir_shader *shader, nir_tex_instr *tex, nir_instr **ddxy_instrs);
+
 /** Lowers complex texture instructions to simpler ones */
 bool nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options);
 
diff --git a/src/compiler/nir/nir_lower_tex.c b/src/compiler/nir/nir_lower_tex.c
index 407c09758aa..b6b7485b76e 100644
--- a/src/compiler/nir/nir_lower_tex.c
+++ b/src/compiler/nir/nir_lower_tex.c
@@ -42,6 +42,7 @@
 #include "nir_builder.h"
 #include "nir_builtin_builder.h"
 #include "nir_format_convert.h"
+#include "nir_loop_analyze.h"
 
 typedef struct nir_const_value_3_4 {
    nir_const_value v[3][4];
@@ -1530,16 +1531,116 @@ lower_index_to_offset(nir_builder *b, nir_tex_instr *tex)
    return progress;
 }
 
+unsigned
+nir_tex_parse_txd_coords(nir_shader *shader, nir_tex_instr *tex, nir_instr **ddxy_instrs)
+{
+   if (tex->op != nir_texop_txd)
+      return 0;
+
+   /* Non-uniform texture samples with implicit LOD might require that the resource is quad-uniform. */
+   if (tex->texture_non_uniform || tex->sampler_non_uniform)
+      return 0;
+
+   nir_def *coord = nir_get_tex_src(tex, nir_tex_src_coord);
+   nir_def *ddxy[] = { nir_get_tex_src(tex, nir_tex_src_ddx), nir_get_tex_src(tex, nir_tex_src_ddy) };
+   assert(coord && ddxy[0] && ddxy[1]);
+   for (unsigned i = 0; i < ddxy[0]->num_components; i++) {
+      nir_scalar coord_comp = nir_scalar_resolved(coord, i);
+      for (unsigned j = 0; j < 2; j++) {
+         nir_scalar ddxy_comp = nir_scalar_resolved(ddxy[j], i);
+         if (!nir_scalar_is_intrinsic(ddxy_comp))
+            return 0;
+
+         nir_intrinsic_op op = nir_scalar_intrinsic_op(ddxy_comp);
+         bool coarse_default = shader->options->coarse_ddx;
+         if (j == 0 && (op != nir_intrinsic_ddx || !coarse_default) &&
+             op != nir_intrinsic_ddx_coarse)
+            return 0;
+         if (j == 1 && (op != nir_intrinsic_ddy || !coarse_default) &&
+             op != nir_intrinsic_ddy_coarse)
+            return 0;
+
+         ddxy_instrs[i * 2 + j] = ddxy_comp.def->parent_instr;
+
+         nir_def *def = nir_def_as_intrinsic(ddxy_comp.def)->src[0].ssa;
+         ddxy_comp = nir_scalar_resolved(def, ddxy_comp.comp);
+         if (!nir_scalar_equal(coord_comp, ddxy_comp))
+            return 0;
+      }
+   }
+
+   return ddxy[0]->num_components;
+}
+
+/**
+ * Rewrites txd(coord, ddx(coord), ddy(coord)) as tex(coord).
+ *
+ * The rewrite is skipped unless every derivative instruction shares the tex's
+ * parent control-flow node, has an index no lower than the most recently
+ * visited terminate/halt/return, and none of the control-flow nodes walked
+ * backwards from the tex's block to the derivative's block is flagged by
+ * contains_other_jump().
+ *
+ * Returns true if the instruction was changed.
+ */
+static bool
+optimize_txd(nir_shader *shader, nir_tex_instr *tex, unsigned prev_terminate_return)
+{
+   nir_instr *ddxy_instrs[NIR_MAX_VEC_COMPONENTS * 2];
+   unsigned size = nir_tex_parse_txd_coords(shader, tex, ddxy_instrs);
+   if (!size)
+      return false;
+
+   /* Two instructions (ddx and ddy) are recorded per component. */
+   for (unsigned i = 0; i < size * 2; i++) {
+      nir_instr *instr = ddxy_instrs[i];
+      if (instr->block->cf_node.parent != tex->instr.block->cf_node.parent)
+         return false;
+
+      if (prev_terminate_return > instr->index)
+         return false;
+
+      nir_cf_node *cur = &tex->instr.block->cf_node;
+      while (cur != &instr->block->cf_node) {
+         cur = nir_cf_node_prev(cur);
+         if (contains_other_jump(cur, NULL))
+            return false;
+      }
+   }
+
+   tex->op = nir_texop_tex;
+   nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddx));
+   nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddy));
+   return true;
+}
+
 static bool
 nir_lower_tex_block(nir_block *block, nir_builder *b,
                     const nir_lower_tex_options *options,
-                    const struct nir_shader_compiler_options *compiler_options)
+                    const struct nir_shader_compiler_options *compiler_options,
+                    unsigned *prev_terminate_return)
 {
    bool progress = false;
 
    nir_foreach_instr_safe(instr, block) {
-      if (instr->type != nir_instr_type_tex)
+      if (instr->type == nir_instr_type_intrinsic) {
+         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+         switch (intrin->intrinsic) {
+         case nir_intrinsic_terminate:
+         case nir_intrinsic_terminate_if:
+            *prev_terminate_return = instr->index;
+            break;
+         default:
+            break;
+         }
          continue;
+      } else if (instr->type == nir_instr_type_jump) {
+         if (nir_instr_as_jump(instr)->type == nir_jump_halt ||
+             nir_instr_as_jump(instr)->type == nir_jump_return)
+            *prev_terminate_return = instr->index;
+         continue;
+      } else if (instr->type != nir_instr_type_tex) {
+         continue;
+      }
 
       nir_tex_instr *tex = nir_instr_as_tex(instr);
       bool lower_txp = !!(options->lower_txp & (1 << tex->sampler_dim));
@@ -1717,6 +1818,12 @@ nir_lower_tex_block(nir_block *block, nir_builder *b,
          progress = true;
       }
 
+      /* saturate_src() replaces tex with txd, so skip if sat_mask!=0. */
+      if (options->optimize_txd && tex->op == nir_texop_txd && !sat_mask &&
+          nir_shader_supports_implicit_lod(b->shader)) {
+         progress |= optimize_txd(b->shader, tex, *prev_terminate_return);
+      }
+
       if (tex->op == nir_texop_txd &&
           (options->lower_txd ||
            (options->lower_txd_clamp && has_min_lod) ||
@@ -1810,8 +1917,11 @@ nir_lower_tex_impl(nir_function_impl *impl,
    bool progress = false;
    nir_builder builder = nir_builder_create(impl);
 
+   nir_metadata_require(impl, nir_metadata_instr_index);
+
+   unsigned prev_terminate_return = 0;
    nir_foreach_block(block, impl) {
-      progress |= nir_lower_tex_block(block, &builder, options, compiler_options);
+      progress |= nir_lower_tex_block(block, &builder, options, compiler_options, &prev_terminate_return);
    }
 
    nir_progress(true, impl, nir_metadata_control_flow);
diff --git a/src/compiler/nir/nir_shader_compiler_options.h b/src/compiler/nir/nir_shader_compiler_options.h
index 04c06729f38..38da0fc3bc0 100644
--- a/src/compiler/nir/nir_shader_compiler_options.h
+++ b/src/compiler/nir/nir_shader_compiler_options.h
@@ -787,6 +787,12 @@ typedef struct nir_shader_compiler_options {
    /** Whether derivative intrinsics must be scalarized. */
    bool scalarize_ddx;
 
+   /**
+    * Whether unspecified derivative intrinsics are always coarse.
+    * If this is false, they might be either coarse or fine.
+    */
+   bool coarse_ddx;
+
    /**
     * Assign a range of driver locations to per-view outputs, with unique
     * slots for each view. If unset, per-view outputs will be treated