From f7939f2fdc200259c8af3380854ac16f7360b28d Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Mon, 16 Jun 2025 11:38:53 -0700
Subject: [PATCH] nir/range_analysis: Handle bfi and bitfield_select in
 get_alu_uub

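Teach get_alu_uub to compute an upper bound for the results of bfi and
bitfield_select instead of leaving these opcodes unhandled. I noticed
some things related to this while implementing support for
bitfield_select / BFN in BRW.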

shader-db:

Lunar Lake
total instructions in shared programs: 17183140 -> 17183128 (<.01%)
instructions in affected programs: 3830 -> 3818 (-0.31%)
helped: 6 / HURT: 0

total cycles in shared programs: 889936934 -> 889936056 (<.01%)
cycles in affected programs: 253758 -> 252880 (-0.35%)
helped: 4 / HURT: 2

No shader-db changes on any other Intel platform.

fossil-db:

Lunar Lake
Totals:
Instrs: 233285343 -> 233284796 (-0.00%); split: -0.00%, +0.00%
Cycle count: 32756777978 -> 32756399804 (-0.00%); split: -0.00%, +0.00%
Max live registers: 71738646 -> 71738626 (-0.00%)
Non SSA regs after NIR: 67837900 -> 67837902 (+0.00%)

Totals from 177 (0.02% of 790723) affected shaders:
Instrs: 389849 -> 389302 (-0.14%); split: -0.14%, +0.00%
Cycle count: 356341872 -> 355963698 (-0.11%); split: -0.11%, +0.01%
Max live registers: 39364 -> 39344 (-0.05%)
Non SSA regs after NIR: 70453 -> 70455 (+0.00%)

Meteor Lake, DG2, and Ice Lake had similar results. (Meteor Lake shown)
Totals:
Instrs: 264095611 -> 264095358 (-0.00%)
Cycle count: 26555705299 -> 26554303407 (-0.01%); split: -0.01%, +0.00%
Fill count: 613233 -> 613231 (-0.00%)

Totals from 123 (0.01% of 905547) affected shaders:
Instrs: 334830 -> 334577 (-0.08%)
Cycle count: 326531667 -> 325129775 (-0.43%); split: -0.65%, +0.22%
Fill count: 4145 -> 4143 (-0.05%)

Tiger Lake and Skylake had similar results. (Tiger Lake shown)
Totals:
Instrs: 269733849 -> 269733590 (-0.00%)
Cycle count: 25240548036 -> 25241435039 (+0.00%); split: -0.00%, +0.01%

Totals from 123 (0.01% of 903812) affected shaders:
Instrs: 338617 -> 338358 (-0.08%)
Cycle count: 326605644 -> 327492647 (+0.27%); split: -0.13%, +0.40%

Reviewed-by: Matt Turner <mattst88@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37186>
---
 src/compiler/nir/nir_range_analysis.c | 53 +++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/src/compiler/nir/nir_range_analysis.c b/src/compiler/nir/nir_range_analysis.c
index 0d65ecab8cf..c7690a0a942 100644
--- a/src/compiler/nir/nir_range_analysis.c
+++ b/src/compiler/nir/nir_range_analysis.c
@@ -1837,7 +1837,9 @@ get_alu_uub(struct analysis_state *state, struct scalar_query q, uint32_t *resul
    case nir_op_bcsel:
    case nir_op_b32csel:
    case nir_op_ubfe:
+   case nir_op_bfi:
    case nir_op_bfm:
+   case nir_op_bitfield_select:
    case nir_op_extract_u8:
    case nir_op_extract_i8:
    case nir_op_extract_u16:
@@ -1999,6 +2001,57 @@ get_alu_uub(struct analysis_state *state, struct scalar_query q, uint32_t *resul
       }
       break;
    }
+
+   case nir_op_bfi: {
+      nir_scalar src0_scalar = nir_scalar_chase_alu_src(q.scalar, 0);
+      const uint64_t s1 = bitmask(util_last_bit64(src[1]));
+      const uint64_t s2 = bitmask(util_last_bit64(src[2]));
+
+      if (nir_scalar_is_const(src0_scalar)) {
+         const uint64_t s0 = nir_scalar_as_uint(src0_scalar);
+
+         /* This case should be eliminated by opt_algebraic. */
+         if (s0 == 0) {
+            *result = s2;
+         } else {
+            const int x = ffsll(s0) - 1;
+            *result = (s0 & (s1 << x)) | (~s0 & s2);
+         }
+      } else {
+         const uint64_t s0 = bitmask(util_last_bit64(src[0]));
+
+         /* Due to the unpredictable shift, the true maximum value of (s0 &
+          * (s1 << x)) cannot be known. However, it cannot be larger than
+          * s0.
+          *
+          * inot doesn't work in get_alu_uub. It is known that (~s0 & s2)
+          * cannot be larger than s2, so just use s2 as a loose upper bound.
+          */
+         *result = s0 | s2;
+      }
+      break;
+   }
+
+   case nir_op_bitfield_select: {
+      nir_scalar src0_scalar = nir_scalar_chase_alu_src(q.scalar, 0);
+      const uint64_t s1 = bitmask(util_last_bit64(src[1]));
+      const uint64_t s2 = bitmask(util_last_bit64(src[2]));
+
+      if (nir_scalar_is_const(src0_scalar)) {
+         const uint64_t s0 = nir_scalar_as_uint(src0_scalar);
+
+         *result = (s0 & s1) | (~s0 & s2);
+      } else {
+         const uint64_t s0 = bitmask(util_last_bit64(src[0]));
+
+         /* inot doesn't work in get_alu_uub. It is known that (~s0 & s2)
+          * cannot be larger than s2, so just use s2 as a loose upper bound.
+          */
+         *result = (s0 & s1) | s2;
+      }
+      break;
+   }
+
    /* limited floating-point support for f2u32(fmul(load_input(), <constant>)) */
    case nir_op_f2i32:
    case nir_op_f2u32: