From a48a044cf6eb80fec923c38f5687d29f7f01e254 Mon Sep 17 00:00:00 2001
From: Ian Romanick <ian.d.romanick@intel.com>
Date: Fri, 17 Jan 2020 15:16:10 -0800
Subject: [PATCH] nir/algebraic: Simplify equality comparisons of b2T with 1 or
 0

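Generate the b2i / b2f equality and inequality patterns in a loop so
that comparisons with 1 are simplified in addition to the existing
comparisons with 0.

Adding the b2i(a) == 1 and b2i(a) != 1 patterns also helps prevent
regressions when spurious negations are removed from integer equality
comparisons, as is done in !33498.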

v2: Make all variables part of the iteration instead of calculating some
of them. Suggested by Alyssa.

shader-db:

All Intel platforms had similar results. (Lunar Lake shown)
total instructions in shared programs: 16973331 -> 16973309 (<.01%)
instructions in affected programs: 266 -> 244 (-8.27%)
helped: 2 / HURT: 0

total cycles in shared programs: 915620774 -> 915620550 (<.01%)
cycles in affected programs: 4360 -> 4136 (-5.14%)
helped: 2 / HURT: 0

fossil-db:

All Intel platforms had similar results. (Lunar Lake shown)
Totals:
Instrs: 209748011 -> 209748003 (-0.00%)
Cycle count: 30514920286 -> 30514920400 (+0.00%); split: -0.00%, +0.00%
Non SSA regs after NIR: 237334726 -> 237334710 (-0.00%)

Totals from 8 (0.00% of 706651) affected shaders:
Instrs: 16956 -> 16948 (-0.05%)
Cycle count: 261052 -> 261166 (+0.04%); split: -0.92%, +0.96%
Non SSA regs after NIR: 20000 -> 19984 (-0.08%)

Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33648>
---
 src/compiler/nir/nir_opt_algebraic.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index ffb4a2e84be..c409e44fe73 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -784,12 +784,8 @@ optimizations.extend([
    (('ieq', ('iadd', a, b), a), ('ieq', b, 0)),
    (('ine', ('iadd', a, b), a), ('ine', b, 0)),
 
-   (('feq', ('b2f', 'a@1'), 0.0), ('inot', a)),
    (('fge', 0.0, ('b2f', 'a@1')), ('inot', a)),
-   (('fneu', ('b2f', 'a@1'), 0.0), a),
    (('flt',  0.0, ('b2f', 'a@1')), a),
-   (('ieq', ('b2i', 'a@1'), 0),   ('inot', a)),
-   (('ine', ('b2i', 'a@1'), 0),   a),
    (('ieq', 'a@1', False), ('inot', a)),
    (('ieq', 'a@1', True), a),
    (('ine', 'a@1', False), a),
@@ -3230,6 +3226,15 @@ for i in range(2, 4 + 1):
              ((to_mp, vec_inst + suffix_in), vec_inst + out_mp, '!options->vectorize_vec2_16bit')
           ]
 
+for b2t, xne, xeq, zero, one in (('b2i', 'ine', 'ieq', 0, 1),
+                                 ('b2f', 'fneu', 'feq', 0.0, 1.0)):
+    optimizations += [
+        ((xeq, (b2t, 'a@1'), zero), ('inot', a)),
+        ((xeq, (b2t, 'a@1'), one),  a),
+        ((xne, (b2t, 'a@1'), zero), a),
+        ((xne, (b2t, 'a@1'), one),  ('inot', a)),
+    ]
+
 # This section contains "late" optimizations that should be run before
 # creating ffmas and calling regular optimizations for the final time.
 # Optimizations should go here if they help code generation and conflict