From 06eb9fb12599cf792f9bec36f5c5149258fde1dc Mon Sep 17 00:00:00 2001
From: Ian Romanick
Date: Tue, 1 Mar 2022 15:21:02 -0800
Subject: [PATCH] nir/algebraic: Optimize some cases of (sXX(a, b) != 0.0)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I noticed the SGE case while looking at the output of
shaders/closed/steam/trine-2/fp-3.shader_test on i915g. These are
especially bad on i915, which needs two instructions to implement SNE.

An alternative would be to duplicate the sne(sXX(a, b), 0.0) rules in an
algebraic pass that occurs after bool_to_float. Doing the work earlier
seems preferable.

i915
total instructions in shared programs: 788274 -> 788223 (<.01%)
instructions in affected programs: 666 -> 615 (-7.66%)
helped: 5
HURT: 0
helped stats (abs) min: 9 max: 12 x̄: 10.20 x̃: 9
helped stats (rel) min: 5.00% max: 11.11% x̄: 8.12% x̃: 8.16%
95% mean confidence interval for instructions value: -12.24 -8.16
95% mean confidence interval for instructions %-change: -10.81% -5.43%
Instructions are helped.

LOST: 0
GAINED: 2

The two gained shaders are assembly fragment programs in Euro Truck
Simulator 2.

Reviewed-by: Emma Anholt Part-of: --- src/compiler/nir/nir_opt_algebraic.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 5f8757c1651..bed82909759 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -1158,6 +1158,23 @@ optimizations.extend([ (('bany_fnequal3', a, b), ('ior', ('ior', ('fneu', 'a.x', 'b.x'), ('fneu', 'a.y', 'b.y')), ('fneu', 'a.z', 'b.z')), 'options->lower_vector_cmp'), (('bany_fnequal4', a, b), ('ior', ('ior', ('fneu', 'a.x', 'b.x'), ('fneu', 'a.y', 'b.y')), ('ior', ('fneu', 'a.z', 'b.z'), ('fneu', 'a.w', 'b.w'))), 'options->lower_vector_cmp'), + (('feq', ('seq', a, b), 1.0), ('feq', a, b)), + (('feq', ('sne', a, b), 1.0), ('fneu', a, b)), + (('feq', ('slt', a, b), 1.0), ('flt', a, b)), + (('feq', ('sge', a, b), 1.0), ('fge', a, b)), + (('fneu', ('seq', a, b), 0.0), ('feq', a, b)), + (('fneu', ('sne', a, b), 0.0), ('fneu', a, b)), + (('fneu', ('slt', a, b), 0.0), ('flt', a, b)), + (('fneu', ('sge', a, b), 0.0), ('fge', a, b)), + (('feq', ('seq', a, b), 0.0), ('fneu', a, b)), + (('feq', ('sne', a, b), 0.0), ('feq', a, b)), + (('feq', ('slt', a, b), 0.0), ('fge', a, b)), + (('feq', ('sge', a, b), 0.0), ('flt', a, b)), + (('fneu', ('seq', a, b), 1.0), ('fneu', a, b)), + (('fneu', ('sne', a, b), 1.0), ('feq', a, b)), + (('fneu', ('slt', a, b), 1.0), ('fge', a, b)), + (('fneu', ('sge', a, b), 1.0), ('flt', a, b)), + (('fneu', ('fneg', a), a), ('fneu', a, 0.0)), (('feq', ('fneg', a), a), ('feq', a, 0.0)), # Emulating booleans