From a2a2fbc5101d6e6b5d18903c7a0bd84037dbdddc Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Thu, 25 Mar 2021 13:13:21 -0700 Subject: [PATCH] nir/algebraic: Fix NaN-unsafe fcsel patterns For example, the proof for this pattern (('bcsel', ('flt', 'a@32', 0), 'b@32', 'c@32'), ('fcsel_ge', a, c, b)), would be bcsel(a < 0, b, c) bcsel(!(a < 0), c, b) bcsel(a >= 0, c, b) fcsel_ge(a, c, b) However, !(a < 0) => (a >= 0) is well known to produce different results if `a` is NaN. Instead of that replacement, use this replacement: bcsel(a < 0, b, c) bcsel(-0 < -a, b, c) bcsel(0 < -a, b, c) fcsel_gt(-a, b, c) This is NaN-safe and exact. Reviewed-by: Alyssa Rosenzweig Reviewed-by: Jason Ekstrand Fixes: 0f5b3c37c5d ("nir: Add opcodes for fused comp + csel and optimizations") Part-of: --- src/compiler/nir/nir_opt_algebraic.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index c851cd76c72..73ee5d419d6 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -1985,9 +1985,9 @@ optimizations.extend([ (('imul24', a, 0), (0)), (('fcsel', ('slt', 0, a), b, c), ('fcsel_gt', a, b, c), "options->has_fused_comp_and_csel"), - (('fcsel', ('slt', a, 0), b, c), ('fcsel_ge', a, c, b), "options->has_fused_comp_and_csel"), + (('fcsel', ('slt', a, 0), b, c), ('fcsel_gt', ('fneg', a), b, c), "options->has_fused_comp_and_csel"), (('fcsel', ('sge', a, 0), b, c), ('fcsel_ge', a, b, c), "options->has_fused_comp_and_csel"), - (('fcsel', ('sge', 0, a), b, c), ('fcsel_gt', a, c, b), "options->has_fused_comp_and_csel"), + (('fcsel', ('sge', 0, a), b, c), ('fcsel_ge', ('fneg', a), b, c), "options->has_fused_comp_and_csel"), (('bcsel', ('ilt', 0, 'a@32'), 'b@32', 'c@32'), ('i32csel_gt', a, b, c), "options->has_fused_comp_and_csel"), (('bcsel', ('ilt', 'a@32', 0), 'b@32', 'c@32'), ('i32csel_ge', a, c, b), "options->has_fused_comp_and_csel"), @@ -1995,9 +1995,9 @@ optimizations.extend([ (('bcsel', ('ige', 0, 'a@32'), 'b@32', 'c@32'), ('i32csel_gt', a, c, b), "options->has_fused_comp_and_csel"), (('bcsel', ('flt', 0, 'a@32'), 'b@32', 'c@32'), ('fcsel_gt', a, b, c), "options->has_fused_comp_and_csel"), - (('bcsel', ('flt', 'a@32', 0), 'b@32', 'c@32'), ('fcsel_ge', a, c, b), "options->has_fused_comp_and_csel"), + (('bcsel', ('flt', 'a@32', 0), 'b@32', 'c@32'), ('fcsel_gt', ('fneg', a), b, c), "options->has_fused_comp_and_csel"), (('bcsel', ('fge', 'a@32', 0), 'b@32', 'c@32'), ('fcsel_ge', a, b, c), "options->has_fused_comp_and_csel"), - (('bcsel', ('fge', 0, 'a@32'), 'b@32', 'c@32'), ('fcsel_gt', a, c, b), "options->has_fused_comp_and_csel"), + (('bcsel', ('fge', 0, 'a@32'), 'b@32', 'c@32'), ('fcsel_ge', ('fneg', a), b, c), "options->has_fused_comp_and_csel"), ])