From 540e84bedbfbc884ce982640f7da9319441a4aa0 Mon Sep 17 00:00:00 2001
From: Konstantin Seurer <konstantin.seurer@gmail.com>
Date: Fri, 20 Sep 2024 11:13:11 +0200
Subject: [PATCH] gallivm: Preserve -0 and nan

Some operations need additional or different code to preserve the sign
of zero (-0) or to keep NaN results intact.

Reviewed-by: Dave Airlie <airlied@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31273>
---
 src/gallium/auxiliary/gallivm/lp_bld_arit.c | 30 +++++++++++++++++----
 src/gallium/auxiliary/gallivm/lp_bld_nir.c  |  3 +++
 src/gallium/drivers/llvmpipe/lp_screen.c    |  1 +
 3 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index d0264e42c9f..cb33061cf2e 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -946,12 +946,15 @@ lp_build_mul(struct lp_build_context *bld,
    assert(lp_check_value(type, a));
    assert(lp_check_value(type, b));
 
-   if (a == bld->zero)
-      return bld->zero;
+   if (!type.floating || !type.nan_preserve) {
+      if (a == bld->zero)
+         return bld->zero;
+      if (b == bld->zero)
+         return bld->zero;
+   }
+
    if (a == bld->one)
       return b;
-   if (b == bld->zero)
-      return bld->zero;
    if (b == bld->one)
       return a;
    if (a == bld->undef || b == bld->undef)
@@ -2055,6 +2058,12 @@ lp_build_trunc(struct lp_build_context *bld,
       trunc = LLVMBuildFPToSI(builder, a, int_vec_type, "");
       res = LLVMBuildSIToFP(builder, trunc, vec_type, "floor.trunc");
 
+      if (type.signed_zero_preserve) {
+         char intrinsic[64];
+         lp_format_intrinsic(intrinsic, 64, "llvm.copysign", bld->vec_type);
+         res = lp_build_intrinsic_binary(builder, intrinsic, vec_type, res, a);
+      }
+
       /* mask out sign bit */
       anosign = lp_build_abs(bld, a);
       /*
@@ -2113,6 +2122,17 @@ lp_build_round(struct lp_build_context *bld,
       res = lp_build_iround(bld, a);
       res = LLVMBuildSIToFP(builder, res, vec_type, "");
 
+      if (type.signed_zero_preserve) {
+         LLVMValueRef sign_mask =
+            lp_build_const_int_vec(bld->gallivm, type, 1llu << (type.width - 1));
+         LLVMValueRef a_sign = LLVMBuildBitCast(builder, a, int_vec_type, "");
+         a_sign = LLVMBuildAnd(builder, a_sign, sign_mask, "");
+
+         res = LLVMBuildBitCast(builder, res, int_vec_type, "");
+         res = LLVMBuildOr(builder, res, a_sign, "");
+         res = LLVMBuildBitCast(builder, res, vec_type, "");
+      }
+
       /* mask out sign bit */
       anosign = lp_build_abs(bld, a);
       /*
@@ -3076,7 +3096,7 @@ lp_build_pow(struct lp_build_context *bld,
                    __func__);
    }
 
-   LLVMValueRef cmp = lp_build_cmp(bld, PIPE_FUNC_EQUAL, x, lp_build_const_vec(bld->gallivm, bld->type, 0.0f));
+   LLVMValueRef cmp = lp_build_cmp_ordered(bld, PIPE_FUNC_EQUAL, x, lp_build_const_vec(bld->gallivm, bld->type, 0.0f));
    LLVMValueRef res = lp_build_exp2(bld, lp_build_mul(bld, lp_build_log2_safe(bld, x), y));
 
    res = lp_build_select(bld, cmp, lp_build_const_vec(bld->gallivm, bld->type, 0.0f), res);
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir.c b/src/gallium/auxiliary/gallivm/lp_bld_nir.c
index d7150e91c20..6390fffca0a 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_nir.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_nir.c
@@ -3005,6 +3005,9 @@ lp_build_opt_nir(struct nir_shader *nir)
 
    NIR_PASS_V(nir, nir_lower_flrp, 16|32|64, true);
    NIR_PASS_V(nir, nir_lower_fp16_casts, nir_lower_fp16_all | nir_lower_fp16_split_fp64);
+
+   NIR_PASS(_, nir, nir_lower_alu);
+
    do {
       progress = false;
       NIR_PASS(progress, nir, nir_opt_constant_folding);
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 433bc33beb4..6a7a9c5417d 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -670,6 +670,7 @@ static const struct nir_shader_compiler_options gallivm_nir_options = {
    .support_16bit_alu = true,
    .lower_fisnormal = true,
    .lower_fquantize2f16 = true,
+   .lower_fminmax_signed_zero = true,
    .driver_functions = true,
    .scalarize_ddx = true,
    .support_indirect_inputs = (uint8_t)BITFIELD_MASK(PIPE_SHADER_TYPES),