gallivm: Preserve -0 and nan

Some operations need additional or different code to preserve the sign of 0 or nan. Reviewed-by: Dave Airlie <airlied@redhat.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31273>
2024-09-20 11:13:11 +02:00
parent f5db70cb24
commit 540e84bedb
3 changed files with 29 additions and 5 deletions
@@ -946,12 +946,15 @@ lp_build_mul(struct lp_build_context *bld,
   assert(lp_check_value(type, a));
   assert(lp_check_value(type, b));

-   if (a == bld->zero)
-      return bld->zero;
+   if (!type.floating || !type.nan_preserve) {
+      if (a == bld->zero)
+         return bld->zero;
+      if (b == bld->zero)
+         return bld->zero;
+   }
+
   if (a == bld->one)
      return b;
-   if (b == bld->zero)
-      return bld->zero;
   if (b == bld->one)
      return a;
   if (a == bld->undef || b == bld->undef)
@@ -2055,6 +2058,12 @@ lp_build_trunc(struct lp_build_context *bld,
      trunc = LLVMBuildFPToSI(builder, a, int_vec_type, "");
      res = LLVMBuildSIToFP(builder, trunc, vec_type, "floor.trunc");

+      if (type.signed_zero_preserve) {
+         char intrinsic[64];
+         lp_format_intrinsic(intrinsic, 64, "llvm.copysign", bld->vec_type);
+         res = lp_build_intrinsic_binary(builder, intrinsic, vec_type, res, a);
+      }
+
      /* mask out sign bit */
      anosign = lp_build_abs(bld, a);
      /*
@@ -2113,6 +2122,17 @@ lp_build_round(struct lp_build_context *bld,
      res = lp_build_iround(bld, a);
      res = LLVMBuildSIToFP(builder, res, vec_type, "");

+      if (type.signed_zero_preserve) {
+         LLVMValueRef sign_mask =
+            lp_build_const_int_vec(bld->gallivm, type, 1llu << (type.width - 1));
+         LLVMValueRef a_sign = LLVMBuildBitCast(builder, a, int_vec_type, "");
+         a_sign = LLVMBuildAnd(builder, a_sign, sign_mask, "");
+
+         res = LLVMBuildBitCast(builder, res, int_vec_type, "");
+         res = LLVMBuildOr(builder, res, a_sign, "");
+         res = LLVMBuildBitCast(builder, res, vec_type, "");
+      }
+
      /* mask out sign bit */
      anosign = lp_build_abs(bld, a);
      /*
@@ -3076,7 +3096,7 @@ lp_build_pow(struct lp_build_context *bld,
                   __func__);
   }

-   LLVMValueRef cmp = lp_build_cmp(bld, PIPE_FUNC_EQUAL, x, lp_build_const_vec(bld->gallivm, bld->type, 0.0f));
+   LLVMValueRef cmp = lp_build_cmp_ordered(bld, PIPE_FUNC_EQUAL, x, lp_build_const_vec(bld->gallivm, bld->type, 0.0f));
   LLVMValueRef res = lp_build_exp2(bld, lp_build_mul(bld, lp_build_log2_safe(bld, x), y));

   res = lp_build_select(bld, cmp, lp_build_const_vec(bld->gallivm, bld->type, 0.0f), res);
@@ -3005,6 +3005,9 @@ lp_build_opt_nir(struct nir_shader *nir)

   NIR_PASS_V(nir, nir_lower_flrp, 16|32|64, true);
   NIR_PASS_V(nir, nir_lower_fp16_casts, nir_lower_fp16_all | nir_lower_fp16_split_fp64);
+
+   NIR_PASS(_, nir, nir_lower_alu);
+
   do {
      progress = false;
      NIR_PASS(progress, nir, nir_opt_constant_folding);
@@ -670,6 +670,7 @@ static const struct nir_shader_compiler_options gallivm_nir_options = {
   .support_16bit_alu = true,
   .lower_fisnormal = true,
   .lower_fquantize2f16 = true,
+   .lower_fminmax_signed_zero = true,
   .driver_functions = true,
   .scalarize_ddx = true,
   .support_indirect_inputs = (uint8_t)BITFIELD_MASK(PIPE_SHADER_TYPES),