gallivm: Fix NaN behavior of min and max

Like softpipe in mesa!10419, llvmpipe suffers from improper handling of NaN in nir_op_fmax and nir_op_fmin. nir_op_fsat is already handled correctly. OpenCL strictly requires the "NaN cleansing" behavior, so all of the functionality is in place. Just make the graphics APIs use the OpenCL path. The majority of the possible performance penalty incurred here should be resolved in the next commit. v2: Add updated checksum for bgfx/39-assao.rdc trace. Rendering goes from mostly garbage to looking correct to me. Reviewed-by: Roland Scheidegger <sroland@vmware.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10532>
2021-04-28 15:30:41 -07:00
parent 8af325d192
commit b3f3287eac
3 changed files with 14 additions and 11 deletions
@@ -521,7 +521,7 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef result;
-   enum gallivm_nan_behavior minmax_nan = bld_base->shader->info.stage == MESA_SHADER_KERNEL ? GALLIVM_NAN_RETURN_OTHER : GALLIVM_NAN_BEHAVIOR_UNDEFINED;
+
   switch (op) {
   case nir_op_b2f32:
      result = emit_b2f(bld_base, src[0], 32);
@@ -677,9 +677,19 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
   case nir_op_flt32:
      result = fcmp32(bld_base, PIPE_FUNC_LESS, src_bit_size[0], src);
      break;
-   case nir_op_fmin:
-      result = lp_build_min_ext(get_flt_bld(bld_base, src_bit_size[0]), src[0], src[1], minmax_nan);
+   case nir_op_fmax:
+   case nir_op_fmin: {
+      enum gallivm_nan_behavior minmax_nan = GALLIVM_NAN_RETURN_OTHER;
+
+      if (op == nir_op_fmin) {
+         result = lp_build_min_ext(get_flt_bld(bld_base, src_bit_size[0]),
+                                   src[0], src[1], minmax_nan);
+      } else {
+         result = lp_build_max_ext(get_flt_bld(bld_base, src_bit_size[0]),
+                                   src[0], src[1], minmax_nan);
+      }
      break;
+   }
   case nir_op_fmod: {
      struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size[0]);
      result = lp_build_div(flt_bld, src[0], src[1]);
@@ -692,9 +702,6 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
      result = lp_build_mul(get_flt_bld(bld_base, src_bit_size[0]),
                            src[0], src[1]);
      break;
-   case nir_op_fmax:
-      result = lp_build_max_ext(get_flt_bld(bld_base, src_bit_size[0]), src[0], src[1], minmax_nan);
-      break;
   case nir_op_fneu32:
      result = fcmp32(bld_base, PIPE_FUNC_NOTEQUAL, src_bit_size[0], src);
      break;
@@ -158,10 +158,6 @@ spec/ext_shader_framebuffer_fetch_non_coherent/execution/gles3/simple-ms8: skip
 spec/ext_shader_image_load_formatted/execution/image_checkerboard: skip
 spec/glsl-1.10/preprocessor/extension-defined-test: skip
 spec/glsl-1.10/preprocessor/extension-if-1: skip
-spec/glsl-1.20/execution/fs-nan-builtin-max: fail
-spec/glsl-1.20/execution/fs-nan-builtin-min: fail
-spec/glsl-1.20/execution/vs-nan-builtin-max: fail
-spec/glsl-1.20/execution/vs-nan-builtin-min: fail
 spec/glsl-1.30/execution/range_analysis_fsat_of_nan: fail
 spec/glsl-1.50/execution/compatibility/clipping/gs-clip-vertex-const-accept: skip
 spec/glsl-1.50/execution/compatibility/clipping/gs-clip-vertex-const-reject: skip
@@ -169,7 +169,7 @@ traces:
  - path: bgfx/39-assao.rdc
    expectations:
      - device: gl-vmware-llvmpipe
-        checksum: dfe7796f4bd2b758baf253714e92c8da
+        checksum: e10e7a0e3a604e0bf6a77b4a01d81f54
  - path: bgfx/40-svt.rdc
    expectations:
      - device: gl-vmware-llvmpipe