gallivm: Fix NaN behavior of min and max

Like softpipe in mesa!10419, llvmpipe suffers from improper handling
of NaN in nir_op_fmax and nir_op_fmin.  nir_op_fsat is already handled
correctly.  OpenCL strictly requires the "NaN cleansing" behavior, so
kernel shaders already select GALLIVM_NAN_RETURN_OTHER for min and max,
and all of the functionality is in place.  Just make the graphics APIs
use the OpenCL path.

The majority of the possible performance penalty incurred here should
be resolved in the next commit.

v2: Add updated checksum for bgfx/39-assao.rdc trace.  Rendering goes
from mostly garbage to looking correct to me.

Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10532>
This commit is contained in:
Ian Romanick
2021-04-28 15:30:41 -07:00
committed by Marge Bot
parent 8af325d192
commit b3f3287eac
3 changed files with 14 additions and 11 deletions
+13 -6
View File
@@ -521,7 +521,7 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef result;
enum gallivm_nan_behavior minmax_nan = bld_base->shader->info.stage == MESA_SHADER_KERNEL ? GALLIVM_NAN_RETURN_OTHER : GALLIVM_NAN_BEHAVIOR_UNDEFINED;
switch (op) {
case nir_op_b2f32:
result = emit_b2f(bld_base, src[0], 32);
@@ -677,9 +677,19 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
case nir_op_flt32:
result = fcmp32(bld_base, PIPE_FUNC_LESS, src_bit_size[0], src);
break;
case nir_op_fmin:
result = lp_build_min_ext(get_flt_bld(bld_base, src_bit_size[0]), src[0], src[1], minmax_nan);
case nir_op_fmax:
case nir_op_fmin: {
enum gallivm_nan_behavior minmax_nan = GALLIVM_NAN_RETURN_OTHER;
if (op == nir_op_fmin) {
result = lp_build_min_ext(get_flt_bld(bld_base, src_bit_size[0]),
src[0], src[1], minmax_nan);
} else {
result = lp_build_max_ext(get_flt_bld(bld_base, src_bit_size[0]),
src[0], src[1], minmax_nan);
}
break;
}
case nir_op_fmod: {
struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size[0]);
result = lp_build_div(flt_bld, src[0], src[1]);
@@ -692,9 +702,6 @@ static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
result = lp_build_mul(get_flt_bld(bld_base, src_bit_size[0]),
src[0], src[1]);
break;
case nir_op_fmax:
result = lp_build_max_ext(get_flt_bld(bld_base, src_bit_size[0]), src[0], src[1], minmax_nan);
break;
case nir_op_fneu32:
result = fcmp32(bld_base, PIPE_FUNC_NOTEQUAL, src_bit_size[0], src);
break;
@@ -158,10 +158,6 @@ spec/ext_shader_framebuffer_fetch_non_coherent/execution/gles3/simple-ms8: skip
spec/ext_shader_image_load_formatted/execution/image_checkerboard: skip
spec/glsl-1.10/preprocessor/extension-defined-test: skip
spec/glsl-1.10/preprocessor/extension-if-1: skip
spec/glsl-1.20/execution/fs-nan-builtin-max: fail
spec/glsl-1.20/execution/fs-nan-builtin-min: fail
spec/glsl-1.20/execution/vs-nan-builtin-max: fail
spec/glsl-1.20/execution/vs-nan-builtin-min: fail
spec/glsl-1.30/execution/range_analysis_fsat_of_nan: fail
spec/glsl-1.50/execution/compatibility/clipping/gs-clip-vertex-const-accept: skip
spec/glsl-1.50/execution/compatibility/clipping/gs-clip-vertex-const-reject: skip
@@ -169,7 +169,7 @@ traces:
- path: bgfx/39-assao.rdc
expectations:
- device: gl-vmware-llvmpipe
checksum: dfe7796f4bd2b758baf253714e92c8da
checksum: e10e7a0e3a604e0bf6a77b4a01d81f54
- path: bgfx/40-svt.rdc
expectations:
- device: gl-vmware-llvmpipe