From 0cf25f559f8dc99d59637e74650c38d6428433f3 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Thu, 11 Mar 2021 09:46:07 -0800 Subject: [PATCH] spirv: Generate shorter code for SpvOpFUnord comparisons No shader-db or fossil-db changes on any Intel platform. v2: Keep the flt <-> fge switcheroo local to the SpvOpFUnordLessThan, etc. handling. Add a comment explaining why the suboptimal SpvOpFUnordEqual implementation is used here. Suggested by Caio. Reviewed-by: Caio Marcelo de Oliveira Filho Part-of: --- src/compiler/spirv/vtn_alu.c | 38 ++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/src/compiler/spirv/vtn_alu.c b/src/compiler/spirv/vtn_alu.c index 54019c37396..a9bdb4c3cf1 100644 --- a/src/compiler/spirv/vtn_alu.c +++ b/src/compiler/spirv/vtn_alu.c @@ -600,7 +600,29 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, break; } - case SpvOpFUnordEqual: + case SpvOpFUnordEqual: { + const bool save_exact = b->nb.exact; + + b->nb.exact = true; + + /* This could also be implemented as !(a < b || b < a). If one or both + * of the sources are numbers, later optimization passes can easily + * eliminate the isnan() checks. This may trim the sequence down to a + * single (a == b) operation. Otherwise, the optimizer can transform + * whatever is left to !(a < b || b < a). Since some applications will + * open-code this sequence, these optimizations are needed anyway. + */ + dest->def = + nir_ior(&b->nb, + nir_feq(&b->nb, src[0], src[1]), + nir_ior(&b->nb, + nir_fneu(&b->nb, src[0], src[0]), + nir_fneu(&b->nb, src[1], src[1]))); + + b->nb.exact = save_exact; + break; + } + case SpvOpFUnordLessThan: case SpvOpFUnordGreaterThan: case SpvOpFUnordLessThanEqual: @@ -623,12 +645,16 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, b->nb.exact = true; + /* Use the property FUnordLessThan(a, b) ≡ !FOrdGreaterThanEqual(a, b). 
*/ + switch (op) { + case nir_op_fge: op = nir_op_flt; break; + case nir_op_flt: op = nir_op_fge; break; + default: unreachable("Impossible opcode."); + } + dest->def = - nir_ior(&b->nb, - nir_build_alu(&b->nb, op, src[0], src[1], NULL, NULL), - nir_ior(&b->nb, - nir_fneu(&b->nb, src[0], src[0]), - nir_fneu(&b->nb, src[1], src[1]))); + nir_inot(&b->nb, + nir_build_alu(&b->nb, op, src[0], src[1], NULL, NULL)); b->nb.exact = save_exact; break;