From 317d07484e69fa7dc6a13d5f873ec9fe14fdc790 Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Fri, 14 Feb 2025 20:13:58 +0100 Subject: [PATCH] nir: improve fsqrt range analysis Foz-DB Navi21: Totals from 3 (0.00% of 79377) affected shaders: MaxWaves: 88 -> 96 (+9.09%) Instrs: 1058 -> 951 (-10.11%) CodeSize: 5964 -> 5368 (-9.99%) VGPRs: 104 -> 96 (-7.69%) Latency: 15283 -> 14099 (-7.75%); split: -8.37%, +0.62% InvThroughput: 4951 -> 4238 (-14.40%) Copies: 81 -> 76 (-6.17%) PreVGPRs: 93 -> 84 (-9.68%) VALU: 820 -> 737 (-10.12%) SALU: 115 -> 91 (-20.87%) Reviewed-by: Ian Romanick Part-of: --- src/compiler/nir/nir_range_analysis.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/compiler/nir/nir_range_analysis.c b/src/compiler/nir/nir_range_analysis.c index b98daf679f8..4761e493b02 100644 --- a/src/compiler/nir/nir_range_analysis.c +++ b/src/compiler/nir/nir_range_analysis.c @@ -594,6 +594,7 @@ process_fp_query(struct analysis_state *state, struct analysis_query *aq, uint32 case nir_op_fabs: case nir_op_fexp2: case nir_op_frcp: + case nir_op_fsqrt: case nir_op_frsq: case nir_op_fneg: case nir_op_fsat: @@ -1122,9 +1123,23 @@ process_fp_query(struct analysis_state *state, struct analysis_query *aq, uint32 }; break; - case nir_op_fsqrt: - r = (struct ssa_result_range){ ge_zero, false, false, false }; + case nir_op_fsqrt: { + const struct ssa_result_range left = unpack_data(src_res[0]); + + /* sqrt(NaN) and sqrt(< 0) are NaN. */ + if (left.range == eq_zero || left.range == ge_zero || left.range == gt_zero) { + r.is_a_number = left.is_a_number; + /* Only sqrt(Inf) is Inf. */ + r.is_finite = left.is_finite; + } + + if (left.range == gt_zero || left.range == ne_zero) + r.range = gt_zero; + else + r.range = ge_zero; + break; + } case nir_op_frsq: { const struct ssa_result_range left = unpack_data(src_res[0]);