From caf89c97deb888bdd0fe44e6e04b3db3cbd7b5f3 Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Wed, 30 Jul 2025 15:04:06 +0200 Subject: [PATCH] nir/range_analysis: look through f2f Foz-DB Navi31: Totals from 93 (0.12% of 80273) affected shaders: Instrs: 123927 -> 121073 (-2.30%); split: -2.30%, +0.00% CodeSize: 670832 -> 653332 (-2.61%); split: -2.61%, +0.00% Latency: 337678 -> 322803 (-4.41%); split: -4.41%, +0.00% InvThroughput: 63277 -> 61083 (-3.47%) VClause: 460 -> 373 (-18.91%) SClause: 2178 -> 2100 (-3.58%) Copies: 7637 -> 7744 (+1.40%) PreSGPRs: 4414 -> 4287 (-2.88%) PreVGPRs: 4229 -> 4230 (+0.02%) VALU: 77375 -> 75693 (-2.17%) SALU: 16497 -> 16383 (-0.69%); split: -0.73%, +0.04% VMEM: 561 -> 477 (-14.97%) SMEM: 3197 -> 3113 (-2.63%) Reviewed-by: Emma Anholt Part-of: --- src/compiler/nir/nir_range_analysis.c | 31 +++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/compiler/nir/nir_range_analysis.c b/src/compiler/nir/nir_range_analysis.c index b09cb959ce7..f058a515d25 100644 --- a/src/compiler/nir/nir_range_analysis.c +++ b/src/compiler/nir/nir_range_analysis.c @@ -640,6 +640,11 @@ process_fp_query(struct analysis_state *state, struct analysis_query *aq, uint32 case nir_op_fceil: case nir_op_ftrunc: case nir_op_ffract: + case nir_op_f2f16: + case nir_op_f2f16_rtz: + case nir_op_f2f16_rtne: + case nir_op_f2f32: + case nir_op_f2f64: case nir_op_fdot2: case nir_op_fdot3: case nir_op_fdot4: @@ -823,6 +828,32 @@ process_fp_query(struct analysis_state *state, struct analysis_query *aq, uint32 break; + case nir_op_f2f16: + case nir_op_f2f16_rtz: + case nir_op_f2f16_rtne: + case nir_op_f2f32: + case nir_op_f2f64: { + r = unpack_data(src_res[0]); + + bool rtz = alu->op == nir_op_f2f16_rtz; + if (alu->op != nir_op_f2f16_rtne && alu->op != nir_op_f2f16_rtz) { + nir_shader *shader = nir_cf_node_get_function(&alu->instr.block->cf_node)->function->shader; + unsigned execution_mode = shader->info.float_controls_execution_mode; + rtz = nir_is_rounding_mode_rtz(execution_mode, alu->def.bit_size); + } + + if (alu->src[0].src.ssa->bit_size > alu->def.bit_size) { + /* Unless we are rounding towards zero, large values can create Inf. */ + if (!rtz && r.range != eq_zero) + r.is_finite = false; + + /* Underflow can create new zeros. */ + r.range = union_ranges(r.range, eq_zero); + } + + break; + } + case nir_op_fabs: r = unpack_data(src_res[0]);