From 8ee54400733f1f26e175c0065ad69e6a49402969 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Fri, 13 Jun 2025 11:06:49 +0100 Subject: [PATCH] nir/uub: improve ishl/imul with constant sources fossil-db (navi21): Totals from 1 (0.00% of 79653) affected shaders: Instrs: 1339 -> 1338 (-0.07%) CodeSize: 7244 -> 7240 (-0.06%) Latency: 19827 -> 19822 (-0.03%) InvThroughput: 9913 -> 9911 (-0.02%) SALU: 419 -> 418 (-0.24%) Signed-off-by: Rhys Perry Reviewed-by: Georg Lehmann Part-of: --- src/compiler/nir/nir_range_analysis.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/compiler/nir/nir_range_analysis.c b/src/compiler/nir/nir_range_analysis.c index 163b187bb16..94859979276 100644 --- a/src/compiler/nir/nir_range_analysis.c +++ b/src/compiler/nir/nir_range_analysis.c @@ -1861,12 +1861,29 @@ get_alu_uub(struct analysis_state *state, struct uub_query q, uint32_t *result, uint32_t src1 = MIN2(src[1], q.scalar.def->bit_size - 1u); if (util_last_bit64(src[0]) + src1 <= q.scalar.def->bit_size) /* check overflow */ *result = src[0] << src1; + + nir_scalar src1_scalar = nir_scalar_chase_alu_src(q.scalar, 1); + if (nir_scalar_is_const(src1_scalar)) { + uint32_t const_val = 1u << (nir_scalar_as_uint(src1_scalar) & (q.scalar.def->bit_size - 1u)); + *result = MIN2(*result, max / const_val * const_val); + } break; } - case nir_op_imul: + case nir_op_imul: { if (src[0] == 0 || (src[0] * src[1]) / src[0] == src[1]) /* check overflow */ *result = src[0] * src[1]; + + nir_scalar src0_scalar = nir_scalar_chase_alu_src(q.scalar, 0); + nir_scalar src1_scalar = nir_scalar_chase_alu_src(q.scalar, 1); + if (nir_scalar_is_const(src0_scalar)) { + uint32_t const_val = nir_scalar_as_uint(src0_scalar); + *result = MIN2(*result, max / const_val * const_val); + } else if (nir_scalar_is_const(src1_scalar)) { + uint32_t const_val = nir_scalar_as_uint(src1_scalar); + *result = MIN2(*result, max / const_val * const_val); + } break; + } case nir_op_ushr: { nir_scalar src1_scalar = nir_scalar_chase_alu_src(q.scalar, 1); uint32_t mask = q.scalar.def->bit_size - 1u;