nir/opt_offsets: fix progress determination with offsets that add to zero

If the offset is iadd(iadd(iadd(a, 1), b), -1), try_extract_const_addition
will create a dead iadd(a, b) and claim that it didn't modify the shader.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Marek Olšák <maraeo@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36370>
This commit is contained in:
Rhys Perry
2025-05-16 16:57:44 +01:00
committed by Marge Bot
parent 9aad852af8
commit c10e495182
+16 -15
View File
@@ -35,18 +35,18 @@ typedef struct
bool progress;
} opt_offsets_state;
static nir_scalar
try_extract_const_addition(nir_builder *b, nir_scalar val, opt_offsets_state *state, unsigned *out_const,
static bool
try_extract_const_addition(nir_builder *b, opt_offsets_state *state, nir_scalar *out_val, unsigned *out_const,
uint32_t max, bool need_nuw)
{
val = nir_scalar_chase_movs(val);
nir_scalar val = nir_scalar_chase_movs(*out_val);
if (!nir_scalar_is_alu(val))
return val;
return false;
nir_alu_instr *alu = nir_def_as_alu(val.def);
if (alu->op != nir_op_iadd)
return val;
return false;
nir_scalar src[2] = {
{ alu->src[0].src.ssa, alu->src[0].swizzle[val.comp] },
@@ -70,7 +70,7 @@ try_extract_const_addition(nir_builder *b, nir_scalar val, opt_offsets_state *st
uint32_t ub1 = nir_unsigned_upper_bound(b->shader, state->range_ht, src[1], NULL);
if ((UINT32_MAX - ub0) < ub1)
return val;
return false;
/* We proved that unsigned wrap won't be possible, so we can set the flag too. */
alu->no_unsigned_wrap = true;
@@ -83,21 +83,23 @@ try_extract_const_addition(nir_builder *b, nir_scalar val, opt_offsets_state *st
uint32_t offset = nir_scalar_as_uint(src[i]);
if (offset + *out_const <= max) {
*out_const += offset;
return try_extract_const_addition(b, src[1 - i], state, out_const, max, need_nuw);
try_extract_const_addition(b, state, &src[1 - i], out_const, max, need_nuw);
*out_val = src[1 - i];
return true;
}
}
}
uint32_t orig_offset = *out_const;
src[0] = try_extract_const_addition(b, src[0], state, out_const, max, need_nuw);
src[1] = try_extract_const_addition(b, src[1], state, out_const, max, need_nuw);
if (*out_const == orig_offset)
return val;
bool changed_src0 = try_extract_const_addition(b, state, &src[0], out_const, max, need_nuw);
bool changed_src1 = try_extract_const_addition(b, state, &src[1], out_const, max, need_nuw);
if (!changed_src0 && !changed_src1)
return false;
b->cursor = nir_before_instr(&alu->instr);
nir_def *r = nir_iadd(b, nir_mov_scalar(b, src[0]),
nir_mov_scalar(b, src[1]));
return nir_get_scalar(r, 0);
*out_val = nir_get_scalar(r, 0);
return true;
}
static bool
@@ -126,8 +128,7 @@ try_fold_load_store(nir_builder *b,
if (!nir_src_is_const(*off_src)) {
uint32_t add_offset = 0;
nir_scalar val = { .def = off_src->ssa, .comp = 0 };
val = try_extract_const_addition(b, val, state, &add_offset, max - off_const, need_nuw);
if (add_offset == 0)
if (!try_extract_const_addition(b, state, &val, &add_offset, max - off_const, need_nuw))
return false;
off_const += add_offset;
b->cursor = nir_before_instr(&intrin->instr);