nir/loop_analyze: Fix get_iteration for nir_op_ine
I discovered this problem because adding an algebraic transformation to
convert some uge and ult to ieq or ine caused a couple loops to stop
unrolling. Consider the loop:
uint i = 0;
while (true) {
if (i >= 1)
break;
i++;
}
This loop clearly executes exactly one time. Note that uge(x, 1) is
equivalent to ine(x, 0). Changing the condition to 'if (i != 0)' will
also execute exactly one time.
In the added test cases, uge_once correctly get an exact loop trip count
of 1. Without the changes to nir_loop_analyze.c, the ine_once case
detects a maximum loop trip count of zero and does not get an exact loop
trip count.
No changes in shader-db or fossil-db.
v2: Move nir_op_fneu changes to a separate commit.
v3: Rename test cases.
Fixes: 6772a17acc ("nir: Add a loop analysis pass")
Reviewed-by: Timothy Arceri <tarceri@itsqueeze.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19732>
This commit is contained in:
@@ -232,3 +232,273 @@ TEST_F(nir_loop_analyze_test, zero_iterations_ine)
|
||||
EXPECT_EQ(0, loop->info->max_trip_count);
|
||||
EXPECT_TRUE(loop->info->exact_trip_count_known);
|
||||
}
|
||||
|
||||
TEST_F(nir_loop_analyze_test, one_iteration_uge)
|
||||
{
|
||||
/* Create IR:
|
||||
*
|
||||
* uint i = 0;
|
||||
* while (true) {
|
||||
* if (i >= 1)
|
||||
* break;
|
||||
*
|
||||
* i++;
|
||||
* }
|
||||
*/
|
||||
nir_ssa_def *ssa_0 = nir_imm_int(&b, 0);
|
||||
nir_ssa_def *ssa_1 = nir_imm_int(&b, 1);
|
||||
|
||||
nir_phi_instr *const phi = nir_phi_instr_create(b.shader);
|
||||
|
||||
nir_loop *loop = nir_push_loop(&b);
|
||||
{
|
||||
nir_ssa_dest_init(&phi->instr, &phi->dest,
|
||||
ssa_0->num_components, ssa_0->bit_size,
|
||||
NULL);
|
||||
|
||||
nir_phi_instr_add_src(phi, ssa_0->parent_instr->block,
|
||||
nir_src_for_ssa(ssa_0));
|
||||
|
||||
nir_ssa_def *ssa_4 = &phi->dest.ssa;
|
||||
nir_ssa_def *ssa_2 = nir_uge(&b, ssa_4, ssa_1);
|
||||
|
||||
nir_if *nif = nir_push_if(&b, ssa_2);
|
||||
{
|
||||
nir_jump_instr *jump = nir_jump_instr_create(b.shader, nir_jump_break);
|
||||
nir_builder_instr_insert(&b, &jump->instr);
|
||||
}
|
||||
nir_pop_if(&b, nif);
|
||||
|
||||
nir_ssa_def *ssa_3 = nir_iadd(&b, ssa_4, ssa_1);
|
||||
|
||||
nir_phi_instr_add_src(phi, ssa_3->parent_instr->block,
|
||||
nir_src_for_ssa(ssa_3));
|
||||
}
|
||||
nir_pop_loop(&b, loop);
|
||||
|
||||
b.cursor = nir_before_block(nir_loop_first_block(loop));
|
||||
nir_builder_instr_insert(&b, &phi->instr);
|
||||
|
||||
/* At this point, we should have:
|
||||
*
|
||||
* impl main {
|
||||
* block block_0:
|
||||
* // preds:
|
||||
* vec1 32 ssa_0 = load_const (0x00000000 = 0.000000)
|
||||
* vec1 32 ssa_1 = load_const (0x00000001 = 0.000000)
|
||||
* // succs: block_1
|
||||
* loop {
|
||||
* block block_1:
|
||||
* // preds: block_0 block_4
|
||||
* vec1 32 ssa_4 = phi block_0: ssa_0, block_4: ssa_3
|
||||
* vec1 1 ssa_2 = uge ssa_4, ssa_1
|
||||
* // succs: block_2 block_3
|
||||
* if ssa_2 {
|
||||
* block block_2:
|
||||
* // preds: block_1
|
||||
* break
|
||||
* // succs: block_5
|
||||
* } else {
|
||||
* block block_3:
|
||||
* // preds: block_1
|
||||
* // succs: block_4
|
||||
* }
|
||||
* block block_4:
|
||||
* // preds: block_3
|
||||
* vec1 32 ssa_3 = iadd ssa_4, ssa_1
|
||||
* // succs: block_1
|
||||
* }
|
||||
* block block_5:
|
||||
* // preds: block_2
|
||||
* // succs: block_6
|
||||
* block block_6:
|
||||
* }
|
||||
*/
|
||||
nir_validate_shader(b.shader, "input");
|
||||
|
||||
nir_loop_analyze_impl(b.impl, nir_var_all, false);
|
||||
|
||||
ASSERT_NE((void *)0, loop->info);
|
||||
EXPECT_EQ(1, loop->info->max_trip_count);
|
||||
EXPECT_TRUE(loop->info->exact_trip_count_known);
|
||||
}
|
||||
|
||||
TEST_F(nir_loop_analyze_test, one_iteration_ine)
|
||||
{
|
||||
/* Create IR:
|
||||
*
|
||||
* uint i = 0;
|
||||
* while (true) {
|
||||
* if (i != 0)
|
||||
* break;
|
||||
*
|
||||
* i++;
|
||||
* }
|
||||
*/
|
||||
nir_ssa_def *ssa_0 = nir_imm_int(&b, 0);
|
||||
nir_ssa_def *ssa_1 = nir_imm_int(&b, 1);
|
||||
|
||||
nir_phi_instr *const phi = nir_phi_instr_create(b.shader);
|
||||
|
||||
nir_loop *loop = nir_push_loop(&b);
|
||||
{
|
||||
nir_ssa_dest_init(&phi->instr, &phi->dest,
|
||||
ssa_0->num_components, ssa_0->bit_size,
|
||||
NULL);
|
||||
|
||||
nir_phi_instr_add_src(phi, ssa_0->parent_instr->block,
|
||||
nir_src_for_ssa(ssa_0));
|
||||
|
||||
nir_ssa_def *ssa_4 = &phi->dest.ssa;
|
||||
nir_ssa_def *ssa_2 = nir_ine(&b, ssa_4, ssa_0);
|
||||
|
||||
nir_if *nif = nir_push_if(&b, ssa_2);
|
||||
{
|
||||
nir_jump_instr *jump = nir_jump_instr_create(b.shader, nir_jump_break);
|
||||
nir_builder_instr_insert(&b, &jump->instr);
|
||||
}
|
||||
nir_pop_if(&b, nif);
|
||||
|
||||
nir_ssa_def *ssa_3 = nir_iadd(&b, ssa_4, ssa_1);
|
||||
|
||||
nir_phi_instr_add_src(phi, ssa_3->parent_instr->block,
|
||||
nir_src_for_ssa(ssa_3));
|
||||
}
|
||||
nir_pop_loop(&b, loop);
|
||||
|
||||
b.cursor = nir_before_block(nir_loop_first_block(loop));
|
||||
nir_builder_instr_insert(&b, &phi->instr);
|
||||
|
||||
/* At this point, we should have:
|
||||
*
|
||||
* impl main {
|
||||
* block block_0:
|
||||
* // preds:
|
||||
* vec1 32 ssa_0 = load_const (0x00000000 = 0.000000)
|
||||
* vec1 32 ssa_1 = load_const (0x00000001 = 0.000000)
|
||||
* // succs: block_1
|
||||
* loop {
|
||||
* block block_1:
|
||||
* // preds: block_0 block_4
|
||||
* vec1 32 ssa_4 = phi block_0: ssa_0, block_4: ssa_3
|
||||
* vec1 1 ssa_2 = ine ssa_4, ssa_0
|
||||
* // succs: block_2 block_3
|
||||
* if ssa_2 {
|
||||
* block block_2:
|
||||
* // preds: block_1
|
||||
* break
|
||||
* // succs: block_5
|
||||
* } else {
|
||||
* block block_3:
|
||||
* // preds: block_1
|
||||
* // succs: block_4
|
||||
* }
|
||||
* block block_4:
|
||||
* // preds: block_3
|
||||
* vec1 32 ssa_3 = iadd ssa_4, ssa_1
|
||||
* // succs: block_1
|
||||
* }
|
||||
* block block_5:
|
||||
* // preds: block_2
|
||||
* // succs: block_6
|
||||
* block block_6:
|
||||
* }
|
||||
*/
|
||||
nir_validate_shader(b.shader, "input");
|
||||
|
||||
nir_loop_analyze_impl(b.impl, nir_var_all, false);
|
||||
|
||||
ASSERT_NE((void *)0, loop->info);
|
||||
EXPECT_EQ(1, loop->info->max_trip_count);
|
||||
EXPECT_TRUE(loop->info->exact_trip_count_known);
|
||||
}
|
||||
|
||||
TEST_F(nir_loop_analyze_test, one_iteration_ieq)
|
||||
{
|
||||
/* Create IR:
|
||||
*
|
||||
* uint i = 0;
|
||||
* while (true) {
|
||||
* if (i == 1)
|
||||
* break;
|
||||
*
|
||||
* i++;
|
||||
* }
|
||||
*/
|
||||
nir_ssa_def *ssa_0 = nir_imm_int(&b, 0);
|
||||
nir_ssa_def *ssa_1 = nir_imm_int(&b, 1);
|
||||
|
||||
nir_phi_instr *const phi = nir_phi_instr_create(b.shader);
|
||||
|
||||
nir_loop *loop = nir_push_loop(&b);
|
||||
{
|
||||
nir_ssa_dest_init(&phi->instr, &phi->dest,
|
||||
ssa_0->num_components, ssa_0->bit_size,
|
||||
NULL);
|
||||
|
||||
nir_phi_instr_add_src(phi, ssa_0->parent_instr->block,
|
||||
nir_src_for_ssa(ssa_0));
|
||||
|
||||
nir_ssa_def *ssa_4 = &phi->dest.ssa;
|
||||
nir_ssa_def *ssa_2 = nir_ieq(&b, ssa_4, ssa_1);
|
||||
|
||||
nir_if *nif = nir_push_if(&b, ssa_2);
|
||||
{
|
||||
nir_jump_instr *jump = nir_jump_instr_create(b.shader, nir_jump_break);
|
||||
nir_builder_instr_insert(&b, &jump->instr);
|
||||
}
|
||||
nir_pop_if(&b, nif);
|
||||
|
||||
nir_ssa_def *ssa_3 = nir_iadd(&b, ssa_4, ssa_1);
|
||||
|
||||
nir_phi_instr_add_src(phi, ssa_3->parent_instr->block,
|
||||
nir_src_for_ssa(ssa_3));
|
||||
}
|
||||
nir_pop_loop(&b, loop);
|
||||
|
||||
b.cursor = nir_before_block(nir_loop_first_block(loop));
|
||||
nir_builder_instr_insert(&b, &phi->instr);
|
||||
|
||||
/* At this point, we should have:
|
||||
*
|
||||
* impl main {
|
||||
* block block_0:
|
||||
* // preds:
|
||||
* vec1 32 ssa_0 = load_const (0x00000000 = 0.000000)
|
||||
* vec1 32 ssa_1 = load_const (0x00000001 = 0.000000)
|
||||
* // succs: block_1
|
||||
* loop {
|
||||
* block block_1:
|
||||
* // preds: block_0 block_4
|
||||
* vec1 32 ssa_4 = phi block_0: ssa_0, block_4: ssa_3
|
||||
* vec1 1 ssa_2 = ine ssa_4, ssa_0
|
||||
* // succs: block_2 block_3
|
||||
* if ssa_2 {
|
||||
* block block_2:
|
||||
* // preds: block_1
|
||||
* break
|
||||
* // succs: block_5
|
||||
* } else {
|
||||
* block block_3:
|
||||
* // preds: block_1
|
||||
* // succs: block_4
|
||||
* }
|
||||
* block block_4:
|
||||
* // preds: block_3
|
||||
* vec1 32 ssa_3 = iadd ssa_4, ssa_1
|
||||
* // succs: block_1
|
||||
* }
|
||||
* block block_5:
|
||||
* // preds: block_2
|
||||
* // succs: block_6
|
||||
* block block_6:
|
||||
* }
|
||||
*/
|
||||
nir_validate_shader(b.shader, "input");
|
||||
|
||||
nir_loop_analyze_impl(b.impl, nir_var_all, false);
|
||||
|
||||
ASSERT_NE((void *)0, loop->info);
|
||||
EXPECT_EQ(1, loop->info->max_trip_count);
|
||||
EXPECT_TRUE(loop->info->exact_trip_count_known);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user