From d9f014401bf842bbc0f57987570e34c9ce080cc4 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 9 Aug 2021 15:05:33 -0700 Subject: [PATCH] nir/loop_analyze: Fix get_iteration for nir_op_ine I discovered this problem because adding an algebraic transformation to convert some uge and ult to ieq or ine caused a couple loops to stop unrolling. Consider the loop: uint i = 0; while (true) { if (i >= 1) break; i++; } This loop clearly executes exactly one time. Note that uge(x, 1) is equivalent to ine(x, 0). Changing the condition to 'if (i != 0)' will also execute exactly one time. In the added test cases, uge_once correctly get an exact loop trip count of 1. Without the changes to nir_loop_analyze.c, the ine_once case detects a maximum loop trip count of zero and does not get an exact loop trip count. No changes in shader-db or fossil-db. v2: Move nir_op_fneu changes to a separate commit. v3: Rename test cases. Fixes: 6772a17acc8 ("nir: Add a loop analysis pass") Reviewed-by: Timothy Arceri Part-of: --- src/compiler/nir/nir_loop_analyze.c | 9 +- src/compiler/nir/tests/loop_analyze_tests.cpp | 270 ++++++++++++++++++ 2 files changed, 278 insertions(+), 1 deletion(-) diff --git a/src/compiler/nir/nir_loop_analyze.c b/src/compiler/nir/nir_loop_analyze.c index 309cf580db9..4fd192489ef 100644 --- a/src/compiler/nir/nir_loop_analyze.c +++ b/src/compiler/nir/nir_loop_analyze.c @@ -754,10 +754,17 @@ get_iteration(nir_op cond_op, nir_const_value initial, nir_const_value step, nir_const_value span, iter; switch (cond_op) { + case nir_op_ine: + /* In order for execution to be here, limit must be the same as initial. + * Otherwise will_break_on_first_iteration would have returned false. + * If step is zero, the loop is infinite. Otherwise the loop will + * execute once. + */ + return step.u64 == 0 ? -1 : 1; + case nir_op_ige: case nir_op_ilt: case nir_op_ieq: - case nir_op_ine: span = eval_const_binop(nir_op_isub, bit_size, limit, initial, execution_mode); iter = eval_const_binop(nir_op_idiv, bit_size, span, step, diff --git a/src/compiler/nir/tests/loop_analyze_tests.cpp b/src/compiler/nir/tests/loop_analyze_tests.cpp index a59813d8495..7ee24b73670 100644 --- a/src/compiler/nir/tests/loop_analyze_tests.cpp +++ b/src/compiler/nir/tests/loop_analyze_tests.cpp @@ -232,3 +232,273 @@ TEST_F(nir_loop_analyze_test, zero_iterations_ine) EXPECT_EQ(0, loop->info->max_trip_count); EXPECT_TRUE(loop->info->exact_trip_count_known); } + +TEST_F(nir_loop_analyze_test, one_iteration_uge) +{ + /* Create IR: + * + * uint i = 0; + * while (true) { + * if (i >= 1) + * break; + * + * i++; + * } + */ + nir_ssa_def *ssa_0 = nir_imm_int(&b, 0); + nir_ssa_def *ssa_1 = nir_imm_int(&b, 1); + + nir_phi_instr *const phi = nir_phi_instr_create(b.shader); + + nir_loop *loop = nir_push_loop(&b); + { + nir_ssa_dest_init(&phi->instr, &phi->dest, + ssa_0->num_components, ssa_0->bit_size, + NULL); + + nir_phi_instr_add_src(phi, ssa_0->parent_instr->block, + nir_src_for_ssa(ssa_0)); + + nir_ssa_def *ssa_4 = &phi->dest.ssa; + nir_ssa_def *ssa_2 = nir_uge(&b, ssa_4, ssa_1); + + nir_if *nif = nir_push_if(&b, ssa_2); + { + nir_jump_instr *jump = nir_jump_instr_create(b.shader, nir_jump_break); + nir_builder_instr_insert(&b, &jump->instr); + } + nir_pop_if(&b, nif); + + nir_ssa_def *ssa_3 = nir_iadd(&b, ssa_4, ssa_1); + + nir_phi_instr_add_src(phi, ssa_3->parent_instr->block, + nir_src_for_ssa(ssa_3)); + } + nir_pop_loop(&b, loop); + + b.cursor = nir_before_block(nir_loop_first_block(loop)); + nir_builder_instr_insert(&b, &phi->instr); + + /* At this point, we should have: + * + * impl main { + * block block_0: + * // preds: + * vec1 32 ssa_0 = load_const (0x00000000 = 0.000000) + * vec1 32 ssa_1 = load_const (0x00000001 = 0.000000) + * // succs: block_1 + * loop { + * block block_1: + * // preds: block_0 block_4 + * vec1 32 ssa_4 = phi block_0: ssa_0, block_4: ssa_3 + * vec1 1 ssa_2 = uge ssa_4, ssa_1 + * // succs: block_2 block_3 + * if ssa_2 { + * block block_2: + * // preds: block_1 + * break + * // succs: block_5 + * } else { + * block block_3: + * // preds: block_1 + * // succs: block_4 + * } + * block block_4: + * // preds: block_3 + * vec1 32 ssa_3 = iadd ssa_4, ssa_1 + * // succs: block_1 + * } + * block block_5: + * // preds: block_2 + * // succs: block_6 + * block block_6: + * } + */ + nir_validate_shader(b.shader, "input"); + + nir_loop_analyze_impl(b.impl, nir_var_all, false); + + ASSERT_NE((void *)0, loop->info); + EXPECT_EQ(1, loop->info->max_trip_count); + EXPECT_TRUE(loop->info->exact_trip_count_known); +} + +TEST_F(nir_loop_analyze_test, one_iteration_ine) +{ + /* Create IR: + * + * uint i = 0; + * while (true) { + * if (i != 0) + * break; + * + * i++; + * } + */ + nir_ssa_def *ssa_0 = nir_imm_int(&b, 0); + nir_ssa_def *ssa_1 = nir_imm_int(&b, 1); + + nir_phi_instr *const phi = nir_phi_instr_create(b.shader); + + nir_loop *loop = nir_push_loop(&b); + { + nir_ssa_dest_init(&phi->instr, &phi->dest, + ssa_0->num_components, ssa_0->bit_size, + NULL); + + nir_phi_instr_add_src(phi, ssa_0->parent_instr->block, + nir_src_for_ssa(ssa_0)); + + nir_ssa_def *ssa_4 = &phi->dest.ssa; + nir_ssa_def *ssa_2 = nir_ine(&b, ssa_4, ssa_0); + + nir_if *nif = nir_push_if(&b, ssa_2); + { + nir_jump_instr *jump = nir_jump_instr_create(b.shader, nir_jump_break); + nir_builder_instr_insert(&b, &jump->instr); + } + nir_pop_if(&b, nif); + + nir_ssa_def *ssa_3 = nir_iadd(&b, ssa_4, ssa_1); + + nir_phi_instr_add_src(phi, ssa_3->parent_instr->block, + nir_src_for_ssa(ssa_3)); + } + nir_pop_loop(&b, loop); + + b.cursor = nir_before_block(nir_loop_first_block(loop)); + nir_builder_instr_insert(&b, &phi->instr); + + /* At this point, we should have: + * + * impl main { + * block block_0: + * // preds: + * vec1 32 ssa_0 = load_const (0x00000000 = 0.000000) + * vec1 32 ssa_1 = load_const (0x00000001 = 0.000000) + * // succs: block_1 + * loop { + * block block_1: + * // preds: block_0 block_4 + * vec1 32 ssa_4 = phi block_0: ssa_0, block_4: ssa_3 + * vec1 1 ssa_2 = ine ssa_4, ssa_0 + * // succs: block_2 block_3 + * if ssa_2 { + * block block_2: + * // preds: block_1 + * break + * // succs: block_5 + * } else { + * block block_3: + * // preds: block_1 + * // succs: block_4 + * } + * block block_4: + * // preds: block_3 + * vec1 32 ssa_3 = iadd ssa_4, ssa_1 + * // succs: block_1 + * } + * block block_5: + * // preds: block_2 + * // succs: block_6 + * block block_6: + * } + */ + nir_validate_shader(b.shader, "input"); + + nir_loop_analyze_impl(b.impl, nir_var_all, false); + + ASSERT_NE((void *)0, loop->info); + EXPECT_EQ(1, loop->info->max_trip_count); + EXPECT_TRUE(loop->info->exact_trip_count_known); +} + +TEST_F(nir_loop_analyze_test, one_iteration_ieq) +{ + /* Create IR: + * + * uint i = 0; + * while (true) { + * if (i == 1) + * break; + * + * i++; + * } + */ + nir_ssa_def *ssa_0 = nir_imm_int(&b, 0); + nir_ssa_def *ssa_1 = nir_imm_int(&b, 1); + + nir_phi_instr *const phi = nir_phi_instr_create(b.shader); + + nir_loop *loop = nir_push_loop(&b); + { + nir_ssa_dest_init(&phi->instr, &phi->dest, + ssa_0->num_components, ssa_0->bit_size, + NULL); + + nir_phi_instr_add_src(phi, ssa_0->parent_instr->block, + nir_src_for_ssa(ssa_0)); + + nir_ssa_def *ssa_4 = &phi->dest.ssa; + nir_ssa_def *ssa_2 = nir_ieq(&b, ssa_4, ssa_1); + + nir_if *nif = nir_push_if(&b, ssa_2); + { + nir_jump_instr *jump = nir_jump_instr_create(b.shader, nir_jump_break); + nir_builder_instr_insert(&b, &jump->instr); + } + nir_pop_if(&b, nif); + + nir_ssa_def *ssa_3 = nir_iadd(&b, ssa_4, ssa_1); + + nir_phi_instr_add_src(phi, ssa_3->parent_instr->block, + nir_src_for_ssa(ssa_3)); + } + nir_pop_loop(&b, loop); + + b.cursor = nir_before_block(nir_loop_first_block(loop)); + nir_builder_instr_insert(&b, &phi->instr); + + /* At this point, we should have: + * + * impl main { + * block block_0: + * // preds: + * vec1 32 ssa_0 = load_const (0x00000000 = 0.000000) + * vec1 32 ssa_1 = load_const (0x00000001 = 0.000000) + * // succs: block_1 + * loop { + * block block_1: + * // preds: block_0 block_4 + * vec1 32 ssa_4 = phi block_0: ssa_0, block_4: ssa_3 + * vec1 1 ssa_2 = ine ssa_4, ssa_0 + * // succs: block_2 block_3 + * if ssa_2 { + * block block_2: + * // preds: block_1 + * break + * // succs: block_5 + * } else { + * block block_3: + * // preds: block_1 + * // succs: block_4 + * } + * block block_4: + * // preds: block_3 + * vec1 32 ssa_3 = iadd ssa_4, ssa_1 + * // succs: block_1 + * } + * block block_5: + * // preds: block_2 + * // succs: block_6 + * block block_6: + * } + */ + nir_validate_shader(b.shader, "input"); + + nir_loop_analyze_impl(b.impl, nir_var_all, false); + + ASSERT_NE((void *)0, loop->info); + EXPECT_EQ(1, loop->info->max_trip_count); + EXPECT_TRUE(loop->info->exact_trip_count_known); +}