From 63b0692eac5cbbce0669b0a8c256e10a50eb7da5 Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Thu, 10 Oct 2024 14:17:34 +0100
Subject: [PATCH] aco: don't use uniform continues if exec might be empty
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Rhys Perry
Reviewed-by: Daniel Schürmann
Part-of:
---
Reviewer notes (this section sits between the "---" separator and the
diff, so it is not part of the commit message):

 * emit_loop_jump(): the new local `potentially_empty_exec` is the logical
   OR of ctx->cf_info.exec.potentially_empty_discard,
   potentially_empty_break and potentially_empty_continue.
 * A continue is now emitted as a uniform continue (block_kind_uniform,
   direct jump to the loop header) only when parent_if is not divergent
   AND potentially_empty_exec is false. Previously the parent_if check
   alone decided this.
 * On the remaining (non-uniform) continue path,
   parent_loop.has_divergent_branch is still set unconditionally, while
   parent_loop.has_divergent_continue and
   exec.potentially_empty_continue / potentially_empty_continue_depth are
   now only set when parent_if.is_divergent is true.
 * New test isel.cf.empty_exec.loop_uniform_continue: a continue that
   follows a terminate_if inside a uniform `if` in a loop. The expected
   kind for BB2 is "continue, discard" (no "uniform").

 .../compiler/aco_instruction_selection.cpp    | 25 ++++++++----
 src/amd/compiler/tests/test_isel.cpp          | 38 +++++++++++++++++++
 2 files changed, 56 insertions(+), 7 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 2b1e8609ffc..1d18ae9ca09 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -10249,6 +10249,14 @@ emit_loop_jump(isel_context* ctx, bool is_break)
    append_logical_end(ctx->block);
    unsigned idx = ctx->block->index;
 
+   /* If exec is empty inside uniform control flow in a loop, we can assume that all invocations
+    * of the loop are inactive. Breaking from the loop is the right thing to do in that case.
+    * We shouldn't perform a uniform continue, or else we might never reach a break.
+    */
+   bool potentially_empty_exec = ctx->cf_info.exec.potentially_empty_discard ||
+                                 ctx->cf_info.exec.potentially_empty_break ||
+                                 ctx->cf_info.exec.potentially_empty_continue;
+
    if (is_break) {
       logical_target = ctx->cf_info.parent_loop.exit;
       add_logical_edge(idx, logical_target);
@@ -10274,7 +10282,7 @@ emit_loop_jump(isel_context* ctx, bool is_break)
       add_logical_edge(idx, logical_target);
       ctx->block->kind |= block_kind_continue;
 
-      if (!ctx->cf_info.parent_if.is_divergent) {
+      if (!ctx->cf_info.parent_if.is_divergent && !potentially_empty_exec) {
          /* uniform continue - directly jump to the loop header */
          ctx->block->kind |= block_kind_uniform;
          ctx->cf_info.has_branch = true;
@@ -10283,14 +10291,17 @@ emit_loop_jump(isel_context* ctx, bool is_break)
          return;
       }
 
-      /* for potential uniform breaks after this continue,
-         we must ensure that they are handled correctly */
-      ctx->cf_info.parent_loop.has_divergent_continue = true;
       ctx->cf_info.parent_loop.has_divergent_branch = true;
 
-      if (!ctx->cf_info.exec.potentially_empty_continue) {
-         ctx->cf_info.exec.potentially_empty_continue = true;
-         ctx->cf_info.exec.potentially_empty_continue_depth = ctx->block->loop_nest_depth;
+      if (ctx->cf_info.parent_if.is_divergent) {
+         /* for potential uniform breaks after this continue,
+            we must ensure that they are handled correctly */
+         ctx->cf_info.parent_loop.has_divergent_continue = true;
+
+         if (!ctx->cf_info.exec.potentially_empty_continue) {
+            ctx->cf_info.exec.potentially_empty_continue = true;
+            ctx->cf_info.exec.potentially_empty_continue_depth = ctx->block->loop_nest_depth;
+         }
       }
    }
 
diff --git a/src/amd/compiler/tests/test_isel.cpp b/src/amd/compiler/tests/test_isel.cpp
index 78bdeb47743..58317bc0d1b 100644
--- a/src/amd/compiler/tests/test_isel.cpp
+++ b/src/amd/compiler/tests/test_isel.cpp
@@ -1741,3 +1741,41 @@ BEGIN_TEST(isel.cf.empty_exec.repair_ssa)
 
    finish_isel_test();
 END_TEST
+
+/*
+ * loop {
+ *    if (uniform) {
+ *       terminate_if
+ *       //potentially empty
+ *       continue
+ *    }
+ *    //potentially empty
+ *    break
+ * }
+ */
+BEGIN_TEST(isel.cf.empty_exec.loop_uniform_continue)
+   if (!setup_nir_cs(GFX11))
+      return;
+
+   nir_push_loop(nb);
+   {
+      //>> BB1
+      //! /* logical preds: BB0, BB2, / linear preds: BB0, BB3, / kind: uniform, loop-header, */
+
+      nir_push_if(nb, nir_unit_test_uniform_amd(nb, 1, 1, .base = 0));
+      {
+         //>> BB2
+         //! /* logical preds: BB1, / linear preds: BB1, / kind: continue, discard, */
+         nir_terminate_if(nb, nir_unit_test_uniform_amd(nb, 1, 1, .base = 1));
+         nir_jump(nb, nir_jump_continue);
+      }
+      nir_pop_if(nb, NULL);
+
+      //>> BB6
+      //! /* logical preds: BB5, / linear preds: BB4, BB5, / kind: uniform, break, */
+      nir_jump(nb, nir_jump_break);
+   }
+   nir_pop_loop(nb, NULL);
+
+   finish_isel_test();
+END_TEST