From 39264a90c36f0258aa32176fdaa824bb584cdc97 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Fri, 19 Jul 2024 18:14:24 +0100 Subject: [PATCH] aco: consider exec empty after divergent continue then divergent break MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For: loop { if (divergent) continue if (divergent) break //exec is potentially empty here loop { if (divergent) break } } If a subset of invocations take the continue and then the rest take the break, then exec will be empty after the break. Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- .../compiler/aco_instruction_selection.cpp | 51 ++++++++++++++++--- src/amd/compiler/aco_instruction_selection.h | 10 ++-- 2 files changed, 52 insertions(+), 9 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index da97551d42f..2917703e21c 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -56,11 +56,13 @@ struct if_context { bool divergent_old; bool exec_potentially_empty_discard_old; bool exec_potentially_empty_break_old; + bool exec_potentially_empty_continue_old; bool had_divergent_discard_old; bool had_divergent_discard_then; bool has_divergent_continue_old; bool has_divergent_continue_then; uint16_t exec_potentially_empty_break_depth_old; + uint16_t exec_potentially_empty_continue_depth_old; unsigned BB_if_idx; unsigned invert_idx; @@ -10284,8 +10286,14 @@ end_loop(isel_context* ctx, loop_context* lc) Builder bld(ctx->program, ctx->block); append_logical_end(ctx->block); + /* No need to check exec_potentially_empty_break/continue originating inside the loop. In the + * only case where it's possible at this point (divergent break after divergent continue), we + * should continue anyway. */ if (ctx->cf_info.exec_potentially_empty_discard || - ctx->cf_info.exec_potentially_empty_break) { + (ctx->cf_info.exec_potentially_empty_break && + ctx->cf_info.exec_potentially_empty_break_depth < ctx->block->loop_nest_depth) || + (ctx->cf_info.exec_potentially_empty_continue && + ctx->cf_info.exec_potentially_empty_continue_depth < ctx->block->loop_nest_depth)) { /* Discards can result in code running with an empty exec mask. * This would result in divergent breaks not ever being taken. As a * workaround, break the loop when the loop mask is empty instead of @@ -10337,6 +10345,10 @@ end_loop(isel_context* ctx, loop_context* lc) ctx->cf_info.parent_if.is_divergent = lc->divergent_if_old; if (!ctx->block->loop_nest_depth && !ctx->cf_info.parent_if.is_divergent) ctx->cf_info.exec_potentially_empty_discard = false; + if (ctx->block->loop_nest_depth < ctx->cf_info.exec_potentially_empty_break_depth) { + ctx->cf_info.exec_potentially_empty_break = false; + ctx->cf_info.exec_potentially_empty_break_depth = UINT16_MAX; + } } void @@ -10362,6 +10374,11 @@ emit_loop_jump(isel_context* ctx, bool is_break) return; } ctx->cf_info.parent_loop.has_divergent_branch = true; + + if (!ctx->cf_info.exec_potentially_empty_break) { + ctx->cf_info.exec_potentially_empty_break = true; + ctx->cf_info.exec_potentially_empty_break_depth = ctx->block->loop_nest_depth; + } } else { logical_target = &ctx->program->blocks[ctx->cf_info.parent_loop.header_idx]; add_logical_edge(idx, logical_target); @@ -10380,11 +10397,11 @@ emit_loop_jump(isel_context* ctx, bool is_break) we must ensure that they are handled correctly */ ctx->cf_info.parent_loop.has_divergent_continue = true; ctx->cf_info.parent_loop.has_divergent_branch = true; - } - if (ctx->cf_info.parent_if.is_divergent && !ctx->cf_info.exec_potentially_empty_break) { - ctx->cf_info.exec_potentially_empty_break = true; - ctx->cf_info.exec_potentially_empty_break_depth = ctx->block->loop_nest_depth; + if (!ctx->cf_info.exec_potentially_empty_continue) { + ctx->cf_info.exec_potentially_empty_continue = true; + ctx->cf_info.exec_potentially_empty_continue_depth = ctx->block->loop_nest_depth; + } } /* remove critical edges from linear CFG */ @@ -10595,7 +10612,10 @@ begin_divergent_if_then(isel_context* ctx, if_context* ic, Temp cond, ic->exec_potentially_empty_discard_old = ctx->cf_info.exec_potentially_empty_discard; ic->exec_potentially_empty_break_old = ctx->cf_info.exec_potentially_empty_break; + ic->exec_potentially_empty_continue_old = ctx->cf_info.exec_potentially_empty_continue; ic->exec_potentially_empty_break_depth_old = ctx->cf_info.exec_potentially_empty_break_depth; + ic->exec_potentially_empty_continue_depth_old = + ctx->cf_info.exec_potentially_empty_continue_depth; ic->divergent_old = ctx->cf_info.parent_if.is_divergent; ic->had_divergent_discard_old = ctx->cf_info.had_divergent_discard; ctx->cf_info.parent_if.is_divergent = true; @@ -10604,6 +10624,8 @@ begin_divergent_if_then(isel_context* ctx, if_context* ic, Temp cond, ctx->cf_info.exec_potentially_empty_discard = false; ctx->cf_info.exec_potentially_empty_break = false; ctx->cf_info.exec_potentially_empty_break_depth = UINT16_MAX; + ctx->cf_info.exec_potentially_empty_continue = false; + ctx->cf_info.exec_potentially_empty_continue_depth = UINT16_MAX; /** emit logical then block */ ctx->program->next_divergent_if_logical_depth++; @@ -10658,10 +10680,16 @@ begin_divergent_if_else(isel_context* ctx, if_context* ic, ic->exec_potentially_empty_break_old |= ctx->cf_info.exec_potentially_empty_break; ic->exec_potentially_empty_break_depth_old = std::min( ic->exec_potentially_empty_break_depth_old, ctx->cf_info.exec_potentially_empty_break_depth); + ic->exec_potentially_empty_continue_old |= ctx->cf_info.exec_potentially_empty_continue; + ic->exec_potentially_empty_continue_depth_old = + std::min(ic->exec_potentially_empty_continue_depth_old, + ctx->cf_info.exec_potentially_empty_continue_depth); /* divergent branches use cbranch_execz */ ctx->cf_info.exec_potentially_empty_discard = false; ctx->cf_info.exec_potentially_empty_break = false; ctx->cf_info.exec_potentially_empty_break_depth = UINT16_MAX; + ctx->cf_info.exec_potentially_empty_continue = false; + ctx->cf_info.exec_potentially_empty_continue_depth = UINT16_MAX; ic->had_divergent_discard_then = ctx->cf_info.had_divergent_discard; ctx->cf_info.had_divergent_discard = ic->had_divergent_discard_old; @@ -10715,16 +10743,27 @@ end_divergent_if(isel_context* ctx, if_context* ic) ctx->cf_info.exec_potentially_empty_break |= ic->exec_potentially_empty_break_old; ctx->cf_info.exec_potentially_empty_break_depth = std::min( ic->exec_potentially_empty_break_depth_old, ctx->cf_info.exec_potentially_empty_break_depth); + ctx->cf_info.exec_potentially_empty_continue |= ic->exec_potentially_empty_continue_old; + ctx->cf_info.exec_potentially_empty_continue_depth = + std::min(ic->exec_potentially_empty_continue_depth_old, + ctx->cf_info.exec_potentially_empty_continue_depth); if (ctx->block->loop_nest_depth == ctx->cf_info.exec_potentially_empty_break_depth && - !ctx->cf_info.parent_if.is_divergent) { + !ctx->cf_info.parent_if.is_divergent && !ctx->cf_info.parent_loop.has_divergent_continue) { ctx->cf_info.exec_potentially_empty_break = false; ctx->cf_info.exec_potentially_empty_break_depth = UINT16_MAX; } + if (ctx->block->loop_nest_depth == ctx->cf_info.exec_potentially_empty_continue_depth && + !ctx->cf_info.parent_if.is_divergent) { + ctx->cf_info.exec_potentially_empty_continue = false; + ctx->cf_info.exec_potentially_empty_continue_depth = UINT16_MAX; + } /* uniform control flow never has an empty exec-mask */ if (!ctx->block->loop_nest_depth && !ctx->cf_info.parent_if.is_divergent) { ctx->cf_info.exec_potentially_empty_discard = false; ctx->cf_info.exec_potentially_empty_break = false; ctx->cf_info.exec_potentially_empty_break_depth = UINT16_MAX; + ctx->cf_info.exec_potentially_empty_continue = false; + ctx->cf_info.exec_potentially_empty_continue_depth = UINT16_MAX; } ctx->cf_info.had_divergent_discard |= ic->had_divergent_discard_then; diff --git a/src/amd/compiler/aco_instruction_selection.h b/src/amd/compiler/aco_instruction_selection.h index c44849de6cc..f581650ea94 100644 --- a/src/amd/compiler/aco_instruction_selection.h +++ b/src/amd/compiler/aco_instruction_selection.h @@ -61,10 +61,14 @@ struct isel_context { bool exec_potentially_empty_discard = false; /* set to false when loop_nest_depth==0 && parent_if.is_divergent==false */ uint16_t exec_potentially_empty_break_depth = UINT16_MAX; - /* Set to false when loop_nest_depth==exec_potentially_empty_break_depth - * and parent_if.is_divergent==false. Called _break but it's also used for - * loop continues. */ + /* Set to false when loop_nest_depth==exec_potentially_empty_break_depth, + * parent_if.is_divergent==false and parent_loop.has_divergent_continue==false. Also set to + * false if loop_nest_depth