From ed020008b57744f7b9093da0f4c13bdedffb712c Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Fri, 15 Jan 2021 16:00:35 +0000 Subject: [PATCH] aco: rewrite setting of Exact_Branch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_insert_exec_mask.cpp | 76 +++++++++++++++++++---- 1 file changed, 65 insertions(+), 11 deletions(-) diff --git a/src/amd/compiler/aco_insert_exec_mask.cpp b/src/amd/compiler/aco_insert_exec_mask.cpp index 9c184f956ce..633b8e3422f 100644 --- a/src/amd/compiler/aco_insert_exec_mask.cpp +++ b/src/amd/compiler/aco_insert_exec_mask.cpp @@ -154,16 +154,6 @@ void get_block_needs(wqm_ctx &ctx, exec_ctx &exec_ctx, Block* block) mark_block_wqm(ctx, block_idx); block_idx++; } - } else if (ctx.loop && !ctx.wqm) { - /* Ensure a branch never results in an exec mask with only helper - * invocations (which can cause a loop to repeat infinitively if it's - * break branches are done in exact). */ - unsigned block_idx = block->index; - do { - if ((ctx.program->blocks[block_idx].kind & block_kind_branch)) - exec_ctx.info[block_idx].block_needs |= Exact_Branch; - block_idx++; - } while (!(ctx.program->blocks[block_idx].kind & block_kind_top_level)); } ctx.loop = false; @@ -235,6 +225,50 @@ void get_block_needs(wqm_ctx &ctx, exec_ctx &exec_ctx, Block* block) ctx.loop = true; } +void handle_exact_loops(wqm_ctx& ctx, exec_ctx& exec_ctx, unsigned preheader) +{ + unsigned header = preheader + 1; + assert(exec_ctx.program->blocks[header].kind & block_kind_loop_header); + + unsigned exit = header + 1; + for (; exit < exec_ctx.program->blocks.size(); exit++) { + Block& exit_block = exec_ctx.program->blocks[exit]; + if ((exit_block.kind & block_kind_loop_exit) && exit_block.loop_nest_depth == 0) + break; + } + assert(exit != exec_ctx.program->blocks.size()); + + int parent_branch = preheader; + unsigned rel_branch_depth = 0; + for (; parent_branch >= 0; parent_branch--) { + Block& branch = exec_ctx.program->blocks[parent_branch]; + if (branch.kind & block_kind_branch) { + if (rel_branch_depth == 0) + break; + rel_branch_depth--; + } + + /* top-level blocks should never have empty exact exec masks */ + if (branch.kind & block_kind_top_level) + return; + + if (branch.kind & block_kind_merge) + rel_branch_depth++; + } + assert(parent_branch >= 0); + + Block& branch = exec_ctx.program->blocks[parent_branch]; + assert(branch.kind & block_kind_branch); + if (ctx.branch_wqm[parent_branch]) { + /* The branch can't be done in Exact because some other blocks in it + * are in WQM. So instead, ensure that the loop breaks are done in WQM. */ + for (unsigned pred_idx : exec_ctx.program->blocks[exit].logical_preds) + mark_block_wqm(ctx, pred_idx); + } else { + exec_ctx.info[parent_branch].block_needs |= Exact_Branch; + } +} + void calculate_wqm_needs(exec_ctx& exec_ctx) { wqm_ctx ctx(exec_ctx.program); @@ -243,7 +277,27 @@ void calculate_wqm_needs(exec_ctx& exec_ctx) unsigned block_index = *std::prev(ctx.worklist.end()); ctx.worklist.erase(std::prev(ctx.worklist.end())); - get_block_needs(ctx, exec_ctx, &exec_ctx.program->blocks[block_index]); + Block& block = exec_ctx.program->blocks[block_index]; + get_block_needs(ctx, exec_ctx, &block); + + /* If an outer loop and it's nested loops does not need WQM, + * add_branch_code() will ensure that it enters in Exact. We have to + * ensure that the exact exec mask is not empty by adding Exact_Branch to + * the outer divergent branch. + * + * If the loop or a nested loop needs WQM, branch_wqm will be true for the + * preheader. + */ + if (block.kind & block_kind_top_level && block.index != exec_ctx.program->blocks.size() - 1) { + unsigned preheader = block.index; + do { + Block& preheader_block = exec_ctx.program->blocks[preheader]; + if ((preheader_block.kind & block_kind_loop_preheader) && + preheader_block.loop_nest_depth == 0 && !ctx.branch_wqm[preheader]) + handle_exact_loops(ctx, exec_ctx, preheader); + preheader++; + } while (!(exec_ctx.program->blocks[preheader].kind & block_kind_top_level)); + } } uint8_t ever_again_needs = 0;