From 8a175b02bc271190a9b7fec895c0a587da8ee084 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Thu, 11 Jul 2024 15:15:33 +0100 Subject: [PATCH] aco: use repair pass for LCSSA workaround MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This makes instruction selection simpler and fixes potential issues with allocated_vec or the optimizer moving SGPR uses out of the loop. Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- .../compiler/aco_instruction_selection.cpp | 95 +------------------ .../aco_instruction_selection_setup.cpp | 2 +- 2 files changed, 4 insertions(+), 93 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 1c5c74321f9..01ef4b6fb8c 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -10219,6 +10219,9 @@ end_loop(isel_context* ctx, loop_context* lc) if (!ctx->cf_info.parent_loop.has_divergent_branch) add_logical_edge(block_idx, &ctx->program->blocks[loop_header_idx]); ctx->block = &ctx->program->blocks[block_idx]; + + /* SGPR temporaries might need loop exit phis to be created. */ + ctx->program->should_repair_ssa = true; } else { ctx->block->kind |= (block_kind_continue | block_kind_uniform); if (!ctx->cf_info.parent_loop.has_divergent_branch) @@ -10392,94 +10395,6 @@ visit_block(isel_context* ctx, nir_block* block) } } -static bool -all_uses_inside_loop(nir_def* def, nir_block* block_before_loop, nir_block* block_after_loop) -{ - nir_foreach_use_including_if (use, def) { - if (nir_src_is_if(use)) { - nir_block* branch_block = - nir_cf_node_as_block(nir_cf_node_prev(&nir_src_parent_if(use)->cf_node)); - if (branch_block->index <= block_before_loop->index || branch_block->index >= block_after_loop->index) - return false; - } else { - nir_instr* instr = nir_src_parent_instr(use); - if ((instr->block->index <= block_before_loop->index || instr->block->index >= block_after_loop->index) && - !(instr->type == nir_instr_type_phi && instr->block == block_after_loop)) { - return false; - } - } - } - - return true; -} - -Temp -rename_temp(const std::map& renames, Temp tmp) -{ - auto it = renames.find(tmp.id()); - if (it != renames.end()) - return Temp(it->second, tmp.regClass()); - return tmp; -} - -static void -lcssa_workaround(isel_context* ctx, nir_loop* loop) -{ - nir_block* block_before_loop = nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node)); - nir_block* block_after_loop = nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)); - - std::map renames; - nir_foreach_block_in_cf_node (block, &loop->cf_node) { - /* These values are reachable from the loop exit even when continue_or_break is used. We - * shouldn't create phis with undef operands in case the contents are important even if exec - * is zero (for example, memory access addresses). */ - if (nir_block_dominates(block, nir_loop_last_block(loop))) - continue; - - /* Definitions in this block are not reachable from the loop exit, and so all uses are inside - * the loop. */ - if (!nir_block_dominates(block, block_after_loop)) - continue; - - nir_foreach_instr (instr, block) { - nir_def* def = nir_instr_def(instr); - if (!def) - continue; - - Temp tmp = get_ssa_temp(ctx, def); - if (!tmp.is_linear() || all_uses_inside_loop(def, block_before_loop, block_after_loop)) - continue; - - Temp new_tmp = ctx->program->allocateTmp(tmp.regClass()); - aco_ptr phi(create_instruction(aco_opcode::p_phi, Format::PSEUDO, - ctx->block->logical_preds.size(), 1)); - for (unsigned i = 0; i < ctx->block->logical_preds.size(); i++) - phi->operands[i] = Operand(new_tmp); - phi->definitions[0] = Definition(tmp); - ctx->block->instructions.emplace(ctx->block->instructions.begin(), std::move(phi)); - - renames.emplace(tmp.id(), new_tmp.id()); - } - } - - if (renames.empty()) - return; - - for (unsigned i = ctx->block->index - 1; - ctx->program->blocks[i].loop_nest_depth > ctx->block->loop_nest_depth; i--) { - for (aco_ptr& instr : ctx->program->blocks[i].instructions) { - for (Definition& def : instr->definitions) { - if (def.isTemp()) - def.setTemp(rename_temp(renames, def.getTemp())); - } - for (Operand& op : instr->operands) { - if (op.isTemp()) - op.setTemp(rename_temp(renames, op.getTemp())); - } - } - } -} - static void begin_uniform_if_then(isel_context* ctx, if_context* ic, Temp cond); static void begin_uniform_if_else(isel_context* ctx, if_context* ic); static void end_uniform_if(isel_context* ctx, if_context* ic); @@ -10494,10 +10409,6 @@ visit_loop(isel_context* ctx, nir_loop* loop) visit_cf_list(ctx, &loop->body); end_loop(ctx, &lc); - - /* Create extra LCSSA phis for continue_or_break */ - if (ctx->block->linear_preds.size() > ctx->block->logical_preds.size()) - lcssa_workaround(ctx, loop); } static void diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp index f4632f3fbef..a322072194e 100644 --- a/src/amd/compiler/aco_instruction_selection_setup.cpp +++ b/src/amd/compiler/aco_instruction_selection_setup.cpp @@ -383,7 +383,7 @@ init_context(isel_context* ctx, nir_shader* shader) nir_metadata_preserve(impl, nir_metadata_none); /* we'll need these for isel */ - nir_metadata_require(impl, nir_metadata_block_index | nir_metadata_dominance); + nir_metadata_require(impl, nir_metadata_block_index); if (ctx->options->dump_preoptir) { fprintf(stderr, "NIR shader before instruction selection:\n");