aco: use repair pass for LCSSA workaround

This makes instruction selection simpler and fixes potential issues with
allocated_vec or the optimizer moving SGPR uses out of the loop.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31143>
This commit is contained in:
Rhys Perry
2024-07-11 15:15:33 +01:00
committed by Marge Bot
parent 5de990f5a9
commit 8a175b02bc
2 changed files with 4 additions and 93 deletions
+3 -92
View File
@@ -10219,6 +10219,9 @@ end_loop(isel_context* ctx, loop_context* lc)
if (!ctx->cf_info.parent_loop.has_divergent_branch)
add_logical_edge(block_idx, &ctx->program->blocks[loop_header_idx]);
ctx->block = &ctx->program->blocks[block_idx];
/* SGPR temporaries might need loop exit phis to be created. */
ctx->program->should_repair_ssa = true;
} else {
ctx->block->kind |= (block_kind_continue | block_kind_uniform);
if (!ctx->cf_info.parent_loop.has_divergent_branch)
@@ -10392,94 +10395,6 @@ visit_block(isel_context* ctx, nir_block* block)
}
}
static bool
all_uses_inside_loop(nir_def* def, nir_block* block_before_loop, nir_block* block_after_loop)
{
nir_foreach_use_including_if (use, def) {
if (nir_src_is_if(use)) {
nir_block* branch_block =
nir_cf_node_as_block(nir_cf_node_prev(&nir_src_parent_if(use)->cf_node));
if (branch_block->index <= block_before_loop->index || branch_block->index >= block_after_loop->index)
return false;
} else {
nir_instr* instr = nir_src_parent_instr(use);
if ((instr->block->index <= block_before_loop->index || instr->block->index >= block_after_loop->index) &&
!(instr->type == nir_instr_type_phi && instr->block == block_after_loop)) {
return false;
}
}
}
return true;
}
Temp
rename_temp(const std::map<unsigned, unsigned>& renames, Temp tmp)
{
auto it = renames.find(tmp.id());
if (it != renames.end())
return Temp(it->second, tmp.regClass());
return tmp;
}
static void
lcssa_workaround(isel_context* ctx, nir_loop* loop)
{
nir_block* block_before_loop = nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));
nir_block* block_after_loop = nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node));
std::map<unsigned, unsigned> renames;
nir_foreach_block_in_cf_node (block, &loop->cf_node) {
/* These values are reachable from the loop exit even when continue_or_break is used. We
* shouldn't create phis with undef operands in case the contents are important even if exec
* is zero (for example, memory access addresses). */
if (nir_block_dominates(block, nir_loop_last_block(loop)))
continue;
/* Definitions in this block are not reachable from the loop exit, and so all uses are inside
* the loop. */
if (!nir_block_dominates(block, block_after_loop))
continue;
nir_foreach_instr (instr, block) {
nir_def* def = nir_instr_def(instr);
if (!def)
continue;
Temp tmp = get_ssa_temp(ctx, def);
if (!tmp.is_linear() || all_uses_inside_loop(def, block_before_loop, block_after_loop))
continue;
Temp new_tmp = ctx->program->allocateTmp(tmp.regClass());
aco_ptr<Instruction> phi(create_instruction(aco_opcode::p_phi, Format::PSEUDO,
ctx->block->logical_preds.size(), 1));
for (unsigned i = 0; i < ctx->block->logical_preds.size(); i++)
phi->operands[i] = Operand(new_tmp);
phi->definitions[0] = Definition(tmp);
ctx->block->instructions.emplace(ctx->block->instructions.begin(), std::move(phi));
renames.emplace(tmp.id(), new_tmp.id());
}
}
if (renames.empty())
return;
for (unsigned i = ctx->block->index - 1;
ctx->program->blocks[i].loop_nest_depth > ctx->block->loop_nest_depth; i--) {
for (aco_ptr<Instruction>& instr : ctx->program->blocks[i].instructions) {
for (Definition& def : instr->definitions) {
if (def.isTemp())
def.setTemp(rename_temp(renames, def.getTemp()));
}
for (Operand& op : instr->operands) {
if (op.isTemp())
op.setTemp(rename_temp(renames, op.getTemp()));
}
}
}
}
static void begin_uniform_if_then(isel_context* ctx, if_context* ic, Temp cond);
static void begin_uniform_if_else(isel_context* ctx, if_context* ic);
static void end_uniform_if(isel_context* ctx, if_context* ic);
@@ -10494,10 +10409,6 @@ visit_loop(isel_context* ctx, nir_loop* loop)
visit_cf_list(ctx, &loop->body);
end_loop(ctx, &lc);
/* Create extra LCSSA phis for continue_or_break */
if (ctx->block->linear_preds.size() > ctx->block->logical_preds.size())
lcssa_workaround(ctx, loop);
}
static void
@@ -383,7 +383,7 @@ init_context(isel_context* ctx, nir_shader* shader)
nir_metadata_preserve(impl, nir_metadata_none);
/* we'll need these for isel */
nir_metadata_require(impl, nir_metadata_block_index | nir_metadata_dominance);
nir_metadata_require(impl, nir_metadata_block_index);
if (ctx->options->dump_preoptir) {
fprintf(stderr, "NIR shader before instruction selection:\n");