From ea765162c32c4665a384304fb5d1d668ade23cb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Wed, 12 Feb 2025 12:25:46 +0100 Subject: [PATCH] aco/ssa_elimination: create a single parallelcopy instruction for linear and logical phis Totals from 6651 (8.38% of 79377) affected shaders: (Navi31) Instrs: 14722896 -> 14722290 (-0.00%); split: -0.01%, +0.00% CodeSize: 77992072 -> 77989284 (-0.00%); split: -0.01%, +0.00% Latency: 160542885 -> 160541215 (-0.00%); split: -0.00%, +0.00% InvThroughput: 24543177 -> 24542710 (-0.00%); split: -0.00%, +0.00% Part-of: --- src/amd/compiler/aco_ssa_elimination.cpp | 53 ++++++------------------ 1 file changed, 13 insertions(+), 40 deletions(-) diff --git a/src/amd/compiler/aco_ssa_elimination.cpp b/src/amd/compiler/aco_ssa_elimination.cpp index 2ef9685435c..820475cba62 100644 --- a/src/amd/compiler/aco_ssa_elimination.cpp +++ b/src/amd/compiler/aco_ssa_elimination.cpp @@ -18,16 +18,13 @@ struct phi_info { }; struct ssa_elimination_ctx { - /* The outer vectors should be indexed by block index. The inner vectors store phi information - * for each block. */ - std::vector<phi_info> logical_phi_info; - std::vector<phi_info> linear_phi_info; + /* The outer vectors should be indexed by block index. + * The inner vectors store phi information for each block. + */ + std::vector<phi_info> phi_infos; Program* program; - ssa_elimination_ctx(Program* program_) - : logical_phi_info(program_->blocks.size()), linear_phi_info(program_->blocks.size()), - program(program_) - {} + ssa_elimination_ctx(Program* program_) : phi_infos(program_->blocks.size()), program(program_) {} }; void @@ -48,9 +45,7 @@ collect_phi_info(ssa_elimination_ctx& ctx) Block::edge_vec& preds = phi->opcode == aco_opcode::p_phi ? block.logical_preds : block.linear_preds; - uint32_t pred_idx = preds[i]; - auto& info = phi->opcode == aco_opcode::p_phi ? 
ctx.logical_phi_info[pred_idx] - : ctx.linear_phi_info[pred_idx]; + auto& info = ctx.phi_infos[preds[i]]; info.copies.emplace_back(phi->definitions[0], phi->operands[i]); if (phi->pseudo().needs_scratch_reg) { info.needs_scratch_reg = true; @@ -64,45 +59,23 @@ collect_phi_info(ssa_elimination_ctx& ctx) void insert_parallelcopies(ssa_elimination_ctx& ctx) { - /* insert the parallelcopies from logical phis before branch */ + /* insert parallelcopies for the phis at the end of blocks just before the branch */ for (unsigned block_idx = 0; block_idx < ctx.program->blocks.size(); ++block_idx) { - auto& logical_phi_info = ctx.logical_phi_info[block_idx]; - if (logical_phi_info.copies.empty()) + auto& phi_info = ctx.phi_infos[block_idx]; + if (phi_info.copies.empty()) continue; Block& block = ctx.program->blocks[block_idx]; aco_ptr<Instruction> pc{create_instruction(aco_opcode::p_parallelcopy, Format::PSEUDO, - logical_phi_info.copies.size(), - logical_phi_info.copies.size())}; + phi_info.copies.size(), phi_info.copies.size())}; unsigned i = 0; - for (auto& pair : logical_phi_info.copies) { + for (auto& pair : phi_info.copies) { pc->definitions[i] = pair.first; pc->operands[i] = pair.second; i++; } - pc->pseudo().needs_scratch_reg = false; - auto it = std::prev(block.instructions.end()); - block.instructions.insert(it, std::move(pc)); - } - - /* insert parallelcopies for the linear phis at the end of blocks just before the branch */ - for (unsigned block_idx = 0; block_idx < ctx.program->blocks.size(); ++block_idx) { - auto& linear_phi_info = ctx.linear_phi_info[block_idx]; - if (linear_phi_info.copies.empty()) - continue; - - Block& block = ctx.program->blocks[block_idx]; - aco_ptr<Instruction> pc{create_instruction(aco_opcode::p_parallelcopy, Format::PSEUDO, - linear_phi_info.copies.size(), - linear_phi_info.copies.size())}; - unsigned i = 0; - for (auto& pair : linear_phi_info.copies) { - pc->definitions[i] = pair.first; - pc->operands[i] = pair.second; - i++; - } - pc->pseudo().scratch_sgpr = 
linear_phi_info.scratch_sgpr; - pc->pseudo().needs_scratch_reg = linear_phi_info.needs_scratch_reg; + pc->pseudo().scratch_sgpr = phi_info.scratch_sgpr; + pc->pseudo().needs_scratch_reg = phi_info.needs_scratch_reg; auto it = std::prev(block.instructions.end()); block.instructions.insert(it, std::move(pc)); }