From 4ac47ad1cd7976d7effbbfae37fa69e26a288ad2 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Tue, 30 Jun 2020 15:33:18 +0100 Subject: [PATCH] aco: combine DPP into VALU after RA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mostly helps a bunch of Cyberpunk 2077 shaders. fossil-db (Siena Cichlid): Totals from 26 (0.02% of 150170) affected shaders: CodeSize: 83208 -> 81528 (-2.02%) Instrs: 14728 -> 14308 (-2.85%) Latency: 48041 -> 47793 (-0.52%) InvThroughput: 10836 -> 10578 (-2.38%) Signed-off-by: Rhys Perry Reviewed-by: Timur Kristóf Part-of: --- src/amd/compiler/aco_optimizer_postRA.cpp | 55 +++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/src/amd/compiler/aco_optimizer_postRA.cpp b/src/amd/compiler/aco_optimizer_postRA.cpp index e612292e822..84ee6ef01c7 100644 --- a/src/amd/compiler/aco_optimizer_postRA.cpp +++ b/src/amd/compiler/aco_optimizer_postRA.cpp @@ -22,6 +22,7 @@ * */ +#include "aco_builder.h" #include "aco_ir.h" #include @@ -338,6 +339,58 @@ try_optimize_scc_nocompare(pr_opt_ctx& ctx, aco_ptr& instr) } } +void +try_combine_dpp(pr_opt_ctx& ctx, aco_ptr& instr) +{ + /* Folds a DPP v_mov_b32 that last wrote one of the first two operands of a non-DPP VALU instruction into that instruction: instr is converted to DPP and reads the source of the mov directly. At most one operand is folded per call. */ if (!instr->isVALU() || instr->isDPP() || !can_use_DPP(instr, false)) + return; + + for (unsigned i = 0; i < MIN2(2, instr->operands.size()); i++) { + Idx op_instr_idx = last_writer_idx(ctx, instr->operands[i]); + if (!op_instr_idx.found()) + continue; + + Instruction* mov = ctx.get(op_instr_idx); + if (mov->opcode != aco_opcode::v_mov_b32 || !mov->isDPP()) + continue; + + /* If we aren't going to remove the v_mov_b32, we have to ensure that it doesn't overwrite + * its own operand before we use it. 
+ */ + if (mov->definitions[0].physReg() == mov->operands[0].physReg() && + (!mov->definitions[0].tempId() || ctx.uses[mov->definitions[0].tempId()] > 1)) + continue; + + /* NOTE(review): presumably this rejects the case where the source register of the mov was written again after the mov executed; confirm against is_instr_after. */ Idx mov_src_idx = last_writer_idx(ctx, mov->operands[0]); + if (is_instr_after(mov_src_idx, op_instr_idx)) + continue; + + /* The folded operand is placed in operand 0 below, so for i == 1 the two operands must be swappable. */ if (i && !can_swap_operands(instr, &instr->opcode)) + continue; + + /* any row_mask/bank_mask other than 0xf doesn't make sense in SSA */ + assert(mov->dpp().row_mask == 0xf && mov->dpp().bank_mask == 0xf); + + /* instr stops reading the result of the mov; if other uses of that result remain, also count the new read of the mov source by instr. */ if (--ctx.uses[mov->definitions[0].tempId()]) + ctx.uses[mov->operands[0].tempId()]++; + + convert_to_DPP(instr); + + DPP_instruction* dpp = &instr->dpp(); + if (i) { + std::swap(dpp->operands[0], dpp->operands[1]); + std::swap(dpp->neg[0], dpp->neg[1]); + std::swap(dpp->abs[0], dpp->abs[1]); + } + dpp->operands[0] = mov->operands[0]; + dpp->dpp_ctrl = mov->dpp().dpp_ctrl; + dpp->bound_ctrl = true; + /* A negate on the mov is dropped when instr already takes abs of this operand; otherwise it toggles the neg modifier of instr. The abs of the mov is always carried over. */ dpp->neg[0] ^= mov->dpp().neg[0] && !dpp->abs[0]; + dpp->abs[0] |= mov->dpp().abs[0]; + return; + } +} + void process_instruction(pr_opt_ctx& ctx, aco_ptr& instr) { @@ -345,6 +398,8 @@ process_instruction(pr_opt_ctx& ctx, aco_ptr& instr) try_optimize_scc_nocompare(ctx, instr); + try_combine_dpp(ctx, instr); + if (instr) save_reg_writes(ctx, instr);