From d5398b62da1913e7224c826da0dbd5fa88436f18 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Thu, 27 Apr 2023 16:22:52 +0100 Subject: [PATCH] aco/ra: create M0-affinities for s_sendmsg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2 by Timur Kristóf: Do not add the affinity for instructions that can't write m0 reliably, such as readlane-like instructions on GFX8. Signed-off-by: Rhys Perry Reviewed-by: Timur Kristóf Part-of: --- src/amd/compiler/aco_ir.cpp | 21 ++++++++++++++++++++ src/amd/compiler/aco_ir.h | 1 + src/amd/compiler/aco_register_allocation.cpp | 8 ++++++++ 3 files changed, 30 insertions(+) diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index ef04a89c780..03f6931be1c 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -479,6 +479,27 @@ can_use_opsel(amd_gfx_level gfx_level, aco_opcode op, int idx) } } +bool +can_write_m0(amd_gfx_level gfx_level, const aco_ptr& instr) +{ + if (instr->isSALU()) + return true; + + if (instr->isVALU()) + return gfx_level >= GFX9; + + switch (instr->opcode) { + case aco_opcode::p_parallelcopy: + case aco_opcode::p_extract: + case aco_opcode::p_insert: + return true; + case aco_opcode::p_reload: + return gfx_level >= GFX9; + default: + return false; + } +} + bool instr_is_16bit(amd_gfx_level gfx_level, aco_opcode op) { diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index 6ae4571abd8..a39e949847f 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -1804,6 +1804,7 @@ bool instr_is_16bit(amd_gfx_level gfx_level, aco_opcode op); uint8_t get_gfx11_true16_mask(aco_opcode op); bool can_use_SDWA(amd_gfx_level gfx_level, const aco_ptr& instr, bool pre_ra); bool can_use_DPP(const aco_ptr& instr, bool pre_ra, bool dpp8); +bool can_write_m0(amd_gfx_level gfx_level, const aco_ptr& instr); /* updates "instr" and returns the old instruction (or NULL if no update was needed) */ aco_ptr convert_to_SDWA(amd_gfx_level gfx_level, aco_ptr& instr); aco_ptr convert_to_DPP(aco_ptr& instr, bool dpp8); diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index ca9b8702419..fba6e1c012c 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -55,6 +55,7 @@ struct assignment { struct { bool assigned : 1; bool vcc : 1; + bool m0 : 1; }; uint8_t _ = 0; }; @@ -1653,6 +1654,11 @@ get_reg(ra_ctx& ctx, RegisterFile& reg_file, Temp temp, if (get_reg_specified(ctx, reg_file, temp.regClass(), instr, vcc)) return vcc; } + if (ctx.assignments[temp.id()].m0) { + if (get_reg_specified(ctx, reg_file, temp.regClass(), instr, m0) && + can_write_m0(ctx.program->gfx_level, instr)) + return m0; + } std::optional res; @@ -2461,6 +2467,8 @@ get_affinities(ra_ctx& ctx, std::vector& live_out_per_block) if (!instr->definitions[1].isKill() && instr->operands[0].isTemp() && instr->operands[1].isFixed() && instr->operands[1].physReg() == exec) ctx.assignments[instr->operands[0].tempId()].vcc = true; + } else if (instr->opcode == aco_opcode::s_sendmsg) { + ctx.assignments[instr->operands[0].tempId()].m0 = true; } /* add operands to live variables */