diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp
index ef04a89c780..03f6931be1c 100644
--- a/src/amd/compiler/aco_ir.cpp
+++ b/src/amd/compiler/aco_ir.cpp
@@ -479,6 +479,27 @@ can_use_opsel(amd_gfx_level gfx_level, aco_opcode op, int idx)
    }
 }
 
+bool /* Returns whether instr is accepted as a writer of m0 on the given gfx_level. */
+can_write_m0(amd_gfx_level gfx_level, const aco_ptr& instr) /* NOTE(review): aco_ptr carries no template arguments here - possibly lost in extraction; confirm */
+{
+   if (instr->isSALU()) /* SALU instructions are accepted on every gfx_level */
+      return true;
+
+   if (instr->isVALU()) /* VALU instructions are accepted only from GFX9 onwards */
+      return gfx_level >= GFX9;
+
+   switch (instr->opcode) { /* neither SALU nor VALU: decide per opcode */
+   case aco_opcode::p_parallelcopy:
+   case aco_opcode::p_extract:
+   case aco_opcode::p_insert:
+      return true; /* these three pseudo opcodes are accepted on every gfx_level */
+   case aco_opcode::p_reload:
+      return gfx_level >= GFX9; /* same GFX9+ condition as the VALU case above */
+   default:
+      return false; /* every other instruction is rejected */
+   }
+}
+
 bool
 instr_is_16bit(amd_gfx_level gfx_level, aco_opcode op)
 {
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index 6ae4571abd8..a39e949847f 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -1804,6 +1804,7 @@ bool instr_is_16bit(amd_gfx_level gfx_level, aco_opcode op);
 uint8_t get_gfx11_true16_mask(aco_opcode op);
 bool can_use_SDWA(amd_gfx_level gfx_level, const aco_ptr& instr, bool pre_ra);
 bool can_use_DPP(const aco_ptr& instr, bool pre_ra, bool dpp8);
+bool can_write_m0(amd_gfx_level gfx_level, const aco_ptr& instr); /* true if instr is accepted as a writer of m0 on gfx_level */
 /* updates "instr" and returns the old instruction (or NULL if no update was needed) */
 aco_ptr convert_to_SDWA(amd_gfx_level gfx_level, aco_ptr& instr);
 aco_ptr convert_to_DPP(aco_ptr& instr, bool dpp8);
diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp
index ca9b8702419..fba6e1c012c 100644
--- a/src/amd/compiler/aco_register_allocation.cpp
+++ b/src/amd/compiler/aco_register_allocation.cpp
@@ -55,6 +55,7 @@ struct assignment {
       struct {
          bool assigned : 1;
          bool vcc : 1;
+         bool m0 : 1; /* m0 preference flag: set in get_affinities for the s_sendmsg operand, read in get_reg */
       };
       uint8_t _ = 0;
    };
@@ -1653,6 +1654,11 @@ get_reg(ra_ctx& ctx, RegisterFile& reg_file, Temp temp,
       if (get_reg_specified(ctx, reg_file, temp.regClass(), instr, vcc))
          return vcc;
    }
+   if (ctx.assignments[temp.id()].m0) { /* this temp was flagged as preferring m0 */
+      if (get_reg_specified(ctx, reg_file, temp.regClass(), instr, m0) &&
+          can_write_m0(ctx.program->gfx_level, instr)) /* m0 is returned only if get_reg_specified accepts it and instr may write m0 */
+         return m0;
+   }
 
    std::optional res;
 
@@ -2461,6 +2467,8 @@ get_affinities(ra_ctx& ctx, std::vector& live_out_per_block)
             if (!instr->definitions[1].isKill() && instr->operands[0].isTemp() &&
                 instr->operands[1].isFixed() && instr->operands[1].physReg() == exec)
                ctx.assignments[instr->operands[0].tempId()].vcc = true;
+         } else if (instr->opcode == aco_opcode::s_sendmsg) { /* flag the first operand of s_sendmsg as preferring m0 */
+            ctx.assignments[instr->operands[0].tempId()].m0 = true; /* NOTE(review): unlike the vcc case above, operands[0].isTemp() is not checked - confirm s_sendmsg always has a temp operand here */
          }
 
          /* add operands to live variables */