diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp index 6a7b65ce31f..3a1fddc4331 100644 --- a/src/amd/compiler/aco_insert_waitcnt.cpp +++ b/src/amd/compiler/aco_insert_waitcnt.cpp @@ -716,6 +716,14 @@ gen(Instruction* instr, wait_ctx& ctx) update_counters(ctx, event_sendmsg); break; } + case Format::SOP1: { + if (instr->opcode == aco_opcode::s_sendmsg_rtn_b32 || + instr->opcode == aco_opcode::s_sendmsg_rtn_b64) { + update_counters(ctx, event_sendmsg); + insert_wait_entry(ctx, instr->definitions[0], event_sendmsg); + } + break; + } default: break; } } diff --git a/src/amd/compiler/aco_opt_value_numbering.cpp b/src/amd/compiler/aco_opt_value_numbering.cpp index 8b2fa3591ef..db977fe486b 100644 --- a/src/amd/compiler/aco_opt_value_numbering.cpp +++ b/src/amd/compiler/aco_opt_value_numbering.cpp @@ -202,6 +202,12 @@ struct InstrPred { } switch (a->format) { + case Format::SOP1: { + if (a->opcode == aco_opcode::s_sendmsg_rtn_b32 || + a->opcode == aco_opcode::s_sendmsg_rtn_b64) + return false; + return true; + } case Format::SOPK: { if (a->opcode == aco_opcode::s_getreg_b32) return false; diff --git a/src/amd/compiler/aco_scheduler.cpp b/src/amd/compiler/aco_scheduler.cpp index 88498ef4565..d588b8000e4 100644 --- a/src/amd/compiler/aco_scheduler.cpp +++ b/src/amd/compiler/aco_scheduler.cpp @@ -568,7 +568,8 @@ perform_hazard_query(hazard_query* query, Instruction* instr, bool upwards) /* don't move non-reorderable instructions */ if (instr->opcode == aco_opcode::s_memtime || instr->opcode == aco_opcode::s_memrealtime || instr->opcode == aco_opcode::s_setprio || instr->opcode == aco_opcode::s_getreg_b32 || - instr->opcode == aco_opcode::p_init_scratch || instr->opcode == aco_opcode::p_jump_to_epilog) + instr->opcode == aco_opcode::p_init_scratch || instr->opcode == aco_opcode::p_jump_to_epilog || + instr->opcode == aco_opcode::s_sendmsg_rtn_b32 || instr->opcode == aco_opcode::s_sendmsg_rtn_b64) return hazard_fail_unreorderable; memory_event_set instr_set; @@ -644,7 +645,10 @@ schedule_SMEM(sched_ctx& ctx, Block* block, std::vector& registe int16_t k = 0; /* don't move s_memtime/s_memrealtime */ - if (current->opcode == aco_opcode::s_memtime || current->opcode == aco_opcode::s_memrealtime) + if (current->opcode == aco_opcode::s_memtime || + current->opcode == aco_opcode::s_memrealtime || + current->opcode == aco_opcode::s_sendmsg_rtn_b32 || + current->opcode == aco_opcode::s_sendmsg_rtn_b64) return; /* first, check if we have instructions before current to move down */