diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp index c7088a498b5..94c41173d6e 100644 --- a/src/amd/compiler/aco_assembler.cpp +++ b/src/amd/compiler/aco_assembler.cpp @@ -595,14 +595,14 @@ void emit_block(asm_context& ctx, std::vector& out, Block& block) void fix_exports(asm_context& ctx, std::vector& out, Program* program) { - for (int idx = program->blocks.size() - 1; idx >= 0; idx--) { - Block& block = program->blocks[idx]; + for (Block& block : program->blocks) { + if (!(block.kind & block_kind_export_end)) + continue; std::vector>::reverse_iterator it = block.instructions.rbegin(); - bool endBlock = false; bool exported = false; while ( it != block.instructions.rend()) { - if ((*it)->format == Format::EXP && endBlock) { + if ((*it)->format == Format::EXP) { Export_instruction* exp = static_cast((*it).get()); if (program->stage & hw_vs) { if (exp->dest >= V_008DFC_SQ_EXP_POS && exp->dest <= (V_008DFC_SQ_EXP_POS + 3)) { @@ -618,14 +618,9 @@ void fix_exports(asm_context& ctx, std::vector& out, Program* program) } } else if ((*it)->definitions.size() && (*it)->definitions[0].physReg() == exec) break; - else if ((*it)->opcode == aco_opcode::s_endpgm) { - if (endBlock) - break; - endBlock = true; - } ++it; } - if (!endBlock || exported) + if (exported) continue; /* we didn't find an Export instruction and have to insert a null export */ aco_ptr exp{create_instruction(aco_opcode::exp, Format::EXP, 4, 0)}; @@ -639,7 +634,7 @@ void fix_exports(asm_context& ctx, std::vector& out, Program* program) exp->dest = 9; /* NULL */ else exp->dest = V_008DFC_SQ_EXP_POS; - /* insert the null export 1 instruction before endpgm */ + /* insert the null export 1 instruction before branch/endpgm */ block.instructions.insert(block.instructions.end() - 1, std::move(exp)); } } diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index c3e9cf9f913..4d464cc935e 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -8874,7 +8874,7 @@ void select_program(Program *program, program->config->float_mode = program->blocks[0].fp_mode.val; append_logical_end(ctx.block); - ctx.block->kind |= block_kind_uniform; + ctx.block->kind |= block_kind_uniform | block_kind_export_end; Builder bld(ctx.program, ctx.block); if (ctx.program->wb_smem_l1_on_end) bld.smem(aco_opcode::s_dcache_wb, false); diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index 24d1acf2b79..d3ebecc081e 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -1002,6 +1002,7 @@ enum block_kind { block_kind_uses_discard_if = 1 << 12, block_kind_needs_lowering = 1 << 13, block_kind_uses_demote = 1 << 14, + block_kind_export_end = 1 << 15, };