// Patch summary (leading text before the first "diff --git" header; not part of any hunk).
//
// Purpose: thread a new `bool for_src` argument through the AR (address register) load path.
//
// r600_asm.c / r600_asm.h
//   - r600_load_ar() and load_ar_r6xx() gain `bool for_src`; r600_load_ar() forwards it to
//     load_ar_r6xx() when bc->ar_handling is set.
//   - load_ar_r6xx(): when the existing "(bc->cf_last->ndw>>1) >= 110" check does not force a
//     new CF and for_src is true, insert_nop_r6xx(bc, 4) is called and bc->nalu_groups is
//     incremented before the MOVA_GPR_INT instruction is set up. The patch does not state why
//     the NOP is required.
//   - r600_bytecode_add_alu_type(): passes true for a relative source operand and false for a
//     relative destination.
//   - r600_asm.h declares r600_load_ar() in two places; both declarations are updated.
//
// sfn/
//   - AluInstr::indirect_addr() now returns a three-element tuple instead of a pair:
//       destination hit -> {addr, false, false}
//       source hit      -> {addr, !is_index, is_index}
//       no address      -> {nullptr, false, false}
//     AluGroup::update_indirect_access() binds the elements as (indirect_addr, for_src, is_index).
//   - AluGroup stores the second element in the new member m_addr_for_src (default false) and
//     exposes it through addr_for_src().
//   - AssamblerVisitor::visit(const AluGroup&) passes group.addr_for_src() to r600_load_ar();
//     AssamblerVisitor::visit(const IfInstr&) always passes true.
//   - BlockSheduler::collect_ready_alu_vec() replaces `.first` with std::get<0>(...) to match
//     the new return type.
//
// NOTE(review): the int result of insert_nop_r6xx() is discarded in load_ar_r6xx(), and the
//   result of r600_load_ar() is discarded at every call site shown here - confirm that a
//   failure on these paths may be ignored.
// NOTE(review): m_addr_for_src is written only when the group's address is first recorded, and
//   indirect_addr() reports a destination hit before looking at sources. A group in which the
//   same address is used for a destination first and for a source later would therefore keep
//   addr_for_src() == false - confirm this is intended.
// NOTE(review): std::pair / std::tuple / std::set / std::list appear below without template
//   arguments, and line breaks and indentation are collapsed; presumably lost when this patch
//   text was extracted - restore from the original commit before applying.
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 1dd6f15124b..666037af087 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -1196,7 +1196,7 @@ static int insert_nop_r6xx(struct r600_bytecode *bc, int max_slots) } /* load AR register from gpr (bc->ar_reg) with MOVA_INT */ -static int load_ar_r6xx(struct r600_bytecode *bc) +static int load_ar_r6xx(struct r600_bytecode *bc, bool for_src) { struct r600_bytecode_alu alu; int r; @@ -1207,6 +1207,10 @@ static int load_ar_r6xx(struct r600_bytecode *bc) /* hack to avoid making MOVA the last instruction in the clause */ if ((bc->cf_last->ndw>>1) >= 110) bc->force_add_cf = 1; + else if (for_src) { + insert_nop_r6xx(bc, 4); + bc->nalu_groups++; + } memset(&alu, 0, sizeof(alu)); alu.op = ALU_OP1_MOVA_GPR_INT; @@ -1224,13 +1228,13 @@ static int load_ar_r6xx(struct r600_bytecode *bc) } /* load AR register from gpr (bc->ar_reg) with MOVA_INT */ -int r600_load_ar(struct r600_bytecode *bc) +int r600_load_ar(struct r600_bytecode *bc, bool for_src) { struct r600_bytecode_alu alu; int r; if (bc->ar_handling) - return load_ar_r6xx(bc); + return load_ar_r6xx(bc, for_src); if (bc->ar_loaded) return 0; @@ -1306,10 +1310,10 @@ int r600_bytecode_add_alu_type(struct r600_bytecode *bc, /* Check AR usage and load it if required */ for (i = 0; i < 3; i++) if (nalu->src[i].rel && !bc->ar_loaded) - r600_load_ar(bc); + r600_load_ar(bc, true); if (nalu->dst.rel && !bc->ar_loaded) - r600_load_ar(bc); + r600_load_ar(bc, false); /* Setup the kcache for this ALU instruction. This will start a new * ALU clause if needed. 
*/ diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index 50f210827bb..47fd3ff0779 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -329,7 +329,7 @@ void r600_bytecode_special_constants(uint32_t value, unsigned *sel); void r600_bytecode_disasm(struct r600_bytecode *bc); void r600_bytecode_alu_read(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1); -int r600_load_ar(struct r600_bytecode *bc); +int r600_load_ar(struct r600_bytecode *bc, bool for_src); int cm_bytecode_add_cf_end(struct r600_bytecode *bc); @@ -355,7 +355,7 @@ void eg_bytecode_export_read(struct r600_bytecode *bc, void r600_vertex_data_type(enum pipe_format pformat, unsigned *format, unsigned *num_format, unsigned *format_comp, unsigned *endian); -int r600_load_ar(struct r600_bytecode *bc); +int r600_load_ar(struct r600_bytecode *bc, bool for_src); static inline int fp64_switch(int i) { diff --git a/src/gallium/drivers/r600/sfn/sfn_assembler.cpp b/src/gallium/drivers/r600/sfn/sfn_assembler.cpp index 0bb6e16f8d3..cfb7d720936 100644 --- a/src/gallium/drivers/r600/sfn/sfn_assembler.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_assembler.cpp @@ -413,7 +413,7 @@ void AssamblerVisitor::visit(const AluGroup& group) m_last_addr = addr.first; m_bc->ar_loaded = 0; - r600_load_ar(m_bc); + r600_load_ar(m_bc, group.addr_for_src()); } } else { emit_index_reg(*addr.first, 0); @@ -849,7 +849,7 @@ void AssamblerVisitor::visit(const IfInstr& instr) } auto pred = instr.predicate(); - auto [addr, dummy ] = pred->indirect_addr(); {} + auto [addr, dummy0, dummy1 ] = pred->indirect_addr(); {} if (addr) { if (!m_last_addr || !m_bc->ar_loaded || !m_last_addr->equal_to(*addr)) { @@ -858,7 +858,7 @@ void AssamblerVisitor::visit(const IfInstr& instr) m_last_addr = addr; m_bc->ar_loaded = 0; - r600_load_ar(m_bc); + r600_load_ar(m_bc, true); } } diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp 
b/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp index d3d00706fdd..8a2370fde35 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp @@ -622,23 +622,23 @@ void ResolveIndirectArrayAddr::visit(const UniformValue& value) } } -std::pair AluInstr::indirect_addr() const +std::tuple AluInstr::indirect_addr() const { ResolveIndirectArrayAddr visitor; if (m_dest) { m_dest->accept(visitor); if (visitor.addr) - return {visitor.addr, false}; + return {visitor.addr, false, false}; } for (auto s: m_src) { s->accept(visitor); if (visitor.addr) { - return {visitor.addr, visitor.is_index}; + return {visitor.addr, !visitor.is_index, visitor.is_index}; } } - return {nullptr, false}; + return {nullptr, false, false}; } AluGroup *AluInstr::split(ValueFactory& vf) diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_alu.h b/src/gallium/drivers/r600/sfn/sfn_instr_alu.h index 1d54e30baf8..b1ed854c925 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr_alu.h +++ b/src/gallium/drivers/r600/sfn/sfn_instr_alu.h @@ -145,7 +145,7 @@ public: static const std::set last; static const std::set last_write; - std::pair indirect_addr() const; + std::tuple indirect_addr() const; void add_extra_dependency(PVirtualValue reg); diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_alugroup.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_alugroup.cpp index 3b602f7849c..379919422bc 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr_alugroup.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_instr_alugroup.cpp @@ -231,13 +231,14 @@ bool AluGroup::try_readport(AluInstr *instr, AluBankSwizzle cycle) bool AluGroup::update_indirect_access(AluInstr *instr) { - auto indirect_addr = instr->indirect_addr(); + auto [indirect_addr, for_src, is_index ] = instr->indirect_addr(); - if (indirect_addr.first) { + if (indirect_addr) { if (!m_addr_used) { - m_addr_used = indirect_addr.first; - m_addr_is_index = indirect_addr.second; - } else if 
(!indirect_addr.first->equal_to(*m_addr_used)) { + m_addr_used = indirect_addr; + m_addr_for_src = for_src; + m_addr_is_index = is_index; + } else if (!indirect_addr->equal_to(*m_addr_used)) { return false; } } diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_alugroup.h b/src/gallium/drivers/r600/sfn/sfn_instr_alugroup.h index faabe01987d..df58df8204a 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr_alugroup.h +++ b/src/gallium/drivers/r600/sfn/sfn_instr_alugroup.h @@ -86,6 +86,8 @@ public: static bool has_t() { return s_max_slots == 5;} + bool addr_for_src() const { return m_addr_for_src;} + private: void forward_set_blockid(int id, int index) override; bool do_ready() const override; @@ -108,6 +110,7 @@ private: int m_nesting_depth{0}; bool m_has_lds_op{false}; bool m_addr_is_index{false}; + bool m_addr_for_src{false}; }; diff --git a/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp b/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp index 7cb8744bbb3..a80cc924cb2 100644 --- a/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_scheduler.cpp @@ -824,7 +824,7 @@ bool BlockSheduler::collect_ready_alu_vec(std::list& ready, std::lis auto opinfo = alu_ops.find((*i)->opcode()); assert(opinfo != alu_ops.end()); if (opinfo->second.can_channel(AluOp::t, m_chip_class) && - !(*i)->indirect_addr().first) + !std::get<0>((*i)->indirect_addr())) priority = -1; }