r600/sfn: when emitting fp64 op2 groups pre-load values
Since the group is created from the onset, we have to make
sure that four or eight src values don't have a readport
conflict, so force a pre-loading of the values to registers
evenly distributed over the channels and let copy-propagation
take care of cleaning up unnecessary moves.
Fixes: 79ca456b48
r600/sfn: rewrite NIR backend
Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28840>
This commit is contained in:
@@ -2081,6 +2081,14 @@ emit_alu_op2_64bit(const nir_alu_instr& alu,
|
||||
|
||||
int num_emit0 = opcode == op2_mul_64 ? 3 : 1;
|
||||
|
||||
std::array<std::array<PRegister, 4>,2> tmp;
|
||||
for (unsigned k = 0; k < alu.def.num_components; ++k) {
|
||||
tmp[k][0] = shader.emit_load_to_register(value_factory.src64(alu.src[order[0]], k, 1), 0);
|
||||
tmp[k][1] = shader.emit_load_to_register(value_factory.src64(alu.src[order[1]], k, 1), 1);
|
||||
tmp[k][2] = shader.emit_load_to_register(value_factory.src64(alu.src[order[0]], k, 0), 2);
|
||||
tmp[k][3] = shader.emit_load_to_register(value_factory.src64(alu.src[order[1]], k, 0), 3);
|
||||
}
|
||||
|
||||
assert(num_emit0 == 1 || alu.def.num_components == 1);
|
||||
|
||||
for (unsigned k = 0; k < alu.def.num_components; ++k) {
|
||||
@@ -2091,8 +2099,8 @@ emit_alu_op2_64bit(const nir_alu_instr& alu,
|
||||
|
||||
ir = new AluInstr(opcode,
|
||||
dest,
|
||||
value_factory.src64(alu.src[order[0]], k, 1),
|
||||
value_factory.src64(alu.src[order[1]], k, 1),
|
||||
tmp[k][0],
|
||||
tmp[k][1],
|
||||
i < 2 ? AluInstr::write : AluInstr::empty);
|
||||
group->add_instruction(ir);
|
||||
}
|
||||
@@ -2102,8 +2110,8 @@ emit_alu_op2_64bit(const nir_alu_instr& alu,
|
||||
|
||||
ir = new AluInstr(opcode,
|
||||
dest,
|
||||
value_factory.src64(alu.src[order[0]], k, 0),
|
||||
value_factory.src64(alu.src[order[1]], k, 0),
|
||||
tmp[k][2],
|
||||
tmp[k][3],
|
||||
i == 1 ? AluInstr::write : AluInstr::empty);
|
||||
group->add_instruction(ir);
|
||||
}
|
||||
|
||||
@@ -918,13 +918,14 @@ lds_op_from_intrinsic(nir_atomic_op op, bool ret)
|
||||
}
|
||||
|
||||
PRegister
|
||||
Shader::emit_load_to_register(PVirtualValue src)
|
||||
Shader::emit_load_to_register(PVirtualValue src, int chan)
|
||||
{
|
||||
assert(src);
|
||||
PRegister dest = src->as_register();
|
||||
|
||||
if (!dest) {
|
||||
dest = value_factory().temp_register();
|
||||
if (!dest || chan >= 0) {
|
||||
dest = value_factory().temp_register(chan);
|
||||
dest->set_pin(pin_free);
|
||||
emit_instruction(new AluInstr(op1_mov, dest, src, AluInstr::last_write));
|
||||
}
|
||||
return dest;
|
||||
|
||||
@@ -241,7 +241,7 @@ public:
|
||||
return m_rat_return_address;
|
||||
}
|
||||
|
||||
PRegister emit_load_to_register(PVirtualValue src);
|
||||
PRegister emit_load_to_register(PVirtualValue src, int chan = -1);
|
||||
|
||||
virtual unsigned image_size_const_offset() { return 0;}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user