From 07995b98a865be87f22fd89d027362bf20d275a0 Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Sat, 20 Apr 2024 21:58:44 +0200 Subject: [PATCH] r600/sfn: when emitting fp64 op2 groups pre-load values Since the group is created from the onset, we have to make sure that four or eight src values don't have a readport conflict, so force a pre-loading of the values to registers evenly distributed over the channels and let copy-propagation take care of cleaning up unnecessary moves. Fixes: 79ca456b4837b3bc21cf9ef3c03c505c4b4909f6 r600/sfn: rewrite NIR backend Signed-off-by: Gert Wollny Part-of: --- src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp | 16 ++++++++++++---- src/gallium/drivers/r600/sfn/sfn_shader.cpp | 7 ++++--- src/gallium/drivers/r600/sfn/sfn_shader.h | 2 +- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp b/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp index 175b3d8919a..d658a7f8cbf 100644 --- a/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_instr_alu.cpp @@ -2081,6 +2081,14 @@ emit_alu_op2_64bit(const nir_alu_instr& alu, int num_emit0 = opcode == op2_mul_64 ? 3 : 1; + std::array<std::array<PRegister, 4>, 2> tmp; + for (unsigned k = 0; k < alu.def.num_components; ++k) { + tmp[k][0] = shader.emit_load_to_register(value_factory.src64(alu.src[order[0]], k, 1), 0); + tmp[k][1] = shader.emit_load_to_register(value_factory.src64(alu.src[order[1]], k, 1), 1); + tmp[k][2] = shader.emit_load_to_register(value_factory.src64(alu.src[order[0]], k, 0), 2); + tmp[k][3] = shader.emit_load_to_register(value_factory.src64(alu.src[order[1]], k, 0), 3); + } + assert(num_emit0 == 1 || alu.def.num_components == 1); for (unsigned k = 0; k < alu.def.num_components; ++k) { @@ -2091,8 +2099,8 @@ emit_alu_op2_64bit(const nir_alu_instr& alu, ir = new AluInstr(opcode, dest, - value_factory.src64(alu.src[order[0]], k, 1), - value_factory.src64(alu.src[order[1]], k, 1), + tmp[k][0], + tmp[k][1], i < 2 ? 
AluInstr::write : AluInstr::empty); group->add_instruction(ir); } @@ -2102,8 +2110,8 @@ emit_alu_op2_64bit(const nir_alu_instr& alu, ir = new AluInstr(opcode, dest, - value_factory.src64(alu.src[order[0]], k, 0), - value_factory.src64(alu.src[order[1]], k, 0), + tmp[k][2], + tmp[k][3], i == 1 ? AluInstr::write : AluInstr::empty); group->add_instruction(ir); } diff --git a/src/gallium/drivers/r600/sfn/sfn_shader.cpp b/src/gallium/drivers/r600/sfn/sfn_shader.cpp index 51e2d346cdb..e5189636b4b 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_shader.cpp @@ -918,13 +918,14 @@ lds_op_from_intrinsic(nir_atomic_op op, bool ret) } PRegister -Shader::emit_load_to_register(PVirtualValue src) +Shader::emit_load_to_register(PVirtualValue src, int chan) { assert(src); PRegister dest = src->as_register(); - if (!dest) { - dest = value_factory().temp_register(); + if (!dest || chan >= 0) { + dest = value_factory().temp_register(chan); + dest->set_pin(pin_free); emit_instruction(new AluInstr(op1_mov, dest, src, AluInstr::last_write)); } return dest; diff --git a/src/gallium/drivers/r600/sfn/sfn_shader.h b/src/gallium/drivers/r600/sfn/sfn_shader.h index cac41e43087..ef176363866 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader.h +++ b/src/gallium/drivers/r600/sfn/sfn_shader.h @@ -241,7 +241,7 @@ public: return m_rat_return_address; } - PRegister emit_load_to_register(PVirtualValue src); + PRegister emit_load_to_register(PVirtualValue src, int chan = -1); virtual unsigned image_size_const_offset() { return 0;}