From 00b752565cfdbe6d8447a4c072bf857bf5697e2a Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Thu, 10 Jul 2025 16:27:29 +0200 Subject: [PATCH] r600/sfn: Add support for indirect VS input read I've overlooked that unconditional lowering of indirect VS inputs had been dropped. Since VS inputs are stored in consecutive registers one can implement the indirect access without additional lowering, it just needs a proper declaration of the registers forming the array. v2: - Fix formatting (Patrick Lerda) - Use allocator for std::map to avoid memory leak (Patrick Lerda) Fixes: a43bfffe1e7464f876f68dfc98b886b47a4d9690 r600: Correct nir_indirect_supported_mask Signed-off-by: Gert Wollny Part-of: --- .../drivers/r600/sfn/sfn_shader_vs.cpp | 62 +++++++++++++------ src/gallium/drivers/r600/sfn/sfn_shader_vs.h | 6 ++ .../drivers/r600/sfn/sfn_valuefactory.cpp | 18 ++++++ .../drivers/r600/sfn/sfn_valuefactory.h | 1 + 4 files changed, 69 insertions(+), 18 deletions(-) diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_vs.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_vs.cpp index 42fe846fea4..e1fc0e4b158 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader_vs.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_shader_vs.cpp @@ -417,9 +417,13 @@ VertexShader::do_scan_instruction(nir_instr *instr) switch (intr->intrinsic) { case nir_intrinsic_load_input: { - int vtx_register = nir_intrinsic_base(intr) + 1; + int vtx_register = + nir_intrinsic_base(intr) + nir_intrinsic_io_semantics(intr).num_slots; if (m_last_vertex_attribute_register < vtx_register) m_last_vertex_attribute_register = vtx_register; + if (nir_intrinsic_io_semantics(intr).num_slots > 1) + m_input_array_ranges[nir_intrinsic_base(intr) + 1] = + nir_intrinsic_io_semantics(intr).num_slots; return true; } case nir_intrinsic_store_output: { @@ -474,27 +478,38 @@ VertexShader::do_scan_instruction(nir_instr *instr) bool VertexShader::load_input(nir_intrinsic_instr *intr) { - unsigned driver_location = nir_intrinsic_base(intr); + 
unsigned range_base = nir_intrinsic_base(intr) + 1; unsigned location = nir_intrinsic_io_semantics(intr).location; auto& vf = value_factory(); - AluInstr *ir = nullptr; - if (location < VERT_ATTRIB_MAX) { - for (unsigned i = 0; i < intr->def.num_components; ++i) { - auto src = vf.allocate_pinned_register(driver_location + 1, i); - src->set_flag(Register::ssa); - vf.inject_value(intr->def, i, src); - } - if (ir) - ir->set_alu_flag(alu_last_instr); - - ShaderInput input(driver_location); - input.set_gpr(driver_location + 1); - add_input(input); - return true; + if (location >= VERT_ATTRIB_MAX) { + fprintf(stderr, "r600-NIR: Unimplemented load_deref for %d\n", location); + return false; } - fprintf(stderr, "r600-NIR: Unimplemented load_deref for %d\n", location); - return false; + + for (auto [start, array] : m_input_arrays) { + if (range_base >= start && range_base < start + array->size()) { + auto addr = vf.src(intr->src[0], 0); + for (unsigned i = 0; i < intr->def.num_components; ++i) { + auto src = array->element(0, addr, i); + auto dst = vf.dest(intr->def, i, pin_free); + emit_instruction(new AluInstr(op1_mov, dst, src, AluInstr::write)); + } + return true; + } + } + + /* We didn't find an array so inject the value and add the register lazily */ + for (unsigned i = 0; i < intr->def.num_components; ++i) { + auto src = vf.allocate_pinned_register(range_base, i); + src->set_flag(Register::ssa); + vf.inject_value(intr->def, i, src); + } + + ShaderInput input(range_base - 1); + input.set_gpr(range_base); + add_input(input); + return true; } int @@ -529,6 +544,17 @@ VertexShader::do_allocate_reserved_registers() m_draw_parameters_enabled = true; } + for (auto [start, size] : m_input_array_ranges) { + auto array = value_factory().allocate_pinned_array(start, size, 4); + m_input_arrays[start] = array; + + for (int i = 0; i < size; ++i) { + ShaderInput input(start + i - 1); + input.set_gpr(start + i); + add_input(input); + } + } + return m_last_vertex_attribute_register + 
1; } diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_vs.h b/src/gallium/drivers/r600/sfn/sfn_shader_vs.h index 38383a11a22..03dc7ead085 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader_vs.h +++ b/src/gallium/drivers/r600/sfn/sfn_shader_vs.h @@ -156,6 +156,12 @@ private: void do_get_shader_info(r600_shader *sh_info) override; VertexExportStage *m_export_stage{nullptr}; + template + using ArrayMap = + std::map, Allocator>>; + + ArrayMap m_input_array_ranges; + ArrayMap m_input_arrays; int m_last_vertex_attribute_register{0}; PRegister m_vertex_id{nullptr}; PRegister m_instance_id{nullptr}; diff --git a/src/gallium/drivers/r600/sfn/sfn_valuefactory.cpp b/src/gallium/drivers/r600/sfn/sfn_valuefactory.cpp index 61749190859..a718b2099d0 100644 --- a/src/gallium/drivers/r600/sfn/sfn_valuefactory.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_valuefactory.cpp @@ -147,6 +147,24 @@ ValueFactory::allocate_pinned_vec4(int sel, bool is_ssa) return retval; } +LocalArray * +ValueFactory::allocate_pinned_array(int start, int size, int channels) +{ + auto array = new LocalArray(start, channels, 4, 0); + + for (int i = 0; i < channels; ++i) { + RegisterKey key(start, i, vp_array); + m_registers[key] = array; + } + + for (auto reg : *array) { + reg->set_pin(pin_fully); + reg->set_flag(Register::pin_start); + reg->set_flag(Register::ssa); + } + return array; +} + void ValueFactory::inject_value(const nir_def& def, int chan, PVirtualValue value) { diff --git a/src/gallium/drivers/r600/sfn/sfn_valuefactory.h b/src/gallium/drivers/r600/sfn/sfn_valuefactory.h index b8400384fa9..bcf67a59f4e 100644 --- a/src/gallium/drivers/r600/sfn/sfn_valuefactory.h +++ b/src/gallium/drivers/r600/sfn/sfn_valuefactory.h @@ -211,6 +211,7 @@ public: bool allocate_registers(const nir_intrinsic_instr_alloc& regs); PRegister allocate_pinned_register(int sel, int chan); RegisterVec4 allocate_pinned_vec4(int sel, bool is_ssa); + LocalArray *allocate_pinned_array(int start, int size, int channels); /* Inject 
a predefined value for a given dest value * (usually the result of a sysvalue load) */