From f837fec2134be7717108defcecdef47d0802bb25 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Wed, 7 Jun 2023 17:33:46 +0100 Subject: [PATCH] aco/gfx11: use s_clause with stores MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fossil-db (gfx1100): Totals from 3535 (2.65% of 133428) affected shaders: Instrs: 1963996 -> 1968979 (+0.25%) CodeSize: 10077784 -> 10097716 (+0.20%) Latency: 13264216 -> 13264741 (+0.00%) InvThroughput: 2995676 -> 3004083 (+0.28%); split: -0.00%, +0.28% Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_form_hard_clauses.cpp | 16 +++-- src/amd/compiler/tests/test_hard_clause.cpp | 75 +++++++++++---------- 2 files changed, 48 insertions(+), 43 deletions(-) diff --git a/src/amd/compiler/aco_form_hard_clauses.cpp b/src/amd/compiler/aco_form_hard_clauses.cpp index 32c0b56fdf2..90ace0b9fb8 100644 --- a/src/amd/compiler/aco_form_hard_clauses.cpp +++ b/src/amd/compiler/aco_form_hard_clauses.cpp @@ -55,16 +55,18 @@ void emit_clause(Builder& bld, unsigned num_instrs, aco_ptr* instrs) { unsigned start = 0; + unsigned end = num_instrs; - /* skip any stores at the start */ - for (; (start < num_instrs) && instrs[start]->definitions.empty(); start++) - bld.insert(std::move(instrs[start])); + if (bld.program->gfx_level < GFX11) { + /* skip any stores at the start */ + for (; (start < num_instrs) && instrs[start]->definitions.empty(); start++) + bld.insert(std::move(instrs[start])); + + for (end = start; (end < num_instrs) && !instrs[end]->definitions.empty(); end++) + ; + } - unsigned end = start; - for (; (end < num_instrs) && !instrs[end]->definitions.empty(); end++) - ; unsigned clause_size = end - start; - if (clause_size > 1) bld.sopp(aco_opcode::s_clause, -1, clause_size - 1); diff --git a/src/amd/compiler/tests/test_hard_clause.cpp b/src/amd/compiler/tests/test_hard_clause.cpp index c9e26a070b8..57662db729f 100644 --- a/src/amd/compiler/tests/test_hard_clause.cpp +++ b/src/amd/compiler/tests/test_hard_clause.cpp @@ -315,45 +315,48 @@ BEGIN_TEST(form_hard_clauses.heuristic) END_TEST BEGIN_TEST(form_hard_clauses.stores) - if (!setup_cs(NULL, GFX10)) - return; + for (amd_gfx_level gfx : {GFX10, GFX11}) { + if (!setup_cs(NULL, gfx)) + continue; - //>> p_unit_test 0 - //; search_re('buffer_store_dword') - //; search_re('buffer_store_dword') - bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); - create_mubuf_store(); - create_mubuf_store(); + //>> p_unit_test 0 + //~gfx11! s_clause imm:1 + //; search_re('buffer_store_dword') + //; search_re('buffer_store_dword') + bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); + create_mubuf_store(); + create_mubuf_store(); - //>> p_unit_test 1 - //! s_clause imm:1 - //; search_re('buffer_load_dword') - //; search_re('buffer_load_dword') - //; search_re('buffer_store_dword') - bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u)); - create_mubuf(); - create_mubuf(); - create_mubuf_store(); + //>> p_unit_test 1 + //! s_clause imm:1 + //; search_re('buffer_load_dword') + //; search_re('buffer_load_dword') + //; search_re('buffer_store_dword') + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u)); + create_mubuf(); + create_mubuf(); + create_mubuf_store(); - //>> p_unit_test 2 - //; search_re('buffer_store_dword') - //! s_clause imm:1 - //; search_re('buffer_load_dword') - //; search_re('buffer_load_dword') - bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u)); - create_mubuf_store(); - create_mubuf(); - create_mubuf(); + //>> p_unit_test 2 + //; search_re('buffer_store_dword') + //! s_clause imm:1 + //; search_re('buffer_load_dword') + //; search_re('buffer_load_dword') + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u)); + create_mubuf_store(); + create_mubuf(); + create_mubuf(); - /* Unclear whether this is the best behaviour */ - //>> p_unit_test 3 - //; search_re('buffer_load_dword') - //; search_re('buffer_store_dword') - //; search_re('buffer_load_dword') - bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u)); - create_mubuf(); - create_mubuf_store(); - create_mubuf(); + /* Unclear whether this is the best behaviour */ + //>> p_unit_test 3 + //; search_re('buffer_load_dword') + //; search_re('buffer_store_dword') + //; search_re('buffer_load_dword') + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u)); + create_mubuf(); + create_mubuf_store(); + create_mubuf(); - finish_form_hard_clause_test(); + finish_form_hard_clause_test(); + } END_TEST