From e79a8219d23b82f239d3ce7bda133bbe07cf29af Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Thu, 16 May 2024 17:10:57 +0100 Subject: [PATCH] aco/gfx12: sign-extend s_getpc_b64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rhys Perry Reviewed-by: Georg Lehmann Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_assembler.cpp | 8 +++- src/amd/compiler/aco_lower_to_hw_instr.cpp | 4 ++ src/amd/compiler/tests/test_assembler.cpp | 50 ++++++++++++---------- 3 files changed, 38 insertions(+), 24 deletions(-) diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp index 88ff204999a..203e8719595 100644 --- a/src/amd/compiler/aco_assembler.cpp +++ b/src/amd/compiler/aco_assembler.cpp @@ -1537,7 +1537,8 @@ emit_long_jump(asm_context& ctx, SALU_instruction* branch, bool backwards, case aco_opcode::s_cbranch_execnz: inv = aco_opcode::s_cbranch_execz; break; default: unreachable("Unhandled long jump."); } - instr.reset(bld.sopp(inv, 6)); + unsigned size = ctx.gfx_level >= GFX12 ? 7 : 6; + instr.reset(bld.sopp(inv, size)); emit_sopp_instruction(ctx, out, instr.get(), true); } @@ -1545,6 +1546,11 @@ emit_long_jump(asm_context& ctx, SALU_instruction* branch, bool backwards, instr.reset(bld.sop1(aco_opcode::s_getpc_b64, def).instr); emit_instruction(ctx, out, instr.get()); + if (ctx.gfx_level >= GFX12) { + instr.reset(bld.sop1(aco_opcode::s_sext_i32_i16, def_tmp_hi, op_tmp_hi).instr); + emit_instruction(ctx, out, instr.get()); + } + instr.reset( bld.sop2(aco_opcode::s_addc_u32, def_tmp_lo, op_tmp_lo, Operand::literal32(0)).instr); emit_instruction(ctx, out, instr.get()); diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index acdd1f673e2..c80d2cb6a3e 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -2402,6 +2402,8 @@ lower_to_hw_instr(Program* program) unsigned id = instr->definitions[0].tempId(); PhysReg reg = instr->definitions[0].physReg(); bld.sop1(aco_opcode::p_constaddr_getpc, instr->definitions[0], Operand::c32(id)); + if (ctx.program->gfx_level >= GFX12) + bld.sop1(aco_opcode::s_sext_i32_i16, Definition(reg.advance(4), s1), Operand(reg.advance(4), s1)); bld.sop2(aco_opcode::p_constaddr_addlo, Definition(reg, s1), bld.def(s1, scc), Operand(reg, s1), instr->operands[0], Operand::c32(id)); /* s_addc_u32 not needed because the program is in a 32-bit VA range */ @@ -2424,6 +2426,8 @@ lower_to_hw_instr(Program* program) unsigned id = instr->definitions[0].tempId(); PhysReg reg = instr->definitions[0].physReg(); bld.sop1(aco_opcode::p_resumeaddr_getpc, instr->definitions[0], Operand::c32(id)); + if (ctx.program->gfx_level >= GFX12) + bld.sop1(aco_opcode::s_sext_i32_i16, Definition(reg.advance(4), s1), Operand(reg.advance(4), s1)); bld.sop2(aco_opcode::p_resumeaddr_addlo, Definition(reg, s1), bld.def(s1, scc), Operand(reg, s1), Operand::c32(resume_block_idx), Operand::c32(id)); /* s_addc_u32 not needed because the program is in a 32-bit VA range */ diff --git a/src/amd/compiler/tests/test_assembler.cpp b/src/amd/compiler/tests/test_assembler.cpp index 64cd68b1b9e..57a6b3c5d87 100644 --- a/src/amd/compiler/tests/test_assembler.cpp +++ b/src/amd/compiler/tests/test_assembler.cpp @@ -85,35 +85,39 @@ BEGIN_TEST(assembler.long_jump.unconditional_forwards) END_TEST BEGIN_TEST(assembler.long_jump.conditional_forwards) - if (!setup_cs(NULL, (amd_gfx_level)GFX10)) - return; + for (amd_gfx_level gfx : filter_gfx_levels({GFX10, GFX12})) { + if (!setup_cs(NULL, gfx)) + continue; - //! BB0: - //! s_cbranch_scc1 BB1 ; bf850006 - //! s_getpc_b64 s[0:1] ; be801f00 - //! s_addc_u32 s0, s0, 0x20014 ; 8200ff00 00020014 - //! s_bitcmp1_b32 s0, 0 ; bf0d8000 - //! s_bitset0_b32 s0, 0 ; be801b80 - //! s_setpc_b64 s[0:1] ; be802000 - bld.sopp(aco_opcode::s_cbranch_scc0, Definition(PhysReg(0), s2), 2); + //! BB0: + //! s_cbranch_scc1 BB1 ; $_ + //! s_getpc_b64 s[0:1] ; $_ + //~gfx12! s_sext_i32_i16 s1, s1 ; $_ + //~gfx10! s_addc_u32 s0, s0, 0x20014 ; $_ $_ + //~gfx12! s_add_co_ci_u32 s0, s0, 0x20014 ; $_ $_ + //! s_bitcmp1_b32 s0, 0 ; $_ + //! s_bitset0_b32 s0, 0 ; $_ + //! s_setpc_b64 s[0:1] ; $_ + bld.sopp(aco_opcode::s_cbranch_scc0, Definition(PhysReg(0), s2), 2); - bld.reset(program->create_and_insert_block()); + bld.reset(program->create_and_insert_block()); - //! BB1: - //! s_nop 0 ; bf800000 - //!(then repeated 32767 times) - for (unsigned i = 0; i < INT16_MAX + 1; i++) - bld.sopp(aco_opcode::s_nop, 0); + //! BB1: + //! s_nop 0 ; bf800000 + //!(then repeated 32767 times) + for (unsigned i = 0; i < INT16_MAX + 1; i++) + bld.sopp(aco_opcode::s_nop, 0); - //! BB2: - //! s_endpgm ; bf810000 - bld.reset(program->create_and_insert_block()); + //! BB2: + //! s_endpgm ; $_ + bld.reset(program->create_and_insert_block()); - program->blocks[1].linear_preds.push_back(0u); - program->blocks[2].linear_preds.push_back(0u); - program->blocks[2].linear_preds.push_back(1u); + program->blocks[1].linear_preds.push_back(0u); + program->blocks[2].linear_preds.push_back(0u); + program->blocks[2].linear_preds.push_back(1u); - finish_assembler_test(); + finish_assembler_test(); + } END_TEST BEGIN_TEST(assembler.long_jump.unconditional_backwards)