From 80652de67ba990cee16ee68f51d86a156ad40e2e Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Thu, 28 Mar 2024 14:19:46 +0100 Subject: [PATCH] aco/ra: use SDWA for 16bit instructions when the second byte is blocked Found by inspection, I think this can happen with pack_32_4x8(f2u8(a@16)), which will use v_cvt_u16_f16 (a 16bit instruction) with a v1b definition. No Foz-DB changes on Navi21. Cc: mesa-stable Reviewed-by: Rhys Perry Part-of: --- src/amd/compiler/aco_register_allocation.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index 40542b0187f..a0e3f94ce56 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -47,7 +47,8 @@ void add_subdword_operand(ra_ctx& ctx, aco_ptr& instr, unsigned idx RegClass rc); std::pair get_subdword_definition_info(Program* program, const aco_ptr& instr, RegClass rc); -void add_subdword_definition(Program* program, aco_ptr& instr, PhysReg reg); +void add_subdword_definition(Program* program, aco_ptr& instr, PhysReg reg, + bool allow_16bit_write); struct assignment { PhysReg reg; @@ -697,7 +698,8 @@ get_subdword_definition_info(Program* program, const aco_ptr& instr } void -add_subdword_definition(Program* program, aco_ptr& instr, PhysReg reg) +add_subdword_definition(Program* program, aco_ptr& instr, PhysReg reg, + bool allow_16bit_write) { if (instr->isPseudo()) return; @@ -706,7 +708,7 @@ add_subdword_definition(Program* program, aco_ptr& instr, PhysReg r amd_gfx_level gfx_level = program->gfx_level; assert(instr->definitions[0].bytes() <= 2); - if (reg.byte() == 0 && instr_is_16bit(gfx_level, instr->opcode)) + if (reg.byte() == 0 && allow_16bit_write && instr_is_16bit(gfx_level, instr->opcode)) return; /* use SDWA */ @@ -715,6 +717,8 @@ add_subdword_definition(Program* program, aco_ptr& instr, PhysReg r return; } + assert(allow_16bit_write); + if (instr->opcode == aco_opcode::v_fma_mixlo_f16) { instr->opcode = aco_opcode::v_fma_mixhi_f16; return; @@ -3230,7 +3234,8 @@ register_allocation(Program* program, live& live_vars, ra_test_policy policy) PhysReg reg = get_reg(ctx, register_file, tmp, parallelcopy, instr); definition->setFixed(reg); if (reg.byte() || register_file.test(reg, 4)) { - add_subdword_definition(program, instr, reg); + bool allow_16bit_write = reg.byte() % 2 == 0 && !register_file.test(reg, 2); + add_subdword_definition(program, instr, reg, allow_16bit_write); definition = &instr->definitions[i]; /* add_subdword_definition can invalidate the reference */ }