From c8123b67e04ca6b7f6afd8dc1303ea797c7aa028 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Fri, 17 May 2024 19:14:37 +0100 Subject: [PATCH] aco/gfx12: don't create v_fmac_legacy_f32 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rhys Perry Reviewed-by: Georg Lehmann Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_ir.cpp | 3 ++- src/amd/compiler/aco_ir.h | 1 + src/amd/compiler/aco_register_allocation.cpp | 8 ++++++-- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index 8400a5dc6df..09551902ba8 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -155,7 +155,8 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info, if (program->family == CHIP_TAHITI || program->family == CHIP_CARRIZO || program->family == CHIP_HAWAII) program->dev.has_fast_fma32 = true; - program->dev.has_mac_legacy32 = program->gfx_level <= GFX7 || program->gfx_level >= GFX10; + program->dev.has_mac_legacy32 = program->gfx_level <= GFX7 || program->gfx_level == GFX10; + program->dev.has_fmac_legacy32 = program->gfx_level >= GFX10_3 && program->gfx_level < GFX12; program->dev.fused_mad_mix = program->gfx_level >= GFX10; if (program->family == CHIP_VEGA12 || program->family == CHIP_VEGA20 || diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index fb62821398b..d1a2678e54e 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -2011,6 +2011,7 @@ struct DeviceInfo { unsigned simd_per_cu; bool has_fast_fma32 = false; bool has_mac_legacy32 = false; + bool has_fmac_legacy32 = false; bool fused_mad_mix = false; bool xnack_enabled = false; bool sram_ecc_enabled = false; diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index 6de9761e008..ddcdb3f5bae 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -2641,11 +2641,15 @@ get_affinities(ra_ctx& ctx, std::vector& live_out_per_block) break; case aco_opcode::v_mad_legacy_f32: - case aco_opcode::v_fma_legacy_f32: if (instr->usesModifiers() || !ctx.program->dev.has_mac_legacy32) continue; op = instr->operands[2]; break; + case aco_opcode::v_fma_legacy_f32: + if (instr->usesModifiers() || !ctx.program->dev.has_fmac_legacy32) + continue; + op = instr->operands[2]; + break; default: continue; } @@ -2741,7 +2745,7 @@ optimize_encoding_vop2(Program* program, ra_ctx& ctx, RegisterFile& register_fil (instr->opcode != aco_opcode::v_fma_f16 || program->gfx_level < GFX10) && (instr->opcode != aco_opcode::v_pk_fma_f16 || program->gfx_level < GFX10) && (instr->opcode != aco_opcode::v_mad_legacy_f32 || !program->dev.has_mac_legacy32) && - (instr->opcode != aco_opcode::v_fma_legacy_f32 || !program->dev.has_mac_legacy32) && + (instr->opcode != aco_opcode::v_fma_legacy_f32 || !program->dev.has_fmac_legacy32) && (instr->opcode != aco_opcode::v_dot4_i32_i8 || program->family == CHIP_VEGA20)) || !instr->operands[2].isTemp() || !instr->operands[2].isKillBeforeDef() || instr->operands[2].getTemp().type() != RegType::vgpr ||