diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index 8400a5dc6df..09551902ba8 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -155,7 +155,8 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info, if (program->family == CHIP_TAHITI || program->family == CHIP_CARRIZO || program->family == CHIP_HAWAII) program->dev.has_fast_fma32 = true; - program->dev.has_mac_legacy32 = program->gfx_level <= GFX7 || program->gfx_level >= GFX10; + program->dev.has_mac_legacy32 = program->gfx_level <= GFX7 || program->gfx_level == GFX10; + program->dev.has_fmac_legacy32 = program->gfx_level >= GFX10_3 && program->gfx_level < GFX12; program->dev.fused_mad_mix = program->gfx_level >= GFX10; if (program->family == CHIP_VEGA12 || program->family == CHIP_VEGA20 || diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index fb62821398b..d1a2678e54e 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -2011,6 +2011,7 @@ struct DeviceInfo { unsigned simd_per_cu; bool has_fast_fma32 = false; bool has_mac_legacy32 = false; + bool has_fmac_legacy32 = false; bool fused_mad_mix = false; bool xnack_enabled = false; bool sram_ecc_enabled = false; diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index 6de9761e008..ddcdb3f5bae 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -2641,11 +2641,15 @@ get_affinities(ra_ctx& ctx, std::vector& live_out_per_block) break; case aco_opcode::v_mad_legacy_f32: - case aco_opcode::v_fma_legacy_f32: if (instr->usesModifiers() || !ctx.program->dev.has_mac_legacy32) continue; op = instr->operands[2]; break; + case aco_opcode::v_fma_legacy_f32: + if (instr->usesModifiers() || !ctx.program->dev.has_fmac_legacy32) + continue; + op = instr->operands[2]; + break; default: continue; } @@ -2741,7 +2745,7 @@ optimize_encoding_vop2(Program* program, ra_ctx& ctx, RegisterFile& register_fil (instr->opcode != aco_opcode::v_fma_f16 || program->gfx_level < GFX10) && (instr->opcode != aco_opcode::v_pk_fma_f16 || program->gfx_level < GFX10) && (instr->opcode != aco_opcode::v_mad_legacy_f32 || !program->dev.has_mac_legacy32) && - (instr->opcode != aco_opcode::v_fma_legacy_f32 || !program->dev.has_mac_legacy32) && + (instr->opcode != aco_opcode::v_fma_legacy_f32 || !program->dev.has_fmac_legacy32) && (instr->opcode != aco_opcode::v_dot4_i32_i8 || program->family == CHIP_VEGA20)) || !instr->operands[2].isTemp() || !instr->operands[2].isKillBeforeDef() || instr->operands[2].getTemp().type() != RegType::vgpr ||