From cfb745592d7d1a6e69a062a2ef781f96fcf59894 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Thu, 27 Nov 2025 12:53:52 +0100 Subject: [PATCH] amd: add ac_cu_info::has_mad32 flag and use in ACO Part-of: --- src/amd/common/ac_gpu_info.c | 1 + src/amd/common/ac_gpu_info.h | 2 ++ src/amd/compiler/aco_ir.cpp | 1 + src/amd/compiler/aco_ir.h | 1 + src/amd/compiler/aco_optimizer.cpp | 3 +-- 5 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index 247077e481c..feac58e7d35 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -310,6 +310,7 @@ ac_fill_cu_info(struct radeon_info *info, struct drm_amdgpu_info_device *device_ info->family == CHIP_VEGA12 || info->family == CHIP_VEGA20 || info->family == CHIP_MI100 || info->family == CHIP_MI200 || info->family == CHIP_GFX940; + cu_info->has_mad32 = info->gfx_level == GFX9 ? info->family <= CHIP_MI200 : info->gfx_level < GFX10_3; cu_info->has_packed_math_16bit = info->gfx_level >= GFX9; cu_info->has_accelerated_dot_product = info->family == CHIP_VEGA20 || diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h index 1e95c9e2aa8..6149929c95a 100644 --- a/src/amd/common/ac_gpu_info.h +++ b/src/amd/common/ac_gpu_info.h @@ -52,6 +52,8 @@ struct ac_cu_info { * Otherwise, unfused v_mad_mix* is available on GFX9. */ bool has_fma_mix : 1; + /* Whether chips support unfused multiply-add instructions. */ + bool has_mad32 : 1; /* Whether chips support double rate packed math instructions. */ bool has_packed_math_16bit : 1; /* Whether chips support dot product instructions. 
A subset of these support a smaller diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index a1d3ea20c49..bd5d4a7e90b 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -112,6 +112,7 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info, program->dev.has_mac_legacy32 = program->gfx_level <= GFX7 || program->gfx_level == GFX10; program->dev.has_fmac_legacy32 = program->gfx_level >= GFX10_3 && program->gfx_level < GFX12; program->dev.fused_mad_mix = options->cu_info->has_fma_mix; + program->dev.has_mad32 = options->cu_info->has_mad32; if (program->gfx_level >= GFX12) { program->dev.scratch_global_offset_min = -8388608; diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index c432d37f230..77639517f96 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -2245,6 +2245,7 @@ struct DeviceInfo { bool has_fast_fma32 = false; bool has_mac_legacy32 = false; bool has_fmac_legacy32 = false; + bool has_mad32 = false; bool fused_mad_mix = false; bool xnack_enabled = false; bool sram_ecc_enabled = false; diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index 224f8bcce39..00f82aa5532 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -4202,8 +4202,7 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr) combine_instr_pattern{aco_opcode::src_op, aco_opcode::res_op, mask, swizzle, __VA_ARGS__}) if (info.opcode == aco_opcode::v_add_f32) { - if (ctx.program->gfx_level < GFX10_3 && ctx.program->family != CHIP_GFX940 && - ctx.fp_mode.denorm32 == 0) { + if (ctx.program->dev.has_mad32 && ctx.fp_mode.denorm32 == 0) { add_opt(v_mul_f32, v_mad_f32, 0x3, "120"); add_opt(v_mul_legacy_f32, v_mad_legacy_f32, 0x3, "120"); }