amd: add ac_cu_info::has_mad32 flag and use in ACO
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38701>
This commit is contained in:
committed by
Marge Bot
parent
1e3db50170
commit
cfb745592d
@@ -310,6 +310,7 @@ ac_fill_cu_info(struct radeon_info *info, struct drm_amdgpu_info_device *device_
|
||||
info->family == CHIP_VEGA12 || info->family == CHIP_VEGA20 ||
|
||||
info->family == CHIP_MI100 || info->family == CHIP_MI200 ||
|
||||
info->family == CHIP_GFX940;
|
||||
cu_info->has_mad32 = info->gfx_level == GFX9 ? info->family <= CHIP_MI200 : info->gfx_level < GFX10_3;
|
||||
cu_info->has_packed_math_16bit = info->gfx_level >= GFX9;
|
||||
cu_info->has_accelerated_dot_product =
|
||||
info->family == CHIP_VEGA20 ||
|
||||
|
||||
@@ -52,6 +52,8 @@ struct ac_cu_info {
|
||||
* Otherwise, unfused v_mad_mix* is available on GFX9.
|
||||
*/
|
||||
bool has_fma_mix : 1;
|
||||
/* Whether chips support unfused 32-bit multiply-add instructions (v_mad_f32, v_mad_legacy_f32). */
|
||||
bool has_mad32 : 1;
|
||||
/* Whether chips support double rate packed math instructions. */
|
||||
bool has_packed_math_16bit : 1;
|
||||
/* Whether chips support dot product instructions. A subset of these support a smaller
|
||||
|
||||
@@ -112,6 +112,7 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info,
|
||||
program->dev.has_mac_legacy32 = program->gfx_level <= GFX7 || program->gfx_level == GFX10;
|
||||
program->dev.has_fmac_legacy32 = program->gfx_level >= GFX10_3 && program->gfx_level < GFX12;
|
||||
program->dev.fused_mad_mix = options->cu_info->has_fma_mix;
|
||||
program->dev.has_mad32 = options->cu_info->has_mad32;
|
||||
|
||||
if (program->gfx_level >= GFX12) {
|
||||
program->dev.scratch_global_offset_min = -8388608;
|
||||
|
||||
@@ -2245,6 +2245,7 @@ struct DeviceInfo {
|
||||
bool has_fast_fma32 = false;
|
||||
bool has_mac_legacy32 = false;
|
||||
bool has_fmac_legacy32 = false;
|
||||
bool has_mad32 = false;
|
||||
bool fused_mad_mix = false;
|
||||
bool xnack_enabled = false;
|
||||
bool sram_ecc_enabled = false;
|
||||
|
||||
@@ -4202,8 +4202,7 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
||||
combine_instr_pattern{aco_opcode::src_op, aco_opcode::res_op, mask, swizzle, __VA_ARGS__})
|
||||
|
||||
if (info.opcode == aco_opcode::v_add_f32) {
|
||||
-if (ctx.program->gfx_level < GFX10_3 && ctx.program->family != CHIP_GFX940 &&
|
||||
-ctx.fp_mode.denorm32 == 0) {
|
||||
+if (ctx.program->dev.has_mad32 && ctx.fp_mode.denorm32 == 0) {
|
||||
add_opt(v_mul_f32, v_mad_f32, 0x3, "120");
|
||||
add_opt(v_mul_legacy_f32, v_mad_legacy_f32, 0x3, "120");
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user