aco: use v_fma_mix for f2f32 and f2f16 on gfx11 if wave64
v_fma_mix can be dual-issued, so this trades some code size for throughput.

Foz-DB GFX1100:
Totals from 8204 (6.08% of 134864) affected shaders:
CodeSize: 89608584 -> 89693968 (+0.10%)
Latency: 160744811 -> 160699309 (-0.03%); split: -0.03%, +0.00%
InvThroughput: 19737977 -> 19678308 (-0.30%)

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21402>
This commit is contained in:
@@ -1383,14 +1383,15 @@ BEGIN_TEST(optimize.mad_mix.input_conv.modifiers)
|
||||
writeout(14, fmul(f2f32(ext_ushort(a, 1)), a));
|
||||
|
||||
//~gfx(9|10)! v1: %res15_cvt = v_cvt_f32_f16 %a dst_sel:uword0 src0_sel:dword
|
||||
//~gfx11! v1: %res16_cvt1 = v_cvt_f32_f16 %a
|
||||
//~gfx11! v1: %res16_cvt1 = v_fma_mix_f32 lo(%a), 1.0, -0
|
||||
//~gfx11! v1: %res15_cvt = p_extract %res16_cvt1, 0, 16, 0
|
||||
//! v1: %res15 = v_mul_f32 %res15_cvt, %a
|
||||
//! p_unit_test 15, %res15
|
||||
writeout(15, fmul(ext_ushort(f2f32(a), 0), a));
|
||||
|
||||
//! v1: %res16_cvt = v_cvt_f32_f16 %a
|
||||
//~gfx(9|10)! v1: %res16_cvt = v_cvt_f32_f16 %a
|
||||
//~gfx(9|10)! v1: %res16 = v_mul_f32 %res16_cvt, %a dst_sel:dword src0_sel:uword1 src1_sel:dword
|
||||
//~gfx11! v1: %res16_cvt = v_fma_mix_f32 lo(%a), 1.0, -0
|
||||
//~gfx11! v1: %res16_ext = p_extract %res16_cvt, 1, 16, 0
|
||||
//~gfx11! v1: %res16 = v_mul_f32 %res16_ext, %a
|
||||
//! p_unit_test 16, %res16
|
||||
|
||||
Reference in New Issue
Block a user