Files
mesa/src
Marek Olšák 2596ae2b6e radeonsi: emit PS exports last
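This effectively removes s_waitcnt instructions after FP16 exports.
Previously every export read its data from v0/v1, and the conversions
for the next export overwrote those same registers, so an
s_waitcnt expcnt(0) had to be placed after each export. With the exports
emitted last, each export reads its own register pair (v0/v1, v2/v3,
v4/v5, v6/v7) and no wait is needed.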

Before:

    v_cvt_pkrtz_f16_f32_e32 v0, v0, v1   ; 5E000300
    v_cvt_pkrtz_f16_f32_e32 v1, v2, v3   ; 5E020702
    exp 15, 0, 1, 0, 0, v0, v1, v0, v0   ; F800040F 00000100
    s_waitcnt expcnt(0)                  ; BF8C0F0F
    v_cvt_pkrtz_f16_f32_e32 v0, v4, v5   ; 5E000B04
    v_cvt_pkrtz_f16_f32_e32 v1, v6, v7   ; 5E020F06
    exp 15, 1, 1, 0, 0, v0, v1, v0, v0   ; F800041F 00000100
    s_waitcnt expcnt(0)                  ; BF8C0F0F
    v_cvt_pkrtz_f16_f32_e32 v0, v8, v9   ; 5E001308
    v_cvt_pkrtz_f16_f32_e32 v1, v10, v11 ; 5E02170A
    exp 15, 2, 1, 0, 0, v0, v1, v0, v0   ; F800042F 00000100
    s_waitcnt expcnt(0)                  ; BF8C0F0F
    v_cvt_pkrtz_f16_f32_e32 v0, v12, v13 ; 5E001B0C
    v_cvt_pkrtz_f16_f32_e32 v1, v14, v15 ; 5E021F0E
    exp 15, 3, 1, 1, 1, v0, v1, v0, v0   ; F8001C3F 00000100
    s_endpgm                             ; BF810000

After:

    v_cvt_pkrtz_f16_f32_e32 v0, v0, v1   ; 5E000300
    v_cvt_pkrtz_f16_f32_e32 v1, v2, v3   ; 5E020702
    v_cvt_pkrtz_f16_f32_e32 v2, v4, v5   ; 5E040B04
    v_cvt_pkrtz_f16_f32_e32 v3, v6, v7   ; 5E060F06
    exp 15, 0, 1, 0, 0, v0, v1, v0, v0   ; F800040F 00000100
    v_cvt_pkrtz_f16_f32_e32 v4, v8, v9   ; 5E081308
    v_cvt_pkrtz_f16_f32_e32 v5, v10, v11 ; 5E0A170A
    exp 15, 1, 1, 0, 0, v2, v3, v0, v0   ; F800041F 00000302
    v_cvt_pkrtz_f16_f32_e32 v6, v12, v13 ; 5E0C1B0C
    v_cvt_pkrtz_f16_f32_e32 v7, v14, v15 ; 5E0E1F0E
    exp 15, 2, 1, 0, 0, v4, v5, v0, v0   ; F800042F 00000504
    exp 15, 3, 1, 1, 1, v6, v7, v0, v0   ; F8001C3F 00000706
    s_endpgm                             ; BF810000

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
2016-07-19 23:45:06 +02:00
..
2016-07-19 23:45:06 +02:00
2016-06-23 13:55:03 -07:00
2016-07-08 16:46:17 -07:00