aco: use ac_hw_cache_flags

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29243>
This commit is contained in:
Rhys Perry
2024-05-14 18:34:01 +01:00
committed by Marge Bot
parent cdaf269924
commit b41f0f6cc1
9 changed files with 244 additions and 205 deletions
+64 -21
View File
@@ -411,13 +411,19 @@ BEGIN_TEST(assembler.smem)
//! s_load_b32 s4, s[16:17], s8 offset:0x2a ; f4000108 1000002a
bld.smem(aco_opcode::s_load_dword, dst, op_s2, Operand::c32(42), op_s1);
ac_hw_cache_flags cache_coherent;
ac_hw_cache_flags cache_non_temporal;
cache_coherent.value = ac_glc;
cache_non_temporal.value = ac_dlc;
//~gfx11! s_buffer_load_b32 s4, s[32:35], s8 glc ; f4204110 10000000
//~gfx12! s_buffer_load_b32 s4, s[32:35], s8 offset:0x0 scope:SCOPE_SYS ; f4620110 10000000
bld.smem(aco_opcode::s_buffer_load_dword, dst, op_s4, op_s1)->smem().glc = true;
bld.smem(aco_opcode::s_buffer_load_dword, dst, op_s4, op_s1)->smem().cache = cache_coherent;
//~gfx11! s_buffer_load_b32 s4, s[32:35], s8 dlc ; f4202110 10000000
//~gfx12! (then repeated 1 times)
bld.smem(aco_opcode::s_buffer_load_dword, dst, op_s4, op_s1)->smem().dlc = true;
bld.smem(aco_opcode::s_buffer_load_dword, dst, op_s4, op_s1)->smem().cache =
cache_non_temporal;
finish_assembler_test();
}
@@ -482,22 +488,31 @@ BEGIN_TEST(assembler.mubuf)
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), op_s1, 84, false);
/* Various flags */
ac_hw_cache_flags cache_coherent;
ac_hw_cache_flags cache_sys_coherent;
ac_hw_cache_flags cache_non_temporal;
ac_hw_cache_flags cache_atomic_rtn;
cache_coherent.value = ac_glc;
cache_sys_coherent.value = ac_slc;
cache_non_temporal.value = ac_dlc;
cache_atomic_rtn.value = ac_glc;
//~gfx11! buffer_load_b32 v42, off, s[32:35], 0 glc ; e0504000 80082a80
//~gfx12! buffer_load_b32 v42, off, s[32:35], null scope:SCOPE_SYS ; c405007c 008c402a 00000000
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)
->mubuf()
.glc = true;
.cache = cache_coherent;
//~gfx11! buffer_load_b32 v42, off, s[32:35], 0 dlc ; e0502000 80082a80
//~gfx12! (then repeated 2 times)
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)
->mubuf()
.dlc = true;
.cache = cache_non_temporal;
//~gfx11! buffer_load_b32 v42, off, s[32:35], 0 slc ; e0501000 80082a80
bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)
->mubuf()
.slc = true;
.cache = cache_sys_coherent;
//; if llvm_ver >= 16 and variant == 'gfx11':
//; insert_pattern('buffer_load_b32 v[42:43], off, s[32:35], 0 tfe ; e0500000 80282a80')
@@ -562,7 +577,7 @@ BEGIN_TEST(assembler.mubuf)
bld.mubuf(aco_opcode::buffer_atomic_add, Definition(op_v1.physReg(), v1), op_s4, Operand(v1),
Operand::zero(), op_v1, 0, false)
->mubuf()
.glc = true;
.cache = cache_atomic_rtn;
finish_assembler_test();
}
@@ -632,25 +647,32 @@ BEGIN_TEST(assembler.mtbuf)
false);
/* Various flags */
ac_hw_cache_flags cache_coherent;
ac_hw_cache_flags cache_sys_coherent;
ac_hw_cache_flags cache_non_temporal;
cache_coherent.value = ac_glc;
cache_sys_coherent.value = ac_slc;
cache_non_temporal.value = ac_dlc;
//~gfx11! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] glc ; e9904000 80082a80
//~gfx12! tbuffer_load_format_x v42, off, s[32:35], null format:[BUF_FMT_32_32_FLOAT] scope:SCOPE_SYS ; c420007c 190c402a 00000080
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt,
nfmt, 0, false)
->mtbuf()
.glc = true;
.cache = cache_coherent;
//~gfx11! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] dlc ; e9902000 80082a80
//~gfx12! (then repeated 2 times)
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt,
nfmt, 0, false)
->mtbuf()
.dlc = true;
.cache = cache_non_temporal;
//~gfx11! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] slc ; e9901000 80082a80
bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt,
nfmt, 0, false)
->mtbuf()
.slc = true;
.cache = cache_sys_coherent;
//; if llvm_ver >= 16 and variant == 'gfx11':
//; insert_pattern('tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] ; e9900000 80282a80')
@@ -718,19 +740,28 @@ BEGIN_TEST(assembler.mimg)
0x1;
/* Various flags */
ac_hw_cache_flags cache_coherent;
ac_hw_cache_flags cache_sys_coherent;
ac_hw_cache_flags cache_non_temporal;
ac_hw_cache_flags cache_atomic_rtn;
cache_coherent.value = ac_glc;
cache_sys_coherent.value = ac_slc;
cache_non_temporal.value = ac_dlc;
cache_atomic_rtn.value = ac_glc;
//~gfx11! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D dlc ; f06c2f00 2010540a
//~gfx12! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D scope:SCOPE_SYS ; e7c6c000 100c8054 0000000a
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().dlc =
true;
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().cache =
cache_non_temporal;
//~gfx11! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D glc ; f06c4f00 2010540a
//~gfx12! (then repeated 2 times)
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().glc =
true;
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().cache =
cache_coherent;
//~gfx11! image_sample v[84:87], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D slc ; f06c1f00 2010540a
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().slc =
true;
bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1)->mimg().cache =
cache_sys_coherent;
//~gfx11! image_sample v[84:88], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D tfe ; f06c0f00 2030540a
//~gfx12! image_sample v[84:88], v10, s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_1D tfe ; e7c6c008 10008054 0000000a
@@ -799,7 +830,7 @@ BEGIN_TEST(assembler.mimg)
//~gfx11! image_atomic_add v10, v20, s[64:71] dmask:0xf dim:SQ_RSRC_IMG_2D glc ; f0304f04 00100a14
//~gfx12! image_atomic_add_uint v10, [v20, v21, v0, v0], s[64:71] dmask:0xf dim:SQ_RSRC_IMG_2D th:TH_ATOMIC_RETURN ; d3c30001 0010800a 00001514
bld.mimg(aco_opcode::image_atomic_add, Definition(op_v1.physReg(), v1), op_s8, Operand(s4),
op_v1, op_v2, 0xf, false, false, false, true)
op_v1, op_v2, 0xf, false, false, false, cache_atomic_rtn)
->mimg()
.dim = ac_image_2d;
@@ -876,16 +907,28 @@ BEGIN_TEST(assembler.flat)
bld.global(aco_opcode::global_load_dword, dst_v1, op_v2, Operand(s1), 84);
/* Various flags */
ac_hw_cache_flags cache_coherent;
ac_hw_cache_flags cache_sys_coherent;
ac_hw_cache_flags cache_non_temporal;
ac_hw_cache_flags cache_atomic_rtn;
cache_coherent.value = ac_glc;
cache_sys_coherent.value = ac_slc;
cache_non_temporal.value = ac_dlc;
cache_atomic_rtn.value = ac_glc;
//~gfx11! flat_load_b32 v42, v[20:21] slc ; dc508000 2a7c0014
//~gfx12! flat_load_b32 v42, v[20:21] scope:SCOPE_SYS ; ec05007c 000c002a 00000014
bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1))->flat().slc = true;
bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1))->flat().cache =
cache_sys_coherent;
//~gfx11! flat_load_b32 v42, v[20:21] glc ; dc504000 2a7c0014
//~gfx12! (then repeated 2 times)
bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1))->flat().glc = true;
bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1))->flat().cache =
cache_coherent;
//~gfx11! flat_load_b32 v42, v[20:21] dlc ; dc502000 2a7c0014
bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1))->flat().dlc = true;
bld.flat(aco_opcode::flat_load_dword, dst_v1, op_v2, Operand(s1))->flat().cache =
cache_non_temporal;
/* Stores */
//~gfx11! flat_store_b32 v[20:21], v10 ; dc680000 007c0a14
@@ -895,8 +938,8 @@ BEGIN_TEST(assembler.flat)
/* Atomic with return */
//~gfx11! global_atomic_add_u32 v42, v[20:21], v10, off glc ; dcd64000 2a7c0a14
//~gfx12! global_atomic_add_u32 v42, v[20:21], v10, off th:TH_ATOMIC_RETURN ; ee0d407c 0510002a 00000014
bld.global(aco_opcode::global_atomic_add, dst_v1, op_v2, Operand(s1), op_v1)->global().glc =
true;
bld.global(aco_opcode::global_atomic_add, dst_v1, op_v2, Operand(s1), op_v1)->global().cache =
cache_atomic_rtn;
finish_assembler_test();
}