From 5d5b1fc4722fa8db9b74b20d113c3f85d3f6bcb9 Mon Sep 17 00:00:00 2001 From: Danylo Piliaiev Date: Fri, 22 Jan 2021 19:51:56 +0200 Subject: [PATCH] freedreno/ir3: add a6xx global atomics and separate atomic opcodes Separating atomic opcodes makes it possible to express a6xx global atomics which take iova in SRC1. They would be needed by VK_KHR_buffer_device_address. The change also makes it easier to distinguish atomics in conditions. Signed-off-by: Danylo Piliaiev Part-of: --- src/freedreno/.gitlab-ci/reference/crash.log | 12 +- src/freedreno/ir3/disasm-a3xx.c | 33 ++++ src/freedreno/ir3/instr-a3xx.h | 104 ++++++++++- src/freedreno/ir3/ir3.c | 5 +- src/freedreno/ir3/ir3.h | 68 ++++---- src/freedreno/ir3/ir3_a4xx.c | 36 ++-- src/freedreno/ir3/ir3_a6xx.c | 36 ++-- src/freedreno/ir3/ir3_legalize.c | 15 +- src/freedreno/ir3/ir3_lexer.l | 23 +++ src/freedreno/ir3/ir3_parser.y | 81 +++++++-- src/freedreno/ir3/tests/disasm.c | 11 +- src/freedreno/isa/encode.c | 5 +- src/freedreno/isa/ir3-cat6.xml | 173 ++++++++++++++++--- 13 files changed, 465 insertions(+), 137 deletions(-) diff --git a/src/freedreno/.gitlab-ci/reference/crash.log b/src/freedreno/.gitlab-ci/reference/crash.log index e26db4d25f3..45edfb3aeb7 100644 --- a/src/freedreno/.gitlab-ci/reference/crash.log +++ b/src/freedreno/.gitlab-ci/reference/crash.log @@ -4638,12 +4638,12 @@ shader-blocks: size: 2048 :0:0000:0000[00000000x_00003002x] nop :0:0001:0001[00000000x_00000000x] nop - :6:0002:0002[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l r59.w, l[r45.z], 222 ; dontcare bits in atomic.xor: 00000000000000ee, WARNING: unexpected bits[41:48] in #instruction-cat6-a3xx-atomic: 00000000000000df vs 0000000000000000, WARNING: unexpected bits[53:53] in #instruction-cat6-a3xx-atomic: 0000000000000001 vs 0000000000000000 - :6:0003:0003[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l r59.w, l[r45.z], 222 ; dontcare bits in atomic.xor: 00000000000000ee, WARNING: unexpected bits[41:48] in 
#instruction-cat6-a3xx-atomic: 00000000000000df vs 0000000000000000, WARNING: unexpected bits[53:53] in #instruction-cat6-a3xx-atomic: 0000000000000001 vs 0000000000000000 - :6:0004:0004[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l r59.w, l[r45.z], 222 ; dontcare bits in atomic.xor: 00000000000000ee, WARNING: unexpected bits[41:48] in #instruction-cat6-a3xx-atomic: 00000000000000df vs 0000000000000000, WARNING: unexpected bits[53:53] in #instruction-cat6-a3xx-atomic: 0000000000000001 vs 0000000000000000 - :6:0005:0005[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l r59.w, l[r45.z], 222 ; dontcare bits in atomic.xor: 00000000000000ee, WARNING: unexpected bits[41:48] in #instruction-cat6-a3xx-atomic: 00000000000000df vs 0000000000000000, WARNING: unexpected bits[53:53] in #instruction-cat6-a3xx-atomic: 0000000000000001 vs 0000000000000000 - :6:0006:0006[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l r59.w, l[r45.z], 222 ; dontcare bits in atomic.xor: 00000000000000ee, WARNING: unexpected bits[41:48] in #instruction-cat6-a3xx-atomic: 00000000000000df vs 0000000000000000, WARNING: unexpected bits[53:53] in #instruction-cat6-a3xx-atomic: 0000000000000001 vs 0000000000000000 - :6:0007:0007[deadbeefx_deadbeefx] (sy)(jp)atomic.xor.typed.4d.u8.4.l r59.w, l[r45.z], 222 ; dontcare bits in atomic.xor: 00000000000000ee, WARNING: unexpected bits[41:48] in #instruction-cat6-a3xx-atomic: 00000000000000df vs 0000000000000000, WARNING: unexpected bits[53:53] in #instruction-cat6-a3xx-atomic: 0000000000000001 vs 0000000000000000 + :6:0002:0002[deadbeefx_deadbeefx] no match: deadbeefdeadbeef + :6:0003:0003[deadbeefx_deadbeefx] no match: deadbeefdeadbeef + :6:0004:0004[deadbeefx_deadbeefx] no match: deadbeefdeadbeef + :6:0005:0005[deadbeefx_deadbeefx] no match: deadbeefdeadbeef + :6:0006:0006[deadbeefx_deadbeefx] no match: deadbeefdeadbeef + :6:0007:0007[deadbeefx_deadbeefx] no match: deadbeefdeadbeef ----------------------------------------------- 8192 
(0x2000) bytes 000000: 00003002 00000000 00000000 00000000 |.0..............| diff --git a/src/freedreno/ir3/disasm-a3xx.c b/src/freedreno/ir3/disasm-a3xx.c index 3044fe145de..24b3805085c 100644 --- a/src/freedreno/ir3/disasm-a3xx.c +++ b/src/freedreno/ir3/disasm-a3xx.c @@ -333,6 +333,39 @@ static const struct opc_info { OPC(6, OPC_ATOMIC_AND, atomic.and), OPC(6, OPC_ATOMIC_OR, atomic.or), OPC(6, OPC_ATOMIC_XOR, atomic.xor), + OPC(6, OPC_ATOMIC_B_ADD, atomic.b.add), + OPC(6, OPC_ATOMIC_B_SUB, atomic.b.sub), + OPC(6, OPC_ATOMIC_B_XCHG, atomic.b.xchg), + OPC(6, OPC_ATOMIC_B_INC, atomic.b.inc), + OPC(6, OPC_ATOMIC_B_DEC, atomic.b.dec), + OPC(6, OPC_ATOMIC_B_CMPXCHG, atomic.b.cmpxchg), + OPC(6, OPC_ATOMIC_B_MIN, atomic.b.min), + OPC(6, OPC_ATOMIC_B_MAX, atomic.b.max), + OPC(6, OPC_ATOMIC_B_AND, atomic.b.and), + OPC(6, OPC_ATOMIC_B_OR, atomic.b.or), + OPC(6, OPC_ATOMIC_B_XOR, atomic.b.xor), + OPC(6, OPC_ATOMIC_S_ADD, atomic.s.add), + OPC(6, OPC_ATOMIC_S_SUB, atomic.s.sub), + OPC(6, OPC_ATOMIC_S_XCHG, atomic.s.xchg), + OPC(6, OPC_ATOMIC_S_INC, atomic.s.inc), + OPC(6, OPC_ATOMIC_S_DEC, atomic.s.dec), + OPC(6, OPC_ATOMIC_S_CMPXCHG, atomic.s.cmpxchg), + OPC(6, OPC_ATOMIC_S_MIN, atomic.s.min), + OPC(6, OPC_ATOMIC_S_MAX, atomic.s.max), + OPC(6, OPC_ATOMIC_S_AND, atomic.s.and), + OPC(6, OPC_ATOMIC_S_OR, atomic.s.or), + OPC(6, OPC_ATOMIC_S_XOR, atomic.s.xor), + OPC(6, OPC_ATOMIC_G_ADD, atomic.g.add), + OPC(6, OPC_ATOMIC_G_SUB, atomic.g.sub), + OPC(6, OPC_ATOMIC_G_XCHG, atomic.g.xchg), + OPC(6, OPC_ATOMIC_G_INC, atomic.g.inc), + OPC(6, OPC_ATOMIC_G_DEC, atomic.g.dec), + OPC(6, OPC_ATOMIC_G_CMPXCHG, atomic.g.cmpxchg), + OPC(6, OPC_ATOMIC_G_MIN, atomic.g.min), + OPC(6, OPC_ATOMIC_G_MAX, atomic.g.max), + OPC(6, OPC_ATOMIC_G_AND, atomic.g.and), + OPC(6, OPC_ATOMIC_G_OR, atomic.g.or), + OPC(6, OPC_ATOMIC_G_XOR, atomic.g.xor), OPC(6, OPC_LDGB, ldgb), OPC(6, OPC_STGB, stgb), OPC(6, OPC_STIB, stib), diff --git a/src/freedreno/ir3/instr-a3xx.h b/src/freedreno/ir3/instr-a3xx.h index 
8957182b2aa..8a85f575ddb 100644 --- a/src/freedreno/ir3/instr-a3xx.h +++ b/src/freedreno/ir3/instr-a3xx.h @@ -306,11 +306,35 @@ typedef enum { OPC_ATOMIC_B_OR = _OPC(6, 53), OPC_ATOMIC_B_XOR = _OPC(6, 54), - OPC_LDG_A = _OPC(6, 55), - OPC_STG_A = _OPC(6, 56), + OPC_ATOMIC_S_ADD = _OPC(6, 55), + OPC_ATOMIC_S_SUB = _OPC(6, 56), + OPC_ATOMIC_S_XCHG = _OPC(6, 57), + OPC_ATOMIC_S_INC = _OPC(6, 58), + OPC_ATOMIC_S_DEC = _OPC(6, 59), + OPC_ATOMIC_S_CMPXCHG = _OPC(6, 60), + OPC_ATOMIC_S_MIN = _OPC(6, 61), + OPC_ATOMIC_S_MAX = _OPC(6, 62), + OPC_ATOMIC_S_AND = _OPC(6, 63), + OPC_ATOMIC_S_OR = _OPC(6, 64), + OPC_ATOMIC_S_XOR = _OPC(6, 65), - OPC_SPILL_MACRO = _OPC(6, 57), - OPC_RELOAD_MACRO = _OPC(6, 58), + OPC_ATOMIC_G_ADD = _OPC(6, 66), + OPC_ATOMIC_G_SUB = _OPC(6, 67), + OPC_ATOMIC_G_XCHG = _OPC(6, 68), + OPC_ATOMIC_G_INC = _OPC(6, 69), + OPC_ATOMIC_G_DEC = _OPC(6, 70), + OPC_ATOMIC_G_CMPXCHG = _OPC(6, 71), + OPC_ATOMIC_G_MIN = _OPC(6, 72), + OPC_ATOMIC_G_MAX = _OPC(6, 73), + OPC_ATOMIC_G_AND = _OPC(6, 74), + OPC_ATOMIC_G_OR = _OPC(6, 75), + OPC_ATOMIC_G_XOR = _OPC(6, 76), + + OPC_LDG_A = _OPC(6, 77), + OPC_STG_A = _OPC(6, 78), + + OPC_SPILL_MACRO = _OPC(6, 79), + OPC_RELOAD_MACRO = _OPC(6, 80), /* category 7: */ OPC_BAR = _OPC(7, 0), @@ -592,7 +616,7 @@ is_madsh(opc_t opc) } static inline bool -is_atomic(opc_t opc) +is_local_atomic(opc_t opc) { switch (opc) { case OPC_ATOMIC_ADD: @@ -612,6 +636,76 @@ is_atomic(opc_t opc) } } +static inline bool +is_global_a3xx_atomic(opc_t opc) +{ + switch (opc) { + case OPC_ATOMIC_S_ADD: + case OPC_ATOMIC_S_SUB: + case OPC_ATOMIC_S_XCHG: + case OPC_ATOMIC_S_INC: + case OPC_ATOMIC_S_DEC: + case OPC_ATOMIC_S_CMPXCHG: + case OPC_ATOMIC_S_MIN: + case OPC_ATOMIC_S_MAX: + case OPC_ATOMIC_S_AND: + case OPC_ATOMIC_S_OR: + case OPC_ATOMIC_S_XOR: + return true; + default: + return false; + } +} + +static inline bool +is_global_a6xx_atomic(opc_t opc) +{ + switch (opc) { + case OPC_ATOMIC_G_ADD: + case OPC_ATOMIC_G_SUB: + case OPC_ATOMIC_G_XCHG: + 
case OPC_ATOMIC_G_INC: + case OPC_ATOMIC_G_DEC: + case OPC_ATOMIC_G_CMPXCHG: + case OPC_ATOMIC_G_MIN: + case OPC_ATOMIC_G_MAX: + case OPC_ATOMIC_G_AND: + case OPC_ATOMIC_G_OR: + case OPC_ATOMIC_G_XOR: + return true; + default: + return false; + } +} + +static inline bool +is_bindless_atomic(opc_t opc) +{ + switch (opc) { + case OPC_ATOMIC_B_ADD: + case OPC_ATOMIC_B_SUB: + case OPC_ATOMIC_B_XCHG: + case OPC_ATOMIC_B_INC: + case OPC_ATOMIC_B_DEC: + case OPC_ATOMIC_B_CMPXCHG: + case OPC_ATOMIC_B_MIN: + case OPC_ATOMIC_B_MAX: + case OPC_ATOMIC_B_AND: + case OPC_ATOMIC_B_OR: + case OPC_ATOMIC_B_XOR: + return true; + default: + return false; + } +} + +static inline bool +is_atomic(opc_t opc) +{ + return is_local_atomic(opc) || is_global_a3xx_atomic(opc) || + is_global_a6xx_atomic(opc) || is_bindless_atomic(opc); +} + static inline bool is_ssbo(opc_t opc) { diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c index db116f01017..766a7adec7b 100644 --- a/src/freedreno/ir3/ir3.c +++ b/src/freedreno/ir3/ir3.c @@ -952,10 +952,11 @@ ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags) /* disallow immediates in anything but the SSBO slot argument for * cat6 instructions: */ - if (is_atomic(instr->opc) && (n != 0)) + if (is_global_a3xx_atomic(instr->opc) && (n != 0)) return false; - if (is_atomic(instr->opc) && !(instr->flags & IR3_INSTR_G)) + if (is_local_atomic(instr->opc) || is_global_a6xx_atomic(instr->opc) || + is_bindless_atomic(instr->opc)) return false; if (instr->opc == OPC_STG && (n == 2)) diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index c5470d28ad4..25a5f36731f 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -282,19 +282,18 @@ struct ir3_instruction { IR3_INSTR_P = 0x080, IR3_INSTR_S = 0x100, IR3_INSTR_S2EN = 0x200, - IR3_INSTR_G = 0x400, - IR3_INSTR_SAT = 0x800, + IR3_INSTR_SAT = 0x400, /* (cat5/cat6) Bindless */ - IR3_INSTR_B = 0x1000, + IR3_INSTR_B = 0x800, /* (cat5/cat6) nonuniform */ - 
IR3_INSTR_NONUNIF = 0x02000, + IR3_INSTR_NONUNIF = 0x1000, /* (cat5-only) Get some parts of the encoding from a1.x */ - IR3_INSTR_A1EN = 0x04000, + IR3_INSTR_A1EN = 0x02000, /* meta-flags, for intermediate stages of IR, ie. * before register assignment is done: */ - IR3_INSTR_MARK = 0x08000, - IR3_INSTR_UNUSED = 0x10000, + IR3_INSTR_MARK = 0x04000, + IR3_INSTR_UNUSED = 0x08000, } flags; uint8_t repeat; uint8_t nop; @@ -2183,17 +2182,28 @@ INSTR3NODST(STIB); INSTR2(LDIB); INSTR5(LDG_A); INSTR6NODST(STG_A); -INSTR3F(G, ATOMIC_ADD) -INSTR3F(G, ATOMIC_SUB) -INSTR3F(G, ATOMIC_XCHG) -INSTR3F(G, ATOMIC_INC) -INSTR3F(G, ATOMIC_DEC) -INSTR3F(G, ATOMIC_CMPXCHG) -INSTR3F(G, ATOMIC_MIN) -INSTR3F(G, ATOMIC_MAX) -INSTR3F(G, ATOMIC_AND) -INSTR3F(G, ATOMIC_OR) -INSTR3F(G, ATOMIC_XOR) +INSTR2(ATOMIC_G_ADD) +INSTR2(ATOMIC_G_SUB) +INSTR2(ATOMIC_G_XCHG) +INSTR2(ATOMIC_G_INC) +INSTR2(ATOMIC_G_DEC) +INSTR2(ATOMIC_G_CMPXCHG) +INSTR2(ATOMIC_G_MIN) +INSTR2(ATOMIC_G_MAX) +INSTR2(ATOMIC_G_AND) +INSTR2(ATOMIC_G_OR) +INSTR2(ATOMIC_G_XOR) +INSTR3(ATOMIC_B_ADD) +INSTR3(ATOMIC_B_SUB) +INSTR3(ATOMIC_B_XCHG) +INSTR3(ATOMIC_B_INC) +INSTR3(ATOMIC_B_DEC) +INSTR3(ATOMIC_B_CMPXCHG) +INSTR3(ATOMIC_B_MIN) +INSTR3(ATOMIC_B_MAX) +INSTR3(ATOMIC_B_AND) +INSTR3(ATOMIC_B_OR) +INSTR3(ATOMIC_B_XOR) #elif GPU >= 400 INSTR3(LDGB) #if GPU >= 500 @@ -2201,17 +2211,17 @@ INSTR3(LDIB) #endif INSTR4NODST(STGB) INSTR4NODST(STIB) -INSTR4F(G, ATOMIC_ADD) -INSTR4F(G, ATOMIC_SUB) -INSTR4F(G, ATOMIC_XCHG) -INSTR4F(G, ATOMIC_INC) -INSTR4F(G, ATOMIC_DEC) -INSTR4F(G, ATOMIC_CMPXCHG) -INSTR4F(G, ATOMIC_MIN) -INSTR4F(G, ATOMIC_MAX) -INSTR4F(G, ATOMIC_AND) -INSTR4F(G, ATOMIC_OR) -INSTR4F(G, ATOMIC_XOR) +INSTR4(ATOMIC_S_ADD) +INSTR4(ATOMIC_S_SUB) +INSTR4(ATOMIC_S_XCHG) +INSTR4(ATOMIC_S_INC) +INSTR4(ATOMIC_S_DEC) +INSTR4(ATOMIC_S_CMPXCHG) +INSTR4(ATOMIC_S_MIN) +INSTR4(ATOMIC_S_MAX) +INSTR4(ATOMIC_S_AND) +INSTR4(ATOMIC_S_OR) +INSTR4(ATOMIC_S_XOR) #endif /* cat7 instructions: */ diff --git a/src/freedreno/ir3/ir3_a4xx.c 
b/src/freedreno/ir3/ir3_a4xx.c index 2339fa3d67a..1b69a6c1fe3 100644 --- a/src/freedreno/ir3/ir3_a4xx.c +++ b/src/freedreno/ir3/ir3_a4xx.c @@ -135,39 +135,39 @@ emit_intrinsic_atomic_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr) switch (intr->intrinsic) { case nir_intrinsic_ssbo_atomic_add_ir3: - atomic = ir3_ATOMIC_ADD_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0); + atomic = ir3_ATOMIC_S_ADD(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0); break; case nir_intrinsic_ssbo_atomic_imin_ir3: - atomic = ir3_ATOMIC_MIN_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0); + atomic = ir3_ATOMIC_S_MIN(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0); type = TYPE_S32; break; case nir_intrinsic_ssbo_atomic_umin_ir3: - atomic = ir3_ATOMIC_MIN_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0); + atomic = ir3_ATOMIC_S_MIN(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0); break; case nir_intrinsic_ssbo_atomic_imax_ir3: - atomic = ir3_ATOMIC_MAX_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0); + atomic = ir3_ATOMIC_S_MAX(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0); type = TYPE_S32; break; case nir_intrinsic_ssbo_atomic_umax_ir3: - atomic = ir3_ATOMIC_MAX_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0); + atomic = ir3_ATOMIC_S_MAX(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0); break; case nir_intrinsic_ssbo_atomic_and_ir3: - atomic = ir3_ATOMIC_AND_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0); + atomic = ir3_ATOMIC_S_AND(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0); break; case nir_intrinsic_ssbo_atomic_or_ir3: - atomic = ir3_ATOMIC_OR_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0); + atomic = ir3_ATOMIC_S_OR(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0); break; case nir_intrinsic_ssbo_atomic_xor_ir3: - atomic = ir3_ATOMIC_XOR_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0); + atomic = ir3_ATOMIC_S_XOR(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0); break; case nir_intrinsic_ssbo_atomic_exchange_ir3: - atomic = ir3_ATOMIC_XCHG_G(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0); 
+ atomic = ir3_ATOMIC_S_XCHG(b, ssbo, 0, data, 0, src3, 0, byte_offset, 0); break; case nir_intrinsic_ssbo_atomic_comp_swap_ir3: /* for cmpxchg, src0 is [ui]vec2(data, compare): */ data = ir3_collect(b, src3, data); struct ir3_instruction *dword_offset = ir3_get_src(ctx, &intr->src[4])[0]; - atomic = ir3_ATOMIC_CMPXCHG_G(b, ssbo, 0, data, 0, dword_offset, 0, + atomic = ir3_ATOMIC_S_CMPXCHG(b, ssbo, 0, data, 0, dword_offset, 0, byte_offset, 0); break; default: @@ -311,32 +311,32 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) switch (intr->intrinsic) { case nir_intrinsic_image_atomic_add: - atomic = ir3_ATOMIC_ADD_G(b, image, 0, src0, 0, src1, 0, src2, 0); + atomic = ir3_ATOMIC_S_ADD(b, image, 0, src0, 0, src1, 0, src2, 0); break; case nir_intrinsic_image_atomic_imin: case nir_intrinsic_image_atomic_umin: - atomic = ir3_ATOMIC_MIN_G(b, image, 0, src0, 0, src1, 0, src2, 0); + atomic = ir3_ATOMIC_S_MIN(b, image, 0, src0, 0, src1, 0, src2, 0); break; case nir_intrinsic_image_atomic_imax: case nir_intrinsic_image_atomic_umax: - atomic = ir3_ATOMIC_MAX_G(b, image, 0, src0, 0, src1, 0, src2, 0); + atomic = ir3_ATOMIC_S_MAX(b, image, 0, src0, 0, src1, 0, src2, 0); break; case nir_intrinsic_image_atomic_and: - atomic = ir3_ATOMIC_AND_G(b, image, 0, src0, 0, src1, 0, src2, 0); + atomic = ir3_ATOMIC_S_AND(b, image, 0, src0, 0, src1, 0, src2, 0); break; case nir_intrinsic_image_atomic_or: - atomic = ir3_ATOMIC_OR_G(b, image, 0, src0, 0, src1, 0, src2, 0); + atomic = ir3_ATOMIC_S_OR(b, image, 0, src0, 0, src1, 0, src2, 0); break; case nir_intrinsic_image_atomic_xor: - atomic = ir3_ATOMIC_XOR_G(b, image, 0, src0, 0, src1, 0, src2, 0); + atomic = ir3_ATOMIC_S_XOR(b, image, 0, src0, 0, src1, 0, src2, 0); break; case nir_intrinsic_image_atomic_exchange: - atomic = ir3_ATOMIC_XCHG_G(b, image, 0, src0, 0, src1, 0, src2, 0); + atomic = ir3_ATOMIC_S_XCHG(b, image, 0, src0, 0, src1, 0, src2, 0); break; case nir_intrinsic_image_atomic_comp_swap: /* for 
cmpxchg, src0 is [ui]vec2(data, compare): */ src0 = ir3_collect(b, ir3_get_src(ctx, &intr->src[4])[0], src0); - atomic = ir3_ATOMIC_CMPXCHG_G(b, image, 0, src0, 0, src1, 0, src2, 0); + atomic = ir3_ATOMIC_S_CMPXCHG(b, image, 0, src0, 0, src1, 0, src2, 0); break; default: unreachable("boo"); diff --git a/src/freedreno/ir3/ir3_a6xx.c b/src/freedreno/ir3/ir3_a6xx.c index 04efc0adfe0..70a4752417b 100644 --- a/src/freedreno/ir3/ir3_a6xx.c +++ b/src/freedreno/ir3/ir3_a6xx.c @@ -144,36 +144,36 @@ emit_intrinsic_atomic_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr) switch (intr->intrinsic) { case nir_intrinsic_ssbo_atomic_add_ir3: - atomic = ir3_ATOMIC_ADD_G(b, ibo, 0, src0, 0, src1, 0); + atomic = ir3_ATOMIC_B_ADD(b, ibo, 0, src0, 0, src1, 0); break; case nir_intrinsic_ssbo_atomic_imin_ir3: - atomic = ir3_ATOMIC_MIN_G(b, ibo, 0, src0, 0, src1, 0); + atomic = ir3_ATOMIC_B_MIN(b, ibo, 0, src0, 0, src1, 0); type = TYPE_S32; break; case nir_intrinsic_ssbo_atomic_umin_ir3: - atomic = ir3_ATOMIC_MIN_G(b, ibo, 0, src0, 0, src1, 0); + atomic = ir3_ATOMIC_B_MIN(b, ibo, 0, src0, 0, src1, 0); break; case nir_intrinsic_ssbo_atomic_imax_ir3: - atomic = ir3_ATOMIC_MAX_G(b, ibo, 0, src0, 0, src1, 0); + atomic = ir3_ATOMIC_B_MAX(b, ibo, 0, src0, 0, src1, 0); type = TYPE_S32; break; case nir_intrinsic_ssbo_atomic_umax_ir3: - atomic = ir3_ATOMIC_MAX_G(b, ibo, 0, src0, 0, src1, 0); + atomic = ir3_ATOMIC_B_MAX(b, ibo, 0, src0, 0, src1, 0); break; case nir_intrinsic_ssbo_atomic_and_ir3: - atomic = ir3_ATOMIC_AND_G(b, ibo, 0, src0, 0, src1, 0); + atomic = ir3_ATOMIC_B_AND(b, ibo, 0, src0, 0, src1, 0); break; case nir_intrinsic_ssbo_atomic_or_ir3: - atomic = ir3_ATOMIC_OR_G(b, ibo, 0, src0, 0, src1, 0); + atomic = ir3_ATOMIC_B_OR(b, ibo, 0, src0, 0, src1, 0); break; case nir_intrinsic_ssbo_atomic_xor_ir3: - atomic = ir3_ATOMIC_XOR_G(b, ibo, 0, src0, 0, src1, 0); + atomic = ir3_ATOMIC_B_XOR(b, ibo, 0, src0, 0, src1, 0); break; case nir_intrinsic_ssbo_atomic_exchange_ir3: - atomic = 
ir3_ATOMIC_XCHG_G(b, ibo, 0, src0, 0, src1, 0); + atomic = ir3_ATOMIC_B_XCHG(b, ibo, 0, src0, 0, src1, 0); break; case nir_intrinsic_ssbo_atomic_comp_swap_ir3: - atomic = ir3_ATOMIC_CMPXCHG_G(b, ibo, 0, src0, 0, src1, 0); + atomic = ir3_ATOMIC_B_CMPXCHG(b, ibo, 0, src0, 0, src1, 0); break; default: unreachable("boo"); @@ -288,39 +288,39 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) switch (intr->intrinsic) { case nir_intrinsic_image_atomic_add: case nir_intrinsic_bindless_image_atomic_add: - atomic = ir3_ATOMIC_ADD_G(b, ibo, 0, src0, 0, src1, 0); + atomic = ir3_ATOMIC_B_ADD(b, ibo, 0, src0, 0, src1, 0); break; case nir_intrinsic_image_atomic_imin: case nir_intrinsic_image_atomic_umin: case nir_intrinsic_bindless_image_atomic_imin: case nir_intrinsic_bindless_image_atomic_umin: - atomic = ir3_ATOMIC_MIN_G(b, ibo, 0, src0, 0, src1, 0); + atomic = ir3_ATOMIC_B_MIN(b, ibo, 0, src0, 0, src1, 0); break; case nir_intrinsic_image_atomic_imax: case nir_intrinsic_image_atomic_umax: case nir_intrinsic_bindless_image_atomic_imax: case nir_intrinsic_bindless_image_atomic_umax: - atomic = ir3_ATOMIC_MAX_G(b, ibo, 0, src0, 0, src1, 0); + atomic = ir3_ATOMIC_B_MAX(b, ibo, 0, src0, 0, src1, 0); break; case nir_intrinsic_image_atomic_and: case nir_intrinsic_bindless_image_atomic_and: - atomic = ir3_ATOMIC_AND_G(b, ibo, 0, src0, 0, src1, 0); + atomic = ir3_ATOMIC_B_AND(b, ibo, 0, src0, 0, src1, 0); break; case nir_intrinsic_image_atomic_or: case nir_intrinsic_bindless_image_atomic_or: - atomic = ir3_ATOMIC_OR_G(b, ibo, 0, src0, 0, src1, 0); + atomic = ir3_ATOMIC_B_OR(b, ibo, 0, src0, 0, src1, 0); break; case nir_intrinsic_image_atomic_xor: case nir_intrinsic_bindless_image_atomic_xor: - atomic = ir3_ATOMIC_XOR_G(b, ibo, 0, src0, 0, src1, 0); + atomic = ir3_ATOMIC_B_XOR(b, ibo, 0, src0, 0, src1, 0); break; case nir_intrinsic_image_atomic_exchange: case nir_intrinsic_bindless_image_atomic_exchange: - atomic = ir3_ATOMIC_XCHG_G(b, ibo, 0, src0, 0, 
src1, 0); + atomic = ir3_ATOMIC_B_XCHG(b, ibo, 0, src0, 0, src1, 0); break; case nir_intrinsic_image_atomic_comp_swap: case nir_intrinsic_bindless_image_atomic_comp_swap: - atomic = ir3_ATOMIC_CMPXCHG_G(b, ibo, 0, src0, 0, src1, 0); + atomic = ir3_ATOMIC_B_CMPXCHG(b, ibo, 0, src0, 0, src1, 0); break; default: unreachable("boo"); diff --git a/src/freedreno/ir3/ir3_legalize.c b/src/freedreno/ir3/ir3_legalize.c index 59765f7317b..eaa393bc1a2 100644 --- a/src/freedreno/ir3/ir3_legalize.c +++ b/src/freedreno/ir3/ir3_legalize.c @@ -273,19 +273,18 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block) else regmask_set(&state->needs_sy, n->dsts[0]); } else if (is_atomic(n->opc)) { - if (n->flags & IR3_INSTR_G) { - if (ctx->compiler->gen >= 6) { - /* New encoding, returns result via second src: */ - regmask_set(&state->needs_sy, n->srcs[2]); - } else { - regmask_set(&state->needs_sy, n->dsts[0]); - } + if (is_bindless_atomic(n->opc)) { + regmask_set(&state->needs_sy, n->srcs[2]); + } else if (is_global_a3xx_atomic(n->opc) || + is_global_a6xx_atomic(n->opc)) { + regmask_set(&state->needs_sy, n->dsts[0]); } else { regmask_set(&state->needs_ss, n->dsts[0]); } } - if (is_ssbo(n->opc) || (is_atomic(n->opc) && (n->flags & IR3_INSTR_G))) + if (is_ssbo(n->opc) || is_global_a3xx_atomic(n->opc) || + is_bindless_atomic(n->opc)) ctx->so->has_ssbo = true; /* both tex/sfu appear to not always immediately consume diff --git a/src/freedreno/ir3/ir3_lexer.l b/src/freedreno/ir3/ir3_lexer.l index 6b9ecab8f5a..2353a049eb9 100644 --- a/src/freedreno/ir3/ir3_lexer.l +++ b/src/freedreno/ir3/ir3_lexer.l @@ -339,6 +339,29 @@ static int parse_w(const char *str) "atomic.b.and" return TOKEN(T_OP_ATOMIC_B_AND); "atomic.b.or" return TOKEN(T_OP_ATOMIC_B_OR); "atomic.b.xor" return TOKEN(T_OP_ATOMIC_B_XOR); +"atomic.s.add" return TOKEN(T_OP_ATOMIC_S_ADD); +"atomic.s.sub" return TOKEN(T_OP_ATOMIC_S_SUB); +"atomic.s.xchg" return TOKEN(T_OP_ATOMIC_S_XCHG); +"atomic.s.inc" return 
TOKEN(T_OP_ATOMIC_S_INC); +"atomic.s.dec" return TOKEN(T_OP_ATOMIC_S_DEC); +"atomic.s.cmpxchg" return TOKEN(T_OP_ATOMIC_S_CMPXCHG); +"atomic.s.min" return TOKEN(T_OP_ATOMIC_S_MIN); +"atomic.s.max" return TOKEN(T_OP_ATOMIC_S_MAX); +"atomic.s.and" return TOKEN(T_OP_ATOMIC_S_AND); +"atomic.s.or" return TOKEN(T_OP_ATOMIC_S_OR); +"atomic.s.xor" return TOKEN(T_OP_ATOMIC_S_XOR); +"atomic.g.add" return TOKEN(T_OP_ATOMIC_G_ADD); +"atomic.g.sub" return TOKEN(T_OP_ATOMIC_G_SUB); +"atomic.g.xchg" return TOKEN(T_OP_ATOMIC_G_XCHG); +"atomic.g.inc" return TOKEN(T_OP_ATOMIC_G_INC); +"atomic.g.dec" return TOKEN(T_OP_ATOMIC_G_DEC); +"atomic.g.cmpxchg" return TOKEN(T_OP_ATOMIC_G_CMPXCHG); +"atomic.g.min" return TOKEN(T_OP_ATOMIC_G_MIN); +"atomic.g.max" return TOKEN(T_OP_ATOMIC_G_MAX); +"atomic.g.and" return TOKEN(T_OP_ATOMIC_G_AND); +"atomic.g.or" return TOKEN(T_OP_ATOMIC_G_OR); +"atomic.g.xor" return TOKEN(T_OP_ATOMIC_G_XOR); + "ldgb" return TOKEN(T_OP_LDGB); "stgb" return TOKEN(T_OP_STGB); "stib" return TOKEN(T_OP_STIB); diff --git a/src/freedreno/ir3/ir3_parser.y b/src/freedreno/ir3/ir3_parser.y index d41c38bc681..674e2fe2003 100644 --- a/src/freedreno/ir3/ir3_parser.y +++ b/src/freedreno/ir3/ir3_parser.y @@ -569,6 +569,28 @@ static void print_token(FILE *file, int type, YYSTYPE value) %token T_OP_ATOMIC_B_AND %token T_OP_ATOMIC_B_OR %token T_OP_ATOMIC_B_XOR +%token T_OP_ATOMIC_S_ADD +%token T_OP_ATOMIC_S_SUB +%token T_OP_ATOMIC_S_XCHG +%token T_OP_ATOMIC_S_INC +%token T_OP_ATOMIC_S_DEC +%token T_OP_ATOMIC_S_CMPXCHG +%token T_OP_ATOMIC_S_MIN +%token T_OP_ATOMIC_S_MAX +%token T_OP_ATOMIC_S_AND +%token T_OP_ATOMIC_S_OR +%token T_OP_ATOMIC_S_XOR +%token T_OP_ATOMIC_G_ADD +%token T_OP_ATOMIC_G_SUB +%token T_OP_ATOMIC_G_XCHG +%token T_OP_ATOMIC_G_INC +%token T_OP_ATOMIC_G_DEC +%token T_OP_ATOMIC_G_CMPXCHG +%token T_OP_ATOMIC_G_MIN +%token T_OP_ATOMIC_G_MAX +%token T_OP_ATOMIC_G_AND +%token T_OP_ATOMIC_G_OR +%token T_OP_ATOMIC_G_XOR %token T_OP_LDGB %token T_OP_STGB %token T_OP_STIB @@ 
-1020,7 +1042,7 @@ cat6_imm_offset: offset { new_src(0, IR3_REG_IMMED)->iim_val = $1; } cat6_offset: cat6_imm_offset | '+' src cat6_dst_offset: offset { instr->cat6.dst_offset = $1; } -| '+' src { instr->flags |= IR3_INSTR_G; } +| '+' src cat6_immed: integer { instr->cat6.iim_val = $1; } @@ -1068,14 +1090,39 @@ cat6_atomic_opc: T_OP_ATOMIC_ADD { new_instr(OPC_ATOMIC_ADD); } | T_OP_ATOMIC_OR { new_instr(OPC_ATOMIC_OR); } | T_OP_ATOMIC_XOR { new_instr(OPC_ATOMIC_XOR); } -cat6_atomic_g: cat6_atomic_opc cat6_typed cat6_dim cat6_type '.' cat6_immed '.' 'g' dst_reg ',' 'g' '[' cat6_reg_or_immed ']' ',' src ',' src ',' src { - instr->flags |= IR3_INSTR_G; - } +cat6_a3xx_atomic_opc: T_OP_ATOMIC_S_ADD { new_instr(OPC_ATOMIC_S_ADD); } +| T_OP_ATOMIC_S_SUB { new_instr(OPC_ATOMIC_S_SUB); } +| T_OP_ATOMIC_S_XCHG { new_instr(OPC_ATOMIC_S_XCHG); } +| T_OP_ATOMIC_S_INC { new_instr(OPC_ATOMIC_S_INC); } +| T_OP_ATOMIC_S_DEC { new_instr(OPC_ATOMIC_S_DEC); } +| T_OP_ATOMIC_S_CMPXCHG { new_instr(OPC_ATOMIC_S_CMPXCHG); } +| T_OP_ATOMIC_S_MIN { new_instr(OPC_ATOMIC_S_MIN); } +| T_OP_ATOMIC_S_MAX { new_instr(OPC_ATOMIC_S_MAX); } +| T_OP_ATOMIC_S_AND { new_instr(OPC_ATOMIC_S_AND); } +| T_OP_ATOMIC_S_OR { new_instr(OPC_ATOMIC_S_OR); } +| T_OP_ATOMIC_S_XOR { new_instr(OPC_ATOMIC_S_XOR); } + +cat6_a6xx_atomic_opc: T_OP_ATOMIC_G_ADD { new_instr(OPC_ATOMIC_G_ADD); } +| T_OP_ATOMIC_G_SUB { new_instr(OPC_ATOMIC_G_SUB); } +| T_OP_ATOMIC_G_XCHG { new_instr(OPC_ATOMIC_G_XCHG); } +| T_OP_ATOMIC_G_INC { new_instr(OPC_ATOMIC_G_INC); } +| T_OP_ATOMIC_G_DEC { new_instr(OPC_ATOMIC_G_DEC); } +| T_OP_ATOMIC_G_CMPXCHG { new_instr(OPC_ATOMIC_G_CMPXCHG); } +| T_OP_ATOMIC_G_MIN { new_instr(OPC_ATOMIC_G_MIN); } +| T_OP_ATOMIC_G_MAX { new_instr(OPC_ATOMIC_G_MAX); } +| T_OP_ATOMIC_G_AND { new_instr(OPC_ATOMIC_G_AND); } +| T_OP_ATOMIC_G_OR { new_instr(OPC_ATOMIC_G_OR); } +| T_OP_ATOMIC_G_XOR { new_instr(OPC_ATOMIC_G_XOR); } + +cat6_a3xx_atomic_s: cat6_a3xx_atomic_opc cat6_typed cat6_dim cat6_type '.' cat6_immed '.' 
'g' dst_reg ',' 'g' '[' cat6_reg_or_immed ']' ',' src ',' src ',' src + +cat6_a6xx_atomic_g: cat6_a6xx_atomic_opc cat6_typed cat6_dim cat6_type '.' cat6_immed '.' 'g' dst_reg ',' src ',' src cat6_atomic_l: cat6_atomic_opc cat6_typed cat6_dim cat6_type '.' cat6_immed '.' 'l' dst_reg ',' 'l' '[' cat6_reg_or_immed ']' ',' src -cat6_atomic: cat6_atomic_g -| cat6_atomic_l +cat6_atomic: cat6_atomic_l +| cat6_a3xx_atomic_s +| cat6_a6xx_atomic_g cat6_ibo_opc_1src: T_OP_RESINFO { new_instr(OPC_RESINFO); } @@ -1104,17 +1151,17 @@ cat6_reg_or_immed: src cat6_bindless_ibo_opc_1src: T_OP_RESINFO_B { new_instr(OPC_RESINFO); } -cat6_bindless_ibo_opc_2src: T_OP_ATOMIC_B_ADD { new_instr(OPC_ATOMIC_ADD)->flags |= IR3_INSTR_G; dummy_dst(); } -| T_OP_ATOMIC_B_SUB { new_instr(OPC_ATOMIC_SUB)->flags |= IR3_INSTR_G; dummy_dst(); } -| T_OP_ATOMIC_B_XCHG { new_instr(OPC_ATOMIC_XCHG)->flags |= IR3_INSTR_G; dummy_dst(); } -| T_OP_ATOMIC_B_INC { new_instr(OPC_ATOMIC_INC)->flags |= IR3_INSTR_G; dummy_dst(); } -| T_OP_ATOMIC_B_DEC { new_instr(OPC_ATOMIC_DEC)->flags |= IR3_INSTR_G; dummy_dst(); } -| T_OP_ATOMIC_B_CMPXCHG { new_instr(OPC_ATOMIC_CMPXCHG)->flags |= IR3_INSTR_G; dummy_dst(); } -| T_OP_ATOMIC_B_MIN { new_instr(OPC_ATOMIC_MIN)->flags |= IR3_INSTR_G; dummy_dst(); } -| T_OP_ATOMIC_B_MAX { new_instr(OPC_ATOMIC_MAX)->flags |= IR3_INSTR_G; dummy_dst(); } -| T_OP_ATOMIC_B_AND { new_instr(OPC_ATOMIC_AND)->flags |= IR3_INSTR_G; dummy_dst(); } -| T_OP_ATOMIC_B_OR { new_instr(OPC_ATOMIC_OR)->flags |= IR3_INSTR_G; dummy_dst(); } -| T_OP_ATOMIC_B_XOR { new_instr(OPC_ATOMIC_XOR)->flags |= IR3_INSTR_G; dummy_dst(); } +cat6_bindless_ibo_opc_2src: T_OP_ATOMIC_B_ADD { new_instr(OPC_ATOMIC_B_ADD); dummy_dst(); } +| T_OP_ATOMIC_B_SUB { new_instr(OPC_ATOMIC_B_SUB); dummy_dst(); } +| T_OP_ATOMIC_B_XCHG { new_instr(OPC_ATOMIC_B_XCHG); dummy_dst(); } +| T_OP_ATOMIC_B_INC { new_instr(OPC_ATOMIC_B_INC); dummy_dst(); } +| T_OP_ATOMIC_B_DEC { new_instr(OPC_ATOMIC_B_DEC); dummy_dst(); } +| T_OP_ATOMIC_B_CMPXCHG 
{ new_instr(OPC_ATOMIC_B_CMPXCHG); dummy_dst(); } +| T_OP_ATOMIC_B_MIN { new_instr(OPC_ATOMIC_B_MIN); dummy_dst(); } +| T_OP_ATOMIC_B_MAX { new_instr(OPC_ATOMIC_B_MAX); dummy_dst(); } +| T_OP_ATOMIC_B_AND { new_instr(OPC_ATOMIC_B_AND); dummy_dst(); } +| T_OP_ATOMIC_B_OR { new_instr(OPC_ATOMIC_B_OR); dummy_dst(); } +| T_OP_ATOMIC_B_XOR { new_instr(OPC_ATOMIC_B_XOR); dummy_dst(); } | T_OP_STIB_B { new_instr(OPC_STIB); dummy_dst(); } cat6_bindless_ibo_opc_2src_dst: T_OP_LDIB_B { new_instr(OPC_LDIB); } diff --git a/src/freedreno/ir3/tests/disasm.c b/src/freedreno/ir3/tests/disasm.c index 77b42e3f120..679c843bb3f 100644 --- a/src/freedreno/ir3/tests/disasm.c +++ b/src/freedreno/ir3/tests/disasm.c @@ -336,14 +336,17 @@ static const struct test { /* Atomic: */ #if 0 /* TODO our encoding differs in b53 for these two */ - INSTR_5XX(c4d60002_00008001, "atomic.inc.untyped.1d.u32.1.g r0.z, g[0], r0.z, r0.x, r0.x"), - INSTR_5XX(c4160205_03000001, "atomic.add.untyped.1d.u32.1.g r1.y, g[1], r0.x, r0.w, r0.x"), + INSTR_5XX(c4f60002_00008001, "atomic.s.inc.untyped.1d.u32.1.g r0.z, g[0], r0.z, r0.x, r0.x"), + INSTR_5XX(c4360205_03000001, "atomic.s.add.untyped.1d.u32.1.g r1.y, g[1], r0.x, r0.w, r0.x"), #else - INSTR_5XX(c4f60002_00008001, "atomic.inc.untyped.1d.u32.1.g r0.z, g[0], r0.z, r0.x, r0.x"), - INSTR_5XX(c4360205_03000001, "atomic.add.untyped.1d.u32.1.g r1.y, g[1], r0.x, r0.w, r0.x"), + INSTR_5XX(c4f60002_00008001, "atomic.s.inc.untyped.1d.u32.1.g r0.z, g[0], r0.z, r0.x, r0.x"), + INSTR_5XX(c4360205_03000001, "atomic.s.add.untyped.1d.u32.1.g r1.y, g[1], r0.x, r0.w, r0.x"), #endif INSTR_6XX(d5c60003_03008001, "(sy)atomic.max.untyped.1d.u32.1.l r0.w, l[r0.z], r0.w"), + /* dEQP-VK.glsl.atomic_operations.add_unsigned_compute_reference */ + INSTR_6XX(c4160002_02000001, "atomic.g.add.untyped.1d.u32.1.g r0.z, r0.x, r0.z"), + /* Bindless atomic: */ INSTR_6XX(c03a0003_01640000, "atomic.b.add.untyped.1d.s32.1.imm r0.w, r0.y, 0"), /* atomic.b.add.g.s32.1d.mode0.base0 r0.w,r0.y,0 */ 
INSTR_6XX(c03a0003_01660000, "atomic.b.and.untyped.1d.s32.1.imm r0.w, r0.y, 0"), /* atomic.b.and.g.s32.1d.mode0.base0 r0.w,r0.y,0 */ diff --git a/src/freedreno/isa/encode.c b/src/freedreno/isa/encode.c index ad9dbd2e39b..1c638cc171d 100644 --- a/src/freedreno/isa/encode.c +++ b/src/freedreno/isa/encode.c @@ -113,9 +113,6 @@ __instruction_case(struct encode_state *s, struct ir3_instruction *instr) } } else if (instr->opc == OPC_DEMOTE) { return OPC_KILL; - } else if ((instr->block->shader->compiler->gen >= 6) && - is_atomic(instr->opc) && (instr->flags & IR3_INSTR_G)) { - return instr->opc - OPC_ATOMIC_ADD + OPC_ATOMIC_B_ADD; } else if (s->compiler->gen >= 6) { if (instr->opc == OPC_RESINFO) { return OPC_RESINFO_B; @@ -243,7 +240,7 @@ extract_cat6_DESC_MODE(struct ir3_instruction *instr) static inline struct ir3_register * extract_cat6_SRC(struct ir3_instruction *instr, unsigned n) { - if (instr->flags & IR3_INSTR_G) { + if (is_global_a3xx_atomic(instr->opc)) { n++; } assert(n < instr->srcs_count); diff --git a/src/freedreno/isa/ir3-cat6.xml b/src/freedreno/isa/ir3-cat6.xml index 8d1b9aa6e24..220ac9f0401 100644 --- a/src/freedreno/isa/ir3-cat6.xml +++ b/src/freedreno/isa/ir3-cat6.xml @@ -26,6 +26,14 @@ SOFTWARE. @@ -482,16 +490,6 @@ SOFTWARE. to still have an extra src. For now, match that. - - - {SY}{JP}{NAME}.{TYPED}.{D}d.{TYPE}.{TYPE_SIZE}.g {DST}, g[{SSBO}], {SRC1}, {SRC2}, {SRC3} - - - - - - - {SY}{JP}{NAME}.{TYPED}.{D}d.{TYPE}.{TYPE_SIZE}.l {DST}, l[{SRC1}], {SRC2} @@ -500,7 +498,6 @@ SOFTWARE. 1 - xxxxxxxx @@ -514,30 +511,29 @@ SOFTWARE. 
x - 00000000 - - 0 - !!(src->flags & IR3_INSTR_G) src src->cat6.d - 1 src->cat6.iim_val - 1 - src->srcs[0] - !!(src->srcs[0]->flags & IR3_REG_IMMED) extract_cat6_SRC(src, 0) !!(extract_cat6_SRC(src, 0)->flags & IR3_REG_IMMED) extract_cat6_SRC(src, 1) !!(extract_cat6_SRC(src, 1)->flags & IR3_REG_IMMED) - extract_cat6_SRC(src, 2) - !!(extract_cat6_SRC(src, 2)->flags & IR3_REG_IMMED) - + + 00000000 + 00000000 + 0 + 0 + + + - + @@ -585,6 +581,136 @@ SOFTWARE. 11010 + + + Pre-a6xx atomics for Image/SSBO + + + + + + {SY}{JP}{NAME}.{TYPED}.{D}d.{TYPE}.{TYPE_SIZE}.g {DST}, g[{SSBO}], {SRC1}, {SRC2}, {SRC3} + + + + + + + 1 + + + + src->srcs[0] + !!(src->srcs[0]->flags & IR3_REG_IMMED) + extract_cat6_SRC(src, 2) + !!(extract_cat6_SRC(src, 2)->flags & IR3_REG_IMMED) + + + + + 10000 + + + + 10001 + + + + 10010 + + + + 10011 + + + + 10100 + + + + 10101 + + + + 10110 + + + + 10111 + + + + 11000 + + + + 11001 + + + + 11010 + + + + + a6xx+ global atomics which take iova in SRC1 + + + + + + {SY}{JP}{NAME}.{TYPED}.{D}d.{TYPE}.{TYPE_SIZE}.g {DST}, {SRC1}, {SRC2} + + + 00000000 + 00000000 + 1 + 0 + + + + 10000 + + + + 10001 + + + + 10010 + + + + 10011 + + + + 10100 + + + + 10101 + + + + 10110 + + + + 10111 + + + + 11000 + + + + 11001 + + + + 11010 + - - {G} - - {TYPED}