From ddbec45b6fc7b7eacac844243624061fae7fa042 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Fri, 2 Dec 2022 21:41:22 -0500 Subject: [PATCH] agx: Plumb in store instruction This will be used for compute kernels (and transform feedback) in the (near) future. For now, let's get the opcode plumbed in the backend to reduce some of the rebase pain. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/asahi/compiler/agx_opcodes.py | 6 ++++++ src/asahi/compiler/agx_optimizer.c | 8 ++++++-- src/asahi/compiler/agx_pack.c | 14 +++++++++----- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/src/asahi/compiler/agx_opcodes.py b/src/asahi/compiler/agx_opcodes.py index 17c34c3ebfd..3c97e24a8db 100644 --- a/src/asahi/compiler/agx_opcodes.py +++ b/src/asahi/compiler/agx_opcodes.py @@ -239,6 +239,12 @@ op("device_load", encoding_32 = (0x05, 0x7F, 6, 8), srcs = 2, imms = [FORMAT, MASK, SHIFT, SCOREBOARD], can_reorder = False) +# sources are value, base, index +# TODO: Consider permitting the short form +op("device_store", + encoding_32 = (0x45 | (1 << 47), 0, 8, _), + dests = 0, srcs = 3, imms = [FORMAT, MASK, SHIFT, SCOREBOARD], can_eliminate = False) + # sources are value, index # TODO: Consider permitting the short form op("uniform_store", diff --git a/src/asahi/compiler/agx_optimizer.c b/src/asahi/compiler/agx_optimizer.c index f7bc842012a..6f5a23fd9da 100644 --- a/src/asahi/compiler/agx_optimizer.c +++ b/src/asahi/compiler/agx_optimizer.c @@ -132,6 +132,8 @@ agx_optimizer_inline_imm(agx_instr **defs, agx_instr *I, unsigned srcs, continue; if (I->op == AGX_OPCODE_ZS_EMIT && s != 0) continue; + if (I->op == AGX_OPCODE_DEVICE_STORE && s != 2) + continue; if (float_src) { bool fp16 = (def->dest[0].size == AGX_SIZE_16); @@ -194,16 +196,18 @@ agx_optimizer_copyprop(agx_instr **defs, agx_instr *I) I->op == AGX_OPCODE_TEXTURE_SAMPLE || (I->op == AGX_OPCODE_DEVICE_LOAD && (s != 0 || def->src[0].value >= 256)) || + (I->op == AGX_OPCODE_DEVICE_STORE && + (s != 1 || def->src[0].value >= 256)) || I->op == AGX_OPCODE_PHI || I->op == AGX_OPCODE_ZS_EMIT || I->op == AGX_OPCODE_ST_TILE || I->op == AGX_OPCODE_LD_TILE || I->op == AGX_OPCODE_BLOCK_IMAGE_STORE || - /*I->op == AGX_OPCODE_DEVICE_STORE ||*/ I->op == AGX_OPCODE_UNIFORM_STORE || I->op == AGX_OPCODE_ST_VARY)) continue; /* ALU instructions cannot take 64-bit */ if (def->src[0].size == AGX_SIZE_64 && - !(I->op == AGX_OPCODE_DEVICE_LOAD && s == 0)) + !(I->op == AGX_OPCODE_DEVICE_LOAD && s == 0) && + !(I->op == AGX_OPCODE_DEVICE_STORE && s == 1)) continue; agx_replace_src(I, s, def->src[0]); diff --git a/src/asahi/compiler/agx_pack.c b/src/asahi/compiler/agx_pack.c index 7becd507ecf..e1c314f435f 100644 --- a/src/asahi/compiler/agx_pack.c +++ b/src/asahi/compiler/agx_pack.c @@ -505,9 +505,11 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, } case AGX_OPCODE_DEVICE_LOAD: + case AGX_OPCODE_DEVICE_STORE: case AGX_OPCODE_UNIFORM_STORE: { + bool is_device_store = I->op == AGX_OPCODE_DEVICE_STORE; bool is_uniform_store = I->op == AGX_OPCODE_UNIFORM_STORE; - bool is_store = is_uniform_store; + bool is_store = is_device_store || is_uniform_store; bool has_base = !is_uniform_store; /* Uniform stores internally packed as 16-bit. Fix up the format, mask, @@ -523,11 +525,13 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, reg.size = AGX_SIZE_16; } + unsigned offset_src = (has_base ? 1 : 0) + (is_store ? 1 : 0); + bool Rt, At = false, Ot; unsigned R = agx_pack_memory_reg(reg, &Rt); - unsigned A = has_base ? agx_pack_memory_base(I->src[0], &At) : 0; - unsigned O = agx_pack_memory_index( - I->src[(has_base ? 1 : 0) + (is_store ? 1 : 0)], &Ot); + unsigned A = + has_base ? agx_pack_memory_base(I->src[is_store ? 1 : 0], &At) : 0; + unsigned O = agx_pack_memory_index(I->src[offset_src], &Ot); unsigned u1 = is_uniform_store ? 0 : 1; // XXX unsigned u3 = 0; unsigned u4 = is_uniform_store ? 0 : 4; // XXX @@ -541,7 +545,7 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, agx_opcodes_info[I->op].encoding.exact | ((format & BITFIELD_MASK(3)) << 7) | ((R & BITFIELD_MASK(6)) << 10) | ((A & BITFIELD_MASK(4)) << 16) | ((O & BITFIELD_MASK(4)) << 20) | - (Ot ? (1 << 24) : 0) | (I->src[1].abs ? (1 << 25) : 0) | + (Ot ? (1 << 24) : 0) | (I->src[offset_src].abs ? (1 << 25) : 0) | (is_uniform_store ? (2 << 25) : 0) | (u1 << 26) | (At << 27) | (u3 << 28) | (I->scoreboard << 30) | (((uint64_t)((O >> 4) & BITFIELD_MASK(4))) << 32) |