diff --git a/src/asahi/compiler/agx_opcodes.py b/src/asahi/compiler/agx_opcodes.py index 17c34c3ebfd..3c97e24a8db 100644 --- a/src/asahi/compiler/agx_opcodes.py +++ b/src/asahi/compiler/agx_opcodes.py @@ -239,6 +239,12 @@ op("device_load", encoding_32 = (0x05, 0x7F, 6, 8), srcs = 2, imms = [FORMAT, MASK, SHIFT, SCOREBOARD], can_reorder = False) +# sources are value, base, index +# TODO: Consider permitting the short form +op("device_store", + encoding_32 = (0x45 | (1 << 47), 0, 8, _), + dests = 0, srcs = 3, imms = [FORMAT, MASK, SHIFT, SCOREBOARD], can_eliminate = False) + # sources are value, index # TODO: Consider permitting the short form op("uniform_store", diff --git a/src/asahi/compiler/agx_optimizer.c b/src/asahi/compiler/agx_optimizer.c index f7bc842012a..6f5a23fd9da 100644 --- a/src/asahi/compiler/agx_optimizer.c +++ b/src/asahi/compiler/agx_optimizer.c @@ -132,6 +132,8 @@ agx_optimizer_inline_imm(agx_instr **defs, agx_instr *I, unsigned srcs, continue; if (I->op == AGX_OPCODE_ZS_EMIT && s != 0) continue; + if (I->op == AGX_OPCODE_DEVICE_STORE && s != 2) + continue; if (float_src) { bool fp16 = (def->dest[0].size == AGX_SIZE_16); @@ -194,16 +196,18 @@ agx_optimizer_copyprop(agx_instr **defs, agx_instr *I) I->op == AGX_OPCODE_TEXTURE_SAMPLE || (I->op == AGX_OPCODE_DEVICE_LOAD && (s != 0 || def->src[0].value >= 256)) || + (I->op == AGX_OPCODE_DEVICE_STORE && + (s != 1 || def->src[0].value >= 256)) || I->op == AGX_OPCODE_PHI || I->op == AGX_OPCODE_ZS_EMIT || I->op == AGX_OPCODE_ST_TILE || I->op == AGX_OPCODE_LD_TILE || I->op == AGX_OPCODE_BLOCK_IMAGE_STORE || - /*I->op == AGX_OPCODE_DEVICE_STORE ||*/ I->op == AGX_OPCODE_UNIFORM_STORE || I->op == AGX_OPCODE_ST_VARY)) continue; /* ALU instructions cannot take 64-bit */ if (def->src[0].size == AGX_SIZE_64 && - !(I->op == AGX_OPCODE_DEVICE_LOAD && s == 0)) + !(I->op == AGX_OPCODE_DEVICE_LOAD && s == 0) && + !(I->op == AGX_OPCODE_DEVICE_STORE && s == 1)) continue; agx_replace_src(I, s, def->src[0]); diff --git a/src/asahi/compiler/agx_pack.c b/src/asahi/compiler/agx_pack.c index 7becd507ecf..e1c314f435f 100644 --- a/src/asahi/compiler/agx_pack.c +++ b/src/asahi/compiler/agx_pack.c @@ -505,9 +505,11 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, } case AGX_OPCODE_DEVICE_LOAD: + case AGX_OPCODE_DEVICE_STORE: case AGX_OPCODE_UNIFORM_STORE: { + bool is_device_store = I->op == AGX_OPCODE_DEVICE_STORE; bool is_uniform_store = I->op == AGX_OPCODE_UNIFORM_STORE; - bool is_store = is_uniform_store; + bool is_store = is_device_store || is_uniform_store; bool has_base = !is_uniform_store; /* Uniform stores internally packed as 16-bit. Fix up the format, mask, @@ -523,11 +525,13 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, reg.size = AGX_SIZE_16; } + unsigned offset_src = (has_base ? 1 : 0) + (is_store ? 1 : 0); + bool Rt, At = false, Ot; unsigned R = agx_pack_memory_reg(reg, &Rt); - unsigned A = has_base ? agx_pack_memory_base(I->src[0], &At) : 0; - unsigned O = agx_pack_memory_index( - I->src[(has_base ? 1 : 0) + (is_store ? 1 : 0)], &Ot); + unsigned A = + has_base ? agx_pack_memory_base(I->src[is_store ? 1 : 0], &At) : 0; + unsigned O = agx_pack_memory_index(I->src[offset_src], &Ot); unsigned u1 = is_uniform_store ? 0 : 1; // XXX unsigned u3 = 0; unsigned u4 = is_uniform_store ? 0 : 4; // XXX @@ -541,7 +545,7 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, agx_opcodes_info[I->op].encoding.exact | ((format & BITFIELD_MASK(3)) << 7) | ((R & BITFIELD_MASK(6)) << 10) | ((A & BITFIELD_MASK(4)) << 16) | ((O & BITFIELD_MASK(4)) << 20) | - (Ot ? (1 << 24) : 0) | (I->src[1].abs ? (1 << 25) : 0) | + (Ot ? (1 << 24) : 0) | (I->src[offset_src].abs ? (1 << 25) : 0) | (is_uniform_store ? (2 << 25) : 0) | (u1 << 26) | (At << 27) | (u3 << 28) | (I->scoreboard << 30) | (((uint64_t)((O >> 4) & BITFIELD_MASK(4))) << 32) |