diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c
index 2cbaea74c18..eb9805176bb 100644
--- a/src/asahi/compiler/agx_compile.c
+++ b/src/asahi/compiler/agx_compile.c
@@ -783,6 +783,21 @@ agx_emit_store(agx_builder *b, nir_intrinsic_instr *instr)
                   shift, nir_is_coherent(instr));
 }
 
+/*
+ * Hardware bindless texture sources are specified as a 64-bit uniform base
+ * address summed with a 32-bit register index. We model in NIR with the
+ * bindless_image_agx intrinsic.
+ */
+static agx_index
+agx_translate_bindless_handle(agx_builder *b, nir_src *handle, agx_index *base)
+{
+   nir_intrinsic_instr *intr = nir_src_as_intrinsic(*handle);
+   assert(intr->intrinsic == nir_intrinsic_bindless_image_agx);
+
+   *base = agx_uniform(nir_intrinsic_desc_set(intr), AGX_SIZE_64);
+   return agx_src_index(&intr->src[0]);
+}
+
 /* Preambles write directly to uniform registers, so move from uniform to GPR */
 static agx_instr *
 agx_emit_load_preamble(agx_builder *b, agx_index dst,
@@ -804,8 +819,16 @@ agx_emit_load_preamble(agx_builder *b, agx_index dst,
 static agx_instr *
 agx_emit_store_preamble(agx_builder *b, nir_intrinsic_instr *instr)
 {
-   agx_index vec = agx_src_index(&instr->src[0]);
+   nir_preamble_class cls = nir_intrinsic_preamble_class(instr);
    unsigned base = nir_intrinsic_base(instr);
+
+   if (cls == nir_preamble_class_image) {
+      agx_index heap, offset;
+      offset = agx_translate_bindless_handle(b, &instr->src[0], &heap);
+      return agx_tex_state_store(b, heap, offset, base / 2);
+   }
+
+   agx_index vec = agx_src_index(&instr->src[0]);
    unsigned stride = agx_size_align_16(vec.size);
    unsigned nr = nir_src_num_components(instr->src[0]);
 
@@ -854,21 +877,6 @@ agx_tex_dim(enum glsl_sampler_dim dim, bool array)
    }
 }
 
-/*
- * Hardware bindless texture sources are specified as a 64-bit uniform base
- * address summed with a 32-bit register index. We model in NIR with the
- * bindless_image_agx intrinsic.
- */
-static agx_index
-agx_translate_bindless_handle(agx_builder *b, nir_src *handle, agx_index *base)
-{
-   nir_intrinsic_instr *intr = nir_src_as_intrinsic(*handle);
-   assert(intr->intrinsic == nir_intrinsic_bindless_image_agx);
-
-   *base = agx_uniform(nir_intrinsic_desc_set(intr), AGX_SIZE_64);
-   return agx_src_index(&intr->src[0]);
-}
-
 static agx_instr *
 agx_emit_block_image_store(agx_builder *b, nir_intrinsic_instr *instr)
 {
diff --git a/src/asahi/compiler/agx_lower_uniform_sources.c b/src/asahi/compiler/agx_lower_uniform_sources.c
index 246284af6a2..3b19623fdc1 100644
--- a/src/asahi/compiler/agx_lower_uniform_sources.c
+++ b/src/asahi/compiler/agx_lower_uniform_sources.c
@@ -30,6 +30,11 @@ agx_instr_accepts_uniform(enum agx_opcode op, unsigned src_index,
       assert(!(src_index == 2 && high) && "texture heap always low");
       return !high && (src_index == 1 || src_index == 2);
 
+   case AGX_OPCODE_TEX_STATE_STORE:
+   case AGX_OPCODE_SAMPLER_STATE_STORE:
+      assert(!high && "texture heap always low");
+      return !high && src_index == 0;
+
    case AGX_OPCODE_DEVICE_LOAD:
       return src_index == 0 && !high;
    case AGX_OPCODE_DEVICE_STORE:
diff --git a/src/asahi/compiler/agx_opcodes.py b/src/asahi/compiler/agx_opcodes.py
index 2214d01bcbf..f5afa4623e9 100644
--- a/src/asahi/compiler/agx_opcodes.py
+++ b/src/asahi/compiler/agx_opcodes.py
@@ -301,6 +301,12 @@
 op("uniform_store",
    encoding = ((0b111 << 27) | 0b1000101 | (1 << 47), 8, _),
    dests = 0, srcs = 2, imms = [MASK], can_eliminate = False)
 
+# sources are base, offset
+for kind, bit in [("tex", 1 << 6), ("sampler", 0)]:
+   op(f"{kind}_state_store", encoding = (0b10101101 | bit | (1 << 20), 8, _),
+      dests = 0, srcs = 2, imms = [IMM, SCOREBOARD], can_eliminate = False,
+      schedule_class = "store")
+
 # sources are value, base, index
 op("atomic", encoding = (0x15 | (1 << 26) | (1 << 31) | (5 << 44), 8, _),
diff --git a/src/asahi/compiler/agx_optimizer.c b/src/asahi/compiler/agx_optimizer.c
index f4625f1bc4e..ef9cd20428d 100644
--- a/src/asahi/compiler/agx_optimizer.c
+++ b/src/asahi/compiler/agx_optimizer.c
@@ -548,7 +548,9 @@ agx_optimizer_forward(agx_context *ctx)
       /* Inline immediates if we can. TODO: systematic */
       if (I->op != AGX_OPCODE_COLLECT && I->op != AGX_OPCODE_IMAGE_LOAD &&
           I->op != AGX_OPCODE_TEXTURE_LOAD &&
-          I->op != AGX_OPCODE_UNIFORM_STORE && I->op != AGX_OPCODE_EXPORT)
+          I->op != AGX_OPCODE_UNIFORM_STORE && I->op != AGX_OPCODE_EXPORT &&
+          I->op != AGX_OPCODE_TEX_STATE_STORE &&
+          I->op != AGX_OPCODE_SAMPLER_STATE_STORE)
          agx_optimizer_inline_imm(defs, I);
 
       if (I->op == AGX_OPCODE_IF_ICMP) {
diff --git a/src/asahi/compiler/agx_pack.c b/src/asahi/compiler/agx_pack.c
index a20ffb07727..b51be8bfff4 100644
--- a/src/asahi/compiler/agx_pack.c
+++ b/src/asahi/compiler/agx_pack.c
@@ -922,6 +922,23 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups,
       break;
    }
 
+   case AGX_OPCODE_TEX_STATE_STORE:
+   case AGX_OPCODE_SAMPLER_STATE_STORE: {
+      unsigned U, Tt, T = agx_pack_texture(I, I->src[0], I->src[1], &U, &Tt);
+
+      pack_assert(I, U < (1 << 4));
+      pack_assert(I, (T & 1) == 0);
+      pack_assert(I, T < (1 << 8));
+      pack_assert(I, I->imm < (1 << 7));
+
+      uint64_t raw = agx_opcodes_info[I->op].encoding.exact | (I->imm << 8) |
+                     (I->scoreboard << 16) | ((uint64_t)(T >> 1) << 27) |
+                     ((uint64_t)(U << 1) << 58);
+
+      memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);
+      break;
+   }
+
    case AGX_OPCODE_IMAGE_WRITE: {
       bool Ct, Dt, Rt, Cs;
       unsigned Tt;