diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c index 6882ab9d0fb..ea31fa3b4cd 100644 --- a/src/asahi/compiler/agx_compile.c +++ b/src/asahi/compiler/agx_compile.c @@ -612,10 +612,18 @@ agx_emit_store_preamble(agx_builder *b, nir_intrinsic_instr *instr) agx_index vec = agx_src_index(&instr->src[0]); unsigned base = nir_intrinsic_base(instr); unsigned stride = agx_size_align_16(vec.size); + unsigned nr = nir_src_num_components(instr->src[0]); - for (unsigned i = 0; i < nir_src_num_components(instr->src[0]); ++i) { - agx_uniform_store(b, agx_extract_nir_src(b, instr->src[0], i), - agx_immediate(base + i * stride)); + for (unsigned i = 0; i < nr; i += (4 / stride)) { + agx_index data[4] = {0}; + unsigned count = MIN2(4 / stride, nr - i); + + for (unsigned c = 0; c < count; ++c) { + data[c] = agx_extract_nir_src(b, instr->src[0], i + c); + } + + agx_uniform_store(b, agx_emit_collect(b, count, data), + agx_immediate(base + i * stride), BITFIELD_MASK(count)); } return NULL; diff --git a/src/asahi/compiler/agx_opcodes.py b/src/asahi/compiler/agx_opcodes.py index 3dd027a1597..a08b63689d7 100644 --- a/src/asahi/compiler/agx_opcodes.py +++ b/src/asahi/compiler/agx_opcodes.py @@ -322,7 +322,7 @@ op("local_store", # TODO: Consider permitting the short form op("uniform_store", encoding_32 = ((0b111 << 27) | 0b1000101 | (1 << 47), 0, 8, _), - dests = 0, srcs = 2, can_eliminate = False) + dests = 0, srcs = 2, imms = [MASK], can_eliminate = False) # sources are value, base, index op("atomic", diff --git a/src/asahi/compiler/agx_pack.c b/src/asahi/compiler/agx_pack.c index a276cb1c8d8..b7884a2cfbd 100644 --- a/src/asahi/compiler/agx_pack.c +++ b/src/asahi/compiler/agx_pack.c @@ -674,16 +674,25 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, bool is_store = is_device_store || is_uniform_store; bool has_base = !is_uniform_store; - /* Uniform stores internally packed as 16-bit. Fix up the format, mask, - * and size so we can use scalar 32-bit values in the IR and avoid - * special casing earlier in the compiler. + /* Uniform stores are required to be 16-bit. The encoding that should be + * 32-bit annoyingly doesn't work. Fix up the format and size so we can + * use scalar 32-bit values in the IR and avoid special casing earlier in + * the compiler. */ enum agx_format format = is_uniform_store ? AGX_FORMAT_I16 : I->format; agx_index reg = is_store ? I->src[0] : I->dest[0]; unsigned mask = I->mask; - if (is_uniform_store) { - mask = BITFIELD_MASK(agx_size_align_16(reg.size)); + if (is_uniform_store && reg.size != AGX_SIZE_16) { + if (reg.size == AGX_SIZE_64) { + assert(mask == 1); + mask = BITFIELD_MASK(4); + } else { + assert(reg.size == AGX_SIZE_32); + assert(mask == 1 || mask == 3); + mask = BITFIELD_MASK(mask == 3 ? 4 : 2); + } + reg.size = AGX_SIZE_16; } diff --git a/src/asahi/compiler/agx_validate.c b/src/asahi/compiler/agx_validate.c index defa090e40c..cf539fdb5b0 100644 --- a/src/asahi/compiler/agx_validate.c +++ b/src/asahi/compiler/agx_validate.c @@ -231,6 +231,12 @@ agx_read_registers(const agx_instr *I, unsigned s) case AGX_OPCODE_SPLIT: return I->nr_dests * agx_size_align_16(agx_split_width(I)); + case AGX_OPCODE_UNIFORM_STORE: + if (s == 0) + return util_bitcount(I->mask) * size; + else + return size; + case AGX_OPCODE_DEVICE_STORE: case AGX_OPCODE_LOCAL_STORE: case AGX_OPCODE_STACK_STORE: