agx: vectorize uniform_store

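This makes preambles shorter: each uniform_store now writes up to four 16-bit units at once instead of a single scalar component.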

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28483>
This commit is contained in:
Alyssa Rosenzweig
2024-03-03 18:54:34 -04:00
committed by Marge Bot
parent 703e5385eb
commit 85f7310ba7
4 changed files with 32 additions and 9 deletions
+11 -3
View File
@@ -612,10 +612,18 @@ agx_emit_store_preamble(agx_builder *b, nir_intrinsic_instr *instr)
agx_index vec = agx_src_index(&instr->src[0]);
unsigned base = nir_intrinsic_base(instr);
unsigned stride = agx_size_align_16(vec.size);
unsigned nr = nir_src_num_components(instr->src[0]);
for (unsigned i = 0; i < nir_src_num_components(instr->src[0]); ++i) {
agx_uniform_store(b, agx_extract_nir_src(b, instr->src[0], i),
agx_immediate(base + i * stride));
for (unsigned i = 0; i < nr; i += (4 / stride)) {
agx_index data[4] = {0};
unsigned count = MIN2(4 / stride, nr - i);
for (unsigned c = 0; c < count; ++c) {
data[c] = agx_extract_nir_src(b, instr->src[0], i + c);
}
agx_uniform_store(b, agx_emit_collect(b, count, data),
agx_immediate(base + i * stride), BITFIELD_MASK(count));
}
return NULL;
+1 -1
View File
@@ -322,7 +322,7 @@ op("local_store",
# TODO: Consider permitting the short form
op("uniform_store",
encoding_32 = ((0b111 << 27) | 0b1000101 | (1 << 47), 0, 8, _),
dests = 0, srcs = 2, can_eliminate = False)
dests = 0, srcs = 2, imms = [MASK], can_eliminate = False)
# sources are value, base, index
op("atomic",
+14 -5
View File
@@ -674,16 +674,25 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups,
bool is_store = is_device_store || is_uniform_store;
bool has_base = !is_uniform_store;
/* Uniform stores internally packed as 16-bit. Fix up the format, mask,
* and size so we can use scalar 32-bit values in the IR and avoid
* special casing earlier in the compiler.
/* Uniform stores are required to be 16-bit. The encoding that should be
* 32-bit annoyingly doesn't work. Fix up the format and size so we can
* use scalar 32-bit values in the IR and avoid special casing earlier in
* the compiler.
*/
enum agx_format format = is_uniform_store ? AGX_FORMAT_I16 : I->format;
agx_index reg = is_store ? I->src[0] : I->dest[0];
unsigned mask = I->mask;
if (is_uniform_store) {
mask = BITFIELD_MASK(agx_size_align_16(reg.size));
if (is_uniform_store && reg.size != AGX_SIZE_16) {
if (reg.size == AGX_SIZE_64) {
assert(mask == 1);
mask = BITFIELD_MASK(4);
} else {
assert(reg.size == AGX_SIZE_32);
assert(mask == 1 || mask == 3);
mask = BITFIELD_MASK(mask == 3 ? 4 : 2);
}
reg.size = AGX_SIZE_16;
}
+6
View File
@@ -231,6 +231,12 @@ agx_read_registers(const agx_instr *I, unsigned s)
case AGX_OPCODE_SPLIT:
return I->nr_dests * agx_size_align_16(agx_split_width(I));
case AGX_OPCODE_UNIFORM_STORE:
if (s == 0)
return util_bitcount(I->mask) * size;
else
return size;
case AGX_OPCODE_DEVICE_STORE:
case AGX_OPCODE_LOCAL_STORE:
case AGX_OPCODE_STACK_STORE: