agx: vectorize uniform_store
This makes preambles shorter.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28483>
This commit is contained in:
committed by
Marge Bot
parent
703e5385eb
commit
85f7310ba7
@@ -612,10 +612,18 @@ agx_emit_store_preamble(agx_builder *b, nir_intrinsic_instr *instr)
|
||||
agx_index vec = agx_src_index(&instr->src[0]);
|
||||
unsigned base = nir_intrinsic_base(instr);
|
||||
unsigned stride = agx_size_align_16(vec.size);
|
||||
unsigned nr = nir_src_num_components(instr->src[0]);
|
||||
|
||||
for (unsigned i = 0; i < nir_src_num_components(instr->src[0]); ++i) {
|
||||
agx_uniform_store(b, agx_extract_nir_src(b, instr->src[0], i),
|
||||
agx_immediate(base + i * stride));
|
||||
for (unsigned i = 0; i < nr; i += (4 / stride)) {
|
||||
agx_index data[4] = {0};
|
||||
unsigned count = MIN2(4 / stride, nr - i);
|
||||
|
||||
for (unsigned c = 0; c < count; ++c) {
|
||||
data[c] = agx_extract_nir_src(b, instr->src[0], i + c);
|
||||
}
|
||||
|
||||
agx_uniform_store(b, agx_emit_collect(b, count, data),
|
||||
agx_immediate(base + i * stride), BITFIELD_MASK(count));
|
||||
}
|
||||
|
||||
return NULL;
|
||||
|
||||
@@ -322,7 +322,7 @@ op("local_store",
|
||||
# TODO: Consider permitting the short form
|
||||
op("uniform_store",
|
||||
encoding_32 = ((0b111 << 27) | 0b1000101 | (1 << 47), 0, 8, _),
|
||||
dests = 0, srcs = 2, can_eliminate = False)
|
||||
dests = 0, srcs = 2, imms = [MASK], can_eliminate = False)
|
||||
|
||||
# sources are value, base, index
|
||||
op("atomic",
|
||||
|
||||
@@ -674,16 +674,25 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups,
|
||||
bool is_store = is_device_store || is_uniform_store;
|
||||
bool has_base = !is_uniform_store;
|
||||
|
||||
/* Uniform stores internally packed as 16-bit. Fix up the format, mask,
|
||||
* and size so we can use scalar 32-bit values in the IR and avoid
|
||||
* special casing earlier in the compiler.
|
||||
/* Uniform stores are required to be 16-bit. The encoding that should be
|
||||
* 32-bit annoyingly doesn't work. Fix up the format and size so we can
|
||||
* use scalar 32-bit values in the IR and avoid special casing earlier in
|
||||
* the compiler.
|
||||
*/
|
||||
enum agx_format format = is_uniform_store ? AGX_FORMAT_I16 : I->format;
|
||||
agx_index reg = is_store ? I->src[0] : I->dest[0];
|
||||
unsigned mask = I->mask;
|
||||
|
||||
if (is_uniform_store) {
|
||||
mask = BITFIELD_MASK(agx_size_align_16(reg.size));
|
||||
if (is_uniform_store && reg.size != AGX_SIZE_16) {
|
||||
if (reg.size == AGX_SIZE_64) {
|
||||
assert(mask == 1);
|
||||
mask = BITFIELD_MASK(4);
|
||||
} else {
|
||||
assert(reg.size == AGX_SIZE_32);
|
||||
assert(mask == 1 || mask == 3);
|
||||
mask = BITFIELD_MASK(mask == 3 ? 4 : 2);
|
||||
}
|
||||
|
||||
reg.size = AGX_SIZE_16;
|
||||
}
|
||||
|
||||
|
||||
@@ -231,6 +231,12 @@ agx_read_registers(const agx_instr *I, unsigned s)
|
||||
case AGX_OPCODE_SPLIT:
|
||||
return I->nr_dests * agx_size_align_16(agx_split_width(I));
|
||||
|
||||
case AGX_OPCODE_UNIFORM_STORE:
|
||||
if (s == 0)
|
||||
return util_bitcount(I->mask) * size;
|
||||
else
|
||||
return size;
|
||||
|
||||
case AGX_OPCODE_DEVICE_STORE:
|
||||
case AGX_OPCODE_LOCAL_STORE:
|
||||
case AGX_OPCODE_STACK_STORE:
|
||||
|
||||
Reference in New Issue
Block a user