agx: implement sparse residency queries
The hardware matches NIR well — sparse residency is just an extra destination on the texture instruction. Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33682>
This commit is contained in:
committed by
Marge Bot
parent
93bccc0914
commit
739807944d
@@ -1066,11 +1066,12 @@ agx_expand_tex_to(agx_builder *b, nir_def *def, agx_index src, bool masked)
|
||||
if (!masked)
|
||||
mask = (nir_component_mask_t)BITFIELD_MASK(nr_channels);
|
||||
|
||||
agx_index packed_channels[4] = {agx_null()};
|
||||
agx_index unpacked_channels[4] = {agx_null()};
|
||||
agx_index packed_channels[8] = {agx_null()};
|
||||
agx_index unpacked_channels[8] = {agx_null()};
|
||||
|
||||
/* Hardware writes the masked components contiguously, expand out for NIR */
|
||||
agx_emit_split(b, packed_channels, src, 4 /* XXX: why not nr_channels */);
|
||||
agx_emit_split(b, packed_channels, src,
|
||||
ALIGN_POT(nr_channels, 4) /* XXX: why not nr_channels */);
|
||||
|
||||
for (unsigned i = 0; i < nr_channels; ++i) {
|
||||
unpacked_channels[i] =
|
||||
@@ -1089,15 +1090,19 @@ agx_emit_image_load(agx_builder *b, agx_index dst, nir_intrinsic_instr *intr)
|
||||
agx_index ms_index = agx_src_index(&intr->src[2]);
|
||||
agx_index lod = agx_src_index(&intr->src[3]);
|
||||
enum agx_lod_mode lod_mode = AGX_LOD_MODE_LOD_MIN;
|
||||
bool sparse = intr->intrinsic == nir_intrinsic_bindless_image_sparse_load;
|
||||
|
||||
agx_index bindless = agx_immediate(0), texture;
|
||||
if (intr->intrinsic == nir_intrinsic_bindless_image_load)
|
||||
if (intr->intrinsic == nir_intrinsic_bindless_image_load ||
|
||||
intr->intrinsic == nir_intrinsic_bindless_image_sparse_load) {
|
||||
|
||||
texture = agx_translate_bindless_handle(b, &intr->src[0], &bindless);
|
||||
else if (nir_src_is_const(intr->src[0]) &&
|
||||
nir_src_as_uint(intr->src[0]) < 0x100)
|
||||
} else if (nir_src_is_const(intr->src[0]) &&
|
||||
nir_src_as_uint(intr->src[0]) < 0x100) {
|
||||
texture = agx_immediate(nir_src_as_uint(intr->src[0]));
|
||||
else
|
||||
} else {
|
||||
texture = agx_src_index(&intr->src[0]);
|
||||
}
|
||||
|
||||
assert(nir_src_num_components(intr->src[1]) == 4);
|
||||
agx_index coord[4] = {
|
||||
@@ -1146,12 +1151,13 @@ agx_emit_image_load(agx_builder *b, agx_index dst, nir_intrinsic_instr *intr)
|
||||
}
|
||||
|
||||
agx_index coords = agx_emit_collect(b, coord_comps, coord);
|
||||
agx_index tmp = agx_vec_temp(b->shader, dst.size, 4);
|
||||
agx_index tmp = agx_vec_temp(b->shader, dst.size, sparse ? 8 : 4);
|
||||
|
||||
agx_instr *I = agx_image_load_to(
|
||||
b, tmp, coords, lod, bindless, texture, agx_immediate(0), agx_null(),
|
||||
agx_tex_dim(dim, is_array), lod_mode, 0, false, nir_is_coherent(intr));
|
||||
I->mask = agx_expand_tex_to(b, &intr->def, tmp, true);
|
||||
agx_instr *I = agx_image_load_to(b, tmp, coords, lod, bindless, texture,
|
||||
agx_immediate(0), agx_null(),
|
||||
agx_tex_dim(dim, is_array), lod_mode, 0,
|
||||
false, sparse, nir_is_coherent(intr));
|
||||
I->mask = agx_expand_tex_to(b, &intr->def, tmp, !sparse);
|
||||
|
||||
b->shader->out->uses_txf = true;
|
||||
return NULL;
|
||||
@@ -1432,6 +1438,7 @@ agx_emit_intrinsic(agx_builder *b, nir_intrinsic_instr *instr)
|
||||
|
||||
case nir_intrinsic_image_load:
|
||||
case nir_intrinsic_bindless_image_load:
|
||||
case nir_intrinsic_bindless_image_sparse_load:
|
||||
return agx_emit_image_load(b, dst, instr);
|
||||
|
||||
case nir_intrinsic_image_store:
|
||||
@@ -2294,12 +2301,12 @@ agx_emit_tex(agx_builder *b, nir_tex_instr *instr)
|
||||
else if (!agx_is_null(compare))
|
||||
compare_offset = compare;
|
||||
|
||||
agx_index tmp = agx_vec_temp(b->shader, dst.size, 4);
|
||||
agx_index tmp = agx_vec_temp(b->shader, dst.size, instr->is_sparse ? 8 : 4);
|
||||
agx_instr *I = agx_texture_sample_to(
|
||||
b, tmp, coords, lod, bindless, texture, sampler, compare_offset,
|
||||
agx_tex_dim(instr->sampler_dim, instr->is_array), lod_mode, 0,
|
||||
!agx_is_null(packed_offset), !agx_is_null(compare),
|
||||
instr->op == nir_texop_lod, agx_gather_for_nir(instr));
|
||||
instr->op == nir_texop_lod, agx_gather_for_nir(instr), instr->is_sparse);
|
||||
|
||||
if (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms) {
|
||||
I->op = AGX_OPCODE_TEXTURE_LOAD;
|
||||
@@ -2309,8 +2316,10 @@ agx_emit_tex(agx_builder *b, nir_tex_instr *instr)
|
||||
/* Destination masking doesn't seem to work properly for gathers (because
|
||||
* it's mostly pointless), but it does show up in the lowering of
|
||||
* textureGatherOffsets. Don't try to mask the destination for gathers.
|
||||
*
|
||||
* TODO: Check if it works with sparse.
|
||||
*/
|
||||
bool masked = (instr->op != nir_texop_tg4);
|
||||
bool masked = (instr->op != nir_texop_tg4) && !instr->is_sparse;
|
||||
I->mask = agx_expand_tex_to(b, &instr->def, tmp, masked);
|
||||
}
|
||||
|
||||
|
||||
@@ -400,6 +400,7 @@ typedef struct {
|
||||
bool offset : 1;
|
||||
bool shadow : 1;
|
||||
bool query_lod : 1;
|
||||
bool sparse : 1;
|
||||
enum agx_gather gather : 3;
|
||||
|
||||
/* TODO: Handle tilebuffer ops more efficient */
|
||||
|
||||
@@ -438,6 +438,7 @@ legalize_image_lod(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *data)
|
||||
|
||||
switch (intr->intrinsic) {
|
||||
CASE(image_load, 3)
|
||||
CASE(image_sparse_load, 3)
|
||||
CASE(image_store, 4)
|
||||
CASE(image_size, 1)
|
||||
default:
|
||||
@@ -527,6 +528,9 @@ lower_buffer_image(nir_builder *b, nir_intrinsic_instr *intr)
|
||||
nir_def *coord_vector = intr->src[1].ssa;
|
||||
nir_def *coord = nir_channel(b, coord_vector, 0);
|
||||
|
||||
assert(intr->intrinsic != nir_intrinsic_bindless_image_sparse_load &&
|
||||
"sparse buffer textures not expected");
|
||||
|
||||
/* If we're not bindless, assume we don't need an offset (GL driver) */
|
||||
if (intr->intrinsic == nir_intrinsic_bindless_image_load) {
|
||||
nir_def *desc = nir_load_from_texture_handle_agx(b, intr->src[0].ssa);
|
||||
@@ -612,12 +616,14 @@ lower_images(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *data)
|
||||
case nir_intrinsic_image_load:
|
||||
case nir_intrinsic_image_store:
|
||||
case nir_intrinsic_bindless_image_load:
|
||||
case nir_intrinsic_bindless_image_sparse_load:
|
||||
case nir_intrinsic_bindless_image_store: {
|
||||
/* Legalize MSAA index */
|
||||
nir_src_rewrite(&intr->src[2], nir_u2u16(b, intr->src[2].ssa));
|
||||
|
||||
if (intr->intrinsic == nir_intrinsic_image_load ||
|
||||
intr->intrinsic == nir_intrinsic_bindless_image_load) {
|
||||
intr->intrinsic == nir_intrinsic_bindless_image_load ||
|
||||
intr->intrinsic == nir_intrinsic_bindless_image_sparse_load) {
|
||||
lower_image_load_robustness(b, intr);
|
||||
}
|
||||
|
||||
@@ -648,6 +654,19 @@ lower_images(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *data)
|
||||
nir_def_rewrite_uses(&intr->def, image_texel_address(b, intr, false));
|
||||
return true;
|
||||
|
||||
case nir_intrinsic_is_sparse_texels_resident:
|
||||
/* Residency information is in bit 0, so we need to mask. Unclear what's
|
||||
* in the upper bits. For now, let's match the blob.
|
||||
*/
|
||||
nir_def_replace(&intr->def,
|
||||
nir_ieq_imm(b, nir_iand_imm(b, intr->src[0].ssa, 1), 0));
|
||||
return true;
|
||||
|
||||
case nir_intrinsic_sparse_residency_code_and:
|
||||
nir_def_replace(&intr->def,
|
||||
nir_iand(b, intr->src[0].ssa, intr->src[1].ssa));
|
||||
return true;
|
||||
|
||||
case nir_intrinsic_image_size:
|
||||
case nir_intrinsic_image_texel_address:
|
||||
unreachable("should've been lowered");
|
||||
@@ -669,6 +688,7 @@ lower_robustness(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *data)
|
||||
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_image_deref_load:
|
||||
case nir_intrinsic_image_deref_sparse_load:
|
||||
case nir_intrinsic_image_deref_store:
|
||||
break;
|
||||
default:
|
||||
|
||||
@@ -300,6 +300,7 @@ avoid_instr(const nir_instr *instr, const void *data)
|
||||
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_bindless_image_load:
|
||||
case nir_intrinsic_bindless_image_sparse_load:
|
||||
case nir_intrinsic_bindless_image_store:
|
||||
case nir_intrinsic_bindless_image_store_block_agx:
|
||||
if (intr->src[0].ssa == def)
|
||||
|
||||
@@ -102,6 +102,7 @@ GATHER = enum("gather", {
|
||||
|
||||
OFFSET = immediate("offset", "bool")
|
||||
SHADOW = immediate("shadow", "bool")
|
||||
SPARSE = immediate("sparse", "bool")
|
||||
QUERY_LOD = immediate("query_lod", "bool")
|
||||
COHERENT = immediate("coherent", "bool")
|
||||
SCOREBOARD = immediate("scoreboard")
|
||||
@@ -314,11 +315,11 @@ op("fcmp", _, srcs = 2, imms = [FCOND, INVERT_COND])
|
||||
op("texture_sample",
|
||||
encoding = (0x31, 0x7F, 8, 10), # XXX WRONG SIZE
|
||||
srcs = 6, imms = [DIM, LOD_MODE, MASK, SCOREBOARD, OFFSET, SHADOW,
|
||||
QUERY_LOD, GATHER])
|
||||
QUERY_LOD, GATHER, SPARSE])
|
||||
for memory, can_reorder in [("texture", True), ("image", False)]:
|
||||
coherency = [COHERENT] if not can_reorder else []
|
||||
op(f"{memory}_load", encoding = (0x71, 0x7F, 8, 10), # XXX WRONG SIZE
|
||||
srcs = 6, imms = [DIM, LOD_MODE, MASK, SCOREBOARD, OFFSET] + coherency,
|
||||
srcs = 6, imms = [DIM, LOD_MODE, MASK, SCOREBOARD, OFFSET, SPARSE] + coherency,
|
||||
can_reorder = can_reorder,
|
||||
schedule_class = "none" if can_reorder else "load")
|
||||
|
||||
|
||||
@@ -893,10 +893,11 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups,
|
||||
}
|
||||
|
||||
uint32_t extend = ((U & BITFIELD_MASK(5)) << 0) | (kill << 5) |
|
||||
((I->dim >> 3) << 7) | ((R >> 6) << 8) |
|
||||
((C >> 6) << 10) | ((D >> 6) << 12) | ((T >> 6) << 14) |
|
||||
((O & BITFIELD_MASK(6)) << 16) | (I->gather << 23) |
|
||||
(I->offset << 27) | ((S >> 6) << 28) | ((O >> 6) << 30);
|
||||
(I->sparse ? (1 << 6) : 0) | ((I->dim >> 3) << 7) |
|
||||
((R >> 6) << 8) | ((C >> 6) << 10) | ((D >> 6) << 12) |
|
||||
((T >> 6) << 14) | ((O & BITFIELD_MASK(6)) << 16) |
|
||||
(I->gather << 23) | (I->offset << 27) |
|
||||
((S >> 6) << 28) | ((O >> 6) << 30);
|
||||
|
||||
bool L = (extend != 0);
|
||||
|
||||
|
||||
@@ -1614,7 +1614,7 @@ agx_ra(agx_context *ctx)
|
||||
assert(ins->src[0].type == AGX_INDEX_REGISTER ||
|
||||
ins->src[0].type == AGX_INDEX_UNIFORM);
|
||||
|
||||
struct agx_copy copies[4];
|
||||
struct agx_copy copies[8];
|
||||
assert(ins->nr_dests <= ARRAY_SIZE(copies));
|
||||
|
||||
unsigned n = 0;
|
||||
|
||||
@@ -166,8 +166,11 @@ agx_write_registers(const agx_instr *I, unsigned d)
|
||||
case AGX_OPCODE_IMAGE_LOAD:
|
||||
case AGX_OPCODE_TEXTURE_LOAD:
|
||||
case AGX_OPCODE_TEXTURE_SAMPLE:
|
||||
/* Even when masked out, these clobber 4 registers */
|
||||
return 4 * size;
|
||||
/* Even when masked out, these clobber 4 registers.
|
||||
*
|
||||
* TODO: Figure out the sparse interaction.
|
||||
*/
|
||||
return (I->sparse ? 8 : 4) * size;
|
||||
|
||||
case AGX_OPCODE_DEVICE_LOAD:
|
||||
case AGX_OPCODE_LOCAL_LOAD:
|
||||
|
||||
Reference in New Issue
Block a user