agx: implement sparse residency queries

hw matches NIR well - just an extra destination on the texture instruction.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33682>
This commit is contained in:
Alyssa Rosenzweig
2025-01-22 15:12:21 -05:00
committed by Marge Bot
parent 93bccc0914
commit 739807944d
8 changed files with 61 additions and 25 deletions

View File

@@ -1066,11 +1066,12 @@ agx_expand_tex_to(agx_builder *b, nir_def *def, agx_index src, bool masked)
if (!masked)
mask = (nir_component_mask_t)BITFIELD_MASK(nr_channels);
agx_index packed_channels[4] = {agx_null()};
agx_index unpacked_channels[4] = {agx_null()};
agx_index packed_channels[8] = {agx_null()};
agx_index unpacked_channels[8] = {agx_null()};
/* Hardware writes the masked components contiguously, expand out for NIR */
agx_emit_split(b, packed_channels, src, 4 /* XXX: why not nr_channels */);
agx_emit_split(b, packed_channels, src,
ALIGN_POT(nr_channels, 4) /* XXX: why not nr_channels */);
for (unsigned i = 0; i < nr_channels; ++i) {
unpacked_channels[i] =
@@ -1089,15 +1090,19 @@ agx_emit_image_load(agx_builder *b, agx_index dst, nir_intrinsic_instr *intr)
agx_index ms_index = agx_src_index(&intr->src[2]);
agx_index lod = agx_src_index(&intr->src[3]);
enum agx_lod_mode lod_mode = AGX_LOD_MODE_LOD_MIN;
bool sparse = intr->intrinsic == nir_intrinsic_bindless_image_sparse_load;
agx_index bindless = agx_immediate(0), texture;
if (intr->intrinsic == nir_intrinsic_bindless_image_load)
if (intr->intrinsic == nir_intrinsic_bindless_image_load ||
intr->intrinsic == nir_intrinsic_bindless_image_sparse_load) {
texture = agx_translate_bindless_handle(b, &intr->src[0], &bindless);
else if (nir_src_is_const(intr->src[0]) &&
nir_src_as_uint(intr->src[0]) < 0x100)
} else if (nir_src_is_const(intr->src[0]) &&
nir_src_as_uint(intr->src[0]) < 0x100) {
texture = agx_immediate(nir_src_as_uint(intr->src[0]));
else
} else {
texture = agx_src_index(&intr->src[0]);
}
assert(nir_src_num_components(intr->src[1]) == 4);
agx_index coord[4] = {
@@ -1146,12 +1151,13 @@ agx_emit_image_load(agx_builder *b, agx_index dst, nir_intrinsic_instr *intr)
}
agx_index coords = agx_emit_collect(b, coord_comps, coord);
agx_index tmp = agx_vec_temp(b->shader, dst.size, 4);
agx_index tmp = agx_vec_temp(b->shader, dst.size, sparse ? 8 : 4);
agx_instr *I = agx_image_load_to(
b, tmp, coords, lod, bindless, texture, agx_immediate(0), agx_null(),
agx_tex_dim(dim, is_array), lod_mode, 0, false, nir_is_coherent(intr));
I->mask = agx_expand_tex_to(b, &intr->def, tmp, true);
agx_instr *I = agx_image_load_to(b, tmp, coords, lod, bindless, texture,
agx_immediate(0), agx_null(),
agx_tex_dim(dim, is_array), lod_mode, 0,
false, sparse, nir_is_coherent(intr));
I->mask = agx_expand_tex_to(b, &intr->def, tmp, !sparse);
b->shader->out->uses_txf = true;
return NULL;
@@ -1432,6 +1438,7 @@ agx_emit_intrinsic(agx_builder *b, nir_intrinsic_instr *instr)
case nir_intrinsic_image_load:
case nir_intrinsic_bindless_image_load:
case nir_intrinsic_bindless_image_sparse_load:
return agx_emit_image_load(b, dst, instr);
case nir_intrinsic_image_store:
@@ -2294,12 +2301,12 @@ agx_emit_tex(agx_builder *b, nir_tex_instr *instr)
else if (!agx_is_null(compare))
compare_offset = compare;
agx_index tmp = agx_vec_temp(b->shader, dst.size, 4);
agx_index tmp = agx_vec_temp(b->shader, dst.size, instr->is_sparse ? 8 : 4);
agx_instr *I = agx_texture_sample_to(
b, tmp, coords, lod, bindless, texture, sampler, compare_offset,
agx_tex_dim(instr->sampler_dim, instr->is_array), lod_mode, 0,
!agx_is_null(packed_offset), !agx_is_null(compare),
instr->op == nir_texop_lod, agx_gather_for_nir(instr));
instr->op == nir_texop_lod, agx_gather_for_nir(instr), instr->is_sparse);
if (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms) {
I->op = AGX_OPCODE_TEXTURE_LOAD;
@@ -2309,8 +2316,10 @@ agx_emit_tex(agx_builder *b, nir_tex_instr *instr)
/* Destination masking doesn't seem to work properly for gathers (because
* it's mostly pointless), but it does show up in the lowering of
* textureGatherOffsets. Don't try to mask the destination for gathers.
*
* TODO: Check if it works with sparse.
*/
bool masked = (instr->op != nir_texop_tg4);
bool masked = (instr->op != nir_texop_tg4) && !instr->is_sparse;
I->mask = agx_expand_tex_to(b, &instr->def, tmp, masked);
}

View File

@@ -400,6 +400,7 @@ typedef struct {
bool offset : 1;
bool shadow : 1;
bool query_lod : 1;
bool sparse : 1;
enum agx_gather gather : 3;
/* TODO: Handle tilebuffer ops more efficiently */

View File

@@ -438,6 +438,7 @@ legalize_image_lod(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *data)
switch (intr->intrinsic) {
CASE(image_load, 3)
CASE(image_sparse_load, 3)
CASE(image_store, 4)
CASE(image_size, 1)
default:
@@ -527,6 +528,9 @@ lower_buffer_image(nir_builder *b, nir_intrinsic_instr *intr)
nir_def *coord_vector = intr->src[1].ssa;
nir_def *coord = nir_channel(b, coord_vector, 0);
assert(intr->intrinsic != nir_intrinsic_bindless_image_sparse_load &&
"sparse buffer textures not expected");
/* If we're not bindless, assume we don't need an offset (GL driver) */
if (intr->intrinsic == nir_intrinsic_bindless_image_load) {
nir_def *desc = nir_load_from_texture_handle_agx(b, intr->src[0].ssa);
@@ -612,12 +616,14 @@ lower_images(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *data)
case nir_intrinsic_image_load:
case nir_intrinsic_image_store:
case nir_intrinsic_bindless_image_load:
case nir_intrinsic_bindless_image_sparse_load:
case nir_intrinsic_bindless_image_store: {
/* Legalize MSAA index */
nir_src_rewrite(&intr->src[2], nir_u2u16(b, intr->src[2].ssa));
if (intr->intrinsic == nir_intrinsic_image_load ||
intr->intrinsic == nir_intrinsic_bindless_image_load) {
intr->intrinsic == nir_intrinsic_bindless_image_load ||
intr->intrinsic == nir_intrinsic_bindless_image_sparse_load) {
lower_image_load_robustness(b, intr);
}
@@ -648,6 +654,19 @@ lower_images(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *data)
nir_def_rewrite_uses(&intr->def, image_texel_address(b, intr, false));
return true;
case nir_intrinsic_is_sparse_texels_resident:
/* Residency information is in bit 0, so we need to mask. Unclear what's
* in the upper bits. For now, let's match the blob.
*/
nir_def_replace(&intr->def,
nir_ieq_imm(b, nir_iand_imm(b, intr->src[0].ssa, 1), 0));
return true;
case nir_intrinsic_sparse_residency_code_and:
nir_def_replace(&intr->def,
nir_iand(b, intr->src[0].ssa, intr->src[1].ssa));
return true;
case nir_intrinsic_image_size:
case nir_intrinsic_image_texel_address:
unreachable("should've been lowered");
@@ -669,6 +688,7 @@ lower_robustness(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *data)
switch (intr->intrinsic) {
case nir_intrinsic_image_deref_load:
case nir_intrinsic_image_deref_sparse_load:
case nir_intrinsic_image_deref_store:
break;
default:

View File

@@ -300,6 +300,7 @@ avoid_instr(const nir_instr *instr, const void *data)
switch (intr->intrinsic) {
case nir_intrinsic_bindless_image_load:
case nir_intrinsic_bindless_image_sparse_load:
case nir_intrinsic_bindless_image_store:
case nir_intrinsic_bindless_image_store_block_agx:
if (intr->src[0].ssa == def)

View File

@@ -102,6 +102,7 @@ GATHER = enum("gather", {
OFFSET = immediate("offset", "bool")
SHADOW = immediate("shadow", "bool")
SPARSE = immediate("sparse", "bool")
QUERY_LOD = immediate("query_lod", "bool")
COHERENT = immediate("coherent", "bool")
SCOREBOARD = immediate("scoreboard")
@@ -314,11 +315,11 @@ op("fcmp", _, srcs = 2, imms = [FCOND, INVERT_COND])
op("texture_sample",
encoding = (0x31, 0x7F, 8, 10), # XXX WRONG SIZE
srcs = 6, imms = [DIM, LOD_MODE, MASK, SCOREBOARD, OFFSET, SHADOW,
QUERY_LOD, GATHER])
QUERY_LOD, GATHER, SPARSE])
for memory, can_reorder in [("texture", True), ("image", False)]:
coherency = [COHERENT] if not can_reorder else []
op(f"{memory}_load", encoding = (0x71, 0x7F, 8, 10), # XXX WRONG SIZE
srcs = 6, imms = [DIM, LOD_MODE, MASK, SCOREBOARD, OFFSET] + coherency,
srcs = 6, imms = [DIM, LOD_MODE, MASK, SCOREBOARD, OFFSET, SPARSE] + coherency,
can_reorder = can_reorder,
schedule_class = "none" if can_reorder else "load")

View File

@@ -893,10 +893,11 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups,
}
uint32_t extend = ((U & BITFIELD_MASK(5)) << 0) | (kill << 5) |
((I->dim >> 3) << 7) | ((R >> 6) << 8) |
((C >> 6) << 10) | ((D >> 6) << 12) | ((T >> 6) << 14) |
((O & BITFIELD_MASK(6)) << 16) | (I->gather << 23) |
(I->offset << 27) | ((S >> 6) << 28) | ((O >> 6) << 30);
(I->sparse ? (1 << 6) : 0) | ((I->dim >> 3) << 7) |
((R >> 6) << 8) | ((C >> 6) << 10) | ((D >> 6) << 12) |
((T >> 6) << 14) | ((O & BITFIELD_MASK(6)) << 16) |
(I->gather << 23) | (I->offset << 27) |
((S >> 6) << 28) | ((O >> 6) << 30);
bool L = (extend != 0);

View File

@@ -1614,7 +1614,7 @@ agx_ra(agx_context *ctx)
assert(ins->src[0].type == AGX_INDEX_REGISTER ||
ins->src[0].type == AGX_INDEX_UNIFORM);
struct agx_copy copies[4];
struct agx_copy copies[8];
assert(ins->nr_dests <= ARRAY_SIZE(copies));
unsigned n = 0;

View File

@@ -166,8 +166,11 @@ agx_write_registers(const agx_instr *I, unsigned d)
case AGX_OPCODE_IMAGE_LOAD:
case AGX_OPCODE_TEXTURE_LOAD:
case AGX_OPCODE_TEXTURE_SAMPLE:
/* Even when masked out, these clobber 4 registers */
return 4 * size;
/* Even when masked out, these clobber 4 registers.
*
* TODO: Figure out the sparse interaction.
*/
return (I->sparse ? 8 : 4) * size;
case AGX_OPCODE_DEVICE_LOAD:
case AGX_OPCODE_LOCAL_LOAD: