agx: implement sparse residency queries

hw matches NIR well - just an extra destination on the texture instruction.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33682>
This commit is contained in:
Alyssa Rosenzweig
2025-01-22 15:12:21 -05:00
committed by Marge Bot
parent 93bccc0914
commit 739807944d
8 changed files with 61 additions and 25 deletions

View File

@@ -1066,11 +1066,12 @@ agx_expand_tex_to(agx_builder *b, nir_def *def, agx_index src, bool masked)
if (!masked)
mask = (nir_component_mask_t)BITFIELD_MASK(nr_channels);
agx_index packed_channels[4] = {agx_null()};
agx_index unpacked_channels[4] = {agx_null()};
agx_index packed_channels[8] = {agx_null()};
agx_index unpacked_channels[8] = {agx_null()};
/* Hardware writes the masked components contiguously, expand out for NIR */
agx_emit_split(b, packed_channels, src, 4 /* XXX: why not nr_channels */);
agx_emit_split(b, packed_channels, src,
ALIGN_POT(nr_channels, 4) /* XXX: why not nr_channels */);
for (unsigned i = 0; i < nr_channels; ++i) {
unpacked_channels[i] =
@@ -1089,15 +1090,19 @@ agx_emit_image_load(agx_builder *b, agx_index dst, nir_intrinsic_instr *intr)
agx_index ms_index = agx_src_index(&intr->src[2]);
agx_index lod = agx_src_index(&intr->src[3]);
enum agx_lod_mode lod_mode = AGX_LOD_MODE_LOD_MIN;
bool sparse = intr->intrinsic == nir_intrinsic_bindless_image_sparse_load;
agx_index bindless = agx_immediate(0), texture;
if (intr->intrinsic == nir_intrinsic_bindless_image_load)
if (intr->intrinsic == nir_intrinsic_bindless_image_load ||
intr->intrinsic == nir_intrinsic_bindless_image_sparse_load) {
texture = agx_translate_bindless_handle(b, &intr->src[0], &bindless);
else if (nir_src_is_const(intr->src[0]) &&
nir_src_as_uint(intr->src[0]) < 0x100)
} else if (nir_src_is_const(intr->src[0]) &&
nir_src_as_uint(intr->src[0]) < 0x100) {
texture = agx_immediate(nir_src_as_uint(intr->src[0]));
else
} else {
texture = agx_src_index(&intr->src[0]);
}
assert(nir_src_num_components(intr->src[1]) == 4);
agx_index coord[4] = {
@@ -1146,12 +1151,13 @@ agx_emit_image_load(agx_builder *b, agx_index dst, nir_intrinsic_instr *intr)
}
agx_index coords = agx_emit_collect(b, coord_comps, coord);
agx_index tmp = agx_vec_temp(b->shader, dst.size, 4);
agx_index tmp = agx_vec_temp(b->shader, dst.size, sparse ? 8 : 4);
agx_instr *I = agx_image_load_to(
b, tmp, coords, lod, bindless, texture, agx_immediate(0), agx_null(),
agx_tex_dim(dim, is_array), lod_mode, 0, false, nir_is_coherent(intr));
I->mask = agx_expand_tex_to(b, &intr->def, tmp, true);
agx_instr *I = agx_image_load_to(b, tmp, coords, lod, bindless, texture,
agx_immediate(0), agx_null(),
agx_tex_dim(dim, is_array), lod_mode, 0,
false, sparse, nir_is_coherent(intr));
I->mask = agx_expand_tex_to(b, &intr->def, tmp, !sparse);
b->shader->out->uses_txf = true;
return NULL;
@@ -1432,6 +1438,7 @@ agx_emit_intrinsic(agx_builder *b, nir_intrinsic_instr *instr)
case nir_intrinsic_image_load:
case nir_intrinsic_bindless_image_load:
case nir_intrinsic_bindless_image_sparse_load:
return agx_emit_image_load(b, dst, instr);
case nir_intrinsic_image_store:
@@ -2294,12 +2301,12 @@ agx_emit_tex(agx_builder *b, nir_tex_instr *instr)
else if (!agx_is_null(compare))
compare_offset = compare;
agx_index tmp = agx_vec_temp(b->shader, dst.size, 4);
agx_index tmp = agx_vec_temp(b->shader, dst.size, instr->is_sparse ? 8 : 4);
agx_instr *I = agx_texture_sample_to(
b, tmp, coords, lod, bindless, texture, sampler, compare_offset,
agx_tex_dim(instr->sampler_dim, instr->is_array), lod_mode, 0,
!agx_is_null(packed_offset), !agx_is_null(compare),
instr->op == nir_texop_lod, agx_gather_for_nir(instr));
instr->op == nir_texop_lod, agx_gather_for_nir(instr), instr->is_sparse);
if (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms) {
I->op = AGX_OPCODE_TEXTURE_LOAD;
@@ -2309,8 +2316,10 @@ agx_emit_tex(agx_builder *b, nir_tex_instr *instr)
/* Destination masking doesn't seem to work properly for gathers (because
* it's mostly pointless), but it does show up in the lowering of
* textureGatherOffsets. Don't try to mask the destination for gathers.
*
* TODO: Check if it works with sparse.
*/
bool masked = (instr->op != nir_texop_tg4);
bool masked = (instr->op != nir_texop_tg4) && !instr->is_sparse;
I->mask = agx_expand_tex_to(b, &instr->def, tmp, masked);
}

View File

@@ -400,6 +400,7 @@ typedef struct {
bool offset : 1;
bool shadow : 1;
bool query_lod : 1;
bool sparse : 1;
enum agx_gather gather : 3;
/* TODO: Handle tilebuffer ops more efficiently */

View File

@@ -438,6 +438,7 @@ legalize_image_lod(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *data)
switch (intr->intrinsic) {
CASE(image_load, 3)
CASE(image_sparse_load, 3)
CASE(image_store, 4)
CASE(image_size, 1)
default:
@@ -527,6 +528,9 @@ lower_buffer_image(nir_builder *b, nir_intrinsic_instr *intr)
nir_def *coord_vector = intr->src[1].ssa;
nir_def *coord = nir_channel(b, coord_vector, 0);
assert(intr->intrinsic != nir_intrinsic_bindless_image_sparse_load &&
"sparse buffer textures not expected");
/* If we're not bindless, assume we don't need an offset (GL driver) */
if (intr->intrinsic == nir_intrinsic_bindless_image_load) {
nir_def *desc = nir_load_from_texture_handle_agx(b, intr->src[0].ssa);
@@ -612,12 +616,14 @@ lower_images(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *data)
case nir_intrinsic_image_load:
case nir_intrinsic_image_store:
case nir_intrinsic_bindless_image_load:
case nir_intrinsic_bindless_image_sparse_load:
case nir_intrinsic_bindless_image_store: {
/* Legalize MSAA index */
nir_src_rewrite(&intr->src[2], nir_u2u16(b, intr->src[2].ssa));
if (intr->intrinsic == nir_intrinsic_image_load ||
intr->intrinsic == nir_intrinsic_bindless_image_load) {
intr->intrinsic == nir_intrinsic_bindless_image_load ||
intr->intrinsic == nir_intrinsic_bindless_image_sparse_load) {
lower_image_load_robustness(b, intr);
}
@@ -648,6 +654,19 @@ lower_images(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *data)
nir_def_rewrite_uses(&intr->def, image_texel_address(b, intr, false));
return true;
case nir_intrinsic_is_sparse_texels_resident:
/* Residency information is in bit 0, so we need to mask. Unclear what's
* in the upper bits. For now, let's match the blob.
*/
nir_def_replace(&intr->def,
nir_ieq_imm(b, nir_iand_imm(b, intr->src[0].ssa, 1), 0));
return true;
case nir_intrinsic_sparse_residency_code_and:
nir_def_replace(&intr->def,
nir_iand(b, intr->src[0].ssa, intr->src[1].ssa));
return true;
case nir_intrinsic_image_size:
case nir_intrinsic_image_texel_address:
unreachable("should've been lowered");
@@ -669,6 +688,7 @@ lower_robustness(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *data)
switch (intr->intrinsic) {
case nir_intrinsic_image_deref_load:
case nir_intrinsic_image_deref_sparse_load:
case nir_intrinsic_image_deref_store:
break;
default:

View File

@@ -300,6 +300,7 @@ avoid_instr(const nir_instr *instr, const void *data)
switch (intr->intrinsic) {
case nir_intrinsic_bindless_image_load:
case nir_intrinsic_bindless_image_sparse_load:
case nir_intrinsic_bindless_image_store:
case nir_intrinsic_bindless_image_store_block_agx:
if (intr->src[0].ssa == def)

View File

@@ -102,6 +102,7 @@ GATHER = enum("gather", {
OFFSET = immediate("offset", "bool")
SHADOW = immediate("shadow", "bool")
SPARSE = immediate("sparse", "bool")
QUERY_LOD = immediate("query_lod", "bool")
COHERENT = immediate("coherent", "bool")
SCOREBOARD = immediate("scoreboard")
@@ -314,11 +315,11 @@ op("fcmp", _, srcs = 2, imms = [FCOND, INVERT_COND])
op("texture_sample",
encoding = (0x31, 0x7F, 8, 10), # XXX WRONG SIZE
srcs = 6, imms = [DIM, LOD_MODE, MASK, SCOREBOARD, OFFSET, SHADOW,
QUERY_LOD, GATHER])
QUERY_LOD, GATHER, SPARSE])
for memory, can_reorder in [("texture", True), ("image", False)]:
coherency = [COHERENT] if not can_reorder else []
op(f"{memory}_load", encoding = (0x71, 0x7F, 8, 10), # XXX WRONG SIZE
srcs = 6, imms = [DIM, LOD_MODE, MASK, SCOREBOARD, OFFSET] + coherency,
srcs = 6, imms = [DIM, LOD_MODE, MASK, SCOREBOARD, OFFSET, SPARSE] + coherency,
can_reorder = can_reorder,
schedule_class = "none" if can_reorder else "load")

View File

@@ -893,10 +893,11 @@ agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups,
}
uint32_t extend = ((U & BITFIELD_MASK(5)) << 0) | (kill << 5) |
((I->dim >> 3) << 7) | ((R >> 6) << 8) |
((C >> 6) << 10) | ((D >> 6) << 12) | ((T >> 6) << 14) |
((O & BITFIELD_MASK(6)) << 16) | (I->gather << 23) |
(I->offset << 27) | ((S >> 6) << 28) | ((O >> 6) << 30);
(I->sparse ? (1 << 6) : 0) | ((I->dim >> 3) << 7) |
((R >> 6) << 8) | ((C >> 6) << 10) | ((D >> 6) << 12) |
((T >> 6) << 14) | ((O & BITFIELD_MASK(6)) << 16) |
(I->gather << 23) | (I->offset << 27) |
((S >> 6) << 28) | ((O >> 6) << 30);
bool L = (extend != 0);

View File

@@ -1614,7 +1614,7 @@ agx_ra(agx_context *ctx)
assert(ins->src[0].type == AGX_INDEX_REGISTER ||
ins->src[0].type == AGX_INDEX_UNIFORM);
struct agx_copy copies[4];
struct agx_copy copies[8];
assert(ins->nr_dests <= ARRAY_SIZE(copies));
unsigned n = 0;

View File

@@ -166,8 +166,11 @@ agx_write_registers(const agx_instr *I, unsigned d)
case AGX_OPCODE_IMAGE_LOAD:
case AGX_OPCODE_TEXTURE_LOAD:
case AGX_OPCODE_TEXTURE_SAMPLE:
/* Even when masked out, these clobber 4 registers */
return 4 * size;
/* Even when masked out, these clobber 4 registers.
*
* TODO: Figure out the sparse interaction.
*/
return (I->sparse ? 8 : 4) * size;
case AGX_OPCODE_DEVICE_LOAD:
case AGX_OPCODE_LOCAL_LOAD: