pan/va: Lower nir_texop_txd to TEX_GRADIENT with derivs followed by TEX_SINGLE

On v9+, use TEX_GRADIENT to convert user-provided gradient into a gradient descriptor consumed by the hardware, and then supply that descriptor to TEX_SINGLE. Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29521>
2024-10-23 03:44:17 +00:00
parent 9199c25e5e
commit 52226d0e3f
3 changed files with 64 additions and 18 deletions
--- a/src/panfrost/compiler/bifrost_compile.c
+++ b/src/panfrost/compiler/bifrost_compile.c
@@ -3711,7 +3711,8 @@ enum valhall_tex_sreg {
   VALHALL_TEX_SREG_SHADOW = 5,
   VALHALL_TEX_SREG_OFFSETMS = 6,
   VALHALL_TEX_SREG_LOD = 7,
-   VALHALL_TEX_SREG_GRDESC = 8,
+   VALHALL_TEX_SREG_GRDESC0 = 8,
+   VALHALL_TEX_SREG_GRDESC1 = 9,
   VALHALL_TEX_SREG_COUNT,
 };

@@ -3723,12 +3724,15 @@ bi_emit_tex_valhall(bi_builder *b, nir_tex_instr *instr)

   bool has_lod_mode = (instr->op == nir_texop_tex) ||
                       (instr->op == nir_texop_txl) ||
+                       (instr->op == nir_texop_txd) ||
                       (instr->op == nir_texop_txb);

   /* 32-bit indices to be allocated as consecutive staging registers */
   bi_index sregs[VALHALL_TEX_SREG_COUNT] = {};
   bi_index sampler = bi_imm_u32(instr->sampler_index);
   bi_index texture = bi_imm_u32(instr->texture_index);
+   bi_index ddx = bi_null();
+   bi_index ddy = bi_null();

   for (unsigned i = 0; i < instr->num_srcs; ++i) {
      bi_index index = bi_src_index(&instr->src[i].src);
@@ -3774,6 +3778,14 @@ bi_emit_tex_valhall(bi_builder *b, nir_tex_instr *instr)
         }
         break;

+      case nir_tex_src_ddx:
+	 ddx = index;
+	 break;
+
+      case nir_tex_src_ddy:
+	 ddy = index;
+	 break;
+
      case nir_tex_src_bias:
         /* Upper 16-bits interpreted as a clamp, leave zero */
         assert(sz == 16 || sz == 32);
@@ -3819,19 +3831,6 @@ bi_emit_tex_valhall(bi_builder *b, nir_tex_instr *instr)
      explicit_offset = true;
   }

-   /* Allocate staging registers contiguously by compacting the array. */
-   unsigned sr_count = 0;
-
-   for (unsigned i = 0; i < ARRAY_SIZE(sregs); ++i) {
-      if (!bi_is_null(sregs[i]))
-         sregs[sr_count++] = sregs[i];
-   }
-
-   bi_index idx = sr_count ? bi_temp(b->shader) : bi_null();
-
-   if (sr_count)
-      bi_make_vec_to(b, idx, sregs, NULL, sr_count, 32);
-
   bool narrow_indices = va_is_valid_const_narrow_index(texture) &&
                         va_is_valid_const_narrow_index(sampler);

@@ -3860,6 +3859,50 @@ bi_emit_tex_valhall(bi_builder *b, nir_tex_instr *instr)

   enum bi_dimension dim = valhall_tex_dimension(instr->sampler_dim);

+   if (!bi_is_null(ddx) || !bi_is_null(ddy)) {
+      unsigned coords_comp_count =
+         instr->coord_components -
+         (instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE);
+      assert(!bi_is_null(ddx) && !bi_is_null(ddy));
+
+      lod_mode = BI_VA_LOD_MODE_GRDESC;
+
+      bi_index derivs[6] = {
+         bi_extract(b, ddx, 0),
+         bi_extract(b, ddy, 0),
+         coords_comp_count > 1 ? bi_extract(b, ddx, 1) : bi_null(),
+         coords_comp_count > 1 ? bi_extract(b, ddy, 1) : bi_null(),
+         coords_comp_count > 2 ? bi_extract(b, ddx, 2) : bi_null(),
+         coords_comp_count > 2 ? bi_extract(b, ddy, 2) : bi_null(),
+      };
+      bi_index derivs_packed = bi_temp(b->shader);
+      bi_make_vec_to(b, derivs_packed, derivs, NULL, coords_comp_count * 2, 32);
+      bi_index grdesc = bi_temp(b->shader);
+      bi_instr *I = bi_tex_gradient_to(b, grdesc, derivs_packed, src0, src1, dim,
+                                       !narrow_indices, 3, coords_comp_count * 2);
+      I->derivative_enable = true;
+      I->force_delta_enable = false;
+      I->lod_clamp_disable = true;
+      I->lod_bias_disable = true;
+      I->register_format = BI_REGISTER_FORMAT_U32;
+
+      bi_emit_cached_split_i32(b, grdesc, 2);
+      sregs[VALHALL_TEX_SREG_GRDESC0] = bi_extract(b, grdesc, 0);
+      sregs[VALHALL_TEX_SREG_GRDESC1] = bi_extract(b, grdesc, 1);
+   }
+
+   /* Allocate staging registers contiguously by compacting the array. */
+   unsigned sr_count = 0;
+   for (unsigned i = 0; i < ARRAY_SIZE(sregs); ++i) {
+      if (!bi_is_null(sregs[i]))
+         sregs[sr_count++] = sregs[i];
+   }
+
+   bi_index idx = sr_count ? bi_temp(b->shader) : bi_null();
+
+   if (sr_count)
+      bi_make_vec_to(b, idx, sregs, NULL, sr_count, 32);
+
   if (instr->op == nir_texop_lod) {
      assert(instr->def.num_components == 2 && instr->def.bit_size == 32);

@@ -3897,8 +3940,9 @@ bi_emit_tex_valhall(bi_builder *b, nir_tex_instr *instr)

   switch (instr->op) {
   case nir_texop_tex:
-   case nir_texop_txl:
   case nir_texop_txb:
+   case nir_texop_txl:
+   case nir_texop_txd:
      bi_tex_single_to(b, dest, idx, src0, src1, instr->is_array, dim, regfmt,
                       instr->is_shadow, explicit_offset, lod_mode,
                       !narrow_indices, mask, sr_count);
@@ -5192,7 +5236,8 @@ bifrost_preprocess_nir(nir_shader *nir, unsigned gpu_id)
                 .lower_txs_lod = true,
                 .lower_txp = ~0,
                 .lower_tg4_broadcom_swizzle = true,
-                 .lower_txd = true,
+                 .lower_txd_cube_map = true,
+                 .lower_txd = pan_arch(gpu_id) < 9,
                 .lower_invalid_implicit_lod = true,
                 .lower_index_to_offset = true,
              });
--- a/src/panfrost/compiler/compiler.h
+++ b/src/panfrost/compiler/compiler.h
@@ -371,7 +371,7 @@ bi_is_value_equiv(bi_index left, bi_index right)

 #define BI_MAX_VEC   8
 #define BI_MAX_DESTS 4
-#define BI_MAX_SRCS  6
+#define BI_MAX_SRCS  8

 typedef struct {
   /* Must be first */
--- a/src/panfrost/vulkan/panvk_vX_shader.c
+++ b/src/panfrost/vulkan/panvk_vX_shader.c
@@ -338,7 +338,8 @@ panvk_preprocess_nir(UNUSED struct vk_physical_device *vk_pdev, nir_shader *nir)
      .lower_txs_lod = true,
      .lower_txp = ~0,
      .lower_tg4_broadcom_swizzle = true,
-      .lower_txd = true,
+      .lower_txd_cube_map = true,
+      .lower_txd = PAN_ARCH < 9,
      .lower_invalid_implicit_lod = true,
   };
   NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options);