nir/lower_blend,agx,panfrost: Use lowered I/O
This is one step towards lowering I/O during shader preprocess rather than at variant create time, which helps mitigate shader variant jank. It's also a lot simpler. Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com> Reviewed-by: Timur Kristóf <timur.kristof@gmail.com> [v1] Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20836>
This commit is contained in:
committed by
Marge Bot
parent
acfda67b4f
commit
50b82ca818
@@ -431,15 +431,15 @@ nir_blend(
|
||||
}
|
||||
|
||||
static int
|
||||
color_index_for_var(const nir_variable *var)
|
||||
color_index_for_location(unsigned location)
|
||||
{
|
||||
assert(var->data.location != FRAG_RESULT_COLOR &&
|
||||
assert(location != FRAG_RESULT_COLOR &&
|
||||
"gl_FragColor must be lowered before nir_lower_blend");
|
||||
|
||||
if (var->data.location < FRAG_RESULT_DATA0)
|
||||
if (location < FRAG_RESULT_DATA0)
|
||||
return -1;
|
||||
else
|
||||
return var->data.location - FRAG_RESULT_DATA0;
|
||||
return location - FRAG_RESULT_DATA0;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -462,31 +462,44 @@ nir_blend_replace_rt(const nir_lower_blend_rt *rt)
|
||||
}
|
||||
|
||||
static bool
|
||||
nir_lower_blend_store(nir_builder *b, nir_intrinsic_instr *store,
|
||||
const nir_lower_blend_options *options)
|
||||
nir_lower_blend_instr(nir_builder *b, nir_instr *instr, void *data)
|
||||
{
|
||||
assert(store->intrinsic == nir_intrinsic_store_deref);
|
||||
const nir_lower_blend_options *options = data;
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
return false;
|
||||
|
||||
nir_variable *var = nir_intrinsic_get_var(store, 0);
|
||||
int rt = color_index_for_var(var);
|
||||
nir_intrinsic_instr *store = nir_instr_as_intrinsic(instr);
|
||||
if (store->intrinsic != nir_intrinsic_store_output)
|
||||
return false;
|
||||
|
||||
nir_io_semantics sem = nir_intrinsic_io_semantics(store);
|
||||
int rt = color_index_for_location(sem.location);
|
||||
|
||||
/* No blend lowering requested on this RT */
|
||||
if (rt < 0 || options->format[rt] == PIPE_FORMAT_NONE)
|
||||
return false;
|
||||
|
||||
b->cursor = nir_before_instr(&store->instr);
|
||||
b->cursor = nir_before_instr(instr);
|
||||
|
||||
/* Grab the input color. We always want 4 channels during blend. Dead
|
||||
* code will clean up any channels we don't need.
|
||||
*/
|
||||
assert(store->src[1].is_ssa);
|
||||
nir_ssa_def *src = nir_pad_vector(b, store->src[1].ssa, 4);
|
||||
assert(store->src[0].is_ssa);
|
||||
nir_ssa_def *src = nir_pad_vector(b, store->src[0].ssa, 4);
|
||||
|
||||
/* Grab the previous fragment color */
|
||||
var->data.fb_fetch_output = true;
|
||||
b->shader->info.outputs_read |= BITFIELD64_BIT(var->data.location);
|
||||
b->shader->info.outputs_read |= BITFIELD64_BIT(sem.location);
|
||||
b->shader->info.fs.uses_fbfetch_output = true;
|
||||
nir_ssa_def *dst = nir_pad_vector(b, nir_load_var(b, var), 4);
|
||||
sem.fb_fetch_output = true;
|
||||
|
||||
assert(nir_src_as_uint(store->src[1]) == 0 && "store_output invariant");
|
||||
|
||||
nir_ssa_def *dst =
|
||||
nir_load_output(b, 4,
|
||||
nir_src_bit_size(store->src[0]),
|
||||
nir_imm_int(b, 0),
|
||||
.dest_type = nir_intrinsic_src_type(store),
|
||||
.io_semantics = sem);
|
||||
|
||||
/* Blend the two colors per the passed options. We only call nir_blend if
|
||||
* blending is enabled with a blend mode other than replace (independent of
|
||||
@@ -508,7 +521,8 @@ nir_lower_blend_store(nir_builder *b, nir_intrinsic_instr *store,
|
||||
if (options->rt[rt].colormask != BITFIELD_MASK(4))
|
||||
blended = nir_color_mask(b, options->rt[rt].colormask, blended, dst);
|
||||
|
||||
const unsigned num_components = glsl_get_vector_elements(var->type);
|
||||
const unsigned num_components =
|
||||
util_format_get_nr_components(options->format[rt]);
|
||||
|
||||
/* Shave off any components we don't want to store */
|
||||
blended = nir_trim_vector(b, blended, num_components);
|
||||
@@ -519,118 +533,20 @@ nir_lower_blend_store(nir_builder *b, nir_intrinsic_instr *store,
|
||||
nir_component_mask(num_components));
|
||||
|
||||
/* Write out the final color instead of the input */
|
||||
nir_instr_rewrite_src_ssa(&store->instr, &store->src[1], blended);
|
||||
nir_instr_rewrite_src_ssa(instr, &store->src[0], blended);
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
nir_lower_blend_instr(nir_builder *b, nir_instr *instr, void *data)
|
||||
{
|
||||
const nir_lower_blend_options *options = data;
|
||||
|
||||
switch (instr->type) {
|
||||
case nir_instr_type_deref: {
|
||||
/* Fix up output deref types, as needed */
|
||||
nir_deref_instr *deref = nir_instr_as_deref(instr);
|
||||
if (!nir_deref_mode_is(deref, nir_var_shader_out))
|
||||
return false;
|
||||
|
||||
/* Indirects must be already lowered and output variables split */
|
||||
assert(deref->deref_type == nir_deref_type_var);
|
||||
|
||||
if (deref->type == deref->var->type)
|
||||
return false;
|
||||
|
||||
deref->type = deref->var->type;
|
||||
return true;
|
||||
}
|
||||
|
||||
case nir_instr_type_intrinsic: {
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
if (intrin->intrinsic != nir_intrinsic_load_deref &&
|
||||
intrin->intrinsic != nir_intrinsic_store_deref)
|
||||
return false;
|
||||
|
||||
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
|
||||
if (!nir_deref_mode_is(deref, nir_var_shader_out))
|
||||
return false;
|
||||
|
||||
assert(glsl_type_is_vector_or_scalar(deref->type));
|
||||
|
||||
if (intrin->intrinsic == nir_intrinsic_load_deref) {
|
||||
/* We need to fix up framebuffer if num_components changed */
|
||||
const unsigned num_components = glsl_get_vector_elements(deref->type);
|
||||
if (intrin->num_components == num_components)
|
||||
return false;
|
||||
|
||||
b->cursor = nir_after_instr(&intrin->instr);
|
||||
|
||||
assert(intrin->dest.is_ssa);
|
||||
nir_ssa_def *val = nir_resize_vector(b, &intrin->dest.ssa,
|
||||
num_components);
|
||||
intrin->num_components = num_components,
|
||||
nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa, val,
|
||||
val->parent_instr);
|
||||
return true;
|
||||
} else {
|
||||
return nir_lower_blend_store(b, intrin, options);
|
||||
}
|
||||
}
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/** Lower blending to framebuffer fetch and some math
|
||||
*
|
||||
* This pass requires that indirects are lowered and output variables split
|
||||
* so that we have a single output variable for each RT. We could go to the
|
||||
* effort of handling arrays (possibly of arrays) but, given that we need
|
||||
* indirects lowered anyway (we need constant indices to look up blend
|
||||
* functions and formats), we may as well require variables to be split.
|
||||
* This can be done by calling nir_lower_io_arrays_to_elements_no_indirect().
|
||||
* This pass requires that shader I/O is lowered to explicit load/store
|
||||
* instructions using nir_lower_io.
|
||||
*/
|
||||
void
|
||||
nir_lower_blend(nir_shader *shader, const nir_lower_blend_options *options)
|
||||
{
|
||||
assert(shader->info.stage == MESA_SHADER_FRAGMENT);
|
||||
|
||||
/* Re-type any blended output variables to have the same number of
|
||||
* components as the image format. The GL 4.6 Spec says:
|
||||
*
|
||||
* "If a fragment shader writes to none of gl_FragColor, gl_FragData,
|
||||
* nor any user-defined output variables, the values of the fragment
|
||||
* colors following shader execution are undefined, and may differ for
|
||||
* each fragment color. If some, but not all elements of gl_FragData or
|
||||
* of the user-defined output variables are written, the values of
|
||||
* fragment colors corresponding to unwritten elements or variables are
|
||||
* similarly undefined."
|
||||
*
|
||||
* Note the phrase "following shader execution". Those color values are
|
||||
* then supposed to go into blending which may, depending on the blend
|
||||
* mode, apply constraints that result in well-defined rendering. It's
|
||||
* fine if we have to pad out a value with undef but we then need to blend
|
||||
* that garbage value to ensure correct results.
|
||||
*
|
||||
* This may also, depending on output format, be a small optimization
|
||||
* allowing NIR to dead-code unused calculations.
|
||||
*/
|
||||
nir_foreach_shader_out_variable(var, shader) {
|
||||
int rt = color_index_for_var(var);
|
||||
|
||||
/* No blend lowering requested on this RT */
|
||||
if (rt < 0 || options->format[rt] == PIPE_FORMAT_NONE)
|
||||
continue;
|
||||
|
||||
const unsigned num_format_components =
|
||||
util_format_get_nr_components(options->format[rt]);
|
||||
|
||||
/* Indirects must be already lowered and output variables split */
|
||||
assert(glsl_type_is_vector_or_scalar(var->type));
|
||||
var->type = glsl_replace_vector_type(var->type, num_format_components);
|
||||
}
|
||||
|
||||
nir_shader_instructions_pass(shader, nir_lower_blend_instr,
|
||||
nir_metadata_block_index |
|
||||
nir_metadata_dominance,
|
||||
|
||||
@@ -1373,21 +1373,6 @@ agx_compile_variant(struct agx_device *dev, struct agx_uncompiled_shader *so,
|
||||
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
struct asahi_fs_shader_key *key = &key_->fs;
|
||||
|
||||
nir_lower_blend_options opts = {
|
||||
.scalar_blend_const = true,
|
||||
.logicop_enable = key->blend.logicop_enable,
|
||||
.logicop_func = key->blend.logicop_func,
|
||||
};
|
||||
|
||||
static_assert(ARRAY_SIZE(opts.format) == PIPE_MAX_COLOR_BUFS,
|
||||
"max RTs out of sync");
|
||||
|
||||
for (unsigned i = 0; i < PIPE_MAX_COLOR_BUFS; ++i)
|
||||
opts.format[i] = key->rt_formats[i];
|
||||
|
||||
memcpy(opts.rt, key->blend.rt, sizeof(opts.rt));
|
||||
NIR_PASS_V(nir, nir_lower_blend, &opts);
|
||||
|
||||
if (key->clip_plane_enable) {
|
||||
NIR_PASS_V(nir, nir_lower_clip_fs, key->clip_plane_enable, false);
|
||||
}
|
||||
@@ -1405,6 +1390,21 @@ agx_compile_variant(struct agx_device *dev, struct agx_uncompiled_shader *so,
|
||||
struct agx_tilebuffer_layout tib =
|
||||
agx_build_tilebuffer_layout(key->rt_formats, key->nr_cbufs, 1);
|
||||
|
||||
nir_lower_blend_options opts = {
|
||||
.scalar_blend_const = true,
|
||||
.logicop_enable = key->blend.logicop_enable,
|
||||
.logicop_func = key->blend.logicop_func,
|
||||
};
|
||||
|
||||
static_assert(ARRAY_SIZE(opts.format) == PIPE_MAX_COLOR_BUFS,
|
||||
"max RTs out of sync");
|
||||
|
||||
for (unsigned i = 0; i < PIPE_MAX_COLOR_BUFS; ++i)
|
||||
opts.format[i] = key->rt_formats[i];
|
||||
|
||||
memcpy(opts.rt, key->blend.rt, sizeof(opts.rt));
|
||||
NIR_PASS_V(nir, nir_lower_blend, &opts);
|
||||
|
||||
NIR_PASS_V(nir, agx_nir_lower_tilebuffer, &tib);
|
||||
|
||||
if (key->sprite_coord_enable) {
|
||||
|
||||
@@ -627,9 +627,6 @@ GENX(pan_blend_create_shader)(const struct panfrost_device *dev,
|
||||
if (PAN_ARCH >= 6 && nir_alu_type_get_type_size(nir_type) == 8)
|
||||
nir_type = nir_alu_type_get_base_type(nir_type) | 16;
|
||||
|
||||
enum glsl_base_type glsl_type =
|
||||
nir_get_glsl_base_type_for_nir_type(nir_type);
|
||||
|
||||
nir_lower_blend_options options = {
|
||||
.logicop_enable = state->logicop_enable,
|
||||
.logicop_func = state->logicop_func,
|
||||
@@ -674,23 +671,16 @@ GENX(pan_blend_create_shader)(const struct panfrost_device *dev,
|
||||
nir_alu_type_get_type_size(src_types[i]);
|
||||
}
|
||||
|
||||
nir_variable *c_src = nir_variable_create(
|
||||
b.shader, nir_var_shader_in,
|
||||
glsl_vector_type(nir_get_glsl_base_type_for_nir_type(src_types[0]), 4),
|
||||
"gl_Color");
|
||||
c_src->data.location = VARYING_SLOT_COL0;
|
||||
nir_variable *c_src1 = nir_variable_create(
|
||||
b.shader, nir_var_shader_in,
|
||||
glsl_vector_type(nir_get_glsl_base_type_for_nir_type(src_types[1]), 4),
|
||||
"gl_Color1");
|
||||
c_src1->data.location = VARYING_SLOT_VAR0;
|
||||
c_src1->data.driver_location = 1;
|
||||
nir_variable *c_out =
|
||||
nir_variable_create(b.shader, nir_var_shader_out,
|
||||
glsl_vector_type(glsl_type, 4), "gl_FragColor");
|
||||
c_out->data.location = FRAG_RESULT_DATA0;
|
||||
nir_ssa_def *pixel = nir_load_barycentric_pixel(&b, 32, .interp_mode = 1);
|
||||
nir_ssa_def *zero = nir_imm_int(&b, 0);
|
||||
|
||||
nir_ssa_def *s_src[] = {nir_load_var(&b, c_src), nir_load_var(&b, c_src1)};
|
||||
nir_ssa_def *s_src[2];
|
||||
for (unsigned i = 0; i < 2; ++i) {
|
||||
s_src[i] = nir_load_interpolated_input(
|
||||
&b, 4, nir_alu_type_get_type_size(src_types[i]), pixel, zero,
|
||||
.io_semantics.location = i ? VARYING_SLOT_VAR0 : VARYING_SLOT_COL0,
|
||||
.io_semantics.num_slots = 1, .base = i, .dest_type = src_types[i]);
|
||||
}
|
||||
|
||||
/* On Midgard, the blend shader is responsible for format conversion.
|
||||
* As the OpenGL spec requires integer conversions to saturate, we must
|
||||
@@ -706,7 +696,11 @@ GENX(pan_blend_create_shader)(const struct panfrost_device *dev,
|
||||
}
|
||||
|
||||
/* Build a trivial blend shader */
|
||||
nir_store_var(&b, c_out, s_src[0], 0xFF);
|
||||
nir_store_output(
|
||||
&b, s_src[0], zero, .write_mask = BITFIELD_MASK(4), .src_type = nir_type,
|
||||
.io_semantics.location = FRAG_RESULT_DATA0, .io_semantics.num_slots = 1);
|
||||
|
||||
b.shader->info.io_lowered = true;
|
||||
|
||||
options.src1 = s_src[1];
|
||||
|
||||
|
||||
Reference in New Issue
Block a user