panfrost: Add a submit_batch() hook to panfrost_vtable

This per-arch method will take care of preparing the batch for submission
and submitting it.

This allows us to get rid of a bunch of methods in panfrost_vtable, and
turn some indirect function calls into direct calls.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26249>
This commit is contained in:
Boris Brezillon
2023-11-14 18:19:04 +01:00
committed by Marge Bot
parent 46fc7d2dc9
commit d618a46590
3 changed files with 186 additions and 197 deletions

View File

@@ -24,6 +24,8 @@
* SOFTWARE.
*/
#include "drm-uapi/panfrost_drm.h"
#include "gallium/auxiliary/util/u_blend.h"
#include "pipe/p_defines.h"
#include "pipe/p_state.h"
@@ -36,6 +38,8 @@
#include "util/u_vbuf.h"
#include "util/u_viewport.h"
#include "decode.h"
#include "genxml/gen_macros.h"
#include "pan_afbc_cso.h"
@@ -4626,21 +4630,194 @@ init_polygon_list(struct panfrost_batch *batch)
#endif
}
/*
 * Submit a single job chain to the kernel through the legacy
 * DRM_IOCTL_PANFROST_SUBMIT ioctl.
 *
 * first_job_desc: GPU address of the first job descriptor in the chain.
 * reqs: submit requirement flags (e.g. PANFROST_JD_REQ_FS for fragment).
 * out_sync: syncobj handle to signal on completion, or 0 for none.
 *
 * Returns 0 on success, a positive errno value on failure.
 */
static int
panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
                            mali_ptr first_job_desc, uint32_t reqs,
                            uint32_t out_sync)
{
   struct panfrost_context *ctx = batch->ctx;
   struct pipe_context *gallium = (struct pipe_context *)ctx;
   struct panfrost_device *dev = pan_device(gallium->screen);
   struct drm_panfrost_submit submit = {
      0,
   };
   uint32_t in_syncs[1];
   uint32_t *bo_handles;
   int ret;

   /* If we trace, we always need a syncobj to wait on before decoding.
    * Borrow the context syncobj when the caller didn't provide one. */
   if (!out_sync && dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC))
      out_sync = ctx->syncobj;

   submit.out_sync = out_sync;
   submit.jc = first_job_desc;
   submit.requirements = reqs;

   /* Import the pending in-fence (if any) as our only in-sync, then
    * consume the fd so the next submission doesn't wait on it again. */
   if (ctx->in_sync_fd >= 0) {
      ret =
         drmSyncobjImportSyncFile(dev->fd, ctx->in_sync_obj, ctx->in_sync_fd);
      assert(!ret);

      in_syncs[submit.in_sync_count++] = ctx->in_sync_obj;
      close(ctx->in_sync_fd);
      ctx->in_sync_fd = -1;
   }

   if (submit.in_sync_count)
      submit.in_syncs = (uintptr_t)in_syncs;

   /* Worst-case handle count: every batch BO, every pool BO, plus the
    * tiler heap and the sample-positions BO (+2). */
   bo_handles = calloc(panfrost_pool_num_bos(&batch->pool) +
                          panfrost_pool_num_bos(&batch->invisible_pool) +
                          batch->num_bos + 2,
                       sizeof(*bo_handles));
   assert(bo_handles);
   if (!bo_handles)
      return ENOMEM;

   pan_bo_access *flags = util_dynarray_begin(&batch->bos);
   unsigned end_bo = util_dynarray_num_elements(&batch->bos, pan_bo_access);

   /* batch->bos is indexed by GEM handle; slots with no access flags were
    * never touched by this batch and are skipped. Use an unsigned index
    * to match end_bo's type (avoids a sign-compare mismatch). */
   for (unsigned i = 0; i < end_bo; ++i) {
      if (!flags[i])
         continue;

      assert(submit.bo_handle_count < batch->num_bos);
      bo_handles[submit.bo_handle_count++] = i;

      /* Update the BO access flags so that panfrost_bo_wait() knows
       * about all pending accesses.
       * We only keep the READ/WRITE info since this is all the BO
       * wait logic cares about.
       * We also preserve existing flags as this batch might not
       * be the first one to access the BO.
       */
      struct panfrost_bo *bo = pan_lookup_bo(dev, i);
      bo->gpu_access |= flags[i] & (PAN_BO_ACCESS_RW);
   }

   panfrost_pool_get_bo_handles(&batch->pool,
                                bo_handles + submit.bo_handle_count);
   submit.bo_handle_count += panfrost_pool_num_bos(&batch->pool);
   panfrost_pool_get_bo_handles(&batch->invisible_pool,
                                bo_handles + submit.bo_handle_count);
   submit.bo_handle_count += panfrost_pool_num_bos(&batch->invisible_pool);

   /* Add the tiler heap to the list of accessed BOs if the batch has at
    * least one tiler job. Tiler heap is written by tiler jobs and read
    * by fragment jobs (the polygon list is coming from this heap).
    */
   if (batch->scoreboard.first_tiler)
      bo_handles[submit.bo_handle_count++] = dev->tiler_heap->gem_handle;

   /* Always used on Bifrost, occasionally used on Midgard */
   bo_handles[submit.bo_handle_count++] = dev->sample_positions->gem_handle;
   submit.bo_handles = (u64)(uintptr_t)bo_handles;

   /* Blackhole rendering: skip the actual ioctl but keep all the
    * book-keeping above so state stays consistent. */
   if (ctx->is_noop)
      ret = 0;
   else
      ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);
   free(bo_handles);

   if (ret)
      return errno;

   /* Trace the job if we're doing that */
   if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) {
      /* Wait so we can get errors reported back */
      drmSyncobjWait(dev->fd, &out_sync, 1, INT64_MAX, 0, NULL);

      if (dev->debug & PAN_DBG_TRACE)
         pandecode_jc(dev->decode_ctx, submit.jc, dev->gpu_id);

      if (dev->debug & PAN_DBG_DUMP)
         pandecode_dump_mappings(dev->decode_ctx);

      /* Jobs won't be complete if blackhole rendering, that's ok */
      if (!ctx->is_noop && dev->debug & PAN_DBG_SYNC)
         pandecode_abort_on_fault(dev->decode_ctx, submit.jc, dev->gpu_id);
   }

   return 0;
}
/* Submit both vertex/tiler and fragment jobs for a batch, possibly with an
 * outsync corresponding to the later of the two (since there will be an
 * implicit dep between them).
 *
 * Returns 0 on success, or the errno-style code propagated from
 * panfrost_batch_submit_ioctl(). */
static int
panfrost_batch_submit_jobs(struct panfrost_batch *batch)
{
   struct pipe_screen *pscreen = batch->ctx->base.screen;
   struct panfrost_device *dev = pan_device(pscreen);
   bool has_draws = batch->scoreboard.first_job;
   bool has_tiler = batch->scoreboard.first_tiler;
   bool has_frag = panfrost_has_fragment_job(batch);
   uint32_t out_sync = batch->ctx->syncobj;
   int ret = 0;

   /* Take the submit lock to make sure no tiler jobs from other context
    * are inserted between our tiler and fragment jobs, failing to do that
    * might result in tiler heap corruption.
    */
   if (has_tiler)
      pthread_mutex_lock(&dev->submit_lock);

   if (has_draws) {
      /* Attach the out_sync to this submission only when no fragment job
       * follows; otherwise the fragment submission below is the last one
       * and gets the syncobj. */
      ret = panfrost_batch_submit_ioctl(batch, batch->scoreboard.first_job, 0,
                                        has_frag ? 0 : out_sync);

      if (ret)
         goto done;
   }

   if (has_frag) {
      ret = panfrost_batch_submit_ioctl(batch, batch->frag_job,
                                        PANFROST_JD_REQ_FS, out_sync);
      if (ret)
         goto done;
   }

done:
   /* Unlock on every exit path taken while holding the lock. */
   if (has_tiler)
      pthread_mutex_unlock(&dev->submit_lock);

   return ret;
}
/* Per-arch vtbl hook: finish recording the batch descriptors and submit it
 * to the kernel. Returns 0 on success, an errno-style code otherwise. */
static int
submit_batch(struct panfrost_batch *batch, struct pan_fb_info *fb)
{
   preload(batch, fb);
   init_polygon_list(batch);

   /* All draws are recorded at this point, so the TLS descriptor — and the
    * FBD, if this batch ends in a fragment job — can finally be emitted. */
   emit_tls(batch);

   const bool frag_pending = panfrost_has_fragment_job(batch);

   if (frag_pending) {
      emit_fbd(batch, fb);
      emit_fragment_job(batch, fb);
   }

   return panfrost_batch_submit_jobs(batch);
}
void
GENX(panfrost_cmdstream_screen_init)(struct panfrost_screen *screen)
{
struct panfrost_device *dev = &screen->dev;
screen->vtbl.prepare_shader = prepare_shader;
screen->vtbl.emit_tls = emit_tls;
screen->vtbl.emit_fbd = emit_fbd;
screen->vtbl.emit_fragment_job = emit_fragment_job;
screen->vtbl.screen_destroy = screen_destroy;
screen->vtbl.preload = preload;
screen->vtbl.context_populate_vtbl = context_populate_vtbl;
screen->vtbl.init_batch = init_batch;
screen->vtbl.submit_batch = submit_batch;
screen->vtbl.get_blend_shader = GENX(pan_blend_get_shader_locked);
screen->vtbl.init_polygon_list = init_polygon_list;
screen->vtbl.get_compiler_options = GENX(pan_shader_get_compiler_options);
screen->vtbl.compile_shader = GENX(pan_shader_compile);
screen->vtbl.afbc_size = panfrost_afbc_size;

View File

@@ -26,15 +26,12 @@
#include <assert.h>
#include "drm-uapi/panfrost_drm.h"
#include "util/format/u_format.h"
#include "util/hash_table.h"
#include "util/ralloc.h"
#include "util/rounding.h"
#include "util/u_framebuffer.h"
#include "util/u_pack_color.h"
#include "decode.h"
#include "pan_bo.h"
#include "pan_context.h"
#include "pan_util.h"
@@ -590,164 +587,6 @@ panfrost_batch_to_fb_info(const struct panfrost_batch *batch,
}
}
/*
 * Submit a single job chain to the kernel through the legacy
 * DRM_IOCTL_PANFROST_SUBMIT ioctl.
 *
 * first_job_desc: GPU address of the first job descriptor in the chain.
 * reqs: submit requirement flags (e.g. PANFROST_JD_REQ_FS for fragment).
 * out_sync: syncobj handle to signal on completion, or 0 for none.
 *
 * Returns 0 on success, a positive errno value on failure.
 */
static int
panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
                            mali_ptr first_job_desc, uint32_t reqs,
                            uint32_t out_sync)
{
   struct panfrost_context *ctx = batch->ctx;
   struct pipe_context *gallium = (struct pipe_context *)ctx;
   struct panfrost_device *dev = pan_device(gallium->screen);
   struct drm_panfrost_submit submit = {
      0,
   };
   uint32_t in_syncs[1];
   uint32_t *bo_handles;
   int ret;

   /* If we trace, we always need a syncobj to wait on before decoding.
    * Borrow the context syncobj when the caller didn't provide one. */
   if (!out_sync && dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC))
      out_sync = ctx->syncobj;

   submit.out_sync = out_sync;
   submit.jc = first_job_desc;
   submit.requirements = reqs;

   /* Import the pending in-fence (if any) as our only in-sync, then
    * consume the fd so the next submission doesn't wait on it again. */
   if (ctx->in_sync_fd >= 0) {
      ret =
         drmSyncobjImportSyncFile(dev->fd, ctx->in_sync_obj, ctx->in_sync_fd);
      assert(!ret);

      in_syncs[submit.in_sync_count++] = ctx->in_sync_obj;
      close(ctx->in_sync_fd);
      ctx->in_sync_fd = -1;
   }

   if (submit.in_sync_count)
      submit.in_syncs = (uintptr_t)in_syncs;

   /* Worst-case handle count: every batch BO, every pool BO, plus the
    * tiler heap and the sample-positions BO (+2). */
   bo_handles = calloc(panfrost_pool_num_bos(&batch->pool) +
                          panfrost_pool_num_bos(&batch->invisible_pool) +
                          batch->num_bos + 2,
                       sizeof(*bo_handles));
   assert(bo_handles);

   pan_bo_access *flags = util_dynarray_begin(&batch->bos);
   unsigned end_bo = util_dynarray_num_elements(&batch->bos, pan_bo_access);

   /* batch->bos is indexed by GEM handle; slots with no access flags were
    * never touched by this batch and are skipped. */
   for (int i = 0; i < end_bo; ++i) {
      if (!flags[i])
         continue;

      assert(submit.bo_handle_count < batch->num_bos);
      bo_handles[submit.bo_handle_count++] = i;

      /* Update the BO access flags so that panfrost_bo_wait() knows
       * about all pending accesses.
       * We only keep the READ/WRITE info since this is all the BO
       * wait logic cares about.
       * We also preserve existing flags as this batch might not
       * be the first one to access the BO.
       */
      struct panfrost_bo *bo = pan_lookup_bo(dev, i);
      bo->gpu_access |= flags[i] & (PAN_BO_ACCESS_RW);
   }

   panfrost_pool_get_bo_handles(&batch->pool,
                                bo_handles + submit.bo_handle_count);
   submit.bo_handle_count += panfrost_pool_num_bos(&batch->pool);
   panfrost_pool_get_bo_handles(&batch->invisible_pool,
                                bo_handles + submit.bo_handle_count);
   submit.bo_handle_count += panfrost_pool_num_bos(&batch->invisible_pool);

   /* Add the tiler heap to the list of accessed BOs if the batch has at
    * least one tiler job. Tiler heap is written by tiler jobs and read
    * by fragment jobs (the polygon list is coming from this heap).
    */
   if (batch->scoreboard.first_tiler)
      bo_handles[submit.bo_handle_count++] = dev->tiler_heap->gem_handle;

   /* Always used on Bifrost, occasionally used on Midgard */
   bo_handles[submit.bo_handle_count++] = dev->sample_positions->gem_handle;
   submit.bo_handles = (u64)(uintptr_t)bo_handles;

   /* Blackhole rendering: skip the actual ioctl but keep all the
    * book-keeping above so state stays consistent. */
   if (ctx->is_noop)
      ret = 0;
   else
      ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);
   free(bo_handles);

   if (ret)
      return errno;

   /* Trace the job if we're doing that */
   if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) {
      /* Wait so we can get errors reported back */
      drmSyncobjWait(dev->fd, &out_sync, 1, INT64_MAX, 0, NULL);

      if (dev->debug & PAN_DBG_TRACE)
         pandecode_jc(dev->decode_ctx, submit.jc, dev->gpu_id);

      if (dev->debug & PAN_DBG_DUMP)
         pandecode_dump_mappings(dev->decode_ctx);

      /* Jobs won't be complete if blackhole rendering, that's ok */
      if (!ctx->is_noop && dev->debug & PAN_DBG_SYNC)
         pandecode_abort_on_fault(dev->decode_ctx, submit.jc, dev->gpu_id);
   }

   return 0;
}
/* Submit both vertex/tiler and fragment jobs for a batch, possibly with an
 * outsync corresponding to the later of the two (since there will be an
 * implicit dep between them).
 *
 * Returns 0 on success, or the errno-style code propagated from
 * panfrost_batch_submit_ioctl(). */
static int
panfrost_batch_submit_jobs(struct panfrost_batch *batch)
{
   struct pipe_screen *pscreen = batch->ctx->base.screen;
   struct panfrost_device *dev = pan_device(pscreen);
   bool has_draws = batch->scoreboard.first_job;
   bool has_tiler = batch->scoreboard.first_tiler;
   bool has_frag = panfrost_has_fragment_job(batch);
   uint32_t out_sync = batch->ctx->syncobj;
   int ret = 0;

   /* Take the submit lock to make sure no tiler jobs from other context
    * are inserted between our tiler and fragment jobs, failing to do that
    * might result in tiler heap corruption.
    */
   if (has_tiler)
      pthread_mutex_lock(&dev->submit_lock);

   if (has_draws) {
      /* Attach the out_sync to this submission only when no fragment job
       * follows; otherwise the fragment submission below is the last one
       * and gets the syncobj. */
      ret = panfrost_batch_submit_ioctl(batch, batch->scoreboard.first_job, 0,
                                        has_frag ? 0 : out_sync);

      if (ret)
         goto done;
   }

   if (has_frag) {
      ret = panfrost_batch_submit_ioctl(batch, batch->frag_job,
                                        PANFROST_JD_REQ_FS, out_sync);
      if (ret)
         goto done;
   }

done:
   /* Unlock on every exit path taken while holding the lock. */
   if (has_tiler)
      pthread_mutex_unlock(&dev->submit_lock);

   return ret;
}
static void
panfrost_emit_tile_map(struct panfrost_batch *batch, struct pan_fb_info *fb)
{
@@ -804,21 +643,7 @@ panfrost_batch_submit(struct panfrost_context *ctx,
panfrost_batch_to_fb_info(batch, &fb, rts, &zs, &s, false);
panfrost_emit_tile_map(batch, &fb);
screen->vtbl.preload(batch, &fb);
screen->vtbl.init_polygon_list(batch);
/* Now that all draws are in, we can finally prepare the
* FBD for the batch (if there is one). */
screen->vtbl.emit_tls(batch);
if (has_frag) {
screen->vtbl.emit_fbd(batch, &fb);
screen->vtbl.emit_fragment_job(batch, &fb);
}
ret = panfrost_batch_submit_jobs(batch);
ret = screen->vtbl.submit_batch(batch, &fb);
if (ret)
fprintf(stderr, "panfrost_batch_submit failed: %d\n", ret);

View File

@@ -64,36 +64,23 @@ struct panfrost_vtable {
void (*prepare_shader)(struct panfrost_compiled_shader *,
struct panfrost_pool *, bool);
/* Emits a thread local storage descriptor */
void (*emit_tls)(struct panfrost_batch *);
/* Emits a framebuffer descriptor */
void (*emit_fbd)(struct panfrost_batch *, const struct pan_fb_info *);
/* Emits a fragment job */
void (*emit_fragment_job)(struct panfrost_batch *,
const struct pan_fb_info *);
/* General destructor */
void (*screen_destroy)(struct pipe_screen *);
/* Preload framebuffer */
void (*preload)(struct panfrost_batch *, struct pan_fb_info *);
/* Populate context vtable */
void (*context_populate_vtbl)(struct pipe_context *pipe);
/* Device-dependent initialization of a panfrost_batch */
void (*init_batch)(struct panfrost_batch *batch);
/* Device-dependent submission of a panfrost_batch */
int (*submit_batch)(struct panfrost_batch *batch, struct pan_fb_info *fb);
/* Get blend shader */
struct pan_blend_shader_variant *(*get_blend_shader)(
const struct panfrost_device *, const struct pan_blend_state *,
nir_alu_type, nir_alu_type, unsigned rt);
/* Initialize the polygon list */
void (*init_polygon_list)(struct panfrost_batch *);
/* Shader compilation methods */
const nir_shader_compiler_options *(*get_compiler_options)(void);
void (*compile_shader)(nir_shader *s, struct panfrost_compile_inputs *inputs,