tu/kgsl: Spin until KGSL reports queue timestamp during profiling

KGSL writes the profiling values asynchronously, while we read them immediately after the IOCTL returns. This can result in the struct not being filled in by the time we read it, which causes AGI to not correctly process any timestamps from larger submits that take longer to queue. To fix this, we now busy-wait until the value has been written out by KGSL. Signed-off-by: Mark Collins <mark@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30147>
2024-07-12 11:19:25 +00:00
parent 2d54a605fe
commit 0d6faa21f8
1 changed files with 8 additions and 1 deletions
@@ -1220,6 +1220,7 @@ kgsl_queue_submit(struct tu_queue *queue, struct vk_queue_submit *vk_submit)
      };
      profiling_buffer =
         (struct kgsl_cmdbatch_profiling_buffer *) tu_suballoc_bo_map(bo);
+      memset(profiling_buffer, 0, sizeof(*profiling_buffer));
   }

   if (tu_autotune_submit_requires_fence(cmd_buffers, cmdbuf_count)) {
@@ -1300,7 +1301,13 @@ kgsl_queue_submit(struct tu_queue *queue, struct vk_queue_submit *vk_submit)

   uint64_t gpu_offset = 0;
 #if HAVE_PERFETTO
-   if (profiling_buffer && profiling_buffer->gpu_ticks_queued) {
+   if (profiling_buffer) {
+      /* We need to wait for KGSL to queue the GPU command before we can read
+       * the timestamp. Since this is just for profiling and doesn't take too
+       * long, we can just busy-wait for it.
+       */
+      while (p_atomic_read(&profiling_buffer->gpu_ticks_queued) == 0);
+
      struct kgsl_perfcounter_read_group perf = {
         .groupid = KGSL_PERFCOUNTER_GROUP_ALWAYSON,
         .countable = 0,