anti-lag: Only consider timestamps from queues which have presented.

Prevents stray submissions to compute queues from nullifying the delay.

Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34242>
2025-06-26 13:00:20 +02:00
parent 722ffe9a73
commit 81bb109a3b
3 changed files with 28 additions and 1 deletions
@@ -8,6 +8,7 @@
 #include <string.h>
 #include "util/os_time.h"
 #include "util/simple_mtx.h"
+#include "util/u_atomic.h"
 #include "vulkan/vulkan_core.h"
 #include "ringbuffer.h"
 #include "vk_alloc.h"
@@ -400,7 +401,11 @@ get_commandbuffer(device_context *ctx, queue_context *queue_ctx, VkCommandBuffer
   /* Begin critical section. */
   ringbuffer_lock(ctx->frames);
   ringbuffer_lock(queue_ctx->queries);
-   struct query *query = allocate_query(ctx, queue_ctx);
+
+   /* Don't record timestamps for queues that are not deemed sensitive to latency. */
+   struct query *query =
+      p_atomic_read(&queue_ctx->latency_sensitive) ? allocate_query(ctx, queue_ctx) : NULL;
+
   if (query == NULL) {
      ringbuffer_unlock(queue_ctx->queries);
      ringbuffer_unlock(ctx->frames);
@@ -588,3 +593,20 @@ anti_lag_QueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo *pS
   vk_free(&ctx->alloc, buf);
   return res;
 }
+
+VKAPI_ATTR VkResult VKAPI_CALL
+anti_lag_QueuePresentKHR(VkQueue queue, const VkPresentInfoKHR *pPresentInfo)
+{
+   /* Flag the presenting queue as latency sensitive; get_commandbuffer() only
+    * allocates timestamp queries for flagged queues. With multiple queues in
+    * flight, a global min-delay over all queues is misleading: an async compute
+    * queue may see very low delay while the main graphics queue is swamped with
+    * work. The algorithm would then assume low delay and disable sleeps, which
+    * is a false assumption unless the graphics work depends directly on the
+    * async compute work. The flag is only ever set here, never cleared. */
+   device_context *ctx = get_device_context(queue);
+   queue_context *queue_ctx = get_queue_context(ctx, queue);
+   p_atomic_set(&queue_ctx->latency_sensitive, true);
+
+   return ctx->vtable.QueuePresentKHR(queue, pPresentInfo);
+}
@@ -39,6 +39,7 @@ struct query {
 typedef struct queue_context {
   VkQueue queue;
   uint32_t queue_family_idx;
+   bool latency_sensitive;
   VkCommandPool cmdPool;
   VkQueryPool queryPool;
   VkSemaphore semaphore;
@@ -74,6 +75,7 @@ typedef struct device_context {
      DECLARE_HOOK(DestroySemaphore);
      DECLARE_HOOK(GetSemaphoreCounterValue);
      DECLARE_HOOK(WaitSemaphores);
+      DECLARE_HOOK(QueuePresentKHR);
 #undef DECLARE_HOOK
   } vtable;

@@ -105,6 +107,7 @@ VkResult anti_lag_QueueSubmit2(VkQueue queue, uint32_t submitCount, const VkSubm
                               VkFence fence);
 VkResult anti_lag_QueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo *pSubmits,
                              VkFence fence);
+VkResult anti_lag_QueuePresentKHR(VkQueue queue, const VkPresentInfoKHR *pPresentInfo);

 VkResult anti_lag_NegotiateLoaderLayerInterfaceVersion(VkNegotiateLayerInterface *pVersionStruct);

@@ -194,6 +194,7 @@ init_device_vtable(device_context *ctx, PFN_vkGetDeviceProcAddr gpa, PFN_vkSetDe
   INIT_HOOK(CmdWriteTimestamp);
   INIT_HOOK(CreateSemaphore);
   INIT_HOOK(DestroySemaphore);
+   INIT_HOOK(QueuePresentKHR);
   INIT_HOOK_ALIAS(GetSemaphoreCounterValue, GetSemaphoreCounterValueKHR, timeline_semaphore_khr);
   INIT_HOOK_ALIAS(WaitSemaphores, WaitSemaphoresKHR, timeline_semaphore_khr);
 #undef INIT_HOOK
@@ -833,6 +834,7 @@ static const struct {
   ADD_HOOK(QueueSubmit),
   ADD_HOOK(QueueSubmit2),
   ADD_HOOK(QueueSubmit2KHR),
+   ADD_HOOK(QueuePresentKHR),
 };
 #undef ADD_HOOK