From 81bb109a3be2faf6d12b647cfc12feff92ad3af8 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Thu, 26 Jun 2025 13:00:20 +0200 Subject: [PATCH] anti-lag: Only consider timestamps from queues which have presented. Prevents stray submissions to compute queues from nullifying the delay. Signed-off-by: Hans-Kristian Arntzen Part-of: --- src/vulkan/anti-lag-layer/anti_lag_layer.c | 24 ++++++++++++++++++- src/vulkan/anti-lag-layer/anti_lag_layer.h | 3 +++ .../anti-lag-layer/anti_lag_layer_interface.c | 2 ++ 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/src/vulkan/anti-lag-layer/anti_lag_layer.c b/src/vulkan/anti-lag-layer/anti_lag_layer.c index 6c21e074024..d7543a5dfd9 100644 --- a/src/vulkan/anti-lag-layer/anti_lag_layer.c +++ b/src/vulkan/anti-lag-layer/anti_lag_layer.c @@ -8,6 +8,7 @@ #include #include "util/os_time.h" #include "util/simple_mtx.h" +#include "util/u_atomic.h" #include "vulkan/vulkan_core.h" #include "ringbuffer.h" #include "vk_alloc.h" @@ -400,7 +401,11 @@ get_commandbuffer(device_context *ctx, queue_context *queue_ctx, VkCommandBuffer /* Begin critical section. */ ringbuffer_lock(ctx->frames); ringbuffer_lock(queue_ctx->queries); - struct query *query = allocate_query(ctx, queue_ctx); + + /* Don't record timestamps for queues that are not deemed sensitive to latency. */ + struct query *query = + p_atomic_read(&queue_ctx->latency_sensitive) ? allocate_query(ctx, queue_ctx) : NULL; + if (query == NULL) { ringbuffer_unlock(queue_ctx->queries); ringbuffer_unlock(ctx->frames); @@ -588,3 +593,20 @@ anti_lag_QueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo *pS vk_free(&ctx->alloc, buf); return res; } + +VKAPI_ATTR VkResult VKAPI_CALL +anti_lag_QueuePresentKHR(VkQueue queue, const VkPresentInfoKHR *pPresentInfo) +{ + /* When multiple queues are in flight, the min-delay approach + * has problems. An async compute queue could be submitted to + * with very low delay while the main graphics queue would be swamped with work. 
+ * If we take a global min-delay over all queues, the algorithm would + * assume that there is very low delay and thus sleeps are disabled, but + * unless the graphics work depends directly on the async compute work, + * this is a false assumption. */ + device_context *ctx = get_device_context(queue); + queue_context *queue_ctx = get_queue_context(ctx, queue); + p_atomic_set(&queue_ctx->latency_sensitive, true); + + return ctx->vtable.QueuePresentKHR(queue, pPresentInfo); +} diff --git a/src/vulkan/anti-lag-layer/anti_lag_layer.h b/src/vulkan/anti-lag-layer/anti_lag_layer.h index 31abb0f9aee..d03d246d79c 100644 --- a/src/vulkan/anti-lag-layer/anti_lag_layer.h +++ b/src/vulkan/anti-lag-layer/anti_lag_layer.h @@ -39,6 +39,7 @@ struct query { typedef struct queue_context { VkQueue queue; uint32_t queue_family_idx; + bool latency_sensitive; VkCommandPool cmdPool; VkQueryPool queryPool; VkSemaphore semaphore; @@ -74,6 +75,7 @@ typedef struct device_context { DECLARE_HOOK(DestroySemaphore); DECLARE_HOOK(GetSemaphoreCounterValue); DECLARE_HOOK(WaitSemaphores); + DECLARE_HOOK(QueuePresentKHR); #undef DECLARE_HOOK } vtable; @@ -105,6 +107,7 @@ VkResult anti_lag_QueueSubmit2(VkQueue queue, uint32_t submitCount, const VkSubm VkFence fence); VkResult anti_lag_QueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo *pSubmits, VkFence fence); +VkResult anti_lag_QueuePresentKHR(VkQueue queue, const VkPresentInfoKHR *pPresentInfo); VkResult anti_lag_NegotiateLoaderLayerInterfaceVersion(VkNegotiateLayerInterface *pVersionStruct); diff --git a/src/vulkan/anti-lag-layer/anti_lag_layer_interface.c b/src/vulkan/anti-lag-layer/anti_lag_layer_interface.c index d2ca4a7dd44..6a803e24fe6 100644 --- a/src/vulkan/anti-lag-layer/anti_lag_layer_interface.c +++ b/src/vulkan/anti-lag-layer/anti_lag_layer_interface.c @@ -194,6 +194,7 @@ init_device_vtable(device_context *ctx, PFN_vkGetDeviceProcAddr gpa, PFN_vkSetDe INIT_HOOK(CmdWriteTimestamp); INIT_HOOK(CreateSemaphore); 
INIT_HOOK(DestroySemaphore); + INIT_HOOK(QueuePresentKHR); INIT_HOOK_ALIAS(GetSemaphoreCounterValue, GetSemaphoreCounterValueKHR, timeline_semaphore_khr); INIT_HOOK_ALIAS(WaitSemaphores, WaitSemaphoresKHR, timeline_semaphore_khr); #undef INIT_HOOK @@ -833,6 +834,7 @@ static const struct { ADD_HOOK(QueueSubmit), ADD_HOOK(QueueSubmit2), ADD_HOOK(QueueSubmit2KHR), + ADD_HOOK(QueuePresentKHR), }; #undef ADD_HOOK