anti-lag: Only consider timestamps from queues which have presented.

Prevents stray submissions to compute queues from nullifying the delay.

Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34242>
This commit is contained in:
Hans-Kristian Arntzen
2025-06-26 13:00:20 +02:00
committed by Marge Bot
parent 722ffe9a73
commit 81bb109a3b
3 changed files with 28 additions and 1 deletions
+23 -1
View File
@@ -8,6 +8,7 @@
#include <string.h>
#include "util/os_time.h"
#include "util/simple_mtx.h"
#include "util/u_atomic.h"
#include "vulkan/vulkan_core.h"
#include "ringbuffer.h"
#include "vk_alloc.h"
@@ -400,7 +401,11 @@ get_commandbuffer(device_context *ctx, queue_context *queue_ctx, VkCommandBuffer
/* Begin critical section. */
ringbuffer_lock(ctx->frames);
ringbuffer_lock(queue_ctx->queries);
struct query *query = allocate_query(ctx, queue_ctx);
/* Don't record timestamps for queues that are not deemed sensitive to latency. */
struct query *query =
p_atomic_read(&queue_ctx->latency_sensitive) ? allocate_query(ctx, queue_ctx) : NULL;
if (query == NULL) {
ringbuffer_unlock(queue_ctx->queries);
ringbuffer_unlock(ctx->frames);
@@ -588,3 +593,20 @@ anti_lag_QueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo *pS
vk_free(&ctx->alloc, buf);
return res;
}
/**
 * Layer hook for vkQueuePresentKHR.
 *
 * Flags the presenting queue as latency sensitive, then forwards the present
 * call unchanged to the QueuePresentKHR entry of the device dispatch table
 * and returns its result. The flag is only ever set here, never cleared.
 *
 * Why: a min-delay taken globally over all queues in flight is misleading.
 * An async compute queue may see submissions with very low delay while the
 * main graphics queue is swamped with work. The algorithm would then assume
 * the delay is very low and disable sleeps, which is only valid if the
 * graphics work depends directly on the async compute work. Marking the
 * queues that actually present lets the rest of the layer tell them apart.
 */
VKAPI_ATTR VkResult VKAPI_CALL
anti_lag_QueuePresentKHR(VkQueue queue, const VkPresentInfoKHR *pPresentInfo)
{
   device_context *device = get_device_context(queue);

   /* NOTE(review): the lookup result is used unchecked - confirm that
    * get_queue_context() cannot return NULL for a queue that presents. */
   queue_context *presenting = get_queue_context(device, queue);
   p_atomic_set(&presenting->latency_sensitive, true);

   return device->vtable.QueuePresentKHR(queue, pPresentInfo);
}
@@ -39,6 +39,7 @@ struct query {
typedef struct queue_context {
VkQueue queue;
uint32_t queue_family_idx;
bool latency_sensitive;
VkCommandPool cmdPool;
VkQueryPool queryPool;
VkSemaphore semaphore;
@@ -74,6 +75,7 @@ typedef struct device_context {
DECLARE_HOOK(DestroySemaphore);
DECLARE_HOOK(GetSemaphoreCounterValue);
DECLARE_HOOK(WaitSemaphores);
DECLARE_HOOK(QueuePresentKHR);
#undef DECLARE_HOOK
} vtable;
@@ -105,6 +107,7 @@ VkResult anti_lag_QueueSubmit2(VkQueue queue, uint32_t submitCount, const VkSubm
VkFence fence);
VkResult anti_lag_QueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo *pSubmits,
VkFence fence);
VkResult anti_lag_QueuePresentKHR(VkQueue queue, const VkPresentInfoKHR *pPresentInfo);
VkResult anti_lag_NegotiateLoaderLayerInterfaceVersion(VkNegotiateLayerInterface *pVersionStruct);
@@ -194,6 +194,7 @@ init_device_vtable(device_context *ctx, PFN_vkGetDeviceProcAddr gpa, PFN_vkSetDe
INIT_HOOK(CmdWriteTimestamp);
INIT_HOOK(CreateSemaphore);
INIT_HOOK(DestroySemaphore);
INIT_HOOK(QueuePresentKHR);
INIT_HOOK_ALIAS(GetSemaphoreCounterValue, GetSemaphoreCounterValueKHR, timeline_semaphore_khr);
INIT_HOOK_ALIAS(WaitSemaphores, WaitSemaphoresKHR, timeline_semaphore_khr);
#undef INIT_HOOK
@@ -833,6 +834,7 @@ static const struct {
ADD_HOOK(QueueSubmit),
ADD_HOOK(QueueSubmit2),
ADD_HOOK(QueueSubmit2KHR),
ADD_HOOK(QueuePresentKHR),
};
#undef ADD_HOOK