From 829bd406c04566962268138195ecb2c4d78da5cf Mon Sep 17 00:00:00 2001 From: Yiwei Zhang Date: Tue, 18 Nov 2025 19:20:42 -0800 Subject: [PATCH] venus: fix racy semaphore feedback counter update Previously, we update the sfb dst slot upon vn_SignalSemaphore so that vn_GetSemaphoreCounterValue can poll just the feedback slot itself. However, that can race with pending sfb cmds that are going to update the slot value, ending up with stuck sync progression. This change fixes it by disallowing vn_SignalSemaphore to touch the sfb dst slot. To ensure counter query being monotonic, vn_GetSemaphoreCounterValue now takes the greater of signaled counter and the sfb counter read. Test with dEQP-VK.synchronization* group: - w/o this: stuck shows up within 2 min with 8 parallel deqp runs - with this: no stuck for multiple full runs of the same Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/14304 Fixes: 5c7e60362cb ("venus: enable timeline semaphore feedback") Part-of: --- src/virtio/vulkan/vn_queue.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/virtio/vulkan/vn_queue.c b/src/virtio/vulkan/vn_queue.c index f80f06b0162..925c7857f26 100644 --- a/src/virtio/vulkan/vn_queue.c +++ b/src/virtio/vulkan/vn_queue.c @@ -2189,7 +2189,7 @@ vn_GetSemaphoreCounterValue(VkDevice device, * feedback counter read from the feedback slot. */ simple_mtx_lock(&sem->feedback.counter_mtx); - const uint64_t counter = vn_feedback_get_counter(sem->feedback.slot); + uint64_t counter = vn_feedback_get_counter(sem->feedback.slot); if (sem->feedback.signaled_counter < counter) { /* When the timeline semaphore feedback slot gets signaled, the real * semaphore signal operation follows after but the signaling isr can @@ -2237,6 +2237,11 @@ vn_GetSemaphoreCounterValue(VkDevice device, sem->feedback.signaled_counter = counter; } + + /* vn_SignalSemaphore writes the sfb signaled_counter without updating + * the slot. So the semaphore counter query here must consider both. + */ + counter = MAX2(counter, sem->feedback.signaled_counter); simple_mtx_unlock(&sem->feedback.counter_mtx); *pValue = counter; @@ -2277,9 +2282,13 @@ vn_SignalSemaphore(VkDevice device, const VkSemaphoreSignalInfo *pSignalInfo) vn_async_vkSignalSemaphore(dev->primary_ring, device, pSignalInfo); if (sem->feedback.slot) { + /* Must not update the sfb dst slot here because there's no followed + * submission to flush the cache (implicit sync guarantee) before the + * pending sfb cmd to update the slot. Otherwise, the slot update can be + * written by the racy update here. + */ simple_mtx_lock(&sem->feedback.counter_mtx); - vn_feedback_set_counter(sem->feedback.slot, pSignalInfo->value); /* Update async counters. Since we're signaling, we're aligned with * the renderer. */