From 1e47ec2321664a513f64e57772c77d66ffd5fb35 Mon Sep 17 00:00:00 2001 From: Yiwei Zhang Date: Mon, 18 Mar 2024 20:48:02 -0700 Subject: [PATCH] venus: avoid constant busy wait for query result waiting Up to this commit in this MR, the gfxbench manhattan scores have been improved by 10~15% with ANGLE-on-Venus on some AMD platforms. Signed-off-by: Yiwei Zhang Part-of: --- src/virtio/vulkan/vn_common.c | 2 ++ src/virtio/vulkan/vn_common.h | 1 + src/virtio/vulkan/vn_query_pool.c | 42 +++++++++++++------------------ 3 files changed, 21 insertions(+), 24 deletions(-) diff --git a/src/virtio/vulkan/vn_common.c b/src/virtio/vulkan/vn_common.c index 33da57e0641..ade3ae5c429 100644 --- a/src/virtio/vulkan/vn_common.c +++ b/src/virtio/vulkan/vn_common.c @@ -184,6 +184,8 @@ vn_relax_reason_string(enum vn_relax_reason reason) return "fence"; case VN_RELAX_REASON_SEMAPHORE: return "semaphore"; + case VN_RELAX_REASON_QUERY: + return "query"; } return ""; } diff --git a/src/virtio/vulkan/vn_common.h b/src/virtio/vulkan/vn_common.h index 5d5e80b04d9..e6cf6781448 100644 --- a/src/virtio/vulkan/vn_common.h +++ b/src/virtio/vulkan/vn_common.h @@ -212,6 +212,7 @@ enum vn_relax_reason { VN_RELAX_REASON_RING_SPACE, VN_RELAX_REASON_FENCE, VN_RELAX_REASON_SEMAPHORE, + VN_RELAX_REASON_QUERY, }; struct vn_relax_state { diff --git a/src/virtio/vulkan/vn_query_pool.c b/src/virtio/vulkan/vn_query_pool.c index aefb5c02cdb..7d1ebfec194 100644 --- a/src/virtio/vulkan/vn_query_pool.c +++ b/src/virtio/vulkan/vn_query_pool.c @@ -250,33 +250,29 @@ vn_get_query_pool_feedback(struct vn_query_pool *pool, return result; } -static VkResult -vn_query_feedback_wait_ready(struct vn_query_pool *pool, - uint32_t firstQuery, - uint32_t queryCount) +static void +vn_query_feedback_wait_ready(struct vn_device *dev, + struct vn_query_pool *pool, + uint32_t first_query, + uint32_t query_count) { - /* Timeout after 5 seconds */ - uint64_t timeout = 5000ull * 1000 * 1000; - uint64_t abs_timeout_ns = os_time_get_absolute_timeout(timeout); + VN_TRACE_FUNC(); /* Feedback results are always 64 bit and include availability bit * (also 64 bit) */ - const uint32_t slot_array_size = pool->result_array_size + 1; - volatile uint64_t *src = pool->fb_buf->data; - src += (slot_array_size * firstQuery) + pool->result_array_size; + const uint32_t step = pool->result_array_size + 1; + const uint64_t *avail = (uint64_t *)pool->fb_buf->data + + first_query * step + pool->result_array_size; - uint32_t src_index = 0; - for (uint32_t i = 0; i < queryCount; i++) { - while (!src[src_index]) { - if (os_time_get_nano() > abs_timeout_ns) - return VK_ERROR_DEVICE_LOST; - - thrd_yield(); + struct vn_relax_state relax_state = + vn_relax_init(dev->instance, VN_RELAX_REASON_QUERY); + for (uint32_t i = 0, j = 0; i < query_count; i++, j += step) { + while (!avail[j]) { + vn_relax(&relax_state); } - src_index += slot_array_size; } - return VK_SUCCESS; + vn_relax_fini(&relax_state); } VkResult @@ -304,11 +300,9 @@ vn_GetQueryPoolResults(VkDevice device, */ if (pool->fb_buf) { /* If wait bit is set, wait poll until query is ready */ - if (flags & VK_QUERY_RESULT_WAIT_BIT) { - result = vn_query_feedback_wait_ready(pool, firstQuery, queryCount); - if (result != VK_SUCCESS) - return vn_result(dev->instance, result); - } + if (flags & VK_QUERY_RESULT_WAIT_BIT) + vn_query_feedback_wait_ready(dev, pool, firstQuery, queryCount); + result = vn_get_query_pool_feedback(pool, firstQuery, queryCount, pData, stride, flags); return vn_result(dev->instance, result);