From 7c01cbda6fe6737c874a67aa194805596a148144 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Wed, 28 Aug 2024 12:09:18 -0700 Subject: [PATCH] anv: Optimize vkQueueWaitIdle() on Xe KMD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vk_common_QueueWaitIdle() creates a syncobj, does a submit with no batch buffers, which translates to executing trivial_batch_bo, and then waits for the syncobj to be signaled when trivial_batch_bo finishes. On Xe KMD, on the other hand, we can avoid the trivial_batch_bo submission and instead use the special DRM_IOCTL_XE_EXEC with num_batch_buffer == 0 to get a syncobj that is signaled when the last exec finishes execution. This should free up a bit of GPU time to execute more important workloads. This will also optimize vkDeviceWaitIdle(), which calls QueueWaitIdle(). It has to fall back to vk_common_QueueWaitIdle() when the queue is in VK_QUEUE_SUBMIT_MODE_THREADED mode, because otherwise vkQueueWaitIdle() could return while there is still work pending in the VK/CPU submission queue. It could also cause a use-after-free when resources attached to a submission are freed before it is processed, for example: vkCreateFence() or vkCreateSemaphore() vkQueueSubmit() // with Fence or Semaphore created above vkQueueWaitIdle() // with the race it returns vkDestroyFence() or vkDestroySemaphore() // vk_queue_submit_thread_func() starts to process the submission above... 
Reviewed-by: Paulo Zanoni Signed-off-by: José Roberto de Souza Part-of: --- src/intel/vulkan/anv_queue.c | 29 +++++++++++++++++++++++++++++ src/intel/vulkan/xe/anv_queue.c | 6 ++++-- src/intel/vulkan/xe/anv_queue.h | 3 +++ 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/src/intel/vulkan/anv_queue.c b/src/intel/vulkan/anv_queue.c index 7e911cf812e..95be16541cd 100644 --- a/src/intel/vulkan/anv_queue.c +++ b/src/intel/vulkan/anv_queue.c @@ -30,6 +30,8 @@ #include "i915/anv_queue.h" #include "xe/anv_queue.h" +#include "vk_common_entrypoints.h" + static VkResult anv_create_engine(struct anv_device *device, struct anv_queue *queue, @@ -137,3 +139,30 @@ anv_queue_finish(struct anv_queue *queue) anv_destroy_engine(queue); vk_queue_finish(&queue->vk); } + +VkResult +anv_QueueWaitIdle(VkQueue _queue) +{ + VK_FROM_HANDLE(anv_queue, queue, _queue); + struct anv_device *device = queue->device; + + switch (device->info->kmd_type) { + case INTEL_KMD_TYPE_XE: + if (queue->vk.submit.mode != VK_QUEUE_SUBMIT_MODE_THREADED) { + int ret = anv_xe_wait_exec_queue_idle(device, queue->exec_queue_id); + + if (ret == 0) + return VK_SUCCESS; + if (ret == -ECANCELED) + return VK_ERROR_DEVICE_LOST; + return vk_errorf(device, VK_ERROR_UNKNOWN, "anv_xe_wait_exec_queue_idle failed: %m"); + } + FALLTHROUGH; + case INTEL_KMD_TYPE_I915: + return vk_common_QueueWaitIdle(_queue); + default: + unreachable("Missing"); + } + + return VK_SUCCESS; +} diff --git a/src/intel/vulkan/xe/anv_queue.c b/src/intel/vulkan/xe/anv_queue.c index 63e4a4d7f3f..dcda9b8e939 100644 --- a/src/intel/vulkan/xe/anv_queue.c +++ b/src/intel/vulkan/xe/anv_queue.c @@ -150,7 +150,7 @@ anv_xe_create_engine(struct anv_device *device, * Wait for all previous DRM_IOCTL_XE_EXEC calls over the * drm_xe_exec_queue to complete. 
**/ -static void +int anv_xe_wait_exec_queue_idle(struct anv_device *device, uint32_t exec_queue_id) { struct drm_syncobj_wait syncobj_wait = { @@ -162,7 +162,7 @@ anv_xe_wait_exec_queue_idle(struct anv_device *device, uint32_t exec_queue_id) if (ret) { assert(ret == -ECANCELED); - return; + return ret; } syncobj_wait.handles = (uintptr_t)&syncobj; @@ -174,6 +174,8 @@ anv_xe_wait_exec_queue_idle(struct anv_device *device, uint32_t exec_queue_id) }; ret = intel_ioctl(device->fd, DRM_IOCTL_SYNCOBJ_DESTROY, &syncobj_destroy); assert(ret == 0); + + return ret; } static void diff --git a/src/intel/vulkan/xe/anv_queue.h b/src/intel/vulkan/xe/anv_queue.h index 646f0ef2f16..7af8cb4b0bf 100644 --- a/src/intel/vulkan/xe/anv_queue.h +++ b/src/intel/vulkan/xe/anv_queue.h @@ -33,3 +33,6 @@ anv_xe_create_engine(struct anv_device *device, const VkDeviceQueueCreateInfo *pCreateInfo); void anv_xe_destroy_engine(struct anv_device *device, struct anv_queue *queue); + +int +anv_xe_wait_exec_queue_idle(struct anv_device *device, uint32_t exec_queue_id);