anv: Optimize vkQueueWaitIdle() on Xe KMD

vk_common_QueueWaitIdle() creates a syncobj, does a submit with no
batch buffers, which translates to executing trivial_batch_bo, and then
waits for the syncobj to be signaled when trivial_batch_bo finishes.

On Xe KMD, on the other hand, we can avoid the trivial_batch_bo submission
and instead use the special DRM_IOCTL_XE_EXEC with num_batch_buffer == 0
to get a syncobj that is signaled when the last exec finishes execution.
This should free up the GPU a bit to execute more important workloads.

This will also optimize vkDeviceWaitIdle(), which calls QueueWaitIdle().

It has to fall back to vk_common_QueueWaitIdle() when the queue is in
VK_QUEUE_SUBMIT_MODE_THREADED mode, because otherwise vkQueueWaitIdle()
could return while there is still work in the VK/CPU submission queue.
It could also cause a use-after-free when resources attached to a
submission are freed before that submission is processed, for example:

   vkCreateFence() or vkCreateSemaphore()
   vkQueueSubmit() // with Fence or Semaphore created above
   vkQueueWaitIdle() // with the race it returns
   vkDestroyFence() or vkDestroySemaphore()
   // vk_queue_submit_thread_func() starts to process the submission above...

Reviewed-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30958>
This commit is contained in:
José Roberto de Souza
2024-08-28 12:09:18 -07:00
committed by Marge Bot
parent 2ccc9a5c40
commit 7c01cbda6f
3 changed files with 36 additions and 2 deletions
+29
View File
@@ -30,6 +30,8 @@
#include "i915/anv_queue.h"
#include "xe/anv_queue.h"
#include "vk_common_entrypoints.h"
static VkResult
anv_create_engine(struct anv_device *device,
struct anv_queue *queue,
@@ -137,3 +139,30 @@ anv_queue_finish(struct anv_queue *queue)
anv_destroy_engine(queue);
vk_queue_finish(&queue->vk);
}
/**
 * Driver entry point for vkQueueWaitIdle().
 *
 * On the Xe KMD, when the queue is not in threaded submit mode, the wait is
 * delegated to anv_xe_wait_exec_queue_idle() on the queue's exec queue.
 * Every other supported case (i915, or Xe with a threaded submit queue) is
 * forwarded to vk_common_QueueWaitIdle().
 *
 * The threaded-mode fallback exists because submissions may still be pending
 * in the CPU-side submission queue; returning early could let the
 * application destroy objects that a pending submission still references.
 *
 * Returns VK_SUCCESS when the Xe wait reports 0, VK_ERROR_DEVICE_LOST when
 * it reports -ECANCELED, VK_ERROR_UNKNOWN (via vk_errorf) for any other Xe
 * failure, and otherwise whatever vk_common_QueueWaitIdle() returns.
 */
VkResult
anv_QueueWaitIdle(VkQueue _queue)
{
   VK_FROM_HANDLE(anv_queue, queue, _queue);
   struct anv_device *device = queue->device;

   switch (device->info->kmd_type) {
   case INTEL_KMD_TYPE_XE:
      /* Threaded submit mode must go through the common implementation. */
      if (queue->vk.submit.mode == VK_QUEUE_SUBMIT_MODE_THREADED)
         break;

      switch (anv_xe_wait_exec_queue_idle(device, queue->exec_queue_id)) {
      case 0:
         return VK_SUCCESS;
      case -ECANCELED:
         return VK_ERROR_DEVICE_LOST;
      default:
         return vk_errorf(device, VK_ERROR_UNKNOWN, "anv_xe_wait_exec_queue_idle failed: %m");
      }
   case INTEL_KMD_TYPE_I915:
      break;
   default:
      unreachable("Missing");
   }

   /* Common path: i915, or Xe with a threaded submit queue. */
   return vk_common_QueueWaitIdle(_queue);
}
+4 -2
View File
@@ -150,7 +150,7 @@ anv_xe_create_engine(struct anv_device *device,
* Wait for all previous DRM_IOCTL_XE_EXEC calls over the
* drm_xe_exec_queue to complete.
**/
static void
int
anv_xe_wait_exec_queue_idle(struct anv_device *device, uint32_t exec_queue_id)
{
struct drm_syncobj_wait syncobj_wait = {
@@ -162,7 +162,7 @@ anv_xe_wait_exec_queue_idle(struct anv_device *device, uint32_t exec_queue_id)
if (ret) {
assert(ret == -ECANCELED);
return;
return ret;
}
syncobj_wait.handles = (uintptr_t)&syncobj;
@@ -174,6 +174,8 @@ anv_xe_wait_exec_queue_idle(struct anv_device *device, uint32_t exec_queue_id)
};
ret = intel_ioctl(device->fd, DRM_IOCTL_SYNCOBJ_DESTROY, &syncobj_destroy);
assert(ret == 0);
return ret;
}
static void
+3
View File
@@ -33,3 +33,6 @@ anv_xe_create_engine(struct anv_device *device,
const VkDeviceQueueCreateInfo *pCreateInfo);
void
anv_xe_destroy_engine(struct anv_device *device, struct anv_queue *queue);
int
anv_xe_wait_exec_queue_idle(struct anv_device *device, uint32_t exec_queue_id);