From 4043ea91b68d3e12d4aff89458b25902005c16ba Mon Sep 17 00:00:00 2001
From: Connor Abbott
Date: Thu, 11 Sep 2025 12:08:16 -0400
Subject: [PATCH] tu: Support sparse binds on the gfx queue

Although this isn't technically required by the Vulkan spec, a few native
Vulkan games including DOOM Eternal expect it to be supported because
Windows drivers all support it. Now that support has been plumbed through
in the kernel backend, expose the ability to submit sparse binds on the
graphics queue that are implicitly synchronized with graphics commands.

While moving queue_submit_sparse() above queue_submit(), also release
submit_mutex when tu_submit_create() fails instead of returning with the
mutex still held.

Part-of:
---
 src/freedreno/vulkan/tu_device.cc |  13 ++-
 src/freedreno/vulkan/tu_queue.cc  | 159 ++++++++++++++++--------------
 2 files changed, 98 insertions(+), 74 deletions(-)

diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc
index db5be3f2658..90f6abd9bcf 100644
--- a/src/freedreno/vulkan/tu_device.cc
+++ b/src/freedreno/vulkan/tu_device.cc
@@ -1493,6 +1493,15 @@ static const VkQueueFamilyProperties tu_gfx_queue_family_properties = {
    .minImageTransferGranularity = { 1, 1, 1 },
 };
 
+static const VkQueueFamilyProperties tu_gfx_sparse_queue_family_properties = {
+   .queueFlags =
+      VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT |
+      VK_QUEUE_SPARSE_BINDING_BIT,
+   .queueCount = 1,
+   .timestampValidBits = 48,
+   .minImageTransferGranularity = { 1, 1, 1 },
+};
+
 static const VkQueueFamilyProperties tu_sparse_queue_family_properties = {
    .queueFlags = VK_QUEUE_SPARSE_BINDING_BIT,
    .queueCount = 1,
@@ -1671,7 +1680,9 @@ tu_physical_device_init(struct tu_physical_device *device,
    device->queue_families[device->num_queue_families++] =
       (struct tu_queue_family) {
          .type = TU_QUEUE_GFX,
-         .properties = &tu_gfx_queue_family_properties,
+         .properties = device->has_sparse ?
+            &tu_gfx_sparse_queue_family_properties :
+            &tu_gfx_queue_family_properties,
       };
 
    if (device->has_sparse) {
diff --git a/src/freedreno/vulkan/tu_queue.cc b/src/freedreno/vulkan/tu_queue.cc
index f0acb688b47..65a510f61a8 100644
--- a/src/freedreno/vulkan/tu_queue.cc
+++ b/src/freedreno/vulkan/tu_queue.cc
@@ -85,6 +85,87 @@ submit_add_entries(struct tu_device *dev, void *submit,
    }
 }
 
+/* Submit the sparse buffer, image and opaque image binds carried by
+ * vk_submit as one backend submit, forwarding the submit's waits and
+ * signals. queue_submit() routes here whenever any bind is present.
+ */
+static VkResult
+queue_submit_sparse(struct vk_queue *_queue, struct vk_queue_submit *vk_submit)
+{
+   struct tu_queue *queue = list_entry(_queue, struct tu_queue, vk);
+   struct tu_device *device = queue->device;
+
+   pthread_mutex_lock(&device->submit_mutex);
+
+   void *submit = tu_submit_create(device);
+   if (!submit) {
+      /* Don't leave submit_mutex held when creating the submit fails. */
+      pthread_mutex_unlock(&device->submit_mutex);
+      return VK_ERROR_OUT_OF_HOST_MEMORY;
+   }
+
+   for (uint32_t i = 0; i < vk_submit->buffer_bind_count; i++) {
+      const VkSparseBufferMemoryBindInfo *bind = &vk_submit->buffer_binds[i];
+      VK_FROM_HANDLE(tu_buffer, buffer, bind->buffer);
+
+      for (uint32_t j = 0; j < bind->bindCount; j++) {
+         const VkSparseMemoryBind *range = &bind->pBinds[j];
+         VK_FROM_HANDLE(tu_device_memory, mem, range->memory);
+
+         tu_submit_add_bind(queue->device, submit,
+                            &buffer->vma, range->resourceOffset,
+                            mem ? mem->bo : NULL,
+                            mem ? range->memoryOffset : 0,
+                            range->size);
+      }
+   }
+
+   for (uint32_t i = 0; i < vk_submit->image_bind_count; i++) {
+      const VkSparseImageMemoryBindInfo *bind = &vk_submit->image_binds[i];
+      VK_FROM_HANDLE(tu_image, image, bind->image);
+
+      for (uint32_t j = 0; j < bind->bindCount; j++)
+         tu_bind_sparse_image(device, submit, image, &bind->pBinds[j]);
+   }
+
+   for (uint32_t i = 0; i < vk_submit->image_opaque_bind_count; i++) {
+      const VkSparseImageOpaqueMemoryBindInfo *bind =
+         &vk_submit->image_opaque_binds[i];
+      VK_FROM_HANDLE(tu_image, image, bind->image);
+
+      for (uint32_t j = 0; j < bind->bindCount; j++) {
+         const VkSparseMemoryBind *range = &bind->pBinds[j];
+         VK_FROM_HANDLE(tu_device_memory, mem, range->memory);
+
+         tu_submit_add_bind(queue->device, submit,
+                            &image->vma, range->resourceOffset,
+                            mem ? mem->bo : NULL,
+                            mem ? range->memoryOffset : 0,
+                            range->size);
+      }
+   }
+
+   VkResult result =
+      tu_queue_submit(queue, submit, vk_submit->waits, vk_submit->wait_count,
+                      vk_submit->signals, vk_submit->signal_count,
+                      NULL);
+
+   if (result != VK_SUCCESS) {
+      pthread_mutex_unlock(&device->submit_mutex);
+      goto out;
+   }
+
+   device->submit_count++;
+
+   pthread_mutex_unlock(&device->submit_mutex);
+   pthread_cond_broadcast(&queue->device->timeline_cond);
+
+out:
+   tu_submit_finish(device, submit);
+
+   return result;
+}
+
 static VkResult
 queue_submit(struct vk_queue *_queue, struct vk_queue_submit *vk_submit)
 {
@@ -94,6 +175,11 @@ queue_submit(struct vk_queue *_queue, struct vk_queue_submit *vk_submit)
    bool u_trace_enabled = u_trace_should_process(&queue->device->trace_context);
    struct util_dynarray dump_cmds;
 
+   if (vk_submit->buffer_bind_count ||
+       vk_submit->image_bind_count ||
+       vk_submit->image_opaque_bind_count)
+      return queue_submit_sparse(_queue, vk_submit);
+
    util_dynarray_init(&dump_cmds, NULL);
 
    uint32_t perf_pass_index =
@@ -260,79 +346,6 @@ fail_create_submit:
    return result;
 }
 
-static VkResult
-queue_submit_sparse(struct vk_queue *_queue, struct vk_queue_submit *vk_submit)
-{
-   struct tu_queue *queue = list_entry(_queue, struct tu_queue, vk);
-   struct tu_device *device = queue->device;
-
-   pthread_mutex_lock(&device->submit_mutex);
-
-   void *submit = tu_submit_create(device);
-   if (!submit)
-      return VK_ERROR_OUT_OF_HOST_MEMORY;
-
-   for (uint32_t i = 0; i < vk_submit->buffer_bind_count; i++) {
-      const VkSparseBufferMemoryBindInfo *bind = &vk_submit->buffer_binds[i];
-      VK_FROM_HANDLE(tu_buffer, buffer, bind->buffer);
-
-      for (uint32_t j = 0; j < bind->bindCount; j++) {
-         const VkSparseMemoryBind *range = &bind->pBinds[j];
-         VK_FROM_HANDLE(tu_device_memory, mem, range->memory);
-
-         tu_submit_add_bind(queue->device, submit,
-                            &buffer->vma, range->resourceOffset,
-                            mem ? mem->bo : NULL,
-                            mem ? range->memoryOffset : 0,
-                            range->size);
-      }
-   }
-
-   for (uint32_t i = 0; i < vk_submit->image_bind_count; i++) {
-      const VkSparseImageMemoryBindInfo *bind = &vk_submit->image_binds[i];
-      VK_FROM_HANDLE(tu_image, image, bind->image);
-
-      for (uint32_t j = 0; j < bind->bindCount; j++)
-         tu_bind_sparse_image(device, submit, image, &bind->pBinds[j]);
-   }
-
-   for (uint32_t i = 0; i < vk_submit->image_opaque_bind_count; i++) {
-      const VkSparseImageOpaqueMemoryBindInfo *bind =
-         &vk_submit->image_opaque_binds[i];
-      VK_FROM_HANDLE(tu_image, image, bind->image);
-
-      for (uint32_t j = 0; j < bind->bindCount; j++) {
-         const VkSparseMemoryBind *range = &bind->pBinds[j];
-         VK_FROM_HANDLE(tu_device_memory, mem, range->memory);
-
-         tu_submit_add_bind(queue->device, submit,
-                            &image->vma, range->resourceOffset,
-                            mem ? mem->bo : NULL,
-                            mem ? range->memoryOffset : 0,
-                            range->size);
-      }
-   }
-
-   VkResult result =
-      tu_queue_submit(queue, submit, vk_submit->waits, vk_submit->wait_count,
-                      vk_submit->signals, vk_submit->signal_count,
-                      NULL);
-
-   if (result != VK_SUCCESS) {
-      pthread_mutex_unlock(&device->submit_mutex);
-      goto out;
-   }
-
-   device->submit_count++;
-
-   pthread_mutex_unlock(&device->submit_mutex);
-   pthread_cond_broadcast(&queue->device->timeline_cond);
-
-out:
-   tu_submit_finish(device, submit);
-
-   return result;
-}
 VkResult
 tu_queue_init(struct tu_device *device,
               struct tu_queue *queue,