tu: Support sparse binds on the gfx queue

Although this isn't technically required by the Vulkan spec, a few
native Vulkan games including DOOM Eternal expect it to be supported
because Windows drivers all support it. Now that support has been
plumbed through in the kernel backend, expose the ability to submit
sparse binds on the graphics queue that are implicitly synchronized with
graphics commands.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37304>
This commit is contained in:
Connor Abbott
2025-09-11 12:08:16 -04:00
committed by Marge Bot
parent 0cc0e786e0
commit 4043ea91b6
2 changed files with 91 additions and 74 deletions

View File

@@ -1493,6 +1493,15 @@ static const VkQueueFamilyProperties tu_gfx_queue_family_properties = {
.minImageTransferGranularity = { 1, 1, 1 },
};
/* Queue family advertised for the graphics queue when the kernel backend
 * supports sparse binds (device->has_sparse): identical to the plain gfx
 * family but with VK_QUEUE_SPARSE_BINDING_BIT added, so apps (e.g. DOOM
 * Eternal, per the commit message) can submit sparse binds on the gfx queue.
 */
static const VkQueueFamilyProperties tu_gfx_sparse_queue_family_properties = {
.queueFlags =
VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT |
VK_QUEUE_SPARSE_BINDING_BIT,
.queueCount = 1,
/* NOTE(review): 48 matches the plain gfx family's timestampValidBits at
 * the top of this hunk context — presumably the GPU timestamp width;
 * confirm against the non-sparse family definition. */
.timestampValidBits = 48,
.minImageTransferGranularity = { 1, 1, 1 },
};
static const VkQueueFamilyProperties tu_sparse_queue_family_properties = {
.queueFlags = VK_QUEUE_SPARSE_BINDING_BIT,
.queueCount = 1,
@@ -1671,7 +1680,9 @@ tu_physical_device_init(struct tu_physical_device *device,
device->queue_families[device->num_queue_families++] =
(struct tu_queue_family) {
.type = TU_QUEUE_GFX,
.properties = &tu_gfx_queue_family_properties,
.properties = device->has_sparse ?
&tu_gfx_sparse_queue_family_properties :
&tu_gfx_queue_family_properties,
};
if (device->has_sparse) {

View File

@@ -85,6 +85,80 @@ submit_add_entries(struct tu_device *dev, void *submit,
}
}
static VkResult
queue_submit_sparse(struct vk_queue *_queue, struct vk_queue_submit *vk_submit)
{
struct tu_queue *queue = list_entry(_queue, struct tu_queue, vk);
struct tu_device *device = queue->device;
pthread_mutex_lock(&device->submit_mutex);
void *submit = tu_submit_create(device);
if (!submit)
return VK_ERROR_OUT_OF_HOST_MEMORY;
for (uint32_t i = 0; i < vk_submit->buffer_bind_count; i++) {
const VkSparseBufferMemoryBindInfo *bind = &vk_submit->buffer_binds[i];
VK_FROM_HANDLE(tu_buffer, buffer, bind->buffer);
for (uint32_t j = 0; j < bind->bindCount; j++) {
const VkSparseMemoryBind *range = &bind->pBinds[j];
VK_FROM_HANDLE(tu_device_memory, mem, range->memory);
tu_submit_add_bind(queue->device, submit,
&buffer->vma, range->resourceOffset,
mem ? mem->bo : NULL,
mem ? range->memoryOffset : 0,
range->size);
}
}
for (uint32_t i = 0; i < vk_submit->image_bind_count; i++) {
const VkSparseImageMemoryBindInfo *bind = &vk_submit->image_binds[i];
VK_FROM_HANDLE(tu_image, image, bind->image);
for (uint32_t j = 0; j < bind->bindCount; j++)
tu_bind_sparse_image(device, submit, image, &bind->pBinds[j]);
}
for (uint32_t i = 0; i < vk_submit->image_opaque_bind_count; i++) {
const VkSparseImageOpaqueMemoryBindInfo *bind =
&vk_submit->image_opaque_binds[i];
VK_FROM_HANDLE(tu_image, image, bind->image);
for (uint32_t j = 0; j < bind->bindCount; j++) {
const VkSparseMemoryBind *range = &bind->pBinds[j];
VK_FROM_HANDLE(tu_device_memory, mem, range->memory);
tu_submit_add_bind(queue->device, submit,
&image->vma, range->resourceOffset,
mem ? mem->bo : NULL,
mem ? range->memoryOffset : 0,
range->size);
}
}
VkResult result =
tu_queue_submit(queue, submit, vk_submit->waits, vk_submit->wait_count,
vk_submit->signals, vk_submit->signal_count,
NULL);
if (result != VK_SUCCESS) {
pthread_mutex_unlock(&device->submit_mutex);
goto out;
}
device->submit_count++;
pthread_mutex_unlock(&device->submit_mutex);
pthread_cond_broadcast(&queue->device->timeline_cond);
out:
tu_submit_finish(device, submit);
return result;
}
static VkResult
queue_submit(struct vk_queue *_queue, struct vk_queue_submit *vk_submit)
{
@@ -94,6 +168,11 @@ queue_submit(struct vk_queue *_queue, struct vk_queue_submit *vk_submit)
bool u_trace_enabled = u_trace_should_process(&queue->device->trace_context);
struct util_dynarray dump_cmds;
if (vk_submit->buffer_bind_count ||
vk_submit->image_bind_count ||
vk_submit->image_opaque_bind_count)
return queue_submit_sparse(_queue, vk_submit);
util_dynarray_init(&dump_cmds, NULL);
uint32_t perf_pass_index =
@@ -260,79 +339,6 @@ fail_create_submit:
return result;
}
static VkResult
queue_submit_sparse(struct vk_queue *_queue, struct vk_queue_submit *vk_submit)
{
struct tu_queue *queue = list_entry(_queue, struct tu_queue, vk);
struct tu_device *device = queue->device;
pthread_mutex_lock(&device->submit_mutex);
void *submit = tu_submit_create(device);
if (!submit)
return VK_ERROR_OUT_OF_HOST_MEMORY;
for (uint32_t i = 0; i < vk_submit->buffer_bind_count; i++) {
const VkSparseBufferMemoryBindInfo *bind = &vk_submit->buffer_binds[i];
VK_FROM_HANDLE(tu_buffer, buffer, bind->buffer);
for (uint32_t j = 0; j < bind->bindCount; j++) {
const VkSparseMemoryBind *range = &bind->pBinds[j];
VK_FROM_HANDLE(tu_device_memory, mem, range->memory);
tu_submit_add_bind(queue->device, submit,
&buffer->vma, range->resourceOffset,
mem ? mem->bo : NULL,
mem ? range->memoryOffset : 0,
range->size);
}
}
for (uint32_t i = 0; i < vk_submit->image_bind_count; i++) {
const VkSparseImageMemoryBindInfo *bind = &vk_submit->image_binds[i];
VK_FROM_HANDLE(tu_image, image, bind->image);
for (uint32_t j = 0; j < bind->bindCount; j++)
tu_bind_sparse_image(device, submit, image, &bind->pBinds[j]);
}
for (uint32_t i = 0; i < vk_submit->image_opaque_bind_count; i++) {
const VkSparseImageOpaqueMemoryBindInfo *bind =
&vk_submit->image_opaque_binds[i];
VK_FROM_HANDLE(tu_image, image, bind->image);
for (uint32_t j = 0; j < bind->bindCount; j++) {
const VkSparseMemoryBind *range = &bind->pBinds[j];
VK_FROM_HANDLE(tu_device_memory, mem, range->memory);
tu_submit_add_bind(queue->device, submit,
&image->vma, range->resourceOffset,
mem ? mem->bo : NULL,
mem ? range->memoryOffset : 0,
range->size);
}
}
VkResult result =
tu_queue_submit(queue, submit, vk_submit->waits, vk_submit->wait_count,
vk_submit->signals, vk_submit->signal_count,
NULL);
if (result != VK_SUCCESS) {
pthread_mutex_unlock(&device->submit_mutex);
goto out;
}
device->submit_count++;
pthread_mutex_unlock(&device->submit_mutex);
pthread_cond_broadcast(&queue->device->timeline_cond);
out:
tu_submit_finish(device, submit);
return result;
}
VkResult
tu_queue_init(struct tu_device *device,
struct tu_queue *queue,