tu: Support sparse binds on the gfx queue

Although this isn't technically required by the Vulkan spec, a few
native Vulkan games including DOOM Eternal expect it to be supported
because Windows drivers all support it. Now that support has been
plumbed through in the kernel backend, expose the ability to submit
sparse binds on the graphics queue that are implicitly synchronized with
graphics commands.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37304>
This commit is contained in:
Connor Abbott
2025-09-11 12:08:16 -04:00
committed by Marge Bot
parent 0cc0e786e0
commit 4043ea91b6
2 changed files with 91 additions and 74 deletions

View File

@@ -1493,6 +1493,15 @@ static const VkQueueFamilyProperties tu_gfx_queue_family_properties = {
.minImageTransferGranularity = { 1, 1, 1 },
};
/* Queue family advertised for the graphics queue when the kernel backend
 * supports sparse binds (device->has_sparse): identical to the plain gfx
 * family but with VK_QUEUE_SPARSE_BINDING_BIT added, so apps (e.g. DOOM
 * Eternal, per the commit message) can submit sparse binds on the gfx queue.
 */
static const VkQueueFamilyProperties tu_gfx_sparse_queue_family_properties = {
.queueFlags =
VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT |
VK_QUEUE_SPARSE_BINDING_BIT,
.queueCount = 1,
/* NOTE(review): 48 matches the plain gfx family's timestampValidBits at
 * the top of this hunk context — presumably the GPU timestamp width;
 * confirm against the non-sparse family definition. */
.timestampValidBits = 48,
.minImageTransferGranularity = { 1, 1, 1 },
};
static const VkQueueFamilyProperties tu_sparse_queue_family_properties = {
.queueFlags = VK_QUEUE_SPARSE_BINDING_BIT,
.queueCount = 1,
@@ -1671,7 +1680,9 @@ tu_physical_device_init(struct tu_physical_device *device,
device->queue_families[device->num_queue_families++] =
(struct tu_queue_family) {
.type = TU_QUEUE_GFX,
.properties = &tu_gfx_queue_family_properties,
.properties = device->has_sparse ?
&tu_gfx_sparse_queue_family_properties :
&tu_gfx_queue_family_properties,
};
if (device->has_sparse) {

View File

@@ -85,6 +85,80 @@ submit_add_entries(struct tu_device *dev, void *submit,
}
}
static VkResult
queue_submit_sparse(struct vk_queue *_queue, struct vk_queue_submit *vk_submit)
{
struct tu_queue *queue = list_entry(_queue, struct tu_queue, vk);
struct tu_device *device = queue->device;
pthread_mutex_lock(&device->submit_mutex);
void *submit = tu_submit_create(device);
if (!submit)
return VK_ERROR_OUT_OF_HOST_MEMORY;
for (uint32_t i = 0; i < vk_submit->buffer_bind_count; i++) {
const VkSparseBufferMemoryBindInfo *bind = &vk_submit->buffer_binds[i];
VK_FROM_HANDLE(tu_buffer, buffer, bind->buffer);
for (uint32_t j = 0; j < bind->bindCount; j++) {
const VkSparseMemoryBind *range = &bind->pBinds[j];
VK_FROM_HANDLE(tu_device_memory, mem, range->memory);
tu_submit_add_bind(queue->device, submit,
&buffer->vma, range->resourceOffset,
mem ? mem->bo : NULL,
mem ? range->memoryOffset : 0,
range->size);
}
}
for (uint32_t i = 0; i < vk_submit->image_bind_count; i++) {
const VkSparseImageMemoryBindInfo *bind = &vk_submit->image_binds[i];
VK_FROM_HANDLE(tu_image, image, bind->image);
for (uint32_t j = 0; j < bind->bindCount; j++)
tu_bind_sparse_image(device, submit, image, &bind->pBinds[j]);
}
for (uint32_t i = 0; i < vk_submit->image_opaque_bind_count; i++) {
const VkSparseImageOpaqueMemoryBindInfo *bind =
&vk_submit->image_opaque_binds[i];
VK_FROM_HANDLE(tu_image, image, bind->image);
for (uint32_t j = 0; j < bind->bindCount; j++) {
const VkSparseMemoryBind *range = &bind->pBinds[j];
VK_FROM_HANDLE(tu_device_memory, mem, range->memory);
tu_submit_add_bind(queue->device, submit,
&image->vma, range->resourceOffset,
mem ? mem->bo : NULL,
mem ? range->memoryOffset : 0,
range->size);
}
}
VkResult result =
tu_queue_submit(queue, submit, vk_submit->waits, vk_submit->wait_count,
vk_submit->signals, vk_submit->signal_count,
NULL);
if (result != VK_SUCCESS) {
pthread_mutex_unlock(&device->submit_mutex);
goto out;
}
device->submit_count++;
pthread_mutex_unlock(&device->submit_mutex);
pthread_cond_broadcast(&queue->device->timeline_cond);
out:
tu_submit_finish(device, submit);
return result;
}
static VkResult
queue_submit(struct vk_queue *_queue, struct vk_queue_submit *vk_submit)
{
@@ -94,6 +168,11 @@ queue_submit(struct vk_queue *_queue, struct vk_queue_submit *vk_submit)
bool u_trace_enabled = u_trace_should_process(&queue->device->trace_context);
struct util_dynarray dump_cmds;
if (vk_submit->buffer_bind_count ||
vk_submit->image_bind_count ||
vk_submit->image_opaque_bind_count)
return queue_submit_sparse(_queue, vk_submit);
util_dynarray_init(&dump_cmds, NULL);
uint32_t perf_pass_index =
@@ -260,79 +339,6 @@ fail_create_submit:
return result;
}
static VkResult
queue_submit_sparse(struct vk_queue *_queue, struct vk_queue_submit *vk_submit)
{
struct tu_queue *queue = list_entry(_queue, struct tu_queue, vk);
struct tu_device *device = queue->device;
pthread_mutex_lock(&device->submit_mutex);
void *submit = tu_submit_create(device);
if (!submit)
return VK_ERROR_OUT_OF_HOST_MEMORY;
for (uint32_t i = 0; i < vk_submit->buffer_bind_count; i++) {
const VkSparseBufferMemoryBindInfo *bind = &vk_submit->buffer_binds[i];
VK_FROM_HANDLE(tu_buffer, buffer, bind->buffer);
for (uint32_t j = 0; j < bind->bindCount; j++) {
const VkSparseMemoryBind *range = &bind->pBinds[j];
VK_FROM_HANDLE(tu_device_memory, mem, range->memory);
tu_submit_add_bind(queue->device, submit,
&buffer->vma, range->resourceOffset,
mem ? mem->bo : NULL,
mem ? range->memoryOffset : 0,
range->size);
}
}
for (uint32_t i = 0; i < vk_submit->image_bind_count; i++) {
const VkSparseImageMemoryBindInfo *bind = &vk_submit->image_binds[i];
VK_FROM_HANDLE(tu_image, image, bind->image);
for (uint32_t j = 0; j < bind->bindCount; j++)
tu_bind_sparse_image(device, submit, image, &bind->pBinds[j]);
}
for (uint32_t i = 0; i < vk_submit->image_opaque_bind_count; i++) {
const VkSparseImageOpaqueMemoryBindInfo *bind =
&vk_submit->image_opaque_binds[i];
VK_FROM_HANDLE(tu_image, image, bind->image);
for (uint32_t j = 0; j < bind->bindCount; j++) {
const VkSparseMemoryBind *range = &bind->pBinds[j];
VK_FROM_HANDLE(tu_device_memory, mem, range->memory);
tu_submit_add_bind(queue->device, submit,
&image->vma, range->resourceOffset,
mem ? mem->bo : NULL,
mem ? range->memoryOffset : 0,
range->size);
}
}
VkResult result =
tu_queue_submit(queue, submit, vk_submit->waits, vk_submit->wait_count,
vk_submit->signals, vk_submit->signal_count,
NULL);
if (result != VK_SUCCESS) {
pthread_mutex_unlock(&device->submit_mutex);
goto out;
}
device->submit_count++;
pthread_mutex_unlock(&device->submit_mutex);
pthread_cond_broadcast(&queue->device->timeline_cond);
out:
tu_submit_finish(device, submit);
return result;
}
VkResult
tu_queue_init(struct tu_device *device,
struct tu_queue *queue,