venus: make device memory alloc async

Add a new perf option NO_ASYNC_MEM_ALLOC. Track the ring seqno of the
memory alloc command, and do async ring wait to ensure:
- memory allocation is before resource creation
- memory import is before resource destroy

Signed-off-by: Yiwei Zhang <zzyiwei@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25611>
This commit is contained in:
Yiwei Zhang
2023-10-09 00:28:17 -07:00
committed by Marge Bot
parent 95d90cdf3d
commit 72cb85b778
4 changed files with 78 additions and 13 deletions
+1
View File
@@ -48,6 +48,7 @@ static const struct debug_control vn_perf_options[] = {
{ "no_cmd_batching", VN_PERF_NO_CMD_BATCHING },
{ "no_timeline_sem_feedback", VN_PERF_NO_TIMELINE_SEM_FEEDBACK },
{ "no_query_feedback", VN_PERF_NO_QUERY_FEEDBACK },
{ "no_async_mem_alloc", VN_PERF_NO_ASYNC_MEM_ALLOC },
{ NULL, 0 },
/* clang-format on */
};
+1
View File
@@ -118,6 +118,7 @@ enum vn_perf {
VN_PERF_NO_CMD_BATCHING = 1ull << 6,
VN_PERF_NO_TIMELINE_SEM_FEEDBACK = 1ull << 7,
VN_PERF_NO_QUERY_FEEDBACK = 1ull << 8,
VN_PERF_NO_ASYNC_MEM_ALLOC = 1ull << 9,
};
typedef uint64_t vn_object_id;
+61 -13
View File
@@ -28,8 +28,20 @@ vn_device_memory_alloc_simple(struct vn_device *dev,
{
VkDevice dev_handle = vn_device_to_handle(dev);
VkDeviceMemory mem_handle = vn_device_memory_to_handle(mem);
return vn_call_vkAllocateMemory(dev->instance, dev_handle, alloc_info,
NULL, &mem_handle);
if (VN_PERF(NO_ASYNC_MEM_ALLOC)) {
return vn_call_vkAllocateMemory(dev->instance, dev_handle, alloc_info,
NULL, &mem_handle);
}
struct vn_instance_submit_command instance_submit;
vn_submit_vkAllocateMemory(dev->instance, 0, dev_handle, alloc_info, NULL,
&mem_handle, &instance_submit);
if (!instance_submit.ring_seqno_valid)
return VK_ERROR_OUT_OF_HOST_MEMORY;
mem->bo_ring_seqno_valid = true;
mem->bo_ring_seqno = instance_submit.ring_seqno;
return VK_SUCCESS;
}
static inline void
@@ -41,6 +53,48 @@ vn_device_memory_free_simple(struct vn_device *dev,
vn_async_vkFreeMemory(dev->instance, dev_handle, mem_handle, NULL);
}
static VkResult
vn_device_memory_wait_alloc(struct vn_device *dev,
struct vn_device_memory *mem)
{
if (!mem->bo_ring_seqno_valid)
return VK_SUCCESS;
/* fine to false it here since renderer submission failure is fatal */
mem->bo_ring_seqno_valid = false;
uint32_t local_data[8];
struct vn_cs_encoder local_enc =
VN_CS_ENCODER_INITIALIZER_LOCAL(local_data, sizeof(local_data));
vn_encode_vkWaitRingSeqnoMESA(&local_enc, 0, dev->instance->ring.id,
mem->bo_ring_seqno);
return vn_renderer_submit_simple(dev->renderer, local_data,
vn_cs_encoder_get_len(&local_enc));
}
/**
 * Create mem->base_bo from the renderer-side device memory.
 *
 * Any pending allocation wait is issued first via
 * vn_device_memory_wait_alloc(); if that fails, its error is returned and
 * no bo is created.
 *
 * \param dev               device providing the renderer
 * \param mem               memory object; size, id and property flags are
 *                          read and base_bo is filled in
 * \param external_handles  external handle types forwarded to the renderer
 * \return result of the wait on failure, otherwise the result of
 *         vn_renderer_bo_create_from_device_memory()
 */
static inline VkResult
vn_device_memory_bo_init(struct vn_device *dev,
                         struct vn_device_memory *mem,
                         VkExternalMemoryHandleTypeFlags external_handles)
{
   const VkResult wait_result = vn_device_memory_wait_alloc(dev, mem);

   if (wait_result == VK_SUCCESS) {
      return vn_renderer_bo_create_from_device_memory(
         dev->renderer, mem->size, mem->base.id, mem->type.propertyFlags,
         external_handles, &mem->base_bo);
   }

   return wait_result;
}
/**
 * Drop the reference held on mem->base_bo, if there is one.
 *
 * Before the unref, any pending allocation wait is issued through
 * vn_device_memory_wait_alloc(); the result of that wait is not checked.
 * When base_bo is NULL this is a no-op and no wait is issued.
 *
 * \param dev  device providing the renderer
 * \param mem  memory object whose base_bo is released
 */
static inline void
vn_device_memory_bo_fini(struct vn_device *dev, struct vn_device_memory *mem)
{
   if (!mem->base_bo)
      return;

   vn_device_memory_wait_alloc(dev, mem);
   vn_renderer_bo_unref(dev->renderer, mem->base_bo);
}
static VkResult
vn_device_memory_pool_grow_alloc(struct vn_device *dev,
uint32_t mem_type_index,
@@ -68,9 +122,7 @@ vn_device_memory_pool_grow_alloc(struct vn_device *dev,
if (result != VK_SUCCESS)
goto obj_fini;
result = vn_renderer_bo_create_from_device_memory(
dev->renderer, mem->size, mem->base.id, mem->type.propertyFlags, 0,
&mem->base_bo);
result = vn_device_memory_bo_init(dev, mem, 0);
if (result != VK_SUCCESS)
goto mem_free;
@@ -342,9 +394,7 @@ vn_device_memory_alloc_export(struct vn_device *dev,
if (result != VK_SUCCESS)
return result;
result = vn_renderer_bo_create_from_device_memory(
dev->renderer, mem->size, mem->base.id, mem->type.propertyFlags,
external_handles, &mem->base_bo);
result = vn_device_memory_bo_init(dev, mem, external_handles);
if (result != VK_SUCCESS) {
vn_device_memory_free_simple(dev, mem);
return result;
@@ -587,8 +637,8 @@ vn_FreeMemory(VkDevice device,
if (mem->base_memory) {
vn_device_memory_pool_unref(dev, mem->base_memory);
} else {
if (mem->base_bo)
vn_renderer_bo_unref(dev->renderer, mem->base_bo);
/* ensure renderer side import still sees the resource */
vn_device_memory_bo_fini(dev, mem);
if (mem->bo_roundtrip_seqno_valid)
vn_instance_wait_roundtrip(dev->instance, mem->bo_roundtrip_seqno);
@@ -646,9 +696,7 @@ vn_MapMemory(VkDevice device,
* the extension.
*/
if (need_bo) {
result = vn_renderer_bo_create_from_device_memory(
dev->renderer, mem->size, mem->base.id, mem->type.propertyFlags, 0,
&mem->base_bo);
result = vn_device_memory_bo_init(dev, mem, 0);
if (result != VK_SUCCESS)
return vn_error(dev->instance, result);
}
+15
View File
@@ -32,6 +32,21 @@ struct vn_device_memory {
/* non-NULL when mappable or external */
struct vn_renderer_bo *base_bo;
/* ensure renderer side resource create is called after vkAllocateMemory
*
* 1. driver submits vkAllocateMemory (alloc) via ring for a ring seqno
* 2. driver submits via vq to wait for above ring to reach the seqno
* 3. driver creates virtgpu bo from renderer VkDeviceMemory
*
* ensure renderer side resource destroy is called after vkAllocateMemory
*
* 1. driver submits vkAllocateMemory (import) via ring for a ring seqno
* 2. driver submits via vq to wait for above ring to reach the seqno
* 3. driver destroys virtgpu bo
*/
bool bo_ring_seqno_valid;
uint32_t bo_ring_seqno;
/* ensure renderer side vkFreeMemory is called after vkGetMemoryFdKHR
*
* 1. driver creates virtgpu bo from renderer VkDeviceMemory