venus: make device memory alloc async
Add a new perf option NO_ASYNC_MEM_ALLOC.

Track the ring seqno of the memory alloc command, and do an async ring wait to ensure:
- memory allocation happens before resource creation
- memory import happens before resource destroy

Signed-off-by: Yiwei Zhang <zzyiwei@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25611>
This commit is contained in:
@@ -48,6 +48,7 @@ static const struct debug_control vn_perf_options[] = {
|
||||
{ "no_cmd_batching", VN_PERF_NO_CMD_BATCHING },
|
||||
{ "no_timeline_sem_feedback", VN_PERF_NO_TIMELINE_SEM_FEEDBACK },
|
||||
{ "no_query_feedback", VN_PERF_NO_QUERY_FEEDBACK },
|
||||
{ "no_async_mem_alloc", VN_PERF_NO_ASYNC_MEM_ALLOC },
|
||||
{ NULL, 0 },
|
||||
/* clang-format on */
|
||||
};
|
||||
|
||||
@@ -118,6 +118,7 @@ enum vn_perf {
|
||||
VN_PERF_NO_CMD_BATCHING = 1ull << 6,
|
||||
VN_PERF_NO_TIMELINE_SEM_FEEDBACK = 1ull << 7,
|
||||
VN_PERF_NO_QUERY_FEEDBACK = 1ull << 8,
|
||||
VN_PERF_NO_ASYNC_MEM_ALLOC = 1ull << 9,
|
||||
};
|
||||
|
||||
typedef uint64_t vn_object_id;
|
||||
|
||||
@@ -28,8 +28,20 @@ vn_device_memory_alloc_simple(struct vn_device *dev,
|
||||
{
|
||||
VkDevice dev_handle = vn_device_to_handle(dev);
|
||||
VkDeviceMemory mem_handle = vn_device_memory_to_handle(mem);
|
||||
return vn_call_vkAllocateMemory(dev->instance, dev_handle, alloc_info,
|
||||
NULL, &mem_handle);
|
||||
if (VN_PERF(NO_ASYNC_MEM_ALLOC)) {
|
||||
return vn_call_vkAllocateMemory(dev->instance, dev_handle, alloc_info,
|
||||
NULL, &mem_handle);
|
||||
}
|
||||
|
||||
struct vn_instance_submit_command instance_submit;
|
||||
vn_submit_vkAllocateMemory(dev->instance, 0, dev_handle, alloc_info, NULL,
|
||||
&mem_handle, &instance_submit);
|
||||
if (!instance_submit.ring_seqno_valid)
|
||||
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
|
||||
mem->bo_ring_seqno_valid = true;
|
||||
mem->bo_ring_seqno = instance_submit.ring_seqno;
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static inline void
|
||||
@@ -41,6 +53,48 @@ vn_device_memory_free_simple(struct vn_device *dev,
|
||||
vn_async_vkFreeMemory(dev->instance, dev_handle, mem_handle, NULL);
|
||||
}
|
||||
|
||||
/* Make the renderer wait for a pending asynchronous memory allocation.
 *
 * If mem carries a valid ring seqno (bo_ring_seqno_valid), encode a
 * vkWaitRingSeqnoMESA command for the instance ring and that seqno into a
 * small local buffer and submit it through vn_renderer_submit_simple().
 * The flag is consumed, so a later call for the same mem is a no-op unless
 * a new seqno has been recorded in between.
 *
 * Returns VK_SUCCESS when there is nothing to wait for, otherwise the
 * result of vn_renderer_submit_simple().
 */
static VkResult
|
||||
vn_device_memory_wait_alloc(struct vn_device *dev,
|
||||
struct vn_device_memory *mem)
|
||||
{
|
||||
/* no ring seqno recorded for this memory: nothing to wait for */
if (!mem->bo_ring_seqno_valid)
|
||||
return VK_SUCCESS;
|
||||
|
||||
/* fine to clear the flag before submitting, since a renderer submission
 * failure is fatal anyway
 */
|
||||
mem->bo_ring_seqno_valid = false;
|
||||
|
||||
/* local scratch storage that the encoder below writes the command into */
uint32_t local_data[8];
|
||||
struct vn_cs_encoder local_enc =
|
||||
VN_CS_ENCODER_INITIALIZER_LOCAL(local_data, sizeof(local_data));
|
||||
/* encode a wait on the instance ring for the recorded seqno */
vn_encode_vkWaitRingSeqnoMESA(&local_enc, 0, dev->instance->ring.id,
|
||||
mem->bo_ring_seqno);
|
||||
/* submit only the bytes the encoder actually produced */
return vn_renderer_submit_simple(dev->renderer, local_data,
|
||||
vn_cs_encoder_get_len(&local_enc));
|
||||
}
|
||||
|
||||
/* Create mem->base_bo from the renderer-side device memory.
 *
 * First calls vn_device_memory_wait_alloc() so that any pending async
 * allocation of mem is waited on before the bo is created; if that fails,
 * its error is returned and no bo is created. Otherwise returns the result
 * of vn_renderer_bo_create_from_device_memory(), which receives mem's size,
 * object id and memory property flags together with external_handles, and
 * stores the bo in mem->base_bo.
 */
static inline VkResult
|
||||
vn_device_memory_bo_init(struct vn_device *dev,
|
||||
struct vn_device_memory *mem,
|
||||
VkExternalMemoryHandleTypeFlags external_handles)
|
||||
{
|
||||
/* order the bo creation after the (possibly async) memory allocation */
VkResult result = vn_device_memory_wait_alloc(dev, mem);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
return vn_renderer_bo_create_from_device_memory(
|
||||
dev->renderer, mem->size, mem->base.id, mem->type.propertyFlags,
|
||||
external_handles, &mem->base_bo);
|
||||
}
|
||||
|
||||
/* Drop the reference held on mem->base_bo, if there is one.
 *
 * Before the unref, vn_device_memory_wait_alloc() is called so that a
 * pending async allocation of mem is waited on first. Its return value is
 * not checked here. Does nothing when mem->base_bo is NULL.
 */
static inline void
|
||||
vn_device_memory_bo_fini(struct vn_device *dev, struct vn_device_memory *mem)
|
||||
{
|
||||
if (mem->base_bo) {
|
||||
/* result intentionally unused; this function cannot report failure */
vn_device_memory_wait_alloc(dev, mem);
|
||||
vn_renderer_bo_unref(dev->renderer, mem->base_bo);
|
||||
}
|
||||
}
|
||||
|
||||
static VkResult
|
||||
vn_device_memory_pool_grow_alloc(struct vn_device *dev,
|
||||
uint32_t mem_type_index,
|
||||
@@ -68,9 +122,7 @@ vn_device_memory_pool_grow_alloc(struct vn_device *dev,
|
||||
if (result != VK_SUCCESS)
|
||||
goto obj_fini;
|
||||
|
||||
result = vn_renderer_bo_create_from_device_memory(
|
||||
dev->renderer, mem->size, mem->base.id, mem->type.propertyFlags, 0,
|
||||
&mem->base_bo);
|
||||
result = vn_device_memory_bo_init(dev, mem, 0);
|
||||
if (result != VK_SUCCESS)
|
||||
goto mem_free;
|
||||
|
||||
@@ -342,9 +394,7 @@ vn_device_memory_alloc_export(struct vn_device *dev,
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
result = vn_renderer_bo_create_from_device_memory(
|
||||
dev->renderer, mem->size, mem->base.id, mem->type.propertyFlags,
|
||||
external_handles, &mem->base_bo);
|
||||
result = vn_device_memory_bo_init(dev, mem, external_handles);
|
||||
if (result != VK_SUCCESS) {
|
||||
vn_device_memory_free_simple(dev, mem);
|
||||
return result;
|
||||
@@ -587,8 +637,8 @@ vn_FreeMemory(VkDevice device,
|
||||
if (mem->base_memory) {
|
||||
vn_device_memory_pool_unref(dev, mem->base_memory);
|
||||
} else {
|
||||
if (mem->base_bo)
|
||||
vn_renderer_bo_unref(dev->renderer, mem->base_bo);
|
||||
/* ensure renderer side import still sees the resource */
|
||||
vn_device_memory_bo_fini(dev, mem);
|
||||
|
||||
if (mem->bo_roundtrip_seqno_valid)
|
||||
vn_instance_wait_roundtrip(dev->instance, mem->bo_roundtrip_seqno);
|
||||
@@ -646,9 +696,7 @@ vn_MapMemory(VkDevice device,
|
||||
* the extension.
|
||||
*/
|
||||
if (need_bo) {
|
||||
result = vn_renderer_bo_create_from_device_memory(
|
||||
dev->renderer, mem->size, mem->base.id, mem->type.propertyFlags, 0,
|
||||
&mem->base_bo);
|
||||
result = vn_device_memory_bo_init(dev, mem, 0);
|
||||
if (result != VK_SUCCESS)
|
||||
return vn_error(dev->instance, result);
|
||||
}
|
||||
|
||||
@@ -32,6 +32,21 @@ struct vn_device_memory {
|
||||
/* non-NULL when mappable or external */
|
||||
struct vn_renderer_bo *base_bo;
|
||||
|
||||
/* ensure renderer side resource create is called after vkAllocateMemory
|
||||
*
|
||||
* 1. driver submits vkAllocateMemory (alloc) via ring for a ring seqno
|
||||
* 2. driver submits via vq to wait for above ring to reach the seqno
|
||||
* 3. driver creates virtgpu bo from renderer VkDeviceMemory
|
||||
*
|
||||
* ensure renderer side resource destroy is called after vkAllocateMemory
|
||||
*
|
||||
* 1. driver submits vkAllocateMemory (import) via ring for a ring seqno
|
||||
* 2. driver submits via vq to wait for above ring to reach the seqno
|
||||
* 3. driver destroys virtgpu bo
|
||||
*/
|
||||
bool bo_ring_seqno_valid;
|
||||
uint32_t bo_ring_seqno;
|
||||
|
||||
/* ensure renderer side vkFreeMemory is called after vkGetMemoryFdKHR
|
||||
*
|
||||
* 1. driver creates virtgpu bo from renderer VkDeviceMemory
|
||||
|
||||
Reference in New Issue
Block a user