From f97998ff17c1d2b0acb45e5e35b18c7446ae4844 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Fri, 13 Dec 2024 11:22:55 -0500 Subject: [PATCH] tu/knl: Move u_trace fence handling to generic code Contrary to what the comment said, kgsl and drm actually have very similar semantics for fences. Add a common function to wait on a fence, and embed the queue + fence directly instead of a syncobj abstraction that doesn't actually gain us much. This common fence wait function will also be helpful for making more code generic. Part-of: --- src/freedreno/vulkan/tu_device.cc | 4 +-- src/freedreno/vulkan/tu_device.h | 6 ++-- src/freedreno/vulkan/tu_knl.cc | 6 ++-- src/freedreno/vulkan/tu_knl.h | 8 +++-- src/freedreno/vulkan/tu_knl_drm_msm.cc | 28 +++++---------- src/freedreno/vulkan/tu_knl_drm_virtio.cc | 27 +++++--------- src/freedreno/vulkan/tu_knl_kgsl.cc | 43 ++++++----------------- src/freedreno/vulkan/tu_queue.cc | 2 ++ 8 files changed, 47 insertions(+), 77 deletions(-) diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc index 1380ddb5454..949b14a04d1 100644 --- a/src/freedreno/vulkan/tu_device.cc +++ b/src/freedreno/vulkan/tu_device.cc @@ -1901,7 +1901,8 @@ tu_trace_read_ts(struct u_trace_context *utctx, /* Only need to stall on results for the first entry: */ if (offset_B == 0) { - tu_device_wait_u_trace(device, submission_data->syncobj); + tu_queue_wait_fence(submission_data->queue, submission_data->fence, + 1000000000); } if (tu_bo_map(device, bo, NULL) != VK_SUCCESS) { @@ -2132,7 +2133,6 @@ tu_u_trace_submission_data_finish( } vk_free(&device->vk.alloc, submission_data->cmd_trace_data); - vk_free(&device->vk.alloc, submission_data->syncobj); vk_free(&device->vk.alloc, submission_data); } diff --git a/src/freedreno/vulkan/tu_device.h b/src/freedreno/vulkan/tu_device.h index 6fd33132bfc..3addbffeed6 100644 --- a/src/freedreno/vulkan/tu_device.h +++ b/src/freedreno/vulkan/tu_device.h @@ -542,10 +542,12 @@ struct tu_u_trace_cmd_data struct tu_u_trace_submission_data { uint32_t submission_id; + /* We have to know when timestamps are available, - * this sync object indicates it. + * this queue and fence indicates it. */ - struct tu_u_trace_syncobj *syncobj; + struct tu_queue *queue; + uint32_t fence; uint32_t cmd_buffer_count; uint32_t last_buffer_with_tracepoints; diff --git a/src/freedreno/vulkan/tu_knl.cc b/src/freedreno/vulkan/tu_knl.cc index 46a17c4e880..bc797b32cbf 100644 --- a/src/freedreno/vulkan/tu_knl.cc +++ b/src/freedreno/vulkan/tu_knl.cc @@ -255,9 +255,11 @@ tu_device_get_suspend_count(struct tu_device *dev, } VkResult -tu_device_wait_u_trace(struct tu_device *dev, struct tu_u_trace_syncobj *syncobj) +tu_queue_wait_fence(struct tu_queue *queue, uint32_t fence, + uint64_t timeout_ns) { - return dev->instance->knl->device_wait_u_trace(dev, syncobj); + return queue->device->instance->knl->queue_wait_fence(queue, fence, + timeout_ns); } VkResult diff --git a/src/freedreno/vulkan/tu_knl.h b/src/freedreno/vulkan/tu_knl.h index a0c42a9d0c0..d13101e5ca3 100644 --- a/src/freedreno/vulkan/tu_knl.h +++ b/src/freedreno/vulkan/tu_knl.h @@ -101,8 +101,6 @@ struct tu_knl { void *metadata, uint32_t metadata_size); int (*bo_get_metadata)(struct tu_device *dev, struct tu_bo *bo, void *metadata, uint32_t metadata_size); - VkResult (*device_wait_u_trace)(struct tu_device *dev, - struct tu_u_trace_syncobj *syncobj); void *(*submit_create)(struct tu_device *device); void (*submit_finish)(struct tu_device *device, void *_submit); void (*submit_add_entries)(struct tu_device *device, void *_submit, @@ -112,6 +110,8 @@ struct tu_knl { struct vk_sync_wait *waits, uint32_t wait_count, struct vk_sync_signal *signals, uint32_t signal_count, struct tu_u_trace_submission_data *u_trace_submission_data); + VkResult (*queue_wait_fence)(struct tu_queue *queue, uint32_t fence, + uint64_t timeout_ns); const struct vk_device_entrypoint_table *device_entrypoints; }; @@ -261,4 +261,8 @@ tu_queue_submit(struct tu_queue *queue, void *submit, struct vk_sync_signal *signals, uint32_t signal_count, struct tu_u_trace_submission_data *u_trace_submission_data); +VkResult +tu_queue_wait_fence(struct tu_queue *queue, uint32_t fence, + uint64_t timeout_ns); + #endif /* TU_DRM_H */ diff --git a/src/freedreno/vulkan/tu_knl_drm_msm.cc b/src/freedreno/vulkan/tu_knl_drm_msm.cc index 28d6089b734..9fa6ae0f99c 100644 --- a/src/freedreno/vulkan/tu_knl_drm_msm.cc +++ b/src/freedreno/vulkan/tu_knl_drm_msm.cc @@ -27,12 +27,6 @@ #include "tu_rmv.h" #include "redump.h" -struct tu_u_trace_syncobj -{ - uint32_t msm_queue_id; - uint32_t fence; -}; - static int tu_drm_get_param(int fd, uint32_t param, uint64_t *value) { @@ -327,6 +321,14 @@ tu_wait_fence(struct tu_device *dev, return VK_SUCCESS; } +VkResult +msm_queue_wait_fence(struct tu_queue *queue, uint32_t fence, + uint64_t timeout_ns) +{ + return tu_wait_fence(queue->device, queue->msm_queue_id, fence, + timeout_ns); +} + static VkResult tu_free_zombie_vma_locked(struct tu_device *dev, bool wait) { @@ -947,12 +949,6 @@ msm_queue_submit(struct tu_queue *queue, void *_submit, if (u_trace_submission_data) { u_trace_submission_data->gpu_ts_offset = gpu_offset; - /* We have to allocate it here since it is different between drm/kgsl */ - u_trace_submission_data->syncobj = (struct tu_u_trace_syncobj *) - vk_alloc(&queue->device->vk.alloc, sizeof(struct tu_u_trace_syncobj), - 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); - u_trace_submission_data->syncobj->fence = req.fence; - u_trace_submission_data->syncobj->msm_queue_id = queue->msm_queue_id; } for (uint32_t i = 0; i < wait_count; i++) { @@ -992,12 +988,6 @@ fail_in_syncobjs: return result; } -static VkResult -msm_device_wait_u_trace(struct tu_device *dev, struct tu_u_trace_syncobj *syncobj) -{ - return tu_wait_fence(dev, syncobj->msm_queue_id, syncobj->fence, 1000000000); -} - static const struct tu_knl msm_knl_funcs = { .name = "msm", @@ -1016,11 +1006,11 @@ static const struct tu_knl msm_knl_funcs = { .bo_finish = tu_drm_bo_finish, .bo_set_metadata = msm_bo_set_metadata, .bo_get_metadata = msm_bo_get_metadata, - .device_wait_u_trace = msm_device_wait_u_trace, .submit_create = msm_submit_create, .submit_finish = msm_submit_finish, .submit_add_entries = msm_submit_add_entries, .queue_submit = msm_queue_submit, + .queue_wait_fence = msm_queue_wait_fence, }; VkResult diff --git a/src/freedreno/vulkan/tu_knl_drm_virtio.cc b/src/freedreno/vulkan/tu_knl_drm_virtio.cc index dad47938749..606d74edcfd 100644 --- a/src/freedreno/vulkan/tu_knl_drm_virtio.cc +++ b/src/freedreno/vulkan/tu_knl_drm_virtio.cc @@ -45,11 +45,6 @@ struct tu_userspace_fence_cmds { struct tu_userspace_fence_cmd cmds[64]; }; -struct tu_u_trace_syncobj { - uint32_t msm_queue_id; - uint32_t fence; -}; - struct tu_virtio_device { struct vdrm_device *vdrm; struct msm_shmem *shmem; @@ -407,6 +402,14 @@ out: return VK_ERROR_UNKNOWN; } +VkResult +virtio_queue_wait_fence(struct tu_queue *queue, uint32_t fence, + uint64_t timeout_ns) +{ + return tu_wait_fence(queue->device, queue->msm_queue_id, fence, + timeout_ns); +} + static VkResult tu_free_zombie_vma_locked(struct tu_device *dev, bool wait) { @@ -1025,12 +1028,6 @@ virtio_queue_submit(struct tu_queue *queue, void *_submit, if (u_trace_submission_data) { u_trace_submission_data->gpu_ts_offset = gpu_offset; - /* We have to allocate it here since it is different between drm/kgsl */ - u_trace_submission_data->syncobj = (struct tu_u_trace_syncobj *) - vk_alloc(&queue->device->vk.alloc, sizeof(struct tu_u_trace_syncobj), - 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); - u_trace_submission_data->syncobj->fence = req->fence; - u_trace_submission_data->syncobj->msm_queue_id = queue->msm_queue_id; } for (uint32_t i = 0; i < wait_count; i++) { @@ -1072,12 +1069,6 @@ fail_in_syncobjs: return result; } -static VkResult -virtio_device_wait_u_trace(struct tu_device *dev, struct tu_u_trace_syncobj *syncobj) -{ - return tu_wait_fence(dev, syncobj->msm_queue_id, syncobj->fence, 1000000000); -} - static const struct tu_knl virtio_knl_funcs = { .name = "virtgpu", @@ -1094,11 +1085,11 @@ static const struct tu_knl virtio_knl_funcs = { .bo_map = virtio_bo_map, .bo_allow_dump = virtio_bo_allow_dump, .bo_finish = tu_drm_bo_finish, - .device_wait_u_trace = virtio_device_wait_u_trace, .submit_create = msm_submit_create, .submit_finish = msm_submit_finish, .submit_add_entries = msm_submit_add_entries, .queue_submit = virtio_queue_submit, + .queue_wait_fence = virtio_queue_wait_fence, }; VkResult diff --git a/src/freedreno/vulkan/tu_knl_kgsl.cc b/src/freedreno/vulkan/tu_knl_kgsl.cc index d878be86180..26284929e15 100644 --- a/src/freedreno/vulkan/tu_knl_kgsl.cc +++ b/src/freedreno/vulkan/tu_knl_kgsl.cc @@ -443,12 +443,6 @@ struct kgsl_syncobj int fd; }; -struct tu_u_trace_syncobj -{ - uint32_t msm_queue_id; - uint32_t timestamp; -}; - static void kgsl_syncobj_init(struct kgsl_syncobj *s, bool signaled) { @@ -579,6 +573,16 @@ wait_timestamp_safe(int fd, } } +VkResult +kgsl_queue_wait_fence(struct tu_queue *queue, uint32_t fence, + uint64_t timeout_ns) +{ + uint64_t abs_timeout_ns = os_time_get_nano() + timeout_ns; + + return wait_timestamp_safe(queue->device->fd, queue->msm_queue_id, + fence, abs_timeout_ns); +} + static VkResult kgsl_syncobj_wait(struct tu_device *device, struct kgsl_syncobj *s, @@ -1280,12 +1284,6 @@ kgsl_queue_submit(struct tu_queue *queue, void *_submit, struct tu_u_trace_submission_data *submission_data = u_trace_submission_data; submission_data->gpu_ts_offset = gpu_offset; - /* We have to allocate it here since it is different between drm/kgsl */ - submission_data->syncobj = (struct tu_u_trace_syncobj *) - vk_alloc(&queue->device->vk.alloc, sizeof(struct tu_u_trace_syncobj), - 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); - submission_data->syncobj->timestamp = req.timestamp; - submission_data->syncobj->msm_queue_id = queue->msm_queue_id; } fail_submit: @@ -1299,25 +1297,6 @@ fail_submit: return result; } -static VkResult -kgsl_device_wait_u_trace(struct tu_device *dev, struct tu_u_trace_syncobj *syncobj) -{ - struct kgsl_device_waittimestamp_ctxtid req = { - .context_id = syncobj->msm_queue_id, - .timestamp = syncobj->timestamp, - .timeout = 5000, // 5s - }; - - int ret = safe_ioctl(dev->fd, IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID, &req); - - if (ret) { - assert(errno == ETIME); - return VK_TIMEOUT; - } - - return VK_SUCCESS; -} - static VkResult kgsl_device_init(struct tu_device *dev) { @@ -1387,11 +1366,11 @@ static const struct tu_knl kgsl_knl_funcs = { .bo_map = kgsl_bo_map, .bo_allow_dump = kgsl_bo_allow_dump, .bo_finish = kgsl_bo_finish, - .device_wait_u_trace = kgsl_device_wait_u_trace, .submit_create = kgsl_submit_create, .submit_finish = kgsl_submit_finish, .submit_add_entries = kgsl_submit_add_entries, .queue_submit = kgsl_queue_submit, + .queue_wait_fence = kgsl_queue_wait_fence, }; VkResult diff --git a/src/freedreno/vulkan/tu_queue.cc b/src/freedreno/vulkan/tu_queue.cc index 509a79eef1d..dcefcb94ce7 100644 --- a/src/freedreno/vulkan/tu_queue.cc +++ b/src/freedreno/vulkan/tu_queue.cc @@ -138,6 +138,8 @@ queue_submit(struct vk_queue *_queue, struct vk_queue_submit *vk_submit) if (u_trace_submission_data) { u_trace_submission_data->submission_id = device->submit_count; + u_trace_submission_data->queue = queue; + u_trace_submission_data->fence = queue->fence; for (uint32_t i = 0; i < u_trace_submission_data->cmd_buffer_count; i++) { bool free_data = i == u_trace_submission_data->last_buffer_with_tracepoints;