tu/knl: Move u_trace fence handling to generic code

Contrary to what the comment said, kgsl and drm actually have very
similar semantics for fences. Add a common function to wait on a fence,
and embed the queue + fence directly instead of a syncobj abstraction
that doesn't actually gain us much. This common fence wait function will
also be helpful for making more code generic.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32165>
This commit is contained in:
Connor Abbott
2024-12-13 11:22:55 -05:00
committed by Marge Bot
parent 0e7a91595f
commit f97998ff17
8 changed files with 47 additions and 77 deletions
+2 -2
View File
@@ -1901,7 +1901,8 @@ tu_trace_read_ts(struct u_trace_context *utctx,
/* Only need to stall on results for the first entry: */
if (offset_B == 0) {
tu_device_wait_u_trace(device, submission_data->syncobj);
tu_queue_wait_fence(submission_data->queue, submission_data->fence,
1000000000);
}
if (tu_bo_map(device, bo, NULL) != VK_SUCCESS) {
@@ -2132,7 +2133,6 @@ tu_u_trace_submission_data_finish(
}
vk_free(&device->vk.alloc, submission_data->cmd_trace_data);
vk_free(&device->vk.alloc, submission_data->syncobj);
vk_free(&device->vk.alloc, submission_data);
}
+4 -2
View File
@@ -542,10 +542,12 @@ struct tu_u_trace_cmd_data
struct tu_u_trace_submission_data
{
uint32_t submission_id;
/* We have to know when timestamps are available,
* this sync object indicates it.
* this queue and fence indicates it.
*/
struct tu_u_trace_syncobj *syncobj;
struct tu_queue *queue;
uint32_t fence;
uint32_t cmd_buffer_count;
uint32_t last_buffer_with_tracepoints;
+4 -2
View File
@@ -255,9 +255,11 @@ tu_device_get_suspend_count(struct tu_device *dev,
}
VkResult
tu_device_wait_u_trace(struct tu_device *dev, struct tu_u_trace_syncobj *syncobj)
tu_queue_wait_fence(struct tu_queue *queue, uint32_t fence,
uint64_t timeout_ns)
{
return dev->instance->knl->device_wait_u_trace(dev, syncobj);
return queue->device->instance->knl->queue_wait_fence(queue, fence,
timeout_ns);
}
VkResult
+6 -2
View File
@@ -101,8 +101,6 @@ struct tu_knl {
void *metadata, uint32_t metadata_size);
int (*bo_get_metadata)(struct tu_device *dev, struct tu_bo *bo,
void *metadata, uint32_t metadata_size);
VkResult (*device_wait_u_trace)(struct tu_device *dev,
struct tu_u_trace_syncobj *syncobj);
void *(*submit_create)(struct tu_device *device);
void (*submit_finish)(struct tu_device *device, void *_submit);
void (*submit_add_entries)(struct tu_device *device, void *_submit,
@@ -112,6 +110,8 @@ struct tu_knl {
struct vk_sync_wait *waits, uint32_t wait_count,
struct vk_sync_signal *signals, uint32_t signal_count,
struct tu_u_trace_submission_data *u_trace_submission_data);
VkResult (*queue_wait_fence)(struct tu_queue *queue, uint32_t fence,
uint64_t timeout_ns);
const struct vk_device_entrypoint_table *device_entrypoints;
};
@@ -261,4 +261,8 @@ tu_queue_submit(struct tu_queue *queue, void *submit,
struct vk_sync_signal *signals, uint32_t signal_count,
struct tu_u_trace_submission_data *u_trace_submission_data);
VkResult
tu_queue_wait_fence(struct tu_queue *queue, uint32_t fence,
uint64_t timeout_ns);
#endif /* TU_DRM_H */
+9 -19
View File
@@ -27,12 +27,6 @@
#include "tu_rmv.h"
#include "redump.h"
struct tu_u_trace_syncobj
{
uint32_t msm_queue_id;
uint32_t fence;
};
static int
tu_drm_get_param(int fd, uint32_t param, uint64_t *value)
{
@@ -327,6 +321,14 @@ tu_wait_fence(struct tu_device *dev,
return VK_SUCCESS;
}
VkResult
msm_queue_wait_fence(struct tu_queue *queue, uint32_t fence,
uint64_t timeout_ns)
{
return tu_wait_fence(queue->device, queue->msm_queue_id, fence,
timeout_ns);
}
static VkResult
tu_free_zombie_vma_locked(struct tu_device *dev, bool wait)
{
@@ -947,12 +949,6 @@ msm_queue_submit(struct tu_queue *queue, void *_submit,
if (u_trace_submission_data) {
u_trace_submission_data->gpu_ts_offset = gpu_offset;
/* We have to allocate it here since it is different between drm/kgsl */
u_trace_submission_data->syncobj = (struct tu_u_trace_syncobj *)
vk_alloc(&queue->device->vk.alloc, sizeof(struct tu_u_trace_syncobj),
8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
u_trace_submission_data->syncobj->fence = req.fence;
u_trace_submission_data->syncobj->msm_queue_id = queue->msm_queue_id;
}
for (uint32_t i = 0; i < wait_count; i++) {
@@ -992,12 +988,6 @@ fail_in_syncobjs:
return result;
}
static VkResult
msm_device_wait_u_trace(struct tu_device *dev, struct tu_u_trace_syncobj *syncobj)
{
return tu_wait_fence(dev, syncobj->msm_queue_id, syncobj->fence, 1000000000);
}
static const struct tu_knl msm_knl_funcs = {
.name = "msm",
@@ -1016,11 +1006,11 @@ static const struct tu_knl msm_knl_funcs = {
.bo_finish = tu_drm_bo_finish,
.bo_set_metadata = msm_bo_set_metadata,
.bo_get_metadata = msm_bo_get_metadata,
.device_wait_u_trace = msm_device_wait_u_trace,
.submit_create = msm_submit_create,
.submit_finish = msm_submit_finish,
.submit_add_entries = msm_submit_add_entries,
.queue_submit = msm_queue_submit,
.queue_wait_fence = msm_queue_wait_fence,
};
VkResult
+9 -18
View File
@@ -45,11 +45,6 @@ struct tu_userspace_fence_cmds {
struct tu_userspace_fence_cmd cmds[64];
};
struct tu_u_trace_syncobj {
uint32_t msm_queue_id;
uint32_t fence;
};
struct tu_virtio_device {
struct vdrm_device *vdrm;
struct msm_shmem *shmem;
@@ -407,6 +402,14 @@ out:
return VK_ERROR_UNKNOWN;
}
VkResult
virtio_queue_wait_fence(struct tu_queue *queue, uint32_t fence,
uint64_t timeout_ns)
{
return tu_wait_fence(queue->device, queue->msm_queue_id, fence,
timeout_ns);
}
static VkResult
tu_free_zombie_vma_locked(struct tu_device *dev, bool wait)
{
@@ -1025,12 +1028,6 @@ virtio_queue_submit(struct tu_queue *queue, void *_submit,
if (u_trace_submission_data) {
u_trace_submission_data->gpu_ts_offset = gpu_offset;
/* We have to allocate it here since it is different between drm/kgsl */
u_trace_submission_data->syncobj = (struct tu_u_trace_syncobj *)
vk_alloc(&queue->device->vk.alloc, sizeof(struct tu_u_trace_syncobj),
8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
u_trace_submission_data->syncobj->fence = req->fence;
u_trace_submission_data->syncobj->msm_queue_id = queue->msm_queue_id;
}
for (uint32_t i = 0; i < wait_count; i++) {
@@ -1072,12 +1069,6 @@ fail_in_syncobjs:
return result;
}
static VkResult
virtio_device_wait_u_trace(struct tu_device *dev, struct tu_u_trace_syncobj *syncobj)
{
return tu_wait_fence(dev, syncobj->msm_queue_id, syncobj->fence, 1000000000);
}
static const struct tu_knl virtio_knl_funcs = {
.name = "virtgpu",
@@ -1094,11 +1085,11 @@ static const struct tu_knl virtio_knl_funcs = {
.bo_map = virtio_bo_map,
.bo_allow_dump = virtio_bo_allow_dump,
.bo_finish = tu_drm_bo_finish,
.device_wait_u_trace = virtio_device_wait_u_trace,
.submit_create = msm_submit_create,
.submit_finish = msm_submit_finish,
.submit_add_entries = msm_submit_add_entries,
.queue_submit = virtio_queue_submit,
.queue_wait_fence = virtio_queue_wait_fence,
};
VkResult
+11 -32
View File
@@ -443,12 +443,6 @@ struct kgsl_syncobj
int fd;
};
struct tu_u_trace_syncobj
{
uint32_t msm_queue_id;
uint32_t timestamp;
};
static void
kgsl_syncobj_init(struct kgsl_syncobj *s, bool signaled)
{
@@ -579,6 +573,16 @@ wait_timestamp_safe(int fd,
}
}
VkResult
kgsl_queue_wait_fence(struct tu_queue *queue, uint32_t fence,
uint64_t timeout_ns)
{
uint64_t abs_timeout_ns = os_time_get_nano() + timeout_ns;
return wait_timestamp_safe(queue->device->fd, queue->msm_queue_id,
fence, abs_timeout_ns);
}
static VkResult
kgsl_syncobj_wait(struct tu_device *device,
struct kgsl_syncobj *s,
@@ -1280,12 +1284,6 @@ kgsl_queue_submit(struct tu_queue *queue, void *_submit,
struct tu_u_trace_submission_data *submission_data =
u_trace_submission_data;
submission_data->gpu_ts_offset = gpu_offset;
/* We have to allocate it here since it is different between drm/kgsl */
submission_data->syncobj = (struct tu_u_trace_syncobj *)
vk_alloc(&queue->device->vk.alloc, sizeof(struct tu_u_trace_syncobj),
8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
submission_data->syncobj->timestamp = req.timestamp;
submission_data->syncobj->msm_queue_id = queue->msm_queue_id;
}
fail_submit:
@@ -1299,25 +1297,6 @@ fail_submit:
return result;
}
static VkResult
kgsl_device_wait_u_trace(struct tu_device *dev, struct tu_u_trace_syncobj *syncobj)
{
struct kgsl_device_waittimestamp_ctxtid req = {
.context_id = syncobj->msm_queue_id,
.timestamp = syncobj->timestamp,
.timeout = 5000, // 5s
};
int ret = safe_ioctl(dev->fd, IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID, &req);
if (ret) {
assert(errno == ETIME);
return VK_TIMEOUT;
}
return VK_SUCCESS;
}
static VkResult
kgsl_device_init(struct tu_device *dev)
{
@@ -1387,11 +1366,11 @@ static const struct tu_knl kgsl_knl_funcs = {
.bo_map = kgsl_bo_map,
.bo_allow_dump = kgsl_bo_allow_dump,
.bo_finish = kgsl_bo_finish,
.device_wait_u_trace = kgsl_device_wait_u_trace,
.submit_create = kgsl_submit_create,
.submit_finish = kgsl_submit_finish,
.submit_add_entries = kgsl_submit_add_entries,
.queue_submit = kgsl_queue_submit,
.queue_wait_fence = kgsl_queue_wait_fence,
};
VkResult
+2
View File
@@ -138,6 +138,8 @@ queue_submit(struct vk_queue *_queue, struct vk_queue_submit *vk_submit)
if (u_trace_submission_data) {
u_trace_submission_data->submission_id = device->submit_count;
u_trace_submission_data->queue = queue;
u_trace_submission_data->fence = queue->fence;
for (uint32_t i = 0; i < u_trace_submission_data->cmd_buffer_count; i++) {
bool free_data = i == u_trace_submission_data->last_buffer_with_tracepoints;