diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c index 4ecd3075b5e..0a8e7d46f8d 100644 --- a/src/intel/vulkan/anv_batch_chain.c +++ b/src/intel/vulkan/anv_batch_chain.c @@ -1705,28 +1705,24 @@ anv_queue_submit_trtt_batch(struct anv_sparse_submission *submit, struct anv_device *device = queue->device; VkResult result = VK_SUCCESS; - struct anv_trtt_batch_bo trtt_bbo; - trtt_bbo.size = align(batch->next - batch->start, 8); - - result = anv_bo_pool_alloc(&device->batch_bo_pool, trtt_bbo.size, - &trtt_bbo.bo); + uint32_t batch_size = align(batch->next - batch->start, 8); + struct anv_trtt_batch_bo *trtt_bbo; + result = anv_trtt_batch_bo_new(device, batch_size, &trtt_bbo); if (result != VK_SUCCESS) return result; - memcpy(trtt_bbo.bo->map, batch->start, trtt_bbo.size); + memcpy(trtt_bbo->bo->map, batch->start, trtt_bbo->size); #ifdef SUPPORT_INTEL_INTEGRATED_GPUS if (device->physical->memory.need_flush) - intel_flush_range(trtt_bbo.bo->map, trtt_bbo.size); + intel_flush_range(trtt_bbo->bo->map, trtt_bbo->size); #endif if (INTEL_DEBUG(DEBUG_BATCH)) { - intel_print_batch(queue->decoder, trtt_bbo.bo->map, trtt_bbo.bo->size, - trtt_bbo.bo->offset, false); + intel_print_batch(queue->decoder, trtt_bbo->bo->map, trtt_bbo->bo->size, + trtt_bbo->bo->offset, false); } - result = device->kmd_backend->execute_trtt_batch(submit, &trtt_bbo); - - anv_bo_pool_free(&device->batch_bo_pool, trtt_bbo.bo); + result = device->kmd_backend->execute_trtt_batch(submit, trtt_bbo); return result; } diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 48b243d9c00..404d137e523 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -1452,6 +1452,7 @@ anv_physical_device_try_create(struct vk_instance *vk_instance, } else { device->has_sparse = device->info.ver >= 12 && + device->has_exec_timeline && debug_get_bool_option("ANV_SPARSE", false); device->sparse_uses_trtt = true; } @@ -3105,6 +3106,8 @@ anv_device_init_trtt(struct anv_device *device) if (pthread_mutex_init(&trtt->mutex, NULL) != 0) return vk_error(device, VK_ERROR_INITIALIZATION_FAILED); + list_inithead(&trtt->in_flight_batches); + return VK_SUCCESS; } @@ -3113,6 +3116,32 @@ anv_device_finish_trtt(struct anv_device *device) { struct anv_trtt *trtt = &device->trtt; + if (trtt->timeline_val > 0) { + struct drm_syncobj_timeline_wait wait = { + .handles = (uintptr_t)&trtt->timeline_handle, + .points = (uintptr_t)&trtt->timeline_val, + .timeout_nsec = INT64_MAX, + .count_handles = 1, + .flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL, + .first_signaled = false, + }; + if (intel_ioctl(device->fd, DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, &wait)) + fprintf(stderr, "TR-TT syncobj wait failed!\n"); + + list_for_each_entry_safe(struct anv_trtt_batch_bo, trtt_bbo, + &trtt->in_flight_batches, link) + anv_trtt_batch_bo_free(device, trtt_bbo); + + } + + if (trtt->timeline_handle > 0) { + struct drm_syncobj_destroy destroy = { + .handle = trtt->timeline_handle, + }; + if (intel_ioctl(device->fd, DRM_IOCTL_SYNCOBJ_DESTROY, &destroy)) + fprintf(stderr, "TR-TT syncobj destroy failed!\n"); + } + pthread_mutex_destroy(&trtt->mutex); vk_free(&device->vk.alloc, trtt->l3_mirror); diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 1db938569ab..8d07c6da5b5 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1598,6 +1598,14 @@ struct anv_device_astc_emu { struct anv_trtt_batch_bo { struct anv_bo *bo; uint32_t size; + + /* Once device->trtt.timeline_handle signals timeline_val as complete we + * can free this struct and its members. + */ + uint64_t timeline_val; + + /* Part of device->trtt.in_flight_batches. */ + struct list_head link; }; struct anv_device { @@ -1807,6 +1815,15 @@ struct anv_device { */ struct anv_bo *cur_page_table_bo; uint64_t next_page_table_bo_offset; + + /* Timeline syncobj used to track completion of the TR-TT batch BOs. */ + uint32_t timeline_handle; + uint64_t timeline_val; + + /* List of struct anv_trtt_batch_bo batches that are in flight and can + * be freed once their timeline gets signaled. + */ + struct list_head in_flight_batches; } trtt; /* This is true if the user ever bound a sparse resource to memory. This @@ -1949,6 +1966,15 @@ VkResult anv_queue_submit_simple_batch(struct anv_queue *queue, VkResult anv_queue_submit_trtt_batch(struct anv_sparse_submission *submit, struct anv_batch *batch); +static inline void +anv_trtt_batch_bo_free(struct anv_device *device, + struct anv_trtt_batch_bo *trtt_bbo) +{ + anv_bo_pool_free(&device->batch_bo_pool, trtt_bbo->bo); + list_del(&trtt_bbo->link); + vk_free(&device->vk.alloc, trtt_bbo); +} + void anv_queue_trace(struct anv_queue *queue, const char *label, bool frame, bool begin); @@ -2840,6 +2866,8 @@ VkResult anv_sparse_image_check_support(struct anv_physical_device *pdevice, VkSampleCountFlagBits samples, VkImageType type, VkFormat format); +VkResult anv_trtt_batch_bo_new(struct anv_device *device, uint32_t batch_size, + struct anv_trtt_batch_bo **out_trtt_bbo); struct anv_buffer { struct vk_buffer vk; diff --git a/src/intel/vulkan/anv_sparse.c b/src/intel/vulkan/anv_sparse.c index a3fe2c1ecae..381ab9c9fc3 100644 --- a/src/intel/vulkan/anv_sparse.c +++ b/src/intel/vulkan/anv_sparse.c @@ -391,6 +391,15 @@ anv_trtt_init_context_state(struct anv_queue *queue) struct anv_device *device = queue->device; struct anv_trtt *trtt = &device->trtt; + struct drm_syncobj_create create = { + .handle = 0, + .flags = 0, + }; + if (intel_ioctl(device->fd, DRM_IOCTL_SYNCOBJ_CREATE, &create)) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + assert(create.handle != 0); + trtt->timeline_handle = create.handle; + struct anv_bo *l3_bo; VkResult result = trtt_get_page_table_bo(device, &l3_bo, &trtt->l3_addr); if (result != VK_SUCCESS) @@ -1203,3 +1212,65 @@ anv_sparse_image_check_support(struct anv_physical_device *pdevice, return VK_SUCCESS; } + +static VkResult +anv_trtt_garbage_collect_batches(struct anv_device *device) +{ + struct anv_trtt *trtt = &device->trtt; + + if (trtt->timeline_val % 8 != 7) + return VK_SUCCESS; + + uint64_t cur_timeline_val = 0; + struct drm_syncobj_timeline_array array = { + .handles = (uintptr_t)&trtt->timeline_handle, + .points = (uintptr_t)&cur_timeline_val, + .count_handles = 1, + .flags = 0, + }; + if (intel_ioctl(device->fd, DRM_IOCTL_SYNCOBJ_QUERY, &array)) + return vk_error(device, VK_ERROR_UNKNOWN); + + list_for_each_entry_safe(struct anv_trtt_batch_bo, trtt_bbo, + &trtt->in_flight_batches, link) { + if (trtt_bbo->timeline_val > cur_timeline_val) + return VK_SUCCESS; + + anv_trtt_batch_bo_free(device, trtt_bbo); + } + + return VK_SUCCESS; +} + +VkResult +anv_trtt_batch_bo_new(struct anv_device *device, uint32_t batch_size, + struct anv_trtt_batch_bo **out_trtt_bbo) +{ + struct anv_trtt *trtt = &device->trtt; + VkResult result; + + anv_trtt_garbage_collect_batches(device); + + struct anv_trtt_batch_bo *trtt_bbo = + vk_alloc(&device->vk.alloc, sizeof(*trtt_bbo), 8, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (!trtt_bbo) + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + + result = anv_bo_pool_alloc(&device->batch_bo_pool, batch_size, + &trtt_bbo->bo); + if (result != VK_SUCCESS) + goto out; + + trtt_bbo->size = batch_size; + trtt_bbo->timeline_val = ++trtt->timeline_val; + + list_addtail(&trtt_bbo->link, &trtt->in_flight_batches); + + *out_trtt_bbo = trtt_bbo; + + return VK_SUCCESS; +out: + vk_free(&device->vk.alloc, trtt_bbo); + return result; +} diff --git a/src/intel/vulkan/i915/anv_batch_chain.c b/src/intel/vulkan/i915/anv_batch_chain.c index 622ae0fddd1..640c1d7cbf4 100644 --- a/src/intel/vulkan/i915/anv_batch_chain.c +++ b/src/intel/vulkan/i915/anv_batch_chain.c @@ -1008,6 +1008,13 @@ i915_execute_trtt_batch(struct anv_sparse_submission *submit, goto out; } + result = anv_execbuf_add_syncobj(device, &execbuf, trtt->timeline_handle, + I915_EXEC_FENCE_SIGNAL, + trtt_bbo->timeline_val); + if (result != VK_SUCCESS) + goto out; + + result = anv_execbuf_add_bo(device, &execbuf, device->workaround_bo, NULL, 0); if (result != VK_SUCCESS) @@ -1067,15 +1074,6 @@ i915_execute_trtt_batch(struct anv_sparse_submission *submit, } } - /* TODO: we can get rid of this wait once we can properly handle the buffer - * lifetimes. - */ - result = anv_device_wait(device, trtt_bbo->bo, INT64_MAX); - if (result != VK_SUCCESS) { - result = vk_device_set_lost(&device->vk, - "trtt anv_device_wait failed: %m"); - } - out: anv_execbuf_finish(&execbuf); return result; diff --git a/src/intel/vulkan/xe/anv_batch_chain.c b/src/intel/vulkan/xe/anv_batch_chain.c index d251b743016..970b25f439d 100644 --- a/src/intel/vulkan/xe/anv_batch_chain.c +++ b/src/intel/vulkan/xe/anv_batch_chain.c @@ -189,15 +189,13 @@ xe_execute_trtt_batch(struct anv_sparse_submission *submit, { struct anv_queue *queue = submit->queue; struct anv_device *device = queue->device; - VkResult result = VK_SUCCESS; - - uint32_t syncobj_handle; - if (drmSyncobjCreate(device->fd, 0, &syncobj_handle)) - return vk_errorf(device, VK_ERROR_UNKNOWN, "Unable to create sync obj"); + struct anv_trtt *trtt = &device->trtt; + VkResult result; struct drm_xe_sync extra_sync = { - .flags = DRM_XE_SYNC_SYNCOBJ | DRM_XE_SYNC_SIGNAL, - .handle = syncobj_handle, + .flags = DRM_XE_SYNC_TIMELINE_SYNCOBJ | DRM_XE_SYNC_SIGNAL, + .handle = trtt->timeline_handle, + .timeline_value = trtt_bbo->timeline_val, }; struct drm_xe_sync *xe_syncs = NULL; @@ -209,7 +207,7 @@ xe_execute_trtt_batch(struct anv_sparse_submission *submit, false, /* is_companion_rcs_queue */ &xe_syncs, &xe_syncs_count); if (result != VK_SUCCESS) - goto exec_error; + return result; struct drm_xe_exec exec = { .exec_queue_id = queue->exec_queue_id, @@ -220,34 +218,18 @@ xe_execute_trtt_batch(struct anv_sparse_submission *submit, }; if (!device->info->no_hw) { - if (intel_ioctl(device->fd, DRM_IOCTL_XE_EXEC, &exec)) { - result = vk_device_set_lost(&device->vk, "XE_EXEC failed: %m"); - goto exec_error; - } + if (intel_ioctl(device->fd, DRM_IOCTL_XE_EXEC, &exec)) + return vk_device_set_lost(&device->vk, "XE_EXEC failed: %m"); } if (queue->sync) { result = vk_sync_wait(&device->vk, queue->sync, 0, VK_SYNC_WAIT_COMPLETE, UINT64_MAX); - if (result != VK_SUCCESS) { - result = vk_queue_set_lost(&queue->vk, "trtt sync wait failed"); - goto exec_error; - } + if (result != VK_SUCCESS) + return vk_queue_set_lost(&queue->vk, "trtt sync wait failed"); } - /* FIXME: we shouldn't need this wait, figure out a way to remove it. */ - struct drm_syncobj_wait wait = { - .handles = (uintptr_t)&syncobj_handle, - .timeout_nsec = INT64_MAX, - .count_handles = 1, - }; - if (intel_ioctl(device->fd, DRM_IOCTL_SYNCOBJ_WAIT, &wait)) - result = vk_device_set_lost(&device->vk, "DRM_IOCTL_SYNCOBJ_WAIT failed: %m"); - -exec_error: - drmSyncobjDestroy(device->fd, syncobj_handle); - - return result; + return VK_SUCCESS; } VkResult