diff --git a/src/intel/vulkan/xe/anv_batch_chain.c b/src/intel/vulkan/xe/anv_batch_chain.c
index b964ec8f06a..bad00dce2c0 100644
--- a/src/intel/vulkan/xe/anv_batch_chain.c
+++ b/src/intel/vulkan/xe/anv_batch_chain.c
@@ -70,3 +70,208 @@ exec_error:
 
    return result;
 }
+
+#define TYPE_SIGNAL true
+#define TYPE_WAIT false
+
+static void
+xe_exec_fill_sync(struct drm_xe_sync *xe_sync, struct vk_sync *vk_sync,
+                  uint64_t value, bool signal)
+{
+   if (unlikely(!vk_sync_type_is_drm_syncobj(vk_sync->type))) {
+      unreachable("Unsupported sync type");
+      return;
+   }
+
+   const struct vk_drm_syncobj *syncobj = vk_sync_as_drm_syncobj(vk_sync);
+   xe_sync->handle = syncobj->syncobj;
+
+   if (value) {
+      xe_sync->flags |= DRM_XE_SYNC_TIMELINE_SYNCOBJ;
+      xe_sync->timeline_value = value;
+   } else {
+      xe_sync->flags |= DRM_XE_SYNC_SYNCOBJ;
+   }
+
+   if (signal)
+      xe_sync->flags |= DRM_XE_SYNC_SIGNAL;
+}
+
+static VkResult
+xe_exec_process_syncs(struct anv_queue *queue,
+                      uint32_t wait_count, const struct vk_sync_wait *waits,
+                      uint32_t signal_count, const struct vk_sync_signal *signals,
+                      struct anv_utrace_submit *utrace_submit,
+                      struct drm_xe_sync **ret, uint32_t *ret_count)
+{
+   struct anv_device *device = queue->device;
+   uint32_t num_syncs = wait_count + signal_count + (utrace_submit ? 1 : 0) +
+                        (queue->sync ? 1 : 0);
+
+   if (!num_syncs)
+      return VK_SUCCESS;
+
+   struct drm_xe_sync *xe_syncs = vk_zalloc(&device->vk.alloc,
+                                            sizeof(*xe_syncs) * num_syncs, 8,
+                                            VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!xe_syncs)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   uint32_t count = 0;
+
+   if (utrace_submit) {
+      struct drm_xe_sync *xe_sync = &xe_syncs[count++];
+
+      xe_exec_fill_sync(xe_sync, utrace_submit->sync, 0, TYPE_SIGNAL);
+   }
+
+   for (uint32_t i = 0; i < wait_count; i++) {
+      struct drm_xe_sync *xe_sync = &xe_syncs[count++];
+      const struct vk_sync_wait *vk_wait = &waits[i];
+
+      xe_exec_fill_sync(xe_sync, vk_wait->sync, vk_wait->wait_value,
+                        TYPE_WAIT);
+   }
+
+   for (uint32_t i = 0; i < signal_count; i++) {
+      struct drm_xe_sync *xe_sync = &xe_syncs[count++];
+      const struct vk_sync_signal *vk_signal = &signals[i];
+
+      xe_exec_fill_sync(xe_sync, vk_signal->sync, vk_signal->signal_value,
+                        TYPE_SIGNAL);
+   }
+
+   if (queue->sync) {
+      struct drm_xe_sync *xe_sync = &xe_syncs[count++];
+
+      xe_exec_fill_sync(xe_sync, queue->sync, 0,
+                        TYPE_SIGNAL);
+   }
+
+   assert(count == num_syncs);
+   *ret = xe_syncs;
+   *ret_count = num_syncs;
+   return VK_SUCCESS;
+}
+
+static void
+xe_exec_print_debug(struct anv_queue *queue, uint32_t cmd_buffer_count,
+                    struct anv_cmd_buffer **cmd_buffers,
+                    struct anv_query_pool *perf_query_pool,
+                    uint32_t perf_query_pass, struct drm_xe_exec *exec)
+{
+   if (INTEL_DEBUG(DEBUG_SUBMIT))
+      fprintf(stderr, "Batch offset=0x%016"PRIx64" on queue %u\n",
+              (uint64_t)exec->address, queue->vk.index_in_family);
+
+   anv_cmd_buffer_exec_batch_debug(queue, cmd_buffer_count, cmd_buffers,
+                                   perf_query_pool, perf_query_pass);
+}
+
+VkResult
+xe_queue_exec_utrace_locked(struct anv_queue *queue,
+                            struct anv_utrace_submit *utrace_submit)
+{
+   struct anv_device *device = queue->device;
+   struct drm_xe_sync xe_sync = {};
+
+   xe_exec_fill_sync(&xe_sync, utrace_submit->sync, 0, TYPE_SIGNAL);
+
+#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
+   if (device->physical->memory.need_clflush)
+      intel_flush_range(utrace_submit->batch_bo->map,
+                        utrace_submit->batch_bo->size);
+#endif
+
+   struct drm_xe_exec exec = {
+      .engine_id = queue->engine_id,
+      .num_batch_buffer = 1,
+      .syncs = (uintptr_t)&xe_sync,
+      .num_syncs = 1,
+      .address = utrace_submit->batch_bo->offset,
+   };
+
+   if (likely(!device->info->no_hw)) {
+      if (intel_ioctl(device->fd, DRM_IOCTL_XE_EXEC, &exec))
+         return vk_device_set_lost(&device->vk, "anv_xe_queue_exec_locked failed: %m");
+   }
+
+   return VK_SUCCESS;
+}
+
+VkResult
+xe_queue_exec_locked(struct anv_queue *queue,
+                     uint32_t wait_count,
+                     const struct vk_sync_wait *waits,
+                     uint32_t cmd_buffer_count,
+                     struct anv_cmd_buffer **cmd_buffers,
+                     uint32_t signal_count,
+                     const struct vk_sync_signal *signals,
+                     struct anv_query_pool *perf_query_pool,
+                     uint32_t perf_query_pass)
+{
+   struct anv_device *device = queue->device;
+   struct anv_utrace_submit *utrace_submit = NULL;
+   VkResult result;
+
+   result = anv_device_utrace_flush_cmd_buffers(queue, cmd_buffer_count,
+                                                cmd_buffers, &utrace_submit);
+   if (result != VK_SUCCESS)
+      return result;
+
+   if (utrace_submit && !utrace_submit->batch_bo)
+      utrace_submit = NULL;
+
+   struct drm_xe_sync *xe_syncs = NULL;
+   uint32_t xe_syncs_count = 0;
+   result = xe_exec_process_syncs(queue, wait_count, waits,
+                                  signal_count, signals,
+                                  utrace_submit,
+                                  &xe_syncs, &xe_syncs_count);
+   if (result != VK_SUCCESS)
+      return result;
+
+   struct drm_xe_exec exec = {
+      .engine_id = queue->engine_id,
+      .num_batch_buffer = 1,
+      .syncs = (uintptr_t)xe_syncs,
+      .num_syncs = xe_syncs_count,
+   };
+
+   if (cmd_buffer_count) {
+      anv_cmd_buffer_chain_command_buffers(cmd_buffers, cmd_buffer_count);
+
+#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
+      if (device->physical->memory.need_clflush)
+         anv_cmd_buffer_clflush(cmd_buffers, cmd_buffer_count);
+#endif
+
+      struct anv_cmd_buffer *first_cmd_buffer = cmd_buffers[0];
+      struct anv_batch_bo *first_batch_bo = list_first_entry(&first_cmd_buffer->batch_bos,
+                                                             struct anv_batch_bo, link);
+      exec.address = first_batch_bo->bo->offset;
+   } else {
+      exec.address = device->trivial_batch_bo->offset;
+   }
+
+   xe_exec_print_debug(queue, cmd_buffer_count, cmd_buffers, perf_query_pool,
+                       perf_query_pass, &exec);
+
+   /* TODO: add perfetto stuff when Xe supports it */
+
+   if (!device->info->no_hw) {
+      if (intel_ioctl(device->fd, DRM_IOCTL_XE_EXEC, &exec))
+         result = vk_device_set_lost(&device->vk, "anv_xe_queue_exec_locked failed: %m");
+   }
+   vk_free(&device->vk.alloc, xe_syncs);
+
+   if (result == VK_SUCCESS && queue->sync) {
+      result = vk_sync_wait(&device->vk, queue->sync, 0,
+                            VK_SYNC_WAIT_COMPLETE, UINT64_MAX);
+      if (result != VK_SUCCESS)
+         result = vk_queue_set_lost(&queue->vk, "sync wait failed");
+   }
+
+   if (result == VK_SUCCESS && utrace_submit)
+      result = xe_queue_exec_utrace_locked(queue, utrace_submit);
+
+   return result;
+}
diff --git a/src/intel/vulkan/xe/anv_batch_chain.h b/src/intel/vulkan/xe/anv_batch_chain.h
index 8984694ca21..9ee877e0494 100644
--- a/src/intel/vulkan/xe/anv_batch_chain.h
+++ b/src/intel/vulkan/xe/anv_batch_chain.h
@@ -26,10 +26,28 @@
 #include <stdint.h>
 #include "vulkan/vulkan_core.h"
+#include "vk_sync.h"
 
 struct anv_queue;
 struct anv_bo;
+struct anv_cmd_buffer;
+struct anv_query_pool;
+struct anv_utrace_submit;
 
 VkResult
 xe_execute_simple_batch(struct anv_queue *queue,
                         struct anv_bo *batch_bo,
                         uint32_t batch_bo_size);
+VkResult
+xe_queue_exec_locked(struct anv_queue *queue,
+                     uint32_t wait_count,
+                     const struct vk_sync_wait *waits,
+                     uint32_t cmd_buffer_count,
+                     struct anv_cmd_buffer **cmd_buffers,
+                     uint32_t signal_count,
+                     const struct vk_sync_signal *signals,
+                     struct anv_query_pool *perf_query_pool,
+                     uint32_t perf_query_pass);
+
+VkResult
+xe_queue_exec_utrace_locked(struct anv_queue *queue,
+                            struct anv_utrace_submit *utrace_submit);
diff --git a/src/intel/vulkan/xe/anv_kmd_backend.c b/src/intel/vulkan/xe/anv_kmd_backend.c
index 80aa9310c14..2053c81b6ed 100644
--- a/src/intel/vulkan/xe/anv_kmd_backend.c
+++ b/src/intel/vulkan/xe/anv_kmd_backend.c
@@ -140,6 +140,8 @@ anv_xe_kmd_backend_get(void)
       .gem_vm_bind = xe_gem_vm_bind,
       .gem_vm_unbind = xe_gem_vm_unbind,
       .execute_simple_batch = xe_execute_simple_batch,
+      .queue_exec_locked = xe_queue_exec_locked,
+      .queue_exec_trace = xe_queue_exec_utrace_locked,
    };
    return &xe_backend;
 }
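
Note (not part of the patch): the two vtable entries wired up in anv_kmd_backend.c are how the KMD-agnostic submit code reaches this Xe path. The sketch below illustrates that dispatch under one assumption of mine, namely that the device keeps a pointer to the active backend as queue->device->kmd_backend; the hook names and parameter lists match the signatures added above, and the wrapper function itself is hypothetical.

/* Illustrative sketch only: dispatching a queue submission through the
 * anv_kmd_backend hooks added above. On Xe, queue_exec_locked resolves to
 * xe_queue_exec_locked(). No perf query is attached in this sketch, so the
 * pool is NULL and the pass index is 0. */
static VkResult
submit_through_kmd_backend(struct anv_queue *queue,
                           uint32_t wait_count,
                           const struct vk_sync_wait *waits,
                           uint32_t cmd_buffer_count,
                           struct anv_cmd_buffer **cmd_buffers,
                           uint32_t signal_count,
                           const struct vk_sync_signal *signals)
{
   /* Assumption: the active backend is reachable from the device. */
   const struct anv_kmd_backend *kmd = queue->device->kmd_backend;

   return kmd->queue_exec_locked(queue, wait_count, waits,
                                 cmd_buffer_count, cmd_buffers,
                                 signal_count, signals,
                                 NULL /* perf_query_pool */,
                                 0 /* perf_query_pass */);
}

Routing submission through queue_exec_locked/queue_exec_trace keeps the kernel-specific uAPI details (drm_xe_exec, drm_xe_sync, DRM_IOCTL_XE_EXEC) confined to the Xe backend, so common callers never depend on which KMD is in use.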