diff --git a/src/intel/vulkan/xe/anv_batch_chain.c b/src/intel/vulkan/xe/anv_batch_chain.c
index b964ec8f06a..bad00dce2c0 100644
--- a/src/intel/vulkan/xe/anv_batch_chain.c
+++ b/src/intel/vulkan/xe/anv_batch_chain.c
@@ -70,3 +70,208 @@ exec_error:
 
    return result;
 }
+
+#define TYPE_SIGNAL true
+#define TYPE_WAIT false
+
+static void
+xe_exec_fill_sync(struct drm_xe_sync *xe_sync, struct vk_sync *vk_sync,
+                  uint64_t value, bool signal)
+{
+   if (unlikely(!vk_sync_type_is_drm_syncobj(vk_sync->type))) {
+      unreachable("Unsupported sync type");
+      return;
+   }
+
+   const struct vk_drm_syncobj *syncobj = vk_sync_as_drm_syncobj(vk_sync);
+   xe_sync->handle = syncobj->syncobj;
+
+   if (value) {
+      xe_sync->flags |= DRM_XE_SYNC_TIMELINE_SYNCOBJ;
+      xe_sync->timeline_value = value;
+   } else {
+      xe_sync->flags |= DRM_XE_SYNC_SYNCOBJ;
+   }
+
+   if (signal)
+      xe_sync->flags |= DRM_XE_SYNC_SIGNAL;
+}
+
+static VkResult
+xe_exec_process_syncs(struct anv_queue *queue,
+                      uint32_t wait_count, const struct vk_sync_wait *waits,
+                      uint32_t signal_count, const struct vk_sync_signal *signals,
+                      struct anv_utrace_submit *utrace_submit,
+                      struct drm_xe_sync **ret, uint32_t *ret_count)
+{
+   struct anv_device *device = queue->device;
+   uint32_t num_syncs = wait_count + signal_count + (utrace_submit ? 1 : 0) +
+                        (queue->sync ? 1 : 0);
+
+   if (!num_syncs)
+      return VK_SUCCESS;
+
+   struct drm_xe_sync *xe_syncs = vk_zalloc(&device->vk.alloc,
+                                            sizeof(*xe_syncs) * num_syncs, 8,
+                                            VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!xe_syncs)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   uint32_t count = 0;
+
+   if (utrace_submit) {
+      struct drm_xe_sync *xe_sync = &xe_syncs[count++];
+
+      xe_exec_fill_sync(xe_sync, utrace_submit->sync, 0, TYPE_SIGNAL);
+   }
+
+   for (uint32_t i = 0; i < wait_count; i++) {
+      struct drm_xe_sync *xe_sync = &xe_syncs[count++];
+      const struct vk_sync_wait *vk_wait = &waits[i];
+
+      xe_exec_fill_sync(xe_sync, vk_wait->sync, vk_wait->wait_value,
+                        TYPE_WAIT);
+   }
+
+   for (uint32_t i = 0; i < signal_count; i++) {
+      struct drm_xe_sync *xe_sync = &xe_syncs[count++];
+      const struct vk_sync_signal *vk_signal = &signals[i];
+
+      xe_exec_fill_sync(xe_sync, vk_signal->sync, vk_signal->signal_value,
+                        TYPE_SIGNAL);
+   }
+
+   if (queue->sync) {
+      struct drm_xe_sync *xe_sync = &xe_syncs[count++];
+
+      xe_exec_fill_sync(xe_sync, queue->sync, 0,
+                        TYPE_SIGNAL);
+   }
+
+   assert(count == num_syncs);
+   *ret = xe_syncs;
+   *ret_count = num_syncs;
+   return VK_SUCCESS;
+}
+
+static void
+xe_exec_print_debug(struct anv_queue *queue, uint32_t cmd_buffer_count,
+                    struct anv_cmd_buffer **cmd_buffers,
+                    struct anv_query_pool *perf_query_pool,
+                    uint32_t perf_query_pass, struct drm_xe_exec *exec)
+{
+   if (INTEL_DEBUG(DEBUG_SUBMIT))
+      fprintf(stderr, "Batch offset=0x%016"PRIx64" on queue %u\n",
+              (uint64_t)exec->address, queue->vk.index_in_family);
+
+   anv_cmd_buffer_exec_batch_debug(queue, cmd_buffer_count, cmd_buffers,
+                                   perf_query_pool, perf_query_pass);
+}
+
+VkResult
+xe_queue_exec_utrace_locked(struct anv_queue *queue,
+                            struct anv_utrace_submit *utrace_submit)
+{
+   struct anv_device *device = queue->device;
+   struct drm_xe_sync xe_sync = {};
+
+   xe_exec_fill_sync(&xe_sync, utrace_submit->sync, 0, TYPE_SIGNAL);
+
+#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
+   if (device->physical->memory.need_clflush)
+      intel_flush_range(utrace_submit->batch_bo->map,
+                        utrace_submit->batch_bo->size);
+#endif
+
+   struct drm_xe_exec exec = {
+      .engine_id = queue->engine_id,
+      .num_batch_buffer = 1,
+      .syncs = (uintptr_t)&xe_sync,
+      .num_syncs = 1,
+      .address = utrace_submit->batch_bo->offset,
+   };
+
+   if (likely(!device->info->no_hw)) {
+      if (intel_ioctl(device->fd, DRM_IOCTL_XE_EXEC, &exec))
+         return vk_device_set_lost(&device->vk, "anv_xe_queue_exec_locked failed: %m");
+   }
+
+   return VK_SUCCESS;
+}
+
+VkResult
+xe_queue_exec_locked(struct anv_queue *queue,
+                     uint32_t wait_count,
+                     const struct vk_sync_wait *waits,
+                     uint32_t cmd_buffer_count,
+                     struct anv_cmd_buffer **cmd_buffers,
+                     uint32_t signal_count,
+                     const struct vk_sync_signal *signals,
+                     struct anv_query_pool *perf_query_pool,
+                     uint32_t perf_query_pass)
+{
+   struct anv_device *device = queue->device;
+   struct anv_utrace_submit *utrace_submit = NULL;
+   VkResult result;
+
+   result = anv_device_utrace_flush_cmd_buffers(queue, cmd_buffer_count,
+                                                cmd_buffers, &utrace_submit);
+   if (result != VK_SUCCESS)
+      return result;
+
+   if (utrace_submit && !utrace_submit->batch_bo)
+      utrace_submit = NULL;
+
+   struct drm_xe_sync *xe_syncs = NULL;
+   uint32_t xe_syncs_count = 0;
+   result = xe_exec_process_syncs(queue, wait_count, waits,
+                                  signal_count, signals,
+                                  utrace_submit,
+                                  &xe_syncs, &xe_syncs_count);
+   if (result != VK_SUCCESS)
+      return result;
+
+   struct drm_xe_exec exec = {
+      .engine_id = queue->engine_id,
+      .num_batch_buffer = 1,
+      .syncs = (uintptr_t)xe_syncs,
+      .num_syncs = xe_syncs_count,
+   };
+
+   if (cmd_buffer_count) {
+      anv_cmd_buffer_chain_command_buffers(cmd_buffers, cmd_buffer_count);
+
+#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
+      if (device->physical->memory.need_clflush)
+         anv_cmd_buffer_clflush(cmd_buffers, cmd_buffer_count);
+#endif
+
+      struct anv_cmd_buffer *first_cmd_buffer = cmd_buffers[0];
+      struct anv_batch_bo *first_batch_bo = list_first_entry(&first_cmd_buffer->batch_bos,
+                                                             struct anv_batch_bo, link);
+      exec.address = first_batch_bo->bo->offset;
+   } else {
+      exec.address = device->trivial_batch_bo->offset;
+   }
+
+   xe_exec_print_debug(queue, cmd_buffer_count, cmd_buffers, perf_query_pool,
+                       perf_query_pass, &exec);
+
+   /* TODO: add perfetto stuff when Xe supports it */
+
+   if (!device->info->no_hw) {
+      if (intel_ioctl(device->fd, DRM_IOCTL_XE_EXEC, &exec))
+         result = vk_device_set_lost(&device->vk, "anv_xe_queue_exec_locked failed: %m");
+   }
+   vk_free(&device->vk.alloc, xe_syncs);
+
+   if (result == VK_SUCCESS && queue->sync) {
+      result = vk_sync_wait(&device->vk, queue->sync, 0,
+                            VK_SYNC_WAIT_COMPLETE, UINT64_MAX);
+      if (result != VK_SUCCESS)
+         result = vk_queue_set_lost(&queue->vk, "sync wait failed");
+   }
+
+   if (result == VK_SUCCESS && utrace_submit)
+      result = xe_queue_exec_utrace_locked(queue, utrace_submit);
+
+   return result;
+}
diff --git a/src/intel/vulkan/xe/anv_batch_chain.h b/src/intel/vulkan/xe/anv_batch_chain.h
index 8984694ca21..9ee877e0494 100644
--- a/src/intel/vulkan/xe/anv_batch_chain.h
+++ b/src/intel/vulkan/xe/anv_batch_chain.h
@@ -26,10 +26,28 @@
 #include <stdint.h>
 #include "vulkan/vulkan_core.h"
+#include "vk_sync.h"
 
 struct anv_queue;
 struct anv_bo;
+struct anv_cmd_buffer;
+struct anv_query_pool;
+struct anv_utrace_submit;
 
 VkResult
 xe_execute_simple_batch(struct anv_queue *queue,
                         struct anv_bo *batch_bo,
                         uint32_t batch_bo_size);
+VkResult
+xe_queue_exec_locked(struct anv_queue *queue,
+                     uint32_t wait_count,
+                     const struct vk_sync_wait *waits,
+                     uint32_t cmd_buffer_count,
+                     struct anv_cmd_buffer **cmd_buffers,
+                     uint32_t signal_count,
+                     const struct vk_sync_signal *signals,
+                     struct anv_query_pool *perf_query_pool,
+                     uint32_t perf_query_pass);
+
+VkResult
+xe_queue_exec_utrace_locked(struct anv_queue *queue,
+                            struct anv_utrace_submit *utrace_submit);
diff --git a/src/intel/vulkan/xe/anv_kmd_backend.c b/src/intel/vulkan/xe/anv_kmd_backend.c
index 80aa9310c14..2053c81b6ed 100644
--- a/src/intel/vulkan/xe/anv_kmd_backend.c
+++ b/src/intel/vulkan/xe/anv_kmd_backend.c
@@ -140,6 +140,8 @@ anv_xe_kmd_backend_get(void)
       .gem_vm_bind = xe_gem_vm_bind,
       .gem_vm_unbind = xe_gem_vm_unbind,
       .execute_simple_batch = xe_execute_simple_batch,
+      .queue_exec_locked = xe_queue_exec_locked,
+      .queue_exec_trace = xe_queue_exec_utrace_locked,
    };
    return &xe_backend;
 }
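
Note (not part of the patch): the two vtable entries wired up in anv_kmd_backend.c are how the KMD-agnostic submit code reaches this Xe path. The sketch below illustrates that dispatch under one assumption of mine, namely that the device keeps a pointer to the active backend as queue->device->kmd_backend; the hook names and parameter lists match the signatures added above, and the wrapper function itself is hypothetical.

/* Illustrative sketch only: dispatching a queue submission through the
 * anv_kmd_backend hooks added above. On Xe, queue_exec_locked resolves to
 * xe_queue_exec_locked(). No perf query is attached in this sketch, so the
 * pool is NULL and the pass index is 0. */
static VkResult
submit_through_kmd_backend(struct anv_queue *queue,
                           uint32_t wait_count,
                           const struct vk_sync_wait *waits,
                           uint32_t cmd_buffer_count,
                           struct anv_cmd_buffer **cmd_buffers,
                           uint32_t signal_count,
                           const struct vk_sync_signal *signals)
{
   /* Assumption: the active backend is reachable from the device. */
   const struct anv_kmd_backend *kmd = queue->device->kmd_backend;

   return kmd->queue_exec_locked(queue, wait_count, waits,
                                 cmd_buffer_count, cmd_buffers,
                                 signal_count, signals,
                                 NULL /* perf_query_pool */,
                                 0 /* perf_query_pass */);
}

Routing submission through queue_exec_locked/queue_exec_trace keeps the kernel-specific uAPI details (drm_xe_exec, drm_xe_sync, DRM_IOCTL_XE_EXEC) confined to the Xe backend, so common callers never depend on which KMD is in use.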