diff --git a/src/panfrost/vulkan/csf/panvk_event.h b/src/panfrost/vulkan/csf/panvk_event.h
new file mode 100644
index 00000000000..d99eee49893
--- /dev/null
+++ b/src/panfrost/vulkan/csf/panvk_event.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright © 2024 Collabora Ltd.
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef PANVK_EVENT_H
+#define PANVK_EVENT_H
+
+#ifndef PAN_ARCH
+#error "PAN_ARCH must be defined"
+#endif
+
+#include "vk_object.h"
+
+#include "panvk_mempool.h"
+
+struct panvk_event {
+   struct vk_object_base base;
+
+   /* v10 is lacking IAND/IOR instructions, which forces us to have one syncobj
+    * per-subqueue instead of one syncobj on which subqueues would only
+    * set/clear their bit.
+    *
+    * This is an array of PANVK_SUBQUEUE_COUNT panvk_cs_sync32 objects whose
+    * seqno is zero when reset and non-zero when set. */
+   struct panvk_priv_mem syncobjs;
+};
+
+VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
+
+#endif
diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_event.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_event.c
new file mode 100644
index 00000000000..4461ccac8ca
--- /dev/null
+++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_event.c
@@ -0,0 +1,166 @@
+/*
+ * Copyright © 2024 Collabora Ltd.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "panvk_cmd_buffer.h"
+#include "panvk_entrypoints.h"
+#include "panvk_event.h"
+
+/* Records an event reset on every subqueue: each subqueue loads its own
+ * syncobj seqno and, if it is non-zero, writes zero back, deferred on the
+ * scoreboard slots get_cs_deps() reports for stageMask. */
+VKAPI_ATTR void VKAPI_CALL
+panvk_per_arch(CmdResetEvent2)(VkCommandBuffer commandBuffer, VkEvent _event,
+                               VkPipelineStageFlags2 stageMask)
+{
+   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
+   VK_FROM_HANDLE(panvk_event, event, _event);
+
+   /* Wrap stageMask with a VkDependencyInfo object so we can re-use
+    * get_cs_deps(). */
+   const VkMemoryBarrier2 barrier = {
+      .srcStageMask = stageMask,
+   };
+   const VkDependencyInfo info = {
+      .memoryBarrierCount = 1,
+      .pMemoryBarriers = &barrier,
+   };
+   struct panvk_cs_deps deps;
+
+   panvk_per_arch(get_cs_deps)(cmdbuf, &info, &deps);
+
+   for (uint32_t i = 0; i < PANVK_SUBQUEUE_COUNT; i++) {
+      struct cs_builder *b = panvk_get_cs_builder(cmdbuf, i);
+      uint32_t sb_mask = deps.src[i].wait_sb_mask;
+      struct cs_index sync_addr = cs_scratch_reg64(b, 0);
+      struct cs_index seqno = cs_scratch_reg32(b, 2);
+      struct cs_index cmp_scratch = cs_scratch_reg32(b, 3);
+
+      /* Point sync_addr at this subqueue's syncobj and read its seqno. */
+      cs_move64_to(b, sync_addr,
+                   panvk_priv_mem_dev_addr(event->syncobjs) +
+                      (i * sizeof(struct panvk_cs_sync32)));
+      cs_load32_to(b, seqno, sync_addr,
+                   offsetof(struct panvk_cs_sync32, seqno));
+      cs_wait_slot(b, SB_ID(LS), false);
+
+      cs_match(b, seqno, cmp_scratch) {
+         cs_case(b, 0) {
+            /* Nothing to do, we just need it defined for the default case. */
+         }
+
+         cs_default(b) {
+            cs_move32_to(b, seqno, 0);
+            cs_sync32_set(b, false, MALI_CS_SYNC_SCOPE_SYSTEM, seqno, sync_addr,
+                          cs_defer(sb_mask | SB_MASK(DEFERRED_FLUSH),
+                                   SB_ID(DEFERRED_SYNC)));
+         }
+      }
+   }
+}
+
+/* Records an event set on every subqueue: each subqueue loads its own
+ * syncobj seqno and, if it is still zero, emits the cache flushes requested
+ * by pDependencyInfo (if any) and then writes 1 to the syncobj. */
+VKAPI_ATTR void VKAPI_CALL
+panvk_per_arch(CmdSetEvent2)(VkCommandBuffer commandBuffer, VkEvent _event,
+                             const VkDependencyInfo *pDependencyInfo)
+{
+   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
+   VK_FROM_HANDLE(panvk_event, event, _event);
+   struct panvk_cs_deps deps;
+
+   panvk_per_arch(get_cs_deps)(cmdbuf, pDependencyInfo, &deps);
+
+   if (deps.needs_draw_flush)
+      panvk_per_arch(cmd_flush_draws)(cmdbuf);
+
+   for (uint32_t i = 0; i < PANVK_SUBQUEUE_COUNT; i++) {
+      struct cs_builder *b = panvk_get_cs_builder(cmdbuf, i);
+      uint16_t sb_mask = deps.src[i].wait_sb_mask;
+      struct cs_index sync_addr = cs_scratch_reg64(b, 0);
+      struct cs_index seqno = cs_scratch_reg32(b, 2);
+      struct cs_index cmp_scratch = cs_scratch_reg32(b, 3);
+
+      cs_move64_to(b, sync_addr,
+                   panvk_priv_mem_dev_addr(event->syncobjs) +
+                      (i * sizeof(struct panvk_cs_sync32)));
+      /* Destination register first, address register second, as in
+       * CmdResetEvent2: seqno receives the value stored at sync_addr. */
+      cs_load32_to(b, seqno, sync_addr,
+                   offsetof(struct panvk_cs_sync32, seqno));
+      cs_wait_slot(b, SB_ID(LS), false);
+
+      cs_match(b, seqno, cmp_scratch) {
+         cs_case(b, 0) {
+            struct panvk_cache_flush_info cache_flush = deps.src[i].cache_flush;
+
+            if (cache_flush.l2 != MALI_CS_FLUSH_MODE_NONE ||
+                cache_flush.lsc != MALI_CS_FLUSH_MODE_NONE ||
+                cache_flush.others) {
+               /* We rely on seqno being zero since we're in the
+                * (seqno == 0) case. */
+               cs_flush_caches(b, cache_flush.l2, cache_flush.lsc,
+                               cache_flush.others, seqno,
+                               cs_defer(sb_mask, SB_ID(DEFERRED_FLUSH)));
+            }
+
+            cs_move32_to(b, seqno, 1);
+            cs_sync32_set(b, false, MALI_CS_SYNC_SCOPE_SYSTEM, seqno, sync_addr,
+                          cs_defer(sb_mask | SB_MASK(DEFERRED_FLUSH),
+                                   SB_ID(DEFERRED_SYNC)));
+         }
+      }
+   }
+}
+
+/* For each subqueue that get_cs_deps() gives a non-empty wait_subqueue_mask,
+ * emits one wait per selected subqueue on that subqueue's event syncobj. */
+static void
+cmd_wait_event(struct panvk_cmd_buffer *cmdbuf, struct panvk_event *event,
+               const VkDependencyInfo *info)
+{
+   struct panvk_cs_deps deps;
+
+   panvk_per_arch(get_cs_deps)(cmdbuf, info, &deps);
+
+   for (uint32_t i = 0; i < PANVK_SUBQUEUE_COUNT; i++) {
+      if (!deps.dst[i].wait_subqueue_mask)
+         continue;
+
+      struct cs_builder *b = panvk_get_cs_builder(cmdbuf, i);
+
+      for (uint32_t j = 0; j < PANVK_SUBQUEUE_COUNT; j++) {
+         if (!(deps.dst[i].wait_subqueue_mask & BITFIELD_BIT(j)))
+            continue;
+
+         struct cs_index sync_addr = cs_scratch_reg64(b, 0);
+         struct cs_index seqno = cs_scratch_reg32(b, 2);
+
+         cs_move64_to(b, sync_addr,
+                      panvk_priv_mem_dev_addr(event->syncobjs) +
+                         (j * sizeof(struct panvk_cs_sync32)));
+
+         /* The syncobjs are panvk_cs_sync32 and seqno is a 32-bit register,
+          * so this has to be the 32-bit wait (GREATER than a zero ref). */
+         cs_move32_to(b, seqno, 0);
+         cs_sync32_wait(b, false, MALI_CS_CONDITION_GREATER, seqno, sync_addr);
+      }
+   }
+}
+
+/* Waits on each event in pEvents using the matching pDependencyInfos entry. */
+VKAPI_ATTR void VKAPI_CALL
+panvk_per_arch(CmdWaitEvents2)(VkCommandBuffer commandBuffer,
+                               uint32_t eventCount, const VkEvent *pEvents,
+                               const VkDependencyInfo *pDependencyInfos)
+{
+   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
+
+   for (uint32_t i = 0; i < eventCount; i++) {
+      VK_FROM_HANDLE(panvk_event, event, pEvents[i]);
+
+      cmd_wait_event(cmdbuf, event, &pDependencyInfos[i]);
+   }
+}
diff --git a/src/panfrost/vulkan/csf/panvk_vX_event.c b/src/panfrost/vulkan/csf/panvk_vX_event.c
new file mode 100644
index 00000000000..66d63e18b5c
--- /dev/null
+++ b/src/panfrost/vulkan/csf/panvk_vX_event.c
@@ -0,0 +1,103 @@
+/*
+ * Copyright © 2024 Collabora Ltd.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "panvk_cmd_buffer.h"
+#include "panvk_device.h"
+#include "panvk_entrypoints.h"
+#include "panvk_event.h"
+#include "panvk_mempool.h"
+
+#include "vk_log.h"
+
+/* Creates an event whose per-subqueue syncobjs all start zeroed (reset). */
+VKAPI_ATTR VkResult VKAPI_CALL
+panvk_per_arch(CreateEvent)(VkDevice _device,
+                            const VkEventCreateInfo *pCreateInfo,
+                            const VkAllocationCallbacks *pAllocator,
+                            VkEvent *pEvent)
+{
+   VK_FROM_HANDLE(panvk_device, device, _device);
+   struct panvk_event *event = vk_object_zalloc(
+      &device->vk, pAllocator, sizeof(*event), VK_OBJECT_TYPE_EVENT);
+   if (!event)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   struct panvk_pool_alloc_info info = {
+      .size = sizeof(struct panvk_cs_sync32) * PANVK_SUBQUEUE_COUNT,
+      .alignment = 64,
+   };
+
+   event->syncobjs = panvk_pool_alloc_mem(&device->mempools.rw_nc, info);
+   if (!panvk_priv_mem_host_addr(event->syncobjs)) {
+      /* The syncobjs come from the device's rw_nc pool, not the host
+       * allocator, so report the failure as a device allocation failure. */
+      vk_object_free(&device->vk, pAllocator, event);
+      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+   }
+
+   memset(panvk_priv_mem_host_addr(event->syncobjs), 0,
+          sizeof(struct panvk_cs_sync32) * PANVK_SUBQUEUE_COUNT);
+
+   *pEvent = panvk_event_to_handle(event);
+   return VK_SUCCESS;
+}
+
+/* Releases the syncobjs and the event object; a NULL event is a no-op. */
+VKAPI_ATTR void VKAPI_CALL
+panvk_per_arch(DestroyEvent)(VkDevice _device, VkEvent _event,
+                             const VkAllocationCallbacks *pAllocator)
+{
+   VK_FROM_HANDLE(panvk_device, device, _device);
+   VK_FROM_HANDLE(panvk_event, event, _event);
+
+   if (!event)
+      return;
+
+   panvk_pool_free_mem(&device->mempools.rw_nc, event->syncobjs);
+
+   vk_object_free(&device->vk, pAllocator, event);
+}
+
+/* Returns VK_EVENT_SET only when every subqueue's seqno is non-zero. */
+VKAPI_ATTR VkResult VKAPI_CALL
+panvk_per_arch(GetEventStatus)(VkDevice _device, VkEvent _event)
+{
+   VK_FROM_HANDLE(panvk_event, event, _event);
+
+   struct panvk_cs_sync32 *syncobjs = panvk_priv_mem_host_addr(event->syncobjs);
+
+   for (uint32_t i = 0; i < PANVK_SUBQUEUE_COUNT; i++) {
+      if (!syncobjs[i].seqno)
+         return VK_EVENT_RESET;
+   }
+
+   return VK_EVENT_SET;
+}
+
+/* Host-side set: writes 1 to every subqueue's seqno. */
+VKAPI_ATTR VkResult VKAPI_CALL
+panvk_per_arch(SetEvent)(VkDevice _device, VkEvent _event)
+{
+   VK_FROM_HANDLE(panvk_event, event, _event);
+
+   struct panvk_cs_sync32 *syncobjs = panvk_priv_mem_host_addr(event->syncobjs);
+
+   for (uint32_t i = 0; i < PANVK_SUBQUEUE_COUNT; i++)
+      syncobjs[i].seqno = 1;
+
+   return VK_SUCCESS;
+}
+
+/* Host-side reset: zeroes every subqueue's syncobj. */
+VKAPI_ATTR VkResult VKAPI_CALL
+panvk_per_arch(ResetEvent)(VkDevice _device, VkEvent _event)
+{
+   VK_FROM_HANDLE(panvk_event, event, _event);
+
+   struct panvk_cs_sync32 *syncobjs = panvk_priv_mem_host_addr(event->syncobjs);
+
+   memset(syncobjs, 0, sizeof(*syncobjs) * PANVK_SUBQUEUE_COUNT);
+   return VK_SUCCESS;
+}
diff --git a/src/panfrost/vulkan/meson.build b/src/panfrost/vulkan/meson.build
index d296e3d8a96..6afbd57948b 100644
--- a/src/panfrost/vulkan/meson.build
+++ b/src/panfrost/vulkan/meson.build
@@ -63,6 +63,8 @@ csf_files = [
   'csf/panvk_vX_cmd_buffer.c',
   'csf/panvk_vX_cmd_dispatch.c',
   'csf/panvk_vX_cmd_draw.c',
+  'csf/panvk_vX_cmd_event.c',
+  'csf/panvk_vX_event.c',
   'csf/panvk_vX_queue.c',
 ]
 