diff --git a/src/panfrost/vulkan/csf/panvk_instr.h b/src/panfrost/vulkan/csf/panvk_instr.h index 09f3d9baca7..b2f6faad694 100644 --- a/src/panfrost/vulkan/csf/panvk_instr.h +++ b/src/panfrost/vulkan/csf/panvk_instr.h @@ -69,9 +69,11 @@ void panvk_per_arch(panvk_instr_end_work)( const struct panvk_instr_end_args *const args); /** - * Mark the end of async work with an immediate scoreboard mask. + * Mark the end of async work with an async_op. Note that the signal_slot will + * be overwritten and should therefore be left as 0. */ void panvk_per_arch(panvk_instr_end_work_async)( enum panvk_subqueue_id id, struct panvk_cmd_buffer *cmdbuf, enum panvk_instr_work_type work_type, - const struct panvk_instr_end_args *const args, unsigned int wait_mask); + const struct panvk_instr_end_args *const args, + struct cs_async_op ts_async_op); diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c index 82191d553ec..1abc3bd9165 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c @@ -383,7 +383,7 @@ panvk_per_arch(CmdDispatchBase)(VkCommandBuffer commandBuffer, struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); panvk_per_arch(panvk_instr_end_work_async)( PANVK_SUBQUEUE_COMPUTE, cmdbuf, PANVK_INSTR_WORK_TYPE_DISPATCH, - &instr_info, dev->csf.sb.all_iters_mask); + &instr_info, cs_defer(dev->csf.sb.all_iters_mask, 0)); } VKAPI_ATTR void VKAPI_CALL @@ -408,5 +408,5 @@ panvk_per_arch(CmdDispatchIndirect)(VkCommandBuffer commandBuffer, }}; panvk_per_arch(panvk_instr_end_work_async)( PANVK_SUBQUEUE_COMPUTE, cmdbuf, PANVK_INSTR_WORK_TYPE_DISPATCH_INDIRECT, - &instr_info, dev->csf.sb.all_iters_mask); + &instr_info, cs_defer(dev->csf.sb.all_iters_mask, 0)); } diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c index 47562790b4e..555be62f273 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c @@ -3568,8 +3568,8 @@ panvk_per_arch(CmdEndRendering)(VkCommandBuffer commandBuffer) struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); panvk_per_arch(panvk_instr_end_work_async)( PANVK_SUBQUEUE_VERTEX_TILER, cmdbuf, PANVK_INSTR_WORK_TYPE_RENDER, - &instr_info, dev->csf.sb.all_iters_mask); + &instr_info, cs_defer(dev->csf.sb.all_iters_mask, 0)); panvk_per_arch(panvk_instr_end_work_async)( PANVK_SUBQUEUE_FRAGMENT, cmdbuf, PANVK_INSTR_WORK_TYPE_RENDER, - &instr_info, dev->csf.sb.all_iters_mask); + &instr_info, cs_defer(dev->csf.sb.all_iters_mask, 0)); } diff --git a/src/panfrost/vulkan/csf/panvk_vX_instr.c b/src/panfrost/vulkan/csf/panvk_vX_instr.c index dfc53057525..4a6e7e53a6a 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_instr.c +++ b/src/panfrost/vulkan/csf/panvk_vX_instr.c @@ -68,11 +68,12 @@ panvk_per_arch(panvk_instr_begin_work)(enum panvk_subqueue_id id, struct panvk_cmd_buffer *cmdbuf, enum panvk_instr_work_type work_type) { + struct cs_async_op op = cs_now(); struct panvk_utrace_cs_info cs_info = { .cmdbuf = cmdbuf, /* For the begin marker, the caller should wait for dependencies before calling begin. */ - .ts_wait_mask = 0, + .ts_async_op = &op, }; switch (work_type) { @@ -108,18 +109,20 @@ panvk_per_arch(panvk_instr_end_work)( enum panvk_instr_work_type work_type, const struct panvk_instr_end_args *const args) { - panvk_per_arch(panvk_instr_end_work_async)(id, cmdbuf, work_type, args, 0); + panvk_per_arch(panvk_instr_end_work_async)(id, cmdbuf, work_type, args, + cs_now()); } void panvk_per_arch(panvk_instr_end_work_async)( enum panvk_subqueue_id id, struct panvk_cmd_buffer *cmdbuf, enum panvk_instr_work_type work_type, - const struct panvk_instr_end_args *const args, unsigned int wait_mask) + const struct panvk_instr_end_args *const args, + struct cs_async_op ts_async_op) { struct panvk_utrace_cs_info cs_info = { .cmdbuf = cmdbuf, - .ts_wait_mask = wait_mask, + .ts_async_op = &ts_async_op, }; switch (work_type) { diff --git a/src/panfrost/vulkan/csf/panvk_vX_utrace.c b/src/panfrost/vulkan/csf/panvk_vX_utrace.c index 68b73125bf5..006233b7a2e 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_utrace.c +++ b/src/panfrost/vulkan/csf/panvk_vX_utrace.c @@ -14,14 +14,22 @@ static void cmd_write_timestamp(const struct panvk_device *dev, struct cs_builder *b, - uint64_t addr, uint32_t wait_mask) + uint64_t addr, struct cs_async_op ts_async_op) { const struct cs_index addr_reg = cs_scratch_reg64(b, 0); + + /* Overwrite the signal_slot. Note that this has no effect in case of + * synchronous or indirect syncs. */ + assert(!ts_async_op.wait_mask || +#if PAN_ARCH >= 11 + ts_async_op.indirect || +#endif + ts_async_op.signal_slot == 0); /* abuse DEFERRED_SYNC */ - const struct cs_async_op async = cs_defer(wait_mask, SB_ID(DEFERRED_SYNC)); + ts_async_op.signal_slot = SB_ID(DEFERRED_SYNC); cs_move64_to(b, addr_reg, addr); - cs_store_state(b, addr_reg, 0, MALI_CS_STATE_TIMESTAMP, async); + cs_store_state(b, addr_reg, 0, MALI_CS_STATE_TIMESTAMP, ts_async_op); } static void @@ -90,7 +98,7 @@ panvk_utrace_record_ts(struct u_trace *ut, void *cs, void *timestamps, const struct panvk_priv_bo *bo = timestamps; const uint64_t addr = bo->addr.dev + offset_B; - cmd_write_timestamp(dev, b, addr, cs_info->ts_wait_mask); + cmd_write_timestamp(dev, b, addr, *cs_info->ts_async_op); } static void diff --git a/src/panfrost/vulkan/panvk_utrace.h b/src/panfrost/vulkan/panvk_utrace.h index a5928876f70..818fdd0782a 100644 --- a/src/panfrost/vulkan/panvk_utrace.h +++ b/src/panfrost/vulkan/panvk_utrace.h @@ -45,7 +45,7 @@ struct panvk_cmd_buffer; struct panvk_utrace_cs_info { struct panvk_cmd_buffer *cmdbuf; - uint32_t ts_wait_mask; + struct cs_async_op *ts_async_op; }; void panvk_per_arch(utrace_context_init)(struct panvk_device *dev); diff --git a/src/panfrost/vulkan/panvk_vX_cmd_meta.c b/src/panfrost/vulkan/panvk_vX_cmd_meta.c index c8b8802b836..22e668ef3b3 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_meta.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_meta.c @@ -66,9 +66,9 @@ panvk_per_arch(cmd_meta_compute_end)( #if PAN_ARCH >= 10 struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); - panvk_per_arch(panvk_instr_end_work_async)(PANVK_SUBQUEUE_COMPUTE, cmdbuf, - PANVK_INSTR_WORK_TYPE_META, NULL, - dev->csf.sb.all_iters_mask); + panvk_per_arch(panvk_instr_end_work_async)( + PANVK_SUBQUEUE_COMPUTE, cmdbuf, PANVK_INSTR_WORK_TYPE_META, NULL, + cs_defer(dev->csf.sb.all_iters_mask, 0)); #endif cmdbuf->state.compute.desc_state.sets[0] = save_ctx->set0; @@ -148,10 +148,10 @@ panvk_per_arch(cmd_meta_gfx_end)( struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device); panvk_per_arch(panvk_instr_end_work_async)( PANVK_SUBQUEUE_VERTEX_TILER, cmdbuf, PANVK_INSTR_WORK_TYPE_META, NULL, - dev->csf.sb.all_iters_mask); - panvk_per_arch(panvk_instr_end_work_async)(PANVK_SUBQUEUE_FRAGMENT, cmdbuf, - PANVK_INSTR_WORK_TYPE_META, NULL, - dev->csf.sb.all_iters_mask); + cs_defer(dev->csf.sb.all_iters_mask, 0)); + panvk_per_arch(panvk_instr_end_work_async)( + PANVK_SUBQUEUE_FRAGMENT, cmdbuf, PANVK_INSTR_WORK_TYPE_META, NULL, + cs_defer(dev->csf.sb.all_iters_mask, 0)); #endif cmdbuf->state.gfx.desc_state.sets[0] = save_ctx->set0;