diff --git a/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c b/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c index 5a389a08788..f501f13ec74 100644 --- a/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c +++ b/src/panfrost/vulkan/bifrost/panvk_vX_meta_desc_copy.c @@ -326,8 +326,7 @@ panvk_meta_desc_copy_rsd(struct panvk_device *dev) return 0; } - pan_cast_and_pack(panvk_priv_mem_host_addr(shader->rsd), RENDERER_STATE, - cfg) { + panvk_priv_mem_write_desc(shader->rsd, 0, RENDERER_STATE, cfg) { pan_shader_prepare_rsd(&shader->info, panvk_priv_mem_dev_addr(shader->code_mem), &cfg); } diff --git a/src/panfrost/vulkan/csf/panvk_vX_event.c b/src/panfrost/vulkan/csf/panvk_vX_event.c index ae9127214c9..eaba53d55e9 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_event.c +++ b/src/panfrost/vulkan/csf/panvk_vX_event.c @@ -34,8 +34,10 @@ panvk_per_arch(CreateEvent)(VkDevice _device, return panvk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); } - memset(panvk_priv_mem_host_addr(event->syncobjs), 0, - sizeof(struct panvk_cs_sync32) * PANVK_SUBQUEUE_COUNT); + panvk_priv_mem_write_array(event->syncobjs, 0, struct panvk_cs_sync32, + PANVK_SUBQUEUE_COUNT, sobjs) { + memset(sobjs, 0, sizeof(struct panvk_cs_sync32) * PANVK_SUBQUEUE_COUNT); + } *pEvent = panvk_event_to_handle(event); return VK_SUCCESS; @@ -61,11 +63,12 @@ panvk_per_arch(GetEventStatus)(VkDevice _device, VkEvent _event) { VK_FROM_HANDLE(panvk_event, event, _event); - struct panvk_cs_sync32 *syncobjs = panvk_priv_mem_host_addr(event->syncobjs); - - for (uint32_t i = 0; i < PANVK_SUBQUEUE_COUNT; i++) { - if (!syncobjs[i].seqno) - return VK_EVENT_RESET; + panvk_priv_mem_readback_array(event->syncobjs, 0, struct panvk_cs_sync32, + PANVK_SUBQUEUE_COUNT, syncobjs) { + for (uint32_t i = 0; i < PANVK_SUBQUEUE_COUNT; i++) { + if (!syncobjs[i].seqno) + return VK_EVENT_RESET; + } } return VK_EVENT_SET; @@ -76,10 +79,11 @@ panvk_per_arch(SetEvent)(VkDevice _device, VkEvent _event) { VK_FROM_HANDLE(panvk_event, event, 
_event); - struct panvk_cs_sync32 *syncobjs = panvk_priv_mem_host_addr(event->syncobjs); - - for (uint32_t i = 0; i < PANVK_SUBQUEUE_COUNT; i++) - syncobjs[i].seqno = 1; + panvk_priv_mem_write_array(event->syncobjs, 0, struct panvk_cs_sync32, + PANVK_SUBQUEUE_COUNT, syncobjs) { + for (uint32_t i = 0; i < PANVK_SUBQUEUE_COUNT; i++) + syncobjs[i].seqno = 1; + } return VK_SUCCESS; } @@ -89,8 +93,10 @@ panvk_per_arch(ResetEvent)(VkDevice _device, VkEvent _event) { VK_FROM_HANDLE(panvk_event, event, _event); - struct panvk_cs_sync32 *syncobjs = panvk_priv_mem_host_addr(event->syncobjs); + panvk_priv_mem_write_array(event->syncobjs, 0, struct panvk_cs_sync32, + PANVK_SUBQUEUE_COUNT, syncobjs) { + memset(syncobjs, 0, sizeof(*syncobjs) * PANVK_SUBQUEUE_COUNT); + } - memset(syncobjs, 0, sizeof(*syncobjs) * PANVK_SUBQUEUE_COUNT); return VK_SUCCESS; } diff --git a/src/panfrost/vulkan/csf/panvk_vX_gpu_queue.c b/src/panfrost/vulkan/csf/panvk_vX_gpu_queue.c index 1368f1fab2a..52c55d021d7 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_gpu_queue.c +++ b/src/panfrost/vulkan/csf/panvk_vX_gpu_queue.c @@ -168,11 +168,11 @@ init_render_desc_ringbuf(struct panvk_gpu_queue *queue) return panvk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY, "Failed to create the render desc ringbuf context"); - struct panvk_cs_sync32 *syncobj = panvk_priv_mem_host_addr(ringbuf->syncobj); - - *syncobj = (struct panvk_cs_sync32){ - .seqno = RENDER_DESC_RINGBUF_SIZE, - }; + panvk_priv_mem_write(ringbuf->syncobj, 0, struct panvk_cs_sync32, syncobj) { + *syncobj = (struct panvk_cs_sync32){ + .seqno = RENDER_DESC_RINGBUF_SIZE, + }; + } return VK_SUCCESS; } @@ -350,7 +350,6 @@ init_subqueue(struct panvk_gpu_queue *queue, enum panvk_subqueue_id subqueue) struct panvk_subqueue *subq = &queue->subqueues[subqueue]; const struct panvk_physical_device *phys_dev = to_panvk_physical_device(queue->vk.base.device->physical); - struct panvk_cs_sync64 *syncobjs = panvk_priv_mem_host_addr(queue->syncobjs); VkResult result = 
init_subqueue_tracing(queue, subqueue); if (result != VK_SUCCESS) @@ -401,6 +400,8 @@ init_subqueue(struct panvk_gpu_queue *queue, enum panvk_subqueue_id subqueue) assert(cs_is_valid(&b)); subq->req_resource.cs_buffer_size = cs_root_chunk_size(&b); subq->req_resource.cs_buffer_addr = cs_root_chunk_gpu_addr(&b); + panvk_priv_mem_flush(subq->req_resource.buf, 0, + subq->req_resource.cs_buffer_size); alloc_info.size = sizeof(struct panvk_cs_subqueue_context); alloc_info.alignment = 64; @@ -410,25 +411,43 @@ init_subqueue(struct panvk_gpu_queue *queue, enum panvk_subqueue_id subqueue) return panvk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY, "Failed to create a queue context"); - struct panvk_cs_subqueue_context *cs_ctx = - panvk_priv_mem_host_addr(subq->context); - - *cs_ctx = (struct panvk_cs_subqueue_context){ - .syncobjs = panvk_priv_mem_dev_addr(queue->syncobjs), - .debug.tracebuf.cs = subq->tracebuf.addr.dev, + panvk_priv_mem_write(subq->context, 0, struct panvk_cs_subqueue_context, + cs_ctx) { + *cs_ctx = (struct panvk_cs_subqueue_context){ + .syncobjs = panvk_priv_mem_dev_addr(queue->syncobjs), + .debug.tracebuf.cs = subq->tracebuf.addr.dev, #if PAN_ARCH == 10 - /* On the VT/COMPUTE queue, the first iter_sb will skipped since - * cs_next_iter_sb() is called before the first use, but that's okay, - * because the next slot will be equally free, and the skipped one will - * be re-used at some point. - * On the fragment queue, we increment the iterator when the - * FINISH_FRAGMENT job is issued, which is why we need this value - * to point to a valid+free scoreboard from the start. - */ - .iter_sb = SB_ITER(0), + /* On the VT/COMPUTE queue, the first iter_sb will skipped since + * cs_next_iter_sb() is called before the first use, but that's okay, + * because the next slot will be equally free, and the skipped one will + * be re-used at some point. 
+ * On the fragment queue, we increment the iterator when the + * FINISH_FRAGMENT job is issued, which is why we need this value + * to point to a valid+free scoreboard from the start. + */ + .iter_sb = SB_ITER(0), #endif - .reg_dump_addr = panvk_priv_mem_dev_addr(subq->regs_save), - }; + .reg_dump_addr = panvk_priv_mem_dev_addr(subq->regs_save), + }; + + if (subqueue != PANVK_SUBQUEUE_COMPUTE) { + cs_ctx->render.tiler_heap = + panvk_priv_mem_dev_addr(queue->tiler_heap.desc); + /* Our geometry buffer comes 4k after the tiler heap, and we encode the + * size in the lower 12 bits so the address can be copied directly + * to the tiler descriptors. */ + cs_ctx->render.geom_buf = + (cs_ctx->render.tiler_heap + 4096) | ((64 * 1024) >> 12); + + /* Initialize the ringbuf */ + cs_ctx->render.desc_ringbuf = (struct panvk_cs_desc_ringbuf){ + .syncobj = + panvk_priv_mem_dev_addr(queue->render_desc_ringbuf.syncobj), + .ptr = queue->render_desc_ringbuf.addr.dev, + .pos = 0, + }; + } + } /* We use the geometry buffer for our temporary CS buffer. */ root_cs = (struct cs_buffer){ @@ -465,24 +484,13 @@ init_subqueue(struct panvk_gpu_queue *queue, enum panvk_subqueue_id subqueue) /* We do greater than test on sync objects, and given the reference seqno * registers are all zero at init time, we need to initialize all syncobjs * with a seqno of one. */ - syncobjs[subqueue].seqno = 1; + panvk_priv_mem_write(queue->syncobjs, + subqueue * sizeof(struct panvk_cs_sync64), + struct panvk_cs_sync64, syncobj) { + syncobj->seqno = 1; + } if (subqueue != PANVK_SUBQUEUE_COMPUTE) { - cs_ctx->render.tiler_heap = - panvk_priv_mem_dev_addr(queue->tiler_heap.desc); - /* Our geometry buffer comes 4k after the tiler heap, and we encode the - * size in the lower 12 bits so the address can be copied directly - * to the tiler descriptors. 
*/ - cs_ctx->render.geom_buf = - (cs_ctx->render.tiler_heap + 4096) | ((64 * 1024) >> 12); - - /* Initialize the ringbuf */ - cs_ctx->render.desc_ringbuf = (struct panvk_cs_desc_ringbuf){ - .syncobj = panvk_priv_mem_dev_addr(queue->render_desc_ringbuf.syncobj), - .ptr = queue->render_desc_ringbuf.addr.dev, - .pos = 0, - }; - struct cs_index heap_ctx_addr = cs_scratch_reg64(&b, 0); /* Pre-set the heap context on the vertex-tiler/fragment queues. */ @@ -493,6 +501,8 @@ init_subqueue(struct panvk_gpu_queue *queue, enum panvk_subqueue_id subqueue) assert(cs_is_valid(&b)); + panvk_priv_mem_flush(queue->tiler_heap.desc, 4096, cs_root_chunk_size(&b)); + struct drm_panthor_sync_op syncop = { .flags = DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_SYNCOBJ | DRM_PANTHOR_SYNC_OP_SIGNAL, @@ -700,8 +710,7 @@ init_tiler(struct panvk_gpu_queue *queue) tiler_heap->context.handle = thc.handle; tiler_heap->context.dev_addr = thc.tiler_heap_ctx_gpu_va; - pan_cast_and_pack(panvk_priv_mem_host_addr(tiler_heap->desc), TILER_HEAP, - cfg) { + panvk_priv_mem_write_desc(tiler_heap->desc, 0, TILER_HEAP, cfg) { cfg.size = tiler_heap->chunk_size; cfg.base = thc.first_heap_chunk_gpu_va; cfg.bottom = cfg.base + 64; @@ -1125,16 +1134,17 @@ panvk_queue_submit_ioctl(struct panvk_queue_submit *submit) /* If we're tracing, we need to reset the desc ringbufs and the CS * tracebuf. 
*/ for (uint32_t i = 0; i < ARRAY_SIZE(queue->subqueues); i++) { - struct panvk_cs_subqueue_context *ctx = - panvk_priv_mem_host_addr(queue->subqueues[i].context); + panvk_priv_mem_rmw(queue->subqueues[i].context, 0, + struct panvk_cs_subqueue_context, ctx) { + if (ctx->render.desc_ringbuf.ptr) { + ctx->render.desc_ringbuf.ptr = + queue->render_desc_ringbuf.addr.dev; + ctx->render.desc_ringbuf.pos = 0; + } - if (ctx->render.desc_ringbuf.ptr) { - ctx->render.desc_ringbuf.ptr = queue->render_desc_ringbuf.addr.dev; - ctx->render.desc_ringbuf.pos = 0; + if (ctx->debug.tracebuf.cs) + ctx->debug.tracebuf.cs = queue->subqueues[i].tracebuf.addr.dev; } - - if (ctx->debug.tracebuf.cs) - ctx->debug.tracebuf.cs = queue->subqueues[i].tracebuf.addr.dev; } } @@ -1235,28 +1245,30 @@ panvk_queue_submit_process_debug(const struct panvk_queue_submit *submit, } for (uint32_t i = 0; i < ARRAY_SIZE(queue->subqueues); i++) { - struct panvk_cs_subqueue_context *ctx = - panvk_priv_mem_host_addr(queue->subqueues[i].context); + panvk_priv_mem_readback(queue->subqueues[i].context, 0, + struct panvk_cs_subqueue_context, ctx) { + size_t trace_size = + ctx->debug.tracebuf.cs - queue->subqueues[i].tracebuf.addr.dev; - size_t trace_size = - ctx->debug.tracebuf.cs - queue->subqueues[i].tracebuf.addr.dev; - if (!trace_size) - continue; + if (trace_size) { + assert( + trace_size <= queue->subqueues[i].tracebuf.size || + !"OOB access on the CS tracebuf, pass a bigger PANVK_CS_TRACEBUF_SIZE"); - assert( - trace_size <= queue->subqueues[i].tracebuf.size || - !"OOB access on the CS tracebuf, pass a bigger PANVK_CS_TRACEBUF_SIZE"); + assert( + !ctx->render.desc_ringbuf.ptr || + ctx->render.desc_ringbuf.pos <= + queue->render_desc_ringbuf.size || + !"OOB access on the desc tracebuf, pass a bigger PANVK_DESC_TRACEBUF_SIZE"); - assert( - !ctx->render.desc_ringbuf.ptr || - ctx->render.desc_ringbuf.pos <= queue->render_desc_ringbuf.size || - !"OOB access on the desc tracebuf, pass a bigger
PANVK_DESC_TRACEBUF_SIZE"); + uint64_t trace = queue->subqueues[i].tracebuf.addr.dev; - uint64_t trace = queue->subqueues[i].tracebuf.addr.dev; - - pandecode_user_msg(decode_ctx, "\nCS traces on subqueue %d\n\n", i); - pandecode_cs_trace(decode_ctx, trace, trace_size, props->gpu_id); - pandecode_user_msg(decode_ctx, "\n"); + pandecode_user_msg(decode_ctx, "\nCS traces on subqueue %d\n\n", + i); + pandecode_cs_trace(decode_ctx, trace, trace_size, props->gpu_id); + pandecode_user_msg(decode_ctx, "\n"); + } + } } } @@ -1407,10 +1419,11 @@ panvk_per_arch(gpu_queue_check_status)(struct vk_queue *vk_queue) /* check for CS error and treat it as device lost */ for (uint32_t i = 0; i < PANVK_SUBQUEUE_COUNT; i++) { - const struct panvk_cs_subqueue_context *subq_ctx = - panvk_priv_mem_host_addr(queue->subqueues[i].context); - if (subq_ctx->last_error != 0) - return vk_queue_set_lost(&queue->vk, "CS_FAULT"); + panvk_priv_mem_readback(queue->subqueues[i].context, 0, + struct panvk_cs_subqueue_context, subq_ctx) { + if (subq_ctx->last_error != 0) + return vk_queue_set_lost(&queue->vk, "CS_FAULT"); + } } int ret = pan_kmod_ioctl(dev->drm_fd, DRM_IOCTL_PANTHOR_GROUP_GET_STATE, diff --git a/src/panfrost/vulkan/panvk_query_pool.h b/src/panfrost/vulkan/panvk_query_pool.h index bb8c1057d66..26b0696cbdb 100644 --- a/src/panfrost/vulkan/panvk_query_pool.h +++ b/src/panfrost/vulkan/panvk_query_pool.h @@ -47,38 +47,31 @@ struct panvk_query_pool { VK_DEFINE_NONDISP_HANDLE_CASTS(panvk_query_pool, vk.base, VkQueryPool, VK_OBJECT_TYPE_QUERY_POOL) -static uint64_t +static inline uint32_t +panvk_query_available_offset(struct panvk_query_pool *pool, uint32_t query) +{ + assert(query < pool->vk.query_count); + return query * sizeof(struct panvk_query_available_obj); +} + +static inline uint64_t panvk_query_available_dev_addr(struct panvk_query_pool *pool, uint32_t query) { - assert(query < pool->vk.query_count); - return panvk_priv_mem_dev_addr(pool->available_mem) + query * sizeof(struct 
panvk_query_available_obj); + return panvk_priv_mem_dev_addr(pool->available_mem) + + panvk_query_available_offset(pool, query); } -static struct panvk_query_available_obj * -panvk_query_available_host_addr(struct panvk_query_pool *pool, uint32_t query) -{ - assert(query < pool->vk.query_count); - return (struct panvk_query_available_obj *)panvk_priv_mem_host_addr(pool->available_mem) + query; -} - -static uint64_t +static inline uint64_t panvk_query_offset(struct panvk_query_pool *pool, uint32_t query) { assert(query < pool->vk.query_count); return query * (uint64_t)pool->query_stride; } -static uint64_t +static inline uint64_t panvk_query_report_dev_addr(struct panvk_query_pool *pool, uint32_t query) { return panvk_priv_mem_dev_addr(pool->mem) + panvk_query_offset(pool, query); } -static struct panvk_query_report * -panvk_query_report_host_addr(struct panvk_query_pool *pool, uint32_t query) -{ - return (void *)((char *)panvk_priv_mem_host_addr(pool->mem) + - panvk_query_offset(pool, query)); -} - #endif diff --git a/src/panfrost/vulkan/panvk_vX_buffer_view.c b/src/panfrost/vulkan/panvk_vX_buffer_view.c index f1b6a244d56..fa78e261198 100644 --- a/src/panfrost/vulkan/panvk_vX_buffer_view.c +++ b/src/panfrost/vulkan/panvk_vX_buffer_view.c @@ -70,11 +70,14 @@ panvk_per_arch(CreateBufferView)(VkDevice _device, if (!panvk_priv_mem_check_alloc(view->mem)) return panvk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); - struct pan_ptr ptr = { - .gpu = panvk_priv_mem_dev_addr(view->mem), - .cpu = panvk_priv_mem_host_addr(view->mem), - }; - GENX(pan_buffer_texture_emit)(&bview, &view->descs.tex, &ptr); + panvk_priv_mem_write(view->mem, 0, struct mali_surface_with_stride_packed, sd) { + struct pan_ptr ptr = { + .gpu = panvk_priv_mem_dev_addr(view->mem), + .cpu = sd, + }; + + GENX(pan_buffer_texture_emit)(&bview, &view->descs.tex, &ptr); + } #endif } diff --git a/src/panfrost/vulkan/panvk_vX_cmd_fb_preload.c b/src/panfrost/vulkan/panvk_vX_cmd_fb_preload.c index 
d0ba120ab27..6a2ffac4526 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_fb_preload.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_fb_preload.c @@ -173,8 +173,7 @@ get_preload_shader(struct panvk_device *dev, return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY); } - pan_cast_and_pack(panvk_priv_mem_host_addr(shader->spd), SHADER_PROGRAM, - cfg) { + panvk_priv_mem_write_desc(shader->spd, 0, SHADER_PROGRAM, cfg) { cfg.stage = MALI_SHADER_STAGE_FRAGMENT; cfg.fragment_coverage_bitmask_type = MALI_COVERAGE_BITMASK_TYPE_GL; cfg.register_allocation = MALI_SHADER_REGISTER_ALLOCATION_32_PER_THREAD; diff --git a/src/panfrost/vulkan/panvk_vX_device.c b/src/panfrost/vulkan/panvk_vX_device.c index 293557ca956..577de1e6294 100644 --- a/src/panfrost/vulkan/panvk_vX_device.c +++ b/src/panfrost/vulkan/panvk_vX_device.c @@ -70,7 +70,8 @@ static void panvk_device_init_mempools(struct panvk_device *dev) { struct panvk_pool_properties rw_pool_props = { - .create_flags = 0, + .create_flags = + panvk_device_adjust_bo_flags(dev, PAN_KMOD_BO_FLAG_WB_MMAP), .slab_size = 16 * 1024, .label = "Device RW cached memory pool", .owns_bos = false, @@ -93,7 +94,8 @@ panvk_device_init_mempools(struct panvk_device *dev) panvk_pool_init(&dev->mempools.rw_nc, dev, NULL, NULL, &rw_nc_pool_props); struct panvk_pool_properties exec_pool_props = { - .create_flags = PAN_KMOD_BO_FLAG_EXECUTABLE, + .create_flags = panvk_device_adjust_bo_flags( + dev, PAN_KMOD_BO_FLAG_EXECUTABLE | PAN_KMOD_BO_FLAG_WB_MMAP), .slab_size = 16 * 1024, .label = "Device executable memory pool (shaders)", .owns_bos = false, diff --git a/src/panfrost/vulkan/panvk_vX_image_view.c b/src/panfrost/vulkan/panvk_vX_image_view.c index 7ec9a38bee9..218f5700f41 100644 --- a/src/panfrost/vulkan/panvk_vX_image_view.c +++ b/src/panfrost/vulkan/panvk_vX_image_view.c @@ -153,69 +153,73 @@ prepare_tex_descs(struct panvk_image_view *view) if (!panvk_priv_mem_check_alloc(view->mem)) return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY); - struct pan_ptr ptr 
= { - .gpu = panvk_priv_mem_dev_addr(view->mem), - .cpu = panvk_priv_mem_host_addr(view->mem), - }; + panvk_priv_mem_write_array(view->mem, 0, uint8_t, alloc_info.size, cpu_ptr) { + struct pan_ptr ptr = { + .gpu = panvk_priv_mem_dev_addr(view->mem), + .cpu = cpu_ptr, + }; #if PAN_ARCH >= 9 - struct pan_ptr storage_ptr = ptr; - if (view->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) { - uint32_t storage_payload_offset = alloc_info.size - storage_payload_size; - storage_ptr.gpu += storage_payload_offset; - storage_ptr.cpu += storage_payload_offset; - } + struct pan_ptr storage_ptr = ptr; + if (view->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) { + uint32_t storage_payload_offset = + alloc_info.size - storage_payload_size; + storage_ptr.gpu += storage_payload_offset; + storage_ptr.cpu += storage_payload_offset; + } #endif - if (plane_count > 1) { - memset(pview.planes, 0, sizeof(pview.planes)); + if (plane_count > 1) { + memset(pview.planes, 0, sizeof(pview.planes)); - for (uint32_t plane = 0; plane < plane_count; plane++) { - VkFormat plane_format = - vk_format_get_plane_format(view->vk.view_format, plane); + for (uint32_t plane = 0; plane < plane_count; plane++) { + VkFormat plane_format = + vk_format_get_plane_format(view->vk.view_format, plane); - /* We need a per-plane pview. */ - pview.planes[0] = view->pview.planes[plane]; - pview.format = vk_format_to_pipe_format(plane_format); + /* We need a per-plane pview. 
*/ + pview.planes[0] = view->pview.planes[plane]; + pview.format = vk_format_to_pipe_format(plane_format); - GENX(pan_sampled_texture_emit)(&pview, &view->descs.tex[plane], &ptr); + GENX(pan_sampled_texture_emit)(&pview, &view->descs.tex[plane], + &ptr); #if PAN_ARCH >= 9 - if (view->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) { - GENX(pan_storage_texture_emit)( - &pview, &view->descs.storage_tex[plane], &storage_ptr); - storage_ptr.cpu += tex_payload_size; - storage_ptr.gpu += tex_payload_size; + if (view->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) { + GENX(pan_storage_texture_emit)( + &pview, &view->descs.storage_tex[plane], &storage_ptr); + storage_ptr.cpu += tex_payload_size; + storage_ptr.gpu += tex_payload_size; + } +#endif + + ptr.cpu += tex_payload_size; + ptr.gpu += tex_payload_size; } + } else { + GENX(pan_sampled_texture_emit)(&pview, &view->descs.tex[0], &ptr); +#if PAN_ARCH >= 9 + if (view->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) + GENX(pan_storage_texture_emit)(&pview, &view->descs.storage_tex[0], + &storage_ptr); #endif + } + + if (can_preload_other_aspect) { + /* If the depth was present in the aspects mask, we've handled it + * already, so move on to the stencil. If it wasn't present, it's the + * stencil texture we create first, and we need t handle the depth here. + */ + pview.format = (view->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) + ? 
panvk_image_stencil_only_pfmt(image) + : panvk_image_depth_only_pfmt(image); ptr.cpu += tex_payload_size; ptr.gpu += tex_payload_size; + + GENX(pan_sampled_texture_emit)(&pview, + &view->descs.zs.other_aspect_tex, &ptr); } - } else { - GENX(pan_sampled_texture_emit)(&pview, &view->descs.tex[0], &ptr); -#if PAN_ARCH >= 9 - if (view->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) - GENX(pan_storage_texture_emit)(&pview, &view->descs.storage_tex[0], - &storage_ptr); -#endif } - if (!can_preload_other_aspect) - return VK_SUCCESS; - - /* If the depth was present in the aspects mask, we've handled it already, so - * move on to the stencil. If it wasn't present, it's the stencil texture we - * create first, and we need t handle the depth here. - */ - pview.format = (view->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) - ? panvk_image_stencil_only_pfmt(image) - : panvk_image_depth_only_pfmt(image); - - ptr.cpu += tex_payload_size; - ptr.gpu += tex_payload_size; - - GENX(pan_sampled_texture_emit)(&pview, &view->descs.zs.other_aspect_tex, - &ptr); return VK_SUCCESS; } diff --git a/src/panfrost/vulkan/panvk_vX_query_pool.c b/src/panfrost/vulkan/panvk_vX_query_pool.c index ccfe11db3e9..21afc7383d0 100644 --- a/src/panfrost/vulkan/panvk_vX_query_pool.c +++ b/src/panfrost/vulkan/panvk_vX_query_pool.c @@ -24,9 +24,11 @@ static void reset_query_pool(struct panvk_query_pool *pool, uint32_t firstQuery, uint32_t queryCount) { - struct panvk_query_available_obj *available = - panvk_query_available_host_addr(pool, firstQuery); - memset(available, 0, queryCount * sizeof(*available)); + panvk_priv_mem_write_array(pool->available_mem, + panvk_query_available_offset(pool, firstQuery), + struct panvk_query_available_obj, queryCount, + available) + memset(available, 0, queryCount * sizeof(*available)); } VKAPI_ATTR VkResult VKAPI_CALL @@ -131,14 +133,19 @@ panvk_per_arch(ResetQueryPool)(VkDevice device, VkQueryPool queryPool, static bool panvk_query_is_available(struct panvk_query_pool *pool, uint32_t 
query) { - struct panvk_query_available_obj *available = - panvk_query_available_host_addr(pool, query); + bool res = false; + panvk_priv_mem_readback(pool->available_mem, + panvk_query_available_offset(pool, query), + struct panvk_query_available_obj, available) { #if PAN_ARCH >= 10 - return p_atomic_read(&available->sync_obj.seqno) != 0; + res = p_atomic_read(&available->sync_obj.seqno) != 0; #else - return p_atomic_read(&available->value) != 0; + res = p_atomic_read(&available->value) != 0; #endif + } + + return res; } static VkResult @@ -248,28 +255,29 @@ panvk_per_arch(GetQueryPoolResults)(VkDevice _device, VkQueryPool queryPool, bool write_results = available || (flags & VK_QUERY_RESULT_PARTIAL_BIT); - const struct panvk_query_report *src = - panvk_query_report_host_addr(pool, query); assert(i * stride < dataSize); void *dst = (char *)pData + i * stride; - switch (pool->vk.query_type) { - case VK_QUERY_TYPE_OCCLUSION: { - if (write_results) - cpu_write_occlusion_query_result(dst, 0, flags, src, - pool->reports_per_query); - break; - } + panvk_priv_mem_readback(pool->mem, panvk_query_offset(pool, query), + struct panvk_query_report, src) { + switch (pool->vk.query_type) { + case VK_QUERY_TYPE_OCCLUSION: { + if (write_results) + cpu_write_occlusion_query_result(dst, 0, flags, src, + pool->reports_per_query); + break; + } #if PAN_ARCH >= 10 - case VK_QUERY_TYPE_TIMESTAMP: { - if (write_results) - cpu_write_timestamp_query_result(dst, 0, flags, src, - pool->reports_per_query); - break; - } + case VK_QUERY_TYPE_TIMESTAMP: { + if (write_results) + cpu_write_timestamp_query_result(dst, 0, flags, src, + pool->reports_per_query); + break; + } #endif - default: - UNREACHABLE("Unsupported query type"); + default: + UNREACHABLE("Unsupported query type"); + } } if (!write_results) diff --git a/src/panfrost/vulkan/panvk_vX_shader.c b/src/panfrost/vulkan/panvk_vX_shader.c index f2a0e889044..1fe7f4a52f8 100644 --- a/src/panfrost/vulkan/panvk_vX_shader.c +++ 
b/src/panfrost/vulkan/panvk_vX_shader.c @@ -1093,8 +1093,7 @@ panvk_shader_upload(struct panvk_device *dev, if (!panvk_priv_mem_check_alloc(shader->rsd)) return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY); - pan_cast_and_pack(panvk_priv_mem_host_addr(shader->rsd), RENDERER_STATE, - cfg) { + panvk_priv_mem_write_desc(shader->rsd, 0, RENDERER_STATE, cfg) { pan_shader_prepare_rsd(&shader->info, panvk_shader_variant_get_dev_addr(shader), &cfg); } @@ -1104,8 +1103,7 @@ panvk_shader_upload(struct panvk_device *dev, if (!panvk_priv_mem_check_alloc(shader->spd)) return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY); - pan_cast_and_pack(panvk_priv_mem_host_addr(shader->spd), SHADER_PROGRAM, - cfg) { + panvk_priv_mem_write_desc(shader->spd, 0, SHADER_PROGRAM, cfg) { cfg.stage = pan_shader_stage(&shader->info); if (cfg.stage == MALI_SHADER_STAGE_FRAGMENT) @@ -1131,8 +1129,8 @@ panvk_shader_upload(struct panvk_device *dev, if (!panvk_priv_mem_check_alloc(shader->spds.all_points)) return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY); - pan_cast_and_pack(panvk_priv_mem_host_addr(shader->spds.all_points), - SHADER_PROGRAM, cfg) { + panvk_priv_mem_write_desc(shader->spds.all_points, 0, SHADER_PROGRAM, + cfg) { cfg.stage = pan_shader_stage(&shader->info); cfg.register_allocation = pan_register_allocation(shader->info.work_reg_count); @@ -1146,8 +1144,8 @@ panvk_shader_upload(struct panvk_device *dev, if (!panvk_priv_mem_check_alloc(shader->spds.all_triangles)) return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY); - pan_cast_and_pack(panvk_priv_mem_host_addr(shader->spds.all_triangles), - SHADER_PROGRAM, cfg) { + panvk_priv_mem_write_desc(shader->spds.all_triangles, 0, SHADER_PROGRAM, + cfg) { cfg.stage = pan_shader_stage(&shader->info); cfg.register_allocation = pan_register_allocation(shader->info.work_reg_count); @@ -1162,8 +1160,8 @@ panvk_shader_upload(struct panvk_device *dev, if (!panvk_priv_mem_check_alloc(shader->spds.pos_points)) return panvk_error(dev, 
VK_ERROR_OUT_OF_DEVICE_MEMORY); - pan_cast_and_pack(panvk_priv_mem_host_addr(shader->spds.pos_points), - SHADER_PROGRAM, cfg) { + panvk_priv_mem_write_desc(shader->spds.pos_points, 0, SHADER_PROGRAM, + cfg) { cfg.stage = pan_shader_stage(&shader->info); cfg.vertex_warp_limit = MALI_WARP_LIMIT_HALF; cfg.register_allocation = @@ -1178,8 +1176,8 @@ panvk_shader_upload(struct panvk_device *dev, if (!panvk_priv_mem_check_alloc(shader->spds.pos_triangles)) return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY); - pan_cast_and_pack(panvk_priv_mem_host_addr(shader->spds.pos_triangles), - SHADER_PROGRAM, cfg) { + panvk_priv_mem_write_desc(shader->spds.pos_triangles, 0, SHADER_PROGRAM, + cfg) { cfg.stage = pan_shader_stage(&shader->info); cfg.vertex_warp_limit = MALI_WARP_LIMIT_HALF; cfg.register_allocation = @@ -1196,8 +1194,7 @@ panvk_shader_upload(struct panvk_device *dev, if (!panvk_priv_mem_check_alloc(shader->spds.var)) return panvk_error(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY); - pan_cast_and_pack(panvk_priv_mem_host_addr(shader->spds.var), - SHADER_PROGRAM, cfg) { + panvk_priv_mem_write_desc(shader->spds.var, 0, SHADER_PROGRAM, cfg) { unsigned work_count = shader->info.vs.secondary_work_reg_count; cfg.stage = pan_shader_stage(&shader->info); @@ -1583,13 +1580,13 @@ shader_desc_info_deserialize(struct panvk_device *dev, }; shader->desc_info.others.map = panvk_pool_alloc_mem(&dev->mempools.rw, alloc_info); - uint32_t *copy_table = - panvk_priv_mem_host_addr(shader->desc_info.others.map); - - if (!copy_table) + if (!panvk_priv_mem_check_alloc(shader->desc_info.others.map)) return panvk_error(shader, VK_ERROR_OUT_OF_DEVICE_MEMORY); - blob_copy_bytes(blob, copy_table, others_count * sizeof(*copy_table)); + panvk_priv_mem_write_array(shader->desc_info.others.map, 0, uint32_t, + others_count, copy_table) { + blob_copy_bytes(blob, copy_table, others_count * sizeof(*copy_table)); + } } #else shader->desc_info.dyn_bufs.count = blob_read_uint32(blob); @@ -1738,6 +1735,8 @@ 
shader_desc_info_serialize(struct blob *blob, others_count += shader->desc_info.others.count[i]; } + /* No need to wrap this one in panvk_priv_mem_readback(), because the + * GPU is not supposed to touch it. */ blob_write_bytes(blob, panvk_priv_mem_host_addr(shader->desc_info.others.map), sizeof(uint32_t) * others_count); @@ -2080,33 +2079,40 @@ emit_varying_attrs(struct panvk_pool *desc_pool, unsigned varying_count, const struct varyings_info *info, unsigned *buf_offsets, struct panvk_priv_mem *mem) { - *mem = panvk_pool_alloc_desc_array(desc_pool, varying_count, ATTRIBUTE); + if (!varying_count) { + *mem = (struct panvk_priv_mem){0}; + return VK_SUCCESS; + } - if (varying_count && !panvk_priv_mem_check_alloc(*mem)) + *mem = panvk_pool_alloc_desc_array(desc_pool, varying_count, ATTRIBUTE); + if (!panvk_priv_mem_check_alloc(*mem)) return VK_ERROR_OUT_OF_DEVICE_MEMORY; - struct mali_attribute_packed *attrs = panvk_priv_mem_host_addr(*mem); - unsigned attr_idx = 0; + panvk_priv_mem_write_array(*mem, 0, struct mali_attribute_packed, + varying_count, attrs) { + unsigned attr_idx = 0; - for (unsigned i = 0; i < varying_count; i++) { - pan_pack(&attrs[attr_idx++], ATTRIBUTE, cfg) { - gl_varying_slot loc = varyings[i].location; - enum pipe_format pfmt = varyings[i].format != PIPE_FORMAT_NONE - ? info->fmts[loc] - : PIPE_FORMAT_NONE; + for (unsigned i = 0; i < varying_count; i++) { + pan_pack(&attrs[attr_idx++], ATTRIBUTE, cfg) { + gl_varying_slot loc = varyings[i].location; + enum pipe_format pfmt = varyings[i].format != PIPE_FORMAT_NONE + ? 
info->fmts[loc] + : PIPE_FORMAT_NONE; - if (pfmt == PIPE_FORMAT_NONE) { + if (pfmt == PIPE_FORMAT_NONE) { #if PAN_ARCH >= 7 - cfg.format = (MALI_CONSTANT << 12) | MALI_RGB_COMPONENT_ORDER_0000; + cfg.format = + (MALI_CONSTANT << 12) | MALI_RGB_COMPONENT_ORDER_0000; #else - cfg.format = (MALI_CONSTANT << 12) | PAN_V6_SWIZZLE(0, 0, 0, 0); + cfg.format = (MALI_CONSTANT << 12) | PAN_V6_SWIZZLE(0, 0, 0, 0); #endif - } else { - cfg.buffer_index = varying_buf_id(loc); - cfg.offset = buf_offsets[loc]; - cfg.format = varying_format(loc, info->fmts[loc]); + } else { + cfg.buffer_index = varying_buf_id(loc); + cfg.offset = buf_offsets[loc]; + cfg.format = varying_format(loc, info->fmts[loc]); + } + cfg.offset_enable = false; } - cfg.offset_enable = false; } }