80f864cd23

A simplification of the synchronization code is also undertaken as part of
this commit to account for the implicit guarantee the FW gives the driver
that jobs submitted to the same context will be run in submission order.

Signed-off-by: Jarred Davies <jarred.davies@imgtec.com>
Reviewed-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21577>

/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * based in part on radv driver which is:
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/**
 * This file implements VkQueue, VkFence, and VkSemaphore
 */

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <unistd.h>
#include <vulkan/vulkan.h>

#include "pvr_job_compute.h"
#include "pvr_job_context.h"
#include "pvr_job_render.h"
#include "pvr_job_transfer.h"
#include "pvr_limits.h"
#include "pvr_private.h"
#include "util/macros.h"
#include "util/u_atomic.h"
#include "vk_alloc.h"
#include "vk_fence.h"
#include "vk_log.h"
#include "vk_object.h"
#include "vk_queue.h"
#include "vk_semaphore.h"
#include "vk_sync.h"
#include "vk_sync_dummy.h"
#include "vk_util.h"

static VkResult pvr_driver_queue_submit(struct vk_queue *queue,
                                        struct vk_queue_submit *submit);

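/**
 * \brief Initialize a driver queue and create its backing HW contexts.
 *
 * Creates the transfer, compute, occlusion query and render contexts owned
 * by the queue, and hooks the queue up to the common runtime via
 * \c driver_submit.
 */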
static VkResult pvr_queue_init(struct pvr_device *device,
                               struct pvr_queue *queue,
                               const VkDeviceQueueCreateInfo *pCreateInfo,
                               uint32_t index_in_family)
{
   struct pvr_transfer_ctx *transfer_ctx;
   struct pvr_compute_ctx *compute_ctx;
   struct pvr_compute_ctx *query_ctx;
   struct pvr_render_ctx *gfx_ctx;
   VkResult result;

   *queue = (struct pvr_queue){ 0 };

   result =
      vk_queue_init(&queue->vk, &device->vk, pCreateInfo, index_in_family);
   if (result != VK_SUCCESS)
      return result;

   result = pvr_transfer_ctx_create(device,
                                    PVR_WINSYS_CTX_PRIORITY_MEDIUM,
                                    &transfer_ctx);
   if (result != VK_SUCCESS)
      goto err_vk_queue_finish;

   result = pvr_compute_ctx_create(device,
                                   PVR_WINSYS_CTX_PRIORITY_MEDIUM,
                                   &compute_ctx);
   if (result != VK_SUCCESS)
      goto err_transfer_ctx_destroy;

   result = pvr_compute_ctx_create(device,
                                   PVR_WINSYS_CTX_PRIORITY_MEDIUM,
                                   &query_ctx);
   if (result != VK_SUCCESS)
      goto err_compute_ctx_destroy;

   result =
      pvr_render_ctx_create(device, PVR_WINSYS_CTX_PRIORITY_MEDIUM, &gfx_ctx);
   if (result != VK_SUCCESS)
      goto err_query_ctx_destroy;

   queue->device = device;
   queue->gfx_ctx = gfx_ctx;
   queue->compute_ctx = compute_ctx;
   queue->query_ctx = query_ctx;
   queue->transfer_ctx = transfer_ctx;

   queue->vk.driver_submit = pvr_driver_queue_submit;

   return VK_SUCCESS;

err_query_ctx_destroy:
   pvr_compute_ctx_destroy(query_ctx);

err_compute_ctx_destroy:
   pvr_compute_ctx_destroy(compute_ctx);

err_transfer_ctx_destroy:
   pvr_transfer_ctx_destroy(transfer_ctx);

err_vk_queue_finish:
   vk_queue_finish(&queue->vk);

   return result;
}

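/**
 * \brief Create all queues requested in \p pCreateInfo.
 *
 * Only a single queue family is supported, so every queue is initialized
 * from the first VkDeviceQueueCreateInfo entry.
 */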
VkResult pvr_queues_create(struct pvr_device *device,
                           const VkDeviceCreateInfo *pCreateInfo)
{
   VkResult result;

   /* Check requested queue families and queues */
   assert(pCreateInfo->queueCreateInfoCount == 1);
   assert(pCreateInfo->pQueueCreateInfos[0].queueFamilyIndex == 0);
   assert(pCreateInfo->pQueueCreateInfos[0].queueCount <= PVR_MAX_QUEUES);

   const VkDeviceQueueCreateInfo *queue_create =
      &pCreateInfo->pQueueCreateInfos[0];

   device->queues = vk_alloc(&device->vk.alloc,
                             queue_create->queueCount * sizeof(*device->queues),
                             8,
                             VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!device->queues)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   device->queue_count = 0;

   for (uint32_t i = 0; i < queue_create->queueCount; i++) {
      result = pvr_queue_init(device, &device->queues[i], queue_create, i);
      if (result != VK_SUCCESS)
         goto err_queues_finish;

      device->queue_count++;
   }

   return VK_SUCCESS;

err_queues_finish:
   pvr_queues_destroy(device);
   return result;
}

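/**
 * \brief Release the per-stage syncs and HW contexts owned by a queue.
 */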
static void pvr_queue_finish(struct pvr_queue *queue)
{
   for (uint32_t i = 0; i < ARRAY_SIZE(queue->next_job_wait_sync); i++) {
      if (queue->next_job_wait_sync[i])
         vk_sync_destroy(&queue->device->vk, queue->next_job_wait_sync[i]);
   }

   for (uint32_t i = 0; i < ARRAY_SIZE(queue->last_job_signal_sync); i++) {
      if (queue->last_job_signal_sync[i])
         vk_sync_destroy(&queue->device->vk, queue->last_job_signal_sync[i]);
   }

   pvr_render_ctx_destroy(queue->gfx_ctx);
   pvr_compute_ctx_destroy(queue->query_ctx);
   pvr_compute_ctx_destroy(queue->compute_ctx);
   pvr_transfer_ctx_destroy(queue->transfer_ctx);

   vk_queue_finish(&queue->vk);
}

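/**
 * \brief Finish all device queues and free the queue array.
 */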
void pvr_queues_destroy(struct pvr_device *device)
{
   for (uint32_t q_idx = 0; q_idx < device->queue_count; q_idx++)
      pvr_queue_finish(&device->queues[q_idx]);

   vk_free(&device->vk.alloc, device->queues);
}

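/**
 * \brief Submit the geometry and fragment jobs of a graphics sub command.
 *
 * On success the queue's geometry and fragment completion syncs are replaced
 * with the new jobs' signal syncs.
 */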
static VkResult pvr_process_graphics_cmd(struct pvr_device *device,
                                         struct pvr_queue *queue,
                                         struct pvr_cmd_buffer *cmd_buffer,
                                         struct pvr_sub_cmd_gfx *sub_cmd)
{
   pvr_dev_addr_t original_ctrl_stream_addr = { 0 };
   struct vk_sync *geom_signal_sync;
   struct vk_sync *frag_signal_sync;
   VkResult result;

   result = vk_sync_create(&device->vk,
                           &device->pdevice->ws->syncobj_type,
                           0U,
                           0UL,
                           &geom_signal_sync);
   if (result != VK_SUCCESS)
      return result;

   result = vk_sync_create(&device->vk,
                           &device->pdevice->ws->syncobj_type,
                           0U,
                           0UL,
                           &frag_signal_sync);
   if (result != VK_SUCCESS)
      goto err_destroy_geom_sync;

   /* FIXME: DoShadowLoadOrStore() */

   /* Perform two render submits when using multiple framebuffer layers. The
    * first submit contains just geometry, while the second only terminates
    * (and triggers the fragment render if originally specified). This is
    * needed because the render target cache gets cleared on terminating
    * submits, which could result in missing primitives.
    */
   if (pvr_sub_cmd_gfx_requires_split_submit(sub_cmd)) {
      /* If fragment work shouldn't be run there's no need for a split,
       * and if geometry_terminate is false this kick can't have a fragment
       * stage without another terminating geometry kick.
       */
      assert(sub_cmd->job.geometry_terminate && sub_cmd->job.run_frag);

      /* First submit must not touch fragment work. */
      sub_cmd->job.geometry_terminate = false;
      sub_cmd->job.run_frag = false;

      result =
         pvr_render_job_submit(queue->gfx_ctx,
                               &sub_cmd->job,
                               queue->next_job_wait_sync[PVR_JOB_TYPE_GEOM],
                               NULL,
                               NULL,
                               NULL);

      sub_cmd->job.geometry_terminate = true;
      sub_cmd->job.run_frag = true;

      if (result != VK_SUCCESS)
         goto err_destroy_frag_sync;

      original_ctrl_stream_addr = sub_cmd->job.ctrl_stream_addr;

      /* Second submit contains only a trivial control stream to terminate
       * the geometry work.
       */
      assert(sub_cmd->terminate_ctrl_stream);
      sub_cmd->job.ctrl_stream_addr =
         sub_cmd->terminate_ctrl_stream->vma->dev_addr;
   }

   result = pvr_render_job_submit(queue->gfx_ctx,
                                  &sub_cmd->job,
                                  queue->next_job_wait_sync[PVR_JOB_TYPE_GEOM],
                                  queue->next_job_wait_sync[PVR_JOB_TYPE_FRAG],
                                  geom_signal_sync,
                                  frag_signal_sync);

   if (original_ctrl_stream_addr.addr > 0)
      sub_cmd->job.ctrl_stream_addr = original_ctrl_stream_addr;

   if (result != VK_SUCCESS)
      goto err_destroy_frag_sync;

   /* Replace the completion fences. */
   if (queue->last_job_signal_sync[PVR_JOB_TYPE_GEOM]) {
      vk_sync_destroy(&device->vk,
                      queue->last_job_signal_sync[PVR_JOB_TYPE_GEOM]);
   }

   queue->last_job_signal_sync[PVR_JOB_TYPE_GEOM] = geom_signal_sync;

   if (queue->last_job_signal_sync[PVR_JOB_TYPE_FRAG]) {
      vk_sync_destroy(&device->vk,
                      queue->last_job_signal_sync[PVR_JOB_TYPE_FRAG]);
   }

   queue->last_job_signal_sync[PVR_JOB_TYPE_FRAG] = frag_signal_sync;

   /* FIXME: DoShadowLoadOrStore() */

   return VK_SUCCESS;

err_destroy_frag_sync:
   vk_sync_destroy(&device->vk, frag_signal_sync);
err_destroy_geom_sync:
   vk_sync_destroy(&device->vk, geom_signal_sync);

   return result;
}

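/**
 * \brief Submit a compute sub command and update the compute completion sync.
 */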
static VkResult pvr_process_compute_cmd(struct pvr_device *device,
                                        struct pvr_queue *queue,
                                        struct pvr_sub_cmd_compute *sub_cmd)
{
   struct vk_sync *sync;
   VkResult result;

   result = vk_sync_create(&device->vk,
                           &device->pdevice->ws->syncobj_type,
                           0U,
                           0UL,
                           &sync);
   if (result != VK_SUCCESS)
      return result;

   result =
      pvr_compute_job_submit(queue->compute_ctx,
                             sub_cmd,
                             queue->next_job_wait_sync[PVR_JOB_TYPE_COMPUTE],
                             sync);
   if (result != VK_SUCCESS) {
      vk_sync_destroy(&device->vk, sync);
      return result;
   }

   /* Replace the signal fence. */
   if (queue->last_job_signal_sync[PVR_JOB_TYPE_COMPUTE]) {
      vk_sync_destroy(&device->vk,
                      queue->last_job_signal_sync[PVR_JOB_TYPE_COMPUTE]);
   }

   queue->last_job_signal_sync[PVR_JOB_TYPE_COMPUTE] = sync;

   return result;
}

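/**
 * \brief Submit a transfer sub command and update the transfer completion
 * sync.
 */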
static VkResult pvr_process_transfer_cmds(struct pvr_device *device,
                                          struct pvr_queue *queue,
                                          struct pvr_sub_cmd_transfer *sub_cmd)
{
   struct vk_sync *sync;
   VkResult result;

   result = vk_sync_create(&device->vk,
                           &device->pdevice->ws->syncobj_type,
                           0U,
                           0UL,
                           &sync);
   if (result != VK_SUCCESS)
      return result;

   result =
      pvr_transfer_job_submit(device,
                              queue->transfer_ctx,
                              sub_cmd,
                              queue->next_job_wait_sync[PVR_JOB_TYPE_TRANSFER],
                              sync);
   if (result != VK_SUCCESS) {
      vk_sync_destroy(&device->vk, sync);
      return result;
   }

   /* Replace the signal syncs. */
   if (queue->last_job_signal_sync[PVR_JOB_TYPE_TRANSFER]) {
      vk_sync_destroy(&device->vk,
                      queue->last_job_signal_sync[PVR_JOB_TYPE_TRANSFER]);
   }

   queue->last_job_signal_sync[PVR_JOB_TYPE_TRANSFER] = sync;

   return result;
}

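/**
 * \brief Submit an occlusion query sub command on the query context and
 * update the occlusion query completion sync.
 */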
static VkResult
pvr_process_occlusion_query_cmd(struct pvr_device *device,
                                struct pvr_queue *queue,
                                struct pvr_sub_cmd_compute *sub_cmd)
{
   struct vk_sync *sync;
   VkResult result;

   /* TODO: Currently we add barrier event sub commands to handle the sync
    * necessary for the different occlusion query types. Would we get any
    * speed up in processing the queue by doing that sync here without using
    * event sub commands?
    */

   result = vk_sync_create(&device->vk,
                           &device->pdevice->ws->syncobj_type,
                           0U,
                           0UL,
                           &sync);
   if (result != VK_SUCCESS)
      return result;

   result = pvr_compute_job_submit(
      queue->query_ctx,
      sub_cmd,
      queue->next_job_wait_sync[PVR_JOB_TYPE_OCCLUSION_QUERY],
      sync);
   if (result != VK_SUCCESS) {
      vk_sync_destroy(&device->vk, sync);
      return result;
   }

   if (queue->last_job_signal_sync[PVR_JOB_TYPE_OCCLUSION_QUERY]) {
      vk_sync_destroy(
         &device->vk,
         queue->last_job_signal_sync[PVR_JOB_TYPE_OCCLUSION_QUERY]);
   }

   queue->last_job_signal_sync[PVR_JOB_TYPE_OCCLUSION_QUERY] = sync;

   return result;
}

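/**
 * \brief Process an event sub command of barrier type.
 *
 * Submits a null job that waits on the completion syncs of all source stages
 * and, for each destination stage, replaces the queue's next-job wait sync so
 * that subsequent jobs at that stage wait for the barrier to be satisfied.
 */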
static VkResult pvr_process_event_cmd_barrier(struct pvr_device *device,
                                              struct pvr_queue *queue,
                                              struct pvr_sub_cmd_event *sub_cmd)
{
   const uint32_t src_mask = sub_cmd->barrier.wait_for_stage_mask;
   const uint32_t dst_mask = sub_cmd->barrier.wait_at_stage_mask;
   struct vk_sync_wait wait_syncs[PVR_JOB_TYPE_MAX + 1];
   uint32_t src_wait_count = 0;
   VkResult result;

   assert(sub_cmd->type == PVR_EVENT_TYPE_BARRIER);

   assert(!(src_mask & ~PVR_PIPELINE_STAGE_ALL_BITS));
   assert(!(dst_mask & ~PVR_PIPELINE_STAGE_ALL_BITS));

   u_foreach_bit (stage, src_mask) {
      if (queue->last_job_signal_sync[stage]) {
         wait_syncs[src_wait_count++] = (struct vk_sync_wait){
            .sync = queue->last_job_signal_sync[stage],
            .stage_mask = ~(VkPipelineStageFlags2)0,
            .wait_value = 0,
         };
      }
   }

   /* No previous src jobs that need finishing so no need for a barrier. */
   if (src_wait_count == 0)
      return VK_SUCCESS;

   u_foreach_bit (stage, dst_mask) {
      uint32_t wait_count = src_wait_count;
      struct vk_sync_signal signal;
      struct vk_sync *signal_sync;

      result = vk_sync_create(&device->vk,
                              &device->pdevice->ws->syncobj_type,
                              0U,
                              0UL,
                              &signal_sync);
      if (result != VK_SUCCESS)
         return result;

      signal = (struct vk_sync_signal){
         .sync = signal_sync,
         .stage_mask = ~(VkPipelineStageFlags2)0,
         .signal_value = 0,
      };

      if (queue->next_job_wait_sync[stage]) {
         wait_syncs[wait_count++] = (struct vk_sync_wait){
            .sync = queue->next_job_wait_sync[stage],
            .stage_mask = ~(VkPipelineStageFlags2)0,
            .wait_value = 0,
         };
      }

      result = device->ws->ops->null_job_submit(device->ws,
                                                wait_syncs,
                                                wait_count,
                                                &signal);
      if (result != VK_SUCCESS) {
         vk_sync_destroy(&device->vk, signal_sync);
         return result;
      }

      if (queue->next_job_wait_sync[stage])
         vk_sync_destroy(&device->vk, queue->next_job_wait_sync[stage]);

      queue->next_job_wait_sync[stage] = signal_sync;
   }

   return VK_SUCCESS;
}

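/**
 * \brief Process an event sub command of set or reset type.
 *
 * Submits a null job gated on the completion syncs of the stages in the wait
 * mask and attaches its signal sync to the event, marking the event as set
 * or reset by the device.
 */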
static VkResult
pvr_process_event_cmd_set_or_reset(struct pvr_device *device,
                                   struct pvr_queue *queue,
                                   struct pvr_sub_cmd_event *sub_cmd)
{
   /* The array is not sized PVR_JOB_TYPE_MAX since that would also include
    * PVR_JOB_TYPE_OCCLUSION_QUERY, which has no stage in the src mask.
    */
   struct vk_sync_wait waits[PVR_NUM_SYNC_PIPELINE_STAGES];
   struct vk_sync_signal signal;
   struct vk_sync *signal_sync;

   uint32_t wait_for_stage_mask;
   uint32_t wait_count = 0;
   VkResult result;

   assert(sub_cmd->type == PVR_EVENT_TYPE_SET ||
          sub_cmd->type == PVR_EVENT_TYPE_RESET);

   if (sub_cmd->type == PVR_EVENT_TYPE_SET)
      wait_for_stage_mask = sub_cmd->set.wait_for_stage_mask;
   else
      wait_for_stage_mask = sub_cmd->reset.wait_for_stage_mask;

   assert(!(wait_for_stage_mask & ~PVR_PIPELINE_STAGE_ALL_BITS));

   u_foreach_bit (stage, wait_for_stage_mask) {
      if (!queue->last_job_signal_sync[stage])
         continue;

      waits[wait_count++] = (struct vk_sync_wait){
         .sync = queue->last_job_signal_sync[stage],
         .stage_mask = ~(VkPipelineStageFlags2)0,
         .wait_value = 0,
      };
   }

   result = vk_sync_create(&device->vk,
                           &device->pdevice->ws->syncobj_type,
                           0U,
                           0UL,
                           &signal_sync);
   if (result != VK_SUCCESS)
      return result;

   signal = (struct vk_sync_signal){
      .sync = signal_sync,
      .stage_mask = ~(VkPipelineStageFlags2)0,
      .signal_value = 0,
   };

   result =
      device->ws->ops->null_job_submit(device->ws, waits, wait_count, &signal);
   if (result != VK_SUCCESS) {
      vk_sync_destroy(&device->vk, signal_sync);
      return result;
   }

   if (sub_cmd->type == PVR_EVENT_TYPE_SET) {
      if (sub_cmd->set.event->sync)
         vk_sync_destroy(&device->vk, sub_cmd->set.event->sync);

      sub_cmd->set.event->sync = signal_sync;
      sub_cmd->set.event->state = PVR_EVENT_STATE_SET_BY_DEVICE;
   } else {
      if (sub_cmd->reset.event->sync)
         vk_sync_destroy(&device->vk, sub_cmd->reset.event->sync);

      sub_cmd->reset.event->sync = signal_sync;
      sub_cmd->reset.event->state = PVR_EVENT_STATE_RESET_BY_DEVICE;
   }

   return VK_SUCCESS;
}

/**
 * \brief Process an event sub command of wait type.
 *
 * This sets up barrier syncobjs to create a dependency from the event
 * syncobjs onto the next job submissions.
 *
 * The barriers are set up by taking each event's dst stage mask into
 * consideration, so this is in line with vkCmdWaitEvents2().
 *
 * \param[in] device  Device to create the syncobjs on.
 * \param[in] queue   Queue whose next-job wait syncs are updated.
 * \param[in] sub_cmd Sub command to process.
 */
static VkResult pvr_process_event_cmd_wait(struct pvr_device *device,
                                           struct pvr_queue *queue,
                                           struct pvr_sub_cmd_event *sub_cmd)
{
   uint32_t dst_mask = 0;
   VkResult result;

   STACK_ARRAY(struct vk_sync_wait, waits, sub_cmd->wait.count + 1);
   if (!waits)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   for (uint32_t i = 0; i < sub_cmd->wait.count; i++)
      dst_mask |= sub_cmd->wait.wait_at_stage_masks[i];

   u_foreach_bit (stage, dst_mask) {
      struct vk_sync_signal signal;
      struct vk_sync *signal_sync;
      uint32_t wait_count = 0;

      for (uint32_t i = 0; i < sub_cmd->wait.count; i++) {
         /* wait_at_stage_masks[i] is a bit mask while `stage` is a bit
          * index, so convert the index before testing membership.
          */
         if (sub_cmd->wait.wait_at_stage_masks[i] & BITFIELD_BIT(stage)) {
            waits[wait_count++] = (struct vk_sync_wait){
               .sync = sub_cmd->wait.events[i]->sync,
               .stage_mask = ~(VkPipelineStageFlags2)0,
               .wait_value = 0,
            };
         }
      }

      if (!wait_count)
         continue;

      if (queue->next_job_wait_sync[stage]) {
         waits[wait_count++] = (struct vk_sync_wait){
            .sync = queue->next_job_wait_sync[stage],
            .stage_mask = ~(VkPipelineStageFlags2)0,
            .wait_value = 0,
         };
      }

      assert(wait_count <= (sub_cmd->wait.count + 1));

      result = vk_sync_create(&device->vk,
                              &device->pdevice->ws->syncobj_type,
                              0U,
                              0UL,
                              &signal_sync);
      if (result != VK_SUCCESS)
         goto err_free_waits;

      signal = (struct vk_sync_signal){
         .sync = signal_sync,
         .stage_mask = ~(VkPipelineStageFlags2)0,
         .signal_value = 0,
      };

      result = device->ws->ops->null_job_submit(device->ws,
                                                waits,
                                                wait_count,
                                                &signal);
      if (result != VK_SUCCESS) {
         vk_sync_destroy(&device->vk, signal.sync);
         goto err_free_waits;
      }

      if (queue->next_job_wait_sync[stage])
         vk_sync_destroy(&device->vk, queue->next_job_wait_sync[stage]);

      queue->next_job_wait_sync[stage] = signal.sync;
   }

   STACK_ARRAY_FINISH(waits);

   return VK_SUCCESS;

err_free_waits:
   STACK_ARRAY_FINISH(waits);

   return result;
}

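/**
 * \brief Dispatch an event sub command to its type-specific handler.
 */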
static VkResult pvr_process_event_cmd(struct pvr_device *device,
                                      struct pvr_queue *queue,
                                      struct pvr_sub_cmd_event *sub_cmd)
{
   switch (sub_cmd->type) {
   case PVR_EVENT_TYPE_SET:
   case PVR_EVENT_TYPE_RESET:
      return pvr_process_event_cmd_set_or_reset(device, queue, sub_cmd);
   case PVR_EVENT_TYPE_WAIT:
      return pvr_process_event_cmd_wait(device, queue, sub_cmd);
   case PVR_EVENT_TYPE_BARRIER:
      return pvr_process_event_cmd_barrier(device, queue, sub_cmd);
   default:
      unreachable("Invalid event sub-command type.");
   }
}

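/**
 * \brief Process all sub commands of a command buffer in recording order.
 *
 * Inserts implicit barriers where a sub command has to serialize with
 * fragment or occlusion query work before being submitted.
 */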
static VkResult pvr_process_cmd_buffer(struct pvr_device *device,
                                       struct pvr_queue *queue,
                                       struct pvr_cmd_buffer *cmd_buffer)
{
   VkResult result;

   list_for_each_entry_safe (struct pvr_sub_cmd,
                             sub_cmd,
                             &cmd_buffer->sub_cmds,
                             link) {
      switch (sub_cmd->type) {
      case PVR_SUB_CMD_TYPE_GRAPHICS: {
         /* If the fragment job utilizes occlusion queries, for data
          * integrity it needs to wait for the occlusion query to be
          * processed.
          */
         if (sub_cmd->gfx.has_occlusion_query) {
            struct pvr_sub_cmd_event frag_to_transfer_barrier = {
               .type = PVR_EVENT_TYPE_BARRIER,
               .barrier = {
                  .wait_for_stage_mask = PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT,
                  .wait_at_stage_mask = PVR_PIPELINE_STAGE_FRAG_BIT,
               },
            };

            result = pvr_process_event_cmd_barrier(device,
                                                   queue,
                                                   &frag_to_transfer_barrier);
            if (result != VK_SUCCESS)
               break;
         }

         result =
            pvr_process_graphics_cmd(device, queue, cmd_buffer, &sub_cmd->gfx);
         break;
      }

      case PVR_SUB_CMD_TYPE_COMPUTE:
         result = pvr_process_compute_cmd(device, queue, &sub_cmd->compute);
         break;

      case PVR_SUB_CMD_TYPE_TRANSFER: {
         const bool serialize_with_frag = sub_cmd->transfer.serialize_with_frag;

         if (serialize_with_frag) {
            struct pvr_sub_cmd_event frag_to_transfer_barrier = {
               .type = PVR_EVENT_TYPE_BARRIER,
               .barrier = {
                  .wait_for_stage_mask = PVR_PIPELINE_STAGE_FRAG_BIT,
                  .wait_at_stage_mask = PVR_PIPELINE_STAGE_TRANSFER_BIT,
               },
            };

            result = pvr_process_event_cmd_barrier(device,
                                                   queue,
                                                   &frag_to_transfer_barrier);
            if (result != VK_SUCCESS)
               break;
         }

         result = pvr_process_transfer_cmds(device, queue, &sub_cmd->transfer);

         if (serialize_with_frag) {
            struct pvr_sub_cmd_event transfer_to_frag_barrier = {
               .type = PVR_EVENT_TYPE_BARRIER,
               .barrier = {
                  .wait_for_stage_mask = PVR_PIPELINE_STAGE_TRANSFER_BIT,
                  .wait_at_stage_mask = PVR_PIPELINE_STAGE_FRAG_BIT,
               },
            };

            if (result != VK_SUCCESS)
               break;

            result = pvr_process_event_cmd_barrier(device,
                                                   queue,
                                                   &transfer_to_frag_barrier);
         }

         break;
      }

      case PVR_SUB_CMD_TYPE_OCCLUSION_QUERY:
         result =
            pvr_process_occlusion_query_cmd(device, queue, &sub_cmd->compute);
         break;

      case PVR_SUB_CMD_TYPE_EVENT:
         result = pvr_process_event_cmd(device, queue, &sub_cmd->event);
         break;

      default:
         mesa_loge("Unsupported sub-command type %d", sub_cmd->type);
         result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      if (result != VK_SUCCESS)
         return result;

      p_atomic_inc(&device->global_cmd_buffer_submit_count);
   }

   return VK_SUCCESS;
}

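/**
 * \brief Wait for all outstanding per-stage syncs to complete and destroy
 * them.
 *
 * This leaves the queue with no pending wait or signal state before a new
 * submission is processed.
 */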
static VkResult pvr_clear_last_submits_syncs(struct pvr_queue *queue)
{
   struct vk_sync_wait waits[PVR_JOB_TYPE_MAX * 2];
   uint32_t wait_count = 0;
   VkResult result;

   for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) {
      if (queue->next_job_wait_sync[i]) {
         waits[wait_count++] = (struct vk_sync_wait){
            .sync = queue->next_job_wait_sync[i],
            .stage_mask = ~(VkPipelineStageFlags2)0,
            .wait_value = 0,
         };
      }

      if (queue->last_job_signal_sync[i]) {
         waits[wait_count++] = (struct vk_sync_wait){
            .sync = queue->last_job_signal_sync[i],
            .stage_mask = ~(VkPipelineStageFlags2)0,
            .wait_value = 0,
         };
      }
   }

   result = vk_sync_wait_many(&queue->device->vk,
                              wait_count,
                              waits,
                              VK_SYNC_WAIT_COMPLETE,
                              UINT64_MAX);
   if (result != VK_SUCCESS)
      return vk_error(queue, result);

   for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) {
      if (queue->next_job_wait_sync[i]) {
         vk_sync_destroy(&queue->device->vk, queue->next_job_wait_sync[i]);
         queue->next_job_wait_sync[i] = NULL;
      }

      if (queue->last_job_signal_sync[i]) {
         vk_sync_destroy(&queue->device->vk, queue->last_job_signal_sync[i]);
         queue->last_job_signal_sync[i] = NULL;
      }
   }

   return VK_SUCCESS;
}

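/**
 * \brief Signal the submit's semaphores once the relevant stages complete.
 *
 * For each signal, a null job waits on the completion syncs of the stages
 * covered by the signal's stage mask and then signals the payload sync.
 */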
static VkResult pvr_process_queue_signals(struct pvr_queue *queue,
                                          struct vk_sync_signal *signals,
                                          uint32_t signal_count)
{
   struct vk_sync_wait signal_waits[PVR_JOB_TYPE_MAX];
   struct pvr_device *device = queue->device;
   VkResult result;

   for (uint32_t signal_idx = 0; signal_idx < signal_count; signal_idx++) {
      struct vk_sync_signal *signal = &signals[signal_idx];
      const enum pvr_pipeline_stage_bits signal_stage_src =
         pvr_stage_mask_src(signal->stage_mask);
      uint32_t wait_count = 0;

      for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) {
         if (!(signal_stage_src & BITFIELD_BIT(i)) ||
             !queue->last_job_signal_sync[i])
            continue;

         signal_waits[wait_count++] = (struct vk_sync_wait){
            .sync = queue->last_job_signal_sync[i],
            .stage_mask = ~(VkPipelineStageFlags2)0,
            .wait_value = 0,
         };
      }

      result = device->ws->ops->null_job_submit(device->ws,
                                                signal_waits,
                                                wait_count,
                                                signal);
      if (result != VK_SUCCESS)
         return result;
   }

   return VK_SUCCESS;
}

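/**
 * \brief Convert the submit's wait semaphores into per-stage wait syncs.
 *
 * For each job type, a null job is submitted which waits on every semaphore
 * whose stage mask covers that job type and signals a fresh syncobj stored in
 * next_job_wait_sync[], which later job submissions then wait on.
 */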
static VkResult pvr_process_queue_waits(struct pvr_queue *queue,
                                        struct vk_sync_wait *waits,
                                        uint32_t wait_count)
{
   struct pvr_device *device = queue->device;
   VkResult result;

   STACK_ARRAY(struct vk_sync_wait, stage_waits, wait_count);
   if (!stage_waits)
      return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);

   for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) {
      struct vk_sync_signal next_job_wait_signal_sync;
      uint32_t stage_wait_count = 0;

      for (uint32_t wait_idx = 0; wait_idx < wait_count; wait_idx++) {
         if (!(pvr_stage_mask(waits[wait_idx].stage_mask) & BITFIELD_BIT(i)))
            continue;

         stage_waits[stage_wait_count++] = (struct vk_sync_wait){
            .sync = waits[wait_idx].sync,
            .stage_mask = ~(VkPipelineStageFlags2)0,
            .wait_value = waits[wait_idx].wait_value,
         };
      }

      result = vk_sync_create(&device->vk,
                              &device->pdevice->ws->syncobj_type,
                              0U,
                              0UL,
                              &queue->next_job_wait_sync[i]);
      if (result != VK_SUCCESS)
         goto err_free_waits;

      next_job_wait_signal_sync = (struct vk_sync_signal){
         .sync = queue->next_job_wait_sync[i],
         .stage_mask = ~(VkPipelineStageFlags2)0,
         .signal_value = 0,
      };

      result = device->ws->ops->null_job_submit(device->ws,
                                                stage_waits,
                                                stage_wait_count,
                                                &next_job_wait_signal_sync);
      if (result != VK_SUCCESS)
         goto err_free_waits;
   }

   STACK_ARRAY_FINISH(stage_waits);

   return VK_SUCCESS;

err_free_waits:
   STACK_ARRAY_FINISH(stage_waits);

   return result;
}

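/**
 * \brief Driver hook for vk_queue submission.
 *
 * Clears the previous submission's sync state, sets up the per-stage waits,
 * processes every command buffer in the submit, and finally wires up the
 * signal operations.
 */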
static VkResult pvr_driver_queue_submit(struct vk_queue *queue,
                                        struct vk_queue_submit *submit)
{
   struct pvr_queue *driver_queue = container_of(queue, struct pvr_queue, vk);
   struct pvr_device *device = driver_queue->device;
   VkResult result;

   result = pvr_clear_last_submits_syncs(driver_queue);
   if (result != VK_SUCCESS)
      return result;

   result =
      pvr_process_queue_waits(driver_queue, submit->waits, submit->wait_count);
   if (result != VK_SUCCESS)
      return result;

   for (uint32_t i = 0U; i < submit->command_buffer_count; i++) {
      result = pvr_process_cmd_buffer(
         device,
         driver_queue,
         container_of(submit->command_buffers[i], struct pvr_cmd_buffer, vk));
      if (result != VK_SUCCESS)
         return result;
   }

   result = pvr_process_queue_signals(driver_queue,
                                      submit->signals,
                                      submit->signal_count);
   if (result != VK_SUCCESS)
      return result;

   return VK_SUCCESS;
}