From e15e3a8e8636c38def3e8a4fb93cfed90b8faf1b Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Mon, 11 Oct 2021 16:00:47 +0200 Subject: [PATCH] radv: optimize subpass barrier flushes for imageless framebuffers The driver should always know the attachments at this point. This should reduce the number of L2 cache flushes for imageless framebuffers. Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen Part-of: --- src/amd/vulkan/radv_cmd_buffer.c | 33 ++++++++++++--------------- src/amd/vulkan/radv_device.c | 1 - src/amd/vulkan/radv_meta_resolve_cs.c | 2 +- src/amd/vulkan/radv_meta_resolve_fs.c | 4 ++-- src/amd/vulkan/radv_private.h | 4 ++-- 5 files changed, 20 insertions(+), 24 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 9fdb7050a5c..c805ff1220f 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -3961,29 +3961,25 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags dst_flag } void -radv_emit_subpass_barrier(struct radv_cmd_buffer *cmd_buffer, const struct radv_subpass_barrier *barrier) +radv_emit_subpass_barrier(struct radv_cmd_buffer *cmd_buffer, const struct radv_subpass *subpass, + const struct radv_subpass_barrier *barrier) { - struct radv_framebuffer *fb = cmd_buffer->state.framebuffer; - if (fb && !fb->imageless) { - for (int i = 0; i < fb->attachment_count; ++i) { - cmd_buffer->state.flush_bits |= - radv_src_access_flush(cmd_buffer, barrier->src_access_mask, fb->attachments[i]->image); - } - } else { + struct radv_render_pass *pass = cmd_buffer->state.pass; + + for (uint32_t i = 0; i < pass->attachment_count; i++) { + struct radv_image_view *iview = cmd_buffer->state.attachments[i].iview; + cmd_buffer->state.flush_bits |= - radv_src_access_flush(cmd_buffer, barrier->src_access_mask, NULL); + radv_src_access_flush(cmd_buffer, barrier->src_access_mask, iview->image); } radv_stage_flush(cmd_buffer, barrier->src_stage_mask); - if (fb && !fb->imageless) { - for (int i = 0; i < fb->attachment_count; ++i) { - cmd_buffer->state.flush_bits |= - radv_dst_access_flush(cmd_buffer, barrier->dst_access_mask, fb->attachments[i]->image); - } - } else { + for (uint32_t i = 0; i < pass->attachment_count; i++) { + struct radv_image_view *iview = cmd_buffer->state.attachments[i].iview; + cmd_buffer->state.flush_bits |= - radv_dst_access_flush(cmd_buffer, barrier->dst_access_mask, NULL); + radv_dst_access_flush(cmd_buffer, barrier->dst_access_mask, iview->image); } } @@ -5725,7 +5721,7 @@ radv_cmd_buffer_begin_subpass(struct radv_cmd_buffer *cmd_buffer, uint32_t subpa ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4096); - radv_emit_subpass_barrier(cmd_buffer, &subpass->start_barrier); + radv_emit_subpass_barrier(cmd_buffer, subpass, &subpass->start_barrier); radv_cmd_buffer_set_subpass(cmd_buffer, subpass); @@ -7284,7 +7280,8 @@ radv_CmdEndRenderPass2(VkCommandBuffer commandBuffer, const VkSubpassEndInfo *pS radv_mark_noncoherent_rb(cmd_buffer); - radv_emit_subpass_barrier(cmd_buffer, &cmd_buffer->state.pass->end_barrier); + radv_emit_subpass_barrier(cmd_buffer, cmd_buffer->state.subpass, + &cmd_buffer->state.pass->end_barrier); radv_cmd_buffer_end_subpass(cmd_buffer); diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 12d54f1cf60..160c451f75a 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -7054,7 +7054,6 @@ radv_CreateFramebuffer(VkDevice _device, const VkFramebufferCreateInfo *pCreateI framebuffer->width = pCreateInfo->width; framebuffer->height = pCreateInfo->height; framebuffer->layers = pCreateInfo->layers; - framebuffer->imageless = !!imageless_create_info; if (!imageless_create_info) { for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) { diff --git a/src/amd/vulkan/radv_meta_resolve_cs.c b/src/amd/vulkan/radv_meta_resolve_cs.c index 0e8bad1dde0..b2440e108d2 100644 --- a/src/amd/vulkan/radv_meta_resolve_cs.c +++ b/src/amd/vulkan/radv_meta_resolve_cs.c @@ -795,7 +795,7 @@ radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer) barrier.src_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; barrier.src_access_mask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; barrier.dst_access_mask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; - radv_emit_subpass_barrier(cmd_buffer, &barrier); + radv_emit_subpass_barrier(cmd_buffer, subpass, &barrier); for (uint32_t i = 0; i < subpass->color_count; ++i) { struct radv_subpass_attachment src_att = subpass->color_attachments[i]; diff --git a/src/amd/vulkan/radv_meta_resolve_fs.c b/src/amd/vulkan/radv_meta_resolve_fs.c index 1733dce702f..50d86768c64 100644 --- a/src/amd/vulkan/radv_meta_resolve_fs.c +++ b/src/amd/vulkan/radv_meta_resolve_fs.c @@ -1078,7 +1078,7 @@ radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer) barrier.src_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; barrier.src_access_mask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; barrier.dst_access_mask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT; - radv_emit_subpass_barrier(cmd_buffer, &barrier); + radv_emit_subpass_barrier(cmd_buffer, subpass, &barrier); radv_decompress_resolve_subpass_src(cmd_buffer); @@ -1131,7 +1131,7 @@ radv_depth_stencil_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer, barrier.src_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; barrier.src_access_mask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; barrier.dst_access_mask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT; - radv_emit_subpass_barrier(cmd_buffer, &barrier); + radv_emit_subpass_barrier(cmd_buffer, subpass, &barrier); struct radv_subpass_attachment src_att = *subpass->depth_stencil_attachment; struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview; diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 8331ebeb018..4fc60d66aff 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -2410,7 +2410,6 @@ struct radv_framebuffer { uint32_t height; uint32_t layers; - bool imageless; uint32_t attachment_count; struct radv_image_view *attachments[0]; @@ -2423,7 +2422,8 @@ struct radv_subpass_barrier { }; void radv_emit_subpass_barrier(struct radv_cmd_buffer *cmd_buffer, - const struct radv_subpass_barrier *barrier); + const struct radv_subpass *subpass, + const struct radv_subpass_barrier *barrier); struct radv_subpass_attachment { uint32_t attachment;