From 5c420c940a9ce7204cd685ce6536149c505eeb7f Mon Sep 17 00:00:00 2001
From: Karmjit Mahil
Date: Tue, 3 Oct 2023 16:11:45 +0100
Subject: [PATCH] pvr: Fix SPM-related renderpass HWR

Previously, a fragment job's partial render (PR) state was always
pointed at the SPM scratch buffer EOT and background object state, and
the SPM scratch buffer flag was set unconditionally.

Track whether the render pass actually requires a fragment PR when
creating the renderpass hw setup, based on resolve attachment usage and
attachment store ops, and store the result on each hw render as
requires_frag_pr. When no fragment PR is required, reuse the job's own
PBE words and pixel event data offset for the PR instead of the scratch
buffer state. The barrier store path, which stores to the SPM scratch
buffer, now sets the scratch buffer flag and the PR background object
state itself.

Signed-off-by: Karmjit Mahil
Acked-by: Erik Faye-Lund
Part-of:
---
 src/imagination/vulkan/pvr_cmd_buffer.c | 47 ++++++++++++++-----------
 src/imagination/vulkan/pvr_hw_pass.c    | 44 +++++++++++++++++------
 src/imagination/vulkan/pvr_hw_pass.h    |  2 ++
 3 files changed, 61 insertions(+), 32 deletions(-)

diff --git a/src/imagination/vulkan/pvr_cmd_buffer.c b/src/imagination/vulkan/pvr_cmd_buffer.c
index 531f45ceca3..20d256d65a2 100644
--- a/src/imagination/vulkan/pvr_cmd_buffer.c
+++ b/src/imagination/vulkan/pvr_cmd_buffer.c
@@ -1429,6 +1429,13 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
    VkResult result;
 
    if (sub_cmd->barrier_store) {
+      /* Store to the SPM scratch buffer. */
+
+      /* The scratch buffer is always needed and allocated to avoid data loss in
+       * case SPM is hit, so set the flag unconditionally.
+       */
+      job->requires_spm_scratch_buffer = true;
+
       /* There can only ever be one frag job running on the hardware at any one
        * time, and a context switch is not allowed mid-tile, so instead of
        * allocating a new scratch buffer we can reuse the SPM scratch buffer to
@@ -1474,6 +1481,12 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
       typed_memcpy(job->pds_bgnd_reg_values,
                    spm_bgobj_state->pds_reg_values,
                    ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
+
+      STATIC_ASSERT(ARRAY_SIZE(job->pds_pr_bgnd_reg_values) ==
+                    ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
+      typed_memcpy(job->pds_pr_bgnd_reg_values,
+                   spm_bgobj_state->pds_reg_values,
+                   ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
    } else if (hw_render->load_op) {
       const struct pvr_load_op *load_op = hw_render->load_op;
       struct pvr_pds_upload load_op_program;
@@ -1497,27 +1510,19 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
                                          job->pds_bgnd_reg_values);
    }
 
-   /* TODO: In some cases a PR can be removed by storing to the color attachment
-    * and have the background object load directly from it instead of using the
-    * scratch buffer. In those cases we can also set this to "false" and avoid
-    * extra fw overhead.
-    */
-   /* The scratch buffer is always needed and allocated to avoid data loss in
-    * case SPM is hit so set the flag unconditionally.
-    */
-   job->requires_spm_scratch_buffer = true;
-
-   memcpy(job->pr_pbe_reg_words,
-          &framebuffer->spm_eot_state_per_render[0].pbe_reg_words,
-          sizeof(job->pbe_reg_words));
-   job->pr_pds_pixel_event_data_offset =
-      framebuffer->spm_eot_state_per_render[0].pixel_event_program_data_offset;
-
-   STATIC_ASSERT(ARRAY_SIZE(job->pds_pr_bgnd_reg_values) ==
-                 ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
-   typed_memcpy(job->pds_pr_bgnd_reg_values,
-                spm_bgobj_state->pds_reg_values,
-                ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
+   if (!hw_render->requires_frag_pr) {
+      memcpy(job->pr_pbe_reg_words,
+             job->pbe_reg_words,
+             sizeof(job->pbe_reg_words));
+      job->pr_pds_pixel_event_data_offset = job->pds_pixel_event_data_offset;
+   } else {
+      memcpy(job->pr_pbe_reg_words,
+             &framebuffer->spm_eot_state_per_render[0].pbe_reg_words,
+             sizeof(job->pbe_reg_words));
+      job->pr_pds_pixel_event_data_offset =
+         framebuffer->spm_eot_state_per_render[0]
+            .pixel_event_program_data_offset;
+   }
 
    render_target = pvr_get_render_target(render_pass_info->pass,
                                          framebuffer,
diff --git a/src/imagination/vulkan/pvr_hw_pass.c b/src/imagination/vulkan/pvr_hw_pass.c
index 908dca1b582..eac7f445e55 100644
--- a/src/imagination/vulkan/pvr_hw_pass.c
+++ b/src/imagination/vulkan/pvr_hw_pass.c
@@ -274,7 +274,7 @@ static uint32_t pvr_get_accum_format_bitsize(VkFormat vk_format)
       return vk_format_get_blocksizebits(vk_format);
 
    if (!vk_format_has_stencil(vk_format))
-      return pvr_get_pbe_accum_format_size_in_bytes(vk_format) * 8;
+      return pvr_get_pbe_accum_format_size_in_bytes(vk_format) * 8;
 
    return 0;
 }
@@ -2430,23 +2430,27 @@ static uint32_t pvr_count_uses_in_list(uint32_t *attachments,
    return count;
 }
 
-static uint32_t
+static void
 pvr_count_uses_in_color_output_list(struct pvr_render_subpass *subpass,
-                                    uint32_t attach_idx)
+                                    uint32_t attach_idx,
+                                    uint32_t *color_output_count_out,
+                                    uint32_t *resolve_output_count_out)
 {
-   uint32_t count = 0U;
+   uint32_t resolve_count = 0U;
+   uint32_t color_count = 0U;
 
    for (uint32_t i = 0U; i < subpass->color_count; i++) {
       if (subpass->color_attachments[i] == attach_idx) {
-         count++;
+         color_count++;
 
          if (subpass->resolve_attachments &&
             subpass->resolve_attachments[i] != VK_ATTACHMENT_UNUSED)
-            count++;
+            resolve_count++;
       }
    }
 
-   return count;
+   *color_output_count_out = color_count;
+   *resolve_output_count_out = resolve_count;
 }
 
 void pvr_destroy_renderpass_hwsetup(const VkAllocationCallbacks *alloc,
@@ -2488,6 +2492,7 @@ VkResult pvr_create_renderpass_hwsetup(
    struct pvr_renderpass_hw_map *subpass_map;
    struct pvr_renderpass_hwsetup *hw_setup;
    struct pvr_renderpass_context *ctx;
+   bool requires_frag_pr = false;
    bool *surface_allocate;
    VkResult result;
 
@@ -2587,21 +2592,35 @@ VkResult pvr_create_renderpass_hwsetup(
       /* Count the number of references to this attachment in subpasses. */
       for (uint32_t j = 0U; j < pass->subpass_count; j++) {
          struct pvr_render_subpass *subpass = &pass->subpasses[j];
-         const uint32_t color_output_uses =
-            pvr_count_uses_in_color_output_list(subpass, i);
          const uint32_t input_attachment_uses =
             pvr_count_uses_in_list(subpass->input_attachments,
                                    subpass->input_count,
                                    i);
+         uint32_t resolve_output_uses;
+         uint32_t color_output_uses;
+         uint32_t total_output_uses;
 
-         if (color_output_uses != 0U || input_attachment_uses != 0U)
+         pvr_count_uses_in_color_output_list(subpass,
+                                             i,
+                                             &color_output_uses,
+                                             &resolve_output_uses);
+
+         total_output_uses = color_output_uses + resolve_output_uses;
+
+         if (total_output_uses != 0U || input_attachment_uses != 0U)
             int_attach->last_read = j;
 
          int_attach->remaining_count +=
-            color_output_uses + input_attachment_uses;
+            total_output_uses + input_attachment_uses;
 
          if ((uint32_t)subpass->depth_stencil_attachment == i)
             int_attach->remaining_count++;
+
+         requires_frag_pr |= resolve_output_uses != 0;
+         /* TODO: Should this be checking the normal attachment store op? */
+         requires_frag_pr |= color_output_uses != 0 &&
+                             pass->attachments[i].stencil_store_op !=
+                                VK_ATTACHMENT_STORE_OP_STORE;
       }
 
       if (int_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
@@ -2691,6 +2710,9 @@ VkResult pvr_create_renderpass_hwsetup(
    /* Finalise the last in-progress render. */
    result = pvr_close_render(device, ctx);
 
+   for (uint32_t i = 0; i < hw_setup->render_count; i++)
+      hw_setup->renders[i].requires_frag_pr = requires_frag_pr;
+
 end_create_renderpass_hwsetup:
    if (result != VK_SUCCESS) {
       pvr_free_render(ctx);
diff --git a/src/imagination/vulkan/pvr_hw_pass.h b/src/imagination/vulkan/pvr_hw_pass.h
index 8f1eac7a38e..ffe2a4e91c7 100644
--- a/src/imagination/vulkan/pvr_hw_pass.h
+++ b/src/imagination/vulkan/pvr_hw_pass.h
@@ -255,6 +255,8 @@ struct pvr_renderpass_hwsetup_render {
    bool has_side_effects;
 
    struct pvr_load_op *load_op;
+
+   bool requires_frag_pr;
 };
 
 struct pvr_renderpass_hw_map {