pvr: fix spm-related renderpass hwr

Signed-off-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36412>
This commit is contained in:
Karmjit Mahil
2023-10-03 16:11:45 +01:00
committed by Marge Bot
parent a7ec9d7182
commit 5c420c940a
3 changed files with 61 additions and 32 deletions
+26 -21
View File
@@ -1429,6 +1429,13 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
VkResult result;
if (sub_cmd->barrier_store) {
/* Store to the SPM scratch buffer. */
/* The scratch buffer is always needed and allocated to avoid data loss in
* case SPM is hit so set the flag unconditionally.
*/
job->requires_spm_scratch_buffer = true;
/* There can only ever be one frag job running on the hardware at any one
* time, and a context switch is not allowed mid-tile, so instead of
* allocating a new scratch buffer we can reuse the SPM scratch buffer to
@@ -1474,6 +1481,12 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
typed_memcpy(job->pds_bgnd_reg_values,
spm_bgobj_state->pds_reg_values,
ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
STATIC_ASSERT(ARRAY_SIZE(job->pds_pr_bgnd_reg_values) ==
ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
typed_memcpy(job->pds_pr_bgnd_reg_values,
spm_bgobj_state->pds_reg_values,
ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
} else if (hw_render->load_op) {
const struct pvr_load_op *load_op = hw_render->load_op;
struct pvr_pds_upload load_op_program;
@@ -1497,27 +1510,19 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
job->pds_bgnd_reg_values);
}
/* TODO: In some cases a PR can be removed by storing to the color attachment
* and have the background object load directly from it instead of using the
* scratch buffer. In those cases we can also set this to "false" and avoid
* extra fw overhead.
*/
/* The scratch buffer is always needed and allocated to avoid data loss in
* case SPM is hit so set the flag unconditionally.
*/
job->requires_spm_scratch_buffer = true;
memcpy(job->pr_pbe_reg_words,
&framebuffer->spm_eot_state_per_render[0].pbe_reg_words,
sizeof(job->pbe_reg_words));
job->pr_pds_pixel_event_data_offset =
framebuffer->spm_eot_state_per_render[0].pixel_event_program_data_offset;
STATIC_ASSERT(ARRAY_SIZE(job->pds_pr_bgnd_reg_values) ==
ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
typed_memcpy(job->pds_pr_bgnd_reg_values,
spm_bgobj_state->pds_reg_values,
ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
if (!hw_render->requires_frag_pr) {
memcpy(job->pr_pbe_reg_words,
job->pbe_reg_words,
sizeof(job->pbe_reg_words));
job->pr_pds_pixel_event_data_offset = job->pds_pixel_event_data_offset;
} else {
memcpy(job->pr_pbe_reg_words,
&framebuffer->spm_eot_state_per_render[0].pbe_reg_words,
sizeof(job->pbe_reg_words));
job->pr_pds_pixel_event_data_offset =
framebuffer->spm_eot_state_per_render[0]
.pixel_event_program_data_offset;
}
render_target = pvr_get_render_target(render_pass_info->pass,
framebuffer,
+33 -11
View File
@@ -274,7 +274,7 @@ static uint32_t pvr_get_accum_format_bitsize(VkFormat vk_format)
return vk_format_get_blocksizebits(vk_format);
if (!vk_format_has_stencil(vk_format))
return pvr_get_pbe_accum_format_size_in_bytes(vk_format) * 8;
return pvr_get_pbe_accum_format_size_in_bytes(vk_format) * 8;
return 0;
}
@@ -2430,23 +2430,27 @@ static uint32_t pvr_count_uses_in_list(uint32_t *attachments,
return count;
}
static uint32_t
static void
pvr_count_uses_in_color_output_list(struct pvr_render_subpass *subpass,
uint32_t attach_idx)
uint32_t attach_idx,
uint32_t *color_output_count_out,
uint32_t *resolve_output_count_out)
{
uint32_t count = 0U;
uint32_t resolve_count = 0U;
uint32_t color_count = 0U;
for (uint32_t i = 0U; i < subpass->color_count; i++) {
if (subpass->color_attachments[i] == attach_idx) {
count++;
color_count++;
if (subpass->resolve_attachments &&
subpass->resolve_attachments[i] != VK_ATTACHMENT_UNUSED)
count++;
resolve_count++;
}
}
return count;
*color_output_count_out = color_count;
*resolve_output_count_out = resolve_count;
}
void pvr_destroy_renderpass_hwsetup(const VkAllocationCallbacks *alloc,
@@ -2488,6 +2492,7 @@ VkResult pvr_create_renderpass_hwsetup(
struct pvr_renderpass_hw_map *subpass_map;
struct pvr_renderpass_hwsetup *hw_setup;
struct pvr_renderpass_context *ctx;
bool requires_frag_pr = false;
bool *surface_allocate;
VkResult result;
@@ -2587,21 +2592,35 @@ VkResult pvr_create_renderpass_hwsetup(
/* Count the number of references to this attachment in subpasses. */
for (uint32_t j = 0U; j < pass->subpass_count; j++) {
struct pvr_render_subpass *subpass = &pass->subpasses[j];
const uint32_t color_output_uses =
pvr_count_uses_in_color_output_list(subpass, i);
const uint32_t input_attachment_uses =
pvr_count_uses_in_list(subpass->input_attachments,
subpass->input_count,
i);
uint32_t resolve_output_uses;
uint32_t color_output_uses;
uint32_t total_output_uses;
if (color_output_uses != 0U || input_attachment_uses != 0U)
pvr_count_uses_in_color_output_list(subpass,
i,
&color_output_uses,
&resolve_output_uses);
total_output_uses = color_output_uses + resolve_output_uses;
if (total_output_uses != 0U || input_attachment_uses != 0U)
int_attach->last_read = j;
int_attach->remaining_count +=
color_output_uses + input_attachment_uses;
total_output_uses + input_attachment_uses;
if ((uint32_t)subpass->depth_stencil_attachment == i)
int_attach->remaining_count++;
requires_frag_pr |= resolve_output_uses != 0;
/* TODO: Should this be checking the normal attachment store op? */
requires_frag_pr |= color_output_uses != 0 &&
pass->attachments[i].stencil_store_op !=
VK_ATTACHMENT_STORE_OP_STORE;
}
if (int_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
@@ -2691,6 +2710,9 @@ VkResult pvr_create_renderpass_hwsetup(
/* Finalise the last in-progress render. */
result = pvr_close_render(device, ctx);
for (uint32_t i = 0; i < hw_setup->render_count; i++)
hw_setup->renders[i].requires_frag_pr = requires_frag_pr;
end_create_renderpass_hwsetup:
if (result != VK_SUCCESS) {
pvr_free_render(ctx);
+2
View File
@@ -255,6 +255,8 @@ struct pvr_renderpass_hwsetup_render {
bool has_side_effects;
struct pvr_load_op *load_op;
bool requires_frag_pr;
};
struct pvr_renderpass_hw_map {