pvr: fix spm-related renderpass hwr
Signed-off-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com> Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36412>
This commit is contained in:
@@ -1429,6 +1429,13 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
|
||||
VkResult result;
|
||||
|
||||
if (sub_cmd->barrier_store) {
|
||||
/* Store to the SPM scratch buffer. */
|
||||
|
||||
/* The scratch buffer is always needed and allocated to avoid data loss in
|
||||
* case SPM is hit so set the flag unconditionally.
|
||||
*/
|
||||
job->requires_spm_scratch_buffer = true;
|
||||
|
||||
/* There can only ever be one frag job running on the hardware at any one
|
||||
* time, and a context switch is not allowed mid-tile, so instead of
|
||||
* allocating a new scratch buffer we can reuse the SPM scratch buffer to
|
||||
@@ -1474,6 +1481,12 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
|
||||
typed_memcpy(job->pds_bgnd_reg_values,
|
||||
spm_bgobj_state->pds_reg_values,
|
||||
ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
|
||||
|
||||
STATIC_ASSERT(ARRAY_SIZE(job->pds_pr_bgnd_reg_values) ==
|
||||
ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
|
||||
typed_memcpy(job->pds_pr_bgnd_reg_values,
|
||||
spm_bgobj_state->pds_reg_values,
|
||||
ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
|
||||
} else if (hw_render->load_op) {
|
||||
const struct pvr_load_op *load_op = hw_render->load_op;
|
||||
struct pvr_pds_upload load_op_program;
|
||||
@@ -1497,27 +1510,19 @@ static VkResult pvr_sub_cmd_gfx_job_init(const struct pvr_device_info *dev_info,
|
||||
job->pds_bgnd_reg_values);
|
||||
}
|
||||
|
||||
/* TODO: In some cases a PR can be removed by storing to the color attachment
|
||||
* and have the background object load directly from it instead of using the
|
||||
* scratch buffer. In those cases we can also set this to "false" and avoid
|
||||
* extra fw overhead.
|
||||
*/
|
||||
/* The scratch buffer is always needed and allocated to avoid data loss in
|
||||
* case SPM is hit so set the flag unconditionally.
|
||||
*/
|
||||
job->requires_spm_scratch_buffer = true;
|
||||
|
||||
memcpy(job->pr_pbe_reg_words,
|
||||
&framebuffer->spm_eot_state_per_render[0].pbe_reg_words,
|
||||
sizeof(job->pbe_reg_words));
|
||||
job->pr_pds_pixel_event_data_offset =
|
||||
framebuffer->spm_eot_state_per_render[0].pixel_event_program_data_offset;
|
||||
|
||||
STATIC_ASSERT(ARRAY_SIZE(job->pds_pr_bgnd_reg_values) ==
|
||||
ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
|
||||
typed_memcpy(job->pds_pr_bgnd_reg_values,
|
||||
spm_bgobj_state->pds_reg_values,
|
||||
ARRAY_SIZE(spm_bgobj_state->pds_reg_values));
|
||||
if (!hw_render->requires_frag_pr) {
|
||||
memcpy(job->pr_pbe_reg_words,
|
||||
job->pbe_reg_words,
|
||||
sizeof(job->pbe_reg_words));
|
||||
job->pr_pds_pixel_event_data_offset = job->pds_pixel_event_data_offset;
|
||||
} else {
|
||||
memcpy(job->pr_pbe_reg_words,
|
||||
&framebuffer->spm_eot_state_per_render[0].pbe_reg_words,
|
||||
sizeof(job->pbe_reg_words));
|
||||
job->pr_pds_pixel_event_data_offset =
|
||||
framebuffer->spm_eot_state_per_render[0]
|
||||
.pixel_event_program_data_offset;
|
||||
}
|
||||
|
||||
render_target = pvr_get_render_target(render_pass_info->pass,
|
||||
framebuffer,
|
||||
|
||||
@@ -274,7 +274,7 @@ static uint32_t pvr_get_accum_format_bitsize(VkFormat vk_format)
|
||||
return vk_format_get_blocksizebits(vk_format);
|
||||
|
||||
if (!vk_format_has_stencil(vk_format))
|
||||
return pvr_get_pbe_accum_format_size_in_bytes(vk_format) * 8;
|
||||
return pvr_get_pbe_accum_format_size_in_bytes(vk_format) * 8;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -2430,23 +2430,27 @@ static uint32_t pvr_count_uses_in_list(uint32_t *attachments,
|
||||
return count;
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
static void
|
||||
pvr_count_uses_in_color_output_list(struct pvr_render_subpass *subpass,
|
||||
uint32_t attach_idx)
|
||||
uint32_t attach_idx,
|
||||
uint32_t *color_output_count_out,
|
||||
uint32_t *resolve_output_count_out)
|
||||
{
|
||||
uint32_t count = 0U;
|
||||
uint32_t resolve_count = 0U;
|
||||
uint32_t color_count = 0U;
|
||||
|
||||
for (uint32_t i = 0U; i < subpass->color_count; i++) {
|
||||
if (subpass->color_attachments[i] == attach_idx) {
|
||||
count++;
|
||||
color_count++;
|
||||
|
||||
if (subpass->resolve_attachments &&
|
||||
subpass->resolve_attachments[i] != VK_ATTACHMENT_UNUSED)
|
||||
count++;
|
||||
resolve_count++;
|
||||
}
|
||||
}
|
||||
|
||||
return count;
|
||||
*color_output_count_out = color_count;
|
||||
*resolve_output_count_out = resolve_count;
|
||||
}
|
||||
|
||||
void pvr_destroy_renderpass_hwsetup(const VkAllocationCallbacks *alloc,
|
||||
@@ -2488,6 +2492,7 @@ VkResult pvr_create_renderpass_hwsetup(
|
||||
struct pvr_renderpass_hw_map *subpass_map;
|
||||
struct pvr_renderpass_hwsetup *hw_setup;
|
||||
struct pvr_renderpass_context *ctx;
|
||||
bool requires_frag_pr = false;
|
||||
bool *surface_allocate;
|
||||
VkResult result;
|
||||
|
||||
@@ -2587,21 +2592,35 @@ VkResult pvr_create_renderpass_hwsetup(
|
||||
/* Count the number of references to this attachment in subpasses. */
|
||||
for (uint32_t j = 0U; j < pass->subpass_count; j++) {
|
||||
struct pvr_render_subpass *subpass = &pass->subpasses[j];
|
||||
const uint32_t color_output_uses =
|
||||
pvr_count_uses_in_color_output_list(subpass, i);
|
||||
const uint32_t input_attachment_uses =
|
||||
pvr_count_uses_in_list(subpass->input_attachments,
|
||||
subpass->input_count,
|
||||
i);
|
||||
uint32_t resolve_output_uses;
|
||||
uint32_t color_output_uses;
|
||||
uint32_t total_output_uses;
|
||||
|
||||
if (color_output_uses != 0U || input_attachment_uses != 0U)
|
||||
pvr_count_uses_in_color_output_list(subpass,
|
||||
i,
|
||||
&color_output_uses,
|
||||
&resolve_output_uses);
|
||||
|
||||
total_output_uses = color_output_uses + resolve_output_uses;
|
||||
|
||||
if (total_output_uses != 0U || input_attachment_uses != 0U)
|
||||
int_attach->last_read = j;
|
||||
|
||||
int_attach->remaining_count +=
|
||||
color_output_uses + input_attachment_uses;
|
||||
total_output_uses + input_attachment_uses;
|
||||
|
||||
if ((uint32_t)subpass->depth_stencil_attachment == i)
|
||||
int_attach->remaining_count++;
|
||||
|
||||
requires_frag_pr |= resolve_output_uses != 0;
|
||||
/* TODO: Should this be checking the normal attachment store op? */
|
||||
requires_frag_pr |= color_output_uses != 0 &&
|
||||
pass->attachments[i].stencil_store_op !=
|
||||
VK_ATTACHMENT_STORE_OP_STORE;
|
||||
}
|
||||
|
||||
if (int_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
|
||||
@@ -2691,6 +2710,9 @@ VkResult pvr_create_renderpass_hwsetup(
|
||||
/* Finalise the last in-progress render. */
|
||||
result = pvr_close_render(device, ctx);
|
||||
|
||||
for (uint32_t i = 0; i < hw_setup->render_count; i++)
|
||||
hw_setup->renders[i].requires_frag_pr = requires_frag_pr;
|
||||
|
||||
end_create_renderpass_hwsetup:
|
||||
if (result != VK_SUCCESS) {
|
||||
pvr_free_render(ctx);
|
||||
|
||||
@@ -255,6 +255,8 @@ struct pvr_renderpass_hwsetup_render {
|
||||
bool has_side_effects;
|
||||
|
||||
struct pvr_load_op *load_op;
|
||||
|
||||
bool requires_frag_pr;
|
||||
};
|
||||
|
||||
struct pvr_renderpass_hw_map {
|
||||
|
||||
Reference in New Issue
Block a user