diff --git a/src/imagination/vulkan/pvr_blit.c b/src/imagination/vulkan/pvr_blit.c
index 80ed1daea8d..288f6dffd40 100644
--- a/src/imagination/vulkan/pvr_blit.c
+++ b/src/imagination/vulkan/pvr_blit.c
@@ -25,6 +25,7 @@
 #include
 #include
 
+#include "pvr_clear.h"
 #include "pvr_csb.h"
 #include "pvr_private.h"
 #include "util/list.h"
@@ -141,13 +142,449 @@ void pvr_CmdCopyBuffer2KHR(VkCommandBuffer commandBuffer,
    pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
 }
 
+/**
+ * \brief Returns the maximum number of layers to clear starting from base_layer
+ * that contain or match the target rectangle.
+ *
+ * Clear rects with a layerCount of 0 clear nothing and are skipped, as are
+ * rects whose layer range does not include base_layer.
+ *
+ * \param[in] target_rect      The region which the clear should contain or
+ *                             match.
+ * \param[in] base_layer       The layer index to start at.
+ * \param[in] clear_rect_count Amount of clear_rects
+ * \param[in] clear_rects      Array of clear rects.
+ *
+ * \return Max number of layers that cover or match the target region.
+ */
+static uint32_t
+pvr_get_max_layers_covering_target(VkRect2D target_rect,
+                                   uint32_t base_layer,
+                                   uint32_t clear_rect_count,
+                                   const VkClearRect *clear_rects)
+{
+   const int32_t target_x0 = target_rect.offset.x;
+   const int32_t target_x1 = target_x0 + (int32_t)target_rect.extent.width;
+   const int32_t target_y0 = target_rect.offset.y;
+   const int32_t target_y1 = target_y0 + (int32_t)target_rect.extent.height;
+
+   uint32_t layer_count = 0;
+
+   assert((int64_t)target_x0 + (int64_t)target_rect.extent.width <= INT32_MAX);
+   assert((int64_t)target_y0 + (int64_t)target_rect.extent.height <= INT32_MAX);
+
+   for (uint32_t i = 0; i < clear_rect_count; i++) {
+      const VkClearRect *clear_rect = &clear_rects[i];
+      const uint32_t max_layer =
+         clear_rect->baseArrayLayer + clear_rect->layerCount;
+      bool target_is_covered;
+      int32_t x0, x1;
+      int32_t y0, y1;
+
+      /* A rect covering no layers can't contribute to the layer count. */
+      if (clear_rect->layerCount == 0)
+         continue;
+
+      assert((uint64_t)clear_rect->baseArrayLayer + clear_rect->layerCount <=
+             UINT32_MAX);
+
+      /* Check for layer intersection. */
+      if (clear_rect->baseArrayLayer > base_layer || max_layer <= base_layer)
+         continue;
+
+      x0 = clear_rect->rect.offset.x;
+      x1 = x0 + (int32_t)clear_rect->rect.extent.width;
+      y0 = clear_rect->rect.offset.y;
+      y1 = y0 + (int32_t)clear_rect->rect.extent.height;
+
+      assert((int64_t)x0 + (int64_t)clear_rect->rect.extent.width <= INT32_MAX);
+      assert((int64_t)y0 + (int64_t)clear_rect->rect.extent.height <=
+             INT32_MAX);
+
+      target_is_covered = x0 <= target_x0 && x1 >= target_x1;
+      target_is_covered &= y0 <= target_y0 && y1 >= target_y1;
+
+      if (target_is_covered)
+         layer_count = MAX2(layer_count, max_layer - base_layer);
+   }
+
+   return layer_count;
+}
+
+/**
+ * \brief Returns true if the vertex shader is required to output the render
+ * target id to pick the texture array layer.
+ *
+ * That is the case when the device has gs_rta_support and at least one rect
+ * starts at a non-zero layer or covers more than one layer.
+ */
+static inline bool
+pvr_clear_needs_rt_id_output(struct pvr_device_info *dev_info,
+                             uint32_t rect_count,
+                             const VkClearRect *rects)
+{
+   if (!PVR_HAS_FEATURE(dev_info, gs_rta_support))
+      return false;
+
+   for (uint32_t i = 0; i < rect_count; i++) {
+      if (rects[i].baseArrayLayer != 0 || rects[i].layerCount > 1)
+         return true;
+   }
+
+   return false;
+}
+
+/**
+ * \brief Maps a clear attachment's aspect mask to the matching combination of
+ * PVR_STATIC_CLEAR_*_BIT flags.
+ *
+ * The result is used to index the device's static clear ppp_templates. Any
+ * other aspect combination is treated as unreachable.
+ */
+static inline uint32_t
+pvr_clear_template_idx_from_aspect(VkImageAspectFlags aspect)
+{
+   switch (aspect) {
+   case VK_IMAGE_ASPECT_COLOR_BIT:
+      /* From the Vulkan 1.3.229 spec VUID-VkClearAttachment-aspectMask-00019:
+       *
+       *    "If aspectMask includes VK_IMAGE_ASPECT_COLOR_BIT, it must not
+       *    include VK_IMAGE_ASPECT_DEPTH_BIT or VK_IMAGE_ASPECT_STENCIL_BIT"
+       *
+       */
+      return PVR_STATIC_CLEAR_COLOR_BIT;
+
+   case VK_IMAGE_ASPECT_DEPTH_BIT:
+      return PVR_STATIC_CLEAR_DEPTH_BIT;
+
+   case VK_IMAGE_ASPECT_STENCIL_BIT:
+      return PVR_STATIC_CLEAR_STENCIL_BIT;
+
+   case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
+      return PVR_STATIC_CLEAR_DEPTH_BIT | PVR_STATIC_CLEAR_STENCIL_BIT;
+
+   default:
+      unreachable("Invalid aspect mask for clear.");
+      return 0;
+   }
+}
+
+/**
+ * \brief Emits a clear into the current graphics sub command's control stream
+ * for every attachment/rect pair.
+ *
+ * Clears of color attachments, and of depth when z_replicate is in use, are
+ * not implemented yet (see the pvr_finishme() calls).
+ *
+ * \param[in] cmd_buffer       Command buffer being recorded.
+ * \param[in] attachment_count Number of entries in attachments.
+ * \param[in] attachments      Attachments to clear.
+ * \param[in] rect_count       Number of entries in rects.
+ * \param[in] rects            Regions and layer ranges to clear.
+ */
+static void pvr_clear_attachments(struct pvr_cmd_buffer *cmd_buffer,
+                                  uint32_t attachment_count,
+                                  const VkClearAttachment *attachments,
+                                  uint32_t rect_count,
+                                  const VkClearRect *rects)
+{
+   const struct pvr_render_pass *pass = cmd_buffer->state.render_pass_info.pass;
+   struct pvr_render_pass_info *pass_info = &cmd_buffer->state.render_pass_info;
+   const struct pvr_renderpass_hwsetup_subpass *hw_pass =
+      pvr_get_hw_subpass(pass, pass_info->subpass_idx);
+   struct pvr_sub_cmd_gfx *sub_cmd = &cmd_buffer->state.current_sub_cmd->gfx;
+   struct pvr_device_info *dev_info = &cmd_buffer->device->pdevice->dev_info;
+   bool z_replicate = hw_pass->z_replicate != -1;
+   uint32_t vs_output_size_in_bytes;
+   bool vs_has_rt_id_output;
+
+   /* TODO: This function can be optimized so that most of the device memory
+    * gets allocated together in one go and then filled as needed. There might
+    * also be opportunities to reuse pds code and data segments.
+    */
+
+   assert(cmd_buffer->state.current_sub_cmd->type == PVR_SUB_CMD_TYPE_GRAPHICS);
+
+   pvr_reset_graphics_dirty_state(cmd_buffer, false);
+
+   /* We'll be emitting to the control stream. */
+   sub_cmd->empty_cmd = false;
+
+   vs_has_rt_id_output =
+      pvr_clear_needs_rt_id_output(dev_info, rect_count, rects);
+
+   /* 4 because we're expecting the USC to output X, Y, Z, and W. */
+   vs_output_size_in_bytes = 4 * sizeof(uint32_t);
+   if (vs_has_rt_id_output)
+      vs_output_size_in_bytes += sizeof(uint32_t);
+
+   for (uint32_t i = 0; i < attachment_count; i++) {
+      const VkClearAttachment *attachment = &attachments[i];
+      struct pvr_pds_vertex_shader_program pds_program;
+      struct pvr_pds_upload pds_program_upload = { 0 };
+      uint64_t current_base_array_layer = ~0;
+      VkResult result;
+      float depth;
+
+      if (attachment->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) {
+         pvr_finishme("Implement clear for color attachment.");
+      } else if (z_replicate &&
+                 attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
+         pvr_finishme("Implement clear for depth/depth+stencil attachment on "
+                      "z_replicate.");
+      } else {
+         struct pvr_static_clear_ppp_template template;
+         uint32_t template_idx;
+         struct pvr_bo *pvr_bo;
+
+         template_idx =
+            pvr_clear_template_idx_from_aspect(attachment->aspectMask);
+         template =
+            cmd_buffer->device->static_clear_state.ppp_templates[template_idx];
+
+         if (attachment->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
+            template.config.ispa.sref =
+               attachment->clearValue.depthStencil.stencil;
+         }
+
+         if (vs_has_rt_id_output) {
+            template.config.output_sel.rhw_pres = true;
+            template.config.output_sel.render_tgt_pres = true;
+            template.config.output_sel.vtxsize = 4 + 1;
+         }
+
+         result = pvr_emit_ppp_from_template(&sub_cmd->control_stream,
+                                             &template,
+                                             &pvr_bo);
+         if (result != VK_SUCCESS) {
+            cmd_buffer->state.status = result;
+            return;
+         }
+
+         list_add(&pvr_bo->link, &cmd_buffer->bo_list);
+      }
+
+      if (attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
+         depth = attachment->clearValue.depthStencil.depth;
+      else
+         depth = 1.0f;
+
+      if (vs_has_rt_id_output) {
+         const struct pvr_device_static_clear_state *dev_clear_state =
+            &cmd_buffer->device->static_clear_state;
+         const struct pvr_bo *multi_layer_vert_bo =
+            dev_clear_state->usc_multi_layer_vertex_shader_bo;
+
+         /* We can't use the device's passthrough pds program since it doesn't
+          * have iterate_instance_id enabled. We'll be uploading code sections
+          * per each clear rect.
+          */
+
+         /* TODO: See if we can allocate all the code section memory in one go.
+          * We'd need to make sure that changing instance_id_modifier doesn't
+          * change the code section size.
+          * Also check if we can reuse the same code segment for each rect.
+          * Seems like the instance_id_modifier is written into the data section
+          * and used by the pds ADD instruction that way instead of it being
+          * embedded into the code section.
+          */
+
+         pvr_pds_clear_rta_vertex_shader_program_init_base(&pds_program,
+                                                           multi_layer_vert_bo);
+      } else {
+         /* We can reuse the device's code section but we'll need to upload data
+          * sections so initialize the program.
+          */
+         pvr_pds_clear_vertex_shader_program_init_base(
+            &pds_program,
+            cmd_buffer->device->static_clear_state.usc_vertex_shader_bo);
+
+         pds_program_upload.code_offset =
+            cmd_buffer->device->static_clear_state.pds.code_offset;
+         /* TODO: The code size doesn't get used by pvr_clear_vdm_state() maybe
+          * let's change its interface to make that clear and not set this?
+          */
+         pds_program_upload.code_size =
+            cmd_buffer->device->static_clear_state.pds.code_size;
+      }
+
+      for (uint32_t j = 0; j < rect_count; j++) {
+         struct pvr_pds_upload pds_program_data_upload;
+         const VkClearRect *clear_rect = &rects[j];
+         struct pvr_bo *vertices_bo;
+         uint32_t *vdm_cs_buffer;
+
+         if (!PVR_HAS_FEATURE(dev_info, gs_rta_support) &&
+             (clear_rect->baseArrayLayer != 0 || clear_rect->layerCount > 1)) {
+            pvr_finishme("Add deferred RTA clear.");
+
+            if (clear_rect->baseArrayLayer != 0)
+               continue;
+         }
+
+         /* TODO: Allocate all the buffers in one go before the loop, and add
+          * support to multi-alloc bo.
+          */
+         result = pvr_clear_vertices_upload(cmd_buffer->device,
+                                            &clear_rect->rect,
+                                            depth,
+                                            &vertices_bo);
+         if (result != VK_SUCCESS) {
+            cmd_buffer->state.status = result;
+            return;
+         }
+
+         list_add(&vertices_bo->link, &cmd_buffer->bo_list);
+
+         if (vs_has_rt_id_output) {
+            if (current_base_array_layer != clear_rect->baseArrayLayer) {
+               const uint32_t base_array_layer = clear_rect->baseArrayLayer;
+               struct pvr_pds_upload pds_program_code_upload;
+
+               result =
+                  pvr_pds_clear_rta_vertex_shader_program_create_and_upload_code(
+                     &pds_program,
+                     cmd_buffer,
+                     base_array_layer,
+                     &pds_program_code_upload);
+               if (result != VK_SUCCESS) {
+                  cmd_buffer->state.status = result;
+                  return;
+               }
+
+               pds_program_upload.code_offset =
+                  pds_program_code_upload.code_offset;
+               /* TODO: The code size doesn't get used by pvr_clear_vdm_state()
+                * maybe let's change its interface to make that clear and not
+                * set this?
+                */
+               pds_program_upload.code_size = pds_program_code_upload.code_size;
+
+               current_base_array_layer = base_array_layer;
+            }
+
+            result =
+               pvr_pds_clear_rta_vertex_shader_program_create_and_upload_data(
+                  &pds_program,
+                  cmd_buffer,
+                  vertices_bo,
+                  &pds_program_data_upload);
+            /* NOTE(review): unlike the failure paths above, status isn't set
+             * here; presumably the helper records it on cmd_buffer. Confirm.
+             */
+            if (result != VK_SUCCESS)
+               return;
+         } else {
+            result = pvr_pds_clear_vertex_shader_program_create_and_upload_data(
+               &pds_program,
+               cmd_buffer,
+               vertices_bo,
+               &pds_program_data_upload);
+            /* NOTE(review): same caveat as the RTA path; status isn't set. */
+            if (result != VK_SUCCESS)
+               return;
+         }
+
+         pds_program_upload.data_offset = pds_program_data_upload.data_offset;
+         pds_program_upload.data_size = pds_program_data_upload.data_size;
+
+         vdm_cs_buffer = pvr_csb_alloc_dwords(&sub_cmd->control_stream,
+                                              PVR_CLEAR_VDM_STATE_DWORD_COUNT);
+         if (!vdm_cs_buffer) {
+            result = vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY);
+            cmd_buffer->state.status = result;
+            return;
+         }
+
+         pvr_pack_clear_vdm_state(dev_info,
+                                  &pds_program_upload,
+                                  pds_program.temps_used,
+                                  4,
+                                  vs_output_size_in_bytes,
+                                  clear_rect->layerCount,
+                                  vdm_cs_buffer);
+      }
+   }
+}
+
+/**
+ * \brief Updates the sub command's depth/stencil modification and usage
+ * tracking for the requested clears, then records the clears themselves.
+ */
 void pvr_CmdClearAttachments(VkCommandBuffer commandBuffer,
                              uint32_t attachmentCount,
                              const VkClearAttachment *pAttachments,
                              uint32_t rectCount,
                              const VkClearRect *pRects)
 {
-   assert(!"Unimplemented");
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
+   struct pvr_sub_cmd_gfx *sub_cmd = &state->current_sub_cmd->gfx;
+
+   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
+   assert(state->current_sub_cmd->type == PVR_SUB_CMD_TYPE_GRAPHICS);
+
+   /* TODO: There are some optimizations that can be made here:
+    *  - For a full screen clear, update the clear values for the corresponding
+    *    attachment index.
+    *  - For a full screen color attachment clear, add its index to a load op
+    *    override to add it to the background shader. This will elide any load
+    *    op loads currently in the background shader as well as the usual
+    *    frag kick for geometry clear.
+    */
+
+   /* If we have any depth/stencil clears, update the sub command depth/stencil
+    * modification and usage flags.
+    */
+   if (state->depth_format != VK_FORMAT_UNDEFINED) {
+      uint32_t full_screen_clear_count;
+      bool has_stencil_clear = false;
+      bool has_depth_clear = false;
+
+      for (uint32_t i = 0; i < attachmentCount; i++) {
+         const VkImageAspectFlags aspect_mask = pAttachments[i].aspectMask;
+
+         if (aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT)
+            has_stencil_clear = true;
+
+         if (aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT)
+            has_depth_clear = true;
+
+         if (has_stencil_clear && has_depth_clear)
+            break;
+      }
+
+      sub_cmd->modifies_stencil |= has_stencil_clear;
+      sub_cmd->modifies_depth |= has_depth_clear;
+
+      /* We only care about clears that have a baseArrayLayer of 0 as any
+       * attachment clears we move to the background shader must apply to all of
+       * the attachment's sub resources.
+       */
+      full_screen_clear_count =
+         pvr_get_max_layers_covering_target(state->render_pass_info.render_area,
+                                            0,
+                                            rectCount,
+                                            pRects);
+
+      if (full_screen_clear_count > 0) {
+         if (has_stencil_clear &&
+             sub_cmd->stencil_usage == PVR_DEPTH_STENCIL_USAGE_UNDEFINED) {
+            sub_cmd->stencil_usage = PVR_DEPTH_STENCIL_USAGE_NEVER;
+         }
+
+         if (has_depth_clear &&
+             sub_cmd->depth_usage == PVR_DEPTH_STENCIL_USAGE_UNDEFINED) {
+            sub_cmd->depth_usage = PVR_DEPTH_STENCIL_USAGE_NEVER;
+         }
+      }
+   }
+
+   pvr_clear_attachments(cmd_buffer,
+                         attachmentCount,
+                         pAttachments,
+                         rectCount,
+                         pRects);
 }
 
 void pvr_CmdResolveImage2KHR(VkCommandBuffer commandBuffer,
diff --git a/src/imagination/vulkan/pvr_clear.c b/src/imagination/vulkan/pvr_clear.c
index 5a6e55136ee..5795b031ea5 100644
--- a/src/imagination/vulkan/pvr_clear.c
+++ b/src/imagination/vulkan/pvr_clear.c
@@ -237,10 +237,10 @@ VkResult pvr_emit_ppp_from_template(
  * \param[out] pvr_bo_out BO upload object.
  * \return VK_SUCCESS if the upload succeeded.
  */
-static VkResult pvr_clear_vertices_upload(struct pvr_device *device,
-                                          const VkRect2D *rect,
-                                          float depth,
-                                          struct pvr_bo **const pvr_bo_out)
+VkResult pvr_clear_vertices_upload(struct pvr_device *device,
+                                   const VkRect2D *rect,
+                                   float depth,
+                                   struct pvr_bo **const pvr_bo_out)
 {
    const float y1 = (float)(rect->offset.y + rect->extent.height);
    const float x1 = (float)(rect->offset.x + rect->extent.width);
diff --git a/src/imagination/vulkan/pvr_clear.h b/src/imagination/vulkan/pvr_clear.h
index f0c6ddd93e4..826ee7ba045 100644
--- a/src/imagination/vulkan/pvr_clear.h
+++ b/src/imagination/vulkan/pvr_clear.h
@@ -185,4 +185,9 @@ void pvr_pack_clear_vdm_state(
    uint32_t layer_count,
    uint32_t state_buffer[const static PVR_CLEAR_VDM_STATE_DWORD_COUNT]);
 
+VkResult pvr_clear_vertices_upload(struct pvr_device *device,
+                                   const VkRect2D *rect,
+                                   float depth,
+                                   struct pvr_bo **const pvr_bo_out);
+
 #endif /* PVR_CLEAR_H */
diff --git a/src/imagination/vulkan/pvr_cmd_buffer.c b/src/imagination/vulkan/pvr_cmd_buffer.c
index c9ef4d12bcd..531a82d5b3a 100644
--- a/src/imagination/vulkan/pvr_cmd_buffer.c
+++ b/src/imagination/vulkan/pvr_cmd_buffer.c
@@ -1636,9 +1636,8 @@ VkResult pvr_cmd_buffer_end_sub_cmd(struct pvr_cmd_buffer *cmd_buffer)
    return VK_SUCCESS;
 }
 
-static void
-pvr_reset_graphics_dirty_state(struct pvr_cmd_buffer *const cmd_buffer,
-                               bool start_geom)
+void pvr_reset_graphics_dirty_state(struct pvr_cmd_buffer *const cmd_buffer,
+                                    bool start_geom)
 {
    struct vk_dynamic_graphics_state *const dynamic_state =
       &cmd_buffer->vk.dynamic_graphics_state;
@@ -2165,7 +2164,7 @@ static VkResult pvr_init_render_targets(struct pvr_device *device,
    return VK_SUCCESS;
 }
 
-static const struct pvr_renderpass_hwsetup_subpass *
+const struct pvr_renderpass_hwsetup_subpass *
 pvr_get_hw_subpass(const struct pvr_render_pass *pass, const uint32_t subpass)
 {
    const struct pvr_renderpass_hw_map *map =
diff --git a/src/imagination/vulkan/pvr_private.h b/src/imagination/vulkan/pvr_private.h
index c1a0b9f4be9..5a4c277f44b 100644
--- a/src/imagination/vulkan/pvr_private.h
+++ b/src/imagination/vulkan/pvr_private.h
@@ -1586,6 +1586,12 @@ void pvr_device_destroy_compute_query_programs(struct pvr_device *device);
 VkResult pvr_add_query_program(struct pvr_cmd_buffer *cmd_buffer,
                                const struct pvr_query_info *query_info);
 
+void pvr_reset_graphics_dirty_state(struct pvr_cmd_buffer *const cmd_buffer,
+                                    bool start_geom);
+
+const struct pvr_renderpass_hwsetup_subpass *
+pvr_get_hw_subpass(const struct pvr_render_pass *pass, const uint32_t subpass);
+
 #define PVR_FROM_HANDLE(__pvr_type, __name, __handle) \
    VK_FROM_HANDLE(__pvr_type, __name, __handle)
 