anv: use stage mask to deduce cs/pb-stall requirements
When flushing the render target cache for future operations, we need a stall at pixel scoreboard. We likely didn't see any issue until now because a change in render target added the pb-stall. When using a 2 compute shaders with the following pattern : vkCmdDispatch() vkCmdPipelineBarrier() ImageBarrier with (src|dst)AccessMask=0 & identical layout vkCmdDispatch() we should ensure that the first dispatch is completed before executing the second one, otherwise they can race to on resource accesses. This fixes failures in some new CTS tests. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Cc: mesa-stable Reviewed-by: Ivan Briano <ivan.briano@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31676>
This commit is contained in:
committed by
Marge Bot
parent
aabadb30fc
commit
ea2bbe3271
@@ -4238,6 +4238,8 @@ cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
|
||||
VkAccessFlags2 src_flags = 0;
|
||||
VkAccessFlags2 dst_flags = 0;
|
||||
|
||||
VkPipelineStageFlags2 src_stages = 0;
|
||||
|
||||
#if GFX_VER < 20
|
||||
bool apply_sparse_flushes = false;
|
||||
struct anv_device *device = cmd_buffer->device;
|
||||
@@ -4251,6 +4253,8 @@ cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
|
||||
src_flags |= dep_info->pMemoryBarriers[i].srcAccessMask;
|
||||
dst_flags |= dep_info->pMemoryBarriers[i].dstAccessMask;
|
||||
|
||||
src_stages |= dep_info->pMemoryBarriers[i].srcStageMask;
|
||||
|
||||
/* Shader writes to buffers that could then be written by a transfer
|
||||
* command (including queries).
|
||||
*/
|
||||
@@ -4283,6 +4287,8 @@ cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
|
||||
src_flags |= buf_barrier->srcAccessMask;
|
||||
dst_flags |= buf_barrier->dstAccessMask;
|
||||
|
||||
src_stages |= buf_barrier->srcStageMask;
|
||||
|
||||
/* Shader writes to buffers that could then be written by a transfer
|
||||
* command (including queries).
|
||||
*/
|
||||
@@ -4313,6 +4319,8 @@ cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
|
||||
src_flags |= img_barrier->srcAccessMask;
|
||||
dst_flags |= img_barrier->dstAccessMask;
|
||||
|
||||
src_stages |= img_barrier->srcStageMask;
|
||||
|
||||
ANV_FROM_HANDLE(anv_image, image, img_barrier->image);
|
||||
const VkImageSubresourceRange *range = &img_barrier->subresourceRange;
|
||||
|
||||
@@ -4420,6 +4428,69 @@ cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer,
|
||||
anv_pipe_flush_bits_for_access_flags(cmd_buffer, src_flags) |
|
||||
anv_pipe_invalidate_bits_for_access_flags(cmd_buffer, dst_flags);
|
||||
|
||||
/* What stage require a stall at pixel scoreboard */
|
||||
VkPipelineStageFlags2 pb_stall_stages =
|
||||
VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT |
|
||||
VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT |
|
||||
VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT |
|
||||
VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT |
|
||||
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
|
||||
if (anv_cmd_buffer_is_render_queue(cmd_buffer)) {
|
||||
/* On a render queue, the following stages can also use a pixel shader.
|
||||
*/
|
||||
pb_stall_stages |=
|
||||
VK_PIPELINE_STAGE_2_TRANSFER_BIT |
|
||||
VK_PIPELINE_STAGE_2_RESOLVE_BIT |
|
||||
VK_PIPELINE_STAGE_2_BLIT_BIT |
|
||||
VK_PIPELINE_STAGE_2_CLEAR_BIT;
|
||||
}
|
||||
VkPipelineStageFlags2 cs_stall_stages =
|
||||
VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT |
|
||||
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT |
|
||||
VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR |
|
||||
VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR |
|
||||
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT;
|
||||
if (anv_cmd_buffer_is_compute_queue(cmd_buffer)) {
|
||||
/* On a compute queue, the following stages can also use a compute
|
||||
* shader.
|
||||
*/
|
||||
cs_stall_stages |=
|
||||
VK_PIPELINE_STAGE_2_TRANSFER_BIT |
|
||||
VK_PIPELINE_STAGE_2_RESOLVE_BIT |
|
||||
VK_PIPELINE_STAGE_2_BLIT_BIT |
|
||||
VK_PIPELINE_STAGE_2_CLEAR_BIT;
|
||||
} else if (anv_cmd_buffer_is_render_queue(cmd_buffer) &&
|
||||
cmd_buffer->state.current_pipeline == GPGPU) {
|
||||
/* In GPGPU mode, the render queue can also use a compute shader for
|
||||
* transfer operations.
|
||||
*/
|
||||
cs_stall_stages |= VK_PIPELINE_STAGE_2_TRANSFER_BIT;
|
||||
}
|
||||
|
||||
/* Prior to Gfx20, we can restrict pb-stall/cs-stall to some pipeline
|
||||
* modes. Gfx20 doesn't do pipeline switches so we have to assume the worse
|
||||
* case.
|
||||
*/
|
||||
const bool needs_pb_stall =
|
||||
anv_cmd_buffer_is_render_queue(cmd_buffer) &&
|
||||
#if GFX_VER < 20
|
||||
cmd_buffer->state.current_pipeline == _3D &&
|
||||
#endif
|
||||
(src_stages & pb_stall_stages);
|
||||
if (needs_pb_stall) {
|
||||
bits |= GFX_VERx10 >= 125 ?
|
||||
ANV_PIPE_PSS_STALL_SYNC_BIT :
|
||||
ANV_PIPE_STALL_AT_SCOREBOARD_BIT;
|
||||
}
|
||||
const bool needs_cs_stall =
|
||||
anv_cmd_buffer_is_render_or_compute_queue(cmd_buffer) &&
|
||||
#if GFX_VER < 20
|
||||
cmd_buffer->state.current_pipeline == GPGPU &&
|
||||
#endif
|
||||
(src_stages & cs_stall_stages);
|
||||
if (needs_cs_stall)
|
||||
bits |= ANV_PIPE_CS_STALL_BIT;
|
||||
|
||||
#if GFX_VER < 20
|
||||
/* Our HW implementation of the sparse feature lives in the GAM unit
|
||||
* (interface between all the GPU caches and external memory). As a result
|
||||
|
||||
Reference in New Issue
Block a user