diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 8d49ad48d28..689286a124d 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -4238,6 +4238,8 @@ cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer, VkAccessFlags2 src_flags = 0; VkAccessFlags2 dst_flags = 0; + VkPipelineStageFlags2 src_stages = 0; + #if GFX_VER < 20 bool apply_sparse_flushes = false; struct anv_device *device = cmd_buffer->device; @@ -4251,6 +4253,8 @@ cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer, src_flags |= dep_info->pMemoryBarriers[i].srcAccessMask; dst_flags |= dep_info->pMemoryBarriers[i].dstAccessMask; + src_stages |= dep_info->pMemoryBarriers[i].srcStageMask; + /* Shader writes to buffers that could then be written by a transfer * command (including queries). */ @@ -4283,6 +4287,8 @@ cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer, src_flags |= buf_barrier->srcAccessMask; dst_flags |= buf_barrier->dstAccessMask; + src_stages |= buf_barrier->srcStageMask; + /* Shader writes to buffers that could then be written by a transfer * command (including queries). */ @@ -4313,6 +4319,8 @@ cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer, src_flags |= img_barrier->srcAccessMask; dst_flags |= img_barrier->dstAccessMask; + src_stages |= img_barrier->srcStageMask; + ANV_FROM_HANDLE(anv_image, image, img_barrier->image); const VkImageSubresourceRange *range = &img_barrier->subresourceRange; @@ -4420,6 +4428,69 @@ cmd_buffer_barrier(struct anv_cmd_buffer *cmd_buffer, anv_pipe_flush_bits_for_access_flags(cmd_buffer, src_flags) | anv_pipe_invalidate_bits_for_access_flags(cmd_buffer, dst_flags); + /* What stage require a stall at pixel scoreboard */ + VkPipelineStageFlags2 pb_stall_stages = + VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT | + VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT | + VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT | + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; + if (anv_cmd_buffer_is_render_queue(cmd_buffer)) { + /* On a render queue, the following stages can also use a pixel shader. + */ + pb_stall_stages |= + VK_PIPELINE_STAGE_2_TRANSFER_BIT | + VK_PIPELINE_STAGE_2_RESOLVE_BIT | + VK_PIPELINE_STAGE_2_BLIT_BIT | + VK_PIPELINE_STAGE_2_CLEAR_BIT; + } + VkPipelineStageFlags2 cs_stall_stages = + VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT | + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT | + VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR | + VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR | + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; + if (anv_cmd_buffer_is_compute_queue(cmd_buffer)) { + /* On a compute queue, the following stages can also use a compute + * shader. + */ + cs_stall_stages |= + VK_PIPELINE_STAGE_2_TRANSFER_BIT | + VK_PIPELINE_STAGE_2_RESOLVE_BIT | + VK_PIPELINE_STAGE_2_BLIT_BIT | + VK_PIPELINE_STAGE_2_CLEAR_BIT; + } else if (anv_cmd_buffer_is_render_queue(cmd_buffer) && + cmd_buffer->state.current_pipeline == GPGPU) { + /* In GPGPU mode, the render queue can also use a compute shader for + * transfer operations. + */ + cs_stall_stages |= VK_PIPELINE_STAGE_2_TRANSFER_BIT; + } + + /* Prior to Gfx20, we can restrict pb-stall/cs-stall to some pipeline + * modes. Gfx20 doesn't do pipeline switches so we have to assume the worse + * case. + */ + const bool needs_pb_stall = + anv_cmd_buffer_is_render_queue(cmd_buffer) && +#if GFX_VER < 20 + cmd_buffer->state.current_pipeline == _3D && +#endif + (src_stages & pb_stall_stages); + if (needs_pb_stall) { + bits |= GFX_VERx10 >= 125 ? + ANV_PIPE_PSS_STALL_SYNC_BIT : + ANV_PIPE_STALL_AT_SCOREBOARD_BIT; + } + const bool needs_cs_stall = + anv_cmd_buffer_is_render_or_compute_queue(cmd_buffer) && +#if GFX_VER < 20 + cmd_buffer->state.current_pipeline == GPGPU && +#endif + (src_stages & cs_stall_stages); + if (needs_cs_stall) + bits |= ANV_PIPE_CS_STALL_BIT; + #if GFX_VER < 20 /* Our HW implementation of the sparse feature lives in the GAM unit * (interface between all the GPU caches and external memory). As a result