From 46e2cc5d4cef59ff4a67e1c3b1ab46a932a8ed57 Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Tue, 17 Jan 2023 10:50:45 -0500 Subject: [PATCH] radv: add some graphics pipeline hints to optimize pipeline bind. This is a costly function, and we want to avoid loading random struct data as much as possible. These struct members aren't accessed anywhere else in the function, so eliminating access avoids some CPU overhead. Reviewed-by: Samuel Pitoiset Part-of: --- src/amd/vulkan/radv_cmd_buffer.c | 9 ++++----- src/amd/vulkan/radv_pipeline.c | 5 +++++ src/amd/vulkan/radv_private.h | 6 ++++++ 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index e8ff9cf106f..1f0fc38af8a 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -6330,7 +6330,7 @@ radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeline } /* Re-emit the provoking vertex mode state because the SGPR idx can be different. */ - if (graphics_pipeline->last_vgt_api_stage_locs[AC_UD_NGG_PROVOKING_VTX].sgpr_idx != -1) { + if (graphics_pipeline->has_pv_sgpr) { cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_PROVOKING_VERTEX_MODE; } @@ -6344,7 +6344,7 @@ radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeline /* Re-emit the streamout buffers because the SGPR idx can be different and with NGG streamout * they always need to be emitted because a buffer size of 0 is used to disable streamout. 
*/ - if (graphics_pipeline->last_vgt_api_stage_locs[AC_UD_STREAMOUT_BUFFERS].sgpr_idx != -1) { + if (graphics_pipeline->has_streamout) { cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_BUFFER; if (cmd_buffer->device->physical_device->use_ngg_streamout) { @@ -6358,8 +6358,7 @@ radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeline cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY; /* Re-emit the rasterization samples state because the SGPR idx can be different. */ - const struct radv_shader *ps = graphics_pipeline->base.shaders[MESA_SHADER_FRAGMENT]; - if (ps->info.user_sgprs_locs.shader_data[AC_UD_PS_NUM_SAMPLES].sgpr_idx != -1) { + if (graphics_pipeline->has_dynamic_samples) { cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES; } @@ -6367,7 +6366,7 @@ radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeline radv_bind_vs_input_state(cmd_buffer, graphics_pipeline); - if (ps->info.ps.needs_sample_positions) + if (graphics_pipeline->has_sample_positions) cmd_buffer->sample_positions_needed = true; if (graphics_pipeline->esgs_ring_size > cmd_buffer->esgs_ring_size_needed) diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 42c707eec34..1ea50c2dc1f 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -5047,6 +5047,11 @@ radv_graphics_pipeline_init(struct radv_graphics_pipeline *pipeline, struct radv pipeline->rast_prim = vgt_gs_out_prim_type; pipeline->last_vgt_api_stage_locs = pipeline->base.shaders[pipeline->last_vgt_api_stage]->info.user_sgprs_locs.shader_data; + pipeline->has_pv_sgpr = pipeline->last_vgt_api_stage_locs[AC_UD_NGG_PROVOKING_VTX].sgpr_idx != -1; + pipeline->has_streamout = pipeline->last_vgt_api_stage_locs[AC_UD_STREAMOUT_BUFFERS].sgpr_idx != -1; + pipeline->has_dynamic_samples = ps->info.user_sgprs_locs.shader_data[AC_UD_PS_NUM_SAMPLES].sgpr_idx != -1; + pipeline->has_sample_positions = 
ps->info.ps.needs_sample_positions; + pipeline->base.push_constant_size = pipeline_layout.push_constant_size; pipeline->base.dynamic_offset_count = pipeline_layout.dynamic_offset_count; diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 8a13a30fa80..df67efcb3e5 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -2064,6 +2064,12 @@ struct radv_graphics_pipeline { /* Whether the pipeline uses NGG (GFX10+). */ bool is_ngg; bool has_ngg_culling; + /* shortcuts for pipeline bind */ + bool has_pv_sgpr; + bool has_streamout; + bool has_dynamic_samples; + bool has_sample_positions; + uint8_t vtx_emit_num; unsigned esgs_ring_size;