radv: add some graphics pipeline hints to optimize pipeline bind

This is a costly function, and we want to avoid loading random struct data
as much as possible.

These struct members aren't accessed anywhere else in the function, so eliminating
the access avoids some CPU overhead.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20751>
This commit is contained in:
Mike Blumenkrantz
2023-01-17 10:50:45 -05:00
committed by Marge Bot
parent 8fc5d93060
commit 46e2cc5d4c
3 changed files with 15 additions and 5 deletions
+4 -5
View File
@@ -6330,7 +6330,7 @@ radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeline
}
/* Re-emit the provoking vertex mode state because the SGPR idx can be different. */
if (graphics_pipeline->last_vgt_api_stage_locs[AC_UD_NGG_PROVOKING_VTX].sgpr_idx != -1) {
if (graphics_pipeline->has_pv_sgpr) {
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_PROVOKING_VERTEX_MODE;
}
@@ -6344,7 +6344,7 @@ radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeline
/* Re-emit the streamout buffers because the SGPR idx can be different and with NGG streamout
* they always need to be emitted because a buffer size of 0 is used to disable streamout.
*/
if (graphics_pipeline->last_vgt_api_stage_locs[AC_UD_STREAMOUT_BUFFERS].sgpr_idx != -1) {
if (graphics_pipeline->has_streamout) {
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_BUFFER;
if (cmd_buffer->device->physical_device->use_ngg_streamout) {
@@ -6358,8 +6358,7 @@ radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeline
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_NGG_QUERY;
/* Re-emit the rasterization samples state because the SGPR idx can be different. */
const struct radv_shader *ps = graphics_pipeline->base.shaders[MESA_SHADER_FRAGMENT];
if (ps->info.user_sgprs_locs.shader_data[AC_UD_PS_NUM_SAMPLES].sgpr_idx != -1) {
if (graphics_pipeline->has_dynamic_samples) {
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_RASTERIZATION_SAMPLES;
}
@@ -6367,7 +6366,7 @@ radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipeline
radv_bind_vs_input_state(cmd_buffer, graphics_pipeline);
if (ps->info.ps.needs_sample_positions)
if (graphics_pipeline->has_sample_positions)
cmd_buffer->sample_positions_needed = true;
if (graphics_pipeline->esgs_ring_size > cmd_buffer->esgs_ring_size_needed)
+5
View File
@@ -5047,6 +5047,11 @@ radv_graphics_pipeline_init(struct radv_graphics_pipeline *pipeline, struct radv
pipeline->rast_prim = vgt_gs_out_prim_type;
pipeline->last_vgt_api_stage_locs = pipeline->base.shaders[pipeline->last_vgt_api_stage]->info.user_sgprs_locs.shader_data;
pipeline->has_pv_sgpr = pipeline->last_vgt_api_stage_locs[AC_UD_NGG_PROVOKING_VTX].sgpr_idx != -1;
pipeline->has_streamout = pipeline->last_vgt_api_stage_locs[AC_UD_STREAMOUT_BUFFERS].sgpr_idx != -1;
pipeline->has_dynamic_samples = ps->info.user_sgprs_locs.shader_data[AC_UD_PS_NUM_SAMPLES].sgpr_idx != -1;
pipeline->has_sample_positions = ps->info.ps.needs_sample_positions;
pipeline->base.push_constant_size = pipeline_layout.push_constant_size;
pipeline->base.dynamic_offset_count = pipeline_layout.dynamic_offset_count;
+6
View File
@@ -2064,6 +2064,12 @@ struct radv_graphics_pipeline {
/* Whether the pipeline uses NGG (GFX10+). */
bool is_ngg;
bool has_ngg_culling;
/* shortcuts for pipeline bind */
bool has_pv_sgpr;
bool has_streamout;
bool has_dynamic_samples;
bool has_sample_positions;
uint8_t vtx_emit_num;
unsigned esgs_ring_size;