From 8c4d0b287fa007f33cb11df2011ab23098fe9bc8 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Mon, 29 Apr 2024 15:06:26 +0200 Subject: [PATCH] radv: emit compute pipelines directly from the cmdbuf Using this intermediate CS isn't really useful and it prevents us from optimizing register writes in the near future. This will also be removed for graphics pipelines. Signed-off-by: Samuel Pitoiset Part-of: --- src/amd/vulkan/radv_cmd_buffer.c | 35 ++++++++++++++++--- src/amd/vulkan/radv_pipeline_compute.c | 48 +++----------------------- src/amd/vulkan/radv_pipeline_compute.h | 9 +++-- src/amd/vulkan/radv_pipeline_rt.c | 2 +- 4 files changed, 40 insertions(+), 54 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index ae62ded4000..e6ca331d76b 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -1901,6 +1901,29 @@ radv_emit_ps_epilog_state(struct radv_cmd_buffer *cmd_buffer, struct radv_shader cmd_buffer->state.emitted_ps_epilog = ps_epilog; } +static void +radv_emit_compute_shader(const struct radv_physical_device *pdev, struct radeon_cmdbuf *cs, + const struct radv_shader *shader) +{ + uint64_t va = radv_shader_get_va(shader); + + radeon_set_sh_reg(cs, R_00B830_COMPUTE_PGM_LO, va >> 8); + + radeon_set_sh_reg_seq(cs, R_00B848_COMPUTE_PGM_RSRC1, 2); + radeon_emit(cs, shader->config.rsrc1); + radeon_emit(cs, shader->config.rsrc2); + if (pdev->info.gfx_level >= GFX10) { + radeon_set_sh_reg(cs, R_00B8A0_COMPUTE_PGM_RSRC3, shader->config.rsrc3); + } + + radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS, radv_get_compute_resource_limits(pdev, shader)); + + radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3); + radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[0])); + radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[1])); + radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[2])); +} + static void radv_emit_graphics_pipeline(struct 
radv_cmd_buffer *cmd_buffer) { @@ -6585,17 +6608,21 @@ static void radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_compute_pipeline *pipeline) { struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + const struct radv_physical_device *pdev = radv_device_physical(device); if (pipeline == cmd_buffer->state.emitted_compute_pipeline) return; - assert(!pipeline->base.ctx_cs.cdw); + radeon_check_space(device->ws, cmd_buffer->cs, pdev->info.gfx_level >= GFX10 ? 19 : 16); + + if (pipeline->base.type == RADV_PIPELINE_COMPUTE) { + radv_emit_compute_shader(pdev, cmd_buffer->cs, cmd_buffer->state.shaders[MESA_SHADER_COMPUTE]); + } else { + radv_emit_compute_shader(pdev, cmd_buffer->cs, cmd_buffer->state.rt_prolog); + } cmd_buffer->state.emitted_compute_pipeline = pipeline; - radeon_check_space(device->ws, cmd_buffer->cs, pipeline->base.cs.cdw); - radeon_emit_array(cmd_buffer->cs, pipeline->base.cs.buf, pipeline->base.cs.cdw); - if (radv_device_fault_detection_enabled(device)) radv_save_pipeline(cmd_buffer, &pipeline->base); } diff --git a/src/amd/vulkan/radv_pipeline_compute.c b/src/amd/vulkan/radv_pipeline_compute.c index 9647aeebfd7..6cbef4e5e31 100644 --- a/src/amd/vulkan/radv_pipeline_compute.c +++ b/src/amd/vulkan/radv_pipeline_compute.c @@ -37,7 +37,7 @@ #include "sid.h" #include "vk_format.h" -static uint32_t +uint32_t radv_get_compute_resource_limits(const struct radv_physical_device *pdev, const struct radv_shader *cs) { unsigned threads_per_threadgroup; @@ -95,53 +95,13 @@ radv_get_compute_pipeline_metadata(const struct radv_device *device, const struc } void -radv_emit_compute_shader(const struct radv_physical_device *pdev, struct radeon_cmdbuf *cs, - const struct radv_shader *shader) -{ - uint64_t va = radv_shader_get_va(shader); - - radeon_set_sh_reg(cs, R_00B830_COMPUTE_PGM_LO, va >> 8); - - radeon_set_sh_reg_seq(cs, R_00B848_COMPUTE_PGM_RSRC1, 2); - radeon_emit(cs, shader->config.rsrc1); - radeon_emit(cs, 
shader->config.rsrc2); - if (pdev->info.gfx_level >= GFX10) { - radeon_set_sh_reg(cs, R_00B8A0_COMPUTE_PGM_RSRC3, shader->config.rsrc3); - } - - radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS, radv_get_compute_resource_limits(pdev, shader)); - - radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3); - radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[0])); - radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[1])); - radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[2])); -} - -static void -radv_compute_generate_pm4(const struct radv_device *device, struct radv_compute_pipeline *pipeline, - struct radv_shader *shader) -{ - const struct radv_physical_device *pdev = radv_device_physical(device); - struct radeon_cmdbuf *cs = &pipeline->base.cs; - - cs->reserved_dw = cs->max_dw = pdev->info.gfx_level >= GFX10 ? 19 : 16; - cs->buf = malloc(cs->max_dw * 4); - - radv_emit_compute_shader(pdev, cs, shader); - - assert(pipeline->base.cs.cdw <= pipeline->base.cs.max_dw); -} - -void -radv_compute_pipeline_init(const struct radv_device *device, struct radv_compute_pipeline *pipeline, - const struct radv_pipeline_layout *layout, struct radv_shader *shader) +radv_compute_pipeline_init(struct radv_compute_pipeline *pipeline, const struct radv_pipeline_layout *layout, + struct radv_shader *shader) { pipeline->base.need_indirect_descriptor_sets |= radv_shader_need_indirect_descriptor_sets(shader); pipeline->base.push_constant_size = layout->push_constant_size; pipeline->base.dynamic_offset_count = layout->dynamic_offset_count; - - radv_compute_generate_pm4(device, pipeline, shader); } struct radv_shader * @@ -321,7 +281,7 @@ radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache, const VkC return result; } - radv_compute_pipeline_init(device, pipeline, pipeline_layout, pipeline->base.shaders[MESA_SHADER_COMPUTE]); + radv_compute_pipeline_init(pipeline, pipeline_layout, 
pipeline->base.shaders[MESA_SHADER_COMPUTE]); if (pipeline->base.create_flags & VK_PIPELINE_CREATE_INDIRECT_BINDABLE_BIT_NV) { const VkComputePipelineIndirectBufferInfoNV *indirect_buffer = diff --git a/src/amd/vulkan/radv_pipeline_compute.h b/src/amd/vulkan/radv_pipeline_compute.h index 2d0af7dab1f..ccdc78b9bb9 100644 --- a/src/amd/vulkan/radv_pipeline_compute.h +++ b/src/amd/vulkan/radv_pipeline_compute.h @@ -42,14 +42,13 @@ struct radv_compute_pipeline_metadata { uint64_t inline_push_const_mask; }; +uint32_t radv_get_compute_resource_limits(const struct radv_physical_device *pdev, const struct radv_shader *cs); + void radv_get_compute_pipeline_metadata(const struct radv_device *device, const struct radv_compute_pipeline *pipeline, struct radv_compute_pipeline_metadata *metadata); -void radv_emit_compute_shader(const struct radv_physical_device *pdev, struct radeon_cmdbuf *cs, - const struct radv_shader *shader); - -void radv_compute_pipeline_init(const struct radv_device *device, struct radv_compute_pipeline *pipeline, - const struct radv_pipeline_layout *layout, struct radv_shader *shader); +void radv_compute_pipeline_init(struct radv_compute_pipeline *pipeline, const struct radv_pipeline_layout *layout, + struct radv_shader *shader); struct radv_shader *radv_compile_cs(struct radv_device *device, struct vk_pipeline_cache *cache, struct radv_shader_stage *cs_stage, bool keep_executable_info, diff --git a/src/amd/vulkan/radv_pipeline_rt.c b/src/amd/vulkan/radv_pipeline_rt.c index 8552dd3e107..04258edc50f 100644 --- a/src/amd/vulkan/radv_pipeline_rt.c +++ b/src/amd/vulkan/radv_pipeline_rt.c @@ -1031,7 +1031,7 @@ radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache, const VkRayTra compute_rt_stack_size(pCreateInfo, pipeline); compile_rt_prolog(device, pipeline); - radv_compute_pipeline_init(device, &pipeline->base, pipeline_layout, pipeline->prolog); + radv_compute_pipeline_init(&pipeline->base, pipeline_layout, pipeline->prolog); } /* write shader VAs 
into group handles */