Patch summary (reviewer-added; text before the first diff header is skipped by patch tools)

  * radv_emit_compute_shader() moves from radv_pipeline_compute.c into
    radv_cmd_buffer.c and becomes static; its body is unchanged.
  * radv_emit_compute_pipeline() now calls it to write the compute registers
    straight into cmd_buffer->cs, using state.shaders[MESA_SHADER_COMPUTE] for
    RADV_PIPELINE_COMPUTE and state.rt_prolog for every other pipeline type,
    instead of copying pipeline->base.cs with radeon_emit_array().
  * radv_compute_generate_pm4() and its malloc'ed per-pipeline buffer are removed,
    so radv_compute_pipeline_init() loses its device parameter (both callers updated).
  * radv_get_compute_resource_limits() loses static and gains a prototype in
    radv_pipeline_compute.h because the new caller lives in another file.
  * The radeon_check_space() reservation (19 dwords on GFX10+, 16 otherwise) is the
    same size the removed radv_compute_generate_pm4() used for cs->max_dw.

  NOTE(review): the assert on pipeline->base.ctx_cs.cdw is dropped and pipeline->base.cs
  is no longer filled by this path; confirm no other code still reads pipeline->base.cs
  for compute or ray-tracing pipelines (not visible in this patch).

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index ae62ded4000..e6ca331d76b 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1901,6 +1901,29 @@ radv_emit_ps_epilog_state(struct radv_cmd_buffer *cmd_buffer, struct radv_shader
    cmd_buffer->state.emitted_ps_epilog = ps_epilog;
 }
 
+static void
+radv_emit_compute_shader(const struct radv_physical_device *pdev, struct radeon_cmdbuf *cs,
+                         const struct radv_shader *shader)
+{
+   uint64_t va = radv_shader_get_va(shader);
+
+   radeon_set_sh_reg(cs, R_00B830_COMPUTE_PGM_LO, va >> 8);
+
+   radeon_set_sh_reg_seq(cs, R_00B848_COMPUTE_PGM_RSRC1, 2);
+   radeon_emit(cs, shader->config.rsrc1);
+   radeon_emit(cs, shader->config.rsrc2);
+   if (pdev->info.gfx_level >= GFX10) {
+      radeon_set_sh_reg(cs, R_00B8A0_COMPUTE_PGM_RSRC3, shader->config.rsrc3);
+   }
+
+   radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS, radv_get_compute_resource_limits(pdev, shader));
+
+   radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
+   radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[0]));
+   radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[1]));
+   radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[2]));
+}
+
 static void
 radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer)
 {
@@ -6585,17 +6608,21 @@ static void
 radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_compute_pipeline *pipeline)
 {
    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
+   const struct radv_physical_device *pdev = radv_device_physical(device);
 
    if (pipeline == cmd_buffer->state.emitted_compute_pipeline)
       return;
 
-   assert(!pipeline->base.ctx_cs.cdw);
+   radeon_check_space(device->ws, cmd_buffer->cs, pdev->info.gfx_level >= GFX10 ? 19 : 16);
+
+   if (pipeline->base.type == RADV_PIPELINE_COMPUTE) {
+      radv_emit_compute_shader(pdev, cmd_buffer->cs, cmd_buffer->state.shaders[MESA_SHADER_COMPUTE]);
+   } else {
+      radv_emit_compute_shader(pdev, cmd_buffer->cs, cmd_buffer->state.rt_prolog);
+   }
 
    cmd_buffer->state.emitted_compute_pipeline = pipeline;
 
-   radeon_check_space(device->ws, cmd_buffer->cs, pipeline->base.cs.cdw);
-   radeon_emit_array(cmd_buffer->cs, pipeline->base.cs.buf, pipeline->base.cs.cdw);
-
    if (radv_device_fault_detection_enabled(device))
       radv_save_pipeline(cmd_buffer, &pipeline->base);
 }
diff --git a/src/amd/vulkan/radv_pipeline_compute.c b/src/amd/vulkan/radv_pipeline_compute.c
index 9647aeebfd7..6cbef4e5e31 100644
--- a/src/amd/vulkan/radv_pipeline_compute.c
+++ b/src/amd/vulkan/radv_pipeline_compute.c
@@ -37,7 +37,7 @@
 #include "sid.h"
 #include "vk_format.h"
 
-static uint32_t
+uint32_t
 radv_get_compute_resource_limits(const struct radv_physical_device *pdev, const struct radv_shader *cs)
 {
    unsigned threads_per_threadgroup;
@@ -95,53 +95,13 @@ radv_get_compute_pipeline_metadata(const struct radv_device *device, const struc
 }
 
 void
-radv_emit_compute_shader(const struct radv_physical_device *pdev, struct radeon_cmdbuf *cs,
-                         const struct radv_shader *shader)
-{
-   uint64_t va = radv_shader_get_va(shader);
-
-   radeon_set_sh_reg(cs, R_00B830_COMPUTE_PGM_LO, va >> 8);
-
-   radeon_set_sh_reg_seq(cs, R_00B848_COMPUTE_PGM_RSRC1, 2);
-   radeon_emit(cs, shader->config.rsrc1);
-   radeon_emit(cs, shader->config.rsrc2);
-   if (pdev->info.gfx_level >= GFX10) {
-      radeon_set_sh_reg(cs, R_00B8A0_COMPUTE_PGM_RSRC3, shader->config.rsrc3);
-   }
-
-   radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS, radv_get_compute_resource_limits(pdev, shader));
-
-   radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
-   radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[0]));
-   radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[1]));
-   radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[2]));
-}
-
-static void
-radv_compute_generate_pm4(const struct radv_device *device, struct radv_compute_pipeline *pipeline,
-                          struct radv_shader *shader)
-{
-   const struct radv_physical_device *pdev = radv_device_physical(device);
-   struct radeon_cmdbuf *cs = &pipeline->base.cs;
-
-   cs->reserved_dw = cs->max_dw = pdev->info.gfx_level >= GFX10 ? 19 : 16;
-   cs->buf = malloc(cs->max_dw * 4);
-
-   radv_emit_compute_shader(pdev, cs, shader);
-
-   assert(pipeline->base.cs.cdw <= pipeline->base.cs.max_dw);
-}
-
-void
-radv_compute_pipeline_init(const struct radv_device *device, struct radv_compute_pipeline *pipeline,
-                           const struct radv_pipeline_layout *layout, struct radv_shader *shader)
+radv_compute_pipeline_init(struct radv_compute_pipeline *pipeline, const struct radv_pipeline_layout *layout,
+                           struct radv_shader *shader)
 {
    pipeline->base.need_indirect_descriptor_sets |= radv_shader_need_indirect_descriptor_sets(shader);
 
    pipeline->base.push_constant_size = layout->push_constant_size;
    pipeline->base.dynamic_offset_count = layout->dynamic_offset_count;
-
-   radv_compute_generate_pm4(device, pipeline, shader);
 }
 
 struct radv_shader *
@@ -321,7 +281,7 @@ radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache, const VkC
       return result;
    }
 
-   radv_compute_pipeline_init(device, pipeline, pipeline_layout, pipeline->base.shaders[MESA_SHADER_COMPUTE]);
+   radv_compute_pipeline_init(pipeline, pipeline_layout, pipeline->base.shaders[MESA_SHADER_COMPUTE]);
 
    if (pipeline->base.create_flags & VK_PIPELINE_CREATE_INDIRECT_BINDABLE_BIT_NV) {
       const VkComputePipelineIndirectBufferInfoNV *indirect_buffer =
diff --git a/src/amd/vulkan/radv_pipeline_compute.h b/src/amd/vulkan/radv_pipeline_compute.h
index 2d0af7dab1f..ccdc78b9bb9 100644
--- a/src/amd/vulkan/radv_pipeline_compute.h
+++ b/src/amd/vulkan/radv_pipeline_compute.h
@@ -42,14 +42,13 @@ struct radv_compute_pipeline_metadata {
    uint64_t inline_push_const_mask;
 };
 
+uint32_t radv_get_compute_resource_limits(const struct radv_physical_device *pdev, const struct radv_shader *cs);
+
 void radv_get_compute_pipeline_metadata(const struct radv_device *device, const struct radv_compute_pipeline *pipeline,
                                         struct radv_compute_pipeline_metadata *metadata);
 
-void radv_emit_compute_shader(const struct radv_physical_device *pdev, struct radeon_cmdbuf *cs,
-                              const struct radv_shader *shader);
-
-void radv_compute_pipeline_init(const struct radv_device *device, struct radv_compute_pipeline *pipeline,
-                                const struct radv_pipeline_layout *layout, struct radv_shader *shader);
+void radv_compute_pipeline_init(struct radv_compute_pipeline *pipeline, const struct radv_pipeline_layout *layout,
+                                struct radv_shader *shader);
 
 struct radv_shader *radv_compile_cs(struct radv_device *device, struct vk_pipeline_cache *cache,
                                     struct radv_shader_stage *cs_stage, bool keep_executable_info,
diff --git a/src/amd/vulkan/radv_pipeline_rt.c b/src/amd/vulkan/radv_pipeline_rt.c
index 8552dd3e107..04258edc50f 100644
--- a/src/amd/vulkan/radv_pipeline_rt.c
+++ b/src/amd/vulkan/radv_pipeline_rt.c
@@ -1031,7 +1031,7 @@ radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache, const VkRayTra
       compute_rt_stack_size(pCreateInfo, pipeline);
       compile_rt_prolog(device, pipeline);
 
-      radv_compute_pipeline_init(device, &pipeline->base, pipeline_layout, pipeline->prolog);
+      radv_compute_pipeline_init(&pipeline->base, pipeline_layout, pipeline->prolog);
    }
 
    /* write shader VAs into group handles */