radv: emit compute pipelines directly from the cmdbuf

Using this intermediate CS isn't really useful and it prevents us from
optimizing register writes in the near future. This will also be removed
for graphics pipelines.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28977>
This commit is contained in:
Samuel Pitoiset
2024-04-29 15:06:26 +02:00
committed by Marge Bot
parent 72a73a6f8a
commit 8c4d0b287f
4 changed files with 40 additions and 54 deletions
+31 -4
View File
@@ -1901,6 +1901,29 @@ radv_emit_ps_epilog_state(struct radv_cmd_buffer *cmd_buffer, struct radv_shader
cmd_buffer->state.emitted_ps_epilog = ps_epilog;
}
/*
 * Emit the compute state of `shader` into `cs`: the shader VA, the
 * RSRC1/RSRC2 config words (plus RSRC3 on GFX10 and newer), the resource
 * limits computed by radv_get_compute_resource_limits(), and one
 * NUM_THREAD value per block_size component.
 */
static void
radv_emit_compute_shader(const struct radv_physical_device *pdev, struct radeon_cmdbuf *cs,
const struct radv_shader *shader)
{
uint64_t va = radv_shader_get_va(shader);
/* COMPUTE_PGM_LO receives the shader VA shifted right by 8 bits. */
radeon_set_sh_reg(cs, R_00B830_COMPUTE_PGM_LO, va >> 8);
/* RSRC1 and RSRC2 are written as a single 2-register sequence. */
radeon_set_sh_reg_seq(cs, R_00B848_COMPUTE_PGM_RSRC1, 2);
radeon_emit(cs, shader->config.rsrc1);
radeon_emit(cs, shader->config.rsrc2);
/* RSRC3 is only written on GFX10 and newer. */
if (pdev->info.gfx_level >= GFX10) {
radeon_set_sh_reg(cs, R_00B8A0_COMPUTE_PGM_RSRC3, shader->config.rsrc3);
}
radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS, radv_get_compute_resource_limits(pdev, shader));
/* 3-register sequence starting at COMPUTE_NUM_THREAD_X, fed from block_size[0..2]. */
radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[0]));
radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[1]));
radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[2]));
}
static void
radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer)
{
@@ -6585,17 +6608,21 @@ static void
radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_compute_pipeline *pipeline)
{
/*
 * Emit the compute state for `pipeline` into the command buffer's CS and
 * remember it in state.emitted_compute_pipeline so that a repeated call
 * with the same pipeline is a no-op.
 */
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
/* Nothing to do when this pipeline is the one recorded as already emitted. */
if (pipeline == cmd_buffer->state.emitted_compute_pipeline)
return;
assert(!pipeline->base.ctx_cs.cdw);
/* Reserve 19 dwords on GFX10 and newer, 16 otherwise, before emitting the shader registers. */
radeon_check_space(device->ws, cmd_buffer->cs, pdev->info.gfx_level >= GFX10 ? 19 : 16);
/*
 * RADV_PIPELINE_COMPUTE pipelines emit the command buffer's
 * MESA_SHADER_COMPUTE shader; every other pipeline type emits rt_prolog.
 */
if (pipeline->base.type == RADV_PIPELINE_COMPUTE) {
radv_emit_compute_shader(pdev, cmd_buffer->cs, cmd_buffer->state.shaders[MESA_SHADER_COMPUTE]);
} else {
radv_emit_compute_shader(pdev, cmd_buffer->cs, cmd_buffer->state.rt_prolog);
}
cmd_buffer->state.emitted_compute_pipeline = pipeline;
/*
 * NOTE(review): this copy of pipeline->base.cs sits next to the direct
 * emission above; the two look like the old and new variants of the same
 * step shown together -- confirm which lines are current.
 */
radeon_check_space(device->ws, cmd_buffer->cs, pipeline->base.cs.cdw);
radeon_emit_array(cmd_buffer->cs, pipeline->base.cs.buf, pipeline->base.cs.cdw);
/* Save the pipeline when fault detection is enabled on the device. */
if (radv_device_fault_detection_enabled(device))
radv_save_pipeline(cmd_buffer, &pipeline->base);
}
+4 -44
View File
@@ -37,7 +37,7 @@
#include "sid.h"
#include "vk_format.h"
static uint32_t
uint32_t
radv_get_compute_resource_limits(const struct radv_physical_device *pdev, const struct radv_shader *cs)
{
unsigned threads_per_threadgroup;
@@ -95,53 +95,13 @@ radv_get_compute_pipeline_metadata(const struct radv_device *device, const struc
}
/*
 * Emit the compute state of `shader` into `cs`: the shader VA, the
 * RSRC1/RSRC2 config words (plus RSRC3 on GFX10 and newer), the resource
 * limits computed by radv_get_compute_resource_limits(), and one
 * NUM_THREAD value per block_size component.
 */
void
radv_emit_compute_shader(const struct radv_physical_device *pdev, struct radeon_cmdbuf *cs,
const struct radv_shader *shader)
{
uint64_t va = radv_shader_get_va(shader);
/* COMPUTE_PGM_LO receives the shader VA shifted right by 8 bits. */
radeon_set_sh_reg(cs, R_00B830_COMPUTE_PGM_LO, va >> 8);
/* RSRC1 and RSRC2 are written as a single 2-register sequence. */
radeon_set_sh_reg_seq(cs, R_00B848_COMPUTE_PGM_RSRC1, 2);
radeon_emit(cs, shader->config.rsrc1);
radeon_emit(cs, shader->config.rsrc2);
/* RSRC3 is only written on GFX10 and newer. */
if (pdev->info.gfx_level >= GFX10) {
radeon_set_sh_reg(cs, R_00B8A0_COMPUTE_PGM_RSRC3, shader->config.rsrc3);
}
radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS, radv_get_compute_resource_limits(pdev, shader));
/* 3-register sequence starting at COMPUTE_NUM_THREAD_X, fed from block_size[0..2]. */
radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[0]));
radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[1]));
radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[2]));
}
/*
 * Fill pipeline->base.cs with the compute state of `shader`: size the
 * buffer (19 dwords on GFX10 and newer, 16 otherwise), allocate it, and
 * emit into it through radv_emit_compute_shader().
 */
static void
radv_compute_generate_pm4(const struct radv_device *device, struct radv_compute_pipeline *pipeline,
struct radv_shader *shader)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radeon_cmdbuf *cs = &pipeline->base.cs;
cs->reserved_dw = cs->max_dw = pdev->info.gfx_level >= GFX10 ? 19 : 16;
/*
 * max_dw entries of 4 bytes each.
 * NOTE(review): the malloc result is not checked before cs is handed to
 * radv_emit_compute_shader().
 */
cs->buf = malloc(cs->max_dw * 4);
radv_emit_compute_shader(pdev, cs, shader);
/* Sanity check: the emitted dword count must not exceed the sized buffer. */
assert(pipeline->base.cs.cdw <= pipeline->base.cs.max_dw);
}
/*
 * Initialize the pipeline base fields derived from `shader` and `layout`:
 * the indirect-descriptor-sets flag, the push constant size and the
 * dynamic offset count.
 *
 * NOTE(review): this span shows two parameter lists (with and without
 * `device`) plus a radv_compute_generate_pm4() call that needs `device`;
 * it looks like both sides of a change shown together -- confirm which
 * lines are current.
 */
void
radv_compute_pipeline_init(const struct radv_device *device, struct radv_compute_pipeline *pipeline,
const struct radv_pipeline_layout *layout, struct radv_shader *shader)
radv_compute_pipeline_init(struct radv_compute_pipeline *pipeline, const struct radv_pipeline_layout *layout,
struct radv_shader *shader)
{
/* OR-ed in, so a flag that is already set on the pipeline is kept. */
pipeline->base.need_indirect_descriptor_sets |= radv_shader_need_indirect_descriptor_sets(shader);
pipeline->base.push_constant_size = layout->push_constant_size;
pipeline->base.dynamic_offset_count = layout->dynamic_offset_count;
radv_compute_generate_pm4(device, pipeline, shader);
}
struct radv_shader *
@@ -321,7 +281,7 @@ radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache, const VkC
return result;
}
radv_compute_pipeline_init(device, pipeline, pipeline_layout, pipeline->base.shaders[MESA_SHADER_COMPUTE]);
radv_compute_pipeline_init(pipeline, pipeline_layout, pipeline->base.shaders[MESA_SHADER_COMPUTE]);
if (pipeline->base.create_flags & VK_PIPELINE_CREATE_INDIRECT_BINDABLE_BIT_NV) {
const VkComputePipelineIndirectBufferInfoNV *indirect_buffer =
+4 -5
View File
@@ -42,14 +42,13 @@ struct radv_compute_pipeline_metadata {
uint64_t inline_push_const_mask;
};
uint32_t radv_get_compute_resource_limits(const struct radv_physical_device *pdev, const struct radv_shader *cs);
void radv_get_compute_pipeline_metadata(const struct radv_device *device, const struct radv_compute_pipeline *pipeline,
struct radv_compute_pipeline_metadata *metadata);
void radv_emit_compute_shader(const struct radv_physical_device *pdev, struct radeon_cmdbuf *cs,
const struct radv_shader *shader);
void radv_compute_pipeline_init(const struct radv_device *device, struct radv_compute_pipeline *pipeline,
const struct radv_pipeline_layout *layout, struct radv_shader *shader);
void radv_compute_pipeline_init(struct radv_compute_pipeline *pipeline, const struct radv_pipeline_layout *layout,
struct radv_shader *shader);
struct radv_shader *radv_compile_cs(struct radv_device *device, struct vk_pipeline_cache *cache,
struct radv_shader_stage *cs_stage, bool keep_executable_info,
+1 -1
View File
@@ -1031,7 +1031,7 @@ radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache, const VkRayTra
compute_rt_stack_size(pCreateInfo, pipeline);
compile_rt_prolog(device, pipeline);
radv_compute_pipeline_init(device, &pipeline->base, pipeline_layout, pipeline->prolog);
radv_compute_pipeline_init(&pipeline->base, pipeline_layout, pipeline->prolog);
}
/* write shader VAs into group handles */