radv: emit compute pipelines directly from the cmdbuf
Using this intermediate CS isn't really useful, and it prevents us from optimizing register writes in the near future. This will also be removed for graphics pipelines. Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28977>
This commit is contained in:
committed by
Marge Bot
parent
72a73a6f8a
commit
8c4d0b287f
@@ -1901,6 +1901,29 @@ radv_emit_ps_epilog_state(struct radv_cmd_buffer *cmd_buffer, struct radv_shader
|
||||
cmd_buffer->state.emitted_ps_epilog = ps_epilog;
|
||||
}
|
||||
|
||||
static void
|
||||
radv_emit_compute_shader(const struct radv_physical_device *pdev, struct radeon_cmdbuf *cs,
|
||||
const struct radv_shader *shader)
|
||||
{
|
||||
uint64_t va = radv_shader_get_va(shader);
|
||||
|
||||
radeon_set_sh_reg(cs, R_00B830_COMPUTE_PGM_LO, va >> 8);
|
||||
|
||||
radeon_set_sh_reg_seq(cs, R_00B848_COMPUTE_PGM_RSRC1, 2);
|
||||
radeon_emit(cs, shader->config.rsrc1);
|
||||
radeon_emit(cs, shader->config.rsrc2);
|
||||
if (pdev->info.gfx_level >= GFX10) {
|
||||
radeon_set_sh_reg(cs, R_00B8A0_COMPUTE_PGM_RSRC3, shader->config.rsrc3);
|
||||
}
|
||||
|
||||
radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS, radv_get_compute_resource_limits(pdev, shader));
|
||||
|
||||
radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
|
||||
radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[0]));
|
||||
radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[1]));
|
||||
radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[2]));
|
||||
}
|
||||
|
||||
static void
|
||||
radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
@@ -6585,17 +6608,21 @@ static void
|
||||
radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_compute_pipeline *pipeline)
|
||||
{
|
||||
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
if (pipeline == cmd_buffer->state.emitted_compute_pipeline)
|
||||
return;
|
||||
|
||||
assert(!pipeline->base.ctx_cs.cdw);
|
||||
radeon_check_space(device->ws, cmd_buffer->cs, pdev->info.gfx_level >= GFX10 ? 19 : 16);
|
||||
|
||||
if (pipeline->base.type == RADV_PIPELINE_COMPUTE) {
|
||||
radv_emit_compute_shader(pdev, cmd_buffer->cs, cmd_buffer->state.shaders[MESA_SHADER_COMPUTE]);
|
||||
} else {
|
||||
radv_emit_compute_shader(pdev, cmd_buffer->cs, cmd_buffer->state.rt_prolog);
|
||||
}
|
||||
|
||||
cmd_buffer->state.emitted_compute_pipeline = pipeline;
|
||||
|
||||
radeon_check_space(device->ws, cmd_buffer->cs, pipeline->base.cs.cdw);
|
||||
radeon_emit_array(cmd_buffer->cs, pipeline->base.cs.buf, pipeline->base.cs.cdw);
|
||||
|
||||
if (radv_device_fault_detection_enabled(device))
|
||||
radv_save_pipeline(cmd_buffer, &pipeline->base);
|
||||
}
|
||||
|
||||
@@ -37,7 +37,7 @@
|
||||
#include "sid.h"
|
||||
#include "vk_format.h"
|
||||
|
||||
static uint32_t
|
||||
uint32_t
|
||||
radv_get_compute_resource_limits(const struct radv_physical_device *pdev, const struct radv_shader *cs)
|
||||
{
|
||||
unsigned threads_per_threadgroup;
|
||||
@@ -95,53 +95,13 @@ radv_get_compute_pipeline_metadata(const struct radv_device *device, const struc
|
||||
}
|
||||
|
||||
void
|
||||
radv_emit_compute_shader(const struct radv_physical_device *pdev, struct radeon_cmdbuf *cs,
|
||||
const struct radv_shader *shader)
|
||||
{
|
||||
uint64_t va = radv_shader_get_va(shader);
|
||||
|
||||
radeon_set_sh_reg(cs, R_00B830_COMPUTE_PGM_LO, va >> 8);
|
||||
|
||||
radeon_set_sh_reg_seq(cs, R_00B848_COMPUTE_PGM_RSRC1, 2);
|
||||
radeon_emit(cs, shader->config.rsrc1);
|
||||
radeon_emit(cs, shader->config.rsrc2);
|
||||
if (pdev->info.gfx_level >= GFX10) {
|
||||
radeon_set_sh_reg(cs, R_00B8A0_COMPUTE_PGM_RSRC3, shader->config.rsrc3);
|
||||
}
|
||||
|
||||
radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS, radv_get_compute_resource_limits(pdev, shader));
|
||||
|
||||
radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
|
||||
radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[0]));
|
||||
radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[1]));
|
||||
radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[2]));
|
||||
}
|
||||
|
||||
static void
|
||||
radv_compute_generate_pm4(const struct radv_device *device, struct radv_compute_pipeline *pipeline,
|
||||
struct radv_shader *shader)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
struct radeon_cmdbuf *cs = &pipeline->base.cs;
|
||||
|
||||
cs->reserved_dw = cs->max_dw = pdev->info.gfx_level >= GFX10 ? 19 : 16;
|
||||
cs->buf = malloc(cs->max_dw * 4);
|
||||
|
||||
radv_emit_compute_shader(pdev, cs, shader);
|
||||
|
||||
assert(pipeline->base.cs.cdw <= pipeline->base.cs.max_dw);
|
||||
}
|
||||
|
||||
void
|
||||
radv_compute_pipeline_init(const struct radv_device *device, struct radv_compute_pipeline *pipeline,
|
||||
const struct radv_pipeline_layout *layout, struct radv_shader *shader)
|
||||
radv_compute_pipeline_init(struct radv_compute_pipeline *pipeline, const struct radv_pipeline_layout *layout,
|
||||
struct radv_shader *shader)
|
||||
{
|
||||
pipeline->base.need_indirect_descriptor_sets |= radv_shader_need_indirect_descriptor_sets(shader);
|
||||
|
||||
pipeline->base.push_constant_size = layout->push_constant_size;
|
||||
pipeline->base.dynamic_offset_count = layout->dynamic_offset_count;
|
||||
|
||||
radv_compute_generate_pm4(device, pipeline, shader);
|
||||
}
|
||||
|
||||
struct radv_shader *
|
||||
@@ -321,7 +281,7 @@ radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache, const VkC
|
||||
return result;
|
||||
}
|
||||
|
||||
radv_compute_pipeline_init(device, pipeline, pipeline_layout, pipeline->base.shaders[MESA_SHADER_COMPUTE]);
|
||||
radv_compute_pipeline_init(pipeline, pipeline_layout, pipeline->base.shaders[MESA_SHADER_COMPUTE]);
|
||||
|
||||
if (pipeline->base.create_flags & VK_PIPELINE_CREATE_INDIRECT_BINDABLE_BIT_NV) {
|
||||
const VkComputePipelineIndirectBufferInfoNV *indirect_buffer =
|
||||
|
||||
@@ -42,14 +42,13 @@ struct radv_compute_pipeline_metadata {
|
||||
uint64_t inline_push_const_mask;
|
||||
};
|
||||
|
||||
uint32_t radv_get_compute_resource_limits(const struct radv_physical_device *pdev, const struct radv_shader *cs);
|
||||
|
||||
void radv_get_compute_pipeline_metadata(const struct radv_device *device, const struct radv_compute_pipeline *pipeline,
|
||||
struct radv_compute_pipeline_metadata *metadata);
|
||||
|
||||
void radv_emit_compute_shader(const struct radv_physical_device *pdev, struct radeon_cmdbuf *cs,
|
||||
const struct radv_shader *shader);
|
||||
|
||||
void radv_compute_pipeline_init(const struct radv_device *device, struct radv_compute_pipeline *pipeline,
|
||||
const struct radv_pipeline_layout *layout, struct radv_shader *shader);
|
||||
void radv_compute_pipeline_init(struct radv_compute_pipeline *pipeline, const struct radv_pipeline_layout *layout,
|
||||
struct radv_shader *shader);
|
||||
|
||||
struct radv_shader *radv_compile_cs(struct radv_device *device, struct vk_pipeline_cache *cache,
|
||||
struct radv_shader_stage *cs_stage, bool keep_executable_info,
|
||||
|
||||
@@ -1031,7 +1031,7 @@ radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache, const VkRayTra
|
||||
compute_rt_stack_size(pCreateInfo, pipeline);
|
||||
compile_rt_prolog(device, pipeline);
|
||||
|
||||
radv_compute_pipeline_init(device, &pipeline->base, pipeline_layout, pipeline->prolog);
|
||||
radv_compute_pipeline_init(&pipeline->base, pipeline_layout, pipeline->prolog);
|
||||
}
|
||||
|
||||
/* write shader VAs into group handles */
|
||||
|
||||
Reference in New Issue
Block a user