radeonsi: add si_emit_task_shader_pointers

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38044>
This commit is contained in:
Qiang Yu
2025-06-04 16:24:28 +08:00
committed by Marge Bot
parent 66cd3f1b52
commit 963ad4bb4e
3 changed files with 97 additions and 58 deletions

View File

@@ -2015,10 +2015,12 @@ void si_shader_pointers_mark_dirty(struct si_context *sctx)
BITFIELD_RANGE(SI_DESCS_FIRST_SHADER, SI_NUM_DESCS - SI_DESCS_FIRST_SHADER);
sctx->vertex_buffers_dirty = sctx->num_vertex_elements > 0;
si_mark_atom_dirty(sctx, &sctx->atoms.s.gfx_shader_pointers);
sctx->graphics_internal_bindings_pointer_dirty = sctx->descriptors[SI_DESCS_INTERNAL].buffer != NULL;
sctx->compute_internal_bindings_pointer_dirty = sctx->descriptors[SI_DESCS_INTERNAL].buffer != NULL;
sctx->graphics_bindless_pointer_dirty = sctx->bindless_descriptors.buffer != NULL;
sctx->compute_bindless_pointer_dirty = sctx->bindless_descriptors.buffer != NULL;
sctx->graphics_internal_bindings_pointer_dirty =
sctx->compute_internal_bindings_pointer_dirty =
sctx->task_internal_bindings_pointer_dirty = sctx->descriptors[SI_DESCS_INTERNAL].buffer != NULL;
sctx->graphics_bindless_pointer_dirty =
sctx->compute_bindless_pointer_dirty =
sctx->task_bindless_pointer_dirty = sctx->bindless_descriptors.buffer != NULL;
sctx->compute_shaderbuf_sgprs_dirty = true;
sctx->compute_image_sgprs_dirty = true;
if (sctx->gfx_level >= GFX11)
@@ -2214,6 +2216,7 @@ static void si_upload_shader_descriptors(struct si_context *sctx,
if (descriptors_dirty & BITFIELD_BIT(SI_DESCS_INTERNAL)) {
sctx->graphics_internal_bindings_pointer_dirty = true;
sctx->compute_internal_bindings_pointer_dirty = true;
sctx->task_internal_bindings_pointer_dirty = true;
}
/* Upload descriptors. */
@@ -2357,71 +2360,87 @@ static void si_emit_graphics_shader_pointers(struct si_context *sctx, unsigned i
sctx->shader_pointers_dirty &= ~emit_desc_mask;
}
/**
 * Emit the dirty shader pointers of a compute-style stage into user data
 * registers based at R_00B900_COMPUTE_USER_DATA_0.
 *
 * One of three emission paths is chosen: GFX12 and newer, chips reporting
 * has_set_sh_pairs_packed, or the fallback path. Each path does the same
 * three things in the same order:
 *   1. emit the consecutive shader pointers selected by shader_pointers_mask,
 *   2. emit the SI_DESCS_INTERNAL pointer if *internal_bindings_pointer_dirty,
 *   3. emit the bindless descriptor pointer if *bindless_pointer_dirty.
 * A flag that triggered an emission is reset to false.
 *
 * \param sctx                            context owning the descriptors
 * \param cs                              command buffer handed to radeon_begin()
 * \param shader_pointers_dirty           dirty pointer mask computed by the caller
 * \param shader_pointers_mask            descriptor mask of the stage to emit
 * \param internal_bindings_pointer_dirty in/out per-stage flag, cleared on emission
 * \param bindless_pointer_dirty          in/out per-stage flag, cleared on emission
 *
 * NOTE(review): shader_pointers_dirty is never referenced by name in this
 * body; presumably the *_consecutive_shader_pointers macros read it as a
 * local, so it must keep this exact name - confirm against the macro
 * definitions.
 * NOTE(review): whether the gfx11_/gfx12_push_* helpers target `cs` or a
 * context-level buffer is not visible here - confirm for callers that pass
 * something other than sctx->gfx_cs.
 */
static void si_emit_compute_shader_pointer_packets(struct si_context *sctx,
struct radeon_cmdbuf *cs,
unsigned shader_pointers_dirty,
unsigned shader_pointers_mask,
bool *internal_bindings_pointer_dirty,
bool *bindless_pointer_dirty)
{
radeon_begin(cs);
/* Set shader pointers. */
if (sctx->gfx_level >= GFX12) {
/* GFX12+ path. */
gfx12_push_consecutive_shader_pointers(sctx, shader_pointers_mask,
R_00B900_COMPUTE_USER_DATA_0, compute);
if (*internal_bindings_pointer_dirty) {
gfx12_push_compute_sh_reg(R_00B900_COMPUTE_USER_DATA_0 +
sctx->descriptors[SI_DESCS_INTERNAL].shader_userdata_offset,
sctx->descriptors[SI_DESCS_INTERNAL].gpu_address);
*internal_bindings_pointer_dirty = false;
}
if (*bindless_pointer_dirty) {
gfx12_push_compute_sh_reg(R_00B900_COMPUTE_USER_DATA_0 +
sctx->bindless_descriptors.shader_userdata_offset,
sctx->bindless_descriptors.gpu_address);
*bindless_pointer_dirty = false;
}
} else if (sctx->screen->info.has_set_sh_pairs_packed) {
/* Pre-GFX12 chips that report has_set_sh_pairs_packed. */
gfx11_push_consecutive_shader_pointers(sctx, shader_pointers_mask,
R_00B900_COMPUTE_USER_DATA_0, compute);
if (*internal_bindings_pointer_dirty) {
gfx11_push_compute_sh_reg(R_00B900_COMPUTE_USER_DATA_0 +
sctx->descriptors[SI_DESCS_INTERNAL].shader_userdata_offset,
sctx->descriptors[SI_DESCS_INTERNAL].gpu_address);
*internal_bindings_pointer_dirty = false;
}
if (*bindless_pointer_dirty) {
gfx11_push_compute_sh_reg(R_00B900_COMPUTE_USER_DATA_0 +
sctx->bindless_descriptors.shader_userdata_offset,
sctx->bindless_descriptors.gpu_address);
*bindless_pointer_dirty = false;
}
} else {
/* Fallback path: the helper receives the descriptor struct and the base
 * register rather than an explicit register offset and address.
 */
si_emit_consecutive_shader_pointers(sctx, shader_pointers_mask,
R_00B900_COMPUTE_USER_DATA_0, compute);
if (*internal_bindings_pointer_dirty) {
radeon_emit_one_32bit_pointer(&sctx->descriptors[SI_DESCS_INTERNAL],
R_00B900_COMPUTE_USER_DATA_0);
*internal_bindings_pointer_dirty = false;
}
if (*bindless_pointer_dirty) {
radeon_emit_one_32bit_pointer(&sctx->bindless_descriptors,
R_00B900_COMPUTE_USER_DATA_0);
*bindless_pointer_dirty = false;
}
}
radeon_end();
}
void si_emit_compute_shader_pointers(struct si_context *sctx)
{
/* This does not update internal bindings as that is not needed for compute shaders. */
unsigned descriptors_dirty = sctx->descriptors_dirty &
(BITFIELD_BIT(SI_DESCS_INTERNAL) | SI_DESCS_SHADER_MASK(COMPUTE));
unsigned shader_pointers_dirty = sctx->shader_pointers_dirty | descriptors_dirty;
si_upload_shader_descriptors(sctx, descriptors_dirty);
radeon_begin(&sctx->gfx_cs);
/* Set shader pointers. */
if (sctx->gfx_level >= GFX12) {
gfx12_push_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(COMPUTE),
R_00B900_COMPUTE_USER_DATA_0, compute);
if (sctx->compute_internal_bindings_pointer_dirty) {
gfx12_push_compute_sh_reg(R_00B900_COMPUTE_USER_DATA_0 +
sctx->descriptors[SI_DESCS_INTERNAL].shader_userdata_offset,
sctx->descriptors[SI_DESCS_INTERNAL].gpu_address);
sctx->compute_internal_bindings_pointer_dirty = false;
}
if (sctx->compute_bindless_pointer_dirty) {
gfx12_push_compute_sh_reg(R_00B900_COMPUTE_USER_DATA_0 +
sctx->bindless_descriptors.shader_userdata_offset,
sctx->bindless_descriptors.gpu_address);
sctx->compute_bindless_pointer_dirty = false;
}
} else if (sctx->screen->info.has_set_sh_pairs_packed) {
gfx11_push_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(COMPUTE),
R_00B900_COMPUTE_USER_DATA_0, compute);
if (sctx->compute_internal_bindings_pointer_dirty) {
gfx11_push_compute_sh_reg(R_00B900_COMPUTE_USER_DATA_0 +
sctx->descriptors[SI_DESCS_INTERNAL].shader_userdata_offset,
sctx->descriptors[SI_DESCS_INTERNAL].gpu_address);
sctx->compute_internal_bindings_pointer_dirty = false;
}
if (sctx->compute_bindless_pointer_dirty) {
gfx11_push_compute_sh_reg(R_00B900_COMPUTE_USER_DATA_0 +
sctx->bindless_descriptors.shader_userdata_offset,
sctx->bindless_descriptors.gpu_address);
sctx->compute_bindless_pointer_dirty = false;
}
} else {
si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(COMPUTE),
R_00B900_COMPUTE_USER_DATA_0, compute);
if (sctx->compute_internal_bindings_pointer_dirty) {
radeon_emit_one_32bit_pointer(&sctx->descriptors[SI_DESCS_INTERNAL],
R_00B900_COMPUTE_USER_DATA_0);
sctx->compute_internal_bindings_pointer_dirty = false;
}
if (sctx->compute_bindless_pointer_dirty) {
radeon_emit_one_32bit_pointer(&sctx->bindless_descriptors,
R_00B900_COMPUTE_USER_DATA_0);
sctx->compute_bindless_pointer_dirty = false;
}
}
si_emit_compute_shader_pointer_packets(sctx, &sctx->gfx_cs, shader_pointers_dirty,
SI_DESCS_SHADER_MASK(COMPUTE),
&sctx->compute_internal_bindings_pointer_dirty,
&sctx->compute_bindless_pointer_dirty);
sctx->shader_pointers_dirty &= ~SI_DESCS_SHADER_MASK(COMPUTE);
radeon_begin(&sctx->gfx_cs);
/* Set shader buffer descriptors in user SGPRs. */
struct si_shader_selector *shader = &sctx->cs_shader_state.program->sel;
unsigned num_shaderbufs = shader->cs_num_shaderbufs_in_user_sgprs;
@@ -2464,6 +2483,22 @@ void si_emit_compute_shader_pointers(struct si_context *sctx)
radeon_end();
}
/**
 * Upload the dirty task-shader descriptors (plus the internal bindings) and
 * emit the resulting shader pointers on the gang command buffer
 * (sctx->gfx_cs.gang_cs).
 *
 * The task stage keeps its own internal-bindings and bindless dirty flags;
 * they are handed to the shared packet emitter, which clears the ones it
 * emits. The task bits of sctx->shader_pointers_dirty are cleared at the end.
 */
void si_emit_task_shader_pointers(struct si_context *sctx)
{
   const unsigned task_mask = SI_DESCS_SHADER_MASK(TASK);

   /* Only the internal bindings and the task stage's descriptors matter here. */
   const unsigned dirty_descs =
      sctx->descriptors_dirty & (BITFIELD_BIT(SI_DESCS_INTERNAL) | task_mask);

   /* Snapshot the pointer mask before uploading, as newly dirty descriptors
    * also need their pointers re-emitted.
    */
   const unsigned dirty_pointers = sctx->shader_pointers_dirty | dirty_descs;

   si_upload_shader_descriptors(sctx, dirty_descs);

   si_emit_compute_shader_pointer_packets(sctx, sctx->gfx_cs.gang_cs,
                                          dirty_pointers, task_mask,
                                          &sctx->task_internal_bindings_pointer_dirty,
                                          &sctx->task_bindless_pointer_dirty);

   sctx->shader_pointers_dirty &= ~task_mask;
}
/* BINDLESS */
static void si_init_bindless_descriptors(struct si_context *sctx, struct si_descriptors *desc,
@@ -2539,6 +2574,7 @@ static unsigned si_create_bindless_descriptor(struct si_context *sctx, uint32_t
/* Make sure to re-emit the shader pointers for all stages. */
sctx->graphics_bindless_pointer_dirty = true;
sctx->compute_bindless_pointer_dirty = true;
sctx->task_bindless_pointer_dirty = true;
si_mark_atom_dirty(sctx, &sctx->atoms.s.gfx_shader_pointers);
return desc_slot;

View File

@@ -1250,8 +1250,10 @@ struct si_context {
bool bindless_descriptors_dirty;
bool graphics_internal_bindings_pointer_dirty;
bool compute_internal_bindings_pointer_dirty;
bool task_internal_bindings_pointer_dirty;
bool graphics_bindless_pointer_dirty;
bool compute_bindless_pointer_dirty;
bool task_bindless_pointer_dirty;
bool gs_attribute_ring_pointer_dirty;
/* Allocated bindless handles */

View File

@@ -616,6 +616,7 @@ void si_set_user_data_base(struct si_context *sctx, unsigned shader, uint32_t ne
void si_shader_change_notify(struct si_context *sctx);
void si_update_needs_color_decompress_masks(struct si_context *sctx);
void si_emit_compute_shader_pointers(struct si_context *sctx);
/* Upload dirty task-shader and internal descriptors, then emit the task
 * shader pointers on the gang command buffer (sctx->gfx_cs.gang_cs).
 */
void si_emit_task_shader_pointers(struct si_context *sctx);
void si_set_internal_const_buffer(struct si_context *sctx, uint slot,
const struct pipe_constant_buffer *input);
void si_set_internal_shader_buffer(struct si_context *sctx, uint slot,