pvr: handle num workgroups in indirect compute

Signed-off-by: Simon Perretta <simon.perretta@imgtec.com>
Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36412>
This commit is contained in:
Simon Perretta
2025-05-21 09:55:17 +01:00
committed by Marge Bot
parent c5cee9dfe4
commit 12979f2ba7
5 changed files with 72 additions and 20 deletions
+42 -20
View File
@@ -1766,6 +1766,7 @@ pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program,
uint32_t code_size = 0;
uint32_t temps_used = 0;
uint32_t doutw = 0;
uint32_t doutd = 0;
uint32_t barrier_ctrl_word = 0;
uint32_t barrier_ctrl_word2 = 0;
@@ -1774,7 +1775,7 @@ pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program,
* DOUTW for local, and two for global.
*/
uint32_t work_group_id_ctrl_words[2] = { 0 };
uint32_t num_work_groups_ctrl_words[2] = { 0 };
uint32_t num_work_groups_ctrl_words[3] = { 0 };
uint32_t local_id_ctrl_word = 0;
uint32_t local_input_register;
@@ -1900,6 +1901,12 @@ pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program,
pvr_pds_get_constants(&next_constant, 1, &data_size);
}
if (has_num_work_groups_regs) {
num_work_groups_ctrl_words[2] =
pvr_pds_get_constants(&next_constant, 1, &data_size);
program->num_workgroups_indirect_src_dma = num_work_groups_ctrl_words[2];
}
/* For DOUTW */
if (has_local_input_regs) {
local_id_ctrl_word = pvr_pds_get_constants(&next_constant, 1, &data_size);
@@ -1921,6 +1928,9 @@ pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program,
program->num_workgroups_constant_offset_in_dwords[0];
program->num_workgroups_constant_offset_in_dwords[2] =
pvr_pds_get_constants(&next_constant, 1, &data_size);
program->num_workgroups_indirect_src =
pvr_pds_get_constants(&next_constant, 2, &data_size);
}
if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
@@ -2071,11 +2081,6 @@ pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program,
true,
dev_info);
/* If we don't want num work-groups Z then this is the last one.
*/
if (!has_num_work_groups_reg[2])
doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
pvr_pds_write_constant32(buffer, num_work_groups_ctrl_words[0], doutw);
}
/* If we only want one of X or Y then handle them separately. */
@@ -2088,12 +2093,6 @@ pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program,
true,
dev_info);
/* If we don't want num work-groups Z then this is the last
* one.
*/
if (has_num_work_groups_reg[2])
doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
pvr_pds_write_constant32(buffer,
num_work_groups_ctrl_words[0],
doutw);
@@ -2105,12 +2104,6 @@ pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program,
true,
dev_info);
/* If we don't want num work-groups Z then this is the last
* one.
*/
if (!has_num_work_groups_reg[2])
doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
pvr_pds_write_constant32(buffer,
num_work_groups_ctrl_words[0],
doutw);
@@ -2122,14 +2115,29 @@ pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program,
doutw = pvr_pds_encode_doutw_src1(
program->num_work_groups_regs[2],
PVR_PDS_DOUTW_LOWER32,
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE |
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN,
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
true,
dev_info);
pvr_pds_write_constant32(buffer, num_work_groups_ctrl_words[1], doutw);
}
if (has_num_work_groups_regs) {
/* This is done in cmd_buffer instead. */
/* doutd = 3 << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_BSIZE_SHIFT;
*/
doutd = 0;
doutd |= program->num_work_groups_regs[0]
<< PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_AO_SHIFT;
doutd |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_CMODE_CACHED |
PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_DEST_COMMON_STORE;
doutd |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN;
pvr_pds_write_constant32(buffer, num_work_groups_ctrl_words[2], doutd);
}
/* Handle the local IDs. */
if (has_local_input_reg[1] || has_local_input_reg[2]) {
uint32_t dest_reg;
@@ -2310,6 +2318,20 @@ pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program,
1)); /* SRC0 */
}
/* TODO: this is hardcoded to assume that all three num workgroup
* elements are used...
*/
if (has_num_work_groups_regs) {
APPEND(pvr_pds_encode_doutd(
/* cc */ 0,
/* END */ 0,
/* SRC1 */ num_work_groups_ctrl_words[2], /* DOUTD 32-bit Src1 */
/* SRC0 */ program->num_workgroups_indirect_src >> 1)); /* DOUTD
* 64-bit
* Src0.
*/
}
/* Issue the task to the USC. */
if (program->kick_usc && program->has_coefficient_update_task) {
APPEND(pvr_pds_encode_doutu(0, /* cc */
+2
View File
@@ -564,6 +564,8 @@ struct pvr_pds_compute_shader_program {
uint32_t base_workgroup_constant_offset_in_dwords[3];
uint32_t num_workgroups_constant_offset_in_dwords[3];
uint32_t num_workgroups_indirect_src;
uint32_t num_workgroups_indirect_src_dma;
bool kick_usc;
bool conditional_render;
+14
View File
@@ -4228,6 +4228,20 @@ static void pvr_compute_update_kernel(
}
if (uses_num_wgs) {
if (indirect_addr.addr) {
unsigned offset =
pipeline->num_workgroups_indirect_src_patching_offset;
uint64_t *pds_data64 =
pvr_bo_suballoc_get_map_addr(pds_data_upload.pvr_bo);
pds_data64[offset / 2] = indirect_addr.addr;
offset = pipeline->num_workgroups_indirect_src_dma_patching_offset;
pds_data[offset] |=
3 << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_BSIZE_SHIFT;
}
unsigned offset = pipeline->num_workgroups_data_patching_offset;
for (unsigned u = 0; u < PVR_WORKGROUP_DIMENSIONS; ++u) {
pds_data[offset + u] = global_workgroup_size[u];
+12
View File
@@ -844,6 +844,10 @@ static VkResult pvr_pds_compute_program_create_and_upload(
for (unsigned u = 0; u < PVR_WORKGROUP_DIMENSIONS; ++u) {
data_buffer[offset + u] = 0;
}
offset = program.num_workgroups_indirect_src;
data_buffer[offset] = 0;
data_buffer[offset + 1] = 0;
}
/* FIXME: Figure out the define for alignment of 16. */
@@ -874,9 +878,17 @@ static VkResult pvr_pds_compute_program_create_and_upload(
}
compute_pipeline->num_workgroups_data_patching_offset = ~0u;
compute_pipeline->num_workgroups_indirect_src_patching_offset = ~0u;
compute_pipeline->num_workgroups_indirect_src_dma_patching_offset = ~0u;
if (uses_num_wgs) {
compute_pipeline->num_workgroups_data_patching_offset =
program.num_workgroups_constant_offset_in_dwords[0];
compute_pipeline->num_workgroups_indirect_src_patching_offset =
program.num_workgroups_indirect_src;
compute_pipeline->num_workgroups_indirect_src_dma_patching_offset =
program.num_workgroups_indirect_src_dma;
}
compute_pipeline->pds_cs_program_info = (struct pvr_pds_info){
+2
View File
@@ -917,6 +917,8 @@ struct pvr_compute_pipeline {
uint32_t *pds_cs_data_section;
uint32_t base_workgroup_data_patching_offset;
uint32_t num_workgroups_data_patching_offset;
uint32_t num_workgroups_indirect_src_patching_offset;
uint32_t num_workgroups_indirect_src_dma_patching_offset;
};
struct pvr_graphics_pipeline {