diff --git a/src/imagination/vulkan/pds/pvr_pds.c b/src/imagination/vulkan/pds/pvr_pds.c index 0906d995f30..44daff1321f 100644 --- a/src/imagination/vulkan/pds/pvr_pds.c +++ b/src/imagination/vulkan/pds/pvr_pds.c @@ -1766,6 +1766,7 @@ pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program, uint32_t code_size = 0; uint32_t temps_used = 0; uint32_t doutw = 0; + uint32_t doutd = 0; uint32_t barrier_ctrl_word = 0; uint32_t barrier_ctrl_word2 = 0; @@ -1774,7 +1775,7 @@ pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program, * DOUTW for local, and two for global. */ uint32_t work_group_id_ctrl_words[2] = { 0 }; - uint32_t num_work_groups_ctrl_words[2] = { 0 }; + uint32_t num_work_groups_ctrl_words[3] = { 0 }; uint32_t local_id_ctrl_word = 0; uint32_t local_input_register; @@ -1900,6 +1901,12 @@ pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program, pvr_pds_get_constants(&next_constant, 1, &data_size); } + if (has_num_work_groups_regs) { + num_work_groups_ctrl_words[2] = + pvr_pds_get_constants(&next_constant, 1, &data_size); + program->num_workgroups_indirect_src_dma = num_work_groups_ctrl_words[2]; + } + /* For DOUTW */ if (has_local_input_regs) { local_id_ctrl_word = pvr_pds_get_constants(&next_constant, 1, &data_size); @@ -1921,6 +1928,9 @@ pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program, program->num_workgroups_constant_offset_in_dwords[0]; program->num_workgroups_constant_offset_in_dwords[2] = pvr_pds_get_constants(&next_constant, 1, &data_size); + + program->num_workgroups_indirect_src = + pvr_pds_get_constants(&next_constant, 2, &data_size); } if (gen_mode == PDS_GENERATE_DATA_SEGMENT) { @@ -2071,11 +2081,6 @@ pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program, true, dev_info); - /* If we don't want num work-groups Z then this is the last one. - */ - if (!has_num_work_groups_reg[2]) - doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN; - pvr_pds_write_constant32(buffer, num_work_groups_ctrl_words[0], doutw); } /* If we only want one of X or Y then handle them separately. */ @@ -2088,12 +2093,6 @@ pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program, true, dev_info); - /* If we don't want num work-groups Z then this is the last - * one. - */ - if (has_num_work_groups_reg[2]) - doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN; - pvr_pds_write_constant32(buffer, num_work_groups_ctrl_words[0], doutw); @@ -2105,12 +2104,6 @@ pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program, true, dev_info); - /* If we don't want num work-groups Z then this is the last - * one. - */ - if (!has_num_work_groups_reg[2]) - doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN; - pvr_pds_write_constant32(buffer, num_work_groups_ctrl_words[0], doutw); @@ -2122,14 +2115,29 @@ pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program, doutw = pvr_pds_encode_doutw_src1( program->num_work_groups_regs[2], PVR_PDS_DOUTW_LOWER32, - PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE | - PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN, + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE, true, dev_info); pvr_pds_write_constant32(buffer, num_work_groups_ctrl_words[1], doutw); } + if (has_num_work_groups_regs) { + /* This is done in cmd_buffer instead. */ + /* doutd = 3 << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_BSIZE_SHIFT; + */ + doutd = 0; + doutd |= program->num_work_groups_regs[0] + << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_AO_SHIFT; + + doutd |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_CMODE_CACHED | + PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_DEST_COMMON_STORE; + + doutd |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN; + + pvr_pds_write_constant32(buffer, num_work_groups_ctrl_words[2], doutd); + } + /* Handle the local IDs. */ if (has_local_input_reg[1] || has_local_input_reg[2]) { uint32_t dest_reg; @@ -2310,6 +2318,20 @@ pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program, 1)); /* SRC0 */ } + /* TODO: this is hardcoded to assume that all three num workgroup + * elements are used... + */ + if (has_num_work_groups_regs) { + APPEND(pvr_pds_encode_doutd( + /* cc */ 0, + /* END */ 0, + /* SRC1 */ num_work_groups_ctrl_words[2], /* DOUTD 32-bit Src1 */ + /* SRC0 */ program->num_workgroups_indirect_src >> 1)); /* DOUTD + * 64-bit + * Src0. + */ + } + /* Issue the task to the USC. */ if (program->kick_usc && program->has_coefficient_update_task) { APPEND(pvr_pds_encode_doutu(0, /* cc */ diff --git a/src/imagination/vulkan/pds/pvr_pds.h b/src/imagination/vulkan/pds/pvr_pds.h index 63827c92bca..f930546320b 100644 --- a/src/imagination/vulkan/pds/pvr_pds.h +++ b/src/imagination/vulkan/pds/pvr_pds.h @@ -564,6 +564,8 @@ struct pvr_pds_compute_shader_program { uint32_t base_workgroup_constant_offset_in_dwords[3]; uint32_t num_workgroups_constant_offset_in_dwords[3]; + uint32_t num_workgroups_indirect_src; + uint32_t num_workgroups_indirect_src_dma; bool kick_usc; bool conditional_render; diff --git a/src/imagination/vulkan/pvr_cmd_buffer.c b/src/imagination/vulkan/pvr_cmd_buffer.c index 845ad4377a0..f2c84416190 100644 --- a/src/imagination/vulkan/pvr_cmd_buffer.c +++ b/src/imagination/vulkan/pvr_cmd_buffer.c @@ -4228,6 +4228,20 @@ static void pvr_compute_update_kernel( } if (uses_num_wgs) { + if (indirect_addr.addr) { + unsigned offset = + pipeline->num_workgroups_indirect_src_patching_offset; + + uint64_t *pds_data64 = + pvr_bo_suballoc_get_map_addr(pds_data_upload.pvr_bo); + pds_data64[offset / 2] = indirect_addr.addr; + + offset = pipeline->num_workgroups_indirect_src_dma_patching_offset; + + pds_data[offset] |= + 3 << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_BSIZE_SHIFT; + } + unsigned offset = pipeline->num_workgroups_data_patching_offset; for (unsigned u = 0; u < PVR_WORKGROUP_DIMENSIONS; ++u) { pds_data[offset + u] = global_workgroup_size[u]; diff --git a/src/imagination/vulkan/pvr_pipeline.c b/src/imagination/vulkan/pvr_pipeline.c index 691a490cf0e..417f1cf221f 100644 --- a/src/imagination/vulkan/pvr_pipeline.c +++ b/src/imagination/vulkan/pvr_pipeline.c @@ -844,6 +844,10 @@ static VkResult pvr_pds_compute_program_create_and_upload( for (unsigned u = 0; u < PVR_WORKGROUP_DIMENSIONS; ++u) { data_buffer[offset + u] = 0; } + + offset = program.num_workgroups_indirect_src; + data_buffer[offset] = 0; + data_buffer[offset + 1] = 0; } /* FIXME: Figure out the define for alignment of 16. */ @@ -874,9 +878,17 @@ static VkResult pvr_pds_compute_program_create_and_upload( } compute_pipeline->num_workgroups_data_patching_offset = ~0u; + compute_pipeline->num_workgroups_indirect_src_patching_offset = ~0u; + compute_pipeline->num_workgroups_indirect_src_dma_patching_offset = ~0u; if (uses_num_wgs) { compute_pipeline->num_workgroups_data_patching_offset = program.num_workgroups_constant_offset_in_dwords[0]; + + compute_pipeline->num_workgroups_indirect_src_patching_offset = + program.num_workgroups_indirect_src; + + compute_pipeline->num_workgroups_indirect_src_dma_patching_offset = + program.num_workgroups_indirect_src_dma; } compute_pipeline->pds_cs_program_info = (struct pvr_pds_info){ diff --git a/src/imagination/vulkan/pvr_private.h b/src/imagination/vulkan/pvr_private.h index 0b3772dee0a..6ef78746b32 100644 --- a/src/imagination/vulkan/pvr_private.h +++ b/src/imagination/vulkan/pvr_private.h @@ -917,6 +917,8 @@ struct pvr_compute_pipeline { uint32_t *pds_cs_data_section; uint32_t base_workgroup_data_patching_offset; uint32_t num_workgroups_data_patching_offset; + uint32_t num_workgroups_indirect_src_patching_offset; + uint32_t num_workgroups_indirect_src_dma_patching_offset; }; struct pvr_graphics_pipeline {