radv: compute tessellation workgroup info with ac_nir_compute_tess_wg_info

What this patch does:

- ac: remove ac_compute_tess_lds_size() (definition and prototype).
- radv: replace radv_get_tcs_num_patches(), radv_get_tess_lds_size() and the
  static helper radv_get_tess_patch_size() with one function,
  radv_get_tess_wg_info(). It converts the number of LDS inputs to a vertex
  stride with get_tcs_input_vertex_stride() and forwards everything to
  ac_nir_compute_tess_wg_info(), which reports the number of patches per
  workgroup and the LDS size through two output pointers.
- radv_shader_info: drop num_lds_per_vertex_outputs / num_lds_per_patch_outputs
  and instead keep the TCS outputs_read / outputs_written /
  patch_outputs_read / patch_outputs_written masks plus
  all_invocations_define_tess_levels (taken from nir_gather_tcs_info()).
  radv_emit_patch_control_points() uses these to rebuild the shader_info
  fields it hands to radv_get_tess_wg_info() when the number of patch control
  points is dynamic.

Review fixes folded into this version of the patch:

- gather_shader_info_tcs(): tcs_patch_outputs_read was filled from
  nir->info.patch_inputs_read. The code being replaced used
  nir->info.patch_outputs_read for the per-patch output mask, and the field is
  later copied into shader_info::patch_outputs_read, so it now reads
  nir->info.patch_outputs_read.
- radv_emit_patch_control_points(): the temporary struct shader_info is now
  zero-initialized, so fields that are not explicitly assigned have a defined
  value when the struct is passed on by pointer.

Notes:

- Indentation and continuation alignment were reconstructed from
  whitespace-collapsed text. If a hunk fails on whitespace, apply with
  "git apply --ignore-whitespace".
- The "index" lines are kept from the original patch and were not recomputed
  for the two modified lines.
---
diff --git a/src/amd/common/ac_shader_util.c b/src/amd/common/ac_shader_util.c
index 8451fab4fdb..e48ac89d777 100644
--- a/src/amd/common/ac_shader_util.c
+++ b/src/amd/common/ac_shader_util.c
@@ -1209,20 +1209,6 @@ uint32_t ac_compute_num_tess_patches(const struct radeon_info *info, uint32_t nu
    return num_patches;
 }
 
-uint32_t
-ac_compute_tess_lds_size(const struct radeon_info *info, uint32_t lds_per_patch, uint32_t num_patches)
-{
-   unsigned lds_size = lds_per_patch * num_patches;
-
-   /* The first vec4 is reserved for the tf0/1 shader message group vote. */
-   if (info->gfx_level >= GFX11)
-      lds_size += AC_HS_MSG_VOTE_LDS_BYTES;
-
-   assert(lds_size <= (info->gfx_level >= GFX9 ? 65536 : 32768));
-
-   return align(lds_size, info->lds_encode_granularity) / info->lds_encode_granularity;
-}
-
 uint32_t ac_apply_cu_en(uint32_t value, uint32_t clear_mask, unsigned value_shift,
                         const struct radeon_info *info)
 {
diff --git a/src/amd/common/ac_shader_util.h b/src/amd/common/ac_shader_util.h
index 9414973397d..80f9ccd09d7 100644
--- a/src/amd/common/ac_shader_util.h
+++ b/src/amd/common/ac_shader_util.h
@@ -306,9 +306,6 @@ uint32_t ac_compute_num_tess_patches(const struct radeon_info *info, uint32_t nu
                                      uint32_t lds_per_patch, uint32_t wave_size,
                                      bool tess_uses_primid);
 
-uint32_t ac_compute_tess_lds_size(const struct radeon_info *info,
-                                  uint32_t lds_per_patch, uint32_t num_patches);
-
 uint32_t ac_apply_cu_en(uint32_t value, uint32_t clear_mask, unsigned value_shift,
                         const struct radeon_info *info);
 
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 8a96a79dd84..df57e140caf 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -3495,17 +3495,21 @@ radv_emit_patch_control_points(struct radv_cmd_buffer *cmd_buffer)
     * is dynamic.
     */
    if (cmd_buffer->state.uses_dynamic_patch_control_points) {
-      /* Compute the number of patches. */
-      cmd_buffer->state.tess_num_patches = radv_get_tcs_num_patches(
-         pdev, d->vk.ts.patch_control_points, tcs->info.tcs.tcs_vertices_out, vs->info.vs.num_linked_outputs,
-         tcs->info.tcs.num_lds_per_vertex_outputs, tcs->info.tcs.num_lds_per_patch_outputs,
-         tcs->info.tcs.num_linked_outputs, tcs->info.tcs.num_linked_patch_outputs);
+      struct shader_info tcs_info = {0};
 
-      /* Compute the LDS size. */
-      cmd_buffer->state.tess_lds_size =
-         radv_get_tess_lds_size(pdev, d->vk.ts.patch_control_points, tcs->info.tcs.tcs_vertices_out,
-                                vs->info.vs.num_linked_outputs, cmd_buffer->state.tess_num_patches,
-                                tcs->info.tcs.num_lds_per_vertex_outputs, tcs->info.tcs.num_lds_per_patch_outputs);
+      /* No other shader_info fields are needed. */
+      tcs_info.tess.tcs_vertices_out = tcs->info.tcs.tcs_vertices_out;
+      /* These are only used to determine the LDS layout for TCS outputs. */
+      tcs_info.outputs_read = tcs->info.tcs.tcs_outputs_read;
+      tcs_info.outputs_written = tcs->info.tcs.tcs_outputs_written;
+      tcs_info.patch_outputs_read = tcs->info.tcs.tcs_patch_outputs_read;
+      tcs_info.patch_outputs_written = tcs->info.tcs.tcs_patch_outputs_written;
+
+      radv_get_tess_wg_info(pdev, &tcs_info, d->vk.ts.patch_control_points,
+                            /* TODO: This should be only inputs in LDS (not VGPR inputs) to reduce LDS usage */
+                            vs->info.vs.num_linked_outputs, tcs->info.tcs.num_linked_outputs,
+                            tcs->info.tcs.num_linked_patch_outputs, tcs->info.tcs.all_invocations_define_tess_levels,
+                            &cmd_buffer->state.tess_num_patches, &cmd_buffer->state.tess_lds_size);
    }
 
    ls_hs_config = S_028B58_NUM_PATCHES(cmd_buffer->state.tess_num_patches) |
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 689b667624c..26e408e6a06 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -3567,43 +3567,17 @@ radv_get_user_sgpr(const struct radv_shader *shader, int idx)
    return offset ? ((offset - SI_SH_REG_OFFSET) >> 2) : 0;
 }
 
-static uint32_t
-radv_get_tess_patch_size(uint32_t tcs_num_input_vertices, uint32_t tcs_num_output_vertices, uint32_t tcs_num_inputs,
-                         uint32_t tcs_num_lds_outputs, uint32_t tcs_num_lds_patch_outputs)
+void
+radv_get_tess_wg_info(const struct radv_physical_device *pdev, const struct shader_info *tcs_info,
+                      unsigned tcs_num_input_vertices, unsigned tcs_num_lds_inputs, unsigned tcs_num_vram_outputs,
+                      unsigned tcs_num_vram_patch_outputs, bool all_invocations_define_tess_levels,
+                      unsigned *num_patches_per_wg, unsigned *hw_lds_size)
 {
-   const uint32_t input_vertex_size = get_tcs_input_vertex_stride(tcs_num_inputs);
-   const uint32_t input_patch_size = tcs_num_input_vertices * input_vertex_size;
-   const uint32_t lds_output_vertex_size = tcs_num_lds_outputs * 16;
-   const uint32_t lds_pervertex_output_patch_size = tcs_num_output_vertices * lds_output_vertex_size;
-   const uint32_t lds_output_patch_size = lds_pervertex_output_patch_size + tcs_num_lds_patch_outputs * 16;
+   const uint32_t lds_input_vertex_size = get_tcs_input_vertex_stride(tcs_num_lds_inputs);
 
-   return input_patch_size + lds_output_patch_size;
-}
-
-uint32_t
-radv_get_tcs_num_patches(const struct radv_physical_device *pdev, unsigned tcs_num_input_vertices,
-                         unsigned tcs_num_output_vertices, unsigned tcs_num_inputs, unsigned tcs_num_lds_outputs,
-                         unsigned tcs_num_lds_patch_outputs, unsigned tcs_num_vram_outputs,
-                         unsigned tcs_num_vram_patch_outputs)
-{
-   const uint32_t lds_per_patch = radv_get_tess_patch_size(
-      tcs_num_input_vertices, tcs_num_output_vertices, tcs_num_inputs, tcs_num_lds_outputs, tcs_num_lds_patch_outputs);
-   const uint32_t vram_per_patch = radv_get_tess_patch_size(tcs_num_input_vertices, tcs_num_output_vertices, 0,
-                                                            tcs_num_vram_outputs, tcs_num_vram_patch_outputs);
-
-   return ac_compute_num_tess_patches(&pdev->info, tcs_num_input_vertices, tcs_num_output_vertices, vram_per_patch,
-                                      lds_per_patch, pdev->ge_wave_size, false);
-}
-
-uint32_t
-radv_get_tess_lds_size(const struct radv_physical_device *pdev, uint32_t tcs_num_input_vertices,
-                       uint32_t tcs_num_output_vertices, uint32_t tcs_num_inputs, uint32_t tcs_num_patches,
-                       uint32_t tcs_num_lds_outputs, uint32_t tcs_num_lds_patch_outputs)
-{
-   const uint32_t lds_per_patch = radv_get_tess_patch_size(
-      tcs_num_input_vertices, tcs_num_output_vertices, tcs_num_inputs, tcs_num_lds_outputs, tcs_num_lds_patch_outputs);
-
-   return ac_compute_tess_lds_size(&pdev->info, lds_per_patch, tcs_num_patches);
+   ac_nir_compute_tess_wg_info(&pdev->info, tcs_info, pdev->ge_wave_size, false, all_invocations_define_tess_levels,
+                               tcs_num_input_vertices, lds_input_vertex_size, tcs_num_vram_outputs,
+                               tcs_num_vram_patch_outputs, num_patches_per_wg, hw_lds_size);
 }
 
 VkResult
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index 403b6a45715..205f3c8cf15 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -664,14 +664,10 @@ get_tcs_input_vertex_stride(unsigned tcs_num_inputs)
    return stride;
 }
 
-uint32_t radv_get_tcs_num_patches(const struct radv_physical_device *pdev, unsigned tcs_num_input_vertices,
-                                  unsigned tcs_num_output_vertices, unsigned tcs_num_inputs,
-                                  unsigned tcs_num_lds_outputs, unsigned tcs_num_lds_patch_outputs,
-                                  unsigned tcs_num_vram_outputs, unsigned tcs_num_vram_patch_outputs);
-
-uint32_t radv_get_tess_lds_size(const struct radv_physical_device *pdev, uint32_t tcs_num_input_vertices,
-                                uint32_t tcs_num_output_vertices, uint32_t tcs_num_inputs, uint32_t tcs_num_patches,
-                                uint32_t tcs_num_lds_outputs, uint32_t tcs_num_lds_patch_outputs);
+void radv_get_tess_wg_info(const struct radv_physical_device *pdev, const struct shader_info *tcs_info,
+                           unsigned tcs_num_input_vertices, unsigned tcs_num_lds_inputs, unsigned tcs_num_vram_outputs,
+                           unsigned tcs_num_vram_patch_outputs, bool all_invocations_define_tess_levels,
+                           unsigned *num_patches_per_wg, unsigned *hw_lds_size);
 
 void radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage,
                     const struct radv_graphics_state_key *gfx_state);
diff --git a/src/amd/vulkan/radv_shader_info.c b/src/amd/vulkan/radv_shader_info.c
index 4a9ed3ba113..002339ec2d8 100644
--- a/src/amd/vulkan/radv_shader_info.c
+++ b/src/amd/vulkan/radv_shader_info.c
@@ -633,17 +633,18 @@ gather_shader_info_tcs(struct radv_device *device, const nir_shader *nir,
                        const struct radv_graphics_state_key *gfx_state, struct radv_shader_info *info)
 {
    const struct radv_physical_device *pdev = radv_device_physical(device);
+   nir_tcs_info tcs_info;
 
-   const uint64_t tess_lvl_mask = VARYING_BIT_TESS_LEVEL_OUTER | VARYING_BIT_TESS_LEVEL_INNER;
-   const uint64_t per_vtx_out_mask = nir->info.outputs_read & nir->info.outputs_written & ~tess_lvl_mask;
-   const uint64_t tess_lvl_out_mask = nir->info.outputs_written & tess_lvl_mask;
-   const uint32_t per_patch_out_mask = nir->info.patch_outputs_read & nir->info.patch_outputs_written;
+   nir_gather_tcs_info(nir, &tcs_info, nir->info.tess._primitive_mode, nir->info.tess.spacing);
 
-   info->tcs.num_lds_per_vertex_outputs = util_bitcount64(per_vtx_out_mask);
-   info->tcs.num_lds_per_patch_outputs = util_bitcount64(tess_lvl_out_mask) + util_bitcount(per_patch_out_mask);
+   info->tcs.tcs_outputs_read = nir->info.outputs_read;
+   info->tcs.tcs_outputs_written = nir->info.outputs_written;
+   info->tcs.tcs_patch_outputs_read = nir->info.patch_outputs_read;
+   info->tcs.tcs_patch_outputs_written = nir->info.patch_outputs_written;
    info->tcs.tcs_vertices_out = nir->info.tess.tcs_vertices_out;
    info->tcs.tes_inputs_read = ~0ULL;
    info->tcs.tes_patch_inputs_read = ~0ULL;
+   info->tcs.all_invocations_define_tess_levels = tcs_info.all_invocations_define_tess_levels;
 
    if (!info->inputs_linked)
       info->tcs.num_linked_inputs = util_last_bit64(radv_gather_unlinked_io_mask(nir->info.inputs_read));
@@ -655,16 +656,12 @@ gather_shader_info_tcs(struct radv_device *device, const nir_shader *nir,
    }
 
    if (gfx_state->ts.patch_control_points) {
-      /* Number of tessellation patches per workgroup processed by the current pipeline. */
-      info->num_tess_patches = radv_get_tcs_num_patches(
-         pdev, gfx_state->ts.patch_control_points, nir->info.tess.tcs_vertices_out, info->tcs.num_linked_inputs,
-         info->tcs.num_lds_per_vertex_outputs, info->tcs.num_lds_per_patch_outputs, info->tcs.num_linked_outputs,
-         info->tcs.num_linked_patch_outputs);
 
-      /* LDS size used by VS+TCS for storing TCS inputs and outputs. */
-      info->tcs.num_lds_blocks = radv_get_tess_lds_size(
-         pdev, gfx_state->ts.patch_control_points, nir->info.tess.tcs_vertices_out, info->tcs.num_linked_inputs,
-         info->num_tess_patches, info->tcs.num_lds_per_vertex_outputs, info->tcs.num_lds_per_patch_outputs);
+      radv_get_tess_wg_info(pdev, &nir->info, gfx_state->ts.patch_control_points,
+                            /* TODO: This should be only inputs in LDS (not VGPR inputs) to reduce LDS usage */
+                            info->tcs.num_linked_inputs, info->tcs.num_linked_outputs,
+                            info->tcs.num_linked_patch_outputs, tcs_info.all_invocations_define_tess_levels,
+                            &info->num_tess_patches, &info->tcs.num_lds_blocks);
    }
 }
 
diff --git a/src/amd/vulkan/radv_shader_info.h b/src/amd/vulkan/radv_shader_info.h
index b35968c217f..e904e4f4757 100644
--- a/src/amd/vulkan/radv_shader_info.h
+++ b/src/amd/vulkan/radv_shader_info.h
@@ -235,14 +235,17 @@ struct radv_shader_info {
    struct {
       uint64_t tes_inputs_read;
       uint64_t tes_patch_inputs_read;
+      uint64_t tcs_outputs_read;
+      uint64_t tcs_outputs_written;
+      uint32_t tcs_patch_outputs_read;
+      uint32_t tcs_patch_outputs_written;
       unsigned tcs_vertices_out;
       uint32_t num_lds_blocks;
       uint8_t num_linked_inputs;          /* Number of reserved per-vertex input slots in LDS. */
       uint8_t num_linked_outputs;         /* Number of reserved per-vertex output slots in VRAM. */
       uint8_t num_linked_patch_outputs;   /* Number of reserved per-patch output slots in VRAM. */
-      uint8_t num_lds_per_vertex_outputs; /* Number of reserved per-vertex output slots in LDS. */
-      uint8_t num_lds_per_patch_outputs;  /* Number of reserved per-patch output slots in LDS. */
       bool tes_reads_tess_factors : 1;
+      bool all_invocations_define_tess_levels : 1;
    } tcs;
    struct {
       enum mesa_prim output_prim;