From 0543394bfa7908e1c4d6bb97deedd6ee3a954ffb Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Thu, 15 Feb 2024 12:11:00 +0100 Subject: [PATCH] radv: move mesh_fast_launch_2 to radv_physical_device Signed-off-by: Samuel Pitoiset Part-of: --- src/amd/vulkan/radv_cmd_buffer.c | 6 +++--- src/amd/vulkan/radv_device.c | 5 +---- src/amd/vulkan/radv_device_generated_commands.c | 4 ++-- src/amd/vulkan/radv_physical_device.c | 3 +++ src/amd/vulkan/radv_pipeline_graphics.c | 11 ++++++----- src/amd/vulkan/radv_private.h | 6 +++--- src/amd/vulkan/radv_shader.c | 4 ++-- src/amd/vulkan/radv_shader_args.c | 4 ++-- src/amd/vulkan/radv_shader_info.c | 2 +- 9 files changed, 23 insertions(+), 22 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index d2701018f06..506d5e0a640 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -8184,7 +8184,7 @@ radv_cs_emit_indirect_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint3 uint32_t draw_id_enable = !!cmd_buffer->state.uses_drawid; uint32_t draw_id_reg = !draw_id_enable ? 0 : (base_reg + (xyz_dim_enable ? 12 : 0) - SI_SH_REG_OFFSET) >> 2; - uint32_t mode1_enable = !cmd_buffer->device->mesh_fast_launch_2; + uint32_t mode1_enable = !cmd_buffer->device->physical_device->mesh_fast_launch_2; radeon_emit(cs, PKT3(PKT3_DISPATCH_MESH_INDIRECT_MULTI, 7, predicating) | PKT3_RESET_FILTER_CAM_S(1)); radeon_emit(cs, 0); /* data_offset */ @@ -8283,7 +8283,7 @@ radv_cs_emit_dispatch_taskmesh_gfx_packet(struct radv_cmd_buffer *cmd_buffer) uint32_t xyz_dim_en = mesh_shader->info.cs.uses_grid_size; uint32_t xyz_dim_reg = !xyz_dim_en ? 0 : (cmd_buffer->state.vtx_base_sgpr - SI_SH_REG_OFFSET) >> 2; uint32_t ring_entry_reg = ((mesh_shader->info.user_data_0 - SI_SH_REG_OFFSET) >> 2) + ring_entry_loc->sgpr_idx; - uint32_t mode1_en = !cmd_buffer->device->mesh_fast_launch_2; + uint32_t mode1_en = !cmd_buffer->device->physical_device->mesh_fast_launch_2; uint32_t linear_dispatch_en = cmd_buffer->state.shaders[MESA_SHADER_TASK]->info.cs.linear_taskmesh_dispatch; const bool sqtt_en = !!cmd_buffer->device->sqtt.bo; @@ -8587,7 +8587,7 @@ radv_emit_direct_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t x radv_emit_userdata_mesh(cmd_buffer, x, y, z); - if (cmd_buffer->device->mesh_fast_launch_2) { + if (cmd_buffer->device->physical_device->mesh_fast_launch_2) { if (!view_mask) { radv_cs_emit_mesh_dispatch_packet(cmd_buffer, x, y, z); } else { diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index e4b72471758..72c7c2c8632 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -685,7 +685,7 @@ radv_device_init_cache_key(struct radv_device *device) device->vk.enabled_features.image2DViewOf3D && device->physical_device->rad_info.gfx_level == GFX9; key->invariant_geom = !!(device->instance->debug_flags & RADV_DEBUG_INVARIANT_GEOM); key->lower_discard_to_demote = !!(device->instance->debug_flags & RADV_DEBUG_DISCARD_TO_DEMOTE); - key->mesh_fast_launch_2 = device->mesh_fast_launch_2; + key->mesh_fast_launch_2 = device->physical_device->mesh_fast_launch_2; key->mesh_shader_queries = device->vk.enabled_features.meshShaderQueries; key->no_fmask = !!(device->instance->debug_flags & RADV_DEBUG_NO_FMASK); key->no_rt = !!(device->instance->debug_flags & RADV_DEBUG_NO_RT); @@ -829,9 +829,6 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr device->pbb_allowed = device->physical_device->rad_info.gfx_level >= GFX9 && !(device->instance->debug_flags & RADV_DEBUG_NOBINNING); - device->mesh_fast_launch_2 = device->physical_device->rad_info.gfx_level >= GFX11 && - !(device->instance->debug_flags & RADV_DEBUG_NO_GS_FAST_LAUNCH_2); - device->disable_trunc_coord = device->instance->drirc.disable_trunc_coord; if (device->instance->vk.app_info.engine_name && !strcmp(device->instance->vk.app_info.engine_name, "DXVK")) { diff --git a/src/amd/vulkan/radv_device_generated_commands.c b/src/amd/vulkan/radv_device_generated_commands.c index 9aec28c0658..ff46436b328 100644 --- a/src/amd/vulkan/radv_device_generated_commands.c +++ b/src/amd/vulkan/radv_device_generated_commands.c @@ -91,7 +91,7 @@ radv_get_sequence_size_graphics(const struct radv_indirect_command_layout *layou } else { if (layout->draw_mesh_tasks) { /* userdata writes + instance count + non-indexed draw */ - *cmd_size += (6 + 2 + (device->mesh_fast_launch_2 ? 5 : 3)) * 4; + *cmd_size += (6 + 2 + (device->physical_device->mesh_fast_launch_2 ? 5 : 3)) * 4; } else { /* userdata writes + instance count + non-indexed draw */ *cmd_size += (5 + 2 + 3) * 4; @@ -1181,7 +1181,7 @@ dgc_emit_draw_mesh_tasks(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_ dgc_emit_userdata_mesh(b, cs, vtx_base_sgpr, x, y, z, sequence_id, device); dgc_emit_instance_count(b, cs, nir_imm_int(b, 1)); - if (device->mesh_fast_launch_2) { + if (device->physical_device->mesh_fast_launch_2) { dgc_emit_dispatch_mesh_direct(b, cs, x, y, z); } else { nir_def *vertex_count = nir_imul(b, x, nir_imul(b, y, z)); diff --git a/src/amd/vulkan/radv_physical_device.c b/src/amd/vulkan/radv_physical_device.c index 5f34a05e933..b39badffc1c 100644 --- a/src/amd/vulkan/radv_physical_device.c +++ b/src/amd/vulkan/radv_physical_device.c @@ -1992,6 +1992,9 @@ radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm device->emulate_ngg_gs_query_pipeline_stat = device->use_ngg && device->rad_info.gfx_level < GFX11; + device->mesh_fast_launch_2 = + device->rad_info.gfx_level >= GFX11 && !(device->instance->debug_flags & RADV_DEBUG_NO_GS_FAST_LAUNCH_2); + device->emulate_mesh_shader_queries = device->rad_info.gfx_level == GFX10_3; /* Determine the number of threads per wave for all stages. */ diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c index b1dfc081a1c..c6fd1bcd267 100644 --- a/src/amd/vulkan/radv_pipeline_graphics.c +++ b/src/amd/vulkan/radv_pipeline_graphics.c @@ -2510,7 +2510,7 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac active_nir_stages |= mesa_to_vk_shader_stage(i); } - if (!device->mesh_fast_launch_2 && stages[MESA_SHADER_MESH].nir && + if (!device->physical_device->mesh_fast_launch_2 && stages[MESA_SHADER_MESH].nir && BITSET_TEST(stages[MESA_SHADER_MESH].nir->info.system_values_read, SYSTEM_VALUE_WORKGROUP_ID)) { nir_shader *mesh = stages[MESA_SHADER_MESH].nir; nir_shader *task = stages[MESA_SHADER_TASK].nir; @@ -3331,11 +3331,12 @@ radv_emit_mesh_shader(const struct radv_device *device, struct radeon_cmdbuf *ct const struct radv_physical_device *pdevice = device->physical_device; radv_emit_hw_ngg(device, ctx_cs, cs, NULL, ms); - radeon_set_context_reg(ctx_cs, R_028B38_VGT_GS_MAX_VERT_OUT, - device->mesh_fast_launch_2 ? ms->info.ngg_info.max_out_verts : ms->info.workgroup_size); + radeon_set_context_reg( + ctx_cs, R_028B38_VGT_GS_MAX_VERT_OUT, + device->physical_device->mesh_fast_launch_2 ? ms->info.ngg_info.max_out_verts : ms->info.workgroup_size); radeon_set_uconfig_reg_idx(pdevice, ctx_cs, R_030908_VGT_PRIMITIVE_TYPE, 1, V_008958_DI_PT_POINTLIST); - if (device->mesh_fast_launch_2) { + if (device->physical_device->mesh_fast_launch_2) { radeon_set_sh_reg_seq(cs, R_00B2B0_SPI_SHADER_GS_MESHLET_DIM, 2); radeon_emit(cs, S_00B2B0_MESHLET_NUM_THREAD_X(ms->info.cs.block_size[0] - 1) | S_00B2B0_MESHLET_NUM_THREAD_Y(ms->info.cs.block_size[1] - 1) | @@ -3586,7 +3587,7 @@ radv_emit_vgt_shader_config(const struct radv_device *device, struct radeon_cmdb stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) | S_028B54_GS_EN(1); } else if (key->mesh) { assert(!key->ngg_passthrough); - unsigned gs_fast_launch = device->mesh_fast_launch_2 ? 2 : 1; + unsigned gs_fast_launch = device->physical_device->mesh_fast_launch_2 ? 2 : 1; stages |= S_028B54_GS_EN(1) | S_028B54_GS_FAST_LAUNCH(gs_fast_launch) | S_028B54_NGG_WAVE_ID_EN(key->mesh_scratch_ring); } else if (key->ngg) { diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 8d84d27fe5c..953a8d39d91 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -270,6 +270,9 @@ struct radv_physical_device { /* Whether to emulate the number of primitives generated by GS. */ bool emulate_ngg_gs_query_pipeline_stat; + /* Whether to use GS_FAST_LAUNCH(2) for mesh shaders. */ + bool mesh_fast_launch_2; + /* Whether to emulate mesh/task shader queries. */ bool emulate_mesh_shader_queries; @@ -1151,9 +1154,6 @@ struct radv_device { /* Whether the driver uses a global BO list. */ bool use_global_bo_list; - /* Whether to use GS_FAST_LAUNCH(2) for mesh shaders. */ - bool mesh_fast_launch_2; - /* Whether anisotropy is forced with RADV_TEX_ANISO (-1 is disabled). */ int force_aniso; diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 31750479121..2a481f1d176 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -536,7 +536,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_shader_st /* Mesh shaders run as NGG which can implement local_invocation_index from * the wave ID in merged_wave_info, but they don't have local_invocation_ids on GFX10.3. */ - .lower_cs_local_id_to_index = nir->info.stage == MESA_SHADER_MESH && !device->mesh_fast_launch_2, + .lower_cs_local_id_to_index = nir->info.stage == MESA_SHADER_MESH && !device->physical_device->mesh_fast_launch_2, .lower_local_invocation_index = nir->info.stage == MESA_SHADER_COMPUTE && ((nir->info.workgroup_size[0] == 1) + (nir->info.workgroup_size[1] == 1) + (nir->info.workgroup_size[2] == 1)) == 2, @@ -900,7 +900,7 @@ radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage, NIR_PASS_V(nir, ac_nir_lower_ngg_ms, options.gfx_level, options.clip_cull_dist_mask, options.vs_output_param_offset, options.has_param_exports, &scratch_ring, info->wave_size, hw_workgroup_size, gfx_state->has_multiview_view_index, info->ms.has_query, - device->mesh_fast_launch_2); + device->physical_device->mesh_fast_launch_2); ngg_stage->info.ms.needs_ms_scratch_ring = scratch_ring; } else { unreachable("invalid SW stage passed to radv_lower_ngg"); diff --git a/src/amd/vulkan/radv_shader_args.c b/src/amd/vulkan/radv_shader_args.c index 471f33a421a..5243c6cabcc 100644 --- a/src/amd/vulkan/radv_shader_args.c +++ b/src/amd/vulkan/radv_shader_args.c @@ -262,7 +262,7 @@ declare_ms_input_sgprs(const struct radv_shader_info *info, struct radv_shader_a static void declare_ms_input_vgprs(const struct radv_device *device, struct radv_shader_args *args) { - if (device->mesh_fast_launch_2) { + if (device->physical_device->mesh_fast_launch_2) { ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.local_invocation_ids); } else { ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id); @@ -785,7 +785,7 @@ declare_shader_args(const struct radv_device *device, const struct radv_graphics declare_ngg_sgprs(info, args, has_ngg_provoking_vtx); } - if (previous_stage != MESA_SHADER_MESH || !device->mesh_fast_launch_2) { + if (previous_stage != MESA_SHADER_MESH || !device->physical_device->mesh_fast_launch_2) { ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]); ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]); ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id); diff --git a/src/amd/vulkan/radv_shader_info.c b/src/amd/vulkan/radv_shader_info.c index 10a4703b925..3c7f04d5805 100644 --- a/src/amd/vulkan/radv_shader_info.c +++ b/src/amd/vulkan/radv_shader_info.c @@ -737,7 +737,7 @@ calc_mesh_workgroup_size(const struct radv_device *device, const nir_shader *nir { unsigned api_workgroup_size = ac_compute_cs_workgroup_size(nir->info.workgroup_size, false, UINT32_MAX); - if (device->mesh_fast_launch_2) { + if (device->physical_device->mesh_fast_launch_2) { /* Use multi-row export. It is also necessary to use the API workgroup size for non-emulated queries. */ info->workgroup_size = api_workgroup_size; } else {