From 7a0e7d24bbf7a1f93158cd4ce1a76b5d2a36e5a5 Mon Sep 17 00:00:00 2001 From: Mohamed Ahmed Date: Wed, 27 Aug 2025 01:31:57 +0300 Subject: [PATCH] nvk: Use the compute MME for compute dispatch Switching from compute to 3D and vice versa leads to a long stall, which destroys compute performance. For compute dispatches, this switches to the compute MME on Ampere and later (Ampere is where it was added), which eliminates the stall from sub-channel switching in these cases. Reviewed-by: Karol Herbst Reviewed-by: Faith Ekstrand Part-of: --- src/nouveau/vulkan/nvk_cmd_dispatch.c | 11 +++++++++-- src/nouveau/vulkan/nvk_cmd_indirect.c | 6 +++++- src/nouveau/vulkan/nvk_query_pool.c | 9 ++++++++- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/nouveau/vulkan/nvk_cmd_dispatch.c b/src/nouveau/vulkan/nvk_cmd_dispatch.c index afd879a099e..b3a75ddc5a2 100644 --- a/src/nouveau/vulkan/nvk_cmd_dispatch.c +++ b/src/nouveau/vulkan/nvk_cmd_dispatch.c @@ -25,6 +25,7 @@ #include "nv_push_clc3c0.h" #include "nv_push_clc597.h" #include "nv_push_clc6c0.h" +#include "nv_push_clc7c0.h" #include "nv_push_clc86f.h" VkResult @@ -315,7 +316,10 @@ nvk_CmdDispatchBase(VkCommandBuffer commandBuffer, struct nv_push *p = nvk_cmd_buffer_push(cmd, 7); - P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_ADD_CS_INVOCATIONS)); + if (nvk_cmd_buffer_compute_cls(cmd) >= AMPERE_COMPUTE_B) + P_1INC(p, NVC7C0, CALL_MME_MACRO(NVK_MME_ADD_CS_INVOCATIONS)); + else + P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_ADD_CS_INVOCATIONS)); P_INLINE_DATA(p, cs_invocations >> 32); P_INLINE_DATA(p, cs_invocations); @@ -562,7 +566,10 @@ nvk_CmdDispatchIndirect(VkCommandBuffer commandBuffer, p = nvk_cmd_buffer_push(cmd, 14); if (nvk_cmd_buffer_compute_cls(cmd) < BLACKWELL_COMPUTE_A) P_IMMD(p, NVC597, SET_MME_DATA_FIFO_CONFIG, FIFO_SIZE_SIZE_4KB); - P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_DISPATCH_INDIRECT)); + if (nvk_cmd_buffer_compute_cls(cmd) >= AMPERE_COMPUTE_B) + P_1INC(p, NVC7C0, 
CALL_MME_MACRO(NVK_MME_DISPATCH_INDIRECT)); + else + P_1INC(p, NV9097, CALL_MME_MACRO(NVK_MME_DISPATCH_INDIRECT)); P_INLINE_DATA(p, dispatch_addr >> 32); P_INLINE_DATA(p, dispatch_addr); P_INLINE_DATA(p, root_desc_addr >> 32); diff --git a/src/nouveau/vulkan/nvk_cmd_indirect.c b/src/nouveau/vulkan/nvk_cmd_indirect.c index 3af732c84f9..c6d3bd6b032 100644 --- a/src/nouveau/vulkan/nvk_cmd_indirect.c +++ b/src/nouveau/vulkan/nvk_cmd_indirect.c @@ -20,6 +20,7 @@ #include "nv_push_cla0c0.h" #include "nv_push_clb1c0.h" #include "nv_push_clc6c0.h" +#include "nv_push_clc7c0.h" #include "nv_push_clc86f.h" struct nvk_indirect_commands_layout { @@ -395,7 +396,10 @@ build_process_cs_cmd_seq(nir_builder *b, struct nvk_nir_push *p, /* Now emit commands */ nir_def *invoc = nir_imul_2x32_64(b, disp_size_x, disp_size_y); invoc = nir_imul(b, invoc, nir_u2u64(b, disp_size_z)); - nvk_nir_P_1INC(b, p, NV9097, CALL_MME_MACRO(NVK_MME_ADD_CS_INVOCATIONS), 2); + if (pdev->info.cls_compute >= AMPERE_COMPUTE_B) + nvk_nir_P_1INC(b, p, NVC7C0, CALL_MME_MACRO(NVK_MME_ADD_CS_INVOCATIONS), 2); + else + nvk_nir_P_1INC(b, p, NV9097, CALL_MME_MACRO(NVK_MME_ADD_CS_INVOCATIONS), 2); nvk_nir_push_dw(b, p, nir_unpack_64_2x32_split_y(b, invoc)); nvk_nir_push_dw(b, p, nir_unpack_64_2x32_split_x(b, invoc)); diff --git a/src/nouveau/vulkan/nvk_query_pool.c b/src/nouveau/vulkan/nvk_query_pool.c index 086ca9a26c2..bf6efc3989a 100644 --- a/src/nouveau/vulkan/nvk_query_pool.c +++ b/src/nouveau/vulkan/nvk_query_pool.c @@ -28,6 +28,7 @@ #include "nv_push_cl9097.h" #include "nv_push_cla0c0.h" #include "nv_push_clc597.h" +#include "nv_push_clc7c0.h" VKAPI_ATTR VkResult VKAPI_CALL nvk_CreateQueryPool(VkDevice device, @@ -378,6 +379,9 @@ nvk_cmd_begin_end_query(struct nvk_cmd_buffer *cmd, uint32_t query, uint32_t index, bool end) { + const struct nvk_device *dev = nvk_cmd_buffer_device(cmd); + const struct nvk_physical_device *pdev = nvk_device_physical(dev); + uint64_t report_addr = nvk_query_report_addr(pool, query) 
+ end * sizeof(struct nvk_query_report); @@ -417,7 +421,10 @@ nvk_cmd_begin_end_query(struct nvk_cmd_buffer *cmd, assert(!(stats_left & (sq->flag - 1))); if (sq->flag == VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT) { - P_1INC(p, NVC597, CALL_MME_MACRO(NVK_MME_WRITE_CS_INVOCATIONS)); + if (pdev->info.cls_compute >= AMPERE_COMPUTE_B) + P_1INC(p, NVC7C0, CALL_MME_MACRO(NVK_MME_WRITE_CS_INVOCATIONS)); + else + P_1INC(p, NVC597, CALL_MME_MACRO(NVK_MME_WRITE_CS_INVOCATIONS)); P_INLINE_DATA(p, report_addr >> 32); P_INLINE_DATA(p, report_addr); } else {