From f55771a17d1432d1d5f90a789aba3a7f2d5565cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Sun, 21 Dec 2025 09:29:29 -0600 Subject: [PATCH] radv: Bypass L2 for gang semaphore BO with SDMA/ACE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the "gang leader" is SDMA, we need to ensure that the gang semaphores BO is coherent between SDMA and CP. To achieve this, we need bypass the L2 cache when either SDMA or CP are connected to L2. Suggested-by: Samuel Pitoiset Signed-off-by: Timur Kristóf Reviewed-by: Konstantin Seurer Reviewed-by: Samuel Pitoiset Part-of: --- src/amd/vulkan/radv_queue.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/src/amd/vulkan/radv_queue.c b/src/amd/vulkan/radv_queue.c index 09d7f54ffbb..b3964396fdb 100644 --- a/src/amd/vulkan/radv_queue.c +++ b/src/amd/vulkan/radv_queue.c @@ -1341,13 +1341,36 @@ radv_create_gang_wait_preambles_postambles(struct radv_queue *queue) VkResult r = VK_SUCCESS; struct radeon_winsys *ws = device->ws; struct radeon_winsys_bo *gang_sem_bo = NULL; + enum radeon_bo_flag gang_sem_bo_flags = RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM; + + /* When the "gang leader" is SDMA, we need to ensure that the gang semaphores BO + * is coherent between SDMA and CP. To achieve this, we need bypass the L2 cache + * when either SDMA or CP are connected to L2. + * + * GFX6-8: + * neither CP nor SDMA are connected to L2 + * GFX9: + * CP is connected to L2, SDMA isn't connected to L2 + * GFX10: + * CP is connected to L2 + * SDMA is connected to L2 but there are possible hw bugs with + * cache_policy=BYPASS which is currently the default configuration set by SDMA0_UTCL1_PAGE + * GFX10.3-11.5: + * CP is connected to L2 + * SDMA is connected to L2, and an alternative solution would be to + * configure the cache policy in SDMA packets (ie. with CVP=1) directly to overwrite the + * default behavior, but it doesn't seem worth it. + * GFX12: + * neither CP nor SDMA are connected to L2 + */ + if (ip == AMD_IP_SDMA && pdev->info.gfx_level >= GFX9 && pdev->info.gfx_level < GFX12) + gang_sem_bo_flags |= RADEON_FLAG_GL2_BYPASS; /* Gang semaphores BO. * DWORD 0: used in preambles, gang leader writes, gang members wait. * DWORD 1: used in postambles, gang leader waits, gang members write. */ - r = radv_bo_create(device, NULL, 8, 4, RADEON_DOMAIN_VRAM, - RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM, RADV_BO_PRIORITY_SCRATCH, 0, true, + r = radv_bo_create(device, NULL, 8, 4, RADEON_DOMAIN_VRAM, gang_sem_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, true, &gang_sem_bo); if (r != VK_SUCCESS) return r;