From 60d438e517b173dca7f287e453f7e9bf142e4b0d Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Fri, 14 Nov 2025 18:34:51 +0100 Subject: [PATCH] radv: always use MALL for CP DMA operations on GFX12 CP DMA isn't coherent with L2 on GFX12, but {SRC,DST}_ADDR_TC_L2 means MALL. Only small buffers are using copy/fill CP DMA operations, so this shouldn't have much effect. Found by inspection. Signed-off-by: Samuel Pitoiset Part-of: --- src/amd/vulkan/radv_cp_dma.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/amd/vulkan/radv_cp_dma.c b/src/amd/vulkan/radv_cp_dma.c index a220683792e..d110a59c70c 100644 --- a/src/amd/vulkan/radv_cp_dma.c +++ b/src/amd/vulkan/radv_cp_dma.c @@ -52,6 +52,9 @@ radv_cs_emit_cp_dma(struct radv_device *device, struct radv_cmd_stream *cs, bool { const struct radv_physical_device *pdev = radv_device_physical(device); const bool cp_dma_use_L2 = (flags & CP_DMA_USE_L2) && pdev->info.cp_dma_use_L2; + const bool cp_dma_use_mall = pdev->info.gfx_level == GFX12; + /* GFX12: TC_L2 means MALL, which should always be set. */ + const bool cp_dma_tc_l2_flag = cp_dma_use_L2 || cp_dma_use_mall; uint32_t header = 0, command = 0; assert(size <= cp_dma_max_byte_count(pdev->info.gfx_level)); @@ -72,12 +75,12 @@ radv_cs_emit_cp_dma(struct radv_device *device, struct radv_cmd_stream *cs, bool /* Src and dst flags. */ if (pdev->info.gfx_level >= GFX9 && !(flags & CP_DMA_CLEAR) && src_va == dst_va) header |= S_411_DST_SEL(V_411_NOWHERE); /* prefetch only */ - else if (cp_dma_use_L2) + else if (cp_dma_tc_l2_flag) header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2); if (flags & CP_DMA_CLEAR) header |= S_411_SRC_SEL(V_411_DATA); - else if (cp_dma_use_L2) + else if (cp_dma_tc_l2_flag) header |= S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2); radeon_begin(cs); @@ -90,7 +93,7 @@ radv_cs_emit_cp_dma(struct radv_device *device, struct radv_cmd_stream *cs, bool radeon_emit(dst_va >> 32); /* DST_ADDR_HI [31:0] */ radeon_emit(command); } else { - assert(!cp_dma_use_L2); + assert(!cp_dma_tc_l2_flag); header |= S_411_SRC_ADDR_HI(src_va >> 32); radeon_emit(PKT3(PKT3_CP_DMA, 4, predicating)); radeon_emit(src_va); /* SRC_ADDR_LO [31:0] */