diff --git a/src/amd/vulkan/meta/radv_meta_copy.c b/src/amd/vulkan/meta/radv_meta_copy.c index 3fe1151a646..cc70b615b4b 100644 --- a/src/amd/vulkan/meta/radv_meta_copy.c +++ b/src/amd/vulkan/meta/radv_meta_copy.c @@ -406,7 +406,14 @@ transfer_copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_i radv_sdma_get_surf(device, dst_image, region->dstSubresource, region->dstOffset, dst_aspect_mask); const VkExtent3D extent = radv_sdma_get_copy_extent(src_image, region->srcSubresource, region->extent); - radv_sdma_copy_image(device, cs, &src, &dst, extent); + if (radv_sdma_use_t2t_scanline_copy(device, &src, &dst, extent)) { + if (!alloc_transfer_temp_bo(cmd_buffer)) + return; + + radv_sdma_copy_image_t2t_scanline(device, cs, &src, &dst, extent, cmd_buffer->transfer.copy_temp); + } else { + radv_sdma_copy_image(device, cs, &src, &dst, extent); + } } } diff --git a/src/amd/vulkan/radv_sdma.c b/src/amd/vulkan/radv_sdma.c index b0f86f618f0..c086104d673 100644 --- a/src/amd/vulkan/radv_sdma.c +++ b/src/amd/vulkan/radv_sdma.c @@ -37,6 +37,22 @@ struct radv_sdma_chunked_copy_info { unsigned num_rows_per_copy; }; +static const VkExtent3D radv_sdma_t2t_alignment_2d_and_planar[] = { + {16, 16, 1}, /* 1 bpp */ + {16, 8, 1}, /* 2 bpp */ + {8, 8, 1}, /* 4 bpp */ + {8, 4, 1}, /* 8 bpp */ + {4, 4, 1}, /* 16 bpp */ +}; + +static const VkExtent3D radv_sdma_t2t_alignment_3d[] = { + {8, 4, 8}, /* 1 bpp */ + {4, 4, 8}, /* 2 bpp */ + {4, 4, 4}, /* 4 bpp */ + {4, 2, 4}, /* 8 bpp */ + {2, 2, 4}, /* 16 bpp */ +}; + ALWAYS_INLINE static unsigned radv_sdma_pitch_alignment(const struct radv_device *device, const unsigned bpp) { @@ -265,7 +281,10 @@ radv_sdma_get_surf(const struct radv_device *const device, const struct radv_ima .bpp = surf->bpe, .blk_w = surf->blk_w, .blk_h = surf->blk_h, + .mip_levels = image->vk.mip_levels, + .micro_tile_mode = surf->micro_tile_mode, .is_linear = surf->is_linear, + .is_3d = surf->u.gfx9.resource_type == RADEON_RESOURCE_3D, }; if (surf->is_linear) { @@ -664,3 +683,116 @@ radv_sdma_copy_image(const struct radv_device *device, struct radeon_cmdbuf *cs, } } } + +bool +radv_sdma_use_t2t_scanline_copy(const struct radv_device *device, const struct radv_sdma_surf *src, + const struct radv_sdma_surf *dst, const VkExtent3D extent) +{ + /* These need a linear-to-linear / linear-to-tiled copy. */ + if (src->is_linear || dst->is_linear) + return false; + + /* SDMA can't do format conversion. */ + assert(src->bpp == dst->bpp); + + const enum sdma_version ver = device->physical_device->rad_info.sdma_ip_version; + if (ver < SDMA_5_0) { + /* SDMA v4.x and older doesn't support proper mip level selection. */ + if (src->mip_levels > 1 || dst->mip_levels > 1) + return true; + } + + /* The two images can have a different block size, + * but must have the same swizzle mode. + */ + if (src->micro_tile_mode != dst->micro_tile_mode) + return true; + + /* The T2T subwindow copy packet only has fields for one metadata configuration. + * It can either compress or decompress, or copy uncompressed images, but it + * can't copy from a compressed image to another. + */ + if (src->meta_va && dst->meta_va) + return true; + + const bool needs_3d_alignment = src->is_3d && (src->micro_tile_mode == RADEON_MICRO_MODE_DISPLAY || + src->micro_tile_mode == RADEON_MICRO_MODE_STANDARD); + const unsigned log2bpp = util_logbase2(src->bpp); + const VkExtent3D *const alignment = + needs_3d_alignment ? &radv_sdma_t2t_alignment_3d[log2bpp] : &radv_sdma_t2t_alignment_2d_and_planar[log2bpp]; + + const VkExtent3D copy_extent_blk = radv_sdma_pixel_extent_to_blocks(extent, src->blk_w, src->blk_h); + const VkOffset3D src_offset_blk = radv_sdma_pixel_offset_to_blocks(src->offset, src->blk_w, src->blk_h); + const VkOffset3D dst_offset_blk = radv_sdma_pixel_offset_to_blocks(dst->offset, dst->blk_w, dst->blk_h); + + if (!radv_is_aligned(copy_extent_blk.width, alignment->width) || + !radv_is_aligned(copy_extent_blk.height, alignment->height) || + !radv_is_aligned(copy_extent_blk.depth, alignment->depth)) + return true; + + if (!radv_is_aligned(src_offset_blk.x, alignment->width) || !radv_is_aligned(src_offset_blk.y, alignment->height) || + !radv_is_aligned(src_offset_blk.z, alignment->depth)) + return true; + + if (!radv_is_aligned(dst_offset_blk.x, alignment->width) || !radv_is_aligned(dst_offset_blk.y, alignment->height) || + !radv_is_aligned(dst_offset_blk.z, alignment->depth)) + return true; + + return false; +} + +void +radv_sdma_copy_image_t2t_scanline(const struct radv_device *device, struct radeon_cmdbuf *cs, + const struct radv_sdma_surf *src, const struct radv_sdma_surf *dst, + const VkExtent3D extent, struct radeon_winsys_bo *temp_bo) +{ + const struct radv_sdma_chunked_copy_info info = radv_sdma_get_chunked_copy_info(device, src, extent); + struct radv_sdma_surf t2l_src = *src; + struct radv_sdma_surf t2l_dst = { + .va = temp_bo->va, + .bpp = src->bpp, + .blk_w = src->blk_w, + .blk_h = src->blk_h, + .pitch = info.aligned_row_pitch * src->blk_w, + }; + struct radv_sdma_surf l2t_dst = *dst; + struct radv_sdma_surf l2t_src = { + .va = temp_bo->va, + .bpp = dst->bpp, + .blk_w = dst->blk_w, + .blk_h = dst->blk_h, + .pitch = info.aligned_row_pitch * dst->blk_w, + }; + + for (unsigned slice = 0; slice < extent.depth; ++slice) { + for (unsigned row = 0; row < info.extent_vertical_blocks; row += info.num_rows_per_copy) { + const unsigned rows = MIN2(info.extent_vertical_blocks - row, info.num_rows_per_copy); + + const VkExtent3D t2l_extent = { + .width = info.extent_horizontal_blocks * src->blk_w, + .height = rows * src->blk_h, + .depth = 1, + }; + + t2l_src.offset.y = src->offset.y + row * src->blk_h; + t2l_src.offset.z = src->offset.z + slice; + t2l_dst.slice_pitch = t2l_dst.pitch * t2l_extent.height; + + radv_sdma_emit_copy_tiled_sub_window(device, cs, &t2l_src, &t2l_dst, t2l_extent, true); + radv_sdma_emit_nop(device, cs); + + const VkExtent3D l2t_extent = { + .width = info.extent_horizontal_blocks * dst->blk_w, + .height = rows * dst->blk_h, + .depth = 1, + }; + + l2t_dst.offset.y = dst->offset.y + row * dst->blk_h; + l2t_dst.offset.z = dst->offset.z + slice; + l2t_src.slice_pitch = l2t_src.pitch * l2t_extent.height; + + radv_sdma_emit_copy_tiled_sub_window(device, cs, &l2t_dst, &l2t_src, l2t_extent, false); + radv_sdma_emit_nop(device, cs); + } + } +} diff --git a/src/amd/vulkan/radv_sdma.h b/src/amd/vulkan/radv_sdma.h index e089618b407..bcc95919c97 100644 --- a/src/amd/vulkan/radv_sdma.h +++ b/src/amd/vulkan/radv_sdma.h @@ -31,13 +31,16 @@ extern "C" { #endif struct radv_sdma_surf { - VkExtent3D extent; /* Image extent. */ - VkOffset3D offset; /* Image offset. */ - uint64_t va; /* Virtual address of image data. */ - unsigned bpp; /* Bytes per pixel. */ - unsigned blk_w; /* Image format block width in pixels. */ - unsigned blk_h; /* Image format block height in pixels. */ - bool is_linear; /* Whether the image is linear. */ + VkExtent3D extent; /* Image extent. */ + VkOffset3D offset; /* Image offset. */ + uint64_t va; /* Virtual address of image data. */ + unsigned bpp; /* Bytes per pixel. */ + unsigned blk_w; /* Image format block width in pixels. */ + unsigned blk_h; /* Image format block height in pixels. */ + unsigned mip_levels; /* Mip levels in the image. */ + uint8_t micro_tile_mode; /* Micro tile mode of the image. */ + bool is_linear; /* Whether the image is linear. */ + bool is_3d; /* Whether the image is 3-dimensional. */ union { /* linear images only */ @@ -83,6 +86,11 @@ void radv_sdma_copy_buffer_image_unaligned(const struct radv_device *device, str bool to_image); void radv_sdma_copy_image(const struct radv_device *device, struct radeon_cmdbuf *cs, const struct radv_sdma_surf *src, const struct radv_sdma_surf *dst, const VkExtent3D extent); +bool radv_sdma_use_t2t_scanline_copy(const struct radv_device *device, const struct radv_sdma_surf *src, + const struct radv_sdma_surf *dst, const VkExtent3D extent); +void radv_sdma_copy_image_t2t_scanline(const struct radv_device *device, struct radeon_cmdbuf *cs, + const struct radv_sdma_surf *src, const struct radv_sdma_surf *dst, + const VkExtent3D extent, struct radeon_winsys_bo *temp_bo); void radv_sdma_copy_buffer(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t src_va, uint64_t dst_va, uint64_t size); void radv_sdma_fill_buffer(const struct radv_device *device, struct radeon_cmdbuf *cs, const uint64_t va,