radv: Implement T2T scanline copy workaround.

The built-in tiled-to-tiled copy packet doesn't support copying
between images that don't meet certain criteria such as alignment,
micro tile format, compression state etc.

To work around this, we copy the image piece by piece to a
temporary buffer that we know is supported,
and then copy it to the intended destination.

The implementation assumes that at least one pixel row of the
image fits into the temporary buffer, and will try to copy as
many rows as fit.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Tatsuyuki Ishi <ishitatsuyuki@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26913>
This commit is contained in:
Timur Kristóf
2023-12-09 22:42:46 +01:00
parent a4b4c9b723
commit bd3f2567cc
3 changed files with 155 additions and 8 deletions
+8 -1
View File
@@ -406,7 +406,14 @@ transfer_copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_i
radv_sdma_get_surf(device, dst_image, region->dstSubresource, region->dstOffset, dst_aspect_mask);
const VkExtent3D extent = radv_sdma_get_copy_extent(src_image, region->srcSubresource, region->extent);
radv_sdma_copy_image(device, cs, &src, &dst, extent);
if (radv_sdma_use_t2t_scanline_copy(device, &src, &dst, extent)) {
if (!alloc_transfer_temp_bo(cmd_buffer))
return;
radv_sdma_copy_image_t2t_scanline(device, cs, &src, &dst, extent, cmd_buffer->transfer.copy_temp);
} else {
radv_sdma_copy_image(device, cs, &src, &dst, extent);
}
}
}
+132
View File
@@ -37,6 +37,22 @@ struct radv_sdma_chunked_copy_info {
unsigned num_rows_per_copy;
};
static const VkExtent3D radv_sdma_t2t_alignment_2d_and_planar[] = {
{16, 16, 1}, /* 1 bpp */
{16, 8, 1}, /* 2 bpp */
{8, 8, 1}, /* 4 bpp */
{8, 4, 1}, /* 8 bpp */
{4, 4, 1}, /* 16 bpp */
};
static const VkExtent3D radv_sdma_t2t_alignment_3d[] = {
{8, 4, 8}, /* 1 bpp */
{4, 4, 8}, /* 2 bpp */
{4, 4, 4}, /* 4 bpp */
{4, 2, 4}, /* 8 bpp */
{2, 2, 4}, /* 16 bpp */
};
ALWAYS_INLINE static unsigned
radv_sdma_pitch_alignment(const struct radv_device *device, const unsigned bpp)
{
@@ -265,7 +281,10 @@ radv_sdma_get_surf(const struct radv_device *const device, const struct radv_ima
.bpp = surf->bpe,
.blk_w = surf->blk_w,
.blk_h = surf->blk_h,
.mip_levels = image->vk.mip_levels,
.micro_tile_mode = surf->micro_tile_mode,
.is_linear = surf->is_linear,
.is_3d = surf->u.gfx9.resource_type == RADEON_RESOURCE_3D,
};
if (surf->is_linear) {
@@ -664,3 +683,116 @@ radv_sdma_copy_image(const struct radv_device *device, struct radeon_cmdbuf *cs,
}
}
}
bool
radv_sdma_use_t2t_scanline_copy(const struct radv_device *device, const struct radv_sdma_surf *src,
const struct radv_sdma_surf *dst, const VkExtent3D extent)
{
/* These need a linear-to-linear / linear-to-tiled copy. */
if (src->is_linear || dst->is_linear)
return false;
/* SDMA can't do format conversion. */
assert(src->bpp == dst->bpp);
const enum sdma_version ver = device->physical_device->rad_info.sdma_ip_version;
if (ver < SDMA_5_0) {
/* SDMA v4.x and older doesn't support proper mip level selection. */
if (src->mip_levels > 1 || dst->mip_levels > 1)
return true;
}
/* The two images can have a different block size,
* but must have the same swizzle mode.
*/
if (src->micro_tile_mode != dst->micro_tile_mode)
return true;
/* The T2T subwindow copy packet only has fields for one metadata configuration.
* It can either compress or decompress, or copy uncompressed images, but it
* can't copy from a compressed image to another.
*/
if (src->meta_va && dst->meta_va)
return true;
const bool needs_3d_alignment = src->is_3d && (src->micro_tile_mode == RADEON_MICRO_MODE_DISPLAY ||
src->micro_tile_mode == RADEON_MICRO_MODE_STANDARD);
const unsigned log2bpp = util_logbase2(src->bpp);
const VkExtent3D *const alignment =
needs_3d_alignment ? &radv_sdma_t2t_alignment_3d[log2bpp] : &radv_sdma_t2t_alignment_2d_and_planar[log2bpp];
const VkExtent3D copy_extent_blk = radv_sdma_pixel_extent_to_blocks(extent, src->blk_w, src->blk_h);
const VkOffset3D src_offset_blk = radv_sdma_pixel_offset_to_blocks(src->offset, src->blk_w, src->blk_h);
const VkOffset3D dst_offset_blk = radv_sdma_pixel_offset_to_blocks(dst->offset, dst->blk_w, dst->blk_h);
if (!radv_is_aligned(copy_extent_blk.width, alignment->width) ||
!radv_is_aligned(copy_extent_blk.height, alignment->height) ||
!radv_is_aligned(copy_extent_blk.depth, alignment->depth))
return true;
if (!radv_is_aligned(src_offset_blk.x, alignment->width) || !radv_is_aligned(src_offset_blk.y, alignment->height) ||
!radv_is_aligned(src_offset_blk.z, alignment->depth))
return true;
if (!radv_is_aligned(dst_offset_blk.x, alignment->width) || !radv_is_aligned(dst_offset_blk.y, alignment->height) ||
!radv_is_aligned(dst_offset_blk.z, alignment->depth))
return true;
return false;
}
void
radv_sdma_copy_image_t2t_scanline(const struct radv_device *device, struct radeon_cmdbuf *cs,
const struct radv_sdma_surf *src, const struct radv_sdma_surf *dst,
const VkExtent3D extent, struct radeon_winsys_bo *temp_bo)
{
const struct radv_sdma_chunked_copy_info info = radv_sdma_get_chunked_copy_info(device, src, extent);
struct radv_sdma_surf t2l_src = *src;
struct radv_sdma_surf t2l_dst = {
.va = temp_bo->va,
.bpp = src->bpp,
.blk_w = src->blk_w,
.blk_h = src->blk_h,
.pitch = info.aligned_row_pitch * src->blk_w,
};
struct radv_sdma_surf l2t_dst = *dst;
struct radv_sdma_surf l2t_src = {
.va = temp_bo->va,
.bpp = dst->bpp,
.blk_w = dst->blk_w,
.blk_h = dst->blk_h,
.pitch = info.aligned_row_pitch * dst->blk_w,
};
for (unsigned slice = 0; slice < extent.depth; ++slice) {
for (unsigned row = 0; row < info.extent_vertical_blocks; row += info.num_rows_per_copy) {
const unsigned rows = MIN2(info.extent_vertical_blocks - row, info.num_rows_per_copy);
const VkExtent3D t2l_extent = {
.width = info.extent_horizontal_blocks * src->blk_w,
.height = rows * src->blk_h,
.depth = 1,
};
t2l_src.offset.y = src->offset.y + row * src->blk_h;
t2l_src.offset.z = src->offset.z + slice;
t2l_dst.slice_pitch = t2l_dst.pitch * t2l_extent.height;
radv_sdma_emit_copy_tiled_sub_window(device, cs, &t2l_src, &t2l_dst, t2l_extent, true);
radv_sdma_emit_nop(device, cs);
const VkExtent3D l2t_extent = {
.width = info.extent_horizontal_blocks * dst->blk_w,
.height = rows * dst->blk_h,
.depth = 1,
};
l2t_dst.offset.y = dst->offset.y + row * dst->blk_h;
l2t_dst.offset.z = dst->offset.z + slice;
l2t_src.slice_pitch = l2t_src.pitch * l2t_extent.height;
radv_sdma_emit_copy_tiled_sub_window(device, cs, &l2t_dst, &l2t_src, l2t_extent, false);
radv_sdma_emit_nop(device, cs);
}
}
}
+15 -7
View File
@@ -31,13 +31,16 @@ extern "C" {
#endif
struct radv_sdma_surf {
VkExtent3D extent; /* Image extent. */
VkOffset3D offset; /* Image offset. */
uint64_t va; /* Virtual address of image data. */
unsigned bpp; /* Bytes per pixel. */
unsigned blk_w; /* Image format block width in pixels. */
unsigned blk_h; /* Image format block height in pixels. */
bool is_linear; /* Whether the image is linear. */
VkExtent3D extent; /* Image extent. */
VkOffset3D offset; /* Image offset. */
uint64_t va; /* Virtual address of image data. */
unsigned bpp; /* Bytes per pixel. */
unsigned blk_w; /* Image format block width in pixels. */
unsigned blk_h; /* Image format block height in pixels. */
unsigned mip_levels; /* Mip levels in the image. */
uint8_t micro_tile_mode; /* Micro tile mode of the image. */
bool is_linear; /* Whether the image is linear. */
bool is_3d; /* Whether the image is 3-dimensional. */
union {
/* linear images only */
@@ -83,6 +86,11 @@ void radv_sdma_copy_buffer_image_unaligned(const struct radv_device *device, str
bool to_image);
void radv_sdma_copy_image(const struct radv_device *device, struct radeon_cmdbuf *cs, const struct radv_sdma_surf *src,
const struct radv_sdma_surf *dst, const VkExtent3D extent);
bool radv_sdma_use_t2t_scanline_copy(const struct radv_device *device, const struct radv_sdma_surf *src,
const struct radv_sdma_surf *dst, const VkExtent3D extent);
void radv_sdma_copy_image_t2t_scanline(const struct radv_device *device, struct radeon_cmdbuf *cs,
const struct radv_sdma_surf *src, const struct radv_sdma_surf *dst,
const VkExtent3D extent, struct radeon_winsys_bo *temp_bo);
void radv_sdma_copy_buffer(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t src_va, uint64_t dst_va,
uint64_t size);
void radv_sdma_fill_buffer(const struct radv_device *device, struct radeon_cmdbuf *cs, const uint64_t va,