radeonsi: handle compressed formats in si_compute_copy_image

This fixes many of the AMD_TEST=copyimage test cases, but some failures
still remain.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16215>
This commit is contained in:
Marek Olšák
2022-04-25 03:31:50 -04:00
parent 1ea662cfbf
commit b6beb70ea4
4 changed files with 56 additions and 20 deletions
-3
View File
@@ -899,9 +899,6 @@ static bool si_can_use_compute_blit(struct si_context *sctx, enum pipe_format fo
if (has_dcc && is_store && sctx->chip_class < GFX10)
return false;
if (util_format_is_compressed(format))
return false;
return true;
}
+30 -2
View File
@@ -528,6 +528,34 @@ void si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, u
}
}
/* Interpret compressed formats as UINT. */
struct pipe_box new_box;
unsigned src_access = 0, dst_access = 0;
/* Note that staging copies do compressed<->UINT, so one of the formats is already UINT. */
if (util_format_is_compressed(src_format) || util_format_is_compressed(dst_format)) {
if (util_format_is_compressed(src_format))
src_access |= SI_IMAGE_ACCESS_BLOCK_FORMAT_AS_UINT;
if (util_format_is_compressed(dst_format))
dst_access |= SI_IMAGE_ACCESS_BLOCK_FORMAT_AS_UINT;
dstx = util_format_get_nblocksx(dst_format, dstx);
dsty = util_format_get_nblocksy(dst_format, dsty);
new_box.x = util_format_get_nblocksx(src_format, src_box->x);
new_box.y = util_format_get_nblocksy(src_format, src_box->y);
new_box.z = src_box->z;
new_box.width = util_format_get_nblocksx(src_format, src_box->width);
new_box.height = util_format_get_nblocksy(src_format, src_box->height);
new_box.depth = src_box->depth;
src_box = &new_box;
if (ssrc->surface.bpe == 8)
src_format = dst_format = PIPE_FORMAT_R16G16B16A16_UINT; /* 64-bit block */
else
src_format = dst_format = PIPE_FORMAT_R32G32B32A32_UINT; /* 128-bit block */
}
if (util_format_is_subsampled_422(src_format)) {
src_format = dst_format = PIPE_FORMAT_R32_UINT;
/* Interpreting 422 subsampled format (16 bpp) as 32 bpp
@@ -574,13 +602,13 @@ void si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, u
struct pipe_image_view image[2] = {0};
image[0].resource = src;
image[0].shader_access = image[0].access = PIPE_IMAGE_ACCESS_READ;
image[0].shader_access = image[0].access = PIPE_IMAGE_ACCESS_READ | src_access;
image[0].format = src_format;
image[0].u.tex.level = src_level;
image[0].u.tex.first_layer = 0;
image[0].u.tex.last_layer = util_max_layer(src, src_level);
image[1].resource = dst;
image[1].shader_access = image[1].access = PIPE_IMAGE_ACCESS_WRITE;
image[1].shader_access = image[1].access = PIPE_IMAGE_ACCESS_WRITE | dst_access;
image[1].format = dst_format;
image[1].u.tex.level = dst_level;
image[1].u.tex.first_layer = 0;
+23 -13
View File
@@ -763,7 +763,6 @@ static void si_set_shader_image_desc(struct si_context *ctx, const struct pipe_i
static const unsigned char swizzle[4] = {0, 1, 2, 3};
struct si_texture *tex = (struct si_texture *)res;
unsigned level = view->u.tex.level;
unsigned width, height, depth, hw_level;
bool uses_dcc = vi_dcc_enabled(tex, level);
unsigned access = view->access;
@@ -785,27 +784,38 @@ static void si_set_shader_image_desc(struct si_context *ctx, const struct pipe_i
si_decompress_dcc(ctx, tex);
}
if (ctx->chip_class >= GFX9) {
/* Always set the base address. The swizzle modes don't
* allow setting mipmap level offsets as the base.
*/
width = res->b.b.width0;
height = res->b.b.height0;
depth = res->b.b.depth0;
hw_level = level;
} else {
unsigned width = res->b.b.width0;
unsigned height = res->b.b.height0;
unsigned depth = res->b.b.depth0;
unsigned hw_level = level;
if (ctx->chip_class <= GFX8) {
/* Always force the base level to the selected level.
*
* This is required for 3D textures, where otherwise
* selecting a single slice for non-layered bindings
* fails. It doesn't hurt the other targets.
*/
width = u_minify(res->b.b.width0, level);
height = u_minify(res->b.b.height0, level);
depth = u_minify(res->b.b.depth0, level);
width = u_minify(width, level);
height = u_minify(height, level);
depth = u_minify(depth, level);
hw_level = 0;
}
if (access & SI_IMAGE_ACCESS_BLOCK_FORMAT_AS_UINT) {
if (ctx->chip_class >= GFX9) {
/* Since the aligned width and height are derived from the width and height
* by the hw, set them directly as the width and height, so that UINT formats
* get exactly the same layout as BCn formats.
*/
width = tex->surface.u.gfx9.base_mip_width;
height = tex->surface.u.gfx9.base_mip_height;
} else {
width = util_format_get_nblocksx(tex->buffer.b.b.format, width);
height = util_format_get_nblocksy(tex->buffer.b.b.format, height);
}
}
screen->make_texture_descriptor(
screen, tex, false, res->b.b.target, view->format, swizzle, hw_level, hw_level,
view->u.tex.first_layer, view->u.tex.last_layer, width, height, depth, desc, fmask_desc);
+3 -2
View File
@@ -172,8 +172,9 @@ enum si_clear_code
DCC_UNCOMPRESSED = 0xFFFFFFFF,
};
#define SI_IMAGE_ACCESS_DCC_OFF (1 << 8)
#define SI_IMAGE_ACCESS_ALLOW_DCC_STORE (1 << 9)
#define SI_IMAGE_ACCESS_DCC_OFF (1 << 8)
#define SI_IMAGE_ACCESS_ALLOW_DCC_STORE (1 << 9)
#define SI_IMAGE_ACCESS_BLOCK_FORMAT_AS_UINT (1 << 10) /* for compressed/subsampled images */
/* Debug flags. */
enum