isl: Tile W memcpy support

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Nanley Chery <nanley.g.chery@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31579>
This commit is contained in:
Lionel Landwerlin
2023-12-01 20:19:08 +02:00
committed by Marge Bot
parent c0e98d2c89
commit 1a72fc013c
2 changed files with 487 additions and 0 deletions
+425
View File
@@ -57,6 +57,9 @@ static const uint32_t xtile_span = 64;
/* Y-tile geometry constants.
 * NOTE(review): presumably width/span in bytes and height in rows, like the
 * W-tile values below -- confirm against the Y-tile copy routines.
 */
static const uint32_t ytile_width = 128;
static const uint32_t ytile_height = 32;
static const uint32_t ytile_span = 16;
/* W-tile geometry used by the W-tile copy routines in this file.  Those
 * routines index uint8_t data with their x coordinates, so a tile is 64
 * bytes wide and 64 rows tall.  The span is 2 bytes, which is the unit the
 * full-block copies move; the tiled copy entry points treat a W-tile block
 * as four spans (8 bytes) wide.
 */
static const uint32_t wtile_width = 64;
static const uint32_t wtile_height = 64;
static const uint32_t wtile_span = 2;
static inline uint32_t
ror(uint32_t n, uint32_t d)
@@ -100,6 +103,198 @@ rgba8_copy(void *dst, const void *src, size_t bytes)
return dst;
}
/* Index (0..63) of the 8x8-byte block that holds (x, y) inside a W tile.
 * Bits 3..5 of x become bits 3..5 of the index and bits 3..5 of y become
 * bits 0..2, i.e. blocks are numbered down a column first.  Both arguments
 * are evaluated exactly once.
 */
#define wtile_block_id(x, y)   \
   (((x) & 0x38) |             \
    (((y) & 0x38) >> 3))
/* Byte offset (0..63) of (x, y) inside its 64B block.  The low three bits
 * of x and y are interleaved, x first: bit 0 = x0, bit 1 = y0, bit 2 = x1,
 * bit 3 = y1, bit 4 = x2, bit 5 = y2.  The arguments are evaluated several
 * times, so they must be free of side effects.
 */
#define wtile_block_offset(x, y)  \
   ((((x) & 1) << 0) |            \
    (((y) & 1) << 1) |            \
    (((x) & 2) << 1) |            \
    (((y) & 2) << 2) |            \
    (((x) & 4) << 2) |            \
    (((y) & 4) << 3))
/**
 * Copy a rectangle of bytes from linear memory into one W tile block.
 *
 * @dst is the base of the W tile: the 64B block to write is located from it
 * using bits 3..5 of (@x0, @y0).  @src is the linear data, which is indexed
 * with the full coordinates as src[y * src_pitch + x].
 *
 * The half-open rectangle [x0, x1) x [y0, y1) must lie inside a single 8x8
 * block, because the block is selected from (@x0, @y0) only.  An empty
 * rectangle copies nothing.
 */
static inline void
wtile_block_copy_from_linear(void *dst, const void *src,
                             unsigned x0, unsigned x1,
                             unsigned y0, unsigned y1,
                             unsigned src_pitch)
{
   /* Convert to a byte pointer before offsetting: arithmetic on void * is a
    * GNU extension, not ISO C.
    */
   uint8_t *dst_data = (uint8_t *)dst + wtile_block_id(x0, y0) * 64;
   const uint8_t *src_data = src;

   for (unsigned y = y0; y < y1; y++) {
      for (unsigned x = x0; x < x1; x++)
         dst_data[wtile_block_offset(x, y)] = src_data[y * src_pitch + x];
   }
}
/**
 * Copy a complete 8x8 byte block from linear memory into a W tile.
 *
 * @dst is the base of the W tile; the 64B block to fill is located from it
 * using bits 3..5 of (@x0, @y0).  @src is the linear data and (@x0, @y0) is
 * the position of the block's first byte in it, i.e. the bytes read are
 * src[(y0 + by) * src_pitch + x0 + bx] for bx, by in [0, 8).  This is meant
 * to be called with (@x0, @y0) at the top-left corner of a block.
 */
static inline void
wtile_block_full_copy_from_linear(void *dst, const void *src,
                                  unsigned x0, unsigned y0,
                                  unsigned src_pitch)
{
   /* Byte pointers on both sides: no arithmetic on void * (GNU extension)
    * and no uint16_t access through a pointer that may be odd-aligned, which
    * the linear side can be.
    */
   uint8_t *dst_data = (uint8_t *)dst + wtile_block_id(x0, y0) * 64;
   const uint8_t *src_data = src;

   /*
    * A block is made of 32 elements of 2 horizontally adjacent bytes.  The
    * table shows, for each byte position in the 8x8 block, its byte offset
    * inside the 64B block:
    *  _________________________________
    *  |B00|B01|B04|B05|B16|B17|B20|B21|
    *  |B02|B03|B06|B07|B18|B19|B22|B23|
    *  |B08|B09|B12|B13|B24|B25|B28|B29|
    *  |B10|B11|B14|B15|B26|B27|B30|B31|
    *  |B32|B33|B36|B37|B48|B49|B52|B53|
    *  |B34|B35|B38|B39|B50|B51|B54|B55|
    *  |B40|B41|B44|B45|B56|B57|B60|B61|
    *  |B42|B43|B46|B47|B58|B59|B62|B63|
    *  ---------------------------------
    */

   /* Move 2-byte element number idx of the block from pair column bx and
    * row by of the linear source.  Both bytes are loaded before either is
    * stored so the compiler is free to merge them into one 16-bit move.
    */
#define copy_pair(idx, bx, by)                                          \
   do {                                                                 \
      const uint8_t *pair_ =                                            \
         src_data + (y0 + (by)) * src_pitch + x0 + (bx) * 2;            \
      const uint8_t b0_ = pair_[0];                                     \
      const uint8_t b1_ = pair_[1];                                     \
      dst_data[(idx) * 2 + 0] = b0_;                                    \
      dst_data[(idx) * 2 + 1] = b1_;                                    \
   } while (0)

   copy_pair( 0, 0, 0);
   copy_pair( 1, 0, 1);
   copy_pair( 2, 1, 0);
   copy_pair( 3, 1, 1);
   copy_pair( 4, 0, 2);
   copy_pair( 5, 0, 3);
   copy_pair( 6, 1, 2);
   copy_pair( 7, 1, 3);

   copy_pair( 8, 2, 0);
   copy_pair( 9, 2, 1);
   copy_pair(10, 3, 0);
   copy_pair(11, 3, 1);
   copy_pair(12, 2, 2);
   copy_pair(13, 2, 3);
   copy_pair(14, 3, 2);
   copy_pair(15, 3, 3);

   copy_pair(16, 0, 4);
   copy_pair(17, 0, 5);
   copy_pair(18, 1, 4);
   copy_pair(19, 1, 5);
   copy_pair(20, 0, 6);
   copy_pair(21, 0, 7);
   copy_pair(22, 1, 6);
   copy_pair(23, 1, 7);

   copy_pair(24, 2, 4);
   copy_pair(25, 2, 5);
   copy_pair(26, 3, 4);
   copy_pair(27, 3, 5);
   copy_pair(28, 2, 6);
   copy_pair(29, 2, 7);
   copy_pair(30, 3, 6);
   copy_pair(31, 3, 7);

#undef copy_pair
}
/**
 * Copy a rectangle of bytes from one W tile block into linear memory.
 *
 * @src is the base of the W tile: the 64B block to read is located from it
 * using bits 3..5 of (@x0, @y0).  @dst is the linear data, which is indexed
 * with the full coordinates as dst[y * dst_pitch + x].
 *
 * The half-open rectangle [x0, x1) x [y0, y1) must lie inside a single 8x8
 * block, because the block is selected from (@x0, @y0) only.  An empty
 * rectangle copies nothing.
 */
static inline void
wtile_block_copy_to_linear(void *dst, const void *src,
                           unsigned x0, unsigned x1,
                           unsigned y0, unsigned y1,
                           unsigned dst_pitch)
{
   uint8_t *dst_data = dst;
   /* Convert to a byte pointer before offsetting: arithmetic on void * is a
    * GNU extension, not ISO C.
    */
   const uint8_t *src_data =
      (const uint8_t *)src + wtile_block_id(x0, y0) * 64;

   for (unsigned y = y0; y < y1; y++) {
      for (unsigned x = x0; x < x1; x++)
         dst_data[y * dst_pitch + x] = src_data[wtile_block_offset(x, y)];
   }
}
/**
 * Copy a complete 8x8 byte block from a W tile into linear memory.
 *
 * @src is the base of the W tile; the 64B block to read is located from it
 * using bits 3..5 of (@x0, @y0).  @dst is the linear data and (@x0, @y0) is
 * the position of the block's first byte in it, i.e. the bytes written are
 * dst[(y0 + by) * dst_pitch + x0 + bx] for bx, by in [0, 8).  This is meant
 * to be called with (@x0, @y0) at the top-left corner of a block.
 */
static inline void
wtile_block_full_copy_to_linear(void *dst, const void *src,
                                unsigned x0, unsigned y0,
                                unsigned dst_pitch)
{
   /* Byte pointers on both sides: no arithmetic on void * (GNU extension)
    * and no uint16_t access through a pointer that may be odd-aligned, which
    * the linear side can be.
    */
   uint8_t *dst_data = dst;
   const uint8_t *src_data =
      (const uint8_t *)src + wtile_block_id(x0, y0) * 64;

   /*
    * A block is made of 32 elements of 2 horizontally adjacent bytes.  The
    * table shows, for each byte position in the 8x8 block, its byte offset
    * inside the 64B block:
    *  _________________________________
    *  |B00|B01|B04|B05|B16|B17|B20|B21|
    *  |B02|B03|B06|B07|B18|B19|B22|B23|
    *  |B08|B09|B12|B13|B24|B25|B28|B29|
    *  |B10|B11|B14|B15|B26|B27|B30|B31|
    *  |B32|B33|B36|B37|B48|B49|B52|B53|
    *  |B34|B35|B38|B39|B50|B51|B54|B55|
    *  |B40|B41|B44|B45|B56|B57|B60|B61|
    *  |B42|B43|B46|B47|B58|B59|B62|B63|
    *  ---------------------------------
    */

   /* Move 2-byte element number idx of the block to pair column bx and row
    * by of the linear destination.  Both bytes are loaded before either is
    * stored so the compiler is free to merge them into one 16-bit move.
    */
#define store_pair(bx, by, idx)                                         \
   do {                                                                 \
      uint8_t *pair_ =                                                  \
         dst_data + (y0 + (by)) * dst_pitch + x0 + (bx) * 2;            \
      const uint8_t b0_ = src_data[(idx) * 2 + 0];                      \
      const uint8_t b1_ = src_data[(idx) * 2 + 1];                      \
      pair_[0] = b0_;                                                   \
      pair_[1] = b1_;                                                   \
   } while (0)

   store_pair(0, 0,  0);
   store_pair(0, 1,  1);
   store_pair(1, 0,  2);
   store_pair(1, 1,  3);
   store_pair(0, 2,  4);
   store_pair(0, 3,  5);
   store_pair(1, 2,  6);
   store_pair(1, 3,  7);

   store_pair(2, 0,  8);
   store_pair(2, 1,  9);
   store_pair(3, 0, 10);
   store_pair(3, 1, 11);
   store_pair(2, 2, 12);
   store_pair(2, 3, 13);
   store_pair(3, 2, 14);
   store_pair(3, 3, 15);

   store_pair(0, 4, 16);
   store_pair(0, 5, 17);
   store_pair(1, 4, 18);
   store_pair(1, 5, 19);
   store_pair(0, 6, 20);
   store_pair(0, 7, 21);
   store_pair(1, 6, 22);
   store_pair(1, 7, 23);

   store_pair(2, 4, 24);
   store_pair(2, 5, 25);
   store_pair(3, 4, 26);
   store_pair(3, 5, 27);
   store_pair(2, 6, 28);
   store_pair(2, 7, 29);
   store_pair(3, 6, 30);
   store_pair(3, 7, 31);

#undef store_pair
}
#ifdef __SSSE3__
static const uint8_t rgba8_permutation[16] =
{ 2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15 };
@@ -602,6 +797,79 @@ linear_to_tile4(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
}
}
/**
 * Copy texture data from linear to W tile layout.
 *
 * The columns are split into [x0, x1) (left partial block), [x1, x2) (whole
 * blocks, walked in steps of 8) and [x2, x3) (right partial block).  The
 * rows [y0, y3) are split the same way inside this function.
 *
 * \copydoc tile_copy_fn
 */
static inline void
linear_to_wtiled(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
                 uint32_t y0, uint32_t y3,
                 char *dst, const char *src, int32_t src_pitch)
{
   /*
    * A W tile is an 8x8 grid of 64B blocks, numbered down each column:
    *  _________________________________________________
    *  |blk00|blk08|blk16|blk24|blk32|blk40|blk48|blk56|
    *  |blk01|blk09|blk17|blk25|blk33|blk41|blk49|blk57|
    *  |blk02|blk10|blk18|blk26|blk34|blk42|blk50|blk58|
    *  |blk03|blk11|blk19|blk27|blk35|blk43|blk51|blk59|
    *  |blk04|blk12|blk20|blk28|blk36|blk44|blk52|blk60|
    *  |blk05|blk13|blk21|blk29|blk37|blk45|blk53|blk61|
    *  |blk06|blk14|blk22|blk30|blk38|blk46|blk54|blk62|
    *  |blk07|blk15|blk23|blk31|blk39|blk47|blk55|blk63|
    *  -------------------------------------------------
    */

   /* Rows [body_begin, body_end) cover whole blocks (8 rows each); the rows
    * before and after them only touch part of a block row.
    */
   const uint32_t body_begin = MIN2(y3, ALIGN_UP(y0, 8));
   const uint32_t body_end = MAX2(body_begin, ALIGN_DOWN(y3, 8));
   const int has_left_part = x0 != x1;
   const int has_right_part = x2 != x3;

   /* Top band: y0 is not block aligned, so every block of that block row
    * is only partially written.
    */
   if (y0 != body_begin) {
      if (has_left_part) {
         wtile_block_copy_from_linear(dst, src,
                                      x0, x1, y0, body_begin, src_pitch);
      }

      for (uint32_t bx = x1; bx < x2; bx += 8) {
         wtile_block_copy_from_linear(dst, src,
                                      bx, bx + 8, y0, body_begin, src_pitch);
      }

      if (has_right_part) {
         wtile_block_copy_from_linear(dst, src,
                                      x2, x3, y0, body_begin, src_pitch);
      }
   }

   /* Middle band: block rows that are covered over their full height. */
   for (uint32_t by = body_begin; by < body_end; by += 8) {
      /* Left column of blocks, partial when x0 is not block aligned. */
      if (has_left_part) {
         wtile_block_copy_from_linear(dst, src,
                                      x0, x1, by, by + 8, src_pitch);
      }

      /* Blocks covered entirely. */
      for (uint32_t bx = x1; bx < x2; bx += 8)
         wtile_block_full_copy_from_linear(dst, src, bx, by, src_pitch);

      /* Right column of blocks, partial when x3 is not block aligned. */
      if (has_right_part) {
         wtile_block_copy_from_linear(dst, src,
                                      x2, x3, by, by + 8, src_pitch);
      }
   }

   /* Bottom band: y3 is not block aligned, so every block of that block row
    * is only partially written.
    */
   if (body_end != y3) {
      if (has_left_part) {
         wtile_block_copy_from_linear(dst, src,
                                      x0, x1, body_end, y3, src_pitch);
      }

      for (uint32_t bx = x1; bx < x2; bx += 8) {
         wtile_block_copy_from_linear(dst, src,
                                      bx, bx + 8, body_end, y3, src_pitch);
      }

      if (has_right_part) {
         wtile_block_copy_from_linear(dst, src,
                                      x2, x3, body_end, y3, src_pitch);
      }
   }
}
/**
* Copy texture data from X tile layout to linear.
*
@@ -961,6 +1229,78 @@ tile4_to_linear(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
}
}
/**
 * Copy texture data from W tile layout to linear.
 *
 * The columns are split into [x0, x1) (left partial block), [x1, x2) (whole
 * blocks, walked in steps of 8) and [x2, x3) (right partial block).  The
 * rows [y0, y3) are split the same way inside this function.
 *
 * \copydoc tile_copy_fn
 */
static inline void
wtiled_to_linear(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
                 uint32_t y0, uint32_t y3,
                 char *dst, const char *src,
                 int32_t dst_pitch)
{
   /*
    * A W tile is an 8x8 grid of 64B blocks, numbered down each column:
    *  _________________________________________________
    *  |blk00|blk08|blk16|blk24|blk32|blk40|blk48|blk56|
    *  |blk01|blk09|blk17|blk25|blk33|blk41|blk49|blk57|
    *  |blk02|blk10|blk18|blk26|blk34|blk42|blk50|blk58|
    *  |blk03|blk11|blk19|blk27|blk35|blk43|blk51|blk59|
    *  |blk04|blk12|blk20|blk28|blk36|blk44|blk52|blk60|
    *  |blk05|blk13|blk21|blk29|blk37|blk45|blk53|blk61|
    *  |blk06|blk14|blk22|blk30|blk38|blk46|blk54|blk62|
    *  |blk07|blk15|blk23|blk31|blk39|blk47|blk55|blk63|
    *  -------------------------------------------------
    */

   /* Find intermediate Y offsets that are aligned to a 64B block (8 rows):
    * rows [y1, y2) cover whole blocks.
    */
   uint32_t y1 = MIN2(y3, ALIGN_UP(y0, 8));
   uint32_t y2 = MAX2(y1, ALIGN_DOWN(y3, 8));
   uint32_t xo, yo;

   /* If y0 is not aligned to a block, the first block row is only read
    * partially.
    */
   if (y0 != y1) {
      if (x0 != x1)
         wtile_block_copy_to_linear(dst, src, x0, x1, y0, y1, dst_pitch);

      for (xo = x1; xo < x2; xo += 8)
         wtile_block_copy_to_linear(dst, src, xo, xo + 8, y0, y1, dst_pitch);

      if (x2 != x3)
         wtile_block_copy_to_linear(dst, src, x2, x3, y0, y1, dst_pitch);
   }

   for (yo = y1; yo < y2; yo += 8) {
      /* Partial copy from the left column of blocks if x0 is not aligned to
       * a block.
       */
      if (x0 != x1)
         wtile_block_copy_to_linear(dst, src, x0, x1, yo, yo + 8, dst_pitch);

      /* Full block copies on the inside. */
      for (xo = x1; xo < x2; xo += 8)
         wtile_block_full_copy_to_linear(dst, src, xo, yo, dst_pitch);

      /* Partial copy from the right column of blocks if x3 is not aligned
       * to a block.
       */
      if (x2 != x3)
         wtile_block_copy_to_linear(dst, src, x2, x3, yo, yo + 8, dst_pitch);
   }

   /* If y3 is not aligned to a block, the last block row is only read
    * partially.
    */
   if (y2 != y3) {
      if (x0 != x1)
         wtile_block_copy_to_linear(dst, src, x0, x1, y2, y3, dst_pitch);

      /* [x1, x2) is made of whole 8-byte blocks (the full-block loop above
       * and the top band rely on the same thing), so xo + 8 never exceeds
       * x2 and needs no clamping.
       */
      for (xo = x1; xo < x2; xo += 8)
         wtile_block_copy_to_linear(dst, src, xo, xo + 8, y2, y3, dst_pitch);

      if (x2 != x3)
         wtile_block_copy_to_linear(dst, src, x2, x3, y2, y3, dst_pitch);
   }
}
#if defined(INLINE_SSE41)
static ALWAYS_INLINE void *
_memcpy_streaming_load(void *dest, const void *src, size_t count)
@@ -1135,6 +1475,35 @@ linear_to_tile4_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
}
}
/**
 * Copy texture data from linear to tile W layout, faster.
 *
 * Same as \ref linear_to_wtiled but faster, because it passes constant
 * parameters for the whole-tile case, allowing the compiler to inline code
 * optimized for that case.
 *
 * When the request covers a whole tile (x0 == 0, x3 == wtile_width,
 * y0 == 0, y1 == wtile_height) the values of x1 and x2 are not consulted;
 * 0 and wtile_width are passed in their place.  swizzle_bit must be 0 and
 * copy_type is not used.
 *
 * \copydoc tile_copy_fn
 */
static FLATTEN void
linear_to_wtiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
                        uint32_t y0, uint32_t y1,
                        char *dst, const char *src,
                        int32_t src_pitch,
                        uint32_t swizzle_bit,
                        isl_memcpy_type copy_type)
{
   assert(swizzle_bit == 0);

   /* Plain calls followed by return: "return f();" with a void f() is not
    * valid ISO C in a function returning void.
    */
   if (x0 == 0 && x3 == wtile_width && y0 == 0 && y1 == wtile_height) {
      linear_to_wtiled(0, 0,
                       wtile_width, wtile_width,
                       0, wtile_height,
                       dst, src, src_pitch);
      return;
   }

   linear_to_wtiled(x0, x1, x2, x3, y0, y1,
                    dst, src, src_pitch);
}
/**
* Copy texture data from X tile layout to linear, faster.
*
@@ -1298,6 +1667,36 @@ tile4_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
}
}
/**
 * Copy texture data from tile W layout to linear, faster.
 *
 * Same as \ref wtiled_to_linear but faster, because it passes constant
 * parameters for the whole-tile case, allowing the compiler to inline code
 * optimized for that case.
 *
 * When the request covers a whole tile (x0 == 0, x3 == wtile_width,
 * y0 == 0, y1 == wtile_height) the values of x1 and x2 are not consulted;
 * 0 and wtile_width are passed in their place.  swizzle_bit must be 0 and
 * copy_type is not used.
 *
 * \copydoc tile_copy_fn
 */
static FLATTEN void
wtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
                        uint32_t y0, uint32_t y1,
                        char *dst, const char *src,
                        int32_t dst_pitch,
                        uint32_t swizzle_bit,
                        isl_memcpy_type copy_type)
{
   assert(swizzle_bit == 0);

   /* Plain calls followed by return: "return f();" with a void f() is not
    * valid ISO C in a function returning void.
    */
   if (x0 == 0 && x3 == wtile_width && y0 == 0 && y1 == wtile_height) {
      wtiled_to_linear(0, 0,
                       wtile_width, wtile_width,
                       0, wtile_height,
                       dst, src, dst_pitch);
      return;
   }

   wtiled_to_linear(x0, x1, x2, x3, y0, y1,
                    dst, src, dst_pitch);
}
/**
* Copy from linear to tiled texture.
*
@@ -1340,6 +1739,19 @@ linear_to_tiled(uint32_t xt1, uint32_t xt2,
th = ytile_height;
xt_sub_range_alignment = ytile_span;
tile_copy = linear_to_tile4_faster;
} else if (tiling == ISL_TILING_W) {
tw = wtile_width;
th = wtile_height;
/* The copy function prioritizes W-Tile blocks. The width of a W-Tile
* block is four W-Tile spans.
*/
xt_sub_range_alignment = wtile_span * 4;
tile_copy = linear_to_wtiled_faster;
/* TileW is a special case with doubled physical tile width due to HW
* programming requirements (see isl_tiling_get_info() in
* src/intel/isl/isl.c)
*/
dst_pitch /= 2;
} else {
unreachable("unsupported tiling");
}
@@ -1437,6 +1849,19 @@ tiled_to_linear(uint32_t xt1, uint32_t xt2,
th = ytile_height;
xt_sub_range_alignment = ytile_span;
tile_copy = tile4_to_linear_faster;
} else if (tiling == ISL_TILING_W) {
tw = wtile_width;
th = wtile_height;
/* The copy function prioritizes W-Tile blocks. The width of a W-Tile
* block is four W-Tile spans.
*/
xt_sub_range_alignment = wtile_span * 4;
tile_copy = wtiled_to_linear_faster;
/* TileW is a special case with doubled physical tile width due to HW
* programming requirements (see isl_tiling_get_info() in
* src/intel/isl/isl.c)
*/
src_pitch /= 2;
} else {
unreachable("unsupported tiling");
}
@@ -60,6 +60,11 @@ typedef uint8_t *(*swizzle_func_t)(const uint8_t *base_addr, uint32_t pitch, uin
std::make_tuple( 0, 16, 0, 32), \
std::make_tuple( 0, 16, 0, 64)
/* Extra test parameters for the W-tile fixture.  Each tuple is unpacked by
 * the tests as (x1, x2, y1, y2); every range here starts at 0 and ends on a
 * multiple of 64.
 */
#define FULL_TILEW_COORDINATES \
std::make_tuple( 0, 64, 0, 64), \
std::make_tuple( 0, 128, 0, 64), \
std::make_tuple( 0, 128, 0,128)
struct tile_swizzle_ops {
enum isl_tiling tiling;
swizzle_func_t linear_to_tile_swizzle;
@@ -138,10 +143,42 @@ uint8_t *linear_to_tileX_swizzle(const uint8_t * base_addr, uint32_t pitch, uint
return (uint8_t *) (base_addr + tiled_off);
}
/* Reference address computation for W tiling: returns base_addr plus the
 * tiled byte offset that corresponds to the linear coordinate (x_B, y_px).
 */
uint8_t *linear_to_tileW_swizzle(const uint8_t *base_addr, uint32_t pitch, uint32_t x_B, uint32_t y_px)
{
   /* A tile is addressed with 6 bits in each direction. */
   const uint32_t x_tile_shift = 6, y_tile_shift = 6;

   /* TileW is a special case with doubled physical tile width due to HW
    * programming requirements (see isl_tiling_get_info() in
    * src/intel/isl/isl.c), so only half of the pitch counts here.
    */
   const uint32_t half_pitch = pitch / 2;
   const uint32_t tiles_per_row = half_pitch >> x_tile_shift;
   const uint32_t tile_index =
      (y_px >> y_tile_shift) * tiles_per_row + (x_B >> x_tile_shift);

   /* Mapping from coordinate (x_B, y_px) to the byte offset inside one tile
    * (u = x_B bits, v = y_px bits):
    *
    *    Bit ind : 11 10  9  8  7  6  5  4  3  2  1  0
    *    Tile-W  : u5 u4 u3 v5 v4 v3 v2 u2 v1 u1 v0 u0
    *
    * Each tile takes 4096 bytes.
    */
   uint32_t offset = tile_index * 4096;
   offset |= swizzle_bitops(x_B, 1, 0, 0);
   offset |= swizzle_bitops(y_px, 1, 0, 1);
   offset |= swizzle_bitops(x_B, 1, 1, 2);
   offset |= swizzle_bitops(y_px, 1, 1, 3);
   offset |= swizzle_bitops(x_B, 1, 2, 4);
   offset |= swizzle_bitops(y_px, 4, 2, 5);
   offset |= swizzle_bitops(x_B, 3, 3, 9);

   return (uint8_t *) (base_addr + offset);
}
/* Table pairing each tiling format with the reference function that turns a
 * linear (x_B, y_px) coordinate into an address in the tiled layout.
 */
struct tile_swizzle_ops swizzle_opers[] = {
{ISL_TILING_Y0, linear_to_tileY_swizzle},
{ISL_TILING_4, linear_to_tile4_swizzle},
{ISL_TILING_X, linear_to_tileX_swizzle},
{ISL_TILING_W, linear_to_tileW_swizzle},
};
class tileTFixture: public ::testing::Test {
@@ -194,6 +231,11 @@ class tileXFixture : public tileTFixture,
int, int>>
{};
/* Parameterized fixture for the W-tile tests.  It adds nothing to
 * tileTFixture besides the test parameter, a tuple of four ints that the
 * tests unpack as (x1, x2, y1, y2).
 */
class tileWFixture : public tileTFixture,
public ::testing::WithParamInterface<std::tuple<int, int,
int, int>>
{};
void tileTFixture::test_setup(TILE_CONV convert,
enum isl_tiling tiling_fmt,
enum isl_format format,
@@ -401,6 +443,24 @@ TEST_P(tileXFixture, tiletolin)
run_test(x1, x2, y1, y2);
}
/* Linear -> W-tile conversion test for one (x1, x2, y1, y2) parameter.
 * NOTE(review): x2 and y2 are handed to test_setup() as its last two
 * arguments, presumably the image extent -- confirm against test_setup().
 */
TEST_P(tileWFixture, lintotile)
{
   int x1, x2, y1, y2;
   std::tie(x1, x2, y1, y2) = GetParam();

   test_setup(LIN_TO_TILE, ISL_TILING_W, TILEW_IMAGE_FORMAT, x2, y2);

   if (print_results) {
      printf("Coordinates: x1=%d x2=%d y1=%d y2=%d \n", x1, x2, y1, y2);
   }

   run_test(x1, x2, y1, y2);
}
/* W-tile -> linear conversion test for one (x1, x2, y1, y2) parameter.
 * NOTE(review): x2 and y2 are handed to test_setup() as its last two
 * arguments, presumably the image extent -- confirm against test_setup().
 */
TEST_P(tileWFixture, tiletolin)
{
   int x1, x2, y1, y2;
   std::tie(x1, x2, y1, y2) = GetParam();

   test_setup(TILE_TO_LIN, ISL_TILING_W, TILEW_IMAGE_FORMAT, x2, y2);

   if (print_results) {
      printf("Coordinates: x1=%d x2=%d y1=%d y2=%d \n", x1, x2, y1, y2);
   }

   run_test(x1, x2, y1, y2);
}
/* Run the tileYFixture tests over the shared coordinate list plus the
 * FULL_TILEY_COORDINATES list.
 */
INSTANTIATE_TEST_SUITE_P(tileY, tileYFixture, testing::Values(TILE_COORDINATES,
FULL_TILEY_COORDINATES));
@@ -408,3 +468,5 @@ INSTANTIATE_TEST_SUITE_P(tile4, tile4Fixture, testing::Values(TILE_COORDINATES,
FULL_TILEY_COORDINATES));
/* Run the tileXFixture tests over the shared coordinate list plus the
 * FULL_TILEX_COORDINATES list.
 */
INSTANTIATE_TEST_SUITE_P(tileX, tileXFixture, testing::Values(TILE_COORDINATES,
FULL_TILEX_COORDINATES));
/* Run the tileWFixture tests over the shared coordinate list plus the
 * FULL_TILEW_COORDINATES list.
 */
INSTANTIATE_TEST_SUITE_P(tileW, tileWFixture, testing::Values(TILE_COORDINATES,
FULL_TILEW_COORDINATES));