diff --git a/src/panfrost/lib/pan_layout.c b/src/panfrost/lib/pan_layout.c
index 7344f05d4b8..b1af38a2f89 100644
--- a/src/panfrost/lib/pan_layout.c
+++ b/src/panfrost/lib/pan_layout.c
@@ -106,36 +106,6 @@ pan_image_renderblock_size_el(uint64_t modifier, enum pipe_format format,
    };
 }
 
-static unsigned
-linear_or_tiled_row_align_req(unsigned arch, enum pipe_format format,
-                              uint64_t modifier)
-{
-   assert(modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED ||
-          modifier == DRM_FORMAT_MOD_LINEAR);
-
-   /* Prior to v7 we assume a cacheline alignment, though this could be relaxed
-    * on some formats if we have to, like we do on v7+. */
-   if (arch < 7)
-      return 64;
-
-   switch (format) {
-   /* For v7+, NV12/NV21/I420 have a looser alignment requirement of 16 bytes */
-   case PIPE_FORMAT_R8_G8B8_420_UNORM:
-   case PIPE_FORMAT_G8_B8R8_420_UNORM:
-   case PIPE_FORMAT_R8_G8_B8_420_UNORM:
-   case PIPE_FORMAT_R8_B8_G8_420_UNORM:
-   case PIPE_FORMAT_R8_G8B8_422_UNORM:
-   case PIPE_FORMAT_R8_B8G8_422_UNORM:
-      return 16;
-   /* the 10 bit formats have even looser alignment */
-   case PIPE_FORMAT_R10_G10B10_420_UNORM:
-   case PIPE_FORMAT_R10_G10B10_422_UNORM:
-      return 1;
-   default:
-      return 64;
-   }
-}
-
 /*
  * Computes sizes for checksumming, which is 8 bytes per 16x16 tile.
  * Checksumming is believed to be a CRC variant (CRC64 based on the size?).
@@ -318,7 +288,7 @@ wsi_row_pitch_to_row_stride(
                              get_plane_blocksize(format, plane_idx);
 
       row_align_mask =
-         linear_or_tiled_row_align_req(arch, format, modifier) - 1;
+         pan_linear_or_tiled_row_align_req(arch, format, plane_idx) - 1;
       offset_align_mask = row_align_mask;
       *row_stride_B = wsi_row_pitch_B * block_size_el.height;
       width_px = (*row_stride_B / tile_size_B) *
@@ -381,12 +351,21 @@ pan_image_layout_init(
 
    const bool afbc = drm_is_afbc(props->modifier);
    const bool afrc = drm_is_afrc(props->modifier);
-   const int align_req_B =
-      afbc ? pan_afbc_header_row_stride_align(arch, props->format,
-                                              props->modifier)
-           : (afrc ? pan_afrc_buffer_alignment_from_modifier(props->modifier)
-                   : linear_or_tiled_row_align_req(arch, props->format,
-                                                   props->modifier));
+   int align_req_B;
+
+   if (afbc) {
+      align_req_B =
+         pan_afbc_header_row_stride_align(arch, props->format, props->modifier);
+   } else if (afrc) {
+      align_req_B = pan_afrc_buffer_alignment_from_modifier(props->modifier);
+   } else {
+      /* This is the alignment for non-explicit layout, and we want things
+       * aligned on at least a cacheline for performance reasons in that case.
+       */
+      align_req_B =
+         pan_linear_or_tiled_row_align_req(arch, props->format, plane_idx);
+      align_req_B = MAX2(align_req_B, 64);
+   }
 
    /* Mandate alignment */
    unsigned wsi_row_stride_B = 0;
diff --git a/src/panfrost/lib/pan_layout.h b/src/panfrost/lib/pan_layout.h
index 3288da9be40..d366366dbae 100644
--- a/src/panfrost/lib/pan_layout.h
+++ b/src/panfrost/lib/pan_layout.h
@@ -187,6 +187,77 @@ unsigned pan_image_get_wsi_row_pitch(const struct pan_image_props *props,
                                      const struct pan_image_layout *layout,
                                      unsigned level);
 
+/*
+ * Minimum alignment, in bytes, required on the row stride of a linear or
+ * u-interleaved tiled image plane.
+ *
+ * Before v7 the value is derived from the format: the plane blocksize for
+ * planar formats, the blocksize for compressed formats, one component when
+ * every component has the same byte-aligned power-of-two size, and a whole
+ * pixel otherwise. On v7+ it is looked up per format, defaulting to 64.
+ */
+static inline unsigned
+pan_linear_or_tiled_row_align_req(unsigned arch, enum pipe_format format,
+                                  unsigned plane_idx)
+{
+   if (arch < 7) {
+      unsigned nplanes = util_format_get_num_planes(format);
+
+      /* If this is a planar format, align on the plane blocksize. */
+      if (nplanes > 1) {
+         enum pipe_format plane_format =
+            util_format_get_plane_format(format, plane_idx);
+
+         return util_next_power_of_two(util_format_get_blocksize(plane_format));
+      }
+
+      /* Align on blocksize if the format is compressed. */
+      if (util_format_is_compressed(format))
+         return util_next_power_of_two(util_format_get_blocksize(format));
+
+      const struct util_format_description *fdesc =
+         util_format_description(format);
+      unsigned comp_sz_bits = 0;
+      for (unsigned i = 0; i < ARRAY_SIZE(fdesc->channel); i++) {
+         if (!fdesc->channel[i].size)
+            continue;
+
+         /* Align on a pixel if any component is not 8-bit aligned or not a
+          * power of two. */
+         if (fdesc->channel[i].size % 8 != 0 ||
+             !util_is_power_of_two_nonzero(fdesc->channel[i].size))
+            return util_next_power_of_two(util_format_get_blocksize(format));
+
+         /* Align on a pixel if not all components have the same size. */
+         if (comp_sz_bits != 0 && comp_sz_bits != fdesc->channel[i].size)
+            return util_next_power_of_two(util_format_get_blocksize(format));
+
+         comp_sz_bits = fdesc->channel[i].size;
+      }
+
+      /* If all components are the same size, 8-bit aligned and a power of two,
+       * align on a component. */
+      return comp_sz_bits / 8;
+   }
+
+   switch (format) {
+   /* For v7+, NV12/NV21/I420 have a looser alignment requirement of 16 bytes */
+   case PIPE_FORMAT_R8_G8B8_420_UNORM:
+   case PIPE_FORMAT_G8_B8R8_420_UNORM:
+   case PIPE_FORMAT_R8_G8_B8_420_UNORM:
+   case PIPE_FORMAT_R8_B8_G8_420_UNORM:
+   case PIPE_FORMAT_R8_G8B8_422_UNORM:
+   case PIPE_FORMAT_R8_B8G8_422_UNORM:
+      return 16;
+   /* the 10 bit formats have even looser alignment */
+   case PIPE_FORMAT_R10_G10B10_420_UNORM:
+   case PIPE_FORMAT_R10_G10B10_422_UNORM:
+      return 1;
+   default:
+      return 64;
+   }
+}
+
 #ifdef __cplusplus
 } /* extern C */
 #endif
diff --git a/src/panfrost/lib/tests/test-layout.cpp b/src/panfrost/lib/tests/test-layout.cpp
index 85f06c6cd8d..7f74462dc85 100644
--- a/src/panfrost/lib/tests/test-layout.cpp
+++ b/src/panfrost/lib/tests/test-layout.cpp
@@ -29,6 +29,37 @@
 
 #include <gtest/gtest.h>
 
+TEST(Align, UTiledLinear)
+{
+   struct {
+      unsigned arch;
+      enum pipe_format format;
+      unsigned plane_idx;
+      unsigned alignment;
+   } cases[] = {
+      { 6, PIPE_FORMAT_ETC2_RGB8, 0, 8 },
+      { 6, PIPE_FORMAT_R32G32B32_FLOAT, 0, 4 },
+      { 6, PIPE_FORMAT_R8G8B8A8_UNORM, 0, 1 },
+      { 6, PIPE_FORMAT_R5G6B5_UNORM, 0, 2 },
+      { 6, PIPE_FORMAT_R8_G8B8_420_UNORM, 0, 1 },
+      { 6, PIPE_FORMAT_R8_G8B8_420_UNORM, 1, 2 },
+      { 7, PIPE_FORMAT_ETC2_RGB8, 0, 64 },
+      { 7, PIPE_FORMAT_R32G32B32_FLOAT, 0, 64 },
+      { 7, PIPE_FORMAT_R8G8B8A8_UNORM, 0, 64 },
+      { 7, PIPE_FORMAT_R5G6B5_UNORM, 0, 64 },
+      { 7, PIPE_FORMAT_R8_G8B8_420_UNORM, 0, 16 },
+      { 7, PIPE_FORMAT_R8_G8B8_420_UNORM, 1, 16 },
+      { 7, PIPE_FORMAT_R10_G10B10_420_UNORM, 0, 1 },
+      { 7, PIPE_FORMAT_R10_G10B10_420_UNORM, 1, 1 },
+   };
+   for (unsigned i = 0; i < ARRAY_SIZE(cases); ++i) {
+      unsigned align = pan_linear_or_tiled_row_align_req(
+         cases[i].arch, cases[i].format, cases[i].plane_idx);
+
+      EXPECT_EQ(align, cases[i].alignment);
+   }
+}
+
 TEST(BlockSize, Linear)
 {
    enum pipe_format format[] = {PIPE_FORMAT_R32G32B32_FLOAT,
@@ -588,26 +619,7 @@ offset_align_for_mod(unsigned arch, const struct pan_image_props *iprops,
       assert(modifier == DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED ||
              modifier == DRM_FORMAT_MOD_LINEAR);
 
-      if (arch < 7)
-         return 64;
-
-      switch (format) {
-      /* For v7+, NV12/NV21/I420 have a looser alignment requirement of 16 bytes
-       */
-      case PIPE_FORMAT_R8_G8B8_420_UNORM:
-      case PIPE_FORMAT_G8_B8R8_420_UNORM:
-      case PIPE_FORMAT_R8_G8_B8_420_UNORM:
-      case PIPE_FORMAT_R8_B8_G8_420_UNORM:
-      case PIPE_FORMAT_R8_G8B8_422_UNORM:
-      case PIPE_FORMAT_R8_B8G8_422_UNORM:
-         return 16;
-      /* the 10 bit formats have even looser alignment */
-      case PIPE_FORMAT_R10_G10B10_420_UNORM:
-      case PIPE_FORMAT_R10_G10B10_422_UNORM:
-         return 1;
-      default:
-         return 64;
-      }
+      return pan_linear_or_tiled_row_align_req(arch, format, plane_idx);
    }
 }
 