panfrost: also consider z/s for tile-size

We also have a budget for the tile size for depth-buffers. It's
currently harder to trigger issues with this than with color-buffers,
but this becomes important when we support larger MSAA counts.

We also need to take a bit of care for stencil-only attachments, because
they also count against a limit here. We really only care about the
sample counts here, because the stencil buffer budget is always a
quarter of the depth-buffer budget, and always uses a single byte per
sample.

Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33925>
This commit is contained in:
Boris Brezillon
2025-03-07 10:22:14 +01:00
committed by Marge Bot
parent 9ec6197a0b
commit 77d38bb7ca
9 changed files with 80 additions and 25 deletions
@@ -89,6 +89,7 @@ panfrost_open_device(void *memctx, int fd, struct panfrost_device *dev)
panfrost_query_core_count(&dev->kmod.props, &dev->core_id_range);
dev->thread_tls_alloc = panfrost_query_thread_tls_alloc(&dev->kmod.props);
dev->optimal_tib_size = panfrost_query_optimal_tib_size(dev->model);
dev->optimal_z_tib_size = panfrost_query_optimal_z_tib_size(dev->model);
dev->compressed_formats =
panfrost_query_compressed_formats(&dev->kmod.props);
dev->tiler_features = panfrost_query_tiler_features(&dev->kmod.props);
@@ -113,6 +113,7 @@ struct panfrost_device {
/* Maximum tilebuffer size in bytes for optimal performance. */
unsigned optimal_tib_size;
unsigned optimal_z_tib_size;
unsigned thread_tls_alloc;
struct panfrost_tiler_features tiler_features;
+1
View File
@@ -478,6 +478,7 @@ panfrost_batch_to_fb_info(const struct panfrost_batch *batch,
memset(s, 0, sizeof(*s));
fb->tile_buf_budget = dev->optimal_tib_size;
fb->z_tile_buf_budget = dev->optimal_z_tib_size;
fb->width = batch->key.width;
fb->height = batch->key.height;
fb->extent.minx = batch->minx;
+30
View File
@@ -376,6 +376,26 @@ pan_cbuf_bytes_per_pixel(const struct pan_fb_info *fb)
return sum;
}
/*
 * Compute the per-pixel size, in bytes, that the depth/stencil attachments
 * charge against the depth tilebuffer budget.
 *
 * The sample count comes from the combined depth/stencil view when one is
 * bound, otherwise from the framebuffer's own sample count. A separate
 * stencil view can only raise that count, never lower it.
 */
static unsigned
pan_zsbuf_bytes_per_pixel(const struct pan_fb_info *fb)
{
   const struct pan_image_view *depth_stencil = fb->zs.view.zs;
   const struct pan_image_view *stencil = fb->zs.view.s;

   /* A bound ZS view overrides the framebuffer-wide sample count. */
   unsigned sample_count =
      depth_stencil ? depth_stencil->nr_samples : fb->nr_samples;

   /* Take the larger of the two counts when a stencil view is present. */
   if (stencil && stencil->nr_samples > sample_count)
      sample_count = stencil->nr_samples;

   /* Depth is always stored as a 32-bit float. Stencil requires depth to
    * be allocated, but doesn't have its own budget; it's tied to the
    * depth buffer, so only the sample count matters here.
    */
   return sizeof(float) * sample_count;
}
/*
* Select the largest tile size that fits within the tilebuffer budget.
* Formally, maximize (pixels per tile) such that it is a power of two and
@@ -397,6 +417,16 @@ GENX(pan_select_tile_size)(struct pan_fb_info *fb)
bytes_per_pixel = pan_cbuf_bytes_per_pixel(fb);
fb->tile_size = fb->tile_buf_budget >> util_logbase2_ceil(bytes_per_pixel);
unsigned zs_bytes_per_pixel = pan_zsbuf_bytes_per_pixel(fb);
if (zs_bytes_per_pixel > 0) {
assert(util_is_power_of_two_nonzero(fb->z_tile_buf_budget));
assert(fb->z_tile_buf_budget >= 1024);
fb->tile_size =
MIN2(fb->tile_size,
fb->z_tile_buf_budget >> util_logbase2_ceil(zs_bytes_per_pixel));
}
/* Clamp tile size to hardware limits */
fb->tile_size =
MIN2(fb->tile_size, panfrost_max_effective_tile_size(PAN_ARCH));
+1
View File
@@ -140,6 +140,7 @@ struct pan_fb_info {
/* Optimal tile buffer size. */
unsigned tile_buf_budget;
unsigned z_tile_buf_budget;
unsigned tile_size;
unsigned cbuf_allocation;
+39 -25
View File
@@ -37,43 +37,45 @@
#define HAS_ANISO (0)
#define MODEL(gpu_id_, gpu_variant_, shortname, counters_, \
min_rev_anisotropic_, tib_size_, quirks_) \
min_rev_anisotropic_, tib_size_, tib_z_size_, quirks_) \
{ \
.gpu_id = gpu_id_, .gpu_variant = gpu_variant_, \
.name = "Mali-" shortname, \
.performance_counters = counters_, \
.min_rev_anisotropic = min_rev_anisotropic_, \
.tilebuffer_size = tib_size_, .quirks = quirks_, \
.tilebuffer_size = tib_size_, \
.tilebuffer_z_size = tib_z_size_, \
.quirks = quirks_, \
}
/* Table of supported Mali GPUs */
/* clang-format off */
const struct panfrost_model panfrost_model_list[] = {
MODEL(0x600, 0, "T600", "T60x", NO_ANISO, 8192, {}),
MODEL(0x620, 0, "T620", "T62x", NO_ANISO, 8192, {}),
MODEL(0x720, 0, "T720", "T72x", NO_ANISO, 8192, { .no_hierarchical_tiling = true }),
MODEL(0x750, 0, "T760", "T76x", NO_ANISO, 8192, {}),
MODEL(0x820, 0, "T820", "T82x", NO_ANISO, 8192, { .no_hierarchical_tiling = true }),
MODEL(0x830, 0, "T830", "T83x", NO_ANISO, 8192, { .no_hierarchical_tiling = true }),
MODEL(0x860, 0, "T860", "T86x", NO_ANISO, 8192, {}),
MODEL(0x880, 0, "T880", "T88x", NO_ANISO, 8192, {}),
MODEL(0x600, 0, "T600", "T60x", NO_ANISO, 8192, 8192, {}),
MODEL(0x620, 0, "T620", "T62x", NO_ANISO, 8192, 8192, {}),
MODEL(0x720, 0, "T720", "T72x", NO_ANISO, 8192, 8192, { .no_hierarchical_tiling = true }),
MODEL(0x750, 0, "T760", "T76x", NO_ANISO, 8192, 8192, {}),
MODEL(0x820, 0, "T820", "T82x", NO_ANISO, 8192, 8192, { .no_hierarchical_tiling = true }),
MODEL(0x830, 0, "T830", "T83x", NO_ANISO, 8192, 8192, { .no_hierarchical_tiling = true }),
MODEL(0x860, 0, "T860", "T86x", NO_ANISO, 8192, 8192, {}),
MODEL(0x880, 0, "T880", "T88x", NO_ANISO, 8192, 8192, {}),
MODEL(0x6000, 0, "G71", "TMIx", NO_ANISO, 8192, {}),
MODEL(0x6221, 0, "G72", "THEx", 0x0030 /* r0p3 */, 16384, {}),
MODEL(0x7090, 0, "G51", "TSIx", 0x1010 /* r1p1 */, 8192, {}),
MODEL(0x7093, 0, "G31", "TDVx", HAS_ANISO, 8192, {}),
MODEL(0x7211, 0, "G76", "TNOx", HAS_ANISO, 16384, {}),
MODEL(0x7212, 0, "G52", "TGOx", HAS_ANISO, 16384, {}),
MODEL(0x7402, 0, "G52 r1", "TGOx", HAS_ANISO, 8192, {}),
MODEL(0x9091, 0, "G57", "TNAx", HAS_ANISO, 16384, {}),
MODEL(0x9093, 0, "G57", "TNAx", HAS_ANISO, 16384, {}),
MODEL(0x6000, 0, "G71", "TMIx", NO_ANISO, 8192, 8192, {}),
MODEL(0x6221, 0, "G72", "THEx", 0x0030 /* r0p3 */, 16384, 8192, {}),
MODEL(0x7090, 0, "G51", "TSIx", 0x1010 /* r1p1 */, 8192, 8192, {}),
MODEL(0x7093, 0, "G31", "TDVx", HAS_ANISO, 8192, 8192, {}),
MODEL(0x7211, 0, "G76", "TNOx", HAS_ANISO, 16384, 8192, {}),
MODEL(0x7212, 0, "G52", "TGOx", HAS_ANISO, 16384, 8192, {}),
MODEL(0x7402, 0, "G52 r1", "TGOx", HAS_ANISO, 8192, 8192, {}),
MODEL(0x9091, 0, "G57", "TNAx", HAS_ANISO, 16384, 8192, {}),
MODEL(0x9093, 0, "G57", "TNAx", HAS_ANISO, 16384, 8192, {}),
MODEL(0xa867, 0, "G610", "TVIx", HAS_ANISO, 32768, {}),
MODEL(0xac74, 0, "G310", "TVAx", HAS_ANISO, 16384, {}),
MODEL(0xac74, 1, "G310", "TVAx", HAS_ANISO, 16384, {}),
MODEL(0xac74, 2, "G310", "TVAx", HAS_ANISO, 16384, {}),
MODEL(0xac74, 3, "G310", "TVAx", HAS_ANISO, 32768, {}),
MODEL(0xac74, 4, "G310", "TVAx", HAS_ANISO, 32768, {}),
MODEL(0xa867, 0, "G610", "TVIx", HAS_ANISO, 32768, 16384, {}),
MODEL(0xac74, 0, "G310", "TVAx", HAS_ANISO, 16384, 8192, {}),
MODEL(0xac74, 1, "G310", "TVAx", HAS_ANISO, 16384, 8192, {}),
MODEL(0xac74, 2, "G310", "TVAx", HAS_ANISO, 16384, 8192, {}),
MODEL(0xac74, 3, "G310", "TVAx", HAS_ANISO, 32768, 16384, {}),
MODEL(0xac74, 4, "G310", "TVAx", HAS_ANISO, 32768, 16384, {}),
};
/* clang-format on */
@@ -207,6 +209,18 @@ panfrost_query_optimal_tib_size(const struct panfrost_model *model)
return model->tilebuffer_size / 2;
}
/*
 * Return the depth tilebuffer budget for a GPU model, which is half of the
 * model's default depth tilebuffer size.
 */
unsigned
panfrost_query_optimal_z_tib_size(const struct panfrost_model *model)
{
   /* Requiring a power-of-two size of at least 2 KiB guarantees that the
    * halved value is still a power of two and at least 1 KiB, which is what
    * the tile-size selection code asserts about the Z tile buffer budget.
    */
   assert(model->tilebuffer_z_size >= 2048);
   assert(util_is_power_of_two_nonzero(model->tilebuffer_z_size));

   return model->tilebuffer_z_size / 2;
}
uint64_t
panfrost_clamp_to_usable_va_range(const struct pan_kmod_dev *dev, uint64_t va)
{
+5
View File
@@ -66,6 +66,9 @@ struct panfrost_model {
/* Default tilebuffer size in bytes for the model. */
unsigned tilebuffer_size;
/* Default tilebuffer depth size in bytes for the model. */
unsigned tilebuffer_z_size;
struct {
/* The GPU lacks the capability for hierarchical tiling, without
* an "Advanced Tiling Unit", instead requiring a single bin
@@ -98,6 +101,8 @@ bool panfrost_query_afrc(const struct pan_kmod_dev_props *props);
unsigned panfrost_query_optimal_tib_size(const struct panfrost_model *model);
unsigned panfrost_query_optimal_z_tib_size(const struct panfrost_model *model);
uint64_t panfrost_clamp_to_usable_va_range(const struct pan_kmod_dev *dev,
uint64_t va);
@@ -2145,6 +2145,7 @@ panvk_per_arch(cmd_inherit_render_state)(
0;
*fbinfo = (struct pan_fb_info){
.tile_buf_budget = panfrost_query_optimal_tib_size(phys_dev->model),
.z_tile_buf_budget = panfrost_query_optimal_z_tib_size(phys_dev->model),
.nr_samples = inheritance_info->rasterizationSamples,
.rt_count = inheritance_info->colorAttachmentCount,
};
+1
View File
@@ -233,6 +233,7 @@ panvk_per_arch(cmd_init_render_state)(struct panvk_cmd_buffer *cmdbuf,
cmdbuf->state.gfx.render.view_mask = pRenderingInfo->viewMask;
*fbinfo = (struct pan_fb_info){
.tile_buf_budget = panfrost_query_optimal_tib_size(phys_dev->model),
.z_tile_buf_budget = panfrost_query_optimal_z_tib_size(phys_dev->model),
.nr_samples = 1,
.rt_count = pRenderingInfo->colorAttachmentCount,
};