panfrost: also consider z/s for tile-size
We also have a budget for the tile size for depth-buffers. It's currently harder to trigger issues with this than with color-buffers, but this becomes important when we support larger MSAA counts. We also need to take a bit of care for stencil-only attachments, because they also count against a limit here. We really only care about the sample counts here, because the stencil buffer budget is always a quarter of the depth-buffer budget, and always uses a single byte per sample. Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33925>
This commit is contained in:
committed by
Marge Bot
parent
9ec6197a0b
commit
77d38bb7ca
@@ -89,6 +89,7 @@ panfrost_open_device(void *memctx, int fd, struct panfrost_device *dev)
|
||||
panfrost_query_core_count(&dev->kmod.props, &dev->core_id_range);
|
||||
dev->thread_tls_alloc = panfrost_query_thread_tls_alloc(&dev->kmod.props);
|
||||
dev->optimal_tib_size = panfrost_query_optimal_tib_size(dev->model);
|
||||
dev->optimal_z_tib_size = panfrost_query_optimal_z_tib_size(dev->model);
|
||||
dev->compressed_formats =
|
||||
panfrost_query_compressed_formats(&dev->kmod.props);
|
||||
dev->tiler_features = panfrost_query_tiler_features(&dev->kmod.props);
|
||||
|
||||
@@ -113,6 +113,7 @@ struct panfrost_device {
|
||||
|
||||
/* Maximum tilebuffer size in bytes for optimal performance. */
|
||||
unsigned optimal_tib_size;
|
||||
unsigned optimal_z_tib_size;
|
||||
|
||||
unsigned thread_tls_alloc;
|
||||
struct panfrost_tiler_features tiler_features;
|
||||
|
||||
@@ -478,6 +478,7 @@ panfrost_batch_to_fb_info(const struct panfrost_batch *batch,
|
||||
memset(s, 0, sizeof(*s));
|
||||
|
||||
fb->tile_buf_budget = dev->optimal_tib_size;
|
||||
fb->z_tile_buf_budget = dev->optimal_z_tib_size;
|
||||
fb->width = batch->key.width;
|
||||
fb->height = batch->key.height;
|
||||
fb->extent.minx = batch->minx;
|
||||
|
||||
@@ -376,6 +376,26 @@ pan_cbuf_bytes_per_pixel(const struct pan_fb_info *fb)
|
||||
return sum;
|
||||
}
|
||||
|
||||
/* Per-pixel byte count charged against the depth/stencil tile budget.
 * The sample count starts at fb->nr_samples, is replaced by the depth
 * view's sample count when a depth view is bound, and is raised to the
 * stencil view's sample count when that is larger. The result is
 * sizeof(float) bytes per sample.
 */
static unsigned
|
||||
pan_zsbuf_bytes_per_pixel(const struct pan_fb_info *fb)
|
||||
{
|
||||
unsigned samples = fb->nr_samples;
|
||||
|
||||
const struct pan_image_view *zs_view = fb->zs.view.zs;
|
||||
if (zs_view)
|
||||
samples = zs_view->nr_samples;
|
||||
|
||||
const struct pan_image_view *s_view = fb->zs.view.s;
|
||||
if (s_view)
|
||||
samples = MAX2(samples, s_view->nr_samples);
|
||||
|
||||
/* Depth is always stored in a 32-bit float. Stencil requires depth to
|
||||
* be allocated, but doesn't have its own budget; it's tied to the
|
||||
* depth buffer.
|
||||
*/
|
||||
return sizeof(float) * samples;
|
||||
}
|
||||
|
||||
/*
|
||||
* Select the largest tile size that fits within the tilebuffer budget.
|
||||
* Formally, maximize (pixels per tile) such that it is a power of two and
|
||||
@@ -397,6 +417,16 @@ GENX(pan_select_tile_size)(struct pan_fb_info *fb)
|
||||
bytes_per_pixel = pan_cbuf_bytes_per_pixel(fb);
|
||||
fb->tile_size = fb->tile_buf_budget >> util_logbase2_ceil(bytes_per_pixel);
|
||||
|
||||
unsigned zs_bytes_per_pixel = pan_zsbuf_bytes_per_pixel(fb);
|
||||
if (zs_bytes_per_pixel > 0) {
|
||||
assert(util_is_power_of_two_nonzero(fb->z_tile_buf_budget));
|
||||
assert(fb->z_tile_buf_budget >= 1024);
|
||||
|
||||
fb->tile_size =
|
||||
MIN2(fb->tile_size,
|
||||
fb->z_tile_buf_budget >> util_logbase2_ceil(zs_bytes_per_pixel));
|
||||
}
|
||||
|
||||
/* Clamp tile size to hardware limits */
|
||||
fb->tile_size =
|
||||
MIN2(fb->tile_size, panfrost_max_effective_tile_size(PAN_ARCH));
|
||||
|
||||
@@ -140,6 +140,7 @@ struct pan_fb_info {
|
||||
|
||||
/* Optimal tile buffer size. */
|
||||
unsigned tile_buf_budget;
|
||||
unsigned z_tile_buf_budget;
|
||||
unsigned tile_size;
|
||||
unsigned cbuf_allocation;
|
||||
|
||||
|
||||
@@ -37,43 +37,45 @@
|
||||
#define HAS_ANISO (0)
|
||||
|
||||
#define MODEL(gpu_id_, gpu_variant_, shortname, counters_, \
|
||||
min_rev_anisotropic_, tib_size_, quirks_) \
|
||||
min_rev_anisotropic_, tib_size_, tib_z_size_, quirks_) \
|
||||
{ \
|
||||
.gpu_id = gpu_id_, .gpu_variant = gpu_variant_, \
|
||||
.name = "Mali-" shortname, \
|
||||
.performance_counters = counters_, \
|
||||
.min_rev_anisotropic = min_rev_anisotropic_, \
|
||||
.tilebuffer_size = tib_size_, .quirks = quirks_, \
|
||||
.tilebuffer_size = tib_size_, \
|
||||
.tilebuffer_z_size = tib_z_size_, \
|
||||
.quirks = quirks_, \
|
||||
}
|
||||
|
||||
/* Table of supported Mali GPUs */
|
||||
/* clang-format off */
|
||||
const struct panfrost_model panfrost_model_list[] = {
|
||||
MODEL(0x600, 0, "T600", "T60x", NO_ANISO, 8192, {}),
|
||||
MODEL(0x620, 0, "T620", "T62x", NO_ANISO, 8192, {}),
|
||||
MODEL(0x720, 0, "T720", "T72x", NO_ANISO, 8192, { .no_hierarchical_tiling = true }),
|
||||
MODEL(0x750, 0, "T760", "T76x", NO_ANISO, 8192, {}),
|
||||
MODEL(0x820, 0, "T820", "T82x", NO_ANISO, 8192, { .no_hierarchical_tiling = true }),
|
||||
MODEL(0x830, 0, "T830", "T83x", NO_ANISO, 8192, { .no_hierarchical_tiling = true }),
|
||||
MODEL(0x860, 0, "T860", "T86x", NO_ANISO, 8192, {}),
|
||||
MODEL(0x880, 0, "T880", "T88x", NO_ANISO, 8192, {}),
|
||||
MODEL(0x600, 0, "T600", "T60x", NO_ANISO, 8192, 8192, {}),
|
||||
MODEL(0x620, 0, "T620", "T62x", NO_ANISO, 8192, 8192, {}),
|
||||
MODEL(0x720, 0, "T720", "T72x", NO_ANISO, 8192, 8192, { .no_hierarchical_tiling = true }),
|
||||
MODEL(0x750, 0, "T760", "T76x", NO_ANISO, 8192, 8192, {}),
|
||||
MODEL(0x820, 0, "T820", "T82x", NO_ANISO, 8192, 8192, { .no_hierarchical_tiling = true }),
|
||||
MODEL(0x830, 0, "T830", "T83x", NO_ANISO, 8192, 8192, { .no_hierarchical_tiling = true }),
|
||||
MODEL(0x860, 0, "T860", "T86x", NO_ANISO, 8192, 8192, {}),
|
||||
MODEL(0x880, 0, "T880", "T88x", NO_ANISO, 8192, 8192, {}),
|
||||
|
||||
MODEL(0x6000, 0, "G71", "TMIx", NO_ANISO, 8192, {}),
|
||||
MODEL(0x6221, 0, "G72", "THEx", 0x0030 /* r0p3 */, 16384, {}),
|
||||
MODEL(0x7090, 0, "G51", "TSIx", 0x1010 /* r1p1 */, 8192, {}),
|
||||
MODEL(0x7093, 0, "G31", "TDVx", HAS_ANISO, 8192, {}),
|
||||
MODEL(0x7211, 0, "G76", "TNOx", HAS_ANISO, 16384, {}),
|
||||
MODEL(0x7212, 0, "G52", "TGOx", HAS_ANISO, 16384, {}),
|
||||
MODEL(0x7402, 0, "G52 r1", "TGOx", HAS_ANISO, 8192, {}),
|
||||
MODEL(0x9091, 0, "G57", "TNAx", HAS_ANISO, 16384, {}),
|
||||
MODEL(0x9093, 0, "G57", "TNAx", HAS_ANISO, 16384, {}),
|
||||
MODEL(0x6000, 0, "G71", "TMIx", NO_ANISO, 8192, 8192, {}),
|
||||
MODEL(0x6221, 0, "G72", "THEx", 0x0030 /* r0p3 */, 16384, 8192, {}),
|
||||
MODEL(0x7090, 0, "G51", "TSIx", 0x1010 /* r1p1 */, 8192, 8192, {}),
|
||||
MODEL(0x7093, 0, "G31", "TDVx", HAS_ANISO, 8192, 8192, {}),
|
||||
MODEL(0x7211, 0, "G76", "TNOx", HAS_ANISO, 16384, 8192, {}),
|
||||
MODEL(0x7212, 0, "G52", "TGOx", HAS_ANISO, 16384, 8192, {}),
|
||||
MODEL(0x7402, 0, "G52 r1", "TGOx", HAS_ANISO, 8192, 8192, {}),
|
||||
MODEL(0x9091, 0, "G57", "TNAx", HAS_ANISO, 16384, 8192, {}),
|
||||
MODEL(0x9093, 0, "G57", "TNAx", HAS_ANISO, 16384, 8192, {}),
|
||||
|
||||
MODEL(0xa867, 0, "G610", "TVIx", HAS_ANISO, 32768, {}),
|
||||
MODEL(0xac74, 0, "G310", "TVAx", HAS_ANISO, 16384, {}),
|
||||
MODEL(0xac74, 1, "G310", "TVAx", HAS_ANISO, 16384, {}),
|
||||
MODEL(0xac74, 2, "G310", "TVAx", HAS_ANISO, 16384, {}),
|
||||
MODEL(0xac74, 3, "G310", "TVAx", HAS_ANISO, 32768, {}),
|
||||
MODEL(0xac74, 4, "G310", "TVAx", HAS_ANISO, 32768, {}),
|
||||
MODEL(0xa867, 0, "G610", "TVIx", HAS_ANISO, 32768, 16384, {}),
|
||||
MODEL(0xac74, 0, "G310", "TVAx", HAS_ANISO, 16384, 8192, {}),
|
||||
MODEL(0xac74, 1, "G310", "TVAx", HAS_ANISO, 16384, 8192, {}),
|
||||
MODEL(0xac74, 2, "G310", "TVAx", HAS_ANISO, 16384, 8192, {}),
|
||||
MODEL(0xac74, 3, "G310", "TVAx", HAS_ANISO, 32768, 16384, {}),
|
||||
MODEL(0xac74, 4, "G310", "TVAx", HAS_ANISO, 32768, 16384, {}),
|
||||
};
|
||||
/* clang-format on */
|
||||
|
||||
@@ -207,6 +209,18 @@ panfrost_query_optimal_tib_size(const struct panfrost_model *model)
|
||||
return model->tilebuffer_size / 2;
|
||||
}
|
||||
|
||||
unsigned
|
||||
panfrost_query_optimal_z_tib_size(const struct panfrost_model *model)
|
||||
{
|
||||
/* Preconditions ensure the returned value is a multiple of 1 KiB, the
|
||||
* granularity of the colour buffer allocation field.
|
||||
*/
|
||||
assert(model->tilebuffer_z_size >= 1024);
|
||||
assert(util_is_power_of_two_nonzero(model->tilebuffer_z_size));
|
||||
|
||||
return model->tilebuffer_z_size / 2;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
panfrost_clamp_to_usable_va_range(const struct pan_kmod_dev *dev, uint64_t va)
|
||||
{
|
||||
|
||||
@@ -66,6 +66,9 @@ struct panfrost_model {
|
||||
/* Default tilebuffer size in bytes for the model. */
|
||||
unsigned tilebuffer_size;
|
||||
|
||||
/* Default tilebuffer depth size in bytes for the model. */
|
||||
unsigned tilebuffer_z_size;
|
||||
|
||||
struct {
|
||||
/* The GPU lacks the capability for hierarchical tiling, without
|
||||
* an "Advanced Tiling Unit", instead requiring a single bin
|
||||
@@ -98,6 +101,8 @@ bool panfrost_query_afrc(const struct pan_kmod_dev_props *props);
|
||||
|
||||
unsigned panfrost_query_optimal_tib_size(const struct panfrost_model *model);
|
||||
|
||||
unsigned panfrost_query_optimal_z_tib_size(const struct panfrost_model *model);
|
||||
|
||||
uint64_t panfrost_clamp_to_usable_va_range(const struct pan_kmod_dev *dev,
|
||||
uint64_t va);
|
||||
|
||||
|
||||
@@ -2145,6 +2145,7 @@ panvk_per_arch(cmd_inherit_render_state)(
|
||||
0;
|
||||
*fbinfo = (struct pan_fb_info){
|
||||
.tile_buf_budget = panfrost_query_optimal_tib_size(phys_dev->model),
|
||||
.z_tile_buf_budget = panfrost_query_optimal_z_tib_size(phys_dev->model),
|
||||
.nr_samples = inheritance_info->rasterizationSamples,
|
||||
.rt_count = inheritance_info->colorAttachmentCount,
|
||||
};
|
||||
|
||||
@@ -233,6 +233,7 @@ panvk_per_arch(cmd_init_render_state)(struct panvk_cmd_buffer *cmdbuf,
|
||||
cmdbuf->state.gfx.render.view_mask = pRenderingInfo->viewMask;
|
||||
*fbinfo = (struct pan_fb_info){
|
||||
.tile_buf_budget = panfrost_query_optimal_tib_size(phys_dev->model),
|
||||
.z_tile_buf_budget = panfrost_query_optimal_z_tib_size(phys_dev->model),
|
||||
.nr_samples = 1,
|
||||
.rt_count = pRenderingInfo->colorAttachmentCount,
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user