diff --git a/src/gallium/drivers/panfrost/pan_device.c b/src/gallium/drivers/panfrost/pan_device.c index 7bd310db42a..a2bf204eff1 100644 --- a/src/gallium/drivers/panfrost/pan_device.c +++ b/src/gallium/drivers/panfrost/pan_device.c @@ -89,6 +89,7 @@ panfrost_open_device(void *memctx, int fd, struct panfrost_device *dev) panfrost_query_core_count(&dev->kmod.props, &dev->core_id_range); dev->thread_tls_alloc = panfrost_query_thread_tls_alloc(&dev->kmod.props); dev->optimal_tib_size = panfrost_query_optimal_tib_size(dev->model); + dev->optimal_z_tib_size = panfrost_query_optimal_z_tib_size(dev->model); dev->compressed_formats = panfrost_query_compressed_formats(&dev->kmod.props); dev->tiler_features = panfrost_query_tiler_features(&dev->kmod.props); diff --git a/src/gallium/drivers/panfrost/pan_device.h b/src/gallium/drivers/panfrost/pan_device.h index b7a16ee93b5..c08afb01ffc 100644 --- a/src/gallium/drivers/panfrost/pan_device.h +++ b/src/gallium/drivers/panfrost/pan_device.h @@ -113,6 +113,7 @@ struct panfrost_device { /* Maximum tilebuffer size in bytes for optimal performance. 
*/ unsigned optimal_tib_size; + unsigned optimal_z_tib_size; unsigned thread_tls_alloc; struct panfrost_tiler_features tiler_features; diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c index 9437f562fc4..9ba80d3def2 100644 --- a/src/gallium/drivers/panfrost/pan_job.c +++ b/src/gallium/drivers/panfrost/pan_job.c @@ -478,6 +478,7 @@ panfrost_batch_to_fb_info(const struct panfrost_batch *batch, memset(s, 0, sizeof(*s)); fb->tile_buf_budget = dev->optimal_tib_size; + fb->z_tile_buf_budget = dev->optimal_z_tib_size; fb->width = batch->key.width; fb->height = batch->key.height; fb->extent.minx = batch->minx; diff --git a/src/panfrost/lib/pan_desc.c b/src/panfrost/lib/pan_desc.c index 53f9a6459da..4d605f249c6 100644 --- a/src/panfrost/lib/pan_desc.c +++ b/src/panfrost/lib/pan_desc.c @@ -376,6 +376,26 @@ pan_cbuf_bytes_per_pixel(const struct pan_fb_info *fb) return sum; } +static unsigned +pan_zsbuf_bytes_per_pixel(const struct pan_fb_info *fb) +{ + unsigned samples = fb->nr_samples; + + const struct pan_image_view *zs_view = fb->zs.view.zs; + if (zs_view) + samples = zs_view->nr_samples; + + const struct pan_image_view *s_view = fb->zs.view.s; + if (s_view) + samples = MAX2(samples, s_view->nr_samples); + + /* Depth is always stored in a 32-bit float. Stencil requires depth to + * be allocated, but doesn't have its own budget; it's tied to the + * depth buffer. + */ + return sizeof(float) * samples; +} + /* * Select the largest tile size that fits within the tilebuffer budget. 
* Formally, maximize (pixels per tile) such that it is a power of two and @@ -397,6 +417,16 @@ GENX(pan_select_tile_size)(struct pan_fb_info *fb) bytes_per_pixel = pan_cbuf_bytes_per_pixel(fb); fb->tile_size = fb->tile_buf_budget >> util_logbase2_ceil(bytes_per_pixel); + unsigned zs_bytes_per_pixel = pan_zsbuf_bytes_per_pixel(fb); + if (zs_bytes_per_pixel > 0) { + assert(util_is_power_of_two_nonzero(fb->z_tile_buf_budget)); + assert(fb->z_tile_buf_budget >= 1024); + + fb->tile_size = + MIN2(fb->tile_size, + fb->z_tile_buf_budget >> util_logbase2_ceil(zs_bytes_per_pixel)); + } + /* Clamp tile size to hardware limits */ fb->tile_size = MIN2(fb->tile_size, panfrost_max_effective_tile_size(PAN_ARCH)); diff --git a/src/panfrost/lib/pan_desc.h b/src/panfrost/lib/pan_desc.h index e2186703f4c..3b91e8ae6d5 100644 --- a/src/panfrost/lib/pan_desc.h +++ b/src/panfrost/lib/pan_desc.h @@ -140,6 +140,7 @@ struct pan_fb_info { /* Optimal tile buffer size. */ unsigned tile_buf_budget; + unsigned z_tile_buf_budget; unsigned tile_size; unsigned cbuf_allocation; diff --git a/src/panfrost/lib/pan_props.c b/src/panfrost/lib/pan_props.c index cbc1889a674..1c96825143c 100644 --- a/src/panfrost/lib/pan_props.c +++ b/src/panfrost/lib/pan_props.c @@ -37,43 +37,45 @@ #define HAS_ANISO (0) #define MODEL(gpu_id_, gpu_variant_, shortname, counters_, \ - min_rev_anisotropic_, tib_size_, quirks_) \ + min_rev_anisotropic_, tib_size_, tib_z_size_, quirks_) \ { \ .gpu_id = gpu_id_, .gpu_variant = gpu_variant_, \ .name = "Mali-" shortname, \ .performance_counters = counters_, \ .min_rev_anisotropic = min_rev_anisotropic_, \ - .tilebuffer_size = tib_size_, .quirks = quirks_, \ + .tilebuffer_size = tib_size_, \ + .tilebuffer_z_size = tib_z_size_, \ + .quirks = quirks_, \ } /* Table of supported Mali GPUs */ /* clang-format off */ const struct panfrost_model panfrost_model_list[] = { - MODEL(0x600, 0, "T600", "T60x", NO_ANISO, 8192, {}), - MODEL(0x620, 0, "T620", "T62x", NO_ANISO, 8192, {}), - 
MODEL(0x720, 0, "T720", "T72x", NO_ANISO, 8192, { .no_hierarchical_tiling = true }), - MODEL(0x750, 0, "T760", "T76x", NO_ANISO, 8192, {}), - MODEL(0x820, 0, "T820", "T82x", NO_ANISO, 8192, { .no_hierarchical_tiling = true }), - MODEL(0x830, 0, "T830", "T83x", NO_ANISO, 8192, { .no_hierarchical_tiling = true }), - MODEL(0x860, 0, "T860", "T86x", NO_ANISO, 8192, {}), - MODEL(0x880, 0, "T880", "T88x", NO_ANISO, 8192, {}), + MODEL(0x600, 0, "T600", "T60x", NO_ANISO, 8192, 8192, {}), + MODEL(0x620, 0, "T620", "T62x", NO_ANISO, 8192, 8192, {}), + MODEL(0x720, 0, "T720", "T72x", NO_ANISO, 8192, 8192, { .no_hierarchical_tiling = true }), + MODEL(0x750, 0, "T760", "T76x", NO_ANISO, 8192, 8192, {}), + MODEL(0x820, 0, "T820", "T82x", NO_ANISO, 8192, 8192, { .no_hierarchical_tiling = true }), + MODEL(0x830, 0, "T830", "T83x", NO_ANISO, 8192, 8192, { .no_hierarchical_tiling = true }), + MODEL(0x860, 0, "T860", "T86x", NO_ANISO, 8192, 8192, {}), + MODEL(0x880, 0, "T880", "T88x", NO_ANISO, 8192, 8192, {}), - MODEL(0x6000, 0, "G71", "TMIx", NO_ANISO, 8192, {}), - MODEL(0x6221, 0, "G72", "THEx", 0x0030 /* r0p3 */, 16384, {}), - MODEL(0x7090, 0, "G51", "TSIx", 0x1010 /* r1p1 */, 8192, {}), - MODEL(0x7093, 0, "G31", "TDVx", HAS_ANISO, 8192, {}), - MODEL(0x7211, 0, "G76", "TNOx", HAS_ANISO, 16384, {}), - MODEL(0x7212, 0, "G52", "TGOx", HAS_ANISO, 16384, {}), - MODEL(0x7402, 0, "G52 r1", "TGOx", HAS_ANISO, 8192, {}), - MODEL(0x9091, 0, "G57", "TNAx", HAS_ANISO, 16384, {}), - MODEL(0x9093, 0, "G57", "TNAx", HAS_ANISO, 16384, {}), + MODEL(0x6000, 0, "G71", "TMIx", NO_ANISO, 8192, 8192, {}), + MODEL(0x6221, 0, "G72", "THEx", 0x0030 /* r0p3 */, 16384, 8192, {}), + MODEL(0x7090, 0, "G51", "TSIx", 0x1010 /* r1p1 */, 8192, 8192, {}), + MODEL(0x7093, 0, "G31", "TDVx", HAS_ANISO, 8192, 8192, {}), + MODEL(0x7211, 0, "G76", "TNOx", HAS_ANISO, 16384, 8192, {}), + MODEL(0x7212, 0, "G52", "TGOx", HAS_ANISO, 16384, 8192, {}), + MODEL(0x7402, 0, "G52 r1", "TGOx", HAS_ANISO, 8192, 8192, {}), + 
MODEL(0x9091, 0, "G57", "TNAx", HAS_ANISO, 16384, 8192, {}), + MODEL(0x9093, 0, "G57", "TNAx", HAS_ANISO, 16384, 8192, {}), - MODEL(0xa867, 0, "G610", "TVIx", HAS_ANISO, 32768, {}), - MODEL(0xac74, 0, "G310", "TVAx", HAS_ANISO, 16384, {}), - MODEL(0xac74, 1, "G310", "TVAx", HAS_ANISO, 16384, {}), - MODEL(0xac74, 2, "G310", "TVAx", HAS_ANISO, 16384, {}), - MODEL(0xac74, 3, "G310", "TVAx", HAS_ANISO, 32768, {}), - MODEL(0xac74, 4, "G310", "TVAx", HAS_ANISO, 32768, {}), + MODEL(0xa867, 0, "G610", "TVIx", HAS_ANISO, 32768, 16384, {}), + MODEL(0xac74, 0, "G310", "TVAx", HAS_ANISO, 16384, 8192, {}), + MODEL(0xac74, 1, "G310", "TVAx", HAS_ANISO, 16384, 8192, {}), + MODEL(0xac74, 2, "G310", "TVAx", HAS_ANISO, 16384, 8192, {}), + MODEL(0xac74, 3, "G310", "TVAx", HAS_ANISO, 32768, 16384, {}), + MODEL(0xac74, 4, "G310", "TVAx", HAS_ANISO, 32768, 16384, {}), }; /* clang-format on */ @@ -207,6 +209,18 @@ panfrost_query_optimal_tib_size(const struct panfrost_model *model) return model->tilebuffer_size / 2; } +unsigned +panfrost_query_optimal_z_tib_size(const struct panfrost_model *model) +{ + /* Preconditions ensure the returned value is a power of two and at least + * 1 KiB, which GENX(pan_select_tile_size) asserts on z_tile_buf_budget. + */ + assert(model->tilebuffer_z_size >= 2048); + assert(util_is_power_of_two_nonzero(model->tilebuffer_z_size)); + + return model->tilebuffer_z_size / 2; +} + uint64_t panfrost_clamp_to_usable_va_range(const struct pan_kmod_dev *dev, uint64_t va) { diff --git a/src/panfrost/lib/pan_props.h b/src/panfrost/lib/pan_props.h index 220872e2cc8..4f0a467cbf0 100644 --- a/src/panfrost/lib/pan_props.h +++ b/src/panfrost/lib/pan_props.h @@ -66,6 +66,9 @@ struct panfrost_model { /* Default tilebuffer size in bytes for the model. */ unsigned tilebuffer_size; + /* Default tilebuffer depth size in bytes for the model. 
*/ + unsigned tilebuffer_z_size; + struct { /* The GPU lacks the capability for hierarchical tiling, without * an "Advanced Tiling Unit", instead requiring a single bin @@ -98,6 +101,8 @@ bool panfrost_query_afrc(const struct pan_kmod_dev_props *props); unsigned panfrost_query_optimal_tib_size(const struct panfrost_model *model); +unsigned panfrost_query_optimal_z_tib_size(const struct panfrost_model *model); + uint64_t panfrost_clamp_to_usable_va_range(const struct pan_kmod_dev *dev, uint64_t va); diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c index 5c2cc302b57..c342e0a1e36 100644 --- a/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_draw.c @@ -2145,6 +2145,7 @@ panvk_per_arch(cmd_inherit_render_state)( 0; *fbinfo = (struct pan_fb_info){ .tile_buf_budget = panfrost_query_optimal_tib_size(phys_dev->model), + .z_tile_buf_budget = panfrost_query_optimal_z_tib_size(phys_dev->model), .nr_samples = inheritance_info->rasterizationSamples, .rt_count = inheritance_info->colorAttachmentCount, }; diff --git a/src/panfrost/vulkan/panvk_vX_cmd_draw.c b/src/panfrost/vulkan/panvk_vX_cmd_draw.c index 5ba08b87eeb..307a19746af 100644 --- a/src/panfrost/vulkan/panvk_vX_cmd_draw.c +++ b/src/panfrost/vulkan/panvk_vX_cmd_draw.c @@ -233,6 +233,7 @@ panvk_per_arch(cmd_init_render_state)(struct panvk_cmd_buffer *cmdbuf, cmdbuf->state.gfx.render.view_mask = pRenderingInfo->viewMask; *fbinfo = (struct pan_fb_info){ .tile_buf_budget = panfrost_query_optimal_tib_size(phys_dev->model), + .z_tile_buf_budget = panfrost_query_optimal_z_tib_size(phys_dev->model), .nr_samples = 1, .rt_count = pRenderingInfo->colorAttachmentCount, };