panvk: pool large TLS allocations

Cache large TLS BOs in a per-command-pool BO pool so that they are reused
instead of being freed and reallocated each time a pool is reset. For
content that spills large amounts of TLS, this can bring a substantial
performance uplift.

Reviewed-by: Christoph Pillmayer <christoph.pillmayer@arm.com>
Reviewed-by: Eric R. Smith <eric.smith@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36713>
This commit is contained in:
Aksel Hjerpbakk
2025-08-08 11:50:06 +00:00
committed by Marge Bot
parent de3d04dd72
commit 0e88dd575f
8 changed files with 62 additions and 31 deletions
@@ -975,7 +975,7 @@ panvk_create_cmdbuf(struct vk_command_pool *vk_pool, VkCommandBufferLevel level,
.owns_bos = true,
.needs_locking = false,
};
panvk_pool_init(&cmdbuf->cs_pool, device, &pool->cs_bo_pool, &cs_pool_props);
panvk_pool_init(&cmdbuf->cs_pool, device, &pool->cs_bo_pool, NULL, &cs_pool_props);
struct panvk_pool_properties desc_pool_props = {
.create_flags = 0,
@@ -985,7 +985,7 @@ panvk_create_cmdbuf(struct vk_command_pool *vk_pool, VkCommandBufferLevel level,
.owns_bos = true,
.needs_locking = false,
};
panvk_pool_init(&cmdbuf->desc_pool, device, &pool->desc_bo_pool,
panvk_pool_init(&cmdbuf->desc_pool, device, &pool->desc_bo_pool, NULL,
&desc_pool_props);
struct panvk_pool_properties tls_pool_props = {
@@ -997,7 +997,7 @@ panvk_create_cmdbuf(struct vk_command_pool *vk_pool, VkCommandBufferLevel level,
.owns_bos = true,
.needs_locking = false,
};
panvk_pool_init(&cmdbuf->tls_pool, device, &pool->tls_bo_pool,
panvk_pool_init(&cmdbuf->tls_pool, device, &pool->tls_bo_pool, &pool->tls_big_bo_pool,
&tls_pool_props);
for (uint32_t i = 0; i < ARRAY_SIZE(cmdbuf->utrace.uts); i++)
+1 -1
View File
@@ -206,7 +206,7 @@ panvk_per_arch(utrace_clone_init_pool)(struct panvk_pool *pool,
.label = "utrace clone pool",
.owns_bos = true,
};
panvk_pool_init(pool, dev, NULL, &pool_props);
panvk_pool_init(pool, dev, NULL, NULL, &pool_props);
}
static struct cs_buffer
+3 -3
View File
@@ -442,7 +442,7 @@ panvk_create_cmdbuf(struct vk_command_pool *vk_pool, VkCommandBufferLevel level,
.owns_bos = true,
.needs_locking = false,
};
panvk_pool_init(&cmdbuf->desc_pool, device, &pool->desc_bo_pool,
panvk_pool_init(&cmdbuf->desc_pool, device, &pool->desc_bo_pool, NULL,
&desc_pool_props);
struct panvk_pool_properties tls_pool_props = {
@@ -454,7 +454,7 @@ panvk_create_cmdbuf(struct vk_command_pool *vk_pool, VkCommandBufferLevel level,
.owns_bos = true,
.needs_locking = false,
};
panvk_pool_init(&cmdbuf->tls_pool, device, &pool->tls_bo_pool,
panvk_pool_init(&cmdbuf->tls_pool, device, &pool->tls_bo_pool, &pool->tls_big_bo_pool,
&tls_pool_props);
struct panvk_pool_properties var_pool_props = {
@@ -466,7 +466,7 @@ panvk_create_cmdbuf(struct vk_command_pool *vk_pool, VkCommandBufferLevel level,
.owns_bos = true,
.needs_locking = false,
};
panvk_pool_init(&cmdbuf->varying_pool, device, &pool->varying_bo_pool,
panvk_pool_init(&cmdbuf->varying_pool, device, &pool->varying_bo_pool, NULL,
&var_pool_props);
list_inithead(&cmdbuf->batches);
+2
View File
@@ -41,6 +41,7 @@ panvk_CreateCommandPool(VkDevice _device,
panvk_bo_pool_init(&pool->desc_bo_pool);
panvk_bo_pool_init(&pool->varying_bo_pool);
panvk_bo_pool_init(&pool->tls_bo_pool);
panvk_bo_pool_init(&pool->tls_big_bo_pool);
list_inithead(&pool->push_sets);
*pCmdPool = panvk_cmd_pool_to_handle(pool);
return VK_SUCCESS;
@@ -62,6 +63,7 @@ panvk_DestroyCommandPool(VkDevice _device, VkCommandPool commandPool,
panvk_bo_pool_cleanup(&pool->desc_bo_pool);
panvk_bo_pool_cleanup(&pool->varying_bo_pool);
panvk_bo_pool_cleanup(&pool->tls_bo_pool);
panvk_bo_pool_cleanup(&pool->tls_big_bo_pool);
list_for_each_entry_safe(struct panvk_cmd_pool_obj, obj, &pool->push_sets,
node) {
+1
View File
@@ -16,6 +16,7 @@ struct panvk_cmd_pool {
struct panvk_bo_pool desc_bo_pool;
struct panvk_bo_pool varying_bo_pool;
struct panvk_bo_pool tls_bo_pool;
struct panvk_bo_pool tls_big_bo_pool;
struct list_head push_sets;
};
+43 -21
View File
@@ -65,25 +65,40 @@ panvk_pool_alloc_backing(struct panvk_pool *pool, size_t sz)
list_first_entry(&pool->bo_pool->free_bos, struct panvk_priv_bo, node);
list_del(&bo->node);
} else {
/* We don't know what the BO will be used for, so let's flag it
* RW and attach it to both the fragment and vertex/tiler jobs.
* TODO: if we want fine grained BO assignment we should pass
* flags to this function and keep the read/write,
* fragment/vertex+tiler pools separate.
*/
VkResult result =
panvk_priv_bo_create(pool->dev, bo_sz, pool->props.create_flags,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE, &bo);
if (pool->big_bo_pool) {
list_for_each_entry_safe(struct panvk_priv_bo, pooled_bo,
&pool->big_bo_pool->free_bos, node) {
const size_t picked_bo_sz = pan_kmod_bo_size(pooled_bo->bo);
/* check if we have any sufficient match */
if (picked_bo_sz >= bo_sz) {
bo = pooled_bo;
list_del(&bo->node);
break;
}
}
}
/* Pool allocations are indirect, meaning there's no VkResult returned
* and no way for the caller to know why the device memory allocation
* failed. We want to propagate host allocation failures, so set
* errno to -ENOMEM if panvk_priv_bo_create() returns
* VK_ERROR_OUT_OF_HOST_MEMORY.
* We expect the caller to check the returned pointer and catch the
* host allocation failure with a call to panvk_error(). */
if (result == VK_ERROR_OUT_OF_HOST_MEMORY)
errno = -ENOMEM;
if (bo == NULL) {
/* We don't know what the BO will be used for, so let's flag it
* RW and attach it to both the fragment and vertex/tiler jobs.
* TODO: if we want fine grained BO assignment we should pass
* flags to this function and keep the read/write,
* fragment/vertex+tiler pools separate.
*/
VkResult result =
panvk_priv_bo_create(pool->dev, bo_sz, pool->props.create_flags,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE, &bo);
/* Pool allocations are indirect, meaning there's no VkResult returned
* and no way for the caller to know why the device memory allocation
* failed. We want to propagate host allocation failures, so set
* errno to -ENOMEM if panvk_priv_bo_create() returns
* VK_ERROR_OUT_OF_HOST_MEMORY.
* We expect the caller to check the returned pointer and catch the
* host allocation failure with a call to panvk_error(). */
if (result == VK_ERROR_OUT_OF_HOST_MEMORY)
errno = -ENOMEM;
}
}
if (bo == NULL)
@@ -185,6 +200,7 @@ PAN_POOL_ALLOCATOR(struct panvk_pool, panvk_pool_alloc_aligned)
void
panvk_pool_init(struct panvk_pool *pool, struct panvk_device *dev,
struct panvk_bo_pool *bo_pool,
struct panvk_bo_pool *big_bo_pool,
const struct panvk_pool_properties *props)
{
memset(pool, 0, sizeof(*pool));
@@ -193,6 +209,7 @@ panvk_pool_init(struct panvk_pool *pool, struct panvk_device *dev,
pan_pool_init(&pool->base, pool->props.slab_size);
pool->dev = dev;
pool->bo_pool = bo_pool;
pool->big_bo_pool = big_bo_pool;
list_inithead(&pool->bos);
list_inithead(&pool->big_bos);
@@ -214,9 +231,14 @@ panvk_pool_reset(struct panvk_pool *pool)
}
}
list_for_each_entry_safe(struct panvk_priv_bo, bo, &pool->big_bos, node) {
list_del(&bo->node);
panvk_priv_bo_unref(bo);
if (pool->big_bo_pool) {
list_splicetail(&pool->big_bos, &pool->big_bo_pool->free_bos);
list_inithead(&pool->big_bos);
} else {
list_for_each_entry_safe(struct panvk_priv_bo, bo, &pool->big_bos, node) {
list_del(&bo->node);
panvk_priv_bo_unref(bo);
}
}
if (!pool->props.owns_bos)
+6
View File
@@ -82,6 +82,11 @@ struct panvk_pool {
*/
struct panvk_bo_pool *bo_pool;
/* Optional pool of free big BOs. Before allocating a new big BO, this pool
 * is searched for a free BO that is large enough. On reset, big BOs are
 * returned to this pool if big_bo_pool != NULL; otherwise they are released.
 */
struct panvk_bo_pool *big_bo_pool;
/* BOs allocated by this pool */
struct list_head bos;
struct list_head big_bos;
@@ -105,6 +110,7 @@ to_panvk_pool(struct pan_pool *pool)
void panvk_pool_init(struct panvk_pool *pool, struct panvk_device *dev,
struct panvk_bo_pool *bo_pool,
struct panvk_bo_pool *big_bo_pool,
const struct panvk_pool_properties *props);
void panvk_pool_reset(struct panvk_pool *pool);
+3 -3
View File
@@ -78,7 +78,7 @@ panvk_device_init_mempools(struct panvk_device *dev)
.prealloc = false,
};
panvk_pool_init(&dev->mempools.rw, dev, NULL, &rw_pool_props);
panvk_pool_init(&dev->mempools.rw, dev, NULL, NULL, &rw_pool_props);
struct panvk_pool_properties rw_nc_pool_props = {
.create_flags = PAN_ARCH <= 9 ? 0 : PAN_KMOD_BO_FLAG_GPU_UNCACHED,
@@ -89,7 +89,7 @@ panvk_device_init_mempools(struct panvk_device *dev)
.prealloc = false,
};
panvk_pool_init(&dev->mempools.rw_nc, dev, NULL, &rw_nc_pool_props);
panvk_pool_init(&dev->mempools.rw_nc, dev, NULL, NULL, &rw_nc_pool_props);
struct panvk_pool_properties exec_pool_props = {
.create_flags = PAN_KMOD_BO_FLAG_EXECUTABLE,
@@ -100,7 +100,7 @@ panvk_device_init_mempools(struct panvk_device *dev)
.prealloc = false,
};
panvk_pool_init(&dev->mempools.exec, dev, NULL, &exec_pool_props);
panvk_pool_init(&dev->mempools.exec, dev, NULL, NULL, &exec_pool_props);
}
static void