From 6c1c116a0fb66f2d74976260775ec73025c4a697 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Corentin=20No=C3=ABl?= Date: Mon, 5 May 2025 12:05:40 +0200 Subject: [PATCH] virgl: Avoid possible double free when destroying the hw resource MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a resource is un-referenced, the reference count is decremented, and intentionally no lock is acquired. This can result in the following race condition when a resource is created from a handle: ``` [Thread] Operation [0] Create resource from handle for the first time, refcount set to 1 [0] resource is unreferenced, refcount is decremented to 0 (intentionally no mutex is locked) [0] before entering virgl_hw_res_destroy to lock virgl_drm_winsys::bo_handles_mutex the thread yields [1] Create resource from handle pulls the resource from virgl_drm_winsys::bo_handles, refcount is incremented to 1 [1] resource is unreferenced, refcount is decremented to 0 [1] Enter virgl_hw_res_destroy, [1] acquire the lock on virgl_drm_winsys::bo_handles_mutex [1] check reference count to be 0, yes -> the resource is destroyed [1] release the lock on virgl_drm_winsys::bo_handles_mutex [0] Enter virgl_hw_res_destroy, [0] acquire the lock on virgl_drm_winsys::bo_handles_mutex [0] Here the res pointer already points to freed memory [0] check reference count to be 0, yes -> the resource is destroyed (again!) double free or corruption (!prev) ``` To work around this race condition, keep track of the number of times the resource was pulled from virgl_drm_winsys::bo_handles to see whether it has to be kept alive despite the reference count being zero. This can be reproduced with the `spec@ext_image_dma_buf_import@ext_image_dma_buf_import-refcount-multithread` piglit test. Signed-off-by: Corentin Noël Part-of: --- src/gallium/winsys/virgl/drm/virgl_drm_winsys.c | 17 ++++++++++++++++- src/gallium/winsys/virgl/drm/virgl_drm_winsys.h | 7 +++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c index 2b7cd2d4d09..33f5cf7a1a1 100644 --- a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c +++ b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c @@ -82,6 +82,11 @@ static void virgl_hw_res_destroy(struct virgl_drm_winsys *qdws, return; } + if (!--res->needed_references) { + mtx_unlock(&qdws->bo_handles_mutex); + return; + } + _mesa_hash_table_remove_key(qdws->bo_handles, (void *)(uintptr_t)res->bo_handle); if (res->flink_name) @@ -238,6 +243,7 @@ virgl_drm_winsys_resource_create_blob(struct virgl_winsys *qws, pipe_reference_init(&res->reference, 1); p_atomic_set(&res->external, false); p_atomic_set(&res->num_cs_references, 0); + res->needed_references = 1; virgl_resource_cache_entry_init(&res->cache_entry, params); return res; } @@ -306,6 +312,7 @@ virgl_drm_winsys_resource_create(struct virgl_winsys *qws, pipe_reference_init(&res->reference, 1); p_atomic_set(&res->external, false); p_atomic_set(&res->num_cs_references, 0); + res->needed_references = 1; /* A newly created resource is considered busy by the kernel until the * command is retired. But for our purposes, we can consider it idle @@ -530,8 +537,15 @@ virgl_drm_winsys_resource_create_handle(struct virgl_winsys *qws, * until it enters virgl_hw_res_destroy, there is a small window that * the refcount can drop to zero. Call p_atomic_inc directly instead of * virgl_drm_resource_reference to avoid hitting assert failures. + * + * If the refcount was 0, that means that the resource is currently + * waiting to be freed in another thread, increase the needed_references + * as a workaround to make sure that it won't be double freed for now. */ - p_atomic_inc(&res->reference.count); + int32_t ref = p_atomic_inc_return(&res->reference.count); + if (ref == 1) + res->needed_references++; + goto done; } @@ -574,6 +588,7 @@ virgl_drm_winsys_resource_create_handle(struct virgl_winsys *qws, pipe_reference_init(&res->reference, 1); p_atomic_set(&res->external, true); res->num_cs_references = 0; + res->needed_references = 1; if (res->flink_name) _mesa_hash_table_insert(qdws->bo_names, (void *)(uintptr_t)res->flink_name, res); diff --git a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.h b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.h index b2bab8f25d5..6dab2b0d33c 100644 --- a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.h +++ b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.h @@ -48,6 +48,13 @@ struct virgl_hw_res { uint32_t flags; uint32_t flink_name; + /* We are not holding a lock when releasing references of the + * resource (intentionally) so we might start destroying it in one thread + * while briefly increasing the reference in another one leading to the + * free function being called twice, this ensures that it will be only + * called once. */ + int needed_references; + /* false when the resource is known to be typed */ bool maybe_untyped;