nvk/nvkmd: Add map sync to/from GPU helpers

If we have the ability to do cache ops from userspace (true on x86 and
aarch64), that's preferred.  Otherwise, we call into a back-end hook to
trap through to the kernel.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33959>
Author: Faith Ekstrand
Date:   2025-03-08 21:29:53 -06:00
Parent: fcb6c5c7a6
Commit: 986c2cfed9

4 changed files with 87 additions and 0 deletions
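
To make the mechanism concrete, here is a minimal usage sketch (not part of the commit) of a CPU round-trip through a non-coherent mapping. submit_copy() and wait_idle() are hypothetical stand-ins for real submission code, and the range is assumed to be nc_atom_size_B-aligned.

/* Hypothetical usage sketch, not from this commit.  Assumes mem is
 * already mapped at mem->map and size_B is nc_atom_size_B-aligned.
 */
static void
roundtrip_sketch(struct nvkmd_mem *mem, const void *src, size_t size_B)
{
   memcpy(mem->map, src, size_B);

   /* Make the CPU writes visible to the GPU (no-op for coherent memory) */
   nvkmd_mem_sync_map_to_gpu(mem, 0, size_B);

   submit_copy(mem);      /* hypothetical GPU work */
   wait_idle(mem->dev);   /* hypothetical wait */

   /* Invalidate CPU caches before reading data the GPU wrote */
   nvkmd_mem_sync_map_from_gpu(mem, 0, size_B);
}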

@@ -39,6 +39,9 @@ struct nv_device_info {
   uint8_t mp_per_tpc;
   uint8_t max_warps_per_mp;

   /** Non-coherent memory map atom size */
   uint16_t nc_atom_size_B;

   uint16_t cls_copy;
   uint16_t cls_eng2d;
   uint16_t cls_eng3d;
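
The sync helpers added below assert that offsets and ranges are aligned to this atom size. A sketch of widening an arbitrary byte range outward to atom boundaries (the helper name is hypothetical; assumes the atom size is a power of two):

/* Hypothetical helper: widen [*offset_B, *offset_B + *range_B) to
 * nc_atom_size_B boundaries.  Assumes atom_size_B is a power of two.
 */
static void
align_sync_range(uint32_t atom_size_B, uint64_t *offset_B, uint64_t *range_B)
{
   const uint64_t mask = (uint64_t)atom_size_B - 1;
   const uint64_t start = *offset_B & ~mask;
   const uint64_t end = (*offset_B + *range_B + mask) & ~mask;

   *offset_B = start;
   *range_B = end - start;
}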

@@ -6,6 +6,7 @@
#include "nvkmd_nouveau.h"
#include "nouveau_device.h"
#include "util/cache_ops.h"
#include "util/os_misc.h"
#include "util/drm_is_nouveau.h"
#include "vk_log.h"

@@ -100,6 +101,9 @@ nvkmd_nouveau_try_create_pdev(struct _drmDevice *drm_device,
      .has_overmap = true,
   };

   /* We get this ourselves */
   pdev->base.dev_info.nc_atom_size_B = util_cache_granularity();

   /* Nouveau uses the OS page size for all pages, regardless of whether they
    * come from VRAM or system RAM.
    */
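
util_cache_granularity() comes from the new util/cache_ops helpers; its implementation is not shown in this commit. As an illustration only, a glibc/Linux query for the L1 data-cache line size might look like:

/* Illustrative sketch, not the real util_cache_granularity().  The
 * actual helper may use architecture-specific mechanisms (e.g. CTR_EL0
 * on aarch64) rather than sysconf().
 */
#include <unistd.h>

static uint16_t
query_cache_line_size(void)
{
   long sz = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
   return sz > 0 ? (uint16_t)sz : 64;   /* common fallback */
}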

@@ -6,8 +6,11 @@
#include "nvkmd.h"
#include "nouveau/nvkmd_nouveau.h"
#include "nv_push.h"
#include "util/cache_ops.h"
#include "util/u_math.h"

#include <inttypes.h>
#include <unistd.h>

static void
nvkmd_dev_add_mem(struct nvkmd_dev *dev,

@@ -444,3 +447,41 @@ nvkmd_mem_unmap(struct nvkmd_mem *mem, enum nvkmd_mem_map_flags flags)
      simple_mtx_unlock(&mem->map_mutex);
   }
}

void
nvkmd_mem_sync_to_gpu(struct nvkmd_mem *mem, bool client_map,
                      uint64_t offset_B, uint64_t range_B)
{
   /* Coherent memory needs no explicit cache maintenance */
   if (mem->flags & NVKMD_MEM_COHERENT)
      return;

   const uint32_t atom_size_B = mem->dev->pdev->dev_info.nc_atom_size_B;
   assert(util_is_aligned(offset_B, atom_size_B));
   assert(util_is_aligned(range_B, atom_size_B));

   if (util_has_cache_ops()) {
      /* Flush CPU caches directly from userspace */
      void *map = client_map ? mem->client_map : mem->map;
      util_flush_range(map + offset_B, range_B);
   } else {
      /* Fall back to the back-end hook, which traps to the kernel */
      mem->ops->sync_to_gpu(mem, offset_B, range_B);
   }
}

void
nvkmd_mem_sync_from_gpu(struct nvkmd_mem *mem, bool client_map,
                        uint64_t offset_B, uint64_t range_B)
{
   if (mem->flags & NVKMD_MEM_COHERENT)
      return;

   const uint32_t atom_size_B = mem->dev->pdev->dev_info.nc_atom_size_B;
   assert(util_is_aligned(offset_B, atom_size_B));
   assert(util_is_aligned(range_B, atom_size_B));

   if (util_has_cache_ops()) {
      /* Flush and invalidate so subsequent CPU reads see GPU writes */
      void *map = client_map ? mem->client_map : mem->map;
      util_flush_inval_range(map + offset_B, range_B);
   } else {
      mem->ops->sync_from_gpu(mem, offset_B, range_B);
   }
}
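
When userspace cache ops are unavailable, the else branches above call through the new mem->ops hooks. A hypothetical back-end stub showing the shape of that path (the actual nouveau implementation and any ioctl it uses are not part of this commit):

/* Hypothetical back-end stub: trap to the kernel for cache maintenance.
 * The real mechanism is driver-specific and elided here.
 */
static void
backend_mem_sync_to_gpu(struct nvkmd_mem *mem,
                        uint64_t offset_B, uint64_t range_B)
{
   /* e.g. a driver-specific DRM ioctl covering
    * [offset_B, offset_B + range_B) of this BO
    */
}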

@@ -224,6 +224,12 @@ struct nvkmd_mem_ops {
                    enum nvkmd_mem_map_flags flags,
                    void *map);

   void (*sync_to_gpu)(struct nvkmd_mem *mem,
                       uint64_t offset_B, uint64_t range_B);
   void (*sync_from_gpu)(struct nvkmd_mem *mem,
                         uint64_t offset_B, uint64_t range_B);

   VkResult (*export_dma_buf)(struct nvkmd_mem *mem,
                              struct vk_object_base *log_obj,
                              int *fd_out);

@@ -507,6 +513,39 @@ nvkmd_mem_overmap(struct nvkmd_mem *mem, struct vk_object_base *log_obj,
   return result;
}

void nvkmd_mem_sync_to_gpu(struct nvkmd_mem *mem, bool client_map,
                           uint64_t offset_B, uint64_t range_B);
void nvkmd_mem_sync_from_gpu(struct nvkmd_mem *mem, bool client_map,
                             uint64_t offset_B, uint64_t range_B);

/* Convenience wrappers that select the internal map (mem->map) or the
 * client map (mem->client_map).
 */
static inline void
nvkmd_mem_sync_map_to_gpu(struct nvkmd_mem *mem,
                          uint64_t offset_B, uint64_t range_B)
{
   nvkmd_mem_sync_to_gpu(mem, false, offset_B, range_B);
}

static inline void
nvkmd_mem_sync_client_map_to_gpu(struct nvkmd_mem *mem,
                                 uint64_t offset_B, uint64_t range_B)
{
   nvkmd_mem_sync_to_gpu(mem, true, offset_B, range_B);
}

static inline void
nvkmd_mem_sync_map_from_gpu(struct nvkmd_mem *mem,
                            uint64_t offset_B, uint64_t range_B)
{
   nvkmd_mem_sync_from_gpu(mem, false, offset_B, range_B);
}

static inline void
nvkmd_mem_sync_client_map_from_gpu(struct nvkmd_mem *mem,
                                   uint64_t offset_B, uint64_t range_B)
{
   nvkmd_mem_sync_from_gpu(mem, true, offset_B, range_B);
}

static inline VkResult MUST_CHECK
nvkmd_mem_export_dma_buf(struct nvkmd_mem *mem,
                         struct vk_object_base *log_obj,
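
These helpers are the natural backing for vkFlushMappedMemoryRanges and vkInvalidateMappedMemoryRanges on non-coherent memory types. A hedged sketch of one such entry point; nvk_device_memory and its mem field are assumptions here, not taken from this commit:

/* Sketch only: flush client-mapped ranges to the GPU.  Vulkan already
 * requires ranges to be multiples of nonCoherentAtomSize, which would
 * be reported from nc_atom_size_B.
 */
VKAPI_ATTR VkResult VKAPI_CALL
sketch_FlushMappedMemoryRanges(VkDevice device, uint32_t rangeCount,
                               const VkMappedMemoryRange *pRanges)
{
   for (uint32_t i = 0; i < rangeCount; i++) {
      VK_FROM_HANDLE(nvk_device_memory, mem, pRanges[i].memory);
      const uint64_t size_B = pRanges[i].size == VK_WHOLE_SIZE
         ? mem->mem->size_B - pRanges[i].offset
         : pRanges[i].size;

      nvkmd_mem_sync_client_map_to_gpu(mem->mem, pRanges[i].offset, size_B);
   }
   return VK_SUCCESS;
}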