nvk/nvkmd: Add map sync to/from GPU helpers
If userspace cache-maintenance operations are available (true on x86 and aarch64), we prefer those. Otherwise, we call into a back-end hook that traps through to the kernel. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33959>
This commit is contained in:
@@ -39,6 +39,9 @@ struct nv_device_info {
|
||||
uint8_t mp_per_tpc;
|
||||
uint8_t max_warps_per_mp;
|
||||
|
||||
/** Non-coherent memory map atom size */
|
||||
uint16_t nc_atom_size_B;
|
||||
|
||||
uint16_t cls_copy;
|
||||
uint16_t cls_eng2d;
|
||||
uint16_t cls_eng3d;
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
#include "nvkmd_nouveau.h"
|
||||
|
||||
#include "nouveau_device.h"
|
||||
#include "util/cache_ops.h"
|
||||
#include "util/os_misc.h"
|
||||
#include "util/drm_is_nouveau.h"
|
||||
#include "vk_log.h"
|
||||
@@ -100,6 +101,9 @@ nvkmd_nouveau_try_create_pdev(struct _drmDevice *drm_device,
|
||||
.has_overmap = true,
|
||||
};
|
||||
|
||||
/* We get this ourselves */
|
||||
pdev->base.dev_info.nc_atom_size_B = util_cache_granularity();
|
||||
|
||||
/* Nouveau uses the OS page size for all pages, regardless of whether they
|
||||
* come from VRAM or system RAM.
|
||||
*/
|
||||
|
||||
@@ -6,8 +6,11 @@
|
||||
#include "nvkmd.h"
|
||||
#include "nouveau/nvkmd_nouveau.h"
|
||||
#include "nv_push.h"
|
||||
#include "util/cache_ops.h"
|
||||
#include "util/u_math.h"
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <unistd.h>
|
||||
|
||||
static void
|
||||
nvkmd_dev_add_mem(struct nvkmd_dev *dev,
|
||||
@@ -444,3 +447,41 @@ nvkmd_mem_unmap(struct nvkmd_mem *mem, enum nvkmd_mem_map_flags flags)
|
||||
simple_mtx_unlock(&mem->map_mutex);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
nvkmd_mem_sync_to_gpu(struct nvkmd_mem *mem, bool client_map,
|
||||
uint64_t offset_B, uint64_t range_B)
|
||||
{
|
||||
if (mem->flags & NVKMD_MEM_COHERENT)
|
||||
return;
|
||||
|
||||
const uint32_t atom_size_B = mem->dev->pdev->dev_info.nc_atom_size_B;
|
||||
assert(util_is_aligned(offset_B, atom_size_B));
|
||||
assert(util_is_aligned(range_B, atom_size_B));
|
||||
|
||||
if (util_has_cache_ops()) {
|
||||
void *map = client_map ? mem->client_map : mem->map;
|
||||
util_flush_range(map + offset_B, range_B);
|
||||
} else {
|
||||
mem->ops->sync_to_gpu(mem, offset_B, range_B);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
nvkmd_mem_sync_from_gpu(struct nvkmd_mem *mem, bool client_map,
|
||||
uint64_t offset_B, uint64_t range_B)
|
||||
{
|
||||
if (mem->flags & NVKMD_MEM_COHERENT)
|
||||
return;
|
||||
|
||||
const uint32_t atom_size_B = mem->dev->pdev->dev_info.nc_atom_size_B;
|
||||
assert(util_is_aligned(offset_B, atom_size_B));
|
||||
assert(util_is_aligned(range_B, atom_size_B));
|
||||
|
||||
if (util_has_cache_ops()) {
|
||||
void *map = client_map ? mem->client_map : mem->map;
|
||||
util_flush_inval_range(map + offset_B, range_B);
|
||||
} else {
|
||||
mem->ops->sync_from_gpu(mem, offset_B, range_B);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -224,6 +224,12 @@ struct nvkmd_mem_ops {
|
||||
enum nvkmd_mem_map_flags flags,
|
||||
void *map);
|
||||
|
||||
void (*sync_to_gpu)(struct nvkmd_mem *mem,
|
||||
uint64_t offset_B, uint64_t range_B);
|
||||
|
||||
void (*sync_from_gpu)(struct nvkmd_mem *mem,
|
||||
uint64_t offset_B, uint64_t range_B);
|
||||
|
||||
VkResult (*export_dma_buf)(struct nvkmd_mem *mem,
|
||||
struct vk_object_base *log_obj,
|
||||
int *fd_out);
|
||||
@@ -507,6 +513,39 @@ nvkmd_mem_overmap(struct nvkmd_mem *mem, struct vk_object_base *log_obj,
|
||||
return result;
|
||||
}
|
||||
|
||||
void nvkmd_mem_sync_to_gpu(struct nvkmd_mem *mem, bool client_map,
|
||||
uint64_t offset_B, uint64_t range_B);
|
||||
void nvkmd_mem_sync_from_gpu(struct nvkmd_mem *mem, bool client_map,
|
||||
uint64_t offset_B, uint64_t range_B);
|
||||
|
||||
/** Sync the internal (driver-owned) CPU mapping to the GPU. */
static inline void
nvkmd_mem_sync_map_to_gpu(struct nvkmd_mem *mem,
                          uint64_t offset_B, uint64_t range_B)
{
   /* client_map = false selects mem->map */
   nvkmd_mem_sync_to_gpu(mem, false, offset_B, range_B);
}
|
||||
|
||||
/** Sync the client-visible CPU mapping to the GPU. */
static inline void
nvkmd_mem_sync_client_map_to_gpu(struct nvkmd_mem *mem,
                                 uint64_t offset_B, uint64_t range_B)
{
   /* client_map = true selects mem->client_map */
   nvkmd_mem_sync_to_gpu(mem, true, offset_B, range_B);
}
|
||||
|
||||
/** Sync the internal (driver-owned) CPU mapping from the GPU. */
static inline void
nvkmd_mem_sync_map_from_gpu(struct nvkmd_mem *mem,
                            uint64_t offset_B, uint64_t range_B)
{
   /* client_map = false selects mem->map */
   nvkmd_mem_sync_from_gpu(mem, false, offset_B, range_B);
}
|
||||
|
||||
/** Sync the client-visible CPU mapping from the GPU. */
static inline void
nvkmd_mem_sync_client_map_from_gpu(struct nvkmd_mem *mem,
                                   uint64_t offset_B, uint64_t range_B)
{
   /* client_map = true selects mem->client_map */
   nvkmd_mem_sync_from_gpu(mem, true, offset_B, range_B);
}
|
||||
|
||||
static inline VkResult MUST_CHECK
|
||||
nvkmd_mem_export_dma_buf(struct nvkmd_mem *mem,
|
||||
struct vk_object_base *log_obj,
|
||||
|
||||
Reference in New Issue
Block a user