radeonsi: implement SVM interfaces
Acked-by: Adam Jackson <ajax@redhat.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35175>
@@ -760,7 +760,7 @@ Rusticl OpenCL 1.2 -- all DONE:
 
 Rusticl OpenCL 2.0 -- all DONE:
 
-  Shared virtual memory                                 DONE (iris, nvc0, llvmpipe)
+  Shared virtual memory                                 DONE (iris, nvc0, llvmpipe, radeonsi)
   Device queues                                         not started
   - cl_khr_create_command_queue                         DONE
   - Additional queries for clGetDeviceInfo              DONE

@@ -28,7 +28,7 @@ VK_EXT_texel_buffer_alignment on panvk
 cl_khr_kernel_clock on freedreno, iris, llvmpipe, nvc0, panfrost, radeonsi and zink with llvm-19 or newer
 GL_KHR_texture_compression_astc_hdr on panfrost and asahi
 cl_ext_buffer_device_address on iris, llvmpipe, radeonsi and zink
-Completed OpenCL 2.0 coarse grain buffer SVM support for iris
+Completed OpenCL 2.0 coarse grain buffer SVM support for iris and radeonsi
 VK_EXT_shader_subgroup_ballot on panvk
 VK_EXT_shader_subgroup_vote on panvk
 Vulkan video support on GFX12 (RDNA4) for RADV
@@ -994,6 +994,12 @@ int ac_drm_va_range_free(amdgpu_va_handle va_range_handle)
    return amdgpu_va_range_free(va_range_handle);
 }
 
+int ac_drm_va_range_query(ac_drm_device *dev, enum amdgpu_gpu_va_range type, uint64_t *start,
+                          uint64_t *end)
+{
+   return amdgpu_va_range_query(dev->adev, type, start, end);
+}
+
 int ac_drm_create_userqueue(ac_drm_device *dev, uint32_t ip_type, uint32_t doorbell_handle,
                             uint32_t doorbell_offset, uint64_t queue_va, uint64_t queue_size,
                             uint64_t wptr_va, uint64_t rptr_va, void *mqd_in, uint32_t flags, uint32_t *queue_id)
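For orientation, a minimal sketch of how the new wrapper could be called to discover the device's general-purpose VA window; the dev handle and the printing are assumed for illustration, not part of this change:

   uint64_t va_start = 0, va_end = 0;

   /* Returns 0 on success, like the libdrm_amdgpu call it wraps. */
   if (!ac_drm_va_range_query(dev, amdgpu_gpu_va_range_general, &va_start, &va_end))
      printf("GPU VA window: [0x%" PRIx64 ", 0x%" PRIx64 ")\n", va_start, va_end);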
@@ -150,6 +150,8 @@ PROC int ac_drm_va_range_alloc(ac_drm_device *dev, enum amdgpu_gpu_va_range va_r
                                uint64_t *va_base_allocated, amdgpu_va_handle *va_range_handle,
                                uint64_t flags) TAIL;
 PROC int ac_drm_va_range_free(amdgpu_va_handle va_range_handle) TAIL;
+PROC int ac_drm_va_range_query(ac_drm_device *dev, enum amdgpu_gpu_va_range type, uint64_t *start,
+                               uint64_t *end) TAIL;
 PROC int ac_drm_create_userqueue(ac_drm_device *dev, uint32_t ip_type, uint32_t doorbell_handle,
                                  uint32_t doorbell_offset, uint64_t queue_va, uint64_t queue_size,
                                  uint64_t wptr_va, uint64_t rptr_va, void *mqd_in, uint32_t flags,
@@ -91,6 +91,11 @@ void si_init_resource_fields(struct si_screen *sscreen, struct si_resource *res,
    if (res->b.b.bind & PIPE_BIND_CUSTOM)
       res->flags |= RADEON_FLAG_NO_SUBALLOC;
 
+   /* The frontend assigns addresses, so we can't suballocate at all.
+    */
+   if (res->b.b.flags & PIPE_RESOURCE_FLAG_FRONTEND_VM)
+      res->flags |= RADEON_FLAG_NO_SUBALLOC | RADEON_FLAG_NO_VMA;
+
    if (res->b.b.bind & PIPE_BIND_PROTECTED ||
        /* Force scanout/depth/stencil buffer allocation to be encrypted */
        (sscreen->debug_flags & DBG(TMZ) &&
@@ -258,7 +263,7 @@ static bool si_invalidate_buffer(struct si_context *sctx, struct si_resource *bu
 
    /* Can't reallocate when this resource can't change its address.
    */
-   if (buf->b.b.flags & PIPE_RESOURCE_FLAG_FIXED_ADDRESS)
+   if (buf->b.b.flags & PIPE_RESOURCE_FLAG_FIXED_ADDRESS || buf->flags & RADEON_FLAG_NO_VMA)
       return false;
 
    /* Check if mapping this buffer would cause waiting for the GPU. */
@@ -788,12 +793,45 @@ static uint64_t si_resource_get_address(struct pipe_screen *screen,
    return res->gpu_address;
 }
 
+static struct pipe_vm_allocation *si_alloc_vm(struct pipe_screen *screen,
+                                              uint64_t start, uint64_t size)
+{
+   struct si_screen *sscreen = si_screen(screen);
+   return sscreen->ws->alloc_vm(sscreen->ws, start, size);
+}
+
+static void si_free_vm(struct pipe_screen *screen,
+                       struct pipe_vm_allocation *alloc)
+{
+   struct si_screen *sscreen = si_screen(screen);
+   sscreen->ws->free_vm(sscreen->ws, alloc);
+}
+
+static bool si_resource_assign_vma(struct pipe_screen *screen,
+                                   struct pipe_resource *resource,
+                                   uint64_t address)
+{
+   struct si_screen *sscreen = si_screen(screen);
+   struct si_resource *res = si_resource(resource);
+
+   int ret = sscreen->ws->buffer_assign_vma(sscreen->ws, res->buf, address);
+   if (ret)
+      res->gpu_address = address;
+
+   return ret;
+}
+
 void si_init_screen_buffer_functions(struct si_screen *sscreen)
 {
    sscreen->b.resource_create = si_resource_create;
    sscreen->b.resource_destroy = si_resource_destroy;
    sscreen->b.resource_from_user_memory = si_buffer_from_user_memory;
    sscreen->b.resource_get_address = si_resource_get_address;
+   if (sscreen->ws->alloc_vm) {
+      sscreen->b.alloc_vm = si_alloc_vm;
+      sscreen->b.free_vm = si_free_vm;
+      sscreen->b.resource_assign_vma = si_resource_assign_vma;
+   }
 }
 
 void si_init_buffer_functions(struct si_context *sctx)
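Together these hooks expose the whole frontend-managed VM flow on pipe_screen. A hedged sketch of how a frontend such as rusticl might drive it (the screen pointer, the svm_va value, and the error handling are assumed for illustration, not code from this commit):

   /* Reserve a VA range, create a buffer without a winsys-assigned address,
    * bind it at svm_va, then tear everything down again. */
   struct pipe_vm_allocation *vm = screen->alloc_vm(screen, svm_va, 1ull << 20);

   struct pipe_resource templ = {
      .target = PIPE_BUFFER,
      .format = PIPE_FORMAT_R8_UNORM,
      .usage = PIPE_USAGE_DEFAULT,
      .flags = PIPE_RESOURCE_FLAG_FRONTEND_VM, /* makes radeonsi set RADEON_FLAG_NO_VMA */
      .width0 = 1 << 20,
      .height0 = 1,
      .depth0 = 1,
      .array_size = 1,
   };
   struct pipe_resource *buf = screen->resource_create(screen, &templ);

   if (vm && buf && screen->resource_assign_vma(screen, buf, svm_va)) {
      /* ... the buffer is now usable at svm_va ... */
      screen->resource_assign_vma(screen, buf, 0); /* unmap before freeing the range */
   }
   pipe_resource_reference(&buf, NULL);
   screen->free_vm(screen, vm);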
@@ -1345,4 +1345,7 @@ void si_init_screen_caps(struct si_screen *sscreen)
    * KHR-GL46.texture_lod_bias.texture_lod_bias_all
    */
   caps->max_texture_lod_bias = 16;
+
+   if (sscreen->ws->va_range)
+      sscreen->ws->va_range(sscreen->ws, &caps->min_vma, &caps->max_vma);
 }
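Since the winsys fills min_vma/max_vma here, a frontend can validate a requested SVM window against the device limits. A small sketch of that assumed usage (reading the fields through the screen's caps is an assumption of this sketch):

   /* Reject SVM bases that fall outside the device's VA window. */
   bool fits = svm_base >= screen->caps.min_vma &&
               svm_base + svm_size <= screen->caps.max_vma;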
@@ -66,6 +66,7 @@ enum radeon_bo_flag
    RADEON_FLAG_WINSYS_SLAB_BACKING = (1 << 11), /* only used by the winsys */
    RADEON_FLAG_GFX12_ALLOW_DCC = (1 << 12), /* allow DCC, VRAM only */
    RADEON_FLAG_CLEAR_VRAM = (1 << 13),
+   RADEON_FLAG_NO_VMA = (1 << 14), /* frontend assigns addresses */
 };
 
 static inline void
@@ -509,6 +510,33 @@ struct radeon_winsys {
     */
    enum radeon_bo_flag (*buffer_get_flags)(struct pb_buffer_lean *buf);
 
+   /**
+    * Query the valid virtual memory range of the device for use with alloc_vm.
+    */
+   void (*va_range)(struct radeon_winsys *rws, uint64_t *start, uint64_t *end);
+
+   /**
+    * Reserves a virtual memory range for use through buffer_assign_vma. Start and size must be
+    * within the limits of va_range, otherwise this function will return NULL.
+    */
+   struct pipe_vm_allocation *(*alloc_vm)(struct radeon_winsys *rws, uint64_t start, uint64_t size);
+
+   /**
+    * Frees a virtual memory range reservation.
+    */
+   void (*free_vm)(struct radeon_winsys *rws, struct pipe_vm_allocation *alloc);
+
+   /**
+    * Assigns the given address to buf.
+    *
+    * \param buf     The buffer the address gets assigned to. This buffer must have been created
+    *                with the RADEON_FLAG_NO_VMA flag.
+    * \param address Address to be assigned. Needs to be within a range previously reserved
+    *                through alloc_vm, or 0.
+    */
+   bool (*buffer_assign_vma)(struct radeon_winsys *rws, struct pb_buffer_lean *buf,
+                             uint64_t address);
+
    /**************************************************************************
    * Command submission.
    *
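In short, the intended call order against this interface is: query the window once, reserve a range inside it, assign addresses from that range to NO_VMA buffers, and assign 0 to unmap before freeing. A compact sketch (rws and a RADEON_FLAG_NO_VMA buffer bo are assumed to exist):

   uint64_t start = 0, end = 0;
   rws->va_range(rws, &start, &end);

   /* Reserve 2 MiB at the bottom of the window; NULL if out of range. */
   struct pipe_vm_allocation *vm = rws->alloc_vm(rws, start, 2 * 1024 * 1024);

   if (vm && rws->buffer_assign_vma(rws, bo, start)) {
      /* ... */
      rws->buffer_assign_vma(rws, bo, 0); /* address 0 unmaps the buffer */
   }
   rws->free_vm(rws, vm);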
@@ -253,11 +253,16 @@ void amdgpu_bo_destroy(struct amdgpu_winsys *aws, struct pb_buffer_lean *_buf)
       _mesa_hash_table_remove_key(aws->bo_export_table, bo->bo.abo);
 
    if (bo->b.base.placement & RADEON_DOMAIN_VRAM_GTT) {
-      amdgpu_bo_va_op_common(aws, amdgpu_winsys_bo(_buf), bo->kms_handle, true, NULL, 0,
-                             bo->b.base.size, amdgpu_va_get_start_addr(bo->va_handle),
-                             AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
-                             AMDGPU_VM_PAGE_EXECUTABLE, AMDGPU_VA_OP_UNMAP);
-      ac_drm_va_range_free(bo->va_handle);
+      uint64_t vma = amdgpu_bo_real_vm_address(bo);
+      if (vma) {
+         amdgpu_bo_va_op_common(aws, amdgpu_winsys_bo(_buf), bo->kms_handle, true, NULL, 0,
+                                bo->b.base.size, vma,
+                                AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
+                                AMDGPU_VM_PAGE_EXECUTABLE, AMDGPU_VA_OP_UNMAP);
+      }
+
+      if (!(bo->b.base.usage & RADEON_FLAG_NO_VMA))
+         ac_drm_va_range_free(bo->va.handle);
    }
 
    simple_mtx_unlock(&aws->bo_export_table_lock);
@@ -666,7 +671,7 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *aws,
    uint32_t kms_handle = 0;
    ac_drm_bo_export(aws->dev, buf_handle, amdgpu_bo_handle_type_kms, &kms_handle);
 
-   if (initial_domain & RADEON_DOMAIN_VRAM_GTT) {
+   if (initial_domain & RADEON_DOMAIN_VRAM_GTT && !(flags & RADEON_FLAG_NO_VMA)) {
       unsigned va_gap_size = aws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;
 
       r = ac_drm_va_range_alloc(aws->dev, amdgpu_gpu_va_range_general,
@@ -691,6 +696,8 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *aws,
                                  size, va, vm_flags, AMDGPU_VA_OP_MAP);
       if (r)
          goto error_va_map;
+
+      bo->va.handle = va_handle;
    }
 
    simple_mtx_init(&bo->map_lock, mtx_plain);
@@ -701,7 +708,6 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *aws,
    bo->b.base.size = size;
    bo->b.unique_id = __sync_fetch_and_add(&aws->next_bo_unique_id, 1);
    bo->bo = buf_handle;
-   bo->va_handle = va_handle;
    bo->kms_handle = kms_handle;
    bo->vm_always_valid = request.flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;
 
@@ -1472,6 +1478,9 @@ amdgpu_bo_create(struct amdgpu_winsys *aws,
 
    /* Sub-allocate small buffers from slabs. */
    if (heap >= 0 && size <= max_slab_entry_size) {
+      /* radeon_get_heap_index returns -1 with RADEON_FLAG_NO_SUBALLOC */
+      assert(!(flags & RADEON_FLAG_NO_SUBALLOC));
+
       struct pb_slab_entry *entry;
       unsigned alloc_size = size;
 
@@ -1698,7 +1707,7 @@ static struct pb_buffer_lean *amdgpu_bo_from_handle(struct radeon_winsys *rws,
    bo->b.unique_id = __sync_fetch_and_add(&aws->next_bo_unique_id, 1);
    simple_mtx_init(&bo->map_lock, mtx_plain);
    bo->bo = result.bo;
-   bo->va_handle = va_handle;
+   bo->va.handle = va_handle;
    bo->kms_handle = kms_handle;
    bo->is_shared = true;
 
@@ -1868,7 +1877,7 @@ static struct pb_buffer_lean *amdgpu_bo_from_ptr(struct radeon_winsys *rws,
    simple_mtx_init(&bo->map_lock, mtx_plain);
    bo->bo = buf_handle;
    bo->cpu_ptr = pointer;
-   bo->va_handle = va_handle;
+   bo->va.handle = va_handle;
    bo->kms_handle = kms_handle;
 
    aws->allocated_gtt += aligned_size;
@@ -1917,11 +1926,11 @@ uint64_t amdgpu_bo_get_va(struct pb_buffer_lean *buf)
       struct amdgpu_bo_real_reusable_slab *slab_bo =
          (struct amdgpu_bo_real_reusable_slab *)get_slab_entry_real_bo(bo);
 
-      return amdgpu_va_get_start_addr(slab_bo->b.b.va_handle) + get_slab_entry_offset(bo);
+      return amdgpu_bo_real_vm_address(&slab_bo->b.b) + get_slab_entry_offset(bo);
    } else if (bo->type == AMDGPU_BO_SPARSE) {
       return amdgpu_va_get_start_addr(get_sparse_bo(bo)->va_handle);
    } else {
-      return amdgpu_va_get_start_addr(get_real_bo(bo)->va_handle);
+      return amdgpu_bo_real_vm_address(get_real_bo(bo));
    }
 }
 
@@ -1937,6 +1946,76 @@ static void amdgpu_buffer_destroy(struct radeon_winsys *rws, struct pb_buffer_le
    amdgpu_bo_destroy_or_cache(rws, buf);
 }
 
+static void amdgpu_va_range(struct radeon_winsys *rws, uint64_t *start, uint64_t *end)
+{
+   struct amdgpu_winsys *aws = amdgpu_winsys(rws);
+   ac_drm_va_range_query(aws->dev, amdgpu_gpu_va_range_general, start, end);
+}
+
+struct amdgpu_vm_allocation {
+   struct pipe_vm_allocation base;
+   amdgpu_va_handle handle;
+};
+
+static struct pipe_vm_allocation *amdgpu_alloc_vm(struct radeon_winsys *rws,
+                                                  uint64_t start,
+                                                  uint64_t size)
+{
+   struct amdgpu_winsys *aws = amdgpu_winsys(rws);
+   struct amdgpu_vm_allocation *alloc = CALLOC_STRUCT(amdgpu_vm_allocation);
+   if (!alloc)
+      return NULL;
+
+   uint64_t allocated;
+   if (ac_drm_va_range_alloc(aws->dev, amdgpu_gpu_va_range_general, size, 0, start, &allocated,
+                             &alloc->handle, 0)) {
+      FREE(alloc);
+      return NULL;
+   } else {
+      assert(allocated == start);
+      alloc->base.start = start;
+      alloc->base.size = size;
+      return &alloc->base;
+   }
+}
+
+static void amdgpu_free_vm(struct radeon_winsys *rws,
+                           struct pipe_vm_allocation *palloc)
+{
+   struct amdgpu_vm_allocation *alloc = (void *)palloc;
+
+   if (alloc) {
+      amdgpu_va_range_free(alloc->handle);
+      FREE(alloc);
+   }
+}
+
+static bool amdgpu_buffer_assign_vma(struct radeon_winsys *rws, struct pb_buffer_lean *buf,
+                                     uint64_t va)
+{
+   struct amdgpu_winsys *aws = amdgpu_winsys(rws);
+   struct amdgpu_winsys_bo *wbo = amdgpu_winsys_bo(buf);
+   struct amdgpu_bo_real *bo = get_real_bo(wbo);
+   unsigned vm_flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE;
+
+   assert(buf->usage & RADEON_FLAG_NO_VMA);
+   if (buf->usage & RADEON_FLAG_GL2_BYPASS)
+      vm_flags |= AMDGPU_VM_MTYPE_UC;
+
+   int r;
+   if (va)
+      r = amdgpu_bo_va_op_common(aws, NULL, bo->kms_handle, false, &bo->vm_timeline_point,
+                                 0, bo->b.base.size, va, vm_flags, AMDGPU_VA_OP_MAP);
+   else
+      r = amdgpu_bo_va_op_common(aws, wbo, bo->kms_handle, true, &bo->vm_timeline_point,
+                                 0, bo->b.base.size, bo->va.svm, vm_flags, AMDGPU_VA_OP_UNMAP);
+
+   if (!r)
+      bo->va.svm = va;
+
+   return r == 0;
+}
+
 void amdgpu_bo_init_functions(struct amdgpu_screen_winsys *sws)
 {
    sws->base.buffer_set_metadata = amdgpu_buffer_set_metadata;
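Note the asymmetry in amdgpu_buffer_assign_vma: a non-zero address issues AMDGPU_VA_OP_MAP at that address, while 0 issues AMDGPU_VA_OP_UNMAP at the previously recorded bo->va.svm. A caller moving a buffer to a different address would therefore unmap first; a two-line sketch of that assumed sequence:

   ws->buffer_assign_vma(ws, buf, 0);      /* unmap from the old address */
   ws->buffer_assign_vma(ws, buf, new_va); /* map at the new one */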
@@ -1957,4 +2036,8 @@ void amdgpu_bo_init_functions(struct amdgpu_screen_winsys *sws)
    sws->base.buffer_get_virtual_address = amdgpu_bo_get_va;
    sws->base.buffer_get_initial_domain = amdgpu_bo_get_initial_domain;
    sws->base.buffer_get_flags = amdgpu_bo_get_flags;
+   sws->base.va_range = amdgpu_va_range;
+   sws->base.alloc_vm = amdgpu_alloc_vm;
+   sws->base.free_vm = amdgpu_free_vm;
+   sws->base.buffer_assign_vma = amdgpu_buffer_assign_vma;
 }
@@ -88,7 +88,11 @@ struct amdgpu_bo_real {
    struct amdgpu_winsys_bo b;
 
    ac_drm_bo bo;
-   amdgpu_va_handle va_handle;
+   union {
+      uint64_t svm; /* used when RADEON_FLAG_NO_VMA is set */
+      amdgpu_va_handle handle;
+   } va;
 
    /* Timeline point of latest VM ioctl completion. Only used in userqueue. */
    uint64_t vm_timeline_point;
@@ -202,6 +206,14 @@ static inline struct amdgpu_bo_real_reusable_slab *get_real_bo_reusable_slab(str
    return (struct amdgpu_bo_real_reusable_slab*)bo;
 }
 
+static inline uint64_t amdgpu_bo_real_vm_address(struct amdgpu_bo_real *bo)
+{
+   if (bo->b.base.usage & RADEON_FLAG_NO_VMA)
+      return bo->va.svm;
+   else
+      return amdgpu_va_get_start_addr(bo->va.handle);
+}
+
 /* Given a sequence number "fences->seq_no[queue_index]", return a pointer to a non-NULL fence
  * pointer in the queue ring corresponding to that sequence number if the fence is non-NULL.
  * If the fence is not present in the ring (= is idle), return NULL. If it returns a non-NULL
@@ -1162,7 +1162,7 @@ static unsigned amdgpu_cs_get_buffer_list(struct radeon_cmdbuf *rcs,
       for (unsigned i = 0; i < num_real_buffers; i++) {
          list[i].bo_size = real_buffers->buffers[i].bo->base.size;
          list[i].vm_address =
-            amdgpu_va_get_start_addr(get_real_bo(real_buffers->buffers[i].bo)->va_handle);
+            amdgpu_bo_real_vm_address(get_real_bo(real_buffers->buffers[i].bo));
          list[i].priority_usage = real_buffers->buffers[i].usage;
       }
    }