diff --git a/src/gallium/frontends/rusticl/api/device.rs b/src/gallium/frontends/rusticl/api/device.rs index 4e080d7d49f..6b36054846f 100644 --- a/src/gallium/frontends/rusticl/api/device.rs +++ b/src/gallium/frontends/rusticl/api/device.rs @@ -181,7 +181,18 @@ impl CLInfo for cl_device_id { (CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN) as cl_device_fp_config, ), CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS => cl_prop::(false), - CL_DEVICE_SVM_CAPABILITIES => cl_prop::(0), + CL_DEVICE_SVM_CAPABILITIES | CL_DEVICE_SVM_CAPABILITIES_ARM => { + cl_prop::( + if dev.svm_supported() { + CL_DEVICE_SVM_COARSE_GRAIN_BUFFER + | CL_DEVICE_SVM_FINE_GRAIN_BUFFER + | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM + } else { + 0 + } + .into(), + ) + } CL_DEVICE_TYPE => cl_prop::(dev.device_type(false)), CL_DEVICE_VENDOR => cl_prop(dev.screen().device_vendor()), CL_DEVICE_VENDOR_ID => cl_prop::(dev.vendor_id()), diff --git a/src/gallium/frontends/rusticl/api/icd.rs b/src/gallium/frontends/rusticl/api/icd.rs index f493748ca85..3add76166b1 100644 --- a/src/gallium/frontends/rusticl/api/icd.rs +++ b/src/gallium/frontends/rusticl/api/icd.rs @@ -1271,6 +1271,17 @@ extern "C" fn cl_get_extension_function_address( "clCreateProgramWithILKHR" => cl_create_program_with_il as *mut ::std::ffi::c_void, "clGetPlatformInfo" => cl_get_platform_info as *mut ::std::ffi::c_void, "clIcdGetPlatformIDsKHR" => cl_icd_get_platform_ids_khr as *mut ::std::ffi::c_void, + + // cl_arm_shared_virtual_memory + "clEnqueueSVMFreeARM" => cl_enqueue_svm_free_arm as *mut ::std::ffi::c_void, + "clEnqueueSVMMapARM" => cl_enqueue_svm_map_arm as *mut ::std::ffi::c_void, + "clEnqueueSVMMemcpyARM" => cl_enqueue_svm_memcpy_arm as *mut ::std::ffi::c_void, + "clEnqueueSVMMemFillARM" => cl_enqueue_svm_mem_fill_arm as *mut ::std::ffi::c_void, + "clEnqueueSVMUnmapARM" => cl_enqueue_svm_unmap_arm as *mut ::std::ffi::c_void, + "clSetKernelArgSVMPointerARM" => cl_set_kernel_arg_svm_pointer as *mut ::std::ffi::c_void, + "clSetKernelExecInfoARM" => cl_set_kernel_exec_info as *mut ::std::ffi::c_void, + "clSVMAllocARM" => cl_svm_alloc as *mut ::std::ffi::c_void, + "clSVMFreeARM" => cl_svm_free as *mut ::std::ffi::c_void, _ => ptr::null_mut(), } } @@ -1646,76 +1657,120 @@ extern "C" fn cl_get_pipe_info( } extern "C" fn cl_svm_alloc( - _context: cl_context, - _flags: cl_svm_mem_flags, - _size: usize, - _alignment: ::std::os::raw::c_uint, + context: cl_context, + flags: cl_svm_mem_flags, + size: usize, + alignment: ::std::os::raw::c_uint, ) -> *mut ::std::os::raw::c_void { - ptr::null_mut() + svm_alloc(context, flags, size, alignment).unwrap_or(ptr::null_mut()) } -extern "C" fn cl_svm_free(_context: cl_context, _svm_pointer: *mut ::std::os::raw::c_void) {} +extern "C" fn cl_svm_free(context: cl_context, svm_pointer: *mut ::std::os::raw::c_void) { + svm_free(context, svm_pointer).ok(); +} extern "C" fn cl_enqueue_svm_free( - _command_queue: cl_command_queue, - _num_svm_pointers: cl_uint, - _svm_pointers: *mut *mut ::std::os::raw::c_void, - _pfn_free_func: ::std::option::Option, - _user_data: *mut ::std::os::raw::c_void, - _num_events_in_wait_list: cl_uint, - _event_wait_list: *const cl_event, - _event: *mut cl_event, + command_queue: cl_command_queue, + num_svm_pointers: cl_uint, + svm_pointers: *mut *mut ::std::os::raw::c_void, + pfn_free_func: ::std::option::Option, + user_data: *mut ::std::os::raw::c_void, + num_events_in_wait_list: cl_uint, + event_wait_list: *const cl_event, + event: *mut cl_event, ) -> cl_int { - CL_INVALID_OPERATION + match_err!(enqueue_svm_free( + command_queue, + num_svm_pointers, + svm_pointers, + pfn_free_func, + user_data, + num_events_in_wait_list, + event_wait_list, + event + )) } extern "C" fn cl_enqueue_svm_memcpy( - _command_queue: cl_command_queue, - _blocking_copy: cl_bool, - _dst_ptr: *mut ::std::os::raw::c_void, - _src_ptr: *const ::std::os::raw::c_void, - _size: usize, - _num_events_in_wait_list: cl_uint, - _event_wait_list: *const cl_event, - _event: *mut cl_event, + command_queue: cl_command_queue, + blocking_copy: cl_bool, + dst_ptr: *mut ::std::os::raw::c_void, + src_ptr: *const ::std::os::raw::c_void, + size: usize, + num_events_in_wait_list: cl_uint, + event_wait_list: *const cl_event, + event: *mut cl_event, ) -> cl_int { - CL_INVALID_OPERATION + match_err!(enqueue_svm_memcpy( + command_queue, + blocking_copy, + dst_ptr, + src_ptr, + size, + num_events_in_wait_list, + event_wait_list, + event + )) } extern "C" fn cl_enqueue_svm_mem_fill( - _command_queue: cl_command_queue, - _svm_ptr: *mut ::std::os::raw::c_void, - _pattern: *const ::std::os::raw::c_void, - _pattern_size: usize, - _size: usize, - _num_events_in_wait_list: cl_uint, - _event_wait_list: *const cl_event, - _event: *mut cl_event, + command_queue: cl_command_queue, + svm_ptr: *mut ::std::os::raw::c_void, + pattern: *const ::std::os::raw::c_void, + pattern_size: usize, + size: usize, + num_events_in_wait_list: cl_uint, + event_wait_list: *const cl_event, + event: *mut cl_event, ) -> cl_int { - CL_INVALID_OPERATION + match_err!(enqueue_svm_mem_fill( + command_queue, + svm_ptr, + pattern, + pattern_size, + size, + num_events_in_wait_list, + event_wait_list, + event + )) } extern "C" fn cl_enqueue_svm_map( - _command_queue: cl_command_queue, - _blocking_map: cl_bool, - _flags: cl_map_flags, - _svm_ptr: *mut ::std::os::raw::c_void, - _size: usize, - _num_events_in_wait_list: cl_uint, - _event_wait_list: *const cl_event, - _event: *mut cl_event, + command_queue: cl_command_queue, + blocking_map: cl_bool, + flags: cl_map_flags, + svm_ptr: *mut ::std::os::raw::c_void, + size: usize, + num_events_in_wait_list: cl_uint, + event_wait_list: *const cl_event, + event: *mut cl_event, ) -> cl_int { - CL_INVALID_OPERATION + match_err!(enqueue_svm_map( + command_queue, + blocking_map, + flags, + svm_ptr, + size, + num_events_in_wait_list, + event_wait_list, + event + )) } extern "C" fn cl_enqueue_svm_unmap( - _command_queue: cl_command_queue, - _svm_ptr: *mut ::std::os::raw::c_void, - _num_events_in_wait_list: cl_uint, - _event_wait_list: *const cl_event, - _event: *mut cl_event, + command_queue: cl_command_queue, + svm_ptr: *mut ::std::os::raw::c_void, + num_events_in_wait_list: cl_uint, + event_wait_list: *const cl_event, + event: *mut cl_event, ) -> cl_int { - CL_INVALID_OPERATION + match_err!(enqueue_svm_unmap( + command_queue, + svm_ptr, + num_events_in_wait_list, + event_wait_list, + event + )) } extern "C" fn cl_create_sampler_with_properties( @@ -1730,20 +1785,25 @@ extern "C" fn cl_create_sampler_with_properties( } extern "C" fn cl_set_kernel_arg_svm_pointer( - _kernel: cl_kernel, - _arg_index: cl_uint, - _arg_value: *const ::std::os::raw::c_void, + kernel: cl_kernel, + arg_index: cl_uint, + arg_value: *const ::std::os::raw::c_void, ) -> cl_int { - CL_INVALID_OPERATION + match_err!(set_kernel_arg_svm_pointer(kernel, arg_index, arg_value)) } extern "C" fn cl_set_kernel_exec_info( - _kernel: cl_kernel, - _param_name: cl_kernel_exec_info, - _param_value_size: usize, - _param_value: *const ::std::os::raw::c_void, + kernel: cl_kernel, + param_name: cl_kernel_exec_info, + param_value_size: usize, + param_value: *const ::std::os::raw::c_void, ) -> cl_int { - CL_INVALID_OPERATION + match_err!(set_kernel_exec_info( + kernel, + param_name, + param_value_size, + param_value + )) } extern "C" fn cl_clone_kernel(source_kernel: cl_kernel, errcode_ret: *mut cl_int) -> cl_kernel { @@ -1760,16 +1820,25 @@ extern "C" fn cl_create_program_with_il( } extern "C" fn cl_enqueue_svm_migrate_mem( - _command_queue: cl_command_queue, - _num_svm_pointers: cl_uint, - _svm_pointers: *mut *const ::std::os::raw::c_void, - _sizes: *const usize, - _flags: cl_mem_migration_flags, - _num_events_in_wait_list: cl_uint, - _event_wait_list: *const cl_event, - _event: *mut cl_event, + command_queue: cl_command_queue, + num_svm_pointers: cl_uint, + svm_pointers: *mut *const ::std::os::raw::c_void, + sizes: *const usize, + flags: cl_mem_migration_flags, + num_events_in_wait_list: cl_uint, + event_wait_list: *const cl_event, + event: *mut cl_event, ) -> cl_int { - CL_INVALID_OPERATION + match_err!(enqueue_svm_migrate_mem( + command_queue, + num_svm_pointers, + svm_pointers, + sizes, + flags, + num_events_in_wait_list, + event_wait_list, + event + )) } extern "C" fn cl_get_device_and_host_timer( @@ -1892,3 +1961,108 @@ extern "C" fn cl_icd_get_platform_ids_khr( ) -> cl_int { match_err!(get_platform_ids(num_entries, platforms, num_platforms)) } + +// cl_arm_shared_virtual_memory +extern "C" fn cl_enqueue_svm_free_arm( + command_queue: cl_command_queue, + num_svm_pointers: cl_uint, + svm_pointers: *mut *mut ::std::os::raw::c_void, + pfn_free_func: ::std::option::Option, + user_data: *mut ::std::os::raw::c_void, + num_events_in_wait_list: cl_uint, + event_wait_list: *const cl_event, + event: *mut cl_event, +) -> cl_int { + match_err!(enqueue_svm_free_arm( + command_queue, + num_svm_pointers, + svm_pointers, + pfn_free_func, + user_data, + num_events_in_wait_list, + event_wait_list, + event + )) +} + +extern "C" fn cl_enqueue_svm_memcpy_arm( + command_queue: cl_command_queue, + blocking_copy: cl_bool, + dst_ptr: *mut ::std::os::raw::c_void, + src_ptr: *const ::std::os::raw::c_void, + size: usize, + num_events_in_wait_list: cl_uint, + event_wait_list: *const cl_event, + event: *mut cl_event, +) -> cl_int { + match_err!(enqueue_svm_memcpy_arm( + command_queue, + blocking_copy, + dst_ptr, + src_ptr, + size, + num_events_in_wait_list, + event_wait_list, + event + )) +} + +extern "C" fn cl_enqueue_svm_mem_fill_arm( + command_queue: cl_command_queue, + svm_ptr: *mut ::std::os::raw::c_void, + pattern: *const ::std::os::raw::c_void, + pattern_size: usize, + size: usize, + num_events_in_wait_list: cl_uint, + event_wait_list: *const cl_event, + event: *mut cl_event, +) -> cl_int { + match_err!(enqueue_svm_mem_fill_arm( + command_queue, + svm_ptr, + pattern, + pattern_size, + size, + num_events_in_wait_list, + event_wait_list, + event + )) +} + +extern "C" fn cl_enqueue_svm_map_arm( + command_queue: cl_command_queue, + blocking_map: cl_bool, + flags: cl_map_flags, + svm_ptr: *mut ::std::os::raw::c_void, + size: usize, + num_events_in_wait_list: cl_uint, + event_wait_list: *const cl_event, + event: *mut cl_event, +) -> cl_int { + match_err!(enqueue_svm_map_arm( + command_queue, + blocking_map, + flags, + svm_ptr, + size, + num_events_in_wait_list, + event_wait_list, + event + )) +} + +extern "C" fn cl_enqueue_svm_unmap_arm( + command_queue: cl_command_queue, + svm_ptr: *mut ::std::os::raw::c_void, + num_events_in_wait_list: cl_uint, + event_wait_list: *const cl_event, + event: *mut cl_event, +) -> cl_int { + match_err!(enqueue_svm_unmap_arm( + command_queue, + svm_ptr, + num_events_in_wait_list, + event_wait_list, + event + )) +} diff --git a/src/gallium/frontends/rusticl/api/kernel.rs b/src/gallium/frontends/rusticl/api/kernel.rs index 2cb7ef64b6d..523a6f24e74 100644 --- a/src/gallium/frontends/rusticl/api/kernel.rs +++ b/src/gallium/frontends/rusticl/api/kernel.rs @@ -11,6 +11,7 @@ use mesa_rust_util::string::*; use rusticl_opencl_gen::*; use std::collections::HashSet; +use std::mem; use std::os::raw::c_void; use std::ptr; use std::slice; @@ -314,6 +315,78 @@ pub fn set_kernel_arg( //• CL_MAX_SIZE_RESTRICTION_EXCEEDED if the size in bytes of the memory object (if the argument is a memory object) or arg_size (if the argument is declared with local qualifier) exceeds a language- specified maximum size restriction for this argument, such as the MaxByteOffset SPIR-V decoration. This error code is missing before version 2.2. } +pub fn set_kernel_arg_svm_pointer( + kernel: cl_kernel, + arg_index: cl_uint, + arg_value: *const ::std::os::raw::c_void, +) -> CLResult<()> { + let kernel = kernel.get_ref()?; + let arg_index = arg_index as usize; + let arg_value = arg_value as usize; + + if !kernel.has_svm_devs() { + return Err(CL_INVALID_OPERATION); + } + + if let Some(arg) = kernel.args.get(arg_index) { + if !matches!( + arg.kind, + KernelArgType::MemConstant | KernelArgType::MemGlobal + ) { + return Err(CL_INVALID_ARG_INDEX); + } + + let arg_value = KernelArgValue::Constant(arg_value.to_ne_bytes().to_vec()); + kernel.values[arg_index].replace(Some(arg_value)); + Ok(()) + } else { + Err(CL_INVALID_ARG_INDEX) + } + + // CL_INVALID_ARG_VALUE if arg_value specified is not a valid value. +} + +pub fn set_kernel_exec_info( + kernel: cl_kernel, + param_name: cl_kernel_exec_info, + param_value_size: usize, + param_value: *const ::std::os::raw::c_void, +) -> CLResult<()> { + let k = kernel.get_ref()?; + + // CL_INVALID_OPERATION if no devices in the context associated with kernel support SVM. + if !k.prog.devs.iter().any(|dev| dev.svm_supported()) { + return Err(CL_INVALID_OPERATION); + } + + // CL_INVALID_VALUE ... if param_value is NULL + if param_value.is_null() { + return Err(CL_INVALID_VALUE); + } + + // CL_INVALID_VALUE ... if the size specified by param_value_size is not valid. + match param_name { + CL_KERNEL_EXEC_INFO_SVM_PTRS | CL_KERNEL_EXEC_INFO_SVM_PTRS_ARM => { + // it's a list of pointers + if param_value_size % mem::size_of::<*const c_void>() != 0 { + return Err(CL_INVALID_VALUE); + } + } + CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM + | CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_ARM => { + if param_value_size != mem::size_of::() { + return Err(CL_INVALID_VALUE); + } + } + // CL_INVALID_VALUE if param_name is not valid + _ => return Err(CL_INVALID_VALUE), + } + + Ok(()) + + // CL_INVALID_OPERATION if param_name is CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM and param_value is CL_TRUE but no devices in context associated with kernel support fine-grain system SVM allocations. +} + pub fn enqueue_ndrange_kernel( command_queue: cl_command_queue, kernel: cl_kernel, diff --git a/src/gallium/frontends/rusticl/api/memory.rs b/src/gallium/frontends/rusticl/api/memory.rs index 8ceb38cc334..63d1d566aa2 100644 --- a/src/gallium/frontends/rusticl/api/memory.rs +++ b/src/gallium/frontends/rusticl/api/memory.rs @@ -4,6 +4,7 @@ use crate::api::event::create_and_queue; use crate::api::icd::*; use crate::api::types::*; use crate::api::util::*; +use crate::core::context::Context; use crate::core::device::*; use crate::core::format::*; use crate::core::memory::*; @@ -13,7 +14,10 @@ use mesa_rust_util::properties::Properties; use mesa_rust_util::ptr::*; use rusticl_opencl_gen::*; +use std::alloc; +use std::alloc::Layout; use std::cmp::Ordering; +use std::mem; use std::os::raw::c_void; use std::ptr; use std::slice; @@ -56,7 +60,7 @@ fn validate_mem_flags(flags: cl_mem_flags, images: bool) -> CLResult<()> { Ok(()) } -fn validate_map_flags(m: &Mem, map_flags: cl_mem_flags) -> CLResult<()> { +fn validate_map_flags_common(map_flags: cl_mem_flags) -> CLResult<()> { // CL_INVALID_VALUE ... if values specified in map_flags are not valid. let valid_flags = cl_bitfield::from(CL_MAP_READ | CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION); @@ -69,6 +73,12 @@ fn validate_map_flags(m: &Mem, map_flags: cl_mem_flags) -> CLResult<()> { return Err(CL_INVALID_VALUE); } + Ok(()) +} + +fn validate_map_flags(m: &Mem, map_flags: cl_mem_flags) -> CLResult<()> { + validate_map_flags_common(map_flags)?; + // CL_INVALID_OPERATION if buffer has been created with CL_MEM_HOST_WRITE_ONLY or // CL_MEM_HOST_NO_ACCESS and CL_MAP_READ is set in map_flags if bit_check(m.flags, CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS) && @@ -223,7 +233,9 @@ impl CLInfo for cl_mem { CL_MEM_REFERENCE_COUNT => cl_prop::(self.refcnt()?), CL_MEM_SIZE => cl_prop::(mem.size), CL_MEM_TYPE => cl_prop::(mem.mem_type), - CL_MEM_USES_SVM_POINTER => cl_prop::(CL_FALSE), + CL_MEM_USES_SVM_POINTER | CL_MEM_USES_SVM_POINTER_ARM => { + cl_prop::(mem.is_svm().into()) + } _ => return Err(CL_INVALID_VALUE), }) } @@ -2172,3 +2184,599 @@ impl CLInfo for cl_mem { Err(CL_INVALID_MEM_OBJECT) } } + +pub fn svm_alloc( + context: cl_context, + flags: cl_svm_mem_flags, + size: usize, + mut alignment: cl_uint, +) -> CLResult<*mut c_void> { + // clSVMAlloc will fail if + + // context is not a valid context + let c = context.get_ref()?; + + // or no devices in context support SVM. + if !c.has_svm_devs() { + return Err(CL_INVALID_OPERATION); + } + + // flags does not contain CL_MEM_SVM_FINE_GRAIN_BUFFER but does contain CL_MEM_SVM_ATOMICS. + if !bit_check(flags, CL_MEM_SVM_FINE_GRAIN_BUFFER) && bit_check(flags, CL_MEM_SVM_ATOMICS) { + return Err(CL_INVALID_VALUE); + } + + // size is 0 or > CL_DEVICE_MAX_MEM_ALLOC_SIZE value for any device in context. + if size == 0 || checked_compare(size, Ordering::Greater, c.max_mem_alloc()) { + return Err(CL_INVALID_VALUE); + } + + if alignment == 0 { + alignment = mem::size_of::<[u64; 16]>() as cl_uint; + } + + // alignment is not a power of two + if !alignment.is_power_of_two() { + return Err(CL_INVALID_VALUE); + } + + let layout; + let ptr; + + // SAFETY: we already verify the parameters to from_size_align above and layout is of non zero + // size + unsafe { + layout = Layout::from_size_align_unchecked(size, alignment as usize); + ptr = alloc::alloc(layout); + } + + if ptr.is_null() { + return Err(CL_OUT_OF_HOST_MEMORY); + } + + c.add_svm_ptr(ptr.cast(), layout); + Ok(ptr.cast()) + + // Values specified in flags do not follow rules described for supported values in the SVM Memory Flags table. + // CL_MEM_SVM_FINE_GRAIN_BUFFER or CL_MEM_SVM_ATOMICS is specified in flags and these are not supported by at least one device in context. + // The values specified in flags are not valid, i.e. don’t match those defined in the SVM Memory Flags table. + // the OpenCL implementation cannot support the specified alignment for at least one device in context. + // There was a failure to allocate resources. +} + +fn svm_free_impl(c: &Context, svm_pointer: *mut c_void) { + if let Some(layout) = c.remove_svm_ptr(svm_pointer) { + // SAFETY: we make sure that svm_pointer is a valid allocation and reuse the same layout + // from the allocation + unsafe { + alloc::dealloc(svm_pointer.cast(), layout); + } + } +} + +pub fn svm_free(context: cl_context, svm_pointer: *mut c_void) -> CLResult<()> { + let c = context.get_ref()?; + svm_free_impl(c, svm_pointer); + Ok(()) +} + +fn enqueue_svm_free_impl( + command_queue: cl_command_queue, + num_svm_pointers: cl_uint, + svm_pointers: *mut *mut c_void, + pfn_free_func: Option, + user_data: *mut c_void, + num_events_in_wait_list: cl_uint, + event_wait_list: *const cl_event, + event: *mut cl_event, + cmd_type: cl_command_type, +) -> CLResult<()> { + let q = command_queue.get_arc()?; + let evs = event_list_from_cl(&q, num_events_in_wait_list, event_wait_list)?; + + // CL_INVALID_VALUE if num_svm_pointers is 0 and svm_pointers is non-NULL, or if svm_pointers is + // NULL and num_svm_pointers is not 0. + if num_svm_pointers == 0 && !svm_pointers.is_null() + || num_svm_pointers != 0 && svm_pointers.is_null() + { + return Err(CL_INVALID_VALUE); + } + + // CL_INVALID_OPERATION if the device associated with command queue does not support SVM. + if !q.device.svm_supported() { + return Err(CL_INVALID_OPERATION); + } + + create_and_queue( + q, + cmd_type, + evs, + event, + false, + Box::new(move |q, _| { + if let Some(cb) = pfn_free_func { + // SAFETY: it's undefined behavior if the application screws up + unsafe { + cb(command_queue, num_svm_pointers, svm_pointers, user_data); + } + } else { + // SAFETY: num_svm_pointers specifies the amount of elements in svm_pointers + let svm_pointers = + unsafe { slice::from_raw_parts(svm_pointers, num_svm_pointers as usize) }; + for &ptr in svm_pointers { + svm_free_impl(&q.context, ptr); + } + } + + Ok(()) + }), + ) +} + +pub fn enqueue_svm_free( + command_queue: cl_command_queue, + num_svm_pointers: cl_uint, + svm_pointers: *mut *mut c_void, + pfn_free_func: Option, + user_data: *mut c_void, + num_events_in_wait_list: cl_uint, + event_wait_list: *const cl_event, + event: *mut cl_event, +) -> CLResult<()> { + enqueue_svm_free_impl( + command_queue, + num_svm_pointers, + svm_pointers, + pfn_free_func, + user_data, + num_events_in_wait_list, + event_wait_list, + event, + CL_COMMAND_SVM_FREE, + ) +} + +pub fn enqueue_svm_free_arm( + command_queue: cl_command_queue, + num_svm_pointers: cl_uint, + svm_pointers: *mut *mut c_void, + pfn_free_func: Option, + user_data: *mut c_void, + num_events_in_wait_list: cl_uint, + event_wait_list: *const cl_event, + event: *mut cl_event, +) -> CLResult<()> { + enqueue_svm_free_impl( + command_queue, + num_svm_pointers, + svm_pointers, + pfn_free_func, + user_data, + num_events_in_wait_list, + event_wait_list, + event, + CL_COMMAND_SVM_FREE_ARM, + ) +} + +fn enqueue_svm_memcpy_impl( + command_queue: cl_command_queue, + blocking_copy: cl_bool, + dst_ptr: *mut c_void, + src_ptr: *const c_void, + size: usize, + num_events_in_wait_list: cl_uint, + event_wait_list: *const cl_event, + event: *mut cl_event, + cmd_type: cl_command_type, +) -> CLResult<()> { + let q = command_queue.get_arc()?; + let evs = event_list_from_cl(&q, num_events_in_wait_list, event_wait_list)?; + let block = check_cl_bool(blocking_copy).ok_or(CL_INVALID_VALUE)?; + + // CL_INVALID_OPERATION if the device associated with command queue does not support SVM. + if !q.device.svm_supported() { + return Err(CL_INVALID_OPERATION); + } + + // CL_INVALID_VALUE if dst_ptr or src_ptr is NULL. + if dst_ptr.is_null() || src_ptr.is_null() { + return Err(CL_INVALID_VALUE); + } + + // CL_MEM_COPY_OVERLAP if the values specified for dst_ptr, src_ptr and size result in an + // overlapping copy. + let dst_ptr_addr = dst_ptr as usize; + let src_ptr_addr = src_ptr as usize; + if (src_ptr_addr <= dst_ptr_addr && dst_ptr_addr < src_ptr_addr + size) + || (dst_ptr_addr <= src_ptr_addr && src_ptr_addr < dst_ptr_addr + size) + { + return Err(CL_MEM_COPY_OVERLAP); + } + + create_and_queue( + q, + cmd_type, + evs, + event, + block, + Box::new(move |_, _| { + // SAFETY: We check for overlapping copies already and alignment doesn't matter for void + // pointers. And we also trust applications to provide properly allocated memory regions + // and if not it's all undefined anyway. + unsafe { + ptr::copy_nonoverlapping(src_ptr, dst_ptr, size); + } + Ok(()) + }), + ) +} + +pub fn enqueue_svm_memcpy( + command_queue: cl_command_queue, + blocking_copy: cl_bool, + dst_ptr: *mut c_void, + src_ptr: *const c_void, + size: usize, + num_events_in_wait_list: cl_uint, + event_wait_list: *const cl_event, + event: *mut cl_event, +) -> CLResult<()> { + enqueue_svm_memcpy_impl( + command_queue, + blocking_copy, + dst_ptr, + src_ptr, + size, + num_events_in_wait_list, + event_wait_list, + event, + CL_COMMAND_SVM_MEMCPY, + ) +} + +pub fn enqueue_svm_memcpy_arm( + command_queue: cl_command_queue, + blocking_copy: cl_bool, + dst_ptr: *mut c_void, + src_ptr: *const c_void, + size: usize, + num_events_in_wait_list: cl_uint, + event_wait_list: *const cl_event, + event: *mut cl_event, +) -> CLResult<()> { + enqueue_svm_memcpy_impl( + command_queue, + blocking_copy, + dst_ptr, + src_ptr, + size, + num_events_in_wait_list, + event_wait_list, + event, + CL_COMMAND_SVM_MEMCPY_ARM, + ) +} + +fn enqueue_svm_mem_fill_impl( + command_queue: cl_command_queue, + svm_ptr: *mut ::std::os::raw::c_void, + pattern: *const ::std::os::raw::c_void, + pattern_size: usize, + size: usize, + num_events_in_wait_list: cl_uint, + event_wait_list: *const cl_event, + event: *mut cl_event, + cmd_type: cl_command_type, +) -> CLResult<()> { + let q = command_queue.get_arc()?; + let evs = event_list_from_cl(&q, num_events_in_wait_list, event_wait_list)?; + let svm_ptr_addr = svm_ptr as usize; + + // CL_INVALID_OPERATION if the device associated with command queue does not support SVM. + if !q.device.svm_supported() { + return Err(CL_INVALID_OPERATION); + } + + // CL_INVALID_VALUE if svm_ptr is NULL. + if svm_ptr.is_null() { + return Err(CL_INVALID_VALUE); + } + + // CL_INVALID_VALUE if svm_ptr is not aligned to pattern_size bytes. + if svm_ptr_addr & (pattern_size - 1) != 0 { + return Err(CL_INVALID_VALUE); + } + + // CL_INVALID_VALUE if pattern is NULL or if pattern_size is 0 or if pattern_size is not one of + // {1, 2, 4, 8, 16, 32, 64, 128}. + if pattern.is_null() + || pattern_size == 0 + || !pattern_size.is_power_of_two() + || pattern_size > 128 + { + return Err(CL_INVALID_VALUE); + } + + // CL_INVALID_VALUE if size is not a multiple of pattern_size. + if size % pattern_size != 0 { + return Err(CL_INVALID_VALUE); + } + + create_and_queue( + q, + cmd_type, + evs, + event, + false, + Box::new(move |_, _| { + let mut offset = 0; + while offset < size { + // SAFETY: pointer are either valid or undefined behavior + unsafe { + ptr::copy(pattern, svm_ptr.add(offset), pattern_size); + } + offset += pattern_size; + } + + Ok(()) + }), + ) +} + +pub fn enqueue_svm_mem_fill( + command_queue: cl_command_queue, + svm_ptr: *mut ::std::os::raw::c_void, + pattern: *const ::std::os::raw::c_void, + pattern_size: usize, + size: usize, + num_events_in_wait_list: cl_uint, + event_wait_list: *const cl_event, + event: *mut cl_event, +) -> CLResult<()> { + enqueue_svm_mem_fill_impl( + command_queue, + svm_ptr, + pattern, + pattern_size, + size, + num_events_in_wait_list, + event_wait_list, + event, + CL_COMMAND_SVM_MEMFILL, + ) +} + +pub fn enqueue_svm_mem_fill_arm( + command_queue: cl_command_queue, + svm_ptr: *mut ::std::os::raw::c_void, + pattern: *const ::std::os::raw::c_void, + pattern_size: usize, + size: usize, + num_events_in_wait_list: cl_uint, + event_wait_list: *const cl_event, + event: *mut cl_event, +) -> CLResult<()> { + enqueue_svm_mem_fill_impl( + command_queue, + svm_ptr, + pattern, + pattern_size, + size, + num_events_in_wait_list, + event_wait_list, + event, + CL_COMMAND_SVM_MEMFILL_ARM, + ) +} + +fn enqueue_svm_map_impl( + command_queue: cl_command_queue, + blocking_map: cl_bool, + flags: cl_map_flags, + svm_ptr: *mut ::std::os::raw::c_void, + size: usize, + num_events_in_wait_list: cl_uint, + event_wait_list: *const cl_event, + event: *mut cl_event, + cmd_type: cl_command_type, +) -> CLResult<()> { + let q = command_queue.get_arc()?; + let evs = event_list_from_cl(&q, num_events_in_wait_list, event_wait_list)?; + let block = check_cl_bool(blocking_map).ok_or(CL_INVALID_VALUE)?; + + // CL_INVALID_OPERATION if the device associated with command queue does not support SVM. + if !q.device.svm_supported() { + return Err(CL_INVALID_OPERATION); + } + + // CL_INVALID_VALUE if svm_ptr is NULL. + if svm_ptr.is_null() { + return Err(CL_INVALID_VALUE); + } + + // CL_INVALID_VALUE if size is 0 ... + if size == 0 { + return Err(CL_INVALID_VALUE); + } + + // ... or if values specified in map_flags are not valid. + validate_map_flags_common(flags)?; + + create_and_queue(q, cmd_type, evs, event, block, Box::new(|_, _| Ok(()))) +} + +pub fn enqueue_svm_map( + command_queue: cl_command_queue, + blocking_map: cl_bool, + flags: cl_map_flags, + svm_ptr: *mut ::std::os::raw::c_void, + size: usize, + num_events_in_wait_list: cl_uint, + event_wait_list: *const cl_event, + event: *mut cl_event, +) -> CLResult<()> { + enqueue_svm_map_impl( + command_queue, + blocking_map, + flags, + svm_ptr, + size, + num_events_in_wait_list, + event_wait_list, + event, + CL_COMMAND_SVM_MAP, + ) +} + +pub fn enqueue_svm_map_arm( + command_queue: cl_command_queue, + blocking_map: cl_bool, + flags: cl_map_flags, + svm_ptr: *mut ::std::os::raw::c_void, + size: usize, + num_events_in_wait_list: cl_uint, + event_wait_list: *const cl_event, + event: *mut cl_event, +) -> CLResult<()> { + enqueue_svm_map_impl( + command_queue, + blocking_map, + flags, + svm_ptr, + size, + num_events_in_wait_list, + event_wait_list, + event, + CL_COMMAND_SVM_MAP_ARM, + ) +} + +fn enqueue_svm_unmap_impl( + command_queue: cl_command_queue, + svm_ptr: *mut ::std::os::raw::c_void, + num_events_in_wait_list: cl_uint, + event_wait_list: *const cl_event, + event: *mut cl_event, + cmd_type: cl_command_type, +) -> CLResult<()> { + let q = command_queue.get_arc()?; + let evs = event_list_from_cl(&q, num_events_in_wait_list, event_wait_list)?; + + // CL_INVALID_OPERATION if the device associated with command queue does not support SVM. + if !q.device.svm_supported() { + return Err(CL_INVALID_OPERATION); + } + + // CL_INVALID_VALUE if svm_ptr is NULL. + if svm_ptr.is_null() { + return Err(CL_INVALID_VALUE); + } + + create_and_queue(q, cmd_type, evs, event, false, Box::new(|_, _| Ok(()))) +} + +pub fn enqueue_svm_unmap( + command_queue: cl_command_queue, + svm_ptr: *mut ::std::os::raw::c_void, + num_events_in_wait_list: cl_uint, + event_wait_list: *const cl_event, + event: *mut cl_event, +) -> CLResult<()> { + enqueue_svm_unmap_impl( + command_queue, + svm_ptr, + num_events_in_wait_list, + event_wait_list, + event, + CL_COMMAND_SVM_UNMAP, + ) +} + +pub fn enqueue_svm_unmap_arm( + command_queue: cl_command_queue, + svm_ptr: *mut ::std::os::raw::c_void, + num_events_in_wait_list: cl_uint, + event_wait_list: *const cl_event, + event: *mut cl_event, +) -> CLResult<()> { + enqueue_svm_unmap_impl( + command_queue, + svm_ptr, + num_events_in_wait_list, + event_wait_list, + event, + CL_COMMAND_SVM_UNMAP_ARM, + ) +} + +pub fn enqueue_svm_migrate_mem( + command_queue: cl_command_queue, + num_svm_pointers: cl_uint, + svm_pointers: *mut *const ::std::os::raw::c_void, + sizes: *const usize, + flags: cl_mem_migration_flags, + num_events_in_wait_list: cl_uint, + event_wait_list: *const cl_event, + event: *mut cl_event, +) -> CLResult<()> { + let q = command_queue.get_arc()?; + let evs = event_list_from_cl(&q, num_events_in_wait_list, event_wait_list)?; + + // CL_INVALID_OPERATION if the device associated with command queue does not support SVM. + if !q.device.svm_supported() { + return Err(CL_INVALID_OPERATION); + } + + // CL_INVALID_VALUE if num_svm_pointers is zero or svm_pointers is NULL. + if num_svm_pointers == 0 || svm_pointers.is_null() { + return Err(CL_INVALID_VALUE); + } + + let num_svm_pointers = num_svm_pointers as usize; + // SAFETY: Just hoping the application is alright. + let mut svm_pointers = + unsafe { slice::from_raw_parts(svm_pointers, num_svm_pointers) }.to_owned(); + // if sizes is NULL, every allocation containing the pointers need to be migrated + let mut sizes = if sizes.is_null() { + vec![0; num_svm_pointers] + } else { + unsafe { slice::from_raw_parts(sizes, num_svm_pointers) }.to_owned() + }; + + // CL_INVALID_VALUE if sizes[i] is non-zero range [svm_pointers[i], svm_pointers[i]+sizes[i]) is + // not contained within an existing clSVMAlloc allocation. + for (ptr, size) in svm_pointers.iter_mut().zip(&mut sizes) { + if let Some((alloc, layout)) = q.context.find_svm_alloc(ptr.cast()) { + let ptr_addr = *ptr as usize; + let alloc_addr = alloc as usize; + + // if the offset + size is bigger than the allocation we are out of bounds + if (ptr_addr - alloc_addr) + *size <= layout.size() { + // if the size is 0, the entire allocation should be migrated + if *size == 0 { + *ptr = alloc.cast(); + *size = layout.size(); + } + continue; + } + } + + return Err(CL_INVALID_VALUE); + } + + let to_device = !bit_check(flags, CL_MIGRATE_MEM_OBJECT_HOST); + let content_undefined = bit_check(flags, CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED); + + create_and_queue( + q, + CL_COMMAND_SVM_MIGRATE_MEM, + evs, + event, + false, + Box::new(move |_, ctx| { + ctx.svm_migrate(&svm_pointers, &sizes, to_device, content_undefined); + Ok(()) + }), + ) +} diff --git a/src/gallium/frontends/rusticl/core/context.rs b/src/gallium/frontends/rusticl/core/context.rs index 7f9245fce68..3f1dbdc78aa 100644 --- a/src/gallium/frontends/rusticl/core/context.rs +++ b/src/gallium/frontends/rusticl/core/context.rs @@ -10,6 +10,8 @@ use mesa_rust::pipe::screen::ResourceType; use mesa_rust_util::properties::Properties; use rusticl_opencl_gen::*; +use std::alloc::Layout; +use std::collections::BTreeMap; use std::collections::HashMap; use std::convert::TryInto; use std::os::raw::c_void; @@ -21,6 +23,7 @@ pub struct Context { pub devs: Vec>, pub properties: Properties, pub dtors: Mutex>>, + pub svm_ptrs: Mutex>, } impl_cl_type_trait!(cl_context, Context, CL_INVALID_CONTEXT); @@ -35,6 +38,7 @@ impl Context { devs: devs, properties: properties, dtors: Mutex::new(Vec::new()), + svm_ptrs: Mutex::new(BTreeMap::new()), }) } @@ -150,6 +154,33 @@ impl Context { .min() .unwrap() } + + pub fn has_svm_devs(&self) -> bool { + self.devs.iter().any(|dev| dev.svm_supported()) + } + + pub fn add_svm_ptr(&self, ptr: *mut c_void, layout: Layout) { + self.svm_ptrs.lock().unwrap().insert(ptr, layout); + } + + pub fn find_svm_alloc(&self, ptr: *const c_void) -> Option<(*const c_void, Layout)> { + let lock = self.svm_ptrs.lock().unwrap(); + if let Some((&base, layout)) = lock.range(..=ptr).next_back() { + // SAFETY: we really just do some pointer math here... + unsafe { + // we check if ptr is within [base..base+size) + // means we can check if ptr - (base + size) < 0 + if ptr.offset_from(base.add(layout.size())) < 0 { + return Some((base, *layout)); + } + } + } + None + } + + pub fn remove_svm_ptr(&self, ptr: *const c_void) -> Option { + self.svm_ptrs.lock().unwrap().remove(&ptr) + } } impl Drop for Context { diff --git a/src/gallium/frontends/rusticl/core/device.rs b/src/gallium/frontends/rusticl/core/device.rs index f2450c89ef9..b3f6aeefe06 100644 --- a/src/gallium/frontends/rusticl/core/device.rs +++ b/src/gallium/frontends/rusticl/core/device.rs @@ -524,6 +524,10 @@ impl Device { } } + if self.svm_supported() { + add_ext(1, 0, 0, "cl_arm_shared_virtual_memory", ""); + } + self.extensions = exts; self.clc_features = feats; self.extension_string = exts_str.join(" "); @@ -748,6 +752,10 @@ impl Device { ) } + pub fn svm_supported(&self) -> bool { + self.screen.param(pipe_cap::PIPE_CAP_SYSTEM_SVM) == 1 + } + pub fn unified_memory(&self) -> bool { self.screen.param(pipe_cap::PIPE_CAP_UMA) == 1 } diff --git a/src/gallium/frontends/rusticl/core/kernel.rs b/src/gallium/frontends/rusticl/core/kernel.rs index e1ce6f0d4bc..ccc19030d87 100644 --- a/src/gallium/frontends/rusticl/core/kernel.rs +++ b/src/gallium/frontends/rusticl/core/kernel.rs @@ -1245,6 +1245,10 @@ impl Kernel { // TODO include args self.dev_state.get(dev).nir.shared_size() as cl_ulong } + + pub fn has_svm_devs(&self) -> bool { + self.prog.devs.iter().any(|dev| dev.svm_supported()) + } } impl Clone for Kernel { diff --git a/src/gallium/frontends/rusticl/core/memory.rs b/src/gallium/frontends/rusticl/core/memory.rs index d8e2a413e09..430c2cc0bef 100644 --- a/src/gallium/frontends/rusticl/core/memory.rs +++ b/src/gallium/frontends/rusticl/core/memory.rs @@ -602,6 +602,14 @@ impl Mem { self.is_parent_buffer() && self.mem_type == CL_MEM_OBJECT_IMAGE2D } + // this is kinda bogus, because that won't work with system SVM, but the spec wants us to + // implement this. + pub fn is_svm(&self) -> bool { + let mem = self.get_parent(); + self.context.find_svm_alloc(mem.host_ptr.cast()).is_some() + && bit_check(mem.flags, CL_MEM_USE_HOST_PTR) + } + fn get_res(&self) -> CLResult<&HashMap, Arc>> { self.get_parent().res.as_ref().ok_or(CL_OUT_OF_HOST_MEMORY) } diff --git a/src/gallium/frontends/rusticl/mesa/pipe/context.rs b/src/gallium/frontends/rusticl/mesa/pipe/context.rs index abd234bb0b9..96c43e95805 100644 --- a/src/gallium/frontends/rusticl/mesa/pipe/context.rs +++ b/src/gallium/frontends/rusticl/mesa/pipe/context.rs @@ -513,6 +513,30 @@ impl PipeContext { PipeFence::new(fence, &self.screen) } } + + pub fn svm_migrate( + &self, + ptrs: &[*const c_void], + sizes: &[usize], + to_device: bool, + content_undefined: bool, + ) { + assert_eq!(ptrs.len(), sizes.len()); + unsafe { + if let Some(cb) = self.pipe.as_ref().svm_migrate { + cb( + self.pipe.as_ptr(), + ptrs.len() as u32, + ptrs.as_ptr(), + sizes.as_ptr(), + to_device, + content_undefined, + ); + } else { + panic!("svm_migrate not implemented but called!"); + } + } + } } impl Drop for PipeContext {