From 2ac657d2d0e6cb93070f8fdd0d3e3f313bb72551 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Fri, 29 Apr 2022 14:02:57 +0200 Subject: [PATCH] rusticl/kernel: add support for offsets Signed-off-by: Karol Herbst Acked-by: Alyssa Rosenzweig Part-of: --- src/gallium/frontends/rusticl/core/kernel.rs | 40 +++++++++++++++---- .../frontends/rusticl/mesa/pipe/context.rs | 11 +---- src/gallium/frontends/rusticl/rusticl_nir.c | 2 + src/gallium/frontends/rusticl/rusticl_nir.h | 1 + 4 files changed, 37 insertions(+), 17 deletions(-) diff --git a/src/gallium/frontends/rusticl/core/kernel.rs b/src/gallium/frontends/rusticl/core/kernel.rs index e3a03e94482..3eb6314cb36 100644 --- a/src/gallium/frontends/rusticl/core/kernel.rs +++ b/src/gallium/frontends/rusticl/core/kernel.rs @@ -3,6 +3,7 @@ extern crate mesa_rust_gen; extern crate rusticl_opencl_gen; use crate::api::icd::*; +use crate::api::util::cl_prop; use crate::core::device::*; use crate::core::event::*; use crate::core::memory::*; @@ -43,6 +44,7 @@ pub enum KernelArgType { #[derive(Hash, PartialEq, Eq)] pub enum InternalKernelArgType { ConstantBuffer, + GlobalWorkOffsets, } pub struct KernelArg { @@ -138,7 +140,11 @@ pub struct Kernel { impl_cl_type_trait!(cl_kernel, Kernel, CL_INVALID_KERNEL); -fn create_kernel_arr(vals: &[usize], val: u32) -> [u32; 3] { +fn create_kernel_arr<T>(vals: &[usize], val: T) -> [T; 3] +where + T: std::convert::TryFrom<usize> + Copy, + <T as std::convert::TryFrom<usize>>::Error: std::fmt::Debug, +{ let mut res = [val; 3]; for (i, v) in vals.iter().enumerate() { res[i] = (*v).try_into().expect("64 bit work groups not supported"); @@ -247,6 +253,18 @@ fn lower_and_optimize_nir_late( nir.extract_constant_initializers(); // TODO printf // TODO 32 bit devices + // add vars for global offsets + res.push(InternalKernelArg { + kind: InternalKernelArgType::GlobalWorkOffsets, + offset: 0, + size: 24, + }); + lower_state.base_global_invoc_id = nir.add_var( + nir_variable_mode::nir_var_uniform, + unsafe { 
glsl_vector_type(glsl_base_type::GLSL_TYPE_UINT64, 3) }, + args + res.len() - 1, + "base_global_invocation_id", + ); if nir.has_constant() { res.push(InternalKernelArg { kind: InternalKernelArgType::ConstantBuffer, @@ -274,6 +292,10 @@ fn lower_and_optimize_nir_late( nir_variable_mode::nir_var_mem_global | nir_variable_mode::nir_var_mem_constant, nir_address_format::nir_address_format_64bit_global, ); + nir.pass0(nir_lower_system_values); + let mut compute_options = nir_lower_compute_system_values_options::default(); + compute_options.set_has_base_global_invocation_id(true); + nir.pass1(nir_lower_compute_system_values, &compute_options); nir.pass1(rusticl_lower_intrinsics, &mut lower_state); nir.pass2( nir_lower_explicit_io, @@ -282,9 +304,6 @@ fn lower_and_optimize_nir_late( | nir_variable_mode::nir_var_uniform, nir_address_format::nir_address_format_32bit_offset_as_64bit, ); - nir.pass0(nir_lower_system_values); - let compute_options = nir_lower_compute_system_values_options::default(); - nir.pass1(nir_lower_compute_system_values, &compute_options); nir.pass0(nir_opt_deref); nir.pass0(nir_lower_vars_to_ssa); @@ -358,9 +377,9 @@ impl Kernel { offsets: &[usize], ) -> EventSig { let nir = self.nirs.get(&q.device).unwrap(); - let mut block = create_kernel_arr(block, 1); - let mut grid = create_kernel_arr(grid, 1); - let offsets = create_kernel_arr(offsets, 0); + let mut block = create_kernel_arr::<u32>(block, 1); + let mut grid = create_kernel_arr::<u32>(grid, 1); + let offsets = create_kernel_arr::<u64>(offsets, 0); let mut input: Vec<u8> = Vec::new(); let mut resource_info = Vec::new(); let mut local_size: u32 = nir.shared_size(); @@ -377,6 +396,7 @@ impl Kernel { if arg.dead { continue; } + input.append(&mut vec![0; arg.offset - input.len()]); match val.borrow().as_ref().unwrap() { KernelArgValue::Constant(c) => input.extend_from_slice(c), KernelArgValue::MemObject(mem) => { @@ -400,6 +420,7 @@ impl Kernel { } for arg in &self.internal_args { + input.append(&mut vec![0; arg.offset - 
input.len()]); match arg.kind { InternalKernelArgType::ConstantBuffer => { input.extend_from_slice(&[0; 8]); @@ -418,6 +439,9 @@ impl Kernel { ); resource_info.push((Some(res), arg.offset)); } + InternalKernelArgType::GlobalWorkOffsets => { + input.extend_from_slice(&cl_prop::<[u64; 3]>(offsets)); + } } } @@ -438,7 +462,7 @@ impl Kernel { ctx.bind_compute_state(cso); ctx.set_global_binding(resources.as_slice(), &mut globals); - ctx.launch_grid(work_dim, block, grid, offsets, &input); + ctx.launch_grid(work_dim, block, grid, &input); ctx.clear_global_binding(globals.len() as u32); ctx.delete_compute_state(cso); ctx.memory_barrier(PIPE_BARRIER_GLOBAL_BUFFER); diff --git a/src/gallium/frontends/rusticl/mesa/pipe/context.rs b/src/gallium/frontends/rusticl/mesa/pipe/context.rs index 42e68c6e28f..bdcb898f980 100644 --- a/src/gallium/frontends/rusticl/mesa/pipe/context.rs +++ b/src/gallium/frontends/rusticl/mesa/pipe/context.rs @@ -171,14 +171,7 @@ impl PipeContext { unsafe { self.pipe.as_ref().delete_compute_state.unwrap()(self.pipe.as_ptr(), state) } } - pub fn launch_grid( - &self, - work_dim: u32, - block: [u32; 3], - grid: [u32; 3], - grid_base: [u32; 3], - input: &[u8], - ) { + pub fn launch_grid(&self, work_dim: u32, block: [u32; 3], grid: [u32; 3], input: &[u8]) { let info = pipe_grid_info { pc: 0, input: input.as_ptr().cast(), @@ -186,7 +179,7 @@ impl PipeContext { block: block, last_block: [0; 3], grid: grid, - grid_base: grid_base, + grid_base: [0; 3], indirect: ptr::null_mut(), indirect_offset: 0, }; diff --git a/src/gallium/frontends/rusticl/rusticl_nir.c b/src/gallium/frontends/rusticl/rusticl_nir.c index 5f7b978a1aa..973b2151e91 100644 --- a/src/gallium/frontends/rusticl/rusticl_nir.c +++ b/src/gallium/frontends/rusticl/rusticl_nir.c @@ -19,6 +19,8 @@ rusticl_lower_intrinsics_instr( struct rusticl_lower_state *state = _state; switch (intrinsic->intrinsic) { + case nir_intrinsic_load_base_global_invocation_id: + return nir_load_var(b, 
state->base_global_invoc_id); case nir_intrinsic_load_constant_base_ptr: return nir_load_var(b, state->const_buf); default: diff --git a/src/gallium/frontends/rusticl/rusticl_nir.h b/src/gallium/frontends/rusticl/rusticl_nir.h index 9121c25043c..f34e91dd248 100644 --- a/src/gallium/frontends/rusticl/rusticl_nir.h +++ b/src/gallium/frontends/rusticl/rusticl_nir.h @@ -1,4 +1,5 @@ struct rusticl_lower_state { + nir_variable *base_global_invoc_id; nir_variable *const_buf; };