rusticl/kernel: rework KernelExecBuilder::get_resources_and_globals to reduce allocations

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36917>
This commit is contained in:
Karol Herbst
2025-08-21 18:32:20 +02:00
committed by Marge Bot
parent 0b80bf3ee0
commit 0478d83eae
+18 -15
View File
@@ -1251,22 +1251,25 @@ fn extract<'a, const S: usize>(buf: &'a mut &[u8]) -> &'a [u8; S] {
struct KernelExecBuilder<'a> {
dev: &'static Device,
input: Vec<u8>,
resource_info: Vec<(&'a PipeResource, usize)>,
resources: Vec<&'a PipeResource>,
resource_offsets: Vec<usize>,
workgroup_id_offset_loc: Option<usize>,
}
impl<'a> KernelExecBuilder<'a> {
fn new(dev: &'static Device, input_size: u32) -> Self {
fn new(dev: &'static Device, input_size: u32, num_globals: usize) -> Self {
Self {
dev: dev,
input: Vec::with_capacity(input_size as usize),
resource_info: Vec::new(),
resources: Vec::with_capacity(num_globals),
resource_offsets: Vec::with_capacity(num_globals),
workgroup_id_offset_loc: None,
}
}
fn add_global(&mut self, res: &'a PipeResourceOwned, offset: usize) {
self.resource_info.push((res.borrow(), self.input.len()));
self.resources.push(res.borrow());
self.resource_offsets.push(self.input.len());
self.add_pointer(offset as u64);
}
@@ -1304,15 +1307,14 @@ impl<'a> KernelExecBuilder<'a> {
/// Returns the tracked resources together with, for each resource, a pointer to its slot in the
/// managed kernel input buffer. Before the input buffer is used, those slots need to be filled
/// with the GPU addresses of the resources.
fn get_resources_and_globals(&mut self) -> (Vec<&PipeResource>, Vec<*mut u32>) {
let mut resources = Vec::with_capacity(self.resource_info.len());
let mut globals: Vec<*mut u32> = Vec::with_capacity(self.resource_info.len());
for &(res, offset) in &self.resource_info {
resources.push(res);
globals.push(unsafe { self.input.as_mut_ptr().byte_add(offset) }.cast());
}
fn get_resources_and_globals<'s>(&'s mut self) -> (&'s mut [&'a PipeResource], Vec<*mut u32>) {
let globals = self
.resource_offsets
.iter()
.map(|&offset| unsafe { self.input.as_mut_ptr().byte_add(offset) }.cast())
.collect();
(resources, globals)
(&mut self.resources, globals)
}
fn input(&self) -> &[u8] {
@@ -1487,7 +1489,8 @@ impl Kernel {
};
let nir_kernel_build = &nir_kernel_builds[variant];
let mut exec_builder = KernelExecBuilder::new(ctx.dev, nir_kernel_build.input_size);
let mut exec_builder =
KernelExecBuilder::new(ctx.dev, nir_kernel_build.input_size, buffer_arcs.len() + 2);
// Set it once so we get the alignment padding right
let static_local_size: u64 = nir_kernel_build.shared_size;
let mut variable_local_size: u64 = static_local_size;
@@ -1711,13 +1714,13 @@ impl Kernel {
// subtract the shader local_size as we only request something on top of that.
variable_local_size -= static_local_size;
let (mut resources, mut globals) = exec_builder.get_resources_and_globals();
let (resources, mut globals) = exec_builder.get_resources_and_globals();
ctx.bind_kernel(&nir_kernel_builds, variant)?;
ctx.bind_sampler_states(samplers);
ctx.bind_sampler_views(sviews);
ctx.bind_shader_images(&iviews);
ctx.set_global_binding(resources.as_mut_slice(), &mut globals);
ctx.set_global_binding(resources, &mut globals);
for z in 0..grid[2].div_ceil(hw_max_grid[2]) {
for y in 0..grid[1].div_ceil(hw_max_grid[1]) {