diff --git a/src/gallium/frontends/rusticl/core/memory.rs b/src/gallium/frontends/rusticl/core/memory.rs index eef9be0de8f..82b54b53c04 100644 --- a/src/gallium/frontends/rusticl/core/memory.rs +++ b/src/gallium/frontends/rusticl/core/memory.rs @@ -9,6 +9,7 @@ use crate::core::queue::*; use crate::core::util::*; use crate::impl_cl_type_trait; use crate::impl_cl_type_trait_base; +use crate::perf_warning; use mesa_rust::pipe::context::*; use mesa_rust::pipe::resource::*; @@ -789,6 +790,8 @@ impl Buffer { CLVec::calc_offset_size(dst_origin, region, [1, dst_row_pitch, dst_slice_pitch]); let tx_dst = dst.tx(q, ctx, offset, size, RWFlags::WR)?; + perf_warning!("clEnqueueCopyBufferRect stalls the GPU"); + // TODO check to use hw accelerated paths (e.g. resource_copy_region or blits) sw_copy( tx_src.ptr(), @@ -877,6 +880,8 @@ impl Buffer { debug_assert!(src_pitch[0] != 0 && src_pitch[1] != 0 && src_pitch[2] != 0); debug_assert!(dst_pitch[0] != 0 && dst_pitch[1] != 0 && dst_pitch[2] != 0); + perf_warning!("clEnqueueCopyBufferToImage stalls the GPU"); + sw_copy( tx_src.ptr(), tx_dst.ptr(), @@ -938,6 +943,8 @@ impl Buffer { let ptr = ptr.as_ptr(); let tx = self.tx(q, ctx, offset, size, RWFlags::RD)?; + perf_warning!("clEnqueueReadBuffer and clEnqueueMapBuffer stall the GPU"); + unsafe { ptr::copy(tx.ptr(), ptr, size); } @@ -963,6 +970,8 @@ impl Buffer { CLVec::calc_offset_size(src_origin, region, [1, src_row_pitch, src_slice_pitch]); let tx = self.tx(q, ctx, offset, size, RWFlags::RD)?; + perf_warning!("clEnqueueReadBufferRect stalls the GPU"); + sw_copy( tx.ptr(), dst, @@ -1042,6 +1051,9 @@ impl Buffer { let ptr = ptr.as_ptr(); let offset = self.apply_offset(offset)?; let r = self.get_res_of_dev(q.device)?; + + perf_warning!("clEnqueueWriteBuffer and clEnqueueUnmapMemObject might stall the GPU"); + ctx.buffer_subdata( r, offset.try_into().map_err(|_| CL_OUT_OF_HOST_MEMORY)?, @@ -1069,6 +1081,8 @@ impl Buffer { CLVec::calc_offset_size(dst_origin, region, [1, dst_row_pitch, dst_slice_pitch]); let tx = self.tx(q, ctx, offset, size, RWFlags::WR)?; + perf_warning!("clEnqueueWriteBufferRect stalls the GPU"); + sw_copy( src, tx.ptr(), @@ -1130,6 +1144,8 @@ impl Image { debug_assert!(src_pitch[0] != 0 && src_pitch[1] != 0 && src_pitch[2] != 0); debug_assert!(dst_pitch[0] != 0 && dst_pitch[1] != 0 && dst_pitch[2] != 0); + perf_warning!("clEnqueueCopyImageToBuffer stalls the GPU"); + sw_copy( tx_src.ptr(), tx_dst.ptr(), @@ -1213,6 +1229,10 @@ impl Image { debug_assert!(src_pitch[0] != 0 && src_pitch[1] != 0 && src_pitch[2] != 0); debug_assert!(dst_pitch[0] != 0 && dst_pitch[1] != 0 && dst_pitch[2] != 0); + perf_warning!( + "clEnqueueCopyImage stalls the GPU when src or dst are created from a buffer" + ); + sw_copy( tx_src.ptr(), tx_dst.ptr(), @@ -1375,6 +1395,8 @@ impl Image { src_slice_pitch = tx.slice_pitch(); }; + perf_warning!("clEnqueueReadImage and clEnqueueMapImage stall the GPU"); + sw_copy( tx.ptr(), dst, @@ -1470,6 +1492,9 @@ impl Image { let dst_row_pitch = self.image_desc.image_row_pitch; let dst_slice_pitch = self.image_desc.image_slice_pitch; + // texture_subdata most likely maps the resource anyway + perf_warning!("clEnqueueWriteImage and clEnqueueUnmapMemObject stall the GPU"); + if let Some(Mem::Buffer(buffer)) = &self.parent { let pixel_size = self.image_format.pixel_size().unwrap(); let (offset, size) = CLVec::calc_offset_size(