From c58534155213b74ee5a7dbf4bf628e5fe44566c2 Mon Sep 17 00:00:00 2001 From: Tim Van Patten Date: Fri, 29 Aug 2025 14:25:44 -0600 Subject: [PATCH] intel/ds: Skip expensive timestamp query until necessary The Xe ioctl DRM_XE_DEVICE_QUERY_ENGINE_CYCLES provides accurate timestamps correlated between the CPU and GPU. However, it is slow and impacts performance while collecting Perfetto traces. Instead, use Perfetto's GetBootTimeNs() to track when to emit the BUILTIN_CLOCK_BOOTTIME clock sync event so it only occurs every 1 second. This reduces the impact of recording gpu.renderstages from -8% to -4%. More concretely, FPS measurements when tracing Unity BoatAttack demo on an Intel ADL device: * gpu.renderstages disabled: 48.044293667 * gpu.renderstages enabled: 38.119778333 (-20.66%) * gpu.renderstages enabled + this fix: 42.641818333 (-11.24%) Reviewed-by: Lionel Landwerlin Part-of: --- src/intel/ds/intel_driver_ds.cc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/intel/ds/intel_driver_ds.cc b/src/intel/ds/intel_driver_ds.cc index c11160652fe..0eab9067d1f 100644 --- a/src/intel/ds/intel_driver_ds.cc +++ b/src/intel/ds/intel_driver_ds.cc @@ -127,6 +127,10 @@ sync_timestamp(IntelRenderpassDataSource::TraceContext &ctx, struct intel_ds_device *device) { uint64_t cpu_ts, gpu_ts; + uint64_t boottime = perfetto::base::GetBootTimeNs().count(); + + if (boottime < device->next_clock_sync_ns) + return; if (!intel_gem_read_correlate_cpu_gpu_timestamp(device->fd, device->info.kmd_type, @@ -141,13 +145,10 @@ sync_timestamp(IntelRenderpassDataSource::TraceContext &ctx, uint32_t cpu_clock_id = perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME; gpu_ts = intel_device_info_timebase_scale(&device->info, gpu_ts); - if (cpu_ts < device->next_clock_sync_ns) - return; - PERFETTO_LOG("sending clocks gpu=0x%08x", device->gpu_clock_id); device->sync_gpu_ts = gpu_ts; - device->next_clock_sync_ns = cpu_ts + 1000000000ull; + device->next_clock_sync_ns = boottime +
1000000000ull; MesaRenderpassDataSource::EmitClockSync(ctx, cpu_ts, gpu_ts, cpu_clock_id, device->gpu_clock_id);