From 3984875792fa00a93ac2863681238c4bd8069111 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 28 May 2024 17:33:28 +0300 Subject: [PATCH] u_trace: extend tracepoint end_of_pipe bit into flags We ran into an issue with Intel drivers where it became tricky to tell whether a timestamp must be recorded with a special end-of-pipe compute instruction or something else. We initially tried to deal with that internally by checking some state in the command buffers but turns out it doesn't work. This change adds a flag field to the tracepoint to have that information there and the flags are passed to the record_ts vfunc. Signed-off-by: Lionel Landwerlin Reviewed-by: Danylo Piliaiev Part-of: --- src/freedreno/vulkan/tu_device.cc | 2 +- .../drivers/freedreno/freedreno_context.c | 2 +- src/gallium/drivers/iris/iris_utrace.c | 8 ++++--- .../drivers/radeonsi/si_tracepoints.py | 4 +--- src/gallium/drivers/radeonsi/si_utrace.c | 4 ++-- src/intel/ds/intel_driver_ds.h | 13 +++++++++++ src/intel/ds/intel_tracepoints.py | 15 ++++++++++--- src/intel/vulkan/anv_utrace.c | 22 +++++++++++-------- src/intel/vulkan_hasvk/anv_utrace.c | 7 +++--- src/util/perf/u_trace.c | 3 +-- src/util/perf/u_trace.h | 2 +- src/util/perf/u_trace.py | 6 ++--- src/util/perf/u_trace_priv.h | 6 ++--- 13 files changed, 59 insertions(+), 35 deletions(-) diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc index 83a46ad0e65..eb815baacb7 100644 --- a/src/freedreno/vulkan/tu_device.cc +++ b/src/freedreno/vulkan/tu_device.cc @@ -1696,7 +1696,7 @@ tu_trace_destroy_ts_buffer(struct u_trace_context *utctx, void *timestamps) template static void tu_trace_record_ts(struct u_trace *ut, void *cs, void *timestamps, - unsigned idx, bool end_of_pipe) + unsigned idx, uint32_t) { struct tu_bo *bo = (struct tu_bo *) timestamps; struct tu_cs *ts_cs = (struct tu_cs *) cs; diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c index e7f0981514b..500cff9178e 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.c +++ b/src/gallium/drivers/freedreno/freedreno_context.c @@ -500,7 +500,7 @@ fd_get_device_reset_status(struct pipe_context *pctx) static void fd_trace_record_ts(struct u_trace *ut, void *cs, void *timestamps, - unsigned idx, bool end_of_pipe) + unsigned idx, uint32_t flags) { struct fd_batch *batch = container_of(ut, struct fd_batch, trace); struct fd_ringbuffer *ring = cs; diff --git a/src/gallium/drivers/iris/iris_utrace.c b/src/gallium/drivers/iris/iris_utrace.c index f320b6a5df0..477e620b68d 100644 --- a/src/gallium/drivers/iris/iris_utrace.c +++ b/src/gallium/drivers/iris/iris_utrace.c @@ -92,7 +92,7 @@ iris_utrace_delete_ts_buffer(struct u_trace_context *utctx, void *timestamps) static void iris_utrace_record_ts(struct u_trace *trace, void *cs, void *timestamps, unsigned idx, - bool end_of_pipe) + uint32_t flags) { struct iris_batch *batch = container_of(trace, struct iris_batch, trace); struct iris_context *ice = batch->ice; @@ -102,12 +102,14 @@ iris_utrace_record_ts(struct u_trace *trace, void *cs, iris_use_pinned_bo(batch, bo, true, IRIS_DOMAIN_NONE); const bool is_end_compute = - (cs == NULL && ice->utrace.last_compute_walker != NULL && end_of_pipe); + cs == NULL && + (flags & INTEL_DS_TRACEPOINT_FLAG_END_OF_PIPE_CS); if (is_end_compute) { + assert(ice->utrace.last_compute_walker != NULL); batch->screen->vtbl.rewrite_compute_walker_pc( batch, ice->utrace.last_compute_walker, bo, ts_offset); ice->utrace.last_compute_walker = NULL; - } else if (end_of_pipe) { + } else if (flags & INTEL_DS_TRACEPOINT_FLAG_END_OF_PIPE) { iris_emit_pipe_control_write(batch, "query: pipelined snapshot write", PIPE_CONTROL_WRITE_TIMESTAMP, bo, ts_offset, 0ull); diff --git a/src/gallium/drivers/radeonsi/si_tracepoints.py b/src/gallium/drivers/radeonsi/si_tracepoints.py index 00320b9c963..bb981889e3c 100644 --- a/src/gallium/drivers/radeonsi/si_tracepoints.py +++ b/src/gallium/drivers/radeonsi/si_tracepoints.py @@ -26,8 +26,7 @@ def define_tracepoints(args): def begin_end_tp(name, tp_args=[], tp_struct=None, tp_print=None, - tp_default_enabled=True, end_pipelined=True, - need_cs_param=False): + tp_default_enabled=True, need_cs_param=False): global si_default_tps if tp_default_enabled: si_default_tps.append(name) @@ -41,7 +40,6 @@ def define_tracepoints(args): tp_struct=tp_struct, tp_perfetto='si_ds_end_{0}'.format(name), tp_print=tp_print, - end_of_pipe=end_pipelined, need_cs_param=need_cs_param) # Various draws/dispatch, radeonsi diff --git a/src/gallium/drivers/radeonsi/si_utrace.c b/src/gallium/drivers/radeonsi/si_utrace.c index 847863c37a9..eb45468588e 100644 --- a/src/gallium/drivers/radeonsi/si_utrace.c +++ b/src/gallium/drivers/radeonsi/si_utrace.c @@ -12,8 +12,8 @@ #include "util/hash_table.h" -static void si_utrace_record_ts(struct u_trace *trace, void *cs, void *timestamps, - unsigned idx, bool end_of_pipe) +static void si_utrace_record_ts(struct u_trace *trace, void *cs, void *timestamps, + unsigned idx, uint32_t flags) { struct si_context *ctx = container_of(trace, struct si_context, trace); struct pipe_resource *buffer = timestamps; diff --git a/src/intel/ds/intel_driver_ds.h b/src/intel/ds/intel_driver_ds.h index 50b0c5af0cb..505abb8eb53 100644 --- a/src/intel/ds/intel_driver_ds.h +++ b/src/intel/ds/intel_driver_ds.h @@ -61,6 +61,19 @@ enum intel_ds_stall_flag { INTEL_DS_CCS_CACHE_FLUSH_BIT = BITFIELD_BIT(16), }; +enum intel_ds_tracepoint_flags { + /** + * Whether the tracepoint's timestamp must be recorded with as an + * end-of-pipe timestamp. + */ + INTEL_DS_TRACEPOINT_FLAG_END_OF_PIPE = BITFIELD_BIT(0), + /** + * Whether this tracepoint's timestamp is recorded on the compute pipeline. + */ + INTEL_DS_TRACEPOINT_FLAG_END_OF_PIPE_CS = BITFIELD_BIT(1), + +}; + /* Convert internal driver PIPE_CONTROL stall bits to intel_ds_stall_flag. */ typedef enum intel_ds_stall_flag (*intel_ds_stall_cb_t)(uint32_t flags); diff --git a/src/intel/ds/intel_tracepoints.py b/src/intel/ds/intel_tracepoints.py index 9ce5f503c0a..ac49f0e3459 100644 --- a/src/intel/ds/intel_tracepoints.py +++ b/src/intel/ds/intel_tracepoints.py @@ -46,6 +46,7 @@ def define_tracepoints(args): def begin_end_tp(name, tp_args=[], tp_struct=None, tp_print=None, tp_default_enabled=True, end_pipelined=True, + compute=False, need_cs_param=False): global intel_default_tps if tp_default_enabled: @@ -54,13 +55,19 @@ def define_tracepoints(args): toggle_name=name, tp_perfetto='intel_ds_begin_{0}'.format(name), need_cs_param=need_cs_param) + tp_flags = [] + if end_pipelined: + if compute: + tp_flags.append('INTEL_DS_TRACEPOINT_FLAG_END_OF_PIPE_CS') + else: + tp_flags.append('INTEL_DS_TRACEPOINT_FLAG_END_OF_PIPE') Tracepoint('intel_end_{0}'.format(name), toggle_name=name, args=tp_args, tp_struct=tp_struct, tp_perfetto='intel_ds_end_{0}'.format(name), tp_print=tp_print, - end_of_pipe=end_pipelined, + tp_flags=tp_flags, need_cs_param=need_cs_param) # Frame tracepoints @@ -175,7 +182,8 @@ def define_tracepoints(args): tp_args=[Arg(type='uint32_t', var='group_x', c_format='%u'), Arg(type='uint32_t', var='group_y', c_format='%u'), Arg(type='uint32_t', var='group_z', c_format='%u'),], - tp_print=['group=%ux%ux%u', '__entry->group_x', '__entry->group_y', '__entry->group_z']) + tp_print=['group=%ux%ux%u', '__entry->group_x', '__entry->group_y', '__entry->group_z'], + compute=True) # Used to identify copies generated by utrace begin_end_tp('trace_copy', @@ -190,7 +198,8 @@ def define_tracepoints(args): tp_args=[Arg(type='uint32_t', var='group_x', c_format='%u'), Arg(type='uint32_t', var='group_y', c_format='%u'), Arg(type='uint32_t', var='group_z', c_format='%u'),], - tp_print=['group=%ux%ux%u', '__entry->group_x', '__entry->group_y', '__entry->group_z']) + tp_print=['group=%ux%ux%u', '__entry->group_x', '__entry->group_y', '__entry->group_z'], + compute=True) def flag_bits(args): bits = [Arg(type='enum intel_ds_stall_flag', name='flags', var='decode_cb(flags)', c_format='0x%x')] diff --git a/src/intel/vulkan/anv_utrace.c b/src/intel/vulkan/anv_utrace.c index bd55f3e79b6..ae812eec097 100644 --- a/src/intel/vulkan/anv_utrace.c +++ b/src/intel/vulkan/anv_utrace.c @@ -396,7 +396,7 @@ anv_utrace_destroy_ts_buffer(struct u_trace_context *utctx, void *timestamps) static void anv_utrace_record_ts(struct u_trace *ut, void *cs, void *timestamps, unsigned idx, - bool end_of_pipe) + uint32_t flags) { struct anv_device *device = container_of(ut->utctx, struct anv_device, ds.trace_context); @@ -414,15 +414,19 @@ anv_utrace_record_ts(struct u_trace *ut, void *cs, /* Is this a end of compute trace point? */ const bool is_end_compute = cs == NULL && - (cmd_buffer->last_compute_walker != NULL || - cmd_buffer->last_indirect_dispatch != NULL) && - end_of_pipe; + (flags & INTEL_DS_TRACEPOINT_FLAG_END_OF_PIPE_CS); + + assert(!is_end_compute || + cmd_buffer->state.last_indirect_dispatch != NULL || + cmd_buffer->state.last_compute_walker != NULL); + + enum anv_timestamp_capture_type capture_type = + is_end_compute ? + (cmd_buffer->state.last_indirect_dispatch != NULL ? + ANV_TIMESTAMP_REWRITE_INDIRECT_DISPATCH : ANV_TIMESTAMP_REWRITE_COMPUTE_WALKER) : + (flags & INTEL_DS_TRACEPOINT_FLAG_END_OF_PIPE) ? + ANV_TIMESTAMP_CAPTURE_END_OF_PIPE : ANV_TIMESTAMP_CAPTURE_TOP_OF_PIPE; - enum anv_timestamp_capture_type capture_type = end_of_pipe ? - (is_end_compute ? - (cmd_buffer->last_indirect_dispatch != NULL ? - ANV_TIMESTAMP_REWRITE_INDIRECT_DISPATCH : ANV_TIMESTAMP_REWRITE_COMPUTE_WALKER) : - ANV_TIMESTAMP_CAPTURE_END_OF_PIPE) : ANV_TIMESTAMP_CAPTURE_TOP_OF_PIPE; void *addr = capture_type == ANV_TIMESTAMP_REWRITE_INDIRECT_DISPATCH ? cmd_buffer->state.last_indirect_dispatch : diff --git a/src/intel/vulkan_hasvk/anv_utrace.c b/src/intel/vulkan_hasvk/anv_utrace.c index a986491f264..c7e849605e5 100644 --- a/src/intel/vulkan_hasvk/anv_utrace.c +++ b/src/intel/vulkan_hasvk/anv_utrace.c @@ -224,7 +224,7 @@ anv_utrace_destroy_ts_buffer(struct u_trace_context *utctx, void *timestamps) static void anv_utrace_record_ts(struct u_trace *ut, void *cs, void *timestamps, unsigned idx, - bool end_of_pipe) + uint32_t flags) { struct anv_cmd_buffer *cmd_buffer = container_of(ut, struct anv_cmd_buffer, trace); @@ -232,8 +232,9 @@ anv_utrace_record_ts(struct u_trace *ut, void *cs, struct anv_bo *bo = timestamps; enum anv_timestamp_capture_type capture_type = - (end_of_pipe) ? ANV_TIMESTAMP_CAPTURE_END_OF_PIPE - : ANV_TIMESTAMP_CAPTURE_TOP_OF_PIPE; + (flags & INTEL_DS_TRACEPOINT_FLAG_END_OF_PIPE) ? + ANV_TIMESTAMP_CAPTURE_END_OF_PIPE : + ANV_TIMESTAMP_CAPTURE_TOP_OF_PIPE; device->physical->cmd_emit_timestamp(&cmd_buffer->batch, device, (struct anv_address) { .bo = bo, diff --git a/src/util/perf/u_trace.c b/src/util/perf/u_trace.c index d90db9ec805..da5241557f7 100644 --- a/src/util/perf/u_trace.c +++ b/src/util/perf/u_trace.c @@ -853,8 +853,7 @@ u_trace_appendv(struct u_trace *ut, } /* record a timestamp for the trace: */ - ut->utctx->record_timestamp(ut, cs, chunk->timestamps, tp_idx, - tp->end_of_pipe); + ut->utctx->record_timestamp(ut, cs, chunk->timestamps, tp_idx, tp->flags); chunk->traces[tp_idx] = (struct u_trace_event) { .tp = tp, diff --git a/src/util/perf/u_trace.h b/src/util/perf/u_trace.h index 56479e95392..bd14fb7fe06 100644 --- a/src/util/perf/u_trace.h +++ b/src/util/perf/u_trace.h @@ -105,7 +105,7 @@ typedef void (*u_trace_record_ts)(struct u_trace *ut, void *cs, void *timestamps, unsigned idx, - bool end_of_pipe); + uint32_t flags); /** * Driver provided callback to read back a previously recorded timestamp. diff --git a/src/util/perf/u_trace.py b/src/util/perf/u_trace.py index b474f05dabe..4f5722f6648 100644 --- a/src/util/perf/u_trace.py +++ b/src/util/perf/u_trace.py @@ -34,7 +34,7 @@ class Tracepoint(object): """ def __init__(self, name, args=[], toggle_name=None, tp_struct=None, tp_print=None, tp_perfetto=None, - tp_markers=None, end_of_pipe=False, need_cs_param=True): + tp_markers=None, tp_flags=[], need_cs_param=True): """Parameters: - name: the tracepoint name, a tracepoint function with the given @@ -72,7 +72,7 @@ class Tracepoint(object): self.tp_print = tp_print self.tp_perfetto = tp_perfetto self.tp_markers = tp_markers - self.end_of_pipe = end_of_pipe + self.tp_flags = tp_flags self.toggle_name = toggle_name self.need_cs_param = need_cs_param @@ -449,7 +449,7 @@ static void __emit_label_${trace_name}(struct u_trace_context *utctx, void *cs, static const struct u_tracepoint __tp_${trace_name} = { ALIGN_POT(sizeof(struct trace_${trace_name}), 8), /* keep size 64b aligned */ "${trace_name}", - ${"true" if trace.end_of_pipe else "false"}, + ${0 if len(trace.tp_flags) == 0 else " | ".join(trace.tp_flags)}, ${index}, __print_${trace_name}, __print_json_${trace_name}, diff --git a/src/util/perf/u_trace_priv.h b/src/util/perf/u_trace_priv.h index a25811a48e8..8f76c18075d 100644 --- a/src/util/perf/u_trace_priv.h +++ b/src/util/perf/u_trace_priv.h @@ -46,11 +46,9 @@ struct u_tracepoint { unsigned payload_sz; const char *name; /** - * Whether this tracepoint's timestamp must be recorded with as an - * end-of-pipe timestamp (for some GPUs the recording timestamp instruction - * might be different for top/end of pipe). + * A bitfield of driver agnostic flags */ - bool end_of_pipe:1; + uint16_t flags; /** * Index of this tracepoint in _tracepoint_names in the generated * u_trace perfetto header. By associating these names with iids in setup,