diff --git a/src/intel/ds/intel_driver_ds.cc b/src/intel/ds/intel_driver_ds.cc
index 7c1377cd4bf..9032aad3e7d 100644
--- a/src/intel/ds/intel_driver_ds.cc
+++ b/src/intel/ds/intel_driver_ds.cc
@@ -316,7 +316,7 @@ end_event(struct intel_ds_queue *queue, uint64_t ts_ns,
       event->set_duration(ts_ns - start_ns);
       event->set_submission_id(submission_id);
 
-      if (payload && payload_as_extra) {
+      if ((payload || indirect_data) && payload_as_extra) {
          payload_as_extra(event, payload, indirect_data);
       }
    });
@@ -427,6 +427,7 @@ CREATE_DUAL_EVENT_CALLBACK(draw_mesh_indirect, INTEL_DS_QUEUE_STAGE_DRAW_MESH)
 CREATE_DUAL_EVENT_CALLBACK(draw_mesh_indirect_count, INTEL_DS_QUEUE_STAGE_DRAW_MESH)
 CREATE_DUAL_EVENT_CALLBACK(xfb, INTEL_DS_QUEUE_STAGE_CMD_BUFFER)
 CREATE_DUAL_EVENT_CALLBACK(compute, INTEL_DS_QUEUE_STAGE_COMPUTE)
+CREATE_DUAL_EVENT_CALLBACK(compute_indirect, INTEL_DS_QUEUE_STAGE_COMPUTE)
 CREATE_DUAL_EVENT_CALLBACK(generate_draws, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)
 CREATE_DUAL_EVENT_CALLBACK(generate_commands, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)
 CREATE_DUAL_EVENT_CALLBACK(trace_copy, INTEL_DS_QUEUE_STAGE_INTERNAL_OPS)
diff --git a/src/intel/ds/intel_tracepoints.py b/src/intel/ds/intel_tracepoints.py
index 665d173319e..902aeeb7743 100644
--- a/src/intel/ds/intel_tracepoints.py
+++ b/src/intel/ds/intel_tracepoints.py
@@ -41,6 +41,7 @@ def define_tracepoints(args):
     from u_trace import TracepointArgStruct as ArgStruct
 
     Header('intel_driver_ds.h', scope=HeaderScope.SOURCE)
+    Header('vulkan/vulkan_core.h', scope=HeaderScope.SOURCE|HeaderScope.PERFETTO)
     Header('blorp/blorp_priv.h', scope=HeaderScope.HEADER)
     Header('ds/intel_driver_ds.h', scope=HeaderScope.HEADER)
 
@@ -162,9 +163,11 @@ def define_tracepoints(args):
     begin_end_tp('draw_indexed_indirect',
                  tp_args=[Arg(type='uint32_t', var='draw_count', c_format='%u'),])
     begin_end_tp('draw_indirect_count',
-                 tp_args=[Arg(type='uint32_t', var='max_draw_count', c_format='%u'),])
+                 tp_args=[Arg(type='uint32_t', var='draw_count', c_format='%u',
+                              is_indirect=True),])
     begin_end_tp('draw_indexed_indirect_count',
-                 tp_args=[Arg(type='uint32_t', var='max_draw_count', c_format='%u'),])
+                 tp_args=[Arg(type='uint32_t', var='draw_count', c_format='%u',
+                              is_indirect=True),])
 
     begin_end_tp('draw_mesh',
                  tp_args=[Arg(type='uint32_t', var='group_x', c_format='%u'),
@@ -173,7 +176,8 @@ def define_tracepoints(args):
     begin_end_tp('draw_mesh_indirect',
                  tp_args=[Arg(type='uint32_t', var='draw_count', c_format='%u'),])
     begin_end_tp('draw_mesh_indirect_count',
-                 tp_args=[Arg(type='uint32_t', var='max_draw_count', c_format='%u'),])
+                 tp_args=[Arg(type='uint32_t', var='draw_count', c_format='%u',
+                              is_indirect=True),])
 
     begin_end_tp('compute',
                  tp_args=[Arg(type='uint32_t', var='group_x', c_format='%u'),
@@ -181,6 +185,12 @@ def define_tracepoints(args):
                           Arg(type='uint32_t', var='group_z', c_format='%u'),],
                  compute=True)
 
+    begin_end_tp('compute_indirect',
+                 tp_args=[ArgStruct(type='VkDispatchIndirectCommand', var='size',
+                                    is_indirect=True, c_format="%ux%ux%u",
+                                    fields=['x', 'y', 'z'])],
+                 compute=True)
+
     # Used to identify copies generated by utrace
     begin_end_tp('trace_copy',
                  tp_args=[Arg(type='uint32_t', var='count', c_format='%u'),])
diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h
index 5bd07650217..8894841df45 100644
--- a/src/intel/vulkan/anv_genX.h
+++ b/src/intel/vulkan/anv_genX.h
@@ -242,6 +242,12 @@ void genX(cmd_emit_timestamp)(struct anv_batch *batch,
                               enum anv_timestamp_capture_type type,
                               void *data);
 
+void genX(cmd_capture_data)(struct anv_batch *batch,
+                            struct anv_device *device,
+                            struct anv_address dst_addr,
+                            struct anv_address src_addr,
+                            uint32_t size_B);
+
 void
 genX(batch_emit_post_3dprimitive_was)(struct anv_batch *batch,
                                       const struct anv_device *device,
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index d80751fc30e..335abd937d3 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -580,6 +580,16 @@ anv_address_physical(struct anv_address addr)
    return intel_canonical_address(address);
 }
 
+/* Convert an anv_address into a u_trace_address (same BO and offset). */
+static inline struct u_trace_address
+anv_address_utrace(struct anv_address addr)
+{
+   return (struct u_trace_address) {
+      .bo = addr.bo,
+      .offset = addr.offset,
+   };
+}
+
 static inline struct anv_address
 anv_address_add(struct anv_address addr, uint64_t offset)
 {
@@ -1221,6 +1231,9 @@ struct anv_physical_device {
 
     void (*cmd_emit_timestamp)(struct anv_batch *, struct anv_device *, struct anv_address,
                                enum anv_timestamp_capture_type, void *);
+    void (*cmd_capture_data)(struct anv_batch *, struct anv_device *,
+                             struct anv_address, struct anv_address,
+                             uint32_t);
     struct intel_measure_device                 measure_device;
 
     /* Value of PIPELINE_SELECT::PipelineSelection == GPGPU */
diff --git a/src/intel/vulkan/anv_utrace.c b/src/intel/vulkan/anv_utrace.c
index 4ad2e946c5f..3fba950afa6 100644
--- a/src/intel/vulkan/anv_utrace.c
+++ b/src/intel/vulkan/anv_utrace.c
@@ -102,9 +102,6 @@ anv_device_utrace_emit_gfx_copy_buffer(struct u_trace_context *utctx,
                                        void *ts_to, uint64_t to_offset_B,
                                        uint64_t size_B)
 {
-   assert(from_offset_B % sizeof(union anv_utrace_timestamp) == 0);
-   assert(to_offset_B % sizeof(union anv_utrace_timestamp) == 0);
-
    struct anv_device *device =
       container_of(utctx, struct anv_device, ds.trace_context);
    struct anv_memcpy_state *memcpy_state = cmdstream;
@@ -124,9 +121,6 @@ anv_device_utrace_emit_cs_copy_buffer(struct u_trace_context *utctx,
                                       void *ts_to, uint64_t to_offset_B,
                                       uint64_t size_B)
 {
-   assert(from_offset_B % sizeof(union anv_utrace_timestamp) == 0);
-   assert(to_offset_B % sizeof(union anv_utrace_timestamp) == 0);
-
    struct anv_device *device =
       container_of(utctx, struct anv_device, ds.trace_context);
    struct anv_simple_shader *simple_state = cmdstream;
@@ -436,6 +430,47 @@ anv_utrace_read_ts(struct u_trace_context *utctx,
    return intel_device_info_timebase_scale(device->info, ts->timestamp);
 }
 
+/* u_trace capture_data callback: emit a copy of size_B bytes from
+ * src_buffer + src_offset_B to dst_buffer + dst_offset_B, either into cs or,
+ * when cs is NULL, into the batch of the command buffer that embeds ut.
+ */
+static void
+anv_utrace_capture_data(struct u_trace *ut,
+                        void *cs,
+                        void *dst_buffer,
+                        uint64_t dst_offset_B,
+                        void *src_buffer,
+                        uint64_t src_offset_B,
+                        uint32_t size_B)
+{
+   struct anv_device *device =
+      container_of(ut->utctx, struct anv_device, ds.trace_context);
+   struct anv_cmd_buffer *cmd_buffer =
+      container_of(ut, struct anv_cmd_buffer, trace);
+   /* cmd_buffer is only valid if cs == NULL */
+   struct anv_batch *batch = cs != NULL ? cs : &cmd_buffer->batch;
+   struct anv_address dst_addr = {
+      .bo = dst_buffer,
+      .offset = dst_offset_B,
+   };
+   struct anv_address src_addr = {
+      .bo = src_buffer,
+      .offset = src_offset_B,
+   };
+
+   device->physical->cmd_capture_data(batch, device, dst_addr, src_addr, size_B);
+}
+
+/* u_trace get_data callback: CPU pointer to offset_B within the BO mapping. */
+static const void *
+anv_utrace_get_data(struct u_trace_context *utctx,
+                    void *buffer, uint64_t offset_B, uint32_t size_B)
+{
+   struct anv_bo *bo = buffer;
+
+   return bo->map + offset_B;
+}
+
 void
 anv_device_utrace_init(struct anv_device *device)
 {
@@ -449,13 +484,13 @@ anv_device_utrace_init(struct anv_device *device)
    u_trace_context_init(&device->ds.trace_context,
                         &device->ds,
                         device->utrace_timestamp_size,
-                        0,
+                        sizeof(VkDispatchIndirectCommand),
                         anv_utrace_create_buffer,
                         anv_utrace_destroy_buffer,
                         anv_utrace_record_ts,
                         anv_utrace_read_ts,
-                        NULL,
-                        NULL,
+                        anv_utrace_capture_data,
+                        anv_utrace_get_data,
                         anv_utrace_delete_submit);
 
    for (uint32_t q = 0; q < device->queue_count; q++) {
diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
index 65f822a1338..60dddfc8210 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -5865,6 +5865,19 @@ void genX(cmd_emit_timestamp)(struct anv_batch *batch,
    }
 }
 
+/* Emit, via the MI builder, a copy of size_B bytes from src_addr to dst_addr. */
+void genX(cmd_capture_data)(struct anv_batch *batch,
+                            struct anv_device *device,
+                            struct anv_address dst_addr,
+                            struct anv_address src_addr,
+                            uint32_t size_B)
+{
+   struct mi_builder b;
+   mi_builder_init(&b, device->info, batch);
+   mi_builder_set_mocs(&b, isl_mocs(&device->isl_dev, 0, false));
+   mi_memcpy(&b, dst_addr, src_addr, size_B);
+}
+
 void genX(batch_emit_secondary_call)(struct anv_batch *batch,
                                      struct anv_device *device,
                                      struct anv_address secondary_addr,
diff --git a/src/intel/vulkan/genX_cmd_compute.c b/src/intel/vulkan/genX_cmd_compute.c
index a59a86edbc3..4045143eeeb 100644
--- a/src/intel/vulkan/genX_cmd_compute.c
+++ b/src/intel/vulkan/genX_cmd_compute.c
@@ -529,7 +529,7 @@ void genX(CmdDispatchIndirect)(
                         INTEL_SNAPSHOT_COMPUTE,
                         "compute indirect",
                         0);
-   trace_intel_begin_compute(&cmd_buffer->trace);
+   trace_intel_begin_compute_indirect(&cmd_buffer->trace);
 
    if (prog_data->uses_num_work_groups) {
       cmd_buffer->state.compute.num_workgroups = addr;
@@ -545,7 +545,8 @@ void genX(CmdDispatchIndirect)(
 
    emit_cs_walker(cmd_buffer, pipeline, prog_data, addr, 0, 0, 0);
 
-   trace_intel_end_compute(&cmd_buffer->trace, 0, 0, 0);
+   trace_intel_end_compute_indirect(&cmd_buffer->trace,
+                                    anv_address_utrace(addr));
 }
 
 struct anv_address
diff --git a/src/intel/vulkan/genX_cmd_draw.c b/src/intel/vulkan/genX_cmd_draw.c
index 9cd51af5c20..b2be4f72739 100644
--- a/src/intel/vulkan/genX_cmd_draw.c
+++ b/src/intel/vulkan/genX_cmd_draw.c
@@ -2021,7 +2021,8 @@ void genX(CmdDrawIndirectCount)(
                                 false /* indexed */);
    }
 
-   trace_intel_end_draw_indirect_count(&cmd_buffer->trace, maxDrawCount);
+   trace_intel_end_draw_indirect_count(&cmd_buffer->trace,
+                                       anv_address_utrace(count_address));
 }
 
 void genX(CmdDrawIndexedIndirectCount)(
@@ -2069,7 +2070,8 @@ void genX(CmdDrawIndexedIndirectCount)(
                                 true /* indexed */);
    }
 
-   trace_intel_end_draw_indexed_indirect_count(&cmd_buffer->trace, maxDrawCount);
+   trace_intel_end_draw_indexed_indirect_count(&cmd_buffer->trace,
+                                               anv_address_utrace(count_address));
 }
 
 
@@ -2345,10 +2347,11 @@ genX(CmdDrawMeshTasksIndirectCountEXT)(
    const uint32_t mocs = anv_mocs_for_address(cmd_buffer->device, &count_buffer->address);
    mi_builder_set_mocs(&b, mocs);
 
+   struct anv_address count_addr =
+      anv_address_add(count_buffer->address, countBufferOffset);
    struct mi_value max =
          prepare_for_draw_count_predicate(
-            cmd_buffer, &b,
-            anv_address_add(count_buffer->address, countBufferOffset));
+            cmd_buffer, &b, count_addr);
 
    for (uint32_t i = 0; i < maxDrawCount; i++) {
       struct anv_address draw = anv_address_add(buffer->address, offset);
@@ -2362,7 +2365,8 @@ genX(CmdDrawMeshTasksIndirectCountEXT)(
       offset += stride;
    }
 
-   trace_intel_end_draw_mesh_indirect_count(&cmd_buffer->trace, maxDrawCount);
+   trace_intel_end_draw_mesh_indirect_count(&cmd_buffer->trace,
+                                            anv_address_utrace(count_addr));
 }
 
 #endif /* GFX_VERx10 >= 125 */
diff --git a/src/intel/vulkan/genX_init_state.c b/src/intel/vulkan/genX_init_state.c
index c653a3dfcfd..205a098d03d 100644
--- a/src/intel/vulkan/genX_init_state.c
+++ b/src/intel/vulkan/genX_init_state.c
@@ -844,6 +844,7 @@ genX(init_physical_device_state)(ASSERTED struct anv_physical_device *pdevice)
 #endif
 
    pdevice->cmd_emit_timestamp = genX(cmd_emit_timestamp);
+   pdevice->cmd_capture_data = genX(cmd_capture_data);
 
    pdevice->gpgpu_pipeline_value = GPGPU;
 
diff --git a/src/intel/vulkan_hasvk/anv_genX.h b/src/intel/vulkan_hasvk/anv_genX.h
index f1326939265..e11558e2f31 100644
--- a/src/intel/vulkan_hasvk/anv_genX.h
+++ b/src/intel/vulkan_hasvk/anv_genX.h
@@ -134,6 +134,12 @@ void genX(cmd_emit_timestamp)(struct anv_batch *batch,
                               struct anv_address addr,
                               enum anv_timestamp_capture_type);
 
+void genX(cmd_capture_data)(struct anv_batch *batch,
+                            struct anv_device *device,
+                            struct anv_address dst_addr,
+                            struct anv_address src_addr,
+                            uint32_t size_B);
+
 void
 genX(rasterization_mode)(VkPolygonMode raster_mode,
                          VkLineRasterizationModeEXT line_mode,
diff --git a/src/intel/vulkan_hasvk/anv_private.h b/src/intel/vulkan_hasvk/anv_private.h
index 315863a38e6..4fffe0f2cff 100644
--- a/src/intel/vulkan_hasvk/anv_private.h
+++ b/src/intel/vulkan_hasvk/anv_private.h
@@ -493,6 +493,16 @@ anv_address_physical(struct anv_address addr)
    }
 }
 
+/* Convert an anv_address into a u_trace_address (same BO and offset). */
+static inline struct u_trace_address
+anv_address_utrace(struct anv_address addr)
+{
+   return (struct u_trace_address) {
+      .bo = addr.bo,
+      .offset = addr.offset,
+   };
+}
+
 static inline struct anv_address
 anv_address_add(struct anv_address addr, uint64_t offset)
 {
@@ -909,7 +919,11 @@ struct anv_physical_device {
     int64_t                                     master_minor;
     struct intel_query_engine_info *            engine_info;
 
-    void (*cmd_emit_timestamp)(struct anv_batch *, struct anv_device *, struct anv_address, enum anv_timestamp_capture_type);
+    void (*cmd_emit_timestamp)(struct anv_batch *, struct anv_device *,
+                               struct anv_address, enum anv_timestamp_capture_type);
+    void (*cmd_capture_data)(struct anv_batch *, struct anv_device *,
+                             struct anv_address, struct anv_address,
+                             uint32_t);
     struct intel_measure_device                 measure_device;
 };
 
diff --git a/src/intel/vulkan_hasvk/anv_utrace.c b/src/intel/vulkan_hasvk/anv_utrace.c
index 1ed6453ca7f..2c5f6c2d91d 100644
--- a/src/intel/vulkan_hasvk/anv_utrace.c
+++ b/src/intel/vulkan_hasvk/anv_utrace.c
@@ -272,6 +272,47 @@ anv_utrace_read_ts(struct u_trace_context *utctx,
    return intel_device_info_timebase_scale(device->info, *ts);
 }
 
+/* u_trace capture_data callback: emit a copy of size_B bytes from
+ * src_buffer + src_offset_B to dst_buffer + dst_offset_B, either into cs or,
+ * when cs is NULL, into the batch of the command buffer that embeds ut.
+ */
+static void
+anv_utrace_capture_data(struct u_trace *ut,
+                        void *cs,
+                        void *dst_buffer,
+                        uint64_t dst_offset_B,
+                        void *src_buffer,
+                        uint64_t src_offset_B,
+                        uint32_t size_B)
+{
+   struct anv_device *device =
+      container_of(ut->utctx, struct anv_device, ds.trace_context);
+   struct anv_cmd_buffer *cmd_buffer =
+      container_of(ut, struct anv_cmd_buffer, trace);
+   /* cmd_buffer is only valid if cs == NULL */
+   struct anv_batch *batch = cs != NULL ? cs : &cmd_buffer->batch;
+   struct anv_address dst_addr = {
+      .bo = dst_buffer,
+      .offset = dst_offset_B,
+   };
+   struct anv_address src_addr = {
+      .bo = src_buffer,
+      .offset = src_offset_B,
+   };
+
+   device->physical->cmd_capture_data(batch, device, dst_addr, src_addr, size_B);
+}
+
+/* u_trace get_data callback: CPU pointer to offset_B within the BO mapping. */
+static const void *
+anv_utrace_get_data(struct u_trace_context *utctx, void *buffer,
+                    uint64_t offset_B, uint32_t size_B)
+{
+   struct anv_bo *bo = buffer;
+
+   return bo->map + offset_B;
+}
+
 void
 anv_device_utrace_init(struct anv_device *device)
 {
@@ -287,8 +328,8 @@ anv_device_utrace_init(struct anv_device *device)
                         anv_utrace_destroy_buffer,
                         anv_utrace_record_ts,
                         anv_utrace_read_ts,
-                        NULL,
-                        NULL,
+                        anv_utrace_capture_data,
+                        anv_utrace_get_data,
                         anv_utrace_delete_flush_data);
 
    for (uint32_t q = 0; q < device->queue_count; q++) {
diff --git a/src/intel/vulkan_hasvk/genX_cmd_buffer.c b/src/intel/vulkan_hasvk/genX_cmd_buffer.c
index 0c3bb12871b..c37f83b2126 100644
--- a/src/intel/vulkan_hasvk/genX_cmd_buffer.c
+++ b/src/intel/vulkan_hasvk/genX_cmd_buffer.c
@@ -4193,7 +4193,8 @@ void genX(CmdDrawIndirectCount)(
 
    mi_value_unref(&b, max);
 
-   trace_intel_end_draw_indirect_count(&cmd_buffer->trace, maxDrawCount);
+   trace_intel_end_draw_indirect_count(&cmd_buffer->trace,
+                                       anv_address_utrace(count_address));
 }
 
 void genX(CmdDrawIndexedIndirectCount)(
@@ -4263,8 +4264,8 @@ void genX(CmdDrawIndexedIndirectCount)(
 
    mi_value_unref(&b, max);
 
-   trace_intel_end_draw_indexed_indirect_count(&cmd_buffer->trace, maxDrawCount);
-
+   trace_intel_end_draw_indexed_indirect_count(&cmd_buffer->trace,
+                                               anv_address_utrace(count_address));
 }
 
 void genX(CmdBeginTransformFeedbackEXT)(
@@ -6031,3 +6032,15 @@ void genX(cmd_emit_timestamp)(struct anv_batch *batch,
       unreachable("invalid");
    }
 }
+
+/* Emit, via the MI builder, a copy of size_B bytes from src_addr to dst_addr. */
+void genX(cmd_capture_data)(struct anv_batch *batch,
+                            struct anv_device *device,
+                            struct anv_address dst_addr,
+                            struct anv_address src_addr,
+                            uint32_t size_B)
+{
+   struct mi_builder b;
+   mi_builder_init(&b, device->info, batch);
+   mi_memcpy(&b, dst_addr, src_addr, size_B);
+}
diff --git a/src/intel/vulkan_hasvk/genX_state.c b/src/intel/vulkan_hasvk/genX_state.c
index cb6e3f95a52..6f1e1fa3a26 100644
--- a/src/intel/vulkan_hasvk/genX_state.c
+++ b/src/intel/vulkan_hasvk/genX_state.c
@@ -108,6 +108,7 @@ genX(init_physical_device_state)(ASSERTED struct anv_physical_device *pdevice)
    assert(pdevice->info.verx10 == GFX_VERx10);
 
    pdevice->cmd_emit_timestamp = genX(cmd_emit_timestamp);
+   pdevice->cmd_capture_data = genX(cmd_capture_data);
 }
 
 VkResult