From f684f4efb03fea20c8ea137bea5b5a1681f1391e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?=
Date: Thu, 23 May 2024 14:07:05 -0700
Subject: [PATCH] intel/perf: Add support for LNL OA sample format size
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

LNL OA sample format is 576 bytes long while previous platforms were
256 bytes, so now we need a function to return the OA sample format
size.

Reviewed-by: Lionel Landwerlin
Signed-off-by: José Roberto de Souza
Part-of:
---
 src/intel/perf/i915/intel_perf.c  |  7 +++++--
 src/intel/perf/i915/intel_perf.h  |  2 +-
 src/intel/perf/intel_perf.c       | 17 ++++++++++++++---
 src/intel/perf/intel_perf.h       |  8 +-------
 src/intel/perf/intel_perf_query.c |  9 ++++++---
 src/intel/perf/xe/intel_perf.c    | 22 ++++++++++++----------
 src/intel/perf/xe/intel_perf.h    |  4 ++--
 7 files changed, 41 insertions(+), 28 deletions(-)

diff --git a/src/intel/perf/i915/intel_perf.c b/src/intel/perf/i915/intel_perf.c
index 5a619d54380..5a8c015d981 100644
--- a/src/intel/perf/i915/intel_perf.c
+++ b/src/intel/perf/i915/intel_perf.c
@@ -221,12 +221,15 @@ i915_oa_metrics_available(struct intel_perf_config *perf, int fd, bool use_regis
 }
 
 int
-i915_perf_stream_read_samples(int perf_stream_fd, uint8_t *buffer,
+i915_perf_stream_read_samples(struct intel_perf_config *perf_config,
+                              int perf_stream_fd, uint8_t *buffer,
                               size_t buffer_len)
 {
+   const size_t sample_header_size = perf_config->oa_sample_size +
+                                     sizeof(struct intel_perf_record_header);
    int len;
 
-   if (buffer_len < INTEL_PERF_OA_HEADER_SAMPLE_SIZE)
+   if (buffer_len < sample_header_size)
       return -ENOSPC;
 
    do {
diff --git a/src/intel/perf/i915/intel_perf.h b/src/intel/perf/i915/intel_perf.h
index 826b2632746..76ae5aa0c9c 100644
--- a/src/intel/perf/i915/intel_perf.h
+++ b/src/intel/perf/i915/intel_perf.h
@@ -19,7 +19,7 @@ int i915_perf_stream_open(struct intel_perf_config *perf_config, int drm_fd,
                           uint32_t ctx_id, uint64_t metrics_set_id,
                           uint64_t report_format, uint64_t period_exponent,
                           bool hold_preemption, bool enable);
-int i915_perf_stream_read_samples(int perf_stream_fd, uint8_t *buffer, size_t buffer_len);
+int i915_perf_stream_read_samples(struct intel_perf_config *perf_config, int perf_stream_fd, uint8_t *buffer, size_t buffer_len);
 int i915_perf_stream_set_state(int perf_stream_fd, bool enable);
 int i915_perf_stream_set_metrics_id(int perf_stream_fd, uint64_t metrics_set_id);
 
diff --git a/src/intel/perf/intel_perf.c b/src/intel/perf/intel_perf.c
index 6860b3f41da..cb1b6f67423 100644
--- a/src/intel/perf/intel_perf.c
+++ b/src/intel/perf/intel_perf.c
@@ -1445,7 +1445,7 @@ intel_perf_init_query_fields(struct intel_perf_config *perf_cfg,
                                   MAX_QUERY_FIELDS(devinfo));
 
    add_query_register(perf_cfg, INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC,
-                      0, 256, 0);
+                      0, perf_cfg->oa_sample_size, 0);
 
    if (use_register_snapshots) {
       if (devinfo->ver <= 11) {
@@ -1520,6 +1520,15 @@ intel_perf_init_query_fields(struct intel_perf_config *perf_cfg,
    layout->size = align(layout->size, 64);
 }
 
+static size_t
+intel_perf_get_oa_format_size(const struct intel_device_info *devinfo)
+{
+   if (devinfo->verx10 >= 200)
+      return 576;
+
+   return 256;
+}
+
 void
 intel_perf_init_metrics(struct intel_perf_config *perf_cfg,
                         const struct intel_device_info *devinfo,
@@ -1528,6 +1537,8 @@ intel_perf_init_metrics(struct intel_perf_config *perf_cfg,
                         bool use_register_snapshots)
 {
    perf_cfg->devinfo = devinfo;
+   perf_cfg->oa_sample_size = intel_perf_get_oa_format_size(devinfo);
+
    intel_perf_init_query_fields(perf_cfg, devinfo, use_register_snapshots);
 
    if (include_pipeline_statistics) {
@@ -1608,9 +1619,9 @@ intel_perf_stream_read_samples(struct intel_perf_config *perf_config,
 {
    switch (perf_config->devinfo->kmd_type) {
    case INTEL_KMD_TYPE_I915:
-      return i915_perf_stream_read_samples(perf_stream_fd, buffer, buffer_len);
+      return i915_perf_stream_read_samples(perf_config, perf_stream_fd, buffer, buffer_len);
    case INTEL_KMD_TYPE_XE:
-      return xe_perf_stream_read_samples(perf_stream_fd, buffer, buffer_len);
+      return xe_perf_stream_read_samples(perf_config, perf_stream_fd, buffer, buffer_len);
    default:
       unreachable("missing");
       return -1;
diff --git a/src/intel/perf/intel_perf.h b/src/intel/perf/intel_perf.h
index a02500a37fa..60db9a2f98a 100644
--- a/src/intel/perf/intel_perf.h
+++ b/src/intel/perf/intel_perf.h
@@ -125,13 +125,6 @@ struct intel_pipeline_stat {
 #define STATS_BO_END_OFFSET_BYTES (STATS_BO_SIZE / 2)
 #define MAX_STAT_COUNTERS (STATS_BO_END_OFFSET_BYTES / 8)
 
-/* Up to now all platforms uses the same sample size */
-#define INTEL_PERF_OA_SAMPLE_SIZE 256
-
-/* header + sample */
-#define INTEL_PERF_OA_HEADER_SAMPLE_SIZE (sizeof(struct intel_perf_record_header) + \
-                                          INTEL_PERF_OA_SAMPLE_SIZE)
-
 struct intel_perf_query_result {
    /**
     * Storage for the final accumulated OA counters.
@@ -360,6 +353,7 @@ struct intel_perf_config {
    int n_counters;
 
    struct intel_perf_query_field_layout query_layout;
+   size_t oa_sample_size;
 
    /* Variables referenced in the XML meta data for OA performance
     * counters, e.g in the normalization equations.
diff --git a/src/intel/perf/intel_perf_query.c b/src/intel/perf/intel_perf_query.c
index 20013b85511..2c7aa58475e 100644
--- a/src/intel/perf/intel_perf_query.c
+++ b/src/intel/perf/intel_perf_query.c
@@ -163,10 +163,12 @@ struct oa_sample_buf {
    struct exec_node link;
    int refcount;
    int len;
-   uint8_t buf[INTEL_PERF_OA_HEADER_SAMPLE_SIZE * 10];
    uint32_t last_timestamp;
+   uint8_t buf[];
 };
 
+#define oa_sample_buf_buf_length(perf) (perf->oa_sample_size * 10)
+
 /**
  * gen representation of a performance query object.
  *
@@ -418,7 +420,7 @@ get_free_sample_buf(struct intel_perf_context *perf_ctx)
    if (node)
       buf = exec_node_data(struct oa_sample_buf, node, link);
    else {
-      buf = ralloc_size(perf_ctx->perf, sizeof(*buf));
+      buf = ralloc_size(perf_ctx->perf, sizeof(*buf) + oa_sample_buf_buf_length(perf_ctx->perf));
 
       exec_node_init(&buf->link);
       buf->refcount = 0;
@@ -973,7 +975,8 @@ read_oa_samples_until(struct intel_perf_context *perf_ctx,
 
       len = intel_perf_stream_read_samples(perf_ctx->perf,
                                            perf_ctx->oa_stream_fd,
-                                           buf->buf, sizeof(buf->buf));
+                                           buf->buf,
+                                           oa_sample_buf_buf_length(perf_ctx->perf));
 
       if (len <= 0) {
          exec_list_push_tail(&perf_ctx->free_sample_buffers, &buf->link);
diff --git a/src/intel/perf/xe/intel_perf.c b/src/intel/perf/xe/intel_perf.c
index 556e986f342..7e6e2d63c6d 100644
--- a/src/intel/perf/xe/intel_perf.c
+++ b/src/intel/perf/xe/intel_perf.c
@@ -211,15 +211,17 @@ xe_perf_stream_read_error(int perf_stream_fd, uint8_t *buffer, size_t buffer_len
 }
 
 int
-xe_perf_stream_read_samples(int perf_stream_fd, uint8_t *buffer,
-                            size_t buffer_len)
+xe_perf_stream_read_samples(struct intel_perf_config *perf_config, int perf_stream_fd,
+                            uint8_t *buffer, size_t buffer_len)
 {
-   uint32_t num_samples = buffer_len / INTEL_PERF_OA_HEADER_SAMPLE_SIZE;
-   const size_t max_bytes_read = num_samples * INTEL_PERF_OA_SAMPLE_SIZE;
+   const size_t sample_size = perf_config->oa_sample_size;
+   const size_t sample_header_size = sample_size + sizeof(struct intel_perf_record_header);
+   uint32_t num_samples = buffer_len / sample_header_size;
+   const size_t max_bytes_read = num_samples * sample_size;
    uint8_t *offset, *offset_samples;
    int len, i;
 
-   if (buffer_len < INTEL_PERF_OA_HEADER_SAMPLE_SIZE)
+   if (buffer_len < sample_header_size)
       return -ENOSPC;
 
    do {
@@ -233,7 +235,7 @@ xe_perf_stream_read_samples(int perf_stream_fd, uint8_t *buffer,
       return len < 0 ? -errno : 0;
    }
 
-   num_samples = len / INTEL_PERF_OA_SAMPLE_SIZE;
+   num_samples = len / sample_size;
    offset = buffer;
    offset_samples = buffer + (buffer_len - len);
    /* move all samples to the end of buffer */
@@ -246,12 +248,12 @@ xe_perf_stream_read_samples(int perf_stream_fd, uint8_t *buffer,
       /* TODO: also append REPORT_LOST and BUFFER_LOST */
       header->type = INTEL_PERF_RECORD_TYPE_SAMPLE;
       header->pad = 0;
-      header->size = INTEL_PERF_OA_HEADER_SAMPLE_SIZE;
+      header->size = sample_header_size;
       offset += sizeof(*header);
 
-      memmove(offset, offset_samples, INTEL_PERF_OA_SAMPLE_SIZE);
-      offset += INTEL_PERF_OA_SAMPLE_SIZE;
-      offset_samples += INTEL_PERF_OA_SAMPLE_SIZE;
+      memmove(offset, offset_samples, sample_size);
+      offset += sample_size;
+      offset_samples += sample_size;
    }
 
    return offset - buffer;
diff --git a/src/intel/perf/xe/intel_perf.h b/src/intel/perf/xe/intel_perf.h
index d91c702dbd8..4008185fdb0 100644
--- a/src/intel/perf/xe/intel_perf.h
+++ b/src/intel/perf/xe/intel_perf.h
@@ -25,5 +25,5 @@ int xe_perf_stream_open(struct intel_perf_config *perf_config, int drm_fd,
                         bool hold_preemption, bool enable);
 int xe_perf_stream_set_state(int perf_stream_fd, bool enable);
 int xe_perf_stream_set_metrics_id(int perf_stream_fd, uint64_t metrics_set_id);
-int xe_perf_stream_read_samples(int perf_stream_fd, uint8_t *buffer,
-                                size_t buffer_len);
+int xe_perf_stream_read_samples(struct intel_perf_config *perf_config, int perf_stream_fd,
+                                uint8_t *buffer, size_t buffer_len);