intel/perf: Extend intel_perf_stream_set_metrics_id() to synchronize metrics id changes

Xe KMD added a uAPI to synchronize metrics id changes, so we can make
it wait for all previous workloads in the exec_queue and all previous
metrics id changes to finish before changing it again.
This should make Vulkan queries more robust.

So this makes use of intel_bind_timeline to synchronize the metrics id
changes and xe_queue_get_syncobj_for_idle() to synchronize with the
exec_queue.

As i915 and some versions of Xe KMD will not support it, this feature
will only be used when the intel_bind_timeline parameter is not NULL and
the timeline has a valid syncobj id.
At this patch level all callers will set it to NULL; the next patch will
add and initialize the timeline in ANV when supported by Xe KMD.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31283>
This commit is contained in:
José Roberto de Souza
2024-08-15 10:14:49 -07:00
committed by Marge Bot
parent 3e6546f662
commit a38a98c4cb
10 changed files with 131 additions and 29 deletions

View File

@@ -1569,7 +1569,7 @@ int
intel_perf_stream_open(struct intel_perf_config *perf_config, int drm_fd,
uint32_t ctx_id, uint64_t metrics_set_id,
uint64_t period_exponent, bool hold_preemption,
bool enable)
bool enable, struct intel_bind_timeline *timeline)
{
uint64_t report_format = intel_perf_get_oa_format(perf_config);
@@ -1581,7 +1581,7 @@ intel_perf_stream_open(struct intel_perf_config *perf_config, int drm_fd,
case INTEL_KMD_TYPE_XE:
return xe_perf_stream_open(perf_config, drm_fd, ctx_id, metrics_set_id,
report_format, period_exponent,
hold_preemption, enable);
hold_preemption, enable, timeline);
default:
unreachable("missing");
return 0;
@@ -1629,13 +1629,18 @@ intel_perf_stream_set_state(struct intel_perf_config *perf_config,
int
intel_perf_stream_set_metrics_id(struct intel_perf_config *perf_config,
int perf_stream_fd, uint64_t metrics_set_id)
int drm_fd, int perf_stream_fd,
uint32_t exec_queue,
uint64_t metrics_set_id,
struct intel_bind_timeline *timeline)
{
switch (perf_config->devinfo->kmd_type) {
case INTEL_KMD_TYPE_I915:
return i915_perf_stream_set_metrics_id(perf_stream_fd, metrics_set_id);
case INTEL_KMD_TYPE_XE:
return xe_perf_stream_set_metrics_id(perf_stream_fd, metrics_set_id);
return xe_perf_stream_set_metrics_id(perf_stream_fd, drm_fd,
exec_queue, metrics_set_id,
timeline);
default:
unreachable("missing");
return -1;

View File

@@ -30,6 +30,7 @@
#include <string.h>
#include "compiler/glsl/list.h"
#include "common/intel_bind_timeline.h"
#include "dev/intel_device_info.h"
#include "util/bitscan.h"
#include "util/bitset.h"
@@ -585,14 +586,17 @@ void intel_perf_get_counters_passes(struct intel_perf_config *perf,
int intel_perf_stream_open(struct intel_perf_config *perf_config, int drm_fd,
uint32_t ctx_id, uint64_t metrics_set_id,
uint64_t period_exponent, bool hold_preemption,
bool enable);
bool enable, struct intel_bind_timeline *timeline);
int intel_perf_stream_read_samples(struct intel_perf_config *perf_config,
int perf_stream_fd, uint8_t *buffer,
size_t buffer_len);
int intel_perf_stream_set_state(struct intel_perf_config *perf_config,
int perf_stream_fd, bool enable);
int intel_perf_stream_set_metrics_id(struct intel_perf_config *perf_config,
int perf_stream_fd, uint64_t metrics_set_id);
int drm_fd, int perf_stream_fd,
uint32_t exec_queue,
uint64_t metrics_set_id,
struct intel_bind_timeline *timeline);
#ifdef __cplusplus
} // extern "C"

View File

@@ -359,7 +359,7 @@ intel_perf_open(struct intel_perf_context *perf_ctx,
{
int fd = intel_perf_stream_open(perf_ctx->perf, drm_fd, ctx_id,
metrics_set_id, period_exponent, false,
enable);
enable, NULL);
if (fd == -1) {
DBG("Error opening gen perf OA stream: %m\n");
return false;

View File

@@ -11,6 +11,7 @@
#include "perf/intel_perf.h"
#include "intel_perf_common.h"
#include "intel/common/intel_gem.h"
#include "intel/common/xe/intel_queue.h"
#include "drm-uapi/xe_drm.h"
@@ -136,7 +137,8 @@ int
xe_perf_stream_open(struct intel_perf_config *perf_config, int drm_fd,
uint32_t exec_id, uint64_t metrics_set_id,
uint64_t report_format, uint64_t period_exponent,
bool hold_preemption, bool enable)
bool hold_preemption, bool enable,
struct intel_bind_timeline *timeline)
{
struct drm_xe_ext_set_property props[DRM_XE_OA_PROPERTY_NO_PREEMPT + 1] = {};
struct drm_xe_observation_param observation_param = {
@@ -144,6 +146,10 @@ xe_perf_stream_open(struct intel_perf_config *perf_config, int drm_fd,
.observation_op = DRM_XE_OBSERVATION_OP_STREAM_OPEN,
.param = (uintptr_t)&props,
};
struct drm_xe_sync sync = {
.type = DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ,
.flags = DRM_XE_SYNC_FLAG_SIGNAL,
};
uint32_t i = 0;
int fd, flags;
@@ -157,7 +163,18 @@ xe_perf_stream_open(struct intel_perf_config *perf_config, int drm_fd,
if (hold_preemption)
oa_prop_set(props, &i, DRM_XE_OA_PROPERTY_NO_PREEMPT, hold_preemption);
fd = intel_ioctl(drm_fd, DRM_IOCTL_XE_OBSERVATION, &observation_param);
if (timeline && intel_bind_timeline_get_syncobj(timeline)) {
oa_prop_set(props, &i, DRM_XE_OA_PROPERTY_NUM_SYNCS, 1);
oa_prop_set(props, &i, DRM_XE_OA_PROPERTY_SYNCS, (uintptr_t)&sync);
sync.handle = intel_bind_timeline_get_syncobj(timeline);
sync.timeline_value = intel_bind_timeline_bind_begin(timeline);
fd = intel_ioctl(drm_fd, DRM_IOCTL_XE_OBSERVATION, &observation_param);
intel_bind_timeline_bind_end(timeline);
} else {
fd = intel_ioctl(drm_fd, DRM_IOCTL_XE_OBSERVATION, &observation_param);
}
if (fd < 0)
return fd;
@@ -181,15 +198,63 @@ xe_perf_stream_set_state(int perf_stream_fd, bool enable)
}
int
xe_perf_stream_set_metrics_id(int perf_stream_fd, uint64_t metrics_set_id)
xe_perf_stream_set_metrics_id(int perf_stream_fd, int drm_fd,
uint32_t exec_queue, uint64_t metrics_set_id,
struct intel_bind_timeline *timeline)
{
struct drm_xe_ext_set_property prop = {};
struct drm_xe_ext_set_property prop[3] = {};
uint32_t index = 0;
int ret;
oa_prop_set(&prop, &index, DRM_XE_OA_PROPERTY_OA_METRIC_SET,
metrics_set_id);
return intel_ioctl(perf_stream_fd, DRM_XE_OBSERVATION_IOCTL_CONFIG,
(void *)(uintptr_t)&prop);
oa_prop_set(prop, &index, DRM_XE_OA_PROPERTY_OA_METRIC_SET,
metrics_set_id);
if (timeline && intel_bind_timeline_get_syncobj(timeline)) {
struct drm_xe_sync xe_syncs[3] = {};
uint32_t syncobj;
int ret2;
oa_prop_set(prop, &index, DRM_XE_OA_PROPERTY_NUM_SYNCS, ARRAY_SIZE(xe_syncs));
oa_prop_set(prop, &index, DRM_XE_OA_PROPERTY_SYNCS, (uintptr_t)xe_syncs);
/* wait on all previous exec in queues */
ret = xe_queue_get_syncobj_for_idle(drm_fd, exec_queue, &syncobj);
if (ret)
return ret;
xe_syncs[0].type = DRM_XE_SYNC_TYPE_SYNCOBJ;
xe_syncs[0].flags = 0;/* wait */
xe_syncs[0].handle = syncobj;
/* wait on previous set_metrics_id to complete */
xe_syncs[1].type = DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ;
xe_syncs[1].flags = 0;/* wait */
xe_syncs[1].handle = intel_bind_timeline_get_syncobj(timeline);
xe_syncs[1].timeline_value = intel_bind_timeline_get_last_point(timeline);
/* signal completion */
xe_syncs[2].type = DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ;
xe_syncs[2].flags = DRM_XE_SYNC_FLAG_SIGNAL;
xe_syncs[2].handle = intel_bind_timeline_get_syncobj(timeline);
xe_syncs[2].timeline_value = intel_bind_timeline_bind_begin(timeline);
ret = intel_ioctl(perf_stream_fd, DRM_XE_OBSERVATION_IOCTL_CONFIG,
(void *)(uintptr_t)&prop);
intel_bind_timeline_bind_end(timeline);
/* Looks safe to destroy as Xe KMD should increase the ref count until
* it is using it
*/
struct drm_syncobj_destroy syncobj_destroy = {
.handle = syncobj,
};
ret2 = intel_ioctl(drm_fd, DRM_IOCTL_SYNCOBJ_DESTROY, &syncobj_destroy);
assert(ret2 == 0);
} else {
ret = intel_ioctl(perf_stream_fd, DRM_XE_OBSERVATION_IOCTL_CONFIG,
(void *)(uintptr_t)&prop);
}
return ret;
}
static int

View File

@@ -9,6 +9,7 @@
#include <stddef.h>
#include <stdint.h>
struct intel_bind_timeline;
struct intel_perf_config;
struct intel_perf_registers;
@@ -22,8 +23,11 @@ void xe_remove_config(struct intel_perf_config *perf, int fd, uint64_t config_id
int xe_perf_stream_open(struct intel_perf_config *perf_config, int drm_fd,
uint32_t exec_id, uint64_t metrics_set_id,
uint64_t report_format, uint64_t period_exponent,
bool hold_preemption, bool enable);
bool hold_preemption, bool enable,
struct intel_bind_timeline *timeline);
int xe_perf_stream_set_state(int perf_stream_fd, bool enable);
int xe_perf_stream_set_metrics_id(int perf_stream_fd, uint64_t metrics_set_id);
int xe_perf_stream_set_metrics_id(int perf_stream_fd, int drm_fd,
uint32_t exec_queue, uint64_t metrics_set_id,
struct intel_bind_timeline *timeline);
int xe_perf_stream_read_samples(struct intel_perf_config *perf_config, int perf_stream_fd,
uint8_t *buffer, size_t buffer_len);

View File

@@ -105,12 +105,11 @@ anv_device_perf_close(struct anv_device *device)
device->perf_fd = -1;
}
static int
anv_device_perf_open(struct anv_device *device, struct anv_queue *queue, uint64_t metric_id)
static uint32_t
anv_device_perf_get_queue_context_or_exec_queue_id(struct anv_queue *queue)
{
struct anv_device *device = queue->device;
uint32_t context_or_exec_queue_id;
uint64_t period_exponent = 31; /* slowest sampling period */
int ret;
switch (device->physical->info.kmd_type) {
case INTEL_KMD_TYPE_I915:
@@ -125,9 +124,18 @@ anv_device_perf_open(struct anv_device *device, struct anv_queue *queue, uint64_
context_or_exec_queue_id = 0;
}
return context_or_exec_queue_id;
}
static int
anv_device_perf_open(struct anv_device *device, struct anv_queue *queue, uint64_t metric_id)
{
uint64_t period_exponent = 31; /* slowest sampling period */
int ret;
ret = intel_perf_stream_open(device->physical->perf, device->fd,
context_or_exec_queue_id, metric_id,
period_exponent, true, true);
anv_device_perf_get_queue_context_or_exec_queue_id(queue),
metric_id, period_exponent, true, true, NULL);
if (ret >= 0)
device->perf_queue = queue;
@@ -276,9 +284,13 @@ VkResult anv_QueueSetPerformanceConfigurationINTEL(
if (device->perf_fd < 0)
return VK_ERROR_INITIALIZATION_FAILED;
} else {
uint32_t context_or_exec_queue = anv_device_perf_get_queue_context_or_exec_queue_id(device->perf_queue);
int ret = intel_perf_stream_set_metrics_id(device->physical->perf,
device->fd,
device->perf_fd,
config->config_id);
context_or_exec_queue,
config->config_id,
NULL);
if (ret < 0)
return vk_device_set_lost(&device->vk, "i915-perf config failed: %m");
}

View File

@@ -928,8 +928,11 @@ i915_queue_exec_locked(struct anv_queue *queue,
(query_info->kind == INTEL_PERF_QUERY_TYPE_OA ||
query_info->kind == INTEL_PERF_QUERY_TYPE_RAW)) {
int ret = intel_perf_stream_set_metrics_id(device->physical->perf,
device->fd,
device->perf_fd,
query_info->oa_metrics_set_id);
-1,/* this parameter, exec_queue is not used in i915 */
query_info->oa_metrics_set_id,
NULL);
if (ret < 0) {
result = vk_device_set_lost(&device->vk,
"i915-perf config failed: %s",

View File

@@ -334,8 +334,11 @@ xe_queue_exec_locked(struct anv_queue *queue,
(query_info->kind == INTEL_PERF_QUERY_TYPE_OA ||
query_info->kind == INTEL_PERF_QUERY_TYPE_RAW)) {
int ret = intel_perf_stream_set_metrics_id(device->physical->perf,
device->fd,
device->perf_fd,
query_info->oa_metrics_set_id);
queue->exec_queue_id,
query_info->oa_metrics_set_id,
NULL);
if (ret < 0) {
result = vk_device_set_lost(&device->vk,
"intel_perf_stream_set_metrics_id failed: %s",

View File

@@ -2222,8 +2222,11 @@ anv_queue_exec_locked(struct anv_queue *queue,
(query_info->kind == INTEL_PERF_QUERY_TYPE_OA ||
query_info->kind == INTEL_PERF_QUERY_TYPE_RAW)) {
int ret = intel_perf_stream_set_metrics_id(device->physical->perf,
device->fd,
device->perf_fd,
query_info->oa_metrics_set_id);
-1,/* this parameter, exec_queue is not used in i915 */
query_info->oa_metrics_set_id,
NULL);
if (ret < 0) {
result = vk_device_set_lost(&device->vk,
"i915-perf config failed: %s",

View File

@@ -108,7 +108,7 @@ anv_device_perf_open(struct anv_device *device, uint64_t metric_id)
return intel_perf_stream_open(device->physical->perf, device->fd,
device->context_id, metric_id,
period_exponent, true, true);
period_exponent, true, true, NULL);
}
/* VK_INTEL_performance_query */
@@ -237,8 +237,11 @@ VkResult anv_QueueSetPerformanceConfigurationINTEL(
return VK_ERROR_INITIALIZATION_FAILED;
} else {
int ret = intel_perf_stream_set_metrics_id(device->physical->perf,
device->fd,
device->perf_fd,
config->config_id);
-1,/* this parameter, exec_queue is not used in i915 */
config->config_id,
NULL);
if (ret < 0)
return vk_device_set_lost(&device->vk, "i915-perf config failed: %m");
}