intel/perf: Extend intel_perf_stream_set_metrics_id() to synchronize metrics id changes
Xe KMD added a uAPI to synchronize metrics id changes, so we can make it wait for all previous workloads in the exec_queue and all previous metrics id changes to finish before changing it again. This should make Vulkan queries more robust. So this makes use of intel_bind_timeline to synchronize the metrics id changes and xe_queue_get_syncobj_for_idle() to synchronize with the exec_queue. As i915 and some versions of Xe KMD will not support it, this feature will only be used when the intel_bind_timeline parameter is not NULL and the timeline has a valid syncobj id. At this patch level all callers will set it to NULL; the next patch will add and initialize the timeline in ANV when supported by Xe KMD. Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Signed-off-by: José Roberto de Souza <jose.souza@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31283>
This commit is contained in:
committed by
Marge Bot
parent
3e6546f662
commit
a38a98c4cb
@@ -1569,7 +1569,7 @@ int
|
||||
intel_perf_stream_open(struct intel_perf_config *perf_config, int drm_fd,
|
||||
uint32_t ctx_id, uint64_t metrics_set_id,
|
||||
uint64_t period_exponent, bool hold_preemption,
|
||||
bool enable)
|
||||
bool enable, struct intel_bind_timeline *timeline)
|
||||
{
|
||||
uint64_t report_format = intel_perf_get_oa_format(perf_config);
|
||||
|
||||
@@ -1581,7 +1581,7 @@ intel_perf_stream_open(struct intel_perf_config *perf_config, int drm_fd,
|
||||
case INTEL_KMD_TYPE_XE:
|
||||
return xe_perf_stream_open(perf_config, drm_fd, ctx_id, metrics_set_id,
|
||||
report_format, period_exponent,
|
||||
hold_preemption, enable);
|
||||
hold_preemption, enable, timeline);
|
||||
default:
|
||||
unreachable("missing");
|
||||
return 0;
|
||||
@@ -1629,13 +1629,18 @@ intel_perf_stream_set_state(struct intel_perf_config *perf_config,
|
||||
|
||||
int
|
||||
intel_perf_stream_set_metrics_id(struct intel_perf_config *perf_config,
|
||||
int perf_stream_fd, uint64_t metrics_set_id)
|
||||
int drm_fd, int perf_stream_fd,
|
||||
uint32_t exec_queue,
|
||||
uint64_t metrics_set_id,
|
||||
struct intel_bind_timeline *timeline)
|
||||
{
|
||||
switch (perf_config->devinfo->kmd_type) {
|
||||
case INTEL_KMD_TYPE_I915:
|
||||
return i915_perf_stream_set_metrics_id(perf_stream_fd, metrics_set_id);
|
||||
case INTEL_KMD_TYPE_XE:
|
||||
return xe_perf_stream_set_metrics_id(perf_stream_fd, metrics_set_id);
|
||||
return xe_perf_stream_set_metrics_id(perf_stream_fd, drm_fd,
|
||||
exec_queue, metrics_set_id,
|
||||
timeline);
|
||||
default:
|
||||
unreachable("missing");
|
||||
return -1;
|
||||
|
||||
@@ -30,6 +30,7 @@
|
||||
#include <string.h>
|
||||
|
||||
#include "compiler/glsl/list.h"
|
||||
#include "common/intel_bind_timeline.h"
|
||||
#include "dev/intel_device_info.h"
|
||||
#include "util/bitscan.h"
|
||||
#include "util/bitset.h"
|
||||
@@ -585,14 +586,17 @@ void intel_perf_get_counters_passes(struct intel_perf_config *perf,
|
||||
int intel_perf_stream_open(struct intel_perf_config *perf_config, int drm_fd,
|
||||
uint32_t ctx_id, uint64_t metrics_set_id,
|
||||
uint64_t period_exponent, bool hold_preemption,
|
||||
bool enable);
|
||||
bool enable, struct intel_bind_timeline *timeline);
|
||||
int intel_perf_stream_read_samples(struct intel_perf_config *perf_config,
|
||||
int perf_stream_fd, uint8_t *buffer,
|
||||
size_t buffer_len);
|
||||
int intel_perf_stream_set_state(struct intel_perf_config *perf_config,
|
||||
int perf_stream_fd, bool enable);
|
||||
int intel_perf_stream_set_metrics_id(struct intel_perf_config *perf_config,
|
||||
int perf_stream_fd, uint64_t metrics_set_id);
|
||||
int drm_fd, int perf_stream_fd,
|
||||
uint32_t exec_queue,
|
||||
uint64_t metrics_set_id,
|
||||
struct intel_bind_timeline *timeline);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
||||
@@ -359,7 +359,7 @@ intel_perf_open(struct intel_perf_context *perf_ctx,
|
||||
{
|
||||
int fd = intel_perf_stream_open(perf_ctx->perf, drm_fd, ctx_id,
|
||||
metrics_set_id, period_exponent, false,
|
||||
enable);
|
||||
enable, NULL);
|
||||
if (fd == -1) {
|
||||
DBG("Error opening gen perf OA stream: %m\n");
|
||||
return false;
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
#include "perf/intel_perf.h"
|
||||
#include "intel_perf_common.h"
|
||||
#include "intel/common/intel_gem.h"
|
||||
#include "intel/common/xe/intel_queue.h"
|
||||
|
||||
#include "drm-uapi/xe_drm.h"
|
||||
|
||||
@@ -136,7 +137,8 @@ int
|
||||
xe_perf_stream_open(struct intel_perf_config *perf_config, int drm_fd,
|
||||
uint32_t exec_id, uint64_t metrics_set_id,
|
||||
uint64_t report_format, uint64_t period_exponent,
|
||||
bool hold_preemption, bool enable)
|
||||
bool hold_preemption, bool enable,
|
||||
struct intel_bind_timeline *timeline)
|
||||
{
|
||||
struct drm_xe_ext_set_property props[DRM_XE_OA_PROPERTY_NO_PREEMPT + 1] = {};
|
||||
struct drm_xe_observation_param observation_param = {
|
||||
@@ -144,6 +146,10 @@ xe_perf_stream_open(struct intel_perf_config *perf_config, int drm_fd,
|
||||
.observation_op = DRM_XE_OBSERVATION_OP_STREAM_OPEN,
|
||||
.param = (uintptr_t)&props,
|
||||
};
|
||||
struct drm_xe_sync sync = {
|
||||
.type = DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ,
|
||||
.flags = DRM_XE_SYNC_FLAG_SIGNAL,
|
||||
};
|
||||
uint32_t i = 0;
|
||||
int fd, flags;
|
||||
|
||||
@@ -157,7 +163,18 @@ xe_perf_stream_open(struct intel_perf_config *perf_config, int drm_fd,
|
||||
if (hold_preemption)
|
||||
oa_prop_set(props, &i, DRM_XE_OA_PROPERTY_NO_PREEMPT, hold_preemption);
|
||||
|
||||
fd = intel_ioctl(drm_fd, DRM_IOCTL_XE_OBSERVATION, &observation_param);
|
||||
if (timeline && intel_bind_timeline_get_syncobj(timeline)) {
|
||||
oa_prop_set(props, &i, DRM_XE_OA_PROPERTY_NUM_SYNCS, 1);
|
||||
oa_prop_set(props, &i, DRM_XE_OA_PROPERTY_SYNCS, (uintptr_t)&sync);
|
||||
|
||||
sync.handle = intel_bind_timeline_get_syncobj(timeline);
|
||||
sync.timeline_value = intel_bind_timeline_bind_begin(timeline);
|
||||
fd = intel_ioctl(drm_fd, DRM_IOCTL_XE_OBSERVATION, &observation_param);
|
||||
intel_bind_timeline_bind_end(timeline);
|
||||
} else {
|
||||
fd = intel_ioctl(drm_fd, DRM_IOCTL_XE_OBSERVATION, &observation_param);
|
||||
}
|
||||
|
||||
if (fd < 0)
|
||||
return fd;
|
||||
|
||||
@@ -181,15 +198,63 @@ xe_perf_stream_set_state(int perf_stream_fd, bool enable)
|
||||
}
|
||||
|
||||
int
|
||||
xe_perf_stream_set_metrics_id(int perf_stream_fd, uint64_t metrics_set_id)
|
||||
xe_perf_stream_set_metrics_id(int perf_stream_fd, int drm_fd,
|
||||
uint32_t exec_queue, uint64_t metrics_set_id,
|
||||
struct intel_bind_timeline *timeline)
|
||||
{
|
||||
struct drm_xe_ext_set_property prop = {};
|
||||
struct drm_xe_ext_set_property prop[3] = {};
|
||||
uint32_t index = 0;
|
||||
int ret;
|
||||
|
||||
oa_prop_set(&prop, &index, DRM_XE_OA_PROPERTY_OA_METRIC_SET,
|
||||
metrics_set_id);
|
||||
return intel_ioctl(perf_stream_fd, DRM_XE_OBSERVATION_IOCTL_CONFIG,
|
||||
(void *)(uintptr_t)&prop);
|
||||
oa_prop_set(prop, &index, DRM_XE_OA_PROPERTY_OA_METRIC_SET,
|
||||
metrics_set_id);
|
||||
|
||||
if (timeline && intel_bind_timeline_get_syncobj(timeline)) {
|
||||
struct drm_xe_sync xe_syncs[3] = {};
|
||||
uint32_t syncobj;
|
||||
int ret2;
|
||||
|
||||
oa_prop_set(prop, &index, DRM_XE_OA_PROPERTY_NUM_SYNCS, ARRAY_SIZE(xe_syncs));
|
||||
oa_prop_set(prop, &index, DRM_XE_OA_PROPERTY_SYNCS, (uintptr_t)xe_syncs);
|
||||
|
||||
/* wait on all previous exec in queues */
|
||||
ret = xe_queue_get_syncobj_for_idle(drm_fd, exec_queue, &syncobj);
|
||||
if (ret)
|
||||
return ret;
|
||||
xe_syncs[0].type = DRM_XE_SYNC_TYPE_SYNCOBJ;
|
||||
xe_syncs[0].flags = 0;/* wait */
|
||||
xe_syncs[0].handle = syncobj;
|
||||
|
||||
/* wait on previous set_metrics_id to complete */
|
||||
xe_syncs[1].type = DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ;
|
||||
xe_syncs[1].flags = 0;/* wait */
|
||||
xe_syncs[1].handle = intel_bind_timeline_get_syncobj(timeline);
|
||||
xe_syncs[1].timeline_value = intel_bind_timeline_get_last_point(timeline);
|
||||
|
||||
/* signal completion */
|
||||
xe_syncs[2].type = DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ;
|
||||
xe_syncs[2].flags = DRM_XE_SYNC_FLAG_SIGNAL;
|
||||
xe_syncs[2].handle = intel_bind_timeline_get_syncobj(timeline);
|
||||
xe_syncs[2].timeline_value = intel_bind_timeline_bind_begin(timeline);
|
||||
|
||||
ret = intel_ioctl(perf_stream_fd, DRM_XE_OBSERVATION_IOCTL_CONFIG,
|
||||
(void *)(uintptr_t)&prop);
|
||||
intel_bind_timeline_bind_end(timeline);
|
||||
|
||||
/* Looks safe to destroy as Xe KMD should increase the ref count until
|
||||
* it is using it
|
||||
*/
|
||||
struct drm_syncobj_destroy syncobj_destroy = {
|
||||
.handle = syncobj,
|
||||
};
|
||||
ret2 = intel_ioctl(drm_fd, DRM_IOCTL_SYNCOBJ_DESTROY, &syncobj_destroy);
|
||||
assert(ret2 == 0);
|
||||
} else {
|
||||
ret = intel_ioctl(perf_stream_fd, DRM_XE_OBSERVATION_IOCTL_CONFIG,
|
||||
(void *)(uintptr_t)&prop);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
struct intel_bind_timeline;
|
||||
struct intel_perf_config;
|
||||
struct intel_perf_registers;
|
||||
|
||||
@@ -22,8 +23,11 @@ void xe_remove_config(struct intel_perf_config *perf, int fd, uint64_t config_id
|
||||
int xe_perf_stream_open(struct intel_perf_config *perf_config, int drm_fd,
|
||||
uint32_t exec_id, uint64_t metrics_set_id,
|
||||
uint64_t report_format, uint64_t period_exponent,
|
||||
bool hold_preemption, bool enable);
|
||||
bool hold_preemption, bool enable,
|
||||
struct intel_bind_timeline *timeline);
|
||||
int xe_perf_stream_set_state(int perf_stream_fd, bool enable);
|
||||
int xe_perf_stream_set_metrics_id(int perf_stream_fd, uint64_t metrics_set_id);
|
||||
int xe_perf_stream_set_metrics_id(int perf_stream_fd, int drm_fd,
|
||||
uint32_t exec_queue, uint64_t metrics_set_id,
|
||||
struct intel_bind_timeline *timeline);
|
||||
int xe_perf_stream_read_samples(struct intel_perf_config *perf_config, int perf_stream_fd,
|
||||
uint8_t *buffer, size_t buffer_len);
|
||||
|
||||
@@ -105,12 +105,11 @@ anv_device_perf_close(struct anv_device *device)
|
||||
device->perf_fd = -1;
|
||||
}
|
||||
|
||||
static int
|
||||
anv_device_perf_open(struct anv_device *device, struct anv_queue *queue, uint64_t metric_id)
|
||||
static uint32_t
|
||||
anv_device_perf_get_queue_context_or_exec_queue_id(struct anv_queue *queue)
|
||||
{
|
||||
struct anv_device *device = queue->device;
|
||||
uint32_t context_or_exec_queue_id;
|
||||
uint64_t period_exponent = 31; /* slowest sampling period */
|
||||
int ret;
|
||||
|
||||
switch (device->physical->info.kmd_type) {
|
||||
case INTEL_KMD_TYPE_I915:
|
||||
@@ -125,9 +124,18 @@ anv_device_perf_open(struct anv_device *device, struct anv_queue *queue, uint64_
|
||||
context_or_exec_queue_id = 0;
|
||||
}
|
||||
|
||||
return context_or_exec_queue_id;
|
||||
}
|
||||
|
||||
static int
|
||||
anv_device_perf_open(struct anv_device *device, struct anv_queue *queue, uint64_t metric_id)
|
||||
{
|
||||
uint64_t period_exponent = 31; /* slowest sampling period */
|
||||
int ret;
|
||||
|
||||
ret = intel_perf_stream_open(device->physical->perf, device->fd,
|
||||
context_or_exec_queue_id, metric_id,
|
||||
period_exponent, true, true);
|
||||
anv_device_perf_get_queue_context_or_exec_queue_id(queue),
|
||||
metric_id, period_exponent, true, true, NULL);
|
||||
if (ret >= 0)
|
||||
device->perf_queue = queue;
|
||||
|
||||
@@ -276,9 +284,13 @@ VkResult anv_QueueSetPerformanceConfigurationINTEL(
|
||||
if (device->perf_fd < 0)
|
||||
return VK_ERROR_INITIALIZATION_FAILED;
|
||||
} else {
|
||||
uint32_t context_or_exec_queue = anv_device_perf_get_queue_context_or_exec_queue_id(device->perf_queue);
|
||||
int ret = intel_perf_stream_set_metrics_id(device->physical->perf,
|
||||
device->fd,
|
||||
device->perf_fd,
|
||||
config->config_id);
|
||||
context_or_exec_queue,
|
||||
config->config_id,
|
||||
NULL);
|
||||
if (ret < 0)
|
||||
return vk_device_set_lost(&device->vk, "i915-perf config failed: %m");
|
||||
}
|
||||
|
||||
@@ -928,8 +928,11 @@ i915_queue_exec_locked(struct anv_queue *queue,
|
||||
(query_info->kind == INTEL_PERF_QUERY_TYPE_OA ||
|
||||
query_info->kind == INTEL_PERF_QUERY_TYPE_RAW)) {
|
||||
int ret = intel_perf_stream_set_metrics_id(device->physical->perf,
|
||||
device->fd,
|
||||
device->perf_fd,
|
||||
query_info->oa_metrics_set_id);
|
||||
-1,/* this parameter, exec_queue is not used in i915 */
|
||||
query_info->oa_metrics_set_id,
|
||||
NULL);
|
||||
if (ret < 0) {
|
||||
result = vk_device_set_lost(&device->vk,
|
||||
"i915-perf config failed: %s",
|
||||
|
||||
@@ -334,8 +334,11 @@ xe_queue_exec_locked(struct anv_queue *queue,
|
||||
(query_info->kind == INTEL_PERF_QUERY_TYPE_OA ||
|
||||
query_info->kind == INTEL_PERF_QUERY_TYPE_RAW)) {
|
||||
int ret = intel_perf_stream_set_metrics_id(device->physical->perf,
|
||||
device->fd,
|
||||
device->perf_fd,
|
||||
query_info->oa_metrics_set_id);
|
||||
queue->exec_queue_id,
|
||||
query_info->oa_metrics_set_id,
|
||||
NULL);
|
||||
if (ret < 0) {
|
||||
result = vk_device_set_lost(&device->vk,
|
||||
"intel_perf_stream_set_metrics_id failed: %s",
|
||||
|
||||
@@ -2222,8 +2222,11 @@ anv_queue_exec_locked(struct anv_queue *queue,
|
||||
(query_info->kind == INTEL_PERF_QUERY_TYPE_OA ||
|
||||
query_info->kind == INTEL_PERF_QUERY_TYPE_RAW)) {
|
||||
int ret = intel_perf_stream_set_metrics_id(device->physical->perf,
|
||||
device->fd,
|
||||
device->perf_fd,
|
||||
query_info->oa_metrics_set_id);
|
||||
-1,/* this parameter, exec_queue is not used in i915 */
|
||||
query_info->oa_metrics_set_id,
|
||||
NULL);
|
||||
if (ret < 0) {
|
||||
result = vk_device_set_lost(&device->vk,
|
||||
"i915-perf config failed: %s",
|
||||
|
||||
@@ -108,7 +108,7 @@ anv_device_perf_open(struct anv_device *device, uint64_t metric_id)
|
||||
|
||||
return intel_perf_stream_open(device->physical->perf, device->fd,
|
||||
device->context_id, metric_id,
|
||||
period_exponent, true, true);
|
||||
period_exponent, true, true, NULL);
|
||||
}
|
||||
|
||||
/* VK_INTEL_performance_query */
|
||||
@@ -237,8 +237,11 @@ VkResult anv_QueueSetPerformanceConfigurationINTEL(
|
||||
return VK_ERROR_INITIALIZATION_FAILED;
|
||||
} else {
|
||||
int ret = intel_perf_stream_set_metrics_id(device->physical->perf,
|
||||
device->fd,
|
||||
device->perf_fd,
|
||||
config->config_id);
|
||||
-1,/* this parameter, exec_queue is not used in i915 */
|
||||
config->config_id,
|
||||
NULL);
|
||||
if (ret < 0)
|
||||
return vk_device_set_lost(&device->vk, "i915-perf config failed: %m");
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user