|
|
|
@@ -0,0 +1,391 @@
|
|
|
|
|
/*
|
|
|
|
|
* Copyright © 2012 Intel Corporation
|
|
|
|
|
*
|
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
|
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
|
|
|
* to deal in the Software without restriction, including without limitation
|
|
|
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
|
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
|
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
|
|
|
*
|
|
|
|
|
* The above copyright notice and this permission notice (including the next
|
|
|
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
|
|
|
* Software.
|
|
|
|
|
*
|
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
|
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
|
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
|
|
|
* DEALINGS IN THE SOFTWARE.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* \file brw_performance_monitor.c
|
|
|
|
|
*
|
|
|
|
|
* Implementation of the GL_AMD_performance_monitor extension.
|
|
|
|
|
*
|
|
|
|
|
* Currently only for Ironlake.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#include <limits.h>
|
|
|
|
|
|
|
|
|
|
#include "main/bitset.h"
|
|
|
|
|
#include "main/macros.h"
|
|
|
|
|
#include "main/mtypes.h"
|
|
|
|
|
#include "main/performance_monitor.h"
|
|
|
|
|
|
|
|
|
|
#include "brw_context.h"
|
|
|
|
|
#include "brw_defines.h"
|
|
|
|
|
#include "intel_batchbuffer.h"
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* i965 representation of a performance monitor object.
|
|
|
|
|
*/
|
|
|
|
|
struct brw_perf_monitor_object
|
|
|
|
|
{
|
|
|
|
|
/** The base class. */
|
|
|
|
|
struct gl_perf_monitor_object base;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* BO containing raw counter data in a hardware specific form.
|
|
|
|
|
*/
|
|
|
|
|
drm_intel_bo *bo;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/** Downcasting convenience macro. */
|
|
|
|
|
static inline struct brw_perf_monitor_object *
|
|
|
|
|
brw_perf_monitor(struct gl_perf_monitor_object *m)
|
|
|
|
|
{
|
|
|
|
|
return (struct brw_perf_monitor_object *) m;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#define SECOND_SNAPSHOT_OFFSET_IN_BYTES 2048
|
|
|
|
|
|
|
|
|
|
/* Two random values used to ensure we're getting valid snapshots. */
|
|
|
|
|
#define FIRST_SNAPSHOT_REPORT_ID 0xd2e9c607
|
|
|
|
|
#define SECOND_SNAPSHOT_REPORT_ID 0xad584b1d
|
|
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
|
|
|
|
|
|
#define COUNTER(name) \
|
|
|
|
|
{ \
|
|
|
|
|
.Name = name, \
|
|
|
|
|
.Type = GL_UNSIGNED_INT, \
|
|
|
|
|
.Minimum = { .u32 = 0 }, \
|
|
|
|
|
.Maximum = { .u32 = ~0 }, \
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#define GROUP(name, max_active, counter_list) \
|
|
|
|
|
{ \
|
|
|
|
|
.Name = name, \
|
|
|
|
|
.MaxActiveCounters = max_active, \
|
|
|
|
|
.Counters = counter_list, \
|
|
|
|
|
.NumCounters = ARRAY_SIZE(counter_list), \
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct brw_perf_bo_layout {
|
|
|
|
|
int group;
|
|
|
|
|
int counter;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Ironlake:
|
|
|
|
|
* @{
|
|
|
|
|
*/
|
|
|
|
|
const static struct gl_perf_monitor_counter gen5_raw_aggregating_counters[] = {
|
|
|
|
|
COUNTER("cycles the CS unit is starved"),
|
|
|
|
|
COUNTER("cycles the CS unit is stalled"),
|
|
|
|
|
COUNTER("cycles the VF unit is starved"),
|
|
|
|
|
COUNTER("cycles the VF unit is stalled"),
|
|
|
|
|
COUNTER("cycles the VS unit is starved"),
|
|
|
|
|
COUNTER("cycles the VS unit is stalled"),
|
|
|
|
|
COUNTER("cycles the GS unit is starved"),
|
|
|
|
|
COUNTER("cycles the GS unit is stalled"),
|
|
|
|
|
COUNTER("cycles the CL unit is starved"),
|
|
|
|
|
COUNTER("cycles the CL unit is stalled"),
|
|
|
|
|
COUNTER("cycles the SF unit is starved"),
|
|
|
|
|
COUNTER("cycles the SF unit is stalled"),
|
|
|
|
|
COUNTER("cycles the WZ unit is starved"),
|
|
|
|
|
COUNTER("cycles the WZ unit is stalled"),
|
|
|
|
|
COUNTER("Z buffer read/write"),
|
|
|
|
|
COUNTER("cycles each EU was active"),
|
|
|
|
|
COUNTER("cycles each EU was suspended"),
|
|
|
|
|
COUNTER("cycles threads loaded all EUs"),
|
|
|
|
|
COUNTER("cycles filtering active"),
|
|
|
|
|
COUNTER("cycles PS threads executed"),
|
|
|
|
|
COUNTER("subspans written to RC"),
|
|
|
|
|
COUNTER("bytes read for texture reads"),
|
|
|
|
|
COUNTER("texels returned from sampler"),
|
|
|
|
|
COUNTER("polygons not culled"),
|
|
|
|
|
COUNTER("clocks MASF has valid message"),
|
|
|
|
|
COUNTER("64b writes/reads from RC"),
|
|
|
|
|
COUNTER("reads on dataport"),
|
|
|
|
|
COUNTER("clocks MASF has valid msg not consumed by sampler"),
|
|
|
|
|
COUNTER("cycles any EU is stalled for math"),
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
const static struct gl_perf_monitor_group gen5_groups[] = {
|
|
|
|
|
GROUP("Aggregating Counters", INT_MAX, gen5_raw_aggregating_counters),
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
const static struct brw_perf_bo_layout gen5_perf_bo_layout[] =
|
|
|
|
|
{
|
|
|
|
|
{ -1, -1, }, /* Report ID */
|
|
|
|
|
{ -1, -1, }, /* TIMESTAMP (64-bit) */
|
|
|
|
|
{ -1, -1, }, /* ...second half... */
|
|
|
|
|
{ 0, 0, }, /* cycles the CS unit is starved */
|
|
|
|
|
{ 0, 1, }, /* cycles the CS unit is stalled */
|
|
|
|
|
{ 0, 2, }, /* cycles the VF unit is starved */
|
|
|
|
|
{ 0, 3, }, /* cycles the VF unit is stalled */
|
|
|
|
|
{ 0, 4, }, /* cycles the VS unit is starved */
|
|
|
|
|
{ 0, 5, }, /* cycles the VS unit is stalled */
|
|
|
|
|
{ 0, 6, }, /* cycles the GS unit is starved */
|
|
|
|
|
{ 0, 7, }, /* cycles the GS unit is stalled */
|
|
|
|
|
{ 0, 8, }, /* cycles the CL unit is starved */
|
|
|
|
|
{ 0, 9, }, /* cycles the CL unit is stalled */
|
|
|
|
|
{ 0, 10, }, /* cycles the SF unit is starved */
|
|
|
|
|
{ 0, 11, }, /* cycles the SF unit is stalled */
|
|
|
|
|
{ 0, 12, }, /* cycles the WZ unit is starved */
|
|
|
|
|
{ 0, 13, }, /* cycles the WZ unit is stalled */
|
|
|
|
|
{ 0, 14, }, /* Z buffer read/write */
|
|
|
|
|
{ 0, 15, }, /* cycles each EU was active */
|
|
|
|
|
{ 0, 16, }, /* cycles each EU was suspended */
|
|
|
|
|
{ 0, 17, }, /* cycles threads loaded all EUs */
|
|
|
|
|
{ 0, 18, }, /* cycles filtering active */
|
|
|
|
|
{ 0, 19, }, /* cycles PS threads executed */
|
|
|
|
|
{ 0, 20, }, /* subspans written to RC */
|
|
|
|
|
{ 0, 21, }, /* bytes read for texture reads */
|
|
|
|
|
{ 0, 22, }, /* texels returned from sampler */
|
|
|
|
|
{ 0, 23, }, /* polygons not culled */
|
|
|
|
|
{ 0, 24, }, /* clocks MASF has valid message */
|
|
|
|
|
{ 0, 25, }, /* 64b writes/reads from RC */
|
|
|
|
|
{ 0, 26, }, /* reads on dataport */
|
|
|
|
|
{ 0, 27, }, /* clocks MASF has valid msg not consumed by sampler */
|
|
|
|
|
{ 0, 28, }, /* cycles any EU is stalled for math */
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/** @} */
|
|
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
snapshot_aggregating_counters(struct brw_context *brw,
|
|
|
|
|
drm_intel_bo *bo, uint32_t offset_in_bytes)
|
|
|
|
|
{
|
|
|
|
|
uint32_t report_id = offset_in_bytes == 0 ? FIRST_SNAPSHOT_REPORT_ID
|
|
|
|
|
: SECOND_SNAPSHOT_REPORT_ID;
|
|
|
|
|
|
|
|
|
|
if (brw->gen == 5) {
|
|
|
|
|
/* Ironlake requires two MI_REPORT_PERF_COUNT commands to write all
|
|
|
|
|
* the counters. The report ID is ignored in the second set.
|
|
|
|
|
*/
|
|
|
|
|
BEGIN_BATCH(6);
|
|
|
|
|
OUT_BATCH(GEN5_MI_REPORT_PERF_COUNT | GEN5_MI_COUNTER_SET_0);
|
|
|
|
|
OUT_RELOC(bo,
|
|
|
|
|
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
|
|
|
|
|
offset_in_bytes);
|
|
|
|
|
OUT_BATCH(report_id);
|
|
|
|
|
|
|
|
|
|
OUT_BATCH(GEN5_MI_REPORT_PERF_COUNT | GEN5_MI_COUNTER_SET_1);
|
|
|
|
|
OUT_RELOC(bo,
|
|
|
|
|
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
|
|
|
|
|
offset_in_bytes + 64);
|
|
|
|
|
OUT_BATCH(report_id);
|
|
|
|
|
ADVANCE_BATCH();
|
|
|
|
|
} else {
|
|
|
|
|
assert(!"Unsupported generation for performance counters.");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool
|
|
|
|
|
aggregating_counters_needed(struct brw_context *brw,
|
|
|
|
|
struct gl_perf_monitor_object *m)
|
|
|
|
|
{
|
|
|
|
|
return m->ActiveGroups[0];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/******************************************************************************/
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Create a new performance monitor object.
|
|
|
|
|
*/
|
|
|
|
|
static struct gl_perf_monitor_object *
|
|
|
|
|
brw_new_perf_monitor(struct gl_context *ctx)
|
|
|
|
|
{
|
|
|
|
|
return calloc(1, sizeof(struct brw_perf_monitor_object));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Delete a performance monitor object.
|
|
|
|
|
*/
|
|
|
|
|
static void
|
|
|
|
|
brw_delete_perf_monitor(struct gl_context *ctx, struct gl_perf_monitor_object *m)
|
|
|
|
|
{
|
|
|
|
|
struct brw_perf_monitor_object *monitor = brw_perf_monitor(m);
|
|
|
|
|
|
|
|
|
|
if (monitor->bo)
|
|
|
|
|
drm_intel_bo_unreference(monitor->bo);
|
|
|
|
|
|
|
|
|
|
free(monitor);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Driver hook for glBeginPerformanceMonitorAMD().
|
|
|
|
|
*/
|
|
|
|
|
static GLboolean
|
|
|
|
|
brw_begin_perf_monitor(struct gl_context *ctx,
|
|
|
|
|
struct gl_perf_monitor_object *m)
|
|
|
|
|
{
|
|
|
|
|
struct brw_context *brw = brw_context(ctx);
|
|
|
|
|
struct brw_perf_monitor_object *monitor = brw_perf_monitor(m);
|
|
|
|
|
|
|
|
|
|
/* If the BO already exists, throw it away. It contains old results
|
|
|
|
|
* that we're not interested in any more.
|
|
|
|
|
*/
|
|
|
|
|
if (monitor->bo)
|
|
|
|
|
drm_intel_bo_unreference(monitor->bo);
|
|
|
|
|
|
|
|
|
|
/* Create a new BO. */
|
|
|
|
|
monitor->bo =
|
|
|
|
|
drm_intel_bo_alloc(brw->bufmgr, "performance monitor", 4096, 64);
|
|
|
|
|
drm_intel_bo_map(monitor->bo, true);
|
|
|
|
|
memset((char *) monitor->bo->virtual, 0xff, 4096);
|
|
|
|
|
drm_intel_bo_unmap(monitor->bo);
|
|
|
|
|
|
|
|
|
|
/* Take a shapshot of all active counters */
|
|
|
|
|
if (aggregating_counters_needed(brw, m)) {
|
|
|
|
|
snapshot_aggregating_counters(brw, monitor->bo, 0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Driver hook for glEndPerformanceMonitorAMD().
|
|
|
|
|
*/
|
|
|
|
|
static void
|
|
|
|
|
brw_end_perf_monitor(struct gl_context *ctx,
|
|
|
|
|
struct gl_perf_monitor_object *m)
|
|
|
|
|
{
|
|
|
|
|
struct brw_context *brw = brw_context(ctx);
|
|
|
|
|
struct brw_perf_monitor_object *monitor = brw_perf_monitor(m);
|
|
|
|
|
if (aggregating_counters_needed(brw, m)) {
|
|
|
|
|
snapshot_aggregating_counters(brw, monitor->bo,
|
|
|
|
|
SECOND_SNAPSHOT_OFFSET_IN_BYTES);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Reset a performance monitor, throwing away any results.
|
|
|
|
|
*/
|
|
|
|
|
static void
|
|
|
|
|
brw_reset_perf_monitor(struct gl_context *ctx,
|
|
|
|
|
struct gl_perf_monitor_object *m)
|
|
|
|
|
{
|
|
|
|
|
struct brw_perf_monitor_object *monitor = brw_perf_monitor(m);
|
|
|
|
|
|
|
|
|
|
if (monitor->bo) {
|
|
|
|
|
drm_intel_bo_unreference(monitor->bo);
|
|
|
|
|
monitor->bo = NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (m->Active) {
|
|
|
|
|
brw_begin_perf_monitor(ctx, m);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Is a performance monitor result available?
|
|
|
|
|
*/
|
|
|
|
|
static GLboolean
|
|
|
|
|
brw_is_perf_monitor_result_available(struct gl_context *ctx,
|
|
|
|
|
struct gl_perf_monitor_object *m)
|
|
|
|
|
{
|
|
|
|
|
struct brw_context *brw = brw_context(ctx);
|
|
|
|
|
struct brw_perf_monitor_object *monitor = brw_perf_monitor(m);
|
|
|
|
|
return !m->Active && monitor->bo &&
|
|
|
|
|
!drm_intel_bo_references(brw->batch.bo, monitor->bo) &&
|
|
|
|
|
!drm_intel_bo_busy(monitor->bo);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Get the performance monitor result.
|
|
|
|
|
*/
|
|
|
|
|
static void
|
|
|
|
|
brw_get_perf_monitor_result(struct gl_context *ctx,
|
|
|
|
|
struct gl_perf_monitor_object *m,
|
|
|
|
|
GLsizei data_size,
|
|
|
|
|
GLuint *data,
|
|
|
|
|
GLint *bytes_written)
|
|
|
|
|
{
|
|
|
|
|
struct brw_context *brw = brw_context(ctx);
|
|
|
|
|
struct brw_perf_monitor_object *monitor = brw_perf_monitor(m);
|
|
|
|
|
|
|
|
|
|
/* This hook should only be called when results are available. */
|
|
|
|
|
assert(monitor->bo != NULL);
|
|
|
|
|
|
|
|
|
|
drm_intel_bo_map(monitor->bo, false);
|
|
|
|
|
unsigned *gpu_bo = monitor->bo->virtual;
|
|
|
|
|
|
|
|
|
|
/* Copy data from the BO to the supplied array.
|
|
|
|
|
*
|
|
|
|
|
* The output data format is: <group ID, counter ID, value> for each
|
|
|
|
|
* active counter. The API allows counters to appear in any order.
|
|
|
|
|
*/
|
|
|
|
|
GLsizei offset = 0;
|
|
|
|
|
|
|
|
|
|
/* Look for expected report ID values to ensure data is present. */
|
|
|
|
|
assert(gpu_bo[0] == FIRST_SNAPSHOT_REPORT_ID);
|
|
|
|
|
assert(gpu_bo[SECOND_SNAPSHOT_OFFSET_IN_BYTES/4] == SECOND_SNAPSHOT_REPORT_ID);
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < brw->perfmon.entries_in_bo; i++) {
|
|
|
|
|
int group = brw->perfmon.bo_layout[i].group;
|
|
|
|
|
int counter = brw->perfmon.bo_layout[i].counter;
|
|
|
|
|
|
|
|
|
|
if (group < 0 || !BITSET_TEST(m->ActiveCounters[group], counter))
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
const struct gl_perf_monitor_group *group_obj =
|
|
|
|
|
&ctx->PerfMonitor.Groups[group];
|
|
|
|
|
|
|
|
|
|
const struct gl_perf_monitor_counter *c = &group_obj->Counters[counter];
|
|
|
|
|
|
|
|
|
|
data[offset++] = group;
|
|
|
|
|
data[offset++] = counter;
|
|
|
|
|
|
|
|
|
|
uint32_t second_snapshot_index =
|
|
|
|
|
SECOND_SNAPSHOT_OFFSET_IN_BYTES / sizeof(uint32_t) + i;
|
|
|
|
|
|
|
|
|
|
/* Won't work for uint64_t values, but we don't expose any yet. */
|
|
|
|
|
data[offset] = gpu_bo[second_snapshot_index] - gpu_bo[i];
|
|
|
|
|
offset += _mesa_perf_monitor_counter_size(c) / sizeof(uint32_t);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
drm_intel_bo_unmap(monitor->bo);
|
|
|
|
|
|
|
|
|
|
if (bytes_written)
|
|
|
|
|
*bytes_written = offset * sizeof(uint32_t);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
brw_init_performance_monitors(struct brw_context *brw)
|
|
|
|
|
{
|
|
|
|
|
struct gl_context *ctx = &brw->ctx;
|
|
|
|
|
|
|
|
|
|
ctx->Driver.NewPerfMonitor = brw_new_perf_monitor;
|
|
|
|
|
ctx->Driver.DeletePerfMonitor = brw_delete_perf_monitor;
|
|
|
|
|
ctx->Driver.BeginPerfMonitor = brw_begin_perf_monitor;
|
|
|
|
|
ctx->Driver.EndPerfMonitor = brw_end_perf_monitor;
|
|
|
|
|
ctx->Driver.ResetPerfMonitor = brw_reset_perf_monitor;
|
|
|
|
|
ctx->Driver.IsPerfMonitorResultAvailable = brw_is_perf_monitor_result_available;
|
|
|
|
|
ctx->Driver.GetPerfMonitorResult = brw_get_perf_monitor_result;
|
|
|
|
|
|
|
|
|
|
if (brw->gen == 5) {
|
|
|
|
|
ctx->PerfMonitor.Groups = gen5_groups;
|
|
|
|
|
ctx->PerfMonitor.NumGroups = ARRAY_SIZE(gen5_groups);
|
|
|
|
|
brw->perfmon.bo_layout = gen5_perf_bo_layout;
|
|
|
|
|
brw->perfmon.entries_in_bo = ARRAY_SIZE(gen5_perf_bo_layout);
|
|
|
|
|
}
|
|
|
|
|
}
|