freedreno: Add perfetto renderpass support
Add a custom DataSource to provide trace events for render stages. Signed-off-by: Rob Clark <robdclark@chromium.org> Acked-by: Emma Anholt <emma@anholt.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9901>
This commit is contained in:
@@ -237,6 +237,9 @@ FreedrenoDriver::dump_perfcnt()
|
||||
|
||||
auto last_ts = last_dump_ts;
|
||||
|
||||
/* Capture the timestamp from the *start* of the sampling period: */
|
||||
last_capture_ts = last_dump_ts;
|
||||
|
||||
collect_countables();
|
||||
|
||||
auto elapsed_time_ns = last_dump_ts - last_ts;
|
||||
@@ -252,8 +255,6 @@ FreedrenoDriver::dump_perfcnt()
|
||||
configure_counters(false, false);
|
||||
}
|
||||
|
||||
last_capture_ts = last_dump_ts;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -39,6 +39,7 @@
|
||||
|
||||
#include "freedreno_autotune.h"
|
||||
#include "freedreno_gmem.h"
|
||||
#include "freedreno_perfetto.h"
|
||||
#include "freedreno_screen.h"
|
||||
#include "freedreno_util.h"
|
||||
|
||||
@@ -443,6 +444,15 @@ struct fd_context {
|
||||
|
||||
struct u_trace_context trace_context dt;
|
||||
|
||||
#ifdef HAVE_PERFETTO
|
||||
struct fd_perfetto_state perfetto;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Counter to generate submit-ids
|
||||
*/
|
||||
uint32_t submit_count;
|
||||
|
||||
/* Called on rebind_resource() for any per-gen cleanup required: */
|
||||
void (*rebind_resource)(struct fd_context *ctx, struct fd_resource *rsc) dt;
|
||||
|
||||
|
||||
@@ -671,7 +671,16 @@ fd_gmem_render_tiles(struct fd_batch *batch)
|
||||
struct pipe_framebuffer_state *pfb = &batch->framebuffer;
|
||||
bool sysmem = false;
|
||||
|
||||
ctx->submit_count++;
|
||||
|
||||
if (!batch->nondraw) {
|
||||
#if HAVE_PERFETTO
|
||||
/* For non-draw batches, we don't really have a good place to
|
||||
* match up the api event submit-id to the on-gpu rendering,
|
||||
* so skip this for non-draw batches.
|
||||
*/
|
||||
fd_perfetto_submit(ctx);
|
||||
#endif
|
||||
trace_flush_batch(&batch->trace, batch, batch->cleared,
|
||||
batch->gmem_reason, batch->num_draws);
|
||||
trace_framebuffer_state(&batch->trace, pfb);
|
||||
@@ -718,18 +727,29 @@ fd_gmem_render_tiles(struct fd_batch *batch)
|
||||
ctx->stats.batch_nondraw++;
|
||||
} else if (sysmem) {
|
||||
trace_render_sysmem(&batch->trace);
|
||||
trace_start_render_pass(
|
||||
&batch->trace, ctx->submit_count, pipe_surface_format(pfb->cbufs[0]),
|
||||
pipe_surface_format(pfb->zsbuf), pfb->width, pfb->height,
|
||||
pfb->nr_cbufs, pfb->samples, 0, 0, 0);
|
||||
if (ctx->query_prepare)
|
||||
ctx->query_prepare(batch, 1);
|
||||
render_sysmem(batch);
|
||||
trace_end_render_pass(&batch->trace);
|
||||
ctx->stats.batch_sysmem++;
|
||||
} else {
|
||||
struct fd_gmem_stateobj *gmem = lookup_gmem_state(batch, false, false);
|
||||
batch->gmem_state = gmem;
|
||||
trace_render_gmem(&batch->trace, gmem->nbins_x, gmem->nbins_y,
|
||||
gmem->bin_w, gmem->bin_h);
|
||||
trace_start_render_pass(
|
||||
&batch->trace, ctx->submit_count, pipe_surface_format(pfb->cbufs[0]),
|
||||
pipe_surface_format(pfb->zsbuf), pfb->width, pfb->height,
|
||||
pfb->nr_cbufs, pfb->samples, gmem->nbins_x * gmem->nbins_y,
|
||||
gmem->bin_w, gmem->bin_h);
|
||||
if (ctx->query_prepare)
|
||||
ctx->query_prepare(batch, gmem->nbins_x * gmem->nbins_y);
|
||||
render_tiles(batch, gmem);
|
||||
trace_end_render_pass(&batch->trace);
|
||||
batch->gmem_state = NULL;
|
||||
|
||||
fd_screen_lock(ctx->screen);
|
||||
|
||||
@@ -0,0 +1,440 @@
|
||||
/*
|
||||
* Copyright © 2021 Google, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <perfetto.h>
|
||||
|
||||
#include "util/u_perfetto.h"
|
||||
|
||||
#include "freedreno_tracepoints.h"
|
||||
|
||||
static uint32_t gpu_clock_id;
|
||||
static uint64_t next_clock_sync_ns; /* cpu time of next clk sync */
|
||||
|
||||
/**
|
||||
* The timestamp at the point where we first emitted the clock_sync..
|
||||
* this will be a *later* timestamp than the first GPU traces (since
|
||||
* we capture the first clock_sync from the CPU *after* the first GPU
|
||||
* tracepoints happen). To avoid confusing perfetto we need to drop
|
||||
* the GPU traces with timestamps before this.
|
||||
*/
|
||||
static uint64_t sync_gpu_ts;
|
||||
|
||||
/**
 * Incremental state attached to the data source.
 */
struct FdRenderpassIncrementalState {
   /* Starts out true; stage_end() sends the queue/stage descriptors and
    * resets this to false the first time it sees it set.
    */
   bool was_cleared = true;
};
|
||||
|
||||
/**
 * Data source traits: the perfetto defaults, with our own incremental
 * state type plugged in.
 */
struct FdRenderpassTraits : public perfetto::DefaultDataSourceTraits {
   using IncrementalStateType = FdRenderpassIncrementalState;
};
|
||||
|
||||
/**
 * Custom perfetto DataSource used to emit freedreno render-stage trace
 * events.
 */
class FdRenderpassDataSource
   : public perfetto::DataSource<FdRenderpassDataSource, FdRenderpassTraits> {
public:
   void OnSetup(const SetupArgs &) override
   {
      // Hook for applying custom configuration to the data source based on
      // the TraceConfig in SetupArgs.  Nothing is configured at the moment.
   }

   void OnStart(const StartArgs &) override
   {
      // This notification can be used to initialize the GPU driver, enable
      // counters, etc.  StartArgs will contain the DataSourceDescriptor,
      // which can be extended.
      u_trace_perfetto_start();
      PERFETTO_LOG("Tracing started");

      /* Note: clock_id's below 128 are reserved.. for custom clock sources,
       * using the hash of a namespaced string is the recommended approach.
       * See: https://perfetto.dev/docs/concepts/clock-sync
       *
       * The top bit is or'd into the hash.
       */
      const auto name_hash =
         _mesa_hash_string("org.freedesktop.mesa.freedreno");
      gpu_clock_id = name_hash | 0x80000000;
   }

   void OnStop(const StopArgs &) override
   {
      PERFETTO_LOG("Tracing stopped");

      // Undo any initialization done in OnStart.
      u_trace_perfetto_stop();
      // TODO we should perhaps block until queued traces are flushed?

      /* Write one last (empty) packet and flush: */
      Trace([](FdRenderpassDataSource::TraceContext tctx) {
         auto last_packet = tctx.NewTracePacket();
         last_packet->Finalize();
         tctx.Flush();
      });
   }
};
|
||||
|
||||
PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(FdRenderpassDataSource);
|
||||
PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(FdRenderpassDataSource);
|
||||
|
||||
static void
|
||||
send_descriptors(FdRenderpassDataSource::TraceContext &ctx, uint64_t ts_ns)
|
||||
{
|
||||
PERFETTO_LOG("Sending renderstage descriptors");
|
||||
|
||||
auto packet = ctx.NewTracePacket();
|
||||
|
||||
packet->set_timestamp(0);
|
||||
// packet->set_timestamp(ts_ns);
|
||||
// packet->set_timestamp_clock_id(gpu_clock_id);
|
||||
|
||||
auto event = packet->set_gpu_render_stage_event();
|
||||
event->set_gpu_id(0);
|
||||
|
||||
auto spec = event->set_specifications();
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(queues); i++) {
|
||||
auto desc = spec->add_hw_queue();
|
||||
|
||||
desc->set_name(queues[i].name);
|
||||
desc->set_description(queues[i].desc);
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(stages); i++) {
|
||||
auto desc = spec->add_stage();
|
||||
|
||||
desc->set_name(stages[i].name);
|
||||
if (stages[i].desc)
|
||||
desc->set_description(stages[i].desc);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
stage_start(struct pipe_context *pctx, uint64_t ts_ns, enum fd_stage_id stage)
|
||||
{
|
||||
struct fd_context *ctx = fd_context(pctx);
|
||||
struct fd_perfetto_state *p = &ctx->perfetto;
|
||||
|
||||
p->start_ts[stage] = ts_ns;
|
||||
}
|
||||
|
||||
static void
|
||||
stage_end(struct pipe_context *pctx, uint64_t ts_ns, enum fd_stage_id stage)
|
||||
{
|
||||
struct fd_context *ctx = fd_context(pctx);
|
||||
struct fd_perfetto_state *p = &ctx->perfetto;
|
||||
|
||||
/* If we haven't managed to calibrate the alignment between GPU and CPU
|
||||
* timestamps yet, then skip this trace, otherwise perfetto won't know
|
||||
* what to do with it.
|
||||
*/
|
||||
if (!sync_gpu_ts)
|
||||
return;
|
||||
|
||||
FdRenderpassDataSource::Trace([=](FdRenderpassDataSource::TraceContext tctx) {
|
||||
if (auto state = tctx.GetIncrementalState(); state->was_cleared) {
|
||||
send_descriptors(tctx, p->start_ts[stage]);
|
||||
state->was_cleared = false;
|
||||
}
|
||||
|
||||
auto packet = tctx.NewTracePacket();
|
||||
|
||||
packet->set_timestamp(p->start_ts[stage]);
|
||||
packet->set_timestamp_clock_id(gpu_clock_id);
|
||||
|
||||
auto event = packet->set_gpu_render_stage_event();
|
||||
event->set_event_id(0); // ???
|
||||
event->set_hw_queue_id(DEFAULT_HW_QUEUE_ID);
|
||||
event->set_duration(ts_ns - p->start_ts[stage]);
|
||||
event->set_stage_id(stage);
|
||||
event->set_context((uintptr_t)pctx);
|
||||
|
||||
/* The "surface" meta-stage has extra info about render target: */
|
||||
if (stage == SURFACE_STAGE_ID) {
|
||||
|
||||
event->set_submission_id(p->submit_id);
|
||||
|
||||
if (p->cbuf0_format) {
|
||||
auto data = event->add_extra_data();
|
||||
|
||||
data->set_name("color0 format");
|
||||
data->set_value(util_format_short_name(p->cbuf0_format));
|
||||
}
|
||||
|
||||
if (p->zs_format) {
|
||||
auto data = event->add_extra_data();
|
||||
|
||||
data->set_name("zs format");
|
||||
data->set_value(util_format_short_name(p->zs_format));
|
||||
}
|
||||
|
||||
{
|
||||
auto data = event->add_extra_data();
|
||||
|
||||
data->set_name("width");
|
||||
data->set_value(std::to_string(p->width));
|
||||
}
|
||||
|
||||
{
|
||||
auto data = event->add_extra_data();
|
||||
|
||||
data->set_name("height");
|
||||
data->set_value(std::to_string(p->height));
|
||||
}
|
||||
|
||||
{
|
||||
auto data = event->add_extra_data();
|
||||
|
||||
data->set_name("MSAA");
|
||||
data->set_value(std::to_string(p->samples));
|
||||
}
|
||||
|
||||
{
|
||||
auto data = event->add_extra_data();
|
||||
|
||||
data->set_name("MRTs");
|
||||
data->set_value(std::to_string(p->mrts));
|
||||
}
|
||||
|
||||
// "renderMode"
|
||||
// "surfaceID"
|
||||
|
||||
if (p->nbins) {
|
||||
auto data = event->add_extra_data();
|
||||
|
||||
data->set_name("numberOfBins");
|
||||
data->set_value(std::to_string(p->nbins));
|
||||
}
|
||||
|
||||
if (p->binw) {
|
||||
auto data = event->add_extra_data();
|
||||
|
||||
data->set_name("binWidth");
|
||||
data->set_value(std::to_string(p->binw));
|
||||
}
|
||||
|
||||
if (p->binh) {
|
||||
auto data = event->add_extra_data();
|
||||
|
||||
data->set_name("binHeight");
|
||||
data->set_value(std::to_string(p->binh));
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void
|
||||
fd_perfetto_init(void)
|
||||
{
|
||||
util_perfetto_init();
|
||||
|
||||
perfetto::DataSourceDescriptor dsd;
|
||||
dsd.set_name("gpu.renderstages.msm");
|
||||
FdRenderpassDataSource::Register(dsd);
|
||||
}
|
||||
|
||||
static void
|
||||
sync_timestamp(struct fd_context *ctx)
|
||||
{
|
||||
uint64_t cpu_ts = perfetto::base::GetBootTimeNs().count();
|
||||
uint64_t gpu_ts;
|
||||
|
||||
if (cpu_ts < next_clock_sync_ns)
|
||||
return;
|
||||
|
||||
if (fd_pipe_get_param(ctx->pipe, FD_TIMESTAMP, &gpu_ts)) {
|
||||
PERFETTO_ELOG("Could not sync CPU and GPU clocks");
|
||||
return;
|
||||
}
|
||||
|
||||
/* convert GPU ts into ns: */
|
||||
gpu_ts = ctx->ts_to_ns(gpu_ts);
|
||||
|
||||
FdRenderpassDataSource::Trace([=](FdRenderpassDataSource::TraceContext tctx) {
|
||||
auto packet = tctx.NewTracePacket();
|
||||
|
||||
packet->set_timestamp(cpu_ts);
|
||||
|
||||
auto event = packet->set_clock_snapshot();
|
||||
|
||||
{
|
||||
auto clock = event->add_clocks();
|
||||
|
||||
clock->set_clock_id(perfetto::protos::pbzero::BUILTIN_CLOCK_BOOTTIME);
|
||||
clock->set_timestamp(cpu_ts);
|
||||
}
|
||||
|
||||
{
|
||||
auto clock = event->add_clocks();
|
||||
|
||||
clock->set_clock_id(gpu_clock_id);
|
||||
clock->set_timestamp(gpu_ts);
|
||||
}
|
||||
|
||||
sync_gpu_ts = gpu_ts;
|
||||
next_clock_sync_ns = cpu_ts + 30000000;
|
||||
});
|
||||
}
|
||||
|
||||
static void
|
||||
emit_submit_id(struct fd_context *ctx)
|
||||
{
|
||||
FdRenderpassDataSource::Trace([=](FdRenderpassDataSource::TraceContext tctx) {
|
||||
auto packet = tctx.NewTracePacket();
|
||||
|
||||
packet->set_timestamp(perfetto::base::GetBootTimeNs().count());
|
||||
|
||||
auto event = packet->set_vulkan_api_event();
|
||||
auto submit = event->set_vk_queue_submit();
|
||||
|
||||
submit->set_submission_id(ctx->submit_count);
|
||||
});
|
||||
}
|
||||
|
||||
void
|
||||
fd_perfetto_submit(struct fd_context *ctx)
|
||||
{
|
||||
sync_timestamp(ctx);
|
||||
emit_submit_id(ctx);
|
||||
}
|
||||
|
||||
/*
|
||||
* Trace callbacks, called from u_trace once the timestamps from GPU have been
|
||||
* collected.
|
||||
*/
|
||||
|
||||
void
|
||||
fd_start_render_pass(struct pipe_context *pctx, uint64_t ts_ns,
|
||||
const struct trace_start_render_pass *payload)
|
||||
{
|
||||
stage_start(pctx, ts_ns, SURFACE_STAGE_ID);
|
||||
|
||||
struct fd_perfetto_state *p = &fd_context(pctx)->perfetto;
|
||||
|
||||
p->submit_id = payload->submit_id;
|
||||
p->cbuf0_format = payload->cbuf0_format;
|
||||
p->zs_format = payload->zs_format;
|
||||
p->width = payload->width;
|
||||
p->height = payload->height;
|
||||
p->mrts = payload->mrts;
|
||||
p->samples = payload->samples;
|
||||
p->nbins = payload->nbins;
|
||||
p->binw = payload->binw;
|
||||
p->binh = payload->binh;
|
||||
}
|
||||
|
||||
void
|
||||
fd_end_render_pass(struct pipe_context *pctx, uint64_t ts_ns,
|
||||
const struct trace_end_render_pass *payload)
|
||||
{
|
||||
stage_end(pctx, ts_ns, SURFACE_STAGE_ID);
|
||||
}
|
||||
|
||||
void
|
||||
fd_start_binning_ib(struct pipe_context *pctx, uint64_t ts_ns,
|
||||
const struct trace_start_binning_ib *payload)
|
||||
{
|
||||
stage_start(pctx, ts_ns, BINNING_STAGE_ID);
|
||||
}
|
||||
|
||||
void
|
||||
fd_end_binning_ib(struct pipe_context *pctx, uint64_t ts_ns,
|
||||
const struct trace_end_binning_ib *payload)
|
||||
{
|
||||
stage_end(pctx, ts_ns, BINNING_STAGE_ID);
|
||||
}
|
||||
|
||||
void
|
||||
fd_start_draw_ib(struct pipe_context *pctx, uint64_t ts_ns,
|
||||
const struct trace_start_draw_ib *payload)
|
||||
{
|
||||
stage_start(
|
||||
pctx, ts_ns,
|
||||
fd_context(pctx)->perfetto.nbins ? GMEM_STAGE_ID : BYPASS_STAGE_ID);
|
||||
}
|
||||
|
||||
void
|
||||
fd_end_draw_ib(struct pipe_context *pctx, uint64_t ts_ns,
|
||||
const struct trace_end_draw_ib *payload)
|
||||
{
|
||||
stage_end(
|
||||
pctx, ts_ns,
|
||||
fd_context(pctx)->perfetto.nbins ? GMEM_STAGE_ID : BYPASS_STAGE_ID);
|
||||
}
|
||||
|
||||
void
|
||||
fd_start_blit(struct pipe_context *pctx, uint64_t ts_ns,
|
||||
const struct trace_start_blit *payload)
|
||||
{
|
||||
stage_start(pctx, ts_ns, BLIT_STAGE_ID);
|
||||
}
|
||||
|
||||
void
|
||||
fd_end_blit(struct pipe_context *pctx, uint64_t ts_ns,
|
||||
const struct trace_end_blit *payload)
|
||||
{
|
||||
stage_end(pctx, ts_ns, BLIT_STAGE_ID);
|
||||
}
|
||||
|
||||
void
|
||||
fd_start_compute(struct pipe_context *pctx, uint64_t ts_ns,
|
||||
const struct trace_start_compute *payload)
|
||||
{
|
||||
stage_start(pctx, ts_ns, COMPUTE_STAGE_ID);
|
||||
}
|
||||
|
||||
void
|
||||
fd_end_compute(struct pipe_context *pctx, uint64_t ts_ns,
|
||||
const struct trace_end_compute *payload)
|
||||
{
|
||||
stage_end(pctx, ts_ns, COMPUTE_STAGE_ID);
|
||||
}
|
||||
|
||||
void
|
||||
fd_start_clear_restore(struct pipe_context *pctx, uint64_t ts_ns,
|
||||
const struct trace_start_clear_restore *payload)
|
||||
{
|
||||
stage_start(pctx, ts_ns, CLEAR_RESTORE_STAGE_ID);
|
||||
}
|
||||
|
||||
void
|
||||
fd_end_clear_restore(struct pipe_context *pctx, uint64_t ts_ns,
|
||||
const struct trace_end_clear_restore *payload)
|
||||
{
|
||||
stage_end(pctx, ts_ns, CLEAR_RESTORE_STAGE_ID);
|
||||
}
|
||||
|
||||
void
|
||||
fd_start_resolve(struct pipe_context *pctx, uint64_t ts_ns,
|
||||
const struct trace_start_resolve *payload)
|
||||
{
|
||||
stage_start(pctx, ts_ns, RESOLVE_STAGE_ID);
|
||||
}
|
||||
|
||||
void
|
||||
fd_end_resolve(struct pipe_context *pctx, uint64_t ts_ns,
|
||||
const struct trace_end_resolve *payload)
|
||||
{
|
||||
stage_end(pctx, ts_ns, RESOLVE_STAGE_ID);
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
@@ -0,0 +1,118 @@
|
||||
/*
|
||||
* Copyright © 2021 Google, Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef FREEDRENO_PERFETTO_H_
|
||||
#define FREEDRENO_PERFETTO_H_
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_PERFETTO
|
||||
|
||||
/**
|
||||
* Render-stage id's
|
||||
*/
|
||||
enum fd_stage_id {
   SURFACE_STAGE_ID, /* Surface is a sort of meta-stage for render-target info */
   BINNING_STAGE_ID,
   GMEM_STAGE_ID,
   BYPASS_STAGE_ID,
   BLIT_STAGE_ID,
   COMPUTE_STAGE_ID,
   CLEAR_RESTORE_STAGE_ID,
   RESOLVE_STAGE_ID,
   // TODO add the rest

   NUM_STAGES /* number of stage ids, not a stage itself */
};

/**
 * Name and (optional) description for each render stage, indexed by
 * fd_stage_id.  An entry without a description leaves desc NULL.
 */
static const struct {
   const char *name;
   const char *desc;
} stages[] = {
   [SURFACE_STAGE_ID] = {"Surface"},
   [BINNING_STAGE_ID] = {"Binning", "Perform Visibility pass and determine target bins"},
   [GMEM_STAGE_ID] = {"Render", "Rendering to GMEM"},
   [BYPASS_STAGE_ID] = {"Render", "Rendering to system memory"},
   [BLIT_STAGE_ID] = {"Blit", "Performing a Blit operation"},
   [COMPUTE_STAGE_ID] = {"Compute", "Compute job"},
   [CLEAR_RESTORE_STAGE_ID] = {"Clear/Restore", "Clear (sysmem) or per-tile clear or restore (GMEM)"},
   [RESOLVE_STAGE_ID] = {"Resolve", "Per tile resolve (GMEM to system memory)"},
   // TODO add the rest
};
|
||||
|
||||
/**
|
||||
* Queue-id's
|
||||
*/
|
||||
enum {
   DEFAULT_HW_QUEUE_ID, /* the only hw queue described so far */
};

/**
 * Name and description for each hw queue, indexed by queue id.
 */
static const struct {
   const char *name;
   const char *desc;
} queues[] = {
   [DEFAULT_HW_QUEUE_ID] = {
      "GPU Queue 0",
      "Default Adreno Hardware Queue",
   },
};
|
||||
|
||||
/**
|
||||
* The u_trace tracepoints which are used to capture GPU timestamps and
|
||||
* trigger perfetto events tend to come in begin/end pairs (ie. start
|
||||
* and end of binning pass, etc), but perfetto wants one event for the
|
||||
* whole pass. So we need to buffer up some state at the "begin" trace
|
||||
* callback, and then emit the perfetto event at the "end" event based
|
||||
* on previously recorded timestamp/data. This struct is where we can
|
||||
* accumulate that state.
|
||||
*/
|
||||
struct fd_perfetto_state {
|
||||
uint64_t start_ts[NUM_STAGES];
|
||||
|
||||
/*
|
||||
* Surface state for the renderpass:
|
||||
*/
|
||||
uint32_t submit_id;
|
||||
enum pipe_format cbuf0_format : 16;
|
||||
enum pipe_format zs_format : 16;
|
||||
uint16_t width;
|
||||
uint16_t height;
|
||||
uint8_t mrts;
|
||||
uint8_t samples;
|
||||
uint16_t nbins;
|
||||
uint16_t binw;
|
||||
uint16_t binh;
|
||||
// TODO # of draws and possibly estimated cost might be useful addition..
|
||||
};
|
||||
|
||||
void fd_perfetto_init(void);
|
||||
|
||||
struct fd_context;
|
||||
void fd_perfetto_submit(struct fd_context *ctx);
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* FREEDRENO_PERFETTO_H_ */
|
||||
@@ -45,6 +45,7 @@
|
||||
#include <sys/sysinfo.h>
|
||||
|
||||
#include "freedreno_fence.h"
|
||||
#include "freedreno_perfetto.h"
|
||||
#include "freedreno_query.h"
|
||||
#include "freedreno_resource.h"
|
||||
#include "freedreno_screen.h"
|
||||
@@ -931,6 +932,10 @@ fd_screen_create(struct fd_device *dev, struct renderonly *ro)
|
||||
if (!screen)
|
||||
return NULL;
|
||||
|
||||
#ifdef HAVE_PERFETTO
|
||||
fd_perfetto_init();
|
||||
#endif
|
||||
|
||||
pscreen = &screen->base;
|
||||
|
||||
screen->dev = dev;
|
||||
|
||||
@@ -69,8 +69,28 @@ Tracepoint('render_gmem',
|
||||
|
||||
Tracepoint('render_sysmem')
|
||||
|
||||
Tracepoint('start_binning_ib')
|
||||
Tracepoint('end_binning_ib')
|
||||
# Note that this doesn't include full information about all of the MRTs
|
||||
# but seems to roughly match what I see with a blob trace
|
||||
Tracepoint('start_render_pass',
|
||||
args=[['uint32_t', 'submit_id'],
|
||||
['enum pipe_format', 'cbuf0_format'],
|
||||
['enum pipe_format', 'zs_format'],
|
||||
['uint16_t', 'width'],
|
||||
['uint16_t', 'height'],
|
||||
['uint8_t', 'mrts'],
|
||||
['uint8_t', 'samples'],
|
||||
['uint16_t', 'nbins'],
|
||||
['uint16_t', 'binw'],
|
||||
['uint16_t', 'binh']],
|
||||
tp_perfetto='fd_start_render_pass'
|
||||
)
|
||||
Tracepoint('end_render_pass',
|
||||
tp_perfetto='fd_end_render_pass')
|
||||
|
||||
Tracepoint('start_binning_ib',
|
||||
tp_perfetto='fd_start_binning_ib')
|
||||
Tracepoint('end_binning_ib',
|
||||
tp_perfetto='fd_end_binning_ib')
|
||||
|
||||
Tracepoint('start_vsc_overflow_test')
|
||||
Tracepoint('end_vsc_overflow_test')
|
||||
@@ -81,12 +101,16 @@ Tracepoint('end_prologue')
|
||||
# For GMEM pass, where this could either be a clear or resolve
|
||||
Tracepoint('start_clear_restore',
|
||||
args=[['uint16_t', 'fast_cleared']],
|
||||
tp_print=['fast_cleared: 0x%x', '__entry->fast_cleared']
|
||||
tp_print=['fast_cleared: 0x%x', '__entry->fast_cleared'],
|
||||
tp_perfetto='fd_start_clear_restore',
|
||||
)
|
||||
Tracepoint('end_clear_restore')
|
||||
Tracepoint('end_clear_restore',
|
||||
tp_perfetto='fd_end_clear_restore')
|
||||
|
||||
Tracepoint('start_resolve')
|
||||
Tracepoint('end_resolve')
|
||||
Tracepoint('start_resolve',
|
||||
tp_perfetto='fd_start_resolve')
|
||||
Tracepoint('end_resolve',
|
||||
tp_perfetto='fd_end_resolve')
|
||||
|
||||
Tracepoint('start_tile',
|
||||
args=[['uint16_t', 'bin_h'],
|
||||
@@ -97,18 +121,24 @@ Tracepoint('start_tile',
|
||||
'__entry->bin_h', '__entry->yoff', '__entry->bin_w', '__entry->xoff'],
|
||||
)
|
||||
|
||||
Tracepoint('start_draw_ib')
|
||||
Tracepoint('end_draw_ib')
|
||||
Tracepoint('start_draw_ib',
|
||||
tp_perfetto='fd_start_draw_ib')
|
||||
Tracepoint('end_draw_ib',
|
||||
tp_perfetto='fd_end_draw_ib')
|
||||
|
||||
Tracepoint('start_blit',
|
||||
args=[['enum pipe_texture_target', 'src_target'],
|
||||
['enum pipe_texture_target', 'dst_target']],
|
||||
tp_print=['%s -> %s', 'util_str_tex_target(__entry->src_target, true)',
|
||||
'util_str_tex_target(__entry->dst_target, true)'],
|
||||
tp_perfetto='fd_start_blit',
|
||||
)
|
||||
Tracepoint('end_blit')
|
||||
Tracepoint('end_blit',
|
||||
tp_perfetto='fd_end_blit')
|
||||
|
||||
Tracepoint('start_compute')
|
||||
Tracepoint('end_compute')
|
||||
Tracepoint('start_compute',
|
||||
tp_perfetto='fd_start_compute')
|
||||
Tracepoint('end_compute',
|
||||
tp_perfetto='fd_end_compute')
|
||||
|
||||
utrace_generate(cpath=args.src, hpath=args.hdr)
|
||||
|
||||
@@ -248,6 +248,23 @@ if cpp.has_argument('-Wpacked-bitfield-compat')
|
||||
freedreno_cpp_args += '-Wno-packed-bitfield-compat'
|
||||
endif
|
||||
|
||||
libfreedreno_dependencies = [
|
||||
dep_libdrm,
|
||||
idep_mesautil,
|
||||
idep_nir_headers,
|
||||
idep_libfreedreno_common,
|
||||
idep_u_tracepoints,
|
||||
]
|
||||
|
||||
if with_perfetto
|
||||
libfreedreno_dependencies += dep_perfetto
|
||||
files_libfreedreno += 'freedreno_perfetto.cc'
|
||||
endif
|
||||
|
||||
# The header file ends up part of the build (but just a stub)
|
||||
# in either case:
|
||||
files_libfreedreno += 'freedreno_perfetto.h'
|
||||
|
||||
libfreedreno = static_library(
|
||||
'freedreno',
|
||||
[files_libfreedreno, freedreno_xml_header_files],
|
||||
@@ -255,13 +272,7 @@ libfreedreno = static_library(
|
||||
c_args : [freedreno_c_args],
|
||||
cpp_args : [freedreno_cpp_args],
|
||||
gnu_symbol_visibility : 'hidden',
|
||||
dependencies : [
|
||||
dep_libdrm,
|
||||
idep_mesautil,
|
||||
idep_nir_headers,
|
||||
idep_libfreedreno_common,
|
||||
idep_u_tracepoints,
|
||||
],
|
||||
dependencies : libfreedreno_dependencies,
|
||||
)
|
||||
|
||||
driver_freedreno = declare_dependency(
|
||||
|
||||
Reference in New Issue
Block a user