radeonsi: print saved CS to the log context

Use the auto logger facility, so that CS chunks will be interleaved
with other log info.

v2:
- fix some crashes when not using CE
- fix skipping "previous" chunks of current (unflushed) IB
- fix error handling in si_begin_cs_debug

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
This commit is contained in:
Nicolai Hähnle
2017-08-04 18:24:33 +02:00
committed by Nicolai Hähnle
parent bc93339799
commit 4c3f36ec6b
5 changed files with 262 additions and 87 deletions

View File

@@ -34,6 +34,9 @@
#include "util/u_memory.h"
#include "ac_debug.h"
static void si_dump_bo_list(struct si_context *sctx,
const struct radeon_saved_cs *saved, FILE *f);
DEBUG_GET_ONCE_OPTION(replace_shaders, "RADEON_REPLACE_SHADERS", NULL)
static void si_dump_shader(struct si_screen *sscreen,
@@ -266,49 +269,181 @@ static void si_dump_debug_registers(struct si_context *sctx, FILE *f)
fprintf(f, "\n");
}
static void si_dump_last_ib(struct si_context *sctx, FILE *f)
struct si_log_chunk_cs {
struct si_context *ctx;
struct si_saved_cs *cs;
bool dump_bo_list;
unsigned gfx_begin, gfx_end;
unsigned ce_begin, ce_end;
};
static void si_log_chunk_type_cs_destroy(void *data)
{
struct si_log_chunk_cs *chunk = data;
si_saved_cs_reference(&chunk->cs, NULL);
free(chunk);
}
static void si_parse_current_ib(FILE *f, struct radeon_winsys_cs *cs,
unsigned begin, unsigned end,
unsigned last_trace_id, const char *name,
enum chip_class chip_class)
{
unsigned orig_end = end;
assert(begin <= end);
fprintf(f, "------------------ %s begin (dw = %u) ------------------\n",
name, begin);
for (unsigned prev_idx = 0; prev_idx < cs->num_prev; ++prev_idx) {
struct radeon_winsys_cs_chunk *chunk = &cs->prev[prev_idx];
if (begin < chunk->cdw) {
ac_parse_ib_chunk(f, chunk->buf + begin,
MIN2(end, chunk->cdw) - begin,
last_trace_id, chip_class, NULL, NULL);
}
if (end <= chunk->cdw)
return;
if (begin < chunk->cdw)
fprintf(f, "\n---------- Next %s Chunk ----------\n\n",
name);
begin -= MIN2(begin, chunk->cdw);
end -= chunk->cdw;
}
assert(end <= cs->current.cdw);
ac_parse_ib_chunk(f, cs->current.buf + begin, end - begin, last_trace_id,
chip_class, NULL, NULL);
fprintf(f, "------------------- %s end (dw = %u) -------------------\n\n",
name, orig_end);
}
static void si_log_chunk_type_cs_print(void *data, FILE *f)
{
struct si_log_chunk_cs *chunk = data;
struct si_context *ctx = chunk->ctx;
struct si_saved_cs *scs = chunk->cs;
int last_trace_id = -1;
int last_ce_trace_id = -1;
if (!sctx->last_gfx.ib)
return;
/* We are expecting that the ddebug pipe has already
* waited for the context, so this buffer should be idle.
* If the GPU is hung, there is no point in waiting for it.
*/
uint32_t *map = ctx->b.ws->buffer_map(scs->trace_buf->buf,
NULL,
PIPE_TRANSFER_UNSYNCHRONIZED |
PIPE_TRANSFER_READ);
if (map) {
last_trace_id = map[0];
last_ce_trace_id = map[1];
}
if (sctx->last_trace_buf) {
/* We are expecting that the ddebug pipe has already
* waited for the context, so this buffer should be idle.
* If the GPU is hung, there is no point in waiting for it.
*/
uint32_t *map = sctx->b.ws->buffer_map(sctx->last_trace_buf->buf,
NULL,
PIPE_TRANSFER_UNSYNCHRONIZED |
PIPE_TRANSFER_READ);
if (map) {
last_trace_id = map[0];
last_ce_trace_id = map[1];
if (chunk->gfx_end != chunk->gfx_begin) {
if (chunk->gfx_begin == 0) {
if (ctx->init_config)
ac_parse_ib(f, ctx->init_config->pm4, ctx->init_config->ndw,
-1, "IB2: Init config", ctx->b.chip_class,
NULL, NULL);
if (ctx->init_config_gs_rings)
ac_parse_ib(f, ctx->init_config_gs_rings->pm4,
ctx->init_config_gs_rings->ndw,
-1, "IB2: Init GS rings", ctx->b.chip_class,
NULL, NULL);
}
if (scs->flushed) {
ac_parse_ib(f, scs->gfx.ib + chunk->gfx_begin,
chunk->gfx_end - chunk->gfx_begin,
last_trace_id, "IB", ctx->b.chip_class,
NULL, NULL);
} else {
si_parse_current_ib(f, ctx->b.gfx.cs, chunk->gfx_begin,
chunk->gfx_end, last_trace_id, "IB",
ctx->b.chip_class);
}
}
if (sctx->init_config)
ac_parse_ib(f, sctx->init_config->pm4, sctx->init_config->ndw,
-1, "IB2: Init config", sctx->b.chip_class,
NULL, NULL);
if (chunk->ce_end != chunk->ce_begin) {
assert(ctx->ce_ib);
if (sctx->init_config_gs_rings)
ac_parse_ib(f, sctx->init_config_gs_rings->pm4,
sctx->init_config_gs_rings->ndw,
-1, "IB2: Init GS rings", sctx->b.chip_class,
NULL, NULL);
ac_parse_ib(f, sctx->last_gfx.ib, sctx->last_gfx.num_dw,
last_trace_id, "IB", sctx->b.chip_class,
NULL, NULL);
if (sctx->last_ce.ib) {
ac_parse_ib(f, sctx->last_ce.ib, sctx->last_ce.num_dw,
last_ce_trace_id, "CE IB", sctx->b.chip_class,
NULL, NULL);
if (scs->flushed) {
ac_parse_ib(f, scs->ce.ib + chunk->ce_begin,
chunk->ce_end - chunk->ce_begin,
last_ce_trace_id, "CE IB", ctx->b.chip_class,
NULL, NULL);
} else {
si_parse_current_ib(f, ctx->ce_ib, chunk->ce_begin,
chunk->ce_end, last_ce_trace_id, "CE IB",
ctx->b.chip_class);
}
}
if (chunk->dump_bo_list) {
fprintf(f, "Flushing.\n\n");
si_dump_bo_list(ctx, &scs->gfx, f);
}
}
static const struct u_log_chunk_type si_log_chunk_type_cs = {
.destroy = si_log_chunk_type_cs_destroy,
.print = si_log_chunk_type_cs_print,
};
static void si_log_cs(struct si_context *ctx, struct u_log_context *log,
bool dump_bo_list)
{
assert(ctx->current_saved_cs);
struct si_saved_cs *scs = ctx->current_saved_cs;
unsigned gfx_cur = ctx->b.gfx.cs->prev_dw + ctx->b.gfx.cs->current.cdw;
unsigned ce_cur = 0;
if (ctx->ce_ib)
ce_cur = ctx->ce_ib->prev_dw + ctx->ce_ib->current.cdw;
if (!dump_bo_list &&
gfx_cur == scs->gfx_last_dw &&
ce_cur == scs->ce_last_dw)
return;
struct si_log_chunk_cs *chunk = calloc(1, sizeof(*chunk));
chunk->ctx = ctx;
si_saved_cs_reference(&chunk->cs, scs);
chunk->dump_bo_list = dump_bo_list;
chunk->gfx_begin = scs->gfx_last_dw;
chunk->gfx_end = gfx_cur;
scs->gfx_last_dw = gfx_cur;
chunk->ce_begin = scs->ce_last_dw;
chunk->ce_end = ce_cur;
scs->ce_last_dw = ce_cur;
u_log_chunk(log, &si_log_chunk_type_cs, chunk);
}
void si_auto_log_cs(void *data, struct u_log_context *log)
{
struct si_context *ctx = (struct si_context *)data;
si_log_cs(ctx, log, false);
}
void si_log_hw_flush(struct si_context *sctx)
{
if (!sctx->b.log)
return;
si_log_cs(sctx, sctx->b.log, true);
}
static const char *priority_to_string(enum radeon_bo_priority priority)
@@ -920,6 +1055,9 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
{
struct si_context *sctx = (struct si_context*)ctx;
if (sctx->b.log)
u_log_flush(sctx->b.log);
if (flags & PIPE_DUMP_DEVICE_STATUS_REGISTERS) {
si_dump_debug_registers(sctx, f);
@@ -957,18 +1095,6 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
u_log_new_page_print(&log, f);
u_log_context_destroy(&log);
if (flags & PIPE_DUMP_LAST_COMMAND_BUFFER) {
si_dump_bo_list(sctx, &sctx->last_gfx, f);
si_dump_last_ib(sctx, f);
fprintf(f, "Done.\n");
/* dump only once */
radeon_clear_saved_cs(&sctx->last_gfx);
radeon_clear_saved_cs(&sctx->last_ce);
r600_resource_reference(&sctx->last_trace_buf, NULL);
}
}
static void si_dump_dma(struct si_context *sctx,

View File

@@ -57,6 +57,14 @@ static unsigned si_ce_needed_cs_space(void)
return space;
}
void si_destroy_saved_cs(struct si_saved_cs *scs)
{
radeon_clear_saved_cs(&scs->gfx);
radeon_clear_saved_cs(&scs->ce);
r600_resource_reference(&scs->trace_buf, NULL);
free(scs);
}
/* initialize */
void si_need_cs_space(struct si_context *ctx)
{
@@ -139,17 +147,15 @@ void si_context_gfx_flush(void *context, unsigned flags,
si_emit_cache_flush(ctx);
if (ctx->trace_buf)
if (ctx->current_saved_cs) {
si_trace_emit(ctx);
si_log_hw_flush(ctx);
if (ctx->is_debug) {
/* Save the IB for debug contexts. */
radeon_clear_saved_cs(&ctx->last_gfx);
radeon_save_cs(ws, cs, &ctx->last_gfx, true);
radeon_clear_saved_cs(&ctx->last_ce);
radeon_save_cs(ws, ctx->ce_ib, &ctx->last_ce, false);
r600_resource_reference(&ctx->last_trace_buf, ctx->trace_buf);
r600_resource_reference(&ctx->trace_buf, NULL);
radeon_save_cs(ws, cs, &ctx->current_saved_cs->gfx, true);
if (ctx->ce_ib)
radeon_save_cs(ws, ctx->ce_ib, &ctx->current_saved_cs->ce, false);
ctx->current_saved_cs->flushed = true;
}
/* Flush the CS. */
@@ -165,31 +171,50 @@ void si_context_gfx_flush(void *context, unsigned flags,
*/
ctx->b.ws->fence_wait(ctx->b.ws, ctx->b.last_gfx_fence, 800*1000*1000);
si_check_vm_faults(&ctx->b, &ctx->last_gfx, RING_GFX);
si_check_vm_faults(&ctx->b, &ctx->current_saved_cs->gfx, RING_GFX);
}
if (ctx->current_saved_cs)
si_saved_cs_reference(&ctx->current_saved_cs, NULL);
si_begin_new_cs(ctx);
ctx->gfx_flush_in_progress = false;
}
void si_begin_new_cs(struct si_context *ctx)
static void si_begin_cs_debug(struct si_context *ctx)
{
if (ctx->is_debug) {
static const uint32_t zeros[2];
static const uint32_t zeros[2];
assert(!ctx->current_saved_cs);
/* Create a buffer used for writing trace IDs and initialize it to 0. */
assert(!ctx->trace_buf);
ctx->trace_buf = (struct r600_resource*)
ctx->current_saved_cs = calloc(1, sizeof(*ctx->current_saved_cs));
if (!ctx->current_saved_cs)
return;
pipe_reference_init(&ctx->current_saved_cs->reference, 1);
ctx->current_saved_cs->trace_buf = (struct r600_resource*)
pipe_buffer_create(ctx->b.b.screen, 0,
PIPE_USAGE_STAGING, 8);
if (ctx->trace_buf)
pipe_buffer_write_nooverlap(&ctx->b.b, &ctx->trace_buf->b.b,
0, sizeof(zeros), zeros);
ctx->trace_id = 0;
if (!ctx->current_saved_cs->trace_buf) {
free(ctx->current_saved_cs);
ctx->current_saved_cs = NULL;
return;
}
if (ctx->trace_buf)
si_trace_emit(ctx);
pipe_buffer_write_nooverlap(&ctx->b.b, &ctx->current_saved_cs->trace_buf->b.b,
0, sizeof(zeros), zeros);
ctx->current_saved_cs->trace_id = 0;
si_trace_emit(ctx);
radeon_add_to_buffer_list(&ctx->b, &ctx->b.gfx, ctx->current_saved_cs->trace_buf,
RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE);
}
void si_begin_new_cs(struct si_context *ctx)
{
if (ctx->is_debug)
si_begin_cs_debug(ctx);
/* Flush read caches at the beginning of CS not flushed by the kernel. */
if (ctx->b.chip_class >= CIK)

View File

@@ -96,9 +96,7 @@ static void si_destroy_context(struct pipe_context *context)
LLVMDisposeTargetMachine(sctx->tm);
r600_resource_reference(&sctx->trace_buf, NULL);
r600_resource_reference(&sctx->last_trace_buf, NULL);
radeon_clear_saved_cs(&sctx->last_gfx);
si_saved_cs_reference(&sctx->current_saved_cs, NULL);
pb_slabs_deinit(&sctx->bindless_descriptor_slabs);
util_dynarray_fini(&sctx->bindless_descriptors);
@@ -163,6 +161,9 @@ static void si_set_log_context(struct pipe_context *ctx,
{
struct si_context *sctx = (struct si_context *)ctx;
sctx->b.log = log;
if (log)
u_log_add_auto_logger(log, si_auto_log_cs, sctx);
}
static struct pipe_context *si_create_context(struct pipe_screen *screen,

View File

@@ -268,6 +268,19 @@ struct si_image_handle
struct pipe_image_view view;
};
struct si_saved_cs {
struct pipe_reference reference;
struct si_context *ctx;
struct radeon_saved_cs gfx;
struct radeon_saved_cs ce;
struct r600_resource *trace_buf;
unsigned trace_id;
unsigned gfx_last_dw;
unsigned ce_last_dw;
bool flushed;
};
struct si_context {
struct r600_common_context b;
struct blitter_context *blitter;
@@ -419,11 +432,7 @@ struct si_context {
/* Debug state. */
bool is_debug;
struct radeon_saved_cs last_gfx;
struct radeon_saved_cs last_ce;
struct r600_resource *last_trace_buf;
struct r600_resource *trace_buf;
unsigned trace_id;
struct si_saved_cs *current_saved_cs;
uint64_t dmesg_timestamp;
unsigned apitrace_call_number;
@@ -497,6 +506,8 @@ void cik_emit_prefetch_L2(struct si_context *sctx);
void si_init_cp_dma_functions(struct si_context *sctx);
/* si_debug.c */
void si_auto_log_cs(void *data, struct u_log_context *log);
void si_log_hw_flush(struct si_context *sctx);
void si_init_debug_functions(struct si_context *sctx);
void si_check_vm_faults(struct r600_common_context *ctx,
struct radeon_saved_cs *saved, enum ring_type ring);
@@ -506,6 +517,7 @@ bool si_replace_shader(unsigned num, struct ac_shader_binary *binary);
void si_init_dma_functions(struct si_context *sctx);
/* si_hw_context.c */
void si_destroy_saved_cs(struct si_saved_cs *scs);
void si_context_gfx_flush(void *context, unsigned flags,
struct pipe_fence_handle **fence);
void si_begin_new_cs(struct si_context *ctx);
@@ -603,4 +615,13 @@ si_optimal_tcc_alignment(struct si_context *sctx, unsigned upload_size)
return MIN2(alignment, tcc_cache_line_size);
}
static inline void
si_saved_cs_reference(struct si_saved_cs **dst, struct si_saved_cs *src)
{
if (pipe_reference(&(*dst)->reference, &src->reference))
si_destroy_saved_cs(*dst);
*dst = src;
}
#endif

View File

@@ -30,6 +30,7 @@
#include "gfx9d.h"
#include "util/u_index_modify.h"
#include "util/u_log.h"
#include "util/u_upload_mgr.h"
#include "util/u_prim.h"
@@ -1434,7 +1435,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
si_ce_post_draw_synchronization(sctx);
if (sctx->trace_buf)
if (unlikely(sctx->current_saved_cs))
si_trace_emit(sctx);
/* Workaround for a VGT hang when streamout is enabled.
@@ -1464,20 +1465,18 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
void si_trace_emit(struct si_context *sctx)
{
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
sctx->trace_id++;
radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, sctx->trace_buf,
RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE);
uint64_t va = sctx->current_saved_cs->trace_buf->gpu_address;
uint32_t trace_id = ++sctx->current_saved_cs->trace_id;
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) |
S_370_WR_CONFIRM(1) |
S_370_ENGINE_SEL(V_370_ME));
radeon_emit(cs, sctx->trace_buf->gpu_address);
radeon_emit(cs, sctx->trace_buf->gpu_address >> 32);
radeon_emit(cs, sctx->trace_id);
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
radeon_emit(cs, trace_id);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, AC_ENCODE_TRACE_POINT(sctx->trace_id));
radeon_emit(cs, AC_ENCODE_TRACE_POINT(trace_id));
if (sctx->ce_ib) {
struct radeon_winsys_cs *ce = sctx->ce_ib;
@@ -1486,10 +1485,13 @@ void si_trace_emit(struct si_context *sctx)
radeon_emit(ce, S_370_DST_SEL(V_370_MEM_ASYNC) |
S_370_WR_CONFIRM(1) |
S_370_ENGINE_SEL(V_370_CE));
radeon_emit(ce, sctx->trace_buf->gpu_address + 4);
radeon_emit(ce, (sctx->trace_buf->gpu_address + 4) >> 32);
radeon_emit(ce, sctx->trace_id);
radeon_emit(ce, va + 4);
radeon_emit(ce, (va + 4) >> 32);
radeon_emit(ce, trace_id);
radeon_emit(ce, PKT3(PKT3_NOP, 0, 0));
radeon_emit(ce, AC_ENCODE_TRACE_POINT(sctx->trace_id));
radeon_emit(ce, AC_ENCODE_TRACE_POINT(trace_id));
}
if (sctx->b.log)
u_log_flush(sctx->b.log);
}