ac/spm: rename ac_spm_trace_data to ac_spm
This is shorter and it's the main struct that controls SPM, while ac_spm_trace contains the generated data only.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22707>
This commit is contained in:
committed by
Marge Bot
parent
0d4fa8e5c6
commit
f2d5b7cd1c
+45
-48
@@ -29,28 +29,27 @@
|
||||
#include "ac_perfcounter.h"
|
||||
|
||||
static struct ac_spm_block_select *
|
||||
ac_spm_get_block_select(struct ac_spm_trace_data *spm_trace,
|
||||
const struct ac_pc_block *block)
|
||||
ac_spm_get_block_select(struct ac_spm *spm, const struct ac_pc_block *block)
|
||||
{
|
||||
struct ac_spm_block_select *block_sel, *new_block_sel;
|
||||
uint32_t num_block_sel;
|
||||
|
||||
for (uint32_t i = 0; i < spm_trace->num_block_sel; i++) {
|
||||
if (spm_trace->block_sel[i].b->b->b->gpu_block == block->b->b->gpu_block)
|
||||
return &spm_trace->block_sel[i];
|
||||
for (uint32_t i = 0; i < spm->num_block_sel; i++) {
|
||||
if (spm->block_sel[i].b->b->b->gpu_block == block->b->b->gpu_block)
|
||||
return &spm->block_sel[i];
|
||||
}
|
||||
|
||||
/* Allocate a new select block if it doesn't already exist. */
|
||||
num_block_sel = spm_trace->num_block_sel + 1;
|
||||
block_sel = realloc(spm_trace->block_sel, num_block_sel * sizeof(*block_sel));
|
||||
num_block_sel = spm->num_block_sel + 1;
|
||||
block_sel = realloc(spm->block_sel, num_block_sel * sizeof(*block_sel));
|
||||
if (!block_sel)
|
||||
return NULL;
|
||||
|
||||
spm_trace->num_block_sel = num_block_sel;
|
||||
spm_trace->block_sel = block_sel;
|
||||
spm->num_block_sel = num_block_sel;
|
||||
spm->block_sel = block_sel;
|
||||
|
||||
/* Initialize the new select block. */
|
||||
new_block_sel = &spm_trace->block_sel[spm_trace->num_block_sel - 1];
|
||||
new_block_sel = &spm->block_sel[spm->num_block_sel - 1];
|
||||
memset(new_block_sel, 0, sizeof(*new_block_sel));
|
||||
|
||||
new_block_sel->b = block;
|
||||
@@ -81,16 +80,15 @@ ac_spm_init_muxsel(const struct ac_pc_block *block,
|
||||
}
|
||||
|
||||
static bool
|
||||
ac_spm_map_counter(struct ac_spm_trace_data *spm_trace,
|
||||
struct ac_spm_block_select *block_sel,
|
||||
ac_spm_map_counter(struct ac_spm *spm, struct ac_spm_block_select *block_sel,
|
||||
struct ac_spm_counter_info *counter,
|
||||
uint32_t *spm_wire)
|
||||
{
|
||||
if (block_sel->b->b->b->gpu_block == SQ) {
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(spm_trace->sq_block_sel); i++) {
|
||||
struct ac_spm_block_select *sq_block_sel = &spm_trace->sq_block_sel[i];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(spm->sq_block_sel); i++) {
|
||||
struct ac_spm_block_select *sq_block_sel = &spm->sq_block_sel[i];
|
||||
struct ac_spm_counter_select *cntr_sel = &sq_block_sel->counters[0];
|
||||
if (i < spm_trace->num_used_sq_block_sel)
|
||||
if (i < spm->num_used_sq_block_sel)
|
||||
continue;
|
||||
|
||||
/* SQ doesn't support 16-bit counters. */
|
||||
@@ -105,7 +103,7 @@ ac_spm_map_counter(struct ac_spm_trace_data *spm_trace,
|
||||
/* One wire per SQ module. */
|
||||
*spm_wire = i;
|
||||
|
||||
spm_trace->num_used_sq_block_sel++;
|
||||
spm->num_used_sq_block_sel++;
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
@@ -154,7 +152,7 @@ ac_spm_map_counter(struct ac_spm_trace_data *spm_trace,
|
||||
|
||||
static bool
|
||||
ac_spm_add_counter(const struct ac_perfcounters *pc,
|
||||
struct ac_spm_trace_data *spm_trace,
|
||||
struct ac_spm *spm,
|
||||
const struct ac_spm_counter_create_info *info)
|
||||
{
|
||||
struct ac_spm_counter_info *counter;
|
||||
@@ -181,20 +179,20 @@ ac_spm_add_counter(const struct ac_perfcounters *pc,
|
||||
return false;
|
||||
}
|
||||
|
||||
counter = &spm_trace->counters[spm_trace->num_counters];
|
||||
spm_trace->num_counters++;
|
||||
counter = &spm->counters[spm->num_counters];
|
||||
spm->num_counters++;
|
||||
|
||||
counter->gpu_block = info->gpu_block;
|
||||
counter->instance = info->instance;
|
||||
counter->event_id = info->event_id;
|
||||
|
||||
/* Get the select block used to configure the counter. */
|
||||
block_sel = ac_spm_get_block_select(spm_trace, block);
|
||||
block_sel = ac_spm_get_block_select(spm, block);
|
||||
if (!block_sel)
|
||||
return false;
|
||||
|
||||
/* Map the counter to the select block. */
|
||||
if (!ac_spm_map_counter(spm_trace, block_sel, counter, &spm_wire)) {
|
||||
if (!ac_spm_map_counter(spm, block_sel, counter, &spm_wire)) {
|
||||
fprintf(stderr, "ac/spm: No free slots available!\n");
|
||||
return false;
|
||||
}
|
||||
@@ -216,14 +214,14 @@ bool ac_init_spm(const struct radeon_info *info,
|
||||
const struct ac_perfcounters *pc,
|
||||
unsigned num_counters,
|
||||
const struct ac_spm_counter_create_info *counters,
|
||||
struct ac_spm_trace_data *spm_trace)
|
||||
struct ac_spm *spm)
|
||||
{
|
||||
spm_trace->counters = CALLOC(num_counters, sizeof(*spm_trace->counters));
|
||||
if (!spm_trace->counters)
|
||||
spm->counters = CALLOC(num_counters, sizeof(*spm->counters));
|
||||
if (!spm->counters)
|
||||
return false;
|
||||
|
||||
for (unsigned i = 0; i < num_counters; i++) {
|
||||
if (!ac_spm_add_counter(pc, spm_trace, &counters[i])) {
|
||||
if (!ac_spm_add_counter(pc, spm, &counters[i])) {
|
||||
fprintf(stderr, "ac/spm: Failed to add SPM counter (%d).\n", i);
|
||||
return false;
|
||||
}
|
||||
@@ -239,8 +237,8 @@ bool ac_init_spm(const struct radeon_info *info,
|
||||
}
|
||||
|
||||
/* Count the number of even/odd counters for this segment. */
|
||||
for (unsigned c = 0; c < spm_trace->num_counters; c++) {
|
||||
struct ac_spm_counter_info *counter = &spm_trace->counters[c];
|
||||
for (unsigned c = 0; c < spm->num_counters; c++) {
|
||||
struct ac_spm_counter_info *counter = &spm->counters[c];
|
||||
|
||||
if (counter->segment_type != s)
|
||||
continue;
|
||||
@@ -259,10 +257,10 @@ bool ac_init_spm(const struct radeon_info *info,
|
||||
DIV_ROUND_UP(num_odd_counters, AC_SPM_NUM_COUNTER_PER_MUXSEL);
|
||||
unsigned num_lines = (even_lines > odd_lines) ? (2 * even_lines - 1) : (2 * odd_lines);
|
||||
|
||||
spm_trace->muxsel_lines[s] = CALLOC(num_lines, sizeof(*spm_trace->muxsel_lines[s]));
|
||||
if (!spm_trace->muxsel_lines[s])
|
||||
spm->muxsel_lines[s] = CALLOC(num_lines, sizeof(*spm->muxsel_lines[s]));
|
||||
if (!spm->muxsel_lines[s])
|
||||
return false;
|
||||
spm_trace->num_muxsel_lines[s] = num_lines;
|
||||
spm->num_muxsel_lines[s] = num_lines;
|
||||
}
|
||||
|
||||
/* RLC uses the following order: Global, SE0, SE1, SE2, SE3. */
|
||||
@@ -276,12 +274,12 @@ bool ac_init_spm(const struct radeon_info *info,
|
||||
};
|
||||
|
||||
for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
|
||||
if (!spm_trace->muxsel_lines[s])
|
||||
if (!spm->muxsel_lines[s])
|
||||
continue;
|
||||
|
||||
uint32_t segment_offset = 0;
|
||||
for (unsigned i = 0; s != ordered_segment[i]; i++) {
|
||||
segment_offset += spm_trace->num_muxsel_lines[ordered_segment[i]] *
|
||||
segment_offset += spm->num_muxsel_lines[ordered_segment[i]] *
|
||||
AC_SPM_NUM_COUNTER_PER_MUXSEL;
|
||||
}
|
||||
|
||||
@@ -298,12 +296,12 @@ bool ac_init_spm(const struct radeon_info *info,
|
||||
};
|
||||
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
spm_trace->muxsel_lines[s][even_line_idx].muxsel[even_counter_idx++] = global_timestamp_muxsel;
|
||||
spm->muxsel_lines[s][even_line_idx].muxsel[even_counter_idx++] = global_timestamp_muxsel;
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < spm_trace->num_counters; i++) {
|
||||
struct ac_spm_counter_info *counter = &spm_trace->counters[i];
|
||||
for (unsigned i = 0; i < spm->num_counters; i++) {
|
||||
struct ac_spm_counter_info *counter = &spm->counters[i];
|
||||
|
||||
if (counter->segment_type != s)
|
||||
continue;
|
||||
@@ -312,7 +310,7 @@ bool ac_init_spm(const struct radeon_info *info,
|
||||
counter->offset = segment_offset + even_line_idx *
|
||||
AC_SPM_NUM_COUNTER_PER_MUXSEL + even_counter_idx;
|
||||
|
||||
spm_trace->muxsel_lines[s][even_line_idx].muxsel[even_counter_idx] = spm_trace->counters[i].muxsel;
|
||||
spm->muxsel_lines[s][even_line_idx].muxsel[even_counter_idx] = spm->counters[i].muxsel;
|
||||
if (++even_counter_idx == AC_SPM_NUM_COUNTER_PER_MUXSEL) {
|
||||
even_counter_idx = 0;
|
||||
even_line_idx += 2;
|
||||
@@ -321,7 +319,7 @@ bool ac_init_spm(const struct radeon_info *info,
|
||||
counter->offset = segment_offset + odd_line_idx *
|
||||
AC_SPM_NUM_COUNTER_PER_MUXSEL + odd_counter_idx;
|
||||
|
||||
spm_trace->muxsel_lines[s][odd_line_idx].muxsel[odd_counter_idx] = spm_trace->counters[i].muxsel;
|
||||
spm->muxsel_lines[s][odd_line_idx].muxsel[odd_counter_idx] = spm->counters[i].muxsel;
|
||||
if (++odd_counter_idx == AC_SPM_NUM_COUNTER_PER_MUXSEL) {
|
||||
odd_counter_idx = 0;
|
||||
odd_line_idx += 2;
|
||||
@@ -333,30 +331,30 @@ bool ac_init_spm(const struct radeon_info *info,
|
||||
return true;
|
||||
}
|
||||
|
||||
void ac_destroy_spm(struct ac_spm_trace_data *spm_trace)
|
||||
void ac_destroy_spm(struct ac_spm *spm)
|
||||
{
|
||||
for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
|
||||
FREE(spm_trace->muxsel_lines[s]);
|
||||
FREE(spm->muxsel_lines[s]);
|
||||
}
|
||||
FREE(spm_trace->block_sel);
|
||||
FREE(spm_trace->counters);
|
||||
FREE(spm->block_sel);
|
||||
FREE(spm->counters);
|
||||
}
|
||||
|
||||
static uint32_t ac_spm_get_sample_size(const struct ac_spm_trace_data *spm_trace)
|
||||
static uint32_t ac_spm_get_sample_size(const struct ac_spm *spm)
|
||||
{
|
||||
uint32_t sample_size = 0; /* in bytes */
|
||||
|
||||
for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
|
||||
sample_size += spm_trace->num_muxsel_lines[s] * AC_SPM_MUXSEL_LINE_SIZE * 4;
|
||||
sample_size += spm->num_muxsel_lines[s] * AC_SPM_MUXSEL_LINE_SIZE * 4;
|
||||
}
|
||||
|
||||
return sample_size;
|
||||
}
|
||||
|
||||
static uint32_t ac_spm_get_num_samples(const struct ac_spm_trace_data *spm_trace)
|
||||
static uint32_t ac_spm_get_num_samples(const struct ac_spm *spm)
|
||||
{
|
||||
uint32_t sample_size = ac_spm_get_sample_size(spm_trace);
|
||||
uint32_t *ptr = (uint32_t *)spm_trace->ptr;
|
||||
uint32_t sample_size = ac_spm_get_sample_size(spm);
|
||||
uint32_t *ptr = (uint32_t *)spm->ptr;
|
||||
uint32_t data_size, num_lines_written;
|
||||
uint32_t num_samples = 0;
|
||||
|
||||
@@ -376,8 +374,7 @@ static uint32_t ac_spm_get_num_samples(const struct ac_spm_trace_data *spm_trace
|
||||
return num_samples;
|
||||
}
|
||||
|
||||
void ac_spm_get_trace(const struct ac_spm_trace_data *spm,
|
||||
struct ac_spm_trace *trace)
|
||||
void ac_spm_get_trace(const struct ac_spm *spm, struct ac_spm_trace *trace)
|
||||
{
|
||||
memset(trace, 0, sizeof(*trace));
|
||||
|
||||
|
||||
@@ -90,7 +90,7 @@ struct ac_spm_block_select {
|
||||
struct ac_spm_counter_select counters[AC_SPM_MAX_COUNTER_PER_BLOCK];
|
||||
};
|
||||
|
||||
struct ac_spm_trace_data {
|
||||
struct ac_spm {
|
||||
/* struct radeon_winsys_bo or struct pb_buffer */
|
||||
void *bo;
|
||||
void *ptr;
|
||||
@@ -125,10 +125,9 @@ bool ac_init_spm(const struct radeon_info *info,
|
||||
const struct ac_perfcounters *pc,
|
||||
unsigned num_counters,
|
||||
const struct ac_spm_counter_create_info *counters,
|
||||
struct ac_spm_trace_data *spm_trace);
|
||||
void ac_destroy_spm(struct ac_spm_trace_data *spm_trace);
|
||||
struct ac_spm *spm);
|
||||
void ac_destroy_spm(struct ac_spm *spm);
|
||||
|
||||
void ac_spm_get_trace(const struct ac_spm_trace_data *spm,
|
||||
struct ac_spm_trace *trace);
|
||||
void ac_spm_get_trace(const struct ac_spm *spm, struct ac_spm_trace *trace);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -543,11 +543,11 @@ radv_handle_thread_trace(VkQueue _queue)
|
||||
if (radv_get_thread_trace(queue, &thread_trace)) {
|
||||
struct ac_spm_trace spm_trace;
|
||||
|
||||
if (queue->device->spm_trace.bo)
|
||||
ac_spm_get_trace(&queue->device->spm_trace, &spm_trace);
|
||||
if (queue->device->spm.bo)
|
||||
ac_spm_get_trace(&queue->device->spm, &spm_trace);
|
||||
|
||||
ac_dump_rgp_capture(&queue->device->physical_device->rad_info, &thread_trace,
|
||||
queue->device->spm_trace.bo ? &spm_trace : NULL);
|
||||
queue->device->spm.bo ? &spm_trace : NULL);
|
||||
} else {
|
||||
/* Trigger a new capture if the driver failed to get
|
||||
* the trace because the buffer was too small.
|
||||
|
||||
@@ -1027,7 +1027,7 @@ struct radv_device {
|
||||
struct radv_memory_trace_data memory_trace;
|
||||
|
||||
/* SPM. */
|
||||
struct ac_spm_trace_data spm_trace;
|
||||
struct ac_spm spm;
|
||||
|
||||
/* Radeon Raytracing Analyzer trace. */
|
||||
struct radv_rra_trace_data rra_trace;
|
||||
|
||||
+30
-30
@@ -37,24 +37,24 @@ radv_spm_init_bo(struct radv_device *device)
|
||||
uint16_t sample_interval = 4096; /* Default to 4096 clk. */
|
||||
VkResult result;
|
||||
|
||||
device->spm_trace.buffer_size = size;
|
||||
device->spm_trace.sample_interval = sample_interval;
|
||||
device->spm.buffer_size = size;
|
||||
device->spm.sample_interval = sample_interval;
|
||||
|
||||
struct radeon_winsys_bo *bo = NULL;
|
||||
result = ws->buffer_create(
|
||||
ws, size, 4096, RADEON_DOMAIN_VRAM,
|
||||
RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM,
|
||||
RADV_BO_PRIORITY_SCRATCH, 0, &bo);
|
||||
device->spm_trace.bo = bo;
|
||||
device->spm.bo = bo;
|
||||
if (result != VK_SUCCESS)
|
||||
return false;
|
||||
|
||||
result = ws->buffer_make_resident(ws, device->spm_trace.bo, true);
|
||||
result = ws->buffer_make_resident(ws, device->spm.bo, true);
|
||||
if (result != VK_SUCCESS)
|
||||
return false;
|
||||
|
||||
device->spm_trace.ptr = ws->buffer_map(device->spm_trace.bo);
|
||||
if (!device->spm_trace.ptr)
|
||||
device->spm.ptr = ws->buffer_map(device->spm.bo);
|
||||
if (!device->spm.ptr)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
@@ -63,10 +63,10 @@ radv_spm_init_bo(struct radv_device *device)
|
||||
static void
|
||||
radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs)
|
||||
{
|
||||
struct ac_spm_trace_data *spm_trace = &device->spm_trace;
|
||||
struct ac_spm *spm = &device->spm;
|
||||
|
||||
for (uint32_t b = 0; b < spm_trace->num_used_sq_block_sel; b++) {
|
||||
struct ac_spm_block_select *sq_block_sel = &spm_trace->sq_block_sel[b];
|
||||
for (uint32_t b = 0; b < spm->num_used_sq_block_sel; b++) {
|
||||
struct ac_spm_block_select *sq_block_sel = &spm->sq_block_sel[b];
|
||||
const struct ac_spm_counter_select *cntr_sel = &sq_block_sel->counters[0];
|
||||
uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT;
|
||||
|
||||
@@ -74,8 +74,8 @@ radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs)
|
||||
radeon_emit(cs, cntr_sel->sel0 | S_036700_SQC_BANK_MASK(0xf)); /* SQC_BANK_MASK only gfx10 */
|
||||
}
|
||||
|
||||
for (uint32_t b = 0; b < spm_trace->num_block_sel; b++) {
|
||||
struct ac_spm_block_select *block_sel = &spm_trace->block_sel[b];
|
||||
for (uint32_t b = 0; b < spm->num_block_sel; b++) {
|
||||
struct ac_spm_block_select *block_sel = &spm->block_sel[b];
|
||||
struct ac_pc_block_base *regs = block_sel->b->b->b;
|
||||
|
||||
radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, block_sel->grbm_gfx_index);
|
||||
@@ -103,19 +103,19 @@ radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs)
|
||||
void
|
||||
radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs)
|
||||
{
|
||||
struct ac_spm_trace_data *spm_trace = &device->spm_trace;
|
||||
uint64_t va = radv_buffer_get_va(spm_trace->bo);
|
||||
uint64_t ring_size = spm_trace->buffer_size;
|
||||
struct ac_spm *spm = &device->spm;
|
||||
uint64_t va = radv_buffer_get_va(spm->bo);
|
||||
uint64_t ring_size = spm->buffer_size;
|
||||
|
||||
/* It's required that the ring VA and the size are correctly aligned. */
|
||||
assert(!(va & (SPM_RING_BASE_ALIGN - 1)));
|
||||
assert(!(ring_size & (SPM_RING_BASE_ALIGN - 1)));
|
||||
assert(spm_trace->sample_interval >= 32);
|
||||
assert(spm->sample_interval >= 32);
|
||||
|
||||
/* Configure the SPM ring buffer. */
|
||||
radeon_set_uconfig_reg(cs, R_037200_RLC_SPM_PERFMON_CNTL,
|
||||
S_037200_PERFMON_RING_MODE(0) | /* no stall and no interrupt on overflow */
|
||||
S_037200_PERFMON_SAMPLE_INTERVAL(spm_trace->sample_interval)); /* in sclk */
|
||||
S_037200_PERFMON_SAMPLE_INTERVAL(spm->sample_interval)); /* in sclk */
|
||||
radeon_set_uconfig_reg(cs, R_037204_RLC_SPM_PERFMON_RING_BASE_LO, va);
|
||||
radeon_set_uconfig_reg(cs, R_037208_RLC_SPM_PERFMON_RING_BASE_HI,
|
||||
S_037208_RING_BASE_HI(va >> 32));
|
||||
@@ -124,19 +124,19 @@ radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs)
|
||||
/* Configure the muxsel. */
|
||||
uint32_t total_muxsel_lines = 0;
|
||||
for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
|
||||
total_muxsel_lines += spm_trace->num_muxsel_lines[s];
|
||||
total_muxsel_lines += spm->num_muxsel_lines[s];
|
||||
}
|
||||
|
||||
radeon_set_uconfig_reg(cs, R_03726C_RLC_SPM_ACCUM_MODE, 0);
|
||||
radeon_set_uconfig_reg(cs, R_037210_RLC_SPM_PERFMON_SEGMENT_SIZE, 0);
|
||||
radeon_set_uconfig_reg(cs, R_03727C_RLC_SPM_PERFMON_SE3TO0_SEGMENT_SIZE,
|
||||
S_03727C_SE0_NUM_LINE(spm_trace->num_muxsel_lines[0]) |
|
||||
S_03727C_SE1_NUM_LINE(spm_trace->num_muxsel_lines[1]) |
|
||||
S_03727C_SE2_NUM_LINE(spm_trace->num_muxsel_lines[2]) |
|
||||
S_03727C_SE3_NUM_LINE(spm_trace->num_muxsel_lines[3]));
|
||||
S_03727C_SE0_NUM_LINE(spm->num_muxsel_lines[0]) |
|
||||
S_03727C_SE1_NUM_LINE(spm->num_muxsel_lines[1]) |
|
||||
S_03727C_SE2_NUM_LINE(spm->num_muxsel_lines[2]) |
|
||||
S_03727C_SE3_NUM_LINE(spm->num_muxsel_lines[3]));
|
||||
radeon_set_uconfig_reg(cs, R_037280_RLC_SPM_PERFMON_GLB_SEGMENT_SIZE,
|
||||
S_037280_PERFMON_SEGMENT_SIZE(total_muxsel_lines) |
|
||||
S_037280_GLOBAL_NUM_LINE(spm_trace->num_muxsel_lines[4]));
|
||||
S_037280_GLOBAL_NUM_LINE(spm->num_muxsel_lines[4]));
|
||||
|
||||
/* Upload each muxsel ram to the RLC. */
|
||||
for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
|
||||
@@ -144,7 +144,7 @@ radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs)
|
||||
unsigned grbm_gfx_index = S_030800_SH_BROADCAST_WRITES(1) |
|
||||
S_030800_INSTANCE_BROADCAST_WRITES(1);
|
||||
|
||||
if (!spm_trace->num_muxsel_lines[s])
|
||||
if (!spm->num_muxsel_lines[s])
|
||||
continue;
|
||||
|
||||
if (s == AC_SPM_SEGMENT_TYPE_GLOBAL) {
|
||||
@@ -161,8 +161,8 @@ radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs)
|
||||
|
||||
radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, grbm_gfx_index);
|
||||
|
||||
for (unsigned l = 0; l < spm_trace->num_muxsel_lines[s]; l++) {
|
||||
uint32_t *data = (uint32_t *)spm_trace->muxsel_lines[s][l].muxsel;
|
||||
for (unsigned l = 0; l < spm->num_muxsel_lines[s]; l++) {
|
||||
uint32_t *data = (uint32_t *)spm->muxsel_lines[s][l].muxsel;
|
||||
|
||||
/* Select MUXSEL_ADDR to point to the next muxsel. */
|
||||
radeon_set_uconfig_reg(cs, rlc_muxsel_addr, l * AC_SPM_MUXSEL_LINE_SIZE);
|
||||
@@ -207,7 +207,7 @@ radv_spm_init(struct radv_device *device)
|
||||
if (!pc->blocks)
|
||||
return false;
|
||||
|
||||
if (!ac_init_spm(info, pc, ARRAY_SIZE(spm_counters), spm_counters, &device->spm_trace))
|
||||
if (!ac_init_spm(info, pc, ARRAY_SIZE(spm_counters), spm_counters, &device->spm))
|
||||
return false;
|
||||
|
||||
if (!radv_spm_init_bo(device))
|
||||
@@ -221,10 +221,10 @@ radv_spm_finish(struct radv_device *device)
|
||||
{
|
||||
struct radeon_winsys *ws = device->ws;
|
||||
|
||||
if (device->spm_trace.bo) {
|
||||
ws->buffer_make_resident(ws, device->spm_trace.bo, false);
|
||||
ws->buffer_destroy(ws, device->spm_trace.bo);
|
||||
if (device->spm.bo) {
|
||||
ws->buffer_make_resident(ws, device->spm.bo, false);
|
||||
ws->buffer_destroy(ws, device->spm.bo);
|
||||
}
|
||||
|
||||
ac_destroy_spm(&device->spm_trace);
|
||||
ac_destroy_spm(&device->spm);
|
||||
}
|
||||
|
||||
@@ -673,7 +673,7 @@ radv_begin_thread_trace(struct radv_queue *queue)
|
||||
|
||||
radv_perfcounter_emit_spm_reset(cs);
|
||||
|
||||
if (device->spm_trace.bo) {
|
||||
if (device->spm.bo) {
|
||||
/* Enable all shader stages by default. */
|
||||
radv_perfcounter_emit_shaders(cs, 0x7f);
|
||||
|
||||
@@ -683,7 +683,7 @@ radv_begin_thread_trace(struct radv_queue *queue)
|
||||
/* Start SQTT. */
|
||||
radv_emit_thread_trace_start(device, cs, family);
|
||||
|
||||
if (device->spm_trace.bo)
|
||||
if (device->spm.bo)
|
||||
radv_perfcounter_emit_spm_start(device, cs, family);
|
||||
|
||||
result = ws->cs_finalize(cs);
|
||||
@@ -734,7 +734,7 @@ radv_end_thread_trace(struct radv_queue *queue)
|
||||
/* Make sure to wait-for-idle before stopping SQTT. */
|
||||
radv_emit_wait_for_idle(device, cs, family);
|
||||
|
||||
if (device->spm_trace.bo)
|
||||
if (device->spm.bo)
|
||||
radv_perfcounter_emit_spm_stop(device, cs, family);
|
||||
|
||||
/* Stop SQTT. */
|
||||
|
||||
@@ -730,29 +730,29 @@ si_spm_init_bo(struct si_context *sctx)
|
||||
struct radeon_winsys *ws = sctx->ws;
|
||||
uint64_t size = 32 * 1024 * 1024; /* Default to 32MB. */
|
||||
|
||||
sctx->spm_trace.buffer_size = size;
|
||||
sctx->spm_trace.sample_interval = 4096; /* Default to 4096 clk. */
|
||||
sctx->spm.buffer_size = size;
|
||||
sctx->spm.sample_interval = 4096; /* Default to 4096 clk. */
|
||||
|
||||
sctx->spm_trace.bo = ws->buffer_create(
|
||||
sctx->spm.bo = ws->buffer_create(
|
||||
ws, size, 4096,
|
||||
RADEON_DOMAIN_VRAM,
|
||||
RADEON_FLAG_NO_INTERPROCESS_SHARING |
|
||||
RADEON_FLAG_GTT_WC |
|
||||
RADEON_FLAG_NO_SUBALLOC);
|
||||
|
||||
return sctx->spm_trace.bo != NULL;
|
||||
return sctx->spm.bo != NULL;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
si_emit_spm_counters(struct si_context *sctx, struct radeon_cmdbuf *cs)
|
||||
{
|
||||
struct ac_spm_trace_data *spm_trace = &sctx->spm_trace;
|
||||
struct ac_spm *spm = &sctx->spm;
|
||||
|
||||
radeon_begin(cs);
|
||||
|
||||
for (uint32_t b = 0; b < spm_trace->num_used_sq_block_sel; b++) {
|
||||
struct ac_spm_block_select *sq_block_sel = &spm_trace->sq_block_sel[b];
|
||||
for (uint32_t b = 0; b < spm->num_used_sq_block_sel; b++) {
|
||||
struct ac_spm_block_select *sq_block_sel = &spm->sq_block_sel[b];
|
||||
const struct ac_spm_counter_select *cntr_sel = &sq_block_sel->counters[0];
|
||||
uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT;
|
||||
|
||||
@@ -760,8 +760,8 @@ si_emit_spm_counters(struct si_context *sctx, struct radeon_cmdbuf *cs)
|
||||
radeon_emit(cntr_sel->sel0 | S_036700_SQC_BANK_MASK(0xf)); /* SQC_BANK_MASK only gfx10 */
|
||||
}
|
||||
|
||||
for (uint32_t b = 0; b < spm_trace->num_block_sel; b++) {
|
||||
struct ac_spm_block_select *block_sel = &spm_trace->block_sel[b];
|
||||
for (uint32_t b = 0; b < spm->num_block_sel; b++) {
|
||||
struct ac_spm_block_select *block_sel = &spm->block_sel[b];
|
||||
struct ac_pc_block_base *regs = block_sel->b->b->b;
|
||||
|
||||
radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, block_sel->grbm_gfx_index);
|
||||
@@ -793,21 +793,21 @@ si_emit_spm_counters(struct si_context *sctx, struct radeon_cmdbuf *cs)
|
||||
void
|
||||
si_emit_spm_setup(struct si_context *sctx, struct radeon_cmdbuf *cs)
|
||||
{
|
||||
struct ac_spm_trace_data *spm_trace = &sctx->spm_trace;
|
||||
uint64_t va = sctx->screen->ws->buffer_get_virtual_address(spm_trace->bo);
|
||||
uint64_t ring_size = spm_trace->buffer_size;
|
||||
struct ac_spm *spm = &sctx->spm;
|
||||
uint64_t va = sctx->screen->ws->buffer_get_virtual_address(spm->bo);
|
||||
uint64_t ring_size = spm->buffer_size;
|
||||
|
||||
/* It's required that the ring VA and the size are correctly aligned. */
|
||||
assert(!(va & (SPM_RING_BASE_ALIGN - 1)));
|
||||
assert(!(ring_size & (SPM_RING_BASE_ALIGN - 1)));
|
||||
assert(spm_trace->sample_interval >= 32);
|
||||
assert(spm->sample_interval >= 32);
|
||||
|
||||
radeon_begin(cs);
|
||||
|
||||
/* Configure the SPM ring buffer. */
|
||||
radeon_set_uconfig_reg(R_037200_RLC_SPM_PERFMON_CNTL,
|
||||
S_037200_PERFMON_RING_MODE(0) | /* no stall and no interrupt on overflow */
|
||||
S_037200_PERFMON_SAMPLE_INTERVAL(spm_trace->sample_interval)); /* in sclk */
|
||||
S_037200_PERFMON_SAMPLE_INTERVAL(spm->sample_interval)); /* in sclk */
|
||||
radeon_set_uconfig_reg(R_037204_RLC_SPM_PERFMON_RING_BASE_LO, va);
|
||||
radeon_set_uconfig_reg(R_037208_RLC_SPM_PERFMON_RING_BASE_HI,
|
||||
S_037208_RING_BASE_HI(va >> 32));
|
||||
@@ -816,19 +816,19 @@ si_emit_spm_setup(struct si_context *sctx, struct radeon_cmdbuf *cs)
|
||||
/* Configure the muxsel. */
|
||||
uint32_t total_muxsel_lines = 0;
|
||||
for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
|
||||
total_muxsel_lines += spm_trace->num_muxsel_lines[s];
|
||||
total_muxsel_lines += spm->num_muxsel_lines[s];
|
||||
}
|
||||
|
||||
radeon_set_uconfig_reg(R_03726C_RLC_SPM_ACCUM_MODE, 0);
|
||||
radeon_set_uconfig_reg(R_037210_RLC_SPM_PERFMON_SEGMENT_SIZE, 0);
|
||||
radeon_set_uconfig_reg(R_03727C_RLC_SPM_PERFMON_SE3TO0_SEGMENT_SIZE,
|
||||
S_03727C_SE0_NUM_LINE(spm_trace->num_muxsel_lines[0]) |
|
||||
S_03727C_SE1_NUM_LINE(spm_trace->num_muxsel_lines[1]) |
|
||||
S_03727C_SE2_NUM_LINE(spm_trace->num_muxsel_lines[2]) |
|
||||
S_03727C_SE3_NUM_LINE(spm_trace->num_muxsel_lines[3]));
|
||||
S_03727C_SE0_NUM_LINE(spm->num_muxsel_lines[0]) |
|
||||
S_03727C_SE1_NUM_LINE(spm->num_muxsel_lines[1]) |
|
||||
S_03727C_SE2_NUM_LINE(spm->num_muxsel_lines[2]) |
|
||||
S_03727C_SE3_NUM_LINE(spm->num_muxsel_lines[3]));
|
||||
radeon_set_uconfig_reg(R_037280_RLC_SPM_PERFMON_GLB_SEGMENT_SIZE,
|
||||
S_037280_PERFMON_SEGMENT_SIZE(total_muxsel_lines) |
|
||||
S_037280_GLOBAL_NUM_LINE(spm_trace->num_muxsel_lines[4]));
|
||||
S_037280_GLOBAL_NUM_LINE(spm->num_muxsel_lines[4]));
|
||||
|
||||
/* Upload each muxsel ram to the RLC. */
|
||||
for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
|
||||
@@ -836,7 +836,7 @@ si_emit_spm_setup(struct si_context *sctx, struct radeon_cmdbuf *cs)
|
||||
unsigned grbm_gfx_index = S_030800_SH_BROADCAST_WRITES(1) |
|
||||
S_030800_INSTANCE_BROADCAST_WRITES(1);
|
||||
|
||||
if (!spm_trace->num_muxsel_lines[s])
|
||||
if (!spm->num_muxsel_lines[s])
|
||||
continue;
|
||||
|
||||
if (s == AC_SPM_SEGMENT_TYPE_GLOBAL) {
|
||||
@@ -853,8 +853,8 @@ si_emit_spm_setup(struct si_context *sctx, struct radeon_cmdbuf *cs)
|
||||
|
||||
radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, grbm_gfx_index);
|
||||
|
||||
for (unsigned l = 0; l < spm_trace->num_muxsel_lines[s]; l++) {
|
||||
uint32_t *data = (uint32_t *)spm_trace->muxsel_lines[s][l].muxsel;
|
||||
for (unsigned l = 0; l < spm->num_muxsel_lines[s]; l++) {
|
||||
uint32_t *data = (uint32_t *)spm->muxsel_lines[s][l].muxsel;
|
||||
|
||||
/* Select MUXSEL_ADDR to point to the next muxsel. */
|
||||
radeon_set_uconfig_reg(rlc_muxsel_addr, l * AC_SPM_MUXSEL_LINE_SIZE);
|
||||
@@ -914,7 +914,7 @@ si_spm_init(struct si_context *sctx)
|
||||
if (!ac_init_perfcounters(info, false, false, pc))
|
||||
return false;
|
||||
|
||||
if (!ac_init_spm(info, pc, ARRAY_SIZE(spm_counters), spm_counters, &sctx->spm_trace))
|
||||
if (!ac_init_spm(info, pc, ARRAY_SIZE(spm_counters), spm_counters, &sctx->spm))
|
||||
return false;
|
||||
|
||||
if (!si_spm_init_bo(sctx))
|
||||
@@ -926,8 +926,8 @@ si_spm_init(struct si_context *sctx)
|
||||
void
|
||||
si_spm_finish(struct si_context *sctx)
|
||||
{
|
||||
struct pb_buffer *bo = sctx->spm_trace.bo;
|
||||
struct pb_buffer *bo = sctx->spm.bo;
|
||||
radeon_bo_reference(sctx->screen->ws, &bo, NULL);
|
||||
|
||||
ac_destroy_spm(&sctx->spm_trace);
|
||||
ac_destroy_spm(&sctx->spm);
|
||||
}
|
||||
|
||||
@@ -1319,7 +1319,7 @@ struct si_context {
|
||||
|
||||
/* SQTT */
|
||||
struct ac_thread_trace_data *thread_trace;
|
||||
struct ac_spm_trace_data spm_trace;
|
||||
struct ac_spm spm;
|
||||
struct pipe_fence_handle *last_sqtt_fence;
|
||||
enum rgp_sqtt_marker_event_type sqtt_next_event;
|
||||
bool thread_trace_enabled;
|
||||
|
||||
@@ -481,9 +481,9 @@ si_thread_trace_start(struct si_context *sctx, int family, struct radeon_cmdbuf
|
||||
sctx->thread_trace->bo,
|
||||
RADEON_USAGE_READWRITE,
|
||||
RADEON_DOMAIN_VRAM);
|
||||
if (sctx->spm_trace.bo)
|
||||
if (sctx->spm.bo)
|
||||
ws->cs_add_buffer(cs,
|
||||
sctx->spm_trace.bo,
|
||||
sctx->spm.bo,
|
||||
RADEON_USAGE_READWRITE,
|
||||
RADEON_DOMAIN_VRAM);
|
||||
|
||||
@@ -501,7 +501,7 @@ si_thread_trace_start(struct si_context *sctx, int family, struct radeon_cmdbuf
|
||||
/* Enable SQG events that collects thread trace data. */
|
||||
si_emit_spi_config_cntl(sctx, cs, true);
|
||||
|
||||
if (sctx->spm_trace.bo) {
|
||||
if (sctx->spm.bo) {
|
||||
si_pc_emit_spm_reset(cs);
|
||||
si_pc_emit_shaders(cs, 0x7f);
|
||||
si_emit_spm_setup(sctx, cs);
|
||||
@@ -509,7 +509,7 @@ si_thread_trace_start(struct si_context *sctx, int family, struct radeon_cmdbuf
|
||||
|
||||
si_emit_thread_trace_start(sctx, cs, family);
|
||||
|
||||
if (sctx->spm_trace.bo)
|
||||
if (sctx->spm.bo)
|
||||
si_pc_emit_spm_start(cs);
|
||||
}
|
||||
|
||||
@@ -538,15 +538,15 @@ si_thread_trace_stop(struct si_context *sctx, int family, struct radeon_cmdbuf *
|
||||
RADEON_USAGE_READWRITE,
|
||||
RADEON_DOMAIN_VRAM);
|
||||
|
||||
if (sctx->spm_trace.bo)
|
||||
if (sctx->spm.bo)
|
||||
ws->cs_add_buffer(cs,
|
||||
sctx->spm_trace.bo,
|
||||
sctx->spm.bo,
|
||||
RADEON_USAGE_READWRITE,
|
||||
RADEON_DOMAIN_VRAM);
|
||||
|
||||
si_cp_dma_wait_for_idle(sctx, cs);
|
||||
|
||||
if (sctx->spm_trace.bo)
|
||||
if (sctx->spm.bo)
|
||||
si_pc_emit_spm_stop(cs, sctx->screen->info.never_stop_sq_perf_counters,
|
||||
sctx->screen->info.never_send_perfcounter_stop);
|
||||
|
||||
@@ -559,7 +559,7 @@ si_thread_trace_stop(struct si_context *sctx, int family, struct radeon_cmdbuf *
|
||||
|
||||
si_emit_thread_trace_stop(sctx, cs, family);
|
||||
|
||||
if (sctx->spm_trace.bo)
|
||||
if (sctx->spm.bo)
|
||||
si_pc_emit_spm_reset(cs);
|
||||
|
||||
/* Restore previous state by disabling SQG events. */
|
||||
@@ -797,7 +797,7 @@ si_destroy_thread_trace(struct si_context *sctx)
|
||||
free(sctx->thread_trace);
|
||||
sctx->thread_trace = NULL;
|
||||
|
||||
if (sctx->spm_trace.bo)
|
||||
if (sctx->spm.bo)
|
||||
si_spm_finish(sctx);
|
||||
}
|
||||
|
||||
@@ -852,16 +852,16 @@ si_handle_thread_trace(struct si_context *sctx, struct radeon_cmdbuf *rcs)
|
||||
struct ac_spm_trace spm_trace;
|
||||
|
||||
/* Map the SPM counter buffer */
|
||||
if (sctx->spm_trace.bo) {
|
||||
sctx->spm_trace.ptr = sctx->ws->buffer_map(sctx->ws, sctx->spm_trace.bo,
|
||||
if (sctx->spm.bo) {
|
||||
sctx->spm.ptr = sctx->ws->buffer_map(sctx->ws, sctx->spm.bo,
|
||||
NULL, PIPE_MAP_READ | RADEON_MAP_TEMPORARY);
|
||||
ac_spm_get_trace(&sctx->spm_trace, &spm_trace);
|
||||
ac_spm_get_trace(&sctx->spm, &spm_trace);
|
||||
}
|
||||
|
||||
ac_dump_rgp_capture(&sctx->screen->info, &thread_trace, sctx->spm_trace.bo ? &spm_trace : NULL);
|
||||
ac_dump_rgp_capture(&sctx->screen->info, &thread_trace, sctx->spm.bo ? &spm_trace : NULL);
|
||||
|
||||
if (sctx->spm_trace.ptr)
|
||||
sctx->ws->buffer_unmap(sctx->ws, sctx->spm_trace.bo);
|
||||
if (sctx->spm.ptr)
|
||||
sctx->ws->buffer_unmap(sctx->ws, sctx->spm.bo);
|
||||
} else {
|
||||
fprintf(stderr, "Failed to read the trace\n");
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user