diff --git a/src/amd/common/ac_spm.c b/src/amd/common/ac_spm.c index b8546bef630..ef8b59740e7 100644 --- a/src/amd/common/ac_spm.c +++ b/src/amd/common/ac_spm.c @@ -29,28 +29,27 @@ #include "ac_perfcounter.h" static struct ac_spm_block_select * -ac_spm_get_block_select(struct ac_spm_trace_data *spm_trace, - const struct ac_pc_block *block) +ac_spm_get_block_select(struct ac_spm *spm, const struct ac_pc_block *block) { struct ac_spm_block_select *block_sel, *new_block_sel; uint32_t num_block_sel; - for (uint32_t i = 0; i < spm_trace->num_block_sel; i++) { - if (spm_trace->block_sel[i].b->b->b->gpu_block == block->b->b->gpu_block) - return &spm_trace->block_sel[i]; + for (uint32_t i = 0; i < spm->num_block_sel; i++) { + if (spm->block_sel[i].b->b->b->gpu_block == block->b->b->gpu_block) + return &spm->block_sel[i]; } /* Allocate a new select block if it doesn't already exist. */ - num_block_sel = spm_trace->num_block_sel + 1; - block_sel = realloc(spm_trace->block_sel, num_block_sel * sizeof(*block_sel)); + num_block_sel = spm->num_block_sel + 1; + block_sel = realloc(spm->block_sel, num_block_sel * sizeof(*block_sel)); if (!block_sel) return NULL; - spm_trace->num_block_sel = num_block_sel; - spm_trace->block_sel = block_sel; + spm->num_block_sel = num_block_sel; + spm->block_sel = block_sel; /* Initialize the new select block. */ - new_block_sel = &spm_trace->block_sel[spm_trace->num_block_sel - 1]; + new_block_sel = &spm->block_sel[spm->num_block_sel - 1]; memset(new_block_sel, 0, sizeof(*new_block_sel)); new_block_sel->b = block; @@ -81,16 +80,15 @@ ac_spm_init_muxsel(const struct ac_pc_block *block, } static bool -ac_spm_map_counter(struct ac_spm_trace_data *spm_trace, - struct ac_spm_block_select *block_sel, +ac_spm_map_counter(struct ac_spm *spm, struct ac_spm_block_select *block_sel, struct ac_spm_counter_info *counter, uint32_t *spm_wire) { if (block_sel->b->b->b->gpu_block == SQ) { - for (unsigned i = 0; i < ARRAY_SIZE(spm_trace->sq_block_sel); i++) { - struct ac_spm_block_select *sq_block_sel = &spm_trace->sq_block_sel[i]; + for (unsigned i = 0; i < ARRAY_SIZE(spm->sq_block_sel); i++) { + struct ac_spm_block_select *sq_block_sel = &spm->sq_block_sel[i]; struct ac_spm_counter_select *cntr_sel = &sq_block_sel->counters[0]; - if (i < spm_trace->num_used_sq_block_sel) + if (i < spm->num_used_sq_block_sel) continue; /* SQ doesn't support 16-bit counters. */ @@ -105,7 +103,7 @@ ac_spm_map_counter(struct ac_spm_trace_data *spm_trace, /* One wire per SQ module. */ *spm_wire = i; - spm_trace->num_used_sq_block_sel++; + spm->num_used_sq_block_sel++; return true; } } else { @@ -154,7 +152,7 @@ ac_spm_map_counter(struct ac_spm_trace_data *spm_trace, static bool ac_spm_add_counter(const struct ac_perfcounters *pc, - struct ac_spm_trace_data *spm_trace, + struct ac_spm *spm, const struct ac_spm_counter_create_info *info) { struct ac_spm_counter_info *counter; @@ -181,20 +179,20 @@ ac_spm_add_counter(const struct ac_perfcounters *pc, return false; } - counter = &spm_trace->counters[spm_trace->num_counters]; - spm_trace->num_counters++; + counter = &spm->counters[spm->num_counters]; + spm->num_counters++; counter->gpu_block = info->gpu_block; counter->instance = info->instance; counter->event_id = info->event_id; /* Get the select block used to configure the counter. */ - block_sel = ac_spm_get_block_select(spm_trace, block); + block_sel = ac_spm_get_block_select(spm, block); if (!block_sel) return false; /* Map the counter to the select block. */ - if (!ac_spm_map_counter(spm_trace, block_sel, counter, &spm_wire)) { + if (!ac_spm_map_counter(spm, block_sel, counter, &spm_wire)) { fprintf(stderr, "ac/spm: No free slots available!\n"); return false; } @@ -216,14 +214,14 @@ bool ac_init_spm(const struct radeon_info *info, const struct ac_perfcounters *pc, unsigned num_counters, const struct ac_spm_counter_create_info *counters, - struct ac_spm_trace_data *spm_trace) + struct ac_spm *spm) { - spm_trace->counters = CALLOC(num_counters, sizeof(*spm_trace->counters)); - if (!spm_trace->counters) + spm->counters = CALLOC(num_counters, sizeof(*spm->counters)); + if (!spm->counters) return false; for (unsigned i = 0; i < num_counters; i++) { - if (!ac_spm_add_counter(pc, spm_trace, &counters[i])) { + if (!ac_spm_add_counter(pc, spm, &counters[i])) { fprintf(stderr, "ac/spm: Failed to add SPM counter (%d).\n", i); return false; } @@ -239,8 +237,8 @@ bool ac_init_spm(const struct radeon_info *info, } /* Count the number of even/odd counters for this segment. */ - for (unsigned c = 0; c < spm_trace->num_counters; c++) { - struct ac_spm_counter_info *counter = &spm_trace->counters[c]; + for (unsigned c = 0; c < spm->num_counters; c++) { + struct ac_spm_counter_info *counter = &spm->counters[c]; if (counter->segment_type != s) continue; @@ -259,10 +257,10 @@ bool ac_init_spm(const struct radeon_info *info, DIV_ROUND_UP(num_odd_counters, AC_SPM_NUM_COUNTER_PER_MUXSEL); unsigned num_lines = (even_lines > odd_lines) ? (2 * even_lines - 1) : (2 * odd_lines); - spm_trace->muxsel_lines[s] = CALLOC(num_lines, sizeof(*spm_trace->muxsel_lines[s])); - if (!spm_trace->muxsel_lines[s]) + spm->muxsel_lines[s] = CALLOC(num_lines, sizeof(*spm->muxsel_lines[s])); + if (!spm->muxsel_lines[s]) return false; - spm_trace->num_muxsel_lines[s] = num_lines; + spm->num_muxsel_lines[s] = num_lines; } /* RLC uses the following order: Global, SE0, SE1, SE2, SE3. */ @@ -276,12 +274,12 @@ bool ac_init_spm(const struct radeon_info *info, }; for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) { - if (!spm_trace->muxsel_lines[s]) + if (!spm->muxsel_lines[s]) continue; uint32_t segment_offset = 0; for (unsigned i = 0; s != ordered_segment[i]; i++) { - segment_offset += spm_trace->num_muxsel_lines[ordered_segment[i]] * + segment_offset += spm->num_muxsel_lines[ordered_segment[i]] * AC_SPM_NUM_COUNTER_PER_MUXSEL; } @@ -298,12 +296,12 @@ bool ac_init_spm(const struct radeon_info *info, }; for (unsigned i = 0; i < 4; i++) { - spm_trace->muxsel_lines[s][even_line_idx].muxsel[even_counter_idx++] = global_timestamp_muxsel; + spm->muxsel_lines[s][even_line_idx].muxsel[even_counter_idx++] = global_timestamp_muxsel; } } - for (unsigned i = 0; i < spm_trace->num_counters; i++) { - struct ac_spm_counter_info *counter = &spm_trace->counters[i]; + for (unsigned i = 0; i < spm->num_counters; i++) { + struct ac_spm_counter_info *counter = &spm->counters[i]; if (counter->segment_type != s) continue; @@ -312,7 +310,7 @@ bool ac_init_spm(const struct radeon_info *info, counter->offset = segment_offset + even_line_idx * AC_SPM_NUM_COUNTER_PER_MUXSEL + even_counter_idx; - spm_trace->muxsel_lines[s][even_line_idx].muxsel[even_counter_idx] = spm_trace->counters[i].muxsel; + spm->muxsel_lines[s][even_line_idx].muxsel[even_counter_idx] = spm->counters[i].muxsel; if (++even_counter_idx == AC_SPM_NUM_COUNTER_PER_MUXSEL) { even_counter_idx = 0; even_line_idx += 2; @@ -321,7 +319,7 @@ bool ac_init_spm(const struct radeon_info *info, counter->offset = segment_offset + odd_line_idx * AC_SPM_NUM_COUNTER_PER_MUXSEL + odd_counter_idx; - spm_trace->muxsel_lines[s][odd_line_idx].muxsel[odd_counter_idx] = spm_trace->counters[i].muxsel; + spm->muxsel_lines[s][odd_line_idx].muxsel[odd_counter_idx] = spm->counters[i].muxsel; if (++odd_counter_idx == AC_SPM_NUM_COUNTER_PER_MUXSEL) { odd_counter_idx = 0; odd_line_idx += 2; @@ -333,30 +331,30 @@ bool ac_init_spm(const struct radeon_info *info, return true; } -void ac_destroy_spm(struct ac_spm_trace_data *spm_trace) +void ac_destroy_spm(struct ac_spm *spm) { for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) { - FREE(spm_trace->muxsel_lines[s]); + FREE(spm->muxsel_lines[s]); } - FREE(spm_trace->block_sel); - FREE(spm_trace->counters); + FREE(spm->block_sel); + FREE(spm->counters); } -static uint32_t ac_spm_get_sample_size(const struct ac_spm_trace_data *spm_trace) +static uint32_t ac_spm_get_sample_size(const struct ac_spm *spm) { uint32_t sample_size = 0; /* in bytes */ for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) { - sample_size += spm_trace->num_muxsel_lines[s] * AC_SPM_MUXSEL_LINE_SIZE * 4; + sample_size += spm->num_muxsel_lines[s] * AC_SPM_MUXSEL_LINE_SIZE * 4; } return sample_size; } -static uint32_t ac_spm_get_num_samples(const struct ac_spm_trace_data *spm_trace) +static uint32_t ac_spm_get_num_samples(const struct ac_spm *spm) { - uint32_t sample_size = ac_spm_get_sample_size(spm_trace); - uint32_t *ptr = (uint32_t *)spm_trace->ptr; + uint32_t sample_size = ac_spm_get_sample_size(spm); + uint32_t *ptr = (uint32_t *)spm->ptr; uint32_t data_size, num_lines_written; uint32_t num_samples = 0; @@ -376,8 +374,7 @@ static uint32_t ac_spm_get_num_samples(const struct ac_spm_trace_data *spm_trace return num_samples; } -void ac_spm_get_trace(const struct ac_spm_trace_data *spm, - struct ac_spm_trace *trace) +void ac_spm_get_trace(const struct ac_spm *spm, struct ac_spm_trace *trace) { memset(trace, 0, sizeof(*trace)); diff --git a/src/amd/common/ac_spm.h b/src/amd/common/ac_spm.h index 1fc6c71c141..7b6d2b0610e 100644 --- a/src/amd/common/ac_spm.h +++ b/src/amd/common/ac_spm.h @@ -90,7 +90,7 @@ struct ac_spm_block_select { struct ac_spm_counter_select counters[AC_SPM_MAX_COUNTER_PER_BLOCK]; }; -struct ac_spm_trace_data { +struct ac_spm { /* struct radeon_winsys_bo or struct pb_buffer */ void *bo; void *ptr; @@ -125,10 +125,9 @@ bool ac_init_spm(const struct radeon_info *info, const struct ac_perfcounters *pc, unsigned num_counters, const struct ac_spm_counter_create_info *counters, - struct ac_spm_trace_data *spm_trace); -void ac_destroy_spm(struct ac_spm_trace_data *spm_trace); + struct ac_spm *spm); +void ac_destroy_spm(struct ac_spm *spm); -void ac_spm_get_trace(const struct ac_spm_trace_data *spm, - struct ac_spm_trace *trace); +void ac_spm_get_trace(const struct ac_spm *spm, struct ac_spm_trace *trace); #endif diff --git a/src/amd/vulkan/layers/radv_sqtt_layer.c b/src/amd/vulkan/layers/radv_sqtt_layer.c index 310f9fb98cc..071314295bb 100644 --- a/src/amd/vulkan/layers/radv_sqtt_layer.c +++ b/src/amd/vulkan/layers/radv_sqtt_layer.c @@ -543,11 +543,11 @@ radv_handle_thread_trace(VkQueue _queue) if (radv_get_thread_trace(queue, &thread_trace)) { struct ac_spm_trace spm_trace; - if (queue->device->spm_trace.bo) - ac_spm_get_trace(&queue->device->spm_trace, &spm_trace); + if (queue->device->spm.bo) + ac_spm_get_trace(&queue->device->spm, &spm_trace); ac_dump_rgp_capture(&queue->device->physical_device->rad_info, &thread_trace, - queue->device->spm_trace.bo ? &spm_trace : NULL); + queue->device->spm.bo ? &spm_trace : NULL); } else { /* Trigger a new capture if the driver failed to get * the trace because the buffer was too small. diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index e8a0d29461f..8fa8165ad55 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -1027,7 +1027,7 @@ struct radv_device { struct radv_memory_trace_data memory_trace; /* SPM. */ - struct ac_spm_trace_data spm_trace; + struct ac_spm spm; /* Radeon Raytracing Analyzer trace. */ struct radv_rra_trace_data rra_trace; diff --git a/src/amd/vulkan/radv_spm.c b/src/amd/vulkan/radv_spm.c index 8ef3dafc73b..fc81fbaae45 100644 --- a/src/amd/vulkan/radv_spm.c +++ b/src/amd/vulkan/radv_spm.c @@ -37,24 +37,24 @@ radv_spm_init_bo(struct radv_device *device) uint16_t sample_interval = 4096; /* Default to 4096 clk. */ VkResult result; - device->spm_trace.buffer_size = size; - device->spm_trace.sample_interval = sample_interval; + device->spm.buffer_size = size; + device->spm.sample_interval = sample_interval; struct radeon_winsys_bo *bo = NULL; result = ws->buffer_create( ws, size, 4096, RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM, RADV_BO_PRIORITY_SCRATCH, 0, &bo); - device->spm_trace.bo = bo; + device->spm.bo = bo; if (result != VK_SUCCESS) return false; - result = ws->buffer_make_resident(ws, device->spm_trace.bo, true); + result = ws->buffer_make_resident(ws, device->spm.bo, true); if (result != VK_SUCCESS) return false; - device->spm_trace.ptr = ws->buffer_map(device->spm_trace.bo); - if (!device->spm_trace.ptr) + device->spm.ptr = ws->buffer_map(device->spm.bo); + if (!device->spm.ptr) return false; return true; @@ -63,10 +63,10 @@ radv_spm_init_bo(struct radv_device *device) static void radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs) { - struct ac_spm_trace_data *spm_trace = &device->spm_trace; + struct ac_spm *spm = &device->spm; - for (uint32_t b = 0; b < spm_trace->num_used_sq_block_sel; b++) { - struct ac_spm_block_select *sq_block_sel = &spm_trace->sq_block_sel[b]; + for (uint32_t b = 0; b < spm->num_used_sq_block_sel; b++) { + struct ac_spm_block_select *sq_block_sel = &spm->sq_block_sel[b]; const struct ac_spm_counter_select *cntr_sel = &sq_block_sel->counters[0]; uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT; @@ -74,8 +74,8 @@ radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs) radeon_emit(cs, cntr_sel->sel0 | S_036700_SQC_BANK_MASK(0xf)); /* SQC_BANK_MASK only gfx10 */ } - for (uint32_t b = 0; b < spm_trace->num_block_sel; b++) { - struct ac_spm_block_select *block_sel = &spm_trace->block_sel[b]; + for (uint32_t b = 0; b < spm->num_block_sel; b++) { + struct ac_spm_block_select *block_sel = &spm->block_sel[b]; struct ac_pc_block_base *regs = block_sel->b->b->b; radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, block_sel->grbm_gfx_index); @@ -103,19 +103,19 @@ radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs) void radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs) { - struct ac_spm_trace_data *spm_trace = &device->spm_trace; - uint64_t va = radv_buffer_get_va(spm_trace->bo); - uint64_t ring_size = spm_trace->buffer_size; + struct ac_spm *spm = &device->spm; + uint64_t va = radv_buffer_get_va(spm->bo); + uint64_t ring_size = spm->buffer_size; /* It's required that the ring VA and the size are correctly aligned. */ assert(!(va & (SPM_RING_BASE_ALIGN - 1))); assert(!(ring_size & (SPM_RING_BASE_ALIGN - 1))); - assert(spm_trace->sample_interval >= 32); + assert(spm->sample_interval >= 32); /* Configure the SPM ring buffer. */ radeon_set_uconfig_reg(cs, R_037200_RLC_SPM_PERFMON_CNTL, S_037200_PERFMON_RING_MODE(0) | /* no stall and no interrupt on overflow */ - S_037200_PERFMON_SAMPLE_INTERVAL(spm_trace->sample_interval)); /* in sclk */ + S_037200_PERFMON_SAMPLE_INTERVAL(spm->sample_interval)); /* in sclk */ radeon_set_uconfig_reg(cs, R_037204_RLC_SPM_PERFMON_RING_BASE_LO, va); radeon_set_uconfig_reg(cs, R_037208_RLC_SPM_PERFMON_RING_BASE_HI, S_037208_RING_BASE_HI(va >> 32)); @@ -124,19 +124,19 @@ radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs) /* Configure the muxsel. */ uint32_t total_muxsel_lines = 0; for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) { - total_muxsel_lines += spm_trace->num_muxsel_lines[s]; + total_muxsel_lines += spm->num_muxsel_lines[s]; } radeon_set_uconfig_reg(cs, R_03726C_RLC_SPM_ACCUM_MODE, 0); radeon_set_uconfig_reg(cs, R_037210_RLC_SPM_PERFMON_SEGMENT_SIZE, 0); radeon_set_uconfig_reg(cs, R_03727C_RLC_SPM_PERFMON_SE3TO0_SEGMENT_SIZE, - S_03727C_SE0_NUM_LINE(spm_trace->num_muxsel_lines[0]) | - S_03727C_SE1_NUM_LINE(spm_trace->num_muxsel_lines[1]) | - S_03727C_SE2_NUM_LINE(spm_trace->num_muxsel_lines[2]) | - S_03727C_SE3_NUM_LINE(spm_trace->num_muxsel_lines[3])); + S_03727C_SE0_NUM_LINE(spm->num_muxsel_lines[0]) | + S_03727C_SE1_NUM_LINE(spm->num_muxsel_lines[1]) | + S_03727C_SE2_NUM_LINE(spm->num_muxsel_lines[2]) | + S_03727C_SE3_NUM_LINE(spm->num_muxsel_lines[3])); radeon_set_uconfig_reg(cs, R_037280_RLC_SPM_PERFMON_GLB_SEGMENT_SIZE, S_037280_PERFMON_SEGMENT_SIZE(total_muxsel_lines) | - S_037280_GLOBAL_NUM_LINE(spm_trace->num_muxsel_lines[4])); + S_037280_GLOBAL_NUM_LINE(spm->num_muxsel_lines[4])); /* Upload each muxsel ram to the RLC. */ for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) { @@ -144,7 +144,7 @@ radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs) unsigned grbm_gfx_index = S_030800_SH_BROADCAST_WRITES(1) | S_030800_INSTANCE_BROADCAST_WRITES(1); - if (!spm_trace->num_muxsel_lines[s]) + if (!spm->num_muxsel_lines[s]) continue; if (s == AC_SPM_SEGMENT_TYPE_GLOBAL) { @@ -161,8 +161,8 @@ radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs) radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, grbm_gfx_index); - for (unsigned l = 0; l < spm_trace->num_muxsel_lines[s]; l++) { - uint32_t *data = (uint32_t *)spm_trace->muxsel_lines[s][l].muxsel; + for (unsigned l = 0; l < spm->num_muxsel_lines[s]; l++) { + uint32_t *data = (uint32_t *)spm->muxsel_lines[s][l].muxsel; /* Select MUXSEL_ADDR to point to the next muxsel. */ radeon_set_uconfig_reg(cs, rlc_muxsel_addr, l * AC_SPM_MUXSEL_LINE_SIZE); @@ -207,7 +207,7 @@ radv_spm_init(struct radv_device *device) if (!pc->blocks) return false; - if (!ac_init_spm(info, pc, ARRAY_SIZE(spm_counters), spm_counters, &device->spm_trace)) + if (!ac_init_spm(info, pc, ARRAY_SIZE(spm_counters), spm_counters, &device->spm)) return false; if (!radv_spm_init_bo(device)) @@ -221,10 +221,10 @@ radv_spm_finish(struct radv_device *device) { struct radeon_winsys *ws = device->ws; - if (device->spm_trace.bo) { - ws->buffer_make_resident(ws, device->spm_trace.bo, false); - ws->buffer_destroy(ws, device->spm_trace.bo); + if (device->spm.bo) { + ws->buffer_make_resident(ws, device->spm.bo, false); + ws->buffer_destroy(ws, device->spm.bo); } - ac_destroy_spm(&device->spm_trace); + ac_destroy_spm(&device->spm); } diff --git a/src/amd/vulkan/radv_sqtt.c b/src/amd/vulkan/radv_sqtt.c index 339d5af9630..04356a36617 100644 --- a/src/amd/vulkan/radv_sqtt.c +++ b/src/amd/vulkan/radv_sqtt.c @@ -673,7 +673,7 @@ radv_begin_thread_trace(struct radv_queue *queue) radv_perfcounter_emit_spm_reset(cs); - if (device->spm_trace.bo) { + if (device->spm.bo) { /* Enable all shader stages by default. */ radv_perfcounter_emit_shaders(cs, 0x7f); @@ -683,7 +683,7 @@ radv_begin_thread_trace(struct radv_queue *queue) /* Start SQTT. */ radv_emit_thread_trace_start(device, cs, family); - if (device->spm_trace.bo) + if (device->spm.bo) radv_perfcounter_emit_spm_start(device, cs, family); result = ws->cs_finalize(cs); @@ -734,7 +734,7 @@ radv_end_thread_trace(struct radv_queue *queue) /* Make sure to wait-for-idle before stopping SQTT. */ radv_emit_wait_for_idle(device, cs, family); - if (device->spm_trace.bo) + if (device->spm.bo) radv_perfcounter_emit_spm_stop(device, cs, family); /* Stop SQTT. */ diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c index 4097f7904e2..ab10de1ba6e 100644 --- a/src/gallium/drivers/radeonsi/si_perfcounter.c +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c @@ -730,29 +730,29 @@ si_spm_init_bo(struct si_context *sctx) struct radeon_winsys *ws = sctx->ws; uint64_t size = 32 * 1024 * 1024; /* Default to 32MB. */ - sctx->spm_trace.buffer_size = size; - sctx->spm_trace.sample_interval = 4096; /* Default to 4096 clk. */ + sctx->spm.buffer_size = size; + sctx->spm.sample_interval = 4096; /* Default to 4096 clk. */ - sctx->spm_trace.bo = ws->buffer_create( + sctx->spm.bo = ws->buffer_create( ws, size, 4096, RADEON_DOMAIN_VRAM, RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_GTT_WC | RADEON_FLAG_NO_SUBALLOC); - return sctx->spm_trace.bo != NULL; + return sctx->spm.bo != NULL; } static void si_emit_spm_counters(struct si_context *sctx, struct radeon_cmdbuf *cs) { - struct ac_spm_trace_data *spm_trace = &sctx->spm_trace; + struct ac_spm *spm = &sctx->spm; radeon_begin(cs); - for (uint32_t b = 0; b < spm_trace->num_used_sq_block_sel; b++) { - struct ac_spm_block_select *sq_block_sel = &spm_trace->sq_block_sel[b]; + for (uint32_t b = 0; b < spm->num_used_sq_block_sel; b++) { + struct ac_spm_block_select *sq_block_sel = &spm->sq_block_sel[b]; const struct ac_spm_counter_select *cntr_sel = &sq_block_sel->counters[0]; uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT; @@ -760,8 +760,8 @@ si_emit_spm_counters(struct si_context *sctx, struct radeon_cmdbuf *cs) radeon_emit(cntr_sel->sel0 | S_036700_SQC_BANK_MASK(0xf)); /* SQC_BANK_MASK only gfx10 */ } - for (uint32_t b = 0; b < spm_trace->num_block_sel; b++) { - struct ac_spm_block_select *block_sel = &spm_trace->block_sel[b]; + for (uint32_t b = 0; b < spm->num_block_sel; b++) { + struct ac_spm_block_select *block_sel = &spm->block_sel[b]; struct ac_pc_block_base *regs = block_sel->b->b->b; radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, block_sel->grbm_gfx_index); @@ -793,21 +793,21 @@ si_emit_spm_counters(struct si_context *sctx, struct radeon_cmdbuf *cs) void si_emit_spm_setup(struct si_context *sctx, struct radeon_cmdbuf *cs) { - struct ac_spm_trace_data *spm_trace = &sctx->spm_trace; - uint64_t va = sctx->screen->ws->buffer_get_virtual_address(spm_trace->bo); - uint64_t ring_size = spm_trace->buffer_size; + struct ac_spm *spm = &sctx->spm; + uint64_t va = sctx->screen->ws->buffer_get_virtual_address(spm->bo); + uint64_t ring_size = spm->buffer_size; /* It's required that the ring VA and the size are correctly aligned. */ assert(!(va & (SPM_RING_BASE_ALIGN - 1))); assert(!(ring_size & (SPM_RING_BASE_ALIGN - 1))); - assert(spm_trace->sample_interval >= 32); + assert(spm->sample_interval >= 32); radeon_begin(cs); /* Configure the SPM ring buffer. */ radeon_set_uconfig_reg(R_037200_RLC_SPM_PERFMON_CNTL, S_037200_PERFMON_RING_MODE(0) | /* no stall and no interrupt on overflow */ - S_037200_PERFMON_SAMPLE_INTERVAL(spm_trace->sample_interval)); /* in sclk */ + S_037200_PERFMON_SAMPLE_INTERVAL(spm->sample_interval)); /* in sclk */ radeon_set_uconfig_reg(R_037204_RLC_SPM_PERFMON_RING_BASE_LO, va); radeon_set_uconfig_reg(R_037208_RLC_SPM_PERFMON_RING_BASE_HI, S_037208_RING_BASE_HI(va >> 32)); @@ -816,19 +816,19 @@ si_emit_spm_setup(struct si_context *sctx, struct radeon_cmdbuf *cs) /* Configure the muxsel. */ uint32_t total_muxsel_lines = 0; for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) { - total_muxsel_lines += spm_trace->num_muxsel_lines[s]; + total_muxsel_lines += spm->num_muxsel_lines[s]; } radeon_set_uconfig_reg(R_03726C_RLC_SPM_ACCUM_MODE, 0); radeon_set_uconfig_reg(R_037210_RLC_SPM_PERFMON_SEGMENT_SIZE, 0); radeon_set_uconfig_reg(R_03727C_RLC_SPM_PERFMON_SE3TO0_SEGMENT_SIZE, - S_03727C_SE0_NUM_LINE(spm_trace->num_muxsel_lines[0]) | - S_03727C_SE1_NUM_LINE(spm_trace->num_muxsel_lines[1]) | - S_03727C_SE2_NUM_LINE(spm_trace->num_muxsel_lines[2]) | - S_03727C_SE3_NUM_LINE(spm_trace->num_muxsel_lines[3])); + S_03727C_SE0_NUM_LINE(spm->num_muxsel_lines[0]) | + S_03727C_SE1_NUM_LINE(spm->num_muxsel_lines[1]) | + S_03727C_SE2_NUM_LINE(spm->num_muxsel_lines[2]) | + S_03727C_SE3_NUM_LINE(spm->num_muxsel_lines[3])); radeon_set_uconfig_reg(R_037280_RLC_SPM_PERFMON_GLB_SEGMENT_SIZE, S_037280_PERFMON_SEGMENT_SIZE(total_muxsel_lines) | - S_037280_GLOBAL_NUM_LINE(spm_trace->num_muxsel_lines[4])); + S_037280_GLOBAL_NUM_LINE(spm->num_muxsel_lines[4])); /* Upload each muxsel ram to the RLC. */ for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) { @@ -836,7 +836,7 @@ si_emit_spm_setup(struct si_context *sctx, struct radeon_cmdbuf *cs) unsigned grbm_gfx_index = S_030800_SH_BROADCAST_WRITES(1) | S_030800_INSTANCE_BROADCAST_WRITES(1); - if (!spm_trace->num_muxsel_lines[s]) + if (!spm->num_muxsel_lines[s]) continue; if (s == AC_SPM_SEGMENT_TYPE_GLOBAL) { @@ -853,8 +853,8 @@ si_emit_spm_setup(struct si_context *sctx, struct radeon_cmdbuf *cs) radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, grbm_gfx_index); - for (unsigned l = 0; l < spm_trace->num_muxsel_lines[s]; l++) { - uint32_t *data = (uint32_t *)spm_trace->muxsel_lines[s][l].muxsel; + for (unsigned l = 0; l < spm->num_muxsel_lines[s]; l++) { + uint32_t *data = (uint32_t *)spm->muxsel_lines[s][l].muxsel; /* Select MUXSEL_ADDR to point to the next muxsel. */ radeon_set_uconfig_reg(rlc_muxsel_addr, l * AC_SPM_MUXSEL_LINE_SIZE); @@ -914,7 +914,7 @@ si_spm_init(struct si_context *sctx) if (!ac_init_perfcounters(info, false, false, pc)) return false; - if (!ac_init_spm(info, pc, ARRAY_SIZE(spm_counters), spm_counters, &sctx->spm_trace)) + if (!ac_init_spm(info, pc, ARRAY_SIZE(spm_counters), spm_counters, &sctx->spm)) return false; if (!si_spm_init_bo(sctx)) @@ -926,8 +926,8 @@ si_spm_init(struct si_context *sctx) void si_spm_finish(struct si_context *sctx) { - struct pb_buffer *bo = sctx->spm_trace.bo; + struct pb_buffer *bo = sctx->spm.bo; radeon_bo_reference(sctx->screen->ws, &bo, NULL); - ac_destroy_spm(&sctx->spm_trace); + ac_destroy_spm(&sctx->spm); } diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index e127e0ac0c9..c9ff07247b8 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1319,7 +1319,7 @@ struct si_context { /* SQTT */ struct ac_thread_trace_data *thread_trace; - struct ac_spm_trace_data spm_trace; + struct ac_spm spm; struct pipe_fence_handle *last_sqtt_fence; enum rgp_sqtt_marker_event_type sqtt_next_event; bool thread_trace_enabled; diff --git a/src/gallium/drivers/radeonsi/si_sqtt.c b/src/gallium/drivers/radeonsi/si_sqtt.c index 1b8228ad2cf..31cc68508f1 100644 --- a/src/gallium/drivers/radeonsi/si_sqtt.c +++ b/src/gallium/drivers/radeonsi/si_sqtt.c @@ -481,9 +481,9 @@ si_thread_trace_start(struct si_context *sctx, int family, struct radeon_cmdbuf sctx->thread_trace->bo, RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM); - if (sctx->spm_trace.bo) + if (sctx->spm.bo) ws->cs_add_buffer(cs, - sctx->spm_trace.bo, + sctx->spm.bo, RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM); @@ -501,7 +501,7 @@ si_thread_trace_start(struct si_context *sctx, int family, struct radeon_cmdbuf /* Enable SQG events that collects thread trace data. */ si_emit_spi_config_cntl(sctx, cs, true); - if (sctx->spm_trace.bo) { + if (sctx->spm.bo) { si_pc_emit_spm_reset(cs); si_pc_emit_shaders(cs, 0x7f); si_emit_spm_setup(sctx, cs); @@ -509,7 +509,7 @@ si_thread_trace_start(struct si_context *sctx, int family, struct radeon_cmdbuf si_emit_thread_trace_start(sctx, cs, family); - if (sctx->spm_trace.bo) + if (sctx->spm.bo) si_pc_emit_spm_start(cs); } @@ -538,15 +538,15 @@ si_thread_trace_stop(struct si_context *sctx, int family, struct radeon_cmdbuf * RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM); - if (sctx->spm_trace.bo) + if (sctx->spm.bo) ws->cs_add_buffer(cs, - sctx->spm_trace.bo, + sctx->spm.bo, RADEON_USAGE_READWRITE, RADEON_DOMAIN_VRAM); si_cp_dma_wait_for_idle(sctx, cs); - if (sctx->spm_trace.bo) + if (sctx->spm.bo) si_pc_emit_spm_stop(cs, sctx->screen->info.never_stop_sq_perf_counters, sctx->screen->info.never_send_perfcounter_stop); @@ -559,7 +559,7 @@ si_thread_trace_stop(struct si_context *sctx, int family, struct radeon_cmdbuf * si_emit_thread_trace_stop(sctx, cs, family); - if (sctx->spm_trace.bo) + if (sctx->spm.bo) si_pc_emit_spm_reset(cs); /* Restore previous state by disabling SQG events. */ @@ -797,7 +797,7 @@ si_destroy_thread_trace(struct si_context *sctx) free(sctx->thread_trace); sctx->thread_trace = NULL; - if (sctx->spm_trace.bo) + if (sctx->spm.bo) si_spm_finish(sctx); } @@ -852,16 +852,16 @@ si_handle_thread_trace(struct si_context *sctx, struct radeon_cmdbuf *rcs) struct ac_spm_trace spm_trace; /* Map the SPM counter buffer */ - if (sctx->spm_trace.bo) { - sctx->spm_trace.ptr = sctx->ws->buffer_map(sctx->ws, sctx->spm_trace.bo, + if (sctx->spm.bo) { + sctx->spm.ptr = sctx->ws->buffer_map(sctx->ws, sctx->spm.bo, NULL, PIPE_MAP_READ | RADEON_MAP_TEMPORARY); - ac_spm_get_trace(&sctx->spm_trace, &spm_trace); + ac_spm_get_trace(&sctx->spm, &spm_trace); } - ac_dump_rgp_capture(&sctx->screen->info, &thread_trace, sctx->spm_trace.bo ? &spm_trace : NULL); + ac_dump_rgp_capture(&sctx->screen->info, &thread_trace, sctx->spm.bo ? &spm_trace : NULL); - if (sctx->spm_trace.ptr) - sctx->ws->buffer_unmap(sctx->ws, sctx->spm_trace.bo); + if (sctx->spm.ptr) + sctx->ws->buffer_unmap(sctx->ws, sctx->spm.bo); } else { fprintf(stderr, "Failed to read the trace\n"); }