ac/spm: add support for new Memory bytes counters in RGP 2.6

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39013>
This commit is contained in:
Samuel Pitoiset
2025-12-17 17:57:14 +01:00
parent 84ecdc534c
commit 3d2bb52a81
2 changed files with 157 additions and 0 deletions

View File

@@ -41,6 +41,22 @@ static struct ac_spm_counter_descr gfx10_cpf_perf_sel_stat_busy =
{AC_SPM_CPF_PERF_SEL_STAT_BUSY, CPF, 0x18};
/* Raw "LDS bank conflict" counter descriptor on the SQ block for GFX10. It is
 * listed in both the GFX10 and the GFX10.3 counter tables.
 * NOTE(review): the third initializer looks like the hardware perf-select
 * value for the block — confirm against the struct declaration.
 */
static struct ac_spm_counter_descr gfx10_sqc_perf_sel_lds_bank_conflict =
{AC_SPM_SQC_PERF_SEL_LDS_BANK_CONFLICT, SQ, 0x11d};
/* GFX10 GL2C EA_RDREQ_* raw counter descriptors, one per request-size bucket
 * (32B, 64B, 96B, 128B). The "Fetch size" derived counter is computed from
 * these four, weighting each bucket by its size in bytes.
 * NOTE(review): the third initializer is presumably the GL2C perf-select
 * value — confirm against the hardware headers.
 */
static struct ac_spm_counter_descr gfx10_gl2c_perf_sel_ea_rdreq_32b =
{AC_SPM_GL2C_PERF_SEL_EA_RDREQ_32B, GL2C, 0x59};
static struct ac_spm_counter_descr gfx10_gl2c_perf_sel_ea_rdreq_64b =
{AC_SPM_GL2C_PERF_SEL_EA_RDREQ_64B, GL2C, 0x5a};
static struct ac_spm_counter_descr gfx10_gl2c_perf_sel_ea_rdreq_96b =
{AC_SPM_GL2C_PERF_SEL_EA_RDREQ_96B, GL2C, 0x5b};
static struct ac_spm_counter_descr gfx10_gl2c_perf_sel_ea_rdreq_128b =
{AC_SPM_GL2C_PERF_SEL_EA_RDREQ_128B, GL2C, 0x5c};
/* GFX10 GL2C EA_WRREQ raw counter descriptors. The "Write size" derived
 * counter is computed from them as
 *    WRREQ * 32 + WRREQ_64B * 64 - WRREQ_64B * 32.
 */
static struct ac_spm_counter_descr gfx10_gl2c_perf_sel_ea_wrreq =
{AC_SPM_GL2C_PERF_SEL_EA_WRREQ, GL2C, 0x4b};
static struct ac_spm_counter_descr gfx10_gl2c_perf_sel_ea_wrreq_64b =
{AC_SPM_GL2C_PERF_SEL_EA_WRREQ_64B, GL2C, 0x4c};
/* GCEA SARB sized-request raw counter descriptors. They are shared by the
 * GFX10, GFX10.3 and GFX11 counter tables.
 *  - DRAM_SIZED_REQUESTS * 32 yields the "Local video memory bytes" counter.
 *  - IO_SIZED_REQUESTS * 32 yields the "PCIe bytes" counter.
 */
static struct ac_spm_counter_descr gfx10_gcea_perf_sel_sarb_dram_sized_requests =
{AC_SPM_GCEA_PERF_SEL_SARB_DRAM_SIZED_REQUESTS, GCEA, 0x37};
static struct ac_spm_counter_descr gfx10_gcea_perf_sel_sarb_io_sized_requests =
{AC_SPM_GCEA_PERF_SEL_SARB_IO_SIZED_REQUESTS, GCEA, 0x39};
static struct ac_spm_counter_create_info gfx10_spm_counters[] = {
{&gfx10_tcp_perf_sel_req},
@@ -57,11 +73,31 @@ static struct ac_spm_counter_create_info gfx10_spm_counters[] = {
{&gfx10_gl2c_perf_sel_miss},
{&gfx10_cpf_perf_sel_stat_busy},
{&gfx10_sqc_perf_sel_lds_bank_conflict},
{&gfx10_gl2c_perf_sel_ea_rdreq_32b},
{&gfx10_gl2c_perf_sel_ea_rdreq_64b},
{&gfx10_gl2c_perf_sel_ea_rdreq_96b},
{&gfx10_gl2c_perf_sel_ea_rdreq_128b},
{&gfx10_gl2c_perf_sel_ea_wrreq},
{&gfx10_gl2c_perf_sel_ea_wrreq_64b},
{&gfx10_gcea_perf_sel_sarb_dram_sized_requests},
{&gfx10_gcea_perf_sel_sarb_io_sized_requests},
};
/* GFX10.3+ */
/* GL2C "miss" raw counter descriptor for GFX10.3+. It is listed in both the
 * GFX10.3 and the GFX11 counter tables.
 */
static struct ac_spm_counter_descr gfx103_gl2c_perf_sel_miss =
{AC_SPM_GL2C_PERF_SEL_MISS, GL2C, 0x2b};
/* GFX10.3+ variants of the GL2C EA_RDREQ_* raw counter descriptors. They use
 * the same raw counter IDs as the GFX10 descriptors but different third
 * initializer values (0x63..0x66 instead of 0x59..0x5c). The GFX11 counter
 * table reuses these descriptors.
 */
static struct ac_spm_counter_descr gfx103_gl2c_perf_sel_ea_rdreq_32b =
{AC_SPM_GL2C_PERF_SEL_EA_RDREQ_32B, GL2C, 0x63};
static struct ac_spm_counter_descr gfx103_gl2c_perf_sel_ea_rdreq_64b =
{AC_SPM_GL2C_PERF_SEL_EA_RDREQ_64B, GL2C, 0x64};
static struct ac_spm_counter_descr gfx103_gl2c_perf_sel_ea_rdreq_96b =
{AC_SPM_GL2C_PERF_SEL_EA_RDREQ_96B, GL2C, 0x65};
static struct ac_spm_counter_descr gfx103_gl2c_perf_sel_ea_rdreq_128b =
{AC_SPM_GL2C_PERF_SEL_EA_RDREQ_128B, GL2C, 0x66};
/* GFX10.3+ variants of the GL2C EA_WRREQ raw counter descriptors; the GFX11
 * counter table reuses them.
 * NOTE(review): unlike the GFX10 pair (0x4b, 0x4c) these two values are not
 * consecutive (0x53, 0x55) — presumably another event sits at 0x54 on
 * GFX10.3+; confirm against the hardware headers.
 */
static struct ac_spm_counter_descr gfx103_gl2c_perf_sel_ea_wrreq =
{AC_SPM_GL2C_PERF_SEL_EA_WRREQ, GL2C, 0x53};
static struct ac_spm_counter_descr gfx103_gl2c_perf_sel_ea_wrreq_64b =
{AC_SPM_GL2C_PERF_SEL_EA_WRREQ_64B, GL2C, 0x55};
static struct ac_spm_counter_create_info gfx103_spm_counters[] = {
{&gfx10_tcp_perf_sel_req},
@@ -78,6 +114,14 @@ static struct ac_spm_counter_create_info gfx103_spm_counters[] = {
{&gfx103_gl2c_perf_sel_miss},
{&gfx10_cpf_perf_sel_stat_busy},
{&gfx10_sqc_perf_sel_lds_bank_conflict},
{&gfx103_gl2c_perf_sel_ea_rdreq_32b},
{&gfx103_gl2c_perf_sel_ea_rdreq_64b},
{&gfx103_gl2c_perf_sel_ea_rdreq_96b},
{&gfx103_gl2c_perf_sel_ea_rdreq_128b},
{&gfx103_gl2c_perf_sel_ea_wrreq},
{&gfx103_gl2c_perf_sel_ea_wrreq_64b},
{&gfx10_gcea_perf_sel_sarb_dram_sized_requests},
{&gfx10_gcea_perf_sel_sarb_io_sized_requests},
};
/* GFX11+ */
@@ -113,6 +157,14 @@ static struct ac_spm_counter_create_info gfx11_spm_counters[] = {
{&gfx103_gl2c_perf_sel_miss},
{&gfx10_cpf_perf_sel_stat_busy},
{&gfx11_sqc_perf_sel_lds_bank_conflict},
{&gfx103_gl2c_perf_sel_ea_rdreq_32b},
{&gfx103_gl2c_perf_sel_ea_rdreq_64b},
{&gfx103_gl2c_perf_sel_ea_rdreq_96b},
{&gfx103_gl2c_perf_sel_ea_rdreq_128b},
{&gfx103_gl2c_perf_sel_ea_wrreq},
{&gfx103_gl2c_perf_sel_ea_wrreq_64b},
{&gfx10_gcea_perf_sel_sarb_dram_sized_requests},
{&gfx10_gcea_perf_sel_sarb_io_sized_requests},
};
/* GFX12+ */
@@ -965,6 +1017,46 @@ static struct ac_spm_derived_counter_descr gfx10_cs_lds_bank_conflict_counter =
},
};
/* Derived "Fetch size" counter of the "Memory (bytes)" group, reported in
 * bytes. Its value is computed in ac_spm_get_derived_trace() as
 *    RDREQ_32B * 32 + RDREQ_64B * 64 + RDREQ_96B * 96 + RDREQ_128B * 128
 * from the GL2C EA_RDREQ_* raw counters.
 */
static struct ac_spm_derived_counter_descr gfx10_fetch_size_counter = {
.id = AC_SPM_COUNTER_FETCH_SIZE,
.group_id = AC_SPM_GROUP_MEMORY_BYTES,
.name = "Fetch size",
.desc = "The total bytes fetched from the video memory. This is measured "
"with all extra fetches and any cache or memory effects taken into "
"account.",
.usage = AC_SPM_USAGE_BYTES,
/* No components are declared for this counter. */
.num_components = 0,
};
/* Derived "Write size" counter of the "Memory (bytes)" group, reported in
 * bytes. Its value is computed in ac_spm_get_derived_trace() as
 *    WRREQ * 32 + WRREQ_64B * 64 - WRREQ_64B * 32
 * from the GL2C EA_WRREQ raw counters.
 * NOTE(review): the description below says "extra fetches" for a write
 * counter. It is a runtime string and is left untouched; presumably it
 * mirrors the wording of the profiler tool — confirm before changing.
 */
static struct ac_spm_derived_counter_descr gfx10_write_size_counter = {
.id = AC_SPM_COUNTER_WRITE_SIZE,
.group_id = AC_SPM_GROUP_MEMORY_BYTES,
.name = "Write size",
.desc = "The total bytes written to the video memory. This is measured with "
"all extra fetches and any cache or memory effects taken into account.",
.usage = AC_SPM_USAGE_BYTES,
/* No components are declared for this counter. */
.num_components = 0,
};
/* Derived "Local video memory bytes" counter of the "Memory (bytes)" group,
 * reported in bytes. Its value is computed in ac_spm_get_derived_trace() as
 * GCEA SARB_DRAM_SIZED_REQUESTS * 32.
 */
static struct ac_spm_derived_counter_descr gfx10_local_vid_mem_bytes_counter = {
.id = AC_SPM_COUNTER_LOCAL_VID_MEM_BYTES,
.group_id = AC_SPM_GROUP_MEMORY_BYTES,
.name = "Local video memory bytes",
.desc = "Number of bytes read from or written to the Infinity Cache (if "
"available) or local video memory",
.usage = AC_SPM_USAGE_BYTES,
/* No components are declared for this counter. */
.num_components = 0,
};
/* Derived "PCIe bytes" counter of the "Memory (bytes)" group, reported in
 * bytes. Its value is computed in ac_spm_get_derived_trace() as
 * GCEA SARB_IO_SIZED_REQUESTS * 32.
 */
static struct ac_spm_derived_counter_descr gfx10_pcie_bytes_counter = {
.id = AC_SPM_COUNTER_PCIE_BYTES,
.group_id = AC_SPM_GROUP_MEMORY_BYTES,
.name = "PCIe bytes",
.desc = "Number of bytes sent and received over the PCIe bus",
.usage = AC_SPM_USAGE_BYTES,
/* No components are declared for this counter. */
.num_components = 0,
};
/* SPM groups. */
static struct ac_spm_derived_group_descr gfx10_cache_group = {
.id = AC_SPM_GROUP_CACHE,
@@ -988,6 +1080,18 @@ static struct ac_spm_derived_group_descr gfx10_lds_group = {
},
};
/* "Memory (bytes)" derived-counter group. It bundles the fetch size, write
 * size, local video memory bytes and PCIe bytes counters, and is registered
 * with ac_spm_add_group() in ac_spm_get_derived_trace().
 */
static struct ac_spm_derived_group_descr gfx10_memory_bytes_group = {
.id = AC_SPM_GROUP_MEMORY_BYTES,
.name = "Memory (bytes)",
/* Must match the number of entries in .counters below. */
.num_counters = 4,
.counters = {
&gfx10_fetch_size_counter,
&gfx10_write_size_counter,
&gfx10_local_vid_mem_bytes_counter,
&gfx10_pcie_bytes_counter,
},
};
static struct ac_spm_derived_counter *
ac_spm_get_counter_by_id(struct ac_spm_derived_trace *spm_derived_trace,
enum ac_spm_counter_id counter_id)
@@ -1062,6 +1166,14 @@ ac_spm_get_raw_counter_op(enum ac_spm_raw_counter_id id)
case AC_SPM_GL2C_PERF_SEL_MISS:
case AC_SPM_CPF_PERF_SEL_STAT_BUSY:
case AC_SPM_SQC_PERF_SEL_LDS_BANK_CONFLICT:
case AC_SPM_GL2C_PERF_SEL_EA_RDREQ_32B:
case AC_SPM_GL2C_PERF_SEL_EA_RDREQ_64B:
case AC_SPM_GL2C_PERF_SEL_EA_RDREQ_96B:
case AC_SPM_GL2C_PERF_SEL_EA_RDREQ_128B:
case AC_SPM_GL2C_PERF_SEL_EA_WRREQ:
case AC_SPM_GL2C_PERF_SEL_EA_WRREQ_64B:
case AC_SPM_GCEA_PERF_SEL_SARB_DRAM_SIZED_REQUESTS:
case AC_SPM_GCEA_PERF_SEL_SARB_IO_SIZED_REQUESTS:
return AC_SPM_RAW_COUNTER_OP_SUM;
default:
UNREACHABLE("Invalid SPM raw counter ID.");
@@ -1083,6 +1195,7 @@ ac_spm_get_derived_trace(const struct radeon_info *info,
/* Add groups to the trace. */
ac_spm_add_group(spm_derived_trace, &gfx10_cache_group);
ac_spm_add_group(spm_derived_trace, &gfx10_lds_group);
ac_spm_add_group(spm_derived_trace, &gfx10_memory_bytes_group);
spm_derived_trace->timestamps = malloc(spm_trace->num_samples * sizeof(uint64_t));
if (!spm_derived_trace->timestamps) {
@@ -1145,6 +1258,10 @@ ac_spm_get_derived_trace(const struct radeon_info *info,
GET_COUNTER(L1_CACHE_HIT);
GET_COUNTER(L2_CACHE_HIT);
GET_COUNTER(CS_LDS_BANK_CONFLICT);
GET_COUNTER(FETCH_SIZE);
GET_COUNTER(WRITE_SIZE);
GET_COUNTER(LOCAL_VID_MEM_BYTES);
GET_COUNTER(PCIE_BYTES);
GET_COMPONENT(INST_CACHE_REQUEST_COUNT);
GET_COMPONENT(INST_CACHE_HIT_COUNT);
@@ -1263,6 +1380,32 @@ ac_spm_get_derived_trace(const struct radeon_info *info,
ADD(GPU_BUSY_CYCLES, gpu_busy_cycles);
ADD(CS_LDS_BANK_CONFLICT_CYCLES, cs_lds_bank_conflict_cycles);
ADD(CS_LDS_BANK_CONFLICT, cs_lds_bank_conflict);
/* Memory (bytes) group. */
/* Fetch size. */
double fetch_size = OP_RAW(GL2C_PERF_SEL_EA_RDREQ_32B) * 32 +
OP_RAW(GL2C_PERF_SEL_EA_RDREQ_64B) * 64 +
OP_RAW(GL2C_PERF_SEL_EA_RDREQ_96B) * 96 +
OP_RAW(GL2C_PERF_SEL_EA_RDREQ_128B) * 128;
ADD(FETCH_SIZE, fetch_size);
/* Write size. */
const double write_size = (OP_RAW(GL2C_PERF_SEL_EA_WRREQ) * 32 +
OP_RAW(GL2C_PERF_SEL_EA_WRREQ_64B) * 64) -
(OP_RAW(GL2C_PERF_SEL_EA_WRREQ_64B) * 32);
ADD(WRITE_SIZE, write_size);
/* Local video mem bytes. */
const double local_vid_mem_bytes = OP_RAW(GCEA_PERF_SEL_SARB_DRAM_SIZED_REQUESTS) * 32;
ADD(LOCAL_VID_MEM_BYTES, local_vid_mem_bytes);
/* PCIe bytes. */
const double pcie_bytes = OP_RAW(GCEA_PERF_SEL_SARB_IO_SIZED_REQUESTS) * 32;
ADD(PCIE_BYTES, pcie_bytes);
}
#undef ADD

View File

@@ -103,6 +103,14 @@ enum ac_spm_raw_counter_id {
AC_SPM_GL2C_PERF_SEL_MISS,
AC_SPM_CPF_PERF_SEL_STAT_BUSY,
AC_SPM_SQC_PERF_SEL_LDS_BANK_CONFLICT,
AC_SPM_GL2C_PERF_SEL_EA_RDREQ_32B,
AC_SPM_GL2C_PERF_SEL_EA_RDREQ_64B,
AC_SPM_GL2C_PERF_SEL_EA_RDREQ_96B,
AC_SPM_GL2C_PERF_SEL_EA_RDREQ_128B,
AC_SPM_GL2C_PERF_SEL_EA_WRREQ,
AC_SPM_GL2C_PERF_SEL_EA_WRREQ_64B,
AC_SPM_GCEA_PERF_SEL_SARB_DRAM_SIZED_REQUESTS,
AC_SPM_GCEA_PERF_SEL_SARB_IO_SIZED_REQUESTS,
AC_SPM_RAW_COUNTER_ID_COUNT,
};
@@ -223,6 +231,7 @@ struct ac_spm_trace {
/* Identifiers of the derived-counter groups. The values are consecutive and
 * start at zero, so the trailing COUNT entry equals the number of groups.
 */
enum ac_spm_group_id {
   AC_SPM_GROUP_CACHE = 0,
   AC_SPM_GROUP_LDS,
   AC_SPM_GROUP_MEMORY_BYTES,

   /* Number of groups; must stay last. */
   AC_SPM_GROUP_COUNT,
};
@@ -233,6 +242,10 @@ enum ac_spm_counter_id {
AC_SPM_COUNTER_L1_CACHE_HIT, /* < GFX12 */
AC_SPM_COUNTER_L2_CACHE_HIT,
AC_SPM_COUNTER_CS_LDS_BANK_CONFLICT,
AC_SPM_COUNTER_FETCH_SIZE,
AC_SPM_COUNTER_WRITE_SIZE,
AC_SPM_COUNTER_LOCAL_VID_MEM_BYTES,
AC_SPM_COUNTER_PCIE_BYTES,
AC_SPM_COUNTER_COUNT,
};
@@ -260,6 +273,7 @@ enum ac_spm_component_id {
/* Unit in which a derived counter's value is expressed.
 *
 * The numeric values are explicit and not contiguous (3 is not defined here).
 * NOTE(review): presumably they mirror an external profiler file format —
 * confirm before renumbering or filling the gap.
 */
enum ac_spm_usage_type {
   AC_SPM_USAGE_PERCENTAGE = 1,
   AC_SPM_USAGE_CYCLES     = 2,
   AC_SPM_USAGE_BYTES      = 4,
   AC_SPM_USAGE_ITEMS      = 5,
};