intel/perf: Store pointer to intel_device_info in intel_perf_config

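intel_perf_config now holds a const pointer to the caller's intel_device_info
instead of embedding a full copy of the struct. This will reduce host memory
usage a bit.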

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29077>
This commit is contained in:
José Roberto de Souza
2024-05-03 10:49:09 -07:00
committed by Marge Bot
parent da45594c5e
commit a9a53c914d
4 changed files with 14 additions and 14 deletions

View File

@@ -233,8 +233,8 @@ hw_vars["$EuSubslicesTotalCount"] = "perf->sys_vars.n_eu_sub_slices"
hw_vars["$XeCoreTotalCount"] = "perf->sys_vars.n_eu_sub_slices"
hw_vars["$EuDualSubslicesTotalCount"] = "perf->sys_vars.n_eu_sub_slices"
hw_vars["$EuDualSubslicesSlice0123Count"] = "perf->sys_vars.n_eu_slice0123"
hw_vars["$EuThreadsCount"] = "perf->devinfo.num_thread_per_eu"
hw_vars["$VectorEngineThreadsCount"] = "perf->devinfo.num_thread_per_eu"
hw_vars["$EuThreadsCount"] = "perf->devinfo->num_thread_per_eu"
hw_vars["$VectorEngineThreadsCount"] = "perf->devinfo->num_thread_per_eu"
hw_vars["$SliceMask"] = "perf->sys_vars.slice_mask"
hw_vars["$SliceTotalCount"] = "perf->sys_vars.n_eu_slices"
# subslice_mask is interchangeable with subslice/dual-subslice since Gfx12+
@@ -242,10 +242,10 @@ hw_vars["$SliceTotalCount"] = "perf->sys_vars.n_eu_slices"
hw_vars["$SubsliceMask"] = "perf->sys_vars.subslice_mask"
hw_vars["$DualSubsliceMask"] = "perf->sys_vars.subslice_mask"
hw_vars["$XeCoreMask"] = "perf->sys_vars.subslice_mask"
hw_vars["$GpuTimestampFrequency"] = "perf->devinfo.timestamp_frequency"
hw_vars["$GpuTimestampFrequency"] = "perf->devinfo->timestamp_frequency"
hw_vars["$GpuMinFrequency"] = "perf->sys_vars.gt_min_freq"
hw_vars["$GpuMaxFrequency"] = "perf->sys_vars.gt_max_freq"
hw_vars["$SkuRevisionId"] = "perf->devinfo.revision"
hw_vars["$SkuRevisionId"] = "perf->devinfo->revision"
hw_vars["$QueryMode"] = "perf->sys_vars.query_mode"
def resolve_variable(name, set, allow_counters):
@@ -253,10 +253,10 @@ def resolve_variable(name, set, allow_counters):
return hw_vars[name]
m = re.search(r'\$GtSlice([0-9]+)$', name)
if m:
return 'intel_device_info_slice_available(&perf->devinfo, {0})'.format(m.group(1))
return 'intel_device_info_slice_available(perf->devinfo, {0})'.format(m.group(1))
m = re.search(r'\$GtSlice([0-9]+)XeCore([0-9]+)$', name)
if m:
return 'intel_device_info_subslice_available(&perf->devinfo, {0}, {1})'.format(m.group(1), m.group(2))
return 'intel_device_info_subslice_available(perf->devinfo, {0}, {1})'.format(m.group(1), m.group(2))
if allow_counters and name in set.counter_vars:
return set.read_funcs[name[1:]] + "(perf, query, results)"
return None

View File

@@ -362,7 +362,7 @@ init_oa_configs(struct intel_perf_config *perf, int fd,
static void
compute_topology_builtins(struct intel_perf_config *perf)
{
const struct intel_device_info *devinfo = &perf->devinfo;
const struct intel_device_info *devinfo = perf->devinfo;
perf->sys_vars.slice_mask = devinfo->slice_masks;
perf->sys_vars.n_eu_slices = devinfo->num_slices;
@@ -725,7 +725,7 @@ oa_metrics_available(struct intel_perf_config *perf, int fd,
if (devinfo->kmd_type != INTEL_KMD_TYPE_I915)
return false;
perf->devinfo = *devinfo;
perf->devinfo = devinfo;
/* Consider an invalid as supported. */
if (fd == -1) {
@@ -1180,7 +1180,7 @@ intel_perf_query_result_accumulate(struct intel_perf_query_result *result,
result->accumulator + query->a_offset + 32 + i);
}
if (can_use_mi_rpc_bc_counters(&query->perf->devinfo) ||
if (can_use_mi_rpc_bc_counters(query->perf->devinfo) ||
!query->perf->sys_vars.query_mode) {
/* A36-37 counters are 32bits */
accumulate_uint32(start + 40, end + 40,
@@ -1222,7 +1222,7 @@ intel_perf_query_result_accumulate(struct intel_perf_query_result *result,
result->accumulator + query->a_offset + 32 + i);
}
if (can_use_mi_rpc_bc_counters(&query->perf->devinfo) ||
if (can_use_mi_rpc_bc_counters(query->perf->devinfo) ||
!query->perf->sys_vars.query_mode) {
/* 8x 32bit B counters */
for (i = 0; i < 8; i++) {
@@ -1328,7 +1328,7 @@ intel_perf_query_result_accumulate_fields(struct intel_perf_query_result *result
bool no_oa_accumulate)
{
const struct intel_perf_query_field_layout *layout = &query->perf->query_layout;
const struct intel_device_info *devinfo = &query->perf->devinfo;
const struct intel_device_info *devinfo = query->perf->devinfo;
for (uint32_t r = 0; r < layout->n_fields; r++) {
const struct intel_perf_query_field *field = &layout->fields[r];

View File

@@ -373,7 +373,7 @@ struct intel_perf_config {
bool query_mode; /** $QueryMode */
} sys_vars;
struct intel_device_info devinfo;
const struct intel_device_info *devinfo;
/* OA metric sets, indexed by GUID, as know by Mesa at build time, to
* cross-reference with the GUIDs of configs advertised by the kernel at

View File

@@ -40,7 +40,7 @@ intel_query_alloc(struct intel_perf_config *perf, int ncounters)
query->counters = rzalloc_array(query, struct intel_perf_query_counter, ncounters);
/* Accumulation buffer offsets... */
if (perf->devinfo.verx10 <= 75) {
if (perf->devinfo->verx10 <= 75) {
query->oa_format = I915_OA_FORMAT_A45_B8_C8;
query->gpu_time_offset = 0;
query->a_offset = query->gpu_time_offset + 1;
@@ -48,7 +48,7 @@ intel_query_alloc(struct intel_perf_config *perf, int ncounters)
query->c_offset = query->b_offset + 8;
query->perfcnt_offset = query->c_offset + 8;
query->rpstat_offset = query->perfcnt_offset + 2;
} else if (perf->devinfo.verx10 <= 120) {
} else if (perf->devinfo->verx10 <= 120) {
query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
query->gpu_time_offset = 0;
query->gpu_clock_offset = query->gpu_time_offset + 1;