intel/perf: Store pointer to intel_device_info in intel_perf_config
This will reduce host memory usage a bit.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29077>
This commit is contained in:
committed by
Marge Bot
parent
da45594c5e
commit
a9a53c914d
@@ -233,8 +233,8 @@ hw_vars["$EuSubslicesTotalCount"] = "perf->sys_vars.n_eu_sub_slices"
|
||||
hw_vars["$XeCoreTotalCount"] = "perf->sys_vars.n_eu_sub_slices"
|
||||
hw_vars["$EuDualSubslicesTotalCount"] = "perf->sys_vars.n_eu_sub_slices"
|
||||
hw_vars["$EuDualSubslicesSlice0123Count"] = "perf->sys_vars.n_eu_slice0123"
|
||||
hw_vars["$EuThreadsCount"] = "perf->devinfo.num_thread_per_eu"
|
||||
hw_vars["$VectorEngineThreadsCount"] = "perf->devinfo.num_thread_per_eu"
|
||||
hw_vars["$EuThreadsCount"] = "perf->devinfo->num_thread_per_eu"
|
||||
hw_vars["$VectorEngineThreadsCount"] = "perf->devinfo->num_thread_per_eu"
|
||||
hw_vars["$SliceMask"] = "perf->sys_vars.slice_mask"
|
||||
hw_vars["$SliceTotalCount"] = "perf->sys_vars.n_eu_slices"
|
||||
# subslice_mask is interchangeable with subslice/dual-subslice since Gfx12+
|
||||
@@ -242,10 +242,10 @@ hw_vars["$SliceTotalCount"] = "perf->sys_vars.n_eu_slices"
|
||||
hw_vars["$SubsliceMask"] = "perf->sys_vars.subslice_mask"
|
||||
hw_vars["$DualSubsliceMask"] = "perf->sys_vars.subslice_mask"
|
||||
hw_vars["$XeCoreMask"] = "perf->sys_vars.subslice_mask"
|
||||
hw_vars["$GpuTimestampFrequency"] = "perf->devinfo.timestamp_frequency"
|
||||
hw_vars["$GpuTimestampFrequency"] = "perf->devinfo->timestamp_frequency"
|
||||
hw_vars["$GpuMinFrequency"] = "perf->sys_vars.gt_min_freq"
|
||||
hw_vars["$GpuMaxFrequency"] = "perf->sys_vars.gt_max_freq"
|
||||
hw_vars["$SkuRevisionId"] = "perf->devinfo.revision"
|
||||
hw_vars["$SkuRevisionId"] = "perf->devinfo->revision"
|
||||
hw_vars["$QueryMode"] = "perf->sys_vars.query_mode"
|
||||
|
||||
def resolve_variable(name, set, allow_counters):
|
||||
@@ -253,10 +253,10 @@ def resolve_variable(name, set, allow_counters):
|
||||
return hw_vars[name]
|
||||
m = re.search(r'\$GtSlice([0-9]+)$', name)
|
||||
if m:
|
||||
return 'intel_device_info_slice_available(&perf->devinfo, {0})'.format(m.group(1))
|
||||
return 'intel_device_info_slice_available(perf->devinfo, {0})'.format(m.group(1))
|
||||
m = re.search(r'\$GtSlice([0-9]+)XeCore([0-9]+)$', name)
|
||||
if m:
|
||||
return 'intel_device_info_subslice_available(&perf->devinfo, {0}, {1})'.format(m.group(1), m.group(2))
|
||||
return 'intel_device_info_subslice_available(perf->devinfo, {0}, {1})'.format(m.group(1), m.group(2))
|
||||
if allow_counters and name in set.counter_vars:
|
||||
return set.read_funcs[name[1:]] + "(perf, query, results)"
|
||||
return None
|
||||
|
||||
@@ -362,7 +362,7 @@ init_oa_configs(struct intel_perf_config *perf, int fd,
|
||||
static void
|
||||
compute_topology_builtins(struct intel_perf_config *perf)
|
||||
{
|
||||
const struct intel_device_info *devinfo = &perf->devinfo;
|
||||
const struct intel_device_info *devinfo = perf->devinfo;
|
||||
|
||||
perf->sys_vars.slice_mask = devinfo->slice_masks;
|
||||
perf->sys_vars.n_eu_slices = devinfo->num_slices;
|
||||
@@ -725,7 +725,7 @@ oa_metrics_available(struct intel_perf_config *perf, int fd,
|
||||
if (devinfo->kmd_type != INTEL_KMD_TYPE_I915)
|
||||
return false;
|
||||
|
||||
perf->devinfo = *devinfo;
|
||||
perf->devinfo = devinfo;
|
||||
|
||||
/* Consider an invalid fd as supported. */
|
||||
if (fd == -1) {
|
||||
@@ -1180,7 +1180,7 @@ intel_perf_query_result_accumulate(struct intel_perf_query_result *result,
|
||||
result->accumulator + query->a_offset + 32 + i);
|
||||
}
|
||||
|
||||
if (can_use_mi_rpc_bc_counters(&query->perf->devinfo) ||
|
||||
if (can_use_mi_rpc_bc_counters(query->perf->devinfo) ||
|
||||
!query->perf->sys_vars.query_mode) {
|
||||
/* A36-37 counters are 32bits */
|
||||
accumulate_uint32(start + 40, end + 40,
|
||||
@@ -1222,7 +1222,7 @@ intel_perf_query_result_accumulate(struct intel_perf_query_result *result,
|
||||
result->accumulator + query->a_offset + 32 + i);
|
||||
}
|
||||
|
||||
if (can_use_mi_rpc_bc_counters(&query->perf->devinfo) ||
|
||||
if (can_use_mi_rpc_bc_counters(query->perf->devinfo) ||
|
||||
!query->perf->sys_vars.query_mode) {
|
||||
/* 8x 32bit B counters */
|
||||
for (i = 0; i < 8; i++) {
|
||||
@@ -1328,7 +1328,7 @@ intel_perf_query_result_accumulate_fields(struct intel_perf_query_result *result
|
||||
bool no_oa_accumulate)
|
||||
{
|
||||
const struct intel_perf_query_field_layout *layout = &query->perf->query_layout;
|
||||
const struct intel_device_info *devinfo = &query->perf->devinfo;
|
||||
const struct intel_device_info *devinfo = query->perf->devinfo;
|
||||
|
||||
for (uint32_t r = 0; r < layout->n_fields; r++) {
|
||||
const struct intel_perf_query_field *field = &layout->fields[r];
|
||||
|
||||
@@ -373,7 +373,7 @@ struct intel_perf_config {
|
||||
bool query_mode; /** $QueryMode */
|
||||
} sys_vars;
|
||||
|
||||
struct intel_device_info devinfo;
|
||||
const struct intel_device_info *devinfo;
|
||||
|
||||
/* OA metric sets, indexed by GUID, as known by Mesa at build time, to
|
||||
* cross-reference with the GUIDs of configs advertised by the kernel at
|
||||
|
||||
@@ -40,7 +40,7 @@ intel_query_alloc(struct intel_perf_config *perf, int ncounters)
|
||||
query->counters = rzalloc_array(query, struct intel_perf_query_counter, ncounters);
|
||||
|
||||
/* Accumulation buffer offsets... */
|
||||
if (perf->devinfo.verx10 <= 75) {
|
||||
if (perf->devinfo->verx10 <= 75) {
|
||||
query->oa_format = I915_OA_FORMAT_A45_B8_C8;
|
||||
query->gpu_time_offset = 0;
|
||||
query->a_offset = query->gpu_time_offset + 1;
|
||||
@@ -48,7 +48,7 @@ intel_query_alloc(struct intel_perf_config *perf, int ncounters)
|
||||
query->c_offset = query->b_offset + 8;
|
||||
query->perfcnt_offset = query->c_offset + 8;
|
||||
query->rpstat_offset = query->perfcnt_offset + 2;
|
||||
} else if (perf->devinfo.verx10 <= 120) {
|
||||
} else if (perf->devinfo->verx10 <= 120) {
|
||||
query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
|
||||
query->gpu_time_offset = 0;
|
||||
query->gpu_clock_offset = query->gpu_time_offset + 1;
|
||||
|
||||
Reference in New Issue
Block a user