[v1.06][X86] Fix accurate-pp in hybrid architectures (fixes #169)

Overview of changes:
- Adds field max_pp in frequency struct to hold the max freq for peak-performance estimation.
- Instead of getting the max frequency in get_peak_performance, we get it in get_cpu_info (more natural).
- Adds fill_frequency_info_pp which fills the max_pp of the passed cpu by calling measure_frequency.

The approach is to call measure_frequency with a vector where the max frequencies are stored. Then,
the first time measure_frequency is called, the frequency is measured while running all the cores,
and the max frequency is computed per module (e.g., in the case of 2 modules, we would compute
the freq for the first and for the second module), and saved into this vector. Subsequent calls to
measure_frequency will just read the corresponding value from the vector. In other words, the frequency
is only measured once for the whole CPU.
This commit is contained in:
Dr-Noob
2024-09-10 22:43:23 +01:00
parent edbfc9722e
commit ab43a11ef2
5 changed files with 79 additions and 12 deletions

View File

@@ -210,18 +210,14 @@ int64_t get_peak_performance(struct cpuInfo* cpu, bool accurate_pp) {
for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
struct topology* topo = ptr->topo;
int64_t max_freq = get_freq(ptr->freq);
int64_t freq = get_freq(ptr->freq);
int64_t freq;
#ifdef __linux__
if(accurate_pp)
freq = measure_frequency(ptr);
else
freq = max_freq;
freq = get_freq_pp(ptr->freq);
#else
// Silence compiler warning
(void)(accurate_pp);
freq = max_freq;
#endif
//First, check we have consistent data
@@ -450,6 +446,23 @@ int32_t get_core_type(void) {
}
}
#ifdef __linux__
// Gets the max frequency for estimating the peak performance,
// filling in the passed cpuInfo parameter with this information.
//
// Walks the cpu->num_cpus modules linked through next_cpu and stores the
// value returned by measure_frequency in each module's freq->max_pp.
// One scratch vector (one int32_t slot per module) is shared by all of
// those calls, so that measure_frequency can keep the per-module results
// in it and hand them back on later calls.
//
// If the scratch vector cannot be allocated, nothing is measured and
// every max_pp is left untouched.
void fill_frequency_info_pp(struct cpuInfo* cpu) {
  int32_t unused;
  int32_t *max_freq_pp_vec = malloc(sizeof *max_freq_pp_vec * cpu->num_cpus);
  if (max_freq_pp_vec == NULL) return;

  struct cpuInfo* ptr = cpu;
  for (uint32_t i=0; i < cpu->num_cpus; i++) {
    // The value written through the third argument is not needed here
    set_cpu_module(i, cpu->num_cpus, &unused);
    ptr->freq->max_pp = measure_frequency(ptr, max_freq_pp_vec);
    ptr = ptr->next_cpu;
  }

  // The vector is only scratch space for the calls above, so release it.
  // NOTE(review): assumes measure_frequency does not keep the pointer
  // beyond each call - confirm against its implementation.
  free(max_freq_pp_vec);
}
#endif
struct cpuInfo* get_cpu_info(void) {
struct cpuInfo* cpu = emalloc(sizeof(struct cpuInfo));
cpu->peak_performance = -1;
@@ -546,6 +559,7 @@ struct cpuInfo* get_cpu_info(void) {
ptr->core_type = get_core_type();
}
ptr->first_core_id = first_core;
ptr->module_id = i;
ptr->feat = get_features_info(ptr);
ptr->arch = get_cpu_uarch(ptr);
@@ -570,6 +584,13 @@ struct cpuInfo* get_cpu_info(void) {
if(ptr->topo == NULL) return cpu;
}
#ifdef __linux__
// If accurate_pp is requested, we need to get the max frequency
// after fetching the topology for all CPU modules, since the topology
// is required by fill_frequency_info_pp
if (accurate_pp()) fill_frequency_info_pp(cpu);
#endif
cpu->peak_performance = get_peak_performance(cpu, accurate_pp());
return cpu;
@@ -1005,6 +1026,7 @@ struct frequency* get_frequency_info(struct cpuInfo* cpu) {
}
#endif
freq->max_pp = UNKNOWN_DATA;
return freq;
}