WIP: Move accurate-pp to get_cpu_info. Use a vector as argument to store freq

This commit is contained in:
Dr-Noob
2024-09-08 13:46:02 +01:00
parent de4f47a400
commit ee69cffdbb
6 changed files with 53 additions and 19 deletions

View File

@@ -34,6 +34,12 @@ int64_t get_freq(struct frequency* freq) {
return freq->max; return freq->max;
} }
#ifdef ARCH_X86
// Accessor for the max_pp field of a frequency record: the max frequency
// when running vectorized code, used for peak performance computation.
// The stored 32-bit value is widened to int64_t on return.
int64_t get_freq_pp(struct frequency* freq) {
  int64_t vectorized_max = freq->max_pp;
  return vectorized_max;
}
#endif
#if defined(ARCH_X86) || defined(ARCH_PPC) #if defined(ARCH_X86) || defined(ARCH_PPC)
char* get_str_cpu_name(struct cpuInfo* cpu, bool fcpuname) { char* get_str_cpu_name(struct cpuInfo* cpu, bool fcpuname) {
#ifdef ARCH_X86 #ifdef ARCH_X86

View File

@@ -60,6 +60,11 @@ struct frequency {
int32_t max; int32_t max;
// Indicates if max frequency was measured // Indicates if max frequency was measured
bool measured; bool measured;
#ifdef ARCH_X86
// Max frequency when running vectorized code.
// Used only for peak performance computation.
int32_t max_pp;
#endif
}; };
struct hypervisor { struct hypervisor {
@@ -188,6 +193,8 @@ struct cpuInfo {
#ifdef ARCH_X86 #ifdef ARCH_X86
// The index of the first core in the module // The index of the first core in the module
uint32_t first_core_id; uint32_t first_core_id;
// The index of this module
uint32_t module_id;
#endif #endif
#endif #endif
}; };
@@ -200,6 +207,9 @@ uint32_t get_nsockets(struct topology* topo);
VENDOR get_cpu_vendor(struct cpuInfo* cpu); VENDOR get_cpu_vendor(struct cpuInfo* cpu);
int64_t get_freq(struct frequency* freq); int64_t get_freq(struct frequency* freq);
#ifdef ARCH_X86
int64_t get_freq_pp(struct frequency* freq);
#endif
char* get_str_aes(struct cpuInfo* cpu); char* get_str_aes(struct cpuInfo* cpu);
char* get_str_sha(struct cpuInfo* cpu); char* get_str_sha(struct cpuInfo* cpu);

View File

@@ -210,18 +210,14 @@ int64_t get_peak_performance(struct cpuInfo* cpu, bool accurate_pp) {
for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) { for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
struct topology* topo = ptr->topo; struct topology* topo = ptr->topo;
int64_t max_freq = get_freq(ptr->freq); int64_t freq = get_freq(ptr->freq);
int64_t freq;
#ifdef __linux__ #ifdef __linux__
if(accurate_pp) if(accurate_pp)
freq = measure_frequency(ptr); freq = get_freq_pp(ptr->freq);
else
freq = max_freq;
#else #else
// Silence compiler warning // Silence compiler warning
(void)(accurate_pp); (void)(accurate_pp);
freq = max_freq;
#endif #endif
//First, check we have consistent data //First, check we have consistent data
@@ -518,6 +514,7 @@ struct cpuInfo* get_cpu_info(void) {
if(cpu->hybrid_flag) cpu->num_cpus = 2; if(cpu->hybrid_flag) cpu->num_cpus = 2;
int32_t *max_freq_pp_vec = NULL;
struct cpuInfo* ptr = cpu; struct cpuInfo* ptr = cpu;
for(uint32_t i=0; i < cpu->num_cpus; i++) { for(uint32_t i=0; i < cpu->num_cpus; i++) {
int32_t first_core; int32_t first_core;
@@ -546,10 +543,14 @@ struct cpuInfo* get_cpu_info(void) {
ptr->core_type = get_core_type(); ptr->core_type = get_core_type();
} }
ptr->first_core_id = first_core; ptr->first_core_id = first_core;
ptr->module_id = i;
ptr->feat = get_features_info(ptr); ptr->feat = get_features_info(ptr);
ptr->arch = get_cpu_uarch(ptr); ptr->arch = get_cpu_uarch(ptr);
ptr->freq = get_frequency_info(ptr); // If accurate_pp is requested, we need to get the frequency
// after fetching the topology. Otherwise we can do it now.
if (!accurate_pp())
ptr->freq = get_frequency_info(ptr, accurate_pp(), max_freq_pp_vec);
if (cpu->cpu_name == NULL && ptr == cpu) { if (cpu->cpu_name == NULL && ptr == cpu) {
// If we couldn't read CPU name from cpuid, infer it now // If we couldn't read CPU name from cpuid, infer it now
@@ -568,6 +569,11 @@ struct cpuInfo* get_cpu_info(void) {
// If topo is NULL, return early, as get_peak_performance // If topo is NULL, return early, as get_peak_performance
// requires non-NULL topology. // requires non-NULL topology.
if(ptr->topo == NULL) return cpu; if(ptr->topo == NULL) return cpu;
// If accurate_pp is requested, we need to get the frequency
// after fetching the topology
if (accurate_pp())
ptr->freq = get_frequency_info(ptr, accurate_pp(), max_freq_pp_vec);
} }
cpu->peak_performance = get_peak_performance(cpu, accurate_pp()); cpu->peak_performance = get_peak_performance(cpu, accurate_pp());
@@ -935,7 +941,7 @@ struct cache* get_cache_info(struct cpuInfo* cpu) {
return cach; return cach;
} }
struct frequency* get_frequency_info(struct cpuInfo* cpu) { struct frequency* get_frequency_info(struct cpuInfo* cpu, bool accurate_pp, int32_t *max_freq_pp_vec) {
struct frequency* freq = emalloc(sizeof(struct frequency)); struct frequency* freq = emalloc(sizeof(struct frequency));
freq->measured = false; freq->measured = false;
@@ -1005,6 +1011,15 @@ struct frequency* get_frequency_info(struct cpuInfo* cpu) {
} }
#endif #endif
freq->max_pp = UNKNOWN_DATA;
#ifdef __linux__
if(accurate_pp)
freq->max_pp = measure_frequency(cpu, max_freq_pp_vec);
#else
// Silence compiler warning
(void)(accurate_pp);
#endif
return freq; return freq;
} }

View File

@@ -5,7 +5,7 @@
struct cpuInfo* get_cpu_info(void); struct cpuInfo* get_cpu_info(void);
struct cache* get_cache_info(struct cpuInfo* cpu); struct cache* get_cache_info(struct cpuInfo* cpu);
struct frequency* get_frequency_info(struct cpuInfo* cpu); struct frequency* get_frequency_info(struct cpuInfo* cpu, bool accurate_pp, int32_t *max_freq_pp_vec);
struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach, int module); struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach, int module);
char* get_str_avx(struct cpuInfo* cpu); char* get_str_avx(struct cpuInfo* cpu);

View File

@@ -26,7 +26,7 @@ struct freq_thread {
bool end; bool end;
bool measure; bool measure;
// Output // Output
double freq; int32_t *max_pp;
}; };
double vector_average_harmonic(double* v, int len) { double vector_average_harmonic(double* v, int len) {
@@ -88,34 +88,37 @@ void* measure_freq(void *freq_ptr) {
double* freq_vector_ptr = freq_vector; double* freq_vector_ptr = freq_vector;
for (int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) { for (int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
ptr->freq->max_pp = vector_average_harmonic(freq_vector_ptr, ptr->topo->total_cores_module); freq->max_pp[i] = vector_average_harmonic(freq_vector_ptr, ptr->topo->total_cores_module);
printWarn("AVX2 measured freq=%d (module %d)", ptr->freq->max_pp, i); printWarn("AVX2 measured freq=%d (module %d)", freq->max_pp[i], i);
freq_vector_ptr = freq_vector_ptr + ptr->topo->total_cores_module; freq_vector_ptr = freq_vector_ptr + ptr->topo->total_cores_module;
} }
} }
else { else {
cpu->freq->max_pp = vector_average_harmonic(freq_vector, v); freq->max_pp[0] = vector_average_harmonic(freq_vector, v);
printWarn("AVX2 measured freq=%d\n", cpu->freq->max_pp); printWarn("AVX2 measured freq=%d\n", freq->max_pp[0]);
} }
return NULL; return NULL;
} }
int64_t measure_frequency(struct cpuInfo* cpu) { int64_t measure_frequency(struct cpuInfo* cpu, int32_t *max_freq_pp_vec) {
if (cpu->hybrid_flag && cpu->first_core_id > 0) { if (cpu->hybrid_flag && cpu->module_id > 0) {
// We have a hybrid architecture and we have already // We have a hybrid architecture and we have already
// measured the frequency for this module in a previous // measured the frequency for this module in a previous
// call to this function, so now just return it. // call to this function, so now just return it.
return get_freq_pp(cpu->freq); return max_freq_pp_vec[cpu->module_id];
} }
max_freq_pp_vec = malloc(sizeof(int32_t) * cpu->num_cpus);
int ret; int ret;
int num_spaces; int num_spaces;
struct freq_thread* freq_struct = malloc(sizeof(struct freq_thread)); struct freq_thread* freq_struct = malloc(sizeof(struct freq_thread));
freq_struct->end = false; freq_struct->end = false;
freq_struct->measure = false; freq_struct->measure = false;
freq_struct->cpu = cpu; freq_struct->cpu = cpu;
freq_struct->max_pp = max_freq_pp_vec;
void* (*compute_function)(void*); void* (*compute_function)(void*);
@@ -187,5 +190,5 @@ int64_t measure_frequency(struct cpuInfo* cpu) {
} }
printf("\r%*c", num_spaces, ' '); printf("\r%*c", num_spaces, ' ');
return cpu->freq->max; return max_freq_pp_vec[0];
} }

View File

@@ -8,6 +8,6 @@
#define MEASURE_TIME_SECONDS 5 #define MEASURE_TIME_SECONDS 5
#define LOOP_ITERS 100000000 #define LOOP_ITERS 100000000
int64_t measure_frequency(struct cpuInfo* cpu); int64_t measure_frequency(struct cpuInfo* cpu, int32_t *max_freq_pp_vec);
#endif #endif