From ee69cffdbbd7399b2981507974d3b407e1f789fd Mon Sep 17 00:00:00 2001 From: Dr-Noob Date: Sun, 8 Sep 2024 13:46:02 +0100 Subject: [PATCH] WIP: Move accurate-pp to get_cpu_info. Use a vector as argument to store freq --- src/common/cpu.c | 6 ++++++ src/common/cpu.h | 10 ++++++++++ src/x86/cpuid.c | 31 +++++++++++++++++++++++-------- src/x86/cpuid.h | 2 +- src/x86/freq/freq.c | 21 ++++++++++++--------- src/x86/freq/freq.h | 2 +- 6 files changed, 53 insertions(+), 19 deletions(-) diff --git a/src/common/cpu.c b/src/common/cpu.c index 946c990..e3f6bcb 100644 --- a/src/common/cpu.c +++ b/src/common/cpu.c @@ -34,6 +34,12 @@ int64_t get_freq(struct frequency* freq) { return freq->max; } +#ifdef ARCH_X86 +int64_t get_freq_pp(struct frequency* freq) { + return freq->max_pp; +} +#endif + #if defined(ARCH_X86) || defined(ARCH_PPC) char* get_str_cpu_name(struct cpuInfo* cpu, bool fcpuname) { #ifdef ARCH_X86 diff --git a/src/common/cpu.h b/src/common/cpu.h index aabcee7..3e63e8b 100644 --- a/src/common/cpu.h +++ b/src/common/cpu.h @@ -60,6 +60,11 @@ struct frequency { int32_t max; // Indicates if max frequency was measured bool measured; +#ifdef ARCH_X86 + // Max frequency when running vectorized code. + // Used only for peak performance computation. + int32_t max_pp; +#endif }; struct hypervisor { @@ -188,6 +193,8 @@ struct cpuInfo { #ifdef ARCH_X86 // The index of the first core in the module uint32_t first_core_id; + // The index of this module + uint32_t module_id; #endif #endif }; @@ -200,6 +207,9 @@ uint32_t get_nsockets(struct topology* topo); VENDOR get_cpu_vendor(struct cpuInfo* cpu); int64_t get_freq(struct frequency* freq); +#ifdef ARCH_X86 +int64_t get_freq_pp(struct frequency* freq); +#endif char* get_str_aes(struct cpuInfo* cpu); char* get_str_sha(struct cpuInfo* cpu); diff --git a/src/x86/cpuid.c b/src/x86/cpuid.c index dd67ee0..0753399 100644 --- a/src/x86/cpuid.c +++ b/src/x86/cpuid.c @@ -210,18 +210,14 @@ int64_t get_peak_performance(struct cpuInfo* cpu, bool accurate_pp) { for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) { struct topology* topo = ptr->topo; - int64_t max_freq = get_freq(ptr->freq); + int64_t freq = get_freq(ptr->freq); - int64_t freq; #ifdef __linux__ if(accurate_pp) - freq = measure_frequency(ptr); - else - freq = max_freq; + freq = get_freq_pp(ptr->freq); #else // Silence compiler warning (void)(accurate_pp); - freq = max_freq; #endif //First, check we have consistent data @@ -518,6 +514,7 @@ struct cpuInfo* get_cpu_info(void) { if(cpu->hybrid_flag) cpu->num_cpus = 2; + int32_t *max_freq_pp_vec = NULL; struct cpuInfo* ptr = cpu; for(uint32_t i=0; i < cpu->num_cpus; i++) { int32_t first_core; @@ -546,10 +543,14 @@ struct cpuInfo* get_cpu_info(void) { ptr->core_type = get_core_type(); } ptr->first_core_id = first_core; + ptr->module_id = i; ptr->feat = get_features_info(ptr); ptr->arch = get_cpu_uarch(ptr); - ptr->freq = get_frequency_info(ptr); + // If accurate_pp is requested, we need to get the frequency + // after fetching the topology. Otherwise we can do it now. + if (!accurate_pp()) + ptr->freq = get_frequency_info(ptr, accurate_pp(), max_freq_pp_vec); if (cpu->cpu_name == NULL && ptr == cpu) { // If we couldnt read CPU name from cpuid, infer it now @@ -568,6 +569,11 @@ struct cpuInfo* get_cpu_info(void) { // If topo is NULL, return early, as get_peak_performance // requries non-NULL topology. if(ptr->topo == NULL) return cpu; + + // If accurate_pp is requested, we need to get the frequency + // after fetching the topology + if (accurate_pp()) + ptr->freq = get_frequency_info(ptr, accurate_pp(), max_freq_pp_vec); } cpu->peak_performance = get_peak_performance(cpu, accurate_pp()); @@ -935,7 +941,7 @@ struct cache* get_cache_info(struct cpuInfo* cpu) { return cach; } -struct frequency* get_frequency_info(struct cpuInfo* cpu) { +struct frequency* get_frequency_info(struct cpuInfo* cpu, bool accurate_pp, int32_t *max_freq_pp_vec) { struct frequency* freq = emalloc(sizeof(struct frequency)); freq->measured = false; @@ -1005,6 +1011,15 @@ struct frequency* get_frequency_info(struct cpuInfo* cpu) { } #endif + freq->max_pp = UNKNOWN_DATA; + #ifdef __linux__ + if(accurate_pp) + freq->max_pp = measure_frequency(cpu, max_freq_pp_vec); + #else + // Silence compiler warning + (void)(accurate_pp); + #endif + return freq; } diff --git a/src/x86/cpuid.h b/src/x86/cpuid.h index a0a6a5c..0b57bb6 100644 --- a/src/x86/cpuid.h +++ b/src/x86/cpuid.h @@ -5,7 +5,7 @@ struct cpuInfo* get_cpu_info(void); struct cache* get_cache_info(struct cpuInfo* cpu); -struct frequency* get_frequency_info(struct cpuInfo* cpu); +struct frequency* get_frequency_info(struct cpuInfo* cpu, bool accurate_pp, int32_t *max_freq_pp_vec); struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach, int module); char* get_str_avx(struct cpuInfo* cpu); diff --git a/src/x86/freq/freq.c b/src/x86/freq/freq.c index 10171c0..de57a0a 100644 --- a/src/x86/freq/freq.c +++ b/src/x86/freq/freq.c @@ -26,7 +26,7 @@ struct freq_thread { bool end; bool measure; // Output - double freq; + int32_t *max_pp; }; double vector_average_harmonic(double* v, int len) { @@ -88,34 +88,37 @@ void* measure_freq(void *freq_ptr) { double* freq_vector_ptr = freq_vector; for (int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) { - ptr->freq->max_pp = vector_average_harmonic(freq_vector_ptr, ptr->topo->total_cores_module); - printWarn("AVX2 measured freq=%d (module %d)", ptr->freq->max_pp, i); + freq->max_pp[i] = vector_average_harmonic(freq_vector_ptr, ptr->topo->total_cores_module); + printWarn("AVX2 measured freq=%d (module %d)", freq->max_pp[i], i); freq_vector_ptr = freq_vector_ptr + ptr->topo->total_cores_module; } } else { - cpu->freq->max_pp = vector_average_harmonic(freq_vector, v); - printWarn("AVX2 measured freq=%d\n", cpu->freq->max_pp); + freq->max_pp[0] = vector_average_harmonic(freq_vector, v); + printWarn("AVX2 measured freq=%d\n", freq->max_pp[0]); } return NULL; } -int64_t measure_frequency(struct cpuInfo* cpu) { - if (cpu->hybrid_flag && cpu->first_core_id > 0) { +int64_t measure_frequency(struct cpuInfo* cpu, int32_t *max_freq_pp_vec) { + if (cpu->hybrid_flag && cpu->module_id > 0) { // We have a hybrid architecture and we have already // measured the frequency for this module in a previous // call to this function, so now just return it. - return get_freq_pp(cpu->freq); + return max_freq_pp_vec[cpu->module_id]; } + max_freq_pp_vec = malloc(sizeof(int32_t) * cpu->num_cpus); + int ret; int num_spaces; struct freq_thread* freq_struct = malloc(sizeof(struct freq_thread)); freq_struct->end = false; freq_struct->measure = false; freq_struct->cpu = cpu; + freq_struct->max_pp = max_freq_pp_vec; void* (*compute_function)(void*); @@ -187,5 +190,5 @@ int64_t measure_frequency(struct cpuInfo* cpu) { } printf("\r%*c", num_spaces, ' '); - return cpu->freq->max; + return max_freq_pp_vec[0]; } diff --git a/src/x86/freq/freq.h b/src/x86/freq/freq.h index de62916..8d79db3 100644 --- a/src/x86/freq/freq.h +++ b/src/x86/freq/freq.h @@ -8,6 +8,6 @@ #define MEASURE_TIME_SECONDS 5 #define LOOP_ITERS 100000000 -int64_t measure_frequency(struct cpuInfo* cpu); +int64_t measure_frequency(struct cpuInfo* cpu, int32_t *max_freq_pp_vec); #endif