diff --git a/src/arm/midr.c b/src/arm/midr.c index 40c5109..809e9f9 100644 --- a/src/arm/midr.c +++ b/src/arm/midr.c @@ -210,37 +210,25 @@ char* get_str_topology(struct cpuInfo* cpu, struct topology* topo, bool dual_soc return string; } -char* get_str_peak_performance(struct cpuInfo* cpu) { - //7 for GFLOP/s and 6 for digits,eg 412.14 - uint32_t size = 7+6+1+1; - assert(strlen(STRING_UNKNOWN)+1 <= size); - char* string = emalloc(sizeof(char)*size); +bool get_peak_performance(struct cpuInfo* cpu, double* flops) { struct cpuInfo* ptr = cpu; //First check we have consistent data for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) { if(get_freq(ptr->freq) == UNKNOWN_FREQ) { - snprintf(string, strlen(STRING_UNKNOWN)+1, STRING_UNKNOWN); - return string; + return false; } } - double flops = 0.0; + *flops = 0.0; ptr = cpu; for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) { - flops += ptr->topo->total_cores * (get_freq(ptr->freq) * 1000000); + *flops += ptr->topo->total_cores * (get_freq(ptr->freq) * 1000000); } - if(cpu->feat->NEON) flops = flops * 4; + if(cpu->feat->NEON) *flops = *flops * 4; - if(flops >= (double)1000000000000.0) - snprintf(string,size,"%.2f TFLOP/s",flops/1000000000000); - else if(flops >= 1000000000.0) - snprintf(string,size,"%.2f GFLOP/s",flops/1000000000); - else - snprintf(string,size,"%.2f MFLOP/s",flops/1000000); - - return string; + return true; } char* get_str_features(struct cpuInfo* cpu) { diff --git a/src/arm/midr.h b/src/arm/midr.h index 2d468a0..1854ca8 100644 --- a/src/arm/midr.h +++ b/src/arm/midr.h @@ -7,9 +7,10 @@ struct cpuInfo* get_cpu_info(); uint32_t get_nsockets(struct topology* topo); char* get_str_topology(struct cpuInfo* cpu, struct topology* topo, bool dual_socket); -char* get_str_peak_performance(struct cpuInfo* cpu); char* get_str_features(struct cpuInfo* cpu); +bool get_peak_performance(struct cpuInfo* cpu, double* flops); + void print_debug(struct cpuInfo* cpu); void free_topo_struct(struct topology* topo); 
diff --git a/src/common/cpu.c b/src/common/cpu.c
index 3a6898e..9a1f197 100644
--- a/src/common/cpu.c
+++ b/src/common/cpu.c
@@ -151,6 +151,35 @@ char* get_str_freq(struct frequency* freq) {
   return string;
 }
 
+/*
+ * Build a newly allocated, human-readable peak performance string.
+ * Yields STRING_UNKNOWN when valid_flops is false; otherwise flops is scaled
+ * to MFLOP/s, GFLOP/s or TFLOP/s with two decimals. The buffer is not freed here.
+ */
+char* get_str_peak_performance(double flops, bool valid_flops) {
+  char* str;
+
+  if(!valid_flops) {
+    str = emalloc(sizeof(char) * (strlen(STRING_UNKNOWN) + 1));
+    strncpy(str, STRING_UNKNOWN, strlen(STRING_UNKNOWN) + 1);
+    // Return here: falling through would leak this buffer and format an unset flops value
+    return str;
+  }
+
+  // 7 for digits (e.g., XXXX.XX), 1 for the space, 7 for XFLOP/s, 1 for the NUL terminator
+  uint32_t max_size = 7+1+7+1;
+  str = ecalloc(max_size, sizeof(char));
+
+  if(flops >= (double)1000000000000.0)
+    snprintf(str, max_size, "%.2f TFLOP/s", flops/1000000000000);
+  else if(flops >= 1000000000.0)
+    snprintf(str, max_size, "%.2f GFLOP/s", flops/1000000000);
+  else
+    snprintf(str, max_size, "%.2f MFLOP/s", flops/1000000);
+
+  return str;
+}
+
 void init_topology_struct(struct topology* topo, struct cache* cach) {
   topo->total_cores = 0;
   topo->cach = cach;
diff --git a/src/common/cpu.h b/src/common/cpu.h
index 0b5e9ee..a45f060 100644
--- a/src/common/cpu.h
+++ b/src/common/cpu.h
@@ -161,6 +161,7 @@ char* get_str_l1d(struct cache* cach);
 char* get_str_l2(struct cache* cach);
 char* get_str_l3(struct cache* cach);
 char* get_str_freq(struct frequency* freq);
+char* get_str_peak_performance(double flops, bool valid_pp);
 
 void init_topology_struct(struct topology* topo, struct cache* cach);
 void init_cache_struct(struct cache* cach);
diff --git a/src/common/printer.c b/src/common/printer.c
index f966082..17cf6d5 100644
--- a/src/common/printer.c
+++ b/src/common/printer.c
@@ -446,6 +446,9 @@ bool print_cpufetch_x86(struct cpuInfo* cpu, STYLE s, struct colors* cs) {
   if(art == NULL)
     return false;
 
+  double flops;
+  bool valid_pp = get_peak_performance(cpu, cpu->topo, get_freq(cpu->freq), &flops);
+
   char* uarch = get_str_uarch(cpu);
   char* manufacturing_process = get_str_process(cpu);
   char* sockets = get_str_sockets(cpu->topo);
@@ -456,12 +459,11 @@ bool print_cpufetch_x86(struct cpuInfo*
cpu, STYLE s, struct colors* cs) { char* avx = get_str_avx(cpu); char* fma = get_str_fma(cpu); - char* l1i = get_str_l1i(cpu->cach); char* l1d = get_str_l1d(cpu->cach); char* l2 = get_str_l2(cpu->cach); char* l3 = get_str_l3(cpu->cach); - char* pp = get_str_peak_performance(cpu,cpu->topo,get_freq(cpu->freq)); + char* pp = get_str_peak_performance(flops, valid_pp); setAttribute(art,ATTRIBUTE_NAME,cpu_name); if(cpu->hv->present) { @@ -568,6 +570,9 @@ bool print_cpufetch_ppc(struct cpuInfo* cpu, STYLE s, struct colors* cs) { if(art == NULL) return false; + double flops; + bool valid_pp = get_peak_performance(cpu, cpu->topo, get_freq(cpu->freq), &flops); + char* uarch = get_str_uarch(cpu); char* manufacturing_process = get_str_process(cpu); char* sockets = get_str_sockets(cpu->topo); @@ -581,7 +586,7 @@ bool print_cpufetch_ppc(struct cpuInfo* cpu, STYLE s, struct colors* cs) { char* l1d = get_str_l1d(cpu->cach); char* l2 = get_str_l2(cpu->cach); char* l3 = get_str_l3(cpu->cach); - char* pp = get_str_peak_performance(cpu,cpu->topo,get_freq(cpu->freq)); + char* pp = get_str_peak_performance(flops, valid_pp); if(cpu_name != NULL) { setAttribute(art,ATTRIBUTE_NAME,cpu_name); @@ -787,7 +792,9 @@ bool print_cpufetch_arm(struct cpuInfo* cpu, STYLE s, struct colors* cs) { } } } - char* pp = get_str_peak_performance(cpu); + double flops; + bool valid_pp = get_peak_performance(cpu, &flops); + char* pp = get_str_peak_performance(flops, valid_pp); setAttribute(art,ATTRIBUTE_PEAK,pp); if(art->n_attributes_set > NUMBER_OF_LINES) { diff --git a/src/ppc/ppc.c b/src/ppc/ppc.c index 8da5b7b..24aa062 100644 --- a/src/ppc/ppc.c +++ b/src/ppc/ppc.c @@ -170,43 +170,30 @@ char* get_str_altivec(struct cpuInfo* cpu) { return string; } -char* get_str_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t freq) { +bool get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t freq, double* flops) { /* * Not sure about this * PP(SP) = N_CORES * FREQUENCY * 4(If 
altivec) */ - //7 for GFLOP/s and 6 for digits,eg 412.14 - uint32_t size = 7+6+1+1; - assert(strlen(STRING_UNKNOWN)+1 <= size); - char* string = emalloc(sizeof(char)*size); - //First check we have consistent data if(freq == UNKNOWN_FREQ) { - snprintf(string,strlen(STRING_UNKNOWN)+1,STRING_UNKNOWN); - return string; + return false; } struct features* feat = cpu->feat; - double flops = topo->physical_cores * topo->sockets * (freq*1000000); - if(feat->altivec) flops = flops*4; + *flops = topo->physical_cores * topo->sockets * (freq*1000000); + if(feat->altivec) *flops = *flops * 4; // POWER9 has the concept called "slices". Each SMT4 core has two super-slices, // and each super-slice is capable of doing two FLOPS per cycle. In the case of // SMT8, it has 4 super-slices, thus four FLOPS per cycle. if(is_power9(cpu->arch)) { int threads_per_core = topo->logical_cores / topo->physical_cores; - flops = flops * (threads_per_core / 2); + *flops = *flops * (threads_per_core / 2); } - if(flops >= (double)1000000000000.0) - snprintf(string,size,"%.2f TFLOP/s",flops/1000000000000); - else if(flops >= 1000000000.0) - snprintf(string,size,"%.2f GFLOP/s",flops/1000000000); - else - snprintf(string,size,"%.2f MFLOP/s",flops/1000000); - - return string; + return true; } char* get_str_topology(struct topology* topo, bool dual_socket) { diff --git a/src/ppc/ppc.h b/src/ppc/ppc.h index e9924f1..9e0fc41 100644 --- a/src/ppc/ppc.h +++ b/src/ppc/ppc.h @@ -6,7 +6,7 @@ struct cpuInfo* get_cpu_info(); char* get_str_altivec(struct cpuInfo* cpu); char* get_str_topology(struct topology* topo, bool dual_socket); -char* get_str_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t freq); +bool get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t freq, double* flops); void print_debug(struct cpuInfo* cpu); #endif diff --git a/src/x86/cpuid.c b/src/x86/cpuid.c index a858c07..6ed8769 100644 --- a/src/x86/cpuid.c +++ b/src/x86/cpuid.c @@ -655,65 +655,48 @@ struct 
frequency* get_frequency_info(struct cpuInfo* cpu) { return freq; } -/*** STRING FUNCTIONS ***/ +// STRING FUNCTIONS +bool get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t freq, double* flops) { + /* + * PP = PeakPerformance + * SP = SinglePrecision + * + * PP(SP) = + * N_CORES * + * FREQUENCY * + * 2(Two vector units) * + * 2(If cpu has fma) * + * 16(If AVX512), 8(If AVX), 4(If SSE) * + */ -char* get_str_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t freq) { - /*** - PP = PeakPerformance - SP = SinglePrecision - - PP(SP) = - N_CORES * - FREQUENCY * - 2(Two vector units) * - 2(If cpu has fma) * - 16(If AVX512), 8(If AVX), 4(If SSE) * - - ***/ - - //7 for GFLOP/s and 6 for digits,eg 412.14 - uint32_t size = 7+6+1+1; - assert(strlen(STRING_UNKNOWN)+1 <= size); - char* string = emalloc(sizeof(char)*size); - - //First check we have consistent data + //First, check we have consistent data if(freq == UNKNOWN_FREQ) { - snprintf(string,strlen(STRING_UNKNOWN)+1,STRING_UNKNOWN); - return string; + return false; } struct features* feat = cpu->feat; - double flops = topo->physical_cores * topo->sockets * (freq*1000000); int vpus = get_number_of_vpus(cpu); - - flops = flops * vpus; + *flops = topo->physical_cores * topo->sockets * (freq*1000000) * vpus; if(feat->FMA3 || feat->FMA4) - flops = flops*2; + *flops = *flops*2; // Ice Lake has AVX512, but it has 1 VPU for AVX512, while // it has 2 for AVX2. 
If this is a Ice Lake CPU, we are computing // the peak performance supposing AVX2, not AVX512 if(feat->AVX512 && vpus_are_AVX512(cpu)) - flops = flops*16; + *flops = *flops*16; else if(feat->AVX || feat->AVX2) - flops = flops*8; + *flops = *flops*8; else if(feat->SSE) - flops = flops*4; + *flops = *flops*4; // See https://sites.utexas.edu/jdm4372/2018/01/22/a-peculiar- // throughput-limitation-on-intels-xeon-phi-x200-knights-landing/ if(is_knights_landing(cpu)) - flops = flops * 6 / 7; + *flops = *flops * 6 / 7; - if(flops >= (double)1000000000000.0) - snprintf(string,size,"%.2f TFLOP/s",flops/1000000000000); - else if(flops >= 1000000000.0) - snprintf(string,size,"%.2f GFLOP/s",flops/1000000000); - else - snprintf(string,size,"%.2f MFLOP/s",flops/1000000); - - return string; + return true; } // TODO: Refactoring diff --git a/src/x86/cpuid.h b/src/x86/cpuid.h index 743ae39..bfe0f6e 100644 --- a/src/x86/cpuid.h +++ b/src/x86/cpuid.h @@ -12,7 +12,8 @@ char* get_str_avx(struct cpuInfo* cpu); char* get_str_sse(struct cpuInfo* cpu); char* get_str_fma(struct cpuInfo* cpu); char* get_str_topology(struct cpuInfo* cpu, struct topology* topo, bool dual_socket); -char* get_str_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t freq); + +bool get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t freq, double* flops); void print_debug(struct cpuInfo* cpu); void print_raw(struct cpuInfo* cpu);