mirror of
https://github.com/Dr-Noob/cpufetch.git
synced 2026-03-25 07:50:40 +01:00
[v0.98][Refactoring] Unify the use of get_str_peak_performance
This commit is contained in:
@@ -210,37 +210,25 @@ char* get_str_topology(struct cpuInfo* cpu, struct topology* topo, bool dual_soc
|
||||
return string;
|
||||
}
|
||||
|
||||
char* get_str_peak_performance(struct cpuInfo* cpu) {
|
||||
//7 for GFLOP/s and 6 for digits,eg 412.14
|
||||
uint32_t size = 7+6+1+1;
|
||||
assert(strlen(STRING_UNKNOWN)+1 <= size);
|
||||
char* string = emalloc(sizeof(char)*size);
|
||||
bool get_peak_performance(struct cpuInfo* cpu, double* flops) {
|
||||
struct cpuInfo* ptr = cpu;
|
||||
|
||||
//First check we have consistent data
|
||||
for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
|
||||
if(get_freq(ptr->freq) == UNKNOWN_FREQ) {
|
||||
snprintf(string, strlen(STRING_UNKNOWN)+1, STRING_UNKNOWN);
|
||||
return string;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
double flops = 0.0;
|
||||
*flops = 0.0;
|
||||
|
||||
ptr = cpu;
|
||||
for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
|
||||
flops += ptr->topo->total_cores * (get_freq(ptr->freq) * 1000000);
|
||||
*flops += ptr->topo->total_cores * (get_freq(ptr->freq) * 1000000);
|
||||
}
|
||||
if(cpu->feat->NEON) flops = flops * 4;
|
||||
if(cpu->feat->NEON) *flops = *flops * 4;
|
||||
|
||||
if(flops >= (double)1000000000000.0)
|
||||
snprintf(string,size,"%.2f TFLOP/s",flops/1000000000000);
|
||||
else if(flops >= 1000000000.0)
|
||||
snprintf(string,size,"%.2f GFLOP/s",flops/1000000000);
|
||||
else
|
||||
snprintf(string,size,"%.2f MFLOP/s",flops/1000000);
|
||||
|
||||
return string;
|
||||
return true;
|
||||
}
|
||||
|
||||
char* get_str_features(struct cpuInfo* cpu) {
|
||||
|
||||
@@ -7,9 +7,10 @@ struct cpuInfo* get_cpu_info();
|
||||
|
||||
uint32_t get_nsockets(struct topology* topo);
|
||||
char* get_str_topology(struct cpuInfo* cpu, struct topology* topo, bool dual_socket);
|
||||
char* get_str_peak_performance(struct cpuInfo* cpu);
|
||||
char* get_str_features(struct cpuInfo* cpu);
|
||||
|
||||
bool get_peak_performance(struct cpuInfo* cpu, double* flops);
|
||||
|
||||
void print_debug(struct cpuInfo* cpu);
|
||||
void free_topo_struct(struct topology* topo);
|
||||
|
||||
|
||||
@@ -151,6 +151,28 @@ char* get_str_freq(struct frequency* freq) {
|
||||
return string;
|
||||
}
|
||||
|
||||
char* get_str_peak_performance(double flops, bool valid_flops) {
|
||||
char* str;
|
||||
|
||||
if(!valid_flops) {
|
||||
str = emalloc(sizeof(char) * (strlen(STRING_UNKNOWN) + 1));
|
||||
strncpy(str, STRING_UNKNOWN, strlen(STRING_UNKNOWN) + 1);
|
||||
}
|
||||
|
||||
// 7 for digits (e.g, XXXX.XX), 7 for XFLOP/s
|
||||
uint32_t max_size = 7+1+7+1;
|
||||
str = ecalloc(max_size, sizeof(char));
|
||||
|
||||
if(flops >= (double)1000000000000.0)
|
||||
snprintf(str, max_size, "%.2f TFLOP/s", flops/1000000000000);
|
||||
else if(flops >= 1000000000.0)
|
||||
snprintf(str, max_size, "%.2f GFLOP/s", flops/1000000000);
|
||||
else
|
||||
snprintf(str, max_size, "%.2f MFLOP/s", flops/1000000);
|
||||
|
||||
return str;
|
||||
}
|
||||
|
||||
void init_topology_struct(struct topology* topo, struct cache* cach) {
|
||||
topo->total_cores = 0;
|
||||
topo->cach = cach;
|
||||
|
||||
@@ -161,6 +161,7 @@ char* get_str_l1d(struct cache* cach);
|
||||
char* get_str_l2(struct cache* cach);
|
||||
char* get_str_l3(struct cache* cach);
|
||||
char* get_str_freq(struct frequency* freq);
|
||||
char* get_str_peak_performance(double flops, bool valid_pp);
|
||||
|
||||
void init_topology_struct(struct topology* topo, struct cache* cach);
|
||||
void init_cache_struct(struct cache* cach);
|
||||
|
||||
@@ -446,6 +446,9 @@ bool print_cpufetch_x86(struct cpuInfo* cpu, STYLE s, struct colors* cs) {
|
||||
if(art == NULL)
|
||||
return false;
|
||||
|
||||
double flops;
|
||||
bool valid_pp = get_peak_performance(cpu, cpu->topo, get_freq(cpu->freq), &flops);
|
||||
|
||||
char* uarch = get_str_uarch(cpu);
|
||||
char* manufacturing_process = get_str_process(cpu);
|
||||
char* sockets = get_str_sockets(cpu->topo);
|
||||
@@ -456,12 +459,11 @@ bool print_cpufetch_x86(struct cpuInfo* cpu, STYLE s, struct colors* cs) {
|
||||
char* avx = get_str_avx(cpu);
|
||||
char* fma = get_str_fma(cpu);
|
||||
|
||||
|
||||
char* l1i = get_str_l1i(cpu->cach);
|
||||
char* l1d = get_str_l1d(cpu->cach);
|
||||
char* l2 = get_str_l2(cpu->cach);
|
||||
char* l3 = get_str_l3(cpu->cach);
|
||||
char* pp = get_str_peak_performance(cpu,cpu->topo,get_freq(cpu->freq));
|
||||
char* pp = get_str_peak_performance(flops, valid_pp);
|
||||
|
||||
setAttribute(art,ATTRIBUTE_NAME,cpu_name);
|
||||
if(cpu->hv->present) {
|
||||
@@ -568,6 +570,9 @@ bool print_cpufetch_ppc(struct cpuInfo* cpu, STYLE s, struct colors* cs) {
|
||||
if(art == NULL)
|
||||
return false;
|
||||
|
||||
double flops;
|
||||
bool valid_pp = get_peak_performance(cpu, cpu->topo, get_freq(cpu->freq), &flops);
|
||||
|
||||
char* uarch = get_str_uarch(cpu);
|
||||
char* manufacturing_process = get_str_process(cpu);
|
||||
char* sockets = get_str_sockets(cpu->topo);
|
||||
@@ -581,7 +586,7 @@ bool print_cpufetch_ppc(struct cpuInfo* cpu, STYLE s, struct colors* cs) {
|
||||
char* l1d = get_str_l1d(cpu->cach);
|
||||
char* l2 = get_str_l2(cpu->cach);
|
||||
char* l3 = get_str_l3(cpu->cach);
|
||||
char* pp = get_str_peak_performance(cpu,cpu->topo,get_freq(cpu->freq));
|
||||
char* pp = get_str_peak_performance(flops, valid_pp);
|
||||
|
||||
if(cpu_name != NULL) {
|
||||
setAttribute(art,ATTRIBUTE_NAME,cpu_name);
|
||||
@@ -787,7 +792,9 @@ bool print_cpufetch_arm(struct cpuInfo* cpu, STYLE s, struct colors* cs) {
|
||||
}
|
||||
}
|
||||
}
|
||||
char* pp = get_str_peak_performance(cpu);
|
||||
double flops;
|
||||
bool valid_pp = get_peak_performance(cpu, &flops);
|
||||
char* pp = get_str_peak_performance(flops, valid_pp);
|
||||
setAttribute(art,ATTRIBUTE_PEAK,pp);
|
||||
|
||||
if(art->n_attributes_set > NUMBER_OF_LINES) {
|
||||
|
||||
@@ -170,43 +170,30 @@ char* get_str_altivec(struct cpuInfo* cpu) {
|
||||
return string;
|
||||
}
|
||||
|
||||
char* get_str_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t freq) {
|
||||
bool get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t freq, double* flops) {
|
||||
/*
|
||||
* Not sure about this
|
||||
* PP(SP) = N_CORES * FREQUENCY * 4(If altivec)
|
||||
*/
|
||||
|
||||
//7 for GFLOP/s and 6 for digits,eg 412.14
|
||||
uint32_t size = 7+6+1+1;
|
||||
assert(strlen(STRING_UNKNOWN)+1 <= size);
|
||||
char* string = emalloc(sizeof(char)*size);
|
||||
|
||||
//First check we have consistent data
|
||||
if(freq == UNKNOWN_FREQ) {
|
||||
snprintf(string,strlen(STRING_UNKNOWN)+1,STRING_UNKNOWN);
|
||||
return string;
|
||||
return false;
|
||||
}
|
||||
|
||||
struct features* feat = cpu->feat;
|
||||
double flops = topo->physical_cores * topo->sockets * (freq*1000000);
|
||||
if(feat->altivec) flops = flops*4;
|
||||
*flops = topo->physical_cores * topo->sockets * (freq*1000000);
|
||||
if(feat->altivec) *flops = *flops * 4;
|
||||
|
||||
// POWER9 has the concept called "slices". Each SMT4 core has two super-slices,
|
||||
// and each super-slice is capable of doing two FLOPS per cycle. In the case of
|
||||
// SMT8, it has 4 super-slices, thus four FLOPS per cycle.
|
||||
if(is_power9(cpu->arch)) {
|
||||
int threads_per_core = topo->logical_cores / topo->physical_cores;
|
||||
flops = flops * (threads_per_core / 2);
|
||||
*flops = *flops * (threads_per_core / 2);
|
||||
}
|
||||
|
||||
if(flops >= (double)1000000000000.0)
|
||||
snprintf(string,size,"%.2f TFLOP/s",flops/1000000000000);
|
||||
else if(flops >= 1000000000.0)
|
||||
snprintf(string,size,"%.2f GFLOP/s",flops/1000000000);
|
||||
else
|
||||
snprintf(string,size,"%.2f MFLOP/s",flops/1000000);
|
||||
|
||||
return string;
|
||||
return true;
|
||||
}
|
||||
|
||||
char* get_str_topology(struct topology* topo, bool dual_socket) {
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
struct cpuInfo* get_cpu_info();
|
||||
char* get_str_altivec(struct cpuInfo* cpu);
|
||||
char* get_str_topology(struct topology* topo, bool dual_socket);
|
||||
char* get_str_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t freq);
|
||||
bool get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t freq, double* flops);
|
||||
void print_debug(struct cpuInfo* cpu);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -655,65 +655,48 @@ struct frequency* get_frequency_info(struct cpuInfo* cpu) {
|
||||
return freq;
|
||||
}
|
||||
|
||||
/*** STRING FUNCTIONS ***/
|
||||
// STRING FUNCTIONS
|
||||
bool get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t freq, double* flops) {
|
||||
/*
|
||||
* PP = PeakPerformance
|
||||
* SP = SinglePrecision
|
||||
*
|
||||
* PP(SP) =
|
||||
* N_CORES *
|
||||
* FREQUENCY *
|
||||
* 2(Two vector units) *
|
||||
* 2(If cpu has fma) *
|
||||
* 16(If AVX512), 8(If AVX), 4(If SSE) *
|
||||
*/
|
||||
|
||||
char* get_str_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t freq) {
|
||||
/***
|
||||
PP = PeakPerformance
|
||||
SP = SinglePrecision
|
||||
|
||||
PP(SP) =
|
||||
N_CORES *
|
||||
FREQUENCY *
|
||||
2(Two vector units) *
|
||||
2(If cpu has fma) *
|
||||
16(If AVX512), 8(If AVX), 4(If SSE) *
|
||||
|
||||
***/
|
||||
|
||||
//7 for GFLOP/s and 6 for digits,eg 412.14
|
||||
uint32_t size = 7+6+1+1;
|
||||
assert(strlen(STRING_UNKNOWN)+1 <= size);
|
||||
char* string = emalloc(sizeof(char)*size);
|
||||
|
||||
//First check we have consistent data
|
||||
//First, check we have consistent data
|
||||
if(freq == UNKNOWN_FREQ) {
|
||||
snprintf(string,strlen(STRING_UNKNOWN)+1,STRING_UNKNOWN);
|
||||
return string;
|
||||
return false;
|
||||
}
|
||||
|
||||
struct features* feat = cpu->feat;
|
||||
double flops = topo->physical_cores * topo->sockets * (freq*1000000);
|
||||
int vpus = get_number_of_vpus(cpu);
|
||||
|
||||
flops = flops * vpus;
|
||||
*flops = topo->physical_cores * topo->sockets * (freq*1000000) * vpus;
|
||||
|
||||
if(feat->FMA3 || feat->FMA4)
|
||||
flops = flops*2;
|
||||
*flops = *flops*2;
|
||||
|
||||
// Ice Lake has AVX512, but it has 1 VPU for AVX512, while
|
||||
// it has 2 for AVX2. If this is a Ice Lake CPU, we are computing
|
||||
// the peak performance supposing AVX2, not AVX512
|
||||
if(feat->AVX512 && vpus_are_AVX512(cpu))
|
||||
flops = flops*16;
|
||||
*flops = *flops*16;
|
||||
else if(feat->AVX || feat->AVX2)
|
||||
flops = flops*8;
|
||||
*flops = *flops*8;
|
||||
else if(feat->SSE)
|
||||
flops = flops*4;
|
||||
*flops = *flops*4;
|
||||
|
||||
// See https://sites.utexas.edu/jdm4372/2018/01/22/a-peculiar-
|
||||
// throughput-limitation-on-intels-xeon-phi-x200-knights-landing/
|
||||
if(is_knights_landing(cpu))
|
||||
flops = flops * 6 / 7;
|
||||
*flops = *flops * 6 / 7;
|
||||
|
||||
if(flops >= (double)1000000000000.0)
|
||||
snprintf(string,size,"%.2f TFLOP/s",flops/1000000000000);
|
||||
else if(flops >= 1000000000.0)
|
||||
snprintf(string,size,"%.2f GFLOP/s",flops/1000000000);
|
||||
else
|
||||
snprintf(string,size,"%.2f MFLOP/s",flops/1000000);
|
||||
|
||||
return string;
|
||||
return true;
|
||||
}
|
||||
|
||||
// TODO: Refactoring
|
||||
|
||||
@@ -12,7 +12,8 @@ char* get_str_avx(struct cpuInfo* cpu);
|
||||
char* get_str_sse(struct cpuInfo* cpu);
|
||||
char* get_str_fma(struct cpuInfo* cpu);
|
||||
char* get_str_topology(struct cpuInfo* cpu, struct topology* topo, bool dual_socket);
|
||||
char* get_str_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t freq);
|
||||
|
||||
bool get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t freq, double* flops);
|
||||
|
||||
void print_debug(struct cpuInfo* cpu);
|
||||
void print_raw(struct cpuInfo* cpu);
|
||||
|
||||
Reference in New Issue
Block a user