[v0.98][Refactoring] Use int for peak performance, which makes code cleaner

This commit is contained in:
Dr-Noob
2021-08-06 11:04:29 +02:00
parent 6953d8dda5
commit c24dd7cbb6
9 changed files with 106 additions and 115 deletions

View File

@@ -61,6 +61,27 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach, uint
return topo; return topo;
} }
/*
 * Aggregated peak performance over the chain of CPU entries starting at
 * `cpu` and linked through next_cpu (cpu->num_cpus entries are visited).
 *
 * Returns -1 if any visited entry reports UNKNOWN_FREQ. Otherwise returns
 * the sum of total_cores * get_freq() * 1000000 over all entries, multiplied
 * by 4 when the NEON flag of the first entry is set.
 * NOTE(review): the 1000000 factor suggests get_freq() reports MHz - confirm.
 */
int64_t get_peak_performance(struct cpuInfo* cpu) {
  struct cpuInfo* node;
  int visited;

  // Pass 1: refuse to compute anything if a single frequency is missing
  node = cpu;
  visited = 0;
  while(visited < cpu->num_cpus) {
    if(get_freq(node->freq) == UNKNOWN_FREQ)
      return -1;
    node = node->next_cpu;
    visited++;
  }

  // Pass 2: add up the contribution of every entry
  int64_t total = 0;
  node = cpu;
  visited = 0;
  while(visited < cpu->num_cpus) {
    total += node->topo->total_cores * (get_freq(node->freq) * 1000000);
    node = node->next_cpu;
    visited++;
  }

  // NEON scales the final figure by a factor of four
  return cpu->feat->NEON ? total * 4 : total;
}
/*
 * Tells whether two cores are equal by comparing their entries in both arrays.
 *
 * c1pos, c2pos: indices of the two cores to compare.
 * midr_array:   per-core MIDR values, indexed by core position.
 * freq_array:   per-core frequency values, indexed by core position.
 *
 * Returns true only when both the MIDR and the frequency entries match.
 * No bounds checking is done; the caller must pass valid indices.
 */
bool cores_are_equal(int c1pos, int c2pos, uint32_t* midr_array, int32_t* freq_array) {
  return midr_array[c1pos] == midr_array[c2pos] && freq_array[c1pos] == freq_array[c2pos];
}
@@ -198,6 +219,7 @@ struct cpuInfo* get_cpu_info() {
cpu->hv = emalloc(sizeof(struct hypervisor)); cpu->hv = emalloc(sizeof(struct hypervisor));
cpu->hv->present = false; cpu->hv->present = false;
cpu->soc = get_soc(); cpu->soc = get_soc();
cpu->peak_performance = get_peak_performance(cpu);
return cpu; return cpu;
} }
@@ -210,27 +232,6 @@ char* get_str_topology(struct cpuInfo* cpu, struct topology* topo, bool dual_soc
return string; return string;
} }
/*
 * Computes the aggregated peak performance over the chain of CPU entries
 * starting at `cpu` (cpu->num_cpus entries, linked through next_cpu) and
 * stores it in *flops.
 *
 * Returns false, leaving *flops untouched, if any visited entry reports
 * UNKNOWN_FREQ. Otherwise *flops receives the sum of
 * total_cores * get_freq() * 1000000 over all entries, multiplied by 4
 * when the NEON flag of the first entry is set, and true is returned.
 */
bool get_peak_performance(struct cpuInfo* cpu, double* flops) {
  struct cpuInfo* node;
  int visited;

  // Pass 1: every entry must have a known frequency
  node = cpu;
  visited = 0;
  while(visited < cpu->num_cpus) {
    if(get_freq(node->freq) == UNKNOWN_FREQ)
      return false;
    node = node->next_cpu;
    visited++;
  }

  // Pass 2: accumulate directly into the caller's variable
  *flops = 0.0;
  node = cpu;
  visited = 0;
  while(visited < cpu->num_cpus) {
    *flops += node->topo->total_cores * (get_freq(node->freq) * 1000000);
    node = node->next_cpu;
    visited++;
  }

  // NEON scales the final figure by a factor of four
  if(cpu->feat->NEON)
    *flops *= 4;

  return true;
}
char* get_str_features(struct cpuInfo* cpu) { char* get_str_features(struct cpuInfo* cpu) {
struct features* feat = cpu->feat; struct features* feat = cpu->feat;
char* string = emalloc(sizeof(char) * 25); char* string = emalloc(sizeof(char) * 25);

View File

@@ -9,8 +9,6 @@ uint32_t get_nsockets(struct topology* topo);
char* get_str_topology(struct cpuInfo* cpu, struct topology* topo, bool dual_socket); char* get_str_topology(struct cpuInfo* cpu, struct topology* topo, bool dual_socket);
char* get_str_features(struct cpuInfo* cpu); char* get_str_features(struct cpuInfo* cpu);
bool get_peak_performance(struct cpuInfo* cpu, double* flops);
void print_debug(struct cpuInfo* cpu); void print_debug(struct cpuInfo* cpu);
void free_topo_struct(struct topology* topo); void free_topo_struct(struct topology* topo);

View File

@@ -151,24 +151,25 @@ char* get_str_freq(struct frequency* freq) {
return string; return string;
} }
char* get_str_peak_performance(double flops, bool valid_flops) { char* get_str_peak_performance(int64_t flops) {
char* str; char* str;
if(!valid_flops) { if(flops == -1) {
str = emalloc(sizeof(char) * (strlen(STRING_UNKNOWN) + 1)); str = emalloc(sizeof(char) * (strlen(STRING_UNKNOWN) + 1));
strncpy(str, STRING_UNKNOWN, strlen(STRING_UNKNOWN) + 1); strncpy(str, STRING_UNKNOWN, strlen(STRING_UNKNOWN) + 1);
} }
// 7 for digits (e.g, XXXX.XX), 7 for XFLOP/s // 7 for digits (e.g, XXXX.XX), 7 for XFLOP/s
double flopsd = (double) flops;
uint32_t max_size = 7+1+7+1; uint32_t max_size = 7+1+7+1;
str = ecalloc(max_size, sizeof(char)); str = ecalloc(max_size, sizeof(char));
if(flops >= (double)1000000000000.0) if(flopsd >= (double)1000000000000.0)
snprintf(str, max_size, "%.2f TFLOP/s", flops/1000000000000); snprintf(str, max_size, "%.2f TFLOP/s", flopsd/1000000000000);
else if(flops >= 1000000000.0) else if(flopsd >= 1000000000.0)
snprintf(str, max_size, "%.2f GFLOP/s", flops/1000000000); snprintf(str, max_size, "%.2f GFLOP/s", flopsd/1000000000);
else else
snprintf(str, max_size, "%.2f MFLOP/s", flops/1000000); snprintf(str, max_size, "%.2f MFLOP/s", flopsd/1000000);
return str; return str;
} }

View File

@@ -115,6 +115,7 @@ struct cpuInfo {
struct cache* cach; struct cache* cach;
struct topology* topo; struct topology* topo;
struct features* feat; struct features* feat;
int64_t peak_performance;
#if defined(ARCH_X86) || defined(ARCH_PPC) #if defined(ARCH_X86) || defined(ARCH_PPC)
// CPU name from model // CPU name from model
@@ -161,7 +162,7 @@ char* get_str_l1d(struct cache* cach);
char* get_str_l2(struct cache* cach); char* get_str_l2(struct cache* cach);
char* get_str_l3(struct cache* cach); char* get_str_l3(struct cache* cach);
char* get_str_freq(struct frequency* freq); char* get_str_freq(struct frequency* freq);
char* get_str_peak_performance(double flops, bool valid_pp); char* get_str_peak_performance(int64_t flops);
void init_topology_struct(struct topology* topo, struct cache* cach); void init_topology_struct(struct topology* topo, struct cache* cach);
void init_cache_struct(struct cache* cach); void init_cache_struct(struct cache* cach);

View File

@@ -446,9 +446,6 @@ bool print_cpufetch_x86(struct cpuInfo* cpu, STYLE s, struct colors* cs) {
if(art == NULL) if(art == NULL)
return false; return false;
double flops;
bool valid_pp = get_peak_performance(cpu, cpu->topo, get_freq(cpu->freq), &flops);
char* uarch = get_str_uarch(cpu); char* uarch = get_str_uarch(cpu);
char* manufacturing_process = get_str_process(cpu); char* manufacturing_process = get_str_process(cpu);
char* sockets = get_str_sockets(cpu->topo); char* sockets = get_str_sockets(cpu->topo);
@@ -463,7 +460,7 @@ bool print_cpufetch_x86(struct cpuInfo* cpu, STYLE s, struct colors* cs) {
char* l1d = get_str_l1d(cpu->cach); char* l1d = get_str_l1d(cpu->cach);
char* l2 = get_str_l2(cpu->cach); char* l2 = get_str_l2(cpu->cach);
char* l3 = get_str_l3(cpu->cach); char* l3 = get_str_l3(cpu->cach);
char* pp = get_str_peak_performance(flops, valid_pp); char* pp = get_str_peak_performance(cpu->peak_performance);
setAttribute(art,ATTRIBUTE_NAME,cpu_name); setAttribute(art,ATTRIBUTE_NAME,cpu_name);
if(cpu->hv->present) { if(cpu->hv->present) {
@@ -570,9 +567,6 @@ bool print_cpufetch_ppc(struct cpuInfo* cpu, STYLE s, struct colors* cs) {
if(art == NULL) if(art == NULL)
return false; return false;
double flops;
bool valid_pp = get_peak_performance(cpu, cpu->topo, get_freq(cpu->freq), &flops);
char* uarch = get_str_uarch(cpu); char* uarch = get_str_uarch(cpu);
char* manufacturing_process = get_str_process(cpu); char* manufacturing_process = get_str_process(cpu);
char* sockets = get_str_sockets(cpu->topo); char* sockets = get_str_sockets(cpu->topo);
@@ -586,7 +580,7 @@ bool print_cpufetch_ppc(struct cpuInfo* cpu, STYLE s, struct colors* cs) {
char* l1d = get_str_l1d(cpu->cach); char* l1d = get_str_l1d(cpu->cach);
char* l2 = get_str_l2(cpu->cach); char* l2 = get_str_l2(cpu->cach);
char* l3 = get_str_l3(cpu->cach); char* l3 = get_str_l3(cpu->cach);
char* pp = get_str_peak_performance(flops, valid_pp); char* pp = get_str_peak_performance(cpu->peak_performance);
if(cpu_name != NULL) { if(cpu_name != NULL) {
setAttribute(art,ATTRIBUTE_NAME,cpu_name); setAttribute(art,ATTRIBUTE_NAME,cpu_name);
@@ -792,9 +786,7 @@ bool print_cpufetch_arm(struct cpuInfo* cpu, STYLE s, struct colors* cs) {
} }
} }
} }
double flops; char* pp = get_str_peak_performance(cpu->peak_performance);
bool valid_pp = get_peak_performance(cpu, &flops);
char* pp = get_str_peak_performance(flops, valid_pp);
setAttribute(art,ATTRIBUTE_PEAK,pp); setAttribute(art,ATTRIBUTE_PEAK,pp);
if(art->n_attributes_set > NUMBER_OF_LINES) { if(art->n_attributes_set > NUMBER_OF_LINES) {

View File

@@ -132,6 +132,32 @@ struct frequency* get_frequency_info() {
return freq; return freq;
} }
/*
 * Estimates the peak single precision performance of a PowerPC CPU.
 *
 * The original author marked this formula as tentative:
 *   PP(SP) = N_CORES * FREQUENCY * 4(If altivec)
 *
 * cpu:  provides the feature flags (cpu->feat) and the arch id (cpu->arch).
 * topo: provides physical_cores, logical_cores and sockets.
 * freq: frequency as returned by get_freq(); it is multiplied by 1000000.
 *
 * Returns -1 when freq is UNKNOWN_FREQ, otherwise the estimate.
 */
int64_t get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t freq) {
  // Nothing meaningful can be computed without a frequency
  if(freq == UNKNOWN_FREQ)
    return -1;

  int64_t scaled_freq = freq * 1000000;
  int64_t result = topo->physical_cores * topo->sockets * scaled_freq;

  if(cpu->feat->altivec)
    result *= 4;

  if(!is_power9(cpu->arch))
    return result;

  // POWER9 is organised in "slices": an SMT4 core has two super-slices,
  // each able to do two FLOPS per cycle, while an SMT8 core has four
  // super-slices and therefore does four FLOPS per cycle.
  // NOTE(review): this is an integer division, so a threads-per-core
  // ratio of 1 makes the factor 0 - confirm that is intended.
  int smt_ratio = topo->logical_cores / topo->physical_cores;
  return result * (smt_ratio / 2);
}
struct cpuInfo* get_cpu_info() { struct cpuInfo* get_cpu_info() {
struct cpuInfo* cpu = emalloc(sizeof(struct cpuInfo)); struct cpuInfo* cpu = emalloc(sizeof(struct cpuInfo));
struct features* feat = emalloc(sizeof(struct features)); struct features* feat = emalloc(sizeof(struct features));
@@ -152,8 +178,8 @@ struct cpuInfo* get_cpu_info() {
cpu->freq = get_frequency_info(); cpu->freq = get_frequency_info();
cpu->topo = get_topology_info(cpu->cach); cpu->topo = get_topology_info(cpu->cach);
cpu->cach = get_cache_info(cpu); cpu->cach = get_cache_info(cpu);
feat->altivec = has_altivec(cpu->arch); feat->altivec = has_altivec(cpu->arch);
cpu->peak_performance = get_peak_performance(cpu, cpu->topo, get_freq(cpu->freq));
if(cpu->cach == NULL || cpu->topo == NULL) { if(cpu->cach == NULL || cpu->topo == NULL) {
return NULL; return NULL;
@@ -170,32 +196,6 @@ char* get_str_altivec(struct cpuInfo* cpu) {
return string; return string;
} }
/*
 * Estimates the peak single precision performance of a PowerPC CPU and
 * stores it in *flops.
 *
 * The original author marked this formula as tentative:
 *   PP(SP) = N_CORES * FREQUENCY * 4(If altivec)
 *
 * cpu:   provides the feature flags (cpu->feat) and the arch id (cpu->arch).
 * topo:  provides physical_cores, logical_cores and sockets.
 * freq:  frequency value; it is multiplied by 1000000.
 * flops: output, only written when the function returns true.
 *
 * Returns false when freq is UNKNOWN_FREQ, true otherwise.
 */
bool get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t freq, double* flops) {
  // Nothing meaningful can be computed without a frequency
  if(freq == UNKNOWN_FREQ)
    return false;

  // The product is evaluated with integer arithmetic before being stored
  *flops = topo->physical_cores * topo->sockets * (freq*1000000);

  if(cpu->feat->altivec)
    *flops *= 4;

  if(!is_power9(cpu->arch))
    return true;

  // POWER9 is organised in "slices": an SMT4 core has two super-slices,
  // each able to do two FLOPS per cycle, while an SMT8 core has four
  // super-slices and therefore does four FLOPS per cycle.
  // NOTE(review): this is an integer division, so a threads-per-core
  // ratio of 1 makes the factor 0 - confirm that is intended.
  int smt_ratio = topo->logical_cores / topo->physical_cores;
  *flops *= (smt_ratio / 2);

  return true;
}
char* get_str_topology(struct topology* topo, bool dual_socket) { char* get_str_topology(struct topology* topo, bool dual_socket) {
char* string; char* string;
if(topo->smt_supported > 1) { if(topo->smt_supported > 1) {

View File

@@ -6,7 +6,6 @@
struct cpuInfo* get_cpu_info(); struct cpuInfo* get_cpu_info();
char* get_str_altivec(struct cpuInfo* cpu); char* get_str_altivec(struct cpuInfo* cpu);
char* get_str_topology(struct topology* topo, bool dual_socket); char* get_str_topology(struct topology* topo, bool dual_socket);
bool get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t freq, double* flops);
void print_debug(struct cpuInfo* cpu); void print_debug(struct cpuInfo* cpu);
#endif #endif

View File

@@ -133,6 +133,49 @@ struct uarch* get_cpu_uarch(struct cpuInfo* cpu) {
return get_uarch_from_cpuid(cpu, efamily, family, emodel, model, (int)stepping); return get_uarch_from_cpuid(cpu, efamily, family, emodel, model, (int)stepping);
} }
/*
 * Estimates the peak performance (PP) in single precision (SP) of an x86 CPU:
 *
 *   PP(SP) = N_CORES
 *          * FREQUENCY
 *          * number of vector units (as given by get_number_of_vpus)
 *          * 2 (if the CPU has FMA)
 *          * 16 (if AVX512), 8 (if AVX), 4 (if SSE)
 *
 * cpu:  provides the feature flags and is passed to the uarch helpers.
 * topo: provides physical_cores and sockets.
 * freq: frequency as returned by get_freq(); it is multiplied by 1000000.
 *
 * Returns -1 when freq is UNKNOWN_FREQ, otherwise the estimate.
 */
int64_t get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t freq) {
  // Nothing meaningful can be computed without a frequency
  if(freq == UNKNOWN_FREQ)
    return -1;

  struct features* feat = cpu->feat;
  int vpus = get_number_of_vpus(cpu);
  int64_t result = topo->physical_cores * topo->sockets * (freq*1000000) * vpus;

  if(feat->FMA3 || feat->FMA4)
    result *= 2;

  // Ice Lake supports AVX512 but only has 1 VPU for it, while it has 2
  // for AVX2. On such a CPU the figure is computed assuming AVX2 rather
  // than AVX512, hence the extra vpus_are_AVX512 check.
  int width_factor = 1;
  if(feat->AVX512 && vpus_are_AVX512(cpu))
    width_factor = 16;
  else if(feat->AVX || feat->AVX2)
    width_factor = 8;
  else if(feat->SSE)
    width_factor = 4;
  result *= width_factor;

  // See https://sites.utexas.edu/jdm4372/2018/01/22/a-peculiar-
  // throughput-limitation-on-intels-xeon-phi-x200-knights-landing/
  if(is_knights_landing(cpu))
    result = result * 6 / 7;

  return result;
}
struct hypervisor* get_hp_info(bool hv_present) { struct hypervisor* get_hp_info(bool hv_present) {
struct hypervisor* hv = emalloc(sizeof(struct hypervisor)); struct hypervisor* hv = emalloc(sizeof(struct hypervisor));
if(!hv_present) { if(!hv_present) {
@@ -289,6 +332,7 @@ struct cpuInfo* get_cpu_info() {
cpu->freq = get_frequency_info(cpu); cpu->freq = get_frequency_info(cpu);
cpu->cach = get_cache_info(cpu); cpu->cach = get_cache_info(cpu);
cpu->topo = get_topology_info(cpu, cpu->cach); cpu->topo = get_topology_info(cpu, cpu->cach);
cpu->peak_performance = get_peak_performance(cpu, cpu->topo, get_freq(cpu->freq));
if(cpu->cach == NULL || cpu->topo == NULL) { if(cpu->cach == NULL || cpu->topo == NULL) {
return NULL; return NULL;
@@ -656,49 +700,6 @@ struct frequency* get_frequency_info(struct cpuInfo* cpu) {
} }
// STRING FUNCTIONS // STRING FUNCTIONS
/*
 * Estimates the peak performance (PP) in single precision (SP) of an x86 CPU
 * and stores it in *flops:
 *
 *   PP(SP) = N_CORES
 *          * FREQUENCY
 *          * number of vector units (as given by get_number_of_vpus)
 *          * 2 (if the CPU has FMA)
 *          * 16 (if AVX512), 8 (if AVX), 4 (if SSE)
 *
 * cpu:   provides the feature flags and is passed to the uarch helpers.
 * topo:  provides physical_cores and sockets.
 * freq:  frequency value; it is multiplied by 1000000.
 * flops: output, only written when the function returns true.
 *
 * Returns false when freq is UNKNOWN_FREQ, true otherwise.
 */
bool get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t freq, double* flops) {
  // Nothing meaningful can be computed without a frequency
  if(freq == UNKNOWN_FREQ)
    return false;

  struct features* feat = cpu->feat;
  int vpus = get_number_of_vpus(cpu);

  // The product is evaluated with integer arithmetic before being stored
  *flops = topo->physical_cores * topo->sockets * (freq*1000000) * vpus;

  if(feat->FMA3 || feat->FMA4)
    *flops *= 2;

  // Ice Lake supports AVX512 but only has 1 VPU for it, while it has 2
  // for AVX2. On such a CPU the figure is computed assuming AVX2 rather
  // than AVX512, hence the extra vpus_are_AVX512 check.
  if(feat->AVX512 && vpus_are_AVX512(cpu)) {
    *flops *= 16;
  }
  else if(feat->AVX || feat->AVX2) {
    *flops *= 8;
  }
  else if(feat->SSE) {
    *flops *= 4;
  }

  // See https://sites.utexas.edu/jdm4372/2018/01/22/a-peculiar-
  // throughput-limitation-on-intels-xeon-phi-x200-knights-landing/
  if(is_knights_landing(cpu))
    *flops = *flops * 6 / 7;

  return true;
}
// TODO: Refactoring // TODO: Refactoring
char* get_str_topology(struct cpuInfo* cpu, struct topology* topo, bool dual_socket) { char* get_str_topology(struct cpuInfo* cpu, struct topology* topo, bool dual_socket) {
char* string; char* string;

View File

@@ -13,8 +13,6 @@ char* get_str_sse(struct cpuInfo* cpu);
char* get_str_fma(struct cpuInfo* cpu); char* get_str_fma(struct cpuInfo* cpu);
char* get_str_topology(struct cpuInfo* cpu, struct topology* topo, bool dual_socket); char* get_str_topology(struct cpuInfo* cpu, struct topology* topo, bool dual_socket);
bool get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t freq, double* flops);
void print_debug(struct cpuInfo* cpu); void print_debug(struct cpuInfo* cpu);
void print_raw(struct cpuInfo* cpu); void print_raw(struct cpuInfo* cpu);