[v0.94] Refactor CPU features into a separate struct. Remove x86 debug functions

This commit is contained in:
Dr-Noob
2020-12-01 12:14:24 +01:00
parent d04d535807
commit b1f3196e0d
4 changed files with 74 additions and 112 deletions

View File

@@ -119,18 +119,19 @@ uint32_t fill_ids_from_midr(uint32_t* midr_array, int32_t* freq_array, uint32_t*
} }
void init_cpu_info(struct cpuInfo* cpu) { void init_cpu_info(struct cpuInfo* cpu) {
cpu->NEON = false;
cpu->AES = false;
cpu->SHA1 = false;
cpu->SHA2 = false;
cpu->CRC32 = false;
cpu->next_cpu = NULL; cpu->next_cpu = NULL;
} }
// We assume all cpus share the same hardware // We assume all cpus share the same hardware
// capabilities but I'm not sure it is always // capabilities but I'm not sure it is always
// true... // true...
void fill_cpu_features(struct cpuInfo* cpu) { struct features* get_features_info() {
struct features* feat = malloc(sizeof(struct features));
bool *ptr = &(feat->AES);
for(int i = 0; i < sizeof(struct features)/sizeof(bool); i++, *ptr++) {
*ptr = false;
}
errno = 0; errno = 0;
long hwcaps = getauxval(AT_HWCAP); long hwcaps = getauxval(AT_HWCAP);
@@ -139,15 +140,15 @@ void fill_cpu_features(struct cpuInfo* cpu) {
} }
#ifdef __aarch64__ #ifdef __aarch64__
else { else {
cpu->AES = hwcaps & HWCAP_AES; feat->AES = hwcaps & HWCAP_AES;
cpu->CRC32 = hwcaps & HWCAP_CRC32; feat->CRC32 = hwcaps & HWCAP_CRC32;
cpu->SHA1 = hwcaps & HWCAP_SHA1; feat->SHA1 = hwcaps & HWCAP_SHA1;
cpu->SHA2 = hwcaps & HWCAP_SHA2; feat->SHA2 = hwcaps & HWCAP_SHA2;
cpu->NEON = hwcaps & HWCAP_ASIMD; feat->NEON = hwcaps & HWCAP_ASIMD;
} }
#else #else
else { else {
cpu->NEON = hwcaps & HWCAP_NEON; feat->NEON = hwcaps & HWCAP_NEON;
} }
hwcaps = getauxval(AT_HWCAP2); hwcaps = getauxval(AT_HWCAP2);
@@ -155,12 +156,14 @@ void fill_cpu_features(struct cpuInfo* cpu) {
printWarn("Unable to retrieve AT_HWCAP2 using getauxval"); printWarn("Unable to retrieve AT_HWCAP2 using getauxval");
} }
else { else {
cpu->AES = hwcaps & HWCAP2_AES; feat->AES = hwcaps & HWCAP2_AES;
cpu->CRC32 = hwcaps & HWCAP2_CRC32; feat->CRC32 = hwcaps & HWCAP2_CRC32;
cpu->SHA1 = hwcaps & HWCAP2_SHA1; feat->SHA1 = hwcaps & HWCAP2_SHA1;
cpu->SHA2 = hwcaps & HWCAP2_SHA2; feat->SHA2 = hwcaps & HWCAP2_SHA2;
} }
#endif #endif
return feat;
} }
struct cpuInfo* get_cpu_info() { struct cpuInfo* get_cpu_info() {
@@ -188,7 +191,6 @@ struct cpuInfo* get_cpu_info() {
} }
} }
uint32_t sockets = fill_ids_from_midr(midr_array, freq_array, ids_array, ncores); uint32_t sockets = fill_ids_from_midr(midr_array, freq_array, ids_array, ncores);
fill_cpu_features(cpu);
struct cpuInfo* ptr = cpu; struct cpuInfo* ptr = cpu;
int midr_idx = 0; int midr_idx = 0;
@@ -207,6 +209,7 @@ struct cpuInfo* get_cpu_info() {
ptr->midr = midr_array[midr_idx]; ptr->midr = midr_array[midr_idx];
ptr->arch = get_uarch_from_midr(ptr->midr, ptr); ptr->arch = get_uarch_from_midr(ptr->midr, ptr);
ptr->feat = get_features_info();
ptr->freq = get_frequency_info(midr_idx); ptr->freq = get_frequency_info(midr_idx);
ptr->cach = get_cache_info(ptr); ptr->cach = get_cache_info(ptr);
ptr->topo = get_topology_info(ptr, ptr->cach, midr_array, i, ncores); ptr->topo = get_topology_info(ptr, ptr->cach, midr_array, i, ncores);
@@ -249,7 +252,7 @@ char* get_str_peak_performance(struct cpuInfo* cpu) {
for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) { for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
flops += ptr->topo->total_cores * (get_freq(ptr->freq) * 1000000); flops += ptr->topo->total_cores * (get_freq(ptr->freq) * 1000000);
} }
if(cpu->NEON) flops = flops * 4; if(cpu->feat->NEON) flops = flops * 4;
if(flops >= (double)1000000000000.0) if(flops >= (double)1000000000000.0)
snprintf(string,size,"%.2f TFLOP/s",flops/1000000000000); snprintf(string,size,"%.2f TFLOP/s",flops/1000000000000);
@@ -262,26 +265,27 @@ char* get_str_peak_performance(struct cpuInfo* cpu) {
} }
char* get_str_features(struct cpuInfo* cpu) { char* get_str_features(struct cpuInfo* cpu) {
char* string = malloc(sizeof(char) * 100); //TODO: Fix struct features* feat = cpu->feat;
char* string = malloc(sizeof(char) * 25);
uint32_t len = 0; uint32_t len = 0;
if(cpu->NEON) { if(feat->NEON) {
strcat(string, "NEON,"); strcat(string, "NEON,");
len += 5; len += 5;
} }
if(cpu->SHA1) { if(feat->SHA1) {
strcat(string, "SHA1,"); strcat(string, "SHA1,");
len += 5; len += 5;
} }
if(cpu->SHA2) { if(feat->SHA2) {
strcat(string, "SHA2,"); strcat(string, "SHA2,");
len += 5; len += 5;
} }
if(cpu->AES) { if(feat->AES) {
strcat(string, "AES,"); strcat(string, "AES,");
len += 4; len += 4;
} }
if(cpu->CRC32) { if(feat->CRC32) {
strcat(string, "CRC32,"); strcat(string, "CRC32,");
len += 6; len += 6;
} }

View File

@@ -79,7 +79,8 @@ struct topology {
#endif #endif
}; };
struct cpuInfo { struct features {
bool AES; // Must be the first field of features struct!
#ifdef ARCH_X86 #ifdef ARCH_X86
bool AVX; bool AVX;
bool AVX2; bool AVX2;
@@ -100,14 +101,16 @@ struct cpuInfo {
bool SHA2; bool SHA2;
bool CRC32; bool CRC32;
#endif #endif
bool AES; };
struct cpuInfo {
VENDOR cpu_vendor; VENDOR cpu_vendor;
struct uarch* arch; struct uarch* arch;
struct hypervisor* hv; struct hypervisor* hv;
struct frequency* freq; struct frequency* freq;
struct cache* cach; struct cache* cach;
struct topology* topo; struct topology* topo;
struct features* feat;
#ifdef ARCH_X86 #ifdef ARCH_X86
// CPU name from model // CPU name from model

View File

@@ -51,23 +51,6 @@ static char *hv_vendors_name[] = {
* cpuid amd: https://www.amd.com/system/files/TechDocs/25481.pdf * cpuid amd: https://www.amd.com/system/files/TechDocs/25481.pdf
*/ */
/*
 * Clear every x86 feature flag stored in the given cpuInfo struct.
 *
 * cpu: struct whose feature flags are all set to false. The pointer is
 *      dereferenced without a NULL check.
 */
void init_cpu_info(struct cpuInfo* cpu) {
  // AVX family
  cpu->AVX = cpu->AVX2 = cpu->AVX512 = false;

  // SSE family
  cpu->SSE = cpu->SSE2 = cpu->SSE3 = cpu->SSSE3 = false;
  cpu->SSE4a = cpu->SSE4_1 = cpu->SSE4_2 = false;

  // FMA variants
  cpu->FMA3 = cpu->FMA4 = false;

  // AES and SHA
  cpu->AES = cpu->SHA = false;
}
void init_topology_struct(struct topology* topo, struct cache* cach) { void init_topology_struct(struct topology* topo, struct cache* cach) {
topo->total_cores = 0; topo->total_cores = 0;
topo->physical_cores = 0; topo->physical_cores = 0;
@@ -224,7 +207,13 @@ struct hypervisor* get_hp_info(bool hv_present) {
struct cpuInfo* get_cpu_info() { struct cpuInfo* get_cpu_info() {
struct cpuInfo* cpu = malloc(sizeof(struct cpuInfo)); struct cpuInfo* cpu = malloc(sizeof(struct cpuInfo));
init_cpu_info(cpu); struct features* feat = malloc(sizeof(struct features));
cpu->feat = feat;
bool *ptr = &(feat->AES);
for(int i = 0; i < sizeof(struct features)/sizeof(bool); i++, *ptr++) {
*ptr = false;
}
uint32_t eax = 0; uint32_t eax = 0;
uint32_t ebx = 0; uint32_t ebx = 0;
@@ -262,18 +251,18 @@ struct cpuInfo* get_cpu_info() {
if (cpu->maxLevels >= 0x00000001){ if (cpu->maxLevels >= 0x00000001){
eax = 0x00000001; eax = 0x00000001;
cpuid(&eax, &ebx, &ecx, &edx); cpuid(&eax, &ebx, &ecx, &edx);
cpu->SSE = (edx & ((int)1 << 25)) != 0; feat->SSE = (edx & ((int)1 << 25)) != 0;
cpu->SSE2 = (edx & ((int)1 << 26)) != 0; feat->SSE2 = (edx & ((int)1 << 26)) != 0;
cpu->SSE3 = (ecx & ((int)1 << 0)) != 0; feat->SSE3 = (ecx & ((int)1 << 0)) != 0;
cpu->SSSE3 = (ecx & ((int)1 << 9)) != 0; feat->SSSE3 = (ecx & ((int)1 << 9)) != 0;
cpu->SSE4_1 = (ecx & ((int)1 << 19)) != 0; feat->SSE4_1 = (ecx & ((int)1 << 19)) != 0;
cpu->SSE4_2 = (ecx & ((int)1 << 20)) != 0; feat->SSE4_2 = (ecx & ((int)1 << 20)) != 0;
cpu->AES = (ecx & ((int)1 << 25)) != 0; feat->AES = (ecx & ((int)1 << 25)) != 0;
cpu->AVX = (ecx & ((int)1 << 28)) != 0; feat->AVX = (ecx & ((int)1 << 28)) != 0;
cpu->FMA3 = (ecx & ((int)1 << 12)) != 0; feat->FMA3 = (ecx & ((int)1 << 12)) != 0;
bool hv_present = (ecx & ((int)1 << 31)) != 0; bool hv_present = (ecx & ((int)1 << 31)) != 0;
if((cpu->hv = get_hp_info(hv_present)) == NULL) if((cpu->hv = get_hp_info(hv_present)) == NULL)
@@ -287,9 +276,9 @@ struct cpuInfo* get_cpu_info() {
eax = 0x00000007; eax = 0x00000007;
ecx = 0x00000000; ecx = 0x00000000;
cpuid(&eax, &ebx, &ecx, &edx); cpuid(&eax, &ebx, &ecx, &edx);
cpu->AVX2 = (ebx & ((int)1 << 5)) != 0; feat->AVX2 = (ebx & ((int)1 << 5)) != 0;
cpu->SHA = (ebx & ((int)1 << 29)) != 0; feat->SHA = (ebx & ((int)1 << 29)) != 0;
cpu->AVX512 = (((ebx & ((int)1 << 16)) != 0) || feat->AVX512 = (((ebx & ((int)1 << 16)) != 0) ||
((ebx & ((int)1 << 28)) != 0) || ((ebx & ((int)1 << 28)) != 0) ||
((ebx & ((int)1 << 26)) != 0) || ((ebx & ((int)1 << 26)) != 0) ||
((ebx & ((int)1 << 27)) != 0) || ((ebx & ((int)1 << 27)) != 0) ||
@@ -305,8 +294,8 @@ struct cpuInfo* get_cpu_info() {
if (cpu->maxExtendedLevels >= 0x80000001){ if (cpu->maxExtendedLevels >= 0x80000001){
eax = 0x80000001; eax = 0x80000001;
cpuid(&eax, &ebx, &ecx, &edx); cpuid(&eax, &ebx, &ecx, &edx);
cpu->SSE4a = (ecx & ((int)1 << 6)) != 0; feat->SSE4a = (ecx & ((int)1 << 6)) != 0;
cpu->FMA4 = (ecx & ((int)1 << 16)) != 0; feat->FMA4 = (ecx & ((int)1 << 16)) != 0;
} }
else { else {
printWarn("Can't read features information from cpuid (needed extended level is 0x%.8X, max is 0x%.8X)", 0x80000001, cpu->maxExtendedLevels); printWarn("Can't read features information from cpuid (needed extended level is 0x%.8X, max is 0x%.8X)", 0x80000001, cpu->maxExtendedLevels);
@@ -732,22 +721,23 @@ char* get_str_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64
return string; return string;
} }
struct features* feat = cpu->feat;
double flops = topo->physical_cores * topo->sockets * (freq*1000000); double flops = topo->physical_cores * topo->sockets * (freq*1000000);
int vpus = get_number_of_vpus(cpu); int vpus = get_number_of_vpus(cpu);
flops = flops * vpus; flops = flops * vpus;
if(cpu->FMA3 || cpu->FMA4) if(feat->FMA3 || feat->FMA4)
flops = flops*2; flops = flops*2;
// Ice Lake has AVX512, but it has 1 VPU for AVX512, while // Ice Lake has AVX512, but it has 1 VPU for AVX512, while
// it has 2 for AVX2. If this is a Ice Lake CPU, we are computing // it has 2 for AVX2. If this is a Ice Lake CPU, we are computing
// the peak performance supposing AVX2, not AVX512 // the peak performance supposing AVX2, not AVX512
if(cpu->AVX512 && vpus_are_AVX512(cpu)) if(feat->AVX512 && vpus_are_AVX512(cpu))
flops = flops*16; flops = flops*16;
else if(cpu->AVX || cpu->AVX2) else if(feat->AVX || feat->AVX2)
flops = flops*8; flops = flops*8;
else if(cpu->SSE) else if(feat->SSE)
flops = flops*4; flops = flops*4;
// See https://sites.utexas.edu/jdm4372/2018/01/22/a-peculiar- // See https://sites.utexas.edu/jdm4372/2018/01/22/a-peculiar-
@@ -807,11 +797,11 @@ char* get_str_topology(struct cpuInfo* cpu, struct topology* topo, bool dual_soc
char* get_str_avx(struct cpuInfo* cpu) { char* get_str_avx(struct cpuInfo* cpu) {
//If all AVX are available, it will use up to 15 //If all AVX are available, it will use up to 15
char* string = malloc(sizeof(char)*17+1); char* string = malloc(sizeof(char)*17+1);
if(!cpu->AVX) if(!cpu->feat->AVX)
snprintf(string,2+1,"No"); snprintf(string,2+1,"No");
else if(!cpu->AVX2) else if(!cpu->feat->AVX2)
snprintf(string,3+1,"AVX"); snprintf(string,3+1,"AVX");
else if(!cpu->AVX512) else if(!cpu->feat->AVX512)
snprintf(string,8+1,"AVX,AVX2"); snprintf(string,8+1,"AVX,AVX2");
else else
snprintf(string,15+1,"AVX,AVX2,AVX512"); snprintf(string,15+1,"AVX,AVX2,AVX512");
@@ -830,31 +820,31 @@ char* get_str_sse(struct cpuInfo* cpu) {
uint32_t SSE4_2_sl = 7; uint32_t SSE4_2_sl = 7;
char* string = malloc(sizeof(char)*SSE_sl+SSE2_sl+SSE3_sl+SSSE3_sl+SSE4a_sl+SSE4_1_sl+SSE4_2_sl+1); char* string = malloc(sizeof(char)*SSE_sl+SSE2_sl+SSE3_sl+SSSE3_sl+SSE4a_sl+SSE4_1_sl+SSE4_2_sl+1);
if(cpu->SSE) { if(cpu->feat->SSE) {
snprintf(string+last,SSE_sl+1,"SSE,"); snprintf(string+last,SSE_sl+1,"SSE,");
last+=SSE_sl; last+=SSE_sl;
} }
if(cpu->SSE2) { if(cpu->feat->SSE2) {
snprintf(string+last,SSE2_sl+1,"SSE2,"); snprintf(string+last,SSE2_sl+1,"SSE2,");
last+=SSE2_sl; last+=SSE2_sl;
} }
if(cpu->SSE3) { if(cpu->feat->SSE3) {
snprintf(string+last,SSE3_sl+1,"SSE3,"); snprintf(string+last,SSE3_sl+1,"SSE3,");
last+=SSE3_sl; last+=SSE3_sl;
} }
if(cpu->SSSE3) { if(cpu->feat->SSSE3) {
snprintf(string+last,SSSE3_sl+1,"SSSE3,"); snprintf(string+last,SSSE3_sl+1,"SSSE3,");
last+=SSSE3_sl; last+=SSSE3_sl;
} }
if(cpu->SSE4a) { if(cpu->feat->SSE4a) {
snprintf(string+last,SSE4a_sl+1,"SSE4a,"); snprintf(string+last,SSE4a_sl+1,"SSE4a,");
last+=SSE4a_sl; last+=SSE4a_sl;
} }
if(cpu->SSE4_1) { if(cpu->feat->SSE4_1) {
snprintf(string+last,SSE4_1_sl+1,"SSE4.1,"); snprintf(string+last,SSE4_1_sl+1,"SSE4.1,");
last+=SSE4_1_sl; last+=SSE4_1_sl;
} }
if(cpu->SSE4_2) { if(cpu->feat->SSE4_2) {
snprintf(string+last,SSE4_2_sl+1,"SSE4.2,"); snprintf(string+last,SSE4_2_sl+1,"SSE4.2,");
last+=SSE4_2_sl; last+=SSE4_2_sl;
} }
@@ -866,9 +856,9 @@ char* get_str_sse(struct cpuInfo* cpu) {
char* get_str_fma(struct cpuInfo* cpu) { char* get_str_fma(struct cpuInfo* cpu) {
char* string = malloc(sizeof(char)*9+1); char* string = malloc(sizeof(char)*9+1);
if(!cpu->FMA3) if(!cpu->feat->FMA3)
snprintf(string,2+1,"No"); snprintf(string,2+1,"No");
else if(!cpu->FMA4) else if(!cpu->feat->FMA4)
snprintf(string,4+1,"FMA3"); snprintf(string,4+1,"FMA3");
else else
snprintf(string,9+1,"FMA3,FMA4"); snprintf(string,9+1,"FMA3,FMA4");
@@ -886,38 +876,6 @@ void print_debug(struct cpuInfo* cpu) {
free_cpuinfo_struct(cpu); free_cpuinfo_struct(cpu);
} }
// Text printed by the debug dump for a boolean feature flag.
static const char* debug_bool_str(bool flag) {
  return flag ? "true" : "false";
}

/*
 * Print every x86 feature flag read from cpu to stdout, one "NAME=true" or
 * "NAME=false" entry per line. An extra blank line is emitted after the AVX,
 * SSE and FMA groups.
 *
 * cpu: struct holding the flags. The pointer is dereferenced without a NULL
 *      check.
 */
void debug_cpu_info(struct cpuInfo* cpu) {
  // AVX group
  printf("AVX=%s\n", debug_bool_str(cpu->AVX));
  printf("AVX2=%s\n", debug_bool_str(cpu->AVX2));
  printf("AVX512=%s\n\n", debug_bool_str(cpu->AVX512));

  // SSE group
  printf("SSE=%s\n", debug_bool_str(cpu->SSE));
  printf("SSE2=%s\n", debug_bool_str(cpu->SSE2));
  printf("SSE3=%s\n", debug_bool_str(cpu->SSE3));
  printf("SSSE3=%s\n", debug_bool_str(cpu->SSSE3));
  printf("SSE4a=%s\n", debug_bool_str(cpu->SSE4a));
  printf("SSE4_1=%s\n", debug_bool_str(cpu->SSE4_1));
  printf("SSE4_2=%s\n\n", debug_bool_str(cpu->SSE4_2));

  // FMA group
  printf("FMA3=%s\n", debug_bool_str(cpu->FMA3));
  printf("FMA4=%s\n\n", debug_bool_str(cpu->FMA4));

  // AES and SHA
  printf("AES=%s\n", debug_bool_str(cpu->AES));
  printf("SHA=%s\n", debug_bool_str(cpu->SHA));
}
// Debug helper: print the `size` field of each cache level (L1i, L1d, L2, L3)
// to stdout, one "NAME=<size>B" entry per line.
// Every level pointer is dereferenced without a NULL check.
// NOTE(review): the values are formatted with %d — confirm `size` is an int.
// NOTE(review): the "B" suffix suggests the unit is bytes — confirm.
void debug_cache(struct cache* cach) {
printf("L1i=%dB\n",cach->L1i->size);
printf("L1d=%dB\n",cach->L1d->size);
printf("L2=%dB\n",cach->L2->size);
printf("L3=%dB\n",cach->L3->size);
}
// Debug helper: print the `max` and `base` fields of freq to stdout as
// "maxf=<value> Mhz" and "basef=<value> Mhz", one per line.
// freq is dereferenced without a NULL check.
// NOTE(review): both fields are formatted with %d — confirm they are ints.
void debug_frequency(struct frequency* freq) {
printf("maxf=%d Mhz\n",freq->max);
printf("basef=%d Mhz\n",freq->base);
}
void free_topo_struct(struct topology* topo) { void free_topo_struct(struct topology* topo) {
free(topo->apic->cache_select_mask); free(topo->apic->cache_select_mask);
free(topo->apic->cache_id_apic); free(topo->apic->cache_id_apic);

View File

@@ -15,9 +15,6 @@ char* get_str_topology(struct cpuInfo* cpu, struct topology* topo, bool dual_soc
char* get_str_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t freq); char* get_str_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t freq);
void print_debug(struct cpuInfo* cpu); void print_debug(struct cpuInfo* cpu);
void debug_cpu_info(struct cpuInfo* cpu);
void debug_cache(struct cache* cach);
void debug_frequency(struct frequency* freq);
void free_topo_struct(struct topology* topo); void free_topo_struct(struct topology* topo);