From b1f3196e0d088d7b75e2ee12c0266acf737fd5af Mon Sep 17 00:00:00 2001 From: Dr-Noob Date: Tue, 1 Dec 2020 12:14:24 +0100 Subject: [PATCH] [v0.94] Refactor CPU features in a separate struct. Remove x86 debug functions --- src/arm/midr.c | 54 ++++++++++++---------- src/common/cpu.h | 11 +++-- src/x86/cpuid.c | 118 +++++++++++++++-------------------------------- src/x86/cpuid.h | 3 -- 4 files changed, 74 insertions(+), 112 deletions(-) diff --git a/src/arm/midr.c b/src/arm/midr.c index 6eedf0d..0a0168a 100644 --- a/src/arm/midr.c +++ b/src/arm/midr.c @@ -119,35 +119,36 @@ uint32_t fill_ids_from_midr(uint32_t* midr_array, int32_t* freq_array, uint32_t* } void init_cpu_info(struct cpuInfo* cpu) { - cpu->NEON = false; - cpu->AES = false; - cpu->SHA1 = false; - cpu->SHA2 = false; - cpu->CRC32 = false; cpu->next_cpu = NULL; } // We assume all cpus share the same hardware // capabilities but I'm not sure it is always // true... -void fill_cpu_features(struct cpuInfo* cpu) { +struct features* get_features_info() { + struct features* feat = malloc(sizeof(struct features)); + bool *ptr = &(feat->AES); + for(int i = 0; i < sizeof(struct features)/sizeof(bool); i++, *ptr++) { + *ptr = false; + } + errno = 0; long hwcaps = getauxval(AT_HWCAP); if(errno == ENOENT) { - printWarn("Unable to retrieve AT_HWCAP using getauxval"); + printWarn("Unable to retrieve AT_HWCAP using getauxval"); } #ifdef __aarch64__ else { - cpu->AES = hwcaps & HWCAP_AES; - cpu->CRC32 = hwcaps & HWCAP_CRC32; - cpu->SHA1 = hwcaps & HWCAP_SHA1; - cpu->SHA2 = hwcaps & HWCAP_SHA2; - cpu->NEON = hwcaps & HWCAP_ASIMD; + feat->AES = hwcaps & HWCAP_AES; + feat->CRC32 = hwcaps & HWCAP_CRC32; + feat->SHA1 = hwcaps & HWCAP_SHA1; + feat->SHA2 = hwcaps & HWCAP_SHA2; + feat->NEON = hwcaps & HWCAP_ASIMD; } #else else { - cpu->NEON = hwcaps & HWCAP_NEON; + feat->NEON = hwcaps & HWCAP_NEON; } hwcaps = getauxval(AT_HWCAP2); @@ -155,12 +156,14 @@ void fill_cpu_features(struct cpuInfo* cpu) { printWarn("Unable to retrieve AT_HWCAP2 using getauxval"); } else { - cpu->AES = hwcaps & HWCAP2_AES; - cpu->CRC32 = hwcaps & HWCAP2_CRC32; - cpu->SHA1 = hwcaps & HWCAP2_SHA1; - cpu->SHA2 = hwcaps & HWCAP2_SHA2; + feat->AES = hwcaps & HWCAP2_AES; + feat->CRC32 = hwcaps & HWCAP2_CRC32; + feat->SHA1 = hwcaps & HWCAP2_SHA1; + feat->SHA2 = hwcaps & HWCAP2_SHA2; } #endif + + return feat; } struct cpuInfo* get_cpu_info() { @@ -188,7 +191,6 @@ struct cpuInfo* get_cpu_info() { } } uint32_t sockets = fill_ids_from_midr(midr_array, freq_array, ids_array, ncores); - fill_cpu_features(cpu); struct cpuInfo* ptr = cpu; int midr_idx = 0; @@ -207,6 +209,7 @@ struct cpuInfo* get_cpu_info() { ptr->midr = midr_array[midr_idx]; ptr->arch = get_uarch_from_midr(ptr->midr, ptr); + ptr->feat = get_features_info(); ptr->freq = get_frequency_info(midr_idx); ptr->cach = get_cache_info(ptr); ptr->topo = get_topology_info(ptr, ptr->cach, midr_array, i, ncores); @@ -249,7 +252,7 @@ char* get_str_peak_performance(struct cpuInfo* cpu) { for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) { flops += ptr->topo->total_cores * (get_freq(ptr->freq) * 1000000); } - if(cpu->NEON) flops = flops * 4; + if(cpu->feat->NEON) flops = flops * 4; if(flops >= (double)1000000000000.0) snprintf(string,size,"%.2f TFLOP/s",flops/1000000000000); @@ -262,26 +265,27 @@ char* get_str_peak_performance(struct cpuInfo* cpu) { } char* get_str_features(struct cpuInfo* cpu) { - char* string = malloc(sizeof(char) * 100); //TODO: Fix + struct features* feat = cpu->feat; + char* string = malloc(sizeof(char) * 25); uint32_t len = 0; - if(cpu->NEON) { + if(feat->NEON) { strcat(string, "NEON,"); len += 5; } - if(cpu->SHA1) { + if(feat->SHA1) { strcat(string, "SHA1,"); len += 5; } - if(cpu->SHA2) { + if(feat->SHA2) { strcat(string, "SHA2,"); len += 5; } - if(cpu->AES) { + if(feat->AES) { strcat(string, "AES,"); len += 4; } - if(cpu->CRC32) { + if(feat->CRC32) { strcat(string, "CRC32,"); len += 6; } diff --git a/src/common/cpu.h b/src/common/cpu.h index 9bffea2..77507f1 100644 --- a/src/common/cpu.h +++ b/src/common/cpu.h @@ -79,7 +79,8 @@ struct topology { #endif }; -struct cpuInfo { +struct features { + bool AES; // Must be the first field of features struct! #ifdef ARCH_X86 bool AVX; bool AVX2; @@ -93,21 +94,23 @@ struct cpuInfo { bool SSE4_2; bool FMA3; bool FMA4; - bool SHA; + bool SHA; #elif ARCH_ARM bool NEON; bool SHA1; bool SHA2; bool CRC32; -#endif - bool AES; +#endif +}; +struct cpuInfo { VENDOR cpu_vendor; struct uarch* arch; struct hypervisor* hv; struct frequency* freq; struct cache* cach; struct topology* topo; + struct features* feat; #ifdef ARCH_X86 // CPU name from model diff --git a/src/x86/cpuid.c b/src/x86/cpuid.c index 7fe9851..02624e8 100755 --- a/src/x86/cpuid.c +++ b/src/x86/cpuid.c @@ -51,23 +51,6 @@ static char *hv_vendors_name[] = { * cpuid amd: https://www.amd.com/system/files/TechDocs/25481.pdf */ -void init_cpu_info(struct cpuInfo* cpu) { - cpu->AVX = false; - cpu->AVX2 = false; - cpu->AVX512 = false; - cpu->SSE = false; - cpu->SSE2 = false; - cpu->SSE3 = false; - cpu->SSSE3 = false; - cpu->SSE4a = false; - cpu->SSE4_1 = false; - cpu->SSE4_2 = false; - cpu->FMA3 = false; - cpu->FMA4 = false; - cpu->AES = false; - cpu->SHA = false; -} - void init_topology_struct(struct topology* topo, struct cache* cach) { topo->total_cores = 0; topo->physical_cores = 0; @@ -224,7 +207,13 @@ struct hypervisor* get_hp_info(bool hv_present) { struct cpuInfo* get_cpu_info() { struct cpuInfo* cpu = malloc(sizeof(struct cpuInfo)); - init_cpu_info(cpu); + struct features* feat = malloc(sizeof(struct features)); + cpu->feat = feat; + + bool *ptr = &(feat->AES); + for(int i = 0; i < sizeof(struct features)/sizeof(bool); i++, *ptr++) { + *ptr = false; + } uint32_t eax = 0; uint32_t ebx = 0; @@ -262,18 +251,18 @@ struct cpuInfo* get_cpu_info() { if (cpu->maxLevels >= 0x00000001){ eax = 0x00000001; cpuid(&eax, &ebx, &ecx, &edx); - cpu->SSE = (edx & ((int)1 << 25)) != 0; - cpu->SSE2 = (edx & ((int)1 << 26)) != 0; - cpu->SSE3 = (ecx & ((int)1 << 0)) != 0; + feat->SSE = (edx & ((int)1 << 25)) != 0; + feat->SSE2 = (edx & ((int)1 << 26)) != 0; + feat->SSE3 = (ecx & ((int)1 << 0)) != 0; - cpu->SSSE3 = (ecx & ((int)1 << 9)) != 0; - cpu->SSE4_1 = (ecx & ((int)1 << 19)) != 0; - cpu->SSE4_2 = (ecx & ((int)1 << 20)) != 0; + feat->SSSE3 = (ecx & ((int)1 << 9)) != 0; + feat->SSE4_1 = (ecx & ((int)1 << 19)) != 0; + feat->SSE4_2 = (ecx & ((int)1 << 20)) != 0; - cpu->AES = (ecx & ((int)1 << 25)) != 0; + feat->AES = (ecx & ((int)1 << 25)) != 0; - cpu->AVX = (ecx & ((int)1 << 28)) != 0; - cpu->FMA3 = (ecx & ((int)1 << 12)) != 0; + feat->AVX = (ecx & ((int)1 << 28)) != 0; + feat->FMA3 = (ecx & ((int)1 << 12)) != 0; bool hv_present = (ecx & ((int)1 << 31)) != 0; if((cpu->hv = get_hp_info(hv_present)) == NULL) @@ -287,9 +276,9 @@ struct cpuInfo* get_cpu_info() { eax = 0x00000007; ecx = 0x00000000; cpuid(&eax, &ebx, &ecx, &edx); - cpu->AVX2 = (ebx & ((int)1 << 5)) != 0; - cpu->SHA = (ebx & ((int)1 << 29)) != 0; - cpu->AVX512 = (((ebx & ((int)1 << 16)) != 0) || + feat->AVX2 = (ebx & ((int)1 << 5)) != 0; + feat->SHA = (ebx & ((int)1 << 29)) != 0; + feat->AVX512 = (((ebx & ((int)1 << 16)) != 0) || ((ebx & ((int)1 << 28)) != 0) || ((ebx & ((int)1 << 26)) != 0) || ((ebx & ((int)1 << 27)) != 0) || @@ -305,8 +294,8 @@ struct cpuInfo* get_cpu_info() { if (cpu->maxExtendedLevels >= 0x80000001){ eax = 0x80000001; cpuid(&eax, &ebx, &ecx, &edx); - cpu->SSE4a = (ecx & ((int)1 << 6)) != 0; - cpu->FMA4 = (ecx & ((int)1 << 16)) != 0; + feat->SSE4a = (ecx & ((int)1 << 6)) != 0; + feat->FMA4 = (ecx & ((int)1 << 16)) != 0; } else { printWarn("Can't read features information from cpuid (needed extended level is 0x%.8X, max is 0x%.8X)", 0x80000001, cpu->maxExtendedLevels); @@ -732,22 +721,23 @@ char* get_str_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64 return string; } + struct features* feat = cpu->feat; double flops = topo->physical_cores * topo->sockets * (freq*1000000); int vpus = get_number_of_vpus(cpu); flops = flops * vpus; - if(cpu->FMA3 || cpu->FMA4) + if(feat->FMA3 || feat->FMA4) flops = flops*2; // Ice Lake has AVX512, but it has 1 VPU for AVX512, while // it has 2 for AVX2. If this is a Ice Lake CPU, we are computing // the peak performance supposing AVX2, not AVX512 - if(cpu->AVX512 && vpus_are_AVX512(cpu)) + if(feat->AVX512 && vpus_are_AVX512(cpu)) flops = flops*16; - else if(cpu->AVX || cpu->AVX2) + else if(feat->AVX || feat->AVX2) flops = flops*8; - else if(cpu->SSE) + else if(feat->SSE) flops = flops*4; // See https://sites.utexas.edu/jdm4372/2018/01/22/a-peculiar- @@ -807,11 +797,11 @@ char* get_str_topology(struct cpuInfo* cpu, struct topology* topo, bool dual_soc char* get_str_avx(struct cpuInfo* cpu) { //If all AVX are available, it will use up to 15 char* string = malloc(sizeof(char)*17+1); - if(!cpu->AVX) + if(!cpu->feat->AVX) snprintf(string,2+1,"No"); - else if(!cpu->AVX2) + else if(!cpu->feat->AVX2) snprintf(string,3+1,"AVX"); - else if(!cpu->AVX512) + else if(!cpu->feat->AVX512) snprintf(string,8+1,"AVX,AVX2"); else snprintf(string,15+1,"AVX,AVX2,AVX512"); @@ -830,31 +820,31 @@ char* get_str_sse(struct cpuInfo* cpu) { uint32_t SSE4_2_sl = 7; char* string = malloc(sizeof(char)*SSE_sl+SSE2_sl+SSE3_sl+SSSE3_sl+SSE4a_sl+SSE4_1_sl+SSE4_2_sl+1); - if(cpu->SSE) { + if(cpu->feat->SSE) { snprintf(string+last,SSE_sl+1,"SSE,"); last+=SSE_sl; } - if(cpu->SSE2) { + if(cpu->feat->SSE2) { snprintf(string+last,SSE2_sl+1,"SSE2,"); last+=SSE2_sl; } - if(cpu->SSE3) { + if(cpu->feat->SSE3) { snprintf(string+last,SSE3_sl+1,"SSE3,"); last+=SSE3_sl; } - if(cpu->SSSE3) { + if(cpu->feat->SSSE3) { snprintf(string+last,SSSE3_sl+1,"SSSE3,"); last+=SSSE3_sl; } - if(cpu->SSE4a) { + if(cpu->feat->SSE4a) { snprintf(string+last,SSE4a_sl+1,"SSE4a,"); last+=SSE4a_sl; } - if(cpu->SSE4_1) { + if(cpu->feat->SSE4_1) { snprintf(string+last,SSE4_1_sl+1,"SSE4.1,"); last+=SSE4_1_sl; } - if(cpu->SSE4_2) { + if(cpu->feat->SSE4_2) { snprintf(string+last,SSE4_2_sl+1,"SSE4.2,"); last+=SSE4_2_sl; } @@ -866,9 +856,9 @@ char* get_str_sse(struct cpuInfo* cpu) { char* get_str_fma(struct cpuInfo* cpu) { char* string = malloc(sizeof(char)*9+1); - if(!cpu->FMA3) + if(!cpu->feat->FMA3) snprintf(string,2+1,"No"); - else if(!cpu->FMA4) + else if(!cpu->feat->FMA4) snprintf(string,4+1,"FMA3"); else snprintf(string,9+1,"FMA3,FMA4"); @@ -886,38 +876,6 @@ void print_debug(struct cpuInfo* cpu) { free_cpuinfo_struct(cpu); } -void debug_cpu_info(struct cpuInfo* cpu) { - printf("AVX=%s\n", cpu->AVX ? "true" : "false"); - printf("AVX2=%s\n", cpu->AVX2 ? "true" : "false"); - printf("AVX512=%s\n\n", cpu->AVX512 ? "true" : "false"); - - printf("SSE=%s\n", cpu->SSE ? "true" : "false"); - printf("SSE2=%s\n", cpu->SSE2 ? "true" : "false"); - printf("SSE3=%s\n", cpu->SSE3 ? "true" : "false"); - printf("SSSE3=%s\n", cpu->SSSE3 ? "true" : "false"); - printf("SSE4a=%s\n", cpu->SSE4a ? "true" : "false"); - printf("SSE4_1=%s\n", cpu->SSE4_1 ? "true" : "false"); - printf("SSE4_2=%s\n\n", cpu->SSE4_2 ? "true" : "false"); - - printf("FMA3=%s\n", cpu->FMA3 ? "true" : "false"); - printf("FMA4=%s\n\n", cpu->FMA4 ? "true" : "false"); - - printf("AES=%s\n", cpu->AES ? "true" : "false"); - printf("SHA=%s\n", cpu->SHA ? "true" : "false"); -} - -void debug_cache(struct cache* cach) { - printf("L1i=%dB\n",cach->L1i->size); - printf("L1d=%dB\n",cach->L1d->size); - printf("L2=%dB\n",cach->L2->size); - printf("L3=%dB\n",cach->L3->size); -} - -void debug_frequency(struct frequency* freq) { - printf("maxf=%d Mhz\n",freq->max); - printf("basef=%d Mhz\n",freq->base); -} - void free_topo_struct(struct topology* topo) { free(topo->apic->cache_select_mask); free(topo->apic->cache_id_apic); diff --git a/src/x86/cpuid.h b/src/x86/cpuid.h index d2be947..4025f64 100644 --- a/src/x86/cpuid.h +++ b/src/x86/cpuid.h @@ -15,9 +15,6 @@ char* get_str_topology(struct cpuInfo* cpu, struct topology* topo, bool dual_soc char* get_str_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t freq); void print_debug(struct cpuInfo* cpu); -void debug_cpu_info(struct cpuInfo* cpu); -void debug_cache(struct cache* cach); -void debug_frequency(struct frequency* freq); void free_topo_struct(struct topology* topo);