diff --git a/src/common/cpu.h b/src/common/cpu.h
index f1623f6..f2fb41c 100644
--- a/src/common/cpu.h
+++ b/src/common/cpu.h
@@ -78,6 +78,7 @@ struct topology {
   uint32_t smt_supported; // Number of SMT that CPU supports (equal to smt_available if SMT is enabled)
 #ifdef ARCH_X86
   uint32_t smt_available; // Number of SMT that is currently enabled
+  int32_t total_cores_module; // Total cores in the current module (only makes sense in hybrid archs, like ADL)
   struct apic* apic;
 #endif
 #endif
@@ -142,10 +143,13 @@ struct cpuInfo {
 
 #ifdef ARCH_ARM
   struct system_on_chip* soc;
+#endif
+
+#if defined(ARCH_X86) || defined(ARCH_ARM)
   // If SoC contains more than one CPU and they
   // are different, the others will be stored in
   // the next_cpu field
-  struct cpuInfo* next_cpu;
+  struct cpuInfo* next_cpu;
   uint8_t num_cpus;
 #endif
 };
diff --git a/src/x86/apic.c b/src/x86/apic.c
index 3e7a6d8..865a9e3 100644
--- a/src/x86/apic.c
+++ b/src/x86/apic.c
@@ -102,6 +102,59 @@ bool bind_to_cpu(int cpu_id) {
 }
 #endif
 
+int get_total_cores_module(int total_cores, int module) {
+  int total_modules = 2;
+  int32_t current_module_idx = -1;
+  bool end = false;
+  int32_t* core_types = emalloc(sizeof(int32_t) * total_modules);
+  for(int i=0; i < total_modules; i++) core_types[i] = -1;
+  int cores_in_module = 0;
+  int i = 0;
+
+  // Get the original mask to restore it later
+  cpu_set_t original_mask;
+  if(sched_getaffinity(0, sizeof(original_mask), &original_mask) == -1) {
+    printWarn("sched_getaffinity: %s", strerror(errno));
+    free(core_types); return -1;
+  }
+
+  while(!end) {
+    if(!bind_to_cpu(i)) {
+      free(core_types); return -1;
+    }
+    uint32_t eax = 0x0000001A;
+    uint32_t ebx = 0;
+    uint32_t ecx = 0;
+    uint32_t edx = 0;
+    cpuid(&eax, &ebx, &ecx, &edx);
+    int32_t core_type = eax >> 24 & 0xFF;
+    bool found = false;
+
+    for(int j=0; j < total_modules && !found; j++) {
+      if(core_types[j] == core_type) found = true;
+    }
+    if(!found) {
+      current_module_idx++;
+      core_types[current_module_idx] = core_type;
+    }
+    if(current_module_idx == module) {
+      cores_in_module++;
+      if(i+1 == total_cores) end = true;
+    }
+    else if(cores_in_module > 0) end = true;
+    i++;
+  }
+
+  // Reset the original affinity
+  if (sched_setaffinity (0, sizeof(original_mask), &original_mask) == -1) {
+    printWarn("sched_setaffinity: %s", strerror(errno));
+    free(core_types); return -1;
+  }
+
+  free(core_types);
+  return cores_in_module;
+}
+
 bool fill_topo_masks_apic(struct topology* topo) {
   uint32_t eax = 0x00000001;
   uint32_t ebx = 0;
@@ -197,14 +250,14 @@ uint32_t max_apic_id_size(uint32_t** cache_id_apic, struct topology* topo) {
   uint32_t max = 0;
 
   for(int i=0; i < topo->cach->max_cache_level; i++) {
-    for(int j=0; j < topo->total_cores; j++) {
+    for(int j=0; j < topo->total_cores_module; j++) {
       if(cache_id_apic[j][i] > max) max = cache_id_apic[j][i];
     }
   }
 
   max++;
 
-  if(max > (uint32_t) topo->total_cores) return max;
-  return topo->total_cores;
+  if(max > (uint32_t) topo->total_cores_module) return max;
+  return topo->total_cores_module;
 }
 
 bool build_topo_from_apic(uint32_t* apic_pkg, uint32_t* apic_smt, uint32_t** cache_id_apic, struct topology* topo) {
@@ -219,18 +272,18 @@ bool build_topo_from_apic(uint32_t* apic_pkg, uint32_t* apic_smt, uint32_t** cac
   memset(apic_id, 0, sizeof(uint32_t) * size);
 
   // System topology
-  for(int i=0; i < topo->total_cores; i++) {
+  for(int i=0; i < topo->total_cores_module; i++) {
     sockets[apic_pkg[i]] = 1;
     smt[apic_smt[i]] = 1;
   }
 
-  for(int i=0; i < topo->total_cores; i++) {
+  for(int i=0; i < topo->total_cores_module; i++) {
     if(sockets[i] != 0) topo->sockets++;
     if(smt[i] != 0) topo->smt_available++;
   }
 
-  topo->logical_cores = topo->total_cores / topo->sockets;
+  topo->logical_cores = topo->total_cores_module / topo->sockets;
   topo->physical_cores = topo->logical_cores / topo->smt_available;
 
   // Cache topology
   for(int i=0; i < topo->cach->max_cache_level; i++) {
@@ -238,7 +291,7 @@ bool build_topo_from_apic(uint32_t* apic_pkg, uint32_t* apic_smt, uint32_t** cac
     num_caches = 0;
     memset(apic_id, 0, sizeof(uint32_t) * size);
 
-    for(int c=0; c < topo->total_cores; c++) {
+    for(int c=0; c < topo->total_cores_module; c++) {
       apic_id[cache_id_apic[c][i]]++;
     }
     for(uint32_t c=0; c < size; c++) {
@@ -297,7 +350,7 @@ void add_apic_to_array(uint32_t apic, uint32_t* apic_ids, int n) {
   }
 }
 
-bool fill_apic_ids(uint32_t* apic_ids, int n, bool x2apic_id) {
+bool fill_apic_ids(uint32_t* apic_ids, int first_core, int n, bool x2apic_id) {
 #ifdef __APPLE__
   // macOS extremely dirty approach...
   printf("cpufetch is computing APIC IDs, please wait...\n");
@@ -322,12 +375,12 @@ bool fill_apic_ids(uint32_t* apic_ids, int first_core, int n, bool x2apic_id) {
   }
 #endif
 
-  for(int i=0; i < n; i++) {
+  for(int i=first_core; i < first_core+n; i++) {
     if(!bind_to_cpu(i)) {
       printErr("Failed binding the process to CPU %d", i);
       return false;
     }
-    apic_ids[i] = get_apic_id(x2apic_id);
+    apic_ids[i-first_core] = get_apic_id(x2apic_id);
   }
 
 #ifdef __linux__
@@ -342,14 +395,14 @@ bool fill_apic_ids(uint32_t* apic_ids, int first_core, int n, bool x2apic_id) {
   return true;
 }
 
-bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) {
+bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo, int module) {
   uint32_t apic_id;
-  uint32_t* apic_ids = emalloc(sizeof(uint32_t) * topo->total_cores);
-  uint32_t* apic_pkg = emalloc(sizeof(uint32_t) * topo->total_cores);
-  uint32_t* apic_core = emalloc(sizeof(uint32_t) * topo->total_cores);
-  uint32_t* apic_smt = emalloc(sizeof(uint32_t) * topo->total_cores);
-  uint32_t** cache_smt_id_apic = emalloc(sizeof(uint32_t*) * topo->total_cores);
-  uint32_t** cache_id_apic = emalloc(sizeof(uint32_t*) * topo->total_cores);
+  uint32_t* apic_ids = emalloc(sizeof(uint32_t) * topo->total_cores_module);
+  uint32_t* apic_pkg = emalloc(sizeof(uint32_t) * topo->total_cores_module);
+  uint32_t* apic_core = emalloc(sizeof(uint32_t) * topo->total_cores_module);
+  uint32_t* apic_smt = emalloc(sizeof(uint32_t) * topo->total_cores_module);
+  uint32_t** cache_smt_id_apic = emalloc(sizeof(uint32_t*) * topo->total_cores_module);
+  uint32_t** cache_id_apic = emalloc(sizeof(uint32_t*) * topo->total_cores_module);
 
   bool x2apic_id;
   if(cpu->maxLevels >= 0x0000000B) {
@@ -367,7 +420,7 @@ bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo, int modu
     x2apic_id = false;
   }
 
-  for(int i=0; i < topo->total_cores; i++) {
+  for(int i=0; i < topo->total_cores_module; i++) {
     cache_smt_id_apic[i] = emalloc(sizeof(uint32_t) * (topo->cach->max_cache_level));
     cache_id_apic[i] = emalloc(sizeof(uint32_t) * (topo->cach->max_cache_level));
   }
@@ -383,12 +436,16 @@ bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo, int modu
     return false;
   }
 
+  // TODO: Fix this
+  int first_core = 0;
+  if(module == 1) first_core = 16;
+
   get_cache_topology_from_apic(topo);
 
-  if(!fill_apic_ids(apic_ids, topo->total_cores, x2apic_id))
+  if(!fill_apic_ids(apic_ids, first_core, topo->total_cores_module, x2apic_id))
     return false;
 
-  for(int i=0; i < topo->total_cores; i++) {
+  for(int i=0; i < topo->total_cores_module; i++) {
     apic_id = apic_ids[i];
 
     apic_pkg[i] = (apic_id & topo->apic->pkg_mask) >> topo->apic->pkg_mask_shift;
@@ -404,20 +461,19 @@ bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo, int modu
   /* DEBUG
   for(int i=0; i < topo->cach->max_cache_level; i++) {
     printf("[CACH %1d]", i);
-    for(int j=0; j < topo->total_cores; j++)
+    for(int j=0; j < topo->total_cores_module; j++)
       printf("[%03d]", cache_id_apic[j][i]);
     printf("\n");
   }
-  for(int i=0; i < topo->total_cores; i++)
+  for(int i=0; i < topo->total_cores_module; i++)
     printf("[%2d] 0x%.8X\n", i, apic_pkg[i]);
   printf("\n");
-  for(int i=0; i < topo->total_cores; i++)
+  for(int i=0; i < topo->total_cores_module; i++)
     printf("[%2d] 0x%.8X\n", i, apic_core[i]);
   printf("\n");
-  for(int i=0; i < topo->total_cores; i++)
+  for(int i=0; i < topo->total_cores_module; i++)
     printf("[%2d] 0x%.8X\n", i, apic_smt[i]);*/
-
   bool ret = build_topo_from_apic(apic_pkg, apic_smt, cache_id_apic, topo);
 
   // Assumption: If we cant get smt_available, we assume it is equal to smt_supported...
 
@@ -429,7 +485,7 @@ bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo, int modu
   free(apic_pkg);
   free(apic_core);
   free(apic_smt);
-  for(int i=0; i < topo->total_cores; i++) {
+  for(int i=0; i < topo->total_cores_module; i++) {
     free(cache_smt_id_apic[i]);
     free(cache_id_apic[i]);
   }
diff --git a/src/x86/apic.h b/src/x86/apic.h
index 1b183b4..51891f6 100644
--- a/src/x86/apic.h
+++ b/src/x86/apic.h
@@ -14,11 +14,13 @@ struct apic {
   uint32_t* cache_id_apic;
 };
 
-bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo);
+bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo, int module);
 uint32_t is_smt_enabled_amd(struct topology* topo);
 
 #ifndef __APPLE__
 bool bind_to_cpu(int cpu_id);
 #endif
 
+int get_total_cores_module(int total_cores, int module);
+
 #endif
diff --git a/src/x86/cpuid.c b/src/x86/cpuid.c
index e2022b3..ca59789 100644
--- a/src/x86/cpuid.c
+++ b/src/x86/cpuid.c
@@ -344,13 +344,61 @@ struct features* get_features_info(struct cpuInfo* cpu) {
   return feat;
 }
 
+bool set_cpu_module(int m, int total_modules) {
+  if(total_modules > 1) {
+    // We have a hybrid architecture.
+    // 1. Find the first core from module m
+    int32_t core_id = -1;
+    int32_t current_module_idx = -1;
+    int32_t* core_types = emalloc(sizeof(int32_t) * total_modules);
+    for(int i=0; i < total_modules; i++) core_types[i] = -1;
+    int i = 0;
+
+    while(core_id == -1) {
+      if(!bind_to_cpu(i)) {
+        free(core_types); return false;
+      }
+      uint32_t eax = 0x0000001A;
+      uint32_t ebx = 0;
+      uint32_t ecx = 0;
+      uint32_t edx = 0;
+      cpuid(&eax, &ebx, &ecx, &edx);
+      int32_t core_type = eax >> 24 & 0xFF;
+      bool found = false;
+
+      for(int j=0; j < total_modules && !found; j++) {
+        if(core_types[j] == core_type) found = true;
+      }
+      if(!found) {
+        current_module_idx++;
+        core_types[current_module_idx] = core_type;
+        if(current_module_idx == m) {
+          core_id = i;
+        }
+      }
+
+      i++;
+    }
+
+    free(core_types);
+    // 2. Now bind to that core
+    if(!bind_to_cpu(core_id)) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
 struct cpuInfo* get_cpu_info() {
   struct cpuInfo* cpu = emalloc(sizeof(struct cpuInfo));
   cpu->peak_performance = -1;
+  cpu->next_cpu = NULL;
   cpu->topo = NULL;
   cpu->cach = NULL;
   cpu->feat = NULL;
+  uint32_t modules = 1;
 
   uint32_t eax = 0;
   uint32_t ebx = 0;
   uint32_t ecx = 0;
@@ -383,8 +431,6 @@ struct cpuInfo* get_cpu_info() {
   cpuid(&eax, &ebx, &ecx, &edx);
   cpu->maxExtendedLevels = eax;
 
-  cpu->feat = get_features_info(cpu);
-
   if (cpu->maxExtendedLevels >= 0x80000004){
     cpu->cpu_name = get_str_cpu_name_internal();
   }
@@ -409,18 +455,49 @@ struct cpuInfo* get_cpu_info() {
     cpu->hybrid_flag = (edx >> 15) & 0x1;
   }
 
-  // If any field of the struct is NULL,
-  // return inmideately, as further functions
-  // require valid fields (cach, topo, etc)
-  cpu->arch = get_cpu_uarch(cpu);
-  cpu->freq = get_frequency_info(cpu);
+  if(cpu->hybrid_flag) modules = 2;
 
-  cpu->cach = get_cache_info(cpu);
-  if(cpu->cach == NULL) return cpu;
+  struct cpuInfo* ptr = cpu;
+  for(uint32_t i=0; i < modules; i++) {
+    set_cpu_module(i, modules);
 
-  cpu->topo = get_topology_info(cpu, cpu->cach);
-  if(cpu->topo == NULL) return cpu;
+    if(i > 0) {
+      ptr->next_cpu = emalloc(sizeof(struct cpuInfo));
+      ptr = ptr->next_cpu;
+      ptr->next_cpu = NULL;
+      ptr->peak_performance = -1;
+      ptr->topo = NULL;
+      ptr->cach = NULL;
+      ptr->feat = NULL;
+      // We assume that these cores have the
+      // same cpuid capabilities
+      ptr->cpu_vendor = cpu->cpu_vendor;
+      ptr->maxLevels = cpu->maxLevels;
+      ptr->maxExtendedLevels = cpu->maxExtendedLevels;
+      ptr->hybrid_flag = cpu->hybrid_flag;
+    }
+
+    ptr->feat = get_features_info(ptr);
+
+    // If any field of the struct is NULL,
+    // return immediately, as further functions
+    // require valid fields (cach, topo, etc)
+    ptr->arch = get_cpu_uarch(ptr);
+    ptr->freq = get_frequency_info(ptr);
+
+    ptr->cach = get_cache_info(ptr);
+    if(ptr->cach == NULL) return cpu;
+
+    if(cpu->hybrid_flag) {
+      ptr->topo = get_topology_info(ptr, ptr->cach, i);
+    }
+    else {
+      ptr->topo = get_topology_info(ptr, ptr->cach, -1);
+    }
+    if(ptr->topo == NULL) return cpu;
+  }
+
+  cpu->num_cpus = modules;
 
   cpu->peak_performance = get_peak_performance(cpu, cpu->topo, get_freq(cpu->freq), accurate_pp());
 
   return cpu;
@@ -512,7 +589,7 @@ void get_topology_from_udev(struct topology* topo) {
 
 // Main reference: https://software.intel.com/content/www/us/en/develop/articles/intel-64-architecture-processor-topology-enumeration.html
 // Very interesting resource: https://wiki.osdev.org/Detecting_CPU_Topology_(80x86)
-struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) {
+struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach, int module) {
   struct topology* topo = emalloc(sizeof(struct topology));
   init_topology_struct(topo, cach);
@@ -536,10 +613,17 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach, int
   }
 #endif
 
+  if(cpu->hybrid_flag) {
+    topo->total_cores_module = get_total_cores_module(topo->total_cores, module);
+  }
+  else {
+    topo->total_cores_module = topo->total_cores;
+  }
+
   switch(cpu->cpu_vendor) {
     case CPU_VENDOR_INTEL:
       if (cpu->maxLevels >= 0x00000004) {
-        bool toporet = get_topology_from_apic(cpu, topo);
+        bool toporet = get_topology_from_apic(cpu, topo, module);
         if(!toporet) {
 #ifdef __linux__
           printWarn("Failed to retrieve topology from APIC, using udev...\n");
diff --git a/src/x86/cpuid.h b/src/x86/cpuid.h
index d78517a..3b0e21b 100644
--- a/src/x86/cpuid.h
+++ b/src/x86/cpuid.h
@@ -6,7 +6,7 @@
 struct cpuInfo* get_cpu_info();
 struct cache* get_cache_info(struct cpuInfo* cpu);
 struct frequency* get_frequency_info(struct cpuInfo* cpu);
-struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach);
+struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach, int module);
 
 char* get_str_avx(struct cpuInfo* cpu);
 char* get_str_sse(struct cpuInfo* cpu);