diff --git a/src/apic.c b/src/apic.c index cd7de50..31668c6 100644 --- a/src/apic.c +++ b/src/apic.c @@ -48,22 +48,15 @@ uint32_t create_mask(uint32_t num_entries, uint32_t *mask_width) { return (1 << i) -1; } -uint32_t get_apic_id(bool x2apic_id) { +uint32_t get_apic_id() { uint32_t eax = 0; uint32_t ebx = 0; uint32_t ecx = 0; uint32_t edx = 0; - if(x2apic_id) { - eax = 0x0000000B; - cpuid(&eax, &ebx, &ecx, &edx); - return edx; - } - else { - eax = 0x00000001; - cpuid(&eax, &ebx, &ecx, &edx); - return (ebx >> 24); - } + eax = 0x00000001; + cpuid(&eax, &ebx, &ecx, &edx); + return (ebx >> 24); } bool bind_to_cpu(int cpu_id) { @@ -172,6 +165,52 @@ bool fill_topo_masks_x2apic(struct topology** topo) { return true; } +uint8_t get_number_llc_amd(struct topology* topo) { + uint32_t eax = 0x8000001D; + uint32_t ebx = 0; + uint32_t ecx = 3; // LLC Level + uint32_t edx = 0; + uint32_t num_sharing_cache = 0; + + cpuid(&eax, &ebx, &ecx, &edx); + + num_sharing_cache = ((eax >> 14) & 0xfff) + 1; + + return topo->logical_cores / num_sharing_cache; +} + +void guess_cach_sizes_amd(struct topology** topo) { + (*topo)->cach->L1i->num_caches = (*topo)->physical_cores; + (*topo)->cach->L1d->num_caches = (*topo)->physical_cores; + (*topo)->cach->L2->num_caches = (*topo)->physical_cores; + (*topo)->cach->L3->num_caches = get_number_llc_amd(*topo); +} + +bool get_topology_amd(struct topology** topo) { + /*uint32_t eax = 0x8000001E; + uint32_t ebx = 0; + uint32_t ecx = 0; + uint32_t edx = 0; + uint32_t err; + + cpuid(&eax, &ebx, &ecx, &edx); + + uint32_t node_id = ecx & 0xff; + uint32_t cpu_core_id = ebx & 0xff; + uint32_t smp_num_siblings = ((ebx >> 8) & 0xff) + 1; + uint32_t x86_max_cores = 0; + + if (smp_num_siblings > 1) + x86_max_cores /= smp_num_siblings;*/ + + // AMD does not support CPUID 0xB or 0x1F to query topology + // err = detect_extended_topology(cpu, topo); + + guess_cach_sizes_amd(topo); + + return true; +} + bool arr_contains_value(uint32_t* arr, uint32_t value, uint32_t arr_size) { for(uint32_t i=0; i < arr_size; i++) { if(arr[i] == value) return true; @@ -267,14 +306,17 @@ bool get_cache_topology_from_apic(struct topology** topo) { return true; } -bool get_topology_from_apic(uint32_t cpuid_max_levels, struct topology** topo) { +bool get_topology_from_apic(struct cpuInfo* cpu, struct topology** topo) { + if(cpu->cpu_vendor == VENDOR_AMD) + return get_topology_amd(topo); + uint32_t apic_id; uint32_t* apic_pkg = malloc(sizeof(uint32_t) * (*topo)->total_cores); uint32_t* apic_core = malloc(sizeof(uint32_t) * (*topo)->total_cores); uint32_t* apic_smt = malloc(sizeof(uint32_t) * (*topo)->total_cores); uint32_t** cache_smt_id_apic = malloc(sizeof(uint32_t*) * (*topo)->total_cores); uint32_t** cache_id_apic = malloc(sizeof(uint32_t*) * (*topo)->total_cores); - bool x2apic_id = cpuid_max_levels >= 0x0000000B; + bool x2apic_id = cpu->maxLevels >= 0x0000000B; for(int i=0; i < (*topo)->total_cores; i++) { cache_smt_id_apic[i] = malloc(sizeof(uint32_t) * ((*topo)->cach->max_cache_level)); @@ -299,7 +341,7 @@ bool get_topology_from_apic(uint32_t cpuid_max_levels, struct topology** topo) { printErr("Failed binding to CPU %d", i); return false; } - apic_id = get_apic_id(x2apic_id); + apic_id = get_apic_id(); apic_pkg[i] = (apic_id & (*topo)->apic->pkg_mask) >> (*topo)->apic->pkg_mask_shift; apic_core[i] = (apic_id & (*topo)->apic->core_mask) >> (*topo)->apic->smt_mask_width; @@ -331,7 +373,7 @@ bool get_topology_from_apic(uint32_t cpuid_max_levels, struct topology** topo) { bool ret = build_topo_from_apic(apic_pkg, apic_smt, cache_id_apic, topo); // Assumption: If we cant get smt_available, we assume it is equal to smt_supported... - if(!x2apic_id) (*topo)->smt_supported = (*topo)->smt_available; + if (!x2apic_id) (*topo)->smt_supported = (*topo)->smt_available; //TODO: free @@ -348,8 +390,7 @@ uint32_t is_smt_enabled(struct topology* topo) { return false; } id = get_apic_id(false) & 1; // get the last bit - printf("0x%.8X %d\n", get_apic_id(false), id); - //if(id == 1) return 2; // We assume there isn't any AMD CPU with more than 2th per core + if(id == 1) return 2; // We assume there isn't any AMD CPU with more than 2th per core } return 1; diff --git a/src/apic.h b/src/apic.h index 25b9032..9615086 100644 --- a/src/apic.h +++ b/src/apic.h @@ -14,7 +14,7 @@ struct apic { uint32_t* cache_id_apic; }; -bool get_topology_from_apic(uint32_t cpuid_max_levels, struct topology** topo); +bool get_topology_from_apic(struct cpuInfo* cpu, struct topology** topo); uint32_t is_smt_enabled(struct topology* topo); #endif diff --git a/src/cpuid.c b/src/cpuid.c index 63d33b1..515ec0c 100644 --- a/src/cpuid.c +++ b/src/cpuid.c @@ -37,31 +37,6 @@ * cpuid amd: https://www.amd.com/system/files/TechDocs/25481.pdf */ -struct cpuInfo { - bool AVX; - bool AVX2; - bool AVX512; - bool SSE; - bool SSE2; - bool SSE3; - bool SSSE3; - bool SSE4a; - bool SSE4_1; - bool SSE4_2; - bool FMA3; - bool FMA4; - bool AES; - bool SHA; - - VENDOR cpu_vendor; - - char* cpu_name; - // Max cpuids levels - uint32_t maxLevels; - // Max cpuids extended levels - uint32_t maxExtendedLevels; -}; - struct frequency { int64_t base; int64_t max; @@ -287,7 +262,7 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) { switch(cpu->cpu_vendor) { case VENDOR_INTEL: if (cpu->maxLevels >= 0x00000004) { - get_topology_from_apic(cpu->maxLevels, &topo); + get_topology_from_apic(cpu, &topo); } else { printErr("Can't read topology information from cpuid (needed level is 0x%.8X, max is 0x%.8X)", 0x00000001, cpu->maxLevels); @@ -297,7 +272,7 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) { topo->smt_supported = 1; } break; - case VENDOR_AMD: + case VENDOR_AMD: if (cpu->maxExtendedLevels >= 0x80000008) { eax = 0x80000008; cpuid(&eax, &ebx, &ecx, &edx); @@ -319,6 +294,7 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) { topo->logical_cores = 1; topo->smt_supported = 1; } + if (cpu->maxLevels >= 0x0000000B) { topo->smt_available = is_smt_enabled(topo); } @@ -331,9 +307,14 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) { if(topo->smt_supported > 1) topo->sockets = topo->total_cores / topo->smt_supported / topo->physical_cores; // Idea borrowed from lscpu else - topo->sockets = topo->total_cores / topo->physical_cores; + topo->sockets = topo->total_cores / topo->physical_cores; + + if (cpu->maxExtendedLevels >= 0x8000001D) { + get_topology_from_apic(cpu, &topo); + } break; + default: printBug("Cant get topology because VENDOR is empty"); return NULL; @@ -342,20 +323,6 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) { return topo; } -uint8_t get_number_llc_amd(struct topology* topo) { - uint32_t eax = 0x8000001D; - uint32_t ebx = 0; - uint32_t ecx = 3; // LLC Level - uint32_t edx = 0; - uint32_t num_sharing_cache = 0; - - cpuid(&eax, &ebx, &ecx, &edx); - - num_sharing_cache = ((eax >> 14) & 0xfff) + 1; - - return topo->logical_cores / num_sharing_cache; -} - struct cache* get_cache_info(struct cpuInfo* cpu) { struct cache* cach = malloc(sizeof(struct cache)); cach->L1i = malloc(sizeof(struct cach)); diff --git a/src/cpuid.h b/src/cpuid.h index 3b2ad71..ec29924 100644 --- a/src/cpuid.h +++ b/src/cpuid.h @@ -10,9 +10,35 @@ #define UNKNOWN -1 -struct cpuInfo; +typedef int32_t VENDOR; + struct frequency; +struct cpuInfo { + bool AVX; + bool AVX2; + bool AVX512; + bool SSE; + bool SSE2; + bool SSE3; + bool SSSE3; + bool SSE4a; + bool SSE4_1; + bool SSE4_2; + bool FMA3; + bool FMA4; + bool AES; + bool SHA; + + VENDOR cpu_vendor; + + char* cpu_name; + // Max cpuids levels + uint32_t maxLevels; + // Max cpuids extended levels + uint32_t maxExtendedLevels; +}; + struct cach { int32_t size; uint8_t num_caches; @@ -40,8 +66,6 @@ struct topology { struct cache* cach; }; -typedef int32_t VENDOR; - struct cpuInfo* get_cpu_info(); VENDOR get_cpu_vendor(struct cpuInfo* cpu); uint32_t get_nsockets(struct topology* topo); diff --git a/src/main.c b/src/main.c index d227099..82a3a8f 100644 --- a/src/main.c +++ b/src/main.c @@ -6,7 +6,7 @@ #include "cpuid.h" #include "global.h" -static const char* VERSION = "0.61"; +static const char* VERSION = "0.62"; void print_help(char *argv[]) { printf("Usage: %s [--version] [--help] [--levels] [--style fancy|retro|legacy] [--color 'R,G,B:R,G,B:R,G,B:R,G,B']\n\