From 69cc08759a0eac74fba3da45df95c730bff1ddf9 Mon Sep 17 00:00:00 2001 From: Dr-Noob Date: Sat, 29 Aug 2020 21:50:51 +0200 Subject: [PATCH] Fix #21 and #22: Obtain the number of caches at every level instead of guessing it. This is done by fetching the cache topology from the APIC. It works, but it needs a big refactoring. Moreover, it currently works only on Intel CPUs, so this breaks cache detection on AMD CPUs. --- src/apic.c | 112 +++++++++++++++++++++++++++++++++++--- src/apic.h | 2 + src/cpuid.c | 147 +++++++++++++++++++++++--------------------------- src/cpuid.h | 30 ++++++++--- src/main.c | 10 ++-- src/printer.c | 8 +-- 6 files changed, 206 insertions(+), 103 deletions(-) diff --git a/src/apic.c b/src/apic.c index cafcd30..cd7de50 100644 --- a/src/apic.c +++ b/src/apic.c @@ -172,18 +172,43 @@ bool fill_topo_masks_x2apic(struct topology** topo) { return true; } -bool build_topo_from_apic(uint32_t* apic_pkg, uint32_t* apic_smt, struct topology** topo) { - uint32_t sockets[64]; - uint32_t smt[64]; +bool arr_contains_value(uint32_t* arr, uint32_t value, uint32_t arr_size) { + for(uint32_t i=0; i < arr_size; i++) { + if(arr[i] == value) return true; + } + return false; +} + +uint32_t max_apic_id_size(uint32_t** cache_id_apic, struct topology** topo) { + uint32_t max = 0; - memset(sockets, 0, sizeof(uint32_t) * 64); - memset(smt, 0, sizeof(uint32_t) * 64); + for(int i=0; i < (*topo)->cach->max_cache_level; i++) { + for(int j=0; j < (*topo)->total_cores; j++) { + if(cache_id_apic[j][i] > max) max = cache_id_apic[j][i]; + } + } + + max++; + if(max > (*topo)->total_cores) return max; + return (*topo)->total_cores; +} + +bool build_topo_from_apic(uint32_t* apic_pkg, uint32_t* apic_smt, uint32_t** cache_id_apic, struct topology** topo) { + uint32_t size = max_apic_id_size(cache_id_apic, topo); + uint32_t* sockets = malloc(sizeof(uint32_t) * size); + uint32_t* smt = malloc(sizeof(uint32_t) * size); + uint32_t* apic_id = malloc(sizeof(uint32_t) * size); + uint32_t num_caches = 0; + 
memset(sockets, 0, sizeof(uint32_t) * size); + memset(smt, 0, sizeof(uint32_t) * size); + memset(apic_id, 0, sizeof(uint32_t) * size); for(int i=0; i < (*topo)->total_cores; i++) { sockets[apic_pkg[i]] = 1; smt[apic_smt[i]] = 1; } - for(int i=0; i < 64; i++) { + for(int i=0; i < (*topo)->total_cores; i++) { if(sockets[i] != 0) (*topo)->sockets++; if(smt[i] != 0) @@ -193,6 +218,52 @@ bool build_topo_from_apic(uint32_t* apic_pkg, uint32_t* apic_smt, struct topolog (*topo)->logical_cores = (*topo)->total_cores / (*topo)->sockets; (*topo)->physical_cores = (*topo)->logical_cores / (*topo)->smt_available; + for(int i=0; i < (*topo)->cach->max_cache_level; i++) { + num_caches = 0; + memset(apic_id, 0, sizeof(uint32_t) * size); + + for(int c=0; c < (*topo)->total_cores; c++) { + apic_id[cache_id_apic[c][i]]++; + } + for(uint32_t c=0; c < size; c++) { + if(apic_id[c] > 0) num_caches++; + } + + (*topo)->cach->cach_arr[i]->num_caches = num_caches; + } + + free(sockets); + free(smt); + free(apic_id); + + return true; +} + +unsigned long getBitsFrom(const unsigned int val, const char from, const char to) +{ + unsigned long mask = (1<<(to+1)) - 1; + if (to == 31) return val >> from; + + return (val & mask) >> from; +} + +bool get_cache_topology_from_apic(struct topology** topo) { + uint32_t eax = 0x4; + uint32_t ebx = 0; + uint32_t ecx = 0; + uint32_t edx = 0; + + for(int i=0; i < (*topo)->cach->max_cache_level; i++) { + eax = 0x4; + ecx = i; + + cpuid(&eax, &ebx, &ecx, &edx); + + uint32_t SMTMaxCntPerEachCache = getBitsFrom(eax, 14, 25) + 1; // ((eax >> 14) & 0xFFF) + 1; + uint32_t EachCacheMaskWidth_targ_subleaf; + (*topo)->apic->cache_select_mask[i] = create_mask(SMTMaxCntPerEachCache,&EachCacheMaskWidth_targ_subleaf); + } + return true; } @@ -201,8 +272,17 @@ bool get_topology_from_apic(uint32_t cpuid_max_levels, struct topology** topo) { uint32_t* apic_pkg = malloc(sizeof(uint32_t) * (*topo)->total_cores); uint32_t* apic_core = malloc(sizeof(uint32_t) * 
(*topo)->total_cores); uint32_t* apic_smt = malloc(sizeof(uint32_t) * (*topo)->total_cores); + uint32_t** cache_smt_id_apic = malloc(sizeof(uint32_t*) * (*topo)->total_cores); + uint32_t** cache_id_apic = malloc(sizeof(uint32_t*) * (*topo)->total_cores); bool x2apic_id = cpuid_max_levels >= 0x0000000B; + for(int i=0; i < (*topo)->total_cores; i++) { + cache_smt_id_apic[i] = malloc(sizeof(uint32_t) * ((*topo)->cach->max_cache_level)); + cache_id_apic[i] = malloc(sizeof(uint32_t) * ((*topo)->cach->max_cache_level)); + } + (*topo)->apic->cache_select_mask = malloc(sizeof(uint32_t) * ((*topo)->cach->max_cache_level)); + (*topo)->apic->cache_id_apic = malloc(sizeof(uint32_t) * ((*topo)->cach->max_cache_level)); + if(x2apic_id) { if(!fill_topo_masks_x2apic(topo)) return false; @@ -212,6 +292,8 @@ bool get_topology_from_apic(uint32_t cpuid_max_levels, struct topology** topo) { return false; } + get_cache_topology_from_apic(topo); + for(int i=0; i < (*topo)->total_cores; i++) { if(!bind_to_cpu(i)) { printErr("Failed binding to CPU %d", i); @@ -222,9 +304,20 @@ bool get_topology_from_apic(uint32_t cpuid_max_levels, struct topology** topo) { apic_pkg[i] = (apic_id & (*topo)->apic->pkg_mask) >> (*topo)->apic->pkg_mask_shift; apic_core[i] = (apic_id & (*topo)->apic->core_mask) >> (*topo)->apic->smt_mask_width; apic_smt[i] = apic_id & (*topo)->apic->smt_mask; + + for(int c=0; c < (*topo)->cach->max_cache_level; c++) { + cache_smt_id_apic[i][c] = apic_id & (*topo)->apic->cache_select_mask[c]; + cache_id_apic[i][c] = apic_id & (-1 ^ (*topo)->apic->cache_select_mask[c]); + } } /* DEBUG + for(int i=0; i < (*topo)->cach->max_cache_level; i++) { + printf("[CACH %1d]", i); + for(int j=0; j < (*topo)->total_cores; j++) + printf("[%03d]", cache_id_apic[j][i]); + printf("\n"); + } for(int i=0; i < (*topo)->total_cores; i++) printf("[%2d] 0x%.8X\n", i, apic_pkg[i]); printf("\n"); @@ -235,11 +328,13 @@ bool get_topology_from_apic(uint32_t cpuid_max_levels, struct topology** topo) { 
printf("[%2d] 0x%.8X\n", i, apic_smt[i]);*/ - bool ret = build_topo_from_apic(apic_pkg, apic_smt, topo); + bool ret = build_topo_from_apic(apic_pkg, apic_smt, cache_id_apic, topo); // Assumption: If we cant get smt_available, we assume it is equal to smt_supported... if(!x2apic_id) (*topo)->smt_supported = (*topo)->smt_available; + //TODO: free + return ret; } @@ -253,7 +348,8 @@ uint32_t is_smt_enabled(struct topology* topo) { return false; } id = get_apic_id(false) & 1; // get the last bit - if(id == 1) return 2; // We assume there isn't any AMD CPU with more than 2th per core + printf("0x%.8X %d\n", get_apic_id(false), id); + //if(id == 1) return 2; // We assume there isn't any AMD CPU with more than 2th per core } return 1; diff --git a/src/apic.h b/src/apic.h index 5035667..25b9032 100644 --- a/src/apic.h +++ b/src/apic.h @@ -10,6 +10,8 @@ struct apic { uint32_t core_mask; uint32_t smt_mask_width; uint32_t smt_mask; + uint32_t* cache_select_mask; + uint32_t* cache_id_apic; }; bool get_topology_from_apic(uint32_t cpuid_max_levels, struct topology** topo); diff --git a/src/cpuid.c b/src/cpuid.c index b88b6ad..63d33b1 100644 --- a/src/cpuid.c +++ b/src/cpuid.c @@ -62,14 +62,6 @@ struct cpuInfo { uint32_t maxExtendedLevels; }; -struct cache { - int32_t L1i; - int32_t L1d; - int32_t L2; - int32_t L3; - int8_t nL3; -}; - struct frequency { int64_t base; int64_t max; @@ -99,6 +91,7 @@ void init_topology_struct(struct topology** topo) { (*topo)->smt_available = 0; (*topo)->smt_supported = 0; (*topo)->sockets = 0; + // TODO: The other fields... 
} void get_cpu_vendor_internal(char* name, uint32_t ebx,uint32_t ecx,uint32_t edx) { @@ -265,10 +258,12 @@ struct cpuInfo* get_cpu_info() { // Main reference: https://software.intel.com/content/www/us/en/develop/articles/intel-64-architecture-processor-topology-enumeration.html // Very interesting resource: https://wiki.osdev.org/Detecting_CPU_Topology_(80x86) -struct topology* get_topology_info(struct cpuInfo* cpu) { +struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) { struct topology* topo = malloc(sizeof(struct topology)); topo->apic = malloc(sizeof(struct apic)); + topo->cach = cach; init_topology_struct(&topo); + uint32_t eax = 0; uint32_t ebx = 0; uint32_t ecx = 0; @@ -361,8 +356,19 @@ uint8_t get_number_llc_amd(struct topology* topo) { return topo->logical_cores / num_sharing_cache; } -struct cache* get_cache_info(struct cpuInfo* cpu, struct topology* topo) { - struct cache* cach = malloc(sizeof(struct cache)); +struct cache* get_cache_info(struct cpuInfo* cpu) { + struct cache* cach = malloc(sizeof(struct cache)); + cach->L1i = malloc(sizeof(struct cach)); + cach->L1d = malloc(sizeof(struct cach)); + cach->L2 = malloc(sizeof(struct cach)); + cach->L3 = malloc(sizeof(struct cach)); + cach->cach_arr = malloc(sizeof(struct cach*) * 4); + cach->cach_arr[0] = cach->L1i; + cach->cach_arr[1] = cach->L1d; + cach->cach_arr[2] = cach->L2; + cach->cach_arr[3] = cach->L3; + cach->max_cache_level = 0; + uint32_t eax = 0; uint32_t ebx = 0; uint32_t ecx = 0; @@ -398,7 +404,7 @@ struct cache* get_cache_info(struct cpuInfo* cpu, struct topology* topo) { int32_t cache_type = eax & 0x1F; // If its 0, we tried fetching a non existing cache - if (cache_type > 0) { + if (cache_type > 0) { // TODO: Change to while not == 0 int32_t cache_level = (eax >>= 5) & 0x7; uint32_t cache_sets = ecx + 1; uint32_t cache_coherency_line_size = (ebx & 0xFFF) + 1; @@ -406,14 +412,15 @@ struct cache* get_cache_info(struct cpuInfo* cpu, struct topology* topo) { uint32_t 
cache_ways_of_associativity = ((ebx >>= 10) & 0x3FF) + 1; int32_t cache_total_size = cache_ways_of_associativity * cache_physical_line_partitions * cache_coherency_line_size * cache_sets; - - switch (cache_type) { + cach->max_cache_level++; + + switch (cache_type) { case 1: // Data Cache (We assume this is L1d) if(cache_level != 1) { printBug("Found data cache at level %d (expected 1)", cache_level); return NULL; } - cach->L1d = cache_total_size; + cach->L1d->size = cache_total_size; break; case 2: // Instruction Cache (We assume this is L1i) @@ -421,12 +428,12 @@ struct cache* get_cache_info(struct cpuInfo* cpu, struct topology* topo) { printBug("Found instruction cache at level %d (expected 1)", cache_level); return NULL; } - cach->L1i = cache_total_size; + cach->L1i->size = cache_total_size; break; case 3: // Unified Cache (This may be L2 or L3) - if(cache_level == 2) cach->L2 = cache_total_size; - else if(cache_level == 3) cach->L3 = cache_total_size; + if(cache_level == 2) cach->L2->size = cache_total_size; + else if(cache_level == 3) cach->L3->size = cache_total_size; else { printBug("Found unified cache at level %d (expected == 2 or 3)", cache_level); return NULL; @@ -435,11 +442,15 @@ struct cache* get_cache_info(struct cpuInfo* cpu, struct topology* topo) { default: // Unknown Type Cache printBug("Unknown Type Cache found at ID %d", i); - return NULL; + return NULL; } } - else if(i == 2) cach->L2 = UNKNOWN; - else if(i == 3) cach->L3 = UNKNOWN; + else if(i == 2) { + cach->L2->size = UNKNOWN; + } + else if(i == 3) { + cach->L3->size = UNKNOWN; + } else { printBug("Could not find cache ID %d", i); return NULL; @@ -448,40 +459,33 @@ struct cache* get_cache_info(struct cpuInfo* cpu, struct topology* topo) { // Sanity checks. 
If we read values greater than this, they can't be valid ones // The values were chosen by me - if(cach->L1i > 64 * 1024) { - printBug("Invalid L1i size: %dKB", cach->L1i/1024); + if(cach->L1i->size > 64 * 1024) { + printBug("Invalid L1i size: %dKB", cach->L1i->size/1024); return NULL; } - if(cach->L1d > 64 * 1024) { - printBug("Invalid L1d size: %dKB", cach->L1d/1024); + if(cach->L1d->size > 64 * 1024) { + printBug("Invalid L1d size: %dKB", cach->L1d->size/1024); return NULL; } - if(cach->L2 != UNKNOWN) { - if(cach->L3 != UNKNOWN && cach->L2 > 2 * 1048576) { - printBug("Invalid L2 size: %dMB", cach->L2/(1048576)); + if(cach->L2->size != UNKNOWN) { + if(cach->L3->size != UNKNOWN && cach->L2->size > 2 * 1048576) { + printBug("Invalid L2 size: %dMB", cach->L2->size/(1048576)); return NULL; } - else if(cach->L2 > 100 * 1048576) { - printBug("Invalid L2 size: %dMB", cach->L2/(1048576)); + else if(cach->L2->size > 100 * 1048576) { + printBug("Invalid L2 size: %dMB", cach->L2->size/(1048576)); return NULL; } } - if(cach->L3 != UNKNOWN && cach->L3 > 100 * 1048576) { - printBug("Invalid L3 size: %dMB", cach->L3/(1048576)); + if(cach->L3->size != UNKNOWN && cach->L3->size > 100 * 1048576) { + printBug("Invalid L3 size: %dMB", cach->L3->size/(1048576)); return NULL; } - if(cach->L2 == UNKNOWN) { + if(cach->L2->size == UNKNOWN) { printBug("Could not find L2 cache"); return NULL; } - if(cpu->cpu_vendor == VENDOR_INTEL) { - if(cach->L3 != UNKNOWN) cach->nL3 = 1; - } - else { - if(cach->L3 != UNKNOWN) cach->nL3 = get_number_llc_amd(topo); - } - return cach; } @@ -547,10 +551,10 @@ void debug_cpu_info(struct cpuInfo* cpu) { } void debug_cache(struct cache* cach) { - printf("L1i=%dB\n",cach->L1i); - printf("L1d=%dB\n",cach->L1d); - printf("L2=%dB\n",cach->L2); - printf("L3=%dB\n",cach->L3); + printf("L1i=%dB\n",cach->L1i->size); + printf("L1d=%dB\n",cach->L1d->size); + printf("L2=%dB\n",cach->L2->size); + printf("L3=%dB\n",cach->L3->size); } void debug_frequency(struct frequency* 
freq) { @@ -827,45 +831,30 @@ char* get_str_cache_one(int32_t cache_size) { return string; } -char* get_str_cache(int32_t cache_size, struct topology* topo, bool llc, int nllc) { - if(topo->sockets == 1) { - if(llc) { - if(nllc > 1) - return get_str_cache_two(cache_size, nllc); - else - return get_str_cache_one(cache_size); - } - else - return get_str_cache_two(cache_size, topo->physical_cores); - } - else { - if(llc) - return get_str_cache_two(cache_size, nllc * topo->sockets); - else - return get_str_cache_two(cache_size, topo->physical_cores * topo->sockets); - } -} - -char* get_str_l1i(struct cache* cach, struct topology* topo) { - return get_str_cache(cach->L1i, topo, false, 1); -} - -char* get_str_l1d(struct cache* cach, struct topology* topo) { - return get_str_cache(cach->L1d, topo, false, 1); -} - -char* get_str_l2(struct cache* cach, struct topology* topo) { - assert(cach->L2 != UNKNOWN); - if(cach->L3 == UNKNOWN) - return get_str_cache(cach->L2, topo, true, 1); +char* get_str_cache(int32_t cache_size, int32_t num_caches) { + if(num_caches > 1) + return get_str_cache_two(cache_size, num_caches); else - return get_str_cache(cach->L2, topo, false, 1); + return get_str_cache_one(cache_size); } -char* get_str_l3(struct cache* cach, struct topology* topo) { - if(cach->L3 == UNKNOWN) +char* get_str_l1i(struct cache* cach) { + return get_str_cache(cach->L1i->size, cach->L1i->num_caches); +} + +char* get_str_l1d(struct cache* cach) { + return get_str_cache(cach->L1d->size, cach->L1d->num_caches); +} + +char* get_str_l2(struct cache* cach) { + assert(cach->L2->size != UNKNOWN); + return get_str_cache(cach->L2->size, cach->L2->num_caches); +} + +char* get_str_l3(struct cache* cach) { + if(cach->L3->size == UNKNOWN) return NULL; - return get_str_cache(cach->L3, topo, true, cach->nL3); + return get_str_cache(cach->L3->size, cach->L3->num_caches); } char* get_str_freq(struct frequency* freq) { diff --git a/src/cpuid.h b/src/cpuid.h index 61895bd..3b2ad71 100644 --- 
a/src/cpuid.h +++ b/src/cpuid.h @@ -12,7 +12,22 @@ struct cpuInfo; struct frequency; -struct cache; + +struct cach { + int32_t size; + uint8_t num_caches; + // plenty of more properties to include in the future... +}; + +struct cache { + struct cach* L1i; + struct cach* L1d; + struct cach* L2; + struct cach* L3; + struct cach** cach_arr; + + uint8_t max_cache_level; +}; struct topology { int64_t total_cores; @@ -22,6 +37,7 @@ struct topology { uint32_t smt_supported; // Number of SMT that CPU supports (equal to smt_available if SMT is enabled) uint32_t sockets; struct apic* apic; + struct cache* cach; }; typedef int32_t VENDOR; @@ -30,9 +46,9 @@ struct cpuInfo* get_cpu_info(); VENDOR get_cpu_vendor(struct cpuInfo* cpu); uint32_t get_nsockets(struct topology* topo); int64_t get_freq(struct frequency* freq); -struct cache* get_cache_info(struct cpuInfo* cpu, struct topology* topo); +struct cache* get_cache_info(struct cpuInfo* cpu); struct frequency* get_frequency_info(struct cpuInfo* cpu); -struct topology* get_topology_info(struct cpuInfo* cpu); +struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach); char* get_str_cpu_name(struct cpuInfo* cpu); char* get_str_ncores(struct cpuInfo* cpu); @@ -42,10 +58,10 @@ char* get_str_fma(struct cpuInfo* cpu); char* get_str_aes(struct cpuInfo* cpu); char* get_str_sha(struct cpuInfo* cpu); -char* get_str_l1i(struct cache* cach, struct topology* topo); -char* get_str_l1d(struct cache* cach, struct topology* topo); -char* get_str_l2(struct cache* cach, struct topology* topo); -char* get_str_l3(struct cache* cach, struct topology* topo); +char* get_str_l1i(struct cache* cach); +char* get_str_l1d(struct cache* cach); +char* get_str_l2(struct cache* cach); +char* get_str_l3(struct cache* cach); char* get_str_freq(struct frequency* freq); diff --git a/src/main.c b/src/main.c index 61dbf30..d227099 100644 --- a/src/main.c +++ b/src/main.c @@ -6,7 +6,7 @@ #include "cpuid.h" #include "global.h" -static const char* 
VERSION = "0.6"; +static const char* VERSION = "0.61"; void print_help(char *argv[]) { printf("Usage: %s [--version] [--help] [--levels] [--style fancy|retro|legacy] [--color 'R,G,B:R,G,B:R,G,B:R,G,B']\n\ @@ -59,12 +59,12 @@ int main(int argc, char* argv[]) { if(freq == NULL) return EXIT_FAILURE; - struct topology* topo = get_topology_info(cpu); - if(topo == NULL) + struct cache* cach = get_cache_info(cpu); + if(cach == NULL) return EXIT_FAILURE; - struct cache* cach = get_cache_info(cpu, topo); - if(cach == NULL) + struct topology* topo = get_topology_info(cpu, cach); + if(topo == NULL) return EXIT_FAILURE; if(print_cpufetch(cpu, cach, freq, topo, get_style(), get_colors())) diff --git a/src/printer.c b/src/printer.c index f2c0f8e..0f148c1 100644 --- a/src/printer.c +++ b/src/printer.c @@ -323,10 +323,10 @@ bool print_cpufetch(struct cpuInfo* cpu, struct cache* cach, struct frequency* f char* fma = get_str_fma(cpu); char* aes = get_str_aes(cpu); char* sha = get_str_sha(cpu); - char* l1i = get_str_l1i(cach, topo); - char* l1d = get_str_l1d(cach, topo); - char* l2 = get_str_l2(cach, topo); - char* l3 = get_str_l3(cach, topo); + char* l1i = get_str_l1i(topo->cach); + char* l1d = get_str_l1d(topo->cach); + char* l2 = get_str_l2(topo->cach); + char* l3 = get_str_l3(topo->cach); char* pp = get_str_peak_performance(cpu,topo,get_freq(freq)); setAttribute(art,ATTRIBUTE_NAME,cpu_name);