Fix #23. I tried fetching the cache topology on AMD but could not find a proper way to do it, so the code falls back to the approach used two commits ago. cpufetch has to guess the cache sizes, except for L3, which can be fetched. Since I have been trying many different approaches, the code needs to be refactored.

2026-05-14 21:00:07 +02:00 · 2020-08-30 12:12:25 +02:00
parent 69cc08759a
commit dae0f678ad
5 changed files with 96 additions and 64 deletions
--- a/src/apic.c
+++ b/src/apic.c
@@ -48,22 +48,15 @@ uint32_t create_mask(uint32_t num_entries, uint32_t *mask_width) {
  return (1 << i) -1;
 }
-uint32_t get_apic_id(bool x2apic_id) {
+uint32_t get_apic_id() {
  uint32_t eax = 0;
  uint32_t ebx = 0;
  uint32_t ecx = 0;
  uint32_t edx = 0;
-  if(x2apic_id) {
+  eax = 0x00000001;
-    eax = 0x0000000B;
+  cpuid(&eax, &ebx, &ecx, &edx);
-    cpuid(&eax, &ebx, &ecx, &edx);
+  return (ebx >> 24);
    return edx;
  }
  else {
    eax = 0x00000001;
    cpuid(&eax, &ebx, &ecx, &edx);
    return (ebx >> 24);
  }
 }
 bool bind_to_cpu(int cpu_id) {
@@ -172,6 +165,52 @@ bool fill_topo_masks_x2apic(struct topology** topo) {
  return true;
 }
 /*
  * Computes how many last-level (L3) caches exist on an AMD CPU.
  *
  * Queries CPUID leaf 0x8000001D with ecx = 3 (the LLC level) and decodes
  * bits 25:14 of the returned eax, plus one, as the number of logical
  * processors sharing one such cache (per AMD's CPUID documentation for
  * Fn8000_001D). The result is topo->logical_cores divided by that count,
  * narrowed to uint8_t on return.
  */
 uint8_t get_number_llc_amd(struct topology* topo) {
  uint32_t eax = 0x8000001D; // cache properties leaf
  uint32_t ebx = 0;
  uint32_t ecx = 3;          // LLC level
  uint32_t edx = 0;

  cpuid(&eax, &ebx, &ecx, &edx);

  // 12-bit field starting at bit 14; the "+ 1" keeps the divisor >= 1
  const uint32_t sharers_per_llc = ((eax >> 14) & 0xfff) + 1;

  return topo->logical_cores / sharers_per_llc;
 }
 void guess_cach_sizes_amd(struct topology** topo) {
  (*topo)->cach->L1i->num_caches = (*topo)->physical_cores;
  (*topo)->cach->L1d->num_caches = (*topo)->physical_cores;
  (*topo)->cach->L2->num_caches = (*topo)->physical_cores;
  (*topo)->cach->L3->num_caches = get_number_llc_amd(*topo);
 }
 /*
  * AMD path of the topology detection.
  *
  * Currently this only delegates to guess_cach_sizes_amd(), which fills the
  * num_caches fields of (*topo)->cach, and then unconditionally returns true.
  */
 bool get_topology_amd(struct topology** topo) {
  // Disabled experiment: reading CPUID leaf 0x8000001E. Kept for reference
  // only; none of it is compiled.
  /*uint32_t eax = 0x8000001E;
  uint32_t ebx = 0;
  uint32_t ecx = 0;
  uint32_t edx = 0;
  uint32_t err;
  cpuid(&eax, &ebx, &ecx, &edx);
  uint32_t node_id  = ecx & 0xff;
  uint32_t cpu_core_id = ebx & 0xff;  
  uint32_t smp_num_siblings = ((ebx >> 8) & 0xff) + 1;
  uint32_t x86_max_cores = 0;
  if (smp_num_siblings > 1)
    x86_max_cores /= smp_num_siblings;*/
  // AMD does not support CPUID 0xB or 0x1F to query topology
  // err = detect_extended_topology(cpu, topo); 
  guess_cach_sizes_amd(topo);  
  return true;
 }
 bool arr_contains_value(uint32_t* arr, uint32_t value, uint32_t arr_size) {
  for(uint32_t i=0; i < arr_size; i++) {
    if(arr[i] == value) return true;    
@@ -267,14 +306,17 @@ bool get_cache_topology_from_apic(struct topology** topo) {
  return true;
 }
-bool get_topology_from_apic(uint32_t cpuid_max_levels, struct topology** topo) {    
+bool get_topology_from_apic(struct cpuInfo* cpu, struct topology** topo) { 
  if(cpu->cpu_vendor == VENDOR_AMD)
    return get_topology_amd(topo);
  uint32_t apic_id;
  uint32_t* apic_pkg = malloc(sizeof(uint32_t) * (*topo)->total_cores);
  uint32_t* apic_core = malloc(sizeof(uint32_t) * (*topo)->total_cores);
  uint32_t* apic_smt = malloc(sizeof(uint32_t) * (*topo)->total_cores);
  uint32_t** cache_smt_id_apic = malloc(sizeof(uint32_t*) * (*topo)->total_cores);
  uint32_t** cache_id_apic = malloc(sizeof(uint32_t*) * (*topo)->total_cores);
-  bool x2apic_id = cpuid_max_levels >= 0x0000000B;
+  bool x2apic_id = cpu->maxLevels >= 0x0000000B;
  for(int i=0; i < (*topo)->total_cores; i++) {
    cache_smt_id_apic[i] = malloc(sizeof(uint32_t) * ((*topo)->cach->max_cache_level));
@@ -299,7 +341,7 @@ bool get_topology_from_apic(uint32_t cpuid_max_levels, struct topology** topo) {
      printErr("Failed binding to CPU %d", i);
      return false;
    }
-    apic_id = get_apic_id(x2apic_id);
+    apic_id = get_apic_id();
    apic_pkg[i] = (apic_id & (*topo)->apic->pkg_mask) >> (*topo)->apic->pkg_mask_shift;
    apic_core[i] = (apic_id & (*topo)->apic->core_mask) >> (*topo)->apic->smt_mask_width;
@@ -331,7 +373,7 @@ bool get_topology_from_apic(uint32_t cpuid_max_levels, struct topology** topo) {
  bool ret = build_topo_from_apic(apic_pkg, apic_smt, cache_id_apic, topo);
  // Assumption: If we cant get smt_available, we assume it is equal to smt_supported...
-  if(!x2apic_id) (*topo)->smt_supported = (*topo)->smt_available;
+  if (!x2apic_id) (*topo)->smt_supported = (*topo)->smt_available;
  //TODO: free
@@ -348,8 +390,7 @@ uint32_t is_smt_enabled(struct topology* topo) {
      return false;
    }
    id = get_apic_id(false) & 1; // get the last bit
-    printf("0x%.8X %d\n", get_apic_id(false), id);
+    if(id == 1) return 2; // We assume there isn't any AMD CPU with more than 2th per core
    //if(id == 1) return 2; // We assume there isn't any AMD CPU with more than 2th per core
  }
  return 1;  
--- a/src/apic.h
+++ b/src/apic.h
@@ -14,7 +14,7 @@ struct apic {
  uint32_t* cache_id_apic;
 };
-bool get_topology_from_apic(uint32_t cpuid_max_levels, struct topology** topo);
+bool get_topology_from_apic(struct cpuInfo* cpu, struct topology** topo);
 uint32_t is_smt_enabled(struct topology* topo);
 #endif
--- a/src/cpuid.c
+++ b/src/cpuid.c
@@ -37,31 +37,6 @@
 * cpuid amd: https://www.amd.com/system/files/TechDocs/25481.pdf
 */
 struct cpuInfo {
  bool AVX;
  bool AVX2;
  bool AVX512;
  bool SSE;
  bool SSE2;
  bool SSE3;
  bool SSSE3;
  bool SSE4a;
  bool SSE4_1;
  bool SSE4_2;
  bool FMA3;
  bool FMA4;
  bool AES;
  bool SHA;
  VENDOR cpu_vendor;
  char* cpu_name;
  //  Max cpuids levels
  uint32_t maxLevels;
  // Max cpuids extended levels
  uint32_t maxExtendedLevels;
 };
 struct frequency {
  int64_t base;
  int64_t max;
@@ -287,7 +262,7 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) {
  switch(cpu->cpu_vendor) {
    case VENDOR_INTEL:
      if (cpu->maxLevels >= 0x00000004) { 
-        get_topology_from_apic(cpu->maxLevels, &topo);
+        get_topology_from_apic(cpu, &topo);
      }
      else {                
        printErr("Can't read topology information from cpuid (needed level is 0x%.8X, max is 0x%.8X)", 0x00000001, cpu->maxLevels); 
@@ -297,7 +272,7 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) {
        topo->smt_supported = 1;
      }      
      break;
-    case VENDOR_AMD:  
+    case VENDOR_AMD:       
      if (cpu->maxExtendedLevels >= 0x80000008) {
        eax = 0x80000008;  
        cpuid(&eax, &ebx, &ecx, &edx);        
@@ -319,6 +294,7 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) {
        topo->logical_cores = 1;
        topo->smt_supported = 1;         
      }
      if (cpu->maxLevels >= 0x0000000B) {
        topo->smt_available = is_smt_enabled(topo);
      }
@@ -331,9 +307,14 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) {
      if(topo->smt_supported > 1)
        topo->sockets = topo->total_cores / topo->smt_supported / topo->physical_cores; // Idea borrowed from lscpu
      else
-        topo->sockets = topo->total_cores / topo->physical_cores;
+        topo->sockets = topo->total_cores / topo->physical_cores;    
      if (cpu->maxExtendedLevels >= 0x8000001D) {
        get_topology_from_apic(cpu, &topo);
      }
      break;
    default:
      printBug("Cant get topology because VENDOR is empty");
      return NULL;
@@ -342,20 +323,6 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) {
  return topo;
 }
 uint8_t get_number_llc_amd(struct topology* topo) {
  uint32_t eax = 0x8000001D;
  uint32_t ebx = 0;
  uint32_t ecx = 3; // LLC Level
  uint32_t edx = 0;     
  uint32_t num_sharing_cache = 0;
  cpuid(&eax, &ebx, &ecx, &edx); 
  num_sharing_cache = ((eax >> 14) & 0xfff) + 1;
  return topo->logical_cores / num_sharing_cache;
 }
 struct cache* get_cache_info(struct cpuInfo* cpu) {
  struct cache* cach = malloc(sizeof(struct cache));
  cach->L1i = malloc(sizeof(struct cach));
--- a/src/cpuid.h
+++ b/src/cpuid.h
@@ -10,9 +10,35 @@
 #define UNKNOWN -1
-struct cpuInfo;
+typedef int32_t VENDOR;
 struct frequency;
 /*
  * Description of the detected CPU: one flag per instruction-set extension,
  * the vendor, the name string and the highest CPUID levels.
  * NOTE(review): the flags are presumably true when the CPU reports support
  * for the extension of the same name; confirm against the code that fills
  * this struct.
  */
 struct cpuInfo {
  // Instruction-set extension flags
  bool AVX;
  bool AVX2;
  bool AVX512;
  bool SSE;
  bool SSE2;
  bool SSE3;
  bool SSSE3;
  bool SSE4a;
  bool SSE4_1;
  bool SSE4_2;
  bool FMA3;
  bool FMA4;
  bool AES;
  bool SHA;
  // Vendor identifier (VENDOR is a typedef of int32_t)
  VENDOR cpu_vendor;
  // CPU name string; NOTE(review): ownership/lifetime not visible here
  char* cpu_name;
  // Maximum standard CPUID level; compared against leaf numbers such as 0x0000000B
  uint32_t maxLevels;
  // Maximum extended CPUID level; compared against leaf numbers such as 0x8000001D
  uint32_t maxExtendedLevels;
 };
 struct cach {
  int32_t size;
  uint8_t num_caches;
@@ -40,8 +66,6 @@ struct topology {
  struct cache* cach;
 };
 typedef int32_t VENDOR;
 struct cpuInfo* get_cpu_info();
 VENDOR get_cpu_vendor(struct cpuInfo* cpu);
 uint32_t get_nsockets(struct topology* topo);
--- a/src/main.c
+++ b/src/main.c
@@ -6,7 +6,7 @@
 #include "cpuid.h"
 #include "global.h"
-static const char* VERSION = "0.61";
+static const char* VERSION = "0.62";
 void print_help(char *argv[]) {
  printf("Usage: %s [--version] [--help] [--levels] [--style fancy|retro|legacy] [--color 'R,G,B:R,G,B:R,G,B:R,G,B']\n\