Fix #23. I tried fetching the cache topology on AMD but could not find a proper way, so the code falls back to the approach from two commits ago: cpufetch has to guess the cache sizes, except for L3, which can be fetched. Since I have been trying many different approaches, the code needs to be refactored.

This commit is contained in:
Dr-Noob
2020-08-30 12:12:25 +02:00
parent 69cc08759a
commit dae0f678ad
5 changed files with 96 additions and 64 deletions

View File

@@ -48,22 +48,15 @@ uint32_t create_mask(uint32_t num_entries, uint32_t *mask_width) {
return (1 << i) -1;
}
uint32_t get_apic_id(bool x2apic_id) {
uint32_t get_apic_id() {
uint32_t eax = 0;
uint32_t ebx = 0;
uint32_t ecx = 0;
uint32_t edx = 0;
if(x2apic_id) {
eax = 0x0000000B;
cpuid(&eax, &ebx, &ecx, &edx);
return edx;
}
else {
eax = 0x00000001;
cpuid(&eax, &ebx, &ecx, &edx);
return (ebx >> 24);
}
eax = 0x00000001;
cpuid(&eax, &ebx, &ecx, &edx);
return (ebx >> 24);
}
bool bind_to_cpu(int cpu_id) {
@@ -172,6 +165,52 @@ bool fill_topo_masks_x2apic(struct topology** topo) {
return true;
}
// Returns the number of last-level (L3) caches in the system.
// Queries AMD's cache-properties leaf (CPUID Fn8000_001D) for cache
// index 3 and derives the LLC count from how many logical cores
// share one cache instance.
uint8_t get_number_llc_amd(struct topology* topo) {
  uint32_t a = 0x8000001D;
  uint32_t b = 0;
  uint32_t c = 3; // cache index of the LLC (L3)
  uint32_t d = 0;

  cpuid(&a, &b, &c, &d);

  // EAX[25:14] holds "logical processors sharing this cache" minus one.
  uint32_t sharing = 1 + ((a >> 14) & 0xfff);

  // One LLC per group of 'sharing' logical cores.
  return topo->logical_cores / sharing;
}
// Estimates the per-level cache counts for AMD CPUs.
// Since the real cache topology cannot be fetched reliably (issue #23),
// assume one private L1i/L1d/L2 per physical core; only the L3 count is
// actually derived from cpuid (see get_number_llc_amd).
void guess_cach_sizes_amd(struct topology** topo) {
  struct topology* t = *topo;

  t->cach->L1i->num_caches = t->physical_cores;
  t->cach->L1d->num_caches = t->physical_cores;
  t->cach->L2->num_caches = t->physical_cores;
  t->cach->L3->num_caches = get_number_llc_amd(t);
}
// Fills in the cache topology for AMD CPUs.
//
// AMD does not support CPUID leaves 0xB/0x1F (extended topology
// enumeration), and querying leaf 0x8000001E (node id / core id /
// siblings) did not yield a reliable core mapping either (see issue #23),
// so instead of walking the full topology we only estimate the per-level
// cache counts. Always returns true.
bool get_topology_amd(struct topology** topo) {
  guess_cach_sizes_amd(topo);
  return true;
}
bool arr_contains_value(uint32_t* arr, uint32_t value, uint32_t arr_size) {
for(uint32_t i=0; i < arr_size; i++) {
if(arr[i] == value) return true;
@@ -267,14 +306,17 @@ bool get_cache_topology_from_apic(struct topology** topo) {
return true;
}
bool get_topology_from_apic(uint32_t cpuid_max_levels, struct topology** topo) {
bool get_topology_from_apic(struct cpuInfo* cpu, struct topology** topo) {
if(cpu->cpu_vendor == VENDOR_AMD)
return get_topology_amd(topo);
uint32_t apic_id;
uint32_t* apic_pkg = malloc(sizeof(uint32_t) * (*topo)->total_cores);
uint32_t* apic_core = malloc(sizeof(uint32_t) * (*topo)->total_cores);
uint32_t* apic_smt = malloc(sizeof(uint32_t) * (*topo)->total_cores);
uint32_t** cache_smt_id_apic = malloc(sizeof(uint32_t*) * (*topo)->total_cores);
uint32_t** cache_id_apic = malloc(sizeof(uint32_t*) * (*topo)->total_cores);
bool x2apic_id = cpuid_max_levels >= 0x0000000B;
bool x2apic_id = cpu->maxLevels >= 0x0000000B;
for(int i=0; i < (*topo)->total_cores; i++) {
cache_smt_id_apic[i] = malloc(sizeof(uint32_t) * ((*topo)->cach->max_cache_level));
@@ -299,7 +341,7 @@ bool get_topology_from_apic(uint32_t cpuid_max_levels, struct topology** topo) {
printErr("Failed binding to CPU %d", i);
return false;
}
apic_id = get_apic_id(x2apic_id);
apic_id = get_apic_id();
apic_pkg[i] = (apic_id & (*topo)->apic->pkg_mask) >> (*topo)->apic->pkg_mask_shift;
apic_core[i] = (apic_id & (*topo)->apic->core_mask) >> (*topo)->apic->smt_mask_width;
@@ -331,7 +373,7 @@ bool get_topology_from_apic(uint32_t cpuid_max_levels, struct topology** topo) {
bool ret = build_topo_from_apic(apic_pkg, apic_smt, cache_id_apic, topo);
// Assumption: If we cant get smt_available, we assume it is equal to smt_supported...
if(!x2apic_id) (*topo)->smt_supported = (*topo)->smt_available;
if (!x2apic_id) (*topo)->smt_supported = (*topo)->smt_available;
//TODO: free
@@ -348,8 +390,7 @@ uint32_t is_smt_enabled(struct topology* topo) {
return false;
}
id = get_apic_id(false) & 1; // get the last bit
printf("0x%.8X %d\n", get_apic_id(false), id);
//if(id == 1) return 2; // We assume there isn't any AMD CPU with more than 2th per core
if(id == 1) return 2; // We assume there isn't any AMD CPU with more than 2th per core
}
return 1;

View File

@@ -14,7 +14,7 @@ struct apic {
uint32_t* cache_id_apic;
};
bool get_topology_from_apic(uint32_t cpuid_max_levels, struct topology** topo);
bool get_topology_from_apic(struct cpuInfo* cpu, struct topology** topo);
uint32_t is_smt_enabled(struct topology* topo);
#endif

View File

@@ -37,31 +37,6 @@
* cpuid amd: https://www.amd.com/system/files/TechDocs/25481.pdf
*/
struct cpuInfo {
bool AVX;
bool AVX2;
bool AVX512;
bool SSE;
bool SSE2;
bool SSE3;
bool SSSE3;
bool SSE4a;
bool SSE4_1;
bool SSE4_2;
bool FMA3;
bool FMA4;
bool AES;
bool SHA;
VENDOR cpu_vendor;
char* cpu_name;
// Max cpuids levels
uint32_t maxLevels;
// Max cpuids extended levels
uint32_t maxExtendedLevels;
};
struct frequency {
int64_t base;
int64_t max;
@@ -287,7 +262,7 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) {
switch(cpu->cpu_vendor) {
case VENDOR_INTEL:
if (cpu->maxLevels >= 0x00000004) {
get_topology_from_apic(cpu->maxLevels, &topo);
get_topology_from_apic(cpu, &topo);
}
else {
printErr("Can't read topology information from cpuid (needed level is 0x%.8X, max is 0x%.8X)", 0x00000001, cpu->maxLevels);
@@ -297,7 +272,7 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) {
topo->smt_supported = 1;
}
break;
case VENDOR_AMD:
case VENDOR_AMD:
if (cpu->maxExtendedLevels >= 0x80000008) {
eax = 0x80000008;
cpuid(&eax, &ebx, &ecx, &edx);
@@ -319,6 +294,7 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) {
topo->logical_cores = 1;
topo->smt_supported = 1;
}
if (cpu->maxLevels >= 0x0000000B) {
topo->smt_available = is_smt_enabled(topo);
}
@@ -331,9 +307,14 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) {
if(topo->smt_supported > 1)
topo->sockets = topo->total_cores / topo->smt_supported / topo->physical_cores; // Idea borrowed from lscpu
else
topo->sockets = topo->total_cores / topo->physical_cores;
topo->sockets = topo->total_cores / topo->physical_cores;
if (cpu->maxExtendedLevels >= 0x8000001D) {
get_topology_from_apic(cpu, &topo);
}
break;
default:
printBug("Cant get topology because VENDOR is empty");
return NULL;
@@ -342,20 +323,6 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) {
return topo;
}
uint8_t get_number_llc_amd(struct topology* topo) {
uint32_t eax = 0x8000001D;
uint32_t ebx = 0;
uint32_t ecx = 3; // LLC Level
uint32_t edx = 0;
uint32_t num_sharing_cache = 0;
cpuid(&eax, &ebx, &ecx, &edx);
num_sharing_cache = ((eax >> 14) & 0xfff) + 1;
return topo->logical_cores / num_sharing_cache;
}
struct cache* get_cache_info(struct cpuInfo* cpu) {
struct cache* cach = malloc(sizeof(struct cache));
cach->L1i = malloc(sizeof(struct cach));

View File

@@ -10,9 +10,35 @@
#define UNKNOWN -1
struct cpuInfo;
typedef int32_t VENDOR;
struct frequency;
// CPU identification and ISA-extension feature flags, filled in via cpuid.
struct cpuInfo {
// SIMD extension support flags (one per ISA extension).
bool AVX;
bool AVX2;
bool AVX512;
bool SSE;
bool SSE2;
bool SSE3;
bool SSSE3;
bool SSE4a;
bool SSE4_1;
bool SSE4_2;
// Fused multiply-add variants (FMA4 is AMD-only).
bool FMA3;
bool FMA4;
// Hardware crypto/hash instruction support.
bool AES;
bool SHA;
// CPU manufacturer (e.g. VENDOR_INTEL / VENDOR_AMD).
VENDOR cpu_vendor;
// Marketing name of the CPU — presumably the cpuid brand string; heap-allocated.
char* cpu_name;
// Max cpuids levels
uint32_t maxLevels;
// Max cpuids extended levels
uint32_t maxExtendedLevels;
};
struct cach {
int32_t size;
uint8_t num_caches;
@@ -40,8 +66,6 @@ struct topology {
struct cache* cach;
};
typedef int32_t VENDOR;
struct cpuInfo* get_cpu_info();
VENDOR get_cpu_vendor(struct cpuInfo* cpu);
uint32_t get_nsockets(struct topology* topo);

View File

@@ -6,7 +6,7 @@
#include "cpuid.h"
#include "global.h"
static const char* VERSION = "0.61";
static const char* VERSION = "0.62";
void print_help(char *argv[]) {
printf("Usage: %s [--version] [--help] [--levels] [--style fancy|retro|legacy] [--color 'R,G,B:R,G,B:R,G,B:R,G,B']\n\