[v1.03] Bump version

[v1.02] Merge Alder Lake into master branch
[v1.02] Added basic support for Zen4
2026-03-25 16:00:39 +01:00 · 2023-01-05 11:17:49 +01:00 · 2023-01-05 11:17:16 +01:00 · 2022-12-03 16:29:53 +00:00 · 2022-12-02 21:25:30 +00:00 · 2022-12-02 20:53:40 +00:00
16 changed files with 627 additions and 213 deletions
--- a/2
+++ b/2
@@ -28,7 +28,7 @@ ifneq ($(OS),Windows_NT)
 		SRC_DIR=src/ppc/
 		SOURCE += $(COMMON_SRC) $(SRC_DIR)ppc.c $(SRC_DIR)uarch.c $(SRC_DIR)udev.c
 		HEADERS += $(COMMON_HDR) $(SRC_DIR)ppc.h $(SRC_DIR)uarch.h  $(SRC_DIR)udev.c
-		CFLAGS += -DARCH_PPC -std=gnu99 -fstack-protector-all
+		CFLAGS += -DARCH_PPC -std=gnu99 -fstack-protector-all -Wno-language-extension-token
 	else ifeq ($(arch), $(filter $(arch), arm aarch64_be aarch64 arm64 armv8b armv8l armv7l armv6l))
 		SRC_DIR=src/arm/
 		SOURCE += $(COMMON_SRC) $(SRC_DIR)midr.c $(SRC_DIR)uarch.c $(SRC_DIR)soc.c $(SRC_DIR)udev.c
--- a/src/arm/midr.c
+++ b/src/arm/midr.c
@@ -81,12 +81,23 @@ int64_t get_peak_performance(struct cpuInfo* cpu) {
  }
  int64_t flops = 0;
  ptr = cpu;
  if(cpu->soc->soc_vendor == SOC_VENDOR_APPLE) {
    // Special case for M1/M2
    // First we find the E cores, then the P
    // M1 has 2 (E cores) or 4 (P cores) FMA units
    // Source: https://dougallj.github.io/applecpu/firestorm-simd.html
    flops += ptr->topo->total_cores * (get_freq(ptr->freq) * 1000000) * 2 * 4 * 2;
    ptr = ptr->next_cpu;
    flops += ptr->topo->total_cores * (get_freq(ptr->freq) * 1000000) * 2 * 4 * 4;
  }
  else {
    for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
      flops += ptr->topo->total_cores * (get_freq(ptr->freq) * 1000000);
    }
    if(cpu->feat->NEON) flops = flops * 4;
  }
  return flops;
 }
@@ -273,11 +284,46 @@ void fill_cpu_info_firestorm_icestorm(struct cpuInfo* cpu, uint32_t pcores, uint
  fire->next_cpu = NULL;
 }
 // Build the two-node cpuInfo list describing an Apple M2.
 //
 // The first node (cpu itself) is filled as Blizzard, the little/efficiency
 // cores, and a second, newly allocated node linked through next_cpu is
 // filled as Avalanche, the big/performance cores. get_peak_performance
 // relies on this order (E cores first, then P cores).
 //
 //   cpu:    already allocated head node; it is overwritten in place.
 //   pcores: number of performance (Avalanche) cores.
 //   ecores: number of efficiency (Blizzard) cores.
 //
 // NOTE(review): malloc results are not checked here; the function has no
 // way to report a failure to its caller.
 void fill_cpu_info_avalanche_blizzard(struct cpuInfo* cpu, uint32_t pcores, uint32_t ecores) {
  // 1. Fill BLIZZARD (efficiency cores)
  struct cpuInfo* bli = cpu;
  bli->midr = MIDR_APPLE_M2_BLIZZARD;
  bli->arch = get_uarch_from_midr(bli->midr, bli);
  bli->cach = get_cache_info(bli);
  bli->feat = get_features_info();
  bli->topo = malloc(sizeof(struct topology));
  bli->topo->cach = bli->cach;
  // Blizzard is the little core, so it takes the efficiency core count
  bli->topo->total_cores = ecores;
  bli->freq = malloc(sizeof(struct frequency));
  bli->freq->base = UNKNOWN_DATA;
  // Hard-coded max frequency (presumably MHz: get_peak_performance
  // multiplies the frequency by 1000000)
  bli->freq->max = 2800;
  bli->hv = malloc(sizeof(struct hypervisor));
  bli->hv->present = false;
  bli->next_cpu = malloc(sizeof(struct cpuInfo));
  // 2. Fill AVALANCHE (performance cores)
  struct cpuInfo* ava = bli->next_cpu;
  ava->midr = MIDR_APPLE_M2_AVALANCHE;
  ava->arch = get_uarch_from_midr(ava->midr, ava);
  ava->cach = get_cache_info(ava);
  ava->feat = get_features_info();
  ava->topo = malloc(sizeof(struct topology));
  ava->topo->cach = ava->cach;
  // Avalanche is the big core, so it takes the performance core count
  ava->topo->total_cores = pcores;
  ava->freq = malloc(sizeof(struct frequency));
  ava->freq->base = UNKNOWN_DATA;
  ava->freq->max = 3500;
  ava->hv = malloc(sizeof(struct hypervisor));
  ava->hv->present = false;
  // Avalanche is the last node of the list
  ava->next_cpu = NULL;
 }
 struct cpuInfo* get_cpu_info_mach(struct cpuInfo* cpu) {
  uint32_t cpu_family = get_sys_info_by_name("hw.cpufamily");
-  // Manually fill the cpuInfo assuming that the CPU
+  // Manually fill the cpuInfo assuming that
-  // is a ARM_FIRESTORM_ICESTORM (Apple M1)
+  // the CPU is an Apple M1/M2
  if(cpu_family == CPUFAMILY_ARM_FIRESTORM_ICESTORM) {
    cpu->num_cpus = 2;
    // Now detect the M1 version
@@ -287,13 +333,20 @@ struct cpuInfo* get_cpu_info_mach(struct cpuInfo* cpu) {
      fill_cpu_info_firestorm_icestorm(cpu, 4, 4);
    }
    else if(cpu_subfamily == CPUSUBFAMILY_ARM_HS || cpu_subfamily == CPUSUBFAMILY_ARM_HC_HD) {
-      // Apple M1 Pro/Max. Detect number of cores
+      // Apple M1 Pro/Max/Ultra. Detect number of cores
      uint32_t physicalcpu = get_sys_info_by_name("hw.physicalcpu");
-      if(physicalcpu < 8 || physicalcpu > 10) {
+      if(physicalcpu == 20) {
-        printBug("Found invalid physicalcpu: 0x%.8X", physicalcpu);
+        // M1 Ultra
        fill_cpu_info_firestorm_icestorm(cpu, 16, 4);
      }
      else if(physicalcpu == 8 || physicalcpu == 10) {
        // M1 Pro/Max
        fill_cpu_info_firestorm_icestorm(cpu, physicalcpu-2, 2);
      }
      else {
        printBug("Found invalid physical cpu number: %d", physicalcpu);
        return NULL;
      }
      fill_cpu_info_firestorm_icestorm(cpu, physicalcpu-2, 2);
    }
    else {
      printBug("Found invalid cpu_subfamily: 0x%.8X", cpu_subfamily);
@@ -302,6 +355,13 @@ struct cpuInfo* get_cpu_info_mach(struct cpuInfo* cpu) {
    cpu->soc = get_soc();
    cpu->peak_performance = get_peak_performance(cpu);
  }
  else if(cpu_family == CPUFAMILY_ARM_AVALANCHE_BLIZZARD) {
    // Just the "normal" M2 exists for now
    cpu->num_cpus = 2;
    fill_cpu_info_avalanche_blizzard(cpu, 4, 4);
    cpu->soc = get_soc();
    cpu->peak_performance = get_peak_performance(cpu);
  }
  else {
    printBug("Found invalid cpu_family: 0x%.8X", cpu_family);
    return NULL;
--- a/src/arm/soc.c
+++ b/src/arm/soc.c
@@ -111,11 +111,11 @@ bool match_broadcom(char* soc_name, struct system_on_chip* soc) {
 bool match_hisilicon(char* soc_name, struct system_on_chip* soc) {
  char* tmp;
-  if((tmp = strstr(soc_name, "Hi")) == NULL)
+  if((tmp = strstr(soc_name, "hi")) == NULL)
    return false;
  SOC_START
-  SOC_EQ(tmp, "Hi3620GFC",  "K3V2",  SOC_HISILICON_3620, soc, 40)
+  SOC_EQ(tmp, "hi3620GFC",  "K3V2",  SOC_HISILICON_3620, soc, 40)
  //SOC_EQ(tmp, "?",        "K3V2E", SOC_KIRIN, soc,  ?)
  //SOC_EQ(tmp, "?",        "620",   SOC_KIRIN, soc, 28)
  //SOC_EQ(tmp, "?",        "650",   SOC_KIRIN, soc, 16)
@@ -131,18 +131,18 @@ bool match_hisilicon(char* soc_name, struct system_on_chip* soc) {
  //SOC_EQ(tmp, "?",        "9000E", SOC_KIRIN, soc,  5)
  //SOC_EQ(tmp, "?",        "910",   SOC_KIRIN, soc, 28)
  //SOC_EQ(tmp, "?",        "910T",  SOC_KIRIN, soc, 28)
-  SOC_EQ(tmp, "Hi3630",     "920",   SOC_HISILICON_3630, soc, 28)
+  SOC_EQ(tmp, "hi3630",     "920",   SOC_HISILICON_3630, soc, 28)
  //SOC_EQ(tmp, "?",        "925",   SOC_KIRIN, soc, 28)
  //SOC_EQ(tmp, "?",        "930",   SOC_KIRIN, soc, ?)
  //SOC_EQ(tmp, "?",        "935",   SOC_KIRIN, soc, ?)
-  SOC_EQ(tmp, "Hi3650",     "950",   SOC_HISILICON_3650, soc, 16)
+  SOC_EQ(tmp, "hi3650",     "950",   SOC_HISILICON_3650, soc, 16)
  //SOC_EQ(tmp, "?",        "955",   SOC_KIRIN, soc, ?)
-  SOC_EQ(tmp, "Hi3660",     "960",   SOC_HISILICON_3660, soc, 16)
+  SOC_EQ(tmp, "hi3660",     "960",   SOC_HISILICON_3660, soc, 16)
  //SOC_EQ(tmp, "?",        "960S",  SOC_KIRIN, soc, 16)
-  SOC_EQ(tmp, "Hi3670",     "970",   SOC_HISILICON_3670, soc, 10)
+  SOC_EQ(tmp, "hi3670",     "970",   SOC_HISILICON_3670, soc, 10)
-  SOC_EQ(tmp, "Hi3680",     "980",   SOC_HISILICON_3680, soc,  7)
+  SOC_EQ(tmp, "hi3680",     "980",   SOC_HISILICON_3680, soc,  7)
  //SOC_EQ(tmp, "?",        "985",   SOC_KIRIN, soc,  7)
-  SOC_EQ(tmp, "Hi3690",     "990",   SOC_HISILICON_3690, soc,  7)
+  SOC_EQ(tmp, "hi3690",     "990",   SOC_HISILICON_3690, soc,  7)
  SOC_END
 }
@@ -648,7 +648,11 @@ struct system_on_chip* guess_soc_raspbery_pi(struct system_on_chip* soc) {
 #if defined(__APPLE__) || defined(__MACH__)
 struct system_on_chip* guess_soc_apple(struct system_on_chip* soc) {
  uint32_t cpu_family = get_sys_info_by_name("hw.cpufamily");
  uint32_t cpu_subfamily = get_sys_info_by_name("hw.cpusubfamily");
  if(cpu_family == CPUFAMILY_ARM_FIRESTORM_ICESTORM) {
    // Check M1 version
    if(cpu_subfamily == CPUSUBFAMILY_ARM_HG) {
      fill_soc(soc, "M1", SOC_APPLE_M1, 5);
    }
@@ -656,8 +660,38 @@ struct system_on_chip* guess_soc_apple(struct system_on_chip* soc) {
      fill_soc(soc, "M1 Pro", SOC_APPLE_M1_PRO, 5);
    }
    else if(cpu_subfamily == CPUSUBFAMILY_ARM_HC_HD) {
      // Could be M1 Max or M1 Ultra (2x M1 Max)
      uint32_t physicalcpu = get_sys_info_by_name("hw.physicalcpu");
      if(physicalcpu == 20) {
        fill_soc(soc, "M1 Ultra", SOC_APPLE_M1_ULTRA, 5);
      }
      else if(physicalcpu == 10) {
        fill_soc(soc, "M1 Max", SOC_APPLE_M1_MAX, 5);
      }
      else {
        printBug("Found invalid physical cpu number: %d", physicalcpu);
        soc->soc_vendor = SOC_VENDOR_UNKNOWN;
      }
    }
    else {
      printBug("Found invalid cpu_subfamily: 0x%.8X", cpu_subfamily);
      soc->soc_vendor = SOC_VENDOR_UNKNOWN;
    }
  }
  else if(cpu_family == CPUFAMILY_ARM_AVALANCHE_BLIZZARD) {
    // Check M2 version
    if(cpu_subfamily == CPUSUBFAMILY_ARM_HG) {
      fill_soc(soc, "M2", SOC_APPLE_M2, 5);
    }
    else {
      printBug("Found invalid cpu_subfamily: 0x%.8X", cpu_subfamily);
      soc->soc_vendor = SOC_VENDOR_UNKNOWN;
    }
  }
  else {
    printBug("Found invalid cpu_family: 0x%.8X", cpu_family);
    soc->soc_vendor = SOC_VENDOR_UNKNOWN;
  }
  return soc;
 }
 #endif
--- a/src/arm/socs.h
+++ b/src/arm/socs.h
@@ -256,6 +256,8 @@ enum {
  SOC_APPLE_M1,
  SOC_APPLE_M1_PRO,
  SOC_APPLE_M1_MAX,
  SOC_APPLE_M1_ULTRA,
  SOC_APPLE_M2,
  // ALLWINNER
  SOC_ALLWINNER_A10,
  SOC_ALLWINNER_A13,
@@ -288,7 +290,7 @@ inline static VENDOR get_soc_vendor_from_soc(SOC soc) {
  else if(soc >= SOC_EXYNOS_3475 && soc <= SOC_EXYNOS_880) return SOC_VENDOR_EXYNOS;
  else if(soc >= SOC_MTK_MT6893 && soc <= SOC_MTK_MT8783) return SOC_VENDOR_MEDIATEK;
  else if(soc >= SOC_SNAPD_QSD8650 && soc <= SOC_SNAPD_SM8350) return SOC_VENDOR_SNAPDRAGON;
-  else if(soc >= SOC_APPLE_M1 && soc <= SOC_APPLE_M1_MAX) return SOC_VENDOR_APPLE;
+  else if(soc >= SOC_APPLE_M1 && soc <= SOC_APPLE_M2) return SOC_VENDOR_APPLE;
  else if(soc >= SOC_ALLWINNER_A10 && soc <= SOC_ALLWINNER_R328) return SOC_VENDOR_ALLWINNER;
  return SOC_VENDOR_UNKNOWN;
 }
--- a/src/arm/sysctl.h
+++ b/src/arm/sysctl.h
@@ -4,9 +4,23 @@
 // From Linux kernel: arch/arm64/include/asm/cputype.h
 #define MIDR_APPLE_M1_ICESTORM  0x610F0220
 #define MIDR_APPLE_M1_FIRESTORM 0x610F0230
 // Kernel does not include those, so I just assume that
 // APPLE_CPU_PART_M2_BLIZZARD=0x30,M2_AVALANCHE=0x31
 #define MIDR_APPLE_M2_BLIZZARD  0x610F0300
 #define MIDR_APPLE_M2_AVALANCHE 0x610F0310
 // M1 / A14
 #ifndef CPUFAMILY_ARM_FIRESTORM_ICESTORM
  #define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1B588BB3
 #endif
 // M2 / A15
 #ifndef CPUFAMILY_ARM_AVALANCHE_BLIZZARD
  #define CPUFAMILY_ARM_AVALANCHE_BLIZZARD 0xDA33D83D
 #endif
 // For detecting different M1 types
 // NOTE: Could also be achieved detecting different
 // MIDR values (e.g., APPLE_CPU_PART_M1_ICESTORM_PRO)
 #ifndef CPUSUBFAMILY_ARM_HG
  #define CPUSUBFAMILY_ARM_HG 2
 #endif
--- a/src/arm/uarch.c
+++ b/src/arm/uarch.c
@@ -33,6 +33,7 @@ enum {
  ISA_ARMv8_2_A,
  ISA_ARMv8_3_A,
  ISA_ARMv8_4_A,
  ISA_ARMv8_5_A
 };
 enum {
@@ -95,6 +96,8 @@ enum {
  UARCH_THUNDER,    // Apple A13 processor (little cores).
  UARCH_ICESTORM,   // Apple M1 processor (little cores).
  UARCH_FIRESTORM,  // Apple M1 processor (big cores).
  UARCH_BLIZZARD,   // Apple M2 processor (little cores).
  UARCH_AVALANCHE,  // Apple M2 processor (big cores).
  // CAVIUM
  UARCH_THUNDERX,   // Cavium ThunderX
  UARCH_THUNDERX2,  //  Cavium ThunderX2 (originally Broadcom Vulkan).
@@ -155,8 +158,10 @@ static const ISA isas_uarch[] = {
  [UARCH_EXYNOS_M3]    = ISA_ARMv8_A,
  [UARCH_EXYNOS_M4]    = ISA_ARMv8_2_A,
  [UARCH_EXYNOS_M5]    = ISA_ARMv8_2_A,
-  [UARCH_ICESTORM]     = ISA_ARMv8_4_A,
+  [UARCH_ICESTORM]     = ISA_ARMv8_5_A, // https://github.com/llvm/llvm-project/blob/main/llvm/include/llvm/Support/AArch64TargetParser.def
-  [UARCH_FIRESTORM]    = ISA_ARMv8_4_A,
+  [UARCH_FIRESTORM]    = ISA_ARMv8_5_A,
  [UARCH_BLIZZARD]     = ISA_ARMv8_5_A, // Not confirmed
  [UARCH_AVALANCHE]    = ISA_ARMv8_5_A,
  [UARCH_PJ4]          = ISA_ARMv7_A,
  [UARCH_XIAOMI]       = ISA_ARMv8_A,
 };
@@ -172,7 +177,8 @@ static char* isas_string[] = {
  [ISA_ARMv8_1_A] = "ARMv8.1",
  [ISA_ARMv8_2_A] = "ARMv8.2",
  [ISA_ARMv8_3_A] = "ARMv8.3",
-  [ISA_ARMv8_4_A] = "ARMv8.4"
+  [ISA_ARMv8_4_A] = "ARMv8.4",
  [ISA_ARMv8_5_A] = "ARMv8.5"
 };
 #define UARCH_START if (false) {}
@@ -297,6 +303,8 @@ struct uarch* get_uarch_from_midr(uint32_t midr, struct cpuInfo* cpu) {
  CHECK_UARCH(arch, cpu, 'a', 0x022, NA, NA, "Icestorm",              UARCH_ICESTORM,     CPU_VENDOR_APPLE)
  CHECK_UARCH(arch, cpu, 'a', 0x023, NA, NA, "Firestorm",             UARCH_FIRESTORM,    CPU_VENDOR_APPLE)
  CHECK_UARCH(arch, cpu, 'a', 0x030, NA, NA, "Blizzard",              UARCH_BLIZZARD,     CPU_VENDOR_APPLE)
  CHECK_UARCH(arch, cpu, 'a', 0x031, NA, NA, "Avalanche",             UARCH_AVALANCHE,    CPU_VENDOR_APPLE)
  CHECK_UARCH(arch, cpu, 'V', 0x581, NA, NA, "PJ4",                   UARCH_PJ4,          CPU_VENDOR_MARVELL)
  CHECK_UARCH(arch, cpu, 'V', 0x584, NA, NA, "PJ4B-MP",               UARCH_PJ4,          CPU_VENDOR_MARVELL)
--- a/src/common/cpu.h
+++ b/src/common/cpu.h
@@ -35,6 +35,12 @@ enum {
  HV_VENDOR_INVALID
 };
 enum {
  CORE_TYPE_EFFICIENCY,
  CORE_TYPE_PERFORMANCE,
  CORE_TYPE_UNKNOWN
 };
 #define UNKNOWN_DATA -1
 #define CPU_NAME_MAX_LENGTH 64
@@ -78,6 +84,7 @@ struct topology {
  uint32_t smt_supported; // Number of SMT that CPU supports (equal to smt_available if SMT is enabled)
 #ifdef ARCH_X86
  uint32_t smt_available; // Number of SMT that is currently enabled
  int32_t total_cores_module; // Total cores in the current module (only makes sense in hybrid archs, like ADL)
  struct apic* apic;
 #endif
 #endif
@@ -131,6 +138,10 @@ struct cpuInfo {
  uint32_t maxExtendedLevels;
  // Topology Extensions (AMD only)
  bool topology_extensions;
  // Hybrid Flag (Intel only)
  bool hybrid_flag;
  // Core Type (P/E)
  uint32_t core_type;
 #elif ARCH_PPC
  uint32_t pvr;
 #elif ARCH_ARM
@@ -140,11 +151,18 @@ struct cpuInfo {
 #ifdef ARCH_ARM
  struct system_on_chip* soc;
 #endif
 #if defined(ARCH_X86) || defined(ARCH_ARM)
  // If SoC contains more than one CPU and they
  // are different, the others will be stored in
  // the next_cpu field
  struct cpuInfo* next_cpu;
  uint8_t num_cpus;
 #ifdef ARCH_X86
  // The index of the first core in the module
  uint32_t first_core_id;
 #endif
 #endif
 };
--- a/src/common/main.c
+++ b/src/common/main.c
@@ -33,7 +33,7 @@
  static const char* OS_STR = "Unknown OS";
 #endif
-static const char* VERSION = "1.02";
+static const char* VERSION = "1.03";
 void print_help(char *argv[]) {
  const char **t = args_str;
--- a/src/common/printer.c
+++ b/src/common/printer.c
@@ -44,6 +44,8 @@ enum {
  ATTRIBUTE_NAME,
 #elif ARCH_ARM
  ATTRIBUTE_SOC,
 #endif
 #if defined(ARCH_X86) || defined(ARCH_ARM)
  ATTRIBUTE_CPU_NUM,
 #endif
  ATTRIBUTE_HYPERVISOR,
@@ -75,6 +77,8 @@ static const char* ATTRIBUTE_FIELDS [] = {
  "Part Number:",
 #elif ARCH_ARM
  "SoC:",
 #endif
 #if defined(ARCH_X86) || defined(ARCH_ARM)
  "",
 #endif
  "Hypervisor:",
@@ -106,6 +110,8 @@ static const char* ATTRIBUTE_FIELDS_SHORT [] = {
  "P/N:",
 #elif ARCH_ARM
  "SoC:",
 #endif
 #if defined(ARCH_X86) || defined(ARCH_ARM)
  "",
 #endif
  "Hypervisor:",
@@ -424,11 +430,12 @@ uint32_t longest_field_length(struct ascii* art, int la) {
 }
 #if defined(ARCH_X86) || defined(ARCH_PPC)
-void print_ascii_generic(struct ascii* art, uint32_t la, int32_t termw, const char** attribute_fields) {
+void print_ascii_generic(struct ascii* art, uint32_t la, int32_t termw, const char** attribute_fields, bool hybrid_architecture) {
  struct ascii_logo* logo = art->art;
  int attr_to_print = 0;
  int attr_type;
  char* attr_value;
  int32_t beg_space;
  int32_t space_right;
  int32_t space_up = ((int)logo->height - (int)art->n_attributes_set)/2;
  int32_t space_down = (int)logo->height - (int)art->n_attributes_set - (int)space_up;
@@ -439,6 +446,7 @@ void print_ascii_generic(struct ascii* art, uint32_t la, int32_t termw, const ch
  lbuf->buf = emalloc(sizeof(char) * LINE_BUFFER_SIZE);
  lbuf->pos = 0;
  lbuf->chars = 0;
  bool add_space = false;
  printf("\n");
  for(int32_t n=0; n < iters; n++) {
@@ -473,9 +481,24 @@ void print_ascii_generic(struct ascii* art, uint32_t la, int32_t termw, const ch
      attr_value = art->attributes[attr_to_print]->value;
      attr_to_print++;
-      space_right = 1 + (la - strlen(attribute_fields[attr_type]));
+      if(attr_type == ATTRIBUTE_L3) {
-      printOut(lbuf, strlen(attribute_fields[attr_type]) + space_right + strlen(attr_value),
+        add_space = false;
-               "%s%s%s%*s%s%s%s", logo->color_text[0], attribute_fields[attr_type], art->reset, space_right, "", logo->color_text[1], attr_value, art->reset);
+      }
      if(attr_type == ATTRIBUTE_CPU_NUM) {
        printOut(lbuf, strlen(attr_value), "%s%s%s", logo->color_text[0], attr_value, art->reset);
        add_space = true;
      }
      else {
        beg_space = 0;
        space_right = 2 + 1 + (la - strlen(attribute_fields[attr_type]));
        if(hybrid_architecture && add_space) {
          beg_space = 2;
          space_right -= 2;
        }
        printOut(lbuf, beg_space + strlen(attribute_fields[attr_type]) + space_right + strlen(attr_value),
                 "%*s%s%s%s%*s%s%s%s", beg_space, "", logo->color_text[0], attribute_fields[attr_type], art->reset, space_right, "", logo->color_text[1], attr_value, art->reset);
      }
    }
    printOutLine(lbuf, art, termw);
    printf("\n");
@@ -501,43 +524,56 @@ bool print_cpufetch_x86(struct cpuInfo* cpu, STYLE s, struct color** cs, struct
  art->new_intel_logo = choose_new_intel_logo(cpu);
  // Step 1. Retrieve attributes (if some structures are NULL, like topo
  //         or cache, do not try to retrieve them)
  uint32_t socket_num = 1;
  char* l1i, *l1d, *l2, *l3, *n_cores, *n_cores_dual, *sockets;
  l1i = l1d = l2 = l3 = n_cores = n_cores_dual = sockets = NULL;
  char* uarch = get_str_uarch(cpu);
  char* manufacturing_process = get_str_process(cpu);
  char* max_frequency = get_str_freq(cpu->freq);
  char* cpu_name = get_str_cpu_name(cpu, fcpuname);
-  char* avx = get_str_avx(cpu);
+  char* uarch = get_str_uarch(cpu);
  char* fma = get_str_fma(cpu);
  char* pp = get_str_peak_performance(cpu->peak_performance);
-
+  char* manufacturing_process = get_str_process(cpu);
-  if(cpu->topo != NULL) {
+  bool hybrid_architecture = cpu->next_cpu != NULL;
    sockets = get_str_sockets(cpu->topo);
    n_cores = get_str_topology(cpu, cpu->topo, false);
    n_cores_dual = get_str_topology(cpu, cpu->topo, true);
  }
  if(cpu->cach != NULL) {
    l1i = get_str_l1i(cpu->cach);
    l1d = get_str_l1d(cpu->cach);
    l2 = get_str_l2(cpu->cach);
    l3 = get_str_l3(cpu->cach);
  }
  // Step 2. Set attributes
  setAttribute(art, ATTRIBUTE_NAME, cpu_name);
  if(cpu->hv->present) {
    setAttribute(art, ATTRIBUTE_HYPERVISOR, cpu->hv->hv_name);
  }
  setAttribute(art, ATTRIBUTE_UARCH, uarch);
  setAttribute(art, ATTRIBUTE_TECHNOLOGY, manufacturing_process);
  struct cpuInfo* ptr = cpu;
  for(int i = 0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
    char* max_frequency = get_str_freq(ptr->freq);
    char* avx = get_str_avx(ptr);
    char* fma = get_str_fma(ptr);
    char* cpu_num = emalloc(sizeof(char) * 9);
    if(ptr->topo != NULL) {
      sockets = get_str_sockets(ptr->topo);
      n_cores = get_str_topology(ptr, ptr->topo, false);
      n_cores_dual = get_str_topology(ptr, ptr->topo, true);
    }
    if(ptr->cach != NULL) {
      l1i = get_str_l1i(ptr->cach);
      l1d = get_str_l1d(ptr->cach);
      l2 = get_str_l2(ptr->cach);
    }
    if(hybrid_architecture) {
      if(ptr->core_type == CORE_TYPE_EFFICIENCY) sprintf(cpu_num, "E-cores:");
      else if(ptr->core_type == CORE_TYPE_PERFORMANCE) sprintf(cpu_num, "P-cores:");
      else printBug("Found invalid core type!\n");
      setAttribute(art, ATTRIBUTE_CPU_NUM, cpu_num);
    }
    setAttribute(art, ATTRIBUTE_FREQUENCY, max_frequency);
-  if(cpu->topo != NULL) {
+    if(ptr->topo != NULL) {
-    socket_num = get_nsockets(cpu->topo);
+      socket_num = get_nsockets(ptr->topo);
      if (socket_num > 1) {
        setAttribute(art, ATTRIBUTE_SOCKETS, sockets);
        setAttribute(art, ATTRIBUTE_NCORES, n_cores);
@@ -552,6 +588,7 @@ bool print_cpufetch_x86(struct cpuInfo* cpu, STYLE s, struct color** cs, struct
    if(l1i != NULL) setAttribute(art, ATTRIBUTE_L1i, l1i);
    if(l1d != NULL) setAttribute(art, ATTRIBUTE_L1d, l1d);
    if(l2 != NULL) setAttribute(art, ATTRIBUTE_L2, l2);
  }
  if(l3 != NULL) setAttribute(art, ATTRIBUTE_L3, l3);
  setAttribute(art, ATTRIBUTE_PEAK, pp);
@@ -568,15 +605,12 @@ bool print_cpufetch_x86(struct cpuInfo* cpu, STYLE s, struct color** cs, struct
    longest_attribute = longest_attribute_length(art, attribute_fields);
  }
-  print_ascii_generic(art, longest_attribute, term->w, attribute_fields);
+  print_ascii_generic(art, longest_attribute, term->w, attribute_fields, hybrid_architecture);
  free(manufacturing_process);
  free(max_frequency);
  free(sockets);
  free(n_cores);
  free(n_cores_dual);
  free(avx);
  free(fma);
  free(l1i);
  free(l1d);
  free(l2);
--- a/src/common/udev.c
+++ b/src/common/udev.c
@@ -165,48 +165,85 @@ long get_l3_cache_size(uint32_t core) {
  return get_cache_size_from_file(path);
 }
 // Copy the first n entries of row src[src_idx] into row dst[dst_idx].
 // Nothing is read or written when n is zero or negative.
 void add_shared_map(uint32_t** src, int src_idx, uint32_t** dst, int dst_idx, int n) {
  if(n <= 0) return;

  const uint32_t* from = src[src_idx];
  uint32_t* to = dst[dst_idx];
  int remaining = n;
  while(remaining-- > 0) {
    *to++ = *from++;
  }
 }
 // Return true when the first n entries of map1 and map2 are identical.
 // With n <= 0 nothing is compared and the maps are reported as equal.
 bool maps_equal(uint32_t* map1, uint32_t* map2, int n) {
  int idx = 0;
  // Advance while the entries match; stop at the first difference
  while(idx < n && map1[idx] == map2[idx]) {
    idx++;
  }
  return idx >= n;
 }
 int get_num_caches_from_files(char** paths, int num_paths) {
  int SHARED_MAP_MAX_LEN = 8 + 1;
  int filelen;
  char* buf;
-  uint32_t* shared_maps = emalloc(sizeof(uint32_t *) * num_paths);
+  char* tmpbuf;
-  // 1. Read cpu_shared_map from every core
+  // 1. Count the number of bitmasks per file
  if((buf = read_file(paths[0], &filelen)) == NULL) {
    printWarn("Could not open '%s'", paths[0]);
    return -1;
  }
  int num_bitmasks = 1;
  for(int i=0; buf[i]; i++) {
    num_bitmasks += (buf[i] == ',');
  }
  // 2. Read cpu_shared_map from every core
  uint32_t** shared_maps = emalloc(sizeof(uint32_t *) * num_paths);
  for(int i=0; i < num_paths; i++) {
    shared_maps[i] = emalloc(sizeof(uint32_t) * num_bitmasks);
    if((buf = read_file(paths[i], &filelen)) == NULL) {
      printWarn("Could not open '%s'", paths[i]);
      return -1;
    }
-    if(filelen > SHARED_MAP_MAX_LEN) {
+    for(int j=0; j < num_bitmasks; j++) {
      printBug("Shared map length is %d while the max is be %d", filelen, SHARED_MAP_MAX_LEN);
      return -1;
    }
      char* end;
      tmpbuf = emalloc(sizeof(char) * (strlen(buf) + 1));
      char* commaend = strstr(buf, ",");
      if(commaend == NULL) {
        strcpy(tmpbuf, buf);
      }
      else {
        strncpy(tmpbuf, buf, commaend-buf);
      }
      errno = 0;
-    long ret = strtol(buf, &end, 16);
+      long ret = strtol(tmpbuf, &end, 16);
      if(errno != 0) {
-      printBug("strtol: %s", strerror(errno));
+        printf("strtol: %s", strerror(errno));
        free(buf);
        return -1;
      }
-    shared_maps[i] = (uint32_t) ret;
+      shared_maps[i][j] = (uint32_t) ret;
      buf = commaend + 1;
      free(tmpbuf);
    }
  }
  // 3. Count number of different masks; this is the number of caches
  int num_caches = 0;
  bool found = false;
-  uint32_t* unique_shared_maps = emalloc(sizeof(uint32_t *) * num_paths);
+  uint32_t** unique_shared_maps = emalloc(sizeof(uint32_t *) * num_paths);
-  for(int i=0; i < num_paths; i++) unique_shared_maps[i] = 0;
+  for(int i=0; i < num_paths; i++) {
    unique_shared_maps[i] = emalloc(sizeof(uint32_t) * num_bitmasks);
    for(int j=0; j < num_bitmasks; j++) {
      unique_shared_maps[i][j] = 0;
    }
  }
  for(int i=0; i < num_paths; i++) {
    for(int j=0; j < num_paths && !found; j++) {
-      if(shared_maps[i] == unique_shared_maps[j]) found = true;
+      if(maps_equal(shared_maps[i], unique_shared_maps[j], num_bitmasks)) found = true;
    }
    if(!found) {
-      unique_shared_maps[num_caches] = shared_maps[i];
+      add_shared_map(shared_maps, i, unique_shared_maps, num_caches, num_bitmasks);
      num_caches++;
    }
    found = false;
--- a/src/ppc/ppc.h
+++ b/src/ppc/ppc.h
@@ -1,5 +1,5 @@
-#ifndef __POWERPC__
+#ifndef __CPUFETCH_POWERPC__
-#define __POWERPC__
+#define __CPUFETCH_POWERPC__
 #include "../common/cpu.h"
--- a/src/x86/apic.c
+++ b/src/x86/apic.c
@@ -102,6 +102,59 @@ bool bind_to_cpu(int cpu_id) {
 }
 #endif
 // Count the cores that belong to the module-th group of cores sharing the
 // same core type.
 //
 // Cores are visited in increasing index order starting at 0. For each one
 // the process is bound to that core and CPUID leaf 0x1A is queried; bits
 // 31:24 of EAX are taken as the core type. Every time a core type not seen
 // before shows up, a new module index is opened. The scan stops when the
 // requested module has been counted (a different module follows it, or the
 // last core was reached).
 //
 //   total_cores: total number of cores to consider.
 //   module:      zero-based index of the module to count.
 //
 // Returns the number of cores in the module, -1 if binding to a core fails
 // or more distinct core types than expected are found, and 0 if the
 // affinity mask cannot be read or restored.
 int get_total_cores_module(int total_cores, int module) {
  int total_modules = 2;
  int32_t current_module_idx = -1;
  bool end = false;
  int32_t* core_types = emalloc(sizeof(int32_t) * total_modules);
  for(int i=0; i < total_modules; i++) core_types[i] = -1;
  int cores_in_module = 0;
  int i = 0;
  // Get the original mask to restore it later
  cpu_set_t original_mask;
  if(sched_getaffinity(0, sizeof(original_mask), &original_mask) == -1) {
    printWarn("sched_getaffinity: %s", strerror(errno));
    free(core_types);
    // NOTE(review): this path reports 0 rather than -1; kept as is because
    // the callers are not visible from here
    return 0;
  }
  while(!end) {
    if(!bind_to_cpu(i)) {
      free(core_types);
      return -1;
    }
    uint32_t eax = 0x0000001A;
    uint32_t ebx = 0;
    uint32_t ecx = 0;
    uint32_t edx = 0;
    cpuid(&eax, &ebx, &ecx, &edx);
    int32_t core_type = eax >> 24 & 0xFF;
    bool found = false;
    for(int j=0; j < total_modules && !found; j++) {
      if(core_types[j] == core_type) found = true;
    }
    if(!found) {
      // Never write past the end of core_types if the CPU reports more
      // distinct core types than total_modules
      if(current_module_idx + 1 >= total_modules) {
        printWarn("Found more than %d different core types", total_modules);
        free(core_types);
        return -1;
      }
      current_module_idx++;
      core_types[current_module_idx] = core_type;
    }
    if(current_module_idx == module) {
      cores_in_module++;
      if(i+1 == total_cores) end = true;
    }
    else if(cores_in_module > 0) end = true;
    i++;
  }
  free(core_types);
  // Reset the original affinity
  if (sched_setaffinity (0, sizeof(original_mask), &original_mask) == -1) {
    printWarn("sched_setaffinity: %s", strerror(errno));
    // NOTE(review): same as above, 0 is reported instead of an error code
    return 0;
  }
  //printf("Module %d has %d cores\n", module, cores_in_module);
  return cores_in_module;
 }
 bool fill_topo_masks_apic(struct topology* topo) {
  uint32_t eax = 0x00000001;
  uint32_t ebx = 0;
@@ -197,14 +250,14 @@ uint32_t max_apic_id_size(uint32_t** cache_id_apic, struct topology* topo) {
  uint32_t max = 0;
  for(int i=0; i < topo->cach->max_cache_level; i++) {
-    for(int j=0; j < topo->total_cores; j++) {
+    for(int j=0; j < topo->total_cores_module; j++) {
      if(cache_id_apic[j][i] > max) max = cache_id_apic[j][i];
    }
  }
  max++;
-  if(max > (uint32_t) topo->total_cores) return max;
+  if(max > (uint32_t) topo->total_cores_module) return max;
-  return topo->total_cores;
+  return topo->total_cores_module;
 }
 bool build_topo_from_apic(uint32_t* apic_pkg, uint32_t* apic_smt, uint32_t** cache_id_apic, struct topology* topo) {
@@ -219,18 +272,18 @@ bool build_topo_from_apic(uint32_t* apic_pkg, uint32_t* apic_smt, uint32_t** cac
  memset(apic_id, 0, sizeof(uint32_t) * size);
  // System topology
-  for(int i=0; i < topo->total_cores; i++) {
+  for(int i=0; i < topo->total_cores_module; i++) {
    sockets[apic_pkg[i]] = 1;
    smt[apic_smt[i]] = 1;
  }
-  for(int i=0; i < topo->total_cores; i++) {
+  for(int i=0; i < topo->total_cores_module; i++) {
    if(sockets[i] != 0)
      topo->sockets++;
    if(smt[i] != 0)
      topo->smt_available++;
  }
-  topo->logical_cores = topo->total_cores / topo->sockets;
+  topo->logical_cores = topo->total_cores_module / topo->sockets;
  topo->physical_cores = topo->logical_cores / topo->smt_available;
  // Cache topology
@@ -238,7 +291,7 @@ bool build_topo_from_apic(uint32_t* apic_pkg, uint32_t* apic_smt, uint32_t** cac
    num_caches = 0;
    memset(apic_id, 0, sizeof(uint32_t) * size);
-    for(int c=0; c < topo->total_cores; c++) {
+    for(int c=0; c < topo->total_cores_module; c++) {
      apic_id[cache_id_apic[c][i]]++;
    }
    for(uint32_t c=0; c < size; c++) {
@@ -297,7 +350,7 @@ void add_apic_to_array(uint32_t apic, uint32_t* apic_ids, int n) {
  }
 }
-bool fill_apic_ids(uint32_t* apic_ids, int n, bool x2apic_id) {
+bool fill_apic_ids(uint32_t* apic_ids, int first_core, int n, bool x2apic_id) {
 #ifdef __APPLE__
  // macOS extremely dirty approach...
  printf("cpufetch is computing APIC IDs, please wait...\n");
@@ -322,12 +375,12 @@ bool fill_apic_ids(uint32_t* apic_ids, int n, bool x2apic_id) {
  }
  #endif
-  for(int i=0; i < n; i++) {
+  for(int i=first_core; i < first_core+n; i++) {
    if(!bind_to_cpu(i)) {
      printErr("Failed binding the process to CPU %d", i);
      return false;
    }
-    apic_ids[i] = get_apic_id(x2apic_id);
+    apic_ids[i-first_core] = get_apic_id(x2apic_id);
  }
  #ifdef __linux__
@@ -344,12 +397,12 @@ bool fill_apic_ids(uint32_t* apic_ids, int n, bool x2apic_id) {
 bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) {
  uint32_t apic_id;
-  uint32_t* apic_ids = emalloc(sizeof(uint32_t) * topo->total_cores);
+  uint32_t* apic_ids = emalloc(sizeof(uint32_t) * topo->total_cores_module);
-  uint32_t* apic_pkg = emalloc(sizeof(uint32_t) * topo->total_cores);
+  uint32_t* apic_pkg = emalloc(sizeof(uint32_t) * topo->total_cores_module);
-  uint32_t* apic_core = emalloc(sizeof(uint32_t) * topo->total_cores);
+  uint32_t* apic_core = emalloc(sizeof(uint32_t) * topo->total_cores_module);
-  uint32_t* apic_smt = emalloc(sizeof(uint32_t) * topo->total_cores);
+  uint32_t* apic_smt = emalloc(sizeof(uint32_t) * topo->total_cores_module);
-  uint32_t** cache_smt_id_apic = emalloc(sizeof(uint32_t*) * topo->total_cores);
+  uint32_t** cache_smt_id_apic = emalloc(sizeof(uint32_t*) * topo->total_cores_module);
-  uint32_t** cache_id_apic = emalloc(sizeof(uint32_t*) * topo->total_cores);
+  uint32_t** cache_id_apic = emalloc(sizeof(uint32_t*) * topo->total_cores_module);
  bool x2apic_id;
  if(cpu->maxLevels >= 0x0000000B) {
@@ -367,7 +420,7 @@ bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) {
    x2apic_id = false;
  }
-  for(int i=0; i < topo->total_cores; i++) {
+  for(int i=0; i < topo->total_cores_module; i++) {
    cache_smt_id_apic[i] = emalloc(sizeof(uint32_t) * (topo->cach->max_cache_level));
    cache_id_apic[i] = emalloc(sizeof(uint32_t) * (topo->cach->max_cache_level));
  }
@@ -385,10 +438,10 @@ bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) {
  get_cache_topology_from_apic(topo);
-  if(!fill_apic_ids(apic_ids, topo->total_cores, x2apic_id))
+  if(!fill_apic_ids(apic_ids, cpu->first_core_id, topo->total_cores_module, x2apic_id))
    return false;
-  for(int i=0; i < topo->total_cores; i++) {
+  for(int i=0; i < topo->total_cores_module; i++) {
    apic_id = apic_ids[i];
    apic_pkg[i] = (apic_id & topo->apic->pkg_mask) >> topo->apic->pkg_mask_shift;
@@ -404,20 +457,19 @@ bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) {
  /* DEBUG
  for(int i=0; i < topo->cach->max_cache_level; i++) {
    printf("[CACH %1d]", i);
-    for(int j=0; j < topo->total_cores; j++)
+    for(int j=0; j < topo->total_cores_module; j++)
      printf("[%03d]", cache_id_apic[j][i]);
    printf("\n");
  }
-  for(int i=0; i < topo->total_cores; i++)
+  for(int i=0; i < topo->total_cores_module; i++)
    printf("[%2d] 0x%.8X\n", i, apic_pkg[i]);
  printf("\n");
-  for(int i=0; i < topo->total_cores; i++)
+  for(int i=0; i < topo->total_cores_module; i++)
    printf("[%2d] 0x%.8X\n", i, apic_core[i]);
  printf("\n");
-  for(int i=0; i < topo->total_cores; i++)
+  for(int i=0; i < topo->total_cores_module; i++)
    printf("[%2d] 0x%.8X\n", i, apic_smt[i]);*/
  bool ret = build_topo_from_apic(apic_pkg, apic_smt, cache_id_apic, topo);
  // Assumption: If we cant get smt_available, we assume it is equal to smt_supported...
@@ -429,7 +481,7 @@ bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) {
  free(apic_pkg);
  free(apic_core);
  free(apic_smt);
-  for(int i=0; i < topo->total_cores; i++) {
+  for(int i=0; i < topo->total_cores_module; i++) {
    free(cache_smt_id_apic[i]);
    free(cache_id_apic[i]);
  }
--- a/src/x86/apic.h
+++ b/src/x86/apic.h
@@ -21,4 +21,6 @@ uint32_t is_smt_enabled_amd(struct topology* topo);
 bool bind_to_cpu(int cpu_id);
 #endif
 int get_total_cores_module(int total_cores, int module);
 #endif
--- a/src/x86/cpuid.c
+++ b/src/x86/cpuid.c
@@ -179,7 +179,7 @@ struct uarch* get_cpu_uarch(struct cpuInfo* cpu) {
  return get_uarch_from_cpuid(cpu, eax, efamily, family, emodel, model, (int)stepping);
 }
-int64_t get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t max_freq, bool accurate_pp) {
+int64_t get_peak_performance(struct cpuInfo* cpu, bool accurate_pp) {
  /*
   * PP = PeakPerformance
   * SP = SinglePrecision
@@ -192,25 +192,32 @@ int64_t get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t
   * 16(If AVX512), 8(If AVX), 4(If SSE) *
   */
  struct cpuInfo* ptr = cpu;
  int64_t total_flops = 0;
  for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
    struct topology* topo = ptr->topo;
    int64_t max_freq = get_freq(ptr->freq);
    int64_t freq;
-#ifdef __linux__
+  #ifdef __linux__
    if(accurate_pp)
-    freq = measure_frequency(cpu);
+      freq = measure_frequency(ptr);
    else
      freq = max_freq;
-#else
+  #else
    // Silence compiler warning
    (void)(accurate_pp);
    freq = max_freq;
-#endif
+  #endif
    //First, check we have consistent data
    if(freq == UNKNOWN_DATA || topo->logical_cores == UNKNOWN_DATA) {
      return -1;
    }
-  struct features* feat = cpu->feat;
+    struct features* feat = ptr->feat;
-  int vpus = get_number_of_vpus(cpu);
+    int vpus = get_number_of_vpus(ptr);
    int64_t flops = topo->physical_cores * topo->sockets * (freq*1000000) * vpus;
    if(feat->FMA3 || feat->FMA4)
@@ -219,7 +226,7 @@ int64_t get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t
    // Ice Lake has AVX512, but it has 1 VPU for AVX512, while
    // it has 2 for AVX2. If this is a Ice Lake CPU, we are computing
    // the peak performance supposing AVX2, not AVX512
-  if(feat->AVX512 && vpus_are_AVX512(cpu))
+    if(feat->AVX512 && vpus_are_AVX512(ptr))
      flops = flops*16;
    else if(feat->AVX || feat->AVX2)
      flops = flops*8;
@@ -228,10 +235,13 @@ int64_t get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t
    // See https://sites.utexas.edu/jdm4372/2018/01/22/a-peculiar-
    // throughput-limitation-on-intels-xeon-phi-x200-knights-landing/
-  if(is_knights_landing(cpu))
+    if(is_knights_landing(ptr))
      flops = flops * 6 / 7;
-  return flops;
+    total_flops += flops;
  }
  return total_flops;
 }
 struct hypervisor* get_hp_info(bool hv_present) {
@@ -274,51 +284,19 @@ struct hypervisor* get_hp_info(bool hv_present) {
  return hv;
 }
-struct cpuInfo* get_cpu_info() {
+struct features* get_features_info(struct cpuInfo* cpu) {
  struct cpuInfo* cpu = emalloc(sizeof(struct cpuInfo));
  struct features* feat = emalloc(sizeof(struct features));
  cpu->feat = feat;
  cpu->peak_performance = -1;
  cpu->topo = NULL;
  cpu->cach = NULL;
  bool *ptr = &(feat->AES);
  for(uint32_t i = 0; i < sizeof(struct features)/sizeof(bool); i++, ptr++) {
    *ptr = false;
  }
  uint32_t eax = 0;
  uint32_t ebx = 0;
  uint32_t ecx = 0;
  uint32_t edx = 0;
-  //Get max cpuid level
+  struct features* feat = emalloc(sizeof(struct features));
  cpuid(&eax, &ebx, &ecx, &edx);
  cpu->maxLevels = eax;
-  //Fill vendor
+  bool *ptr = &(feat->AES);
-  char name[13];
+  for(uint32_t i = 0; i < sizeof(struct features)/sizeof(bool); i++, ptr++) {
-  memset(name,0,13);
+    *ptr = false;
  get_name_cpuid(name, ebx, edx, ecx);
  if(strcmp(CPU_VENDOR_INTEL_STRING,name) == 0)
    cpu->cpu_vendor = CPU_VENDOR_INTEL;
  else if (strcmp(CPU_VENDOR_AMD_STRING,name) == 0)
    cpu->cpu_vendor = CPU_VENDOR_AMD;
  else {
    cpu->cpu_vendor = CPU_VENDOR_INVALID;
    printErr("Unknown CPU vendor: %s", name);
    return NULL;
  }
  //Get max extended level
  eax = 0x80000000;
  ebx = 0;
  ecx = 0;
  edx = 0;
  cpuid(&eax, &ebx, &ecx, &edx);
  cpu->maxExtendedLevels = eax;
  //Fill instructions support
  if (cpu->maxLevels >= 0x00000001){
    eax = 0x00000001;
@@ -373,6 +351,116 @@ struct cpuInfo* get_cpu_info() {
    printWarn("Can't read features information from cpuid (needed extended level is 0x%.8X, max is 0x%.8X)", 0x80000001, cpu->maxExtendedLevels);
  }
  return feat;
 }
 /*
  * Selects module `m` of the CPU by binding the calling thread to the first
  * core that belongs to it.
  *
  * When total_modules > 1 (hybrid CPU), cores are probed in increasing order
  * starting at core 0: the thread is bound to each core with bind_to_cpu and
  * CPUID leaf 0x1A is queried to read the core type (bits 31-24 of EAX).
  * Every core type not seen before opens a new module; the core that opens
  * module `m` is the one reported and the one the thread stays bound to.
  *
  * m:             index of the module to select; must satisfy
  *                0 <= m < total_modules on a hybrid CPU (ignored otherwise)
  * total_modules: number of modules; a value <= 1 means a non-hybrid CPU
  * first_core:    out parameter; receives the id of the first core of module
  *                `m`, or 0 on a non-hybrid CPU. Left untouched on failure.
  *
  * Returns true on success, false if `m` is out of range or if binding to a
  * core failed.
  */
 bool set_cpu_module(int m, int total_modules, int32_t* first_core) {
  if(total_modules <= 1) {
    // Non-hybrid CPU: no probing or rebinding is done. The out parameter is
    // still written so that callers never read an uninitialized value.
    *first_core = 0;
    return true;
  }
  // An out-of-range module index would write past the end of core_types
  if(m < 0 || m >= total_modules) {
    return false;
  }
  // core_types[k] is the core type that identifies module k (-1 = not seen yet)
  int32_t* core_types = emalloc(sizeof(int32_t) * total_modules);
  for(int j=0; j < total_modules; j++) core_types[j] = -1;
  bool ok = true;
  int32_t core_id = -1;
  int32_t current_module_idx = -1;
  // 1. Find the first core from module m
  for(int i=0; core_id == -1; i++) {
    if(!bind_to_cpu(i)) {
      ok = false;
      break;
    }
    uint32_t eax = 0x0000001A;
    uint32_t ebx = 0;
    uint32_t ecx = 0;
    uint32_t edx = 0;
    cpuid(&eax, &ebx, &ecx, &edx);
    int32_t core_type = eax >> 24 & 0xFF;
    bool found = false;
    for(int j=0; j < total_modules && !found; j++) {
      if(core_types[j] == core_type) found = true;
    }
    if(!found) {
      // New core type: it starts the next module
      current_module_idx++;
      core_types[current_module_idx] = core_type;
      if(current_module_idx == m) {
        core_id = i;
      }
    }
  }
  // The scratch table is only needed while probing; release it on every path
  free(core_types);
  if(!ok) {
    return false;
  }
  *first_core = core_id;
  //printf("Module %d: Core %d\n", m, core_id);
  // 2. Now bind to that core
  return bind_to_cpu(core_id);
 }
 /*
  * Queries CPUID leaf 0x1A and translates the core type field
  * (bits 31-24 of EAX) into one of the CORE_TYPE_* constants.
  *
  * Returns CORE_TYPE_EFFICIENCY for 0x20 and CORE_TYPE_PERFORMANCE for 0x40.
  * Any other value is reported with printErr and yields CORE_TYPE_UNKNOWN.
  */
 int32_t get_core_type() {
  uint32_t reg_a = 0x0000001A;
  uint32_t reg_b = 0;
  uint32_t reg_c = 0;
  uint32_t reg_d = 0;
  cpuid(&reg_a, &reg_b, &reg_c, &reg_d);
  int32_t raw_type = (reg_a >> 24) & 0xFF;
  switch(raw_type) {
    case 0x20:
      return CORE_TYPE_EFFICIENCY;
    case 0x40:
      return CORE_TYPE_PERFORMANCE;
    default:
      printErr("Found invalid core type: 0x%.8X\n", raw_type);
      return CORE_TYPE_UNKNOWN;
  }
 }
 struct cpuInfo* get_cpu_info() {
  struct cpuInfo* cpu = emalloc(sizeof(struct cpuInfo));
  cpu->peak_performance = -1;
  cpu->next_cpu = NULL;
  cpu->topo = NULL;
  cpu->cach = NULL;
  cpu->feat = NULL;
  uint32_t modules = 1;
  uint32_t eax = 0;
  uint32_t ebx = 0;
  uint32_t ecx = 0;
  uint32_t edx = 0;
  //Get max cpuid level
  cpuid(&eax, &ebx, &ecx, &edx);
  cpu->maxLevels = eax;
  //Fill vendor
  char name[13];
  memset(name,0,13);
  get_name_cpuid(name, ebx, edx, ecx);
  if(strcmp(CPU_VENDOR_INTEL_STRING,name) == 0)
    cpu->cpu_vendor = CPU_VENDOR_INTEL;
  else if (strcmp(CPU_VENDOR_AMD_STRING,name) == 0)
    cpu->cpu_vendor = CPU_VENDOR_AMD;
  else {
    cpu->cpu_vendor = CPU_VENDOR_INVALID;
    printErr("Unknown CPU vendor: %s", name);
    return NULL;
  }
  //Get max extended level
  eax = 0x80000000;
  ebx = 0;
  ecx = 0;
  edx = 0;
  cpuid(&eax, &ebx, &ecx, &edx);
  cpu->maxExtendedLevels = eax;
  if (cpu->maxExtendedLevels >= 0x80000004){
    cpu->cpu_name = get_str_cpu_name_internal();
  }
@@ -389,19 +477,66 @@ struct cpuInfo* get_cpu_info() {
    cpu->topology_extensions = (ecx >> 22) & 1;
  }
  cpu->hybrid_flag = false;
  if(cpu->cpu_vendor == CPU_VENDOR_INTEL && cpu->maxLevels >= 0x00000007) {
    eax = 0x00000007;
    ecx = 0x00000000;
    cpuid(&eax, &ebx, &ecx, &edx);
    cpu->hybrid_flag = (edx >> 15) & 0x1;
  }
  if(cpu->hybrid_flag) modules = 2;
  struct cpuInfo* ptr = cpu;
  for(uint32_t i=0; i < modules; i++) {
    int32_t first_core;
    set_cpu_module(i, modules, &first_core);
    if(i > 0) {
      ptr->next_cpu = emalloc(sizeof(struct cpuInfo));
      ptr = ptr->next_cpu;
      ptr->next_cpu = NULL;
      ptr->peak_performance = -1;
      ptr->topo = NULL;
      ptr->cach = NULL;
      ptr->feat = NULL;
      // We assume that these cores have the
      // same cpuid capabilities
      ptr->cpu_vendor = cpu->cpu_vendor;
      ptr->maxLevels = cpu->maxLevels;
      ptr->maxExtendedLevels = cpu->maxExtendedLevels;
      ptr->hybrid_flag = cpu->hybrid_flag;
    }
    if(cpu->hybrid_flag) {
      // Detect core type
      eax = 0x0000001A;
      cpuid(&eax, &ebx, &ecx, &edx);
      ptr->core_type = get_core_type();
    }
    ptr->first_core_id = first_core;
    ptr->feat = get_features_info(ptr);
    // If any field of the struct is NULL,
    // return immediately, as further functions
    // require valid fields (cach, topo, etc)
-  cpu->arch = get_cpu_uarch(cpu);
+    ptr->arch = get_cpu_uarch(ptr);
-  cpu->freq = get_frequency_info(cpu);
+    ptr->freq = get_frequency_info(ptr);
-  cpu->cach = get_cache_info(cpu);
+    ptr->cach = get_cache_info(ptr);
-  if(cpu->cach == NULL) return cpu;
+    if(ptr->cach == NULL) return cpu;
-  cpu->topo = get_topology_info(cpu, cpu->cach);
+    if(cpu->hybrid_flag) {
      ptr->topo = get_topology_info(ptr, ptr->cach, i);
    }
    else {
      ptr->topo = get_topology_info(ptr, ptr->cach, -1);
    }
    if(cpu->topo == NULL) return cpu;
  }
-  cpu->peak_performance = get_peak_performance(cpu, cpu->topo, get_freq(cpu->freq), accurate_pp());
+  cpu->num_cpus = modules;
  cpu->peak_performance = get_peak_performance(cpu, accurate_pp());
  return cpu;
 }
@@ -492,7 +627,7 @@ void get_topology_from_udev(struct topology* topo) {
 // Main reference: https://software.intel.com/content/www/us/en/develop/articles/intel-64-architecture-processor-topology-enumeration.html
 // Very interesting resource: https://wiki.osdev.org/Detecting_CPU_Topology_(80x86)
-struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) {
+struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach, int module) {
  struct topology* topo = emalloc(sizeof(struct topology));
  init_topology_struct(topo, cach);
@@ -516,6 +651,13 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) {
    }
  #endif
  if(cpu->hybrid_flag) {
    topo->total_cores_module = get_total_cores_module(topo->total_cores, module);
  }
  else {
    topo->total_cores_module = topo->total_cores;
  }
  switch(cpu->cpu_vendor) {
    case CPU_VENDOR_INTEL:
      if (cpu->maxLevels >= 0x00000004) {
@@ -919,6 +1061,9 @@ void print_debug(struct cpuInfo* cpu) {
  if(cpu->cpu_vendor == CPU_VENDOR_AMD) {
    printf("- AMD topology extensions: %d\n", cpu->topology_extensions);
  }
  if(cpu->cpu_vendor == CPU_VENDOR_INTEL) {
    printf("- Hybrid Flag: %d\n", cpu->hybrid_flag);
  }
  printf("- CPUID dump: 0x%.8X\n", eax);
  free_cpuinfo_struct(cpu);
--- a/src/x86/cpuid.h
+++ b/src/x86/cpuid.h
@@ -6,7 +6,7 @@
 struct cpuInfo* get_cpu_info();
 struct cache* get_cache_info(struct cpuInfo* cpu);
 struct frequency* get_frequency_info(struct cpuInfo* cpu);
-struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach);
+struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach, int module);
 char* get_str_avx(struct cpuInfo* cpu);
 char* get_str_sse(struct cpuInfo* cpu);
--- a/src/x86/uarch.c
+++ b/src/x86/uarch.c
@@ -79,7 +79,7 @@ enum {
  UARCH_GOLDMONT_PLUS,
  UARCH_TREMONT,
  UARCH_LAKEMONT,
-  UARCH_COFFE_LAKE,
+  UARCH_COFFEE_LAKE,
  UARCH_ITANIUM,
  UARCH_KNIGHTS_FERRY,
  UARCH_KNIGHTS_CORNER,
@@ -109,7 +109,9 @@ enum {
  UARCH_ZEN,
  UARCH_ZEN_PLUS,
  UARCH_ZEN2,
-  UARCH_ZEN3
+  UARCH_ZEN3,
  UARCH_ZEN3_PLUS,
  UARCH_ZEN4
 };
 struct uarch {
@@ -225,7 +227,7 @@ struct uarch* get_uarch_from_cpuid_intel(uint32_t ef, uint32_t f, uint32_t em, u
  CHECK_UARCH(arch, 0,  6,  8, 12, NA, "Tiger Lake",      UARCH_TIGER_LAKE,       10) // instlatx64
  CHECK_UARCH(arch, 0,  6,  8, 13, NA, "Tiger Lake",      UARCH_TIGER_LAKE,       10) // instlatx64
  // CHECK_UARCH(arch, 0,  6,  8, 14,  9, ...) It is not possible to determine uarch only from CPUID dump (can be Kaby Lake or Amber Lake)
-  CHECK_UARCH(arch, 0,  6,  8, 14, 10, "Kaby Lake",       UARCH_KABY_LAKE,        14) // wikichip
+  CHECK_UARCH(arch, 0,  6,  8, 14, 10, "Coffee Lake",     UARCH_COFFEE_LAKE,      14) // wikichip
  CHECK_UARCH(arch, 0,  6,  8, 14, 11, "Whiskey Lake",    UARCH_WHISKEY_LAKE,     14) // wikichip
  CHECK_UARCH(arch, 0,  6,  8, 14, 12, "Comet Lake",      UARCH_COMET_LAKE,       14) // wikichip
  CHECK_UARCH(arch, 0,  6,  9,  6, NA, "Tremont",         UARCH_TREMONT,          10) // LX*
@@ -234,10 +236,10 @@ struct uarch* get_uarch_from_cpuid_intel(uint32_t ef, uint32_t f, uint32_t em, u
  CHECK_UARCH(arch, 0,  6,  9, 12, NA, "Tremont",         UARCH_TREMONT,          10) // LX*
  CHECK_UARCH(arch, 0,  6,  9, 13, NA, "Sunny Cove",      UARCH_SUNNY_COVE,       10) // LX*
  CHECK_UARCH(arch, 0,  6,  9, 14,  9, "Kaby Lake",       UARCH_KABY_LAKE,        14)
-  CHECK_UARCH(arch, 0,  6,  9, 14, 10, "Coffee Lake",     UARCH_COFFE_LAKE,       14)
+  CHECK_UARCH(arch, 0,  6,  9, 14, 10, "Coffee Lake",     UARCH_COFFEE_LAKE,      14)
-  CHECK_UARCH(arch, 0,  6,  9, 14, 11, "Coffee Lake",     UARCH_COFFE_LAKE,       14)
+  CHECK_UARCH(arch, 0,  6,  9, 14, 11, "Coffee Lake",     UARCH_COFFEE_LAKE,      14)
-  CHECK_UARCH(arch, 0,  6,  9, 14, 12, "Coffee Lake",     UARCH_COFFE_LAKE,       14)
+  CHECK_UARCH(arch, 0,  6,  9, 14, 12, "Coffee Lake",     UARCH_COFFEE_LAKE,      14)
-  CHECK_UARCH(arch, 0,  6,  9, 14, 13, "Coffee Lake",     UARCH_COFFE_LAKE,       14)
+  CHECK_UARCH(arch, 0,  6,  9, 14, 13, "Coffee Lake",     UARCH_COFFEE_LAKE,      14)
  CHECK_UARCH(arch, 0,  6, 10,  5, NA, "Comet Lake",      UARCH_COMET_LAKE,       14) // wikichip
  CHECK_UARCH(arch, 0,  6, 10,  6, NA, "Comet Lake",      UARCH_COMET_LAKE,       14) // instlatx64.atw.hu (i7-10710U)
  CHECK_UARCH(arch, 0,  6, 10,  7, NA, "Rocket Lake",     UARCH_ROCKET_LAKE,      14) // instlatx64.atw.hu (i7-11700K)
@@ -257,7 +259,7 @@ struct uarch* get_uarch_from_cpuid_intel(uint32_t ef, uint32_t f, uint32_t em, u
  return arch;
 }
-// iNApired in Todd Allen's decode_uarch_amd
+// Inspired by Todd Allen's decode_uarch_amd
 struct uarch* get_uarch_from_cpuid_amd(uint32_t ef, uint32_t f, uint32_t em, uint32_t m, int s) {
  struct uarch* arch = emalloc(sizeof(struct uarch));
@@ -358,9 +360,12 @@ struct uarch* get_uarch_from_cpuid_amd(uint32_t ef, uint32_t f, uint32_t em, uin
  CHECK_UARCH(arch,  8, 15,  6,  0, NA, "Zen 2",       UARCH_ZEN2,         7) // undocumented, geekbench.com example
  CHECK_UARCH(arch,  8, 15,  6,  8, NA, "Zen 2",       UARCH_ZEN2,         7) // found on instlatx64
  CHECK_UARCH(arch,  8, 15,  7,  1, NA, "Zen 2",       UARCH_ZEN2,         7) // samples from Steven Noonan and instlatx64
  CHECK_UARCH(arch,  8, 15,  9,  0,  2, "Zen 2",       UARCH_ZEN2,         7) // Steam Deck (instlatx64)
  CHECK_UARCH(arch, 10, 15,  0,  1, NA, "Zen 3",       UARCH_ZEN3,         7) // instlatx64
  CHECK_UARCH(arch, 10, 15,  2,  1, NA, "Zen 3",       UARCH_ZEN3,         7) // instlatx64
  CHECK_UARCH(arch, 10, 15,  4,  4, NA, "Zen 3+",      UARCH_ZEN3_PLUS,    6) // instlatx64 (they say it is Zen3...)
  CHECK_UARCH(arch, 10, 15,  5,  0, NA, "Zen 3",       UARCH_ZEN3,         7) // instlatx64
  CHECK_UARCH(arch, 10, 15,  6,  1,  2, "Zen 4",       UARCH_ZEN4,         5) // instlatx64
  UARCH_END
  return arch;
@@ -408,7 +413,7 @@ int get_number_of_vpus(struct cpuInfo* cpu) {
      case UARCH_ROCKET_LAKE:
      case UARCH_AMBER_LAKE:
      case UARCH_WHISKEY_LAKE:
-      case UARCH_COFFE_LAKE:
+      case UARCH_COFFEE_LAKE:
      case UARCH_PALM_COVE:
      case UARCH_KNIGHTS_LANDING:
@@ -416,10 +421,13 @@ int get_number_of_vpus(struct cpuInfo* cpu) {
      case UARCH_ICE_LAKE:
      case UARCH_TIGER_LAKE:
      case UARCH_ALDER_LAKE:
      // AMD
      case UARCH_ZEN2:
      case UARCH_ZEN3:
      case UARCH_ZEN3_PLUS:
      case UARCH_ZEN4:
        return 2;
      default:
        return 1;
Author	SHA1	Message	Date
Dr-Noob	49941b1717	[v1.03] Bump version	2023-01-05 11:17:49 +01:00
Dr-Noob	a4c0bb1aae	[v1.02] Merge Alder Lake into master branch	2023-01-05 11:17:16 +01:00
Dr-Noob	ea29507b62	[v1.02] Added basic support for Zen4	2022-12-03 16:29:53 +00:00
Dr-Noob	b2aa8194c6	[v1.02][x86] Detect and print core type in ADL	2022-12-02 21:25:30 +00:00
Dr-Noob	d879b06d08	[v1.02][x86] Fix printer for non hybrid architectures	2022-12-02 20:53:40 +00:00
Dr-Noob	6cc18027db	[v1.02][x86] ADL has a shared L3, not one L3 per core type	2022-12-02 20:39:49 +00:00
Dr-Noob	77510c260a	[v1.02][x86] Small fixes to hybrid core detection	2022-11-05 18:28:10 +00:00
Dr-Noob	1eb1a5246e	[v1.02][x86] Extending peakperf computation to hybid cores	2022-11-05 18:17:38 +00:00
Dr-Noob	cec91a1e4d	[v1.02][x86] Adding support printing Intel hybrid CPUs	2022-11-05 17:49:43 +00:00
Dr-Noob	ff5166ea2e	[v1.02][x86] Adding support for topology detection of hybrid cores	2022-11-05 17:48:20 +00:00
Dr-Noob	051d48b7d1	[v1.02][x86] Suport for detecting hybrid_flag	2022-10-20 20:49:10 +00:00
Dr-Noob	e91eef3e65	[v1.02][x86] Split feature detection into a separate function	2022-10-20 20:48:22 +00:00
Dr-Noob	de24d86cd6	[v1.02] Extended get_num_caches_from_files to support maps with commas, should fix #152	2022-09-30 19:44:14 +02:00
Dr-Noob	4b1a087b64	[v1.02][ARM] Kirin SoCs seem to start with hi, not Hi, as reported in #157	2022-09-23 19:36:05 +02:00
Dr-Noob	7eb856ae84	[v1.02] Merge fixes from bugfix	2022-09-23 19:20:30 +02:00
Dr-Noob	65366abe04	[v1.02][ARM] Fixed wrong check for Apple CPU	2022-09-19 11:33:14 +02:00
Dr-Noob	190e5daace	[v1.02] Merge master and bugfix2 branches	2022-09-19 11:27:03 +02:00
Dr-Noob	9f7204d43d	[v1.02][ARM] Updating M1/M2 peak performance calculations according to the discussion in #155	2022-09-14 11:08:36 +02:00
Dr-Noob	87961144d2	[v1.02][X86] Add Zen3+ uarch	2022-09-09 09:02:26 +02:00
Dr-Noob	f4565cb937	[v1.02] Ignore extension warnings in ppc	2022-09-07 08:18:16 +02:00
Dr-Noob	61a1ad8a2b	[v1.02] Merge support for Apple M2	2022-09-06 08:12:44 +02:00
Dr-Noob	ecce0354e5	[v1.02][ARM] Fixed M2 bug, support for M2 should now be complete	2022-09-05 18:17:15 +02:00
Dr-Noob	a955451937	[v1.02][X86] Add Steam Deck CPU as requested in #147	2022-09-05 10:39:06 +02:00
Dr-Noob	db21931118	[v1.02][X86] Fix Coffee Lake spelling	2022-09-05 10:28:07 +02:00
Dr-Noob	71a9308bed	[v1.02][X86] Fixed uarch misclassification as noted by #149	2022-09-05 10:19:48 +02:00
Dr-Noob	dfec2a65d2	[v1.02][ARM] Experimental support for M2	2022-09-05 10:02:26 +02:00
Dr-Noob	b319b52952	[v1.02] Use a different name for ifdef; the other might be already defined!	2022-09-05 08:41:19 +02:00
Dr-Noob	758be60967	[v1.02] Remove wrong check in udev	2022-09-05 08:24:22 +02:00
Dr-Noob	52ba038527	[v1.02][ARM] Add support for M1 Ultra	2022-05-25 22:11:48 +01:00