[v0.8][ARM] Building support in ARM

2026-05-15 05:10:08 +02:00 · 2020-11-05 09:28:41 +01:00
parent 5cc9038f3d
commit 1fad4fd10b
25 changed files with 442 additions and 172 deletions
--- a/src/x86/apic.c
+++ b/src/x86/apic.c
@@ -0,0 +1,352 @@
+#ifdef _WIN32
+#include <windows.h>
+#else
+#define _GNU_SOURCE
+#include <sched.h>
+#endif
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "apic.h"
+#include "cpuid_asm.h"
+#include "../common/global.h"
+
+/*
+ * bit_scan_reverse and create_mask code taken from:
+ * https://software.intel.com/content/www/us/en/develop/articles/intel-64-architecture-processor-topology-enumeration.html
+ */
+/*
+ * Finds the most significant set bit of `mask`.
+ *
+ * index: out parameter; receives the zero-based position (0..63) of the
+ *        highest set bit. It is left untouched when `mask` is 0.
+ * Returns 1 when `mask` has at least one bit set, 0 otherwise.
+ */
+unsigned char bit_scan_reverse(uint32_t* index, uint64_t mask) {
+  for(uint32_t bit = 8 * sizeof(uint64_t); bit > 0; bit--) {
+    // Unsigned literal: shifting a signed 1 into bit 63 is undefined behavior
+    if((mask & (1ULL << (bit - 1))) != 0) {
+      *index = bit - 1;
+      break;
+    }
+  }
+  return (unsigned char) (mask != 0);
+}
+
+/*
+ * Builds a mask of low-order bits wide enough to address `num_entries` IDs.
+ *
+ * num_entries: number of IDs that must be addressable.
+ * mask_width:  optional out parameter (may be NULL); receives the number of
+ *              bits covered by the returned mask.
+ * Returns the mask, or 0 (with width 0) when no bits are needed.
+ */
+uint32_t create_mask(uint32_t num_entries, uint32_t *mask_width) {
+  uint32_t width = 0;
+
+  // With zero entries, num_entries * 2 - 1 would wrap around to UINT64_MAX
+  // and yield a bogus 63 bit width, so report an empty mask instead
+  if (num_entries == 0) {
+    if (mask_width) *mask_width = 0;
+    return 0;
+  }
+
+  // NearestPo2(numEntries) is the nearest power of 2 integer that is not less than numEntries
+  // The most significant bit of (numEntries * 2 -1) matches the above definition
+  uint64_t k = (uint64_t)(num_entries) * 2 - 1;
+
+  if (bit_scan_reverse(&width, k) == 0) {
+    if (mask_width) *mask_width = 0;
+    return 0;
+  }
+
+  if (mask_width) *mask_width = width;
+  // width reaches 32 when num_entries > 2^31; shifting a 32 bit value by 32
+  // is undefined, so every width from 31 up maps to the all-ones mask
+  if (width >= 31) return (uint32_t) -1;
+
+  return (1u << width) - 1;
+}
+
+/*
+ * Queries cpuid for an APIC ID.
+ *
+ * x2apic_id: when true, leaf 0x0000000B is queried and EDX is returned;
+ *            otherwise leaf 0x00000001 is queried and the top byte of EBX
+ *            (bits 31..24) is returned.
+ */
+uint32_t get_apic_id(bool x2apic_id) {
+  uint32_t leaf = x2apic_id ? 0x0000000B : 0x00000001;
+  uint32_t reg_b = 0;
+  uint32_t reg_c = 0;
+  uint32_t reg_d = 0;
+
+  cpuid(&leaf, &reg_b, &reg_c, &reg_d);
+
+  return x2apic_id ? reg_d : (reg_b >> 24);
+}
+
+/*
+ * Restricts execution to the logical CPU `cpu_id`.
+ *
+ * On Windows the affinity of the whole process is changed; elsewhere
+ * sched_setaffinity is called with pid 0.
+ * Returns true on success and false on failure (the non-Windows path also
+ * reports the error through perror).
+ */
+bool bind_to_cpu(int cpu_id) {
+  #ifdef _WIN32
+    // The affinity mask has one bit per CPU; reject IDs that do not fit
+    if (cpu_id < 0 || cpu_id >= (int) (8 * sizeof(DWORD_PTR)))
+      return false;
+
+    HANDLE process = GetCurrentProcess();
+    // Widen before shifting: "1 << cpu_id" is evaluated as a 32 bit int and
+    // breaks for cpu_id >= 31 even though DWORD_PTR is 64 bit on Win64
+    DWORD_PTR processAffinityMask = (DWORD_PTR) 1 << cpu_id;
+    return SetProcessAffinityMask(process, processAffinityMask) != 0;
+  #else
+    cpu_set_t currentCPU;
+    CPU_ZERO(&currentCPU);
+    CPU_SET(cpu_id, &currentCPU);
+    if (sched_setaffinity (0, sizeof(currentCPU), &currentCPU) == -1) {
+      perror("sched_setaffinity");
+      return false;
+    }
+    return true;
+  #endif
+}
+
+/*
+ * Fills the SMT / core / package masks of topo->apic from cpuid leaves
+ * 0x00000001 and 0x00000004 (path used when leaf 0x0000000B is unavailable).
+ *
+ * Always returns true.
+ */
+bool fill_topo_masks_apic(struct topology* topo) {
+  struct apic* apic = topo->apic;
+  uint32_t eax;
+  uint32_t ebx = 0;
+  uint32_t ecx = 0;
+  uint32_t edx = 0;
+
+  // Leaf 1: EBX[23:16] is used as the count of logical IDs (core + SMT)
+  eax = 0x00000001;
+  cpuid(&eax, &ebx, &ecx, &edx);
+  uint32_t logical_id_cnt = (ebx >> 16) & 0xFF;
+
+  // Leaf 4, subleaf 0: EAX[31:26] + 1 is used as the count of core IDs
+  eax = 0x00000004;
+  ecx = 0;
+  cpuid(&eax, &ebx, &ecx, &edx);
+  uint32_t core_id_cnt = (eax >> 26) + 1;
+
+  uint32_t smt_id_cnt = logical_id_cnt / core_id_cnt;
+
+  apic->smt_mask = create_mask(smt_id_cnt, &apic->smt_mask_width);
+  // pkg_mask_shift temporarily holds the core field width ...
+  apic->core_mask = create_mask(core_id_cnt, &apic->pkg_mask_shift);
+  // ... and then becomes core width + SMT width
+  apic->pkg_mask_shift += apic->smt_mask_width;
+  apic->core_mask <<= apic->smt_mask_width;
+  // Package bits are all the bits not claimed by the core or SMT fields
+  apic->pkg_mask = ~(apic->core_mask | apic->smt_mask);
+
+  return true;
+}
+
+/*
+ * Fills the SMT / core / package masks of topo->apic by walking the
+ * sublevels of cpuid leaf 0x0000000B until one reports EBX == 0.
+ *
+ * Also stores EBX[15:0] of the SMT level in topo->smt_supported.
+ * Returns false (after printing an error) when no SMT level was reported,
+ * true otherwise.
+ */
+bool fill_topo_masks_x2apic(struct topology* topo) {
+  int32_t level_type;
+  uint32_t level_shift;
+
+  uint32_t coreplus_smt_mask = 0;
+  bool level2 = false;
+  bool level1 = false;
+
+  uint32_t eax = 0;
+  uint32_t ebx = 0;
+  uint32_t ecx = 0;
+  uint32_t edx = 0;
+  uint32_t i = 0;
+
+  while(true) {
+    eax = 0x0000000B;
+    ecx = i;
+    cpuid(&eax, &ebx, &ecx, &edx);
+    if(ebx == 0) break;
+
+    level_type = (ecx >> 8) & 0xFF;
+    // Only EAX[4:0] carries the shift width (bits 31:5 are reserved).
+    // Masking to 5 bits also keeps the shifts below strictly under 32,
+    // which would otherwise be undefined behavior
+    level_shift = eax & 0x1F;
+
+    switch(level_type) {
+      case 1: // SMT
+        topo->apic->smt_mask = ~(0xFFFFFFFF << level_shift);
+        topo->apic->smt_mask_width = level_shift;
+        topo->smt_supported = ebx & 0xFFFF;
+        level1 = true;
+        break;
+      case 2: // Core
+        coreplus_smt_mask = ~(0xFFFFFFFF << level_shift);
+        topo->apic->pkg_mask_shift = level_shift;
+        topo->apic->pkg_mask = ~coreplus_smt_mask;
+        level2 = true;
+        break;
+      default:
+        printErr("Found invalid level when querying topology: %d", level_type);
+        break;
+    }
+
+    i++; // sublevel to query
+  }
+
+  if (level1 && level2) {
+    // Core bits are what remains of the core+SMT field once SMT is removed
+    topo->apic->core_mask = coreplus_smt_mask ^ topo->apic->smt_mask;
+  }
+  else if (!level2 && level1) {
+    // No core level reported: everything above the SMT field is package
+    topo->apic->core_mask = 0;
+    topo->apic->pkg_mask_shift = topo->apic->smt_mask_width;
+    topo->apic->pkg_mask = ~topo->apic->smt_mask;
+  }
+  else {
+    printErr("SMT level was not found when querying topology");
+    return false;
+  }
+
+  return true;
+}
+
+// Returns the number of slots needed to index an array by cache APIC ID:
+// the largest value found in cache_id_apic plus one, but never less than
+// topo->total_cores. Not a very elegant solution: the width should always
+// be as long as the number of cores, but in the case of Xeon Phi KNL it is not
+uint32_t max_apic_id_size(uint32_t** cache_id_apic, struct topology* topo) {
+  uint32_t highest = 0;
+
+  for(int core=0; core < topo->total_cores; core++) {
+    for(int level=0; level < topo->cach->max_cache_level; level++) {
+      uint32_t id = cache_id_apic[core][level];
+      if(id > highest) highest = id;
+    }
+  }
+
+  highest++;
+  if(highest > topo->total_cores) return highest;
+  return topo->total_cores;
+}
+
+/*
+ * Derives socket, SMT and per-level cache counts from the per-CPU APIC
+ * fields gathered by the caller.
+ *
+ * apic_pkg, apic_smt: package / SMT ID of every CPU (topo->total_cores entries).
+ * cache_id_apic:      [cpu][cache level] cache ID of every CPU.
+ * topo:               topo->sockets and topo->smt_available are incremented
+ *                     once per distinct ID; logical_cores, physical_cores and
+ *                     cach_arr[i]->num_caches are overwritten.
+ * Returns true on success, false if memory could not be allocated or no
+ * socket / SMT ID was counted.
+ */
+bool build_topo_from_apic(uint32_t* apic_pkg, uint32_t* apic_smt, uint32_t** cache_id_apic, struct topology* topo) {
+  bool ret = false;
+  uint32_t size = max_apic_id_size(cache_id_apic, topo);
+
+  // The scratch arrays are indexed by package and SMT IDs as well, so make
+  // sure they are large enough for those too (avoids out-of-bounds writes)
+  for(int i=0; i < topo->total_cores; i++) {
+    if(apic_pkg[i] >= size) size = apic_pkg[i] + 1;
+    if(apic_smt[i] >= size) size = apic_smt[i] + 1;
+  }
+
+  uint32_t* sockets = calloc(size, sizeof(uint32_t));
+  uint32_t* smt = calloc(size, sizeof(uint32_t));
+  uint32_t* apic_id = calloc(size, sizeof(uint32_t));
+
+  if(sockets == NULL || smt == NULL || apic_id == NULL) {
+    printErr("Failed to allocate memory to build the topology");
+    goto cleanup;
+  }
+
+  // System topology: mark every ID that is seen, then count the marks
+  for(int i=0; i < topo->total_cores; i++) {
+    sockets[apic_pkg[i]] = 1;
+    smt[apic_smt[i]] = 1;
+  }
+  // Walk the whole array: IDs may be larger than the number of cores
+  for(uint32_t i=0; i < size; i++) {
+    if(sockets[i] != 0)
+      topo->sockets++;
+    if(smt[i] != 0)
+      topo->smt_available++;
+  }
+
+  if(topo->sockets == 0 || topo->smt_available == 0) {
+    printErr("No sockets or SMT IDs were found when building topology");
+    goto cleanup;
+  }
+
+  topo->logical_cores = topo->total_cores / topo->sockets;
+  topo->physical_cores = topo->logical_cores / topo->smt_available;
+
+  // Cache topology: the number of distinct cache IDs per level
+  for(int i=0; i < topo->cach->max_cache_level; i++) {
+    uint32_t num_caches = 0;
+    memset(apic_id, 0, sizeof(uint32_t) * size);
+
+    for(int c=0; c < topo->total_cores; c++) {
+      apic_id[cache_id_apic[c][i]]++;
+    }
+    for(uint32_t c=0; c < size; c++) {
+      if(apic_id[c] > 0) num_caches++;
+    }
+
+    topo->cach->cach_arr[i]->num_caches = num_caches;
+  }
+
+  ret = true;
+
+cleanup:
+  free(sockets);
+  free(smt);
+  free(apic_id);
+
+  return ret;
+}
+
+/*
+ * Fills topo->apic->cache_select_mask[i] for every cache level
+ * i < topo->cach->max_cache_level, using cpuid leaf 0x00000004 with the
+ * level index as subleaf.
+ *
+ * cache_select_mask must already point to max_cache_level entries.
+ */
+void get_cache_topology_from_apic(struct topology* topo) {
+  uint32_t eax = 0;
+  uint32_t ebx = 0;
+  uint32_t ecx = 0;
+  uint32_t edx = 0;
+
+  for(int i=0; i < topo->cach->max_cache_level; i++) {
+    eax = 0x00000004;
+    ecx = i;
+
+    cpuid(&eax, &ebx, &ecx, &edx);
+
+    // EAX[25:14] (12 bits) + 1 is the maximum number of addressable IDs for
+    // logical processors sharing this cache; a 0x7FF mask would drop bit 25
+    uint32_t SMTMaxCntPerEachCache = ((eax >> 14) & 0xFFF) + 1;
+    uint32_t dummy; // mask width is not needed here
+    topo->apic->cache_select_mask[i] = create_mask(SMTMaxCntPerEachCache,&dummy);
+  }
+}
+
+/*
+ * Builds the CPU and cache topology by binding to every logical CPU in turn
+ * and decoding its APIC ID.
+ *
+ * cpu:  cpu->maxLevels selects the x2APIC path (leaf 0x0000000B) when it is
+ *       at least 0x0000000B, the legacy APIC path otherwise.
+ * topo: total_cores and cach->max_cache_level are read; topo->apic masks,
+ *       topo->apic->cache_select_mask and topo->apic->cache_id_apic
+ *       (both allocated here and left attached to topo) and the counters
+ *       filled by build_topo_from_apic are written.
+ * Returns false when the masks could not be determined, when binding to a
+ * CPU failed, or when build_topo_from_apic failed.
+ */
+bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) {
+  bool ret = false;
+  uint32_t apic_id;
+  uint32_t* apic_pkg = malloc(sizeof(uint32_t) * topo->total_cores);
+  uint32_t* apic_core = malloc(sizeof(uint32_t) * topo->total_cores);
+  uint32_t* apic_smt = malloc(sizeof(uint32_t) * topo->total_cores);
+  uint32_t** cache_smt_id_apic = malloc(sizeof(uint32_t*) * topo->total_cores);
+  uint32_t** cache_id_apic = malloc(sizeof(uint32_t*) * topo->total_cores);
+  bool x2apic_id = cpu->maxLevels >= 0x0000000B;
+
+  for(int i=0; i < topo->total_cores; i++) {
+    cache_smt_id_apic[i] = malloc(sizeof(uint32_t) * (topo->cach->max_cache_level));
+    cache_id_apic[i] = malloc(sizeof(uint32_t) * (topo->cach->max_cache_level));
+  }
+  topo->apic->cache_select_mask = malloc(sizeof(uint32_t) * (topo->cach->max_cache_level));
+  topo->apic->cache_id_apic = malloc(sizeof(uint32_t) * (topo->cach->max_cache_level));
+
+  // Every failure below goes through cleanup so the scratch arrays
+  // allocated above are not leaked
+  if(x2apic_id) {
+    if(!fill_topo_masks_x2apic(topo))
+      goto cleanup;
+  }
+  else {
+    if(!fill_topo_masks_apic(topo))
+      goto cleanup;
+  }
+
+  get_cache_topology_from_apic(topo);
+
+  for(int i=0; i < topo->total_cores; i++) {
+    if(!bind_to_cpu(i)) {
+      printErr("Failed binding to CPU %d", i);
+      goto cleanup;
+    }
+    apic_id = get_apic_id(x2apic_id);
+
+    apic_pkg[i] = (apic_id & topo->apic->pkg_mask) >> topo->apic->pkg_mask_shift;
+    apic_core[i] = (apic_id & topo->apic->core_mask) >> topo->apic->smt_mask_width;
+    apic_smt[i] = apic_id & topo->apic->smt_mask;
+
+    for(int c=0; c < topo->cach->max_cache_level; c++) {
+      // Low bits select a CPU within the cache, the remaining bits identify the cache
+      cache_smt_id_apic[i][c] = apic_id & topo->apic->cache_select_mask[c];
+      cache_id_apic[i][c] = apic_id & (-1 ^ topo->apic->cache_select_mask[c]);
+    }
+  }
+
+  /* DEBUG
+  for(int i=0; i < topo->cach->max_cache_level; i++) {
+    printf("[CACH %1d]", i);
+    for(int j=0; j < topo->total_cores; j++)
+      printf("[%03d]", cache_id_apic[j][i]);
+    printf("\n");
+  }
+  for(int i=0; i < topo->total_cores; i++)
+    printf("[%2d] 0x%.8X\n", i, apic_pkg[i]);
+  printf("\n");
+  for(int i=0; i < topo->total_cores; i++)
+    printf("[%2d] 0x%.8X\n", i, apic_core[i]);
+  printf("\n");
+  for(int i=0; i < topo->total_cores; i++)
+    printf("[%2d] 0x%.8X\n", i, apic_smt[i]);*/
+
+  ret = build_topo_from_apic(apic_pkg, apic_smt, cache_id_apic, topo);
+
+  // Assumption: If we cant get smt_available, we assume it is equal to smt_supported...
+  if (!x2apic_id) {
+    printWarn("Can't read SMT from cpuid (needed level is 0x%.8X, max is 0x%.8X)", 0x0000000B, cpu->maxLevels);
+    topo->smt_supported = topo->smt_available;
+  }
+
+cleanup:
+  free(apic_pkg);
+  free(apic_core);
+  free(apic_smt);
+  for(int i=0; i < topo->total_cores; i++) {
+    free(cache_smt_id_apic[i]);
+    free(cache_id_apic[i]);
+  }
+  free(cache_smt_id_apic);
+  free(cache_id_apic);
+
+  return ret;
+}
+
+/*
+ * Reports the number of threads per core as seen through the APIC IDs:
+ * 2 as soon as any CPU has an APIC ID (cpuid leaf 1) with its lowest bit
+ * set, 1 when none has. Returns 0 if binding to a CPU fails.
+ */
+uint32_t is_smt_enabled_amd(struct topology* topo) {
+  int cpu = 0;
+
+  while(cpu < topo->total_cores) {
+    if(!bind_to_cpu(cpu)) {
+      printErr("Failed binding to CPU %d", cpu);
+      return 0;
+    }
+
+    // Only the last bit of the APIC ID matters here.
+    // We assume there isn't any AMD CPU with more than 2th per core.
+    if((get_apic_id(false) & 1) == 1)
+      return 2;
+
+    cpu++;
+  }
+
+  return 1;
+}
--- a/src/x86/apic.h
+++ b/src/x86/apic.h
@@ -0,0 +1,20 @@
+#ifndef __APIC__
+#define __APIC__
+
+#include <stdbool.h>
+#include "cpuid.h"
+
+// Bit masks and shifts used to split an APIC ID into package, core and SMT
+// fields, plus per-cache-level masks.
+struct apic {
+  uint32_t pkg_mask;           // ANDed with the APIC ID to keep the package bits
+  uint32_t pkg_mask_shift;     // right shift applied after pkg_mask to obtain the package ID
+  uint32_t core_mask;          // ANDed with the APIC ID to keep the core bits
+  uint32_t smt_mask_width;     // width of the SMT field; also the right shift applied after core_mask
+  uint32_t smt_mask;           // ANDed with the APIC ID to obtain the SMT ID
+  uint32_t* cache_select_mask; // one mask per cache level (max_cache_level entries)
+  uint32_t* cache_id_apic;     // max_cache_level entries; NOTE(review): allocated in apic.c but not otherwise used there
+};
+
+// Fills `topo` from the APIC IDs of every logical CPU; returns false on failure.
+bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo);
+// Returns 2 if any CPU has an odd APIC ID, 1 if none has, 0 if binding to a CPU failed.
+uint32_t is_smt_enabled_amd(struct topology* topo);
+
+#endif
--- a/src/x86/cpuid.c
+++ b/src/x86/cpuid.c
--- a/src/x86/cpuid.h
+++ b/src/x86/cpuid.h
@@ -0,0 +1,44 @@
+#ifndef __CPUID__
+#define __CPUID__
+
+#include "../common/cpu.h"
+
+// Public interface of the x86 cpuid backend. The definitions are not part
+// of this header, so the notes below are limited to what the signatures show.
+
+// Constructors for the information structs.
+// NOTE(review): presumably heap-allocated and released with the matching
+// free_*_struct function below -- confirm against the definitions.
+struct cpuInfo* get_cpu_info();
+struct cache* get_cache_info(struct cpuInfo* cpu);
+struct frequency* get_frequency_info(struct cpuInfo* cpu);
+struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach);
+
+// Scalar accessors
+VENDOR get_cpu_vendor(struct cpuInfo* cpu);
+uint32_t get_nsockets(struct topology* topo);
+int64_t get_freq(struct frequency* freq);
+
+// String getters for CPU name and features.
+// NOTE(review): ownership of the returned char* is not visible here -- confirm who frees it.
+char* get_str_cpu_name(struct cpuInfo* cpu);
+char* get_str_ncores(struct cpuInfo* cpu);
+char* get_str_avx(struct cpuInfo* cpu);
+char* get_str_sse(struct cpuInfo* cpu);
+char* get_str_fma(struct cpuInfo* cpu);
+char* get_str_aes(struct cpuInfo* cpu);
+char* get_str_sha(struct cpuInfo* cpu);
+
+// String getters for each cache level
+char* get_str_l1i(struct cache* cach);
+char* get_str_l1d(struct cache* cach);
+char* get_str_l2(struct cache* cach);
+char* get_str_l3(struct cache* cach);
+
+// String getter for the frequency
+char* get_str_freq(struct frequency* freq);
+
+// String getters for the topology
+char* get_str_sockets(struct topology* topo);
+char* get_str_topology(struct cpuInfo* cpu, struct topology* topo, bool dual_socket);
+
+// String getter for the peak performance
+char* get_str_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t freq);
+
+// Destructors, one per information struct
+void free_cache_struct(struct cache* cach);
+void free_topo_struct(struct topology* topo);
+void free_freq_struct(struct frequency* freq);
+void free_cpuinfo_struct(struct cpuInfo* cpu);
+
+// Debug helpers
+void debug_cpu_info(struct cpuInfo* cpu);
+void debug_cache(struct cache* cach);
+void debug_frequency(struct frequency* freq);
+
+#endif
--- a/src/x86/cpuid_asm.c
+++ b/src/x86/cpuid_asm.c
@@ -0,0 +1,10 @@
+#include "cpuid_asm.h"
+
+/*
+ * Executes the cpuid instruction.
+ *
+ * On entry *eax and *ecx supply the values loaded into EAX and ECX; on
+ * return all four pointers receive the contents of EAX, EBX, ECX and EDX.
+ */
+void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) {
+        uint32_t leaf = *eax;
+        uint32_t subleaf = *ecx;
+        uint32_t out_a, out_b, out_c, out_d;
+
+        __asm volatile("cpuid"
+            : "=a" (out_a),
+              "=b" (out_b),
+              "=c" (out_c),
+              "=d" (out_d)
+            : "a" (leaf), "c" (subleaf));
+
+        *eax = out_a;
+        *ebx = out_b;
+        *ecx = out_c;
+        *edx = out_d;
+}
--- a/src/x86/cpuid_asm.h
+++ b/src/x86/cpuid_asm.h
@@ -0,0 +1,8 @@
+#ifndef __CPUID_ASM__
+#define __CPUID_ASM__
+
+#include <stdint.h>
+
+// Runs the cpuid instruction: *eax and *ecx are the inputs, and all four
+// pointers receive the resulting EAX, EBX, ECX and EDX values.
+void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx);
+
+#endif
--- a/src/x86/uarch.c
+++ b/src/x86/uarch.c
@@ -0,0 +1,405 @@
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "uarch.h"
+#include "../common/global.h"
+
+/*
+ * - cpuid codes are based on Todd Allen's cpuid program
+ *   http://www.etallen.com/cpuid.html
+ * - This should be updated from time to time, to support newer CPUs. A good reference to look at:
+ *   https://en.wikichip.org/
+ */
+
+// From Todd Allen:
+//
+// MSR_CPUID_table* is a table that appears in Intel document 325462, "Intel 64
+// and IA-32 Architectures Software Developer's Manual Combined Volumes: 1, 2A,
+// 2B, 2C, 2D, 3A, 3B, 3C, 3D, and 4" (the name changes from version to version
+// as more volumes are added).  The table moves around from version to version,
+// but in version 071US, was in "Volume 4: Model-Specific Registers", Table 2-1:
+// "CPUID Signature Values of DisplayFamily_DisplayModel".
+
+// MRG* is a table that forms the bulk of Intel Microcode Revision Guidance (or
+// Microcode Update Guidance).  Its purpose is not to list CPUID values, but
+// it does so, and sometimes lists values that appear nowhere else.
+
+// LX* indicates features that I have seen no documentation for, but which are
+// used by the Linux kernel (which is good evidence that they're correct).
+// The "hook" to find these generally is an X86_FEATURE_* flag in:
+//    arch/x86/include/asm/cpufeatures.h
+// For (synth) and (uarch synth) decoding, it often indicates
+// family/model/stepping value which are documented nowhere else.  These usually
+// can be found in:
+//    arch/x86/include/asm/intel-family.h
+
+// Identifier of a microarchitecture (one of the UARCH_* values)
+typedef uint32_t MICROARCH;
+
+// Data not available. Parenthesized so the macro always expands to a single
+// operand regardless of the surrounding expression
+#define NA                   (-1)
+
+// Unknown manufacturing process
+#define UNK                  (-1)
+
+// Microarchitecture identifiers, stored in variables of type MICROARCH.
+// The values are implicit (UARCH_UNKNOWN is 0), so inserting or reordering
+// entries changes the numeric value of every enumerator that follows.
+// NOTE(review): UARCH_PENYR and UARCH_COFFE_LAKE look like misspellings of
+// Penryn and Coffee Lake; they are referenced by the detection table, so any
+// rename has to be done everywhere at once.
+enum {
+  UARCH_UNKNOWN,
+  // INTEL //
+  UARCH_P5,
+  UARCH_P6,
+  UARCH_DOTHAN,
+  UARCH_YONAH,
+  UARCH_MEROM,
+  UARCH_PENYR,
+  UARCH_NEHALEM,
+  UARCH_WESTMERE,
+  UARCH_BONNELL,
+  UARCH_SALTWELL,
+  UARCH_SANDY_BRIDGE,
+  UARCH_SILVERMONT,
+  UARCH_IVY_BRIDGE,
+  UARCH_HASWELL,
+  UARCH_BROADWELL,
+  UARCH_AIRMONT,
+  UARCH_KABY_LAKE,
+  UARCH_SKYLAKE,
+  UARCH_CASCADE_LAKE,
+  UARCH_COOPER_LAKE,
+  UARCH_KNIGHTS_LANDING,
+  UARCH_KNIGHTS_MILL,
+  UARCH_GOLDMONT,
+  UARCH_PALM_COVE,
+  UARCH_SUNNY_COVE,
+  UARCH_GOLDMONT_PLUS,
+  UARCH_TREMONT,
+  UARCH_WILLOW_COVE,
+  UARCH_COFFE_LAKE,
+  UARCH_ITANIUM,
+  UARCH_KNIGHTS_FERRY,
+  UARCH_KNIGHTS_CORNER,
+  UARCH_WILLAMETTE,
+  UARCH_NORTHWOOD,
+  UARCH_PRESCOTT,
+  UARCH_CEDAR_MILL,
+  UARCH_ITANIUM2,
+  UARCH_ICE_LAKE,
+  // AMD //
+  UARCH_AM486,
+  UARCH_AM5X86,
+  UARCH_K6,
+  UARCH_K7,
+  UARCH_K8,
+  UARCH_K10,
+  UARCH_PUMA_2008,
+  UARCH_BOBCAT,
+  UARCH_BULLDOZER,
+  UARCH_PILEDRIVER,
+  UARCH_STEAMROLLER,
+  UARCH_EXCAVATOR,
+  UARCH_JAGUAR,
+  UARCH_PUMA_2014,
+  UARCH_ZEN,
+  UARCH_ZEN_PLUS,
+  UARCH_ZEN2,
+  UARCH_ZEN3
+};
+
+// Result of the microarchitecture detection
+struct uarch {
+  MICROARCH uarch;  // one of the UARCH_* identifiers
+  char* uarch_str;  // heap-allocated copy of the display name (see fill_uarch)
+  int32_t process; // measured in nanometers
+};
+
+// The three macros below expand to a single if / else-if / else chain:
+//   UARCH_START opens the chain with a branch that is never taken,
+//   CHECK_UARCH appends one "else if" per table row, and
+//   UARCH_END   closes it with the fallback for unmatched values.
+// They expect the locals ef, f, em, m and s to be in scope at the point of
+// use. In a row, NA for em_, m_ or s_ means "match any value"; ef_ and f_
+// must always match exactly. The first matching row wins.
+#define UARCH_START if (false) {}
+#define CHECK_UARCH(arch, ef_, f_, em_, m_, s_, str, uarch, process) \
+   else if (ef_ == ef && f_ == f && (em_ == NA || em_ == em) && (m_ == NA || m_ == m) && (s_ == NA || s_ == s)) fill_uarch(arch, str, uarch, process);
+// Fallback: reports the unmatched values with printBug and stores "Unknown" / UARCH_UNKNOWN with process 0
+#define UARCH_END else { printBug("Unknown microarchitecture detected: M=0x%.8X EM=0x%.8X F=0x%.8X EF=0x%.8X S=0x%.8X", m, em, f, ef, s); fill_uarch(arch, "Unknown", UARCH_UNKNOWN, 0); }
+
+/*
+ * Stores a microarchitecture description in `arch`.
+ *
+ * str:     display name; a heap-allocated copy is stored in arch->uarch_str.
+ * u:       identifier stored in arch->uarch.
+ * process: value stored in arch->process.
+ */
+void fill_uarch(struct uarch* arch, char* str, MICROARCH u, uint32_t process) {
+  size_t bytes = strlen(str) + 1; // includes the terminating '\0'
+  char* name_copy = malloc(sizeof(char) * bytes);
+
+  memcpy(name_copy, str, bytes);
+
+  arch->uarch = u;
+  arch->process = process;
+  arch->uarch_str = name_copy;
+}
+
+// Inspired by Todd Allen's decode_uarch_intel
+//
+// Maps Intel cpuid family/model/stepping values to a microarchitecture.
+// Returns a malloc'ed struct uarch filled by the first matching table row;
+// when no row matches, UARCH_END reports it with printBug and stores
+// "Unknown" / UARCH_UNKNOWN. Rows are tested top to bottom, so a row with a
+// specific stepping must stay above the NA (any stepping) row of the same model.
+// NOTE(review): nothing in this function frees the result -- presumably the caller owns it.
+struct uarch* get_uarch_from_cpuid_intel(uint32_t ef, uint32_t f, uint32_t em, uint32_t m, int s) {
+  struct uarch* arch = malloc(sizeof(struct uarch));
+  
+  // EF: Extended Family                                                           //
+  // F:  Family                                                                    //
+  // EM: Extended Model                                                            //
+  // M: Model                                                                      //
+  // S: Stepping                                                                   //
+  // ----------------------------------------------------------------------------- //
+  //                EF  F  EM   M   S                                              //
+  UARCH_START  
+  CHECK_UARCH(arch, 0,  5,  0,  0, NA, "P5",              UARCH_P5,              800)
+  CHECK_UARCH(arch, 0,  5,  0,  1, NA, "P5",              UARCH_P5,              800)
+  CHECK_UARCH(arch, 0,  5,  0,  2, NA, "P5",              UARCH_P5,              UNK)
+  CHECK_UARCH(arch, 0,  5,  0,  3, NA, "P5",              UARCH_P5,              600)
+  CHECK_UARCH(arch, 0,  5,  0,  4, NA, "P5 MMX",          UARCH_P5,              UNK)
+  CHECK_UARCH(arch, 0,  5,  0,  7, NA, "P5 MMX",          UARCH_P5,              UNK)
+  CHECK_UARCH(arch, 0,  5,  0,  8, NA, "P5 MMX",          UARCH_P5,              250)
+  CHECK_UARCH(arch, 0,  5,  0,  9, NA, "P5 MMX",          UARCH_P5,              UNK)
+  CHECK_UARCH(arch, 0,  6,  0,  0, NA, "P6 Pentium II",   UARCH_P6,              UNK)
+  CHECK_UARCH(arch, 0,  6,  0,  1, NA, "P6 Pentium II",   UARCH_P6,              UNK) // process depends on core
+  CHECK_UARCH(arch, 0,  6,  0,  2, NA, "P6 Pentium II",   UARCH_P6,              UNK)
+  CHECK_UARCH(arch, 0,  6,  0,  3, NA, "P6 Pentium II",   UARCH_P6,              350)
+  CHECK_UARCH(arch, 0,  6,  0,  4, NA, "P6 Pentium II",   UARCH_P6,              UNK)
+  CHECK_UARCH(arch, 0,  6,  0,  5, NA, "P6 Pentium II",   UARCH_P6,              250)
+  CHECK_UARCH(arch, 0,  6,  0,  6, NA, "P6 Pentium II",   UARCH_P6,              UNK)
+  CHECK_UARCH(arch, 0,  6,  0,  7, NA, "P6 Pentium III",  UARCH_P6,              250)
+  CHECK_UARCH(arch, 0,  6,  0,  8, NA, "P6 Pentium III",  UARCH_P6,              180)
+  CHECK_UARCH(arch, 0,  6,  0,  9, NA, "P6 Pentium M",    UARCH_P6,              130)
+  CHECK_UARCH(arch, 0,  6,  0, 10, NA, "P6 Pentium III",  UARCH_P6,              180)
+  CHECK_UARCH(arch, 0,  6,  0, 11, NA, "P6 Pentium III",  UARCH_P6,              130)
+  CHECK_UARCH(arch, 0,  6,  0, 13, NA, "Dothan",          UARCH_DOTHAN,          UNK)  // process depends on core
+  CHECK_UARCH(arch, 0,  6,  0, 14, NA, "Yonah",           UARCH_YONAH,            65)
+  CHECK_UARCH(arch, 0,  6,  0, 15, NA, "Merom",           UARCH_MEROM,            65)
+  CHECK_UARCH(arch, 0,  6,  1,  5, NA, "Dothan",          UARCH_DOTHAN,           90)
+  CHECK_UARCH(arch, 0,  6,  1,  6, NA, "Merom",           UARCH_MEROM,            65)
+  CHECK_UARCH(arch, 0,  6,  1,  7, NA, "Penryn",          UARCH_PENYR,            45)
+  CHECK_UARCH(arch, 0,  6,  1, 10, NA, "Nehalem",         UARCH_NEHALEM,          45)
+  CHECK_UARCH(arch, 0,  6,  1, 12, NA, "Bonnell",         UARCH_BONNELL,          45)
+  CHECK_UARCH(arch, 0,  6,  1, 13, NA, "Penryn",          UARCH_PENYR,            45)
+  CHECK_UARCH(arch, 0,  6,  1, 14, NA, "Nehalem",         UARCH_NEHALEM,          45)
+  CHECK_UARCH(arch, 0,  6,  1, 15, NA, "Nehalem",         UARCH_NEHALEM,          45)
+  CHECK_UARCH(arch, 0,  6,  2,  5, NA, "Westmere",        UARCH_WESTMERE,         32)
+  CHECK_UARCH(arch, 0,  6,  2 , 6, NA, "Bonnell",         UARCH_BONNELL,          45)
+  CHECK_UARCH(arch, 0,  6,  2,  7, NA, "Saltwell",        UARCH_SALTWELL,         32)
+  CHECK_UARCH(arch, 0,  6,  2, 10, NA, "Sandy Bridge",    UARCH_SANDY_BRIDGE,     32)
+  CHECK_UARCH(arch, 0,  6,  2, 12, NA, "Westmere",        UARCH_WESTMERE,         32)
+  CHECK_UARCH(arch, 0,  6,  2, 13, NA, "Sandy Bridge",    UARCH_SANDY_BRIDGE,     32)
+  CHECK_UARCH(arch, 0,  6,  2, 14, NA, "Nehalem",         UARCH_NEHALEM,          45)
+  CHECK_UARCH(arch, 0,  6,  2, 15, NA, "Westmere",        UARCH_WESTMERE,         32)
+  CHECK_UARCH(arch, 0,  6,  3,  5, NA, "Saltwell",        UARCH_SALTWELL,         14)
+  CHECK_UARCH(arch, 0,  6,  3,  6, NA, "Saltwell",        UARCH_SALTWELL,         32)
+  CHECK_UARCH(arch, 0,  6,  3,  7, NA, "Silvermont",      UARCH_SILVERMONT,       22)
+  CHECK_UARCH(arch, 0,  6,  3, 10, NA, "Ivy Bridge",      UARCH_IVY_BRIDGE,       22)
+  CHECK_UARCH(arch, 0,  6,  3, 12, NA, "Haswell",         UARCH_HASWELL,          22)
+  CHECK_UARCH(arch, 0,  6,  3, 13, NA, "Broadwell",       UARCH_BROADWELL,        14)
+  CHECK_UARCH(arch, 0,  6,  3, 14, NA, "Ivy Bridge",      UARCH_IVY_BRIDGE,       22)
+  CHECK_UARCH(arch, 0,  6,  3, 15, NA, "Haswell",         UARCH_HASWELL,          22)
+  CHECK_UARCH(arch, 0,  6,  4,  5, NA, "Haswell",         UARCH_HASWELL,          22)
+  CHECK_UARCH(arch, 0,  6,  4,  6, NA, "Haswell",         UARCH_HASWELL,          22)
+  CHECK_UARCH(arch, 0,  6,  4,  7, NA, "Broadwell",       UARCH_BROADWELL,        14)
+  CHECK_UARCH(arch, 0,  6,  4, 10, NA, "Silvermont",      UARCH_SILVERMONT,       22) // no docs, but /proc/cpuinfo seen in wild
+  CHECK_UARCH(arch, 0,  6,  4, 12, NA, "Airmont",         UARCH_AIRMONT,          14)
+  CHECK_UARCH(arch, 0,  6,  4, 13, NA, "Silvermont",      UARCH_SILVERMONT,       22)
+  CHECK_UARCH(arch, 0,  6,  4, 14,  8, "Kaby Lake",       UARCH_KABY_LAKE,        14)
+  CHECK_UARCH(arch, 0,  6,  4, 14, NA, "Skylake",         UARCH_SKYLAKE,          14)
+  CHECK_UARCH(arch, 0,  6,  4, 15, NA, "Broadwell",       UARCH_BROADWELL,        14)
+  CHECK_UARCH(arch, 0,  6,  5,  5,  6, "Cascade Lake",    UARCH_CASCADE_LAKE,     14) // no docs, but example from Greg Stewart
+  CHECK_UARCH(arch, 0,  6,  5,  5,  7, "Cascade Lake",    UARCH_CASCADE_LAKE,     14)
+  CHECK_UARCH(arch, 0,  6,  5,  5, 10, "Cooper Lake",     UARCH_COOPER_LAKE,      14)
+  CHECK_UARCH(arch, 0,  6,  5,  5, NA, "Skylake",         UARCH_SKYLAKE,          14)
+  CHECK_UARCH(arch, 0,  6,  5,  6, NA, "Broadwell",       UARCH_BROADWELL,        14)
+  CHECK_UARCH(arch, 0,  6,  5,  7, NA, "Knights Landing", UARCH_KNIGHTS_LANDING,  14)
+  CHECK_UARCH(arch, 0,  6,  5, 10, NA, "Silvermont",      UARCH_SILVERMONT,       22) // no spec update; only MSR_CPUID_table* so far
+  CHECK_UARCH(arch, 0,  6,  5, 12, NA, "Goldmont",        UARCH_GOLDMONT,         14)
+  CHECK_UARCH(arch, 0,  6,  5, 13, NA, "Silvermont",      UARCH_SILVERMONT,       22) // no spec update; only MSR_CPUID_table* so far
+  CHECK_UARCH(arch, 0,  6,  5, 14,  8, "Kaby Lake",       UARCH_KABY_LAKE,        14)
+  CHECK_UARCH(arch, 0,  6,  5, 14, NA, "Skylake",         UARCH_SKYLAKE,          14)
+  CHECK_UARCH(arch, 0,  6,  5, 15, NA, "Goldmont",        UARCH_GOLDMONT,         14)
+  CHECK_UARCH(arch, 0,  6,  6,  6, NA, "Palm Cove",       UARCH_PALM_COVE,        10) // no spec update; only MSR_CPUID_table* so far
+  CHECK_UARCH(arch, 0,  6,  6, 10, NA, "Sunny Cove",      UARCH_SUNNY_COVE,       10) // no spec update; only MSR_CPUID_table* so far
+  CHECK_UARCH(arch, 0,  6,  6, 12, NA, "Sunny Cove",      UARCH_SUNNY_COVE,       10) // no spec update; only MSR_CPUID_table* so far
+  CHECK_UARCH(arch, 0,  6,  7,  5, NA, "Airmont",         UARCH_AIRMONT,          14) // no spec update; whispers & rumors
+  CHECK_UARCH(arch, 0,  6,  7, 10, NA, "Goldmont Plus",   UARCH_GOLDMONT_PLUS,    14)
+  CHECK_UARCH(arch, 0,  6,  7, 13, NA, "Sunny Cove",      UARCH_SUNNY_COVE,       10) // no spec update; only MSR_CPUID_table* so far
+  CHECK_UARCH(arch, 0,  6,  7, 14, NA, "Ice Lake",        UARCH_ICE_LAKE,         10)
+  CHECK_UARCH(arch, 0,  6,  8,  5, NA, "Knights Mill",    UARCH_KNIGHTS_MILL,     14) // no spec update; only MSR_CPUID_table* so far
+  CHECK_UARCH(arch, 0,  6,  8,  6, NA, "Tremont",         UARCH_TREMONT,          10) // LX*
+  CHECK_UARCH(arch, 0,  6,  8, 10, NA, "Tremont",         UARCH_TREMONT,          10) // no spec update; only geekbench.com example
+  CHECK_UARCH(arch, 0,  6,  8, 12, NA, "Willow Cove",     UARCH_WILLOW_COVE,      10) // found only on en.wikichip.org
+  CHECK_UARCH(arch, 0,  6,  8, 13, NA, "Willow Cove",     UARCH_WILLOW_COVE,      10) // LX*
+  CHECK_UARCH(arch, 0,  6,  8, 14, NA, "Kaby Lake",       UARCH_KABY_LAKE,        14)
+  CHECK_UARCH(arch, 0,  6,  9,  6, NA, "Tremont",         UARCH_TREMONT,          10) // LX*
+  CHECK_UARCH(arch, 0,  6,  9, 12, NA, "Tremont",         UARCH_TREMONT,          10) // LX*
+  CHECK_UARCH(arch, 0,  6,  9, 13, NA, "Sunny Cove",      UARCH_SUNNY_COVE,       10) // LX*
+  CHECK_UARCH(arch, 0,  6,  9, 14,  9, "Kaby Lake",       UARCH_KABY_LAKE,        14)
+  CHECK_UARCH(arch, 0,  6,  9, 14, 10, "Coffee Lake",     UARCH_COFFE_LAKE,       14)
+  CHECK_UARCH(arch, 0,  6,  9, 14, 11, "Coffee Lake",     UARCH_COFFE_LAKE,       14)
+  CHECK_UARCH(arch, 0,  6,  9, 14, 12, "Coffee Lake",     UARCH_COFFE_LAKE,       14)
+  CHECK_UARCH(arch, 0,  6,  9, 14, 13, "Coffee Lake",     UARCH_COFFE_LAKE,       14)
+  CHECK_UARCH(arch, 0,  6, 10,  5, NA, "Kaby Lake",       UARCH_KABY_LAKE,        14) // LX*
+  CHECK_UARCH(arch, 0,  6, 10,  6, NA, "Kaby Lake",       UARCH_KABY_LAKE,        14) // no spec update; only instlatx64 example
+  CHECK_UARCH(arch, 0, 11,  0,  0, NA, "Knights Ferry",   UARCH_KNIGHTS_FERRY,    45) // found only on en.wikichip.org
+  CHECK_UARCH(arch, 0, 11,  0,  1, NA, "Knights Corner",  UARCH_KNIGHTS_CORNER,   22)
+  CHECK_UARCH(arch, 0, 15,  0,  0, NA, "Willamette",      UARCH_WILLAMETTE,      180)
+  CHECK_UARCH(arch, 0, 15,  0,  1, NA, "Willamette",      UARCH_WILLAMETTE,      180)
+  CHECK_UARCH(arch, 0, 15,  0,  2, NA, "Northwood",       UARCH_NORTHWOOD,       130)
+  CHECK_UARCH(arch, 0, 15,  0,  3, NA, "Prescott",        UARCH_PRESCOTT,         90)
+  CHECK_UARCH(arch, 0, 15,  0,  4, NA, "Prescott",        UARCH_PRESCOTT,         90)
+  CHECK_UARCH(arch, 0, 15,  0,  6, NA, "Cedar Mill",      UARCH_CEDAR_MILL,       65)
+  CHECK_UARCH(arch, 1, 15,  0,  0, NA, "Itanium2",        UARCH_ITANIUM2,        180)
+  CHECK_UARCH(arch, 1, 15,  0,  1, NA, "Itanium2",        UARCH_ITANIUM2,        130)
+  CHECK_UARCH(arch, 1, 15,  0,  2, NA, "Itanium2",        UARCH_ITANIUM2,        130)
+  UARCH_END
+    
+  return arch;
+}
+
+// Inspired by Todd Allen's decode_uarch_amd
+struct uarch* get_uarch_from_cpuid_amd(uint32_t ef, uint32_t f, uint32_t em, uint32_t m, int s) {
+  struct uarch* arch = malloc(sizeof(struct uarch));
+  
+  // EF: Extended Family                                                           //
+  // F:  Family                                                                    //
+  // EM: Extended Model                                                            //
+  // M: Model                                                                      //
+  // S: Stepping                                                                   //
+  // ----------------------------------------------------------------------------- //
+  //                 EF  F  EM   M   S                                             //
+  UARCH_START  
+  CHECK_UARCH(arch,  0,  4,  0,  3, NA, "Am486",       UARCH_AM486,      UNK)
+  CHECK_UARCH(arch,  0,  4,  0,  7, NA, "Am486",       UARCH_AM486,      UNK)
+  CHECK_UARCH(arch,  0,  4,  0,  8, NA, "Am486",       UARCH_AM486,      UNK)
+  CHECK_UARCH(arch,  0,  4,  0,  9, NA, "Am486",       UARCH_AM486,      UNK)
+  CHECK_UARCH(arch,  0,  4, NA, NA, NA, "Am5x86",      UARCH_AM5X86,     UNK)
+  CHECK_UARCH(arch,  0,  5,  0,  6, NA, "K6",          UARCH_K6,         300)
+  CHECK_UARCH(arch,  0,  5,  0,  7, NA, "K6",          UARCH_K6,         250) // *p from sandpile.org
+  CHECK_UARCH(arch,  0,  5,  0, 13, NA, "K6",          UARCH_K6,         80)  // *p from sandpile.org
+  CHECK_UARCH(arch,  0,  5, NA, NA, NA, "K6",          UARCH_K6,         UNK)
+  CHECK_UARCH(arch,  0,  6,  0,  1, NA, "K7",          UARCH_K7,         250)
+  CHECK_UARCH(arch,  0,  6,  0,  2, NA, "K7",          UARCH_K7,         180)
+  CHECK_UARCH(arch,  0,  6, NA, NA, NA, "K7",          UARCH_K7,         UNK)
+  CHECK_UARCH(arch,  0, 15,  0,  4,  8, "K8",          UARCH_K8,         130)
+  CHECK_UARCH(arch,  0, 15,  0,  4, NA, "K8",          UARCH_K8,         130)
+  CHECK_UARCH(arch,  0, 15,  0,  5, NA, "K8",          UARCH_K8,         130)
+  CHECK_UARCH(arch,  0, 15,  0,  7, NA, "K8",          UARCH_K8,         130)
+  CHECK_UARCH(arch,  0, 15,  0,  8, NA, "K8",          UARCH_K8,         130)
+  CHECK_UARCH(arch,  0, 15,  0, 11, NA, "K8",          UARCH_K8,         130)
+  CHECK_UARCH(arch,  0, 15,  0, 12, NA, "K8",          UARCH_K8,         130)
+  CHECK_UARCH(arch,  0, 15,  0, 14, NA, "K8",          UARCH_K8,         130)
+  CHECK_UARCH(arch,  0, 15,  0, 15, NA, "K8",          UARCH_K8,         130)
+  CHECK_UARCH(arch,  0, 15,  1,  4, NA, "K8",          UARCH_K8,          90)
+  CHECK_UARCH(arch,  0, 15,  1,  5, NA, "K8",          UARCH_K8,          90)
+  CHECK_UARCH(arch,  0, 15,  1,  7, NA, "K8",          UARCH_K8,          90)
+  CHECK_UARCH(arch,  0, 15,  1,  8, NA, "K8",          UARCH_K8,          90)
+  CHECK_UARCH(arch,  0, 15,  1, 11, NA, "K8",          UARCH_K8,          90)
+  CHECK_UARCH(arch,  0, 15,  1, 12, NA, "K8",          UARCH_K8,          90)
+  CHECK_UARCH(arch,  0, 15,  1, 15, NA, "K8",          UARCH_K8,          90)
+  CHECK_UARCH(arch,  0, 15,  2,  1, NA, "K8",          UARCH_K8,          90)
+  CHECK_UARCH(arch,  0, 15,  2,  3, NA, "K8",          UARCH_K8,          90)
+  CHECK_UARCH(arch,  0, 15,  2,  4, NA, "K8",          UARCH_K8,          90)
+  CHECK_UARCH(arch,  0, 15,  2,  5, NA, "K8",          UARCH_K8,          90)
+  CHECK_UARCH(arch,  0, 15,  2,  7, NA, "K8",          UARCH_K8,          90)
+  CHECK_UARCH(arch,  0, 15,  2, 11, NA, "K8",          UARCH_K8,          90)
+  CHECK_UARCH(arch,  0, 15,  2, 12, NA, "K8",          UARCH_K8,          90)
+  CHECK_UARCH(arch,  0, 15,  2, 15, NA, "K8",          UARCH_K8,          90)
+  CHECK_UARCH(arch,  0, 15,  4,  1, NA, "K8",          UARCH_K8,          90)
+  CHECK_UARCH(arch,  0, 15,  4,  3, NA, "K8",          UARCH_K8,          90)
+  CHECK_UARCH(arch,  0, 15,  4,  8, NA, "K8",          UARCH_K8,          90)
+  CHECK_UARCH(arch,  0, 15,  4, 11, NA, "K8",          UARCH_K8,          90)
+  CHECK_UARCH(arch,  0, 15,  4, 12, NA, "K8",          UARCH_K8,          90)
+  CHECK_UARCH(arch,  0, 15,  4, 15, NA, "K8",          UARCH_K8,          90)
+  CHECK_UARCH(arch,  0, 15,  5, 13, NA, "K8",          UARCH_K8,          90)
+  CHECK_UARCH(arch,  0, 15,  5, 15, NA, "K8",          UARCH_K8,          90)
+  CHECK_UARCH(arch,  0, 15,  6,  8, NA, "K8",          UARCH_K8,          65)
+  CHECK_UARCH(arch,  0, 15,  6, 11, NA, "K8",          UARCH_K8,          65)
+  CHECK_UARCH(arch,  0, 15,  6, 12, NA, "K8",          UARCH_K8,          65)
+  CHECK_UARCH(arch,  0, 15,  6, 15, NA, "K8",          UARCH_K8,          65)
+  CHECK_UARCH(arch,  0, 15,  7, 12, NA, "K8",          UARCH_K8,          65)
+  CHECK_UARCH(arch,  0, 15,  7, 15, NA, "K8",          UARCH_K8,          65)
+  CHECK_UARCH(arch,  0, 15, 12,  1, NA, "K8",          UARCH_K8,          90)
+  CHECK_UARCH(arch,  1, 15,  0,  0, NA, "K10",         UARCH_K10,         65) // sandpile.org
+  CHECK_UARCH(arch,  1, 15,  0,  2, NA, "K10",         UARCH_K10,         65)
+  CHECK_UARCH(arch,  1, 15,  0,  4, NA, "K10",         UARCH_K10,         45)
+  CHECK_UARCH(arch,  1, 15,  0,  5, NA, "K10",         UARCH_K10,         45)
+  CHECK_UARCH(arch,  1, 15,  0,  6, NA, "K10",         UARCH_K10,         45)
+  CHECK_UARCH(arch,  1, 15,  0,  8, NA, "K10",         UARCH_K10,         45)
+  CHECK_UARCH(arch,  1, 15,  0,  9, NA, "K10",         UARCH_K10,         45)
+  CHECK_UARCH(arch,  1, 15,  0, 10, NA, "K10",         UARCH_K10,         45)
+  CHECK_UARCH(arch,  2, 15, NA, NA, NA, "Puma 2008",   UARCH_PUMA_2008,   65)
+  CHECK_UARCH(arch,  3, 15, NA, NA, NA, "K10",         UARCH_K10,         32)
+  CHECK_UARCH(arch,  5, 15, NA, NA, NA, "Bobcat",      UARCH_BOBCAT,      40)
+  CHECK_UARCH(arch,  6, 15,  0,  0, NA, "Bulldozer",   UARCH_BULLDOZER,   32) // iNAtlatx64 engr sample
+  CHECK_UARCH(arch,  6, 15,  0,  1, NA, "Bulldozer",   UARCH_BULLDOZER,   32)
+  CHECK_UARCH(arch,  6, 15,  0,  2, NA, "Piledriver",  UARCH_PILEDRIVER,  32)
+  CHECK_UARCH(arch,  6, 15,  1,  0, NA, "Piledriver",  UARCH_PILEDRIVER,  32)
+  CHECK_UARCH(arch,  6, 15,  1,  3, NA, "Piledriver",  UARCH_PILEDRIVER,  32)
+  CHECK_UARCH(arch,  6, 15,  3,  0, NA, "Steamroller", UARCH_STEAMROLLER, 28)
+  CHECK_UARCH(arch,  6, 15,  3,  8, NA, "Steamroller", UARCH_STEAMROLLER, 28)
+  CHECK_UARCH(arch,  6, 15,  4,  0, NA, "Steamroller", UARCH_STEAMROLLER, 28) // Software Optimization Guide (15h) says it has the same iNAt latencies as (6,15),(3,x).
+  CHECK_UARCH(arch,  6, 15,  6,  0, NA, "Excavator",   UARCH_EXCAVATOR,   28) // undocumented, but iNAtlatx64 samples
+  CHECK_UARCH(arch,  6, 15,  6,  5, NA, "Excavator",   UARCH_EXCAVATOR,   28) // undocumented, but sample from Alexandros Couloumbis
+  CHECK_UARCH(arch,  6, 15,  7,  0, NA, "Excavator",   UARCH_EXCAVATOR,   28)
+  CHECK_UARCH(arch,  7, 15,  0,  0, NA, "Jaguar",      UARCH_JAGUAR,      28)
+  CHECK_UARCH(arch,  7, 15,  3,  0, NA, "Puma 2014",   UARCH_PUMA_2014,   28)
+  CHECK_UARCH(arch,  8, 15,  0,  0, NA, "Zen",         UARCH_ZEN,         14) // iNAtlatx64 engr sample
+  CHECK_UARCH(arch,  8, 15,  0,  1, NA, "Zen",         UARCH_ZEN,         14)
+  CHECK_UARCH(arch,  8, 15,  0,  8, NA, "Zen+",        UARCH_ZEN_PLUS,    12)
+  CHECK_UARCH(arch,  8, 15,  1,  1, NA, "Zen",         UARCH_ZEN,         14) // found only on en.wikichip.org & iNAtlatx64 examples
+  CHECK_UARCH(arch,  8, 15,  1,  8, NA, "Zen+",        UARCH_ZEN_PLUS,    12) // found only on en.wikichip.org
+  CHECK_UARCH(arch,  8, 15,  3,  1, NA, "Zen 2",       UARCH_ZEN2,         7) // found only on en.wikichip.org
+  CHECK_UARCH(arch,  8, 15,  6,  0, NA, "Zen 2",       UARCH_ZEN2,         7) // undocumented, geekbench.com example
+  CHECK_UARCH(arch,  8, 15,  7,  1, NA, "Zen 2",       UARCH_ZEN2,         7) // undocumented, but samples from Steven Noonan
+  CHECK_UARCH(arch, 10, 15, NA, NA, NA, "Zen 3",       UARCH_ZEN3,         7) // undocumented, LX*
+  UARCH_END
+    
+  return arch;
+}
+
+// Dispatches to the vendor-specific lookup: Intel CPUs use the Intel table, every other vendor falls through to the AMD one.
+struct uarch* get_uarch_from_cpuid(struct cpuInfo* cpu, uint32_t ef, uint32_t f, uint32_t em, uint32_t m, int s) {
+  if(cpu->cpu_vendor != CPU_VENDOR_INTEL)
+    return get_uarch_from_cpuid_amd(ef, f, em, m, s);
+  return get_uarch_from_cpuid_intel(ef, f, em, m, s);
+}
+
+// False only on Ice Lake. NOTE(review): presumably because not all of its vector units are AVX-512 capable - confirm with the caller.
+bool vpus_are_AVX512(struct cpuInfo* cpu) {
+  return cpu->arch->uarch != UARCH_ICE_LAKE;
+}
+
+bool is_knights_landing(struct cpuInfo* cpu) { // true only when the detected uarch is Knights Landing
+  return (cpu->arch->uarch == UARCH_KNIGHTS_LANDING);
+}
+
+// Number of vector units assumed for this CPU: always 1 on AMD; on other vendors 2 for the uarchs listed below, 1 otherwise.
+int get_number_of_vpus(struct cpuInfo* cpu) {
+  int vpus = 1;
+  if(cpu->cpu_vendor == CPU_VENDOR_AMD)
+    return vpus;
+  switch(cpu->arch->uarch) {
+    case UARCH_HASWELL:
+    case UARCH_BROADWELL:
+    case UARCH_SKYLAKE:
+    case UARCH_CASCADE_LAKE:
+    case UARCH_KABY_LAKE:
+    case UARCH_COFFE_LAKE:
+    case UARCH_PALM_COVE:
+    case UARCH_KNIGHTS_LANDING:
+    case UARCH_KNIGHTS_MILL:
+    case UARCH_ICE_LAKE:
+      vpus = 2;
+      break;
+    default:
+      break;
+  }
+  return vpus;
+}
+
+char* get_str_uarch(struct cpuInfo* cpu) { // not a copy: the string still belongs to cpu->arch
+  return cpu->arch->uarch_str;
+}
+
+char* get_str_process(struct cpuInfo* cpu) { // returns a malloc'd label the caller must free; NULL if malloc fails
+  enum { STR_SIZE = 16 }; // "42949672.95um" (UINT32_MAX) needs 14 bytes, so the text can never overrun
+  uint32_t process = cpu->arch->process;
+  char* str = malloc(STR_SIZE);
+  if(str == NULL) return NULL;
+  if(process > 100) // NOTE(review): if process is stored in nm, um would be process/1000 - confirm the unit of values above 100
+    snprintf(str, STR_SIZE, "%.2fum", (double)process/100);
+  else
+    snprintf(str, STR_SIZE, "%unm", (unsigned int)process);
+  return str;
+}
+
+void free_uarch_struct(struct uarch* arch) { // frees the name string, then the struct; NULL is a no-op, like free()
+  if(arch != NULL) free(arch->uarch_str);
+  free(arch);
+}
--- a/src/x86/uarch.h
+++ b/src/x86/uarch.h
@@ -0,0 +1,18 @@
+#ifndef __UARCH__
+#define __UARCH__
+// x86 microarchitecture lookup (Intel / AMD) and the queries built on top of it.
+#include <stdint.h>
+
+#include "cpuid.h"
+
+struct uarch; // only forward-declared here; users of this header handle it through pointers
+
+struct uarch* get_uarch_from_cpuid(struct cpuInfo* cpu, uint32_t ef, uint32_t f, uint32_t em, uint32_t m, int s); // Intel table for Intel CPUs, AMD table otherwise; ef/f/em/m/s are presumably ext. family, family, ext. model, model, stepping - confirm
+bool vpus_are_AVX512(struct cpuInfo* cpu); // false only when the detected uarch is Ice Lake
+bool is_knights_landing(struct cpuInfo* cpu); // true only when the detected uarch is Knights Landing
+int get_number_of_vpus(struct cpuInfo* cpu); // 1 or 2, depending on vendor and uarch
+char* get_str_uarch(struct cpuInfo* cpu); // uarch name stored in cpu->arch (not a copy; do not free separately)
+char* get_str_process(struct cpuInfo* cpu); // newly malloc'd "<n>nm" / "<x.xx>um" string; the caller frees it
+void free_uarch_struct(struct uarch* arch); // frees the uarch name string and the struct itself
+
+#endif
--- a/src/x86/udev.c
+++ b/src/x86/udev.c
@@ -0,0 +1,74 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+
+#include "../common/global.h"
+#include "cpuid.h"
+
+#define _PATH_SYS_SYSTEM    "/sys/devices/system" // sysfs root the paths below are built from by literal concatenation
+#define _PATH_SYS_CPU       _PATH_SYS_SYSTEM"/cpu"
+#define _PATH_ONE_CPU       _PATH_SYS_CPU"/cpu0" // only cpu0 is ever queried by this file
+
+#define _PATH_FREQUENCY     _PATH_ONE_CPU"/cpufreq"
+#define _PATH_FREQUENCY_MAX _PATH_FREQUENCY"/cpuinfo_max_freq" // read by get_max_freq_from_file()
+#define _PATH_FREQUENCY_MIN _PATH_FREQUENCY"/cpuinfo_min_freq" // read by get_min_freq_from_file()
+
+#define DEFAULT_FILE_SIZE 4096 // size in bytes of the buffer get_freq_from_file() reads a file into
+
+// Reads the frequency (in KHz) stored in the sysfs file 'path' and returns it in MHz.
+// Returns UNKNOWN_FREQ if the file cannot be opened, read or parsed, or if the value is implausible.
+long get_freq_from_file(char* path) {
+  long freq = UNKNOWN_FREQ;
+  char buf[DEFAULT_FILE_SIZE] = {0}; // only size-1 bytes are ever filled, so buf stays NUL-terminated
+  int fd = open(path, O_RDONLY);
+  if(fd == -1) {
+    perror("open");
+    printBug("Could not open '%s'", path);
+    return UNKNOWN_FREQ;
+  }
+
+  ssize_t bytes_read = 0;
+  size_t offset = 0;
+  while(offset < sizeof(buf)-1 && (bytes_read = read(fd, buf+offset, sizeof(buf)-1-offset)) > 0) {
+    offset += (size_t) bytes_read;
+  }
+  if(bytes_read < 0) {
+    perror("read");
+    printBug("Could not read '%s'", path);
+    goto out_close;
+  }
+
+  char* end = buf;
+  errno = 0;
+  long ret = strtol(buf, &end, 10);
+  if(errno != 0 || end == buf) { // end == buf means strtol found no digits at all
+    if(errno != 0) perror("strtol");
+    printBug("Failed parsing '%s' file. Read data was: '%s'", path, buf);
+    goto out_close;
+  }
+
+  // The value is in KHz; anything above 10 GHz or below 100 MHz is rejected as invalid
+  if(ret > 10000 * 1000 || ret <  100 * 1000) {
+    printBug("Invalid data was read from file '%s': %ld\n", path, ret);
+    goto out_close;
+  }
+  freq = ret/1000;
+
+out_close: // single exit: the descriptor is released on every path taken after a successful open
+  if (close(fd) == -1) {
+    perror("close");
+    printErr("Closing '%s' failed\n", path);
+  }
+  return freq;
+}
+
+long get_max_freq_from_file(void) { // cpu0's cpuinfo_max_freq in MHz, or UNKNOWN_FREQ on failure
+  return get_freq_from_file(_PATH_FREQUENCY_MAX);
+}
+
+long get_min_freq_from_file(void) { // cpu0's cpuinfo_min_freq in MHz, or UNKNOWN_FREQ on failure
+  return get_freq_from_file(_PATH_FREQUENCY_MIN);
+}
--- a/src/x86/udev.h
+++ b/src/x86/udev.h
@@ -0,0 +1,7 @@
+#ifndef __UDEV__
+#define __UDEV__
+
+long get_max_freq_from_file(void); // cpu0's cpuinfo_max_freq in MHz, or UNKNOWN_FREQ on failure
+long get_min_freq_from_file(void); // cpu0's cpuinfo_min_freq in MHz, or UNKNOWN_FREQ on failure
+
+#endif