[v1.00] Merge latest commits from master branch

2026-03-25 07:50:40 +01:00 · 2021-10-31 20:19:21 +01:00
parent a6714dabc7 bb12a2c276
commit 7692a3cd49
55 changed files with 3626 additions and 1670 deletions
--- a/src/x86/apic.c
+++ b/src/x86/apic.c
@@ -4,6 +4,9 @@
 #elif defined __linux__
  #define _GNU_SOURCE
  #include <sched.h>
+#elif defined __FreeBSD__
+  #include <sys/param.h>
+  #include <sys/cpuset.h>
 #elif defined __APPLE__
  #define UNUSED(x) (void)(x)
 #endif
@@ -13,6 +16,7 @@
 #include <string.h>
 #include <stdio.h>
 #include <unistd.h>
+#include <errno.h>

 #include "apic.h"
 #include "cpuid_asm.h"
@@ -24,7 +28,7 @@
 */
 unsigned char bit_scan_reverse(uint32_t* index, uint64_t mask) {
  for(uint64_t i = (8 * sizeof(uint64_t)); i > 0; i--) {
-    if((mask & (1LL << (i-1))) != 0) {
+    if((mask & (1ULL << (i-1))) != 0) {
      *index = (uint64_t) (i-1);
      break;
    }
@@ -57,7 +61,7 @@ uint32_t get_apic_id(bool x2apic_id) {
  uint32_t ebx = 0;
  uint32_t ecx = 0;
  uint32_t edx = 0;
-  
+
  if(x2apic_id) {
    eax = 0x0000000B;
    cpuid(&eax, &ebx, &ecx, &edx);
@@ -76,16 +80,25 @@ bool bind_to_cpu(int cpu_id) {
    HANDLE process = GetCurrentProcess();
    DWORD_PTR processAffinityMask = 1 << cpu_id;
    return SetProcessAffinityMask(process, processAffinityMask);
-  #else    
+  #elif defined __linux__
    cpu_set_t currentCPU;
    CPU_ZERO(&currentCPU);
    CPU_SET(cpu_id, &currentCPU);
    if (sched_setaffinity (0, sizeof(currentCPU), &currentCPU) == -1) {
-      perror("sched_setaffinity");
+      printWarn("sched_setaffinity: %s", strerror(errno));
      return false;
    }
    return true;
-  #endif  
+  #elif defined __FreeBSD__
+    cpuset_t currentCPU;
+    CPU_ZERO(&currentCPU);
+    CPU_SET(cpu_id, &currentCPU);
+    if(cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, sizeof(cpuset_t), &currentCPU) == -1) {
+      printWarn("cpuset_setaffinity: %s", strerror(errno));
+      return false;
+    }
+    return true;
+  #endif
 }
 #endif

@@ -97,51 +110,51 @@ bool fill_topo_masks_apic(struct topology* topo) {
  uint32_t core_plus_smt_id_max_cnt;
  uint32_t core_id_max_cnt;
  uint32_t smt_id_per_core_max_cnt;
-  
+
  cpuid(&eax, &ebx, &ecx, &edx);
-  
+
  core_plus_smt_id_max_cnt = (ebx >> 16) & 0xFF;
-  
+
  eax = 0x00000004;
  ecx = 0;
  cpuid(&eax, &ebx, &ecx, &edx);
-  
+
  core_id_max_cnt = (eax >> 26) + 1;
-  smt_id_per_core_max_cnt = core_plus_smt_id_max_cnt / core_id_max_cnt; 
-            
-  topo->apic->smt_mask = create_mask(smt_id_per_core_max_cnt, &(topo->apic->smt_mask_width));    
+  smt_id_per_core_max_cnt = core_plus_smt_id_max_cnt / core_id_max_cnt;
+
+  topo->apic->smt_mask = create_mask(smt_id_per_core_max_cnt, &(topo->apic->smt_mask_width));
  topo->apic->core_mask = create_mask(core_id_max_cnt,&(topo->apic->pkg_mask_shift));
  topo->apic->pkg_mask_shift += topo->apic->smt_mask_width;
  topo->apic->core_mask <<= topo->apic->smt_mask_width;
  topo->apic->pkg_mask = (-1) ^ (topo->apic->core_mask | topo->apic->smt_mask);
-  
+
  return true;
 }

 bool fill_topo_masks_x2apic(struct topology* topo) {
  int32_t level_type;
  int32_t level_shift;
-  
+
  int32_t coreplus_smt_mask = 0;
  bool level2 = false;
  bool level1 = false;
-  
+
  uint32_t eax = 0;
  uint32_t ebx = 0;
  uint32_t ecx = 0;
  uint32_t edx = 0;
  uint32_t i = 0;
-  
+
  while(true) {
    eax = 0x0000000B;
    ecx = i;
    cpuid(&eax, &ebx, &ecx, &edx);
    if(ebx == 0) break;
-    
+
    level_type = (ecx >> 8) & 0xFF;
-    level_shift = eax & 0xFFF; 
-    
-    switch(level_type) {      
+    level_shift = eax & 0xFFF;
+
+    switch(level_type) {
      case 1: // SMT
        topo->apic->smt_mask = ~(0xFFFFFFFF << level_shift);
        topo->apic->smt_mask_width = level_shift;
@@ -158,10 +171,10 @@ bool fill_topo_masks_x2apic(struct topology* topo) {
        printErr("Found invalid level when querying topology: %d", level_type);
        break;
    }
-    
+
    i++; // sublevel to query
  }
-  
+
  if (level1 && level2) {
    topo->apic->core_mask = coreplus_smt_mask ^ topo->apic->smt_mask;
  }
@@ -182,13 +195,13 @@ bool fill_topo_masks_x2apic(struct topology* topo) {
 // as the number of cores, but in the case of Xeon Phi KNL it is not
 uint32_t max_apic_id_size(uint32_t** cache_id_apic, struct topology* topo) {
  uint32_t max = 0;
-  
+
  for(int i=0; i < topo->cach->max_cache_level; i++) {
-    for(int j=0; j < topo->total_cores; j++) {          
+    for(int j=0; j < topo->total_cores; j++) {
      if(cache_id_apic[j][i] > max) max = cache_id_apic[j][i];
    }
  }
-  
+
  max++;
  if(max > (uint32_t) topo->total_cores) return max;
  return topo->total_cores;
@@ -196,15 +209,15 @@ uint32_t max_apic_id_size(uint32_t** cache_id_apic, struct topology* topo) {

 bool build_topo_from_apic(uint32_t* apic_pkg, uint32_t* apic_smt, uint32_t** cache_id_apic, struct topology* topo) {
  uint32_t size = max_apic_id_size(cache_id_apic, topo);
-  uint32_t* sockets = malloc(sizeof(uint32_t) * size);
-  uint32_t* smt = malloc(sizeof(uint32_t) * size);
-  uint32_t* apic_id = malloc(sizeof(uint32_t) * size);
+  uint32_t* sockets = emalloc(sizeof(uint32_t) * size);
+  uint32_t* smt = emalloc(sizeof(uint32_t) * size);
+  uint32_t* apic_id = emalloc(sizeof(uint32_t) * size);
  uint32_t num_caches = 0;
-  
+
  memset(sockets, 0, sizeof(uint32_t) * size);
  memset(smt, 0, sizeof(uint32_t) * size);  
-  memset(apic_id, 0, sizeof(uint32_t) * size);  
-  
+  memset(apic_id, 0, sizeof(uint32_t) * size);
+
  // System topology
  for(int i=0; i < topo->total_cores; i++) {
    sockets[apic_pkg[i]] = 1;
@@ -216,45 +229,43 @@ bool build_topo_from_apic(uint32_t* apic_pkg, uint32_t* apic_smt, uint32_t** cac
    if(smt[i] != 0)
      topo->smt_available++;
  }
-  
+
  topo->logical_cores = topo->total_cores / topo->sockets;
  topo->physical_cores = topo->logical_cores / topo->smt_available;
-  
+
  // Cache topology
  for(int i=0; i < topo->cach->max_cache_level; i++) {
    num_caches = 0;
    memset(apic_id, 0, sizeof(uint32_t) * size);
-    
-    for(int c=0; c < topo->total_cores; c++) {      
+
+    for(int c=0; c < topo->total_cores; c++) {
      apic_id[cache_id_apic[c][i]]++;
    }
-    for(uint32_t c=0; c < size; c++) {      
+    for(uint32_t c=0; c < size; c++) {
      if(apic_id[c] > 0) num_caches++;
    }
-    
-    printf("[i=%d]: %p\n", i, (void *) topo->cach->cach_arr[i]);
    topo->cach->cach_arr[i]->num_caches = num_caches;
  }
-  
+
  free(sockets);
  free(smt);
  free(apic_id);
-  
+
  return true;
 }

-void get_cache_topology_from_apic(struct topology* topo) {  
+void get_cache_topology_from_apic(struct topology* topo) {
  uint32_t eax = 0x00000004;
  uint32_t ebx = 0;
  uint32_t ecx = 0;
  uint32_t edx = 0;
-     
-  for(int i=0; i < topo->cach->max_cache_level; i++) { 
+
+  for(int i=0; i < topo->cach->max_cache_level; i++) {
    eax = 0x00000004;
    ecx = i;
-    
+
    cpuid(&eax, &ebx, &ecx, &edx);
-  
+
    uint32_t SMTMaxCntPerEachCache = ((eax >> 14) & 0x7FF) + 1;
    uint32_t dummy;
    topo->apic->cache_select_mask[i] = create_mask(SMTMaxCntPerEachCache,&dummy);
@@ -278,15 +289,15 @@ void add_apic_to_array(uint32_t apic, uint32_t* apic_ids, int n) {
    if(apic_ids[i] != (uint32_t) -1) last = i+1;
    i++;
  }
-  
-  if(!found) { 
+
+  if(!found) {
    apic_ids[last] = apic;
    //printf("Added %d\n", apic);
  }
 }

 bool fill_apic_ids(uint32_t* apic_ids, int n, bool x2apic_id) {
-#ifdef __APPLE__    
+#ifdef __APPLE__
  // macOS extremely dirty approach...
  printf("cpufetch is computing APIC IDs, please wait...\n");
  bool end = false;
@@ -295,12 +306,21 @@ bool fill_apic_ids(uint32_t* apic_ids, int n, bool x2apic_id) {

  while(!end) {
    apic = get_apic_id(x2apic_id);
-    
+
    add_apic_to_array(apic, apic_ids, n);
-    end = apic_array_full(apic_ids, n);    
+    end = apic_array_full(apic_ids, n);
    usleep(1000);
-  }        
+  }
 #else
+  #ifdef __linux__
+  // In Linux we reset the affinity; first we get the original mask
+  cpu_set_t original_mask;
+  if(sched_getaffinity(0, sizeof(original_mask), &original_mask) == -1) {
+    printWarn("sched_getaffinity: %s", strerror(errno));
+    return false;
+  }
+  #endif
+
  for(int i=0; i < n; i++) {
    if(!bind_to_cpu(i)) {
      printErr("Failed binding to CPU %d", i);
@@ -308,18 +328,27 @@ bool fill_apic_ids(uint32_t* apic_ids, int n, bool x2apic_id) {
    }
    apic_ids[i] = get_apic_id(x2apic_id);
  }
+
+  #ifdef __linux__
+  // With the original mask previosly retrieved, we reset the affinity
+  if (sched_setaffinity (0, sizeof(original_mask), &original_mask) == -1) {
+    printWarn("sched_setaffinity: %s", strerror(errno));
+    return false;
+  }
+  #endif
 #endif
+
  return true;
 }

-bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) { 
-  uint32_t apic_id;  
-  uint32_t* apic_ids = malloc(sizeof(uint32_t) * topo->total_cores);
-  uint32_t* apic_pkg = malloc(sizeof(uint32_t) * topo->total_cores);
-  uint32_t* apic_core = malloc(sizeof(uint32_t) * topo->total_cores);
-  uint32_t* apic_smt = malloc(sizeof(uint32_t) * topo->total_cores);
-  uint32_t** cache_smt_id_apic = malloc(sizeof(uint32_t*) * topo->total_cores);
-  uint32_t** cache_id_apic = malloc(sizeof(uint32_t*) * topo->total_cores);
+bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) {
+  uint32_t apic_id;
+  uint32_t* apic_ids = emalloc(sizeof(uint32_t) * topo->total_cores);
+  uint32_t* apic_pkg = emalloc(sizeof(uint32_t) * topo->total_cores);
+  uint32_t* apic_core = emalloc(sizeof(uint32_t) * topo->total_cores);
+  uint32_t* apic_smt = emalloc(sizeof(uint32_t) * topo->total_cores);
+  uint32_t** cache_smt_id_apic = emalloc(sizeof(uint32_t*) * topo->total_cores);
+  uint32_t** cache_id_apic = emalloc(sizeof(uint32_t*) * topo->total_cores);
  bool x2apic_id;

  if(cpu->maxLevels >= 0x0000000B) {
@@ -336,48 +365,48 @@ bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) {
  else {
    x2apic_id = false;
  }
-  
+
  for(int i=0; i < topo->total_cores; i++) {
-    cache_smt_id_apic[i] = malloc(sizeof(uint32_t) * (topo->cach->max_cache_level));
-    cache_id_apic[i] = malloc(sizeof(uint32_t) * (topo->cach->max_cache_level));
+    cache_smt_id_apic[i] = emalloc(sizeof(uint32_t) * (topo->cach->max_cache_level));
+    cache_id_apic[i] = emalloc(sizeof(uint32_t) * (topo->cach->max_cache_level));
  }
-  topo->apic->cache_select_mask = malloc(sizeof(uint32_t) * (topo->cach->max_cache_level));
-  topo->apic->cache_id_apic = malloc(sizeof(uint32_t) * (topo->cach->max_cache_level));
-  
+  topo->apic->cache_select_mask = emalloc(sizeof(uint32_t) * (topo->cach->max_cache_level));
+  topo->apic->cache_id_apic = emalloc(sizeof(uint32_t) * (topo->cach->max_cache_level));
+
  if(x2apic_id) {
    if(!fill_topo_masks_x2apic(topo))
      return false;
  }
  else {
    if(!fill_topo_masks_apic(topo))
-      return false;    
+      return false;
  }
-  
-  get_cache_topology_from_apic(topo);  
-  
+
+  get_cache_topology_from_apic(topo);
+
  if(!fill_apic_ids(apic_ids, topo->total_cores, x2apic_id))
    return false;
-  
-  for(int i=0; i < topo->total_cores; i++) {    
+
+  for(int i=0; i < topo->total_cores; i++) {
    apic_id = apic_ids[i];
-    
+
    apic_pkg[i] = (apic_id & topo->apic->pkg_mask) >> topo->apic->pkg_mask_shift;
    apic_core[i] = (apic_id & topo->apic->core_mask) >> topo->apic->smt_mask_width;
    apic_smt[i] = apic_id & topo->apic->smt_mask;
-    
+
    for(int c=0; c < topo->cach->max_cache_level; c++) {
      cache_smt_id_apic[i][c] = apic_id & topo->apic->cache_select_mask[c];
      cache_id_apic[i][c] = apic_id & (-1 ^ topo->apic->cache_select_mask[c]);
    }
  }
-  
+
  /* DEBUG
  for(int i=0; i < topo->cach->max_cache_level; i++) {
    printf("[CACH %1d]", i);
    for(int j=0; j < topo->total_cores; j++)
      printf("[%03d]", cache_id_apic[j][i]);
    printf("\n");
-  }  
+  }
  for(int i=0; i < topo->total_cores; i++)
    printf("[%2d] 0x%.8X\n", i, apic_pkg[i]);
  printf("\n");
@@ -386,16 +415,16 @@ bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) {
  printf("\n");
  for(int i=0; i < topo->total_cores; i++)
    printf("[%2d] 0x%.8X\n", i, apic_smt[i]);*/
-    
-  
+
+
  bool ret = build_topo_from_apic(apic_pkg, apic_smt, cache_id_apic, topo);
-  
+
  // Assumption: If we cant get smt_available, we assume it is equal to smt_supported...
  if (!x2apic_id) {
-    printWarn("Can't read SMT from cpuid (needed level is 0x%.8X, max is 0x%.8X)", 0x0000000B, cpu->maxLevels); 
+    printWarn("Can't read SMT from cpuid (needed level is 0x%.8X, max is 0x%.8X)", 0x0000000B, cpu->maxLevels);
    topo->smt_supported = topo->smt_available;
  }
-  
+
  free(apic_pkg);
  free(apic_core);
  free(apic_smt);
@@ -405,17 +434,17 @@ bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) {
  }
  free(cache_smt_id_apic);
  free(cache_id_apic);
-    
+
  return ret;
-} 
+}

 uint32_t is_smt_enabled_amd(struct topology* topo) {
 #ifdef __APPLE__
  UNUSED(topo);
  return 1;
-#else  
+#else
  uint32_t id;
-  
+
  for(int i = 0; i < topo->total_cores; i++) {
    if(!bind_to_cpu(i)) {
      printErr("Failed binding to CPU %d", i);
@@ -424,7 +453,7 @@ uint32_t is_smt_enabled_amd(struct topology* topo) {
    id = get_apic_id(false) & 1; // get the last bit
    if(id == 1) return 2; // We assume there isn't any AMD CPU with more than 2th per core.
  }
-  
-  return 1;  
-#endif  
+
+  return 1;
+#endif
 }