[v1.05] Continue merging measure-freq #220

- [v1.05][X86] Show SSE if AVX/FMA is not supported
- [v1.05][X86] Do not stop if cach is NULL and check for non-NULL cache in get_topology_info functions
- [v1.05][X86] Fix bug where the number of cpus was not set if NULL was returned inside the loop. Ensure topo is not NULL in get_peak_performance. Fall back to UNKNOWN_DATA when we have no information about topology
2026-05-14 21:00:07 +02:00 · 2024-07-05 08:37:54 +01:00
parent d4cadbd807
commit b019256515
3 changed files with 41 additions and 18 deletions
--- a/src/common/printer.c
+++ b/src/common/printer.c
@@ -61,6 +61,7 @@ enum {
  ATTRIBUTE_NCORES,
  ATTRIBUTE_NCORES_DUAL,
 #ifdef ARCH_X86
+  ATTRIBUTE_SSE,
  ATTRIBUTE_AVX,
  ATTRIBUTE_FMA,
 #elif ARCH_PPC
@@ -96,6 +97,7 @@ static const char* ATTRIBUTE_FIELDS [] = {
  "Cores:",
  "Cores (Total):",
 #ifdef ARCH_X86
+  "SSE:",
  "AVX:",
  "FMA:",
 #elif ARCH_PPC
@@ -131,6 +133,7 @@ static const char* ATTRIBUTE_FIELDS_SHORT [] = {
  "Cores:",
  "Cores (Total):",
 #ifdef ARCH_X86
+  "SSE:",
  "AVX:",
  "FMA:",
 #elif ARCH_PPC
@@ -591,6 +594,7 @@ bool print_cpufetch_x86(struct cpuInfo* cpu, STYLE s, struct color** cs, struct
  for(int i = 0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
    char* max_frequency = get_str_freq(ptr->freq);
    char* avx = get_str_avx(ptr);
+    char* sse = get_str_sse(ptr);
    char* fma = get_str_fma(ptr);
    char* cpu_num = emalloc(sizeof(char) * 9);

@@ -625,8 +629,17 @@ bool print_cpufetch_x86(struct cpuInfo* cpu, STYLE s, struct color** cs, struct
        setAttribute(art, ATTRIBUTE_NCORES, n_cores);
      }
    }
-    setAttribute(art, ATTRIBUTE_AVX, avx);
-    setAttribute(art, ATTRIBUTE_FMA, fma);
+
+    // Show the most modern vector instructions.
+    // If AVX is supported show it, otherwise show SSE
+    if (strcmp(avx, "No") == 0) {
+      setAttribute(art, ATTRIBUTE_SSE, sse);
+    }
+    else {
+      setAttribute(art, ATTRIBUTE_AVX, avx);
+      setAttribute(art, ATTRIBUTE_FMA, fma);
+    }
+
    if(l1i != NULL) setAttribute(art, ATTRIBUTE_L1i, l1i);
    if(l1d != NULL) setAttribute(art, ATTRIBUTE_L1d, l1d);
    if(l2 != NULL) setAttribute(art, ATTRIBUTE_L2, l2);
--- a/src/x86/apic.c
+++ b/src/x86/apic.c
@@ -369,6 +369,11 @@ bool fill_apic_ids(uint32_t* apic_ids, int first_core, int n, bool x2apic_id) {
 }

 bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) {
+  if (topo->cach == NULL) {
+    printWarn("get_topology_from_apic: cach is NULL");
+    return false;
+  }
+
  uint32_t apic_id;
  uint32_t* apic_ids = emalloc(sizeof(uint32_t) * topo->total_cores_module);
  uint32_t* apic_pkg = emalloc(sizeof(uint32_t) * topo->total_cores_module);
--- a/src/x86/cpuid.c
+++ b/src/x86/cpuid.c
@@ -218,7 +218,7 @@ int64_t get_peak_performance(struct cpuInfo* cpu, bool accurate_pp) {
  #endif

    //First, check we have consistent data
-    if(freq == UNKNOWN_DATA || topo->logical_cores == UNKNOWN_DATA) {
+    if(freq == UNKNOWN_DATA || topo == NULL || topo->logical_cores == UNKNOWN_DATA) {
      return -1;
    }

@@ -451,7 +451,7 @@ struct cpuInfo* get_cpu_info(void) {
  cpu->cach = NULL;
  cpu->feat = NULL;

-  uint32_t modules = 1;
+  cpu->num_cpus = 1;
  uint32_t eax = 0;
  uint32_t ebx = 0;
  uint32_t ecx = 0;
@@ -507,12 +507,12 @@ struct cpuInfo* get_cpu_info(void) {
    cpu->hybrid_flag = (edx >> 15) & 0x1;
  }

-  if(cpu->hybrid_flag) modules = 2;
+  if(cpu->hybrid_flag) cpu->num_cpus = 2;

  struct cpuInfo* ptr = cpu;
-  for(uint32_t i=0; i < modules; i++) {
+  for(uint32_t i=0; i < cpu->num_cpus; i++) {
    int32_t first_core;
-    set_cpu_module(i, modules, &first_core);
+    set_cpu_module(i, cpu->num_cpus, &first_core);

    if(i > 0) {
      ptr->next_cpu = emalloc(sizeof(struct cpuInfo));
@@ -547,11 +547,7 @@ struct cpuInfo* get_cpu_info(void) {
      cpu->cpu_name = infer_cpu_name_from_uarch(cpu->arch);
    }

-    // If any field of the struct is NULL,
-    // return early, as next functions
-    // require non NULL fields in cach and topo
    ptr->cach = get_cache_info(ptr);
-    if(ptr->cach == NULL) return cpu;

    if(cpu->hybrid_flag) {
      ptr->topo = get_topology_info(ptr, ptr->cach, i);
@@ -559,16 +555,23 @@ struct cpuInfo* get_cpu_info(void) {
    else {
      ptr->topo = get_topology_info(ptr, ptr->cach, -1);
    }
-    if(cpu->topo == NULL) return cpu;
+
+    // If topo is NULL, return early, as get_peak_performance
+    // requires non-NULL topology.
+    if(ptr->topo == NULL) return cpu;
  }

-  cpu->num_cpus = modules;
  cpu->peak_performance = get_peak_performance(cpu, accurate_pp());

  return cpu;
 }

 bool get_cache_topology_amd(struct cpuInfo* cpu, struct topology* topo) {
+  if (topo->cach == NULL) {
+    printWarn("get_cache_topology_amd: cach is NULL");
+    return false;
+  }
+
  if(cpu->maxExtendedLevels >= 0x8000001D && cpu->topology_extensions) {
    uint32_t i, eax, ebx, ecx, edx, num_sharing_cache, cache_type, cache_level;

@@ -644,10 +647,12 @@ bool get_cache_topology_amd(struct cpuInfo* cpu, struct topology* topo) {

 #ifdef __linux__
 void get_topology_from_udev(struct topology* topo) {
-  // TODO: To be improved in the future
  topo->total_cores = get_ncores_from_cpuinfo();
-  topo->logical_cores = topo->total_cores;
-  topo->physical_cores = topo->total_cores;
+  // TODO: To be improved in the future
+  // Conservative setting as we only know the total
+  // number of cores.
+  topo->logical_cores = UNKNOWN_DATA;
+  topo->physical_cores = UNKNOWN_DATA;
  topo->smt_available = 1;
  topo->smt_supported = 1;
  topo->sockets = 1;
@@ -711,8 +716,8 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach, int
      }
      else {
        printWarn("Can't read topology information from cpuid (needed level is 0x%.8X, max is 0x%.8X)", 0x00000001, cpu->maxLevels);
-        topo->physical_cores = 1;
-        topo->logical_cores = 1;
+        topo->physical_cores = UNKNOWN_DATA;
+        topo->logical_cores = UNKNOWN_DATA;
        topo->smt_available = 1;
        topo->smt_supported = 1;
      }