Small refactor

FIX
Refactor
2026-05-15 05:10:08 +02:00 · 2024-09-10 22:11:09 +01:00 · 2024-09-10 22:07:32 +01:00 · 2024-09-10 22:03:21 +01:00 · 2024-09-10 21:24:21 +01:00 · 2024-09-10 21:01:32 +01:00
12 changed files with 82 additions and 56 deletions
--- a/src/arm/soc.c
+++ b/src/arm/soc.c
@@ -971,11 +971,6 @@ struct system_on_chip* guess_soc_from_devtree(struct system_on_chip* soc) {
  DT_EQ(dt, len, soc, "apple,t6030", "M3 Pro",   SOC_APPLE_M3_PRO,   3)
  DT_EQ(dt, len, soc, "apple,t6031", "M3 Max",   SOC_APPLE_M3_MAX,   3)
  DT_EQ(dt, len, soc, "apple,t6034", "M3 Max",   SOC_APPLE_M3_MAX,   3)
-  // NVIDIA
-  DT_EQ(dt, len, soc, "nvidia,tegra234", "Tegra Orin", SOC_TEGRA_ORIN, 8) // https://www.phoronix.com/news/NVIDIA-Orin-Tegra234-Audio, https://github.com/Dr-Noob/cpufetch/issues/275, https://en.wikipedia.org/wiki/Tegra#Orin
-  // Qualcomm now also in devtree...
-  // TODO: Integrate this with SOC_EQ
-  DT_EQ(dt, len, soc, "qcom,sc8280", "8cx Gen 3", SOC_SNAPD_SC8280XP, 5)
  // grep -oR -h --color -E '"fsl,.*' *.dtsi | sort | uniq | cut -d ',' -f1-2 | grep -v '-'
  // https://elixir.bootlin.com/linux/v6.10.6/source/arch/arm64/boot/dts/freescale    
  DT_EQ(dt, len, soc, "fsl,imx8qm",  "i.MX 8QuadMax",   SOC_NXP_IMX8QM,  28) // https://www.nxp.com/docs/en/fact-sheet/IMX8FAMFS.pdf  
@@ -991,8 +986,6 @@ struct system_on_chip* guess_soc_from_devtree(struct system_on_chip* soc) {
  // https://www.amlogic.com/#Products/393/index.html
  // https://wikimovel.com/index.php/Amlogic_A311D
  DT_EQ(dt, len, soc, "amlogic,a311d", "A311D", SOC_AMLOGIC_A311D, 12)
-  // Marvell
-  DT_EQ(dt, len, soc, "marvell,armada3700", "Armada 3700", SOC_MARVELL_A3700, 28) // http://wiki.espressobin.net/tiki-index.php?page=Armada+3700 (pdf), https://github.com/Dr-Noob/cpufetch/issues/279
  DT_END(dt, len)
 }

--- a/src/arm/socs.h
+++ b/src/arm/socs.h
@@ -318,7 +318,6 @@ enum {
  SOC_SNAPD_SM8550_AB,
  SOC_SNAPD_SM8635,
  SOC_SNAPD_SM8650_AB,
-  SOC_SNAPD_SC8280XP,
  // APPLE
  SOC_APPLE_M1,
  SOC_APPLE_M1_PRO,
@@ -381,7 +380,6 @@ enum {
  SOC_GOOGLE_TENSOR_G3,
  // NVIDIA,
  SOC_TEGRA_X1,
-  SOC_TEGRA_ORIN,
  // ALTRA
  SOC_AMPERE_ALTRA,
  // NXP
@@ -395,8 +393,6 @@ enum {
  SOC_NXP_IMX93,
  // AMLOGIC
  SOC_AMLOGIC_A311D,
-  // MARVELL
-  SOC_MARVELL_A3700,
  // UNKNOWN
  SOC_MODEL_UNKNOWN
 };
@@ -407,16 +403,15 @@ inline static VENDOR get_soc_vendor_from_soc(SOC soc) {
  else if(soc >= SOC_KUNPENG_920 && soc <= SOC_KUNPENG_930) return SOC_VENDOR_KUNPENG;
  else if(soc >= SOC_EXYNOS_3475 && soc <= SOC_EXYNOS_880) return SOC_VENDOR_EXYNOS;
  else if(soc >= SOC_MTK_MT6893 && soc <= SOC_MTK_MT8783) return SOC_VENDOR_MEDIATEK;
-  else if(soc >= SOC_SNAPD_QSD8650 && soc <= SOC_SNAPD_SC8280XP) return SOC_VENDOR_SNAPDRAGON;
+  else if(soc >= SOC_SNAPD_QSD8650 && soc <= SOC_SNAPD_SM8650_AB) return SOC_VENDOR_SNAPDRAGON;
  else if(soc >= SOC_APPLE_M1 && soc <= SOC_APPLE_M3_MAX) return SOC_VENDOR_APPLE;
  else if(soc >= SOC_ALLWINNER_A10 && soc <= SOC_ALLWINNER_R328) return SOC_VENDOR_ALLWINNER;
  else if(soc >= SOC_ROCKCHIP_3288 && soc <= SOC_ROCKCHIP_3588) return SOC_VENDOR_ROCKCHIP;
  else if(soc >= SOC_GOOGLE_TENSOR && soc <= SOC_GOOGLE_TENSOR_G3) return SOC_VENDOR_GOOGLE;
-  else if(soc >= SOC_TEGRA_X1 && soc <= SOC_TEGRA_ORIN) return SOC_VENDOR_NVIDIA;
+  else if(soc >= SOC_TEGRA_X1 && soc <= SOC_TEGRA_X1) return SOC_VENDOR_NVIDIA;
  else if(soc >= SOC_AMPERE_ALTRA && soc <= SOC_AMPERE_ALTRA) return SOC_VENDOR_AMPERE;
  else if(soc >= SOC_NXP_IMX8QM && soc <= SOC_NXP_IMX93) return SOC_VENDOR_NXP;
  else if(soc >= SOC_AMLOGIC_A311D && soc <= SOC_AMLOGIC_A311D) return SOC_VENDOR_AMLOGIC;
-  else if(soc >= SOC_MARVELL_A3700 && soc <= SOC_MARVELL_A3700) return SOC_VENDOR_MARVELL;
  return SOC_VENDOR_UNKNOWN;
 }

--- a/src/common/ascii.h
+++ b/src/common/ascii.h
@@ -433,18 +433,6 @@ $C1#########.###  ##  ##  ##  ##   ###    ######   ##   ###  \
 $C1                                          ###             \
 $C1                                       ###                "

-#define ASCII_MARVELL \
-"$C1                       ...........          ........... \
-$C1                   .###          .       .##          . \
-$C1                 .#####          .      ####          . \
-$C1                #######          .   #######          . \
-$C1             .#########__________. #########__________. \
-$C1          .###########|__________|#########|__________| \
-$C1        ############   ______############   __________  \
-$C1     .#########       |__________|######   |__________| \
-$C1   ###########         ___###########       __________  \
-$C1.##########           |__________|         |__________| "
-
 // --------------------- LONG LOGOS ------------------------- //
 #define ASCII_AMD_L \
 "$C1                                                              \
@@ -623,7 +611,6 @@ asciiL logo_nvidia      = { ASCII_NVIDIA,      45, 19, false, {C_FG_GREEN, C_FG_
 asciiL logo_ampere      = { ASCII_AMPERE,      50, 17, false, {C_FG_RED},                                     {C_FG_WHITE,   C_FG_RED}     };
 asciiL logo_nxp         = { ASCII_NXP,         55,  8, false, {C_FG_YELLOW, C_FG_CYAN, C_FG_GREEN},           {C_FG_CYAN,    C_FG_WHITE}   };
 asciiL logo_amlogic     = { ASCII_AMLOGIC,     58,  8, false, {C_FG_BLUE},                                    {C_FG_BLUE,    C_FG_B_WHITE} };
-asciiL logo_marvell     = { ASCII_MARVELL,     56, 10, false, {C_FG_B_BLACK},                                 {C_FG_B_BLACK, C_FG_B_WHITE} };

 // Long variants          | ----------------------------------------------------------------------------------------------------------------|
 asciiL logo_amd_l       = { ASCII_AMD_L,       62, 19, true,  {C_BG_WHITE, C_BG_GREEN},                       {C_FG_WHITE, C_FG_GREEN}     };
--- a/src/common/cpu.c
+++ b/src/common/cpu.c
@@ -34,6 +34,12 @@ int64_t get_freq(struct frequency* freq) {
  return freq->max;
 }

+#ifdef ARCH_X86
+int64_t get_freq_pp(struct frequency* freq) {
+  return freq->max_pp; // Max frequency when running vectorized code (peak performance only)
+}
+#endif // ARCH_X86
+
 #if defined(ARCH_X86) || defined(ARCH_PPC)
 char* get_str_cpu_name(struct cpuInfo* cpu, bool fcpuname) {
  #ifdef ARCH_X86
--- a/src/common/cpu.h
+++ b/src/common/cpu.h
@@ -60,6 +60,11 @@ struct frequency {
  int32_t max;
  // Indicates if max frequency was measured
  bool measured;
+#ifdef ARCH_X86
+  // Max frequency when running vectorized code.
+  // Used only for peak performance computation.
+  int32_t max_pp;
+#endif
 };

 struct hypervisor {
@@ -188,6 +193,8 @@ struct cpuInfo {
 #ifdef ARCH_X86
  // The index of the first core in the module
  uint32_t first_core_id;
+  // The index of this module
+  uint32_t module_id;
 #endif
 #endif
 };
@@ -200,6 +207,9 @@ uint32_t get_nsockets(struct topology* topo);

 VENDOR get_cpu_vendor(struct cpuInfo* cpu);
 int64_t get_freq(struct frequency* freq);
+#ifdef ARCH_X86
+int64_t get_freq_pp(struct frequency* freq);
+#endif

 char* get_str_aes(struct cpuInfo* cpu);
 char* get_str_sha(struct cpuInfo* cpu);
--- a/src/common/printer.c
+++ b/src/common/printer.c
@@ -395,8 +395,6 @@ void choose_ascii_art(struct ascii* art, struct color** cs, struct terminal* ter
    art->art = &logo_nxp;
  else if(art->vendor == SOC_VENDOR_AMLOGIC)
    art->art = &logo_amlogic;
-  else if(art->vendor == SOC_VENDOR_MARVELL)
-    art->art = &logo_marvell;
  else if(art->vendor == SOC_VENDOR_NVIDIA)
    art->art = choose_ascii_art_aux(&logo_nvidia_l, &logo_nvidia, term, lf);
  else {
--- a/src/common/soc.c
+++ b/src/common/soc.c
@@ -24,7 +24,6 @@ static char* soc_trademark_string[] = {
  [SOC_VENDOR_AMPERE]     = "Ampere ",
  [SOC_VENDOR_NXP]        = "NXP ",
  [SOC_VENDOR_AMLOGIC]    = "Amlogic ",
-  [SOC_VENDOR_MARVELL]    = "Marvell",
  // RISC-V
  [SOC_VENDOR_SIFIVE]     = "SiFive ",
  [SOC_VENDOR_STARFIVE]   = "StarFive ",
--- a/src/common/soc.h
+++ b/src/common/soc.h
@@ -28,7 +28,6 @@ enum {
  SOC_VENDOR_AMPERE,
  SOC_VENDOR_NXP,
  SOC_VENDOR_AMLOGIC,
-  SOC_VENDOR_MARVELL,
  // RISC-V
  SOC_VENDOR_SIFIVE,
  SOC_VENDOR_STARFIVE,
--- a/src/x86/cpuid.c
+++ b/src/x86/cpuid.c
@@ -210,18 +210,14 @@ int64_t get_peak_performance(struct cpuInfo* cpu, bool accurate_pp) {

  for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
    struct topology* topo = ptr->topo;
-    int64_t max_freq = get_freq(ptr->freq);
+    int64_t freq = get_freq(ptr->freq);

-    int64_t freq;
  #ifdef __linux__
    if(accurate_pp)
-      freq = measure_frequency(ptr);
-    else
-      freq = max_freq;
+      freq = get_freq_pp(ptr->freq);
  #else
    // Silence compiler warning
    (void)(accurate_pp);
-    freq = max_freq;
  #endif

    //First, check we have consistent data
@@ -450,6 +446,23 @@ int32_t get_core_type(void) {
  }
 }

+#ifdef __linux__
+// Measures, for every CPU module in the list starting at cpu, the max frequency
+// used to estimate the peak performance and stores it in that module's freq->max_pp.
+void fill_frequency_info_pp(struct cpuInfo* cpu) {
+  int32_t unused;
+  // Scratch vector (one slot per module) handed to every measure_frequency call
+  int32_t *max_freq_pp_vec = emalloc(sizeof(int32_t) * cpu->num_cpus);
+  struct cpuInfo* ptr = cpu;
+  for (uint32_t i=0; i < cpu->num_cpus; i++) {
+    set_cpu_module(i, cpu->num_cpus, &unused);
+    ptr->freq->max_pp = measure_frequency(ptr, max_freq_pp_vec);
+    ptr = ptr->next_cpu;
+  }
+  free(max_freq_pp_vec); // Results were copied into freq->max_pp; do not leak the scratch vector
+}
+#endif
+
 struct cpuInfo* get_cpu_info(void) {
  struct cpuInfo* cpu = emalloc(sizeof(struct cpuInfo));
  cpu->peak_performance = -1;
@@ -546,6 +559,7 @@ struct cpuInfo* get_cpu_info(void) {
      ptr->core_type = get_core_type();
    }
    ptr->first_core_id = first_core;
+    ptr->module_id = i;
    ptr->feat = get_features_info(ptr);

    ptr->arch = get_cpu_uarch(ptr);
@@ -570,6 +584,13 @@ struct cpuInfo* get_cpu_info(void) {
    if(ptr->topo == NULL) return cpu;
  }

+#ifdef __linux__
+  // If accurate_pp is requested, we need to get the max frequency
+  // after fetching the topology for all CPU modules, since the topology
+  // is required by fill_frequency_info_pp
+  if (accurate_pp()) fill_frequency_info_pp(cpu);
+#endif
+
  cpu->peak_performance = get_peak_performance(cpu, accurate_pp());

  return cpu;
@@ -1005,6 +1026,7 @@ struct frequency* get_frequency_info(struct cpuInfo* cpu) {
    }
  #endif

+  freq->max_pp = UNKNOWN_DATA;
  return freq;
 }

--- a/src/x86/freq/freq.c
+++ b/src/x86/freq/freq.c
@@ -21,9 +21,12 @@
 #define FREQ_VECTOR_SIZE         1<<16

 struct freq_thread {
+  // Inputs
+  struct cpuInfo* cpu;
  bool end;
  bool measure;
-  double freq;
+  // Output
+  int32_t *max_pp;
 };

 double vector_average_harmonic(double* v, int len) {
@@ -48,6 +51,7 @@ void* measure_freq(void *freq_ptr) {
  char* line = NULL;
  size_t len = 0;
  ssize_t read;
+  struct cpuInfo* cpu = freq->cpu;

  int v = 0;
  double* freq_vector = malloc(sizeof(double) * FREQ_VECTOR_SIZE);
@@ -76,18 +80,43 @@ void* measure_freq(void *freq_ptr) {
    sleep_ms(500);
  }

-  freq->freq = vector_average_harmonic(freq_vector, v);
-  printWarn("AVX2 measured freq=%f\n", freq->freq);
+  if (cpu->hybrid_flag) {
+    // We have a heterogeneous architecture. After measuring the
+    // frequency for all cores, we now need to compute the average
+    // independently for each CPU module.
+    struct cpuInfo* ptr = cpu;
+    double* freq_vector_ptr = freq_vector;
+
+    for (int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
+      freq->max_pp[i] = vector_average_harmonic(freq_vector_ptr, ptr->topo->total_cores_module);
+      printWarn("AVX2 measured freq=%d (module %d)", freq->max_pp[i], i);
+
+      freq_vector_ptr = freq_vector_ptr + ptr->topo->total_cores_module;
+    }
+  }
+  else {
+    freq->max_pp[0] = vector_average_harmonic(freq_vector, v);
+    printWarn("AVX2 measured freq=%d\n", freq->max_pp[0]);
+  }

  return NULL;
 }

-int64_t measure_frequency(struct cpuInfo* cpu) {
+int32_t measure_frequency(struct cpuInfo* cpu, int32_t *max_freq_pp_vec) {
+  if (cpu->hybrid_flag && cpu->module_id > 0) {
+    // We have a hybrid architecture and we have already
+    // measured the frequency for this module in a previous
+    // call to this function, so now just return it.
+    return max_freq_pp_vec[cpu->module_id];
+  }
+
  int ret;
  int num_spaces;
  struct freq_thread* freq_struct = malloc(sizeof(struct freq_thread));
  freq_struct->end = false;
  freq_struct->measure = false;
+  freq_struct->cpu = cpu;
+  freq_struct->max_pp = max_freq_pp_vec;

  void* (*compute_function)(void*);

@@ -159,5 +188,5 @@ int64_t measure_frequency(struct cpuInfo* cpu) {
  }

  printf("\r%*c", num_spaces, ' ');
-  return freq_struct->freq;
+  return max_freq_pp_vec[0];
 }
--- a/src/x86/freq/freq.h
+++ b/src/x86/freq/freq.h
@@ -8,6 +8,6 @@
 #define MEASURE_TIME_SECONDS         5
 #define LOOP_ITERS           100000000

-int64_t measure_frequency(struct cpuInfo* cpu);
+int32_t measure_frequency(struct cpuInfo* cpu, int32_t *max_freq_pp_vec);

 #endif
--- a/src/x86/uarch.c
+++ b/src/x86/uarch.c
@@ -119,9 +119,7 @@ enum {
  UARCH_ZEN3,
  UARCH_ZEN3_PLUS,
  UARCH_ZEN4,
-  UARCH_ZEN4C,
-  UARCH_ZEN5,
-  UARCH_ZEN5C,
+  UARCH_ZEN4C
 };

 struct uarch {
@@ -412,12 +410,6 @@ struct uarch* get_uarch_from_cpuid_amd(uint32_t ef, uint32_t f, uint32_t em, uin
  CHECK_UARCH(arch, 10, 15,  8, NA, NA, "Zen 4",       UARCH_ZEN4,         5) // instlatx64 (AMD MI300C)
  CHECK_UARCH(arch, 10, 15,  9, NA, NA, "Zen 4",       UARCH_ZEN4,         5) // instlatx64 (AMD MI300A)
  CHECK_UARCH(arch, 10, 15, 10, NA, NA, "Zen 4c",      UARCH_ZEN4C,        5) // instlatx64
-  CHECK_UARCH(arch, 11, 15,  0, NA, NA, "Zen 5",       UARCH_ZEN5,         4) // Turin/EPYC (instlatx64)
-  CHECK_UARCH(arch, 11, 15,  1, NA, NA, "Zen 5c",      UARCH_ZEN5C,        3) // Zen5c EPYC (instlatx64, https://en.wikipedia.org/wiki/Zen_5#cite_note-10)
-  CHECK_UARCH(arch, 11, 15,  2, NA, NA, "Zen 5",       UARCH_ZEN5,         4) // Strix Point (instlatx64)
-  CHECK_UARCH(arch, 11, 15,  4, NA, NA, "Zen 5",       UARCH_ZEN5,         4) // Granite Ridge (instlatx64)
-  CHECK_UARCH(arch, 11, 15,  6, NA, NA, "Zen 5",       UARCH_ZEN5,         4) // Krackan Point (instlatx64)
-  CHECK_UARCH(arch, 11, 15,  7, NA, NA, "Zen 5",       UARCH_ZEN5,         4) // Strix Halo (instlatx64)
  UARCH_END

  return arch;
@@ -560,8 +552,6 @@ char* infer_cpu_name_from_uarch(struct uarch* arch) {
 }

 bool vpus_are_AVX512(struct cpuInfo* cpu) {
-  // Zen5 actually has 2 x AVX512 units
-  // https://www.anandtech.com/show/21469/amd-details-ryzen-ai-300-series-for-mobile-strix-point-with-rdna-35-igpu-xdna-2-npu
  return cpu->arch->uarch != UARCH_ICE_LAKE &&
         cpu->arch->uarch != UARCH_TIGER_LAKE &&
         cpu->arch->uarch != UARCH_ZEN4 &&
@@ -602,8 +592,6 @@ int get_number_of_vpus(struct cpuInfo* cpu) {
      case UARCH_ZEN3_PLUS:
      case UARCH_ZEN4:
      case UARCH_ZEN4C:
-      case UARCH_ZEN5:
-      case UARCH_ZEN5C:
        return 2;
      default:
        return 1;
Author	SHA1	Message	Date
Dr-Noob	14cee5be6b	Small refactor	2024-09-10 22:11:09 +01:00
Dr-Noob	b6eb450eb3	FIX	2024-09-10 22:07:32 +01:00
Dr-Noob	4ccafdc4fa	Refactor	2024-09-10 22:03:21 +01:00
Dr-Noob	dc9b111e85	FIX	2024-09-10 21:24:21 +01:00
Dr-Noob	5093575f11	FIX	2024-09-10 21:01:32 +01:00
Dr-Noob	dc251a457e	FIX	2024-09-10 09:01:02 +01:00
Dr-Noob	ee69cffdbb	WIP: Move accurate-pp to get_cpu_info. Use a vector as argument to store freq	2024-09-08 13:46:02 +01:00
Dr-Noob	de4f47a400	WIP	2024-09-04 18:04:29 +01:00
Dr-Noob	5833601178	Looks good but need to fix bug first	2024-09-03 19:13:19 +01:00