WIP

[v1.06][ARM] Add more NVIDIA SoCs
2026-03-25 07:50:40 +01:00 · 2024-09-17 08:52:01 +01:00 · 2024-09-17 08:49:51 +01:00 · 2024-09-12 08:08:53 +01:00 · 2024-09-12 07:51:12 +01:00 · 2024-09-11 18:38:56 +01:00
19 changed files with 545 additions and 27 deletions
--- a/25
+++ b/25
@@ -70,12 +70,27 @@ $(error Aborting compilation)

 	OUTPUT=cpufetch
 else
-	# Assume x86_64
+	arch := $(shell cc -dumpmachine)
+	arch := $(firstword $(subst -, ,$(arch)))
+
+	ifeq ($(arch), $(filter $(arch), x86_64 amd64 i386 i486 i586 i686))
+		SRC_DIR=src/x86/
+		SOURCE += $(COMMON_SRC) $(SRC_DIR)cpuid.c $(SRC_DIR)apic.c $(SRC_DIR)cpuid_asm.c $(SRC_DIR)uarch.c
+		HEADERS += $(COMMON_HDR) $(SRC_DIR)cpuid.h $(SRC_DIR)apic.h $(SRC_DIR)cpuid_asm.h $(SRC_DIR)uarch.h
+		CFLAGS += -DARCH_X86 -std=c99
+	else ifeq ($(arch), $(filter $(arch), arm aarch64_be aarch64 arm64 armv8b armv8l armv7l armv6l))
+		SRC_DIR=src/arm/
+		SOURCE += $(COMMON_SRC) $(SRC_DIR)midr.c $(SRC_DIR)uarch.c $(SRC_COMMON)soc.c $(SRC_DIR)soc.c $(SRC_COMMON)pci.c $(SRC_DIR)udev.c sve.o
+		HEADERS += $(COMMON_HDR) $(SRC_DIR)midr.h $(SRC_DIR)uarch.h  $(SRC_COMMON)soc.h $(SRC_DIR)soc.h $(SRC_COMMON)pci.h $(SRC_DIR)udev.c $(SRC_DIR)socs.h
+		CFLAGS += -DARCH_ARM -std=c99
+	else
+		# Error lines should not be tabulated because Makefile complains about it
+$(warning Unsupported arch detected: $(arch). See https://github.com/Dr-Noob/cpufetch#1-support)
+$(warning If your architecture is supported but the compilation fails, please open an issue in https://github.com/Dr-Noob/cpufetch/issues)
+$(error Aborting compilation)
+	endif
+
 	GIT_VERSION := ""
-	SRC_DIR=src/x86/
-	SOURCE += $(COMMON_SRC) $(SRC_DIR)cpuid.c $(SRC_DIR)apic.c $(SRC_DIR)cpuid_asm.c $(SRC_DIR)uarch.c
-	HEADERS += $(COMMON_HDR) $(SRC_DIR)cpuid.h $(SRC_DIR)apic.h $(SRC_DIR)cpuid_asm.h $(SRC_DIR)uarch.h
-	CFLAGS += -DARCH_X86 -std=c99
 	SANITY_FLAGS += -Wno-pedantic-ms-format
 	OUTPUT=cpufetch.exe
 endif
--- a/README.md
+++ b/README.md
@@ -63,7 +63,7 @@ cpufetch is a command-line tool written in C that displays the CPU information i
 | OS          | x86_64 / x86       | ARM                | RISC-V             | PowerPC            |
 |:-----------:|:------------------:|:------------------:|:------------------:|:------------------:|
 | GNU / Linux | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: |
-| Windows     | :heavy_check_mark: | :x:                | :x:                | :x:                |
+| Windows     | :heavy_check_mark: | :heavy_check_mark: | :x:                | :x:                |
 | Android     | :heavy_check_mark: | :heavy_check_mark: | :x:                | :x:                |
 | macOS       | :heavy_check_mark: | :heavy_check_mark: | :x:                | :heavy_check_mark: |
 | FreeBSD     | :heavy_check_mark: | :x:                | :x:                | :x:                |
--- a/src/arm/midr.c
+++ b/src/arm/midr.c
@@ -11,6 +11,10 @@
  #include "../common/freq.h"
 #elif defined __APPLE__ || __MACH__
  #include "../common/sysctl.h"
+#elif defined _WIN32
+  #define WIN32_LEAN_AND_MEAN
+  #define NOMINMAX
+  #include <windows.h>
 #endif

 #include "../common/global.h"
@@ -21,6 +25,60 @@
 #include "uarch.h"
 #include "sve.h"

+
+#if defined _WIN32
+// Windows stores processor information in the registry at:
+// "HKEY_LOCAL_MACHINE\HARDWARE\DESCRIPTION\System\CentralProcessor"
+// Within this directory, each core will get its own folder with
+// registry entries named `CP ####` that map to ARM system registers.
+// Ex. the MIDR register for core 0 is the `REG_QWORD` at:
+// "HKEY_LOCAL_MACHINE\HARDWARE\DESCRIPTION\System\CentralProcessor\0\CP 4000"
+// The names of these `CP ####` registers follow their register ID encoding in hexadecimal
+// (op0&1):op1:crn:crm:op2.
+// More registers can be found here:
+// https://developer.arm.com/documentation/ddi0601/2024-06/AArch64-Registers
+// Some important ones:
+// CP 4000: MIDR_EL1
+// CP 4020: ID_AA64PFR0_EL1
+// CP 4021: ID_AA64PFR1_EL1
+// CP 4028: ID_AA64DFR0_EL1
+// CP 4029: ID_AA64DFR1_EL1
+// CP 402C: ID_AA64AFR0_EL1
+// CP 402D: ID_AA64AFR1_EL1
+// CP 4030: ID_AA64ISAR0_EL1
+// CP 4031: ID_AA64ISAR1_EL1
+// CP 4038: ID_AA64MMFR0_EL1
+// CP 4039: ID_AA64MMFR1_EL1
+// CP 403A: ID_AA64MMFR2_EL1
+
+// Reads an integer value called `name` from the registry key `path`, which is
+// looked up under HKEY_LOCAL_MACHINE.
+//  - value: destination buffer; it must be able to hold an int64_t when is64
+//           is true and an int32_t otherwise.
+//  - is64:  selects between a REG_QWORD (true) and a REG_DWORD (false) entry.
+// Returns true on success. On failure the error is reported with printBug
+// and false is returned.
+bool read_registry_hklm_int(char* path, char* name, void* value, bool is64) {
+  // Both the accepted registry type and the size of the output buffer
+  // depend only on the requested width
+  DWORD buffer_size = is64 ? sizeof(int64_t) : sizeof(int32_t);
+  int type_filter = is64 ? RRF_RT_REG_QWORD : RRF_RT_REG_DWORD;
+
+  LONG status = RegGetValueA(HKEY_LOCAL_MACHINE, path, name, type_filter, NULL, value, &buffer_size);
+  if (status == ERROR_SUCCESS) {
+    return true;
+  }
+
+  printBug("Error reading registry entry \"%s\\%s\"", path, name);
+  return false;
+}
+
+// Reads the integer registry value `name` that belongs to core `core_index`,
+// i.e. the value stored under
+// "HKEY_LOCAL_MACHINE\HARDWARE\DESCRIPTION\System\CentralProcessor\<core_index>".
+//  - value: destination buffer (int64_t when is64 is true, int32_t otherwise).
+//  - is64:  true to read a REG_QWORD, false to read a REG_DWORD.
+// Returns true on success and false if the registry could not be read.
+bool get_win32_core_info_int(uint32_t core_index, char* name, void* value, bool is64) {
+  // 45 characters of key prefix + up to 10 decimal digits (the largest
+  // uint32_t) + the terminating NULL byte.
+  // The path lives on the stack: it is only needed for the duration of the
+  // registry query, so there is nothing to allocate (and nothing to leak).
+  char path[45+10+1];
+  snprintf(path, sizeof(path), "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\%u", core_index);
+  return read_registry_hklm_int(path, name, value, is64);
+}
+#endif
+
 bool cores_are_equal(int c1pos, int c2pos, uint32_t* midr_array, int32_t* freq_array) {
  return midr_array[c1pos] == midr_array[c2pos] && freq_array[c1pos] == freq_array[c2pos];
 }
@@ -208,6 +266,46 @@ struct features* get_features_info(void) {
  feat->NEON = true;
  feat->SVE = false;
  feat->SVE2 = false;
+#elif defined _WIN32
+
+  // CP 4020 maps to the ID_AA64PFR0_EL1 register on Windows
+  // https://developer.arm.com/documentation/ddi0601/2024-06/AArch64-Registers/ID-AA64PFR0-EL1--AArch64-Processor-Feature-Register-0
+  int64_t pfr0 = 0;
+  if(!get_win32_core_info_int(0, "CP 4020", &pfr0, true)) {
+    printWarn("Unable to retrieve PFR0 via registry");
+  }
+  else {
+    // AdvSimd[23:20]
+    // -1: Not available
+    //  0: AdvSimd support
+    //  1: AdvSimd support + FP16
+    int8_t adv_simd = ((int64_t)(pfr0 << (60 - 20)) >> 60);
+    feat->NEON = (adv_simd >= 0);
+
+    // SVE[35:32]
+    feat->SVE = (pfr0 >> 32) & 0xF ? true : false;
+  }
+
+  // Windows does not expose a registry entry for the ID_AA64ZFR0_EL1 register;
+  // it would have mapped to "CP 4024".
+  feat->SVE2 = false;
+
+  // CP 4030 maps to the ID_AA64ISAR0_EL1 register on Windows
+  // https://developer.arm.com/documentation/ddi0601/2024-06/AArch64-Registers/ID-AA64ISAR0-EL1--AArch64-Instruction-Set-Attribute-Register-0
+  int64_t isar0 = 0;
+  if(!get_win32_core_info_int(0, "CP 4030", &isar0, true)) {
+    printWarn("Unable to retrieve ISAR0 via registry");
+  }
+  else {
+    // AES[7:4]
+    feat->AES = (isar0 >> 4) & 0xF ? true : false;
+    // SHA1[11:8]
+    feat->SHA1 = (isar0 >> 8) & 0xF ? true : false;
+    // SHA2[15:12]
+    feat->SHA2 = (isar0 >> 12) & 0xF ? true : false;
+    // CRC32[19:16]
+    feat->CRC32 = (isar0 >> 16) & 0xF ? true : false;
+  }
 #endif  // ifdef __linux__

  if (feat->SVE || feat->SVE2) {
@@ -428,6 +526,68 @@ struct cpuInfo* get_cpu_info_mach(struct cpuInfo* cpu) {

  return cpu;
 }
+#elif defined _WIN32
+// Fills in the cpuInfo list on Windows.
+// The number of cores comes from GetSystemInfo; the MIDR ("CP 4000") and the
+// frequency ("~MHz") of every core are read from the registry. One cpuInfo
+// node is then filled in per group reported by fill_ids_from_midr.
+// Returns cpu on success, or NULL if any registry value could not be read.
+// NOTE(review): midr_array, freq_array and ids_array are not released in this
+// function, neither on the early returns nor at the end - confirm whether any
+// callee keeps a reference before adding the frees.
+struct cpuInfo* get_cpu_info_windows(struct cpuInfo* cpu) {
+  init_cpu_info(cpu);
+
+  SYSTEM_INFO sys_info;
+  GetSystemInfo(&sys_info);
+  int ncores = sys_info.dwNumberOfProcessors;
+
+  uint32_t* midr_array = emalloc(sizeof(uint32_t) * ncores);
+  // NOTE(review): elements are int32_t but the size is computed with
+  // sizeof(uint32_t); both are 4 bytes, so the allocation size is the same
+  int32_t* freq_array = emalloc(sizeof(uint32_t) * ncores);
+  uint32_t* ids_array = emalloc(sizeof(uint32_t) * ncores);
+  for(int i=0; i < ncores; i++) {
+    // Cast from 64 to 32 bit to be able to re-use the pre-existing
+    // functions such as fill_ids_from_midr and cores_are_equal
+    int64_t midr_64;
+    if(!get_win32_core_info_int(i, "CP 4000", &midr_64, true)) {
+      return NULL;
+    }
+    midr_array[i] = midr_64;
+    // "~MHz" is read as a 32 bit value directly into the frequency array
+    if(!get_win32_core_info_int(i, "~MHz", &freq_array[i], false)) {
+      return NULL;
+    }
+  }
+
+  // Number of cpuInfo nodes to create (presumably one per group of
+  // identical cores - confirm against fill_ids_from_midr)
+  uint32_t sockets = fill_ids_from_midr(midr_array, freq_array, ids_array, ncores);
+
+  struct cpuInfo* ptr = cpu;
+  // Index of the core used as representative of the current node
+  int midr_idx = 0;
+  int tmp_midr_idx = 0;
+  for(uint32_t i=0; i < sockets; i++) {
+    // The first node is the one passed by the caller; the rest are
+    // allocated here and appended to the list
+    if(i > 0) {
+      ptr->next_cpu = emalloc(sizeof(struct cpuInfo));
+      ptr = ptr->next_cpu;
+      init_cpu_info(ptr);
+
+      // Advance to the first core that differs (MIDR or frequency) from the
+      // representative of the previous node.
+      // NOTE(review): the scan is not bounded by ncores; it relies on such a
+      // core existing whenever sockets > 1
+      tmp_midr_idx = midr_idx;
+      while(cores_are_equal(midr_idx, tmp_midr_idx, midr_array, freq_array)) tmp_midr_idx++;
+      midr_idx = tmp_midr_idx;
+    }
+
+    ptr->midr = midr_array[midr_idx];
+    ptr->arch = get_uarch_from_midr(ptr->midr, ptr);
+
+    ptr->feat = get_features_info();
+    
+    // Only the value reported by the registry is known: it is stored as the
+    // base frequency, while the max frequency is left as unknown
+    ptr->freq = emalloc(sizeof(struct frequency));
+    ptr->freq->measured = false;
+    ptr->freq->base = freq_array[midr_idx];
+    ptr->freq->max = UNKNOWN_DATA;
+
+    ptr->cach = get_cache_info(ptr);
+    ptr->topo = get_topology_info(ptr, ptr->cach, midr_array, freq_array, i, ncores);
+  }
+
+  // The fields below are only set on the first node of the list
+  cpu->num_cpus = sockets;
+  cpu->hv = emalloc(sizeof(struct hypervisor));
+  cpu->hv->present = false;
+  cpu->soc = get_soc(cpu);
+  cpu->peak_performance = get_peak_performance(cpu);
+
+  return cpu;
+}
 #endif

 struct cpuInfo* get_cpu_info(void) {
@@ -438,6 +598,8 @@ struct cpuInfo* get_cpu_info(void) {
    return get_cpu_info_linux(cpu);
  #elif defined __APPLE__ || __MACH__
    return get_cpu_info_mach(cpu);
+  #elif defined _WIN32
+    return get_cpu_info_windows(cpu);
  #endif
 }

--- a/src/arm/soc.c
+++ b/src/arm/soc.c
@@ -14,6 +14,28 @@
  #include "../common/sysctl.h"
 #endif

+#if defined(_WIN32)
+  #define WIN32_LEAN_AND_MEAN
+  #define NOMINMAX
+  #include <windows.h>
+
+// Reads the string (REG_SZ) entry `value` from the registry key `path`, which
+// is looked up under HKEY_LOCAL_MACHINE.
+// On success, *string points to a newly allocated buffer holding the data and
+// *length holds the size reported by the registry; true is returned.
+// Returns false if any of the two registry queries fails.
+// NOTE(review): if the second query fails, the buffer already stored in
+// *string is not released here.
+bool read_registry_hklm_sz(char* path, char* value, char** string, LPDWORD length) {
+  // Size query: with no output buffer, RegGetValueA only reports in *length
+  // how much memory the string needs
+  LONG status = RegGetValueA(HKEY_LOCAL_MACHINE, path, value, RRF_RT_REG_SZ, NULL, NULL, length);
+  if (status != ERROR_SUCCESS) {
+    return false;
+  }
+
+  *string = ecalloc(*length, sizeof(char));
+
+  // Data query: this time the string is written to the new buffer
+  status = RegGetValueA(HKEY_LOCAL_MACHINE, path, value, RRF_RT_REG_SZ, NULL, *string, length);
+  return status == ERROR_SUCCESS;
+}
+#endif
+
 #define NA -1
 #define min(a,b) (((a)<(b))?(a):(b))
 #define ARRAY_SIZE(arr)     (sizeof(arr) / sizeof((arr)[0]))
@@ -971,6 +993,18 @@ struct system_on_chip* guess_soc_from_devtree(struct system_on_chip* soc) {
  DT_EQ(dt, len, soc, "apple,t6030", "M3 Pro",   SOC_APPLE_M3_PRO,   3)
  DT_EQ(dt, len, soc, "apple,t6031", "M3 Max",   SOC_APPLE_M3_MAX,   3)
  DT_EQ(dt, len, soc, "apple,t6034", "M3 Max",   SOC_APPLE_M3_MAX,   3)
+  // NVIDIA
+  // https://elixir.bootlin.com/linux/v6.10.6/source/arch/arm64/boot/dts/nvidia
+  // https://elixir.bootlin.com/linux/v6.10.6/source/arch/arm/boot/dts/nvidia
+  DT_EQ(dt, len, soc, "nvidia,tegra20",  "Tegra 2",      SOC_TEGRA_2,      40) // https://en.wikipedia.org/wiki/Tegra#Tegra_2
+  DT_EQ(dt, len, soc, "nvidia,tegra30",  "Tegra 3",      SOC_TEGRA_3,      40) // https://en.wikipedia.org/wiki/Tegra#Tegra_3
+  DT_EQ(dt, len, soc, "nvidia,tegra114", "Tegra 4",      SOC_TEGRA_4,      28) // https://en.wikipedia.org/wiki/Tegra#Tegra_4
+  DT_EQ(dt, len, soc, "nvidia,tegra124", "Tegra K1",     SOC_TEGRA_K1,     28) // https://en.wikipedia.org/wiki/Tegra#Tegra_K1
+  DT_EQ(dt, len, soc, "nvidia,tegra132", "Tegra K1",     SOC_TEGRA_K1,     28) // https://en.wikipedia.org/wiki/Tegra#Tegra_K1
+  DT_EQ(dt, len, soc, "nvidia,tegra210", "Tegra X1",     SOC_TEGRA_X1,     20) // https://en.wikipedia.org/wiki/Tegra#Tegra_X1
+  DT_EQ(dt, len, soc, "nvidia,tegra186", "Tegra X2",     SOC_TEGRA_X2,     16) // https://en.wikipedia.org/wiki/Tegra#Tegra_X2
+  DT_EQ(dt, len, soc, "nvidia,tegra194", "Tegra Xavier", SOC_TEGRA_XAVIER, 12) // https://en.wikipedia.org/wiki/Tegra#Xavier
+  DT_EQ(dt, len, soc, "nvidia,tegra234", "Tegra Orin",   SOC_TEGRA_ORIN,    8) // https://www.phoronix.com/news/NVIDIA-Orin-Tegra234-Audio, https://github.com/Dr-Noob/cpufetch/issues/275, https://en.wikipedia.org/wiki/Tegra#Orin
  // Qualcomm now also in devtree...
  // TODO: Integrate this with SOC_EQ
  DT_EQ(dt, len, soc, "qcom,sc8280", "8cx Gen 3", SOC_SNAPD_SC8280XP, 5)
@@ -984,14 +1018,88 @@ struct system_on_chip* guess_soc_from_devtree(struct system_on_chip* soc) {
  DT_EQ(dt, len, soc, "fsl,imx8dxp", "i.MX 8DualXPlus", SOC_NXP_IMX8DXP, NA)
  DT_EQ(dt, len, soc, "fsl,imx8qxp", "i.MX 8QuadXPlus", SOC_NXP_IMX8QXP, NA)
  DT_EQ(dt, len, soc, "fsl,imx93",   "i.MX 93",         SOC_NXP_IMX93,   NA)  
-  // TODO: Add more Amlogic SoCs: https://elixir.bootlin.com/linux/v6.10.6/source/arch/arm64/boot/dts/amlogic
-  // https://github.com/Dr-Noob/cpufetch/issues/268
-  // https://www.amlogic.com/#Products/393/index.html
-  // https://wikimovel.com/index.php/Amlogic_A311D
-  DT_EQ(dt, len, soc, "amlogic,a311d", "A311D", SOC_AMLOGIC_A311D, 12)
+  // [1] https://elixir.bootlin.com/linux/v6.10.6/source/arch/arm64/boot/dts/amlogic
+  // [2] https://github.com/Dr-Noob/cpufetch/issues/268
+  // [3] https://www.amlogic.com/#Products/393/index.html
+  // [4] https://wikimovel.com
+  // [5] https://wiki.postmarketos.org/wiki/Amlogic_S905W/S905D/S905X/S905L/S805X/S805Y/S905Z
+  DT_EQ(dt, len, soc, "amlogic,a311d",  "A311D",  SOC_AMLOGIC_A311D,  12) // [1,2,3,4]
+  DT_EQ(dt, len, soc, "amlogic,a311d2", "A311D2", SOC_AMLOGIC_A311D2, 12) // [1,4]
+  DT_EQ(dt, len, soc, "amlogic,s905w",  "S905W",  SOC_AMLOGIC_S905W,  28) // [1,5]
+  DT_EQ(dt, len, soc, "amlogic,s905d",  "S905D",  SOC_AMLOGIC_S905D,  28) // [1,5]
+  DT_EQ(dt, len, soc, "amlogic,s905x",  "S905X",  SOC_AMLOGIC_S905X,  28) // [1,4,5]
+  DT_EQ(dt, len, soc, "amlogic,s805x",  "S805X",  SOC_AMLOGIC_S805X,  28) // [1,5]
+  // Marvell
+  // https://elixir.bootlin.com/linux/v6.10.6/source/arch/arm64/boot/dts/marvell
+  DT_EQ(dt, len, soc, "marvell,armada3700", "Armada 3700", SOC_MARVELL_A3700,  28) // http://wiki.espressobin.net/tiki-index.php?page=Armada+3700 (pdf), https://github.com/Dr-Noob/cpufetch/issues/279
+  DT_EQ(dt, len, soc, "marvell,armada3710", "Armada 3710", SOC_MARVELL_A3710,  28) // https://gzhls.at/blob/ldb/2/7/4/2/6eacf9661c5a2d20c4d7cd3328ffba47bfd6.pdf
+  DT_EQ(dt, len, soc, "marvell,armada3720", "Armada 3720", SOC_MARVELL_A3720,  28) // https://gzhls.at/blob/ldb/2/7/4/2/6eacf9661c5a2d20c4d7cd3328ffba47bfd6.pdf
+  DT_EQ(dt, len, soc, "marvell,armada7200", "Armada 7200", SOC_MARVELL_A7200,  28) // Assuming same manufacturing process as 7400
+  DT_EQ(dt, len, soc, "marvell,armada7400", "Armada 7400", SOC_MARVELL_A7400,  28) // https://www.marvell.com/content/dam/marvell/en/public-collateral/embedded-processors/marvell-embedded-processors-armada-7040-product-brief-2017-12.pdf
+  DT_EQ(dt, len, soc, "marvell,armada8020", "Armada 8020", SOC_MARVELL_A8020,  28) // https://datasheet.datasheetarchive.com/originals/crawler/marvell.com/da7b6a997e49e9e93fa4b1f4cfbed71b.pdf
+  DT_EQ(dt, len, soc, "marvell,armada8040", "Armada 8040", SOC_MARVELL_A8040,  28) // https://www.verical.com/datasheet/marvell-technology-group-application-processors-and-soc-88f8040-a2-bvp4i160-6331367.pdf
+  DT_EQ(dt, len, soc, "marvell,cn9130",     "CN9130",      SOC_MARVELL_CN9130, NA) // https://www.marvell.com/content/dam/marvell/en/public-collateral/embedded-processors/marvell-infrastructure-processors-octeon-tx2-cn913x-product-brief.pdf
+  DT_EQ(dt, len, soc, "marvell,cn9131",     "CN9131",      SOC_MARVELL_CN9131, NA) // https://www.marvell.com/content/dam/marvell/en/public-collateral/embedded-processors/marvell-infrastructure-processors-octeon-tx2-cn913x-product-brief.pdf
+  DT_EQ(dt, len, soc, "marvell,cn9132",     "CN9132",      SOC_MARVELL_CN9132, NA) // https://www.marvell.com/content/dam/marvell/en/public-collateral/embedded-processors/marvell-infrastructure-processors-octeon-tx2-cn913x-product-brief.pdf
  DT_END(dt, len)
 }

+// This function is different from the rest of guess_soc_from_xxx, which try inferring
+// the exact SoC model by matching some string against a list of known values.
+// On the other hand, this function will just try to infer the SoC vendor first by
+// matching the device tree vendor name (i.e., the first value, before the comma).
+// If that is successful, then it also fills in the SoC name using the string from
+// the device tree.
+// The critical difference is that this function does not need a LUT to fill in the
+// SoC, it just needs to find a known vendor. On the other hand, the detection is
+// less powerful since we cannot get the manufacturing process, and the SoC name will
+// come directly from the device tree, meaning that it will likely be less precise.
+//
+// Returns soc: filled in by fill_soc_raw if a known vendor was found, and
+// untouched otherwise (a warning is printed in that case).
+struct system_on_chip* guess_raw_soc_from_devtree(struct system_on_chip* soc) {
+  int num_vendors;
+  struct devtree** dt_vendors = get_devtree_compatible_struct(&num_vendors);
+  if (dt_vendors == NULL) {
+    return soc;
+  }
+
+  typedef struct {
+    char* compatible;
+    VENDOR soc_vendor;
+  } devtreeToVendor;
+
+  // https://elixir.bootlin.com/linux/v6.10.6/source/arch/arm64/boot/dts
+  // grep -oR --color -E 'compatible = ".*"' <soc_vendor> | cut -d '=' -f2 | cut -d ',' -f1 | tr -d '"' | sort | uniq -c | sort
+  // - The following vendors are not included because they don't seem to be present in dts:
+  // SOC_VENDOR_(KIRIN, KUNPENG, GOOGLE, AMPERE).
+  // - The commented vendors are not included intentionally, because I prefer updating their LUT manually.
+  devtreeToVendor socFromDevtree[] = {
+    // {"qcom",       SOC_VENDOR_SNAPDRAGON},
+    // {"samsung",    SOC_VENDOR_EXYNOS},
+    // {"brcm",       SOC_VENDOR_BROADCOM},
+    // {"apple",      SOC_VENDOR_APPLE},
+    // {"rockchip",   SOC_VENDOR_ROCKCHIP},
+    // {"nvidia",     SOC_VENDOR_NVIDIA},
+    {"mediatek",   SOC_VENDOR_MEDIATEK},
+    {"fsl",        SOC_VENDOR_NXP     },
+    {"nxp",        SOC_VENDOR_NXP     },
+    {"amlogic",    SOC_VENDOR_AMLOGIC },
+    {"marvell",    SOC_VENDOR_MARVELL },
+    {NULL,         SOC_VENDOR_UNKNOWN }
+  };
+
+  // The table is terminated by the entry with a NULL compatible string.
+  // Table order has priority over device tree order: the first known vendor
+  // that appears anywhere in the device tree wins.
+  for (int index = 0; socFromDevtree[index].compatible != NULL; index++) {
+    for (int i=0; i < num_vendors; i++) {
+      if (strcmp(socFromDevtree[index].compatible, dt_vendors[i]->vendor) == 0) {
+        fill_soc_raw(soc, dt_vendors[i]->model, socFromDevtree[index].soc_vendor);
+        // Stop at the first match. Otherwise a later match would overwrite
+        // this one and the "no match" warning below would always be printed.
+        return soc;
+      }
+    }
+  }
+
+  printWarn("guess_raw_soc_from_devtree: No device matched the list");
+  return soc;
+}
+
 struct system_on_chip* guess_soc_from_pci(struct system_on_chip* soc, struct cpuInfo* cpu) {
  struct pci_devices * pci = get_pci_devices();
  if (pci == NULL) {
@@ -1218,6 +1326,11 @@ struct system_on_chip* get_soc(struct cpuInfo* cpu) {
    if(soc->vendor == SOC_VENDOR_UNKNOWN) {
      soc = guess_soc_from_pci(soc, cpu);
    }
+    if (soc->vendor == SOC_VENDOR_UNKNOWN) {
+      // If we fall here it means all previous functions failed to detect the SoC.
+      // In such case, try with our last resort. If it also fails, we will just give up
+      soc = guess_raw_soc_from_devtree(soc);
+    }
  }
 #elif defined __APPLE__ || __MACH__
  soc = guess_soc_apple(soc);
@@ -1227,14 +1340,30 @@ struct system_on_chip* get_soc(struct cpuInfo* cpu) {
  else {
    return soc;
  }
-#endif // ifdef __linux__
+#endif

+#if defined _WIN32
+  // Use the first core to determine the SoC
+  char* processor_name_string = NULL;
+  unsigned long processor_name_string_len = 0;
+  if(!read_registry_hklm_sz("HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", "ProcessorNameString", &processor_name_string, &processor_name_string_len)) {
+    printWarn("Failed to aquire SoC name from registery");
+    return soc;
+  }
+
+  soc->name = processor_name_string;
+  soc->raw_name = processor_name_string;
+  soc->vendor = try_match_soc_vendor_name(processor_name_string);
+  soc->model = SOC_MODEL_UNKNOWN;
+  soc->process = UNKNOWN;
+#else
  if(soc->model == SOC_MODEL_UNKNOWN) {
    // raw_name might not be NULL, but if we were unable to find
    // the exact SoC, just print "Unkwnown"
    soc->raw_name = emalloc(sizeof(char) * (strlen(STRING_UNKNOWN)+1));
    snprintf(soc->raw_name, strlen(STRING_UNKNOWN)+1, STRING_UNKNOWN);
  }
+#endif 

  return soc;
 }
--- a/src/arm/socs.h
+++ b/src/arm/socs.h
@@ -380,7 +380,15 @@ enum {
  SOC_GOOGLE_TENSOR_G2,
  SOC_GOOGLE_TENSOR_G3,
  // NVIDIA,
+  SOC_TEGRA_2,
+  SOC_TEGRA_3,
+  SOC_TEGRA_4,
+  SOC_TEGRA_K1,
+  SOC_TEGRA_K2,
  SOC_TEGRA_X1,
+  SOC_TEGRA_X2,
+  SOC_TEGRA_XAVIER,
+  SOC_TEGRA_ORIN,
  // ALTRA
  SOC_AMPERE_ALTRA,
  // NXP
@@ -394,6 +402,22 @@ enum {
  SOC_NXP_IMX93,
  // AMLOGIC
  SOC_AMLOGIC_A311D,
+  SOC_AMLOGIC_A311D2,
+  SOC_AMLOGIC_S905W,
+  SOC_AMLOGIC_S905D,
+  SOC_AMLOGIC_S905X,
+  SOC_AMLOGIC_S805X,
+  // MARVELL
+  SOC_MARVELL_A3700,
+  SOC_MARVELL_A3710,
+  SOC_MARVELL_A3720,
+  SOC_MARVELL_A7200,
+  SOC_MARVELL_A7400,
+  SOC_MARVELL_A8020,
+  SOC_MARVELL_A8040,
+  SOC_MARVELL_CN9130,
+  SOC_MARVELL_CN9131,
+  SOC_MARVELL_CN9132,
  // UNKNOWN
  SOC_MODEL_UNKNOWN
 };
@@ -409,10 +433,11 @@ inline static VENDOR get_soc_vendor_from_soc(SOC soc) {
  else if(soc >= SOC_ALLWINNER_A10 && soc <= SOC_ALLWINNER_R328) return SOC_VENDOR_ALLWINNER;
  else if(soc >= SOC_ROCKCHIP_3288 && soc <= SOC_ROCKCHIP_3588) return SOC_VENDOR_ROCKCHIP;
  else if(soc >= SOC_GOOGLE_TENSOR && soc <= SOC_GOOGLE_TENSOR_G3) return SOC_VENDOR_GOOGLE;
-  else if(soc >= SOC_TEGRA_X1 && soc <= SOC_TEGRA_X1) return SOC_VENDOR_NVIDIA;
+  else if(soc >= SOC_TEGRA_2 && soc <= SOC_TEGRA_ORIN) return SOC_VENDOR_NVIDIA;
  else if(soc >= SOC_AMPERE_ALTRA && soc <= SOC_AMPERE_ALTRA) return SOC_VENDOR_AMPERE;
  else if(soc >= SOC_NXP_IMX8QM && soc <= SOC_NXP_IMX93) return SOC_VENDOR_NXP;
-  else if(soc >= SOC_AMLOGIC_A311D && soc <= SOC_AMLOGIC_A311D) return SOC_VENDOR_AMLOGIC;
+  else if(soc >= SOC_AMLOGIC_A311D && soc <= SOC_AMLOGIC_S805X) return SOC_VENDOR_AMLOGIC;
+  else if(soc >= SOC_MARVELL_A3700 && soc <= SOC_MARVELL_CN9132) return SOC_VENDOR_MARVELL;
  return SOC_VENDOR_UNKNOWN;
 }

--- a/src/common/ascii.h
+++ b/src/common/ascii.h
@@ -433,6 +433,18 @@ $C1#########.###  ##  ##  ##  ##   ###    ######   ##   ###  \
 $C1                                          ###             \
 $C1                                       ###                "

+// Marvell logo: 10 rows of 56 columns, the same size that is declared for it
+// in logo_marvell. Every row starts with the "$C1" marker.
+#define ASCII_MARVELL \
+"$C1                       ...........          ........... \
+$C1                   .###          .       .##          . \
+$C1                 .#####          .      ####          . \
+$C1                #######          .   #######          . \
+$C1             .#########__________. #########__________. \
+$C1          .###########|__________|#########|__________| \
+$C1        ############   ______############   __________  \
+$C1     .#########       |__________|######   |__________| \
+$C1   ###########         ___###########       __________  \
+$C1.##########           |__________|         |__________| "
+
 // --------------------- LONG LOGOS ------------------------- //
 #define ASCII_AMD_L \
 "$C1                                                              \
@@ -611,6 +623,7 @@ asciiL logo_nvidia      = { ASCII_NVIDIA,      45, 19, false, {C_FG_GREEN, C_FG_
 asciiL logo_ampere      = { ASCII_AMPERE,      50, 17, false, {C_FG_RED},                                     {C_FG_WHITE,   C_FG_RED}     };
 asciiL logo_nxp         = { ASCII_NXP,         55,  8, false, {C_FG_YELLOW, C_FG_CYAN, C_FG_GREEN},           {C_FG_CYAN,    C_FG_WHITE}   };
 asciiL logo_amlogic     = { ASCII_AMLOGIC,     58,  8, false, {C_FG_BLUE},                                    {C_FG_BLUE,    C_FG_B_WHITE} };
+asciiL logo_marvell     = { ASCII_MARVELL,     56, 10, false, {C_FG_B_BLACK},                                 {C_FG_B_BLACK, C_FG_B_WHITE} };

 // Long variants          | ----------------------------------------------------------------------------------------------------------------|
 asciiL logo_amd_l       = { ASCII_AMD_L,       62, 19, true,  {C_BG_WHITE, C_BG_GREEN},                       {C_FG_WHITE, C_FG_GREEN}     };
--- a/src/common/cpu.c
+++ b/src/common/cpu.c
@@ -34,6 +34,12 @@ int64_t get_freq(struct frequency* freq) {
  return freq->max;
 }

+#ifdef ARCH_X86
+// Returns the max_pp field of freq, i.e., the max frequency when running
+// vectorized code, which is the one used to compute the peak performance.
+int64_t get_freq_pp(struct frequency* freq) {
+  int64_t vector_max_freq = freq->max_pp;
+  return vector_max_freq;
+}
+#endif
+
 #if defined(ARCH_X86) || defined(ARCH_PPC)
 char* get_str_cpu_name(struct cpuInfo* cpu, bool fcpuname) {
  #ifdef ARCH_X86
--- a/src/common/cpu.h
+++ b/src/common/cpu.h
@@ -60,6 +60,11 @@ struct frequency {
  int32_t max;
  // Indicates if max frequency was measured
  bool measured;
+#ifdef ARCH_X86
+  // Max frequency when running vectorized code.
+  // Used only for peak performance computation.
+  int32_t max_pp;
+#endif
 };

 struct hypervisor {
@@ -188,6 +193,8 @@ struct cpuInfo {
 #ifdef ARCH_X86
  // The index of the first core in the module
  uint32_t first_core_id;
+  // The index of this module
+  uint32_t module_id;
 #endif
 #endif
 };
@@ -200,6 +207,9 @@ uint32_t get_nsockets(struct topology* topo);

 VENDOR get_cpu_vendor(struct cpuInfo* cpu);
 int64_t get_freq(struct frequency* freq);
+#ifdef ARCH_X86
+int64_t get_freq_pp(struct frequency* freq);
+#endif

 char* get_str_aes(struct cpuInfo* cpu);
 char* get_str_sha(struct cpuInfo* cpu);
--- a/src/common/printer.c
+++ b/src/common/printer.c
@@ -395,6 +395,8 @@ void choose_ascii_art(struct ascii* art, struct color** cs, struct terminal* ter
    art->art = &logo_nxp;
  else if(art->vendor == SOC_VENDOR_AMLOGIC)
    art->art = &logo_amlogic;
+  else if(art->vendor == SOC_VENDOR_MARVELL)
+    art->art = &logo_marvell;
  else if(art->vendor == SOC_VENDOR_NVIDIA)
    art->art = choose_ascii_art_aux(&logo_nvidia_l, &logo_nvidia, term, lf);
  else {
@@ -885,7 +887,13 @@ bool print_cpufetch_arm(struct cpuInfo* cpu, STYLE s, struct color** cs, struct
  char* soc_name = get_soc_name(cpu->soc);
  char* features = get_str_features(cpu);
  setAttribute(art, ATTRIBUTE_SOC, soc_name);
+
+  // Currently no reliable way to identify the specific SoC on Windows
+  // https://github.com/Dr-Noob/cpufetch/pull/273
+  // Hide manufacturing process
+#if !defined(_WIN32)
  setAttribute(art, ATTRIBUTE_TECHNOLOGY, manufacturing_process);
+#endif

  if(cpu->num_cpus == 1) {
    char* uarch = get_str_uarch(cpu);
--- a/src/common/soc.c
+++ b/src/common/soc.c
@@ -24,6 +24,7 @@ static char* soc_trademark_string[] = {
  [SOC_VENDOR_AMPERE]     = "Ampere ",
  [SOC_VENDOR_NXP]        = "NXP ",
  [SOC_VENDOR_AMLOGIC]    = "Amlogic ",
+  [SOC_VENDOR_MARVELL]    = "Marvell ",
  // RISC-V
  [SOC_VENDOR_SIFIVE]     = "SiFive ",
  [SOC_VENDOR_STARFIVE]   = "StarFive ",
@@ -78,6 +79,28 @@ void fill_soc(struct system_on_chip* soc, char* soc_name, SOC soc_model, int32_t
  }
 }

+// Fills in soc when only the vendor is known: the model is set to
+// SOC_MODEL_UNKNOWN and the manufacturing process to UNKNOWN.
+// soc->name is a newly allocated string made of the trademark string of the
+// vendor followed by soc_name.
+void fill_soc_raw(struct system_on_chip* soc, char* soc_name, VENDOR vendor) {
+  soc->process = UNKNOWN;
+  soc->vendor = vendor;
+  soc->model = SOC_MODEL_UNKNOWN;
+
+  char* trademark = soc_trademark_string[soc->vendor];
+  // Trademark + SoC name + NULL byte
+  int name_size = strlen(soc_name) + strlen(trademark) + 1;
+  soc->name = emalloc(sizeof(char) * name_size);
+  sprintf(soc->name, "%s%s", trademark, soc_name);
+}
+
+#ifdef _WIN32
+// Tries to find out the SoC vendor by looking for any of the strings in
+// soc_trademark_string inside vendor_name.
+// Note that the whole trademark string is searched for, including any
+// trailing space it may have (e.g. "Ampere ").
+// Returns the index of the first trademark string found, as a VENDOR, or
+// SOC_VENDOR_UNKNOWN if vendor_name is NULL or nothing matches.
+VENDOR try_match_soc_vendor_name(char* vendor_name)
+{
+  if(vendor_name == NULL) {
+    return SOC_VENDOR_UNKNOWN;
+  }
+
+  size_t num_vendors = sizeof(soc_trademark_string)/sizeof(soc_trademark_string[0]);
+
+  // Index 0 is never considered (presumably SOC_VENDOR_UNKNOWN - confirm)
+  for(size_t i=1; i < num_vendors; i++) {
+    // The table uses designated initializers, so any vendor without an
+    // entry is a NULL pointer, which must not be passed to strstr
+    if(soc_trademark_string[i] == NULL) {
+      continue;
+    }
+    if(strstr(vendor_name, soc_trademark_string[i]) != NULL) {
+      return (VENDOR) i;
+    }
+  }
+  return SOC_VENDOR_UNKNOWN;
+}
+#endif
+
 bool match_soc(struct system_on_chip* soc, char* raw_name, char* expected_name, char* soc_name, SOC soc_model, int32_t process) {
  int len1 = strlen(raw_name);
  int len2 = strlen(expected_name);
--- a/src/common/soc.h
+++ b/src/common/soc.h
@@ -28,6 +28,7 @@ enum {
  SOC_VENDOR_AMPERE,
  SOC_VENDOR_NXP,
  SOC_VENDOR_AMLOGIC,
+  SOC_VENDOR_MARVELL,
  // RISC-V
  SOC_VENDOR_SIFIVE,
  SOC_VENDOR_STARFIVE,
@@ -50,6 +51,10 @@ VENDOR get_soc_vendor(struct system_on_chip* soc);
 bool match_soc(struct system_on_chip* soc, char* raw_name, char* expected_name, char* soc_name, SOC soc_model, int32_t process);
 char* get_str_process(struct system_on_chip* soc);
 void fill_soc(struct system_on_chip* soc, char* soc_name, SOC soc_model, int32_t process);
+void fill_soc_raw(struct system_on_chip* soc, char* soc_name, VENDOR vendor);
+#ifdef _WIN32
+VENDOR try_match_soc_vendor_name(char* vendor_name);
+#endif

 #define SOC_START if (false) {}
 #define SOC_EQ(raw_name, expected_name, soc_name, soc_model, soc, process) \
--- a/src/common/udev.c
+++ b/src/common/udev.c
@@ -361,3 +361,49 @@ char* get_devtree_compatible(int *filelen) {

  return buf;
 }
+
+// Returns a list with one struct devtree per entry of the compatible file
+// from the device tree, where the entries are separated by the NULL byte.
+// Every entry is split at its first comma: vendor is the text before the
+// comma and model is the text after it. For instance, given a compatible
+// file with "str1,foo1.str2,foo2" (where . denotes the NULL byte, i.e., the
+// separator), this function will return a list with {str1, foo1} and
+// {str2, foo2}. An entry with no comma is stored with the whole entry as
+// vendor and an empty model.
+// The number of elements is written to num_vendors_ptr.
+// Returns NULL if the compatible file cannot be read or has no entries.
+// NOTE(review): the buffer returned by get_devtree_compatible is not released
+// here - confirm who owns it.
+struct devtree** get_devtree_compatible_struct(int *num_vendors_ptr) {
+  int len;
+  char* dt = get_devtree_compatible(&len);
+  if (dt == NULL) {
+    return NULL;
+  }
+
+  // First pass: count the entries. Every search is limited to the bytes
+  // that remain in the buffer; a last entry with no NULL byte extends up to
+  // the end of the buffer.
+  int num_vendors = 0;
+  for (int pos = 0; pos < len; num_vendors++) {
+    char* end_ptr = memchr(dt + pos, '\0', len - pos);
+    int entry_len = (end_ptr != NULL) ? (int) (end_ptr - (dt + pos)) : len - pos;
+    pos += entry_len + 1;
+  }
+
+  *num_vendors_ptr = num_vendors;
+  if (num_vendors == 0) {
+    return NULL;
+  }
+
+  struct devtree** vendors = emalloc(sizeof(struct devtree *) * num_vendors);
+
+  // Second pass: split every entry into vendor and model
+  for (int pos = 0, i = 0; pos < len; i++) {
+    char* entry = dt + pos;
+    char* end_ptr = memchr(entry, '\0', len - pos);
+    int entry_len = (end_ptr != NULL) ? (int) (end_ptr - entry) : len - pos;
+
+    // The comma is only searched for inside the current entry
+    char* comma_ptr = memchr(entry, ',', entry_len);
+
+    // Lengths exclude both the comma and the NULL byte
+    int vendor_str_len = (comma_ptr != NULL) ? (int) (comma_ptr - entry) : entry_len;
+    int model_str_len = (comma_ptr != NULL) ? entry_len - vendor_str_len - 1 : 0;
+
+    vendors[i] = emalloc(sizeof(struct devtree));
+    vendors[i]->vendor = ecalloc(vendor_str_len + 1, sizeof(char));
+    vendors[i]->model = ecalloc(model_str_len + 1, sizeof(char));
+
+    memcpy(vendors[i]->vendor, entry, vendor_str_len);
+    vendors[i]->vendor[vendor_str_len] = '\0';
+    if (model_str_len > 0) {
+      memcpy(vendors[i]->model, comma_ptr + 1, model_str_len);
+    }
+    vendors[i]->model[model_str_len] = '\0';
+
+    pos += entry_len + 1;
+  }
+
+  return vendors;
+}
--- a/src/common/udev.h
+++ b/src/common/udev.h
@@ -31,6 +31,11 @@
 #define _PATH_CACHE_MAX_LEN     200
 #define _PATH_PACKAGE_MAX_LEN   200

+// One entry of the compatible file from the device tree, as returned
+// by get_devtree_compatible_struct
+struct devtree {
+  // Vendor of the entry, i.e., its first string (e.g., "marvell")
+  char* vendor;
+  // Model of the entry, i.e., the string that follows the vendor
+  char* model;
+};
+
 char* read_file(char* path, int* len);
 long get_max_freq_from_file(uint32_t core);
 long get_min_freq_from_file(uint32_t core);
@@ -44,5 +49,6 @@ int get_ncores_from_cpuinfo(void);
 char* get_field_from_cpuinfo(char* CPUINFO_FIELD);
 bool is_devtree_compatible(char* str);
 char* get_devtree_compatible(int *filelen);
+struct devtree** get_devtree_compatible_struct(int *num_vendors);

 #endif
--- a/src/riscv/riscv.c
+++ b/src/riscv/riscv.c
@@ -62,6 +62,7 @@ int parse_multi_letter_extension(struct extensions* ext, char* e) {
  SET_ISA_EXT_MAP("zicbom",      RISCV_ISA_EXT_ZICBOM)
  SET_ISA_EXT_MAP("zihintpause", RISCV_ISA_EXT_ZIHINTPAUSE)
  SET_ISA_EXT_MAP("svnapot",     RISCV_ISA_EXT_SVNAPOT)
+  SET_ISA_EXT_MAP("zicbop",      RISCV_ISA_EXT_ZICBOP)
  SET_ISA_EXT_MAP("zicboz",      RISCV_ISA_EXT_ZICBOZ)
  SET_ISA_EXT_MAP("smaia",       RISCV_ISA_EXT_SMAIA)
  SET_ISA_EXT_MAP("ssaia",       RISCV_ISA_EXT_SSAIA)
--- a/src/riscv/riscv.h
+++ b/src/riscv/riscv.h
@@ -23,6 +23,7 @@ enum riscv_isa_ext_id {
  RISCV_ISA_EXT_ZICBOM,
  RISCV_ISA_EXT_ZIHINTPAUSE,
  RISCV_ISA_EXT_SVNAPOT,
+  RISCV_ISA_EXT_ZICBOP,
  RISCV_ISA_EXT_ZICBOZ,
  RISCV_ISA_EXT_SMAIA,
  RISCV_ISA_EXT_SSAIA,
@@ -37,6 +38,7 @@ enum riscv_isa_ext_id {

 // https://five-embeddev.com/riscv-isa-manual/latest/preface.html#preface
 // https://en.wikichip.org/wiki/risc-v/standard_extensions
+// (Zicbop) https://github.com/riscv/riscv-CMOs/blob/master/cmobase/Zicbop.adoc
 // Included all except for G
 static const struct extension extension_list[] = {
  { 'i' - 'a', "(I) Integer Instruction Set" },
@@ -64,6 +66,7 @@ static const struct extension extension_list[] = {
  { RISCV_ISA_EXT_ZIHINTPAUSE, "(Zihintpause) Pause Hint" },
  { RISCV_ISA_EXT_SVNAPOT,     "(Svnapot) Naturally Aligned Power of Two Pages" },
  { RISCV_ISA_EXT_ZICBOZ,      "(Zicboz) Cache Block Zero Operations" },
+  { RISCV_ISA_EXT_ZICBOP,      "(Zicbop) Cache Block Prefetch Operations" },
  { RISCV_ISA_EXT_SMAIA,       "(Smaia) Advanced Interrupt Architecture" },
  { RISCV_ISA_EXT_SSAIA,       "(Ssaia) Advanced Interrupt Architecture" },
  { RISCV_ISA_EXT_ZBA,         "(Zba) Address Generation" },
--- a/src/x86/cpuid.c
+++ b/src/x86/cpuid.c
@@ -210,18 +210,14 @@ int64_t get_peak_performance(struct cpuInfo* cpu, bool accurate_pp) {

  for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
    struct topology* topo = ptr->topo;
-    int64_t max_freq = get_freq(ptr->freq);
+    int64_t freq = get_freq(ptr->freq);

-    int64_t freq;
  #ifdef __linux__
    if(accurate_pp)
-      freq = measure_frequency(ptr);
-    else
-      freq = max_freq;
+      freq = get_freq_pp(ptr->freq);
  #else
    // Silence compiler warning
    (void)(accurate_pp);
-    freq = max_freq;
  #endif

    //First, check we have consistent data
@@ -450,6 +446,23 @@ int32_t get_core_type(void) {
  }
 }

+#ifdef __linux__
+// Measures the max frequency used to estimate the peak performance and
+// stores it in freq->max_pp of each of the cpu->num_cpus modules in the list.
+void fill_frequency_info_pp(struct cpuInfo* cpu) {
+  int32_t unused;
+  int32_t *max_freq_pp_vec = emalloc(sizeof(int32_t) * cpu->num_cpus);
+  struct cpuInfo* ptr = cpu;
+
+  for (uint32_t i=0; i < cpu->num_cpus; i++) {
+    set_cpu_module(i, cpu->num_cpus, &unused);
+    ptr->freq->max_pp = measure_frequency(ptr, max_freq_pp_vec);
+    ptr = ptr->next_cpu;
+  }
+  free(max_freq_pp_vec); // Only needed to share results between the calls above
+}
+#endif
+
 struct cpuInfo* get_cpu_info(void) {
  struct cpuInfo* cpu = emalloc(sizeof(struct cpuInfo));
  cpu->peak_performance = -1;
@@ -546,6 +559,7 @@ struct cpuInfo* get_cpu_info(void) {
      ptr->core_type = get_core_type();
    }
    ptr->first_core_id = first_core;
+    ptr->module_id = i;
    ptr->feat = get_features_info(ptr);

    ptr->arch = get_cpu_uarch(ptr);
@@ -570,6 +584,13 @@ struct cpuInfo* get_cpu_info(void) {
    if(ptr->topo == NULL) return cpu;
  }

+#ifdef __linux__
+  // If accurate_pp is requested, we need to get the max frequency
+  // after fetching the topology for all CPU modules, since the topology
+  // is required by fill_frequency_info_pp
+  if (accurate_pp()) fill_frequency_info_pp(cpu);
+#endif
+
  cpu->peak_performance = get_peak_performance(cpu, accurate_pp());

  return cpu;
@@ -1005,6 +1026,7 @@ struct frequency* get_frequency_info(struct cpuInfo* cpu) {
    }
  #endif

+  freq->max_pp = UNKNOWN_DATA;
  return freq;
 }

--- a/src/x86/freq/freq.c
+++ b/src/x86/freq/freq.c
@@ -21,9 +21,12 @@
 #define FREQ_VECTOR_SIZE         1<<16

 struct freq_thread {
+  // Inputs
+  struct cpuInfo* cpu;
  bool end;
  bool measure;
-  double freq;
+  // Output: averaged frequency per CPU module (only index 0 when not hybrid)
+  int32_t *max_pp;
 };

 double vector_average_harmonic(double* v, int len) {
@@ -48,6 +51,7 @@ void* measure_freq(void *freq_ptr) {
  char* line = NULL;
  size_t len = 0;
  ssize_t read;
+  struct cpuInfo* cpu = freq->cpu;

  int v = 0;
  double* freq_vector = malloc(sizeof(double) * FREQ_VECTOR_SIZE);
@@ -76,18 +80,43 @@ void* measure_freq(void *freq_ptr) {
    sleep_ms(500);
  }

-  freq->freq = vector_average_harmonic(freq_vector, v);
-  printWarn("AVX2 measured freq=%f\n", freq->freq);
+  if (cpu->hybrid_flag) {
+    // We have a heterogeneous architecture. After measuring the
+    // frequency for all cores, we now need to compute the average
+    // independently for each CPU module.
+    struct cpuInfo* ptr = cpu;
+    double* freq_vector_ptr = freq_vector;
+
+    for (int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
+      freq->max_pp[i] = vector_average_harmonic(freq_vector_ptr, ptr->topo->total_cores_module);
+      printWarn("AVX2 measured freq=%d (module %d)", freq->max_pp[i], i);
+
+      freq_vector_ptr = freq_vector_ptr + ptr->topo->total_cores_module;
+    }
+  }
+  else {
+    freq->max_pp[0] = vector_average_harmonic(freq_vector, v);
+    printWarn("AVX2 measured freq=%d\n", freq->max_pp[0]);
+  }

  return NULL;
 }

-int64_t measure_frequency(struct cpuInfo* cpu) {
+int32_t measure_frequency(struct cpuInfo* cpu, int32_t *max_freq_pp_vec) {
+  if (cpu->hybrid_flag && cpu->module_id > 0) {
+    // We have a hybrid architecture and we have already
+    // measured the frequency for this module in a previous
+    // call to this function, so now just return it.
+    return max_freq_pp_vec[cpu->module_id];
+  }
+
  int ret;
  int num_spaces;
  struct freq_thread* freq_struct = malloc(sizeof(struct freq_thread));
  freq_struct->end = false;
  freq_struct->measure = false;
+  freq_struct->cpu = cpu;
+  freq_struct->max_pp = max_freq_pp_vec;

  void* (*compute_function)(void*);

@@ -159,5 +188,5 @@ int64_t measure_frequency(struct cpuInfo* cpu) {
  }

  printf("\r%*c", num_spaces, ' ');
-  return freq_struct->freq;
+  return max_freq_pp_vec[0];
 }
--- a/src/x86/freq/freq.h
+++ b/src/x86/freq/freq.h
@@ -8,6 +8,6 @@
 #define MEASURE_TIME_SECONDS         5
 #define LOOP_ITERS           100000000

-int64_t measure_frequency(struct cpuInfo* cpu);
+int32_t measure_frequency(struct cpuInfo* cpu, int32_t *max_freq_pp_vec);

 #endif
--- a/src/x86/uarch.c
+++ b/src/x86/uarch.c
@@ -93,6 +93,7 @@ enum {
  UARCH_CEDAR_MILL,
  UARCH_ITANIUM2,
  UARCH_ICE_LAKE,
+  UARCH_SAPPHIRE_RAPIDS,
  UARCH_TIGER_LAKE,
  UARCH_ALDER_LAKE,
  UARCH_RAPTOR_LAKE,
@@ -119,7 +120,9 @@ enum {
  UARCH_ZEN3,
  UARCH_ZEN3_PLUS,
  UARCH_ZEN4,
-  UARCH_ZEN4C
+  UARCH_ZEN4C,
+  UARCH_ZEN5,
+  UARCH_ZEN5C,
 };

 struct uarch {
@@ -253,6 +256,7 @@ struct uarch* get_uarch_from_cpuid_intel(uint32_t ef, uint32_t f, uint32_t em, u
  // CHECK_UARCH(arch, 0,  6,  8, 14, 10, ...) It is not possible to determine uarch only from CPUID dump (can be Kaby Lake R or Coffee Lake U)
  CHECK_UARCH(arch, 0,  6,  8, 14, 11, "Whiskey Lake",      UARCH_WHISKEY_LAKE,     14) // wikichip
  CHECK_UARCH(arch, 0,  6,  8, 14, 12, "Comet Lake",        UARCH_COMET_LAKE,       14) // wikichip
+  CHECK_UARCH(arch, 0,  6,  8, 15,  8, "Sapphire Rapids",   UARCH_SAPPHIRE_RAPIDS,   7) // wikichip
  CHECK_UARCH(arch, 0,  6,  9,  6, NA, "Tremont",           UARCH_TREMONT,          10) // LX*
  CHECK_UARCH(arch, 0,  6,  9,  7, NA, "Alder Lake",        UARCH_ALDER_LAKE,       10) // instlatx64 (Alder Lake-S)
  CHECK_UARCH(arch, 0,  6,  9, 10, NA, "Alder Lake",        UARCH_ALDER_LAKE,       10) // instlatx64 (Alder Lake-P)
@@ -410,6 +414,12 @@ struct uarch* get_uarch_from_cpuid_amd(uint32_t ef, uint32_t f, uint32_t em, uin
  CHECK_UARCH(arch, 10, 15,  8, NA, NA, "Zen 4",       UARCH_ZEN4,         5) // instlatx64 (AMD MI300C)
  CHECK_UARCH(arch, 10, 15,  9, NA, NA, "Zen 4",       UARCH_ZEN4,         5) // instlatx64 (AMD MI300A)
  CHECK_UARCH(arch, 10, 15, 10, NA, NA, "Zen 4c",      UARCH_ZEN4C,        5) // instlatx64
+  CHECK_UARCH(arch, 11, 15,  0, NA, NA, "Zen 5",       UARCH_ZEN5,         4) // Turin/EPYC (instlatx64)
+  CHECK_UARCH(arch, 11, 15,  1, NA, NA, "Zen 5c",      UARCH_ZEN5C,        3) // Zen5c EPYC (instlatx64, https://en.wikipedia.org/wiki/Zen_5#cite_note-10)
+  CHECK_UARCH(arch, 11, 15,  2, NA, NA, "Zen 5",       UARCH_ZEN5,         4) // Strix Point (instlatx64)
+  CHECK_UARCH(arch, 11, 15,  4, NA, NA, "Zen 5",       UARCH_ZEN5,         4) // Granite Ridge (instlatx64)
+  CHECK_UARCH(arch, 11, 15,  6, NA, NA, "Zen 5",       UARCH_ZEN5,         4) // Krackan Point (instlatx64)
+  CHECK_UARCH(arch, 11, 15,  7, NA, NA, "Zen 5",       UARCH_ZEN5,         4) // Strix Halo (instlatx64)
  UARCH_END

  return arch;
@@ -552,6 +562,8 @@ char* infer_cpu_name_from_uarch(struct uarch* arch) {
 }

 bool vpus_are_AVX512(struct cpuInfo* cpu) {
+  // Zen5 actually has 2 x AVX512 units
+  // https://www.anandtech.com/show/21469/amd-details-ryzen-ai-300-series-for-mobile-strix-point-with-rdna-35-igpu-xdna-2-npu
  return cpu->arch->uarch != UARCH_ICE_LAKE &&
         cpu->arch->uarch != UARCH_TIGER_LAKE &&
         cpu->arch->uarch != UARCH_ZEN4 &&
@@ -581,6 +593,7 @@ int get_number_of_vpus(struct cpuInfo* cpu) {
      case UARCH_KNIGHTS_LANDING:
      case UARCH_KNIGHTS_MILL:

+      case UARCH_SAPPHIRE_RAPIDS:
      case UARCH_ICE_LAKE:
      case UARCH_TIGER_LAKE:
      case UARCH_ALDER_LAKE:
@@ -592,6 +605,8 @@ int get_number_of_vpus(struct cpuInfo* cpu) {
      case UARCH_ZEN3_PLUS:
      case UARCH_ZEN4:
      case UARCH_ZEN4C:
+      case UARCH_ZEN5:
+      case UARCH_ZEN5C:
        return 2;
      default:
        return 1;
Author	SHA1	Message	Date
Dr-Noob	51dd89b005	WIP	2024-09-17 08:52:01 +01:00
Dr-Noob	ef1bfa5cc9	WIP	2024-09-17 08:49:51 +01:00
Dr-Noob	2e2e660b97	[v1.06][ARM] Add more NVIDIA SoCs	2024-09-12 08:08:53 +01:00
Dr-Noob	fbd50822cf	[v1.06][ARM] Add more Marvell SoCs	2024-09-12 07:51:12 +01:00
Dr-Noob	bc8a779de6	[v1.06][ARM] Add more Amlogic SoCs	2024-09-11 18:38:56 +01:00
Dr-Noob	995ebc6736	[v1.06][RISCV] Add zicbop multi-letter extension (fixes #285 )	2024-09-11 07:58:12 +01:00
Dr-Noob	ab43a11ef2	[v1.06][X86] Fix accurate-pp in hybrid architectures (fixes #169 ) Overview of changes: - Adds field max_pp in frequency struct to hold the max freq for peak-performance estimation. - Instead of getting the max frequency in get_peak_performance, we get it in get_cpu_info (more natural). - Adds fill_frequency_info_pp which fills the max_pp of the passed cpu by calling measure_frequency. The approach is to call measure_frequency with a vector where the max frequencies are stored. Then, the first time measure_frequency is called, the frequency is measured while running all the cores, and the max frequency is computed per module (e.g., in the case of 2 modules, we would compute the freq for the first and for the second module), and saved into this vector. Subsequent calls to measure_frequency will just read the corresponding value for the vector. In other words, the frequency is only measured once for the whole CPU.	2024-09-10 22:43:23 +01:00
Wunk	edbfc9722e	[v1.06][ARM] Add Windows on Arm support (#273 )	2024-09-10 09:40:46 +02:00
Dr-Noob	57bbe2de4f	[v1.06][X86] Add Sapphire Rapids uarch (#281 )	2024-09-10 08:32:18 +01:00
Dr-Noob	278efb75c9	[v1.06][ARM] Add support for Marvell SoC (#279 )	2024-09-10 07:41:11 +01:00
Dr-Noob	343150e516	[v1.06][ARM] Add Tegra Orin (#275 )	2024-09-09 08:19:18 +01:00
Dr-Noob	1e2c7e565c	[v1.06][ARM] Add SC8280XP (on device tree) (#272 )	2024-09-09 07:11:57 +01:00
Dr-Noob	977c35a9af	[v1.06][X86] Add Zen5 uarch	2024-09-05 20:14:52 +01:00