Compare commits

...

29 Commits
v1.02 ... v1.03

Author SHA1 Message Date
Dr-Noob
49941b1717 [v1.03] Bump version 2023-01-05 11:17:49 +01:00
Dr-Noob
a4c0bb1aae [v1.02] Merge Alder Lake into master branch 2023-01-05 11:17:16 +01:00
Dr-Noob
ea29507b62 [v1.02] Added basic support for Zen4 2022-12-03 16:29:53 +00:00
Dr-Noob
b2aa8194c6 [v1.02][x86] Detect and print core type in ADL 2022-12-02 21:25:30 +00:00
Dr-Noob
d879b06d08 [v1.02][x86] Fix printer for non hybrid architectures 2022-12-02 20:53:40 +00:00
Dr-Noob
6cc18027db [v1.02][x86] ADL has a shared L3, not one L3 per core type 2022-12-02 20:39:49 +00:00
Dr-Noob
77510c260a [v1.02][x86] Small fixes to hybrid core detection 2022-11-05 18:28:10 +00:00
Dr-Noob
1eb1a5246e [v1.02][x86] Extending peakperf computation to hybid cores 2022-11-05 18:17:38 +00:00
Dr-Noob
cec91a1e4d [v1.02][x86] Adding support printing Intel hybrid CPUs 2022-11-05 17:49:43 +00:00
Dr-Noob
ff5166ea2e [v1.02][x86] Adding support for topology detection of hybrid cores 2022-11-05 17:48:20 +00:00
Dr-Noob
051d48b7d1 [v1.02][x86] Suport for detecting hybrid_flag 2022-10-20 20:49:10 +00:00
Dr-Noob
e91eef3e65 [v1.02][x86] Split feature detection into a separate function 2022-10-20 20:48:22 +00:00
Dr-Noob
de24d86cd6 [v1.02] Extended get_num_caches_from_files to support maps with commas, should fix #152 2022-09-30 19:44:14 +02:00
Dr-Noob
4b1a087b64 [v1.02][ARM] Kirin SoCs seem to start with hi, not Hi, as reported in #157 2022-09-23 19:36:05 +02:00
Dr-Noob
7eb856ae84 [v1.02] Merge fixes from bugfix 2022-09-23 19:20:30 +02:00
Dr-Noob
65366abe04 [v1.02][ARM] Fixed wrong check for Apple CPU 2022-09-19 11:33:14 +02:00
Dr-Noob
190e5daace [v1.02] Merge master and bugfix2 branches 2022-09-19 11:27:03 +02:00
Dr-Noob
9f7204d43d [v1.02][ARM] Updating M1/M2 peak performance calculations according to the discussion in #155 2022-09-14 11:08:36 +02:00
Dr-Noob
87961144d2 [v1.02][X86] Add Zen3+ uarch 2022-09-09 09:02:26 +02:00
Dr-Noob
f4565cb937 [v1.02] Ignore extension warnings in ppc 2022-09-07 08:18:16 +02:00
Dr-Noob
61a1ad8a2b [v1.02] Merge support for Apple M2 2022-09-06 08:12:44 +02:00
Dr-Noob
ecce0354e5 [v1.02][ARM] Fixed M2 bug, support for M2 should now be complete 2022-09-05 18:17:15 +02:00
Dr-Noob
a955451937 [v1.02][X86] Add Steam Deck CPU as requested in #147 2022-09-05 10:39:06 +02:00
Dr-Noob
db21931118 [v1.02][X86] Fix Coffee Lake spelling 2022-09-05 10:28:07 +02:00
Dr-Noob
71a9308bed [v1.02][X86] Fixed uarch misclassification as noted by #149 2022-09-05 10:19:48 +02:00
Dr-Noob
dfec2a65d2 [v1.02][ARM] Experimental support for M2 2022-09-05 10:02:26 +02:00
Dr-Noob
b319b52952 [v1.02] Use a different name for ifdef; the other might be already defined! 2022-09-05 08:41:19 +02:00
Dr-Noob
758be60967 [v1.02] Remove wrong check in udev 2022-09-05 08:24:22 +02:00
Dr-Noob
52ba038527 [v1.02][ARM] Add support for M1 Ultra 2022-05-25 22:11:48 +01:00
16 changed files with 627 additions and 213 deletions

View File

@@ -28,7 +28,7 @@ ifneq ($(OS),Windows_NT)
SRC_DIR=src/ppc/ SRC_DIR=src/ppc/
SOURCE += $(COMMON_SRC) $(SRC_DIR)ppc.c $(SRC_DIR)uarch.c $(SRC_DIR)udev.c SOURCE += $(COMMON_SRC) $(SRC_DIR)ppc.c $(SRC_DIR)uarch.c $(SRC_DIR)udev.c
HEADERS += $(COMMON_HDR) $(SRC_DIR)ppc.h $(SRC_DIR)uarch.h $(SRC_DIR)udev.c HEADERS += $(COMMON_HDR) $(SRC_DIR)ppc.h $(SRC_DIR)uarch.h $(SRC_DIR)udev.c
CFLAGS += -DARCH_PPC -std=gnu99 -fstack-protector-all CFLAGS += -DARCH_PPC -std=gnu99 -fstack-protector-all -Wno-language-extension-token
else ifeq ($(arch), $(filter $(arch), arm aarch64_be aarch64 arm64 armv8b armv8l armv7l armv6l)) else ifeq ($(arch), $(filter $(arch), arm aarch64_be aarch64 arm64 armv8b armv8l armv7l armv6l))
SRC_DIR=src/arm/ SRC_DIR=src/arm/
SOURCE += $(COMMON_SRC) $(SRC_DIR)midr.c $(SRC_DIR)uarch.c $(SRC_DIR)soc.c $(SRC_DIR)udev.c SOURCE += $(COMMON_SRC) $(SRC_DIR)midr.c $(SRC_DIR)uarch.c $(SRC_DIR)soc.c $(SRC_DIR)udev.c

View File

@@ -81,12 +81,23 @@ int64_t get_peak_performance(struct cpuInfo* cpu) {
} }
int64_t flops = 0; int64_t flops = 0;
ptr = cpu; ptr = cpu;
if(cpu->soc->soc_vendor == SOC_VENDOR_APPLE) {
// Special case for M1/M2
// First we find the E cores, then the P
// M1 have 2 (E cores) or 4 (P cores) FMA units
// Source: https://dougallj.github.io/applecpu/firestorm-simd.html
flops += ptr->topo->total_cores * (get_freq(ptr->freq) * 1000000) * 2 * 4 * 2;
ptr = ptr->next_cpu;
flops += ptr->topo->total_cores * (get_freq(ptr->freq) * 1000000) * 2 * 4 * 4;
}
else {
for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) { for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
flops += ptr->topo->total_cores * (get_freq(ptr->freq) * 1000000); flops += ptr->topo->total_cores * (get_freq(ptr->freq) * 1000000);
} }
if(cpu->feat->NEON) flops = flops * 4; if(cpu->feat->NEON) flops = flops * 4;
}
return flops; return flops;
} }
@@ -273,11 +284,46 @@ void fill_cpu_info_firestorm_icestorm(struct cpuInfo* cpu, uint32_t pcores, uint
fire->next_cpu = NULL; fire->next_cpu = NULL;
} }
/*
 * Fill the two-node cpuInfo list that describes an Apple M2
 * (Avalanche + Blizzard).
 *
 * cpu    - first node of the list. It is filled with the Blizzard (little
 *          cores) cluster; a second node is allocated here, linked through
 *          next_cpu, and filled with the Avalanche (big cores) cluster.
 * pcores - number of performance (Avalanche) cores.
 * ecores - number of efficiency (Blizzard) cores.
 *
 * The little-core cluster is stored first because get_peak_performance
 * handles Apple SoCs by reading the E cores from the first node and the
 * P cores from the second one.
 */
void fill_cpu_info_avalanche_blizzard(struct cpuInfo* cpu, uint32_t pcores, uint32_t ecores) {
  // 1. Fill BLIZZARD (little cores)
  struct cpuInfo* bli = cpu;

  bli->midr = MIDR_APPLE_M2_BLIZZARD;
  bli->arch = get_uarch_from_midr(bli->midr, bli);
  bli->cach = get_cache_info(bli);
  bli->feat = get_features_info();
  bli->topo = malloc(sizeof(struct topology));
  bli->topo->cach = bli->cach;
  // Blizzard is the efficiency cluster, so it takes the E-core count
  // (this used to be pcores, which swapped the two clusters).
  bli->topo->total_cores = ecores;
  bli->freq = malloc(sizeof(struct frequency));
  bli->freq->base = UNKNOWN_DATA;
  // Hard-coded maximum frequency (presumably MHz - confirm against get_freq)
  bli->freq->max = 2800;
  bli->hv = malloc(sizeof(struct hypervisor));
  bli->hv->present = false;
  bli->next_cpu = malloc(sizeof(struct cpuInfo));

  // 2. Fill AVALANCHE (big cores)
  struct cpuInfo* ava = bli->next_cpu;

  ava->midr = MIDR_APPLE_M2_AVALANCHE;
  ava->arch = get_uarch_from_midr(ava->midr, ava);
  ava->cach = get_cache_info(ava);
  ava->feat = get_features_info();
  ava->topo = malloc(sizeof(struct topology));
  ava->topo->cach = ava->cach;
  // Avalanche is the performance cluster, so it takes the P-core count.
  ava->topo->total_cores = pcores;
  ava->freq = malloc(sizeof(struct frequency));
  ava->freq->base = UNKNOWN_DATA;
  // Hard-coded maximum frequency (presumably MHz - confirm against get_freq)
  ava->freq->max = 3500;
  ava->hv = malloc(sizeof(struct hypervisor));
  ava->hv->present = false;
  // Last node of the list
  ava->next_cpu = NULL;
}
struct cpuInfo* get_cpu_info_mach(struct cpuInfo* cpu) { struct cpuInfo* get_cpu_info_mach(struct cpuInfo* cpu) {
uint32_t cpu_family = get_sys_info_by_name("hw.cpufamily"); uint32_t cpu_family = get_sys_info_by_name("hw.cpufamily");
// Manually fill the cpuInfo assuming that the CPU // Manually fill the cpuInfo assuming that
// is a ARM_FIRESTORM_ICESTORM (Apple M1) // the CPU is an Apple M1/M2
if(cpu_family == CPUFAMILY_ARM_FIRESTORM_ICESTORM) { if(cpu_family == CPUFAMILY_ARM_FIRESTORM_ICESTORM) {
cpu->num_cpus = 2; cpu->num_cpus = 2;
// Now detect the M1 version // Now detect the M1 version
@@ -287,13 +333,20 @@ struct cpuInfo* get_cpu_info_mach(struct cpuInfo* cpu) {
fill_cpu_info_firestorm_icestorm(cpu, 4, 4); fill_cpu_info_firestorm_icestorm(cpu, 4, 4);
} }
else if(cpu_subfamily == CPUSUBFAMILY_ARM_HS || cpu_subfamily == CPUSUBFAMILY_ARM_HC_HD) { else if(cpu_subfamily == CPUSUBFAMILY_ARM_HS || cpu_subfamily == CPUSUBFAMILY_ARM_HC_HD) {
// Apple M1 Pro/Max. Detect number of cores // Apple M1 Pro/Max/Ultra. Detect number of cores
uint32_t physicalcpu = get_sys_info_by_name("hw.physicalcpu"); uint32_t physicalcpu = get_sys_info_by_name("hw.physicalcpu");
if(physicalcpu < 8 || physicalcpu > 10) { if(physicalcpu == 20) {
printBug("Found invalid physicalcpu: 0x%.8X", physicalcpu); // M1 Ultra
fill_cpu_info_firestorm_icestorm(cpu, 16, 4);
}
else if(physicalcpu == 8 || physicalcpu == 10) {
// M1 Pro/Max
fill_cpu_info_firestorm_icestorm(cpu, physicalcpu-2, 2);
}
else {
printBug("Found invalid physical cpu number: %d", physicalcpu);
return NULL; return NULL;
} }
fill_cpu_info_firestorm_icestorm(cpu, physicalcpu-2, 2);
} }
else { else {
printBug("Found invalid cpu_subfamily: 0x%.8X", cpu_subfamily); printBug("Found invalid cpu_subfamily: 0x%.8X", cpu_subfamily);
@@ -302,6 +355,13 @@ struct cpuInfo* get_cpu_info_mach(struct cpuInfo* cpu) {
cpu->soc = get_soc(); cpu->soc = get_soc();
cpu->peak_performance = get_peak_performance(cpu); cpu->peak_performance = get_peak_performance(cpu);
} }
else if(cpu_family == CPUFAMILY_ARM_AVALANCHE_BLIZZARD) {
// Just the "normal" M2 exists for now
cpu->num_cpus = 2;
fill_cpu_info_avalanche_blizzard(cpu, 4, 4);
cpu->soc = get_soc();
cpu->peak_performance = get_peak_performance(cpu);
}
else { else {
printBug("Found invalid cpu_family: 0x%.8X", cpu_family); printBug("Found invalid cpu_family: 0x%.8X", cpu_family);
return NULL; return NULL;

View File

@@ -111,11 +111,11 @@ bool match_broadcom(char* soc_name, struct system_on_chip* soc) {
bool match_hisilicon(char* soc_name, struct system_on_chip* soc) { bool match_hisilicon(char* soc_name, struct system_on_chip* soc) {
char* tmp; char* tmp;
if((tmp = strstr(soc_name, "Hi")) == NULL) if((tmp = strstr(soc_name, "hi")) == NULL)
return false; return false;
SOC_START SOC_START
SOC_EQ(tmp, "Hi3620GFC", "K3V2", SOC_HISILICON_3620, soc, 40) SOC_EQ(tmp, "hi3620GFC", "K3V2", SOC_HISILICON_3620, soc, 40)
//SOC_EQ(tmp, "?", "K3V2E", SOC_KIRIN, soc, ?) //SOC_EQ(tmp, "?", "K3V2E", SOC_KIRIN, soc, ?)
//SOC_EQ(tmp, "?", "620", SOC_KIRIN, soc, 28) //SOC_EQ(tmp, "?", "620", SOC_KIRIN, soc, 28)
//SOC_EQ(tmp, "?", "650", SOC_KIRIN, soc, 16) //SOC_EQ(tmp, "?", "650", SOC_KIRIN, soc, 16)
@@ -131,18 +131,18 @@ bool match_hisilicon(char* soc_name, struct system_on_chip* soc) {
//SOC_EQ(tmp, "?", "9000E", SOC_KIRIN, soc, 5) //SOC_EQ(tmp, "?", "9000E", SOC_KIRIN, soc, 5)
//SOC_EQ(tmp, "?", "910", SOC_KIRIN, soc, 28) //SOC_EQ(tmp, "?", "910", SOC_KIRIN, soc, 28)
//SOC_EQ(tmp, "?", "910T", SOC_KIRIN, soc, 28) //SOC_EQ(tmp, "?", "910T", SOC_KIRIN, soc, 28)
SOC_EQ(tmp, "Hi3630", "920", SOC_HISILICON_3630, soc, 28) SOC_EQ(tmp, "hi3630", "920", SOC_HISILICON_3630, soc, 28)
//SOC_EQ(tmp, "?", "925", SOC_KIRIN, soc, 28) //SOC_EQ(tmp, "?", "925", SOC_KIRIN, soc, 28)
//SOC_EQ(tmp, "?", "930", SOC_KIRIN, soc, ?) //SOC_EQ(tmp, "?", "930", SOC_KIRIN, soc, ?)
//SOC_EQ(tmp, "?", "935", SOC_KIRIN, soc, ?) //SOC_EQ(tmp, "?", "935", SOC_KIRIN, soc, ?)
SOC_EQ(tmp, "Hi3650", "950", SOC_HISILICON_3650, soc, 16) SOC_EQ(tmp, "hi3650", "950", SOC_HISILICON_3650, soc, 16)
//SOC_EQ(tmp, "?", "955", SOC_KIRIN, soc, ?) //SOC_EQ(tmp, "?", "955", SOC_KIRIN, soc, ?)
SOC_EQ(tmp, "Hi3660", "960", SOC_HISILICON_3660, soc, 16) SOC_EQ(tmp, "hi3660", "960", SOC_HISILICON_3660, soc, 16)
//SOC_EQ(tmp, "?", "960S", SOC_KIRIN, soc, 16) //SOC_EQ(tmp, "?", "960S", SOC_KIRIN, soc, 16)
SOC_EQ(tmp, "Hi3670", "970", SOC_HISILICON_3670, soc, 10) SOC_EQ(tmp, "hi3670", "970", SOC_HISILICON_3670, soc, 10)
SOC_EQ(tmp, "Hi3680", "980", SOC_HISILICON_3680, soc, 7) SOC_EQ(tmp, "hi3680", "980", SOC_HISILICON_3680, soc, 7)
//SOC_EQ(tmp, "?", "985", SOC_KIRIN, soc, 7) //SOC_EQ(tmp, "?", "985", SOC_KIRIN, soc, 7)
SOC_EQ(tmp, "Hi3690", "990", SOC_HISILICON_3690, soc, 7) SOC_EQ(tmp, "hi3690", "990", SOC_HISILICON_3690, soc, 7)
SOC_END SOC_END
} }
@@ -648,7 +648,11 @@ struct system_on_chip* guess_soc_raspbery_pi(struct system_on_chip* soc) {
#if defined(__APPLE__) || defined(__MACH__) #if defined(__APPLE__) || defined(__MACH__)
struct system_on_chip* guess_soc_apple(struct system_on_chip* soc) { struct system_on_chip* guess_soc_apple(struct system_on_chip* soc) {
uint32_t cpu_family = get_sys_info_by_name("hw.cpufamily");
uint32_t cpu_subfamily = get_sys_info_by_name("hw.cpusubfamily"); uint32_t cpu_subfamily = get_sys_info_by_name("hw.cpusubfamily");
if(cpu_family == CPUFAMILY_ARM_FIRESTORM_ICESTORM) {
// Check M1 version
if(cpu_subfamily == CPUSUBFAMILY_ARM_HG) { if(cpu_subfamily == CPUSUBFAMILY_ARM_HG) {
fill_soc(soc, "M1", SOC_APPLE_M1, 5); fill_soc(soc, "M1", SOC_APPLE_M1, 5);
} }
@@ -656,8 +660,38 @@ struct system_on_chip* guess_soc_apple(struct system_on_chip* soc) {
fill_soc(soc, "M1 Pro", SOC_APPLE_M1_PRO, 5); fill_soc(soc, "M1 Pro", SOC_APPLE_M1_PRO, 5);
} }
else if(cpu_subfamily == CPUSUBFAMILY_ARM_HC_HD) { else if(cpu_subfamily == CPUSUBFAMILY_ARM_HC_HD) {
// Could be M1 Max or M1 Ultra (2x M1 Max)
uint32_t physicalcpu = get_sys_info_by_name("hw.physicalcpu");
if(physicalcpu == 20) {
fill_soc(soc, "M1 Ultra", SOC_APPLE_M1_ULTRA, 5);
}
else if(physicalcpu == 10) {
fill_soc(soc, "M1 Max", SOC_APPLE_M1_MAX, 5); fill_soc(soc, "M1 Max", SOC_APPLE_M1_MAX, 5);
} }
else {
printBug("Found invalid physical cpu number: %d", physicalcpu);
soc->soc_vendor = SOC_VENDOR_UNKNOWN;
}
}
else {
printBug("Found invalid cpu_subfamily: 0x%.8X", cpu_subfamily);
soc->soc_vendor = SOC_VENDOR_UNKNOWN;
}
}
else if(cpu_family == CPUFAMILY_ARM_AVALANCHE_BLIZZARD) {
// Check M2 version
if(cpu_subfamily == CPUSUBFAMILY_ARM_HG) {
fill_soc(soc, "M2", SOC_APPLE_M2, 5);
}
else {
printBug("Found invalid cpu_subfamily: 0x%.8X", cpu_subfamily);
soc->soc_vendor = SOC_VENDOR_UNKNOWN;
}
}
else {
printBug("Found invalid cpu_family: 0x%.8X", cpu_family);
soc->soc_vendor = SOC_VENDOR_UNKNOWN;
}
return soc; return soc;
} }
#endif #endif

View File

@@ -256,6 +256,8 @@ enum {
SOC_APPLE_M1, SOC_APPLE_M1,
SOC_APPLE_M1_PRO, SOC_APPLE_M1_PRO,
SOC_APPLE_M1_MAX, SOC_APPLE_M1_MAX,
SOC_APPLE_M1_ULTRA,
SOC_APPLE_M2,
// ALLWINNER // ALLWINNER
SOC_ALLWINNER_A10, SOC_ALLWINNER_A10,
SOC_ALLWINNER_A13, SOC_ALLWINNER_A13,
@@ -288,7 +290,7 @@ inline static VENDOR get_soc_vendor_from_soc(SOC soc) {
else if(soc >= SOC_EXYNOS_3475 && soc <= SOC_EXYNOS_880) return SOC_VENDOR_EXYNOS; else if(soc >= SOC_EXYNOS_3475 && soc <= SOC_EXYNOS_880) return SOC_VENDOR_EXYNOS;
else if(soc >= SOC_MTK_MT6893 && soc <= SOC_MTK_MT8783) return SOC_VENDOR_MEDIATEK; else if(soc >= SOC_MTK_MT6893 && soc <= SOC_MTK_MT8783) return SOC_VENDOR_MEDIATEK;
else if(soc >= SOC_SNAPD_QSD8650 && soc <= SOC_SNAPD_SM8350) return SOC_VENDOR_SNAPDRAGON; else if(soc >= SOC_SNAPD_QSD8650 && soc <= SOC_SNAPD_SM8350) return SOC_VENDOR_SNAPDRAGON;
else if(soc >= SOC_APPLE_M1 && soc <= SOC_APPLE_M1_MAX) return SOC_VENDOR_APPLE; else if(soc >= SOC_APPLE_M1 && soc <= SOC_APPLE_M2) return SOC_VENDOR_APPLE;
else if(soc >= SOC_ALLWINNER_A10 && soc <= SOC_ALLWINNER_R328) return SOC_VENDOR_ALLWINNER; else if(soc >= SOC_ALLWINNER_A10 && soc <= SOC_ALLWINNER_R328) return SOC_VENDOR_ALLWINNER;
return SOC_VENDOR_UNKNOWN; return SOC_VENDOR_UNKNOWN;
} }

View File

@@ -4,9 +4,23 @@
// From Linux kernel: arch/arm64/include/asm/cputype.h // From Linux kernel: arch/arm64/include/asm/cputype.h
#define MIDR_APPLE_M1_ICESTORM 0x610F0220 #define MIDR_APPLE_M1_ICESTORM 0x610F0220
#define MIDR_APPLE_M1_FIRESTORM 0x610F0230 #define MIDR_APPLE_M1_FIRESTORM 0x610F0230
// Kernel does not include those, so I just assume that
// APPLE_CPU_PART_M2_BLIZZARD=0x30,M2_AVALANCHE=0x31
#define MIDR_APPLE_M2_BLIZZARD 0x610F0300
#define MIDR_APPLE_M2_AVALANCHE 0x610F0310
// M1 / A14
#ifndef CPUFAMILY_ARM_FIRESTORM_ICESTORM #ifndef CPUFAMILY_ARM_FIRESTORM_ICESTORM
#define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1B588BB3 #define CPUFAMILY_ARM_FIRESTORM_ICESTORM 0x1B588BB3
#endif #endif
// M2 / A15
#ifndef CPUFAMILY_ARM_AVALANCHE_BLIZZARD
#define CPUFAMILY_ARM_AVALANCHE_BLIZZARD 0xDA33D83D
#endif
// For detecting different M1 types
// NOTE: Could also be achieved detecting different
// MIDR values (e.g., APPLE_CPU_PART_M1_ICESTORM_PRO)
#ifndef CPUSUBFAMILY_ARM_HG #ifndef CPUSUBFAMILY_ARM_HG
#define CPUSUBFAMILY_ARM_HG 2 #define CPUSUBFAMILY_ARM_HG 2
#endif #endif

View File

@@ -33,6 +33,7 @@ enum {
ISA_ARMv8_2_A, ISA_ARMv8_2_A,
ISA_ARMv8_3_A, ISA_ARMv8_3_A,
ISA_ARMv8_4_A, ISA_ARMv8_4_A,
ISA_ARMv8_5_A
}; };
enum { enum {
@@ -95,6 +96,8 @@ enum {
UARCH_THUNDER, // Apple A13 processor (little cores). UARCH_THUNDER, // Apple A13 processor (little cores).
UARCH_ICESTORM, // Apple M1 processor (little cores). UARCH_ICESTORM, // Apple M1 processor (little cores).
UARCH_FIRESTORM, // Apple M1 processor (big cores). UARCH_FIRESTORM, // Apple M1 processor (big cores).
UARCH_BLIZZARD, // Apple M2 processor (little cores).
UARCH_AVALANCHE, // Apple M2 processor (big cores).
// CAVIUM // CAVIUM
UARCH_THUNDERX, // Cavium ThunderX UARCH_THUNDERX, // Cavium ThunderX
UARCH_THUNDERX2, // Cavium ThunderX2 (originally Broadcom Vulkan). UARCH_THUNDERX2, // Cavium ThunderX2 (originally Broadcom Vulkan).
@@ -155,8 +158,10 @@ static const ISA isas_uarch[] = {
[UARCH_EXYNOS_M3] = ISA_ARMv8_A, [UARCH_EXYNOS_M3] = ISA_ARMv8_A,
[UARCH_EXYNOS_M4] = ISA_ARMv8_2_A, [UARCH_EXYNOS_M4] = ISA_ARMv8_2_A,
[UARCH_EXYNOS_M5] = ISA_ARMv8_2_A, [UARCH_EXYNOS_M5] = ISA_ARMv8_2_A,
[UARCH_ICESTORM] = ISA_ARMv8_4_A, [UARCH_ICESTORM] = ISA_ARMv8_5_A, // https://github.com/llvm/llvm-project/blob/main/llvm/include/llvm/Support/AArch64TargetParser.def
[UARCH_FIRESTORM] = ISA_ARMv8_4_A, [UARCH_FIRESTORM] = ISA_ARMv8_5_A,
[UARCH_BLIZZARD] = ISA_ARMv8_5_A, // Not confirmed
[UARCH_AVALANCHE] = ISA_ARMv8_5_A,
[UARCH_PJ4] = ISA_ARMv7_A, [UARCH_PJ4] = ISA_ARMv7_A,
[UARCH_XIAOMI] = ISA_ARMv8_A, [UARCH_XIAOMI] = ISA_ARMv8_A,
}; };
@@ -172,7 +177,8 @@ static char* isas_string[] = {
[ISA_ARMv8_1_A] = "ARMv8.1", [ISA_ARMv8_1_A] = "ARMv8.1",
[ISA_ARMv8_2_A] = "ARMv8.2", [ISA_ARMv8_2_A] = "ARMv8.2",
[ISA_ARMv8_3_A] = "ARMv8.3", [ISA_ARMv8_3_A] = "ARMv8.3",
[ISA_ARMv8_4_A] = "ARMv8.4" [ISA_ARMv8_4_A] = "ARMv8.4",
[ISA_ARMv8_5_A] = "ARMv8.5"
}; };
#define UARCH_START if (false) {} #define UARCH_START if (false) {}
@@ -297,6 +303,8 @@ struct uarch* get_uarch_from_midr(uint32_t midr, struct cpuInfo* cpu) {
CHECK_UARCH(arch, cpu, 'a', 0x022, NA, NA, "Icestorm", UARCH_ICESTORM, CPU_VENDOR_APPLE) CHECK_UARCH(arch, cpu, 'a', 0x022, NA, NA, "Icestorm", UARCH_ICESTORM, CPU_VENDOR_APPLE)
CHECK_UARCH(arch, cpu, 'a', 0x023, NA, NA, "Firestorm", UARCH_FIRESTORM, CPU_VENDOR_APPLE) CHECK_UARCH(arch, cpu, 'a', 0x023, NA, NA, "Firestorm", UARCH_FIRESTORM, CPU_VENDOR_APPLE)
CHECK_UARCH(arch, cpu, 'a', 0x030, NA, NA, "Blizzard", UARCH_BLIZZARD, CPU_VENDOR_APPLE)
CHECK_UARCH(arch, cpu, 'a', 0x031, NA, NA, "Avalanche", UARCH_AVALANCHE, CPU_VENDOR_APPLE)
CHECK_UARCH(arch, cpu, 'V', 0x581, NA, NA, "PJ4", UARCH_PJ4, CPU_VENDOR_MARVELL) CHECK_UARCH(arch, cpu, 'V', 0x581, NA, NA, "PJ4", UARCH_PJ4, CPU_VENDOR_MARVELL)
CHECK_UARCH(arch, cpu, 'V', 0x584, NA, NA, "PJ4B-MP", UARCH_PJ4, CPU_VENDOR_MARVELL) CHECK_UARCH(arch, cpu, 'V', 0x584, NA, NA, "PJ4B-MP", UARCH_PJ4, CPU_VENDOR_MARVELL)

View File

@@ -35,6 +35,12 @@ enum {
HV_VENDOR_INVALID HV_VENDOR_INVALID
}; };
// Kind of core in a hybrid CPU; this is the value kept in the
// core_type field of struct cpuInfo.
enum {
// Efficiency core (shown as "E-cores:" by the x86 printer)
CORE_TYPE_EFFICIENCY,
// Performance core (shown as "P-cores:" by the x86 printer)
CORE_TYPE_PERFORMANCE,
// Neither of the above; the x86 printer reports it as an invalid core type
CORE_TYPE_UNKNOWN
};
#define UNKNOWN_DATA -1 #define UNKNOWN_DATA -1
#define CPU_NAME_MAX_LENGTH 64 #define CPU_NAME_MAX_LENGTH 64
@@ -78,6 +84,7 @@ struct topology {
uint32_t smt_supported; // Number of SMT that CPU supports (equal to smt_available if SMT is enabled) uint32_t smt_supported; // Number of SMT that CPU supports (equal to smt_available if SMT is enabled)
#ifdef ARCH_X86 #ifdef ARCH_X86
uint32_t smt_available; // Number of SMT that is currently enabled uint32_t smt_available; // Number of SMT that is currently enabled
int32_t total_cores_module; // Total cores in the current module (only makes sense in hybrid archs, like ADL)
struct apic* apic; struct apic* apic;
#endif #endif
#endif #endif
@@ -131,6 +138,10 @@ struct cpuInfo {
uint32_t maxExtendedLevels; uint32_t maxExtendedLevels;
// Topology Extensions (AMD only) // Topology Extensions (AMD only)
bool topology_extensions; bool topology_extensions;
// Hybrid Flag (Intel only)
bool hybrid_flag;
// Core Type (P/E)
uint32_t core_type;
#elif ARCH_PPC #elif ARCH_PPC
uint32_t pvr; uint32_t pvr;
#elif ARCH_ARM #elif ARCH_ARM
@@ -140,11 +151,18 @@ struct cpuInfo {
#ifdef ARCH_ARM #ifdef ARCH_ARM
struct system_on_chip* soc; struct system_on_chip* soc;
#endif
#if defined(ARCH_X86) || defined(ARCH_ARM)
// If SoC contains more than one CPU and they // If SoC contains more than one CPU and they
// are different, the others will be stored in // are different, the others will be stored in
// the next_cpu field // the next_cpu field
struct cpuInfo* next_cpu; struct cpuInfo* next_cpu;
uint8_t num_cpus; uint8_t num_cpus;
#ifdef ARCH_X86
// The index of the first core in the module
uint32_t first_core_id;
#endif
#endif #endif
}; };

View File

@@ -33,7 +33,7 @@
static const char* OS_STR = "Unknown OS"; static const char* OS_STR = "Unknown OS";
#endif #endif
static const char* VERSION = "1.02"; static const char* VERSION = "1.03";
void print_help(char *argv[]) { void print_help(char *argv[]) {
const char **t = args_str; const char **t = args_str;

View File

@@ -44,6 +44,8 @@ enum {
ATTRIBUTE_NAME, ATTRIBUTE_NAME,
#elif ARCH_ARM #elif ARCH_ARM
ATTRIBUTE_SOC, ATTRIBUTE_SOC,
#endif
#if defined(ARCH_X86) || defined(ARCH_ARM)
ATTRIBUTE_CPU_NUM, ATTRIBUTE_CPU_NUM,
#endif #endif
ATTRIBUTE_HYPERVISOR, ATTRIBUTE_HYPERVISOR,
@@ -75,6 +77,8 @@ static const char* ATTRIBUTE_FIELDS [] = {
"Part Number:", "Part Number:",
#elif ARCH_ARM #elif ARCH_ARM
"SoC:", "SoC:",
#endif
#if defined(ARCH_X86) || defined(ARCH_ARM)
"", "",
#endif #endif
"Hypervisor:", "Hypervisor:",
@@ -106,6 +110,8 @@ static const char* ATTRIBUTE_FIELDS_SHORT [] = {
"P/N:", "P/N:",
#elif ARCH_ARM #elif ARCH_ARM
"SoC:", "SoC:",
#endif
#if defined(ARCH_X86) || defined(ARCH_ARM)
"", "",
#endif #endif
"Hypervisor:", "Hypervisor:",
@@ -424,11 +430,12 @@ uint32_t longest_field_length(struct ascii* art, int la) {
} }
#if defined(ARCH_X86) || defined(ARCH_PPC) #if defined(ARCH_X86) || defined(ARCH_PPC)
void print_ascii_generic(struct ascii* art, uint32_t la, int32_t termw, const char** attribute_fields) { void print_ascii_generic(struct ascii* art, uint32_t la, int32_t termw, const char** attribute_fields, bool hybrid_architecture) {
struct ascii_logo* logo = art->art; struct ascii_logo* logo = art->art;
int attr_to_print = 0; int attr_to_print = 0;
int attr_type; int attr_type;
char* attr_value; char* attr_value;
int32_t beg_space;
int32_t space_right; int32_t space_right;
int32_t space_up = ((int)logo->height - (int)art->n_attributes_set)/2; int32_t space_up = ((int)logo->height - (int)art->n_attributes_set)/2;
int32_t space_down = (int)logo->height - (int)art->n_attributes_set - (int)space_up; int32_t space_down = (int)logo->height - (int)art->n_attributes_set - (int)space_up;
@@ -439,6 +446,7 @@ void print_ascii_generic(struct ascii* art, uint32_t la, int32_t termw, const ch
lbuf->buf = emalloc(sizeof(char) * LINE_BUFFER_SIZE); lbuf->buf = emalloc(sizeof(char) * LINE_BUFFER_SIZE);
lbuf->pos = 0; lbuf->pos = 0;
lbuf->chars = 0; lbuf->chars = 0;
bool add_space = false;
printf("\n"); printf("\n");
for(int32_t n=0; n < iters; n++) { for(int32_t n=0; n < iters; n++) {
@@ -473,9 +481,24 @@ void print_ascii_generic(struct ascii* art, uint32_t la, int32_t termw, const ch
attr_value = art->attributes[attr_to_print]->value; attr_value = art->attributes[attr_to_print]->value;
attr_to_print++; attr_to_print++;
space_right = 1 + (la - strlen(attribute_fields[attr_type])); if(attr_type == ATTRIBUTE_L3) {
printOut(lbuf, strlen(attribute_fields[attr_type]) + space_right + strlen(attr_value), add_space = false;
"%s%s%s%*s%s%s%s", logo->color_text[0], attribute_fields[attr_type], art->reset, space_right, "", logo->color_text[1], attr_value, art->reset); }
if(attr_type == ATTRIBUTE_CPU_NUM) {
printOut(lbuf, strlen(attr_value), "%s%s%s", logo->color_text[0], attr_value, art->reset);
add_space = true;
}
else {
beg_space = 0;
space_right = 2 + 1 + (la - strlen(attribute_fields[attr_type]));
if(hybrid_architecture && add_space) {
beg_space = 2;
space_right -= 2;
}
printOut(lbuf, beg_space + strlen(attribute_fields[attr_type]) + space_right + strlen(attr_value),
"%*s%s%s%s%*s%s%s%s", beg_space, "", logo->color_text[0], attribute_fields[attr_type], art->reset, space_right, "", logo->color_text[1], attr_value, art->reset);
}
} }
printOutLine(lbuf, art, termw); printOutLine(lbuf, art, termw);
printf("\n"); printf("\n");
@@ -501,43 +524,56 @@ bool print_cpufetch_x86(struct cpuInfo* cpu, STYLE s, struct color** cs, struct
art->new_intel_logo = choose_new_intel_logo(cpu); art->new_intel_logo = choose_new_intel_logo(cpu);
// Step 1. Retrieve attributes (if some structures are NULL, like topo
// or cache, do not try to retrieve them)
uint32_t socket_num = 1; uint32_t socket_num = 1;
char* l1i, *l1d, *l2, *l3, *n_cores, *n_cores_dual, *sockets; char* l1i, *l1d, *l2, *l3, *n_cores, *n_cores_dual, *sockets;
l1i = l1d = l2 = l3 = n_cores = n_cores_dual = sockets = NULL; l1i = l1d = l2 = l3 = n_cores = n_cores_dual = sockets = NULL;
char* uarch = get_str_uarch(cpu);
char* manufacturing_process = get_str_process(cpu);
char* max_frequency = get_str_freq(cpu->freq);
char* cpu_name = get_str_cpu_name(cpu, fcpuname); char* cpu_name = get_str_cpu_name(cpu, fcpuname);
char* avx = get_str_avx(cpu); char* uarch = get_str_uarch(cpu);
char* fma = get_str_fma(cpu);
char* pp = get_str_peak_performance(cpu->peak_performance); char* pp = get_str_peak_performance(cpu->peak_performance);
char* manufacturing_process = get_str_process(cpu);
if(cpu->topo != NULL) { bool hybrid_architecture = cpu->next_cpu != NULL;
sockets = get_str_sockets(cpu->topo);
n_cores = get_str_topology(cpu, cpu->topo, false);
n_cores_dual = get_str_topology(cpu, cpu->topo, true);
}
if(cpu->cach != NULL) { if(cpu->cach != NULL) {
l1i = get_str_l1i(cpu->cach);
l1d = get_str_l1d(cpu->cach);
l2 = get_str_l2(cpu->cach);
l3 = get_str_l3(cpu->cach); l3 = get_str_l3(cpu->cach);
} }
// Step 2. Set attributes
setAttribute(art, ATTRIBUTE_NAME, cpu_name); setAttribute(art, ATTRIBUTE_NAME, cpu_name);
if(cpu->hv->present) { if(cpu->hv->present) {
setAttribute(art, ATTRIBUTE_HYPERVISOR, cpu->hv->hv_name); setAttribute(art, ATTRIBUTE_HYPERVISOR, cpu->hv->hv_name);
} }
setAttribute(art, ATTRIBUTE_UARCH, uarch); setAttribute(art, ATTRIBUTE_UARCH, uarch);
setAttribute(art, ATTRIBUTE_TECHNOLOGY, manufacturing_process); setAttribute(art, ATTRIBUTE_TECHNOLOGY, manufacturing_process);
struct cpuInfo* ptr = cpu;
for(int i = 0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
char* max_frequency = get_str_freq(ptr->freq);
char* avx = get_str_avx(ptr);
char* fma = get_str_fma(ptr);
char* cpu_num = emalloc(sizeof(char) * 9);
if(ptr->topo != NULL) {
sockets = get_str_sockets(ptr->topo);
n_cores = get_str_topology(ptr, ptr->topo, false);
n_cores_dual = get_str_topology(ptr, ptr->topo, true);
}
if(ptr->cach != NULL) {
l1i = get_str_l1i(ptr->cach);
l1d = get_str_l1d(ptr->cach);
l2 = get_str_l2(ptr->cach);
}
if(hybrid_architecture) {
if(ptr->core_type == CORE_TYPE_EFFICIENCY) sprintf(cpu_num, "E-cores:");
else if(ptr->core_type == CORE_TYPE_PERFORMANCE) sprintf(cpu_num, "P-cores:");
else printBug("Found invalid core type!\n");
setAttribute(art, ATTRIBUTE_CPU_NUM, cpu_num);
}
setAttribute(art, ATTRIBUTE_FREQUENCY, max_frequency); setAttribute(art, ATTRIBUTE_FREQUENCY, max_frequency);
if(cpu->topo != NULL) { if(ptr->topo != NULL) {
socket_num = get_nsockets(cpu->topo); socket_num = get_nsockets(ptr->topo);
if (socket_num > 1) { if (socket_num > 1) {
setAttribute(art, ATTRIBUTE_SOCKETS, sockets); setAttribute(art, ATTRIBUTE_SOCKETS, sockets);
setAttribute(art, ATTRIBUTE_NCORES, n_cores); setAttribute(art, ATTRIBUTE_NCORES, n_cores);
@@ -552,6 +588,7 @@ bool print_cpufetch_x86(struct cpuInfo* cpu, STYLE s, struct color** cs, struct
if(l1i != NULL) setAttribute(art, ATTRIBUTE_L1i, l1i); if(l1i != NULL) setAttribute(art, ATTRIBUTE_L1i, l1i);
if(l1d != NULL) setAttribute(art, ATTRIBUTE_L1d, l1d); if(l1d != NULL) setAttribute(art, ATTRIBUTE_L1d, l1d);
if(l2 != NULL) setAttribute(art, ATTRIBUTE_L2, l2); if(l2 != NULL) setAttribute(art, ATTRIBUTE_L2, l2);
}
if(l3 != NULL) setAttribute(art, ATTRIBUTE_L3, l3); if(l3 != NULL) setAttribute(art, ATTRIBUTE_L3, l3);
setAttribute(art, ATTRIBUTE_PEAK, pp); setAttribute(art, ATTRIBUTE_PEAK, pp);
@@ -568,15 +605,12 @@ bool print_cpufetch_x86(struct cpuInfo* cpu, STYLE s, struct color** cs, struct
longest_attribute = longest_attribute_length(art, attribute_fields); longest_attribute = longest_attribute_length(art, attribute_fields);
} }
print_ascii_generic(art, longest_attribute, term->w, attribute_fields); print_ascii_generic(art, longest_attribute, term->w, attribute_fields, hybrid_architecture);
free(manufacturing_process); free(manufacturing_process);
free(max_frequency);
free(sockets); free(sockets);
free(n_cores); free(n_cores);
free(n_cores_dual); free(n_cores_dual);
free(avx);
free(fma);
free(l1i); free(l1i);
free(l1d); free(l1d);
free(l2); free(l2);

View File

@@ -165,48 +165,85 @@ long get_l3_cache_size(uint32_t core) {
return get_cache_size_from_file(path); return get_cache_size_from_file(path);
} }
/*
 * Copy one row of a map table into a row of another table.
 *
 * The first n words of src[src_idx] are copied, in increasing index order,
 * into dst[dst_idx]. Nothing is read or written when n is not positive.
 */
void add_shared_map(uint32_t** src, int src_idx, uint32_t** dst, int dst_idx, int n) {
  // Leave early so the row pointers are not even looked up for an empty copy
  if(n <= 0) return;

  const uint32_t* from = src[src_idx];
  uint32_t* to = dst[dst_idx];
  const uint32_t* const stop = from + n;

  while(from != stop) {
    *to++ = *from++;
  }
}
/*
 * Tell whether two maps hold the same values.
 *
 * Compares the first n words of map1 and map2 and returns true when all of
 * them match (which is trivially the case when n is not positive).
 */
bool maps_equal(uint32_t* map1, uint32_t* map2, int n) {
  int idx = 0;

  // Walk forward while the words agree; stop at the first difference
  while(idx < n && map1[idx] == map2[idx]) {
    idx++;
  }

  // Reaching the end means no difference was found
  return idx >= n;
}
int get_num_caches_from_files(char** paths, int num_paths) { int get_num_caches_from_files(char** paths, int num_paths) {
int SHARED_MAP_MAX_LEN = 8 + 1;
int filelen; int filelen;
char* buf; char* buf;
uint32_t* shared_maps = emalloc(sizeof(uint32_t *) * num_paths); char* tmpbuf;
// 1. Read cpu_shared_map from every core // 1. Count the number of bitmasks per file
if((buf = read_file(paths[0], &filelen)) == NULL) {
printWarn("Could not open '%s'", paths[0]);
return -1;
}
int num_bitmasks = 1;
for(int i=0; buf[i]; i++) {
num_bitmasks += (buf[i] == ',');
}
// 2. Read cpu_shared_map from every core
uint32_t** shared_maps = emalloc(sizeof(uint32_t *) * num_paths);
for(int i=0; i < num_paths; i++) { for(int i=0; i < num_paths; i++) {
shared_maps[i] = emalloc(sizeof(uint32_t) * num_bitmasks);
if((buf = read_file(paths[i], &filelen)) == NULL) { if((buf = read_file(paths[i], &filelen)) == NULL) {
printWarn("Could not open '%s'", paths[i]); printWarn("Could not open '%s'", paths[i]);
return -1; return -1;
} }
if(filelen > SHARED_MAP_MAX_LEN) { for(int j=0; j < num_bitmasks; j++) {
printBug("Shared map length is %d while the max is be %d", filelen, SHARED_MAP_MAX_LEN);
return -1;
}
char* end; char* end;
tmpbuf = emalloc(sizeof(char) * (strlen(buf) + 1));
char* commaend = strstr(buf, ",");
if(commaend == NULL) {
strcpy(tmpbuf, buf);
}
else {
strncpy(tmpbuf, buf, commaend-buf);
}
errno = 0; errno = 0;
long ret = strtol(buf, &end, 16); long ret = strtol(tmpbuf, &end, 16);
if(errno != 0) { if(errno != 0) {
printBug("strtol: %s", strerror(errno)); printf("strtol: %s", strerror(errno));
free(buf); free(buf);
return -1; return -1;
} }
shared_maps[i] = (uint32_t) ret; shared_maps[i][j] = (uint32_t) ret;
buf = commaend + 1;
free(tmpbuf);
}
} }
// 2. Count number of different masks; this is the number of caches // 2. Count number of different masks; this is the number of caches
int num_caches = 0; int num_caches = 0;
bool found = false; bool found = false;
uint32_t* unique_shared_maps = emalloc(sizeof(uint32_t *) * num_paths); uint32_t** unique_shared_maps = emalloc(sizeof(uint32_t *) * num_paths);
for(int i=0; i < num_paths; i++) unique_shared_maps[i] = 0; for(int i=0; i < num_paths; i++) {
unique_shared_maps[i] = emalloc(sizeof(uint32_t) * num_bitmasks);
for(int j=0; j < num_bitmasks; j++) {
unique_shared_maps[i][j] = 0;
}
}
for(int i=0; i < num_paths; i++) { for(int i=0; i < num_paths; i++) {
for(int j=0; j < num_paths && !found; j++) { for(int j=0; j < num_paths && !found; j++) {
if(shared_maps[i] == unique_shared_maps[j]) found = true; if(maps_equal(shared_maps[i], unique_shared_maps[j], num_bitmasks)) found = true;
} }
if(!found) { if(!found) {
unique_shared_maps[num_caches] = shared_maps[i]; add_shared_map(shared_maps, i, unique_shared_maps, num_caches, num_bitmasks);
num_caches++; num_caches++;
} }
found = false; found = false;

View File

@@ -1,5 +1,5 @@
#ifndef __POWERPC__ #ifndef __CPUFETCH_POWERPC__
#define __POWERPC__ #define __CPUFETCH_POWERPC__
#include "../common/cpu.h" #include "../common/cpu.h"

View File

@@ -102,6 +102,59 @@ bool bind_to_cpu(int cpu_id) {
} }
#endif #endif
/*
 * Count the cores that belong to one module (a run of cores of the same
 * type) of a hybrid CPU.
 *
 * The calling thread is bound to core 0, 1, 2, ... in turn and CPUID leaf
 * 0x1A is queried on each of them; bits 31:24 of EAX are used as the core
 * type. Each time a type that was not seen before shows up, a new module
 * starts, so modules are numbered in order of first appearance and the cores
 * of a module are expected to be consecutive.
 *
 * total_cores - total number of cores; the scan ends after the last core
 *               when that core belongs to the requested module.
 * module      - zero-based index of the module whose cores are counted.
 *
 * Returns the number of cores in the module. Returns -1 if a core could not
 * be bound or if more distinct core types than expected are found, and 0 if
 * the affinity mask could not be read, or could not be restored after a
 * successful scan.
 */
int get_total_cores_module(int total_cores, int module) {
  const int total_modules = 2;
  int32_t current_module_idx = -1;
  int cores_in_module = 0;
  int ret = -1;
  bool end = false;
  int i = 0;

  // Get the original mask to restore it later
  cpu_set_t original_mask;
  if(sched_getaffinity(0, sizeof(original_mask), &original_mask) == -1) {
    printWarn("sched_getaffinity: %s", strerror(errno));
    return 0;
  }

  // Allocated only after the early return above so that path cannot leak it
  int32_t* core_types = emalloc(sizeof(*core_types) * total_modules);
  for(int j=0; j < total_modules; j++) core_types[j] = -1;

  while(!end) {
    if(!bind_to_cpu(i)) {
      // ret is still -1; release the buffer and restore the affinity
      goto cleanup;
    }
    uint32_t eax = 0x0000001A;
    uint32_t ebx = 0;
    uint32_t ecx = 0;
    uint32_t edx = 0;
    cpuid(&eax, &ebx, &ecx, &edx);
    int32_t core_type = eax >> 24 & 0xFF;

    bool found = false;
    for(int j=0; j < total_modules && !found; j++) {
      if(core_types[j] == core_type) found = true;
    }
    if(!found) {
      // A new core type opens a new module; refuse to write past the table
      if(current_module_idx + 1 >= total_modules) {
        printWarn("Found more than %d different core types", total_modules);
        goto cleanup;
      }
      current_module_idx++;
      core_types[current_module_idx] = core_type;
    }

    if(current_module_idx == module) {
      cores_in_module++;
      if(i+1 == total_cores) end = true;
    }
    // Already past the requested module: its cores have all been counted
    else if(cores_in_module > 0) end = true;
    i++;
  }
  ret = cores_in_module;

cleanup:
  free(core_types);

  // Reset the original affinity
  if(sched_setaffinity(0, sizeof(original_mask), &original_mask) == -1) {
    printWarn("sched_setaffinity: %s", strerror(errno));
    // Keep the historical return values: 0 after a successful scan,
    // -1 when the scan itself had already failed
    if(ret != -1) ret = 0;
  }

  //printf("Module %d has %d cores\n", module, cores_in_module);
  return ret;
}
bool fill_topo_masks_apic(struct topology* topo) { bool fill_topo_masks_apic(struct topology* topo) {
uint32_t eax = 0x00000001; uint32_t eax = 0x00000001;
uint32_t ebx = 0; uint32_t ebx = 0;
@@ -197,14 +250,14 @@ uint32_t max_apic_id_size(uint32_t** cache_id_apic, struct topology* topo) {
uint32_t max = 0; uint32_t max = 0;
for(int i=0; i < topo->cach->max_cache_level; i++) { for(int i=0; i < topo->cach->max_cache_level; i++) {
for(int j=0; j < topo->total_cores; j++) { for(int j=0; j < topo->total_cores_module; j++) {
if(cache_id_apic[j][i] > max) max = cache_id_apic[j][i]; if(cache_id_apic[j][i] > max) max = cache_id_apic[j][i];
} }
} }
max++; max++;
if(max > (uint32_t) topo->total_cores) return max; if(max > (uint32_t) topo->total_cores_module) return max;
return topo->total_cores; return topo->total_cores_module;
} }
bool build_topo_from_apic(uint32_t* apic_pkg, uint32_t* apic_smt, uint32_t** cache_id_apic, struct topology* topo) { bool build_topo_from_apic(uint32_t* apic_pkg, uint32_t* apic_smt, uint32_t** cache_id_apic, struct topology* topo) {
@@ -219,18 +272,18 @@ bool build_topo_from_apic(uint32_t* apic_pkg, uint32_t* apic_smt, uint32_t** cac
memset(apic_id, 0, sizeof(uint32_t) * size); memset(apic_id, 0, sizeof(uint32_t) * size);
// System topology // System topology
for(int i=0; i < topo->total_cores; i++) { for(int i=0; i < topo->total_cores_module; i++) {
sockets[apic_pkg[i]] = 1; sockets[apic_pkg[i]] = 1;
smt[apic_smt[i]] = 1; smt[apic_smt[i]] = 1;
} }
for(int i=0; i < topo->total_cores; i++) { for(int i=0; i < topo->total_cores_module; i++) {
if(sockets[i] != 0) if(sockets[i] != 0)
topo->sockets++; topo->sockets++;
if(smt[i] != 0) if(smt[i] != 0)
topo->smt_available++; topo->smt_available++;
} }
topo->logical_cores = topo->total_cores / topo->sockets; topo->logical_cores = topo->total_cores_module / topo->sockets;
topo->physical_cores = topo->logical_cores / topo->smt_available; topo->physical_cores = topo->logical_cores / topo->smt_available;
// Cache topology // Cache topology
@@ -238,7 +291,7 @@ bool build_topo_from_apic(uint32_t* apic_pkg, uint32_t* apic_smt, uint32_t** cac
num_caches = 0; num_caches = 0;
memset(apic_id, 0, sizeof(uint32_t) * size); memset(apic_id, 0, sizeof(uint32_t) * size);
for(int c=0; c < topo->total_cores; c++) { for(int c=0; c < topo->total_cores_module; c++) {
apic_id[cache_id_apic[c][i]]++; apic_id[cache_id_apic[c][i]]++;
} }
for(uint32_t c=0; c < size; c++) { for(uint32_t c=0; c < size; c++) {
@@ -297,7 +350,7 @@ void add_apic_to_array(uint32_t apic, uint32_t* apic_ids, int n) {
} }
} }
bool fill_apic_ids(uint32_t* apic_ids, int n, bool x2apic_id) { bool fill_apic_ids(uint32_t* apic_ids, int first_core, int n, bool x2apic_id) {
#ifdef __APPLE__ #ifdef __APPLE__
// macOS extremely dirty approach... // macOS extremely dirty approach...
printf("cpufetch is computing APIC IDs, please wait...\n"); printf("cpufetch is computing APIC IDs, please wait...\n");
@@ -322,12 +375,12 @@ bool fill_apic_ids(uint32_t* apic_ids, int n, bool x2apic_id) {
} }
#endif #endif
for(int i=0; i < n; i++) { for(int i=first_core; i < first_core+n; i++) {
if(!bind_to_cpu(i)) { if(!bind_to_cpu(i)) {
printErr("Failed binding the process to CPU %d", i); printErr("Failed binding the process to CPU %d", i);
return false; return false;
} }
apic_ids[i] = get_apic_id(x2apic_id); apic_ids[i-first_core] = get_apic_id(x2apic_id);
} }
#ifdef __linux__ #ifdef __linux__
@@ -344,12 +397,12 @@ bool fill_apic_ids(uint32_t* apic_ids, int n, bool x2apic_id) {
bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) { bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) {
uint32_t apic_id; uint32_t apic_id;
uint32_t* apic_ids = emalloc(sizeof(uint32_t) * topo->total_cores); uint32_t* apic_ids = emalloc(sizeof(uint32_t) * topo->total_cores_module);
uint32_t* apic_pkg = emalloc(sizeof(uint32_t) * topo->total_cores); uint32_t* apic_pkg = emalloc(sizeof(uint32_t) * topo->total_cores_module);
uint32_t* apic_core = emalloc(sizeof(uint32_t) * topo->total_cores); uint32_t* apic_core = emalloc(sizeof(uint32_t) * topo->total_cores_module);
uint32_t* apic_smt = emalloc(sizeof(uint32_t) * topo->total_cores); uint32_t* apic_smt = emalloc(sizeof(uint32_t) * topo->total_cores_module);
uint32_t** cache_smt_id_apic = emalloc(sizeof(uint32_t*) * topo->total_cores); uint32_t** cache_smt_id_apic = emalloc(sizeof(uint32_t*) * topo->total_cores_module);
uint32_t** cache_id_apic = emalloc(sizeof(uint32_t*) * topo->total_cores); uint32_t** cache_id_apic = emalloc(sizeof(uint32_t*) * topo->total_cores_module);
bool x2apic_id; bool x2apic_id;
if(cpu->maxLevels >= 0x0000000B) { if(cpu->maxLevels >= 0x0000000B) {
@@ -367,7 +420,7 @@ bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) {
x2apic_id = false; x2apic_id = false;
} }
for(int i=0; i < topo->total_cores; i++) { for(int i=0; i < topo->total_cores_module; i++) {
cache_smt_id_apic[i] = emalloc(sizeof(uint32_t) * (topo->cach->max_cache_level)); cache_smt_id_apic[i] = emalloc(sizeof(uint32_t) * (topo->cach->max_cache_level));
cache_id_apic[i] = emalloc(sizeof(uint32_t) * (topo->cach->max_cache_level)); cache_id_apic[i] = emalloc(sizeof(uint32_t) * (topo->cach->max_cache_level));
} }
@@ -385,10 +438,10 @@ bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) {
get_cache_topology_from_apic(topo); get_cache_topology_from_apic(topo);
if(!fill_apic_ids(apic_ids, topo->total_cores, x2apic_id)) if(!fill_apic_ids(apic_ids, cpu->first_core_id, topo->total_cores_module, x2apic_id))
return false; return false;
for(int i=0; i < topo->total_cores; i++) { for(int i=0; i < topo->total_cores_module; i++) {
apic_id = apic_ids[i]; apic_id = apic_ids[i];
apic_pkg[i] = (apic_id & topo->apic->pkg_mask) >> topo->apic->pkg_mask_shift; apic_pkg[i] = (apic_id & topo->apic->pkg_mask) >> topo->apic->pkg_mask_shift;
@@ -404,20 +457,19 @@ bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) {
/* DEBUG /* DEBUG
for(int i=0; i < topo->cach->max_cache_level; i++) { for(int i=0; i < topo->cach->max_cache_level; i++) {
printf("[CACH %1d]", i); printf("[CACH %1d]", i);
for(int j=0; j < topo->total_cores; j++) for(int j=0; j < topo->total_cores_module; j++)
printf("[%03d]", cache_id_apic[j][i]); printf("[%03d]", cache_id_apic[j][i]);
printf("\n"); printf("\n");
} }
for(int i=0; i < topo->total_cores; i++) for(int i=0; i < topo->total_cores_module; i++)
printf("[%2d] 0x%.8X\n", i, apic_pkg[i]); printf("[%2d] 0x%.8X\n", i, apic_pkg[i]);
printf("\n"); printf("\n");
for(int i=0; i < topo->total_cores; i++) for(int i=0; i < topo->total_cores_module; i++)
printf("[%2d] 0x%.8X\n", i, apic_core[i]); printf("[%2d] 0x%.8X\n", i, apic_core[i]);
printf("\n"); printf("\n");
for(int i=0; i < topo->total_cores; i++) for(int i=0; i < topo->total_cores_module; i++)
printf("[%2d] 0x%.8X\n", i, apic_smt[i]);*/ printf("[%2d] 0x%.8X\n", i, apic_smt[i]);*/
bool ret = build_topo_from_apic(apic_pkg, apic_smt, cache_id_apic, topo); bool ret = build_topo_from_apic(apic_pkg, apic_smt, cache_id_apic, topo);
// Assumption: If we cant get smt_available, we assume it is equal to smt_supported... // Assumption: If we cant get smt_available, we assume it is equal to smt_supported...
@@ -429,7 +481,7 @@ bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) {
free(apic_pkg); free(apic_pkg);
free(apic_core); free(apic_core);
free(apic_smt); free(apic_smt);
for(int i=0; i < topo->total_cores; i++) { for(int i=0; i < topo->total_cores_module; i++) {
free(cache_smt_id_apic[i]); free(cache_smt_id_apic[i]);
free(cache_id_apic[i]); free(cache_id_apic[i]);
} }

View File

@@ -21,4 +21,6 @@ uint32_t is_smt_enabled_amd(struct topology* topo);
bool bind_to_cpu(int cpu_id); bool bind_to_cpu(int cpu_id);
#endif #endif
int get_total_cores_module(int total_cores, int module);
#endif #endif

View File

@@ -179,7 +179,7 @@ struct uarch* get_cpu_uarch(struct cpuInfo* cpu) {
return get_uarch_from_cpuid(cpu, eax, efamily, family, emodel, model, (int)stepping); return get_uarch_from_cpuid(cpu, eax, efamily, family, emodel, model, (int)stepping);
} }
int64_t get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t max_freq, bool accurate_pp) { int64_t get_peak_performance(struct cpuInfo* cpu, bool accurate_pp) {
/* /*
* PP = PeakPerformance * PP = PeakPerformance
* SP = SinglePrecision * SP = SinglePrecision
@@ -192,25 +192,32 @@ int64_t get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t
* 16(If AVX512), 8(If AVX), 4(If SSE) * * 16(If AVX512), 8(If AVX), 4(If SSE) *
*/ */
struct cpuInfo* ptr = cpu;
int64_t total_flops = 0;
for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
struct topology* topo = ptr->topo;
int64_t max_freq = get_freq(ptr->freq);
int64_t freq; int64_t freq;
#ifdef __linux__ #ifdef __linux__
if(accurate_pp) if(accurate_pp)
freq = measure_frequency(cpu); freq = measure_frequency(ptr);
else else
freq = max_freq; freq = max_freq;
#else #else
// Silence compiler warning // Silence compiler warning
(void)(accurate_pp); (void)(accurate_pp);
freq = max_freq; freq = max_freq;
#endif #endif
//First, check we have consistent data //First, check we have consistent data
if(freq == UNKNOWN_DATA || topo->logical_cores == UNKNOWN_DATA) { if(freq == UNKNOWN_DATA || topo->logical_cores == UNKNOWN_DATA) {
return -1; return -1;
} }
struct features* feat = cpu->feat; struct features* feat = ptr->feat;
int vpus = get_number_of_vpus(cpu); int vpus = get_number_of_vpus(ptr);
int64_t flops = topo->physical_cores * topo->sockets * (freq*1000000) * vpus; int64_t flops = topo->physical_cores * topo->sockets * (freq*1000000) * vpus;
if(feat->FMA3 || feat->FMA4) if(feat->FMA3 || feat->FMA4)
@@ -219,7 +226,7 @@ int64_t get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t
// Ice Lake has AVX512, but it has 1 VPU for AVX512, while // Ice Lake has AVX512, but it has 1 VPU for AVX512, while
// it has 2 for AVX2. If this is a Ice Lake CPU, we are computing // it has 2 for AVX2. If this is a Ice Lake CPU, we are computing
// the peak performance supposing AVX2, not AVX512 // the peak performance supposing AVX2, not AVX512
if(feat->AVX512 && vpus_are_AVX512(cpu)) if(feat->AVX512 && vpus_are_AVX512(ptr))
flops = flops*16; flops = flops*16;
else if(feat->AVX || feat->AVX2) else if(feat->AVX || feat->AVX2)
flops = flops*8; flops = flops*8;
@@ -228,10 +235,13 @@ int64_t get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t
// See https://sites.utexas.edu/jdm4372/2018/01/22/a-peculiar- // See https://sites.utexas.edu/jdm4372/2018/01/22/a-peculiar-
// throughput-limitation-on-intels-xeon-phi-x200-knights-landing/ // throughput-limitation-on-intels-xeon-phi-x200-knights-landing/
if(is_knights_landing(cpu)) if(is_knights_landing(ptr))
flops = flops * 6 / 7; flops = flops * 6 / 7;
return flops; total_flops += flops;
}
return total_flops;
} }
struct hypervisor* get_hp_info(bool hv_present) { struct hypervisor* get_hp_info(bool hv_present) {
@@ -274,51 +284,19 @@ struct hypervisor* get_hp_info(bool hv_present) {
return hv; return hv;
} }
struct cpuInfo* get_cpu_info() { struct features* get_features_info(struct cpuInfo* cpu) {
struct cpuInfo* cpu = emalloc(sizeof(struct cpuInfo));
struct features* feat = emalloc(sizeof(struct features));
cpu->feat = feat;
cpu->peak_performance = -1;
cpu->topo = NULL;
cpu->cach = NULL;
bool *ptr = &(feat->AES);
for(uint32_t i = 0; i < sizeof(struct features)/sizeof(bool); i++, ptr++) {
*ptr = false;
}
uint32_t eax = 0; uint32_t eax = 0;
uint32_t ebx = 0; uint32_t ebx = 0;
uint32_t ecx = 0; uint32_t ecx = 0;
uint32_t edx = 0; uint32_t edx = 0;
//Get max cpuid level struct features* feat = emalloc(sizeof(struct features));
cpuid(&eax, &ebx, &ecx, &edx);
cpu->maxLevels = eax;
//Fill vendor bool *ptr = &(feat->AES);
char name[13]; for(uint32_t i = 0; i < sizeof(struct features)/sizeof(bool); i++, ptr++) {
memset(name,0,13); *ptr = false;
get_name_cpuid(name, ebx, edx, ecx);
if(strcmp(CPU_VENDOR_INTEL_STRING,name) == 0)
cpu->cpu_vendor = CPU_VENDOR_INTEL;
else if (strcmp(CPU_VENDOR_AMD_STRING,name) == 0)
cpu->cpu_vendor = CPU_VENDOR_AMD;
else {
cpu->cpu_vendor = CPU_VENDOR_INVALID;
printErr("Unknown CPU vendor: %s", name);
return NULL;
} }
//Get max extended level
eax = 0x80000000;
ebx = 0;
ecx = 0;
edx = 0;
cpuid(&eax, &ebx, &ecx, &edx);
cpu->maxExtendedLevels = eax;
//Fill instructions support //Fill instructions support
if (cpu->maxLevels >= 0x00000001){ if (cpu->maxLevels >= 0x00000001){
eax = 0x00000001; eax = 0x00000001;
@@ -373,6 +351,116 @@ struct cpuInfo* get_cpu_info() {
printWarn("Can't read features information from cpuid (needed extended level is 0x%.8X, max is 0x%.8X)", 0x80000001, cpu->maxExtendedLevels); printWarn("Can't read features information from cpuid (needed extended level is 0x%.8X, max is 0x%.8X)", 0x80000001, cpu->maxExtendedLevels);
} }
return feat;
}
/*
 * Binds the process to the first core of module `m` when the CPU has more
 * than one module (hybrid architecture), and reports that core's id.
 *
 * Cores are visited in increasing id order, starting at 0. Each new core
 * type reported by CPUID leaf 0x1A (EAX bits 31..24) opens a new module.
 *
 * m:             index of the module to select
 * total_modules: number of modules; with 1 or fewer nothing is bound
 * first_core:    output, id of the first core of module m. It is always
 *                written: 0 when total_modules <= 1 or when the module
 *                could not be located.
 *
 * Returns true on success (or if there is nothing to do), false if the
 * process could not be bound to a core or if more distinct core types
 * than total_modules were found before reaching module m.
 */
bool set_cpu_module(int m, int total_modules, int32_t* first_core) {
  // Callers read *first_core unconditionally, so give it a defined value
  // on every path
  *first_core = 0;

  if(total_modules <= 1) return true;

  // We have a hybrid architecture.
  // 1. Find the first core from module m
  int32_t core_id = -1;
  int32_t current_module_idx = -1;
  bool ok = true;
  int32_t* core_types = emalloc(sizeof(*core_types) * total_modules);
  for(int i=0; i < total_modules; i++) core_types[i] = -1;

  for(int i=0; core_id == -1; i++) {
    if(!bind_to_cpu(i)) {
      ok = false;
      break;
    }

    uint32_t eax = 0x0000001A;
    uint32_t ebx = 0;
    uint32_t ecx = 0;
    uint32_t edx = 0;
    cpuid(&eax, &ebx, &ecx, &edx);
    int32_t core_type = eax >> 24 & 0xFF;

    bool found = false;
    for(int j=0; j < total_modules && !found; j++) {
      if(core_types[j] == core_type) found = true;
    }
    if(found) continue;

    // A new core type with no free slot left would overflow core_types
    if(current_module_idx + 1 >= total_modules) {
      ok = false;
      break;
    }

    current_module_idx++;
    core_types[current_module_idx] = core_type;
    if(current_module_idx == m) core_id = i;
  }

  free(core_types);
  if(!ok) return false;

  *first_core = core_id;

  // 2. Now bind to that core
  return bind_to_cpu(core_id);
}
/*
 * Returns the type of the core the process is currently running on, as
 * reported by CPUID leaf 0x1A (EAX bits 31..24):
 *   0x20 -> CORE_TYPE_EFFICIENCY
 *   0x40 -> CORE_TYPE_PERFORMANCE
 * Any other value is reported as an error and CORE_TYPE_UNKNOWN is
 * returned. The caller is expected to have bound the process to the core
 * of interest beforehand.
 */
int32_t get_core_type() {
  uint32_t eax = 0x0000001A;
  uint32_t ebx = 0;
  uint32_t ecx = 0;
  uint32_t edx = 0;

  cpuid(&eax, &ebx, &ecx, &edx);

  // Kept unsigned so that it matches the %X conversion below
  uint32_t type = eax >> 24 & 0xFF;

  switch(type) {
    case 0x20:
      return CORE_TYPE_EFFICIENCY;
    case 0x40:
      return CORE_TYPE_PERFORMANCE;
    default:
      // No trailing newline, like the rest of printErr calls
      printErr("Found invalid core type: 0x%.8X", type);
      return CORE_TYPE_UNKNOWN;
  }
}
struct cpuInfo* get_cpu_info() {
struct cpuInfo* cpu = emalloc(sizeof(struct cpuInfo));
cpu->peak_performance = -1;
cpu->next_cpu = NULL;
cpu->topo = NULL;
cpu->cach = NULL;
cpu->feat = NULL;
uint32_t modules = 1;
uint32_t eax = 0;
uint32_t ebx = 0;
uint32_t ecx = 0;
uint32_t edx = 0;
//Get max cpuid level
cpuid(&eax, &ebx, &ecx, &edx);
cpu->maxLevels = eax;
//Fill vendor
char name[13];
memset(name,0,13);
get_name_cpuid(name, ebx, edx, ecx);
if(strcmp(CPU_VENDOR_INTEL_STRING,name) == 0)
cpu->cpu_vendor = CPU_VENDOR_INTEL;
else if (strcmp(CPU_VENDOR_AMD_STRING,name) == 0)
cpu->cpu_vendor = CPU_VENDOR_AMD;
else {
cpu->cpu_vendor = CPU_VENDOR_INVALID;
printErr("Unknown CPU vendor: %s", name);
return NULL;
}
//Get max extended level
eax = 0x80000000;
ebx = 0;
ecx = 0;
edx = 0;
cpuid(&eax, &ebx, &ecx, &edx);
cpu->maxExtendedLevels = eax;
if (cpu->maxExtendedLevels >= 0x80000004){ if (cpu->maxExtendedLevels >= 0x80000004){
cpu->cpu_name = get_str_cpu_name_internal(); cpu->cpu_name = get_str_cpu_name_internal();
} }
@@ -389,19 +477,66 @@ struct cpuInfo* get_cpu_info() {
cpu->topology_extensions = (ecx >> 22) & 1; cpu->topology_extensions = (ecx >> 22) & 1;
} }
cpu->hybrid_flag = false;
if(cpu->cpu_vendor == CPU_VENDOR_INTEL && cpu->maxLevels >= 0x00000007) {
eax = 0x00000007;
ecx = 0x00000000;
cpuid(&eax, &ebx, &ecx, &edx);
cpu->hybrid_flag = (edx >> 15) & 0x1;
}
if(cpu->hybrid_flag) modules = 2;
struct cpuInfo* ptr = cpu;
for(uint32_t i=0; i < modules; i++) {
int32_t first_core;
set_cpu_module(i, modules, &first_core);
if(i > 0) {
ptr->next_cpu = emalloc(sizeof(struct cpuInfo));
ptr = ptr->next_cpu;
ptr->next_cpu = NULL;
ptr->peak_performance = -1;
ptr->topo = NULL;
ptr->cach = NULL;
ptr->feat = NULL;
// We assume that this cores have the
// same cpuid capabilities
ptr->cpu_vendor = cpu->cpu_vendor;
ptr->maxLevels = cpu->maxLevels;
ptr->maxExtendedLevels = cpu->maxExtendedLevels;
ptr->hybrid_flag = cpu->hybrid_flag;
}
if(cpu->hybrid_flag) {
// Detect core type
eax = 0x0000001A;
cpuid(&eax, &ebx, &ecx, &edx);
ptr->core_type = get_core_type();
}
ptr->first_core_id = first_core;
ptr->feat = get_features_info(ptr);
// If any field of the struct is NULL, // If any field of the struct is NULL,
// return inmideately, as further functions // return inmideately, as further functions
// require valid fields (cach, topo, etc) // require valid fields (cach, topo, etc)
cpu->arch = get_cpu_uarch(cpu); ptr->arch = get_cpu_uarch(ptr);
cpu->freq = get_frequency_info(cpu); ptr->freq = get_frequency_info(ptr);
cpu->cach = get_cache_info(cpu); ptr->cach = get_cache_info(ptr);
if(cpu->cach == NULL) return cpu; if(ptr->cach == NULL) return cpu;
cpu->topo = get_topology_info(cpu, cpu->cach); if(cpu->hybrid_flag) {
ptr->topo = get_topology_info(ptr, ptr->cach, i);
}
else {
ptr->topo = get_topology_info(ptr, ptr->cach, -1);
}
if(cpu->topo == NULL) return cpu; if(cpu->topo == NULL) return cpu;
}
cpu->peak_performance = get_peak_performance(cpu, cpu->topo, get_freq(cpu->freq), accurate_pp()); cpu->num_cpus = modules;
cpu->peak_performance = get_peak_performance(cpu, accurate_pp());
return cpu; return cpu;
} }
@@ -492,7 +627,7 @@ void get_topology_from_udev(struct topology* topo) {
// Main reference: https://software.intel.com/content/www/us/en/develop/articles/intel-64-architecture-processor-topology-enumeration.html // Main reference: https://software.intel.com/content/www/us/en/develop/articles/intel-64-architecture-processor-topology-enumeration.html
// Very interesting resource: https://wiki.osdev.org/Detecting_CPU_Topology_(80x86) // Very interesting resource: https://wiki.osdev.org/Detecting_CPU_Topology_(80x86)
struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) { struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach, int module) {
struct topology* topo = emalloc(sizeof(struct topology)); struct topology* topo = emalloc(sizeof(struct topology));
init_topology_struct(topo, cach); init_topology_struct(topo, cach);
@@ -516,6 +651,13 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) {
} }
#endif #endif
if(cpu->hybrid_flag) {
topo->total_cores_module = get_total_cores_module(topo->total_cores, module);
}
else {
topo->total_cores_module = topo->total_cores;
}
switch(cpu->cpu_vendor) { switch(cpu->cpu_vendor) {
case CPU_VENDOR_INTEL: case CPU_VENDOR_INTEL:
if (cpu->maxLevels >= 0x00000004) { if (cpu->maxLevels >= 0x00000004) {
@@ -919,6 +1061,9 @@ void print_debug(struct cpuInfo* cpu) {
if(cpu->cpu_vendor == CPU_VENDOR_AMD) { if(cpu->cpu_vendor == CPU_VENDOR_AMD) {
printf("- AMD topology extensions: %d\n", cpu->topology_extensions); printf("- AMD topology extensions: %d\n", cpu->topology_extensions);
} }
if(cpu->cpu_vendor == CPU_VENDOR_INTEL) {
printf("- Hybrid Flag: %d\n", cpu->hybrid_flag);
}
printf("- CPUID dump: 0x%.8X\n", eax); printf("- CPUID dump: 0x%.8X\n", eax);
free_cpuinfo_struct(cpu); free_cpuinfo_struct(cpu);

View File

@@ -6,7 +6,7 @@
struct cpuInfo* get_cpu_info(); struct cpuInfo* get_cpu_info();
struct cache* get_cache_info(struct cpuInfo* cpu); struct cache* get_cache_info(struct cpuInfo* cpu);
struct frequency* get_frequency_info(struct cpuInfo* cpu); struct frequency* get_frequency_info(struct cpuInfo* cpu);
struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach); struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach, int module);
char* get_str_avx(struct cpuInfo* cpu); char* get_str_avx(struct cpuInfo* cpu);
char* get_str_sse(struct cpuInfo* cpu); char* get_str_sse(struct cpuInfo* cpu);

View File

@@ -79,7 +79,7 @@ enum {
UARCH_GOLDMONT_PLUS, UARCH_GOLDMONT_PLUS,
UARCH_TREMONT, UARCH_TREMONT,
UARCH_LAKEMONT, UARCH_LAKEMONT,
UARCH_COFFE_LAKE, UARCH_COFFEE_LAKE,
UARCH_ITANIUM, UARCH_ITANIUM,
UARCH_KNIGHTS_FERRY, UARCH_KNIGHTS_FERRY,
UARCH_KNIGHTS_CORNER, UARCH_KNIGHTS_CORNER,
@@ -109,7 +109,9 @@ enum {
UARCH_ZEN, UARCH_ZEN,
UARCH_ZEN_PLUS, UARCH_ZEN_PLUS,
UARCH_ZEN2, UARCH_ZEN2,
UARCH_ZEN3 UARCH_ZEN3,
UARCH_ZEN3_PLUS,
UARCH_ZEN4
}; };
struct uarch { struct uarch {
@@ -225,7 +227,7 @@ struct uarch* get_uarch_from_cpuid_intel(uint32_t ef, uint32_t f, uint32_t em, u
CHECK_UARCH(arch, 0, 6, 8, 12, NA, "Tiger Lake", UARCH_TIGER_LAKE, 10) // instlatx64 CHECK_UARCH(arch, 0, 6, 8, 12, NA, "Tiger Lake", UARCH_TIGER_LAKE, 10) // instlatx64
CHECK_UARCH(arch, 0, 6, 8, 13, NA, "Tiger Lake", UARCH_TIGER_LAKE, 10) // instlatx64 CHECK_UARCH(arch, 0, 6, 8, 13, NA, "Tiger Lake", UARCH_TIGER_LAKE, 10) // instlatx64
// CHECK_UARCH(arch, 0, 6, 8, 14, 9, ...) It is not possible to determine uarch only from CPUID dump (can be Kaby Lake or Amber Lake) // CHECK_UARCH(arch, 0, 6, 8, 14, 9, ...) It is not possible to determine uarch only from CPUID dump (can be Kaby Lake or Amber Lake)
CHECK_UARCH(arch, 0, 6, 8, 14, 10, "Kaby Lake", UARCH_KABY_LAKE, 14) // wikichip CHECK_UARCH(arch, 0, 6, 8, 14, 10, "Coffee Lake", UARCH_COFFEE_LAKE, 14) // wikichip
CHECK_UARCH(arch, 0, 6, 8, 14, 11, "Whiskey Lake", UARCH_WHISKEY_LAKE, 14) // wikichip CHECK_UARCH(arch, 0, 6, 8, 14, 11, "Whiskey Lake", UARCH_WHISKEY_LAKE, 14) // wikichip
CHECK_UARCH(arch, 0, 6, 8, 14, 12, "Comet Lake", UARCH_COMET_LAKE, 14) // wikichip CHECK_UARCH(arch, 0, 6, 8, 14, 12, "Comet Lake", UARCH_COMET_LAKE, 14) // wikichip
CHECK_UARCH(arch, 0, 6, 9, 6, NA, "Tremont", UARCH_TREMONT, 10) // LX* CHECK_UARCH(arch, 0, 6, 9, 6, NA, "Tremont", UARCH_TREMONT, 10) // LX*
@@ -234,10 +236,10 @@ struct uarch* get_uarch_from_cpuid_intel(uint32_t ef, uint32_t f, uint32_t em, u
CHECK_UARCH(arch, 0, 6, 9, 12, NA, "Tremont", UARCH_TREMONT, 10) // LX* CHECK_UARCH(arch, 0, 6, 9, 12, NA, "Tremont", UARCH_TREMONT, 10) // LX*
CHECK_UARCH(arch, 0, 6, 9, 13, NA, "Sunny Cove", UARCH_SUNNY_COVE, 10) // LX* CHECK_UARCH(arch, 0, 6, 9, 13, NA, "Sunny Cove", UARCH_SUNNY_COVE, 10) // LX*
CHECK_UARCH(arch, 0, 6, 9, 14, 9, "Kaby Lake", UARCH_KABY_LAKE, 14) CHECK_UARCH(arch, 0, 6, 9, 14, 9, "Kaby Lake", UARCH_KABY_LAKE, 14)
CHECK_UARCH(arch, 0, 6, 9, 14, 10, "Coffee Lake", UARCH_COFFE_LAKE, 14) CHECK_UARCH(arch, 0, 6, 9, 14, 10, "Coffee Lake", UARCH_COFFEE_LAKE, 14)
CHECK_UARCH(arch, 0, 6, 9, 14, 11, "Coffee Lake", UARCH_COFFE_LAKE, 14) CHECK_UARCH(arch, 0, 6, 9, 14, 11, "Coffee Lake", UARCH_COFFEE_LAKE, 14)
CHECK_UARCH(arch, 0, 6, 9, 14, 12, "Coffee Lake", UARCH_COFFE_LAKE, 14) CHECK_UARCH(arch, 0, 6, 9, 14, 12, "Coffee Lake", UARCH_COFFEE_LAKE, 14)
CHECK_UARCH(arch, 0, 6, 9, 14, 13, "Coffee Lake", UARCH_COFFE_LAKE, 14) CHECK_UARCH(arch, 0, 6, 9, 14, 13, "Coffee Lake", UARCH_COFFEE_LAKE, 14)
CHECK_UARCH(arch, 0, 6, 10, 5, NA, "Comet Lake", UARCH_COMET_LAKE, 14) // wikichip CHECK_UARCH(arch, 0, 6, 10, 5, NA, "Comet Lake", UARCH_COMET_LAKE, 14) // wikichip
CHECK_UARCH(arch, 0, 6, 10, 6, NA, "Comet Lake", UARCH_COMET_LAKE, 14) // instlatx64.atw.hu (i7-10710U) CHECK_UARCH(arch, 0, 6, 10, 6, NA, "Comet Lake", UARCH_COMET_LAKE, 14) // instlatx64.atw.hu (i7-10710U)
CHECK_UARCH(arch, 0, 6, 10, 7, NA, "Rocket Lake", UARCH_ROCKET_LAKE, 14) // instlatx64.atw.hu (i7-11700K) CHECK_UARCH(arch, 0, 6, 10, 7, NA, "Rocket Lake", UARCH_ROCKET_LAKE, 14) // instlatx64.atw.hu (i7-11700K)
@@ -257,7 +259,7 @@ struct uarch* get_uarch_from_cpuid_intel(uint32_t ef, uint32_t f, uint32_t em, u
return arch; return arch;
} }
// iNApired in Todd Allen's decode_uarch_amd // Inspired in Todd Allen's decode_uarch_amd
struct uarch* get_uarch_from_cpuid_amd(uint32_t ef, uint32_t f, uint32_t em, uint32_t m, int s) { struct uarch* get_uarch_from_cpuid_amd(uint32_t ef, uint32_t f, uint32_t em, uint32_t m, int s) {
struct uarch* arch = emalloc(sizeof(struct uarch)); struct uarch* arch = emalloc(sizeof(struct uarch));
@@ -358,9 +360,12 @@ struct uarch* get_uarch_from_cpuid_amd(uint32_t ef, uint32_t f, uint32_t em, uin
CHECK_UARCH(arch, 8, 15, 6, 0, NA, "Zen 2", UARCH_ZEN2, 7) // undocumented, geekbench.com example CHECK_UARCH(arch, 8, 15, 6, 0, NA, "Zen 2", UARCH_ZEN2, 7) // undocumented, geekbench.com example
CHECK_UARCH(arch, 8, 15, 6, 8, NA, "Zen 2", UARCH_ZEN2, 7) // found on instlatx64 CHECK_UARCH(arch, 8, 15, 6, 8, NA, "Zen 2", UARCH_ZEN2, 7) // found on instlatx64
CHECK_UARCH(arch, 8, 15, 7, 1, NA, "Zen 2", UARCH_ZEN2, 7) // samples from Steven Noonan and instlatx64 CHECK_UARCH(arch, 8, 15, 7, 1, NA, "Zen 2", UARCH_ZEN2, 7) // samples from Steven Noonan and instlatx64
CHECK_UARCH(arch, 8, 15, 9, 0, 2, "Zen 2", UARCH_ZEN2, 7) // Steam Deck (instlatx64)
CHECK_UARCH(arch, 10, 15, 0, 1, NA, "Zen 3", UARCH_ZEN3, 7) // instlatx64 CHECK_UARCH(arch, 10, 15, 0, 1, NA, "Zen 3", UARCH_ZEN3, 7) // instlatx64
CHECK_UARCH(arch, 10, 15, 2, 1, NA, "Zen 3", UARCH_ZEN3, 7) // instlatx64 CHECK_UARCH(arch, 10, 15, 2, 1, NA, "Zen 3", UARCH_ZEN3, 7) // instlatx64
CHECK_UARCH(arch, 10, 15, 4, 4, NA, "Zen 3+", UARCH_ZEN3_PLUS, 6) // instlatx64 (they say it is Zen3...)
CHECK_UARCH(arch, 10, 15, 5, 0, NA, "Zen 3", UARCH_ZEN3, 7) // instlatx64 CHECK_UARCH(arch, 10, 15, 5, 0, NA, "Zen 3", UARCH_ZEN3, 7) // instlatx64
CHECK_UARCH(arch, 10, 15, 6, 1, 2, "Zen 4", UARCH_ZEN4, 5) // instlatx64
UARCH_END UARCH_END
return arch; return arch;
@@ -408,7 +413,7 @@ int get_number_of_vpus(struct cpuInfo* cpu) {
case UARCH_ROCKET_LAKE: case UARCH_ROCKET_LAKE:
case UARCH_AMBER_LAKE: case UARCH_AMBER_LAKE:
case UARCH_WHISKEY_LAKE: case UARCH_WHISKEY_LAKE:
case UARCH_COFFE_LAKE: case UARCH_COFFEE_LAKE:
case UARCH_PALM_COVE: case UARCH_PALM_COVE:
case UARCH_KNIGHTS_LANDING: case UARCH_KNIGHTS_LANDING:
@@ -416,10 +421,13 @@ int get_number_of_vpus(struct cpuInfo* cpu) {
case UARCH_ICE_LAKE: case UARCH_ICE_LAKE:
case UARCH_TIGER_LAKE: case UARCH_TIGER_LAKE:
case UARCH_ALDER_LAKE:
// AMD // AMD
case UARCH_ZEN2: case UARCH_ZEN2:
case UARCH_ZEN3: case UARCH_ZEN3:
case UARCH_ZEN3_PLUS:
case UARCH_ZEN4:
return 2; return 2;
default: default:
return 1; return 1;