mirror of
https://github.com/Dr-Noob/cpufetch.git
synced 2026-03-25 16:00:39 +01:00
Compare commits
22 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b2aa8194c6 | ||
|
|
d879b06d08 | ||
|
|
6cc18027db | ||
|
|
77510c260a | ||
|
|
1eb1a5246e | ||
|
|
cec91a1e4d | ||
|
|
ff5166ea2e | ||
|
|
051d48b7d1 | ||
|
|
e91eef3e65 | ||
|
|
de24d86cd6 | ||
|
|
4b1a087b64 | ||
|
|
7eb856ae84 | ||
|
|
65366abe04 | ||
|
|
190e5daace | ||
|
|
9f7204d43d | ||
|
|
87961144d2 | ||
|
|
f4565cb937 | ||
|
|
61a1ad8a2b | ||
|
|
a955451937 | ||
|
|
db21931118 | ||
|
|
71a9308bed | ||
|
|
b319b52952 |
2
Makefile
2
Makefile
@@ -28,7 +28,7 @@ ifneq ($(OS),Windows_NT)
|
||||
SRC_DIR=src/ppc/
|
||||
SOURCE += $(COMMON_SRC) $(SRC_DIR)ppc.c $(SRC_DIR)uarch.c $(SRC_DIR)udev.c
|
||||
HEADERS += $(COMMON_HDR) $(SRC_DIR)ppc.h $(SRC_DIR)uarch.h $(SRC_DIR)udev.c
|
||||
CFLAGS += -DARCH_PPC -std=gnu99 -fstack-protector-all
|
||||
CFLAGS += -DARCH_PPC -std=gnu99 -fstack-protector-all -Wno-language-extension-token
|
||||
else ifeq ($(arch), $(filter $(arch), arm aarch64_be aarch64 arm64 armv8b armv8l armv7l armv6l))
|
||||
SRC_DIR=src/arm/
|
||||
SOURCE += $(COMMON_SRC) $(SRC_DIR)midr.c $(SRC_DIR)uarch.c $(SRC_DIR)soc.c $(SRC_DIR)udev.c
|
||||
|
||||
@@ -81,12 +81,23 @@ int64_t get_peak_performance(struct cpuInfo* cpu) {
|
||||
}
|
||||
|
||||
int64_t flops = 0;
|
||||
|
||||
ptr = cpu;
|
||||
for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
|
||||
flops += ptr->topo->total_cores * (get_freq(ptr->freq) * 1000000);
|
||||
|
||||
if(cpu->soc->soc_vendor == SOC_VENDOR_APPLE) {
|
||||
// Special case for M1/M2
|
||||
// First we find the E cores, then the P
|
||||
// M1 have 2 (E cores) or 4 (P cores) FMA units
|
||||
// Source: https://dougallj.github.io/applecpu/firestorm-simd.html
|
||||
flops += ptr->topo->total_cores * (get_freq(ptr->freq) * 1000000) * 2 * 4 * 2;
|
||||
ptr = ptr->next_cpu;
|
||||
flops += ptr->topo->total_cores * (get_freq(ptr->freq) * 1000000) * 2 * 4 * 4;
|
||||
}
|
||||
else {
|
||||
for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
|
||||
flops += ptr->topo->total_cores * (get_freq(ptr->freq) * 1000000);
|
||||
}
|
||||
if(cpu->feat->NEON) flops = flops * 4;
|
||||
}
|
||||
if(cpu->feat->NEON) flops = flops * 4;
|
||||
|
||||
return flops;
|
||||
}
|
||||
|
||||
@@ -111,11 +111,11 @@ bool match_broadcom(char* soc_name, struct system_on_chip* soc) {
|
||||
bool match_hisilicon(char* soc_name, struct system_on_chip* soc) {
|
||||
char* tmp;
|
||||
|
||||
if((tmp = strstr(soc_name, "Hi")) == NULL)
|
||||
if((tmp = strstr(soc_name, "hi")) == NULL)
|
||||
return false;
|
||||
|
||||
SOC_START
|
||||
SOC_EQ(tmp, "Hi3620GFC", "K3V2", SOC_HISILICON_3620, soc, 40)
|
||||
SOC_EQ(tmp, "hi3620GFC", "K3V2", SOC_HISILICON_3620, soc, 40)
|
||||
//SOC_EQ(tmp, "?", "K3V2E", SOC_KIRIN, soc, ?)
|
||||
//SOC_EQ(tmp, "?", "620", SOC_KIRIN, soc, 28)
|
||||
//SOC_EQ(tmp, "?", "650", SOC_KIRIN, soc, 16)
|
||||
@@ -131,18 +131,18 @@ bool match_hisilicon(char* soc_name, struct system_on_chip* soc) {
|
||||
//SOC_EQ(tmp, "?", "9000E", SOC_KIRIN, soc, 5)
|
||||
//SOC_EQ(tmp, "?", "910", SOC_KIRIN, soc, 28)
|
||||
//SOC_EQ(tmp, "?", "910T", SOC_KIRIN, soc, 28)
|
||||
SOC_EQ(tmp, "Hi3630", "920", SOC_HISILICON_3630, soc, 28)
|
||||
SOC_EQ(tmp, "hi3630", "920", SOC_HISILICON_3630, soc, 28)
|
||||
//SOC_EQ(tmp, "?", "925", SOC_KIRIN, soc, 28)
|
||||
//SOC_EQ(tmp, "?", "930", SOC_KIRIN, soc, ?)
|
||||
//SOC_EQ(tmp, "?", "935", SOC_KIRIN, soc, ?)
|
||||
SOC_EQ(tmp, "Hi3650", "950", SOC_HISILICON_3650, soc, 16)
|
||||
SOC_EQ(tmp, "hi3650", "950", SOC_HISILICON_3650, soc, 16)
|
||||
//SOC_EQ(tmp, "?", "955", SOC_KIRIN, soc, ?)
|
||||
SOC_EQ(tmp, "Hi3660", "960", SOC_HISILICON_3660, soc, 16)
|
||||
SOC_EQ(tmp, "hi3660", "960", SOC_HISILICON_3660, soc, 16)
|
||||
//SOC_EQ(tmp, "?", "960S", SOC_KIRIN, soc, 16)
|
||||
SOC_EQ(tmp, "Hi3670", "970", SOC_HISILICON_3670, soc, 10)
|
||||
SOC_EQ(tmp, "Hi3680", "980", SOC_HISILICON_3680, soc, 7)
|
||||
SOC_EQ(tmp, "hi3670", "970", SOC_HISILICON_3670, soc, 10)
|
||||
SOC_EQ(tmp, "hi3680", "980", SOC_HISILICON_3680, soc, 7)
|
||||
//SOC_EQ(tmp, "?", "985", SOC_KIRIN, soc, 7)
|
||||
SOC_EQ(tmp, "Hi3690", "990", SOC_HISILICON_3690, soc, 7)
|
||||
SOC_EQ(tmp, "hi3690", "990", SOC_HISILICON_3690, soc, 7)
|
||||
SOC_END
|
||||
}
|
||||
|
||||
|
||||
@@ -35,6 +35,12 @@ enum {
|
||||
HV_VENDOR_INVALID
|
||||
};
|
||||
|
||||
enum {
|
||||
CORE_TYPE_EFFICIENCY,
|
||||
CORE_TYPE_PERFORMANCE,
|
||||
CORE_TYPE_UNKNOWN
|
||||
};
|
||||
|
||||
#define UNKNOWN_DATA -1
|
||||
#define CPU_NAME_MAX_LENGTH 64
|
||||
|
||||
@@ -78,6 +84,7 @@ struct topology {
|
||||
uint32_t smt_supported; // Number of SMT that CPU supports (equal to smt_available if SMT is enabled)
|
||||
#ifdef ARCH_X86
|
||||
uint32_t smt_available; // Number of SMT that is currently enabled
|
||||
int32_t total_cores_module; // Total cores in the current module (only makes sense in hybrid archs, like ADL)
|
||||
struct apic* apic;
|
||||
#endif
|
||||
#endif
|
||||
@@ -131,6 +138,10 @@ struct cpuInfo {
|
||||
uint32_t maxExtendedLevels;
|
||||
// Topology Extensions (AMD only)
|
||||
bool topology_extensions;
|
||||
// Hybrid Flag (Intel only)
|
||||
bool hybrid_flag;
|
||||
// Core Type (P/E)
|
||||
uint32_t core_type;
|
||||
#elif ARCH_PPC
|
||||
uint32_t pvr;
|
||||
#elif ARCH_ARM
|
||||
@@ -140,11 +151,18 @@ struct cpuInfo {
|
||||
|
||||
#ifdef ARCH_ARM
|
||||
struct system_on_chip* soc;
|
||||
#endif
|
||||
|
||||
#if defined(ARCH_X86) || defined(ARCH_ARM)
|
||||
// If SoC contains more than one CPU and they
|
||||
// are different, the others will be stored in
|
||||
// the next_cpu field
|
||||
struct cpuInfo* next_cpu;
|
||||
uint8_t num_cpus;
|
||||
#ifdef ARCH_X86
|
||||
// The index of the first core in the module
|
||||
uint32_t first_core_id;
|
||||
#endif
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
@@ -44,6 +44,8 @@ enum {
|
||||
ATTRIBUTE_NAME,
|
||||
#elif ARCH_ARM
|
||||
ATTRIBUTE_SOC,
|
||||
#endif
|
||||
#if defined(ARCH_X86) || defined(ARCH_ARM)
|
||||
ATTRIBUTE_CPU_NUM,
|
||||
#endif
|
||||
ATTRIBUTE_HYPERVISOR,
|
||||
@@ -75,6 +77,8 @@ static const char* ATTRIBUTE_FIELDS [] = {
|
||||
"Part Number:",
|
||||
#elif ARCH_ARM
|
||||
"SoC:",
|
||||
#endif
|
||||
#if defined(ARCH_X86) || defined(ARCH_ARM)
|
||||
"",
|
||||
#endif
|
||||
"Hypervisor:",
|
||||
@@ -106,6 +110,8 @@ static const char* ATTRIBUTE_FIELDS_SHORT [] = {
|
||||
"P/N:",
|
||||
#elif ARCH_ARM
|
||||
"SoC:",
|
||||
#endif
|
||||
#if defined(ARCH_X86) || defined(ARCH_ARM)
|
||||
"",
|
||||
#endif
|
||||
"Hypervisor:",
|
||||
@@ -424,11 +430,12 @@ uint32_t longest_field_length(struct ascii* art, int la) {
|
||||
}
|
||||
|
||||
#if defined(ARCH_X86) || defined(ARCH_PPC)
|
||||
void print_ascii_generic(struct ascii* art, uint32_t la, int32_t termw, const char** attribute_fields) {
|
||||
void print_ascii_generic(struct ascii* art, uint32_t la, int32_t termw, const char** attribute_fields, bool hybrid_architecture) {
|
||||
struct ascii_logo* logo = art->art;
|
||||
int attr_to_print = 0;
|
||||
int attr_type;
|
||||
char* attr_value;
|
||||
int32_t beg_space;
|
||||
int32_t space_right;
|
||||
int32_t space_up = ((int)logo->height - (int)art->n_attributes_set)/2;
|
||||
int32_t space_down = (int)logo->height - (int)art->n_attributes_set - (int)space_up;
|
||||
@@ -439,6 +446,7 @@ void print_ascii_generic(struct ascii* art, uint32_t la, int32_t termw, const ch
|
||||
lbuf->buf = emalloc(sizeof(char) * LINE_BUFFER_SIZE);
|
||||
lbuf->pos = 0;
|
||||
lbuf->chars = 0;
|
||||
bool add_space = false;
|
||||
|
||||
printf("\n");
|
||||
for(int32_t n=0; n < iters; n++) {
|
||||
@@ -473,9 +481,24 @@ void print_ascii_generic(struct ascii* art, uint32_t la, int32_t termw, const ch
|
||||
attr_value = art->attributes[attr_to_print]->value;
|
||||
attr_to_print++;
|
||||
|
||||
space_right = 1 + (la - strlen(attribute_fields[attr_type]));
|
||||
printOut(lbuf, strlen(attribute_fields[attr_type]) + space_right + strlen(attr_value),
|
||||
"%s%s%s%*s%s%s%s", logo->color_text[0], attribute_fields[attr_type], art->reset, space_right, "", logo->color_text[1], attr_value, art->reset);
|
||||
if(attr_type == ATTRIBUTE_L3) {
|
||||
add_space = false;
|
||||
}
|
||||
if(attr_type == ATTRIBUTE_CPU_NUM) {
|
||||
printOut(lbuf, strlen(attr_value), "%s%s%s", logo->color_text[0], attr_value, art->reset);
|
||||
add_space = true;
|
||||
}
|
||||
else {
|
||||
beg_space = 0;
|
||||
space_right = 2 + 1 + (la - strlen(attribute_fields[attr_type]));
|
||||
if(hybrid_architecture && add_space) {
|
||||
beg_space = 2;
|
||||
space_right -= 2;
|
||||
}
|
||||
|
||||
printOut(lbuf, beg_space + strlen(attribute_fields[attr_type]) + space_right + strlen(attr_value),
|
||||
"%*s%s%s%s%*s%s%s%s", beg_space, "", logo->color_text[0], attribute_fields[attr_type], art->reset, space_right, "", logo->color_text[1], attr_value, art->reset);
|
||||
}
|
||||
}
|
||||
printOutLine(lbuf, art, termw);
|
||||
printf("\n");
|
||||
@@ -501,57 +524,71 @@ bool print_cpufetch_x86(struct cpuInfo* cpu, STYLE s, struct color** cs, struct
|
||||
|
||||
art->new_intel_logo = choose_new_intel_logo(cpu);
|
||||
|
||||
// Step 1. Retrieve attributes (if some structures are NULL, like topo
|
||||
// or cache, do not try to retrieve them)
|
||||
uint32_t socket_num = 1;
|
||||
char* l1i, *l1d, *l2, *l3, *n_cores, *n_cores_dual, *sockets;
|
||||
l1i = l1d = l2 = l3 = n_cores = n_cores_dual = sockets = NULL;
|
||||
|
||||
char* uarch = get_str_uarch(cpu);
|
||||
char* manufacturing_process = get_str_process(cpu);
|
||||
char* max_frequency = get_str_freq(cpu->freq);
|
||||
char* cpu_name = get_str_cpu_name(cpu, fcpuname);
|
||||
char* avx = get_str_avx(cpu);
|
||||
char* fma = get_str_fma(cpu);
|
||||
char* uarch = get_str_uarch(cpu);
|
||||
char* pp = get_str_peak_performance(cpu->peak_performance);
|
||||
|
||||
if(cpu->topo != NULL) {
|
||||
sockets = get_str_sockets(cpu->topo);
|
||||
n_cores = get_str_topology(cpu, cpu->topo, false);
|
||||
n_cores_dual = get_str_topology(cpu, cpu->topo, true);
|
||||
}
|
||||
char* manufacturing_process = get_str_process(cpu);
|
||||
bool hybrid_architecture = cpu->next_cpu != NULL;
|
||||
|
||||
if(cpu->cach != NULL) {
|
||||
l1i = get_str_l1i(cpu->cach);
|
||||
l1d = get_str_l1d(cpu->cach);
|
||||
l2 = get_str_l2(cpu->cach);
|
||||
l3 = get_str_l3(cpu->cach);
|
||||
}
|
||||
|
||||
// Step 2. Set attributes
|
||||
setAttribute(art, ATTRIBUTE_NAME, cpu_name);
|
||||
if(cpu->hv->present) {
|
||||
setAttribute(art, ATTRIBUTE_HYPERVISOR, cpu->hv->hv_name);
|
||||
}
|
||||
setAttribute(art, ATTRIBUTE_UARCH, uarch);
|
||||
setAttribute(art, ATTRIBUTE_TECHNOLOGY, manufacturing_process);
|
||||
setAttribute(art, ATTRIBUTE_FREQUENCY, max_frequency);
|
||||
if(cpu->topo != NULL) {
|
||||
socket_num = get_nsockets(cpu->topo);
|
||||
if (socket_num > 1) {
|
||||
setAttribute(art, ATTRIBUTE_SOCKETS, sockets);
|
||||
setAttribute(art, ATTRIBUTE_NCORES, n_cores);
|
||||
setAttribute(art, ATTRIBUTE_NCORES_DUAL, n_cores_dual);
|
||||
|
||||
struct cpuInfo* ptr = cpu;
|
||||
for(int i = 0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
|
||||
char* max_frequency = get_str_freq(ptr->freq);
|
||||
char* avx = get_str_avx(ptr);
|
||||
char* fma = get_str_fma(ptr);
|
||||
char* cpu_num = emalloc(sizeof(char) * 9);
|
||||
|
||||
if(ptr->topo != NULL) {
|
||||
sockets = get_str_sockets(ptr->topo);
|
||||
n_cores = get_str_topology(ptr, ptr->topo, false);
|
||||
n_cores_dual = get_str_topology(ptr, ptr->topo, true);
|
||||
}
|
||||
else {
|
||||
setAttribute(art, ATTRIBUTE_NCORES, n_cores);
|
||||
|
||||
if(ptr->cach != NULL) {
|
||||
l1i = get_str_l1i(ptr->cach);
|
||||
l1d = get_str_l1d(ptr->cach);
|
||||
l2 = get_str_l2(ptr->cach);
|
||||
}
|
||||
|
||||
if(hybrid_architecture) {
|
||||
if(ptr->core_type == CORE_TYPE_EFFICIENCY) sprintf(cpu_num, "E-cores:");
|
||||
else if(ptr->core_type == CORE_TYPE_PERFORMANCE) sprintf(cpu_num, "P-cores:");
|
||||
else printBug("Found invalid core type!\n");
|
||||
|
||||
setAttribute(art, ATTRIBUTE_CPU_NUM, cpu_num);
|
||||
}
|
||||
setAttribute(art, ATTRIBUTE_FREQUENCY, max_frequency);
|
||||
if(ptr->topo != NULL) {
|
||||
socket_num = get_nsockets(ptr->topo);
|
||||
if (socket_num > 1) {
|
||||
setAttribute(art, ATTRIBUTE_SOCKETS, sockets);
|
||||
setAttribute(art, ATTRIBUTE_NCORES, n_cores);
|
||||
setAttribute(art, ATTRIBUTE_NCORES_DUAL, n_cores_dual);
|
||||
}
|
||||
else {
|
||||
setAttribute(art, ATTRIBUTE_NCORES, n_cores);
|
||||
}
|
||||
}
|
||||
setAttribute(art, ATTRIBUTE_AVX, avx);
|
||||
setAttribute(art, ATTRIBUTE_FMA, fma);
|
||||
if(l1i != NULL) setAttribute(art, ATTRIBUTE_L1i, l1i);
|
||||
if(l1d != NULL) setAttribute(art, ATTRIBUTE_L1d, l1d);
|
||||
if(l2 != NULL) setAttribute(art, ATTRIBUTE_L2, l2);
|
||||
}
|
||||
setAttribute(art, ATTRIBUTE_AVX, avx);
|
||||
setAttribute(art, ATTRIBUTE_FMA, fma);
|
||||
if(l1i != NULL) setAttribute(art, ATTRIBUTE_L1i, l1i);
|
||||
if(l1d != NULL) setAttribute(art, ATTRIBUTE_L1d, l1d);
|
||||
if(l2 != NULL) setAttribute(art, ATTRIBUTE_L2, l2);
|
||||
if(l3 != NULL) setAttribute(art, ATTRIBUTE_L3, l3);
|
||||
setAttribute(art, ATTRIBUTE_PEAK, pp);
|
||||
|
||||
@@ -568,15 +605,12 @@ bool print_cpufetch_x86(struct cpuInfo* cpu, STYLE s, struct color** cs, struct
|
||||
longest_attribute = longest_attribute_length(art, attribute_fields);
|
||||
}
|
||||
|
||||
print_ascii_generic(art, longest_attribute, term->w, attribute_fields);
|
||||
print_ascii_generic(art, longest_attribute, term->w, attribute_fields, hybrid_architecture);
|
||||
|
||||
free(manufacturing_process);
|
||||
free(max_frequency);
|
||||
free(sockets);
|
||||
free(n_cores);
|
||||
free(n_cores_dual);
|
||||
free(avx);
|
||||
free(fma);
|
||||
free(l1i);
|
||||
free(l1d);
|
||||
free(l2);
|
||||
|
||||
@@ -165,42 +165,85 @@ long get_l3_cache_size(uint32_t core) {
|
||||
return get_cache_size_from_file(path);
|
||||
}
|
||||
|
||||
void add_shared_map(uint32_t** src, int src_idx, uint32_t** dst, int dst_idx, int n) {
|
||||
for(int j=0; j < n; j++) {
|
||||
dst[dst_idx][j] = src[src_idx][j];
|
||||
}
|
||||
}
|
||||
|
||||
bool maps_equal(uint32_t* map1, uint32_t* map2, int n) {
|
||||
for(int i=0; i < n; i++) {
|
||||
if(map1[i] != map2[i]) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
int get_num_caches_from_files(char** paths, int num_paths) {
|
||||
int filelen;
|
||||
char* buf;
|
||||
uint32_t* shared_maps = emalloc(sizeof(uint32_t *) * num_paths);
|
||||
char* tmpbuf;
|
||||
|
||||
// 1. Read cpu_shared_map from every core
|
||||
// 1. Count the number of bitmasks per file
|
||||
if((buf = read_file(paths[0], &filelen)) == NULL) {
|
||||
printWarn("Could not open '%s'", paths[0]);
|
||||
return -1;
|
||||
}
|
||||
int num_bitmasks = 1;
|
||||
for(int i=0; buf[i]; i++) {
|
||||
num_bitmasks += (buf[i] == ',');
|
||||
}
|
||||
|
||||
// 2. Read cpu_shared_map from every core
|
||||
uint32_t** shared_maps = emalloc(sizeof(uint32_t *) * num_paths);
|
||||
for(int i=0; i < num_paths; i++) {
|
||||
shared_maps[i] = emalloc(sizeof(uint32_t) * num_bitmasks);
|
||||
|
||||
if((buf = read_file(paths[i], &filelen)) == NULL) {
|
||||
printWarn("Could not open '%s'", paths[i]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
char* end;
|
||||
errno = 0;
|
||||
long ret = strtol(buf, &end, 16);
|
||||
if(errno != 0) {
|
||||
printBug("strtol: %s", strerror(errno));
|
||||
free(buf);
|
||||
return -1;
|
||||
}
|
||||
for(int j=0; j < num_bitmasks; j++) {
|
||||
char* end;
|
||||
tmpbuf = emalloc(sizeof(char) * (strlen(buf) + 1));
|
||||
char* commaend = strstr(buf, ",");
|
||||
if(commaend == NULL) {
|
||||
strcpy(tmpbuf, buf);
|
||||
}
|
||||
else {
|
||||
strncpy(tmpbuf, buf, commaend-buf);
|
||||
}
|
||||
errno = 0;
|
||||
long ret = strtol(tmpbuf, &end, 16);
|
||||
if(errno != 0) {
|
||||
printf("strtol: %s", strerror(errno));
|
||||
free(buf);
|
||||
return -1;
|
||||
}
|
||||
|
||||
shared_maps[i] = (uint32_t) ret;
|
||||
shared_maps[i][j] = (uint32_t) ret;
|
||||
buf = commaend + 1;
|
||||
free(tmpbuf);
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Count number of different masks; this is the number of caches
|
||||
int num_caches = 0;
|
||||
bool found = false;
|
||||
uint32_t* unique_shared_maps = emalloc(sizeof(uint32_t *) * num_paths);
|
||||
for(int i=0; i < num_paths; i++) unique_shared_maps[i] = 0;
|
||||
uint32_t** unique_shared_maps = emalloc(sizeof(uint32_t *) * num_paths);
|
||||
for(int i=0; i < num_paths; i++) {
|
||||
unique_shared_maps[i] = emalloc(sizeof(uint32_t) * num_bitmasks);
|
||||
for(int j=0; j < num_bitmasks; j++) {
|
||||
unique_shared_maps[i][j] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
for(int i=0; i < num_paths; i++) {
|
||||
for(int j=0; j < num_paths && !found; j++) {
|
||||
if(shared_maps[i] == unique_shared_maps[j]) found = true;
|
||||
if(maps_equal(shared_maps[i], unique_shared_maps[j], num_bitmasks)) found = true;
|
||||
}
|
||||
if(!found) {
|
||||
unique_shared_maps[num_caches] = shared_maps[i];
|
||||
add_shared_map(shared_maps, i, unique_shared_maps, num_caches, num_bitmasks);
|
||||
num_caches++;
|
||||
}
|
||||
found = false;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#ifndef __POWERPC__
|
||||
#define __POWERPC__
|
||||
#ifndef __CPUFETCH_POWERPC__
|
||||
#define __CPUFETCH_POWERPC__
|
||||
|
||||
#include "../common/cpu.h"
|
||||
|
||||
|
||||
102
src/x86/apic.c
102
src/x86/apic.c
@@ -102,6 +102,59 @@ bool bind_to_cpu(int cpu_id) {
|
||||
}
|
||||
#endif
|
||||
|
||||
int get_total_cores_module(int total_cores, int module) {
|
||||
int total_modules = 2;
|
||||
int32_t current_module_idx = -1;
|
||||
bool end = false;
|
||||
int32_t* core_types = emalloc(sizeof(uint32_t) * total_modules);
|
||||
for(int i=0; i < total_modules; i++) core_types[i] = -1;
|
||||
int cores_in_module = 0;
|
||||
int i = 0;
|
||||
|
||||
// Get the original mask to restore it later
|
||||
cpu_set_t original_mask;
|
||||
if(sched_getaffinity(0, sizeof(original_mask), &original_mask) == -1) {
|
||||
printWarn("sched_getaffinity: %s", strerror(errno));
|
||||
return false;
|
||||
}
|
||||
|
||||
while(!end) {
|
||||
if(!bind_to_cpu(i)) {
|
||||
return -1;
|
||||
}
|
||||
uint32_t eax = 0x0000001A;
|
||||
uint32_t ebx = 0;
|
||||
uint32_t ecx = 0;
|
||||
uint32_t edx = 0;
|
||||
cpuid(&eax, &ebx, &ecx, &edx);
|
||||
int32_t core_type = eax >> 24 & 0xFF;
|
||||
bool found = false;
|
||||
|
||||
for(int j=0; j < total_modules && !found; j++) {
|
||||
if(core_types[j] == core_type) found = true;
|
||||
}
|
||||
if(!found) {
|
||||
current_module_idx++;
|
||||
core_types[current_module_idx] = core_type;
|
||||
}
|
||||
if(current_module_idx == module) {
|
||||
cores_in_module++;
|
||||
if(i+1 == total_cores) end = true;
|
||||
}
|
||||
else if(cores_in_module > 0) end = true;
|
||||
i++;
|
||||
}
|
||||
|
||||
// Reset the original affinity
|
||||
if (sched_setaffinity (0, sizeof(original_mask), &original_mask) == -1) {
|
||||
printWarn("sched_setaffinity: %s", strerror(errno));
|
||||
return false;
|
||||
}
|
||||
|
||||
//printf("Module %d has %d cores\n", module, cores_in_module);
|
||||
return cores_in_module;
|
||||
}
|
||||
|
||||
bool fill_topo_masks_apic(struct topology* topo) {
|
||||
uint32_t eax = 0x00000001;
|
||||
uint32_t ebx = 0;
|
||||
@@ -197,14 +250,14 @@ uint32_t max_apic_id_size(uint32_t** cache_id_apic, struct topology* topo) {
|
||||
uint32_t max = 0;
|
||||
|
||||
for(int i=0; i < topo->cach->max_cache_level; i++) {
|
||||
for(int j=0; j < topo->total_cores; j++) {
|
||||
for(int j=0; j < topo->total_cores_module; j++) {
|
||||
if(cache_id_apic[j][i] > max) max = cache_id_apic[j][i];
|
||||
}
|
||||
}
|
||||
|
||||
max++;
|
||||
if(max > (uint32_t) topo->total_cores) return max;
|
||||
return topo->total_cores;
|
||||
if(max > (uint32_t) topo->total_cores_module) return max;
|
||||
return topo->total_cores_module;
|
||||
}
|
||||
|
||||
bool build_topo_from_apic(uint32_t* apic_pkg, uint32_t* apic_smt, uint32_t** cache_id_apic, struct topology* topo) {
|
||||
@@ -219,18 +272,18 @@ bool build_topo_from_apic(uint32_t* apic_pkg, uint32_t* apic_smt, uint32_t** cac
|
||||
memset(apic_id, 0, sizeof(uint32_t) * size);
|
||||
|
||||
// System topology
|
||||
for(int i=0; i < topo->total_cores; i++) {
|
||||
for(int i=0; i < topo->total_cores_module; i++) {
|
||||
sockets[apic_pkg[i]] = 1;
|
||||
smt[apic_smt[i]] = 1;
|
||||
}
|
||||
for(int i=0; i < topo->total_cores; i++) {
|
||||
for(int i=0; i < topo->total_cores_module; i++) {
|
||||
if(sockets[i] != 0)
|
||||
topo->sockets++;
|
||||
if(smt[i] != 0)
|
||||
topo->smt_available++;
|
||||
}
|
||||
|
||||
topo->logical_cores = topo->total_cores / topo->sockets;
|
||||
topo->logical_cores = topo->total_cores_module / topo->sockets;
|
||||
topo->physical_cores = topo->logical_cores / topo->smt_available;
|
||||
|
||||
// Cache topology
|
||||
@@ -238,7 +291,7 @@ bool build_topo_from_apic(uint32_t* apic_pkg, uint32_t* apic_smt, uint32_t** cac
|
||||
num_caches = 0;
|
||||
memset(apic_id, 0, sizeof(uint32_t) * size);
|
||||
|
||||
for(int c=0; c < topo->total_cores; c++) {
|
||||
for(int c=0; c < topo->total_cores_module; c++) {
|
||||
apic_id[cache_id_apic[c][i]]++;
|
||||
}
|
||||
for(uint32_t c=0; c < size; c++) {
|
||||
@@ -297,7 +350,7 @@ void add_apic_to_array(uint32_t apic, uint32_t* apic_ids, int n) {
|
||||
}
|
||||
}
|
||||
|
||||
bool fill_apic_ids(uint32_t* apic_ids, int n, bool x2apic_id) {
|
||||
bool fill_apic_ids(uint32_t* apic_ids, int first_core, int n, bool x2apic_id) {
|
||||
#ifdef __APPLE__
|
||||
// macOS extremely dirty approach...
|
||||
printf("cpufetch is computing APIC IDs, please wait...\n");
|
||||
@@ -322,12 +375,12 @@ bool fill_apic_ids(uint32_t* apic_ids, int n, bool x2apic_id) {
|
||||
}
|
||||
#endif
|
||||
|
||||
for(int i=0; i < n; i++) {
|
||||
for(int i=first_core; i < first_core+n; i++) {
|
||||
if(!bind_to_cpu(i)) {
|
||||
printErr("Failed binding the process to CPU %d", i);
|
||||
return false;
|
||||
}
|
||||
apic_ids[i] = get_apic_id(x2apic_id);
|
||||
apic_ids[i-first_core] = get_apic_id(x2apic_id);
|
||||
}
|
||||
|
||||
#ifdef __linux__
|
||||
@@ -344,12 +397,12 @@ bool fill_apic_ids(uint32_t* apic_ids, int n, bool x2apic_id) {
|
||||
|
||||
bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) {
|
||||
uint32_t apic_id;
|
||||
uint32_t* apic_ids = emalloc(sizeof(uint32_t) * topo->total_cores);
|
||||
uint32_t* apic_pkg = emalloc(sizeof(uint32_t) * topo->total_cores);
|
||||
uint32_t* apic_core = emalloc(sizeof(uint32_t) * topo->total_cores);
|
||||
uint32_t* apic_smt = emalloc(sizeof(uint32_t) * topo->total_cores);
|
||||
uint32_t** cache_smt_id_apic = emalloc(sizeof(uint32_t*) * topo->total_cores);
|
||||
uint32_t** cache_id_apic = emalloc(sizeof(uint32_t*) * topo->total_cores);
|
||||
uint32_t* apic_ids = emalloc(sizeof(uint32_t) * topo->total_cores_module);
|
||||
uint32_t* apic_pkg = emalloc(sizeof(uint32_t) * topo->total_cores_module);
|
||||
uint32_t* apic_core = emalloc(sizeof(uint32_t) * topo->total_cores_module);
|
||||
uint32_t* apic_smt = emalloc(sizeof(uint32_t) * topo->total_cores_module);
|
||||
uint32_t** cache_smt_id_apic = emalloc(sizeof(uint32_t*) * topo->total_cores_module);
|
||||
uint32_t** cache_id_apic = emalloc(sizeof(uint32_t*) * topo->total_cores_module);
|
||||
bool x2apic_id;
|
||||
|
||||
if(cpu->maxLevels >= 0x0000000B) {
|
||||
@@ -367,7 +420,7 @@ bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) {
|
||||
x2apic_id = false;
|
||||
}
|
||||
|
||||
for(int i=0; i < topo->total_cores; i++) {
|
||||
for(int i=0; i < topo->total_cores_module; i++) {
|
||||
cache_smt_id_apic[i] = emalloc(sizeof(uint32_t) * (topo->cach->max_cache_level));
|
||||
cache_id_apic[i] = emalloc(sizeof(uint32_t) * (topo->cach->max_cache_level));
|
||||
}
|
||||
@@ -385,10 +438,10 @@ bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) {
|
||||
|
||||
get_cache_topology_from_apic(topo);
|
||||
|
||||
if(!fill_apic_ids(apic_ids, topo->total_cores, x2apic_id))
|
||||
if(!fill_apic_ids(apic_ids, cpu->first_core_id, topo->total_cores_module, x2apic_id))
|
||||
return false;
|
||||
|
||||
for(int i=0; i < topo->total_cores; i++) {
|
||||
for(int i=0; i < topo->total_cores_module; i++) {
|
||||
apic_id = apic_ids[i];
|
||||
|
||||
apic_pkg[i] = (apic_id & topo->apic->pkg_mask) >> topo->apic->pkg_mask_shift;
|
||||
@@ -404,20 +457,19 @@ bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) {
|
||||
/* DEBUG
|
||||
for(int i=0; i < topo->cach->max_cache_level; i++) {
|
||||
printf("[CACH %1d]", i);
|
||||
for(int j=0; j < topo->total_cores; j++)
|
||||
for(int j=0; j < topo->total_cores_module; j++)
|
||||
printf("[%03d]", cache_id_apic[j][i]);
|
||||
printf("\n");
|
||||
}
|
||||
for(int i=0; i < topo->total_cores; i++)
|
||||
for(int i=0; i < topo->total_cores_module; i++)
|
||||
printf("[%2d] 0x%.8X\n", i, apic_pkg[i]);
|
||||
printf("\n");
|
||||
for(int i=0; i < topo->total_cores; i++)
|
||||
for(int i=0; i < topo->total_cores_module; i++)
|
||||
printf("[%2d] 0x%.8X\n", i, apic_core[i]);
|
||||
printf("\n");
|
||||
for(int i=0; i < topo->total_cores; i++)
|
||||
for(int i=0; i < topo->total_cores_module; i++)
|
||||
printf("[%2d] 0x%.8X\n", i, apic_smt[i]);*/
|
||||
|
||||
|
||||
bool ret = build_topo_from_apic(apic_pkg, apic_smt, cache_id_apic, topo);
|
||||
|
||||
// Assumption: If we cant get smt_available, we assume it is equal to smt_supported...
|
||||
@@ -429,7 +481,7 @@ bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) {
|
||||
free(apic_pkg);
|
||||
free(apic_core);
|
||||
free(apic_smt);
|
||||
for(int i=0; i < topo->total_cores; i++) {
|
||||
for(int i=0; i < topo->total_cores_module; i++) {
|
||||
free(cache_smt_id_apic[i]);
|
||||
free(cache_id_apic[i]);
|
||||
}
|
||||
|
||||
@@ -21,4 +21,6 @@ uint32_t is_smt_enabled_amd(struct topology* topo);
|
||||
bool bind_to_cpu(int cpu_id);
|
||||
#endif
|
||||
|
||||
int get_total_cores_module(int total_cores, int module);
|
||||
|
||||
#endif
|
||||
|
||||
317
src/x86/cpuid.c
317
src/x86/cpuid.c
@@ -179,7 +179,7 @@ struct uarch* get_cpu_uarch(struct cpuInfo* cpu) {
|
||||
return get_uarch_from_cpuid(cpu, eax, efamily, family, emodel, model, (int)stepping);
|
||||
}
|
||||
|
||||
int64_t get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t max_freq, bool accurate_pp) {
|
||||
int64_t get_peak_performance(struct cpuInfo* cpu, bool accurate_pp) {
|
||||
/*
|
||||
* PP = PeakPerformance
|
||||
* SP = SinglePrecision
|
||||
@@ -192,46 +192,56 @@ int64_t get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t
|
||||
* 16(If AVX512), 8(If AVX), 4(If SSE) *
|
||||
*/
|
||||
|
||||
int64_t freq;
|
||||
#ifdef __linux__
|
||||
if(accurate_pp)
|
||||
freq = measure_frequency(cpu);
|
||||
else
|
||||
freq = max_freq;
|
||||
#else
|
||||
// Silence compiler warning
|
||||
(void)(accurate_pp);
|
||||
freq = max_freq;
|
||||
#endif
|
||||
struct cpuInfo* ptr = cpu;
|
||||
int64_t total_flops = 0;
|
||||
|
||||
//First, check we have consistent data
|
||||
if(freq == UNKNOWN_DATA || topo->logical_cores == UNKNOWN_DATA) {
|
||||
return -1;
|
||||
for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
|
||||
struct topology* topo = ptr->topo;
|
||||
int64_t max_freq = get_freq(ptr->freq);
|
||||
|
||||
int64_t freq;
|
||||
#ifdef __linux__
|
||||
if(accurate_pp)
|
||||
freq = measure_frequency(ptr);
|
||||
else
|
||||
freq = max_freq;
|
||||
#else
|
||||
// Silence compiler warning
|
||||
(void)(accurate_pp);
|
||||
freq = max_freq;
|
||||
#endif
|
||||
|
||||
//First, check we have consistent data
|
||||
if(freq == UNKNOWN_DATA || topo->logical_cores == UNKNOWN_DATA) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
struct features* feat = ptr->feat;
|
||||
int vpus = get_number_of_vpus(ptr);
|
||||
int64_t flops = topo->physical_cores * topo->sockets * (freq*1000000) * vpus;
|
||||
|
||||
if(feat->FMA3 || feat->FMA4)
|
||||
flops = flops*2;
|
||||
|
||||
// Ice Lake has AVX512, but it has 1 VPU for AVX512, while
|
||||
// it has 2 for AVX2. If this is a Ice Lake CPU, we are computing
|
||||
// the peak performance supposing AVX2, not AVX512
|
||||
if(feat->AVX512 && vpus_are_AVX512(ptr))
|
||||
flops = flops*16;
|
||||
else if(feat->AVX || feat->AVX2)
|
||||
flops = flops*8;
|
||||
else if(feat->SSE)
|
||||
flops = flops*4;
|
||||
|
||||
// See https://sites.utexas.edu/jdm4372/2018/01/22/a-peculiar-
|
||||
// throughput-limitation-on-intels-xeon-phi-x200-knights-landing/
|
||||
if(is_knights_landing(ptr))
|
||||
flops = flops * 6 / 7;
|
||||
|
||||
total_flops += flops;
|
||||
}
|
||||
|
||||
struct features* feat = cpu->feat;
|
||||
int vpus = get_number_of_vpus(cpu);
|
||||
int64_t flops = topo->physical_cores * topo->sockets * (freq*1000000) * vpus;
|
||||
|
||||
if(feat->FMA3 || feat->FMA4)
|
||||
flops = flops*2;
|
||||
|
||||
// Ice Lake has AVX512, but it has 1 VPU for AVX512, while
|
||||
// it has 2 for AVX2. If this is a Ice Lake CPU, we are computing
|
||||
// the peak performance supposing AVX2, not AVX512
|
||||
if(feat->AVX512 && vpus_are_AVX512(cpu))
|
||||
flops = flops*16;
|
||||
else if(feat->AVX || feat->AVX2)
|
||||
flops = flops*8;
|
||||
else if(feat->SSE)
|
||||
flops = flops*4;
|
||||
|
||||
// See https://sites.utexas.edu/jdm4372/2018/01/22/a-peculiar-
|
||||
// throughput-limitation-on-intels-xeon-phi-x200-knights-landing/
|
||||
if(is_knights_landing(cpu))
|
||||
flops = flops * 6 / 7;
|
||||
|
||||
return flops;
|
||||
return total_flops;
|
||||
}
|
||||
|
||||
struct hypervisor* get_hp_info(bool hv_present) {
|
||||
@@ -274,51 +284,19 @@ struct hypervisor* get_hp_info(bool hv_present) {
|
||||
return hv;
|
||||
}
|
||||
|
||||
struct cpuInfo* get_cpu_info() {
|
||||
struct cpuInfo* cpu = emalloc(sizeof(struct cpuInfo));
|
||||
struct features* feat = emalloc(sizeof(struct features));
|
||||
cpu->feat = feat;
|
||||
cpu->peak_performance = -1;
|
||||
cpu->topo = NULL;
|
||||
cpu->cach = NULL;
|
||||
|
||||
bool *ptr = &(feat->AES);
|
||||
for(uint32_t i = 0; i < sizeof(struct features)/sizeof(bool); i++, ptr++) {
|
||||
*ptr = false;
|
||||
}
|
||||
|
||||
struct features* get_features_info(struct cpuInfo* cpu) {
|
||||
uint32_t eax = 0;
|
||||
uint32_t ebx = 0;
|
||||
uint32_t ecx = 0;
|
||||
uint32_t edx = 0;
|
||||
|
||||
//Get max cpuid level
|
||||
cpuid(&eax, &ebx, &ecx, &edx);
|
||||
cpu->maxLevels = eax;
|
||||
struct features* feat = emalloc(sizeof(struct features));
|
||||
|
||||
//Fill vendor
|
||||
char name[13];
|
||||
memset(name,0,13);
|
||||
get_name_cpuid(name, ebx, edx, ecx);
|
||||
|
||||
if(strcmp(CPU_VENDOR_INTEL_STRING,name) == 0)
|
||||
cpu->cpu_vendor = CPU_VENDOR_INTEL;
|
||||
else if (strcmp(CPU_VENDOR_AMD_STRING,name) == 0)
|
||||
cpu->cpu_vendor = CPU_VENDOR_AMD;
|
||||
else {
|
||||
cpu->cpu_vendor = CPU_VENDOR_INVALID;
|
||||
printErr("Unknown CPU vendor: %s", name);
|
||||
return NULL;
|
||||
bool *ptr = &(feat->AES);
|
||||
for(uint32_t i = 0; i < sizeof(struct features)/sizeof(bool); i++, ptr++) {
|
||||
*ptr = false;
|
||||
}
|
||||
|
||||
//Get max extended level
|
||||
eax = 0x80000000;
|
||||
ebx = 0;
|
||||
ecx = 0;
|
||||
edx = 0;
|
||||
cpuid(&eax, &ebx, &ecx, &edx);
|
||||
cpu->maxExtendedLevels = eax;
|
||||
|
||||
//Fill instructions support
|
||||
if (cpu->maxLevels >= 0x00000001){
|
||||
eax = 0x00000001;
|
||||
@@ -373,6 +351,116 @@ struct cpuInfo* get_cpu_info() {
|
||||
printWarn("Can't read features information from cpuid (needed extended level is 0x%.8X, max is 0x%.8X)", 0x80000001, cpu->maxExtendedLevels);
|
||||
}
|
||||
|
||||
return feat;
|
||||
}
|
||||
|
||||
// Binds the calling thread to the first core belonging to hybrid module m.
//
// On hybrid architectures (total_modules > 1) we walk the logical cores,
// querying CPUID leaf 0x1A on each one to discover distinct core types
// (EAX[31:24]); the m-th distinct type found identifies module m, and its
// discovering core is the module's first core. The chosen core id is
// stored in *first_core and the thread is bound to it.
//
// On non-hybrid systems (total_modules <= 1) no binding is performed and
// *first_core is set to 0 so the caller never reads an indeterminate value.
//
// Returns true on success, false if binding to a CPU fails.
bool set_cpu_module(int m, int total_modules, int32_t* first_core) {
  if(total_modules > 1) {
    // We have a hybrid architecture.
    // 1. Find the first core from module m
    int32_t core_id = -1;
    int32_t current_module_idx = -1;
    // Fix: allocate with the element type actually stored (int32_t),
    // not uint32_t (same size, but keeps sizeof tied to the data).
    int32_t* core_types = emalloc(sizeof(int32_t) * total_modules);
    for(int i=0; i < total_modules; i++) core_types[i] = -1;
    int i = 0;

    while(core_id == -1) {
      if(!bind_to_cpu(i)) {
        free(core_types); // fix: was leaked on this early-return path
        return false;
      }
      uint32_t eax = 0x0000001A; // hybrid information leaf
      uint32_t ebx = 0;
      uint32_t ecx = 0;
      uint32_t edx = 0;
      cpuid(&eax, &ebx, &ecx, &edx);
      // Core type lives in EAX[31:24] of leaf 0x1A.
      int32_t core_type = eax >> 24 & 0xFF;
      bool found = false;

      // Have we already seen this core type (i.e. this module)?
      for(int j=0; j < total_modules && !found; j++) {
        if(core_types[j] == core_type) found = true;
      }
      if(!found) {
        // New core type: this core is the first of a new module.
        current_module_idx++;
        core_types[current_module_idx] = core_type;
        if(current_module_idx == m) {
          core_id = i;
        }
      }

      i++;
    }

    free(core_types); // fix: was never freed on the success path

    *first_core = core_id;

    //printf("Module %d: Core %d\n", m, core_id);
    // 2. Now bind to that core
    if(!bind_to_cpu(core_id)) {
      return false;
    }
  }
  else {
    // Fix: the caller (get_cpu_info) reads *first_core unconditionally;
    // previously it was left uninitialized on non-hybrid systems.
    *first_core = 0;
  }

  return true;
}
|
||||
|
||||
int32_t get_core_type() {
|
||||
uint32_t eax = 0x0000001A;
|
||||
uint32_t ebx = 0;
|
||||
uint32_t ecx = 0;
|
||||
uint32_t edx = 0;
|
||||
|
||||
eax = 0x0000001A;
|
||||
cpuid(&eax, &ebx, &ecx, &edx);
|
||||
|
||||
int32_t type = eax >> 24 & 0xFF;
|
||||
if(type == 0x20) return CORE_TYPE_EFFICIENCY;
|
||||
else if(type == 0x40) return CORE_TYPE_PERFORMANCE;
|
||||
else {
|
||||
printErr("Found invalid core type: 0x%.8X\n", type);
|
||||
return CORE_TYPE_UNKNOWN;
|
||||
}
|
||||
}
|
||||
|
||||
struct cpuInfo* get_cpu_info() {
|
||||
struct cpuInfo* cpu = emalloc(sizeof(struct cpuInfo));
|
||||
cpu->peak_performance = -1;
|
||||
cpu->next_cpu = NULL;
|
||||
cpu->topo = NULL;
|
||||
cpu->cach = NULL;
|
||||
cpu->feat = NULL;
|
||||
|
||||
uint32_t modules = 1;
|
||||
uint32_t eax = 0;
|
||||
uint32_t ebx = 0;
|
||||
uint32_t ecx = 0;
|
||||
uint32_t edx = 0;
|
||||
|
||||
//Get max cpuid level
|
||||
cpuid(&eax, &ebx, &ecx, &edx);
|
||||
cpu->maxLevels = eax;
|
||||
|
||||
//Fill vendor
|
||||
char name[13];
|
||||
memset(name,0,13);
|
||||
get_name_cpuid(name, ebx, edx, ecx);
|
||||
|
||||
if(strcmp(CPU_VENDOR_INTEL_STRING,name) == 0)
|
||||
cpu->cpu_vendor = CPU_VENDOR_INTEL;
|
||||
else if (strcmp(CPU_VENDOR_AMD_STRING,name) == 0)
|
||||
cpu->cpu_vendor = CPU_VENDOR_AMD;
|
||||
else {
|
||||
cpu->cpu_vendor = CPU_VENDOR_INVALID;
|
||||
printErr("Unknown CPU vendor: %s", name);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
//Get max extended level
|
||||
eax = 0x80000000;
|
||||
ebx = 0;
|
||||
ecx = 0;
|
||||
edx = 0;
|
||||
cpuid(&eax, &ebx, &ecx, &edx);
|
||||
cpu->maxExtendedLevels = eax;
|
||||
|
||||
if (cpu->maxExtendedLevels >= 0x80000004){
|
||||
cpu->cpu_name = get_str_cpu_name_internal();
|
||||
}
|
||||
@@ -389,19 +477,66 @@ struct cpuInfo* get_cpu_info() {
|
||||
cpu->topology_extensions = (ecx >> 22) & 1;
|
||||
}
|
||||
|
||||
// If any field of the struct is NULL,
|
||||
// return immediately, as further functions
|
||||
// require valid fields (cach, topo, etc)
|
||||
cpu->arch = get_cpu_uarch(cpu);
|
||||
cpu->freq = get_frequency_info(cpu);
|
||||
cpu->hybrid_flag = false;
|
||||
if(cpu->cpu_vendor == CPU_VENDOR_INTEL && cpu->maxLevels >= 0x00000007) {
|
||||
eax = 0x00000007;
|
||||
ecx = 0x00000000;
|
||||
cpuid(&eax, &ebx, &ecx, &edx);
|
||||
cpu->hybrid_flag = (edx >> 15) & 0x1;
|
||||
}
|
||||
|
||||
cpu->cach = get_cache_info(cpu);
|
||||
if(cpu->cach == NULL) return cpu;
|
||||
if(cpu->hybrid_flag) modules = 2;
|
||||
|
||||
cpu->topo = get_topology_info(cpu, cpu->cach);
|
||||
if(cpu->topo == NULL) return cpu;
|
||||
struct cpuInfo* ptr = cpu;
|
||||
for(uint32_t i=0; i < modules; i++) {
|
||||
int32_t first_core;
|
||||
set_cpu_module(i, modules, &first_core);
|
||||
|
||||
cpu->peak_performance = get_peak_performance(cpu, cpu->topo, get_freq(cpu->freq), accurate_pp());
|
||||
if(i > 0) {
|
||||
ptr->next_cpu = emalloc(sizeof(struct cpuInfo));
|
||||
ptr = ptr->next_cpu;
|
||||
ptr->next_cpu = NULL;
|
||||
ptr->peak_performance = -1;
|
||||
ptr->topo = NULL;
|
||||
ptr->cach = NULL;
|
||||
ptr->feat = NULL;
|
||||
// We assume that these cores have the
|
||||
// same cpuid capabilities
|
||||
ptr->cpu_vendor = cpu->cpu_vendor;
|
||||
ptr->maxLevels = cpu->maxLevels;
|
||||
ptr->maxExtendedLevels = cpu->maxExtendedLevels;
|
||||
ptr->hybrid_flag = cpu->hybrid_flag;
|
||||
}
|
||||
|
||||
if(cpu->hybrid_flag) {
|
||||
// Detect core type
|
||||
eax = 0x0000001A;
|
||||
cpuid(&eax, &ebx, &ecx, &edx);
|
||||
ptr->core_type = get_core_type();
|
||||
}
|
||||
ptr->first_core_id = first_core;
|
||||
ptr->feat = get_features_info(ptr);
|
||||
|
||||
// If any field of the struct is NULL,
|
||||
// return immediately, as further functions
|
||||
// require valid fields (cach, topo, etc)
|
||||
ptr->arch = get_cpu_uarch(ptr);
|
||||
ptr->freq = get_frequency_info(ptr);
|
||||
|
||||
ptr->cach = get_cache_info(ptr);
|
||||
if(ptr->cach == NULL) return cpu;
|
||||
|
||||
if(cpu->hybrid_flag) {
|
||||
ptr->topo = get_topology_info(ptr, ptr->cach, i);
|
||||
}
|
||||
else {
|
||||
ptr->topo = get_topology_info(ptr, ptr->cach, -1);
|
||||
}
|
||||
if(cpu->topo == NULL) return cpu;
|
||||
}
|
||||
|
||||
cpu->num_cpus = modules;
|
||||
cpu->peak_performance = get_peak_performance(cpu, accurate_pp());
|
||||
|
||||
return cpu;
|
||||
}
|
||||
@@ -492,7 +627,7 @@ void get_topology_from_udev(struct topology* topo) {
|
||||
|
||||
// Main reference: https://software.intel.com/content/www/us/en/develop/articles/intel-64-architecture-processor-topology-enumeration.html
|
||||
// Very interesting resource: https://wiki.osdev.org/Detecting_CPU_Topology_(80x86)
|
||||
struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) {
|
||||
struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach, int module) {
|
||||
struct topology* topo = emalloc(sizeof(struct topology));
|
||||
init_topology_struct(topo, cach);
|
||||
|
||||
@@ -516,6 +651,13 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) {
|
||||
}
|
||||
#endif
|
||||
|
||||
if(cpu->hybrid_flag) {
|
||||
topo->total_cores_module = get_total_cores_module(topo->total_cores, module);
|
||||
}
|
||||
else {
|
||||
topo->total_cores_module = topo->total_cores;
|
||||
}
|
||||
|
||||
switch(cpu->cpu_vendor) {
|
||||
case CPU_VENDOR_INTEL:
|
||||
if (cpu->maxLevels >= 0x00000004) {
|
||||
@@ -919,6 +1061,9 @@ void print_debug(struct cpuInfo* cpu) {
|
||||
if(cpu->cpu_vendor == CPU_VENDOR_AMD) {
|
||||
printf("- AMD topology extensions: %d\n", cpu->topology_extensions);
|
||||
}
|
||||
if(cpu->cpu_vendor == CPU_VENDOR_INTEL) {
|
||||
printf("- Hybrid Flag: %d\n", cpu->hybrid_flag);
|
||||
}
|
||||
printf("- CPUID dump: 0x%.8X\n", eax);
|
||||
|
||||
free_cpuinfo_struct(cpu);
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
struct cpuInfo* get_cpu_info();
|
||||
struct cache* get_cache_info(struct cpuInfo* cpu);
|
||||
struct frequency* get_frequency_info(struct cpuInfo* cpu);
|
||||
struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach);
|
||||
struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach, int module);
|
||||
|
||||
char* get_str_avx(struct cpuInfo* cpu);
|
||||
char* get_str_sse(struct cpuInfo* cpu);
|
||||
|
||||
@@ -79,7 +79,7 @@ enum {
|
||||
UARCH_GOLDMONT_PLUS,
|
||||
UARCH_TREMONT,
|
||||
UARCH_LAKEMONT,
|
||||
UARCH_COFFE_LAKE,
|
||||
UARCH_COFFEE_LAKE,
|
||||
UARCH_ITANIUM,
|
||||
UARCH_KNIGHTS_FERRY,
|
||||
UARCH_KNIGHTS_CORNER,
|
||||
@@ -109,7 +109,8 @@ enum {
|
||||
UARCH_ZEN,
|
||||
UARCH_ZEN_PLUS,
|
||||
UARCH_ZEN2,
|
||||
UARCH_ZEN3
|
||||
UARCH_ZEN3,
|
||||
UARCH_ZEN3_PLUS
|
||||
};
|
||||
|
||||
struct uarch {
|
||||
@@ -225,7 +226,7 @@ struct uarch* get_uarch_from_cpuid_intel(uint32_t ef, uint32_t f, uint32_t em, u
|
||||
CHECK_UARCH(arch, 0, 6, 8, 12, NA, "Tiger Lake", UARCH_TIGER_LAKE, 10) // instlatx64
|
||||
CHECK_UARCH(arch, 0, 6, 8, 13, NA, "Tiger Lake", UARCH_TIGER_LAKE, 10) // instlatx64
|
||||
// CHECK_UARCH(arch, 0, 6, 8, 14, 9, ...) It is not possible to determine uarch only from CPUID dump (can be Kaby Lake or Amber Lake)
|
||||
CHECK_UARCH(arch, 0, 6, 8, 14, 10, "Kaby Lake", UARCH_KABY_LAKE, 14) // wikichip
|
||||
CHECK_UARCH(arch, 0, 6, 8, 14, 10, "Coffee Lake", UARCH_COFFEE_LAKE, 14) // wikichip
|
||||
CHECK_UARCH(arch, 0, 6, 8, 14, 11, "Whiskey Lake", UARCH_WHISKEY_LAKE, 14) // wikichip
|
||||
CHECK_UARCH(arch, 0, 6, 8, 14, 12, "Comet Lake", UARCH_COMET_LAKE, 14) // wikichip
|
||||
CHECK_UARCH(arch, 0, 6, 9, 6, NA, "Tremont", UARCH_TREMONT, 10) // LX*
|
||||
@@ -234,10 +235,10 @@ struct uarch* get_uarch_from_cpuid_intel(uint32_t ef, uint32_t f, uint32_t em, u
|
||||
CHECK_UARCH(arch, 0, 6, 9, 12, NA, "Tremont", UARCH_TREMONT, 10) // LX*
|
||||
CHECK_UARCH(arch, 0, 6, 9, 13, NA, "Sunny Cove", UARCH_SUNNY_COVE, 10) // LX*
|
||||
CHECK_UARCH(arch, 0, 6, 9, 14, 9, "Kaby Lake", UARCH_KABY_LAKE, 14)
|
||||
CHECK_UARCH(arch, 0, 6, 9, 14, 10, "Coffee Lake", UARCH_COFFE_LAKE, 14)
|
||||
CHECK_UARCH(arch, 0, 6, 9, 14, 11, "Coffee Lake", UARCH_COFFE_LAKE, 14)
|
||||
CHECK_UARCH(arch, 0, 6, 9, 14, 12, "Coffee Lake", UARCH_COFFE_LAKE, 14)
|
||||
CHECK_UARCH(arch, 0, 6, 9, 14, 13, "Coffee Lake", UARCH_COFFE_LAKE, 14)
|
||||
CHECK_UARCH(arch, 0, 6, 9, 14, 10, "Coffee Lake", UARCH_COFFEE_LAKE, 14)
|
||||
CHECK_UARCH(arch, 0, 6, 9, 14, 11, "Coffee Lake", UARCH_COFFEE_LAKE, 14)
|
||||
CHECK_UARCH(arch, 0, 6, 9, 14, 12, "Coffee Lake", UARCH_COFFEE_LAKE, 14)
|
||||
CHECK_UARCH(arch, 0, 6, 9, 14, 13, "Coffee Lake", UARCH_COFFEE_LAKE, 14)
|
||||
CHECK_UARCH(arch, 0, 6, 10, 5, NA, "Comet Lake", UARCH_COMET_LAKE, 14) // wikichip
|
||||
CHECK_UARCH(arch, 0, 6, 10, 6, NA, "Comet Lake", UARCH_COMET_LAKE, 14) // instlatx64.atw.hu (i7-10710U)
|
||||
CHECK_UARCH(arch, 0, 6, 10, 7, NA, "Rocket Lake", UARCH_ROCKET_LAKE, 14) // instlatx64.atw.hu (i7-11700K)
|
||||
@@ -358,8 +359,10 @@ struct uarch* get_uarch_from_cpuid_amd(uint32_t ef, uint32_t f, uint32_t em, uin
|
||||
CHECK_UARCH(arch, 8, 15, 6, 0, NA, "Zen 2", UARCH_ZEN2, 7) // undocumented, geekbench.com example
|
||||
CHECK_UARCH(arch, 8, 15, 6, 8, NA, "Zen 2", UARCH_ZEN2, 7) // found on instlatx64
|
||||
CHECK_UARCH(arch, 8, 15, 7, 1, NA, "Zen 2", UARCH_ZEN2, 7) // samples from Steven Noonan and instlatx64
|
||||
CHECK_UARCH(arch, 8, 15, 9, 0, 2, "Zen 2", UARCH_ZEN2, 7) // Steam Deck (instlatx64)
|
||||
CHECK_UARCH(arch, 10, 15, 0, 1, NA, "Zen 3", UARCH_ZEN3, 7) // instlatx64
|
||||
CHECK_UARCH(arch, 10, 15, 2, 1, NA, "Zen 3", UARCH_ZEN3, 7) // instlatx64
|
||||
CHECK_UARCH(arch, 10, 15, 4, 4, NA, "Zen 3+", UARCH_ZEN3_PLUS, 6) // instlatx64 (they say it is Zen3...)
|
||||
CHECK_UARCH(arch, 10, 15, 5, 0, NA, "Zen 3", UARCH_ZEN3, 7) // instlatx64
|
||||
UARCH_END
|
||||
|
||||
@@ -408,7 +411,7 @@ int get_number_of_vpus(struct cpuInfo* cpu) {
|
||||
case UARCH_ROCKET_LAKE:
|
||||
case UARCH_AMBER_LAKE:
|
||||
case UARCH_WHISKEY_LAKE:
|
||||
case UARCH_COFFE_LAKE:
|
||||
case UARCH_COFFEE_LAKE:
|
||||
case UARCH_PALM_COVE:
|
||||
|
||||
case UARCH_KNIGHTS_LANDING:
|
||||
@@ -416,10 +419,12 @@ int get_number_of_vpus(struct cpuInfo* cpu) {
|
||||
|
||||
case UARCH_ICE_LAKE:
|
||||
case UARCH_TIGER_LAKE:
|
||||
case UARCH_ALDER_LAKE:
|
||||
|
||||
// AMD
|
||||
case UARCH_ZEN2:
|
||||
case UARCH_ZEN3:
|
||||
case UARCH_ZEN3_PLUS:
|
||||
return 2;
|
||||
default:
|
||||
return 1;
|
||||
|
||||
Reference in New Issue
Block a user