Files
cpufetch/src/arm/midr.c
2023-12-22 23:23:42 +01:00

541 lines
15 KiB
C

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <stdbool.h>
#include <errno.h>
#ifdef __linux__
#include <sys/auxv.h>
#include <asm/hwcap.h>
#elif defined __APPLE__ || __MACH__
#include "sysctl.h"
#endif
#include "../common/global.h"
#include "../common/soc.h"
#include "udev.h"
#include "midr.h"
#include "uarch.h"
bool cores_are_equal(int c1pos, int c2pos, uint32_t* midr_array, int32_t* freq_array) {
return midr_array[c1pos] == midr_array[c2pos] && freq_array[c1pos] == freq_array[c2pos];
}
struct cache* get_cache_info(struct cpuInfo* cpu) {
struct cache* cach = emalloc(sizeof(struct cache));
init_cache_struct(cach);
cach->max_cache_level = 2;
for(int i=0; i < cach->max_cache_level + 1; i++) {
cach->cach_arr[i]->exists = true;
cach->cach_arr[i]->num_caches = 1;
cach->cach_arr[i]->size = 0;
}
return cach;
}
struct frequency* get_frequency_info(uint32_t core) {
struct frequency* freq = emalloc(sizeof(struct frequency));
freq->base = UNKNOWN_DATA;
freq->max = get_max_freq_from_file(core);
return freq;
}
struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach, uint32_t* midr_array, int32_t* freq_array, int socket_idx, int ncores) {
struct topology* topo = emalloc(sizeof(struct topology));
init_topology_struct(topo, cach);
int sockets_seen = 0;
int first_core_idx = 0;
int currrent_core_idx = 0;
int cores_in_socket = 0;
while(socket_idx + 1 > sockets_seen) {
if(currrent_core_idx < ncores && cores_are_equal(first_core_idx, currrent_core_idx, midr_array, freq_array)) {
currrent_core_idx++;
cores_in_socket++;
}
else {
topo->total_cores = cores_in_socket;
cores_in_socket = 0;
first_core_idx = currrent_core_idx;
sockets_seen++;
}
}
return topo;
}
int64_t get_peak_performance(struct cpuInfo* cpu) {
struct cpuInfo* ptr = cpu;
//First check we have consistent data
for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
if(get_freq(ptr->freq) == UNKNOWN_DATA) {
return -1;
}
}
int64_t total_flops = 0;
ptr = cpu;
for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
int vpus = get_number_of_vpus(ptr);
int vpus_width = get_vpus_width(ptr);
bool has_fma = has_fma_support(ptr);
int64_t flops = ptr->topo->total_cores * get_freq(ptr->freq) * 1000000 * vpus * (vpus_width/32);
if(has_fma) flops = flops * 2;
total_flops += flops;
}
return total_flops;
}
uint32_t fill_ids_from_midr(uint32_t* midr_array, int32_t* freq_array, uint32_t* ids_array, int len) {
uint32_t latest_id = 0;
bool found;
ids_array[0] = latest_id;
for (int i = 1; i < len; i++) {
int j = 0;
found = false;
for (j = 0; j < len && !found; j++) {
if (i != j && cores_are_equal(i, j, midr_array, freq_array)) {
if(j > i) {
latest_id++;
ids_array[i] = latest_id;
}
else {
ids_array[i] = ids_array[j];
}
found = true;
}
}
if(!found) {
latest_id++;
ids_array[i] = latest_id;
}
}
return latest_id+1;
}
void init_cpu_info(struct cpuInfo* cpu) {
cpu->next_cpu = NULL;
}
// We assume all cpus share the same hardware
// capabilities but I'm not sure it is always
// true...
// ARM32 https://elixir.bootlin.com/linux/latest/source/arch/arm/include/uapi/asm/hwcap.h
// ARM64 https://elixir.bootlin.com/linux/latest/source/arch/arm64/include/uapi/asm/hwcap.h
struct features* get_features_info(void) {
struct features* feat = emalloc(sizeof(struct features));
bool *ptr = &(feat->AES);
for(uint32_t i = 0; i < sizeof(struct features)/sizeof(bool); i++, ptr++) {
*ptr = false;
}
#ifdef __linux__
errno = 0;
long hwcaps = getauxval(AT_HWCAP);
if(errno == ENOENT) {
printWarn("Unable to retrieve AT_HWCAP using getauxval");
}
#ifdef __aarch64__
else {
feat->AES = hwcaps & HWCAP_AES;
feat->CRC32 = hwcaps & HWCAP_CRC32;
feat->SHA1 = hwcaps & HWCAP_SHA1;
feat->SHA2 = hwcaps & HWCAP_SHA2;
feat->NEON = hwcaps & HWCAP_ASIMD;
}
#else
else {
feat->NEON = hwcaps & HWCAP_NEON;
}
hwcaps = getauxval(AT_HWCAP2);
if(errno == ENOENT) {
printWarn("Unable to retrieve AT_HWCAP2 using getauxval");
}
else {
feat->AES = hwcaps & HWCAP2_AES;
feat->CRC32 = hwcaps & HWCAP2_CRC32;
feat->SHA1 = hwcaps & HWCAP2_SHA1;
feat->SHA2 = hwcaps & HWCAP2_SHA2;
}
#endif // ifdef __aarch64__
#elif defined __APPLE__ || __MACH__
// Must be M1
feat->AES = true;
feat->CRC32 = true;
feat->SHA1 = true;
feat->SHA2 = true;
feat->NEON = true;
#endif // ifdef __linux__
return feat;
}
#ifdef __linux__
struct cpuInfo* get_cpu_info_linux(struct cpuInfo* cpu) {
init_cpu_info(cpu);
int ncores = get_ncores_from_cpuinfo();
bool success = false;
int32_t* freq_array = emalloc(sizeof(uint32_t) * ncores);
uint32_t* midr_array = emalloc(sizeof(uint32_t) * ncores);
uint32_t* ids_array = emalloc(sizeof(uint32_t) * ncores);
for(int i=0; i < ncores; i++) {
midr_array[i] = get_midr_from_cpuinfo(i, &success);
if(!success) {
printWarn("Unable to fetch MIDR for core %d. This is probably because the core is offline", i);
midr_array[i] = midr_array[0];
}
freq_array[i] = get_max_freq_from_file(i);
if(freq_array[i] == UNKNOWN_DATA) {
printWarn("Unable to fetch max frequency for core %d. This is probably because the core is offline", i);
freq_array[i] = freq_array[0];
}
}
uint32_t sockets = fill_ids_from_midr(midr_array, freq_array, ids_array, ncores);
struct cpuInfo* ptr = cpu;
int midr_idx = 0;
int tmp_midr_idx = 0;
for(uint32_t i=0; i < sockets; i++) {
if(i > 0) {
ptr->next_cpu = emalloc(sizeof(struct cpuInfo));
ptr = ptr->next_cpu;
init_cpu_info(ptr);
tmp_midr_idx = midr_idx;
while(cores_are_equal(midr_idx, tmp_midr_idx, midr_array, freq_array)) tmp_midr_idx++;
midr_idx = tmp_midr_idx;
}
ptr->midr = midr_array[midr_idx];
ptr->arch = get_uarch_from_midr(ptr->midr, ptr);
ptr->feat = get_features_info();
ptr->freq = get_frequency_info(midr_idx);
ptr->cach = get_cache_info(ptr);
ptr->topo = get_topology_info(ptr, ptr->cach, midr_array, freq_array, i, ncores);
}
cpu->num_cpus = sockets;
cpu->hv = emalloc(sizeof(struct hypervisor));
cpu->hv->present = false;
cpu->soc = get_soc();
cpu->peak_performance = get_peak_performance(cpu);
return cpu;
}
#elif defined __APPLE__ || __MACH__
void fill_cpu_info_firestorm_icestorm(struct cpuInfo* cpu, uint32_t pcores, uint32_t ecores) {
// 1. Fill ICESTORM
struct cpuInfo* ice = cpu;
ice->midr = MIDR_APPLE_M1_ICESTORM;
ice->arch = get_uarch_from_midr(ice->midr, ice);
ice->cach = get_cache_info(ice);
ice->feat = get_features_info();
ice->topo = malloc(sizeof(struct topology));
ice->topo->cach = ice->cach;
ice->topo->total_cores = ecores;
ice->freq = malloc(sizeof(struct frequency));
ice->freq->base = UNKNOWN_DATA;
ice->freq->max = 2064;
ice->hv = malloc(sizeof(struct hypervisor));
ice->hv->present = false;
ice->next_cpu = malloc(sizeof(struct cpuInfo));
// 2. Fill FIRESTORM
struct cpuInfo* fire = ice->next_cpu;
fire->midr = MIDR_APPLE_M1_FIRESTORM;
fire->arch = get_uarch_from_midr(fire->midr, fire);
fire->cach = get_cache_info(fire);
fire->feat = get_features_info();
fire->topo = malloc(sizeof(struct topology));
fire->topo->cach = fire->cach;
fire->topo->total_cores = pcores;
fire->freq = malloc(sizeof(struct frequency));
fire->freq->base = UNKNOWN_DATA;
fire->freq->max = 3200;
fire->hv = malloc(sizeof(struct hypervisor));
fire->hv->present = false;
fire->next_cpu = NULL;
}
void fill_cpu_info_avalanche_blizzard(struct cpuInfo* cpu, uint32_t pcores, uint32_t ecores) {
// 1. Fill BLIZZARD
struct cpuInfo* bli = cpu;
bli->midr = MIDR_APPLE_M2_BLIZZARD;
bli->arch = get_uarch_from_midr(bli->midr, bli);
bli->cach = get_cache_info(bli);
bli->feat = get_features_info();
bli->topo = malloc(sizeof(struct topology));
bli->topo->cach = bli->cach;
bli->topo->total_cores = pcores;
bli->freq = malloc(sizeof(struct frequency));
bli->freq->base = UNKNOWN_DATA;
bli->freq->max = 2800;
bli->hv = malloc(sizeof(struct hypervisor));
bli->hv->present = false;
bli->next_cpu = malloc(sizeof(struct cpuInfo));
// 2. Fill AVALANCHE
struct cpuInfo* ava = bli->next_cpu;
ava->midr = MIDR_APPLE_M2_AVALANCHE;
ava->arch = get_uarch_from_midr(ava->midr, ava);
ava->cach = get_cache_info(ava);
ava->feat = get_features_info();
ava->topo = malloc(sizeof(struct topology));
ava->topo->cach = ava->cach;
ava->topo->total_cores = ecores;
ava->freq = malloc(sizeof(struct frequency));
ava->freq->base = UNKNOWN_DATA;
ava->freq->max = 3500;
ava->hv = malloc(sizeof(struct hypervisor));
ava->hv->present = false;
ava->next_cpu = NULL;
}
void fill_cpu_info_everest_sawtooth(struct cpuInfo* cpu, uint32_t pcores, uint32_t ecores) {
// 1. Fill SAWTOOTH
struct cpuInfo* saw = cpu;
saw->midr = MIDR_APPLE_M3_SAWTOOTH;
saw->arch = get_uarch_from_midr(saw->midr, saw);
saw->cach = get_cache_info(saw);
saw->feat = get_features_info();
saw->topo = malloc(sizeof(struct topology));
saw->topo->cach = saw->cach;
saw->topo->total_cores = pcores;
saw->freq = malloc(sizeof(struct frequency));
saw->freq->base = UNKNOWN_DATA;
saw->freq->max = 2750;
saw->hv = malloc(sizeof(struct hypervisor));
saw->hv->present = false;
saw->next_cpu = malloc(sizeof(struct cpuInfo));
// 2. Fill EVEREST
struct cpuInfo* eve = saw->next_cpu;
eve->midr = MIDR_APPLE_M3_EVEREST;
eve->arch = get_uarch_from_midr(eve->midr, eve);
eve->cach = get_cache_info(eve);
eve->feat = get_features_info();
eve->topo = malloc(sizeof(struct topology));
eve->topo->cach = eve->cach;
eve->topo->total_cores = ecores;
eve->freq = malloc(sizeof(struct frequency));
eve->freq->base = UNKNOWN_DATA;
eve->freq->max = 4050;
eve->hv = malloc(sizeof(struct hypervisor));
eve->hv->present = false;
eve->next_cpu = NULL;
}
struct cpuInfo* get_cpu_info_mach(struct cpuInfo* cpu) {
uint32_t cpu_family = get_sys_info_by_name("hw.cpufamily");
// Manually fill the cpuInfo assuming that
// the CPU is an Apple M1/M2
if(cpu_family == CPUFAMILY_ARM_FIRESTORM_ICESTORM) {
cpu->num_cpus = 2;
// Now detect the M1 version
uint32_t cpu_subfamily = get_sys_info_by_name("hw.cpusubfamily");
if(cpu_subfamily == CPUSUBFAMILY_ARM_HG) {
// Apple M1
fill_cpu_info_firestorm_icestorm(cpu, 4, 4);
}
else if(cpu_subfamily == CPUSUBFAMILY_ARM_HS || cpu_subfamily == CPUSUBFAMILY_ARM_HC_HD) {
// Apple M1 Pro/Max/Ultra. Detect number of cores
uint32_t physicalcpu = get_sys_info_by_name("hw.physicalcpu");
if(physicalcpu == 20) {
// M1 Ultra
fill_cpu_info_firestorm_icestorm(cpu, 16, 4);
}
else if(physicalcpu == 8 || physicalcpu == 10) {
// M1 Pro/Max
fill_cpu_info_firestorm_icestorm(cpu, physicalcpu-2, 2);
}
else {
printBug("Found invalid physical cpu number: %d", physicalcpu);
return NULL;
}
}
else {
printBug("Found invalid cpu_subfamily: 0x%.8X", cpu_subfamily);
return NULL;
}
cpu->soc = get_soc();
cpu->peak_performance = get_peak_performance(cpu);
}
else if(cpu_family == CPUFAMILY_ARM_AVALANCHE_BLIZZARD) {
cpu->num_cpus = 2;
// Now detect the M2 version
uint32_t cpu_subfamily = get_sys_info_by_name("hw.cpusubfamily");
if(cpu_subfamily == CPUSUBFAMILY_ARM_HG) {
// Apple M2
fill_cpu_info_avalanche_blizzard(cpu, 4, 4);
}
else if(cpu_subfamily == CPUSUBFAMILY_ARM_HS) {
// Apple M2 Pro/Max/Ultra. Detect number of cores
uint32_t physicalcpu = get_sys_info_by_name("hw.physicalcpu");
if(physicalcpu == 24) {
// M2 Ultra
fill_cpu_info_avalanche_blizzard(cpu, 16, 8);
}
else if(physicalcpu == 10 || physicalcpu == 12) {
// M2 Pro/Max
fill_cpu_info_avalanche_blizzard(cpu, physicalcpu-4, 4);
}
else {
printBug("Found invalid physical cpu number: %d", physicalcpu);
return NULL;
}
}
else {
printBug("Found invalid cpu_subfamily: 0x%.8X", cpu_subfamily);
return NULL;
}
cpu->soc = get_soc();
cpu->peak_performance = get_peak_performance(cpu);
}
else if(cpu_family == CPUFAMILY_ARM_EVEREST_SAWTOOTH) {
cpu->num_cpus = 2;
// Now detect the M3 version
uint32_t cpu_subfamily = get_sys_info_by_name("hw.cpusubfamily");
if(cpu_subfamily == CPUSUBFAMILY_ARM_HG) {
// Apple M3
fill_cpu_info_everest_sawtooth(cpu, 4, 4);
}
else if(cpu_subfamily == CPUSUBFAMILY_ARM_HS) {
// Apple M3 Pro/Max/Ultra. Detect number of cores
uint32_t physicalcpu = get_sys_info_by_name("hw.physicalcpu");
if(physicalcpu == 11 || physicalcpu == 12) {
// M3 Pro
fill_cpu_info_everest_sawtooth(cpu, physicalcpu-6, 6);
}
else if(physicalcpu >= 14 && physicalcpu <= 16) {
// M3 Max
fill_cpu_info_everest_sawtooth(cpu, physicalcpu-4, 4);
}
else {
printBug("Found invalid physical cpu number: %d", physicalcpu);
return NULL;
}
}
else {
printBug("Found invalid cpu_subfamily: 0x%.8X", cpu_subfamily);
return NULL;
}
cpu->soc = get_soc();
cpu->peak_performance = get_peak_performance(cpu);
}
else {
printBug("Found invalid cpu_family: 0x%.8X", cpu_family);
return NULL;
}
return cpu;
}
#endif
struct cpuInfo* get_cpu_info(void) {
struct cpuInfo* cpu = malloc(sizeof(struct cpuInfo));
init_cpu_info(cpu);
#ifdef __linux__
return get_cpu_info_linux(cpu);
#elif defined __APPLE__ || __MACH__
return get_cpu_info_mach(cpu);
#endif
}
char* get_str_topology(struct cpuInfo* cpu, struct topology* topo, bool dual_socket) {
uint32_t size = 3+7+1;
char* string = emalloc(sizeof(char)*size);
snprintf(string, size, "%d cores", topo->total_cores);
return string;
}
char* get_str_features(struct cpuInfo* cpu) {
struct features* feat = cpu->feat;
uint32_t max_len = strlen("NEON,SHA1,SHA2,AES,CRC32,") + 1;
uint32_t len = 0;
char* string = ecalloc(max_len, sizeof(char));
if(feat->NEON) {
strcat(string, "NEON,");
len += 5;
}
if(feat->SHA1) {
strcat(string, "SHA1,");
len += 5;
}
if(feat->SHA2) {
strcat(string, "SHA2,");
len += 5;
}
if(feat->AES) {
strcat(string, "AES,");
len += 4;
}
if(feat->CRC32) {
strcat(string, "CRC32,");
len += 6;
}
if(len > 0) {
string[len-1] = '\0';
return string;
}
else
return NULL;
}
void print_debug(struct cpuInfo* cpu) {
int ncores = get_ncores_from_cpuinfo();
bool success = false;
for(int i=0; i < ncores; i++) {
printf("[Core %d] ", i);
long freq = get_max_freq_from_file(i);
uint32_t midr = get_midr_from_cpuinfo(i, &success);
if(!success) {
printWarn("Unable to fetch MIDR for core %d. This is probably because the core is offline", i);
printf("0x%.8X ", get_midr_from_cpuinfo(0, &success));
}
else {
printf("0x%.8X ", midr);
}
if(freq == UNKNOWN_DATA) {
printWarn("Unable to fetch max frequency for core %d. This is probably because the core is offline", i);
printf("%ld MHz\n", get_max_freq_from_file(0));
}
else {
printf("%ld MHz\n", freq);
}
}
}
void free_topo_struct(struct topology* topo) {
free(topo);
}