[v0.94][ARM] Add CPU feature detection, such as NEON, AES, SHA... Take into account NEON capabilities for PP computation

This commit is contained in:
Dr-Noob
2020-12-01 11:36:38 +01:00
parent 35c2aa7e6f
commit d04d535807
5 changed files with 109 additions and 9 deletions

View File

@@ -3,6 +3,9 @@
#include <string.h> #include <string.h>
#include <assert.h> #include <assert.h>
#include <stdbool.h> #include <stdbool.h>
#include <errno.h>
#include <sys/auxv.h>
#include <asm/hwcap.h>
#include "../common/global.h" #include "../common/global.h"
#include "udev.h" #include "udev.h"
@@ -115,9 +118,54 @@ uint32_t fill_ids_from_midr(uint32_t* midr_array, int32_t* freq_array, uint32_t*
return latest_id+1; return latest_id+1;
} }
/*
 * Puts a freshly allocated cpuInfo node into a known state: the node is
 * detached from any successor and the NEON, AES, SHA1, SHA2 and CRC32
 * feature flags are cleared. No other field of the struct is touched here.
 */
void init_cpu_info(struct cpuInfo* cpu) {
  // The node has no successor until a caller links one
  cpu->next_cpu = NULL;

  // Every feature is reported as absent until detection says otherwise
  cpu->NEON = cpu->AES = cpu->SHA1 = cpu->SHA2 = cpu->CRC32 = false;
}
/*
 * Fills the feature flags of cpu (NEON, AES, SHA1, SHA2, CRC32) from the
 * hardware capability words exposed through getauxval.
 *
 * - On aarch64 every flag is read from AT_HWCAP.
 * - Otherwise (32-bit ARM) NEON is read from AT_HWCAP and the crypto/CRC
 *   flags are read from AT_HWCAP2.
 *
 * When a capability word can not be retrieved a warning is printed and the
 * flags that depend on it are left untouched.
 *
 * We assume all cpus share the same hardware capabilities, but it is not
 * clear that this is always true...
 */
void fill_cpu_features(struct cpuInfo* cpu) {
  // getauxval returns 0 both for "no bits set" and for "entry not found";
  // only errno tells them apart, so it has to be cleared before each call
  errno = 0;
  unsigned long hwcaps = getauxval(AT_HWCAP);

  if(errno == ENOENT) {
    printWarn("Unable to retrieve AT_HWCAP using getauxval");
  }
  else {
#ifdef __aarch64__
    cpu->AES = (hwcaps & HWCAP_AES) != 0;
    cpu->CRC32 = (hwcaps & HWCAP_CRC32) != 0;
    cpu->SHA1 = (hwcaps & HWCAP_SHA1) != 0;
    cpu->SHA2 = (hwcaps & HWCAP_SHA2) != 0;
    cpu->NEON = (hwcaps & HWCAP_ASIMD) != 0;
#else
    cpu->NEON = (hwcaps & HWCAP_NEON) != 0;
#endif
  }

#ifndef __aarch64__
  // Clear errno again: otherwise a failed AT_HWCAP lookup would make this
  // lookup look like a failure too, even if AT_HWCAP2 is available
  errno = 0;
  hwcaps = getauxval(AT_HWCAP2);

  if(errno == ENOENT) {
    printWarn("Unable to retrieve AT_HWCAP2 using getauxval");
  }
  else {
    cpu->AES = (hwcaps & HWCAP2_AES) != 0;
    cpu->CRC32 = (hwcaps & HWCAP2_CRC32) != 0;
    cpu->SHA1 = (hwcaps & HWCAP2_SHA1) != 0;
    cpu->SHA2 = (hwcaps & HWCAP2_SHA2) != 0;
  }
#endif
}
struct cpuInfo* get_cpu_info() { struct cpuInfo* get_cpu_info() {
struct cpuInfo* cpu = malloc(sizeof(struct cpuInfo)); struct cpuInfo* cpu = malloc(sizeof(struct cpuInfo));
cpu->next_cpu = NULL; init_cpu_info(cpu);
int ncores = get_ncores_from_cpuinfo(); int ncores = get_ncores_from_cpuinfo();
bool success = false; bool success = false;
@@ -140,6 +188,7 @@ struct cpuInfo* get_cpu_info() {
} }
} }
uint32_t sockets = fill_ids_from_midr(midr_array, freq_array, ids_array, ncores); uint32_t sockets = fill_ids_from_midr(midr_array, freq_array, ids_array, ncores);
fill_cpu_features(cpu);
struct cpuInfo* ptr = cpu; struct cpuInfo* ptr = cpu;
int midr_idx = 0; int midr_idx = 0;
@@ -148,12 +197,12 @@ struct cpuInfo* get_cpu_info() {
if(i > 0) { if(i > 0) {
ptr->next_cpu = malloc(sizeof(struct cpuInfo)); ptr->next_cpu = malloc(sizeof(struct cpuInfo));
ptr = ptr->next_cpu; ptr = ptr->next_cpu;
init_cpu_info(ptr);
tmp_midr_idx = midr_idx; tmp_midr_idx = midr_idx;
while(cores_are_equal(midr_idx, tmp_midr_idx, midr_array, freq_array)) tmp_midr_idx++; while(cores_are_equal(midr_idx, tmp_midr_idx, midr_array, freq_array)) tmp_midr_idx++;
midr_idx = tmp_midr_idx; midr_idx = tmp_midr_idx;
} }
ptr->next_cpu = NULL;
ptr->midr = midr_array[midr_idx]; ptr->midr = midr_array[midr_idx];
ptr->arch = get_uarch_from_midr(ptr->midr, ptr); ptr->arch = get_uarch_from_midr(ptr->midr, ptr);
@@ -200,6 +249,7 @@ char* get_str_peak_performance(struct cpuInfo* cpu) {
for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) { for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
flops += ptr->topo->total_cores * (get_freq(ptr->freq) * 1000000); flops += ptr->topo->total_cores * (get_freq(ptr->freq) * 1000000);
} }
if(cpu->NEON) flops = flops * 4;
if(flops >= (double)1000000000000.0) if(flops >= (double)1000000000000.0)
snprintf(string,size,"%.2f TFLOP/s",flops/1000000000000); snprintf(string,size,"%.2f TFLOP/s",flops/1000000000000);
@@ -211,6 +261,39 @@ char* get_str_peak_performance(struct cpuInfo* cpu) {
return string; return string;
} }
/*
 * Builds a comma separated list with the names of the features flagged in
 * cpu, always in the order NEON, SHA1, SHA2, AES, CRC32 (e.g. "NEON,AES").
 *
 * Returns a heap allocated string that the caller must free, or NULL when
 * no feature is flagged or the allocation fails.
 */
char* get_str_features(struct cpuInfo* cpu) {
  // Longest possible result is "NEON,SHA1,SHA2,AES,CRC32" (24 chars + NUL)
  const size_t size = 100;

  const struct {
    bool present;
    const char* name;
  } features[] = {
    { cpu->NEON,  "NEON"  },
    { cpu->SHA1,  "SHA1"  },
    { cpu->SHA2,  "SHA2"  },
    { cpu->AES,   "AES"   },
    { cpu->CRC32, "CRC32" },
  };
  const size_t nfeatures = sizeof(features) / sizeof(features[0]);

  char* string = malloc(sizeof(char) * size);
  if(string == NULL) return NULL;

  // malloc returns uninitialized memory; start from a valid empty string
  string[0] = '\0';
  size_t len = 0;

  for(size_t i = 0; i < nfeatures; i++) {
    if(!features[i].present) continue;

    // The separator goes before every name except the first one, so there
    // is never a trailing comma to strip afterwards
    int written = snprintf(string + len, size - len, "%s%s",
                           len > 0 ? "," : "", features[i].name);
    if(written < 0 || (size_t) written >= size - len) {
      // Does not fit: drop the partial name and keep what was built so far
      string[len] = '\0';
      break;
    }
    len += (size_t) written;
  }

  if(len == 0) {
    // Nothing to report: release the buffer instead of leaking it
    free(string);
    return NULL;
  }

  return string;
}
void print_debug(struct cpuInfo* cpu) { void print_debug(struct cpuInfo* cpu) {
int ncores = get_ncores_from_cpuinfo(); int ncores = get_ncores_from_cpuinfo();
bool success = false; bool success = false;

View File

@@ -8,6 +8,7 @@ struct cpuInfo* get_cpu_info();
uint32_t get_nsockets(struct topology* topo); uint32_t get_nsockets(struct topology* topo);
char* get_str_topology(struct cpuInfo* cpu, struct topology* topo, bool dual_socket); char* get_str_topology(struct cpuInfo* cpu, struct topology* topo, bool dual_socket);
char* get_str_peak_performance(struct cpuInfo* cpu); char* get_str_peak_performance(struct cpuInfo* cpu);
char* get_str_features(struct cpuInfo* cpu);
void print_debug(struct cpuInfo* cpu); void print_debug(struct cpuInfo* cpu);
void free_topo_struct(struct topology* topo); void free_topo_struct(struct topology* topo);

View File

@@ -93,9 +93,14 @@ struct cpuInfo {
bool SSE4_2; bool SSE4_2;
bool FMA3; bool FMA3;
bool FMA4; bool FMA4;
bool SHA;
#elif ARCH_ARM
bool NEON;
bool SHA1;
bool SHA2;
bool CRC32;
#endif #endif
bool AES; bool AES;
bool SHA;
VENDOR cpu_vendor; VENDOR cpu_vendor;
struct uarch* arch; struct uarch* arch;

View File

@@ -13,7 +13,7 @@
#include "../arm/midr.h" #include "../arm/midr.h"
#endif #endif
static const char* VERSION = "0.93"; static const char* VERSION = "0.94";
void print_help(char *argv[]) { void print_help(char *argv[]) {
printf("Usage: %s [--version] [--help] [--debug] [--style \"fancy\"|\"retro\"|\"legacy\"] [--color \"intel\"|\"amd\"|'R,G,B:R,G,B:R,G,B:R,G,B']\n\n", argv[0]); printf("Usage: %s [--version] [--help] [--debug] [--style \"fancy\"|\"retro\"|\"legacy\"] [--color \"intel\"|\"amd\"|'R,G,B:R,G,B:R,G,B:R,G,B']\n\n", argv[0]);

View File

@@ -61,6 +61,8 @@ enum {
#ifdef ARCH_X86 #ifdef ARCH_X86
ATTRIBUTE_AVX, ATTRIBUTE_AVX,
ATTRIBUTE_FMA, ATTRIBUTE_FMA,
#elif ARCH_ARM
ATTRIBUTE_FEATURES,
#endif #endif
ATTRIBUTE_L1i, ATTRIBUTE_L1i,
ATTRIBUTE_L1d, ATTRIBUTE_L1d,
@@ -86,6 +88,8 @@ static const char* ATTRIBUTE_FIELDS [] = {
#ifdef ARCH_X86 #ifdef ARCH_X86
"AVX:", "AVX:",
"FMA:", "FMA:",
#elif ARCH_ARM
"Features: ",
#endif #endif
"L1i Size:", "L1i Size:",
"L1d Size:", "L1d Size:",
@@ -611,6 +615,7 @@ bool print_cpufetch_arm(struct cpuInfo* cpu, STYLE s, struct colors* cs) {
char* manufacturing_process = get_str_process(cpu->soc); char* manufacturing_process = get_str_process(cpu->soc);
char* soc_name = get_soc_name(cpu->soc); char* soc_name = get_soc_name(cpu->soc);
char* features = get_str_features(cpu);
setAttribute(art,ATTRIBUTE_SOC,soc_name); setAttribute(art,ATTRIBUTE_SOC,soc_name);
setAttribute(art,ATTRIBUTE_TECHNOLOGY,manufacturing_process); setAttribute(art,ATTRIBUTE_TECHNOLOGY,manufacturing_process);
@@ -626,6 +631,9 @@ bool print_cpufetch_arm(struct cpuInfo* cpu, STYLE s, struct colors* cs) {
setAttribute(art,ATTRIBUTE_UARCH,uarch); setAttribute(art,ATTRIBUTE_UARCH,uarch);
setAttribute(art,ATTRIBUTE_FREQUENCY,max_frequency); setAttribute(art,ATTRIBUTE_FREQUENCY,max_frequency);
setAttribute(art,ATTRIBUTE_NCORES,n_cores); setAttribute(art,ATTRIBUTE_NCORES,n_cores);
if(features != NULL) {
setAttribute(art, ATTRIBUTE_FEATURES, features);
}
setAttribute(art,ATTRIBUTE_L1i,l1i); setAttribute(art,ATTRIBUTE_L1i,l1i);
setAttribute(art,ATTRIBUTE_L1d,l1d); setAttribute(art,ATTRIBUTE_L1d,l1d);
setAttribute(art,ATTRIBUTE_L2,l2); setAttribute(art,ATTRIBUTE_L2,l2);
@@ -650,6 +658,9 @@ bool print_cpufetch_arm(struct cpuInfo* cpu, STYLE s, struct colors* cs) {
setAttribute(art, ATTRIBUTE_UARCH, uarch); setAttribute(art, ATTRIBUTE_UARCH, uarch);
setAttribute(art, ATTRIBUTE_FREQUENCY, max_frequency); setAttribute(art, ATTRIBUTE_FREQUENCY, max_frequency);
setAttribute(art, ATTRIBUTE_NCORES, n_cores); setAttribute(art, ATTRIBUTE_NCORES, n_cores);
if(features != NULL) {
setAttribute(art, ATTRIBUTE_FEATURES, features);
}
setAttribute(art, ATTRIBUTE_L1i, l1i); setAttribute(art, ATTRIBUTE_L1i, l1i);
setAttribute(art, ATTRIBUTE_L1d, l1d); setAttribute(art, ATTRIBUTE_L1d, l1d);
setAttribute(art, ATTRIBUTE_L2, l2); setAttribute(art, ATTRIBUTE_L2, l2);