From 26af5ff83eb8284125a552b30a3f6ff575505e45 Mon Sep 17 00:00:00 2001 From: Dr-Noob Date: Thu, 1 Aug 2024 15:38:48 +0100 Subject: [PATCH] [v1.05][ARM] Properly implement SVE detection --- Makefile | 5 ---- src/arm/midr.c | 40 +++++++++++++++++++++++++- src/arm/midr.h | 1 - src/arm/sve_test.c | 71 ---------------------------------------------- src/arm/uarch.c | 5 +++- src/common/cpu.h | 3 ++ src/common/main.c | 1 - 7 files changed, 46 insertions(+), 80 deletions(-) delete mode 100644 src/arm/sve_test.c diff --git a/Makefile b/Makefile index a4fb675..4606f97 100644 --- a/Makefile +++ b/Makefile @@ -46,8 +46,6 @@ ifneq ($(OS),Windows_NT) SOURCE += $(SRC_COMMON)sysctl.c HEADERS += $(SRC_COMMON)sysctl.h endif - - SOURCE += $(SRC_DIR)sve_test.o else ifeq ($(arch), $(filter $(arch), riscv64 riscv32)) SRC_DIR=src/riscv/ SOURCE += $(COMMON_SRC) $(SRC_DIR)riscv.c $(SRC_DIR)uarch.c $(SRC_COMMON)soc.c $(SRC_DIR)soc.c $(SRC_DIR)udev.c @@ -93,9 +91,6 @@ freq_avx.o: Makefile $(SRC_DIR)freq/freq_avx.c $(SRC_DIR)freq/freq_avx.h $(SRC_D freq_avx512.o: Makefile $(SRC_DIR)freq/freq_avx512.c $(SRC_DIR)freq/freq_avx512.h $(SRC_DIR)freq/freq.h $(CC) $(CFLAGS) $(SANITY_FLAGS) -c -mavx512f -pthread $(SRC_DIR)freq/freq_avx512.c -o $@ -$(SRC_DIR)sve_test.o: Makefile $(SRC_DIR)sve_test.c - $(CC) $(CFLAGS) $(SANITY_FLAGS) -c -march=armv8-a+sve $(SRC_DIR)sve_test.c -o $@ - $(OUTPUT): Makefile $(SOURCE) $(HEADERS) ifeq ($(GIT_VERSION),"") $(CC) $(CFLAGS) $(SANITY_FLAGS) $(SOURCE) -o $(OUTPUT) diff --git a/src/arm/midr.c b/src/arm/midr.c index a4ad6b3..ee677ea 100644 --- a/src/arm/midr.c +++ b/src/arm/midr.c @@ -142,6 +142,20 @@ void init_cpu_info(struct cpuInfo* cpu) { cpu->next_cpu = NULL; } +// https://learn.arm.com/learning-paths/servers-and-cloud-computing/sve/sve_basics/#:~:text=Using%20a%20text%20editor%20of%20your%20choice%2C%20copy,svcntb%28%29%29%3B%20%7D%20This%20program%20prints%20the%20vector%20length +static inline uint32_t sve_cntb(void) { + uint32_t x0 = 0; + +#ifdef __ARM_FEATURE_SVE + __asm volatile("cntb %0" + : "=r"(x0)); + printf("cntb=%d\n", x0); +#endif + + return x0; +} + + // We assume all cpus share the same hardware // capabilities but I'm not sure it is always // true... @@ -168,6 +182,15 @@ struct features* get_features_info(void) { feat->SHA1 = hwcaps & HWCAP_SHA1; feat->SHA2 = hwcaps & HWCAP_SHA2; feat->NEON = hwcaps & HWCAP_ASIMD; + feat->SVE = hwcaps & HWCAP_SVE; + + hwcaps = getauxval(AT_HWCAP2); + if (errno == ENOENT) { + printWarn("Unable to retrieve AT_HWCAP2 using getauxval"); + } + else { + feat->SVE2 = hwcaps & HWCAP2_SVE2; + } } #else else { @@ -183,6 +206,8 @@ struct features* get_features_info(void) { feat->CRC32 = hwcaps & HWCAP2_CRC32; feat->SHA1 = hwcaps & HWCAP2_SHA1; feat->SHA2 = hwcaps & HWCAP2_SHA2; + feat->SVE = false; + feat->SVE2 = false; } #endif // ifdef __aarch64__ #elif defined __APPLE__ || __MACH__ @@ -192,8 +217,13 @@ struct features* get_features_info(void) { feat->SHA1 = true; feat->SHA2 = true; feat->NEON = true; + feat->SVE = false; + feat->SVE2 = false; #endif // ifdef __linux__ + if (feat->SVE || feat->SVE2) + feat->cntb = sve_cntb(); + return feat; } @@ -430,7 +460,7 @@ char* get_str_topology(struct cpuInfo* cpu, struct topology* topo, bool dual_soc char* get_str_features(struct cpuInfo* cpu) { struct features* feat = cpu->feat; - uint32_t max_len = strlen("NEON,SHA1,SHA2,AES,CRC32,") + 1; + uint32_t max_len = strlen("NEON,SHA1,SHA2,AES,CRC32,SVE,SVE2") + 1; uint32_t len = 0; char* string = ecalloc(max_len, sizeof(char)); @@ -438,6 +468,14 @@ char* get_str_features(struct cpuInfo* cpu) { strcat(string, "NEON,"); len += 5; } + if(feat->SVE) { + strcat(string, "SVE,"); + len += 4; + } + if(feat->SVE2) { + strcat(string, "SVE2,"); + len += 5; + } if(feat->SHA1) { strcat(string, "SHA1,"); len += 5; diff --git a/src/arm/midr.h b/src/arm/midr.h index 0c00ca7..a133ca0 100644 --- a/src/arm/midr.h +++ b/src/arm/midr.h @@ -11,7 +11,6 @@ char* get_str_features(struct cpuInfo* cpu); void print_debug(struct cpuInfo* cpu); void free_topo_struct(struct topology* topo); -void SVE_exp(struct cpuInfo* cpu); // Code taken from cpuinfo (https://github.com/pytorch/cpuinfo/blob/master/src/arm/midr.h) #define CPUINFO_ARM_MIDR_IMPLEMENTER_MASK UINT32_C(0xFF000000) diff --git a/src/arm/sve_test.c b/src/arm/sve_test.c deleted file mode 100644 index cc2493f..0000000 --- a/src/arm/sve_test.c +++ /dev/null @@ -1,71 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#ifdef __linux__ - #include - #include - #include "../common/freq.h" -#elif defined __APPLE__ || __MACH__ - #include "../common/sysctl.h" -#endif - -#include "../common/global.h" -#include "../common/soc.h" -#include "../common/args.h" -#include "udev.h" -#include "midr.h" -#include "uarch.h" - -// https://docs.kernel.org/arch/arm64/elf_hwcaps.html -void check_for_SVE(void) { - errno = 0; - long hwcaps = getauxval(AT_HWCAP); - - if(errno == ENOENT) { - printWarn("Unable to retrieve AT_HWCAP using getauxval"); - } - - if (hwcaps & HWCAP_SVE) printf("SVE: Yes\n"); - else printf("SVE: No\n"); - - hwcaps = getauxval(AT_HWCAP2); - - if (hwcaps & HWCAP2_SVE2) printf("SVE2: Yes\n"); - else printf("SVE2: No\n"); -} - -#include - -#ifndef __ARM_FEATURE_SVE -#warning "Make sure to compile for SVE!" -#endif - -// https://learn.arm.com/learning-paths/servers-and-cloud-computing/sve/sve_basics/#:~:text=Using%20a%20text%20editor%20of%20your%20choice%2C%20copy,svcntb%28%29%29%3B%20%7D%20This%20program%20prints%20the%20vector%20length -void SVE_exp(struct cpuInfo* cpu) { - check_for_SVE(); - - uint32_t core = 4; - if (!bind_to_cpu(core)) { - printErr("Failed binding the process to CPU %d", core); - return; - } - printf("[%d] SVE vector length is: %ld bytes\n", core, svcntb()); - - core = 7; - if (!bind_to_cpu(core)) { - printErr("Failed binding the process to CPU %d", core); - return; - } - printf("[%d] SVE vector length is: %ld bytes\n", core, svcntb()); - - core = 0; - if (!bind_to_cpu(core)) { - printErr("Failed binding the process to CPU %d", core); - return; - } - printf("[%d] SVE vector length is: %ld bytes\n", core, svcntb()); -} diff --git a/src/arm/uarch.c b/src/arm/uarch.c index 9d0c62b..802434a 100644 --- a/src/arm/uarch.c +++ b/src/arm/uarch.c @@ -294,7 +294,10 @@ int get_vpus_width(struct cpuInfo* cpu) { case UARCH_NEOVERSE_V1: return 256; default: - if(cpu->feat->NEON) { + if (cpu->feat->SVE) { + return cpu->feat->cntb * 8; + } + else if (cpu->feat->NEON) { if(is_ARMv8_or_newer(cpu)) { return 128; } diff --git a/src/common/cpu.h b/src/common/cpu.h index 51886b4..1f31bcd 100644 --- a/src/common/cpu.h +++ b/src/common/cpu.h @@ -124,6 +124,9 @@ struct features { bool SHA1; bool SHA2; bool CRC32; + bool SVE; + bool SVE2; + uint32_t cntb; #endif }; diff --git a/src/common/main.c b/src/common/main.c index 5e8742d..f26e9aa 100644 --- a/src/common/main.c +++ b/src/common/main.c @@ -132,7 +132,6 @@ int main(int argc, char* argv[]) { } if(print_cpufetch(cpu, get_style(), get_colors(), show_full_cpu_name())) { - SVE_exp(cpu); return EXIT_SUCCESS; } else {