From 90624b9aaa778db324f79095ae2e10ae50b29011 Mon Sep 17 00:00:00 2001
From: Dr-Noob
Date: Sat, 10 Aug 2024 11:06:28 +0100
Subject: [PATCH] [v1.05][ARM] Preliminary support for SVE detection (#259)

---
Review notes (text in this section is not part of the commit message):
 - get_str_features(): the literal used to size the buffer now ends in
   "SVE2," so the allocation also covers the comma that strcat() appends
   after SVE2. Without it, a CPU reporting all seven features needs 35
   bytes (34 characters + NUL) but only 34 are allocated.
 - src/arm/sve.c: the first #include had no header name. It is restored as
   <stdint.h> because the file uses uint64_t; confirm against upstream.
 - Indentation was reconstructed (tabs in the Makefile, two spaces in C);
   verify against the tree before applying.

 Makefile         | 11 ++++++++++-
 src/arm/midr.c   | 28 +++++++++++++++++++++++++++-
 src/arm/sve.c    | 15 +++++++++++++++
 src/arm/sve.h    |  6 ++++++
 src/arm/uarch.c  |  5 ++++-
 src/common/cpu.h |  3 +++
 6 files changed, 65 insertions(+), 3 deletions(-)
 create mode 100644 src/arm/sve.c
 create mode 100644 src/arm/sve.h

diff --git a/Makefile b/Makefile
index 4606f97..a4e5d13 100644
--- a/Makefile
+++ b/Makefile
@@ -38,10 +38,16 @@ ifneq ($(OS),Windows_NT)
 		CFLAGS += -DARCH_PPC -std=gnu99 -fstack-protector-all -Wno-language-extension-token
 	else ifeq ($(arch), $(filter $(arch), arm aarch64_be aarch64 arm64 armv8b armv8l armv7l armv6l))
 		SRC_DIR=src/arm/
-		SOURCE += $(COMMON_SRC) $(SRC_DIR)midr.c $(SRC_DIR)uarch.c $(SRC_COMMON)soc.c $(SRC_DIR)soc.c $(SRC_COMMON)pci.c $(SRC_DIR)udev.c
+		SOURCE += $(COMMON_SRC) $(SRC_DIR)midr.c $(SRC_DIR)uarch.c $(SRC_COMMON)soc.c $(SRC_DIR)soc.c $(SRC_COMMON)pci.c $(SRC_DIR)udev.c sve.o
 		HEADERS += $(COMMON_HDR) $(SRC_DIR)midr.h $(SRC_DIR)uarch.h $(SRC_COMMON)soc.h $(SRC_DIR)soc.h $(SRC_COMMON)pci.h $(SRC_DIR)udev.c $(SRC_DIR)socs.h
 		CFLAGS += -DARCH_ARM -Wno-unused-parameter -std=c99 -fstack-protector-all
 
+		# Check if the compiler supports -march=armv8-a+sve. We will use it (if supported) to compile SVE detection code later
+		is_sve_flag_supported := $(shell $(CC) -march=armv8-a+sve -c $(SRC_DIR)sve.c -o sve_test.o 2> /dev/null && echo 'yes'; rm -f sve_test.o)
+		ifeq ($(is_sve_flag_supported), yes)
+			SVE_FLAGS += -march=armv8-a+sve
+		endif
+
 		ifeq ($(os), Darwin)
 			SOURCE += $(SRC_COMMON)sysctl.c
 			HEADERS += $(SRC_COMMON)sysctl.h
@@ -91,6 +97,9 @@ freq_avx.o: Makefile $(SRC_DIR)freq/freq_avx.c $(SRC_DIR)freq/freq_avx.h $(SRC_D
 freq_avx512.o: Makefile $(SRC_DIR)freq/freq_avx512.c $(SRC_DIR)freq/freq_avx512.h $(SRC_DIR)freq/freq.h
 	$(CC) $(CFLAGS) $(SANITY_FLAGS) -c -mavx512f -pthread $(SRC_DIR)freq/freq_avx512.c -o $@
 
+sve.o: Makefile $(SRC_DIR)sve.c $(SRC_DIR)sve.h
+	$(CC) $(CFLAGS) $(SANITY_FLAGS) $(SVE_FLAGS) -c $(SRC_DIR)sve.c -o $@
+
 $(OUTPUT): Makefile $(SOURCE) $(HEADERS)
 ifeq ($(GIT_VERSION),"")
 	$(CC) $(CFLAGS) $(SANITY_FLAGS) $(SOURCE) -o $(OUTPUT)
diff --git a/src/arm/midr.c b/src/arm/midr.c
index a4ad6b3..a04f664 100644
--- a/src/arm/midr.c
+++ b/src/arm/midr.c
@@ -19,6 +19,7 @@
 #include "udev.h"
 #include "midr.h"
 #include "uarch.h"
+#include "sve.h"
 
 bool cores_are_equal(int c1pos, int c2pos, uint32_t* midr_array, int32_t* freq_array) {
   return midr_array[c1pos] == midr_array[c2pos] && freq_array[c1pos] == freq_array[c2pos];
@@ -168,6 +169,15 @@ struct features* get_features_info(void) {
     feat->SHA1 = hwcaps & HWCAP_SHA1;
     feat->SHA2 = hwcaps & HWCAP_SHA2;
     feat->NEON = hwcaps & HWCAP_ASIMD;
+    feat->SVE = hwcaps & HWCAP_SVE;
+
+    hwcaps = getauxval(AT_HWCAP2);
+    if (errno == ENOENT) {
+      printWarn("Unable to retrieve AT_HWCAP2 using getauxval");
+    }
+    else {
+      feat->SVE2 = hwcaps & HWCAP2_SVE2;
+    }
   }
 #else
   else {
@@ -183,6 +193,8 @@ struct features* get_features_info(void) {
     feat->CRC32 = hwcaps & HWCAP2_CRC32;
     feat->SHA1 = hwcaps & HWCAP2_SHA1;
     feat->SHA2 = hwcaps & HWCAP2_SHA2;
+    feat->SVE = false;
+    feat->SVE2 = false;
   }
 #endif // ifdef __aarch64__
 #elif defined __APPLE__ || __MACH__
@@ -192,8 +204,14 @@ struct features* get_features_info(void) {
   feat->SHA1 = true;
   feat->SHA2 = true;
   feat->NEON = true;
+  feat->SVE = false;
+  feat->SVE2 = false;
 #endif // ifdef __linux__
 
+  if (feat->SVE || feat->SVE2) {
+    feat->cntb = sve_cntb();
+  }
+
   return feat;
 }
 
@@ -430,7 +448,7 @@ char* get_str_topology(struct cpuInfo* cpu, struct topology* topo, bool dual_soc
 
 char* get_str_features(struct cpuInfo* cpu) {
   struct features* feat = cpu->feat;
-  uint32_t max_len = strlen("NEON,SHA1,SHA2,AES,CRC32,") + 1;
+  uint32_t max_len = strlen("NEON,SHA1,SHA2,AES,CRC32,SVE,SVE2,") + 1;
   uint32_t len = 0;
   char* string = ecalloc(max_len, sizeof(char));
 
@@ -438,6 +456,14 @@ char* get_str_features(struct cpuInfo* cpu) {
     strcat(string, "NEON,");
     len += 5;
   }
+  if(feat->SVE) {
+    strcat(string, "SVE,");
+    len += 4;
+  }
+  if(feat->SVE2) {
+    strcat(string, "SVE2,");
+    len += 5;
+  }
   if(feat->SHA1) {
     strcat(string, "SHA1,");
     len += 5;
diff --git a/src/arm/sve.c b/src/arm/sve.c
new file mode 100644
index 0000000..edb0f81
--- /dev/null
+++ b/src/arm/sve.c
@@ -0,0 +1,15 @@
+#include <stdint.h>
+#include "../common/global.h"
+
+// https://learn.arm.com/learning-paths/servers-and-cloud-computing/sve/sve_basics/#:~:text=Using%20a%20text%20editor%20of%20your%20choice%2C%20copy,svcntb%28%29%29%3B%20%7D%20This%20program%20prints%20the%20vector%20length
+uint64_t sve_cntb(void) {
+  #ifdef __ARM_FEATURE_SVE
+    uint64_t x0 = 0;
+    __asm volatile("cntb %0"
+      : "=r"(x0));
+    return x0;
+  #else
+    printWarn("sve_cntb: Hardware supports SVE, but it was not enabled by the compiler");
+    return 0;
+  #endif
+}
diff --git a/src/arm/sve.h b/src/arm/sve.h
new file mode 100644
index 0000000..ddb3f70
--- /dev/null
+++ b/src/arm/sve.h
@@ -0,0 +1,6 @@
+#ifndef __SVE_DETECTION__
+#define __SVE_DETECTION__
+
+uint64_t sve_cntb(void);
+
+#endif
diff --git a/src/arm/uarch.c b/src/arm/uarch.c
index 9d0c62b..32d6e33 100644
--- a/src/arm/uarch.c
+++ b/src/arm/uarch.c
@@ -294,7 +294,10 @@ int get_vpus_width(struct cpuInfo* cpu) {
     case UARCH_NEOVERSE_V1:
       return 256;
     default:
-      if(cpu->feat->NEON) {
+      if (cpu->feat->SVE && cpu->feat->cntb > 0) {
+        return cpu->feat->cntb * 8;
+      }
+      else if (cpu->feat->NEON) {
         if(is_ARMv8_or_newer(cpu)) {
           return 128;
         }
diff --git a/src/common/cpu.h b/src/common/cpu.h
index 51886b4..1f31bcd 100644
--- a/src/common/cpu.h
+++ b/src/common/cpu.h
@@ -124,6 +124,9 @@ struct features {
   bool SHA1;
   bool SHA2;
   bool CRC32;
+  bool SVE;
+  bool SVE2;
+  uint32_t cntb;
 #endif
 };
 