Compare commits

..

11 Commits
v1.01 ... feat2

10 changed files with 227 additions and 82 deletions

View File

@@ -12,7 +12,7 @@ COMMON_HDR = $(SRC_COMMON)ascii.h $(SRC_COMMON)cpu.h $(SRC_COMMON)udev.h $(SRC_C
ifneq ($(OS),Windows_NT)
arch := $(shell uname -m)
ifeq ($(arch), $(filter $(arch), x86_64 amd64 i686))
ifeq ($(arch), $(filter $(arch), x86_64 amd64 i386 i486 i586 i686))
SRC_DIR=src/x86/
SOURCE += $(COMMON_SRC) $(SRC_DIR)cpuid.c $(SRC_DIR)apic.c $(SRC_DIR)cpuid_asm.c $(SRC_DIR)uarch.c
HEADERS += $(COMMON_HDR) $(SRC_DIR)cpuid.h $(SRC_DIR)apic.h $(SRC_DIR)cpuid_asm.h $(SRC_DIR)uarch.h $(SRC_DIR)freq/freq.h
@@ -74,10 +74,10 @@ freq_nov.o: Makefile $(SRC_DIR)freq/freq_nov.c $(SRC_DIR)freq/freq_nov.h
$(CC) $(CFLAGS) $(SANITY_FLAGS) -c -pthread $(SRC_DIR)freq/freq_nov.c -o $@
freq_avx.o: Makefile $(SRC_DIR)freq/freq_avx.c $(SRC_DIR)freq/freq_avx.h
$(CC) $(CFLAGS) $(SANITY_FLAGS) -c -mavx -mfma -pthread $(SRC_DIR)freq/freq_avx.c -o $@
$(CC) $(CFLAGS) $(SANITY_FLAGS) -c -mavx -pthread $(SRC_DIR)freq/freq_avx.c -o $@
freq_avx512.o: Makefile $(SRC_DIR)freq/freq_avx512.c $(SRC_DIR)freq/freq_avx512.h
$(CC) $(CFLAGS) $(SANITY_FLAGS) -c -mavx512f -mfma -pthread $(SRC_DIR)freq/freq_avx512.c -o $@
$(CC) $(CFLAGS) $(SANITY_FLAGS) -c -mavx512f -pthread $(SRC_DIR)freq/freq_avx512.c -o $@
$(OUTPUT): Makefile $(SOURCE) $(HEADERS)
$(CC) $(CFLAGS) $(SANITY_FLAGS) $(SOURCE) -o $(OUTPUT)

View File

@@ -17,7 +17,9 @@ static char* soc_trademark_string[] = {
[SOC_VENDOR_EXYNOS] = "Exynos ",
[SOC_VENDOR_KIRIN] = "Kirin ",
[SOC_VENDOR_BROADCOM] = "Broadcom BCM",
[SOC_VENDOR_APPLE] = "Apple "
[SOC_VENDOR_APPLE] = "Apple ",
[SOC_VENDOR_ALLWINNER] = "Allwinner ",
[SOC_VENDOR_GOOGLE] = "Google "
};
static char* soc_rpi_string[] = {
@@ -38,10 +40,10 @@ void fill_soc(struct system_on_chip* soc, char* soc_name, SOC soc_model, int32_t
}
bool match_soc(struct system_on_chip* soc, char* raw_name, char* expected_name, char* soc_name, SOC soc_model, int32_t process) {
if(strlen(raw_name) > strlen(expected_name))
return false;
int len1 = strlen(raw_name);
int len2 = strlen(expected_name);
int len = min(len1, len2);
int len = strlen(raw_name);
if(strncmp(raw_name, expected_name, len) != 0) {
return false;
}
@@ -454,6 +456,49 @@ bool match_qualcomm(char* soc_name, struct system_on_chip* soc) {
SOC_END
}
// https://linux-sunxi.org/Allwinner_SoC_Family
bool match_allwinner(char* soc_name, struct system_on_chip* soc) {
char* tmp;
if((tmp = strstr(soc_name, "sun")) == NULL)
return false;
SOC_START
// A series 32 bits
SOC_EQ(tmp, "sun4i", "A10", SOC_ALLWINNER_A10, soc, 55)
SOC_EQ(tmp, "sun5i", "A13", SOC_ALLWINNER_A13, soc, 55)
SOC_EQ(tmp, "sun5i", "A10s", SOC_ALLWINNER_A10S, soc, 55)
SOC_EQ(tmp, "sun7i", "A20", SOC_ALLWINNER_A20, soc, 40)
SOC_EQ(tmp, "sun8i", "A23", SOC_ALLWINNER_A23, soc, 40)
SOC_EQ(tmp, "sun6i", "A31", SOC_ALLWINNER_A31, soc, 40)
SOC_EQ(tmp, "sun6i", "A31s", SOC_ALLWINNER_A31S, soc, 40)
SOC_EQ(tmp, "sun8i", "A33", SOC_ALLWINNER_A33, soc, 40)
SOC_EQ(tmp, "sun8i", "A40", SOC_ALLWINNER_A40, soc, 40)
SOC_EQ(tmp, "sun8i", "A50", SOC_ALLWINNER_A50, soc, 28)
SOC_EQ(tmp, "sun9i", "A80", SOC_ALLWINNER_A80, soc, 28)
SOC_EQ(tmp, "sun8i", "A83T", SOC_ALLWINNER_A83T, soc, 28)
// H series 32 bits
SOC_EQ(tmp, "sun8i", "H2+", SOC_ALLWINNER_HZP, soc, 40)
SOC_EQ(tmp, "sun8i", "H3", SOC_ALLWINNER_H3, soc, 40)
SOC_EQ(tmp, "sun8i", "H8", SOC_ALLWINNER_H8, soc, 28)
// H series 64 bits
SOC_EQ(tmp, "sun50i", "H5", SOC_ALLWINNER_H5, soc, 40)
SOC_EQ(tmp, "sun50i", "H6", SOC_ALLWINNER_H6, soc, 28)
SOC_EQ(tmp, "sun50i", "H616", SOC_ALLWINNER_H616, soc, 28)
// R series 32 bits
SOC_EQ(tmp, "sun5i", "R8", SOC_ALLWINNER_R8, soc, 55)
SOC_EQ(tmp, "sun8i", "R16", SOC_ALLWINNER_R16, soc, 40)
SOC_EQ(tmp, "sun8i", "R40", SOC_ALLWINNER_R40, soc, 40)
SOC_EQ(tmp, "sun8i", "R58", SOC_ALLWINNER_R58, soc, 28)
// R series 64 bits
SOC_EQ(tmp, "sun50i", "R329", SOC_ALLWINNER_R328, soc, 28)
SOC_END
}
bool match_special(char* soc_name, struct system_on_chip* soc) {
char* tmp;
@@ -469,6 +514,12 @@ bool match_special(char* soc_name, struct system_on_chip* soc) {
return true;
}
// Google Pixel 6 (Proably stands for Google Silicon 101)'
if((tmp = strstr(soc_name, "gs101")) != NULL) {
fill_soc(soc, "Tensor", SOC_GOOGLE_TENSOR, 5);
return true;
}
return false;
}
@@ -490,6 +541,9 @@ struct system_on_chip* parse_soc_from_string(struct system_on_chip* soc) {
if(match_hisilicon(raw_name, soc))
return soc;
if(match_allwinner(raw_name, soc))
return soc;
match_broadcom(raw_name, soc);
return soc;
}

View File

@@ -13,7 +13,9 @@ enum {
SOC_VENDOR_EXYNOS,
SOC_VENDOR_KIRIN,
SOC_VENDOR_BROADCOM,
SOC_VENDOR_APPLE
SOC_VENDOR_APPLE,
SOC_VENDOR_ALLWINNER,
SOC_VENDOR_GOOGLE
};
struct system_on_chip {

View File

@@ -253,7 +253,33 @@ enum {
SOC_SNAPD_SM8250_AB,
SOC_SNAPD_SM8350,
// APPLE
SOC_APPLE_M1
SOC_APPLE_M1,
// ALLWINNER
SOC_ALLWINNER_A10,
SOC_ALLWINNER_A13,
SOC_ALLWINNER_A10S,
SOC_ALLWINNER_A20,
SOC_ALLWINNER_A23,
SOC_ALLWINNER_A31,
SOC_ALLWINNER_A31S,
SOC_ALLWINNER_A33,
SOC_ALLWINNER_A40,
SOC_ALLWINNER_A50,
SOC_ALLWINNER_A80,
SOC_ALLWINNER_A83T,
SOC_ALLWINNER_HZP,
SOC_ALLWINNER_H3,
SOC_ALLWINNER_H8,
SOC_ALLWINNER_H5,
SOC_ALLWINNER_H6,
SOC_ALLWINNER_H616,
SOC_ALLWINNER_R8,
SOC_ALLWINNER_R16,
SOC_ALLWINNER_R40,
SOC_ALLWINNER_R58,
SOC_ALLWINNER_R328,
// GOOGLE
SOC_GOOGLE_TENSOR
};
inline static VENDOR get_soc_vendor_from_soc(SOC soc) {
@@ -263,6 +289,8 @@ inline static VENDOR get_soc_vendor_from_soc(SOC soc) {
else if(soc >= SOC_MTK_MT6893 && soc <= SOC_MTK_MT8783) return SOC_VENDOR_MEDIATEK;
else if(soc >= SOC_SNAPD_QSD8650 && soc <= SOC_SNAPD_SM8350) return SOC_VENDOR_SNAPDRAGON;
else if(soc >= SOC_APPLE_M1 && soc <= SOC_APPLE_M1) return SOC_VENDOR_APPLE;
else if(soc >= SOC_ALLWINNER_A10 && soc <= SOC_ALLWINNER_R328) return SOC_VENDOR_ALLWINNER;
else if(soc == SOC_GOOGLE_TENSOR) return SOC_VENDOR_GOOGLE;
return SOC_VENDOR_UNKNOWN;
}

View File

@@ -33,7 +33,7 @@ struct ascii_logo {
uint32_t width;
uint32_t height;
bool replace_blocks;
char color_ascii[3][100];
char color_ascii[4][100];
char color_text[2][100];
};
@@ -218,6 +218,40 @@ $C1 kMMMMMMMMMMMMMMMMMMMMMMd \
$C1 'KMMMMMMMWXXWMMMMMMMk. \
$C1 \"cooc\"* \"*coo'\" "
#define ASCII_ALLWINNER \
"$C1 \
$C1 ################# \
$C1 .######## ##### #### \
$C1 ###### ####### \
$C1 #####. ## ..## ####. \
$C1 .#### #### ##### #### \
$C1 #### ## ### ###. ##### . \
$C1#### ## ## #### .###### ####* . \
$C1### ## ##.### ## #### .###### \
$C1### #.## ### ##### ##### . \
$C1### ### ### .### ### . \
$C1 #### ### #### #. \
$C1 #### #* \
$C1 ##### ##. \
$C1 ###########. \
$C1 "
#define ASCII_GOOGLE \
"$C1 \
$C1 ############ \
$C1 ################### \
$C1 ##################### \
$C1 ######## \
$C2 ###$C1#### \
$C2 #######$C3 ############## \
$C2 #######$C3 ############## \
$C2 #######$C3 ############## \
$C2 ###$C4####$C3 ####### \
$C4 ######## $C3######## \
$C4 #################$C3####### \
$C4 ###############$C3#### \
$C4 ########### "
// --------------------- LONG LOGOS ------------------------- //
#define ASCII_AMD_L \
"$C1 \
@@ -304,9 +338,9 @@ $C1 ############ ################ ######### ## ######### "
typedef struct ascii_logo asciiL;
// ------------------------------------------------------------------------------------------------------+
// ------------------------------------------------------------------------------------------------------------------+
// | LOGO | W | H | REPLACE | COLORS LOGO (>0 && <10) | COLORS TEXT (=2) |
// ------------------------------------------------------------------------------------------------------+
// ------------------------------------------------------------------------------------------------------------------+
asciiL logo_amd = { ASCII_AMD, 39, 15, false, {C_FG_WHITE, C_FG_GREEN}, {C_FG_WHITE, C_FG_GREEN} };
asciiL logo_intel = { ASCII_INTEL, 48, 14, false, {C_FG_CYAN}, {C_FG_CYAN, C_FG_WHITE} };
asciiL logo_intel_new = { ASCII_INTEL_NEW, 51, 9, false, {C_FG_CYAN}, {C_FG_CYAN, C_FG_WHITE} };
@@ -317,8 +351,11 @@ asciiL logo_kirin = { ASCII_KIRIN, 53, 12, false, {C_FG_RED},
asciiL logo_broadcom = { ASCII_BROADCOM, 44, 19, false, {C_FG_WHITE, C_FG_RED}, {C_FG_WHITE, C_FG_RED} };
asciiL logo_arm = { ASCII_ARM, 42, 5, false, {C_FG_CYAN}, {C_FG_WHITE, C_FG_CYAN} };
asciiL logo_ibm = { ASCII_IBM, 42, 9, false, {C_FG_CYAN, C_FG_WHITE}, {C_FG_CYAN, C_FG_WHITE} };
asciiL logo_apple = { ASCII_APPLE, 32, 17, false, {C_FG_WHITE}, {C_FG_B_BLACK, C_FG_B_WHITE} };
// Long variants | ----------------------------------------------------------------------------------------------------|
asciiL logo_apple = { ASCII_APPLE, 32, 17, false, {C_FG_WHITE}, {C_FG_CYAN, C_FG_B_WHITE} };
asciiL logo_allwinner = { ASCII_ALLWINNER, 47, 16, false, {C_FG_CYAN}, {C_FG_B_BLACK, C_FG_B_CYAN } };
asciiL logo_google = { ASCII_GOOGLE, 34, 14, false, {C_FG_RED, C_FG_YELLOW, C_FG_BLUE, C_FG_GREEN}, {C_FG_BLUE} };
// Long variants | ---------------------------------------------------------------------------------------------------------------|
asciiL logo_amd_l = { ASCII_AMD_L, 62, 19, true, {C_BG_WHITE, C_BG_GREEN}, {C_FG_WHITE, C_FG_GREEN} };
asciiL logo_intel_l = { ASCII_INTEL_L, 62, 19, true, {C_BG_CYAN, C_BG_WHITE}, {C_FG_CYAN, C_FG_WHITE} };
asciiL logo_intel_l_new = { ASCII_INTEL_L_NEW, 57, 14, true, {C_BG_CYAN, C_BG_WHITE, C_BG_BLUE}, {C_FG_CYAN, C_FG_WHITE} };

View File

@@ -353,6 +353,10 @@ void choose_ascii_art(struct ascii* art, struct color** cs, struct terminal* ter
art->art = &logo_broadcom;
else if(art->vendor == SOC_VENDOR_APPLE)
art->art = &logo_apple;
else if(art->vendor == SOC_VENDOR_ALLWINNER)
art->art = &logo_allwinner;
else if(art->vendor == SOC_VENDOR_GOOGLE)
art->art = &logo_google;
else {
art->art = choose_ascii_art_aux(&logo_arm_l, &logo_arm, term, lf);
}
@@ -498,48 +502,62 @@ bool print_cpufetch_x86(struct cpuInfo* cpu, STYLE s, struct color** cs, struct
return false;
art->new_intel_logo = choose_new_intel_logo(cpu);
// Step 1. Retrieve attributes (if some structures are NULL, like topo
// or cache, do not try to retrieve them)
uint32_t socket_num = 1;
char* l1i, *l1d, *l2, *l3, *n_cores, *n_cores_dual, *sockets;
l1i = l1d = l2 = l3 = n_cores = n_cores_dual = sockets = NULL;
char* uarch = get_str_uarch(cpu);
char* manufacturing_process = get_str_process(cpu);
char* sockets = get_str_sockets(cpu->topo);
char* max_frequency = get_str_freq(cpu->freq);
char* n_cores = get_str_topology(cpu, cpu->topo, false);
char* n_cores_dual = get_str_topology(cpu, cpu->topo, true);
char* cpu_name = get_str_cpu_name(cpu, fcpuname);
char* avx = get_str_avx(cpu);
char* fma = get_str_fma(cpu);
char* l1i = get_str_l1i(cpu->cach);
char* l1d = get_str_l1d(cpu->cach);
char* l2 = get_str_l2(cpu->cach);
char* l3 = get_str_l3(cpu->cach);
char* pp = get_str_peak_performance(cpu->peak_performance);
setAttribute(art,ATTRIBUTE_NAME,cpu_name);
if(cpu->topo != NULL) {
sockets = get_str_sockets(cpu->topo);
n_cores = get_str_topology(cpu, cpu->topo, false);
n_cores_dual = get_str_topology(cpu, cpu->topo, true);
}
if(cpu->cach != NULL) {
l1i = get_str_l1i(cpu->cach);
l1d = get_str_l1d(cpu->cach);
l2 = get_str_l2(cpu->cach);
l3 = get_str_l3(cpu->cach);
}
// Step 2. Set attributes
setAttribute(art, ATTRIBUTE_NAME, cpu_name);
if(cpu->hv->present) {
setAttribute(art, ATTRIBUTE_HYPERVISOR, cpu->hv->hv_name);
}
setAttribute(art,ATTRIBUTE_UARCH,uarch);
setAttribute(art,ATTRIBUTE_TECHNOLOGY,manufacturing_process);
setAttribute(art,ATTRIBUTE_FREQUENCY,max_frequency);
uint32_t socket_num = get_nsockets(cpu->topo);
setAttribute(art, ATTRIBUTE_UARCH, uarch);
setAttribute(art, ATTRIBUTE_TECHNOLOGY, manufacturing_process);
setAttribute(art, ATTRIBUTE_FREQUENCY, max_frequency);
if(cpu->topo != NULL) {
socket_num = get_nsockets(cpu->topo);
if (socket_num > 1) {
setAttribute(art, ATTRIBUTE_SOCKETS, sockets);
setAttribute(art, ATTRIBUTE_NCORES,n_cores);
setAttribute(art, ATTRIBUTE_NCORES, n_cores);
setAttribute(art, ATTRIBUTE_NCORES_DUAL, n_cores_dual);
}
else {
setAttribute(art,ATTRIBUTE_NCORES,n_cores);
setAttribute(art, ATTRIBUTE_NCORES, n_cores);
}
setAttribute(art,ATTRIBUTE_AVX,avx);
setAttribute(art,ATTRIBUTE_FMA,fma);
setAttribute(art,ATTRIBUTE_L1i,l1i);
setAttribute(art,ATTRIBUTE_L1d,l1d);
setAttribute(art,ATTRIBUTE_L2,l2);
if(l3 != NULL) {
setAttribute(art,ATTRIBUTE_L3,l3);
}
setAttribute(art,ATTRIBUTE_PEAK,pp);
setAttribute(art, ATTRIBUTE_AVX, avx);
setAttribute(art, ATTRIBUTE_FMA, fma);
if(l1i != NULL) setAttribute(art, ATTRIBUTE_L1i, l1i);
if(l1d != NULL) setAttribute(art, ATTRIBUTE_L1d, l1d);
if(l2 != NULL) setAttribute(art, ATTRIBUTE_L2, l2);
if(l3 != NULL) setAttribute(art, ATTRIBUTE_L3, l3);
setAttribute(art, ATTRIBUTE_PEAK, pp);
// Step 3. Print output
const char** attribute_fields = ATTRIBUTE_FIELDS;
uint32_t longest_attribute = longest_attribute_length(art, attribute_fields);
uint32_t longest_field = longest_field_length(art, longest_attribute);
@@ -571,8 +589,8 @@ bool print_cpufetch_x86(struct cpuInfo* cpu, STYLE s, struct color** cs, struct
free(art);
if(cs != NULL) free_colors_struct(cs);
free_cache_struct(cpu->cach);
free_topo_struct(cpu->topo);
if(cpu->cach != NULL) free_cache_struct(cpu->cach);
if(cpu->topo != NULL) free_topo_struct(cpu->topo);
free_freq_struct(cpu->freq);
free_cpuinfo_struct(cpu);
@@ -586,6 +604,7 @@ bool print_cpufetch_ppc(struct cpuInfo* cpu, STYLE s, struct color** cs, struct
if(art == NULL)
return false;
// Step 1. Retrieve attributes
char* uarch = get_str_uarch(cpu);
char* manufacturing_process = get_str_process(cpu);
char* sockets = get_str_sockets(cpu->topo);
@@ -601,6 +620,7 @@ bool print_cpufetch_ppc(struct cpuInfo* cpu, STYLE s, struct color** cs, struct
char* l3 = get_str_l3(cpu->cach);
char* pp = get_str_peak_performance(cpu->peak_performance);
// Step 2. Set attributes
if(cpu_name != NULL) {
setAttribute(art,ATTRIBUTE_NAME,cpu_name);
}
@@ -625,6 +645,7 @@ bool print_cpufetch_ppc(struct cpuInfo* cpu, STYLE s, struct color** cs, struct
}
setAttribute(art,ATTRIBUTE_PEAK,pp);
// Step 3. Print output
const char** attribute_fields = ATTRIBUTE_FIELDS;
uint32_t longest_attribute = longest_attribute_length(art, attribute_fields);
uint32_t longest_field = longest_field_length(art, longest_attribute);

View File

@@ -181,9 +181,6 @@ struct cpuInfo* get_cpu_info() {
feat->altivec = has_altivec(cpu->arch);
cpu->peak_performance = get_peak_performance(cpu, cpu->topo, get_freq(cpu->freq));
if(cpu->cach == NULL || cpu->topo == NULL) {
return NULL;
}
return cpu;
}

View File

@@ -278,6 +278,9 @@ struct cpuInfo* get_cpu_info() {
struct cpuInfo* cpu = emalloc(sizeof(struct cpuInfo));
struct features* feat = emalloc(sizeof(struct features));
cpu->feat = feat;
cpu->peak_performance = -1;
cpu->topo = NULL;
cpu->cach = NULL;
bool *ptr = &(feat->AES);
for(uint32_t i = 0; i < sizeof(struct features)/sizeof(bool); i++, ptr++) {
@@ -386,15 +389,20 @@ struct cpuInfo* get_cpu_info() {
cpu->topology_extensions = (ecx >> 22) & 1;
}
// If any field of the struct is NULL,
// return inmideately, as further functions
// require valid fields (cach, topo, etc)
cpu->arch = get_cpu_uarch(cpu);
cpu->freq = get_frequency_info(cpu);
cpu->cach = get_cache_info(cpu);
if(cpu->cach == NULL) return cpu;
cpu->topo = get_topology_info(cpu, cpu->cach);
if(cpu->topo == NULL) return cpu;
cpu->peak_performance = get_peak_performance(cpu, cpu->topo, get_freq(cpu->freq), accurate_pp());
if(cpu->cach == NULL || cpu->topo == NULL) {
return NULL;
}
return cpu;
}
@@ -472,7 +480,7 @@ bool get_cache_topology_amd(struct cpuInfo* cpu, struct topology* topo) {
return true;
}
void get_topology_from_udev(struct cpuInfo* cpu, struct topology* topo) {
void get_topology_from_udev(struct topology* topo) {
// TODO: To be improved in the future
topo->total_cores = get_ncores_from_cpuinfo();
topo->logical_cores = topo->total_cores;
@@ -515,7 +523,7 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) {
if(!toporet) {
#ifdef __linux__
printWarn("Failed to retrieve topology from APIC, using udev...\n");
get_topology_from_udev(cpu, topo);
get_topology_from_udev(topo);
#else
printErr("Failed to retrieve topology from APIC, assumming default values...\n");
topo->logical_cores = UNKNOWN_DATA;

View File

@@ -17,12 +17,11 @@ void* compute_avx() {
__m256 a = _mm256_set1_ps(1.5);
__m256 b = _mm256_set1_ps(1.2);
__m256 c = _mm256_set1_ps(0.0);
gettimeofday(&begin, NULL);
while(!end) {
for(uint64_t i=0; i < LOOP_ITERS; i++) {
c = _mm256_fmadd_ps(a, b, c);
a = _mm256_add_ps(a, b);
}
gettimeofday(&now, NULL);
@@ -35,7 +34,7 @@ void* compute_avx() {
printf("fopen: %s", strerror(errno));
}
else {
fprintf(fp, "%f", c[0]);
fprintf(fp, "%f", a[0]);
fclose(fp);
}

View File

@@ -23,7 +23,6 @@ void* compute_avx512() {
__m512 a[8];
__m512 b[8];
__m512 mult;
for(int i=0; i < 8; i++) {
a[i] = _mm512_set1_ps(1.5);
@@ -33,14 +32,14 @@ void* compute_avx512() {
gettimeofday(&begin, NULL);
while(!end) {
for(uint64_t i=0; i < LOOP_ITERS; i++) {
a[0] = _mm512_fmadd_ps(mult, a[0], b[0]);
a[1] = _mm512_fmadd_ps(mult, a[1], b[1]);
a[2] = _mm512_fmadd_ps(mult, a[2], b[2]);
a[3] = _mm512_fmadd_ps(mult, a[3], b[3]);
a[4] = _mm512_fmadd_ps(mult, a[4], b[4]);
a[5] = _mm512_fmadd_ps(mult, a[5], b[5]);
a[6] = _mm512_fmadd_ps(mult, a[6], b[6]);
a[7] = _mm512_fmadd_ps(mult, a[7], b[7]);
a[0] = _mm512_add_ps(a[0], b[0]);
a[1] = _mm512_add_ps(a[1], b[1]);
a[2] = _mm512_add_ps(a[2], b[2]);
a[3] = _mm512_add_ps(a[3], b[3]);
a[4] = _mm512_add_ps(a[4], b[4]);
a[5] = _mm512_add_ps(a[5], b[5]);
a[6] = _mm512_add_ps(a[6], b[6]);
a[7] = _mm512_add_ps(a[7], b[7]);
}
gettimeofday(&now, NULL);