diff --git a/Makefile b/Makefile index af04584..ece9d22 100644 --- a/Makefile +++ b/Makefile @@ -74,10 +74,10 @@ freq_nov.o: Makefile $(SRC_DIR)freq/freq_nov.c $(SRC_DIR)freq/freq_nov.h $(CC) $(CFLAGS) $(SANITY_FLAGS) -c -pthread $(SRC_DIR)freq/freq_nov.c -o $@ freq_avx.o: Makefile $(SRC_DIR)freq/freq_avx.c $(SRC_DIR)freq/freq_avx.h - $(CC) $(CFLAGS) $(SANITY_FLAGS) -c -mavx -mfma -pthread $(SRC_DIR)freq/freq_avx.c -o $@ + $(CC) $(CFLAGS) $(SANITY_FLAGS) -c -mavx -pthread $(SRC_DIR)freq/freq_avx.c -o $@ freq_avx512.o: Makefile $(SRC_DIR)freq/freq_avx512.c $(SRC_DIR)freq/freq_avx512.h - $(CC) $(CFLAGS) $(SANITY_FLAGS) -c -mavx512f -mfma -pthread $(SRC_DIR)freq/freq_avx512.c -o $@ + $(CC) $(CFLAGS) $(SANITY_FLAGS) -c -mavx512f -pthread $(SRC_DIR)freq/freq_avx512.c -o $@ $(OUTPUT): Makefile $(SOURCE) $(HEADERS) $(CC) $(CFLAGS) $(SANITY_FLAGS) $(SOURCE) -o $(OUTPUT) diff --git a/src/x86/freq/freq_avx.c b/src/x86/freq/freq_avx.c index 340b8ed..4935972 100644 --- a/src/x86/freq/freq_avx.c +++ b/src/x86/freq/freq_avx.c @@ -17,12 +17,11 @@ void* compute_avx() { __m256 a = _mm256_set1_ps(1.5); __m256 b = _mm256_set1_ps(1.2); - __m256 c = _mm256_set1_ps(0.0); gettimeofday(&begin, NULL); while(!end) { for(uint64_t i=0; i < LOOP_ITERS; i++) { - c = _mm256_fmadd_ps(a, b, c); + a = _mm256_add_ps(a, b); } gettimeofday(&now, NULL); @@ -35,7 +34,7 @@ void* compute_avx() { printf("fopen: %s", strerror(errno)); } else { - fprintf(fp, "%f", c[0]); + fprintf(fp, "%f", a[0]); fclose(fp); } diff --git a/src/x86/freq/freq_avx512.c b/src/x86/freq/freq_avx512.c index e6bd917..b546574 100644 --- a/src/x86/freq/freq_avx512.c +++ b/src/x86/freq/freq_avx512.c @@ -23,7 +23,6 @@ void* compute_avx512() { __m512 a[8]; __m512 b[8]; - __m512 mult; for(int i=0; i < 8; i++) { a[i] = _mm512_set1_ps(1.5); @@ -33,14 +32,14 @@ void* compute_avx512() { gettimeofday(&begin, NULL); while(!end) { for(uint64_t i=0; i < LOOP_ITERS; i++) { - a[0] = _mm512_fmadd_ps(mult, a[0], b[0]); - a[1] = _mm512_fmadd_ps(mult, a[1], b[1]); - a[2] = _mm512_fmadd_ps(mult, a[2], b[2]); - a[3] = _mm512_fmadd_ps(mult, a[3], b[3]); - a[4] = _mm512_fmadd_ps(mult, a[4], b[4]); - a[5] = _mm512_fmadd_ps(mult, a[5], b[5]); - a[6] = _mm512_fmadd_ps(mult, a[6], b[6]); - a[7] = _mm512_fmadd_ps(mult, a[7], b[7]); + a[0] = _mm512_add_ps(a[0], b[0]); + a[1] = _mm512_add_ps(a[1], b[1]); + a[2] = _mm512_add_ps(a[2], b[2]); + a[3] = _mm512_add_ps(a[3], b[3]); + a[4] = _mm512_add_ps(a[4], b[4]); + a[5] = _mm512_add_ps(a[5], b[5]); + a[6] = _mm512_add_ps(a[6], b[6]); + a[7] = _mm512_add_ps(a[7], b[7]); } gettimeofday(&now, NULL);