[v1.01] Do not use FMA for frequency measurement. Running any other AVX instruction is sufficient, and some CPUs support AVX but not FMA.

This commit is contained in:
Dr-Noob
2021-11-20 10:25:36 +01:00
parent 4b50740516
commit fe3bc6163c
3 changed files with 12 additions and 14 deletions

View File

@@ -74,10 +74,10 @@ freq_nov.o: Makefile $(SRC_DIR)freq/freq_nov.c $(SRC_DIR)freq/freq_nov.h
$(CC) $(CFLAGS) $(SANITY_FLAGS) -c -pthread $(SRC_DIR)freq/freq_nov.c -o $@ $(CC) $(CFLAGS) $(SANITY_FLAGS) -c -pthread $(SRC_DIR)freq/freq_nov.c -o $@
freq_avx.o: Makefile $(SRC_DIR)freq/freq_avx.c $(SRC_DIR)freq/freq_avx.h freq_avx.o: Makefile $(SRC_DIR)freq/freq_avx.c $(SRC_DIR)freq/freq_avx.h
$(CC) $(CFLAGS) $(SANITY_FLAGS) -c -mavx -mfma -pthread $(SRC_DIR)freq/freq_avx.c -o $@ $(CC) $(CFLAGS) $(SANITY_FLAGS) -c -mavx -pthread $(SRC_DIR)freq/freq_avx.c -o $@
freq_avx512.o: Makefile $(SRC_DIR)freq/freq_avx512.c $(SRC_DIR)freq/freq_avx512.h freq_avx512.o: Makefile $(SRC_DIR)freq/freq_avx512.c $(SRC_DIR)freq/freq_avx512.h
$(CC) $(CFLAGS) $(SANITY_FLAGS) -c -mavx512f -mfma -pthread $(SRC_DIR)freq/freq_avx512.c -o $@ $(CC) $(CFLAGS) $(SANITY_FLAGS) -c -mavx512f -pthread $(SRC_DIR)freq/freq_avx512.c -o $@
$(OUTPUT): Makefile $(SOURCE) $(HEADERS) $(OUTPUT): Makefile $(SOURCE) $(HEADERS)
$(CC) $(CFLAGS) $(SANITY_FLAGS) $(SOURCE) -o $(OUTPUT) $(CC) $(CFLAGS) $(SANITY_FLAGS) $(SOURCE) -o $(OUTPUT)

View File

@@ -17,12 +17,11 @@ void* compute_avx() {
__m256 a = _mm256_set1_ps(1.5); __m256 a = _mm256_set1_ps(1.5);
__m256 b = _mm256_set1_ps(1.2); __m256 b = _mm256_set1_ps(1.2);
__m256 c = _mm256_set1_ps(0.0);
gettimeofday(&begin, NULL); gettimeofday(&begin, NULL);
while(!end) { while(!end) {
for(uint64_t i=0; i < LOOP_ITERS; i++) { for(uint64_t i=0; i < LOOP_ITERS; i++) {
c = _mm256_fmadd_ps(a, b, c); a = _mm256_add_ps(a, b);
} }
gettimeofday(&now, NULL); gettimeofday(&now, NULL);
@@ -35,7 +34,7 @@ void* compute_avx() {
printf("fopen: %s", strerror(errno)); printf("fopen: %s", strerror(errno));
} }
else { else {
fprintf(fp, "%f", c[0]); fprintf(fp, "%f", a[0]);
fclose(fp); fclose(fp);
} }

View File

@@ -23,7 +23,6 @@ void* compute_avx512() {
__m512 a[8]; __m512 a[8];
__m512 b[8]; __m512 b[8];
__m512 mult;
for(int i=0; i < 8; i++) { for(int i=0; i < 8; i++) {
a[i] = _mm512_set1_ps(1.5); a[i] = _mm512_set1_ps(1.5);
@@ -33,14 +32,14 @@ void* compute_avx512() {
gettimeofday(&begin, NULL); gettimeofday(&begin, NULL);
while(!end) { while(!end) {
for(uint64_t i=0; i < LOOP_ITERS; i++) { for(uint64_t i=0; i < LOOP_ITERS; i++) {
a[0] = _mm512_fmadd_ps(mult, a[0], b[0]); a[0] = _mm512_add_ps(a[0], b[0]);
a[1] = _mm512_fmadd_ps(mult, a[1], b[1]); a[1] = _mm512_add_ps(a[1], b[1]);
a[2] = _mm512_fmadd_ps(mult, a[2], b[2]); a[2] = _mm512_add_ps(a[2], b[2]);
a[3] = _mm512_fmadd_ps(mult, a[3], b[3]); a[3] = _mm512_add_ps(a[3], b[3]);
a[4] = _mm512_fmadd_ps(mult, a[4], b[4]); a[4] = _mm512_add_ps(a[4], b[4]);
a[5] = _mm512_fmadd_ps(mult, a[5], b[5]); a[5] = _mm512_add_ps(a[5], b[5]);
a[6] = _mm512_fmadd_ps(mult, a[6], b[6]); a[6] = _mm512_add_ps(a[6], b[6]);
a[7] = _mm512_fmadd_ps(mult, a[7], b[7]); a[7] = _mm512_add_ps(a[7], b[7]);
} }
gettimeofday(&now, NULL); gettimeofday(&now, NULL);