mirror of
https://github.com/Dr-Noob/cpufetch.git
synced 2026-03-25 07:50:40 +01:00
[v1.00] Add --accurate-pp option (Linux x86_64) only. Needs more work to make it stable (add support for non-AVX, AVX512, etc)
This commit is contained in:
17
Makefile
17
Makefile
@@ -16,6 +16,12 @@ ifneq ($(OS),Windows_NT)
|
|||||||
SRC_DIR=src/x86/
|
SRC_DIR=src/x86/
|
||||||
SOURCE += $(COMMON_SRC) $(SRC_DIR)cpuid.c $(SRC_DIR)apic.c $(SRC_DIR)cpuid_asm.c $(SRC_DIR)uarch.c
|
SOURCE += $(COMMON_SRC) $(SRC_DIR)cpuid.c $(SRC_DIR)apic.c $(SRC_DIR)cpuid_asm.c $(SRC_DIR)uarch.c
|
||||||
HEADERS += $(COMMON_HDR) $(SRC_DIR)cpuid.h $(SRC_DIR)apic.h $(SRC_DIR)cpuid_asm.h $(SRC_DIR)uarch.h
|
HEADERS += $(COMMON_HDR) $(SRC_DIR)cpuid.h $(SRC_DIR)apic.h $(SRC_DIR)cpuid_asm.h $(SRC_DIR)uarch.h
|
||||||
|
|
||||||
|
os := $(shell uname -s)
|
||||||
|
ifeq ($(os), Linux)
|
||||||
|
SOURCE += freq.o
|
||||||
|
CFLAGS += -pthread
|
||||||
|
endif
|
||||||
CFLAGS += -DARCH_X86 -std=c99 -fstack-protector-all
|
CFLAGS += -DARCH_X86 -std=c99 -fstack-protector-all
|
||||||
else ifeq ($(arch), $(filter $(arch), ppc64le ppc64 ppcle ppc))
|
else ifeq ($(arch), $(filter $(arch), ppc64le ppc64 ppcle ppc))
|
||||||
SRC_DIR=src/ppc/
|
SRC_DIR=src/ppc/
|
||||||
@@ -51,18 +57,21 @@ else
|
|||||||
OUTPUT=cpufetch.exe
|
OUTPUT=cpufetch.exe
|
||||||
endif
|
endif
|
||||||
|
|
||||||
all: CFLAGS += -O3
|
all: CFLAGS += -O2
|
||||||
all: $(OUTPUT)
|
all: $(OUTPUT)
|
||||||
|
|
||||||
debug: CFLAGS += -g -O0
|
debug: CFLAGS += -g -O0
|
||||||
debug: $(OUTPUT)
|
debug: $(OUTPUT)
|
||||||
|
|
||||||
static: CFLAGS += -static -O3
|
static: CFLAGS += -static -O2
|
||||||
static: $(OUTPUT)
|
static: $(OUTPUT)
|
||||||
|
|
||||||
strict: CFLAGS += -O3 -Werror -fsanitize=undefined -D_FORTIFY_SOURCE=2
|
strict: CFLAGS += -O2 -Werror -fsanitize=undefined -D_FORTIFY_SOURCE=2
|
||||||
strict: $(OUTPUT)
|
strict: $(OUTPUT)
|
||||||
|
|
||||||
|
freq.o: Makefile $(SRC_DIR)freq.c $(SRC_DIR)freq.h
|
||||||
|
$(CC) $(CFLAGS) $(SANITY_FLAGS) -c -mavx -mfma -pthread $(SRC_DIR)freq.c -o freq.o
|
||||||
|
|
||||||
$(OUTPUT): Makefile $(SOURCE) $(HEADERS)
|
$(OUTPUT): Makefile $(SOURCE) $(HEADERS)
|
||||||
$(CC) $(CFLAGS) $(SANITY_FLAGS) $(SOURCE) -o $(OUTPUT)
|
$(CC) $(CFLAGS) $(SANITY_FLAGS) $(SOURCE) -o $(OUTPUT)
|
||||||
|
|
||||||
@@ -70,7 +79,7 @@ run: $(OUTPUT)
|
|||||||
./$(OUTPUT)
|
./$(OUTPUT)
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
@rm -f $(OUTPUT)
|
@rm -f $(OUTPUT) freq.o
|
||||||
|
|
||||||
install: $(OUTPUT)
|
install: $(OUTPUT)
|
||||||
install -Dm755 "cpufetch" "$(DESTDIR)$(PREFIX)/bin/cpufetch"
|
install -Dm755 "cpufetch" "$(DESTDIR)$(PREFIX)/bin/cpufetch"
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ struct args_struct {
|
|||||||
bool debug_flag;
|
bool debug_flag;
|
||||||
bool help_flag;
|
bool help_flag;
|
||||||
bool raw_flag;
|
bool raw_flag;
|
||||||
|
bool accurate_pp;
|
||||||
bool full_cpu_name_flag;
|
bool full_cpu_name_flag;
|
||||||
bool logo_long;
|
bool logo_long;
|
||||||
bool logo_short;
|
bool logo_short;
|
||||||
@@ -46,6 +47,7 @@ const char args_chr[] = {
|
|||||||
/* [ARG_LOGO_SHORT] = */ 2,
|
/* [ARG_LOGO_SHORT] = */ 2,
|
||||||
/* [ARG_LOGO_INTEL_NEW] = */ 3,
|
/* [ARG_LOGO_INTEL_NEW] = */ 3,
|
||||||
/* [ARG_LOGO_INTEL_OLD] = */ 4,
|
/* [ARG_LOGO_INTEL_OLD] = */ 4,
|
||||||
|
/* [ARG_ACCURATE_PP] = */ 5,
|
||||||
/* [ARG_DEBUG] = */ 'd',
|
/* [ARG_DEBUG] = */ 'd',
|
||||||
/* [ARG_VERBOSE] = */ 'v',
|
/* [ARG_VERBOSE] = */ 'v',
|
||||||
/* [ARG_VERSION] = */ 'V',
|
/* [ARG_VERSION] = */ 'V',
|
||||||
@@ -61,6 +63,7 @@ const char *args_str[] = {
|
|||||||
/* [ARG_LOGO_SHORT] = */ "logo-short",
|
/* [ARG_LOGO_SHORT] = */ "logo-short",
|
||||||
/* [ARG_LOGO_INTEL_NEW] = */ "logo-intel-new",
|
/* [ARG_LOGO_INTEL_NEW] = */ "logo-intel-new",
|
||||||
/* [ARG_LOGO_INTEL_OLD] = */ "logo-intel-old",
|
/* [ARG_LOGO_INTEL_OLD] = */ "logo-intel-old",
|
||||||
|
/* [ARG_ACCURATE_PP] = */ "accurate-pp",
|
||||||
/* [ARG_DEBUG] = */ "debug",
|
/* [ARG_DEBUG] = */ "debug",
|
||||||
/* [ARG_VERBOSE] = */ "verbose",
|
/* [ARG_VERBOSE] = */ "verbose",
|
||||||
/* [ARG_VERSION] = */ "version",
|
/* [ARG_VERSION] = */ "version",
|
||||||
@@ -92,6 +95,10 @@ bool show_raw() {
|
|||||||
return args.raw_flag;
|
return args.raw_flag;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool accurate_pp() {
|
||||||
|
return args.accurate_pp;
|
||||||
|
}
|
||||||
|
|
||||||
bool show_full_cpu_name() {
|
bool show_full_cpu_name() {
|
||||||
return args.full_cpu_name_flag;
|
return args.full_cpu_name_flag;
|
||||||
}
|
}
|
||||||
@@ -211,12 +218,13 @@ char* build_short_options() {
|
|||||||
memset(str, 0, sizeof(char) * (len*2 + 1));
|
memset(str, 0, sizeof(char) * (len*2 + 1));
|
||||||
|
|
||||||
#ifdef ARCH_X86
|
#ifdef ARCH_X86
|
||||||
sprintf(str, "%c:%c:%c%c%c%c%c%c%c%c%c%c",
|
sprintf(str, "%c:%c:%c%c%c%c%c%c%c%c%c%c%c",
|
||||||
c[ARG_STYLE], c[ARG_COLOR], c[ARG_HELP],
|
c[ARG_STYLE], c[ARG_COLOR], c[ARG_HELP],
|
||||||
c[ARG_RAW], c[ARG_FULLCPUNAME],
|
c[ARG_RAW], c[ARG_FULLCPUNAME],
|
||||||
c[ARG_LOGO_SHORT], c[ARG_LOGO_LONG],
|
c[ARG_LOGO_SHORT], c[ARG_LOGO_LONG],
|
||||||
c[ARG_LOGO_INTEL_NEW], c[ARG_LOGO_INTEL_OLD],
|
c[ARG_LOGO_INTEL_NEW], c[ARG_LOGO_INTEL_OLD],
|
||||||
c[ARG_DEBUG], c[ARG_VERBOSE], c[ARG_VERSION]);
|
c[ARG_ACCURATE_PP], c[ARG_DEBUG], c[ARG_VERBOSE],
|
||||||
|
c[ARG_VERSION]);
|
||||||
#else
|
#else
|
||||||
sprintf(str, "%c:%c:%c%c%c%c%c%c",
|
sprintf(str, "%c:%c:%c%c%c%c%c%c",
|
||||||
c[ARG_STYLE], c[ARG_COLOR], c[ARG_HELP],
|
c[ARG_STYLE], c[ARG_COLOR], c[ARG_HELP],
|
||||||
@@ -235,6 +243,7 @@ bool parse_args(int argc, char* argv[]) {
|
|||||||
|
|
||||||
bool color_flag = false;
|
bool color_flag = false;
|
||||||
args.debug_flag = false;
|
args.debug_flag = false;
|
||||||
|
args.accurate_pp = false;
|
||||||
args.full_cpu_name_flag = false;
|
args.full_cpu_name_flag = false;
|
||||||
args.raw_flag = false;
|
args.raw_flag = false;
|
||||||
args.verbose_flag = false;
|
args.verbose_flag = false;
|
||||||
@@ -256,6 +265,7 @@ bool parse_args(int argc, char* argv[]) {
|
|||||||
#ifdef ARCH_X86
|
#ifdef ARCH_X86
|
||||||
{args_str[ARG_LOGO_INTEL_NEW], no_argument, 0, args_chr[ARG_LOGO_INTEL_NEW] },
|
{args_str[ARG_LOGO_INTEL_NEW], no_argument, 0, args_chr[ARG_LOGO_INTEL_NEW] },
|
||||||
{args_str[ARG_LOGO_INTEL_OLD], no_argument, 0, args_chr[ARG_LOGO_INTEL_OLD] },
|
{args_str[ARG_LOGO_INTEL_OLD], no_argument, 0, args_chr[ARG_LOGO_INTEL_OLD] },
|
||||||
|
{args_str[ARG_ACCURATE_PP], no_argument, 0, args_chr[ARG_ACCURATE_PP] },
|
||||||
{args_str[ARG_FULLCPUNAME], no_argument, 0, args_chr[ARG_FULLCPUNAME] },
|
{args_str[ARG_FULLCPUNAME], no_argument, 0, args_chr[ARG_FULLCPUNAME] },
|
||||||
{args_str[ARG_RAW], no_argument, 0, args_chr[ARG_RAW] },
|
{args_str[ARG_RAW], no_argument, 0, args_chr[ARG_RAW] },
|
||||||
#endif
|
#endif
|
||||||
@@ -296,6 +306,9 @@ bool parse_args(int argc, char* argv[]) {
|
|||||||
else if(opt == args_chr[ARG_HELP]) {
|
else if(opt == args_chr[ARG_HELP]) {
|
||||||
args.help_flag = true;
|
args.help_flag = true;
|
||||||
}
|
}
|
||||||
|
else if(opt == args_chr[ARG_ACCURATE_PP]) {
|
||||||
|
args.accurate_pp = true;
|
||||||
|
}
|
||||||
else if(opt == args_chr[ARG_FULLCPUNAME]) {
|
else if(opt == args_chr[ARG_FULLCPUNAME]) {
|
||||||
args.full_cpu_name_flag = true;
|
args.full_cpu_name_flag = true;
|
||||||
}
|
}
|
||||||
@@ -349,6 +362,13 @@ bool parse_args(int argc, char* argv[]) {
|
|||||||
args.logo_long = false;
|
args.logo_long = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(ARCH_X86) && ! defined(__linux__)
|
||||||
|
if(args.accurate_pp) {
|
||||||
|
printWarn("%s option is valid only in Linux x86_64", args_str[ARG_ACCURATE_PP]);
|
||||||
|
args.help_flag = true;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
// Leave log level untouched after returning
|
// Leave log level untouched after returning
|
||||||
set_log_level(false);
|
set_log_level(false);
|
||||||
|
|
||||||
|
|||||||
@@ -28,6 +28,7 @@ enum {
|
|||||||
ARG_LOGO_SHORT,
|
ARG_LOGO_SHORT,
|
||||||
ARG_LOGO_INTEL_NEW,
|
ARG_LOGO_INTEL_NEW,
|
||||||
ARG_LOGO_INTEL_OLD,
|
ARG_LOGO_INTEL_OLD,
|
||||||
|
ARG_ACCURATE_PP,
|
||||||
ARG_DEBUG,
|
ARG_DEBUG,
|
||||||
ARG_VERBOSE,
|
ARG_VERBOSE,
|
||||||
ARG_VERSION
|
ARG_VERSION
|
||||||
@@ -41,6 +42,7 @@ extern const char *args_str[];
|
|||||||
int max_arg_str_length();
|
int max_arg_str_length();
|
||||||
bool parse_args(int argc, char* argv[]);
|
bool parse_args(int argc, char* argv[]);
|
||||||
bool show_help();
|
bool show_help();
|
||||||
|
bool accurate_pp();
|
||||||
bool show_full_cpu_name();
|
bool show_full_cpu_name();
|
||||||
bool show_logo_long();
|
bool show_logo_long();
|
||||||
bool show_logo_short();
|
bool show_logo_short();
|
||||||
|
|||||||
@@ -57,6 +57,9 @@ void print_help(char *argv[]) {
|
|||||||
printf(" --%s %*s Show the long version of the logo\n", t[ARG_LOGO_LONG], (int) (max_len-strlen(t[ARG_LOGO_LONG])), "");
|
printf(" --%s %*s Show the long version of the logo\n", t[ARG_LOGO_LONG], (int) (max_len-strlen(t[ARG_LOGO_LONG])), "");
|
||||||
printf(" -%c, --%s %*s Print extra information (if available) about how cpufetch tried fetching information\n", c[ARG_VERBOSE], t[ARG_VERBOSE], (int) (max_len-strlen(t[ARG_VERBOSE])), "");
|
printf(" -%c, --%s %*s Print extra information (if available) about how cpufetch tried fetching information\n", c[ARG_VERBOSE], t[ARG_VERBOSE], (int) (max_len-strlen(t[ARG_VERBOSE])), "");
|
||||||
#ifdef ARCH_X86
|
#ifdef ARCH_X86
|
||||||
|
#ifdef __linux__
|
||||||
|
printf(" --%s %*s Compute the peak performance accurately (measure the CPU frequency instead of using the maximum)\n", t[ARG_ACCURATE_PP], (int) (max_len-strlen(t[ARG_ACCURATE_PP])), "");
|
||||||
|
#endif
|
||||||
printf(" --%s %*s Show the old Intel logo\n", t[ARG_LOGO_INTEL_OLD], (int) (max_len-strlen(t[ARG_LOGO_INTEL_OLD])), "");
|
printf(" --%s %*s Show the old Intel logo\n", t[ARG_LOGO_INTEL_OLD], (int) (max_len-strlen(t[ARG_LOGO_INTEL_OLD])), "");
|
||||||
printf(" --%s %*s Show the new Intel logo\n", t[ARG_LOGO_INTEL_NEW], (int) (max_len-strlen(t[ARG_LOGO_INTEL_NEW])), "");
|
printf(" --%s %*s Show the new Intel logo\n", t[ARG_LOGO_INTEL_NEW], (int) (max_len-strlen(t[ARG_LOGO_INTEL_NEW])), "");
|
||||||
printf(" -%c, --%s %*s Show the full CPU name (do not abbreviate it)\n", c[ARG_FULLCPUNAME], t[ARG_FULLCPUNAME], (int) (max_len-strlen(t[ARG_FULLCPUNAME])), "");
|
printf(" -%c, --%s %*s Show the full CPU name (do not abbreviate it)\n", c[ARG_FULLCPUNAME], t[ARG_FULLCPUNAME], (int) (max_len-strlen(t[ARG_FULLCPUNAME])), "");
|
||||||
@@ -133,6 +136,7 @@ int main(int argc, char* argv[]) {
|
|||||||
return EXIT_SUCCESS;
|
return EXIT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: This should be moved to the end of args.c
|
||||||
if(show_raw()) {
|
if(show_raw()) {
|
||||||
#ifdef ARCH_X86
|
#ifdef ARCH_X86
|
||||||
print_version();
|
print_version();
|
||||||
|
|||||||
@@ -15,8 +15,10 @@
|
|||||||
#include "cpuid.h"
|
#include "cpuid.h"
|
||||||
#include "cpuid_asm.h"
|
#include "cpuid_asm.h"
|
||||||
#include "../common/global.h"
|
#include "../common/global.h"
|
||||||
|
#include "../common/args.h"
|
||||||
#include "apic.h"
|
#include "apic.h"
|
||||||
#include "uarch.h"
|
#include "uarch.h"
|
||||||
|
#include "freq.h"
|
||||||
|
|
||||||
#define CPU_VENDOR_INTEL_STRING "GenuineIntel"
|
#define CPU_VENDOR_INTEL_STRING "GenuineIntel"
|
||||||
#define CPU_VENDOR_AMD_STRING "AuthenticAMD"
|
#define CPU_VENDOR_AMD_STRING "AuthenticAMD"
|
||||||
@@ -177,7 +179,7 @@ struct uarch* get_cpu_uarch(struct cpuInfo* cpu) {
|
|||||||
return get_uarch_from_cpuid(cpu, efamily, family, emodel, model, (int)stepping);
|
return get_uarch_from_cpuid(cpu, efamily, family, emodel, model, (int)stepping);
|
||||||
}
|
}
|
||||||
|
|
||||||
int64_t get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t freq) {
|
int64_t get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t max_freq, bool accurate_pp) {
|
||||||
/*
|
/*
|
||||||
* PP = PeakPerformance
|
* PP = PeakPerformance
|
||||||
* SP = SinglePrecision
|
* SP = SinglePrecision
|
||||||
@@ -190,6 +192,16 @@ int64_t get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t
|
|||||||
* 16(If AVX512), 8(If AVX), 4(If SSE) *
|
* 16(If AVX512), 8(If AVX), 4(If SSE) *
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
int64_t freq;
|
||||||
|
#ifdef __linux__
|
||||||
|
if(accurate_pp) freq = measure_avx_frequency(cpu);
|
||||||
|
else freq = max_freq;
|
||||||
|
#else
|
||||||
|
// Silence compiler warning
|
||||||
|
(void)(accurate_pp);
|
||||||
|
freq = max_freq;
|
||||||
|
#endif
|
||||||
|
|
||||||
//First, check we have consistent data
|
//First, check we have consistent data
|
||||||
if(freq == UNKNOWN_FREQ) {
|
if(freq == UNKNOWN_FREQ) {
|
||||||
return -1;
|
return -1;
|
||||||
@@ -376,7 +388,7 @@ struct cpuInfo* get_cpu_info() {
|
|||||||
cpu->freq = get_frequency_info(cpu);
|
cpu->freq = get_frequency_info(cpu);
|
||||||
cpu->cach = get_cache_info(cpu);
|
cpu->cach = get_cache_info(cpu);
|
||||||
cpu->topo = get_topology_info(cpu, cpu->cach);
|
cpu->topo = get_topology_info(cpu, cpu->cach);
|
||||||
cpu->peak_performance = get_peak_performance(cpu, cpu->topo, get_freq(cpu->freq));
|
cpu->peak_performance = get_peak_performance(cpu, cpu->topo, get_freq(cpu->freq), accurate_pp());
|
||||||
|
|
||||||
if(cpu->cach == NULL || cpu->topo == NULL) {
|
if(cpu->cach == NULL || cpu->topo == NULL) {
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|||||||
158
src/x86/freq.c
Normal file
158
src/x86/freq.c
Normal file
@@ -0,0 +1,158 @@
|
|||||||
|
#define _GNU_SOURCE
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include "freq.h"
|
||||||
|
#include "../common/global.h"
|
||||||
|
|
||||||
|
#include <immintrin.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <sys/time.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <pthread.h>
|
||||||
|
|
||||||
|
#define MEASURE_TIME_SECONDS 5
|
||||||
|
#define MAX_NUMBER_THREADS 512
|
||||||
|
#define FREQ_VECTOR_SIZE 1<<16
|
||||||
|
#define LOOP_ITERS 100000000
|
||||||
|
|
||||||
|
struct freq_thread {
|
||||||
|
bool end;
|
||||||
|
bool measure;
|
||||||
|
double freq;
|
||||||
|
};
|
||||||
|
|
||||||
|
double vector_average_harmonic(double* v, int len) {
|
||||||
|
double acc = 0.0;
|
||||||
|
for(int i=0; i < len; i++) {
|
||||||
|
acc += 1 / v[i];
|
||||||
|
}
|
||||||
|
return len / acc;
|
||||||
|
}
|
||||||
|
|
||||||
|
void sleep_ms(int64_t ms) {
|
||||||
|
struct timespec ts;
|
||||||
|
ts.tv_sec = ms / 1000;
|
||||||
|
ts.tv_nsec = (ms % 1000) * 1000000;
|
||||||
|
nanosleep(&ts, &ts);
|
||||||
|
}
|
||||||
|
|
||||||
|
void* measure_freq(void *freq_ptr) {
|
||||||
|
struct freq_thread* freq = (struct freq_thread*) freq_ptr;
|
||||||
|
|
||||||
|
char* end = NULL;
|
||||||
|
char* line = NULL;
|
||||||
|
size_t len = 0;
|
||||||
|
ssize_t read;
|
||||||
|
|
||||||
|
int v = 0;
|
||||||
|
double* freq_vector = malloc(sizeof(double) * FREQ_VECTOR_SIZE);
|
||||||
|
|
||||||
|
while(!freq->end) {
|
||||||
|
if(!freq->measure) continue;
|
||||||
|
|
||||||
|
FILE* fp = fopen("/proc/cpuinfo", "r");
|
||||||
|
if(fp == NULL) return NULL;
|
||||||
|
while ((read = getline(&line, &len, fp)) != -1) {
|
||||||
|
if((line = strstr(line, "cpu MHz")) != NULL) {
|
||||||
|
line = strstr(line, "\t: ");
|
||||||
|
if(line == NULL) return NULL;
|
||||||
|
line += sizeof("\t: ") - 1;
|
||||||
|
double f = strtold(line, &end);
|
||||||
|
if(errno != 0) {
|
||||||
|
printf("strtol: %s", strerror(errno));
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
freq_vector[v] = f;
|
||||||
|
v++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fclose(fp);
|
||||||
|
sleep_ms(500);
|
||||||
|
}
|
||||||
|
|
||||||
|
freq->freq = vector_average_harmonic(freq_vector, v);
|
||||||
|
printWarn("AVX2 measured freq=%f\n", freq->freq);
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
void* compute_avx() {
|
||||||
|
bool end = false;
|
||||||
|
|
||||||
|
struct timeval begin, now;
|
||||||
|
|
||||||
|
__m256 a = _mm256_set1_ps(1.5);
|
||||||
|
__m256 b = _mm256_set1_ps(1.2);
|
||||||
|
__m256 c = _mm256_set1_ps(0.0);
|
||||||
|
|
||||||
|
gettimeofday(&begin, NULL);
|
||||||
|
while(!end) {
|
||||||
|
for(uint64_t i=0; i < LOOP_ITERS; i++) {
|
||||||
|
c = _mm256_fmadd_ps(a, b, c);
|
||||||
|
}
|
||||||
|
|
||||||
|
gettimeofday(&now, NULL);
|
||||||
|
double elapsed = (now.tv_sec - begin.tv_sec) + ((now.tv_usec - begin.tv_usec)/1000000.0);
|
||||||
|
end = elapsed >= (double) MEASURE_TIME_SECONDS;
|
||||||
|
}
|
||||||
|
|
||||||
|
FILE* fp = fopen("/dev/null", "w");
|
||||||
|
if(fp == NULL) {
|
||||||
|
printf("fopen: %s", strerror(errno));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
fprintf(fp, "%f", c[0]);
|
||||||
|
fclose(fp);
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
int64_t measure_avx_frequency(struct cpuInfo* cpu) {
|
||||||
|
int ret;
|
||||||
|
struct freq_thread* freq_struct = malloc(sizeof(struct freq_thread));
|
||||||
|
freq_struct->end = false;
|
||||||
|
freq_struct->measure = false;
|
||||||
|
|
||||||
|
pthread_t freq_t;
|
||||||
|
if(pthread_create(&freq_t, NULL, measure_freq, freq_struct)) {
|
||||||
|
fprintf(stderr, "Error creating thread\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
pthread_t* compute_th = malloc(sizeof(pthread_t) * cpu->topo->total_cores);
|
||||||
|
for(int i=0; i < cpu->topo->total_cores; i++) {
|
||||||
|
ret = pthread_create(&compute_th[i], NULL, compute_avx, NULL);
|
||||||
|
|
||||||
|
if(ret != 0) {
|
||||||
|
fprintf(stderr, "Error creating thread\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("cpufetch is measuring AVX2 frequency...");
|
||||||
|
fflush(stdout);
|
||||||
|
|
||||||
|
sleep_ms(500);
|
||||||
|
freq_struct->measure = true;
|
||||||
|
|
||||||
|
for(int i=0; i < cpu->topo->total_cores; i++) {
|
||||||
|
if(pthread_join(compute_th[i], NULL)) {
|
||||||
|
fprintf(stderr, "Error joining thread\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
freq_struct->end = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(pthread_join(freq_t, NULL)) {
|
||||||
|
fprintf(stderr, "Error joining thread\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("\r ");
|
||||||
|
return freq_struct->freq;
|
||||||
|
}
|
||||||
9
src/x86/freq.h
Normal file
9
src/x86/freq.h
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
#ifndef __FREQ__
|
||||||
|
#define __FREQ__
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include "../common/cpu.h"
|
||||||
|
|
||||||
|
int64_t measure_avx_frequency(struct cpuInfo* cpu);
|
||||||
|
|
||||||
|
#endif
|
||||||
Reference in New Issue
Block a user