From 2cdc31392a17cdfefb690b91389d747ef9591016 Mon Sep 17 00:00:00 2001 From: Dr-Noob Date: Thu, 16 Sep 2021 15:12:34 +0200 Subject: [PATCH] [v1.00] Add --accurate-pp option (Linux x86_64) only. Needs more work to make it stable (add support for non-AVX, AVX512, etc) --- Makefile | 17 +++-- src/common/args.c | 24 ++++++- src/common/args.h | 2 + src/common/main.c | 4 ++ src/x86/cpuid.c | 16 ++++- src/x86/freq.c | 158 ++++++++++++++++++++++++++++++++++++++++++++++ src/x86/freq.h | 9 +++ 7 files changed, 222 insertions(+), 8 deletions(-) create mode 100644 src/x86/freq.c create mode 100644 src/x86/freq.h diff --git a/Makefile b/Makefile index c03cd5a..0c806af 100644 --- a/Makefile +++ b/Makefile @@ -16,6 +16,12 @@ ifneq ($(OS),Windows_NT) SRC_DIR=src/x86/ SOURCE += $(COMMON_SRC) $(SRC_DIR)cpuid.c $(SRC_DIR)apic.c $(SRC_DIR)cpuid_asm.c $(SRC_DIR)uarch.c HEADERS += $(COMMON_HDR) $(SRC_DIR)cpuid.h $(SRC_DIR)apic.h $(SRC_DIR)cpuid_asm.h $(SRC_DIR)uarch.h + + os := $(shell uname -s) + ifeq ($(os), Linux) + SOURCE += freq.o + CFLAGS += -pthread + endif CFLAGS += -DARCH_X86 -std=c99 -fstack-protector-all else ifeq ($(arch), $(filter $(arch), ppc64le ppc64 ppcle ppc)) SRC_DIR=src/ppc/ @@ -51,18 +57,21 @@ else OUTPUT=cpufetch.exe endif -all: CFLAGS += -O3 +all: CFLAGS += -O2 all: $(OUTPUT) debug: CFLAGS += -g -O0 debug: $(OUTPUT) -static: CFLAGS += -static -O3 +static: CFLAGS += -static -O2 static: $(OUTPUT) -strict: CFLAGS += -O3 -Werror -fsanitize=undefined -D_FORTIFY_SOURCE=2 +strict: CFLAGS += -O2 -Werror -fsanitize=undefined -D_FORTIFY_SOURCE=2 strict: $(OUTPUT) +freq.o: Makefile $(SRC_DIR)freq.c $(SRC_DIR)freq.h + $(CC) $(CFLAGS) $(SANITY_FLAGS) -c -mavx -mfma -pthread $(SRC_DIR)freq.c -o freq.o + $(OUTPUT): Makefile $(SOURCE) $(HEADERS) $(CC) $(CFLAGS) $(SANITY_FLAGS) $(SOURCE) -o $(OUTPUT) @@ -70,7 +79,7 @@ run: $(OUTPUT) ./$(OUTPUT) clean: - @rm -f $(OUTPUT) + @rm -f $(OUTPUT) freq.o install: $(OUTPUT) install -Dm755 "cpufetch" "$(DESTDIR)$(PREFIX)/bin/cpufetch" diff --git a/src/common/args.c b/src/common/args.c index 616cd39..475855e 100644 --- a/src/common/args.c +++ b/src/common/args.c @@ -25,6 +25,7 @@ struct args_struct { bool debug_flag; bool help_flag; bool raw_flag; + bool accurate_pp; bool full_cpu_name_flag; bool logo_long; bool logo_short; @@ -46,6 +47,7 @@ const char args_chr[] = { /* [ARG_LOGO_SHORT] = */ 2, /* [ARG_LOGO_INTEL_NEW] = */ 3, /* [ARG_LOGO_INTEL_OLD] = */ 4, + /* [ARG_ACCURATE_PP] = */ 5, /* [ARG_DEBUG] = */ 'd', /* [ARG_VERBOSE] = */ 'v', /* [ARG_VERSION] = */ 'V', @@ -61,6 +63,7 @@ const char *args_str[] = { /* [ARG_LOGO_SHORT] = */ "logo-short", /* [ARG_LOGO_INTEL_NEW] = */ "logo-intel-new", /* [ARG_LOGO_INTEL_OLD] = */ "logo-intel-old", + /* [ARG_ACCURATE_PP] = */ "accurate-pp", /* [ARG_DEBUG] = */ "debug", /* [ARG_VERBOSE] = */ "verbose", /* [ARG_VERSION] = */ "version", @@ -92,6 +95,10 @@ bool show_raw() { return args.raw_flag; } +bool accurate_pp() { + return args.accurate_pp; +} + bool show_full_cpu_name() { return args.full_cpu_name_flag; } @@ -211,12 +218,13 @@ char* build_short_options() { memset(str, 0, sizeof(char) * (len*2 + 1)); #ifdef ARCH_X86 - sprintf(str, "%c:%c:%c%c%c%c%c%c%c%c%c%c", + sprintf(str, "%c:%c:%c%c%c%c%c%c%c%c%c%c%c", c[ARG_STYLE], c[ARG_COLOR], c[ARG_HELP], c[ARG_RAW], c[ARG_FULLCPUNAME], c[ARG_LOGO_SHORT], c[ARG_LOGO_LONG], c[ARG_LOGO_INTEL_NEW], c[ARG_LOGO_INTEL_OLD], - c[ARG_DEBUG], c[ARG_VERBOSE], c[ARG_VERSION]); + c[ARG_ACCURATE_PP], c[ARG_DEBUG], c[ARG_VERBOSE], + c[ARG_VERSION]); #else sprintf(str, "%c:%c:%c%c%c%c%c%c", c[ARG_STYLE], c[ARG_COLOR], c[ARG_HELP], @@ -235,6 +243,7 @@ bool parse_args(int argc, char* argv[]) { bool color_flag = false; args.debug_flag = false; + args.accurate_pp = false; args.full_cpu_name_flag = false; args.raw_flag = false; args.verbose_flag = false; @@ -256,6 +265,7 @@ bool parse_args(int argc, char* argv[]) { #ifdef ARCH_X86 {args_str[ARG_LOGO_INTEL_NEW], no_argument, 0, args_chr[ARG_LOGO_INTEL_NEW] }, {args_str[ARG_LOGO_INTEL_OLD], no_argument, 0, args_chr[ARG_LOGO_INTEL_OLD] }, + {args_str[ARG_ACCURATE_PP], no_argument, 0, args_chr[ARG_ACCURATE_PP] }, {args_str[ARG_FULLCPUNAME], no_argument, 0, args_chr[ARG_FULLCPUNAME] }, {args_str[ARG_RAW], no_argument, 0, args_chr[ARG_RAW] }, #endif @@ -296,6 +306,9 @@ bool parse_args(int argc, char* argv[]) { else if(opt == args_chr[ARG_HELP]) { args.help_flag = true; } + else if(opt == args_chr[ARG_ACCURATE_PP]) { + args.accurate_pp = true; + } else if(opt == args_chr[ARG_FULLCPUNAME]) { args.full_cpu_name_flag = true; } @@ -349,6 +362,13 @@ bool parse_args(int argc, char* argv[]) { args.logo_long = false; } +#if defined(ARCH_X86) && ! defined(__linux__) + if(args.accurate_pp) { + printWarn("%s option is valid only in Linux x86_64", args_str[ARG_ACCURATE_PP]); + args.help_flag = true; + } +#endif + // Leave log level untouched after returning set_log_level(false); diff --git a/src/common/args.h b/src/common/args.h index 9945a88..7e7c3a9 100644 --- a/src/common/args.h +++ b/src/common/args.h @@ -28,6 +28,7 @@ enum { ARG_LOGO_SHORT, ARG_LOGO_INTEL_NEW, ARG_LOGO_INTEL_OLD, + ARG_ACCURATE_PP, ARG_DEBUG, ARG_VERBOSE, ARG_VERSION @@ -41,6 +42,7 @@ extern const char *args_str[]; int max_arg_str_length(); bool parse_args(int argc, char* argv[]); bool show_help(); +bool accurate_pp(); bool show_full_cpu_name(); bool show_logo_long(); bool show_logo_short(); diff --git a/src/common/main.c b/src/common/main.c index 3e4a50d..c6706c5 100644 --- a/src/common/main.c +++ b/src/common/main.c @@ -57,6 +57,9 @@ void print_help(char *argv[]) { printf(" --%s %*s Show the long version of the logo\n", t[ARG_LOGO_LONG], (int) (max_len-strlen(t[ARG_LOGO_LONG])), ""); printf(" -%c, --%s %*s Print extra information (if available) about how cpufetch tried fetching information\n", c[ARG_VERBOSE], t[ARG_VERBOSE], (int) (max_len-strlen(t[ARG_VERBOSE])), ""); #ifdef ARCH_X86 +#ifdef __linux__ + printf(" --%s %*s Compute the peak performance accurately (measure the CPU frequency instead of using the maximum)\n", t[ARG_ACCURATE_PP], (int) (max_len-strlen(t[ARG_ACCURATE_PP])), ""); +#endif printf(" --%s %*s Show the old Intel logo\n", t[ARG_LOGO_INTEL_OLD], (int) (max_len-strlen(t[ARG_LOGO_INTEL_OLD])), ""); printf(" --%s %*s Show the new Intel logo\n", t[ARG_LOGO_INTEL_NEW], (int) (max_len-strlen(t[ARG_LOGO_INTEL_NEW])), ""); printf(" -%c, --%s %*s Show the full CPU name (do not abbreviate it)\n", c[ARG_FULLCPUNAME], t[ARG_FULLCPUNAME], (int) (max_len-strlen(t[ARG_FULLCPUNAME])), ""); @@ -133,6 +136,7 @@ int main(int argc, char* argv[]) { return EXIT_SUCCESS; } + // TODO: This should be moved to the end of args.c if(show_raw()) { #ifdef ARCH_X86 print_version(); diff --git a/src/x86/cpuid.c b/src/x86/cpuid.c index 5dfa96d..0bedc2a 100644 --- a/src/x86/cpuid.c +++ b/src/x86/cpuid.c @@ -15,8 +15,10 @@ #include "cpuid.h" #include "cpuid_asm.h" #include "../common/global.h" +#include "../common/args.h" #include "apic.h" #include "uarch.h" +#include "freq.h" #define CPU_VENDOR_INTEL_STRING "GenuineIntel" #define CPU_VENDOR_AMD_STRING "AuthenticAMD" @@ -177,7 +179,7 @@ struct uarch* get_cpu_uarch(struct cpuInfo* cpu) { return get_uarch_from_cpuid(cpu, efamily, family, emodel, model, (int)stepping); } -int64_t get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t freq) { +int64_t get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t max_freq, bool accurate_pp) { /* * PP = PeakPerformance * SP = SinglePrecision @@ -190,6 +192,16 @@ int64_t get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t * 16(If AVX512), 8(If AVX), 4(If SSE) * */ + int64_t freq; +#ifdef __linux__ + if(accurate_pp) freq = measure_avx_frequency(cpu); + else freq = max_freq; +#else + // Silence compiler warning + (void)(accurate_pp); + freq = max_freq; +#endif + //First, check we have consistent data if(freq == UNKNOWN_FREQ) { return -1; @@ -376,7 +388,7 @@ struct cpuInfo* get_cpu_info() { cpu->freq = get_frequency_info(cpu); cpu->cach = get_cache_info(cpu); cpu->topo = get_topology_info(cpu, cpu->cach); - cpu->peak_performance = get_peak_performance(cpu, cpu->topo, get_freq(cpu->freq)); + cpu->peak_performance = get_peak_performance(cpu, cpu->topo, get_freq(cpu->freq), accurate_pp()); if(cpu->cach == NULL || cpu->topo == NULL) { return NULL; diff --git a/src/x86/freq.c b/src/x86/freq.c new file mode 100644 index 0000000..d103426 --- /dev/null +++ b/src/x86/freq.c @@ -0,0 +1,158 @@ +#define _GNU_SOURCE +#include + +#include "freq.h" +#include "../common/global.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#define MEASURE_TIME_SECONDS 5 +#define MAX_NUMBER_THREADS 512 +#define FREQ_VECTOR_SIZE 1<<16 +#define LOOP_ITERS 100000000 + +struct freq_thread { + bool end; + bool measure; + double freq; +}; + +double vector_average_harmonic(double* v, int len) { + double acc = 0.0; + for(int i=0; i < len; i++) { + acc += 1 / v[i]; + } + return len / acc; +} + +void sleep_ms(int64_t ms) { + struct timespec ts; + ts.tv_sec = ms / 1000; + ts.tv_nsec = (ms % 1000) * 1000000; + nanosleep(&ts, &ts); +} + +void* measure_freq(void *freq_ptr) { + struct freq_thread* freq = (struct freq_thread*) freq_ptr; + + char* end = NULL; + char* line = NULL; + size_t len = 0; + ssize_t read; + + int v = 0; + double* freq_vector = malloc(sizeof(double) * FREQ_VECTOR_SIZE); + + while(!freq->end) { + if(!freq->measure) continue; + + FILE* fp = fopen("/proc/cpuinfo", "r"); + if(fp == NULL) return NULL; + while ((read = getline(&line, &len, fp)) != -1) { + if((line = strstr(line, "cpu MHz")) != NULL) { + line = strstr(line, "\t: "); + if(line == NULL) return NULL; + line += sizeof("\t: ") - 1; + double f = strtold(line, &end); + if(errno != 0) { + printf("strtol: %s", strerror(errno)); + return NULL; + } + + freq_vector[v] = f; + v++; + } + } + fclose(fp); + sleep_ms(500); + } + + freq->freq = vector_average_harmonic(freq_vector, v); + printWarn("AVX2 measured freq=%f\n", freq->freq); + + return NULL; +} + +void* compute_avx() { + bool end = false; + + struct timeval begin, now; + + __m256 a = _mm256_set1_ps(1.5); + __m256 b = _mm256_set1_ps(1.2); + __m256 c = _mm256_set1_ps(0.0); + + gettimeofday(&begin, NULL); + while(!end) { + for(uint64_t i=0; i < LOOP_ITERS; i++) { + c = _mm256_fmadd_ps(a, b, c); + } + + gettimeofday(&now, NULL); + double elapsed = (now.tv_sec - begin.tv_sec) + ((now.tv_usec - begin.tv_usec)/1000000.0); + end = elapsed >= (double) MEASURE_TIME_SECONDS; + } + + FILE* fp = fopen("/dev/null", "w"); + if(fp == NULL) { + printf("fopen: %s", strerror(errno)); + } + else { + fprintf(fp, "%f", c[0]); + fclose(fp); + } + + return NULL; +} + +int64_t measure_avx_frequency(struct cpuInfo* cpu) { + int ret; + struct freq_thread* freq_struct = malloc(sizeof(struct freq_thread)); + freq_struct->end = false; + freq_struct->measure = false; + + pthread_t freq_t; + if(pthread_create(&freq_t, NULL, measure_freq, freq_struct)) { + fprintf(stderr, "Error creating thread\n"); + return -1; + } + + pthread_t* compute_th = malloc(sizeof(pthread_t) * cpu->topo->total_cores); + for(int i=0; i < cpu->topo->total_cores; i++) { + ret = pthread_create(&compute_th[i], NULL, compute_avx, NULL); + + if(ret != 0) { + fprintf(stderr, "Error creating thread\n"); + return -1; + } + } + + printf("cpufetch is measuring AVX2 frequency..."); + fflush(stdout); + + sleep_ms(500); + freq_struct->measure = true; + + for(int i=0; i < cpu->topo->total_cores; i++) { + if(pthread_join(compute_th[i], NULL)) { + fprintf(stderr, "Error joining thread\n"); + return -1; + } + freq_struct->end = true; + } + + if(pthread_join(freq_t, NULL)) { + fprintf(stderr, "Error joining thread\n"); + return -1; + } + + printf("\r "); + return freq_struct->freq; +} diff --git a/src/x86/freq.h b/src/x86/freq.h new file mode 100644 index 0000000..a9b38b6 --- /dev/null +++ b/src/x86/freq.h @@ -0,0 +1,9 @@ +#ifndef __FREQ__ +#define __FREQ__ + +#include +#include "../common/cpu.h" + +int64_t measure_avx_frequency(struct cpuInfo* cpu); + +#endif