From 1ed3a0f2bf33ae3ccbf94fbdded88c92450d9132 Mon Sep 17 00:00:00 2001 From: Dr-Noob Date: Thu, 11 Jul 2024 21:55:01 +0100 Subject: [PATCH] [v1.05] Adapt frequency measurement iterations depending on CPU speed This is achieved by running a first measurement which gets a taste of CPU speed, then estimate a reasonable value for the iterations of the real measurement and then running the actual measurement. This is very helpful to reduce the runtime of the measurement, especially for slow CPUs --- src/common/freq.c | 89 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 60 insertions(+), 29 deletions(-) diff --git a/src/common/freq.c b/src/common/freq.c index 001b5b3..7f9b4b6 100644 --- a/src/common/freq.c +++ b/src/common/freq.c @@ -69,28 +69,15 @@ void nop_function(uint64_t iters) { } } -// Differences between x86 measure_frequency this measure_max_frequency: -// - measure_frequency employs all cores simultaneously wherease -// measure_max_frequency only employs 1. -// - measure_frequency runs the computation and checks /proc/cpuinfo whereas -// measure_max_frequency does not rely on /proc/cpuinfo and simply -// counts cpu cycles to measure frequency. -// - measure_frequency uses actual computation while measuring the frequency -// whereas measure_max_frequency uses nop instructions. This makes the former -// x86 dependant whereas the latter is architecture independant. -int64_t measure_max_frequency(uint32_t core) { - if (!bind_to_cpu(core)) { - printErr("Failed binding the process to CPU %d", core); - return -1; - } - +// Run the nop_function with the number of iterations specified and +// measure both the time and number of cycles +int measure_freq_iters(uint64_t iters, uint32_t core, double* freq) { clockid_t clock = CLOCK_PROCESS_CPUTIME_ID; - + struct timespec start, end; struct perf_event_attr pe; - uint64_t instructions; + uint64_t cycles; int fd; int pid = 0; - memset(&pe, 0, sizeof(struct perf_event_attr)); pe.type = PERF_TYPE_HARDWARE; pe.size = sizeof(struct perf_event_attr); @@ -109,12 +96,6 @@ int64_t measure_max_frequency(uint32_t core) { return -1; } - const char* frequency_banner = "cpufetch is measuring the max frequency..."; - printf("%s", frequency_banner); - fflush(stdout); - - uint64_t iters = 10000000; - struct timespec start, end; if (clock_gettime(clock, &start) == -1) { perror("clock_gettime"); return -1; @@ -130,10 +111,7 @@ int64_t measure_max_frequency(uint32_t core) { nop_function(iters); - // Clean screen once measurement is finished - printf("\r%*c\r", (int) strlen(frequency_banner), ' '); - - ssize_t ret = read(fd, &instructions, sizeof(uint64_t)); + ssize_t ret = read(fd, &cycles, sizeof(uint64_t)); if (ret == -1) { perror("read"); return -1; @@ -153,7 +131,60 @@ int64_t measure_max_frequency(uint32_t core) { uint64_t nsecs = (end.tv_sec*1e9 + end.tv_nsec) - (start.tv_sec*1e9 + start.tv_nsec); uint64_t usecs = nsecs/1000; - double frequency = instructions/((double)usecs); + *freq = cycles/((double)usecs); + return 0; +} + +// Return a good number of iterations to run the nop_function in +// order to get a precise measurement of the frequency without taking +// too much time. +uint64_t get_num_iters_from_freq(double frequency) { + // Truncate to reduce variability + uint64_t freq_trunc = ((uint64_t) frequency / 100) * 100; + uint64_t osp_per_iter = 4 * 1000; + + return freq_trunc * 1e7 * 1/osp_per_iter; +} + +// Differences between x86 measure_frequency and this measure_max_frequency: +// - measure_frequency employs all cores simultaneously whereas +// measure_max_frequency only employs 1. +// - measure_frequency runs the computation and checks /proc/cpuinfo whereas +// measure_max_frequency does not rely on /proc/cpuinfo and simply +// counts cpu cycles to measure frequency. +// - measure_frequency uses actual computation while measuring the frequency +// whereas measure_max_frequency uses nop instructions. This makes the former +// x86 dependant whereas the latter is architecture independant. +int64_t measure_max_frequency(uint32_t core) { + if (!bind_to_cpu(core)) { + printErr("Failed binding the process to CPU %d", core); + return -1; + } + + // First, get very rough estimation of clock cycle to + // compute a reasonable value for the iterations + double estimation_freq, frequency; + uint64_t iters = 100000; + if (measure_freq_iters(iters, core, &estimation_freq) == -1) + return -1; + + if (estimation_freq <= 0.0) { + printErr("First frequency measurement yielded an invalid value: %f", estimation_freq); + return -1; + } + iters = get_num_iters_from_freq(estimation_freq); + printWarn("Running frequency measurement with %ld iterations on core %d...", iters, core); + + // Now perform actual measurement + const char* frequency_banner = "cpufetch is measuring the max frequency..."; + printf("%s", frequency_banner); + fflush(stdout); + + if (measure_freq_iters(iters, core, &frequency) == -1) + return -1; + + // Clean screen once measurement is finished + printf("\r%*c\r", (int) strlen(frequency_banner), ' '); // Discard last digit in the frequency, which should help providing // more reliable and predictable values.