Compare commits

...

11 Commits

12 changed files with 266 additions and 53 deletions

View File

@@ -7,8 +7,8 @@ PREFIX ?= /usr
SRC_COMMON=src/common/ SRC_COMMON=src/common/
COMMON_SRC = $(SRC_COMMON)main.c $(SRC_COMMON)cpu.c $(SRC_COMMON)udev.c $(SRC_COMMON)printer.c $(SRC_COMMON)args.c $(SRC_COMMON)global.c COMMON_SRC = $(SRC_COMMON)main.c $(SRC_COMMON)cpu.c $(SRC_COMMON)udev.c $(SRC_COMMON)printer.c $(SRC_COMMON)args.c $(SRC_COMMON)global.c $(SRC_COMMON)freq.c
COMMON_HDR = $(SRC_COMMON)ascii.h $(SRC_COMMON)cpu.h $(SRC_COMMON)udev.h $(SRC_COMMON)printer.h $(SRC_COMMON)args.h $(SRC_COMMON)global.h COMMON_HDR = $(SRC_COMMON)ascii.h $(SRC_COMMON)cpu.h $(SRC_COMMON)udev.h $(SRC_COMMON)printer.h $(SRC_COMMON)args.h $(SRC_COMMON)global.h $(SRC_COMMON)freq.h
ifneq ($(OS),Windows_NT) ifneq ($(OS),Windows_NT)
GIT_VERSION := "$(shell git describe --abbrev=4 --dirty --always --tags)" GIT_VERSION := "$(shell git describe --abbrev=4 --dirty --always --tags)"

View File

@@ -14,6 +14,7 @@
#include "../common/global.h" #include "../common/global.h"
#include "../common/soc.h" #include "../common/soc.h"
#include "../common/freq.h"
#include "udev.h" #include "udev.h"
#include "midr.h" #include "midr.h"
#include "uarch.h" #include "uarch.h"
@@ -41,6 +42,12 @@ struct frequency* get_frequency_info(uint32_t core) {
freq->base = UNKNOWN_DATA; freq->base = UNKNOWN_DATA;
freq->max = get_max_freq_from_file(core); freq->max = get_max_freq_from_file(core);
#ifdef __linux__
if (freq->max == UNKNOWN_DATA) {
printWarn("Unable to find max frequency from udev, measuring CPU frequency");
freq->max = measure_max_frequency(core);
}
#endif
return freq; return freq;
} }

154
src/common/freq.c Normal file
View File

@@ -0,0 +1,154 @@
#ifdef __linux__
#define _GNU_SOURCE
#include <time.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <asm/unistd.h>
#include <sys/ioctl.h>
#include <linux/perf_event.h>
#include "global.h"
// Invokes the perf_event_open system call through syscall(), forwarding all
// the arguments unchanged.
// Returns whatever the system call returned (narrowed to int); callers in
// this file treat -1 as failure and inspect errno.
static long
perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
                int cpu, int group_fd, unsigned long flags) {
  const int status = (int) syscall(__NR_perf_event_open, hw_event, pid, cpu,
                                   group_fd, flags);
  return status;
}
// Helper macros that expand to straight-line runs of nop instructions.
// Each level repeats the previous one ten times (1 -> 10 -> 100 -> 1000).
// NOTE: the last line of each definition must NOT end with a backslash,
// otherwise the following line is spliced into the macro body.
#define INSERT_ASM_ONCE __asm volatile("nop");

#define INSERT_ASM_10_TIMES \
  INSERT_ASM_ONCE \
  INSERT_ASM_ONCE \
  INSERT_ASM_ONCE \
  INSERT_ASM_ONCE \
  INSERT_ASM_ONCE \
  INSERT_ASM_ONCE \
  INSERT_ASM_ONCE \
  INSERT_ASM_ONCE \
  INSERT_ASM_ONCE \
  INSERT_ASM_ONCE

#define INSERT_ASM_100_TIMES \
  INSERT_ASM_10_TIMES \
  INSERT_ASM_10_TIMES \
  INSERT_ASM_10_TIMES \
  INSERT_ASM_10_TIMES \
  INSERT_ASM_10_TIMES \
  INSERT_ASM_10_TIMES \
  INSERT_ASM_10_TIMES \
  INSERT_ASM_10_TIMES \
  INSERT_ASM_10_TIMES \
  INSERT_ASM_10_TIMES

#define INSERT_ASM_1000_TIMES \
  INSERT_ASM_100_TIMES \
  INSERT_ASM_100_TIMES \
  INSERT_ASM_100_TIMES \
  INSERT_ASM_100_TIMES \
  INSERT_ASM_100_TIMES \
  INSERT_ASM_100_TIMES \
  INSERT_ASM_100_TIMES \
  INSERT_ASM_100_TIMES \
  INSERT_ASM_100_TIMES \
  INSERT_ASM_100_TIMES

// Workload used while measuring the frequency: performs `iters` rounds, each
// one executing four INSERT_ASM_1000_TIMES expansions back to back.
void nop_function(uint64_t iters) {
  uint64_t round = 0;
  while (round < iters) {
    INSERT_ASM_1000_TIMES
    INSERT_ASM_1000_TIMES
    INSERT_ASM_1000_TIMES
    INSERT_ASM_1000_TIMES
    round++;
  }
}
// Differences between the x86 measure_frequency and this measure_max_frequency:
// - measure_frequency employs all cores simultaneously, whereas
//   measure_max_frequency only employs one.
// - measure_frequency runs the computation and checks /proc/cpuinfo, whereas
//   measure_max_frequency does not rely on /proc/cpuinfo and simply
//   counts CPU cycles to measure the frequency.
// - measure_frequency uses actual computation while measuring the frequency,
//   whereas measure_max_frequency uses nop instructions. This makes the former
//   x86 dependent, whereas the latter is architecture independent.
//
// Binds the process to `core`, counts the user-space CPU cycles consumed while
// running nop_function and divides them by the elapsed process CPU time.
// Returns the measured frequency in cycles per microsecond (i.e. MHz), rounded
// to a multiple of 10, or -1 if any step of the measurement fails.
int64_t measure_max_frequency(uint32_t core) {
  if (!bind_to_cpu(core)) {
    printErr("Failed binding the process to CPU %d", core);
    return -1;
  }

  clockid_t clock = CLOCK_PROCESS_CPUTIME_ID;
  struct perf_event_attr pe;
  struct timespec start, end;
  uint64_t cycles = 0;
  uint64_t iters = 10000000;
  int64_t max_freq = -1;
  int64_t nsecs;
  int64_t usecs;
  double frequency;
  ssize_t bytes_read;
  int fd;
  int pid = 0;

  memset(&pe, 0, sizeof(struct perf_event_attr));
  pe.type = PERF_TYPE_HARDWARE;
  pe.size = sizeof(struct perf_event_attr);
  pe.config = PERF_COUNT_HW_CPU_CYCLES;
  // The counter starts disabled and is enabled right before the workload
  pe.disabled = 1;
  pe.exclude_kernel = 1;
  pe.exclude_hv = 1;

  fd = perf_event_open(&pe, pid, core, -1, 0);
  if (fd == -1) {
    // Keep a copy of errno, since perror/printf are allowed to modify it
    int open_errno = errno;
    perror("perf_event_open");
    if (open_errno == EPERM || open_errno == EACCES) {
      printf("You may not have permission to collect stats.\n");
      printf("Consider tweaking /proc/sys/kernel/perf_event_paranoid or running as root.\n");
    }
    return -1;
  }

  const char* frequency_banner = "cpufetch is measuring the max frequency...";
  printf("%s", frequency_banner);
  fflush(stdout);

  // From here on, every exit path goes through 'cleanup' so that the
  // perf event file descriptor is always closed.
  if (clock_gettime(clock, &start) == -1) {
    perror("clock_gettime");
    goto cleanup;
  }
  if (ioctl(fd, PERF_EVENT_IOC_RESET, 0) == -1) {
    perror("ioctl");
    goto cleanup;
  }
  if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) == -1) {
    perror("ioctl");
    goto cleanup;
  }

  nop_function(iters);

  // Without this check a failed read would leave the counter value undefined
  bytes_read = read(fd, &cycles, sizeof(cycles));
  if (bytes_read != (ssize_t) sizeof(cycles)) {
    if (bytes_read == -1) {
      perror("read");
    }
    else {
      printErr("Unexpected number of bytes read from the cycle counter");
    }
    goto cleanup;
  }
  if (ioctl(fd, PERF_EVENT_IOC_DISABLE, 0) == -1) {
    perror("ioctl");
    goto cleanup;
  }
  if (clock_gettime(clock, &end) == -1) {
    perror("clock_gettime");
    goto cleanup;
  }

  nsecs = ((int64_t) end.tv_sec - (int64_t) start.tv_sec) * 1000000000LL
          + ((int64_t) end.tv_nsec - (int64_t) start.tv_nsec);
  usecs = nsecs / 1000;
  if (usecs <= 0) {
    // Avoid dividing by zero (and converting an infinite value to int)
    printErr("Elapsed CPU time is too short to measure the frequency");
    goto cleanup;
  }

  frequency = cycles / ((double) usecs);
  printf("\r%*c\r", (int) strlen(frequency_banner), ' ');

  // Discard last digit in the frequency, which should help providing
  // more reliable and predictable values.
  max_freq = (((int) frequency + 5)/10) * 10;

cleanup:
  close(fd);
  return max_freq;
}
#endif // #ifdef __linux__

6
src/common/freq.h Normal file
View File

@@ -0,0 +1,6 @@
#ifndef __COMMON_FREQ__
#define __COMMON_FREQ__

// Needed for int64_t / uint32_t, so that this header is self-contained
#include <stdint.h>

// Measures the max frequency of the given core by counting CPU cycles while
// running nop instructions. Returns the frequency in MHz, or -1 on failure.
// The definition in freq.c is only compiled when __linux__ is defined.
int64_t measure_max_frequency(uint32_t core);

#endif

View File

@@ -1,3 +1,14 @@
#ifdef _WIN32
#define NOMINMAX
#include <windows.h>
#elif defined __linux__
#define _GNU_SOURCE
#include <sched.h>
#elif defined __FreeBSD__
#include <sys/param.h>
#include <sys/cpuset.h>
#endif
#include <stdarg.h> #include <stdarg.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
@@ -199,6 +210,34 @@ void* erealloc(void *ptr, size_t size) {
return newptr; return newptr;
} }
#ifndef __APPLE__
// Restricts execution to the CPU identified by cpu_id, using the affinity
// API of the platform (SetProcessAffinityMask on Windows, sched_setaffinity
// on Linux and cpuset_setaffinity on FreeBSD).
// Returns true on success and false if the affinity could not be set or if
// the platform is not supported.
bool bind_to_cpu(int cpu_id) {
  #ifdef _WIN32
    // The affinity mask holds one bit per CPU. Shifting by a negative amount
    // or by the width of the type (or more) is undefined behavior, so ids
    // that do not fit in the mask are rejected.
    if (cpu_id < 0 || cpu_id >= (int) (sizeof(DWORD_PTR) * 8)) {
      return false;
    }
    HANDLE process = GetCurrentProcess();
    // Shift a DWORD_PTR-wide value: a plain int would overflow for cpu_id >= 31
    DWORD_PTR processAffinityMask = (DWORD_PTR) 1 << cpu_id;
    return SetProcessAffinityMask(process, processAffinityMask) != 0;
  #elif defined __linux__
    cpu_set_t currentCPU;
    CPU_ZERO(&currentCPU);
    CPU_SET(cpu_id, &currentCPU);
    if (sched_setaffinity (0, sizeof(currentCPU), &currentCPU) == -1) {
      printWarn("sched_setaffinity: %s", strerror(errno));
      return false;
    }
    return true;
  #elif defined __FreeBSD__
    cpuset_t currentCPU;
    CPU_ZERO(&currentCPU);
    CPU_SET(cpu_id, &currentCPU);
    if(cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, sizeof(cpuset_t), &currentCPU) == -1) {
      printWarn("cpuset_setaffinity: %s", strerror(errno));
      return false;
    }
    return true;
  #else
    // No affinity API is handled for this platform; report failure instead
    // of reaching the end of a non-void function without a return value.
    (void) cpu_id;
    return false;
  #endif
}
#endif
void print_version(FILE *restrict stream) { void print_version(FILE *restrict stream) {
#ifdef GIT_FULL_VERSION #ifdef GIT_FULL_VERSION
fprintf(stream, "cpufetch %s (%s %s)\n", GIT_FULL_VERSION, OS_STR, ARCH_STR); fprintf(stream, "cpufetch %s (%s %s)\n", GIT_FULL_VERSION, OS_STR, ARCH_STR);

View File

@@ -19,6 +19,9 @@ char *strremove(char *str, const char *sub);
void* emalloc(size_t size); void* emalloc(size_t size);
void* ecalloc(size_t nmemb, size_t size); void* ecalloc(size_t nmemb, size_t size);
void* erealloc(void *ptr, size_t size); void* erealloc(void *ptr, size_t size);
#ifndef __APPLE__
bool bind_to_cpu(int cpu_id);
#endif
void print_version(FILE *restrict stream); void print_version(FILE *restrict stream);
#endif #endif

View File

@@ -61,6 +61,7 @@ enum {
ATTRIBUTE_NCORES, ATTRIBUTE_NCORES,
ATTRIBUTE_NCORES_DUAL, ATTRIBUTE_NCORES_DUAL,
#ifdef ARCH_X86 #ifdef ARCH_X86
ATTRIBUTE_SSE,
ATTRIBUTE_AVX, ATTRIBUTE_AVX,
ATTRIBUTE_FMA, ATTRIBUTE_FMA,
#elif ARCH_PPC #elif ARCH_PPC
@@ -96,6 +97,7 @@ static const char* ATTRIBUTE_FIELDS [] = {
"Cores:", "Cores:",
"Cores (Total):", "Cores (Total):",
#ifdef ARCH_X86 #ifdef ARCH_X86
"SSE:",
"AVX:", "AVX:",
"FMA:", "FMA:",
#elif ARCH_PPC #elif ARCH_PPC
@@ -131,6 +133,7 @@ static const char* ATTRIBUTE_FIELDS_SHORT [] = {
"Cores:", "Cores:",
"Cores (Total):", "Cores (Total):",
#ifdef ARCH_X86 #ifdef ARCH_X86
"SSE:",
"AVX:", "AVX:",
"FMA:", "FMA:",
#elif ARCH_PPC #elif ARCH_PPC
@@ -589,6 +592,7 @@ bool print_cpufetch_x86(struct cpuInfo* cpu, STYLE s, struct color** cs, struct
for(int i = 0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) { for(int i = 0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
char* max_frequency = get_str_freq(ptr->freq); char* max_frequency = get_str_freq(ptr->freq);
char* avx = get_str_avx(ptr); char* avx = get_str_avx(ptr);
char* sse = get_str_sse(ptr);
char* fma = get_str_fma(ptr); char* fma = get_str_fma(ptr);
char* cpu_num = emalloc(sizeof(char) * 9); char* cpu_num = emalloc(sizeof(char) * 9);
@@ -623,8 +627,17 @@ bool print_cpufetch_x86(struct cpuInfo* cpu, STYLE s, struct color** cs, struct
setAttribute(art, ATTRIBUTE_NCORES, n_cores); setAttribute(art, ATTRIBUTE_NCORES, n_cores);
} }
} }
// Show the most modern vector instructions.
// If AVX is supported show it, otherwise show SSE
if (strcmp(avx, "No") == 0) {
setAttribute(art, ATTRIBUTE_SSE, sse);
}
else {
setAttribute(art, ATTRIBUTE_AVX, avx); setAttribute(art, ATTRIBUTE_AVX, avx);
setAttribute(art, ATTRIBUTE_FMA, fma); setAttribute(art, ATTRIBUTE_FMA, fma);
}
if(l1i != NULL) setAttribute(art, ATTRIBUTE_L1i, l1i); if(l1i != NULL) setAttribute(art, ATTRIBUTE_L1i, l1i);
if(l1d != NULL) setAttribute(art, ATTRIBUTE_L1d, l1d); if(l1d != NULL) setAttribute(art, ATTRIBUTE_L1d, l1d);
if(l2 != NULL) setAttribute(art, ATTRIBUTE_L2, l2); if(l2 != NULL) setAttribute(art, ATTRIBUTE_L2, l2);

View File

@@ -72,34 +72,6 @@ uint32_t get_apic_id(bool x2apic_id) {
} }
} }
#ifndef __APPLE__
// Restricts execution to the CPU identified by cpu_id, using the affinity
// API of the platform (SetProcessAffinityMask on Windows, sched_setaffinity
// on Linux and cpuset_setaffinity on FreeBSD).
// Returns true on success and false if the affinity could not be set or if
// the platform is not supported.
bool bind_to_cpu(int cpu_id) {
  #ifdef _WIN32
    // The affinity mask holds one bit per CPU. Shifting by a negative amount
    // or by the width of the type (or more) is undefined behavior, so ids
    // that do not fit in the mask are rejected.
    if (cpu_id < 0 || cpu_id >= (int) (sizeof(DWORD_PTR) * 8)) {
      return false;
    }
    HANDLE process = GetCurrentProcess();
    // Shift a DWORD_PTR-wide value: a plain int would overflow for cpu_id >= 31
    DWORD_PTR processAffinityMask = (DWORD_PTR) 1 << cpu_id;
    return SetProcessAffinityMask(process, processAffinityMask) != 0;
  #elif defined __linux__
    cpu_set_t currentCPU;
    CPU_ZERO(&currentCPU);
    CPU_SET(cpu_id, &currentCPU);
    if (sched_setaffinity (0, sizeof(currentCPU), &currentCPU) == -1) {
      printWarn("sched_setaffinity: %s", strerror(errno));
      return false;
    }
    return true;
  #elif defined __FreeBSD__
    cpuset_t currentCPU;
    CPU_ZERO(&currentCPU);
    CPU_SET(cpu_id, &currentCPU);
    if(cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, sizeof(cpuset_t), &currentCPU) == -1) {
      printWarn("cpuset_setaffinity: %s", strerror(errno));
      return false;
    }
    return true;
  #else
    // No affinity API is handled for this platform; report failure instead
    // of reaching the end of a non-void function without a return value.
    (void) cpu_id;
    return false;
  #endif
}
#endif
#ifdef __linux__ #ifdef __linux__
int get_total_cores_module(int total_cores, int module) { int get_total_cores_module(int total_cores, int module) {
int total_modules = 2; int total_modules = 2;
@@ -397,6 +369,11 @@ bool fill_apic_ids(uint32_t* apic_ids, int first_core, int n, bool x2apic_id) {
} }
bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) { bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) {
if (topo->cach == NULL) {
printWarn("get_topology_from_apic: cach is NULL");
return false;
}
uint32_t apic_id; uint32_t apic_id;
uint32_t* apic_ids = emalloc(sizeof(uint32_t) * topo->total_cores_module); uint32_t* apic_ids = emalloc(sizeof(uint32_t) * topo->total_cores_module);
uint32_t* apic_pkg = emalloc(sizeof(uint32_t) * topo->total_cores_module); uint32_t* apic_pkg = emalloc(sizeof(uint32_t) * topo->total_cores_module);

View File

@@ -17,10 +17,6 @@ struct apic {
bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo); bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo);
uint32_t is_smt_enabled_amd(struct topology* topo); uint32_t is_smt_enabled_amd(struct topology* topo);
#ifndef __APPLE__
bool bind_to_cpu(int cpu_id);
#endif
#ifdef __linux__ #ifdef __linux__
int get_total_cores_module(int total_cores, int module); int get_total_cores_module(int total_cores, int module);
#endif #endif

View File

@@ -6,6 +6,10 @@
#include <unistd.h> #include <unistd.h>
#endif #endif
#ifdef __linux__
#include "../common/freq.h"
#endif
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
@@ -216,7 +220,7 @@ int64_t get_peak_performance(struct cpuInfo* cpu, bool accurate_pp) {
#endif #endif
//First, check we have consistent data //First, check we have consistent data
if(freq == UNKNOWN_DATA || topo->logical_cores == UNKNOWN_DATA) { if(freq == UNKNOWN_DATA || topo == NULL || topo->logical_cores == UNKNOWN_DATA) {
return -1; return -1;
} }
@@ -449,7 +453,7 @@ struct cpuInfo* get_cpu_info(void) {
cpu->cach = NULL; cpu->cach = NULL;
cpu->feat = NULL; cpu->feat = NULL;
uint32_t modules = 1; cpu->num_cpus = 1;
uint32_t eax = 0; uint32_t eax = 0;
uint32_t ebx = 0; uint32_t ebx = 0;
uint32_t ecx = 0; uint32_t ecx = 0;
@@ -505,12 +509,12 @@ struct cpuInfo* get_cpu_info(void) {
cpu->hybrid_flag = (edx >> 15) & 0x1; cpu->hybrid_flag = (edx >> 15) & 0x1;
} }
if(cpu->hybrid_flag) modules = 2; if(cpu->hybrid_flag) cpu->num_cpus = 2;
struct cpuInfo* ptr = cpu; struct cpuInfo* ptr = cpu;
for(uint32_t i=0; i < modules; i++) { for(uint32_t i=0; i < cpu->num_cpus; i++) {
int32_t first_core; int32_t first_core;
set_cpu_module(i, modules, &first_core); set_cpu_module(i, cpu->num_cpus, &first_core);
if(i > 0) { if(i > 0) {
ptr->next_cpu = emalloc(sizeof(struct cpuInfo)); ptr->next_cpu = emalloc(sizeof(struct cpuInfo));
@@ -545,11 +549,7 @@ struct cpuInfo* get_cpu_info(void) {
cpu->cpu_name = infer_cpu_name_from_uarch(cpu->arch); cpu->cpu_name = infer_cpu_name_from_uarch(cpu->arch);
} }
// If any field of the struct is NULL,
// return early, as next functions
// require non NULL fields in cach and topo
ptr->cach = get_cache_info(ptr); ptr->cach = get_cache_info(ptr);
if(ptr->cach == NULL) return cpu;
if(cpu->hybrid_flag) { if(cpu->hybrid_flag) {
ptr->topo = get_topology_info(ptr, ptr->cach, i); ptr->topo = get_topology_info(ptr, ptr->cach, i);
@@ -557,16 +557,23 @@ struct cpuInfo* get_cpu_info(void) {
else { else {
ptr->topo = get_topology_info(ptr, ptr->cach, -1); ptr->topo = get_topology_info(ptr, ptr->cach, -1);
} }
if(cpu->topo == NULL) return cpu;
// If topo is NULL, return early, as get_peak_performance
// requires non-NULL topology.
if(ptr->topo == NULL) return cpu;
} }
cpu->num_cpus = modules;
cpu->peak_performance = get_peak_performance(cpu, accurate_pp()); cpu->peak_performance = get_peak_performance(cpu, accurate_pp());
return cpu; return cpu;
} }
bool get_cache_topology_amd(struct cpuInfo* cpu, struct topology* topo) { bool get_cache_topology_amd(struct cpuInfo* cpu, struct topology* topo) {
if (topo->cach == NULL) {
printWarn("get_cache_topology_amd: cach is NULL");
return false;
}
if(cpu->maxExtendedLevels >= 0x8000001D && cpu->topology_extensions) { if(cpu->maxExtendedLevels >= 0x8000001D && cpu->topology_extensions) {
uint32_t i, eax, ebx, ecx, edx, num_sharing_cache, cache_type, cache_level; uint32_t i, eax, ebx, ecx, edx, num_sharing_cache, cache_type, cache_level;
@@ -642,10 +649,12 @@ bool get_cache_topology_amd(struct cpuInfo* cpu, struct topology* topo) {
#ifdef __linux__ #ifdef __linux__
void get_topology_from_udev(struct topology* topo) { void get_topology_from_udev(struct topology* topo) {
// TODO: To be improved in the future
topo->total_cores = get_ncores_from_cpuinfo(); topo->total_cores = get_ncores_from_cpuinfo();
topo->logical_cores = topo->total_cores; // TODO: To be improved in the future
topo->physical_cores = topo->total_cores; // Conservative setting as we only know the total
// number of cores.
topo->logical_cores = UNKNOWN_DATA;
topo->physical_cores = UNKNOWN_DATA;
topo->smt_available = 1; topo->smt_available = 1;
topo->smt_supported = 1; topo->smt_supported = 1;
topo->sockets = 1; topo->sockets = 1;
@@ -709,8 +718,8 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach, int
} }
else { else {
printWarn("Can't read topology information from cpuid (needed level is 0x%.8X, max is 0x%.8X)", 0x00000001, cpu->maxLevels); printWarn("Can't read topology information from cpuid (needed level is 0x%.8X, max is 0x%.8X)", 0x00000001, cpu->maxLevels);
topo->physical_cores = 1; topo->physical_cores = UNKNOWN_DATA;
topo->logical_cores = 1; topo->logical_cores = UNKNOWN_DATA;
topo->smt_available = 1; topo->smt_available = 1;
topo->smt_supported = 1; topo->smt_supported = 1;
} }
@@ -960,6 +969,14 @@ struct frequency* get_frequency_info(struct cpuInfo* cpu) {
} }
} }
#ifdef __linux__
if (freq->max == UNKNOWN_DATA) {
printWarn("All previous methods failed, measuring CPU frequency");
// TODO: Support hybrid architectures
freq->max = measure_max_frequency(0);
}
#endif
return freq; return freq;
} }

View File

@@ -9,5 +9,6 @@
#define LOOP_ITERS 100000000 #define LOOP_ITERS 100000000
int64_t measure_frequency(struct cpuInfo* cpu); int64_t measure_frequency(struct cpuInfo* cpu);
void nop_function_x86(uint64_t iters);
#endif #endif