mirror of
https://github.com/Dr-Noob/cpufetch.git
synced 2026-03-25 07:50:40 +01:00
Compare commits
20 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e5e317ea92 | ||
|
|
41a194948d | ||
|
|
5dc2234e97 | ||
|
|
21dddc63ff | ||
|
|
8fca4cb250 | ||
|
|
7c947bdf64 | ||
|
|
1ed3a0f2bf | ||
|
|
0fe6fc3f4d | ||
|
|
96c784026b | ||
|
|
59cd2dd128 | ||
|
|
da1981b97c | ||
|
|
8506c91e00 | ||
|
|
ece28cbdee | ||
|
|
7b46c78249 | ||
|
|
e0095c303d | ||
|
|
65378aaed9 | ||
|
|
946729dd06 | ||
|
|
9212f19de1 | ||
|
|
b019256515 | ||
|
|
d4cadbd807 |
13
Makefile
13
Makefile
@@ -13,12 +13,18 @@ COMMON_HDR = $(SRC_COMMON)ascii.h $(SRC_COMMON)cpu.h $(SRC_COMMON)udev.h $(SRC_C
|
|||||||
ifneq ($(OS),Windows_NT)
|
ifneq ($(OS),Windows_NT)
|
||||||
GIT_VERSION := "$(shell git describe --abbrev=4 --dirty --always --tags)"
|
GIT_VERSION := "$(shell git describe --abbrev=4 --dirty --always --tags)"
|
||||||
arch := $(shell uname -m)
|
arch := $(shell uname -m)
|
||||||
|
os := $(shell uname -s)
|
||||||
|
|
||||||
|
ifeq ($(os), Linux)
|
||||||
|
COMMON_SRC += $(SRC_COMMON)freq.c
|
||||||
|
COMMON_HDR += $(SRC_COMMON)freq.h
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(arch), $(filter $(arch), x86_64 amd64 i386 i486 i586 i686))
|
ifeq ($(arch), $(filter $(arch), x86_64 amd64 i386 i486 i586 i686))
|
||||||
SRC_DIR=src/x86/
|
SRC_DIR=src/x86/
|
||||||
SOURCE += $(COMMON_SRC) $(SRC_DIR)cpuid.c $(SRC_DIR)apic.c $(SRC_DIR)cpuid_asm.c $(SRC_DIR)uarch.c
|
SOURCE += $(COMMON_SRC) $(SRC_DIR)cpuid.c $(SRC_DIR)apic.c $(SRC_DIR)cpuid_asm.c $(SRC_DIR)uarch.c
|
||||||
HEADERS += $(COMMON_HDR) $(SRC_DIR)cpuid.h $(SRC_DIR)apic.h $(SRC_DIR)cpuid_asm.h $(SRC_DIR)uarch.h $(SRC_DIR)freq/freq.h
|
HEADERS += $(COMMON_HDR) $(SRC_DIR)cpuid.h $(SRC_DIR)apic.h $(SRC_DIR)cpuid_asm.h $(SRC_DIR)uarch.h $(SRC_DIR)freq/freq.h
|
||||||
|
|
||||||
os := $(shell uname -s)
|
|
||||||
ifeq ($(os), Linux)
|
ifeq ($(os), Linux)
|
||||||
SOURCE += $(SRC_DIR)freq/freq.c freq_nov.o freq_avx.o freq_avx512.o
|
SOURCE += $(SRC_DIR)freq/freq.c freq_nov.o freq_avx.o freq_avx512.o
|
||||||
HEADERS += $(SRC_DIR)freq/freq.h
|
HEADERS += $(SRC_DIR)freq/freq.h
|
||||||
@@ -36,10 +42,9 @@ ifneq ($(OS),Windows_NT)
|
|||||||
HEADERS += $(COMMON_HDR) $(SRC_DIR)midr.h $(SRC_DIR)uarch.h $(SRC_COMMON)soc.h $(SRC_DIR)soc.h $(SRC_COMMON)pci.h $(SRC_DIR)udev.c $(SRC_DIR)socs.h
|
HEADERS += $(COMMON_HDR) $(SRC_DIR)midr.h $(SRC_DIR)uarch.h $(SRC_COMMON)soc.h $(SRC_DIR)soc.h $(SRC_COMMON)pci.h $(SRC_DIR)udev.c $(SRC_DIR)socs.h
|
||||||
CFLAGS += -DARCH_ARM -Wno-unused-parameter -std=c99 -fstack-protector-all
|
CFLAGS += -DARCH_ARM -Wno-unused-parameter -std=c99 -fstack-protector-all
|
||||||
|
|
||||||
os := $(shell uname -s)
|
|
||||||
ifeq ($(os), Darwin)
|
ifeq ($(os), Darwin)
|
||||||
SOURCE += $(SRC_DIR)sysctl.c
|
SOURCE += $(SRC_COMMON)sysctl.c
|
||||||
HEADERS += $(SRC_DIR)sysctl.h
|
HEADERS += $(SRC_COMMON)sysctl.h
|
||||||
endif
|
endif
|
||||||
else ifeq ($(arch), $(filter $(arch), riscv64 riscv32))
|
else ifeq ($(arch), $(filter $(arch), riscv64 riscv32))
|
||||||
SRC_DIR=src/riscv/
|
SRC_DIR=src/riscv/
|
||||||
|
|||||||
@@ -8,12 +8,14 @@
|
|||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
#include <sys/auxv.h>
|
#include <sys/auxv.h>
|
||||||
#include <asm/hwcap.h>
|
#include <asm/hwcap.h>
|
||||||
|
#include "../common/freq.h"
|
||||||
#elif defined __APPLE__ || __MACH__
|
#elif defined __APPLE__ || __MACH__
|
||||||
#include "sysctl.h"
|
#include "../common/sysctl.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "../common/global.h"
|
#include "../common/global.h"
|
||||||
#include "../common/soc.h"
|
#include "../common/soc.h"
|
||||||
|
#include "../common/args.h"
|
||||||
#include "udev.h"
|
#include "udev.h"
|
||||||
#include "midr.h"
|
#include "midr.h"
|
||||||
#include "uarch.h"
|
#include "uarch.h"
|
||||||
@@ -39,8 +41,17 @@ struct cache* get_cache_info(struct cpuInfo* cpu) {
|
|||||||
struct frequency* get_frequency_info(uint32_t core) {
|
struct frequency* get_frequency_info(uint32_t core) {
|
||||||
struct frequency* freq = emalloc(sizeof(struct frequency));
|
struct frequency* freq = emalloc(sizeof(struct frequency));
|
||||||
|
|
||||||
|
freq->measured = false;
|
||||||
freq->base = UNKNOWN_DATA;
|
freq->base = UNKNOWN_DATA;
|
||||||
freq->max = get_max_freq_from_file(core);
|
freq->max = get_max_freq_from_file(core);
|
||||||
|
#ifdef __linux__
|
||||||
|
if (freq->max == UNKNOWN_DATA || measure_max_frequency_flag()) {
|
||||||
|
if (freq->max == UNKNOWN_DATA)
|
||||||
|
printWarn("Unable to find max frequency from udev, measuring CPU frequency");
|
||||||
|
freq->max = measure_max_frequency(core);
|
||||||
|
freq->measured = true;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
return freq;
|
return freq;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -11,7 +11,7 @@
|
|||||||
#include "../common/pci.h"
|
#include "../common/pci.h"
|
||||||
|
|
||||||
#if defined(__APPLE__) || defined(__MACH__)
|
#if defined(__APPLE__) || defined(__MACH__)
|
||||||
#include "sysctl.h"
|
#include "../common/sysctl.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define NA -1
|
#define NA -1
|
||||||
|
|||||||
@@ -28,6 +28,7 @@ struct args_struct {
|
|||||||
bool help_flag;
|
bool help_flag;
|
||||||
bool raw_flag;
|
bool raw_flag;
|
||||||
bool accurate_pp;
|
bool accurate_pp;
|
||||||
|
bool measure_max_frequency_flag;
|
||||||
bool full_cpu_name_flag;
|
bool full_cpu_name_flag;
|
||||||
bool logo_long;
|
bool logo_long;
|
||||||
bool logo_short;
|
bool logo_short;
|
||||||
@@ -50,6 +51,7 @@ const char args_chr[] = {
|
|||||||
/* [ARG_LOGO_INTEL_NEW] = */ 3,
|
/* [ARG_LOGO_INTEL_NEW] = */ 3,
|
||||||
/* [ARG_LOGO_INTEL_OLD] = */ 4,
|
/* [ARG_LOGO_INTEL_OLD] = */ 4,
|
||||||
/* [ARG_ACCURATE_PP] = */ 5,
|
/* [ARG_ACCURATE_PP] = */ 5,
|
||||||
|
/* [ARG_MEASURE_MAX_FREQ] = */ 6,
|
||||||
/* [ARG_DEBUG] = */ 'd',
|
/* [ARG_DEBUG] = */ 'd',
|
||||||
/* [ARG_VERBOSE] = */ 'v',
|
/* [ARG_VERBOSE] = */ 'v',
|
||||||
/* [ARG_VERSION] = */ 'V',
|
/* [ARG_VERSION] = */ 'V',
|
||||||
@@ -66,6 +68,7 @@ const char *args_str[] = {
|
|||||||
/* [ARG_LOGO_INTEL_NEW] = */ "logo-intel-new",
|
/* [ARG_LOGO_INTEL_NEW] = */ "logo-intel-new",
|
||||||
/* [ARG_LOGO_INTEL_OLD] = */ "logo-intel-old",
|
/* [ARG_LOGO_INTEL_OLD] = */ "logo-intel-old",
|
||||||
/* [ARG_ACCURATE_PP] = */ "accurate-pp",
|
/* [ARG_ACCURATE_PP] = */ "accurate-pp",
|
||||||
|
/* [ARG_MEASURE_MAX_FREQ] = */ "measure-max-freq",
|
||||||
/* [ARG_DEBUG] = */ "debug",
|
/* [ARG_DEBUG] = */ "debug",
|
||||||
/* [ARG_VERBOSE] = */ "verbose",
|
/* [ARG_VERBOSE] = */ "verbose",
|
||||||
/* [ARG_VERSION] = */ "version",
|
/* [ARG_VERSION] = */ "version",
|
||||||
@@ -101,6 +104,10 @@ bool accurate_pp(void) {
|
|||||||
return args.accurate_pp;
|
return args.accurate_pp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool measure_max_frequency_flag(void) {
|
||||||
|
return args.measure_max_frequency_flag;
|
||||||
|
}
|
||||||
|
|
||||||
bool show_full_cpu_name(void) {
|
bool show_full_cpu_name(void) {
|
||||||
return args.full_cpu_name_flag;
|
return args.full_cpu_name_flag;
|
||||||
}
|
}
|
||||||
@@ -222,12 +229,20 @@ char* build_short_options(void) {
|
|||||||
memset(str, 0, sizeof(char) * (len*2 + 1));
|
memset(str, 0, sizeof(char) * (len*2 + 1));
|
||||||
|
|
||||||
#ifdef ARCH_X86
|
#ifdef ARCH_X86
|
||||||
sprintf(str, "%c:%c:%c%c%c%c%c%c%c%c%c%c%c",
|
sprintf(str, "%c:%c:%c%c%c%c%c%c%c%c%c%c%c%c",
|
||||||
c[ARG_STYLE], c[ARG_COLOR], c[ARG_HELP],
|
c[ARG_STYLE], c[ARG_COLOR], c[ARG_HELP],
|
||||||
c[ARG_RAW], c[ARG_FULLCPUNAME],
|
c[ARG_RAW], c[ARG_FULLCPUNAME],
|
||||||
c[ARG_LOGO_SHORT], c[ARG_LOGO_LONG],
|
c[ARG_LOGO_SHORT], c[ARG_LOGO_LONG],
|
||||||
c[ARG_LOGO_INTEL_NEW], c[ARG_LOGO_INTEL_OLD],
|
c[ARG_LOGO_INTEL_NEW], c[ARG_LOGO_INTEL_OLD],
|
||||||
c[ARG_ACCURATE_PP], c[ARG_DEBUG], c[ARG_VERBOSE],
|
c[ARG_ACCURATE_PP], c[ARG_MEASURE_MAX_FREQ],
|
||||||
|
c[ARG_DEBUG], c[ARG_VERBOSE],
|
||||||
|
c[ARG_VERSION]);
|
||||||
|
#elif ARCH_ARM
|
||||||
|
sprintf(str, "%c:%c:%c%c%c%c%c%c%c",
|
||||||
|
c[ARG_STYLE], c[ARG_COLOR], c[ARG_HELP],
|
||||||
|
c[ARG_LOGO_SHORT], c[ARG_LOGO_LONG],
|
||||||
|
c[ARG_MEASURE_MAX_FREQ],
|
||||||
|
c[ARG_DEBUG], c[ARG_VERBOSE],
|
||||||
c[ARG_VERSION]);
|
c[ARG_VERSION]);
|
||||||
#else
|
#else
|
||||||
sprintf(str, "%c:%c:%c%c%c%c%c%c",
|
sprintf(str, "%c:%c:%c%c%c%c%c%c",
|
||||||
@@ -270,8 +285,11 @@ bool parse_args(int argc, char* argv[]) {
|
|||||||
{args_str[ARG_LOGO_INTEL_NEW], no_argument, 0, args_chr[ARG_LOGO_INTEL_NEW] },
|
{args_str[ARG_LOGO_INTEL_NEW], no_argument, 0, args_chr[ARG_LOGO_INTEL_NEW] },
|
||||||
{args_str[ARG_LOGO_INTEL_OLD], no_argument, 0, args_chr[ARG_LOGO_INTEL_OLD] },
|
{args_str[ARG_LOGO_INTEL_OLD], no_argument, 0, args_chr[ARG_LOGO_INTEL_OLD] },
|
||||||
{args_str[ARG_ACCURATE_PP], no_argument, 0, args_chr[ARG_ACCURATE_PP] },
|
{args_str[ARG_ACCURATE_PP], no_argument, 0, args_chr[ARG_ACCURATE_PP] },
|
||||||
|
{args_str[ARG_MEASURE_MAX_FREQ], no_argument, 0, args_chr[ARG_MEASURE_MAX_FREQ] },
|
||||||
{args_str[ARG_FULLCPUNAME], no_argument, 0, args_chr[ARG_FULLCPUNAME] },
|
{args_str[ARG_FULLCPUNAME], no_argument, 0, args_chr[ARG_FULLCPUNAME] },
|
||||||
{args_str[ARG_RAW], no_argument, 0, args_chr[ARG_RAW] },
|
{args_str[ARG_RAW], no_argument, 0, args_chr[ARG_RAW] },
|
||||||
|
#elif ARCH_ARM
|
||||||
|
{args_str[ARG_MEASURE_MAX_FREQ], no_argument, 0, args_chr[ARG_MEASURE_MAX_FREQ] },
|
||||||
#endif
|
#endif
|
||||||
{args_str[ARG_LOGO_SHORT], no_argument, 0, args_chr[ARG_LOGO_SHORT] },
|
{args_str[ARG_LOGO_SHORT], no_argument, 0, args_chr[ARG_LOGO_SHORT] },
|
||||||
{args_str[ARG_LOGO_LONG], no_argument, 0, args_chr[ARG_LOGO_LONG] },
|
{args_str[ARG_LOGO_LONG], no_argument, 0, args_chr[ARG_LOGO_LONG] },
|
||||||
@@ -313,6 +331,9 @@ bool parse_args(int argc, char* argv[]) {
|
|||||||
else if(opt == args_chr[ARG_ACCURATE_PP]) {
|
else if(opt == args_chr[ARG_ACCURATE_PP]) {
|
||||||
args.accurate_pp = true;
|
args.accurate_pp = true;
|
||||||
}
|
}
|
||||||
|
else if(opt == args_chr[ARG_MEASURE_MAX_FREQ]) {
|
||||||
|
args.measure_max_frequency_flag = true;
|
||||||
|
}
|
||||||
else if(opt == args_chr[ARG_FULLCPUNAME]) {
|
else if(opt == args_chr[ARG_FULLCPUNAME]) {
|
||||||
args.full_cpu_name_flag = true;
|
args.full_cpu_name_flag = true;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -29,6 +29,7 @@ enum {
|
|||||||
ARG_LOGO_INTEL_NEW,
|
ARG_LOGO_INTEL_NEW,
|
||||||
ARG_LOGO_INTEL_OLD,
|
ARG_LOGO_INTEL_OLD,
|
||||||
ARG_ACCURATE_PP,
|
ARG_ACCURATE_PP,
|
||||||
|
ARG_MEASURE_MAX_FREQ,
|
||||||
ARG_DEBUG,
|
ARG_DEBUG,
|
||||||
ARG_VERBOSE,
|
ARG_VERBOSE,
|
||||||
ARG_VERSION
|
ARG_VERSION
|
||||||
@@ -43,6 +44,7 @@ int max_arg_str_length(void);
|
|||||||
bool parse_args(int argc, char* argv[]);
|
bool parse_args(int argc, char* argv[]);
|
||||||
bool show_help(void);
|
bool show_help(void);
|
||||||
bool accurate_pp(void);
|
bool accurate_pp(void);
|
||||||
|
bool measure_max_frequency_flag(void);
|
||||||
bool show_full_cpu_name(void);
|
bool show_full_cpu_name(void);
|
||||||
bool show_logo_long(void);
|
bool show_logo_long(void);
|
||||||
bool show_logo_short(void);
|
bool show_logo_short(void);
|
||||||
|
|||||||
@@ -145,17 +145,25 @@ char* get_str_l3(struct cache* cach) {
|
|||||||
|
|
||||||
char* get_str_freq(struct frequency* freq) {
|
char* get_str_freq(struct frequency* freq) {
|
||||||
//Max 3 digits and 3 for '(M/G)Hz' plus 1 for '\0'
|
//Max 3 digits and 3 for '(M/G)Hz' plus 1 for '\0'
|
||||||
uint32_t size = (5+1+3+1);
|
uint32_t size = (1+5+1+3+1);
|
||||||
assert(strlen(STRING_UNKNOWN)+1 <= size);
|
assert(strlen(STRING_UNKNOWN)+1 <= size);
|
||||||
char* string = emalloc(sizeof(char)*size);
|
char* string = ecalloc(size, sizeof(char));
|
||||||
memset(string, 0, sizeof(char)*size);
|
|
||||||
|
|
||||||
if(freq->max == UNKNOWN_DATA || freq->max < 0)
|
if(freq->max == UNKNOWN_DATA || freq->max < 0) {
|
||||||
snprintf(string,strlen(STRING_UNKNOWN)+1,STRING_UNKNOWN);
|
snprintf(string,strlen(STRING_UNKNOWN)+1,STRING_UNKNOWN);
|
||||||
else if(freq->max >= 1000)
|
}
|
||||||
|
else if(freq->max >= 1000) {
|
||||||
|
if (freq->measured)
|
||||||
|
snprintf(string,size,"~%.3f "STRING_GIGAHERZ,(float)(freq->max)/1000);
|
||||||
|
else
|
||||||
snprintf(string,size,"%.3f "STRING_GIGAHERZ,(float)(freq->max)/1000);
|
snprintf(string,size,"%.3f "STRING_GIGAHERZ,(float)(freq->max)/1000);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (freq->measured)
|
||||||
|
snprintf(string,size,"~%d "STRING_MEGAHERZ,freq->max);
|
||||||
else
|
else
|
||||||
snprintf(string,size,"%d "STRING_MEGAHERZ,freq->max);
|
snprintf(string,size,"%d "STRING_MEGAHERZ,freq->max);
|
||||||
|
}
|
||||||
|
|
||||||
return string;
|
return string;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -58,6 +58,8 @@ typedef int32_t VENDOR;
|
|||||||
struct frequency {
|
struct frequency {
|
||||||
int32_t base;
|
int32_t base;
|
||||||
int32_t max;
|
int32_t max;
|
||||||
|
// Indicates if max frequency was measured
|
||||||
|
bool measured;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct hypervisor {
|
struct hypervisor {
|
||||||
|
|||||||
195
src/common/freq.c
Normal file
195
src/common/freq.c
Normal file
@@ -0,0 +1,195 @@
|
|||||||
|
#ifdef __linux__
|
||||||
|
|
||||||
|
#define _GNU_SOURCE
|
||||||
|
|
||||||
|
#include <time.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <asm/unistd.h>
|
||||||
|
#include <sys/ioctl.h>
|
||||||
|
#include <linux/perf_event.h>
|
||||||
|
|
||||||
|
#include "global.h"
|
||||||
|
#include "cpu.h"
|
||||||
|
|
||||||
|
/*
 * Small wrapper that issues the perf_event_open system call through
 * syscall() using __NR_perf_event_open. All arguments are forwarded
 * unchanged. The caller in this file treats a result of -1 as failure.
 */
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
                            int cpu, int group_fd, unsigned long flags) {
  // The syscall result is narrowed to int before being returned as long
  const int result = syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);
  return result;
}
|
||||||
|
|
||||||
|
// Emits one nop instruction as a volatile inline asm statement.
#define INSERT_ASM_ONCE __asm volatile("nop");

// Each of the following macros repeats the previous one ten times.
// None of them ends with a line continuation, so the macro body never
// depends on the line that happens to follow the definition.
#define INSERT_ASM_10_TIMES \
  INSERT_ASM_ONCE           \
  INSERT_ASM_ONCE           \
  INSERT_ASM_ONCE           \
  INSERT_ASM_ONCE           \
  INSERT_ASM_ONCE           \
  INSERT_ASM_ONCE           \
  INSERT_ASM_ONCE           \
  INSERT_ASM_ONCE           \
  INSERT_ASM_ONCE           \
  INSERT_ASM_ONCE

#define INSERT_ASM_100_TIMES \
  INSERT_ASM_10_TIMES        \
  INSERT_ASM_10_TIMES        \
  INSERT_ASM_10_TIMES        \
  INSERT_ASM_10_TIMES        \
  INSERT_ASM_10_TIMES        \
  INSERT_ASM_10_TIMES        \
  INSERT_ASM_10_TIMES        \
  INSERT_ASM_10_TIMES        \
  INSERT_ASM_10_TIMES        \
  INSERT_ASM_10_TIMES

#define INSERT_ASM_1000_TIMES \
  INSERT_ASM_100_TIMES        \
  INSERT_ASM_100_TIMES        \
  INSERT_ASM_100_TIMES        \
  INSERT_ASM_100_TIMES        \
  INSERT_ASM_100_TIMES        \
  INSERT_ASM_100_TIMES        \
  INSERT_ASM_100_TIMES        \
  INSERT_ASM_100_TIMES        \
  INSERT_ASM_100_TIMES        \
  INSERT_ASM_100_TIMES

// Executes 'iters' loop iterations, each consisting of 4000 nop
// instructions (4 x INSERT_ASM_1000_TIMES). Produces no result; it only
// serves as a fixed workload for the frequency measurement.
void nop_function(uint64_t iters) {
  for (uint64_t i = 0; i < iters; i++) {
    INSERT_ASM_1000_TIMES
    INSERT_ASM_1000_TIMES
    INSERT_ASM_1000_TIMES
    INSERT_ASM_1000_TIMES
  }
}
|
||||||
|
|
||||||
|
// Run the nop_function with the number of iterations specified and
|
||||||
|
// measure both the time and number of cycles
|
||||||
|
int measure_freq_iters(uint64_t iters, uint32_t core, double* freq) {
|
||||||
|
clockid_t clock = CLOCK_PROCESS_CPUTIME_ID;
|
||||||
|
struct timespec start, end;
|
||||||
|
struct perf_event_attr pe;
|
||||||
|
uint64_t cycles;
|
||||||
|
int fd;
|
||||||
|
int pid = 0;
|
||||||
|
memset(&pe, 0, sizeof(struct perf_event_attr));
|
||||||
|
pe.type = PERF_TYPE_HARDWARE;
|
||||||
|
pe.size = sizeof(struct perf_event_attr);
|
||||||
|
pe.config = PERF_COUNT_HW_CPU_CYCLES;
|
||||||
|
pe.disabled = 1;
|
||||||
|
pe.exclude_kernel = 1;
|
||||||
|
pe.exclude_hv = 1;
|
||||||
|
|
||||||
|
fd = perf_event_open(&pe, pid, core, -1, 0);
|
||||||
|
if (fd == -1) {
|
||||||
|
perror("perf_event_open");
|
||||||
|
if (errno == EPERM || errno == EACCES) {
|
||||||
|
printErr("You may not have permission to collect stats.\n"\
|
||||||
|
"Consider tweaking /proc/sys/kernel/perf_event_paranoid or running as root");
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (clock_gettime(clock, &start) == -1) {
|
||||||
|
perror("clock_gettime");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if(ioctl(fd, PERF_EVENT_IOC_RESET, 0) == -1) {
|
||||||
|
perror("ioctl");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if(ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) == -1) {
|
||||||
|
perror("ioctl");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
nop_function(iters);
|
||||||
|
|
||||||
|
ssize_t ret = read(fd, &cycles, sizeof(uint64_t));
|
||||||
|
if (ret == -1) {
|
||||||
|
perror("read");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (ret != sizeof(uint64_t)) {
|
||||||
|
printErr("Read returned %d, expected %d", ret, sizeof(uint64_t));
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if(ioctl(fd, PERF_EVENT_IOC_DISABLE, 0) == -1) {
|
||||||
|
perror("ioctl");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (clock_gettime(clock, &end) == -1) {
|
||||||
|
perror("clock_gettime");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t nsecs = (end.tv_sec*1e9 + end.tv_nsec) - (start.tv_sec*1e9 + start.tv_nsec);
|
||||||
|
uint64_t usecs = nsecs/1000;
|
||||||
|
*freq = cycles/((double)usecs);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return a good number of iterations to run the nop_function in
// order to get a precise measurement of the frequency without taking
// too much time.
//
// frequency: rough frequency estimate (the value produced by a first,
//            short measurement).
//
// The estimate is truncated to a multiple of 100 to reduce variability
// and then scaled by 1e7 / ops_per_iter. Values for estimates of 100 or
// more are unchanged from before. Estimates that are NaN, below 1, or
// implausibly large are clamped first, so the conversion to an integer
// is always well defined and the result is never 0.
uint64_t get_num_iters_from_freq(double frequency) {
  // nop_function executes 4 blocks of 1000 nop instructions per iteration
  const uint64_t ops_per_iter = 4 * 1000;
  const uint64_t iters_per_unit = 10000000 / ops_per_iter;
  const double max_frequency = 1e9;

  // Written as a negated comparison so that NaN is clamped as well
  if (!(frequency >= 1.0)) {
    frequency = 1.0;
  }
  else if (frequency > max_frequency) {
    frequency = max_frequency;
  }

  uint64_t whole = (uint64_t) frequency;
  // Truncate to reduce variability
  uint64_t freq_trunc = (whole / 100) * 100;
  if (freq_trunc == 0) {
    // Below 100 the truncation would yield zero iterations
    freq_trunc = whole;
  }

  return freq_trunc * iters_per_unit;
}
|
||||||
|
|
||||||
|
// Differences between x86 measure_frequency and this measure_max_frequency:
|
||||||
|
// - measure_frequency employs all cores simultaneously whereas
|
||||||
|
// measure_max_frequency only employs 1.
|
||||||
|
// - measure_frequency runs the computation and checks /proc/cpuinfo whereas
|
||||||
|
// measure_max_frequency does not rely on /proc/cpuinfo and simply
|
||||||
|
// counts cpu cycles to measure frequency.
|
||||||
|
// - measure_frequency uses actual computation while measuring the frequency
|
||||||
|
// whereas measure_max_frequency uses nop instructions. This makes the former
|
||||||
|
// x86 dependant whereas the latter is architecture independant.
|
||||||
|
int64_t measure_max_frequency(uint32_t core) {
|
||||||
|
if (!bind_to_cpu(core)) {
|
||||||
|
printErr("Failed binding the process to CPU %d", core);
|
||||||
|
return UNKNOWN_DATA;
|
||||||
|
}
|
||||||
|
|
||||||
|
// First, get very rough estimation of clock cycle to
|
||||||
|
// compute a reasonable value for the iterations
|
||||||
|
double estimation_freq, frequency;
|
||||||
|
uint64_t iters = 100000;
|
||||||
|
if (measure_freq_iters(iters, core, &estimation_freq) == -1)
|
||||||
|
return UNKNOWN_DATA;
|
||||||
|
|
||||||
|
if (estimation_freq <= 0.0) {
|
||||||
|
printErr("First frequency measurement yielded an invalid value: %f", estimation_freq);
|
||||||
|
return UNKNOWN_DATA;
|
||||||
|
}
|
||||||
|
iters = get_num_iters_from_freq(estimation_freq);
|
||||||
|
printWarn("Running frequency measurement with %ld iterations on core %d...", iters, core);
|
||||||
|
|
||||||
|
// Now perform actual measurement
|
||||||
|
const char* frequency_banner = "cpufetch is measuring the max frequency...";
|
||||||
|
printf("%s", frequency_banner);
|
||||||
|
fflush(stdout);
|
||||||
|
|
||||||
|
if (measure_freq_iters(iters, core, &frequency) == -1)
|
||||||
|
return UNKNOWN_DATA;
|
||||||
|
|
||||||
|
// Clean screen once measurement is finished
|
||||||
|
printf("\r%*c\r", (int) strlen(frequency_banner), ' ');
|
||||||
|
|
||||||
|
// Discard last digit in the frequency, which should help providing
|
||||||
|
// more reliable and predictable values.
|
||||||
|
return (((int) frequency + 5)/10) * 10;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // #ifdef __linux__
|
||||||
6
src/common/freq.h
Normal file
6
src/common/freq.h
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
#ifndef __COMMON_FREQ__
#define __COMMON_FREQ__

// Needed for int64_t / uint32_t so the header can be included on its own
#include <stdint.h>

// Measures the max frequency of the given core by counting CPU cycles.
// Returns the measured value, or UNKNOWN_DATA on failure.
int64_t measure_max_frequency(uint32_t core);

#endif
|
||||||
@@ -1,3 +1,14 @@
|
|||||||
|
#ifdef _WIN32
|
||||||
|
#define NOMINMAX
|
||||||
|
#include <windows.h>
|
||||||
|
#elif defined __linux__
|
||||||
|
#define _GNU_SOURCE
|
||||||
|
#include <sched.h>
|
||||||
|
#elif defined __FreeBSD__
|
||||||
|
#include <sys/param.h>
|
||||||
|
#include <sys/cpuset.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include <stdarg.h>
|
#include <stdarg.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
@@ -199,6 +210,34 @@ void* erealloc(void *ptr, size_t size) {
|
|||||||
return newptr;
|
return newptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef __APPLE__
|
||||||
|
// Restricts execution to the logical CPU 'cpu_id' using the affinity API
// of the platform: SetProcessAffinityMask on Windows, sched_setaffinity on
// Linux and cpuset_setaffinity on FreeBSD.
//
// Returns true if the affinity was changed, false otherwise (invalid id,
// failing system call, or a platform without an implementation here).
bool bind_to_cpu(int cpu_id) {
  if (cpu_id < 0) {
    printWarn("bind_to_cpu: invalid CPU id %d", cpu_id);
    return false;
  }

#ifdef _WIN32
  // The mask has one bit per CPU; shifting by its width or more is undefined
  if ((size_t) cpu_id >= sizeof(DWORD_PTR) * 8) {
    printWarn("bind_to_cpu: invalid CPU id %d", cpu_id);
    return false;
  }
  HANDLE process = GetCurrentProcess();
  // Shift a DWORD_PTR-wide 1 so ids of 31 and above work on 64-bit builds
  DWORD_PTR processAffinityMask = (DWORD_PTR) 1 << cpu_id;
  return SetProcessAffinityMask(process, processAffinityMask) != 0;
#elif defined __linux__
  cpu_set_t currentCPU;
  CPU_ZERO(&currentCPU);
  CPU_SET(cpu_id, &currentCPU);
  if (sched_setaffinity (0, sizeof(currentCPU), &currentCPU) == -1) {
    printWarn("sched_setaffinity: %s", strerror(errno));
    return false;
  }
  return true;
#elif defined __FreeBSD__
  cpuset_t currentCPU;
  CPU_ZERO(&currentCPU);
  CPU_SET(cpu_id, &currentCPU);
  if(cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, sizeof(cpuset_t), &currentCPU) == -1) {
    printWarn("cpuset_setaffinity: %s", strerror(errno));
    return false;
  }
  return true;
#else
  // No affinity implementation for this platform
  return false;
#endif
}
|
||||||
|
#endif
|
||||||
|
|
||||||
void print_version(FILE *restrict stream) {
|
void print_version(FILE *restrict stream) {
|
||||||
#ifdef GIT_FULL_VERSION
|
#ifdef GIT_FULL_VERSION
|
||||||
fprintf(stream, "cpufetch %s (%s %s)\n", GIT_FULL_VERSION, OS_STR, ARCH_STR);
|
fprintf(stream, "cpufetch %s (%s %s)\n", GIT_FULL_VERSION, OS_STR, ARCH_STR);
|
||||||
|
|||||||
@@ -19,6 +19,9 @@ char *strremove(char *str, const char *sub);
|
|||||||
void* emalloc(size_t size);
|
void* emalloc(size_t size);
|
||||||
void* ecalloc(size_t nmemb, size_t size);
|
void* ecalloc(size_t nmemb, size_t size);
|
||||||
void* erealloc(void *ptr, size_t size);
|
void* erealloc(void *ptr, size_t size);
|
||||||
|
#ifndef __APPLE__
|
||||||
|
bool bind_to_cpu(int cpu_id);
|
||||||
|
#endif
|
||||||
void print_version(FILE *restrict stream);
|
void print_version(FILE *restrict stream);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -30,11 +30,17 @@ void print_help(char *argv[]) {
|
|||||||
#ifdef ARCH_X86
|
#ifdef ARCH_X86
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
printf(" --%s %*s Compute the peak performance accurately (measure the CPU frequency instead of using the maximum)\n", t[ARG_ACCURATE_PP], (int) (max_len-strlen(t[ARG_ACCURATE_PP])), "");
|
printf(" --%s %*s Compute the peak performance accurately (measure the CPU frequency instead of using the maximum)\n", t[ARG_ACCURATE_PP], (int) (max_len-strlen(t[ARG_ACCURATE_PP])), "");
|
||||||
#endif
|
printf(" --%s %*s Measure the max CPU frequency instead of reading it\n", t[ARG_MEASURE_MAX_FREQ], (int) (max_len-strlen(t[ARG_MEASURE_MAX_FREQ])), "");
|
||||||
|
#endif // __linux__
|
||||||
printf(" --%s %*s Show the old Intel logo\n", t[ARG_LOGO_INTEL_OLD], (int) (max_len-strlen(t[ARG_LOGO_INTEL_OLD])), "");
|
printf(" --%s %*s Show the old Intel logo\n", t[ARG_LOGO_INTEL_OLD], (int) (max_len-strlen(t[ARG_LOGO_INTEL_OLD])), "");
|
||||||
printf(" --%s %*s Show the new Intel logo\n", t[ARG_LOGO_INTEL_NEW], (int) (max_len-strlen(t[ARG_LOGO_INTEL_NEW])), "");
|
printf(" --%s %*s Show the new Intel logo\n", t[ARG_LOGO_INTEL_NEW], (int) (max_len-strlen(t[ARG_LOGO_INTEL_NEW])), "");
|
||||||
printf(" -%c, --%s %*s Show the full CPU name (do not abbreviate it)\n", c[ARG_FULLCPUNAME], t[ARG_FULLCPUNAME], (int) (max_len-strlen(t[ARG_FULLCPUNAME])), "");
|
printf(" -%c, --%s %*s Show the full CPU name (do not abbreviate it)\n", c[ARG_FULLCPUNAME], t[ARG_FULLCPUNAME], (int) (max_len-strlen(t[ARG_FULLCPUNAME])), "");
|
||||||
printf(" -%c, --%s %*s Print raw cpuid data (debug purposes)\n", c[ARG_RAW], t[ARG_RAW], (int) (max_len-strlen(t[ARG_RAW])), "");
|
printf(" -%c, --%s %*s Print raw cpuid data (debug purposes)\n", c[ARG_RAW], t[ARG_RAW], (int) (max_len-strlen(t[ARG_RAW])), "");
|
||||||
|
#endif // ARCH_X86
|
||||||
|
#ifdef ARCH_ARM
|
||||||
|
#ifdef __linux__
|
||||||
|
printf(" --%s %*s Measure the max CPU frequency instead of reading it\n", t[ARG_MEASURE_MAX_FREQ], (int) (max_len-strlen(t[ARG_MEASURE_MAX_FREQ])), "");
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
printf(" -%c, --%s %*s Print this help and exit\n", c[ARG_HELP], t[ARG_HELP], (int) (max_len-strlen(t[ARG_HELP])), "");
|
printf(" -%c, --%s %*s Print this help and exit\n", c[ARG_HELP], t[ARG_HELP], (int) (max_len-strlen(t[ARG_HELP])), "");
|
||||||
printf(" -%c, --%s %*s Print cpufetch version and exit\n", c[ARG_VERSION], t[ARG_VERSION], (int) (max_len-strlen(t[ARG_VERSION])), "");
|
printf(" -%c, --%s %*s Print cpufetch version and exit\n", c[ARG_VERSION], t[ARG_VERSION], (int) (max_len-strlen(t[ARG_VERSION])), "");
|
||||||
@@ -45,7 +51,7 @@ void print_help(char *argv[]) {
|
|||||||
printf(" * \"amd\": Use AMD color scheme \n");
|
printf(" * \"amd\": Use AMD color scheme \n");
|
||||||
printf(" * \"ibm\", Use IBM color scheme \n");
|
printf(" * \"ibm\", Use IBM color scheme \n");
|
||||||
printf(" * \"arm\": Use ARM color scheme \n");
|
printf(" * \"arm\": Use ARM color scheme \n");
|
||||||
printf(" * \"rockchip\": Use ARM color scheme \n");
|
printf(" * \"rockchip\": Use Rockchip color scheme \n");
|
||||||
printf(" * \"sifive\": Use SiFive color scheme \n");
|
printf(" * \"sifive\": Use SiFive color scheme \n");
|
||||||
printf(" * custom: If the argument of --color does not match any of the previous strings, a custom scheme can be specified.\n");
|
printf(" * custom: If the argument of --color does not match any of the previous strings, a custom scheme can be specified.\n");
|
||||||
printf(" 5 colors must be given in RGB with the format: R,G,B:R,G,B:...\n");
|
printf(" 5 colors must be given in RGB with the format: R,G,B:R,G,B:...\n");
|
||||||
@@ -80,6 +86,11 @@ void print_help(char *argv[]) {
|
|||||||
printf(" --accurate-pp option, which will measure the AVX frequency and show a more precise estimation\n");
|
printf(" --accurate-pp option, which will measure the AVX frequency and show a more precise estimation\n");
|
||||||
printf(" (this option is only available in x86 architectures).\n");
|
printf(" (this option is only available in x86 architectures).\n");
|
||||||
printf(" To precisely measure peak performance, see: https://github.com/Dr-Noob/peakperf\n");
|
printf(" To precisely measure peak performance, see: https://github.com/Dr-Noob/peakperf\n");
|
||||||
|
printf("\n");
|
||||||
|
printf(" Both --accurate-pp and --measure-max-freq measure the actual frequency of the CPU. However,\n");
|
||||||
|
printf(" they differ slightly. The former measures the max frequency while running vectorized SSE/AVX\n");
|
||||||
|
printf(" instructions and it is thus x86 only, whereas the latter simply measures the max clock cycle\n");
|
||||||
|
printf(" and is architecture independent.\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char* argv[]) {
|
int main(int argc, char* argv[]) {
|
||||||
|
|||||||
@@ -61,6 +61,7 @@ enum {
|
|||||||
ATTRIBUTE_NCORES,
|
ATTRIBUTE_NCORES,
|
||||||
ATTRIBUTE_NCORES_DUAL,
|
ATTRIBUTE_NCORES_DUAL,
|
||||||
#ifdef ARCH_X86
|
#ifdef ARCH_X86
|
||||||
|
ATTRIBUTE_SSE,
|
||||||
ATTRIBUTE_AVX,
|
ATTRIBUTE_AVX,
|
||||||
ATTRIBUTE_FMA,
|
ATTRIBUTE_FMA,
|
||||||
#elif ARCH_PPC
|
#elif ARCH_PPC
|
||||||
@@ -96,6 +97,7 @@ static const char* ATTRIBUTE_FIELDS [] = {
|
|||||||
"Cores:",
|
"Cores:",
|
||||||
"Cores (Total):",
|
"Cores (Total):",
|
||||||
#ifdef ARCH_X86
|
#ifdef ARCH_X86
|
||||||
|
"SSE:",
|
||||||
"AVX:",
|
"AVX:",
|
||||||
"FMA:",
|
"FMA:",
|
||||||
#elif ARCH_PPC
|
#elif ARCH_PPC
|
||||||
@@ -131,6 +133,7 @@ static const char* ATTRIBUTE_FIELDS_SHORT [] = {
|
|||||||
"Cores:",
|
"Cores:",
|
||||||
"Cores (Total):",
|
"Cores (Total):",
|
||||||
#ifdef ARCH_X86
|
#ifdef ARCH_X86
|
||||||
|
"SSE:",
|
||||||
"AVX:",
|
"AVX:",
|
||||||
"FMA:",
|
"FMA:",
|
||||||
#elif ARCH_PPC
|
#elif ARCH_PPC
|
||||||
@@ -594,6 +597,7 @@ bool print_cpufetch_x86(struct cpuInfo* cpu, STYLE s, struct color** cs, struct
|
|||||||
for(int i = 0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
|
for(int i = 0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
|
||||||
char* max_frequency = get_str_freq(ptr->freq);
|
char* max_frequency = get_str_freq(ptr->freq);
|
||||||
char* avx = get_str_avx(ptr);
|
char* avx = get_str_avx(ptr);
|
||||||
|
char* sse = get_str_sse(ptr);
|
||||||
char* fma = get_str_fma(ptr);
|
char* fma = get_str_fma(ptr);
|
||||||
char* cpu_num = emalloc(sizeof(char) * 9);
|
char* cpu_num = emalloc(sizeof(char) * 9);
|
||||||
|
|
||||||
@@ -628,8 +632,17 @@ bool print_cpufetch_x86(struct cpuInfo* cpu, STYLE s, struct color** cs, struct
|
|||||||
setAttribute(art, ATTRIBUTE_NCORES, n_cores);
|
setAttribute(art, ATTRIBUTE_NCORES, n_cores);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Show the most modern vector instructions.
|
||||||
|
// If AVX is supported show it, otherwise show SSE
|
||||||
|
if (strcmp(avx, "No") == 0) {
|
||||||
|
setAttribute(art, ATTRIBUTE_SSE, sse);
|
||||||
|
}
|
||||||
|
else {
|
||||||
setAttribute(art, ATTRIBUTE_AVX, avx);
|
setAttribute(art, ATTRIBUTE_AVX, avx);
|
||||||
setAttribute(art, ATTRIBUTE_FMA, fma);
|
setAttribute(art, ATTRIBUTE_FMA, fma);
|
||||||
|
}
|
||||||
|
|
||||||
if(l1i != NULL) setAttribute(art, ATTRIBUTE_L1i, l1i);
|
if(l1i != NULL) setAttribute(art, ATTRIBUTE_L1i, l1i);
|
||||||
if(l1d != NULL) setAttribute(art, ATTRIBUTE_L1d, l1d);
|
if(l1d != NULL) setAttribute(art, ATTRIBUTE_L1d, l1d);
|
||||||
if(l2 != NULL) setAttribute(art, ATTRIBUTE_L2, l2);
|
if(l2 != NULL) setAttribute(art, ATTRIBUTE_L2, l2);
|
||||||
|
|||||||
@@ -4,8 +4,8 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
|
|
||||||
#include "../common/global.h"
|
#include "global.h"
|
||||||
#include "../common/cpu.h"
|
#include "cpu.h"
|
||||||
|
|
||||||
uint32_t get_sys_info_by_name(char* name) {
|
uint32_t get_sys_info_by_name(char* name) {
|
||||||
size_t size = 0;
|
size_t size = 0;
|
||||||
@@ -146,6 +146,7 @@ struct uarch* get_cpu_uarch(struct cpuInfo* cpu) {
|
|||||||
struct frequency* get_frequency_info(void) {
|
struct frequency* get_frequency_info(void) {
|
||||||
struct frequency* freq = emalloc(sizeof(struct frequency));
|
struct frequency* freq = emalloc(sizeof(struct frequency));
|
||||||
|
|
||||||
|
freq->measured = false;
|
||||||
freq->max = get_max_freq_from_file(0);
|
freq->max = get_max_freq_from_file(0);
|
||||||
freq->base = get_min_freq_from_file(0);
|
freq->base = get_min_freq_from_file(0);
|
||||||
|
|
||||||
|
|||||||
@@ -19,6 +19,7 @@
|
|||||||
struct frequency* get_frequency_info(uint32_t core) {
|
struct frequency* get_frequency_info(uint32_t core) {
|
||||||
struct frequency* freq = emalloc(sizeof(struct frequency));
|
struct frequency* freq = emalloc(sizeof(struct frequency));
|
||||||
|
|
||||||
|
freq->measured = false;
|
||||||
freq->base = UNKNOWN_DATA;
|
freq->base = UNKNOWN_DATA;
|
||||||
freq->max = get_max_freq_from_file(core);
|
freq->max = get_max_freq_from_file(core);
|
||||||
|
|
||||||
|
|||||||
@@ -72,34 +72,6 @@ uint32_t get_apic_id(bool x2apic_id) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef __APPLE__
|
|
||||||
bool bind_to_cpu(int cpu_id) {
|
|
||||||
#ifdef _WIN32
|
|
||||||
HANDLE process = GetCurrentProcess();
|
|
||||||
DWORD_PTR processAffinityMask = 1 << cpu_id;
|
|
||||||
return SetProcessAffinityMask(process, processAffinityMask);
|
|
||||||
#elif defined __linux__
|
|
||||||
cpu_set_t currentCPU;
|
|
||||||
CPU_ZERO(¤tCPU);
|
|
||||||
CPU_SET(cpu_id, ¤tCPU);
|
|
||||||
if (sched_setaffinity (0, sizeof(currentCPU), ¤tCPU) == -1) {
|
|
||||||
printWarn("sched_setaffinity: %s", strerror(errno));
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
#elif defined __FreeBSD__
|
|
||||||
cpuset_t currentCPU;
|
|
||||||
CPU_ZERO(¤tCPU);
|
|
||||||
CPU_SET(cpu_id, ¤tCPU);
|
|
||||||
if(cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, sizeof(cpuset_t), ¤tCPU) == -1) {
|
|
||||||
printWarn("cpuset_setaffinity: %s", strerror(errno));
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
int get_total_cores_module(int total_cores, int module) {
|
int get_total_cores_module(int total_cores, int module) {
|
||||||
int total_modules = 2;
|
int total_modules = 2;
|
||||||
@@ -397,6 +369,11 @@ bool fill_apic_ids(uint32_t* apic_ids, int first_core, int n, bool x2apic_id) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) {
|
bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) {
|
||||||
|
if (topo->cach == NULL) {
|
||||||
|
printWarn("get_topology_from_apic: cach is NULL");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
uint32_t apic_id;
|
uint32_t apic_id;
|
||||||
uint32_t* apic_ids = emalloc(sizeof(uint32_t) * topo->total_cores_module);
|
uint32_t* apic_ids = emalloc(sizeof(uint32_t) * topo->total_cores_module);
|
||||||
uint32_t* apic_pkg = emalloc(sizeof(uint32_t) * topo->total_cores_module);
|
uint32_t* apic_pkg = emalloc(sizeof(uint32_t) * topo->total_cores_module);
|
||||||
|
|||||||
@@ -17,10 +17,6 @@ struct apic {
|
|||||||
bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo);
|
bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo);
|
||||||
uint32_t is_smt_enabled_amd(struct topology* topo);
|
uint32_t is_smt_enabled_amd(struct topology* topo);
|
||||||
|
|
||||||
#ifndef __APPLE__
|
|
||||||
bool bind_to_cpu(int cpu_id);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
int get_total_cores_module(int total_cores, int module);
|
int get_total_cores_module(int total_cores, int module);
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -6,6 +6,10 @@
|
|||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef __linux__
|
||||||
|
#include "../common/freq.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
@@ -219,7 +223,7 @@ int64_t get_peak_performance(struct cpuInfo* cpu, bool accurate_pp) {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
//First, check we have consistent data
|
//First, check we have consistent data
|
||||||
if(freq == UNKNOWN_DATA || topo->logical_cores == UNKNOWN_DATA) {
|
if(freq == UNKNOWN_DATA || topo == NULL || topo->logical_cores == UNKNOWN_DATA) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -452,7 +456,7 @@ struct cpuInfo* get_cpu_info(void) {
|
|||||||
cpu->cach = NULL;
|
cpu->cach = NULL;
|
||||||
cpu->feat = NULL;
|
cpu->feat = NULL;
|
||||||
|
|
||||||
uint32_t modules = 1;
|
cpu->num_cpus = 1;
|
||||||
uint32_t eax = 0;
|
uint32_t eax = 0;
|
||||||
uint32_t ebx = 0;
|
uint32_t ebx = 0;
|
||||||
uint32_t ecx = 0;
|
uint32_t ecx = 0;
|
||||||
@@ -510,12 +514,12 @@ struct cpuInfo* get_cpu_info(void) {
|
|||||||
cpu->hybrid_flag = (edx >> 15) & 0x1;
|
cpu->hybrid_flag = (edx >> 15) & 0x1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(cpu->hybrid_flag) modules = 2;
|
if(cpu->hybrid_flag) cpu->num_cpus = 2;
|
||||||
|
|
||||||
struct cpuInfo* ptr = cpu;
|
struct cpuInfo* ptr = cpu;
|
||||||
for(uint32_t i=0; i < modules; i++) {
|
for(uint32_t i=0; i < cpu->num_cpus; i++) {
|
||||||
int32_t first_core;
|
int32_t first_core;
|
||||||
set_cpu_module(i, modules, &first_core);
|
set_cpu_module(i, cpu->num_cpus, &first_core);
|
||||||
|
|
||||||
if(i > 0) {
|
if(i > 0) {
|
||||||
ptr->next_cpu = emalloc(sizeof(struct cpuInfo));
|
ptr->next_cpu = emalloc(sizeof(struct cpuInfo));
|
||||||
@@ -550,11 +554,7 @@ struct cpuInfo* get_cpu_info(void) {
|
|||||||
cpu->cpu_name = infer_cpu_name_from_uarch(cpu->arch);
|
cpu->cpu_name = infer_cpu_name_from_uarch(cpu->arch);
|
||||||
}
|
}
|
||||||
|
|
||||||
// If any field of the struct is NULL,
|
|
||||||
// return early, as next functions
|
|
||||||
// require non NULL fields in cach and topo
|
|
||||||
ptr->cach = get_cache_info(ptr);
|
ptr->cach = get_cache_info(ptr);
|
||||||
if(ptr->cach == NULL) return cpu;
|
|
||||||
|
|
||||||
if(cpu->hybrid_flag) {
|
if(cpu->hybrid_flag) {
|
||||||
ptr->topo = get_topology_info(ptr, ptr->cach, i);
|
ptr->topo = get_topology_info(ptr, ptr->cach, i);
|
||||||
@@ -562,16 +562,23 @@ struct cpuInfo* get_cpu_info(void) {
|
|||||||
else {
|
else {
|
||||||
ptr->topo = get_topology_info(ptr, ptr->cach, -1);
|
ptr->topo = get_topology_info(ptr, ptr->cach, -1);
|
||||||
}
|
}
|
||||||
if(cpu->topo == NULL) return cpu;
|
|
||||||
|
// If topo is NULL, return early, as get_peak_performance
|
||||||
|
// requires non-NULL topology.
|
||||||
|
if(ptr->topo == NULL) return cpu;
|
||||||
}
|
}
|
||||||
|
|
||||||
cpu->num_cpus = modules;
|
|
||||||
cpu->peak_performance = get_peak_performance(cpu, accurate_pp());
|
cpu->peak_performance = get_peak_performance(cpu, accurate_pp());
|
||||||
|
|
||||||
return cpu;
|
return cpu;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool get_cache_topology_amd(struct cpuInfo* cpu, struct topology* topo) {
|
bool get_cache_topology_amd(struct cpuInfo* cpu, struct topology* topo) {
|
||||||
|
if (topo->cach == NULL) {
|
||||||
|
printWarn("get_cache_topology_amd: cach is NULL");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
if(cpu->maxExtendedLevels >= 0x8000001D && cpu->topology_extensions) {
|
if(cpu->maxExtendedLevels >= 0x8000001D && cpu->topology_extensions) {
|
||||||
uint32_t i, eax, ebx, ecx, edx, num_sharing_cache, cache_type, cache_level;
|
uint32_t i, eax, ebx, ecx, edx, num_sharing_cache, cache_type, cache_level;
|
||||||
|
|
||||||
@@ -647,10 +654,17 @@ bool get_cache_topology_amd(struct cpuInfo* cpu, struct topology* topo) {
|
|||||||
|
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
void get_topology_from_udev(struct topology* topo) {
|
void get_topology_from_udev(struct topology* topo) {
|
||||||
// TODO: To be improved in the future
|
|
||||||
topo->total_cores = get_ncores_from_cpuinfo();
|
topo->total_cores = get_ncores_from_cpuinfo();
|
||||||
|
// TODO: To be improved in the future
|
||||||
|
if (topo->total_cores == 1) {
|
||||||
|
// We can assume it's a single core CPU
|
||||||
topo->logical_cores = topo->total_cores;
|
topo->logical_cores = topo->total_cores;
|
||||||
topo->physical_cores = topo->total_cores;
|
topo->physical_cores = topo->total_cores;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
topo->logical_cores = UNKNOWN_DATA;
|
||||||
|
topo->physical_cores = UNKNOWN_DATA;
|
||||||
|
}
|
||||||
topo->smt_available = 1;
|
topo->smt_available = 1;
|
||||||
topo->smt_supported = 1;
|
topo->smt_supported = 1;
|
||||||
topo->sockets = 1;
|
topo->sockets = 1;
|
||||||
@@ -697,28 +711,26 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach, int
|
|||||||
|
|
||||||
switch(cpu->cpu_vendor) {
|
switch(cpu->cpu_vendor) {
|
||||||
case CPU_VENDOR_INTEL:
|
case CPU_VENDOR_INTEL:
|
||||||
|
bool toporet = false;
|
||||||
if (cpu->maxLevels >= 0x00000004) {
|
if (cpu->maxLevels >= 0x00000004) {
|
||||||
bool toporet = get_topology_from_apic(cpu, topo);
|
toporet = get_topology_from_apic(cpu, topo);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
printWarn("Can't read topology information from cpuid (needed level is 0x%.8X, max is 0x%.8X)", 0x00000004, cpu->maxLevels);
|
||||||
|
}
|
||||||
if(!toporet) {
|
if(!toporet) {
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
printWarn("Failed to retrieve topology from APIC, using udev...\n");
|
printWarn("Failed to retrieve topology from APIC, using udev...");
|
||||||
get_topology_from_udev(topo);
|
get_topology_from_udev(topo);
|
||||||
#else
|
#else
|
||||||
printErr("Failed to retrieve topology from APIC, assumming default values...\n");
|
if (cpu->maxLevels >= 0x00000004)
|
||||||
|
printErr("Failed to retrieve topology from APIC, assumming default values...");
|
||||||
topo->logical_cores = UNKNOWN_DATA;
|
topo->logical_cores = UNKNOWN_DATA;
|
||||||
topo->physical_cores = UNKNOWN_DATA;
|
topo->physical_cores = UNKNOWN_DATA;
|
||||||
topo->smt_available = 1;
|
topo->smt_available = 1;
|
||||||
topo->smt_supported = 1;
|
topo->smt_supported = 1;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}
|
|
||||||
else {
|
|
||||||
printWarn("Can't read topology information from cpuid (needed level is 0x%.8X, max is 0x%.8X)", 0x00000001, cpu->maxLevels);
|
|
||||||
topo->physical_cores = 1;
|
|
||||||
topo->logical_cores = 1;
|
|
||||||
topo->smt_available = 1;
|
|
||||||
topo->smt_supported = 1;
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
case CPU_VENDOR_AMD:
|
case CPU_VENDOR_AMD:
|
||||||
case CPU_VENDOR_HYGON:
|
case CPU_VENDOR_HYGON:
|
||||||
@@ -918,6 +930,7 @@ struct cache* get_cache_info(struct cpuInfo* cpu) {
|
|||||||
|
|
||||||
struct frequency* get_frequency_info(struct cpuInfo* cpu) {
|
struct frequency* get_frequency_info(struct cpuInfo* cpu) {
|
||||||
struct frequency* freq = emalloc(sizeof(struct frequency));
|
struct frequency* freq = emalloc(sizeof(struct frequency));
|
||||||
|
freq->measured = false;
|
||||||
|
|
||||||
if(cpu->maxLevels < 0x00000016) {
|
if(cpu->maxLevels < 0x00000016) {
|
||||||
#if defined (_WIN32) || defined (__APPLE__)
|
#if defined (_WIN32) || defined (__APPLE__)
|
||||||
@@ -927,7 +940,7 @@ struct frequency* get_frequency_info(struct cpuInfo* cpu) {
|
|||||||
#else
|
#else
|
||||||
printWarn("Can't read frequency information from cpuid (needed level is 0x%.8X, max is 0x%.8X). Using udev", 0x00000016, cpu->maxLevels);
|
printWarn("Can't read frequency information from cpuid (needed level is 0x%.8X, max is 0x%.8X). Using udev", 0x00000016, cpu->maxLevels);
|
||||||
freq->base = UNKNOWN_DATA;
|
freq->base = UNKNOWN_DATA;
|
||||||
freq->max = get_max_freq_from_file(0);
|
freq->max = get_max_freq_from_file(cpu->first_core_id);
|
||||||
|
|
||||||
if(freq->max == 0) {
|
if(freq->max == 0) {
|
||||||
printWarn("Read max CPU frequency from udev and got 0 MHz");
|
printWarn("Read max CPU frequency from udev and got 0 MHz");
|
||||||
@@ -954,7 +967,7 @@ struct frequency* get_frequency_info(struct cpuInfo* cpu) {
|
|||||||
printWarn("Read max CPU frequency from CPUID and got 0 MHz");
|
printWarn("Read max CPU frequency from CPUID and got 0 MHz");
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
printWarn("Using udev to detect frequency");
|
printWarn("Using udev to detect frequency");
|
||||||
freq->max = get_max_freq_from_file(0);
|
freq->max = get_max_freq_from_file(cpu->first_core_id);
|
||||||
|
|
||||||
if(freq->max == 0) {
|
if(freq->max == 0) {
|
||||||
printWarn("Read max CPU frequency from udev and got 0 MHz");
|
printWarn("Read max CPU frequency from udev and got 0 MHz");
|
||||||
@@ -966,6 +979,15 @@ struct frequency* get_frequency_info(struct cpuInfo* cpu) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef __linux__
|
||||||
|
if (freq->max == UNKNOWN_DATA || measure_max_frequency_flag()) {
|
||||||
|
if (freq->max == UNKNOWN_DATA)
|
||||||
|
printWarn("All previous methods failed, measuring CPU frequency");
|
||||||
|
freq->max = measure_max_frequency(cpu->first_core_id);
|
||||||
|
freq->measured = true;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
return freq;
|
return freq;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -987,24 +1009,33 @@ char* get_str_topology(struct cpuInfo* cpu, struct topology* topo, bool dual_soc
|
|||||||
string = emalloc(sizeof(char) * (strlen(STRING_UNKNOWN) + 1));
|
string = emalloc(sizeof(char) * (strlen(STRING_UNKNOWN) + 1));
|
||||||
strcpy(string, STRING_UNKNOWN);
|
strcpy(string, STRING_UNKNOWN);
|
||||||
}
|
}
|
||||||
else if(topo->smt_supported > 1) {
|
else {
|
||||||
|
char cores_str[6];
|
||||||
|
memset(cores_str, 0, sizeof(char) * 6);
|
||||||
|
if (topo->physical_cores * topo_sockets > 1)
|
||||||
|
strcpy(cores_str, "cores");
|
||||||
|
else
|
||||||
|
strcpy(cores_str, "core");
|
||||||
|
|
||||||
|
if(topo->smt_supported > 1) {
|
||||||
// 4 for digits, 21 for ' cores (SMT disabled)' which is the longest possible output
|
// 4 for digits, 21 for ' cores (SMT disabled)' which is the longest possible output
|
||||||
uint32_t max_size = 4+21+1;
|
uint32_t max_size = 4+21+1;
|
||||||
string = emalloc(sizeof(char) * max_size);
|
string = emalloc(sizeof(char) * max_size);
|
||||||
|
|
||||||
if(topo->smt_available > 1)
|
if(topo->smt_available > 1)
|
||||||
snprintf(string, max_size, "%d cores (%d threads)", topo->physical_cores * topo_sockets, topo->logical_cores * topo_sockets);
|
snprintf(string, max_size, "%d %s (%d threads)", topo->physical_cores * topo_sockets, cores_str, topo->logical_cores * topo_sockets);
|
||||||
else {
|
else {
|
||||||
if(cpu->cpu_vendor == CPU_VENDOR_AMD)
|
if(cpu->cpu_vendor == CPU_VENDOR_AMD)
|
||||||
snprintf(string, max_size, "%d cores (SMT disabled)", topo->physical_cores * topo_sockets);
|
snprintf(string, max_size, "%d %s (SMT disabled)", topo->physical_cores * topo_sockets, cores_str);
|
||||||
else
|
else
|
||||||
snprintf(string, max_size, "%d cores (HT disabled)", topo->physical_cores * topo_sockets);
|
snprintf(string, max_size, "%d %s (HT disabled)", topo->physical_cores * topo_sockets, cores_str);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
uint32_t max_size = 4+7+1;
|
uint32_t max_size = 4+7+1;
|
||||||
string = emalloc(sizeof(char) * max_size);
|
string = emalloc(sizeof(char) * max_size);
|
||||||
snprintf(string, max_size, "%d cores",topo->physical_cores * topo_sockets);
|
snprintf(string, max_size, "%d %s",topo->physical_cores * topo_sockets, cores_str);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return string;
|
return string;
|
||||||
|
|||||||
Reference in New Issue
Block a user