Compare commits

...

34 Commits
v0.410 ... v0.7

Author SHA1 Message Date
Dr-Noob
60bc02185d Small fix 2020-09-01 20:48:37 +02:00
Dr-Noob
ae752bac77 Add images 2020-09-01 20:45:52 +02:00
Dr-Noob
500ccfa871 Stable version 0.7 heavily tested in many different CPUs 2020-09-01 20:44:48 +02:00
Dr-Noob
877833db0a Dont fetch if smt is enabled if its not supported (AMD). Dont guess cache topology, fetch it from CPUID (AMD) 2020-09-01 13:08:44 +02:00
Dr-Noob
5cca6df218 Fix memory leaks. Add debug message when microarch is unknown 2020-09-01 11:32:08 +02:00
Dr-Noob
de8952b4ea Fix bug which caused you couldnt use --version. Change --style to be more user friendly. Update --help 2020-09-01 11:00:11 +02:00
Dr-Noob
1f80566f63 New info to be displayed (uarch and process) instead of other info (sha, aes, sse) 2020-09-01 09:37:53 +02:00
Dr-Noob
ab1416563c Fix PP in Ice Lake 2020-08-31 18:27:32 +02:00
Dr-Noob
1a9c0546f2 Add support for detecting AMD microarch 2020-08-31 15:56:21 +02:00
Dr-Noob
35efdd8f2c Fix #26. Guess number of VPUs according to microarchitecture 2020-08-31 14:04:41 +02:00
Dr-Noob
5148962fa3 Add code to detect CPU microarchitecture (Intel only, at the moment) 2020-08-31 13:18:25 +02:00
Dr-Noob
d998acdcdf Fix #25: Compute PP taking into account the number of sockets 2020-08-31 09:33:39 +02:00
Dr-Noob
81a45628f0 Code refactoring. Forgot to add verbose option to help 2020-08-30 13:55:37 +02:00
Dr-Noob
4f98a5bccf Refactor previous commit 2020-08-30 12:42:38 +02:00
Dr-Noob
dae0f678ad Fix #23. I tried fetching the cache topology in AMD but could not find a proper way, so the code fallback to two commits ago. cpufetch has to guess cache sizes except L3, which can be fetched. Since I have been trying many approaches and stuff, the code needs to be refactored 2020-08-30 12:12:25 +02:00
Dr-Noob
69cc08759a Fix #21 and #22: Obtain the number of caches of every level instead of guessing them. It is done by fetching cache topology from apic. It works, but it needs a big refactoring. Moreover, it currently works only on Intel CPUs, so this breaks the cache in AMD. 2020-08-29 21:51:14 +02:00
Dr-Noob
d8dad29a57 Fix SMT bug in AMD. I would like to improve it, since Intel can use APIC with 0x1 and 0xB (extended) while AMD does with 0x1 and extended seems to be 0x1E. Add support to detect more than one L3 cache. This is not a very elegant solution, since we still assume that we have the same number of caches as caches in a given level. To fix it, cpufetch should know how many caches are in a given level (hint, Linux knows using shared_cpu_map) 2020-08-29 15:42:56 +02:00
Dr-Noob
e08b60b1c8 Project stopped until I have time to continue 2020-07-12 19:08:38 +02:00
Dr-Noob
ad6c3c88ce Small corrections in code and Makefile 2020-07-12 15:39:34 +02:00
Dr-Noob
e114bde128 Complete topology read in AMD 2020-07-06 01:58:48 +02:00
Dr-Noob
7164409ca2 Add legacy style (for Windows) and make it the default for Windows. Add verbose flag 2020-07-06 01:30:14 +02:00
Dr-Noob
08f79bb914 Fix compilation in Windows and add support for bind to specific cores. Separate APIC code in other file 2020-07-06 01:16:59 +02:00
Dr-Noob
b457c86100 Add support for obtaining topology in old processors (with CPUID less than 0xB) 2020-07-05 19:59:55 +02:00
Dr-Noob
e5d86289b5 Use APIC to obtain topology. This is interesting because this will allow us to obtain it even on older CPUs (without CPUID 0xB) (will be added in future commits) 2020-07-05 16:52:41 +02:00
Dr-Noob
c6c4d8b6fd Fix spaces bug in CPU name 2020-07-03 19:42:05 +02:00
Dr-Noob
c8fde107dd Fix ascii logo in AMD. Fix output on CPUs without L3 2020-07-03 16:24:14 +02:00
Dr-Noob
b076189b32 Add support to detect if HT/SMT is enabled or disabled 2020-07-03 16:11:09 +02:00
Dr-Noob
d43229359a Support for 4 colors with --color (2 for ascii, 2 for text) 2020-07-03 09:42:30 +02:00
Dr-Noob
ba047c76e3 Add two different styles. The old one is now called retro, and the new one, which is the default, is called fancy 2020-07-02 18:53:28 +02:00
Dr-Noob
942a86c04f Remove styles and add option to specify custom color output in RGB format 2020-07-02 16:14:37 +02:00
Dr-Noob
ea338a68c8 Forgot to support AMD in printer 2020-06-29 17:32:50 +02:00
Dr-Noob
d7b7e2b62d Support printing dual socket. Fix bug where cache sizes were not displayed correctly (they were truncated) 2020-06-29 17:13:37 +02:00
Dr-Noob
941bf35d03 Big refactoring. Move ascii managment to printer. Mix extended and standart cpuid functions in cpuid file. Old cpuid renamed to cpuid_asm. Store cpu name in cpuInfo struct 2020-06-28 15:51:30 +02:00
Dr-Noob
131d860de6 Print total cache sizes (for L1 and L2, but also for L3 in case we run in dual socket!) 2020-06-28 12:47:03 +02:00
27 changed files with 2488 additions and 1206 deletions

View File

@@ -1,19 +1,21 @@
CXX=gcc CXX=gcc
CXXFLAGS=-Wall -Wextra -Werror -fstack-protector-all -pedantic -Wno-unused -std=c99 CXXFLAGS=-Wall -Wextra -Werror -pedantic -fstack-protector-all -pedantic -std=c99
SANITY_FLAGS=-Wfloat-equal -Wshadow -Wpointer-arith -Wstrict-overflow=5 -Wformat=2 SANITY_FLAGS=-Wfloat-equal -Wshadow -Wpointer-arith -Wstrict-overflow=5 -Wformat=2
SRC_DIR=src/ SRC_DIR=src/
SOURCE=$(SRC_DIR)main.c $(SRC_DIR)standart.c $(SRC_DIR)extended.c $(SRC_DIR)cpuid.c $(SRC_DIR)printer.c $(SRC_DIR)args.c $(SRC_DIR)global.c SOURCE=$(SRC_DIR)main.c $(SRC_DIR)cpuid.c $(SRC_DIR)apic.c $(SRC_DIR)cpuid_asm.c $(SRC_DIR)printer.c $(SRC_DIR)args.c $(SRC_DIR)global.c $(SRC_DIR)uarch.c
HEADERS=$(SRC_DIR)standart.h $(SRC_DIR)extended.h $(SRC_DIR)cpuid.h $(SRC_DIR)printer.h $(SRC_DIR)ascii.h $(SRC_DIR)args.h $(SRC_DIR)global.h HEADERS=$(SRC_DIR)cpuid.h $(SRC_DIR)apic.h $(SRC_DIR)cpuid_asm.h $(SRC_DIR)printer.h $(SRC_DIR)ascii.h $(SRC_DIR)args.h $(SRC_DIR)global.h $(SRC_DIR)uarch.h
ifneq ($(OS),Windows_NT) ifneq ($(OS),Windows_NT)
SOURCE += $(SRC_DIR)udev.c SOURCE += $(SRC_DIR)udev.c
HEADERS += $(SRC_DIR)udev.h HEADERS += $(SRC_DIR)udev.h
OUTPUT=cpufetch
else
SANITY_FLAGS += -Wno-pedantic-ms-format
OUTPUT=cpufetch.exe
endif endif
OUTPUT=cpufetch
all: $(OUTPUT) all: $(OUTPUT)
debug: CXXFLAGS += -g -O0 debug: CXXFLAGS += -g -O0

View File

@@ -1,12 +1,28 @@
# cpufetch # cpufetch
Prints a fancy summary of the CPU with some advanced information Prints a fancy summary of the CPU with some advanced information
![cpu1](i9.png)
### Platforms ### Platforms
This tool works on both 64 only and under Linux because of its [implementation details](#implementation). AMD support is not guaranteed so information may not be correct cpufetch currently supports x86 CPUs (both Intel and AMD CPUs)
| Platform | Intel | AMD | Notes |
|:---------:|:-------------------------:|:------------------------:|:-----------------:|
| Linux | :heavy_check_mark: | :heavy_check_mark: | Preferred platform |
| Windows | :heavy_check_mark: | :heavy_check_mark: | Some information may be missing. <br> No colors and worse CPU art |
| macOS | :heavy_exclamation_mark: | :heavy_exclamation_mark: | Untested |
### Usage and installation ### Usage and installation
#### Linux
There is a cpufetch package available in Arch Linux ([cpufetch-git](https://aur.archlinux.org/packages/cpufetch-git)).
If you are using another distro, you can build `cpufetch` from source (see below)
#### Windows
In the [releases](https://github.com/Dr-Noob/cpufetch/releases) section you will find some cpufetch executables compiled for Windows. Just download and run it from Windows CMD.
#### Building from source
Just clone the repo and use `make` to compile it Just clone the repo and use `make` to compile it
``` ```
@@ -16,38 +32,34 @@ make
./cpufetch ./cpufetch
``` ```
The Makefile is designed to work on both Linux and Windows.
### Example ### Example
This is the output of `cpufetch` in a i7-4790K Here are more examples of how `cpufetch` looks on different CPUs.
![Example](/preview.png) ![cpu2](epyc.png)
### Output ![cpu3](cascade_lake.png)
Output is detailed as follows: ### Colors and style
By default, `cpufetch` will print the CPU art with the system colorscheme. However, you can always set a custom color scheme, either
specifying Intel or AMD, or specifying the colors in RGB format:
| Field | Description | Possible Values | ```
|:----------:|:-----------------------:|:-----------------:| ./cpufetch --color intel (default color for Intel)
| Name | Name of the CPU | Any valid CPU name | ./cpufetch --color amd (default color for AMD)
| Frequency | Max frequency of the CPU(in GHz) | X.XX(GHz or MHz) ./cpufetch --color 239,90,45:210,200,200:100,200,45:0,200,200 (example)
| N.Cores | Number of cores the CPU has. If CPU supports `Hyperthreading` or similar, this will show cores and threads separately | X(cores)X(threads) ```
| AVX | Type of AVX supported by the CPU or None. AVX instructions allows the CPU to vectorize the code with a witdh of 256 bits in single precision(or 512bits if AVX512 is supported) | AVX,AVX2,AVX512,None
| SSE | Same as AVX, but SSE family are 128bits witdh | SSE, SSE2, SSE3, SSSE3, SSE4a, SSE4_1, SSE4_2,None |
| FMA | Does this CPU support FMA(Fused Multiply Add)?This instruction allows the CPU to multiply and add a value on the same clock cycle | FMA3,FMA4,None |
| AES | Does this CPU support AES? This instruction is allows the CPU to make AES cypher efficiently | Yes or No |
| SHA | Does this CPU support SHA? This instruction is allows the CPU to make SHA hashing efficiently | Yes or No |
| L1 Size | Size(in bytes) of the L1 cache, separated in data and instructions | XXB(Data)XXB(instructions) |
| L2 Size | Size(in bytes) of the L2 cache(both are unified) | XXXKB or None |
| L3 Size | Same as L3 | XXXXKB or None |
| Peak FLOPS | Max FLOPS(Floating Point Operation Per Second) this CPU could theoretical achieve. This is calculated by: `N.Cores*Freq*2(Because 2 functional units)*2(If has FMA)*VectorWidth` | XXX.XX (G/T)FLOPs |
`cpufetch` also prints a simple ascii art of the manufacturer logo. In the case of setting the colors using RGB, 4 colors must be given in the format: ``[R,G,B:R,G,B:R,G,B:R,G,B]``. These colors correspond to the CPU art colors (first 2) and the text colors (following 2). Thus, you can customize all the colors.
### Implementation ### Implementation
`cpufetch` makes use of two techniques to fetch data: `cpufetch` fetches all of the information using the `CPUID` x86 instruction. There are, however, some cases where the CPU does not support fetching some needed information. In this case, `cpufetch` will use `/sys/devices/system/cpu` in Linux as a fallback. If `cpufetch` is running on Windows and `CPUID` does not give all the data, `cpufetch` won't be able to show it. [I hope this can be fixed in the future](https://github.com/Dr-Noob/cpufetch/issues/30)
* __cpuid__: CPU name, number of threads per core and instructions features are fetched via _cpuid_. See [this](http://www.sandpile.org/x86/cpuid.htm) and [Intel Processor Identification and the CPUID Instruction](https://www.scss.tcd.ie/~jones/CS4021/processor-identification-cpuid-instruction-note.pdf) for more information.
* __udev__: Cache and frequency are fetched via _udev_, by looking at specific files from `/sys/devices/system/cpu`
### Bugs or improvements ### Bugs or improvements
Feel free to open a issue on the repo to report a issue or propose any improvement in the tool There are many open issues in github (see [issues](https://github.com/Dr-Noob/cpufetch/issues)). Feel free to open a new one to report an issue or propose any improvement in `cpufetch`
### Testers
I would like to thank [Gonzalocl](https://github.com/Gonzalocl) and [OdnetninI](https://github.com/OdnetninI) for their help, running `cpufetch` in many different CPUs they have access to, which makes it easier to debug and check the correctness of `cpufetch`.

BIN
cascade_lake.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 42 KiB

View File

@@ -1,33 +1,57 @@
.TH man 8 "22 Jun 2018" "0.32" "cpufetch man page" .TH man 8 "1 Sep 2020" "0.7" "cpufetch man page"
.SH NAME .SH NAME
cpufetch \- Prints a fancy summary of the CPU with some advanced information cpufetch \- Prints a fancy summary of the CPU with some advanced information
.SH SYNOPSIS .SH SYNOPSIS
cpufetch [--help] [--style STYLE] cpufetch [--version] [--help] [--levels] [--style fancy|retro|legacy] [--color intel|amd|'R,G,B:R,G,B:R,G,B:R,G,B']
.SH DESCRIPTION .SH DESCRIPTION
cpufetch will print CPU information, for which will query cpuid instructions and udev directories on Linux. It should display: cpufetch will print CPU information, for which will query CPUID instructions and udev directories on Linux as a fallback method. Some of these features are:
.IP \[bu] 2 .IP \[bu] 2
Name Name
.IP \[bu] .IP \[bu]
Frequency Frequency
.IP \[bu] .IP \[bu]
Number of cores(Physical and Logical) Number of cores (Physical and Logical)
.IP \[bu] .IP \[bu]
AVX,SSE,FMA,AES and SHA support Cache sizes
.IP \[bu] .IP \[bu]
L1,L2 and L3 size Theoretical peak performance in floating point operations per second (FLOP/s)
.IP \[bu]
Theoretical peak flops
.SH OPTIONS .SH OPTIONS
.TP .TP
\fB\-\-color\fR \f[I][intel|amd|R,G,B:R,G,B:R,G,B:R,G,B]\f[]
Set the color scheme. By default, cpufetch uses the system color scheme. This option lets the user use different colors to print the CPU art:
.IP \[bu]
\fB"intel"\fR: Use intel color scheme
.IP \[bu]
\fB"amd"\fR: Use amd color scheme
.IP \[bu]
\fBcustom\fR: If the color does not match "intel" or "amd", a custom scheme can be specified: 4 colors must be given in RGB with the format: R,G,B:R,G,B:...
These colors correspond to CPU art color (2 colors) and for the text colors (following 2)
.TP
\fB\-\-style\fR \f[I]STYLE\f[]
Specify the style of ascii logo:
.IP \[bu]
\fB"fancy"\fR: Default style
.IP \[bu]
\fB"retro"\fR: Old cpufetch style
.IP \[bu]
\fB"legacy"\fR: Fallback style for terminals that do not support colors
.TP
\fB\-\-levels\fR
Prints CPUID levels and CPU name
.TP
\fB\-\-verbose\fR
Prints extra information (if available) about how cpufetch tried fetching information
.TP
\fB\-\-help\fR \fB\-\-help\fR
Prints help Prints help
.TP .TP
\fB\-\-version\fR \fB\-\-version\fR
Prints cpufetch version Prints cpufetch version
.TP
\fB\-\-style\fR
Specify the color style of ascii logo
.SH BUGS .SH BUGS
No known bugs. AMD CPUs may not be fully supported Bugs should be posted on: https://github.com/Dr-Noob/cpufetch/issues
.SH NOTES
Peak performance information is NOT accurate. cpufetch computes peak performance using the max
frequency. However, to properly compute peak performance, you need to know the frequency of the
CPU running AVX code, which is not fetched by cpufetch since it depends on each specific CPU.
.SH AUTHOR .SH AUTHOR
Dr-Noob (https://github.com/Dr-Noob) Dr-Noob (https://github.com/Dr-Noob)

BIN
epyc.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 33 KiB

BIN
i9.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 34 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 32 KiB

352
src/apic.c Normal file
View File

@@ -0,0 +1,352 @@
#ifdef _WIN32
#include <windows.h>
#else
#define _GNU_SOURCE
#include <sched.h>
#endif
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "apic.h"
#include "cpuid_asm.h"
#include "global.h"
/*
* bit_scan_reverse and create_mask code taken from:
* https://software.intel.com/content/www/us/en/develop/articles/intel-64-architecture-processor-topology-enumeration.html
*/
/*
 * Finds the most significant set bit of `mask`.
 *
 * index: receives the zero-based position of the highest set bit.
 *        It is left untouched when `mask` is 0.
 * mask:  64-bit value to scan.
 *
 * Returns 1 if `mask` has at least one bit set, 0 otherwise.
 */
unsigned char bit_scan_reverse(uint32_t* index, uint64_t mask) {
  for(uint32_t bit = 8 * sizeof(uint64_t); bit > 0; bit--) {
    // The probe must be an unsigned 64-bit one: shifting a signed 1LL
    // into bit 63 is undefined behavior
    if((mask & (UINT64_C(1) << (bit - 1))) != 0) {
      *index = bit - 1;
      break;
    }
  }
  return (unsigned char) (mask != 0);
}
/*
 * Builds a bit mask wide enough to hold `num_entries` distinct ids.
 *
 * num_entries: number of ids the field must be able to represent.
 * mask_width:  optional (may be NULL); receives the number of bits of the mask.
 *
 * Returns the mask with the low `*mask_width` bits set. An empty field
 * (num_entries == 0) yields mask 0 and width 0. A width of 31 bits or more
 * yields an all-ones 32-bit mask.
 */
uint32_t create_mask(uint32_t num_entries, uint32_t *mask_width) {
  uint32_t msb = 0;
  uint64_t k;

  // Without this guard, 0 * 2 - 1 wraps to all ones and the code below
  // would shift by 63
  if (num_entries == 0) {
    if (mask_width) *mask_width = 0;
    return 0;
  }

  // NearestPo2(numEntries) is the nearest power of 2 integer that is not less than numEntries
  // The most significant bit of (numEntries * 2 -1) matches the above definition
  k = (uint64_t)(num_entries) * 2 - 1;

  if (bit_scan_reverse(&msb, k) == 0) {
    if (mask_width) *mask_width = 0;
    return 0;
  }

  if (mask_width) *mask_width = msb;
  // msb can reach 32 for num_entries > 2^31; never shift by the type width
  if (msb >= 31) return UINT32_MAX;

  return (UINT32_C(1) << msb) - 1;
}
/*
 * Reads an APIC id through CPUID on whichever CPU executes this code.
 *
 * x2apic_id: when true, CPUID leaf 0xB is queried and EDX is returned;
 *            when false, CPUID leaf 0x1 is queried and the top 8 bits
 *            of EBX are returned.
 */
uint32_t get_apic_id(bool x2apic_id) {
  uint32_t eax = x2apic_id ? 0x0000000B : 0x00000001;
  uint32_t ebx = 0;
  uint32_t ecx = 0;
  uint32_t edx = 0;

  cpuid(&eax, &ebx, &ecx, &edx);

  if(x2apic_id)
    return edx;
  return (ebx >> 24);
}
/*
 * Restricts the calling process to run on logical CPU `cpu_id`.
 *
 * On Windows it uses SetProcessAffinityMask; elsewhere it uses
 * sched_setaffinity on the caller (pid 0) and reports failures via perror.
 *
 * Returns true on success, false otherwise.
 */
bool bind_to_cpu(int cpu_id) {
#ifdef _WIN32
  HANDLE process = GetCurrentProcess();
  // Widen before shifting: a plain int `1 << cpu_id` cannot represent
  // CPU 31 and above, while DWORD_PTR is as wide as the affinity mask
  DWORD_PTR processAffinityMask = (DWORD_PTR) 1 << cpu_id;
  return SetProcessAffinityMask(process, processAffinityMask) != 0;
#else
  cpu_set_t currentCPU;
  CPU_ZERO(&currentCPU);
  CPU_SET(cpu_id, &currentCPU);
  if (sched_setaffinity (0, sizeof(currentCPU), &currentCPU) == -1) {
    perror("sched_setaffinity");
    return false;
  }
  return true;
#endif
}
/*
 * Fills the SMT, core and package masks of topo->apic using CPUID leaves
 * 0x1 and 0x4 (the path used when leaf 0xB is not available).
 *
 * Always returns true.
 */
bool fill_topo_masks_apic(struct topology* topo) {
  uint32_t eax;
  uint32_t ebx = 0;
  uint32_t ecx = 0;
  uint32_t edx = 0;
  uint32_t logical_ids_per_pkg;
  uint32_t core_ids_per_pkg;
  uint32_t smt_ids_per_core;

  // Leaf 0x1: EBX[23:16]
  eax = 0x00000001;
  cpuid(&eax, &ebx, &ecx, &edx);
  logical_ids_per_pkg = (ebx >> 16) & 0xFF;

  // Leaf 0x4, subleaf 0: EAX[31:26] + 1
  eax = 0x00000004;
  ecx = 0;
  cpuid(&eax, &ebx, &ecx, &edx);
  core_ids_per_pkg = (eax >> 26) + 1;

  smt_ids_per_core = logical_ids_per_pkg / core_ids_per_pkg;

  topo->apic->smt_mask = create_mask(smt_ids_per_core, &(topo->apic->smt_mask_width));
  // pkg_mask_shift temporarily holds the core field width; the SMT width is added next
  topo->apic->core_mask = create_mask(core_ids_per_pkg, &(topo->apic->pkg_mask_shift));
  topo->apic->pkg_mask_shift += topo->apic->smt_mask_width;
  // The core field sits right above the SMT field
  topo->apic->core_mask <<= topo->apic->smt_mask_width;
  // Everything above the core and SMT fields identifies the package
  topo->apic->pkg_mask = ~(topo->apic->core_mask | topo->apic->smt_mask);

  return true;
}
/*
 * Fills the SMT, core and package masks of topo->apic by walking the
 * sublevels of CPUID leaf 0xB until one reports EBX == 0.
 *
 * Also stores EBX[15:0] of the SMT level in topo->smt_supported.
 *
 * Returns false (after printing an error) when no SMT level was reported,
 * true otherwise.
 */
bool fill_topo_masks_x2apic(struct topology* topo) {
  int32_t level_type;
  uint32_t level_shift;
  uint32_t coreplus_smt_mask = 0;
  bool level2 = false;
  bool level1 = false;

  uint32_t eax = 0;
  uint32_t ebx = 0;
  uint32_t ecx = 0;
  uint32_t edx = 0;
  uint32_t i = 0;

  while(true) {
    eax = 0x0000000B;
    ecx = i;
    cpuid(&eax, &ebx, &ecx, &edx);
    if(ebx == 0) break;

    level_type = (ecx >> 8) & 0xFF;
    // Only EAX[4:0] carries the shift width; the upper bits are reserved.
    // Keeping the value below 32 also keeps the shifts below well defined
    level_shift = eax & 0x1F;

    switch(level_type) {
      case 1: // SMT
        topo->apic->smt_mask = ~(0xFFFFFFFF << level_shift);
        topo->apic->smt_mask_width = level_shift;
        topo->smt_supported = ebx & 0xFFFF;
        level1 = true;
        break;
      case 2: // Core
        coreplus_smt_mask = ~(0xFFFFFFFF << level_shift);
        topo->apic->pkg_mask_shift = level_shift;
        topo->apic->pkg_mask = ~coreplus_smt_mask;
        level2 = true;
        break;
      default:
        printErr("Found invalid level when querying topology: %d", level_type);
        break;
    }

    i++; // sublevel to query
  }

  if (level1 && level2) {
    // Core bits are what remains of the core+SMT field once SMT bits are removed
    topo->apic->core_mask = coreplus_smt_mask ^ topo->apic->smt_mask;
  }
  else if (!level2 && level1) {
    // No core level reported: the package field starts right above the SMT field
    topo->apic->core_mask = 0;
    topo->apic->pkg_mask_shift = topo->apic->smt_mask_width;
    topo->apic->pkg_mask = ~topo->apic->smt_mask;
  }
  else {
    printErr("SMT level was not found when querying topology");
    return false;
  }

  return true;
}
// Computes how many entries the id-indexed scratch arrays need: the largest
// cache id found plus one, but never less than the number of cores.
// Not a very elegant solution. The width should always be as long
// as the number of cores, but in the case of Xeon Phi KNL it is not
uint32_t max_apic_id_size(uint32_t** cache_id_apic, struct topology* topo) {
  uint32_t highest = 0;

  for(int core=0; core < topo->total_cores; core++) {
    for(int level=0; level < topo->cach->max_cache_level; level++) {
      if(highest < cache_id_apic[core][level])
        highest = cache_id_apic[core][level];
    }
  }

  // Ids are used as indexes, so one more slot than the largest id is needed
  highest++;

  if(highest > topo->total_cores)
    return highest;
  return topo->total_cores;
}
/*
 * Derives the system and cache topology from the per-CPU ids collected
 * from the APIC.
 *
 * apic_pkg:      package id of every logical CPU (topo->total_cores entries).
 * apic_smt:      SMT id of every logical CPU (topo->total_cores entries).
 * cache_id_apic: cache_id_apic[cpu][level] is the cache id of `cpu` at `level`.
 * topo:          sockets and smt_available are incremented; logical_cores,
 *                physical_cores and the num_caches of every cache level are set.
 *
 * Returns true on success. Returns false (after printing an error) if the
 * scratch memory cannot be allocated or no socket / SMT id could be counted.
 */
bool build_topo_from_apic(uint32_t* apic_pkg, uint32_t* apic_smt, uint32_t** cache_id_apic, struct topology* topo) {
  bool ret = false;
  uint32_t size = max_apic_id_size(cache_id_apic, topo);
  // calloc hands back zeroed memory, so no separate memset is needed
  uint32_t* sockets = calloc(size, sizeof(uint32_t));
  uint32_t* smt = calloc(size, sizeof(uint32_t));
  uint32_t* apic_id = calloc(size, sizeof(uint32_t));
  uint32_t num_caches = 0;

  if(sockets == NULL || smt == NULL || apic_id == NULL) {
    printErr("Failed to allocate memory to build the topology");
    goto cleanup;
  }

  // System topology: mark every package id and SMT id that shows up...
  for(int i=0; i < topo->total_cores; i++) {
    sockets[apic_pkg[i]] = 1;
    smt[apic_smt[i]] = 1;
  }
  // ...and count how many distinct ones were marked
  for(int i=0; i < topo->total_cores; i++) {
    if(sockets[i] != 0)
      topo->sockets++;
    if(smt[i] != 0)
      topo->smt_available++;
  }

  // Both values are used as divisors right below
  if(topo->sockets == 0 || topo->smt_available == 0) {
    printErr("Could not determine the number of sockets or SMT threads");
    goto cleanup;
  }

  topo->logical_cores = topo->total_cores / topo->sockets;
  topo->physical_cores = topo->logical_cores / topo->smt_available;

  // Cache topology: the number of caches in a level is the number of
  // distinct cache ids seen in that level
  for(int i=0; i < topo->cach->max_cache_level; i++) {
    num_caches = 0;
    memset(apic_id, 0, sizeof(uint32_t) * size);

    for(int c=0; c < topo->total_cores; c++) {
      apic_id[cache_id_apic[c][i]]++;
    }
    for(uint32_t c=0; c < size; c++) {
      if(apic_id[c] > 0) num_caches++;
    }

    topo->cach->cach_arr[i]->num_caches = num_caches;
  }

  ret = true;

cleanup:
  free(sockets);
  free(smt);
  free(apic_id);

  return ret;
}
/*
 * For every cache level, queries CPUID leaf 0x4 (subleaf = level index) and
 * stores in topo->apic->cache_select_mask[level] the mask covering the APIC id
 * bits shared by the logical processors that use the same cache.
 *
 * topo->apic->cache_select_mask must already hold max_cache_level entries.
 */
void get_cache_topology_from_apic(struct topology* topo) {
  uint32_t eax = 0x00000004;
  uint32_t ebx = 0;
  uint32_t ecx = 0;
  uint32_t edx = 0;

  for(int i=0; i < topo->cach->max_cache_level; i++) {
    eax = 0x00000004;
    ecx = i;

    cpuid(&eax, &ebx, &ecx, &edx);

    // EAX[25:14] is a 12-bit field, hence the 0xFFF mask
    uint32_t SMTMaxCntPerEachCache = ((eax >> 14) & 0xFFF) + 1;
    // The width of the mask is not needed here
    topo->apic->cache_select_mask[i] = create_mask(SMTMaxCntPerEachCache, NULL);
  }
}
/*
 * Fills `topo` (sockets, cores, SMT and number of caches per level) by binding
 * to every logical CPU in turn and decoding its APIC id.
 *
 * cpu:  only cpu->maxLevels is read, to choose between CPUID leaf 0xB
 *       (x2APIC ids) and leaf 0x1 (legacy APIC ids).
 * topo: topo->total_cores and topo->cach->max_cache_level must be set.
 *       topo->apic->cache_select_mask and topo->apic->cache_id_apic are
 *       allocated here and are NOT freed by this function.
 *
 * Returns true on success, false if memory could not be allocated, the masks
 * could not be computed or binding to some CPU failed. All temporary buffers
 * are released on every path.
 */
bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) {
  bool ret = false;
  bool x2apic_id = cpu->maxLevels >= 0x0000000B;
  uint32_t apic_id;
  uint32_t* apic_pkg = malloc(sizeof(uint32_t) * topo->total_cores);
  uint32_t* apic_core = malloc(sizeof(uint32_t) * topo->total_cores);
  uint32_t* apic_smt = malloc(sizeof(uint32_t) * topo->total_cores);
  // Zeroed so that rows never allocated are NULL and can be freed unconditionally
  uint32_t** cache_smt_id_apic = calloc(topo->total_cores, sizeof(uint32_t*));
  uint32_t** cache_id_apic = calloc(topo->total_cores, sizeof(uint32_t*));

  if(apic_pkg == NULL || apic_core == NULL || apic_smt == NULL ||
     cache_smt_id_apic == NULL || cache_id_apic == NULL) {
    printErr("Failed to allocate memory to read the topology");
    goto cleanup;
  }

  for(int i=0; i < topo->total_cores; i++) {
    cache_smt_id_apic[i] = malloc(sizeof(uint32_t) * (topo->cach->max_cache_level));
    cache_id_apic[i] = malloc(sizeof(uint32_t) * (topo->cach->max_cache_level));
    if(cache_smt_id_apic[i] == NULL || cache_id_apic[i] == NULL) {
      printErr("Failed to allocate memory to read the topology");
      goto cleanup;
    }
  }
  topo->apic->cache_select_mask = malloc(sizeof(uint32_t) * (topo->cach->max_cache_level));
  topo->apic->cache_id_apic = malloc(sizeof(uint32_t) * (topo->cach->max_cache_level));
  if(topo->apic->cache_select_mask == NULL || topo->apic->cache_id_apic == NULL) {
    printErr("Failed to allocate memory to read the topology");
    goto cleanup;
  }

  if(x2apic_id) {
    if(!fill_topo_masks_x2apic(topo))
      goto cleanup;
  }
  else {
    if(!fill_topo_masks_apic(topo))
      goto cleanup;
  }

  get_cache_topology_from_apic(topo);

  // The APIC id can only be read for the CPU running the code, so visit them all
  for(int i=0; i < topo->total_cores; i++) {
    if(!bind_to_cpu(i)) {
      printErr("Failed binding to CPU %d", i);
      goto cleanup;
    }
    apic_id = get_apic_id(x2apic_id);

    apic_pkg[i] = (apic_id & topo->apic->pkg_mask) >> topo->apic->pkg_mask_shift;
    apic_core[i] = (apic_id & topo->apic->core_mask) >> topo->apic->smt_mask_width;
    apic_smt[i] = apic_id & topo->apic->smt_mask;

    for(int c=0; c < topo->cach->max_cache_level; c++) {
      cache_smt_id_apic[i][c] = apic_id & topo->apic->cache_select_mask[c];
      cache_id_apic[i][c] = apic_id & (-1 ^ topo->apic->cache_select_mask[c]);
    }
  }

  /* DEBUG
  for(int i=0; i < topo->cach->max_cache_level; i++) {
    printf("[CACH %1d]", i);
    for(int j=0; j < topo->total_cores; j++)
      printf("[%03d]", cache_id_apic[j][i]);
    printf("\n");
  }
  for(int i=0; i < topo->total_cores; i++)
    printf("[%2d] 0x%.8X\n", i, apic_pkg[i]);
  printf("\n");
  for(int i=0; i < topo->total_cores; i++)
    printf("[%2d] 0x%.8X\n", i, apic_core[i]);
  printf("\n");
  for(int i=0; i < topo->total_cores; i++)
    printf("[%2d] 0x%.8X\n", i, apic_smt[i]);*/

  ret = build_topo_from_apic(apic_pkg, apic_smt, cache_id_apic, topo);

  // Assumption: If we cant get smt_available, we assume it is equal to smt_supported...
  if (!x2apic_id) {
    printWarn("Can't read SMT from cpuid (needed level is 0x%.8X, max is 0x%.8X)", 0x0000000B, cpu->maxLevels);
    topo->smt_supported = topo->smt_available;
  }

cleanup:
  free(apic_pkg);
  free(apic_core);
  free(apic_smt);
  for(int i=0; i < topo->total_cores; i++) {
    if(cache_smt_id_apic != NULL) free(cache_smt_id_apic[i]);
    if(cache_id_apic != NULL) free(cache_id_apic[i]);
  }
  free(cache_smt_id_apic);
  free(cache_id_apic);

  return ret;
}
/*
 * Reports how many threads per core are in use by checking, on every logical
 * CPU, the lowest bit of the APIC id read from CPUID leaf 0x1.
 *
 * Returns 2 as soon as a CPU with that bit set is found, 1 if none has it set,
 * and 0 (after printing an error) if binding to a CPU fails.
 */
uint32_t is_smt_enabled_amd(struct topology* topo) {
  int core = 0;

  while(core < topo->total_cores) {
    if(!bind_to_cpu(core)) {
      printErr("Failed binding to CPU %d", core);
      return false;
    }

    // We assume there isn't any AMD CPU with more than 2th per core.
    if((get_apic_id(false) & 1) == 1)
      return 2;

    core++;
  }

  return 1;
}

20
src/apic.h Normal file
View File

@@ -0,0 +1,20 @@
#ifndef __APIC__
#define __APIC__
#include <stdbool.h>
#include "cpuid.h"
// Bit masks used by apic.c to split an APIC id into package, core and SMT ids.
struct apic {
// Selects the package bits of an APIC id
uint32_t pkg_mask;
// Right shift applied after pkg_mask to obtain the package id
uint32_t pkg_mask_shift;
// Selects the core bits; the result is shifted right by smt_mask_width
uint32_t core_mask;
// Number of low bits taken by the SMT id
uint32_t smt_mask_width;
// Selects the SMT (thread) bits of an APIC id
uint32_t smt_mask;
// One mask per cache level, allocated in get_topology_from_apic: the APIC id
// bits cleared of this mask give the id of the cache at that level
uint32_t* cache_select_mask;
// One entry per cache level, allocated in get_topology_from_apic
// NOTE(review): apic.c does not appear to read or write its contents
uint32_t* cache_id_apic;
};
bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo);
uint32_t is_smt_enabled_amd(struct topology* topo);
#endif

View File

@@ -1,44 +1,51 @@
#include <getopt.h> #include <getopt.h>
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
#include <stdlib.h>
#include "args.h" #include "args.h"
#include "global.h"
#define ARG_STR_STYLE "style" #define ARG_STR_STYLE "style"
#define ARG_STR_COLOR "color"
#define ARG_STR_HELP "help" #define ARG_STR_HELP "help"
#define ARG_STR_LEVELS "levels" #define ARG_STR_LEVELS "levels"
#define ARG_STR_VERBOSE "verbose"
#define ARG_STR_VERSION "version" #define ARG_STR_VERSION "version"
#define ARG_CHAR_STYLE 's'
#define ARG_CHAR_HELP 'h' #define ARG_CHAR_STYLE 0
#define ARG_CHAR_LEVELS 'l' #define ARG_CHAR_COLOR 1
#define ARG_CHAR_VERSION 'v' #define ARG_CHAR_HELP 2
#define STYLE_STR_1 "default" #define ARG_CHAR_LEVELS 3
#define STYLE_STR_2 "dark" #define ARG_CHAR_VERBOSE 4
#define STYLE_STR_3 "none" #define ARG_CHAR_VERSION 5
#define STYLE_STR_1 "fancy"
#define STYLE_STR_2 "retro"
#define STYLE_STR_3 "legacy"
#define COLOR_STR_INTEL "intel"
#define COLOR_STR_AMD "amd"
struct args_struct { struct args_struct {
bool levels_flag; bool levels_flag;
bool help_flag; bool help_flag;
bool verbose_flag;
bool version_flag; bool version_flag;
STYLE style; STYLE style;
struct colors* colors;
}; };
static const char* SYTLES_STR_LIST[STYLES_COUNT] = { STYLE_STR_1, STYLE_STR_2, STYLE_STR_3 }; static const char* SYTLES_STR_LIST[STYLES_COUNT] = { STYLE_STR_1, STYLE_STR_2, STYLE_STR_3 };
static struct args_struct args; static struct args_struct args;
STYLE parse_style(char* style) {
int i = 0;
while(i != STYLES_COUNT && strcmp(SYTLES_STR_LIST[i],style) != 0)
i++;
if(i == STYLES_COUNT)
return STYLE_INVALID;
return i;
}
STYLE get_style() { STYLE get_style() {
return args.style; return args.style;
} }
struct colors* get_colors() {
return args.colors;
}
bool show_help() { bool show_help() {
return args.help_flag; return args.help_flag;
} }
@@ -52,81 +59,192 @@ bool show_levels() {
} }
bool verbose_enabled() { bool verbose_enabled() {
return false; return args.verbose_flag;
}
/*
 * Maps a style name to its STYLE value.
 *
 * Returns the index in SYTLES_STR_LIST of the entry equal to `style`,
 * or STYLE_INVALID when no entry matches.
 */
STYLE parse_style(char* style) {
  for(int idx = 0; idx != STYLES_COUNT; idx++) {
    if(strcmp(SYTLES_STR_LIST[idx], style) == 0)
      return idx;
  }

  return STYLE_INVALID;
}
/*
 * Releases a colors struct together with its four color entries.
 * Like free(), it accepts NULL (the value stored when no color was
 * specified) and does nothing in that case.
 */
void free_colors_struct(struct colors* cs) {
  if(cs == NULL)
    return;

  free(cs->c1);
  free(cs->c2);
  free(cs->c3);
  free(cs->c4);
  free(cs);
}
/*
 * Parses the argument of the color option.
 *
 * optarg: COLOR_STR_INTEL, COLOR_STR_AMD (which select COLOR_DEFAULT_INTEL /
 *         COLOR_DEFAULT_AMD) or a custom scheme "R,G,B:R,G,B:R,G,B:R,G,B".
 * cs:     on success, *cs points to a newly allocated colors struct holding
 *         the four parsed colors (release it with free_colors_struct).
 *         On failure everything is freed and *cs is set to NULL.
 *
 * Returns true on success. Returns false (after printing an error) if memory
 * cannot be allocated, fewer than 12 values are read or any component of any
 * of the four colors is outside 0..255.
 */
bool parse_color(char* optarg, struct colors** cs) {
  static const char* const channel_names[3] = { "Red", "Green", "Blue" };
  struct color* slots[4] = { NULL, NULL, NULL, NULL };
  // sscanf only reads its input, so the default schemes need no heap copy
  const char* str_to_parse = optarg;
  int32_t ret;

  *cs = malloc(sizeof(struct colors));
  if(*cs == NULL) {
    printErr("Failed to allocate memory for colors");
    return false;
  }

  for(int i = 0; i < 4; i++)
    slots[i] = malloc(sizeof(struct color));
  (*cs)->c1 = slots[0];
  (*cs)->c2 = slots[1];
  (*cs)->c3 = slots[2];
  (*cs)->c4 = slots[3];

  if(slots[0] == NULL || slots[1] == NULL || slots[2] == NULL || slots[3] == NULL) {
    printErr("Failed to allocate memory for colors");
    goto fail;
  }

  if(strcmp(optarg, COLOR_STR_INTEL) == 0)
    str_to_parse = COLOR_DEFAULT_INTEL;
  else if(strcmp(optarg, COLOR_STR_AMD) == 0)
    str_to_parse = COLOR_DEFAULT_AMD;

  ret = sscanf(str_to_parse, "%d,%d,%d:%d,%d,%d:%d,%d,%d:%d,%d,%d",
               &slots[0]->R, &slots[0]->G, &slots[0]->B,
               &slots[1]->R, &slots[1]->G, &slots[1]->B,
               &slots[2]->R, &slots[2]->G, &slots[2]->B,
               &slots[3]->R, &slots[3]->G, &slots[3]->B);

  if(ret != 12) {
    printErr("Expected to read 12 values for color but read %d", ret);
    goto fail;
  }

  // Every component of all four colors must fit in one byte
  for(int i = 0; i < 4; i++) {
    const int32_t channels[3] = { slots[i]->R, slots[i]->G, slots[i]->B };
    for(int ch = 0; ch < 3; ch++) {
      if(channels[ch] < 0 || channels[ch] > 255) {
        printErr("%s in color %d is invalid. Must be in range (0, 255)", channel_names[ch], i + 1);
        goto fail;
      }
    }
  }

  return true;

fail:
  for(int i = 0; i < 4; i++)
    free(slots[i]);
  free(*cs);
  // Do not leave the caller with a dangling pointer
  *cs = NULL;
  return false;
}
bool parse_args(int argc, char* argv[]) { bool parse_args(int argc, char* argv[]) {
int c; int c;
int digit_optind = 0;
int option_index = 0; int option_index = 0;
opterr = 0; opterr = 0;
bool color_flag = false;
args.levels_flag = false; args.levels_flag = false;
args.verbose_flag = false;
args.help_flag = false; args.help_flag = false;
args.style = STYLE_EMPTY; args.style = STYLE_EMPTY;
args.colors = NULL;
static struct option long_options[] = { static struct option long_options[] = {
{ARG_STR_STYLE, required_argument, 0, ARG_CHAR_STYLE }, {ARG_STR_STYLE, required_argument, 0, ARG_CHAR_STYLE },
{ARG_STR_COLOR, required_argument, 0, ARG_CHAR_COLOR },
{ARG_STR_HELP, no_argument, 0, ARG_CHAR_HELP }, {ARG_STR_HELP, no_argument, 0, ARG_CHAR_HELP },
{ARG_STR_LEVELS, no_argument, 0, ARG_CHAR_LEVELS }, {ARG_STR_LEVELS, no_argument, 0, ARG_CHAR_LEVELS },
{ARG_STR_VERBOSE, no_argument, 0, ARG_CHAR_VERBOSE },
{ARG_STR_VERSION, no_argument, 0, ARG_CHAR_VERSION }, {ARG_STR_VERSION, no_argument, 0, ARG_CHAR_VERSION },
{0, 0, 0, 0} {0, 0, 0, 0}
}; };
c = getopt_long(argc, argv,"",long_options, &option_index); c = getopt_long(argc, argv, "", long_options, &option_index);
while (c != -1) { while (c != -1) {
if(c == ARG_CHAR_STYLE) { if(c == ARG_CHAR_COLOR) {
if(color_flag) {
printErr("Color option specified more than once");
return false;
}
color_flag = true;
if(!parse_color(optarg, &args.colors)) {
printErr("Color parsing failed");
return false;
}
}
else if(c == ARG_CHAR_STYLE) {
if(args.style != STYLE_EMPTY) { if(args.style != STYLE_EMPTY) {
printf("ERROR: Style option specified more than once\n"); printErr("Style option specified more than once");
return false; return false;
} }
args.style = parse_style(optarg); args.style = parse_style(optarg);
if(args.style == STYLE_INVALID) { if(args.style == STYLE_INVALID) {
printf("ERROR: Invalid style '%s'\n",optarg); printErr("Invalid style '%s'",optarg);
return false; return false;
} }
} }
else if(c == ARG_CHAR_HELP) { else if(c == ARG_CHAR_HELP) {
if(args.help_flag) { if(args.help_flag) {
printf("ERROR: Help option specified more than once\n"); printErr("Help option specified more than once");
return false; return false;
} }
args.help_flag = true; args.help_flag = true;
} }
else if(c == ARG_CHAR_VERBOSE) {
if(args.verbose_flag) {
printErr("Verbose option specified more than once");
return false;
}
args.verbose_flag = true;
}
else if(c == ARG_CHAR_LEVELS) { else if(c == ARG_CHAR_LEVELS) {
if(args.levels_flag) { if(args.levels_flag) {
printf("ERROR: Levels option specified more than once\n"); printErr("Levels option specified more than once");
return false; return false;
} }
args.levels_flag = true; args.levels_flag = true;
} }
else if (c == ARG_CHAR_VERSION) { else if (c == ARG_CHAR_VERSION) {
if(args.version_flag) { if(args.version_flag) {
printf("ERROR: Version option specified more than once\n"); printErr("Version option specified more than once");
return false; return false;
} }
args.version_flag = true; args.version_flag = true;
} }
else if(c == '?') { else if(c == '?') {
printf("WARNING: Invalid options\n"); printWarn("Invalid options");
args.help_flag = true; args.help_flag = true;
break; break;
} }
else else
printf("Bug at line number %d in file %s\n", __LINE__, __FILE__); printBug("Bug at line number %d in file %s", __LINE__, __FILE__);
option_index = 0; option_index = 0;
c = getopt_long(argc, argv,"",long_options, &option_index); c = getopt_long(argc, argv,"",long_options, &option_index);
} }
if (optind < argc) { if (optind < argc) {
printf("WARNING: Invalid options\n"); printWarn("Invalid options");
args.help_flag = true; args.help_flag = true;
} }
if((args.help_flag + args.version_flag + (args.style != STYLE_EMPTY)) > 1) { if((args.help_flag + args.version_flag + color_flag) > 1) {
printf("WARNING: You should specify just one option\n"); printWarn("You should specify just one option");
args.help_flag = true; args.help_flag = true;
} }

View File

@@ -2,13 +2,30 @@
#define __ARGS__ #define __ARGS__
#include <stdbool.h> #include <stdbool.h>
#include <stdint.h>
// One color expressed as three integer components named R, G and B.
// NOTE(review): presumably each component is in the 0-255 range; confirm against the argument parser.
struct color {
int32_t R;
int32_t G;
int32_t B;
};
// A set of four colors (c1..c4), each one held through a pointer to a struct color.
// NOTE(review): presumably this maps to the four 'R,G,B' groups accepted by --color; confirm.
struct colors {
struct color* c1;
struct color* c2;
struct color* c3;
struct color* c4;
};
#include "printer.h" #include "printer.h"
bool parse_args(int argc, char* argv[]); bool parse_args(int argc, char* argv[]);
STYLE get_style();
bool show_help(); bool show_help();
bool show_levels(); bool show_levels();
bool show_version(); bool show_version();
bool verbose_enabled(); bool verbose_enabled();
void free_colors_struct(struct colors* cs);
struct colors* get_colors();
STYLE get_style();
#endif #endif

View File

@@ -1,7 +1,7 @@
#ifndef __ASCII__ #ifndef __ASCII__
#define __ASCII__ #define __ASCII__
#define NUMBER_OF_LINES 20 #define NUMBER_OF_LINES 19
#define LINE_SIZE 62 #define LINE_SIZE 62
#define AMD_ASCII \ #define AMD_ASCII \
@@ -23,8 +23,7 @@
\ \
\ \
\ \
\ "
"
#define INTEL_ASCII \ #define INTEL_ASCII \
" ################ \ " ################ \
@@ -45,7 +44,6 @@
#### #### \ #### #### \
##### ########## \ ##### ########## \
########## ################ \ ########## ################ \
############################### \ ############################### "
"
#endif #endif

View File

@@ -1,10 +1,978 @@
#include "cpuid.h" #ifdef _WIN32
#include <windows.h>
#else
#include "udev.h"
#include <unistd.h>
#endif
void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) { #include <stdio.h>
__asm volatile("cpuid" #include <stdlib.h>
: "=a" (*eax), #include <string.h>
"=b" (*ebx), #include <assert.h>
"=c" (*ecx), #include <stdbool.h>
"=d" (*edx)
: "0" (*eax), "2" (*ecx)); #include "cpuid.h"
#include "cpuid_asm.h"
#include "global.h"
#include "apic.h"
#include "uarch.h"
#define VENDOR_INTEL_STRING "GenuineIntel"
#define VENDOR_AMD_STRING "AuthenticAMD"
#define STRING_YES "Yes"
#define STRING_NO "No"
#define STRING_UNKNOWN "Unknown"
#define STRING_NONE "None"
#define STRING_MEGAHERZ "MHz"
#define STRING_GIGAHERZ "GHz"
#define STRING_KILOBYTES "KB"
#define STRING_MEGABYTES "MB"
#define CPU_NAME_MAX_LENGTH 64
#define MASK 0xFF
/*
* cpuid reference: http://www.sandpile.org/x86/cpuid.htm
* cpuid amd: https://www.amd.com/system/files/TechDocs/25481.pdf
*/
// CPU frequencies in MHz. A field holds UNKNOWN_FREQ when the value
// could not be obtained.
struct frequency {
// Base frequency
int64_t base;
// Maximum frequency
int64_t max;
};
void init_cpu_info(struct cpuInfo* cpu) {
cpu->AVX = false;
cpu->AVX2 = false;
cpu->AVX512 = false;
cpu->SSE = false;
cpu->SSE2 = false;
cpu->SSE3 = false;
cpu->SSSE3 = false;
cpu->SSE4a = false;
cpu->SSE4_1 = false;
cpu->SSE4_2 = false;
cpu->FMA3 = false;
cpu->FMA4 = false;
cpu->AES = false;
cpu->SHA = false;
}
/*
 * Resets every counter of 'topo', allocates its apic structure and
 * links it to the given cache structure (which is not copied).
 */
void init_topology_struct(struct topology* topo, struct cache* cach) {
  topo->total_cores = 0;
  topo->physical_cores = 0;
  topo->logical_cores = 0;
  topo->smt_available = 0;
  topo->smt_supported = 0;
  topo->sockets = 0;
  topo->apic = malloc(sizeof(struct apic));
  if(topo->apic != NULL) {
    // free_topo_struct() frees both members unconditionally, so they
    // must hold a valid value even when nobody fills them in
    topo->apic->cache_select_mask = NULL;
    topo->apic->cache_id_apic = NULL;
  }
  topo->cach = cach;
}
/*
 * Allocates the four cache descriptors (L1i, L1d, L2, L3) of 'cach' and
 * the array that indexes them in that same order, and marks every one
 * of them as non-existent with size 0 and 0 caches.
 */
void init_cache_struct(struct cache* cach) {
  cach->L1i = malloc(sizeof(struct cach));
  cach->L1d = malloc(sizeof(struct cach));
  cach->L2 = malloc(sizeof(struct cach));
  cach->L3 = malloc(sizeof(struct cach));

  cach->cach_arr = malloc(sizeof(struct cach*) * 4);
  cach->cach_arr[0] = cach->L1i;
  cach->cach_arr[1] = cach->L1d;
  cach->cach_arr[2] = cach->L2;
  cach->cach_arr[3] = cach->L3;

  cach->max_cache_level = 0;

  // size and num_caches are read by the sanity checks and by the string
  // functions, so they must not be left indeterminate
  for(int i = 0; i < 4; i++) {
    cach->cach_arr[i]->size = 0;
    cach->cach_arr[i]->num_caches = 0;
    cach->cach_arr[i]->exists = false;
  }
}
/*
 * Unpacks the 12 vendor characters held in the given cpuid registers.
 * The characters are stored in 'name' in the order EBX, EDX, ECX, least
 * significant byte first. Exactly 12 bytes are written; no terminator
 * is appended, so the caller must provide a zeroed buffer of at least
 * 13 bytes.
 *
 * Indexes are computed explicitly instead of relying on __COUNTER__,
 * which is a compiler extension whose value depends on every previous
 * expansion in the translation unit.
 */
void get_cpu_vendor_internal(char* name, uint32_t ebx,uint32_t ecx,uint32_t edx) {
  const uint32_t regs[3] = { ebx, edx, ecx };

  for(int r = 0; r < 3; r++) {
    for(int b = 0; b < 4; b++) {
      name[r * 4 + b] = (char)((regs[r] >> (8 * b)) & 0xFF);
    }
  }
}
/*
 * Builds the CPU name from cpuid extended levels 0x80000002-0x80000004.
 * Each level contributes 16 characters (EAX, EBX, ECX, EDX, least
 * significant byte first). Leading spaces are dropped and every run of
 * spaces is collapsed into a single one.
 * Returns a heap allocated string that the caller must free.
 */
char* get_str_cpu_name_internal() {
  uint32_t eax = 0;
  uint32_t ebx = 0;
  uint32_t ecx = 0;
  uint32_t edx = 0;
  uint32_t pos = 0;

  char* name = malloc(sizeof(char) * CPU_NAME_MAX_LENGTH);
  memset(name, 0, CPU_NAME_MAX_LENGTH);

  for(uint32_t offset = 0; offset < 3; offset++) {
    eax = 0x80000002 + offset;
    cpuid(&eax, &ebx, &ecx, &edx);

    const uint32_t regs[4] = { eax, ebx, ecx, edx };
    for(int r = 0; r < 4; r++) {
      for(int shift = 0; shift < 32; shift += 8) {
        name[pos++] = (regs[r] >> shift) & MASK;
      }
    }
  }
  name[pos] = '\0';

  // Compact the string in place
  char* src = name;
  char* dst = name;

  // Skip the spaces placed before the name
  while(*src == ' ') {
    src++;
  }

  // Copy the rest, keeping only the last space of every run of spaces
  while(*src != '\0') {
    if(src[0] == ' ' && src[1] == ' ') {
      src++;
      continue;
    }
    *dst++ = *src++;
  }
  *dst = '\0';

  return name;
}
/*
 * Reads cpuid level 0x00000001 and splits EAX into stepping, model,
 * family, extended model and extended family, which are then handed to
 * get_uarch_from_cpuid(). Returns whatever that function returns.
 */
struct uarch* get_cpu_uarch(struct cpuInfo* cpu) {
  uint32_t eax = 0x00000001;
  uint32_t ebx = 0;
  uint32_t ecx = 0;
  uint32_t edx = 0;

  cpuid(&eax, &ebx, &ecx, &edx);

  // Bit layout of EAX: [3:0] stepping, [7:4] model, [11:8] family,
  // [19:16] extended model, [27:20] extended family
  const uint32_t signature = eax;
  const uint32_t stepping = signature & 0xF;
  const uint32_t model    = (signature >> 4) & 0xF;
  const uint32_t family   = (signature >> 8) & 0xF;
  const uint32_t emodel   = (signature >> 16) & 0xF;
  const uint32_t efamily  = (signature >> 20) & 0xFF;

  return get_uarch_from_cpuid(cpu, efamily, family, emodel, model, (int)stepping);
}
// Tells whether bit number 'bit' (0-31) of a cpuid register is set.
// The shift is done on an unsigned value: shifting a signed 1 into
// bit 31 is undefined behaviour.
static bool cpuid_reg_has_bit(uint32_t reg, uint32_t bit) {
  return (reg & ((uint32_t)1 << bit)) != 0;
}

/*
 * Queries cpuid and fills a newly allocated cpuInfo with:
 *  - the maximum standard and extended levels,
 *  - the vendor (only Intel and AMD are accepted),
 *  - the supported instruction set extensions,
 *  - the CPU name ("Unknown" if it can not be read),
 *  - the microarchitecture.
 * Returns NULL (after printing an error) if the vendor is not known.
 * Otherwise the caller owns the returned structure and must release it
 * with free_cpuinfo_struct().
 */
struct cpuInfo* get_cpu_info() {
  struct cpuInfo* cpu = malloc(sizeof(struct cpuInfo));
  init_cpu_info(cpu);

  uint32_t eax = 0;
  uint32_t ebx = 0;
  uint32_t ecx = 0;
  uint32_t edx = 0;

  //Get max cpuid level
  cpuid(&eax, &ebx, &ecx, &edx);
  cpu->maxLevels = eax;

  //Fill vendor
  char name[13];
  memset(name,0,13);
  get_cpu_vendor_internal(name, ebx, ecx, edx);

  if(strcmp(VENDOR_INTEL_STRING,name) == 0)
    cpu->cpu_vendor = VENDOR_INTEL;
  else if (strcmp(VENDOR_AMD_STRING,name) == 0)
    cpu->cpu_vendor = VENDOR_AMD;
  else {
    cpu->cpu_vendor = VENDOR_INVALID;
    printErr("Unknown CPU vendor: %s", name);
    // Nothing else has been allocated yet, so releasing cpu is enough
    free(cpu);
    return NULL;
  }

  //Get max extended level
  eax = 0x80000000;
  ebx = 0;
  ecx = 0;
  edx = 0;
  cpuid(&eax, &ebx, &ecx, &edx);
  cpu->maxExtendedLevels = eax;

  //Fill instructions support
  if (cpu->maxLevels >= 0x00000001){
    eax = 0x00000001;
    cpuid(&eax, &ebx, &ecx, &edx);
    cpu->SSE    = cpuid_reg_has_bit(edx, 25);
    cpu->SSE2   = cpuid_reg_has_bit(edx, 26);
    cpu->SSE3   = cpuid_reg_has_bit(ecx, 0);

    cpu->SSSE3  = cpuid_reg_has_bit(ecx, 9);
    cpu->SSE4_1 = cpuid_reg_has_bit(ecx, 19);
    cpu->SSE4_2 = cpuid_reg_has_bit(ecx, 20);

    cpu->AES    = cpuid_reg_has_bit(ecx, 25);
    cpu->AVX    = cpuid_reg_has_bit(ecx, 28);
    cpu->FMA3   = cpuid_reg_has_bit(ecx, 12);
  }
  else {
    printWarn("Can't read features information from cpuid (needed level is 0x%.8X, max is 0x%.8X)", 0x00000001, cpu->maxLevels);
  }

  if (cpu->maxLevels >= 0x00000007){
    eax = 0x00000007;
    ecx = 0x00000000;
    cpuid(&eax, &ebx, &ecx, &edx);
    cpu->AVX2   = cpuid_reg_has_bit(ebx, 5);
    cpu->SHA    = cpuid_reg_has_bit(ebx, 29);
    // AVX512 is reported as soon as any of these EBX bits is set
    cpu->AVX512 = cpuid_reg_has_bit(ebx, 16) ||
                  cpuid_reg_has_bit(ebx, 28) ||
                  cpuid_reg_has_bit(ebx, 26) ||
                  cpuid_reg_has_bit(ebx, 27) ||
                  cpuid_reg_has_bit(ebx, 31) ||
                  cpuid_reg_has_bit(ebx, 30) ||
                  cpuid_reg_has_bit(ebx, 17) ||
                  cpuid_reg_has_bit(ebx, 21);
  }
  else {
    printWarn("Can't read features information from cpuid (needed level is 0x%.8X, max is 0x%.8X)", 0x00000007, cpu->maxLevels);
  }

  if (cpu->maxExtendedLevels >= 0x80000001){
    eax = 0x80000001;
    cpuid(&eax, &ebx, &ecx, &edx);
    cpu->SSE4a = cpuid_reg_has_bit(ecx, 6);
    cpu->FMA4  = cpuid_reg_has_bit(ecx, 16);
  }
  else {
    printWarn("Can't read features information from cpuid (needed extended level is 0x%.8X, max is 0x%.8X)", 0x80000001, cpu->maxExtendedLevels);
  }

  if (cpu->maxExtendedLevels >= 0x80000004){
    cpu->cpu_name = get_str_cpu_name_internal();
  }
  else {
    cpu->cpu_name = malloc(sizeof(char)*8);
    sprintf(cpu->cpu_name,"Unknown");
    printWarn("Can't read cpu name from cpuid (needed extended level is 0x%.8X, max is 0x%.8X)", 0x80000004, cpu->maxExtendedLevels);
  }

  cpu->arch = get_cpu_uarch(cpu);

  return cpu;
}
bool get_cache_topology_amd(struct cpuInfo* cpu, struct topology* topo) {
if(cpu->maxExtendedLevels >= 0x8000001D) {
uint32_t i, eax, ebx, ecx, edx, num_sharing_cache, cache_type, cache_level;
i = 0;
do {
eax = 0x8000001D;
ebx = 0;
ecx = i; // cache id
edx = 0;
cpuid(&eax, &ebx, &ecx, &edx);
cache_type = eax & 0x1F;
if(cache_type > 0) {
num_sharing_cache = ((eax >> 14) & 0xFFF) + 1;
cache_level = (eax >>= 5) & 0x7;
switch (cache_type) {
case 1: // Data Cache (We assume this is L1d)
if(cache_level != 1) {
printBug("Found data cache at level %d (expected 1)", cache_level);
return false;
}
topo->cach->L1d->num_caches = topo->logical_cores / num_sharing_cache;
break;
case 2: // Instruction Cache (We assume this is L1i)
if(cache_level != 1) {
printBug("Found instruction cache at level %d (expected 1)", cache_level);
return false;
}
topo->cach->L1i->num_caches = topo->logical_cores / num_sharing_cache;
break;
case 3: // Unified Cache (This may be L2 or L3)
if(cache_level == 2) {
topo->cach->L2->num_caches = topo->logical_cores / num_sharing_cache;
}
else if(cache_level == 3) {
topo->cach->L3->num_caches = topo->logical_cores / num_sharing_cache;
}
else {
printBug("Found unified cache at level %d (expected == 2 or 3)", cache_level);
return false;
}
break;
default: // Unknown Type Cache
printBug("Unknown Type Cache found at ID %d", i);
return false;
}
}
i++;
} while (cache_type > 0);
}
else {
printWarn("Can't read topology information from cpuid (needed extended level is 0x%.8X, max is 0x%.8X). Guessing cache sizes", 0x8000001D, cpu->maxExtendedLevels);
topo->cach->L1i->num_caches = topo->physical_cores;
topo->cach->L1d->num_caches = topo->physical_cores;
if(topo->cach->L3->exists) {
topo->cach->L2->num_caches = topo->physical_cores;
topo->cach->L3->num_caches = 1;
}
else {
topo->cach->L2->num_caches = 1;
}
}
return true;
}
// Main reference: https://software.intel.com/content/www/us/en/develop/articles/intel-64-architecture-processor-topology-enumeration.html
// Very interesting resource: https://wiki.osdev.org/Detecting_CPU_Topology_(80x86)
/*
 * Builds the topology (cores, threads, SMT, sockets) of the CPU.
 * On Intel the work is delegated to get_topology_from_apic(); on AMD it
 * is derived from extended levels 0x80000008 and 0x8000001E, and the
 * number of caches is filled in by get_cache_topology_amd().
 * 'cach' is linked to the topology, not copied.
 * Returns NULL (after printing a bug message) if the vendor is neither
 * Intel nor AMD. Otherwise the caller owns the returned structure and
 * must release it with free_topo_struct().
 */
struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) {
  struct topology* topo = malloc(sizeof(struct topology));
  init_topology_struct(topo, cach);

  uint32_t eax = 0;
  uint32_t ebx = 0;
  uint32_t ecx = 0;
  uint32_t edx = 0;
  bool total_cores_known = true;

  // Ask the OS the total number of cores it sees
  // If we have one socket, it will be same as the cpuid,
  // but in dual socket it will not!
  // TODO: Replace by apic?
  #ifdef _WIN32
    SYSTEM_INFO info;
    GetSystemInfo(&info);
    topo->total_cores = info.dwNumberOfProcessors;
  #else
    if((topo->total_cores = sysconf(_SC_NPROCESSORS_ONLN)) == -1) {
      perror("sysconf");
      // logical_cores has not been computed yet (it is still 0), so it
      // can not be used as fallback here. Assume one core for now; the
      // AMD path replaces it once logical_cores is known
      total_cores_known = false;
      topo->total_cores = 1;
    }
  #endif

  switch(cpu->cpu_vendor) {
    case VENDOR_INTEL:
      if (cpu->maxLevels >= 0x00000004) {
        get_topology_from_apic(cpu, topo);
      }
      else {
        printErr("Can't read topology information from cpuid (needed level is 0x%.8X, max is 0x%.8X)", 0x00000004, cpu->maxLevels);
        topo->physical_cores = 1;
        topo->logical_cores = 1;
        topo->smt_available = 1;
        topo->smt_supported = 1;
        // Keep the fallback consistent: one core lives in one socket
        topo->sockets = 1;
      }
      break;

    case VENDOR_AMD:
      if (cpu->maxExtendedLevels >= 0x80000008) {
        eax = 0x80000008;
        cpuid(&eax, &ebx, &ecx, &edx);
        topo->logical_cores = (ecx & 0xFF) + 1;

        if (cpu->maxExtendedLevels >= 0x8000001E) {
          eax = 0x8000001E;
          cpuid(&eax, &ebx, &ecx, &edx);
          topo->smt_supported = ((ebx >> 8) & 0x03) + 1;
        }
        else {
          printWarn("Can't read topology information from cpuid (needed extended level is 0x%.8X, max is 0x%.8X)", 0x8000001E, cpu->maxExtendedLevels);
          topo->smt_supported = 1;
        }
      }
      else {
        printErr("Can't read topology information from cpuid (needed extended level is 0x%.8X, max is 0x%.8X)", 0x80000008, cpu->maxExtendedLevels);
        topo->physical_cores = 1;
        topo->logical_cores = 1;
        topo->smt_supported = 1;
      }

      if (cpu->maxLevels >= 0x00000001) {
        if(topo->smt_supported > 1)
          topo->smt_available = is_smt_enabled_amd(topo);
        else
          topo->smt_available = 1;
      }
      else {
        printWarn("Can't read topology information from cpuid (needed level is 0x%.8X, max is 0x%.8X)", 0x00000001, cpu->maxLevels);
        topo->smt_available = 1;
      }

      // Deferred fallback for a failed sysconf (see above)
      if(!total_cores_known)
        topo->total_cores = topo->logical_cores;

      topo->physical_cores = topo->logical_cores / topo->smt_available;

      if(topo->smt_supported > 1)
        topo->sockets = topo->total_cores / topo->smt_supported / topo->physical_cores; // Idea borrowed from lscpu
      else
        topo->sockets = topo->total_cores / topo->physical_cores;

      // A failure here has already been reported with printBug; the
      // rest of the topology is still valid, so it is not fatal
      get_cache_topology_amd(cpu, topo);

      break;

    default:
      printBug("Cant get topology because VENDOR is empty");
      // Only the members allocated by init_topology_struct exist at
      // this point. The cache structure belongs to the caller
      free(topo->apic);
      free(topo);
      return NULL;
  }

  return topo;
}
/*
 * Reads the size of every cache from cpuid (level 0x00000004 on Intel,
 * extended level 0x8000001D otherwise) into a newly allocated cache
 * structure. max_cache_level is incremented once per cache found.
 * Returns NULL, after printing a message and releasing the structure,
 * if the needed level is not available, if an unexpected cache type or
 * level is found, if a size is not plausible or if there is no L2.
 * Otherwise the caller owns the result and must release it with
 * free_cache_struct().
 */
struct cache* get_cache_info(struct cpuInfo* cpu) {
  struct cache* cach = malloc(sizeof(struct cache));
  init_cache_struct(cach);

  uint32_t eax = 0;
  uint32_t ebx = 0;
  uint32_t ecx = 0;
  uint32_t edx = 0;
  uint32_t level;
  int i = 0;
  int32_t cache_type;

  // We use standard 0x00000004 for Intel
  // We use extended 0x8000001D for AMD
  if(cpu->cpu_vendor == VENDOR_INTEL) {
    level = 0x00000004;
    if(cpu->maxLevels < level) {
      printErr("Can't read cache information from cpuid (needed level is 0x%.8X, max is 0x%.8X)", level, cpu->maxLevels);
      goto fail;
    }
  }
  else {
    level = 0x8000001D;
    if(cpu->maxExtendedLevels < level) {
      printErr("Can't read cache information from cpuid (needed extended level is 0x%.8X, max is 0x%.8X)", level, cpu->maxExtendedLevels);
      goto fail;
    }
  }

  do {
    eax = level; // get cache info
    ebx = 0;
    ecx = i; // cache id
    edx = 0;

    cpuid(&eax, &ebx, &ecx, &edx);

    cache_type = eax & 0x1F;

    // If its 0, we tried fetching a non existing cache
    if (cache_type > 0) {
      int32_t cache_level = (eax >>= 5) & 0x7;
      uint32_t cache_sets = ecx + 1;
      uint32_t cache_coherency_line_size = (ebx & 0xFFF) + 1;
      uint32_t cache_physical_line_partitions = ((ebx >>= 12) & 0x3FF) + 1;
      uint32_t cache_ways_of_associativity = ((ebx >>= 10) & 0x3FF) + 1;

      int32_t cache_total_size = cache_ways_of_associativity * cache_physical_line_partitions * cache_coherency_line_size * cache_sets;
      cach->max_cache_level++;

      switch (cache_type) {
        case 1: // Data Cache (We assume this is L1d)
          if(cache_level != 1) {
            printBug("Found data cache at level %d (expected 1)", cache_level);
            goto fail;
          }
          cach->L1d->size = cache_total_size;
          cach->L1d->exists = true;
          break;

        case 2: // Instruction Cache (We assume this is L1i)
          if(cache_level != 1) {
            printBug("Found instruction cache at level %d (expected 1)", cache_level);
            goto fail;
          }
          cach->L1i->size = cache_total_size;
          cach->L1i->exists = true;
          break;

        case 3: // Unified Cache (This may be L2 or L3)
          if(cache_level == 2) {
            cach->L2->size = cache_total_size;
            cach->L2->exists = true;
          }
          else if(cache_level == 3) {
            cach->L3->size = cache_total_size;
            cach->L3->exists = true;
          }
          else {
            printBug("Found unified cache at level %d (expected == 2 or 3)", cache_level);
            goto fail;
          }
          break;

        default: // Unknown Type Cache
          printBug("Unknown Type Cache found at ID %d", i);
          goto fail;
      }
    }

    i++;
  } while (cache_type > 0);

  // Sanity checks. If we read values greater than this, they can't be valid ones
  // The values were chosen by me
  // A size is only inspected if the cache was actually reported
  if(cach->L1i->exists && cach->L1i->size > 64 * 1024) {
    printBug("Invalid L1i size: %dKB", cach->L1i->size/1024);
    goto fail;
  }
  if(cach->L1d->exists && cach->L1d->size > 64 * 1024) {
    printBug("Invalid L1d size: %dKB", cach->L1d->size/1024);
    goto fail;
  }
  if(cach->L2->exists) {
    if(cach->L3->exists && cach->L2->size > 2 * 1048576) {
      printBug("Invalid L2 size: %dMB", cach->L2->size/(1048576));
      goto fail;
    }
    else if(cach->L2->size > 100 * 1048576) {
      printBug("Invalid L2 size: %dMB", cach->L2->size/(1048576));
      goto fail;
    }
  }
  if(cach->L3->exists && cach->L3->size > 100 * 1048576) {
    printBug("Invalid L3 size: %dMB", cach->L3->size/(1048576));
    goto fail;
  }
  if(!cach->L2->exists) {
    printBug("Could not find L2 cache");
    goto fail;
  }

  return cach;

fail:
  // Every error path ends here so that the structure is not leaked
  free_cache_struct(cach);
  return NULL;
}
/*
 * Returns a newly allocated frequency structure (release it with
 * free_freq_struct()).
 * If cpuid level 0x16 is available, base and max are taken from EAX and
 * EBX. Otherwise base is UNKNOWN_FREQ and max is UNKNOWN_FREQ on
 * Windows or the value returned by get_max_freq_from_file() elsewhere.
 */
struct frequency* get_frequency_info(struct cpuInfo* cpu) {
  struct frequency* freq = malloc(sizeof(struct frequency));

  if(cpu->maxLevels >= 0x16) {
    uint32_t eax = 0x16;
    uint32_t ebx = 0;
    uint32_t ecx = 0;
    uint32_t edx = 0;

    cpuid(&eax, &ebx, &ecx, &edx);

    freq->base = eax;
    freq->max = ebx;
    return freq;
  }

  // cpuid can not tell us the frequency
  #ifdef _WIN32
    printErr("Can't read frequency information from cpuid (needed level is %d, max is %d)", 0x16, cpu->maxLevels);
    freq->base = UNKNOWN_FREQ;
    freq->max = UNKNOWN_FREQ;
  #else
    printWarn("Can't read frequency information from cpuid (needed level is %d, max is %d). Using udev", 0x16, cpu->maxLevels);
    freq->base = UNKNOWN_FREQ;
    freq->max = get_max_freq_from_file();
  #endif

  return freq;
}
// Returns the number of sockets stored in the topology
uint32_t get_nsockets(struct topology* topo) {
return topo->sockets;
}
// Returns the maximum frequency stored in 'freq' (the base frequency is not exposed)
int64_t get_freq(struct frequency* freq) {
return freq->max;
}
// Returns the vendor stored in 'cpu' (one of the VENDOR_* values)
VENDOR get_cpu_vendor(struct cpuInfo* cpu) {
return cpu->cpu_vendor;
}
// Text used by debug_cpu_info to print a feature flag
static const char* debug_flag_str(bool flag) {
  return flag ? "true" : "false";
}

/*
 * Prints every instruction set flag of 'cpu' to stdout, one per line,
 * as NAME=true or NAME=false. A blank line separates the AVX, SSE,
 * FMA and AES/SHA groups.
 */
void debug_cpu_info(struct cpuInfo* cpu) {
  // AVX family
  printf("AVX=%s\n", debug_flag_str(cpu->AVX));
  printf("AVX2=%s\n", debug_flag_str(cpu->AVX2));
  printf("AVX512=%s\n\n", debug_flag_str(cpu->AVX512));

  // SSE family
  printf("SSE=%s\n", debug_flag_str(cpu->SSE));
  printf("SSE2=%s\n", debug_flag_str(cpu->SSE2));
  printf("SSE3=%s\n", debug_flag_str(cpu->SSE3));
  printf("SSSE3=%s\n", debug_flag_str(cpu->SSSE3));
  printf("SSE4a=%s\n", debug_flag_str(cpu->SSE4a));
  printf("SSE4_1=%s\n", debug_flag_str(cpu->SSE4_1));
  printf("SSE4_2=%s\n\n", debug_flag_str(cpu->SSE4_2));

  // FMA
  printf("FMA3=%s\n", debug_flag_str(cpu->FMA3));
  printf("FMA4=%s\n\n", debug_flag_str(cpu->FMA4));

  // Crypto
  printf("AES=%s\n", debug_flag_str(cpu->AES));
  printf("SHA=%s\n", debug_flag_str(cpu->SHA));
}
// Prints the size of the four caches to stdout, in bytes.
// The 'exists' flag is not checked, so the size of a cache that was
// not detected is printed as it is stored.
void debug_cache(struct cache* cach) {
printf("L1i=%dB\n",cach->L1i->size);
printf("L1d=%dB\n",cach->L1d->size);
printf("L2=%dB\n",cach->L2->size);
printf("L3=%dB\n",cach->L3->size);
}
/*
 * Prints the maximum and base frequencies to stdout.
 * Both fields are int64_t. Outside Windows they are printed as
 * long long, since "%ld" only matches int64_t where long is 64 bits
 * wide.
 */
void debug_frequency(struct frequency* freq) {
  #ifdef _WIN32
    printf("maxf=%I64d Mhz\n",freq->max);
    printf("basef=%I64d Mhz\n",freq->base);
  #else
    printf("maxf=%lld Mhz\n",(long long)freq->max);
    printf("basef=%lld Mhz\n",(long long)freq->base);
  #endif
}
/*** STRING FUNCTIONS ***/
/*
 * Returns a heap allocated string with the single precision peak
 * performance, or "Unknown" if the frequency is UNKNOWN_FREQ.
 *
 * PP(SP) =
 *   physical cores * sockets *
 *   frequency (given in MHz) *
 *   number of vector units *
 *   2 (if the CPU has FMA) *
 *   16 (if AVX512), 8 (if AVX), 4 (if SSE)
 *
 * The result is printed with two decimals in TFLOP/s, GFLOP/s or
 * MFLOP/s depending on its magnitude.
 */
char* get_str_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t freq) {
  //7 for GFLOP/s and 6 for digits,eg 412.14
  uint32_t size = 7+6+1+1;
  assert(strlen(STRING_UNKNOWN)+1 <= size);
  char* string = malloc(sizeof(char)*size);

  //Without a frequency there is nothing to compute
  if(freq == UNKNOWN_FREQ) {
    snprintf(string,strlen(STRING_UNKNOWN)+1,STRING_UNKNOWN);
    return string;
  }

  double flops = topo->physical_cores * topo->sockets * (freq*1000000);
  const int vpus = get_number_of_vpus(cpu);

  flops *= vpus;

  if(cpu->FMA3 || cpu->FMA4) {
    flops *= 2;
  }

  // Ice Lake has AVX512, but it has 1 VPU for AVX512, while
  // it has 2 for AVX2. If this is a Ice Lake CPU, we are computing
  // the peak performance supposing AVX2, not AVX512
  if(cpu->AVX512 && vpus_are_AVX512(cpu)) {
    flops *= 16;
  }
  else if(cpu->AVX || cpu->AVX2) {
    flops *= 8;
  }
  else if(cpu->SSE) {
    flops *= 4;
  }

  // Pick the unit starting from the smallest one
  if(flops < 1000000000.0) {
    snprintf(string,size,"%.2f MFLOP/s",flops/1000000);
  }
  else if(flops < (double)1000000000000.0) {
    snprintf(string,size,"%.2f GFLOP/s",flops/1000000000);
  }
  else {
    snprintf(string,size,"%.2f TFLOP/s",flops/1000000000000);
  }

  return string;
}
/*
 * Returns a heap allocated string describing the cores of the CPU:
 *  - "N cores" if the CPU does not support SMT,
 *  - "N cores (M threads)" if SMT is supported and enabled,
 *  - "N cores (SMT disabled)" (AMD) or "N cores (HT disabled)"
 *    (any other vendor) if SMT is supported but not enabled.
 * If dual_socket is true, N and M are multiplied by the number of
 * sockets.
 */
char* get_str_topology(struct cpuInfo* cpu, struct topology* topo, bool dual_socket) {
  // Per-socket values are scaled only when asked to
  const uint32_t packages = dual_socket ? topo->sockets : 1;
  const uint32_t cores = topo->physical_cores * packages;

  if(topo->smt_supported <= 1) {
    uint32_t size = 3+7+1;
    char* string = malloc(sizeof(char)*size);
    snprintf(string, size, "%d cores",cores);
    return string;
  }

  //3 for digits, 21 for ' cores (SMT disabled)' which is the longest possible output
  uint32_t size = 3+21+1;
  char* string = malloc(sizeof(char)*size);

  if(topo->smt_available > 1) {
    const uint32_t threads = topo->logical_cores * packages;
    snprintf(string, size, "%d cores (%d threads)",cores, threads);
  }
  else if(cpu->cpu_vendor == VENDOR_AMD) {
    snprintf(string, size, "%d cores (SMT disabled)",cores);
  }
  else {
    snprintf(string, size, "%d cores (HT disabled)",cores);
  }

  return string;
}
/*
 * Returns a heap allocated string with the number of sockets, or NULL
 * (after printing a bug message) if it can not be formatted.
 * The buffer holds any uint32_t value; the previous 2 byte buffer
 * silently truncated counts of 10 or more.
 */
char* get_str_sockets(struct topology* topo) {
  // 10 digits for the biggest uint32_t (4294967295) plus '\0'
  const uint32_t size = 10+1;
  char* string = malloc(sizeof(char) * size);
  int32_t sanity_ret = snprintf(string, size, "%u", topo->sockets);
  if(sanity_ret < 0) {
    printBug("get_str_sockets: snprintf returned a negative value for input: '%u'", topo->sockets);
    free(string);
    return NULL;
  }
  return string;
}
// Returns the name stored in 'cpu'. The pointer is not a copy: it is
// the same buffer that free_cpuinfo_struct() releases.
char* get_str_cpu_name(struct cpuInfo* cpu) {
return cpu->cpu_name;
}
/*
 * Returns a heap allocated string listing the AVX versions supported:
 * "No", "AVX", "AVX,AVX2" or "AVX,AVX2,AVX512". The list stops at the
 * first version that is not supported.
 */
char* get_str_avx(struct cpuInfo* cpu) {
  const char* text;

  if(!cpu->AVX) {
    text = "No";
  }
  else if(!cpu->AVX2) {
    text = "AVX";
  }
  else if(!cpu->AVX512) {
    text = "AVX,AVX2";
  }
  else {
    text = "AVX,AVX2,AVX512";
  }

  //If all AVX are available, it will use up to 15
  const uint32_t size = 17+1;
  char* string = malloc(sizeof(char)*size);
  snprintf(string, size, "%s", text);

  return string;
}
/*
 * Returns a heap allocated string with the comma separated list of the
 * SSE versions supported (e.g. "SSE,SSE2,SSE3"), or "No" if none is
 * supported.
 * The latter case used to write before the start of the buffer when
 * removing a trailing comma that did not exist.
 */
char* get_str_sse(struct cpuInfo* cpu) {
  const struct {
    bool present;
    const char* label;
  } exts[] = {
    { cpu->SSE,    "SSE"    },
    { cpu->SSE2,   "SSE2"   },
    { cpu->SSE3,   "SSE3"   },
    { cpu->SSSE3,  "SSSE3"  },
    { cpu->SSE4a,  "SSE4a"  },
    { cpu->SSE4_1, "SSE4.1" },
    { cpu->SSE4_2, "SSE4.2" },
  };
  const size_t n_exts = sizeof(exts) / sizeof(exts[0]);

  // Room for every label, a separator after each one and '\0'
  size_t size = 1;
  for(size_t i = 0; i < n_exts; i++) {
    size += strlen(exts[i].label) + 1;
  }

  char* string = malloc(sizeof(char) * size);
  size_t last = 0;

  for(size_t i = 0; i < n_exts; i++) {
    if(exts[i].present) {
      // The comma is written before every label but the first one, so
      // there is nothing to purge at the end
      last += snprintf(string + last, size - last, "%s%s", last > 0 ? "," : "", exts[i].label);
    }
  }

  if(last == 0) {
    snprintf(string, size, "No");
  }

  return string;
}
/*
 * Returns a heap allocated string with the FMA versions supported:
 * "FMA3,FMA4", "FMA3", "FMA4" or "No".
 * FMA4 is reported on its own too: a CPU may implement FMA4 without
 * FMA3, and that case used to be printed as "No".
 */
char* get_str_fma(struct cpuInfo* cpu) {
  char* string = malloc(sizeof(char)*9+1);

  if(cpu->FMA3 && cpu->FMA4)
    snprintf(string,9+1,"FMA3,FMA4");
  else if(cpu->FMA3)
    snprintf(string,4+1,"FMA3");
  else if(cpu->FMA4)
    snprintf(string,4+1,"FMA4");
  else
    snprintf(string,2+1,"No");

  return string;
}
// Returns a heap allocated "Yes" or "No" telling whether the CPU
// supports AES
char* get_str_aes(struct cpuInfo* cpu) {
  const char* answer = cpu->AES ? STRING_YES : STRING_NO;
  // 3 characters for the longest answer plus '\0'
  char* string = malloc(sizeof(char)*3+1);
  snprintf(string, 3+1, "%s", answer);
  return string;
}
// Returns a heap allocated "Yes" or "No" telling whether the CPU
// supports SHA
char* get_str_sha(struct cpuInfo* cpu) {
  const char* answer = cpu->SHA ? STRING_YES : STRING_NO;
  // 3 characters for the longest answer plus '\0'
  char* string = malloc(sizeof(char)*3+1);
  snprintf(string, 3+1, "%s", answer);
  return string;
}
/*
 * Formats 'value' (a size in bytes) with up to 4 significant digits,
 * in MB if it is at least 1024 KB and in KB otherwise.
 * The text is stored in a newly allocated buffer returned through
 * 'str' (the caller frees it). Returns the length of the text, or a
 * negative value on error, in which case *str may be NULL.
 *
 * The buffer is big enough for the longest possible output, so the
 * returned length always matches the stored text. The previous 10 byte
 * limit could truncate the text while still reporting the full length,
 * which callers use to size their own buffers.
 */
int32_t get_value_as_smallest_unit(char ** str, uint32_t value) {
  // The longest output is 9 characters for the number (e.g. 0.0009766)
  // plus 2 for the unit; 16 leaves some slack
  const uint32_t size = 16;
  int32_t sanity_ret;

  *str = malloc(sizeof(char) * size);
  if(*str == NULL)
    return -1;

  if(value/1024 >= 1024)
    sanity_ret = snprintf(*str, size,"%.4g"STRING_MEGABYTES, (double)value/(1<<20));
  else
    sanity_ret = snprintf(*str, size,"%.4g"STRING_KILOBYTES, (double)value/(1<<10));

  return sanity_ret;
}
// String functions
/*
 * Returns a heap allocated string of the form "SIZE (TOTAL Total)",
 * where SIZE is the size of one cache and TOTAL is that size
 * multiplied by the second argument. Returns NULL (after printing a
 * bug message) if any of the values can not be formatted.
 *
 * The buffer is allocated once the length of both values is known.
 * It used to have a fixed size of 22 bytes while snprintf was given a
 * size computed from the values, so long values (e.g. "1.375MB" and
 * "24.75MB") overflowed it. Temporary strings are released on every
 * path.
 */
char* get_str_cache_two(int32_t cache_size, uint32_t physical_cores) {
  char* string = NULL;
  char* tmp1 = NULL;
  char* tmp2 = NULL;
  size_t size;
  int32_t sanity_ret;

  int32_t tmp1_len = get_value_as_smallest_unit(&tmp1, cache_size);
  int32_t tmp2_len = get_value_as_smallest_unit(&tmp2, cache_size * physical_cores);

  if(tmp1_len < 0) {
    printBug("get_value_as_smallest_unit: snprintf returned a negative value for input: %d\n", cache_size);
    goto cleanup;
  }
  if(tmp2_len < 0) {
    printBug("get_value_as_smallest_unit: snprintf returned a negative value for input: %d\n", cache_size * physical_cores);
    goto cleanup;
  }

  // strlen is used instead of the reported lengths because the latter
  // may be bigger than the stored text if it was truncated
  // 2 for ' (', 7 for ' Total)' and 1 for '\0'
  size = strlen(tmp1) + 2 + strlen(tmp2) + 7 + 1;
  string = malloc(sizeof(char) * size);

  sanity_ret = snprintf(string, size, "%s (%s Total)", tmp1, tmp2);
  if(sanity_ret < 0) {
    printBug("get_str_cache_two: snprintf returned a negative value for input: '%s' and '%s'\n", tmp1, tmp2);
    free(string);
    string = NULL;
  }

cleanup:
  free(tmp1);
  free(tmp2);
  return string;
}
/*
 * Returns a heap allocated string with the size of a single cache
 * (e.g. "32KB"), or NULL (after printing a bug message) if the value
 * can not be formatted.
 *
 * The string built by get_value_as_smallest_unit() is returned as is.
 * It used to be copied into a 7 byte buffer with a size of length + 1,
 * which overflowed with 7 character values such as "24.75MB".
 */
char* get_str_cache_one(int32_t cache_size) {
  char* tmp = NULL;
  int32_t tmp_len = get_value_as_smallest_unit(&tmp, cache_size);

  if(tmp_len < 0) {
    printBug("get_value_as_smallest_unit: snprintf returned a negative value for input: %d", cache_size);
    free(tmp);
    return NULL;
  }

  // Ownership of the buffer is handed over to the caller
  return tmp;
}
// Returns the string for a cache: size and total if there is more than
// one cache of that kind, only the size otherwise
char* get_str_cache(int32_t cache_size, int32_t num_caches) {
  return (num_caches > 1) ? get_str_cache_two(cache_size, num_caches)
                          : get_str_cache_one(cache_size);
}
// Returns the string describing the L1 instruction cache (see get_str_cache)
char* get_str_l1i(struct cache* cach) {
return get_str_cache(cach->L1i->size, cach->L1i->num_caches);
}
// Returns the string describing the L1 data cache (see get_str_cache)
char* get_str_l1d(struct cache* cach) {
return get_str_cache(cach->L1d->size, cach->L1d->num_caches);
}
// Returns the string describing the L2 cache (see get_str_cache).
// The L2 must exist; this is enforced with an assert.
char* get_str_l2(struct cache* cach) {
assert(cach->L2->exists);
return get_str_cache(cach->L2->size, cach->L2->num_caches);
}
// Returns the string describing the L3 cache (see get_str_cache),
// or NULL if there is no L3.
char* get_str_l3(struct cache* cach) {
if(!cach->L3->exists)
return NULL;
return get_str_cache(cach->L3->size, cach->L3->num_caches);
}
/*
 * Returns a heap allocated string with the maximum frequency:
 * "Unknown" if it is UNKNOWN_FREQ, the value in GHz if it is at least
 * 1000 MHz, and the value in MHz otherwise. Two decimals are printed.
 *
 * The buffer used to be 8 bytes long, which only fits the GHz form;
 * anything from 10 MHz to 999 MHz was truncated (e.g. "800.00MHz"
 * needs 10 bytes).
 */
char* get_str_freq(struct frequency* freq) {
  //Up to 6 digits, 1 for '.', 2 decimals, 3 for '(M/G)Hz', '\0' and some slack
  uint32_t size = 16;
  assert(strlen(STRING_UNKNOWN)+1 <= size);
  char* string = malloc(sizeof(char)*size);

  if(freq->max == UNKNOWN_FREQ)
    snprintf(string,strlen(STRING_UNKNOWN)+1,STRING_UNKNOWN);
  else if(freq->max >= 1000)
    snprintf(string,size,"%.2f"STRING_GIGAHERZ,(float)(freq->max)/1000);
  else
    snprintf(string,size,"%.2f"STRING_MEGAHERZ,(float)(freq->max));

  return string;
}
// Prints the CPU name and the maximum standard and extended cpuid
// levels to stdout.
// Note that 'cpu' is released before returning, so the caller must not
// use it (or free it) afterwards.
void print_levels(struct cpuInfo* cpu) {
printf("%s\n", cpu->cpu_name);
printf("- Max standart level: 0x%.8X\n", cpu->maxLevels);
printf("- Max extended level: 0x%.8X\n", cpu->maxExtendedLevels);
free_cpuinfo_struct(cpu);
}
// Releases the topology, its apic structure and the two buffers the
// apic structure points to. The cache structure linked to the topology
// is not released here.
// NOTE(review): init_topology_struct does not set cache_select_mask nor
// cache_id_apic; presumably they are filled in elsewhere. Confirm they
// are always valid (or NULL) when this is called.
void free_topo_struct(struct topology* topo) {
free(topo->apic->cache_select_mask);
free(topo->apic->cache_id_apic);
free(topo->apic);
free(topo);
}
// Releases the four cache descriptors (through cach_arr), the array
// itself and the cache structure.
void free_cache_struct(struct cache* cach) {
for(int i=0; i < 4; i++) free(cach->cach_arr[i]);
free(cach->cach_arr);
free(cach);
}
// Releases the frequency structure.
void free_freq_struct(struct frequency* freq) {
free(freq);
}
// Releases the microarchitecture, the name and the cpuInfo itself.
// Does nothing if 'cpu' is NULL (which is what get_cpu_info() returns
// on error), the same way free() does.
void free_cpuinfo_struct(struct cpuInfo* cpu) {
  if(cpu == NULL)
    return;

  free_uarch_struct(cpu->arch);
  free(cpu->cpu_name);
  free(cpu);
}

View File

@@ -3,6 +3,109 @@
#include <stdint.h> #include <stdint.h>
void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx); #define VENDOR_EMPTY 0
#define VENDOR_INTEL 1
#define VENDOR_AMD 2
#define VENDOR_INVALID 3
#define UNKNOWN_FREQ -1
typedef int32_t VENDOR;
struct frequency;
// General information about the CPU, as read from cpuid
struct cpuInfo {
// Instruction set extensions: true if the CPU reports support for them
bool AVX;
bool AVX2;
bool AVX512;
bool SSE;
bool SSE2;
bool SSE3;
bool SSSE3;
bool SSE4a;
bool SSE4_1;
bool SSE4_2;
bool FMA3;
bool FMA4;
bool AES;
bool SHA;
// One of the VENDOR_* values
VENDOR cpu_vendor;
// Heap allocated name of the CPU
char* cpu_name;
// Max cpuids levels
uint32_t maxLevels;
// Max cpuids extended levels
uint32_t maxExtendedLevels;
// Microarchitecture of the CPU
struct uarch* arch;
};
// Description of the caches of one kind (L1i, L1d, L2 or L3)
struct cach {
// Size of one cache, in bytes
int32_t size;
// How many caches of this kind there are
uint8_t num_caches;
// Whether cpuid reported this cache
bool exists;
// plenty of more properties to include in the future...
};
// The caches of the CPU
struct cache {
struct cach* L1i;
struct cach* L1d;
struct cach* L2;
struct cach* L3;
// The same four descriptors as an array, in the order L1i, L1d, L2, L3
struct cach** cach_arr;
// Incremented once for every cache found while reading cpuid
uint8_t max_cache_level;
};
// Cores, threads and sockets of the system
struct topology {
// Number of cores reported by the operating system
int64_t total_cores;
uint32_t physical_cores;
uint32_t logical_cores;
uint32_t smt_available; // Number of SMT that is currently enabled
uint32_t smt_supported; // Number of SMT that CPU supports (equal to smt_available if SMT is enabled)
uint32_t sockets;
struct apic* apic;
// Cache structure linked to this topology (the one given to get_topology_info)
struct cache* cach;
};
struct cpuInfo* get_cpu_info();
VENDOR get_cpu_vendor(struct cpuInfo* cpu);
uint32_t get_nsockets(struct topology* topo);
int64_t get_freq(struct frequency* freq);
struct cache* get_cache_info(struct cpuInfo* cpu);
struct frequency* get_frequency_info(struct cpuInfo* cpu);
struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach);
char* get_str_cpu_name(struct cpuInfo* cpu);
char* get_str_ncores(struct cpuInfo* cpu);
char* get_str_avx(struct cpuInfo* cpu);
char* get_str_sse(struct cpuInfo* cpu);
char* get_str_fma(struct cpuInfo* cpu);
char* get_str_aes(struct cpuInfo* cpu);
char* get_str_sha(struct cpuInfo* cpu);
char* get_str_l1i(struct cache* cach);
char* get_str_l1d(struct cache* cach);
char* get_str_l2(struct cache* cach);
char* get_str_l3(struct cache* cach);
char* get_str_freq(struct frequency* freq);
char* get_str_sockets(struct topology* topo);
char* get_str_topology(struct cpuInfo* cpu, struct topology* topo, bool dual_socket);
char* get_str_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t freq);
void print_levels(struct cpuInfo* cpu);
void free_cache_struct(struct cache* cach);
void free_topo_struct(struct topology* topo);
void free_freq_struct(struct frequency* freq);
void free_cpuinfo_struct(struct cpuInfo* cpu);
void debug_cpu_info(struct cpuInfo* cpu);
void debug_cache(struct cache* cach);
void debug_frequency(struct frequency* freq);
#endif #endif

10
src/cpuid_asm.c Normal file
View File

@@ -0,0 +1,10 @@
#include "cpuid_asm.h"
// Executes the cpuid instruction.
// On entry, *eax and *ecx are loaded into EAX and ECX (the "0" and "2"
// constraints tie them to the first and third outputs); *ebx and *edx
// are not used as inputs. On return, the four pointers receive the
// contents of EAX, EBX, ECX and EDX.
void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) {
__asm volatile("cpuid"
: "=a" (*eax),
"=b" (*ebx),
"=c" (*ecx),
"=d" (*edx)
: "0" (*eax), "2" (*ecx));
}

8
src/cpuid_asm.h Normal file
View File

@@ -0,0 +1,8 @@
#ifndef __CPUID_ASM__
#define __CPUID_ASM__
#include <stdint.h>
void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx);
#endif

View File

@@ -1,96 +0,0 @@
#include <stdio.h>
#include <string.h>
#include "extended.h"
char* get_str_cpu_name() {
uint32_t eax = 0;
uint32_t ebx = 0;
uint32_t ecx = 0;
uint32_t edx = 0;
char *name = malloc(sizeof(char)*64);
memset(name, 0, 64);
//First, check we can use extended
eax = 0x80000000;
cpuid(&eax, &ebx, &ecx, &edx);
if(eax < 0x80000001) {
char* none = malloc(sizeof(char)*64);
sprintf(none,"Unknown");
return none;
}
//We can, fetch name
eax = 0x80000002;
cpuid(&eax, &ebx, &ecx, &edx);
name[__COUNTER__] = eax & MASK;
name[__COUNTER__] = (eax>>8) & MASK;
name[__COUNTER__] = (eax>>16) & MASK;
name[__COUNTER__] = (eax>>24) & MASK;
name[__COUNTER__] = ebx & MASK;
name[__COUNTER__] = (ebx>>8) & MASK;
name[__COUNTER__] = (ebx>>16) & MASK;
name[__COUNTER__] = (ebx>>24) & MASK;
name[__COUNTER__] = ecx & MASK;
name[__COUNTER__] = (ecx>>8) & MASK;
name[__COUNTER__] = (ecx>>16) & MASK;
name[__COUNTER__] = (ecx>>24) & MASK;
name[__COUNTER__] = edx & MASK;
name[__COUNTER__] = (edx>>8) & MASK;
name[__COUNTER__] = (edx>>16) & MASK;
name[__COUNTER__] = (edx>>24) & MASK;
eax = 0x80000003;
cpuid(&eax, &ebx, &ecx, &edx);
name[__COUNTER__] = eax & MASK;
name[__COUNTER__] = (eax>>8) & MASK;
name[__COUNTER__] = (eax>>16) & MASK;
name[__COUNTER__] = (eax>>24) & MASK;
name[__COUNTER__] = ebx & MASK;
name[__COUNTER__] = (ebx>>8) & MASK;
name[__COUNTER__] = (ebx>>16) & MASK;
name[__COUNTER__] = (ebx>>24) & MASK;
name[__COUNTER__] = ecx & MASK;
name[__COUNTER__] = (ecx>>8) & MASK;
name[__COUNTER__] = (ecx>>16) & MASK;
name[__COUNTER__] = (ecx>>24) & MASK;
name[__COUNTER__] = edx & MASK;
name[__COUNTER__] = (edx>>8) & MASK;
name[__COUNTER__] = (edx>>16) & MASK;
name[__COUNTER__] = (edx>>24) & MASK;
eax = 0x80000004;
cpuid(&eax, &ebx, &ecx, &edx);
name[__COUNTER__] = eax & MASK;
name[__COUNTER__] = (eax>>8) & MASK;
name[__COUNTER__] = (eax>>16) & MASK;
name[__COUNTER__] = (eax>>24) & MASK;
name[__COUNTER__] = ebx & MASK;
name[__COUNTER__] = (ebx>>8) & MASK;
name[__COUNTER__] = (ebx>>16) & MASK;
name[__COUNTER__] = (ebx>>24) & MASK;
name[__COUNTER__] = ecx & MASK;
name[__COUNTER__] = (ecx>>8) & MASK;
name[__COUNTER__] = (ecx>>16) & MASK;
name[__COUNTER__] = (ecx>>24) & MASK;
name[__COUNTER__] = edx & MASK;
name[__COUNTER__] = (edx>>8) & MASK;
name[__COUNTER__] = (edx>>16) & MASK;
name[__COUNTER__] = (edx>>24) & MASK;
name[__COUNTER__] = '\0';
//Remove unused characters
char *str = name;
char *dest = name;
while (*str != '\0') {
while (*str == ' ' && *(str + 1) == ' ') str++;
*dest++ = *str++;
}
*dest = '\0';
return name;
}

View File

@@ -1,11 +0,0 @@
#ifndef __EXTENDED__
#define __EXTENDED__
#define MASK 0xFF
#include "cpuid.h"
#include <stdint.h>
#include <stdlib.h>
char* get_str_cpu_name();
#endif

View File

@@ -51,7 +51,7 @@ void printBug(const char *fmt, ...) {
vsnprintf(buffer,buffer_size, fmt, args); vsnprintf(buffer,buffer_size, fmt, args);
va_end(args); va_end(args);
fprintf(stderr,RED "[ERROR]: "RESET "%s\n",buffer); fprintf(stderr,RED "[ERROR]: "RESET "%s\n",buffer);
fprintf(stderr,"Please, create a new issue with this error message and your CPU in https://github.com/Dr-Noob/cpufetch/issues\n"); fprintf(stderr,"Please, create a new issue with this error message and your CPU model in https://github.com/Dr-Noob/cpufetch/issues\n");
} }
void set_log_level(bool verbose) { void set_log_level(bool verbose) {

View File

@@ -3,40 +3,36 @@
#include "args.h" #include "args.h"
#include "printer.h" #include "printer.h"
#include "standart.h" #include "cpuid.h"
#include "extended.h"
#include "global.h" #include "global.h"
/*** static const char* VERSION = "0.7";
SAMPLE OUTPUT
Name: Intel Core i7-4790K
Frequency: 4.0 GHz
NºCores: 4 cores(8 threads)
AXV: AVX,AVX2
SSE: SSE,SSE2,SSE4.1,SSE4.2
FMA: FMA3
AES: Yes
SHA: No
L1 Size: 32KB(Data)32KB(Instructions)
L2 Size: 512KB
L3 Size: 8MB
Peak FLOPS: 512 GFLOP/s(in simple precision)
***/
static const char* VERSION = "0.410";
void print_help(char *argv[]) { void print_help(char *argv[]) {
printf("Usage: %s [--version] [--help] [--style STYLE]\n\ printf("Usage: %s [--version] [--help] [--levels] [--style \"fancy\"|\"retro\"|\"legacy\"] [--color \"intel\"|\"amd\"|'R,G,B:R,G,B:R,G,B:R,G,B']\n\n\
Options: \n\ Options: \n\
--style Set logo style color\n\ --color Set the color scheme. By default, cpufetch uses the system color scheme. This option \n\
default: Default style color\n\ lets the user use different colors to print the CPU art: \n\
dark: Dark style color\n\ * \"intel\": Use intel default color scheme \n\
none: Don't use colors\n\ * \"amd\": Use amd default color scheme \n\
--help Prints this help and exit\n\ * custom: If color do not match \"intel\" or \"amd\", a custom scheme can be specified: \n\
--levels Prints CPU model and cpuid levels (debug purposes)\n\ 4 colors must be given in RGB with the format: R,G,B:R,G,B:... \n\
--version Prints cpufetch version and exit\n", These colors correspond to CPU art color (2 colors) and for the text colors (following 2) \n\
For example: --color 239,90,45:210,200,200:100,200,45:0,200,200 \n\n\
--style Set the style of CPU art: \n\
* \"fancy\": Default style \n\
* \"retro\": Old cpufetch style \n\
* \"legacy\": Fallback style for terminals that does not support colors \n\n\
--levels Prints CPU model and cpuid levels (debug purposes)\n\n\
--verbose Prints extra information (if available) about how cpufetch tried fetching information\n\n\
--help Prints this help and exit\n\n\
--version Prints cpufetch version and exit\n\n\
\n\
NOTES: \n\
- Bugs or improvements should be submitted to: github.com/Dr-Noob/cpufetch/issues \n\
- Peak performance information is NOT accurate. cpufetch computes peak performance using the max \n\
frequency. However, to properly compute peak performance, you need to know the frequency of the \n\
CPU running AVX code, which is not be fetched by cpufetch since it depends on each specific CPU. \n",
argv[0]); argv[0]);
} }
@@ -63,75 +59,27 @@ int main(int argc, char* argv[]) {
struct cpuInfo* cpu = get_cpu_info(); struct cpuInfo* cpu = get_cpu_info();
if(cpu == NULL) if(cpu == NULL)
return EXIT_FAILURE; return EXIT_FAILURE;
char* cpuName = get_str_cpu_name();
if(show_levels()) { if(show_levels()) {
print_version(); print_version();
print_levels(cpu, cpuName); print_levels(cpu);
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }
struct cache* cach = get_cache_info(cpu);
if(cach == NULL)
return EXIT_FAILURE;
struct frequency* freq = get_frequency_info(cpu); struct frequency* freq = get_frequency_info(cpu);
if(freq == NULL) if(freq == NULL)
return EXIT_FAILURE; return EXIT_FAILURE;
struct topology* topo = get_topology_info(cpu); struct cache* cach = get_cache_info(cpu);
if(cach == NULL)
return EXIT_FAILURE;
struct topology* topo = get_topology_info(cpu, cach);
if(topo == NULL) if(topo == NULL)
return EXIT_FAILURE; return EXIT_FAILURE;
struct ascii* art = set_ascii(get_cpu_vendor(cpu),get_style()); if(print_cpufetch(cpu, cach, freq, topo, get_style(), get_colors()))
if(art == NULL) return EXIT_SUCCESS;
else
return EXIT_FAILURE; return EXIT_FAILURE;
char* maxFrequency = get_str_freq(freq);
char* nCores = get_str_topology(topo);
char* avx = get_str_avx(cpu);
char* sse = get_str_sse(cpu);
char* fma = get_str_fma(cpu);
char* aes = get_str_aes(cpu);
char* sha = get_str_sha(cpu);
char* l1 = get_str_l1(cach);
char* l2 = get_str_l2(cach);
char* l3 = get_str_l3(cach);
char* pp = get_str_peak_performance(cpu,topo,get_freq(freq));
setAttribute(art,ATTRIBUTE_NAME,cpuName);
setAttribute(art,ATTRIBUTE_FREQUENCY,maxFrequency);
setAttribute(art,ATTRIBUTE_NCORES,nCores);
setAttribute(art,ATTRIBUTE_AVX,avx);
setAttribute(art,ATTRIBUTE_SSE,sse);
setAttribute(art,ATTRIBUTE_FMA,fma);
setAttribute(art,ATTRIBUTE_AES,aes);
setAttribute(art,ATTRIBUTE_SHA,sha);
setAttribute(art,ATTRIBUTE_L1,l1);
setAttribute(art,ATTRIBUTE_L2,l2);
setAttribute(art,ATTRIBUTE_L3,l3);
setAttribute(art,ATTRIBUTE_PEAK,pp);
print_ascii(art);
free(cpuName);
free(maxFrequency);
free(nCores);
free(avx);
free(sse);
free(fma);
free(aes);
free(sha);
free(l1);
free(l2);
free(l3);
free(pp);
free(cpu);
free(art);
free_cache_struct(cach);
free_topo_struct(topo);
free_freq_struct(freq);
return EXIT_SUCCESS;
} }

View File

@@ -6,112 +6,207 @@
#include "printer.h" #include "printer.h"
#include "ascii.h" #include "ascii.h"
#include "global.h" #include "global.h"
#include "cpuid.h"
#include "uarch.h"
#define COL_NONE "" #define COL_NONE ""
#define COL_INTEL_DEFAULT_1 "\x1b[36;1m" #define COL_INTEL_FANCY_1 "\x1b[46;1m"
#define COL_INTEL_DEFAULT_2 "\x1b[37;1m" #define COL_INTEL_FANCY_2 "\x1b[47;1m"
#define COL_INTEL_DARK_1 "\x1b[34;1m" #define COL_INTEL_FANCY_3 "\x1b[36;1m"
#define COL_INTEL_DARK_2 "\x1b[30m" #define COL_INTEL_FANCY_4 "\x1b[37;1m"
#define COL_AMD_DEFAULT_1 "\x1b[37;1m" #define COL_INTEL_RETRO_1 "\x1b[36;1m"
#define COL_AMD_DEFAULT_2 "\x1b[31;1m" #define COL_INTEL_RETRO_2 "\x1b[37;1m"
#define COL_AMD_DARK_1 "\x1b[30;1m" #define COL_AMD_FANCY_1 "\x1b[47;1m"
#define COL_AMD_DARK_2 "\x1b[32;1m" #define COL_AMD_FANCY_2 "\x1b[42;1m"
#define RESET "\x1b[0m" #define COL_AMD_FANCY_3 "\x1b[37;1m"
#define COL_AMD_FANCY_4 "\x1b[32;1m"
#define COL_AMD_RETRO_1 "\x1b[37;1m"
#define COL_AMD_RETRO_2 "\x1b[32;1m"
#define RESET "\x1b[m"
#define TITLE_NAME "Name: " #define TITLE_NAME "Name:"
#define TITLE_FREQUENCY "Frequency: " #define TITLE_FREQUENCY "Max Frequency:"
#define TITLE_NCORES "N.Cores: " #define TITLE_SOCKETS "Sockets:"
#define TITLE_AVX "AVX: " #define TITLE_NCORES "Cores:"
#define TITLE_SSE "SSE: " #define TITLE_NCORES_DUAL "Cores (Total):"
#define TITLE_FMA "FMA: " #define TITLE_AVX "AVX:"
#define TITLE_AES "AES: " #define TITLE_SSE "SSE:"
#define TITLE_SHA "SHA: " #define TITLE_FMA "FMA:"
#define TITLE_L1 "L1 Size: " #define TITLE_AES "AES:"
#define TITLE_L2 "L2 Size: " #define TITLE_SHA "SHA:"
#define TITLE_L3 "L3 Size: " #define TITLE_L1i "L1i Size:"
#define TITLE_PEAK "Peak FLOPS: " #define TITLE_L1d "L1d Size:"
#define TITLE_L2 "L2 Size:"
#define TITLE_L3 "L3 Size:"
#define TITLE_PEAK "Peak Performance:"
#define TITLE_UARCH "Microarchitecture:"
#define TITLE_TECHNOLOGY "Technology:"
/*** CENTER TEXT ***/ #define MAX_ATTRIBUTE_COUNT 14
#define LINES_SPACE_UP 4 #define ATTRIBUTE_NAME 0
#define LINES_SPACE_DOWN 4 #define ATTRIBUTE_UARCH 1
#define ATTRIBUTE_TECHNOLOGY 2
#define ATTRIBUTE_FREQUENCY 3
#define ATTRIBUTE_SOCKETS 4
#define ATTRIBUTE_NCORES 5
#define ATTRIBUTE_NCORES_DUAL 6
#define ATTRIBUTE_AVX 7
#define ATTRIBUTE_FMA 8
#define ATTRIBUTE_L1i 9
#define ATTRIBUTE_L1d 10
#define ATTRIBUTE_L2 11
#define ATTRIBUTE_L3 12
#define ATTRIBUTE_PEAK 13
static const char* ATTRIBUTE_FIELDS [ATTRIBUTE_COUNT] = { TITLE_NAME, TITLE_FREQUENCY, static const char* ATTRIBUTE_FIELDS [MAX_ATTRIBUTE_COUNT] = { TITLE_NAME, TITLE_UARCH, TITLE_TECHNOLOGY,
TITLE_NCORES, TITLE_AVX, TITLE_SSE, TITLE_FREQUENCY, TITLE_SOCKETS,
TITLE_FMA, TITLE_AES, TITLE_SHA, TITLE_NCORES, TITLE_NCORES_DUAL,
TITLE_L1, TITLE_L2, TITLE_L3, TITLE_AVX,
TITLE_PEAK }; TITLE_FMA, TITLE_L1i, TITLE_L1d, TITLE_L2, TITLE_L3,
TITLE_PEAK,
};
static const int ATTRIBUTE_LIST[ATTRIBUTE_COUNT] = { ATTRIBUTE_NAME, ATTRIBUTE_FREQUENCY, static const int ATTRIBUTE_LIST[MAX_ATTRIBUTE_COUNT] = { ATTRIBUTE_NAME, ATTRIBUTE_UARCH, ATTRIBUTE_TECHNOLOGY,
ATTRIBUTE_NCORES, ATTRIBUTE_AVX, ATTRIBUTE_SSE, ATTRIBUTE_FREQUENCY, ATTRIBUTE_SOCKETS,
ATTRIBUTE_FMA, ATTRIBUTE_AES, ATTRIBUTE_SHA, ATTRIBUTE_NCORES, ATTRIBUTE_NCORES_DUAL, ATTRIBUTE_AVX,
ATTRIBUTE_L1, ATTRIBUTE_L2, ATTRIBUTE_L3, ATTRIBUTE_FMA,
ATTRIBUTE_L1i, ATTRIBUTE_L1d, ATTRIBUTE_L2, ATTRIBUTE_L3,
ATTRIBUTE_PEAK }; ATTRIBUTE_PEAK };
struct ascii { struct ascii {
char art[NUMBER_OF_LINES][LINE_SIZE]; char art[NUMBER_OF_LINES][LINE_SIZE];
char color1[10]; char color1_ascii[100];
char color2[10]; char color2_ascii[100];
char reset[10]; char color1_text[100];
char* atributes[ATTRIBUTE_COUNT]; char color2_text[100];
char ascii_chars[2];
char reset[100];
char* attributes[MAX_ATTRIBUTE_COUNT];
uint32_t n_attributes_set;
VENDOR vendor; VENDOR vendor;
}; };
void setAttribute(struct ascii* art, int type, char* value) { void setAttribute(struct ascii* art, int type, char* value) {
int i = 0; art->attributes[type] = value;
while(i < ATTRIBUTE_COUNT && type != ATTRIBUTE_LIST[i]) art->n_attributes_set++;
i++;
if(i != ATTRIBUTE_COUNT)
art->atributes[i] = value;
else
printBug("Setting attribute failed because it was not found");
} }
struct ascii* set_ascii(VENDOR cpuVendor, STYLE style) { char* rgb_to_ansi(struct color* c, bool background, bool bold) {
/*** Check that number of lines of ascii art matches the number char* str = malloc(sizeof(char) * 100);
of spaces plus the number of lines filled with text ***/ if(background) {
if(LINES_SPACE_UP+LINES_SPACE_DOWN+ATTRIBUTE_COUNT != NUMBER_OF_LINES) { snprintf(str, 44, "\x1b[48;2;%.3d;%.3d;%.3dm", c->R, c->G, c->B);
printBug("Number of lines do not match (%d vs %d)",LINES_SPACE_UP+LINES_SPACE_DOWN+ATTRIBUTE_COUNT,NUMBER_OF_LINES); }
return NULL; else {
if(bold)
snprintf(str, 48, "\x1b[1m\x1b[38;2;%.3d;%.3d;%.3dm", c->R, c->G, c->B);
else
snprintf(str, 44, "\x1b[38;2;%.3d;%.3d;%.3dm", c->R, c->G, c->B);
} }
char *COL_DEFAULT_1, *COL_DEFAULT_2, *COL_DARK_1, *COL_DARK_2; return str;
}
struct ascii* set_ascii(VENDOR cpuVendor, STYLE style, struct colors* cs) {
// Sanity checks //
for(int i=0; i < MAX_ATTRIBUTE_COUNT; i++) {
if(ATTRIBUTE_FIELDS[i] == NULL) {
printBug("Attribute field at position %d is empty", i);
return NULL;
}
if(i > 0 && ATTRIBUTE_LIST[i] == 0) {
printBug("Attribute list at position %d is empty", i);
return NULL;
}
}
char *COL_FANCY_1, *COL_FANCY_2, *COL_FANCY_3, *COL_FANCY_4, *COL_RETRO_1, *COL_RETRO_2, *COL_RETRO_3, *COL_RETRO_4;
struct ascii* art = malloc(sizeof(struct ascii)); struct ascii* art = malloc(sizeof(struct ascii));
art->n_attributes_set = 0;
art->vendor = cpuVendor; art->vendor = cpuVendor;
for(int i=0; i < MAX_ATTRIBUTE_COUNT; i++)
art->attributes[i] = NULL;
strcpy(art->reset,RESET); strcpy(art->reset,RESET);
if(cpuVendor == VENDOR_INTEL) { if(cpuVendor == VENDOR_INTEL) {
COL_DEFAULT_1 = COL_INTEL_DEFAULT_1; COL_FANCY_1 = COL_INTEL_FANCY_1;
COL_DEFAULT_2 = COL_INTEL_DEFAULT_2; COL_FANCY_2 = COL_INTEL_FANCY_2;
COL_DARK_1 = COL_INTEL_DARK_1; COL_FANCY_3 = COL_INTEL_FANCY_3;
COL_DARK_2 = COL_INTEL_DARK_2; COL_FANCY_4 = COL_INTEL_FANCY_4;
COL_RETRO_1 = COL_INTEL_RETRO_1;
COL_RETRO_2 = COL_INTEL_RETRO_2;
COL_RETRO_3 = COL_INTEL_RETRO_1;
COL_RETRO_4 = COL_INTEL_RETRO_2;
art->ascii_chars[0] = '#';
} }
else { else {
COL_DEFAULT_1 = COL_AMD_DEFAULT_1; COL_FANCY_1 = COL_AMD_FANCY_1;
COL_DEFAULT_2 = COL_AMD_DEFAULT_2; COL_FANCY_2 = COL_AMD_FANCY_2;
COL_DARK_1 = COL_AMD_DARK_1; COL_FANCY_3 = COL_AMD_FANCY_3;
COL_DARK_2 = COL_AMD_DARK_2; COL_FANCY_4 = COL_AMD_FANCY_4;
COL_RETRO_1 = COL_AMD_RETRO_1;
COL_RETRO_2 = COL_AMD_RETRO_2;
COL_RETRO_3 = COL_AMD_RETRO_1;
COL_RETRO_4 = COL_AMD_RETRO_2;
art->ascii_chars[0] = '@';
}
art->ascii_chars[1] = '#';
// If style is emtpy, set the default style
if(style == STYLE_EMPTY) {
#ifdef _WIN32
style = STYLE_LEGACY;
#else
style = STYLE_FANCY;
#endif
} }
switch(style) { switch(style) {
case STYLE_NONE: case STYLE_LEGACY:
strcpy(art->color1,COL_NONE); strcpy(art->color1_ascii,COL_NONE);
strcpy(art->color2,COL_NONE); strcpy(art->color2_ascii,COL_NONE);
break; strcpy(art->color1_text,COL_NONE);
case STYLE_EMPTY: strcpy(art->color2_text,COL_NONE);
#ifdef _WIN32 art->reset[0] = '\0';
strcpy(art->color1,COL_NONE);
strcpy(art->color2,COL_NONE);
art->reset[0] = '\0';
break;
#endif
case STYLE_DEFAULT:
strcpy(art->color1,COL_DEFAULT_1);
strcpy(art->color2,COL_DEFAULT_2);
break; break;
case STYLE_DARK: case STYLE_FANCY:
strcpy(art->color1,COL_DARK_1); if(cs != NULL) {
strcpy(art->color2,COL_DARK_2); COL_FANCY_1 = rgb_to_ansi(cs->c1, true, true);
COL_FANCY_2 = rgb_to_ansi(cs->c2, true, true);
COL_FANCY_3 = rgb_to_ansi(cs->c3, false, true);
COL_FANCY_4 = rgb_to_ansi(cs->c4, false, true);
}
art->ascii_chars[0] = ' ';
art->ascii_chars[1] = ' ';
strcpy(art->color1_ascii,COL_FANCY_1);
strcpy(art->color2_ascii,COL_FANCY_2);
strcpy(art->color1_text,COL_FANCY_3);
strcpy(art->color2_text,COL_FANCY_4);
if(cs != NULL) {
free(COL_FANCY_1);
free(COL_FANCY_2);
free(COL_FANCY_3);
free(COL_FANCY_4);
}
break; break;
case STYLE_RETRO:
if(cs != NULL) {
COL_RETRO_1 = rgb_to_ansi(cs->c1, false, true);
COL_RETRO_2 = rgb_to_ansi(cs->c2, false, true);
COL_RETRO_3 = rgb_to_ansi(cs->c3, false, true);
COL_RETRO_4 = rgb_to_ansi(cs->c4, false, true);
}
strcpy(art->color1_ascii,COL_RETRO_1);
strcpy(art->color2_ascii,COL_RETRO_2);
strcpy(art->color1_text,COL_RETRO_3);
strcpy(art->color2_text,COL_RETRO_4);
if(cs != NULL) {
free(COL_RETRO_1);
free(COL_RETRO_2);
free(COL_RETRO_3);
free(COL_RETRO_4);
}
break;
case STYLE_INVALID:
default: default:
printBug("Found invalid style (%d)",style); printBug("Found invalid style (%d)",style);
return NULL; return NULL;
@@ -126,62 +221,168 @@ struct ascii* set_ascii(VENDOR cpuVendor, STYLE style) {
return art; return art;
} }
void print_ascii_intel(struct ascii* art) { uint32_t get_next_attribute(struct ascii* art, uint32_t last_attr) {
last_attr++;
while(art->attributes[last_attr] == NULL) last_attr++;
return last_attr;
}
void print_ascii_intel(struct ascii* art, uint32_t la) {
bool flag = false; bool flag = false;
int attr_to_print = -1;
uint32_t space_right;
uint32_t space_up = (NUMBER_OF_LINES - art->n_attributes_set)/2;
uint32_t space_down = NUMBER_OF_LINES - art->n_attributes_set - space_up;
for(int n=0;n<NUMBER_OF_LINES;n++) { printf("\n");
for(uint32_t n=0;n<NUMBER_OF_LINES;n++) {
/*** PRINT ASCII-ART ***/
for(int i=0;i<LINE_SIZE;i++) { for(int i=0;i<LINE_SIZE;i++) {
if(flag) { if(flag) {
if(art->art[n][i] == ' ') { if(art->art[n][i] == ' ') {
flag = false; flag = false;
printf("%c",art->art[n][i]); printf("%s%c%s", art->color2_ascii, art->ascii_chars[1], art->reset);
} }
else else
printf("%s%c%s", art->color1, art->art[n][i], art->reset); printf("%s%c%s", art->color1_ascii, art->ascii_chars[0], art->reset);
} }
else { else {
if(art->art[n][i] != ' ') { if(art->art[n][i] != ' ' && art->art[n][i] != '\0') {
flag = true; flag = true;
printf("%s%c%s", art->color2, art->art[n][i], art->reset); printf("%c",' ');
} }
else else
printf("%c",art->art[n][i]); printf("%c",' ');
} }
} }
/*** PRINT ATTRIBUTE ***/ if(n > space_up-1 && n < NUMBER_OF_LINES-space_down) {
if(n>LINES_SPACE_UP-1 && n<NUMBER_OF_LINES-LINES_SPACE_DOWN) attr_to_print = get_next_attribute(art, attr_to_print);
printf("%s%s%s%s%s\n",art->color1,ATTRIBUTE_FIELDS[n-LINES_SPACE_UP],art->color2,art->atributes[n-LINES_SPACE_UP],art->reset); space_right = 1 + (la - strlen(ATTRIBUTE_FIELDS[attr_to_print]));
printf("%s%s%s%*s%s%s%s\n",art->color1_text, ATTRIBUTE_FIELDS[attr_to_print], art->reset, space_right, "", art->color2_text, art->attributes[attr_to_print], art->reset);
}
else printf("\n"); else printf("\n");
} }
printf("\n");
} }
void print_ascii_amd(struct ascii* art) { void print_ascii_amd(struct ascii* art, uint32_t la) {
int attr_to_print = -1;
uint32_t space_right;
uint32_t space_up = (NUMBER_OF_LINES - art->n_attributes_set)/2;
uint32_t space_down = NUMBER_OF_LINES - art->n_attributes_set - space_up;
for(int n=0;n<NUMBER_OF_LINES;n++) { printf("\n");
/*** PRINT ASCII-ART ***/ for(uint32_t n=0;n<NUMBER_OF_LINES;n++) {
for(int i=0;i<LINE_SIZE;i++) { for(int i=0;i<LINE_SIZE;i++) {
if(art->art[n][i] == '@') if(art->art[n][i] == '@')
printf("%s%c%s", art->color1, art->art[n][i], art->reset); printf("%s%c%s", art->color1_ascii, art->ascii_chars[0], art->reset);
else if(art->art[n][i] == '#') else if(art->art[n][i] == '#')
printf("%s%c%s", art->color2, art->art[n][i], art->reset); printf("%s%c%s", art->color2_ascii, art->ascii_chars[1], art->reset);
else else
printf("%c",art->art[n][i]); printf("%c",art->art[n][i]);
} }
/*** PRINT ATTRIBUTE ***/ if(n > space_up-1 && n < NUMBER_OF_LINES-space_down) {
if(n>LINES_SPACE_UP-1 && n<NUMBER_OF_LINES-LINES_SPACE_DOWN) attr_to_print = get_next_attribute(art, attr_to_print);
printf("%s%s%s%s%s\n",art->color1,ATTRIBUTE_FIELDS[n-LINES_SPACE_UP],art->color2,art->atributes[n-LINES_SPACE_UP], art->reset); space_right = 1 + (la - strlen(ATTRIBUTE_FIELDS[attr_to_print]));
printf("%s%s%s%*s%s%s%s\n",art->color1_text, ATTRIBUTE_FIELDS[attr_to_print], art->reset, space_right, "", art->color2_text, art->attributes[attr_to_print], art->reset);
}
else printf("\n"); else printf("\n");
} }
printf("\n");
} }
uint32_t longest_attribute_length(struct ascii* art) {
uint32_t max = 0;
uint64_t len = 0;
for(int i=0; i < MAX_ATTRIBUTE_COUNT; i++) {
if(art->attributes[i] != NULL) {
len = strlen(ATTRIBUTE_FIELDS[i]);
if(len > max) max = len;
}
}
return max;
}
void print_ascii(struct ascii* art) { void print_ascii(struct ascii* art) {
uint32_t longest_attribute = longest_attribute_length(art);
if(art->vendor == VENDOR_INTEL) if(art->vendor == VENDOR_INTEL)
print_ascii_intel(art); print_ascii_intel(art, longest_attribute);
else else
print_ascii_amd(art); print_ascii_amd(art, longest_attribute);
}
bool print_cpufetch(struct cpuInfo* cpu, struct cache* cach, struct frequency* freq, struct topology* topo, STYLE s, struct colors* cs) {
struct ascii* art = set_ascii(get_cpu_vendor(cpu), s, cs);
if(art == NULL)
return false;
char* cpu_name = get_str_cpu_name(cpu);
char* uarch = get_str_uarch(cpu);
char* manufacturing_process = get_str_process(cpu);
char* sockets = get_str_sockets(topo);
char* max_frequency = get_str_freq(freq);
char* n_cores = get_str_topology(cpu, topo, false);
char* n_cores_dual = get_str_topology(cpu, topo, true);
char* avx = get_str_avx(cpu);
char* fma = get_str_fma(cpu);
char* l1i = get_str_l1i(topo->cach);
char* l1d = get_str_l1d(topo->cach);
char* l2 = get_str_l2(topo->cach);
char* l3 = get_str_l3(topo->cach);
char* pp = get_str_peak_performance(cpu,topo,get_freq(freq));
setAttribute(art,ATTRIBUTE_NAME,cpu_name);
setAttribute(art,ATTRIBUTE_UARCH,uarch);
setAttribute(art,ATTRIBUTE_TECHNOLOGY,manufacturing_process);
setAttribute(art,ATTRIBUTE_FREQUENCY,max_frequency);
setAttribute(art,ATTRIBUTE_NCORES,n_cores);
setAttribute(art,ATTRIBUTE_AVX,avx);
setAttribute(art,ATTRIBUTE_FMA,fma);
setAttribute(art,ATTRIBUTE_L1i,l1i);
setAttribute(art,ATTRIBUTE_L1d,l1d);
setAttribute(art,ATTRIBUTE_L2,l2);
setAttribute(art,ATTRIBUTE_PEAK,pp);
uint32_t socket_num = get_nsockets(topo);
if (socket_num > 1) {
setAttribute(art, ATTRIBUTE_SOCKETS, sockets);
setAttribute(art, ATTRIBUTE_NCORES_DUAL, n_cores_dual);
}
if(l3 != NULL) {
setAttribute(art,ATTRIBUTE_L3,l3);
}
if(art->n_attributes_set > NUMBER_OF_LINES) {
printBug("The number of attributes set is bigger than the max that can be displayed");
return false;
}
print_ascii(art);
free(manufacturing_process);
free(max_frequency);
free(sockets);
free(n_cores);
free(n_cores_dual);
free(avx);
free(fma);
free(l1i);
free(l1d);
free(l2);
free(l3);
free(pp);
free(art);
if(cs != NULL) free_colors_struct(cs);
free_cache_struct(cach);
free_topo_struct(topo);
free_freq_struct(freq);
free_cpuinfo_struct(cpu);
return true;
} }

View File

@@ -1,37 +1,22 @@
#ifndef __PRINTER__ #ifndef __PRINTER__
#define __PRINTER__ #define __PRINTER__
#include "standart.h"
#include "ascii.h"
#define ATTRIBUTE_COUNT 12
#define ATTRIBUTE_NAME 0
#define ATTRIBUTE_FREQUENCY 1
#define ATTRIBUTE_NCORES 2
#define ATTRIBUTE_AVX 3
#define ATTRIBUTE_SSE 4
#define ATTRIBUTE_FMA 5
#define ATTRIBUTE_AES 6
#define ATTRIBUTE_SHA 7
#define ATTRIBUTE_L1 8
#define ATTRIBUTE_L2 9
#define ATTRIBUTE_L3 10
#define ATTRIBUTE_PEAK 11
typedef int STYLE; typedef int STYLE;
#include "args.h"
#include "cpuid.h"
#define STYLES_COUNT 3 #define STYLES_COUNT 3
#define STYLE_EMPTY -2 #define STYLE_INVALID -2
#define STYLE_INVALID -1 #define STYLE_EMPTY -1
#define STYLE_DEFAULT 0 #define STYLE_FANCY 0
#define STYLE_DARK 1 #define STYLE_RETRO 1
#define STYLE_NONE 2 #define STYLE_LEGACY 2
struct ascii; #define COLOR_DEFAULT_INTEL "15,125,194:230,230,230:40,150,220:230,230,230"
#define COLOR_DEFAULT_AMD "250,250,250:0,154,102:250,250,250:0,154,102"
static const int STYLES_CODE_LIST [STYLES_COUNT] = {STYLE_DEFAULT, STYLE_DARK}; bool print_cpufetch(struct cpuInfo* cpu, struct cache* cach, struct frequency* freq, struct topology* topo, STYLE s, struct colors* cs);
struct ascii* set_ascii(VENDOR cpuVendor, STYLE style);
void print_ascii(struct ascii* art);
void setAttribute(struct ascii* art, int type, char* value);
#endif #endif

View File

@@ -1,736 +0,0 @@
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <assert.h>
#include <stdbool.h>
#ifdef _WIN32
#include <windows.h>
#else
#include <unistd.h>
#include "udev.h"
#endif
#include "standart.h"
#include "cpuid.h"
#include "global.h"
#define VENDOR_INTEL_STRING "GenuineIntel"
#define VENDOR_AMD_STRING "AuthenticAMD"
#define STRING_YES "Yes"
#define STRING_NO "No"
#define STRING_UNKNOWN "Unknown"
#define STRING_NONE "None"
#define STRING_MEGAHERZ "MHz"
#define STRING_GIGAHERZ "GHz"
#define STRING_KILOBYTES "KB"
#define STRING_MEGABYTES "MB"
#define MASK 0xFF
/*
* cpuid reference: http://www.sandpile.org/x86/cpuid.htm
* cpuid amd: https://www.amd.com/system/files/TechDocs/25481.pdf
*/
/*
 * Instruction-set support flags, vendor and cpuid limits of the running CPU.
 * Every flag is cleared by init_cpu_info() and then filled in by
 * get_cpu_info() from the cpuid leaves that are available.
 */
struct cpuInfo {
// Vector extensions (AVX512 is set when any of the probed AVX-512 bits is set)
bool AVX;
bool AVX2;
bool AVX512;
// SSE family
bool SSE;
bool SSE2;
bool SSE3;
bool SSSE3;
bool SSE4a;
bool SSE4_1;
bool SSE4_2;
// Fused multiply-add variants
bool FMA3;
bool FMA4;
// Crypto extensions
bool AES;
bool SHA;
// Vendor decoded from the cpuid leaf 0 vendor string
VENDOR cpu_vendor;
// Max cpuids levels
uint32_t maxLevels;
// Max cpuids extended levels
uint32_t maxExtendedLevels;
};
/*
 * Cache sizes in bytes, as computed by get_cache_info().
 * L2 and L3 are set to UNKNOWN when the corresponding cache ID is not reported.
 */
struct cache {
// Level 1 instruction cache
int32_t L1i;
// Level 1 data cache
int32_t L1d;
// Level 2 unified cache
int32_t L2;
// Level 3 unified cache
int32_t L3;
};
/*
 * CPU frequencies filled in by get_frequency_info(); debug_frequency()
 * prints them as MHz. Either field may hold UNKNOWN.
 */
struct frequency {
// Base frequency (eax of cpuid leaf 0x16 when that leaf is available)
int64_t base;
// Maximum frequency (ebx of cpuid leaf 0x16, or the value read from a file otherwise)
int64_t max;
};
/*
 * Core/thread layout filled in by get_topology_info().
 */
struct topology {
// Number of processors reported by the operating system
int64_t total_cores;
// logical_cores / smt
uint32_t physical_cores;
// Logical processor count read from cpuid
uint32_t logical_cores;
// SMT width read from cpuid (presumably threads per core; falls back to 1)
uint32_t smt;
// total_cores / smt / physical_cores
uint32_t sockets;
// Bit 28 of edx from cpuid leaf 1
bool ht;
};
/*
 * Clears every instruction-set flag of `cpu`, so that a flag stays false when
 * the cpuid leaf that would report it is not available. Vendor and max-level
 * fields are left untouched.
 */
void init_cpu_info(struct cpuInfo* cpu) {
  // Vector extensions
  cpu->AVX = cpu->AVX2 = cpu->AVX512 = false;

  // SSE family
  cpu->SSE = cpu->SSE2 = cpu->SSE3 = cpu->SSSE3 = false;
  cpu->SSE4a = cpu->SSE4_1 = cpu->SSE4_2 = false;

  // Fused multiply-add
  cpu->FMA3 = cpu->FMA4 = false;

  // Crypto extensions
  cpu->AES = cpu->SHA = false;
}
/*
 * Writes the four bytes of `reg` into dst[0..3], least-significant byte first.
 */
static void unpack_register_bytes(char *dst, uint32_t reg) {
  for (int i = 0; i < 4; i++) {
    dst[i] = (char)((reg >> (8 * i)) & 0xFF);
  }
}

/*
 * Decodes the 12-character vendor string returned by cpuid leaf 0.
 *
 * The characters are stored in ebx, edx, ecx (in that order), four per
 * register. Exactly 12 bytes are written to `name`; no terminator is added,
 * so the caller must provide a buffer of at least 13 zero-initialised bytes
 * if it wants a C string.
 *
 * Explicit offsets are used instead of __COUNTER__: that macro is shared by
 * the whole translation unit, so any earlier expansion would shift every
 * index and corrupt (or overflow) the destination buffer.
 */
void get_cpu_vendor_internal(char* name, uint32_t ebx,uint32_t ecx,uint32_t edx) {
  unpack_register_bytes(name, ebx);
  unpack_register_bytes(name + 4, edx);
  unpack_register_bytes(name + 8, ecx);
}
/*
 * Returns true when bit number `bit` (0-31) of `reg` is set.
 * Works on unsigned values so that testing bit 31 does not shift a signed 1
 * into the sign bit.
 */
static bool cpuid_bit_set(uint32_t reg, unsigned bit) {
  return ((reg >> bit) & 1U) != 0;
}

/*
 * Allocates and fills a cpuInfo structure using cpuid.
 *
 * Reads the max standard level and vendor (leaf 0), the max extended level
 * (leaf 0x80000000) and the feature flags from leaves 1, 7 and 0x80000001
 * when they are available; a warning is printed for each missing leaf and
 * the related flags stay false.
 *
 * Returns NULL (after freeing the structure) if the allocation fails or the
 * vendor is neither Intel nor AMD. The caller owns the returned pointer.
 */
struct cpuInfo* get_cpu_info() {
  struct cpuInfo* cpu = malloc(sizeof *cpu);
  if (cpu == NULL) {
    printErr("Can't allocate memory for the CPU information");
    return NULL;
  }
  init_cpu_info(cpu);

  uint32_t eax = 0;
  uint32_t ebx = 0;
  uint32_t ecx = 0;
  uint32_t edx = 0;

  //Get max cpuid level
  cpuid(&eax, &ebx, &ecx, &edx);
  cpu->maxLevels = eax;

  //Fill vendor (12 characters + terminator, zeroed so it is a valid C string)
  char name[13] = {0};
  get_cpu_vendor_internal(name, ebx, ecx, edx);

  if(strcmp(VENDOR_INTEL_STRING,name) == 0)
    cpu->cpu_vendor = VENDOR_INTEL;
  else if (strcmp(VENDOR_AMD_STRING,name) == 0)
    cpu->cpu_vendor = VENDOR_AMD;
  else {
    printErr("Unknown CPU vendor: %s", name);
    free(cpu); // do not leak the structure on the error path
    return NULL;
  }

  //Get max extended level
  eax = 0x80000000;
  ebx = 0;
  ecx = 0;
  edx = 0;
  cpuid(&eax, &ebx, &ecx, &edx);
  cpu->maxExtendedLevels = eax;

  //Fill instructions support
  if (cpu->maxLevels >= 0x00000001){
    eax = 0x00000001;
    cpuid(&eax, &ebx, &ecx, &edx);
    cpu->SSE    = cpuid_bit_set(edx, 25);
    cpu->SSE2   = cpuid_bit_set(edx, 26);
    cpu->SSE3   = cpuid_bit_set(ecx, 0);
    cpu->SSSE3  = cpuid_bit_set(ecx, 9);
    cpu->SSE4_1 = cpuid_bit_set(ecx, 19);
    cpu->SSE4_2 = cpuid_bit_set(ecx, 20);
    cpu->AES    = cpuid_bit_set(ecx, 25);
    cpu->AVX    = cpuid_bit_set(ecx, 28);
    cpu->FMA3   = cpuid_bit_set(ecx, 12);
  }
  else {
    printWarn("Can't read features information from cpuid (needed level is 0x%.8X, max is 0x%.8X)", 0x00000001, cpu->maxLevels);
  }

  if (cpu->maxLevels >= 0x00000007){
    eax = 0x00000007;
    ecx = 0x00000000;
    cpuid(&eax, &ebx, &ecx, &edx);
    cpu->AVX2 = cpuid_bit_set(ebx, 5);
    cpu->SHA  = cpuid_bit_set(ebx, 29);
    // AVX512 is reported as soon as any of the probed AVX-512 bits is set
    cpu->AVX512 = cpuid_bit_set(ebx, 16) ||
                  cpuid_bit_set(ebx, 28) ||
                  cpuid_bit_set(ebx, 26) ||
                  cpuid_bit_set(ebx, 27) ||
                  cpuid_bit_set(ebx, 31) ||
                  cpuid_bit_set(ebx, 30) ||
                  cpuid_bit_set(ebx, 17) ||
                  cpuid_bit_set(ebx, 21);
  }
  else {
    printWarn("Can't read features information from cpuid (needed level is 0x%.8X, max is 0x%.8X)", 0x00000007, cpu->maxLevels);
  }

  if (cpu->maxExtendedLevels >= 0x80000001){
    eax = 0x80000001;
    cpuid(&eax, &ebx, &ecx, &edx);
    cpu->SSE4a = cpuid_bit_set(ecx, 6);
    cpu->FMA4  = cpuid_bit_set(ecx, 16);
  }
  else {
    printWarn("Can't read features information from cpuid (needed extended level is 0x%.8X, max is 0x%.8X)", 0x80000001, cpu->maxExtendedLevels);
  }

  return cpu;
}
/*
 * Allocates and fills a topology structure for `cpu`.
 *
 * - ht comes from bit 28 of edx in cpuid leaf 1 (false if the leaf is missing).
 * - Intel: smt and logical_cores come from cpuid leaf 0xB (subleaves 0 and 1).
 * - AMD: logical_cores comes from extended leaf 0x80000008 and smt from
 *   0x8000001E.
 * - When the needed leaf is missing, cores and smt fall back to 1.
 * - total_cores is the processor count reported by the OS; sockets is derived
 *   from it.
 *
 * Returns NULL on allocation failure, unexpected cpuid contents or an
 * unknown vendor. The structure is freed on every error path, and values that
 * would lead to a division by zero are rejected. The caller owns the result.
 */
struct topology* get_topology_info(struct cpuInfo* cpu) {
  struct topology* topo = malloc(sizeof *topo);
  if (topo == NULL) {
    printErr("Can't allocate memory for the topology information");
    return NULL;
  }

  uint32_t eax = 0;
  uint32_t ebx = 0;
  uint32_t ecx = 0;
  uint32_t edx = 0;
  int32_t type;

  if (cpu->maxLevels >= 0x00000001) {
    eax = 0x00000001;
    cpuid(&eax, &ebx, &ecx, &edx);
    topo->ht = (edx & (1U << 28)) != 0;
  }
  else {
    printWarn("Can't read HT information from cpuid (needed level is 0x%.8X, max is 0x%.8X). Assuming HT is disabled", 0x00000001, cpu->maxLevels);
    topo->ht = false;
  }

  switch(cpu->cpu_vendor) {
    case VENDOR_INTEL:
      if (cpu->maxLevels >= 0x0000000B) {
        //TODO: This idea only works with no NUMA systems
        eax = 0x0000000B;
        ecx = 0x00000000;
        cpuid(&eax, &ebx, &ecx, &edx);
        type = (ecx >> 8) & 0xFF;
        if (type != 1) {
          printBug("Unexpected type in cpuid 0x0000000B (expected 1, got %d)", type);
          goto fail;
        }
        topo->smt = ebx & 0xFFFF;
        // smt is used as a divisor below: reject 0 instead of crashing
        if (topo->smt == 0) {
          printBug("Unexpected SMT width in cpuid 0x0000000B (got 0)");
          goto fail;
        }

        eax = 0x0000000B;
        ecx = 0x00000001;
        cpuid(&eax, &ebx, &ecx, &edx);
        type = (ecx >> 8) & 0xFF;
        if (type < 2) {
          printBug("Unexpected type in cpuid 0x0000000B (expected < 2, got %d)", type);
          goto fail;
        }
        topo->logical_cores = ebx & 0xFFFF;
        topo->physical_cores = topo->logical_cores / topo->smt;
      }
      else {
        printWarn("Can't read topology information from cpuid (needed level is 0x%.8X, max is 0x%.8X)", 0x0000000B, cpu->maxLevels);
        topo->physical_cores = 1;
        topo->logical_cores = 1;
        topo->smt = 1;
      }
      break;
    case VENDOR_AMD:
      if (cpu->maxExtendedLevels >= 0x80000008) {
        eax = 0x80000008;
        cpuid(&eax, &ebx, &ecx, &edx);
        topo->logical_cores = (ecx & 0xFF) + 1;

        if (cpu->maxExtendedLevels >= 0x8000001E) {
          eax = 0x8000001E;
          cpuid(&eax, &ebx, &ecx, &edx);
          topo->smt = ((ebx >> 8) & 0x03) + 1;
        }
        else {
          // Report the extended max level, which is the one that was compared
          printWarn("Can't read topology information from cpuid (needed extended level is 0x%.8X, max is 0x%.8X)", 0x8000001E, cpu->maxExtendedLevels);
          topo->smt = 1;
        }
        topo->physical_cores = topo->logical_cores / topo->smt;
      }
      else {
        printWarn("Can't read topology information from cpuid (needed extended level is 0x%.8X, max is 0x%.8X)", 0x80000008, cpu->maxExtendedLevels);
        topo->physical_cores = 1;
        topo->logical_cores = 1;
        topo->smt = 1;
      }
      break;
    default:
      printBug("Cant get topology because VENDOR is empty");
      goto fail;
  }

  // physical_cores is a divisor in the sockets computation below
  if (topo->physical_cores == 0) {
    printBug("Unexpected number of physical cores (logical cores: %u, smt: %u)", topo->logical_cores, topo->smt);
    goto fail;
  }

  // Ask the OS the total number of cores it sees
  // If we have one socket, it will be same as the cpuid,
  // but in dual socket it will not!
  #ifdef _WIN32
  SYSTEM_INFO info;
  GetSystemInfo(&info);
  topo->total_cores = info.dwNumberOfProcessors;
  #else
  if((topo->total_cores = sysconf(_SC_NPROCESSORS_ONLN)) == -1) {
    perror("sysconf");
    topo->total_cores = topo->logical_cores; // fallback
  }
  #endif

  topo->sockets = topo->total_cores / topo->smt / topo->physical_cores; // Idea borrowed from lscpu

  return topo;

fail:
  free(topo);
  return NULL;
}
/*
 * Allocates and fills a cache structure with the cache sizes (in bytes)
 * reported by cpuid: standard leaf 0x00000004 on Intel, extended leaf
 * 0x8000001D otherwise.
 *
 * Cache IDs 0..3 are queried. A missing ID 2 or 3 marks L2 / L3 as UNKNOWN;
 * a missing ID 0 or 1 is treated as an error. Each size is computed as
 * ways * partitions * line size * sets.
 *
 * Returns NULL on allocation failure, when the needed leaf is not available,
 * when an unexpected cache type/level is found or when a size fails the
 * sanity checks. The structure is freed on every error path. The caller owns
 * the result.
 */
struct cache* get_cache_info(struct cpuInfo* cpu) {
  struct cache* cach = malloc(sizeof *cach);
  if (cach == NULL) {
    printErr("Can't allocate memory for the cache information");
    return NULL;
  }

  // Start from a known state so no field is ever read uninitialized, even
  // if cpuid reports the caches in an unexpected combination
  cach->L1i = UNKNOWN;
  cach->L1d = UNKNOWN;
  cach->L2 = UNKNOWN;
  cach->L3 = UNKNOWN;

  uint32_t eax = 0;
  uint32_t ebx = 0;
  uint32_t ecx = 0;
  uint32_t edx = 0;
  uint32_t level;

  // We use standart 0x00000004 for Intel
  // We use extended 0x8000001D for AMD
  if(cpu->cpu_vendor == VENDOR_INTEL) {
    level = 0x00000004;
    if(cpu->maxLevels < level) {
      printErr("Can't read cache information from cpuid (needed level is 0x%.8X, max is 0x%.8X)", level, cpu->maxLevels);
      goto fail;
    }
  }
  else {
    level = 0x8000001D;
    if(cpu->maxExtendedLevels < level) {
      printErr("Can't read cache information from cpuid (needed extended level is 0x%.8X, max is 0x%.8X)", level, cpu->maxExtendedLevels);
      goto fail;
    }
  }

  // We suppose there are 4 caches (at most)
  for(int i=0; i < 4; i++) {
    eax = level; // get cache info
    ebx = 0;
    ecx = i; // cache id
    edx = 0;

    cpuid(&eax, &ebx, &ecx, &edx);

    int32_t cache_type = eax & 0x1F;

    // If its 0, we tried fetching a non existing cache
    if (cache_type > 0) {
      int32_t cache_level = (eax >> 5) & 0x7;

      uint32_t cache_sets = ecx + 1;
      uint32_t cache_coherency_line_size = (ebx & 0xFFF) + 1;
      uint32_t cache_physical_line_partitions = ((ebx >> 12) & 0x3FF) + 1;
      uint32_t cache_ways_of_associativity = ((ebx >> 22) & 0x3FF) + 1;

      int32_t cache_total_size = cache_ways_of_associativity * cache_physical_line_partitions * cache_coherency_line_size * cache_sets;

      switch (cache_type) {
        case 1: // Data Cache (We assume this is L1d)
          if(cache_level != 1) {
            printBug("Found data cache at level %d (expected 1)", cache_level);
            goto fail;
          }
          cach->L1d = cache_total_size;
          break;

        case 2: // Instruction Cache (We assume this is L1i)
          if(cache_level != 1) {
            printBug("Found instruction cache at level %d (expected 1)", cache_level);
            goto fail;
          }
          cach->L1i = cache_total_size;
          break;

        case 3: // Unified Cache (This may be L2 or L3)
          if(cache_level == 2) cach->L2 = cache_total_size;
          else if(cache_level == 3) cach->L3 = cache_total_size;
          else {
            printBug("Found unified cache at level %d (expected == 2 or 3)", cache_level);
            goto fail;
          }
          break;

        default: // Unknown Type Cache
          printBug("Unknown Type Cache found at ID %d", i);
          goto fail;
      }
    }
    else if(i == 2) cach->L2 = UNKNOWN;
    else if(i == 3) cach->L3 = UNKNOWN;
    else {
      printBug("Could not find cache ID %d", i);
      goto fail;
    }
  }

  // Sanity checks. If we read values greater than this, they can't be valid ones
  // The values were chosen by me
  if(cach->L1i > 64 * 1024) {
    printBug("Invalid L1i size: %dKB\n", cach->L1i/1024);
    goto fail;
  }
  if(cach->L1d > 64 * 1024) {
    printBug("Invalid L1d size: %dKB\n", cach->L1d/1024);
    goto fail;
  }
  if(cach->L2 != UNKNOWN && cach->L2 > 2 * 1048576) {
    printBug("Invalid L2 size: %dMB\n", cach->L2/(1048576));
    goto fail;
  }
  if(cach->L3 != UNKNOWN && cach->L3 > 100 * 1048576) {
    printBug("Invalid L3 size: %dMB\n", cach->L3/(1048576));
    goto fail;
  }

  return cach;

fail:
  free(cach);
  return NULL;
}
/*
 * Allocates and fills a frequency structure for `cpu`.
 *
 * When cpuid leaf 0x16 is available, base and max are taken from eax and ebx
 * of that leaf. Otherwise base is UNKNOWN and max is UNKNOWN on Windows or
 * the value returned by get_max_freq_from_file() elsewhere.
 *
 * Returns NULL if the allocation fails. The caller owns the result.
 */
struct frequency* get_frequency_info(struct cpuInfo* cpu) {
  struct frequency* freq = malloc(sizeof *freq);
  if (freq == NULL) {
    printErr("Can't allocate memory for the frequency information");
    return NULL;
  }

  if(cpu->maxLevels >= 0x16) {
    uint32_t eax = 0x16;
    uint32_t ebx = 0;
    uint32_t ecx = 0;
    uint32_t edx = 0;

    cpuid(&eax, &ebx, &ecx, &edx);

    freq->base = eax;
    freq->max = ebx;
    return freq;
  }

  // Leaf 0x16 is not available: fall back to whatever the platform offers
  #ifdef _WIN32
  printErr("Can't read frequency information from cpuid (needed level is %d, max is %d)", 0x16, cpu->maxLevels);
  freq->base = UNKNOWN;
  freq->max = UNKNOWN;
  #else
  printWarn("Can't read frequency information from cpuid (needed level is %d, max is %d). Using udev", 0x16, cpu->maxLevels);
  freq->base = UNKNOWN;
  freq->max = get_max_freq_from_file();
  #endif

  return freq;
}
// Returns the 'max' field of the given frequency record.
int64_t get_freq(struct frequency* freq) {
  return freq->max;
}
// Returns the vendor id stored in the given cpuInfo record.
VENDOR get_cpu_vendor(struct cpuInfo* cpu) {
  return cpu->cpu_vendor;
}
// Maps a feature flag to the text printed by debug_cpu_info
#define DEBUG_FLAG_STR(flag) ((flag) ? "true" : "false")

/*
 * Dumps every instruction-set flag of 'cpu' to stdout as NAME=true|false,
 * one per line, with a blank line after each group (AVX, SSE, FMA).
 */
void debug_cpu_info(struct cpuInfo* cpu) {
  // AVX family
  printf("AVX=%s\n", DEBUG_FLAG_STR(cpu->AVX));
  printf("AVX2=%s\n", DEBUG_FLAG_STR(cpu->AVX2));
  printf("AVX512=%s\n\n", DEBUG_FLAG_STR(cpu->AVX512));

  // SSE family
  printf("SSE=%s\n", DEBUG_FLAG_STR(cpu->SSE));
  printf("SSE2=%s\n", DEBUG_FLAG_STR(cpu->SSE2));
  printf("SSE3=%s\n", DEBUG_FLAG_STR(cpu->SSE3));
  printf("SSSE3=%s\n", DEBUG_FLAG_STR(cpu->SSSE3));
  printf("SSE4a=%s\n", DEBUG_FLAG_STR(cpu->SSE4a));
  printf("SSE4_1=%s\n", DEBUG_FLAG_STR(cpu->SSE4_1));
  printf("SSE4_2=%s\n\n", DEBUG_FLAG_STR(cpu->SSE4_2));

  // FMA family
  printf("FMA3=%s\n", DEBUG_FLAG_STR(cpu->FMA3));
  printf("FMA4=%s\n\n", DEBUG_FLAG_STR(cpu->FMA4));

  // Crypto extensions
  printf("AES=%s\n", DEBUG_FLAG_STR(cpu->AES));
  printf("SHA=%s\n", DEBUG_FLAG_STR(cpu->SHA));
}

#undef DEBUG_FLAG_STR
// Prints the four cache fields (L1i, L1d, L2, L3) of 'cach' to stdout,
// one per line, with a 'B' suffix.
void debug_cache(struct cache* cach) {
  printf("L1i=%dB\n",cach->L1i);
  printf("L1d=%dB\n",cach->L1d);
  printf("L2=%dB\n",cach->L2);
  printf("L3=%dB\n",cach->L3);
}
// Prints the 'max' and 'base' fields of 'freq' to stdout.
// Windows uses the %I64d conversion, every other platform uses %ld.
void debug_frequency(struct frequency* freq) {
#ifdef _WIN32
  printf("maxf=%I64d Mhz\n",freq->max);
  printf("basef=%I64d Mhz\n",freq->base);
#else
  printf("maxf=%ld Mhz\n",freq->max);
  printf("basef=%ld Mhz\n",freq->base);
#endif
}
/*** STRING FUNCTIONS ***/
/*
 * Builds a heap-allocated string with the estimated peak performance.
 *
 * The estimate is physical_cores * freq * 1000000, then multiplied by:
 *   - 2 when the vendor is Intel (assumed to have two vector units)
 *   - 2 when the CPU has FMA3 or FMA4
 *   - 16 with AVX512, else 8 with AVX/AVX2, else 4 with SSE
 * The result is printed with two decimals as TFLOP/s, GFLOP/s or MFLOP/s.
 * If 'freq' is UNKNOWN the string is STRING_UNKNOWN instead.
 * The caller owns the returned buffer.
 */
char* get_str_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t freq) {
  // 7 for the unit (e.g. ' GFLOP/s'), 6 for the number (e.g. 412.14), plus spare and '\0'
  uint32_t size = 7+6+1+1;
  assert(strlen(STRING_UNKNOWN)+1 <= size);
  char* out = malloc(sizeof(char)*size);

  // Without a frequency there is nothing to compute
  if(freq == UNKNOWN) {
    snprintf(out,strlen(STRING_UNKNOWN)+1,STRING_UNKNOWN);
    return out;
  }

  double flops = topo->physical_cores*(freq*1000000);

  // Intel USUALLY has two VPUs, AMD is assumed to have one
  if(cpu->cpu_vendor == VENDOR_INTEL) {
    flops *= 2;
  }

  // A fused multiply-add counts as two operations
  if(cpu->FMA3 || cpu->FMA4) {
    flops *= 2;
  }

  // Lanes per vector operation, widest extension first
  if(cpu->AVX512) {
    flops *= 16;
  }
  else if(cpu->AVX || cpu->AVX2) {
    flops *= 8;
  }
  else if(cpu->SSE) {
    flops *= 4;
  }

  // Choose the largest unit that keeps the number >= 1
  if(flops >= (double)1000000000000.0) {
    snprintf(out,size,"%.2f TFLOP/s",flops/1000000000000);
    return out;
  }
  if(flops >= 1000000000.0) {
    snprintf(out,size,"%.2f GFLOP/s",flops/1000000000);
    return out;
  }
  snprintf(out,size,"%.2f MFLOP/s",flops/1000000);
  return out;
}
/*
 * Builds a heap-allocated description of the core layout:
 *   "<cores> cores (<threads> threads)" when topo->smt > 1,
 *   "<cores> cores" otherwise.
 * The caller owns the returned buffer.
 */
char* get_str_topology(struct topology* topo) {
  if(topo->smt > 1) {
    // 3 digits + ' cores (' (8) + 3 digits + ' threads)' (9) + '\0'
    uint32_t smt_size = 3+8+3+9+1;
    char* smt_text = malloc(sizeof(char)*smt_size);
    snprintf(smt_text, smt_size, "%d cores (%d threads)",topo->physical_cores,topo->logical_cores);
    return smt_text;
  }

  // 3 digits + 7 for the ' cores' suffix + '\0'
  uint32_t plain_size = 3+7+1;
  char* plain_text = malloc(sizeof(char)*plain_size);
  snprintf(plain_text, plain_size, "%d cores",topo->physical_cores);
  return plain_text;
}
/*
 * Builds a heap-allocated, comma-separated list of the AVX levels, assuming
 * each level implies the previous one: "No", "AVX", "AVX,AVX2" or
 * "AVX,AVX2,AVX512". The caller owns the returned buffer.
 */
char* get_str_avx(struct cpuInfo* cpu) {
  // The longest text ("AVX,AVX2,AVX512") takes 15 characters plus '\0'
  char* out = malloc(sizeof(char)*15+1);
  const char* text;

  if(!cpu->AVX) {
    text = "No";
  }
  else if(!cpu->AVX2) {
    text = "AVX";
  }
  else if(!cpu->AVX512) {
    text = "AVX,AVX2";
  }
  else {
    text = "AVX,AVX2,AVX512";
  }

  snprintf(out,15+1,"%s",text);
  return out;
}
/*
 * Builds a heap-allocated, comma-separated list of the SSE extensions that
 * 'cpu' reports, in the order SSE, SSE2, SSE3, SSSE3, SSE4a, SSE4_1, SSE4_2.
 *
 * When no SSE flag is set the result is "No", matching get_str_avx and
 * get_str_fma. The previous code indexed string[last-1] with last == 0 in
 * that case, which wrote out of bounds and returned an uninitialised buffer.
 *
 * The caller owns the returned buffer.
 */
char* get_str_sse(struct cpuInfo* cpu) {
  const char* names[] = { "SSE", "SSE2", "SSE3", "SSSE3", "SSE4a", "SSE4_1", "SSE4_2" };
  const int present[] = {
    cpu->SSE    ? 1 : 0,
    cpu->SSE2   ? 1 : 0,
    cpu->SSE3   ? 1 : 0,
    cpu->SSSE3  ? 1 : 0,
    cpu->SSE4a  ? 1 : 0,
    cpu->SSE4_1 ? 1 : 0,
    cpu->SSE4_2 ? 1 : 0
  };
  const size_t count = sizeof(names) / sizeof(names[0]);

  // Room for every name plus one separator each, and the final '\0'
  size_t capacity = 1;
  for(size_t i = 0; i < count; i++) {
    capacity += strlen(names[i]) + 1;
  }

  char* string = malloc(sizeof(char)*capacity);
  size_t used = 0;
  string[0] = '\0';

  for(size_t i = 0; i < count; i++) {
    if(!present[i]) continue;
    // Put the comma before every entry but the first, so no purge is needed
    used += (size_t)snprintf(string+used, capacity-used, "%s%s", used > 0 ? "," : "", names[i]);
  }

  if(used == 0) {
    snprintf(string, capacity, "No");
  }
  return string;
}
/*
 * Builds a heap-allocated description of the FMA support of 'cpu':
 * "FMA3,FMA4", "FMA3", "FMA4" or "No".
 *
 * FMA4 is reported even without FMA3. The previous code printed "No" for
 * that combination, although get_str_peak_performance already treats
 * (FMA3 || FMA4) as "has FMA".
 *
 * The caller owns the returned buffer.
 */
char* get_str_fma(struct cpuInfo* cpu) {
  // The longest text ("FMA3,FMA4") takes 9 characters plus '\0'
  char* string = malloc(sizeof(char)*9+1);

  if(cpu->FMA3 && cpu->FMA4)
    snprintf(string,9+1,"FMA3,FMA4");
  else if(cpu->FMA3)
    snprintf(string,9+1,"FMA3");
  else if(cpu->FMA4)
    snprintf(string,9+1,"FMA4");
  else
    snprintf(string,9+1,"No");

  return string;
}
/*
 * Builds a heap-allocated STRING_YES / STRING_NO answer for the AES flag.
 * The caller owns the returned buffer.
 */
char* get_str_aes(struct cpuInfo* cpu) {
  char* answer = malloc(sizeof(char)*3+1);

  if(!cpu->AES) {
    snprintf(answer,2+1,STRING_NO);
    return answer;
  }

  snprintf(answer,3+1,STRING_YES);
  return answer;
}
/*
 * Builds a heap-allocated STRING_YES / STRING_NO answer for the SHA flag.
 * The caller owns the returned buffer.
 */
char* get_str_sha(struct cpuInfo* cpu) {
  char* answer = malloc(sizeof(char)*3+1);

  if(!cpu->SHA) {
    snprintf(answer,2+1,STRING_NO);
    return answer;
  }

  snprintf(answer,3+1,STRING_YES);
  return answer;
}
// String functions
/*
 * Builds a heap-allocated string with both L1 sizes divided by 1024, in the
 * form "<L1d>KB(D)<L1i>KB(I)" (unit text comes from STRING_KILOBYTES).
 * The caller owns the returned buffer.
 */
char* get_str_l1(struct cache* cach) {
  // Two 2-digit numbers, two 'KB' units (4), '(D)' and '(I)' (6), and '\0'
  enum { L1_TEXT_SIZE = 2*2+4+6+1 };

  char* text = malloc(sizeof(char)*L1_TEXT_SIZE);
  int32_t written = snprintf(text,L1_TEXT_SIZE,"%d"STRING_KILOBYTES"(D)%d"STRING_KILOBYTES"(I)",cach->L1d/1024,cach->L1i/1024);

  assert(written > 0);
  return text;
}
/*
 * Builds a heap-allocated string with the L2 size: STRING_NONE when the size
 * is UNKNOWN, whole megabytes when it is at least 1024KB, kilobytes otherwise.
 * The caller owns the returned buffer.
 */
char* get_str_l2(struct cache* cach) {
  char* text;
  int32_t written;

  // No L2 information available
  if(cach->L2 == UNKNOWN) {
    text = malloc(sizeof(char) * 5);
    snprintf(text, 5, STRING_NONE);
    return text;
  }

  if(cach->L2/1024 >= 1024) {
    // 1 digit, 2 for 'MB', and '\0'
    uint32_t mb_size = (1+2+1);
    text = malloc(sizeof(char)*mb_size);
    written = snprintf(text,mb_size,"%d"STRING_MEGABYTES,cach->L2/(1048576));
  }
  else {
    // 4 digits, 2 for 'KB', and '\0'
    uint32_t kb_size = (4+2+1);
    text = malloc(sizeof(char)*kb_size);
    written = snprintf(text,kb_size,"%d"STRING_KILOBYTES,cach->L2/1024);
  }

  assert(written > 0);
  return text;
}
/*
 * Builds a heap-allocated string with the L3 size: STRING_NONE when the size
 * is UNKNOWN, whole megabytes when it is at least 1024KB, kilobytes otherwise.
 *
 * The megabyte buffer holds three digits, because the sanity check in
 * get_cache_info accepts L3 sizes of up to 100MB. It used to hold a single
 * digit, so any L3 of 10MB or more lost its unit (e.g. "16M").
 *
 * The caller owns the returned buffer.
 */
char* get_str_l3(struct cache* cach) {
  if(cach->L3 == UNKNOWN) {
    char* string = malloc(sizeof(char) * 5);
    snprintf(string, 5, STRING_NONE);
    return string;
  }
  else {
    int32_t sanity_ret;
    char* string;
    if(cach->L3/1024 >= 1024) {
      //3 for digits (up to 100MB), 2 for 'MB'
      uint32_t size = (3+2+1);
      string = malloc(sizeof(char)*size);
      sanity_ret = snprintf(string,size,"%d"STRING_MEGABYTES,cach->L3/(1048576));
    }
    else {
      //4 for digits, 2 for 'KB'
      uint32_t size = (4+2+1);
      string = malloc(sizeof(char)*size);
      sanity_ret = snprintf(string,size,"%d"STRING_KILOBYTES,cach->L3/1024);
    }
    assert(sanity_ret > 0);
    return string;
  }
}
/*
 * Builds a heap-allocated string with the maximum frequency:
 * STRING_UNKNOWN when freq->max is UNKNOWN, "<x.xx>" followed by
 * STRING_GIGAHERZ when it is at least 1000, and "<x.xx>" followed by
 * STRING_MEGAHERZ otherwise.
 *
 * "%.2f" always prints two decimals, so a value below 1000 needs up to six
 * characters ("999.99"). The buffer used to budget four, which truncated
 * every value printed in the MHz branch (e.g. "800.00M").
 *
 * The caller owns the returned buffer.
 */
char* get_str_freq(struct frequency* freq) {
  //Max 6 characters for the number ('999.99'), 3 for '(M/G)Hz' plus 1 for '\0'
  uint32_t size = (6+3+1);
  assert(strlen(STRING_UNKNOWN)+1 <= size);
  char* string = malloc(sizeof(char)*size);
  if(freq->max == UNKNOWN)
    snprintf(string,strlen(STRING_UNKNOWN)+1,STRING_UNKNOWN);
  else if(freq->max >= 1000)
    snprintf(string,size,"%.2f"STRING_GIGAHERZ,(float)(freq->max)/1000);
  else
    snprintf(string,size,"%.2f"STRING_MEGAHERZ,(float)(freq->max));
  return string;
}
// Prints 'cpu_name' followed by the maxLevels and maxExtendedLevels fields
// of 'cpu', each as an 8-digit hexadecimal number.
void print_levels(struct cpuInfo* cpu, char* cpu_name) {
  printf("%s\n", cpu_name);
  printf("- Max standart level: 0x%.8X\n", cpu->maxLevels);
  printf("- Max extended level: 0x%.8X\n", cpu->maxExtendedLevels);
}
// Releases a topology record with free().
void free_topo_struct(struct topology* topo) {
  free(topo);
}
// Releases a cache record with free().
void free_cache_struct(struct cache* cach) {
  free(cach);
}
// Releases a frequency record with free().
void free_freq_struct(struct frequency* freq) {
  free(freq);
}

View File

@@ -1,55 +0,0 @@
/*
 * Public interface for CPU detection: vendor, cache, frequency and topology
 * queries, plus helpers that render each piece of information as a string.
 */
#ifndef __01h__
#define __01h__

#include <stdint.h>

// Values a VENDOR can take
#define VENDOR_EMPTY   0
#define VENDOR_INTEL   1
#define VENDOR_AMD     2
#define VENDOR_INVALID 3

// Sentinel for a value that could not be determined. Parenthesised so the
// minus sign cannot bind to a neighbouring token where the macro is expanded.
#define UNKNOWN (-1)

// Opaque records; their layout is private to the implementation
struct cpuInfo;
struct frequency;
struct cache;
struct topology;

typedef int32_t VENDOR;

// Constructors and accessors
struct cpuInfo* get_cpu_info(void);
VENDOR get_cpu_vendor(struct cpuInfo* cpu);
int64_t get_freq(struct frequency* freq);
struct cache* get_cache_info(struct cpuInfo* cpu);
struct frequency* get_frequency_info(struct cpuInfo* cpu);
struct topology* get_topology_info(struct cpuInfo* cpu);

// String builders
char* get_str_ncores(struct cpuInfo* cpu);
char* get_str_avx(struct cpuInfo* cpu);
char* get_str_sse(struct cpuInfo* cpu);
char* get_str_fma(struct cpuInfo* cpu);
char* get_str_aes(struct cpuInfo* cpu);
char* get_str_sha(struct cpuInfo* cpu);
char* get_str_l1(struct cache* cach);
char* get_str_l2(struct cache* cach);
char* get_str_l3(struct cache* cach);
char* get_str_freq(struct frequency* freq);
char* get_str_topology(struct topology* topo);
char* get_str_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t freq);

void print_levels(struct cpuInfo* cpu, char* cpu_name);

// Destructors
void free_cpuinfo_struct(struct cpuInfo* cpu);
void free_cache_struct(struct cache* cach);
void free_topo_struct(struct topology* topo);
void free_freq_struct(struct frequency* freq);

// Debug dumps to stdout
void debug_cpu_info(struct cpuInfo* cpu);
void debug_cache(struct cache* cach);
void debug_frequency(struct frequency* freq);

#endif

397
src/uarch.c Normal file
View File

@@ -0,0 +1,397 @@
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "uarch.h"
#include "global.h"
/*
* - cpuid codes are based on Todd Allen's cpuid program
* http://www.etallen.com/cpuid.html
* - This should be updated from time to time, to support newer CPUs. A good reference to look at:
* https://en.wikichip.org/
*/
// From Todd Allen:
//
// MSR_CPUID_table* is a table that appears in Intel document 325462, "Intel 64
// and IA-32 Architectures Software Developer's Manual Combined Volumes: 1, 2A,
// 2B, 2C, 2D, 3A, 3B, 3C, 3D, and 4" (the name changes from version to version
// as more volumes are added). The table moves around from version to version,
// but in version 071US, was in "Volume 4: Model-Specific Registers", Table 2-1:
// "CPUID Signature Values of DisplayFamily_DisplayModel".
// MRG* is a table that forms the bulk of Intel Microcode Revision Guidance (or
// Microcode Update Guidance). Its purpose is not to list CPUID values, but
// it does so, and sometimes lists values that appear nowhere else.
// LX* indicates features that I have seen no documentation for, but which are
// used by the Linux kernel (which is good evidence that they're correct).
// The "hook" to find these generally is an X86_FEATURE_* flag in:
// arch/x86/include/asm/cpufeatures.h
// For (synth) and (uarch synth) decoding, it often indicates
// family/model/stepping value which are documented nowhere else. These usually
// can be found in:
// arch/x86/include/asm/intel-family.h
typedef uint32_t MICROARCH;

// Data not available (wildcard for a cpuid field in CHECK_UARCH)
#define NA (-1)
// Unknown manufacturing process
#define UNK (-1)

/*
 * Microarchitecture identifiers.
 *
 * These are enumerators rather than hand-numbered macros so that every id is
 * unique by construction: with the manual numbering, UARCH_ICE_LAKE and
 * UARCH_AM486 were both 0x038. UARCH_UNKNOWN stays 0.
 */
enum {
  UARCH_UNKNOWN = 0,
  // Intel
  UARCH_P5,
  UARCH_P6,
  UARCH_DOTHAN,
  UARCH_YONAH,
  UARCH_MEROM,
  UARCH_PENYR,
  UARCH_NEHALEM,
  UARCH_WESTMERE,
  UARCH_BONNELL,
  UARCH_SALTWELL,
  UARCH_SANDY_BRIDGE,
  UARCH_SILVERMONT,
  UARCH_IVY_BRIDGE,
  UARCH_HASWELL,
  UARCH_BROADWELL,
  UARCH_AIRMONT,
  UARCH_KABY_LAKE,
  UARCH_SKYLAKE,
  UARCH_CASCADE_LAKE,
  UARCH_COOPER_LAKE,
  UARCH_KNIGHTS_LANDING,
  UARCH_KNIGHTS_MILL,
  UARCH_GOLDMONT,
  UARCH_PALM_COVE,
  UARCH_SUNNY_COVE,
  UARCH_GOLDMONT_PLUS,
  UARCH_TREMONT,
  UARCH_WILLOW_COVE,
  UARCH_COFFE_LAKE,
  UARCH_ITANIUM,
  UARCH_KNIGHTS_FERRY,
  UARCH_KNIGHTS_CORNER,
  UARCH_WILLAMETTE,
  UARCH_NORTHWOOD,
  UARCH_PRESCOTT,
  UARCH_CEDAR_MILL,
  UARCH_ITANIUM2,
  UARCH_ICE_LAKE,
  // AMD
  UARCH_AM486,
  UARCH_AM5X86,
  UARCH_K6,
  UARCH_K7,
  UARCH_K8,
  UARCH_K10,
  UARCH_PUMA_2008,
  UARCH_BOBCAT,
  UARCH_BULLDOZER,
  UARCH_PILEDRIVER,
  UARCH_STEAMROLLER,
  UARCH_EXCAVATOR,
  UARCH_JAGUAR,
  UARCH_PUMA_2014,
  UARCH_ZEN,
  UARCH_ZEN_PLUS,
  UARCH_ZEN2,
  UARCH_ZEN3
};
// Describes a detected microarchitecture.
struct uarch {
  MICROARCH uarch; // one of the UARCH_* identifiers
  char* uarch_str; // heap-allocated name (set by fill_uarch, released by free_uarch_struct)
  int32_t process; // measured in nanometers
};
// The three macros below build one long if / else-if chain. They expect the
// names ef, f, em, m and s to be in scope where they are expanded.
//
// UARCH_START opens the chain with a branch that is never taken, so every
// CHECK_UARCH row can begin with 'else if'.
#define UARCH_START if (false) {}
// CHECK_UARCH adds one row: ef_ and f_ must match exactly, while em_, m_ and
// s_ match either exactly or always when given as NA. The first matching row
// wins, so a row with a specific value must come before an NA row for the
// same family/model. On a match the row calls fill_uarch(arch, str, uarch, process).
#define CHECK_UARCH(arch, ef_, f_, em_, m_, s_, str, uarch, process) \
  else if (ef_ == ef && f_ == f && (em_ == NA || em_ == em) && (m_ == NA || m_ == m) && (s_ == NA || s_ == s)) fill_uarch(arch, str, uarch, process);
// UARCH_END closes the chain: when no row matched it reports the cpuid values
// through printBug and fills 'arch' with "Unknown", UARCH_UNKNOWN and process 0.
#define UARCH_END else { printBug("Unknown microarchitecture detected: M=0x%.8X EM=0x%.8X F=0x%.8X EF=0x%.8X S=0x%.8X", m, em, f, ef, s); fill_uarch(arch, "Unknown", UARCH_UNKNOWN, 0); }
/*
 * Stores a microarchitecture description into 'arch'.
 *
 * 'str' is duplicated into a freshly allocated buffer, so 'arch' owns its
 * own copy of the name; 'u' and 'process' are stored as given.
 */
void fill_uarch(struct uarch* arch, char* str, MICROARCH u, uint32_t process) {
  // Duplicate the name first, including its terminating '\0'
  const size_t name_bytes = sizeof(char) * (strlen(str)+1);
  char* name_copy = malloc(name_bytes);
  strcpy(name_copy, str);

  arch->uarch_str = name_copy;
  arch->uarch = u;
  arch->process = process;
}
/*
 * Inspired by Todd Allen's decode_uarch_intel.
 *
 * Allocates a struct uarch and fills it from the Intel cpuid signature:
 *   ef: extended family, f: family, em: extended model, m: model, s: stepping.
 *
 * The table is an if / else-if chain (see UARCH_START / CHECK_UARCH /
 * UARCH_END): the first matching row wins, so rows with a specific stepping
 * must stay above the NA row of the same model. When nothing matches, the
 * result is "Unknown" / UARCH_UNKNOWN with process 0.
 *
 * The last column is the manufacturing process in nanometers (UNK if unknown).
 * The (0, 6, 3, 5) Saltwell row used to list 14; it is now 32, like the other
 * Saltwell rows.
 *
 * The caller owns the result (see free_uarch_struct).
 */
struct uarch* get_uarch_from_cpuid_intel(uint32_t ef, uint32_t f, uint32_t em, uint32_t m, int s) {
  struct uarch* arch = malloc(sizeof(struct uarch));
  // EF: Extended Family //
  // F: Family //
  // EM: Extended Model //
  // M: Model //
  // S: Stepping //
  // ----------------------------------------------------------------------------- //
  // EF F EM M S //
  UARCH_START
  CHECK_UARCH(arch, 0, 5, 0, 0, NA, "P5", UARCH_P5, 800)
  CHECK_UARCH(arch, 0, 5, 0, 1, NA, "P5", UARCH_P5, 800)
  CHECK_UARCH(arch, 0, 5, 0, 2, NA, "P5", UARCH_P5, UNK)
  CHECK_UARCH(arch, 0, 5, 0, 3, NA, "P5", UARCH_P5, 600)
  CHECK_UARCH(arch, 0, 5, 0, 4, NA, "P5 MMX", UARCH_P5, UNK)
  CHECK_UARCH(arch, 0, 5, 0, 7, NA, "P5 MMX", UARCH_P5, UNK)
  CHECK_UARCH(arch, 0, 5, 0, 8, NA, "P5 MMX", UARCH_P5, 250)
  CHECK_UARCH(arch, 0, 5, 0, 9, NA, "P5 MMX", UARCH_P5, UNK)
  CHECK_UARCH(arch, 0, 6, 0, 0, NA, "P6 Pentium II", UARCH_P6, UNK)
  CHECK_UARCH(arch, 0, 6, 0, 1, NA, "P6 Pentium II", UARCH_P6, UNK) // process depends on core
  CHECK_UARCH(arch, 0, 6, 0, 2, NA, "P6 Pentium II", UARCH_P6, UNK)
  CHECK_UARCH(arch, 0, 6, 0, 3, NA, "P6 Pentium II", UARCH_P6, 350)
  CHECK_UARCH(arch, 0, 6, 0, 4, NA, "P6 Pentium II", UARCH_P6, UNK)
  CHECK_UARCH(arch, 0, 6, 0, 5, NA, "P6 Pentium II", UARCH_P6, 250)
  CHECK_UARCH(arch, 0, 6, 0, 6, NA, "P6 Pentium II", UARCH_P6, UNK)
  CHECK_UARCH(arch, 0, 6, 0, 7, NA, "P6 Pentium III", UARCH_P6, 250)
  CHECK_UARCH(arch, 0, 6, 0, 8, NA, "P6 Pentium III", UARCH_P6, 180)
  CHECK_UARCH(arch, 0, 6, 0, 9, NA, "P6 Pentium M", UARCH_P6, 130)
  CHECK_UARCH(arch, 0, 6, 0, 10, NA, "P6 Pentium III", UARCH_P6, 180)
  CHECK_UARCH(arch, 0, 6, 0, 11, NA, "P6 Pentium III", UARCH_P6, 130)
  CHECK_UARCH(arch, 0, 6, 0, 13, NA, "Dothan", UARCH_DOTHAN, UNK) // process depends on core
  CHECK_UARCH(arch, 0, 6, 0, 14, NA, "Yonah", UARCH_YONAH, 65)
  CHECK_UARCH(arch, 0, 6, 0, 15, NA, "Merom", UARCH_MEROM, 65)
  CHECK_UARCH(arch, 0, 6, 1, 5, NA, "Dothan", UARCH_DOTHAN, 90)
  CHECK_UARCH(arch, 0, 6, 1, 6, NA, "Merom", UARCH_MEROM, 65)
  CHECK_UARCH(arch, 0, 6, 1, 7, NA, "Penryn", UARCH_PENYR, 45)
  CHECK_UARCH(arch, 0, 6, 1, 10, NA, "Nehalem", UARCH_NEHALEM, 45)
  CHECK_UARCH(arch, 0, 6, 1, 12, NA, "Bonnell", UARCH_BONNELL, 45)
  CHECK_UARCH(arch, 0, 6, 1, 13, NA, "Penryn", UARCH_PENYR, 45)
  CHECK_UARCH(arch, 0, 6, 1, 14, NA, "Nehalem", UARCH_NEHALEM, 45)
  CHECK_UARCH(arch, 0, 6, 1, 15, NA, "Nehalem", UARCH_NEHALEM, 45)
  CHECK_UARCH(arch, 0, 6, 2, 5, NA, "Westmere", UARCH_WESTMERE, 32)
  CHECK_UARCH(arch, 0, 6, 2, 6, NA, "Bonnell", UARCH_BONNELL, 45)
  CHECK_UARCH(arch, 0, 6, 2, 7, NA, "Saltwell", UARCH_SALTWELL, 32)
  CHECK_UARCH(arch, 0, 6, 2, 10, NA, "Sandy Bridge", UARCH_SANDY_BRIDGE, 32)
  CHECK_UARCH(arch, 0, 6, 2, 12, NA, "Westmere", UARCH_WESTMERE, 32)
  CHECK_UARCH(arch, 0, 6, 2, 13, NA, "Sandy Bridge", UARCH_SANDY_BRIDGE, 32)
  CHECK_UARCH(arch, 0, 6, 2, 14, NA, "Nehalem", UARCH_NEHALEM, 45)
  CHECK_UARCH(arch, 0, 6, 2, 15, NA, "Westmere", UARCH_WESTMERE, 32)
  CHECK_UARCH(arch, 0, 6, 3, 5, NA, "Saltwell", UARCH_SALTWELL, 32) // Saltwell is a 32nm design
  CHECK_UARCH(arch, 0, 6, 3, 6, NA, "Saltwell", UARCH_SALTWELL, 32)
  CHECK_UARCH(arch, 0, 6, 3, 7, NA, "Silvermont", UARCH_SILVERMONT, 22)
  CHECK_UARCH(arch, 0, 6, 3, 10, NA, "Ivy Bridge", UARCH_IVY_BRIDGE, 22)
  CHECK_UARCH(arch, 0, 6, 3, 12, NA, "Haswell", UARCH_HASWELL, 22)
  CHECK_UARCH(arch, 0, 6, 3, 13, NA, "Broadwell", UARCH_BROADWELL, 14)
  CHECK_UARCH(arch, 0, 6, 3, 14, NA, "Ivy Bridge", UARCH_IVY_BRIDGE, 22)
  CHECK_UARCH(arch, 0, 6, 3, 15, NA, "Haswell", UARCH_HASWELL, 22)
  CHECK_UARCH(arch, 0, 6, 4, 5, NA, "Haswell", UARCH_HASWELL, 22)
  CHECK_UARCH(arch, 0, 6, 4, 6, NA, "Haswell", UARCH_HASWELL, 22)
  CHECK_UARCH(arch, 0, 6, 4, 7, NA, "Broadwell", UARCH_BROADWELL, 14)
  CHECK_UARCH(arch, 0, 6, 4, 10, NA, "Silvermont", UARCH_SILVERMONT, 22) // no docs, but /proc/cpuinfo seen in wild
  CHECK_UARCH(arch, 0, 6, 4, 12, NA, "Airmont", UARCH_AIRMONT, 14)
  CHECK_UARCH(arch, 0, 6, 4, 13, NA, "Silvermont", UARCH_SILVERMONT, 22)
  CHECK_UARCH(arch, 0, 6, 4, 14, 8, "Kaby Lake", UARCH_KABY_LAKE, 14)
  CHECK_UARCH(arch, 0, 6, 4, 14, NA, "Skylake", UARCH_SKYLAKE, 14)
  CHECK_UARCH(arch, 0, 6, 4, 15, NA, "Broadwell", UARCH_BROADWELL, 14)
  CHECK_UARCH(arch, 0, 6, 5, 5, 6, "Cascade Lake", UARCH_CASCADE_LAKE, 14) // no docs, but example from Greg Stewart
  CHECK_UARCH(arch, 0, 6, 5, 5, 7, "Cascade Lake", UARCH_CASCADE_LAKE, 14)
  CHECK_UARCH(arch, 0, 6, 5, 5, 10, "Cooper Lake", UARCH_COOPER_LAKE, 14)
  CHECK_UARCH(arch, 0, 6, 5, 5, NA, "Skylake", UARCH_SKYLAKE, 14)
  CHECK_UARCH(arch, 0, 6, 5, 6, NA, "Broadwell", UARCH_BROADWELL, 14)
  CHECK_UARCH(arch, 0, 6, 5, 7, NA, "Knights Landing", UARCH_KNIGHTS_LANDING, 14)
  CHECK_UARCH(arch, 0, 6, 5, 10, NA, "Silvermont", UARCH_SILVERMONT, 22) // no spec update; only MSR_CPUID_table* so far
  CHECK_UARCH(arch, 0, 6, 5, 12, NA, "Goldmont", UARCH_GOLDMONT, 14)
  CHECK_UARCH(arch, 0, 6, 5, 13, NA, "Silvermont", UARCH_SILVERMONT, 22) // no spec update; only MSR_CPUID_table* so far
  CHECK_UARCH(arch, 0, 6, 5, 14, 8, "Kaby Lake", UARCH_KABY_LAKE, 14)
  CHECK_UARCH(arch, 0, 6, 5, 14, NA, "Skylake", UARCH_SKYLAKE, 14)
  CHECK_UARCH(arch, 0, 6, 5, 15, NA, "Goldmont", UARCH_GOLDMONT, 14)
  CHECK_UARCH(arch, 0, 6, 6, 6, NA, "Palm Cove", UARCH_PALM_COVE, 10) // no spec update; only MSR_CPUID_table* so far
  CHECK_UARCH(arch, 0, 6, 6, 10, NA, "Sunny Cove", UARCH_SUNNY_COVE, 10) // no spec update; only MSR_CPUID_table* so far
  CHECK_UARCH(arch, 0, 6, 6, 12, NA, "Sunny Cove", UARCH_SUNNY_COVE, 10) // no spec update; only MSR_CPUID_table* so far
  CHECK_UARCH(arch, 0, 6, 7, 5, NA, "Airmont", UARCH_AIRMONT, 14) // no spec update; whispers & rumors
  CHECK_UARCH(arch, 0, 6, 7, 10, NA, "Goldmont Plus", UARCH_GOLDMONT_PLUS, 14)
  CHECK_UARCH(arch, 0, 6, 7, 13, NA, "Sunny Cove", UARCH_SUNNY_COVE, 10) // no spec update; only MSR_CPUID_table* so far
  CHECK_UARCH(arch, 0, 6, 7, 14, NA, "Ice Lake", UARCH_ICE_LAKE, 10)
  CHECK_UARCH(arch, 0, 6, 8, 5, NA, "Knights Mill", UARCH_KNIGHTS_MILL, 14) // no spec update; only MSR_CPUID_table* so far
  CHECK_UARCH(arch, 0, 6, 8, 6, NA, "Tremont", UARCH_TREMONT, 10) // LX*
  CHECK_UARCH(arch, 0, 6, 8, 10, NA, "Tremont", UARCH_TREMONT, 10) // no spec update; only geekbench.com example
  CHECK_UARCH(arch, 0, 6, 8, 12, NA, "Willow Cove", UARCH_WILLOW_COVE, 10) // found only on en.wikichip.org
  CHECK_UARCH(arch, 0, 6, 8, 13, NA, "Willow Cove", UARCH_WILLOW_COVE, 10) // LX*
  CHECK_UARCH(arch, 0, 6, 8, 14, NA, "Kaby Lake", UARCH_KABY_LAKE, 14)
  CHECK_UARCH(arch, 0, 6, 9, 6, NA, "Tremont", UARCH_TREMONT, 10) // LX*
  CHECK_UARCH(arch, 0, 6, 9, 12, NA, "Tremont", UARCH_TREMONT, 10) // LX*
  CHECK_UARCH(arch, 0, 6, 9, 13, NA, "Sunny Cove", UARCH_SUNNY_COVE, 10) // LX*
  CHECK_UARCH(arch, 0, 6, 9, 14, 9, "Kaby Lake", UARCH_KABY_LAKE, 14)
  CHECK_UARCH(arch, 0, 6, 9, 14, 10, "Coffee Lake", UARCH_COFFE_LAKE, 14)
  CHECK_UARCH(arch, 0, 6, 9, 14, 11, "Coffee Lake", UARCH_COFFE_LAKE, 14)
  CHECK_UARCH(arch, 0, 6, 9, 14, 12, "Coffee Lake", UARCH_COFFE_LAKE, 14)
  CHECK_UARCH(arch, 0, 6, 9, 14, 13, "Coffee Lake", UARCH_COFFE_LAKE, 14)
  CHECK_UARCH(arch, 0, 6, 10, 5, NA, "Kaby Lake", UARCH_KABY_LAKE, 14) // LX*
  CHECK_UARCH(arch, 0, 6, 10, 6, NA, "Kaby Lake", UARCH_KABY_LAKE, 14) // no spec update; only instlatx64 example
  CHECK_UARCH(arch, 0, 11, 0, 0, NA, "Knights Ferry", UARCH_KNIGHTS_FERRY, 45) // found only on en.wikichip.org
  CHECK_UARCH(arch, 0, 11, 0, 1, NA, "Knights Corner", UARCH_KNIGHTS_CORNER, 22)
  CHECK_UARCH(arch, 0, 15, 0, 0, NA, "Willamette", UARCH_WILLAMETTE, 180)
  CHECK_UARCH(arch, 0, 15, 0, 1, NA, "Willamette", UARCH_WILLAMETTE, 180)
  CHECK_UARCH(arch, 0, 15, 0, 2, NA, "Northwood", UARCH_NORTHWOOD, 130)
  CHECK_UARCH(arch, 0, 15, 0, 3, NA, "Prescott", UARCH_PRESCOTT, 90)
  CHECK_UARCH(arch, 0, 15, 0, 4, NA, "Prescott", UARCH_PRESCOTT, 90)
  CHECK_UARCH(arch, 0, 15, 0, 6, NA, "Cedar Mill", UARCH_CEDAR_MILL, 65)
  CHECK_UARCH(arch, 1, 15, 0, 0, NA, "Itanium2", UARCH_ITANIUM2, 180)
  CHECK_UARCH(arch, 1, 15, 0, 1, NA, "Itanium2", UARCH_ITANIUM2, 130)
  CHECK_UARCH(arch, 1, 15, 0, 2, NA, "Itanium2", UARCH_ITANIUM2, 130)
  UARCH_END
  return arch;
}
// Inspired by Todd Allen's decode_uarch_amd
//
// Allocates a struct uarch and fills it from the AMD cpuid signature:
//   ef: extended family, f: family, em: extended model, m: model, s: stepping.
// The rows form an if / else-if chain (see UARCH_START / CHECK_UARCH /
// UARCH_END), so the first matching row wins and the NA catch-all rows must
// stay below the specific rows of the same family. When nothing matches,
// UARCH_END fills in "Unknown". The last column is the process in nanometers.
// NOTE(review): the (0, 5, 0, 13) K6 row lists 80nm, which looks out of line
// with its neighbours (300 / 250) -- confirm against the cited source.
struct uarch* get_uarch_from_cpuid_amd(uint32_t ef, uint32_t f, uint32_t em, uint32_t m, int s) {
  struct uarch* arch = malloc(sizeof(struct uarch));
  // EF: Extended Family //
  // F: Family //
  // EM: Extended Model //
  // M: Model //
  // S: Stepping //
  // ----------------------------------------------------------------------------- //
  // EF F EM M S //
  UARCH_START
  CHECK_UARCH(arch, 0, 4, 0, 3, NA, "Am486", UARCH_AM486, UNK)
  CHECK_UARCH(arch, 0, 4, 0, 7, NA, "Am486", UARCH_AM486, UNK)
  CHECK_UARCH(arch, 0, 4, 0, 8, NA, "Am486", UARCH_AM486, UNK)
  CHECK_UARCH(arch, 0, 4, 0, 9, NA, "Am486", UARCH_AM486, UNK)
  CHECK_UARCH(arch, 0, 4, NA, NA, NA, "Am5x86", UARCH_AM5X86, UNK)
  CHECK_UARCH(arch, 0, 5, 0, 6, NA, "K6", UARCH_K6, 300)
  CHECK_UARCH(arch, 0, 5, 0, 7, NA, "K6", UARCH_K6, 250) // *p from sandpile.org
  CHECK_UARCH(arch, 0, 5, 0, 13, NA, "K6", UARCH_K6, 80) // *p from sandpile.org
  CHECK_UARCH(arch, 0, 5, NA, NA, NA, "K6", UARCH_K6, UNK)
  CHECK_UARCH(arch, 0, 6, 0, 1, NA, "K7", UARCH_K7, 250)
  CHECK_UARCH(arch, 0, 6, 0, 2, NA, "K7", UARCH_K7, 180)
  CHECK_UARCH(arch, 0, 6, NA, NA, NA, "K7", UARCH_K7, UNK)
  CHECK_UARCH(arch, 0, 15, 0, 4, 8, "K8", UARCH_K8, 130)
  CHECK_UARCH(arch, 0, 15, 0, 4, NA, "K8", UARCH_K8, 130)
  CHECK_UARCH(arch, 0, 15, 0, 5, NA, "K8", UARCH_K8, 130)
  CHECK_UARCH(arch, 0, 15, 0, 7, NA, "K8", UARCH_K8, 130)
  CHECK_UARCH(arch, 0, 15, 0, 8, NA, "K8", UARCH_K8, 130)
  CHECK_UARCH(arch, 0, 15, 0, 11, NA, "K8", UARCH_K8, 130)
  CHECK_UARCH(arch, 0, 15, 0, 12, NA, "K8", UARCH_K8, 130)
  CHECK_UARCH(arch, 0, 15, 0, 14, NA, "K8", UARCH_K8, 130)
  CHECK_UARCH(arch, 0, 15, 0, 15, NA, "K8", UARCH_K8, 130)
  CHECK_UARCH(arch, 0, 15, 1, 4, NA, "K8", UARCH_K8, 90)
  CHECK_UARCH(arch, 0, 15, 1, 5, NA, "K8", UARCH_K8, 90)
  CHECK_UARCH(arch, 0, 15, 1, 7, NA, "K8", UARCH_K8, 90)
  CHECK_UARCH(arch, 0, 15, 1, 8, NA, "K8", UARCH_K8, 90)
  CHECK_UARCH(arch, 0, 15, 1, 11, NA, "K8", UARCH_K8, 90)
  CHECK_UARCH(arch, 0, 15, 1, 12, NA, "K8", UARCH_K8, 90)
  CHECK_UARCH(arch, 0, 15, 1, 15, NA, "K8", UARCH_K8, 90)
  CHECK_UARCH(arch, 0, 15, 2, 1, NA, "K8", UARCH_K8, 90)
  CHECK_UARCH(arch, 0, 15, 2, 3, NA, "K8", UARCH_K8, 90)
  CHECK_UARCH(arch, 0, 15, 2, 4, NA, "K8", UARCH_K8, 90)
  CHECK_UARCH(arch, 0, 15, 2, 5, NA, "K8", UARCH_K8, 90)
  CHECK_UARCH(arch, 0, 15, 2, 7, NA, "K8", UARCH_K8, 90)
  CHECK_UARCH(arch, 0, 15, 2, 11, NA, "K8", UARCH_K8, 90)
  CHECK_UARCH(arch, 0, 15, 2, 12, NA, "K8", UARCH_K8, 90)
  CHECK_UARCH(arch, 0, 15, 2, 15, NA, "K8", UARCH_K8, 90)
  CHECK_UARCH(arch, 0, 15, 4, 1, NA, "K8", UARCH_K8, 90)
  CHECK_UARCH(arch, 0, 15, 4, 3, NA, "K8", UARCH_K8, 90)
  CHECK_UARCH(arch, 0, 15, 4, 8, NA, "K8", UARCH_K8, 90)
  CHECK_UARCH(arch, 0, 15, 4, 11, NA, "K8", UARCH_K8, 90)
  CHECK_UARCH(arch, 0, 15, 4, 12, NA, "K8", UARCH_K8, 90)
  CHECK_UARCH(arch, 0, 15, 4, 15, NA, "K8", UARCH_K8, 90)
  CHECK_UARCH(arch, 0, 15, 5, 13, NA, "K8", UARCH_K8, 90)
  CHECK_UARCH(arch, 0, 15, 5, 15, NA, "K8", UARCH_K8, 90)
  CHECK_UARCH(arch, 0, 15, 6, 8, NA, "K8", UARCH_K8, 65)
  CHECK_UARCH(arch, 0, 15, 6, 11, NA, "K8", UARCH_K8, 65)
  CHECK_UARCH(arch, 0, 15, 6, 12, NA, "K8", UARCH_K8, 65)
  CHECK_UARCH(arch, 0, 15, 6, 15, NA, "K8", UARCH_K8, 65)
  CHECK_UARCH(arch, 0, 15, 7, 12, NA, "K8", UARCH_K8, 65)
  CHECK_UARCH(arch, 0, 15, 7, 15, NA, "K8", UARCH_K8, 65)
  CHECK_UARCH(arch, 0, 15, 12, 1, NA, "K8", UARCH_K8, 90)
  CHECK_UARCH(arch, 1, 15, 0, 0, NA, "K10", UARCH_K10, 65) // sandpile.org
  CHECK_UARCH(arch, 1, 15, 0, 2, NA, "K10", UARCH_K10, 65)
  CHECK_UARCH(arch, 1, 15, 0, 4, NA, "K10", UARCH_K10, 45)
  CHECK_UARCH(arch, 1, 15, 0, 5, NA, "K10", UARCH_K10, 45)
  CHECK_UARCH(arch, 1, 15, 0, 6, NA, "K10", UARCH_K10, 45)
  CHECK_UARCH(arch, 1, 15, 0, 8, NA, "K10", UARCH_K10, 45)
  CHECK_UARCH(arch, 1, 15, 0, 9, NA, "K10", UARCH_K10, 45)
  CHECK_UARCH(arch, 1, 15, 0, 10, NA, "K10", UARCH_K10, 45)
  CHECK_UARCH(arch, 2, 15, NA, NA, NA, "Puma 2008", UARCH_PUMA_2008, 65)
  CHECK_UARCH(arch, 3, 15, NA, NA, NA, "K10", UARCH_K10, 32)
  CHECK_UARCH(arch, 5, 15, NA, NA, NA, "Bobcat", UARCH_BOBCAT, 40)
  CHECK_UARCH(arch, 6, 15, 0, 0, NA, "Bulldozer", UARCH_BULLDOZER, 32) // instlatx64 engr sample
  CHECK_UARCH(arch, 6, 15, 0, 1, NA, "Bulldozer", UARCH_BULLDOZER, 32)
  CHECK_UARCH(arch, 6, 15, 0, 2, NA, "Piledriver", UARCH_PILEDRIVER, 32)
  CHECK_UARCH(arch, 6, 15, 1, 0, NA, "Piledriver", UARCH_PILEDRIVER, 32)
  CHECK_UARCH(arch, 6, 15, 1, 3, NA, "Piledriver", UARCH_PILEDRIVER, 32)
  CHECK_UARCH(arch, 6, 15, 3, 0, NA, "Steamroller", UARCH_STEAMROLLER, 28)
  CHECK_UARCH(arch, 6, 15, 3, 8, NA, "Steamroller", UARCH_STEAMROLLER, 28)
  CHECK_UARCH(arch, 6, 15, 4, 0, NA, "Steamroller", UARCH_STEAMROLLER, 28) // Software Optimization Guide (15h) says it has the same inst latencies as (6,15),(3,x).
  CHECK_UARCH(arch, 6, 15, 6, 0, NA, "Excavator", UARCH_EXCAVATOR, 28) // undocumented, but instlatx64 samples
  CHECK_UARCH(arch, 6, 15, 6, 5, NA, "Excavator", UARCH_EXCAVATOR, 28) // undocumented, but sample from Alexandros Couloumbis
  CHECK_UARCH(arch, 6, 15, 7, 0, NA, "Excavator", UARCH_EXCAVATOR, 28)
  CHECK_UARCH(arch, 7, 15, 0, 0, NA, "Jaguar", UARCH_JAGUAR, 28)
  CHECK_UARCH(arch, 7, 15, 3, 0, NA, "Puma 2014", UARCH_PUMA_2014, 28)
  CHECK_UARCH(arch, 8, 15, 0, 0, NA, "Zen", UARCH_ZEN, 14) // instlatx64 engr sample
  CHECK_UARCH(arch, 8, 15, 0, 1, NA, "Zen", UARCH_ZEN, 14)
  CHECK_UARCH(arch, 8, 15, 0, 8, NA, "Zen+", UARCH_ZEN_PLUS, 12)
  CHECK_UARCH(arch, 8, 15, 1, 1, NA, "Zen", UARCH_ZEN, 14) // found only on en.wikichip.org & instlatx64 examples
  CHECK_UARCH(arch, 8, 15, 1, 8, NA, "Zen+", UARCH_ZEN_PLUS, 12) // found only on en.wikichip.org
  CHECK_UARCH(arch, 8, 15, 3, 1, NA, "Zen 2", UARCH_ZEN2, 7) // found only on en.wikichip.org
  CHECK_UARCH(arch, 8, 15, 6, 0, NA, "Zen 2", UARCH_ZEN2, 7) // undocumented, geekbench.com example
  CHECK_UARCH(arch, 8, 15, 7, 1, NA, "Zen 2", UARCH_ZEN2, 7) // undocumented, but samples from Steven Noonan
  CHECK_UARCH(arch, 10, 15, NA, NA, NA, "Zen 3", UARCH_ZEN3, 7) // undocumented, LX*
  UARCH_END
  return arch;
}
/*
 * Decodes the cpuid signature (extended family, family, extended model,
 * model, stepping) into a freshly allocated struct uarch.
 *
 * Intel CPUs go through the Intel table; every other vendor goes through
 * the AMD table.
 */
struct uarch* get_uarch_from_cpuid(struct cpuInfo* cpu, uint32_t ef, uint32_t f, uint32_t em, uint32_t m, int s) {
  const bool is_intel = (cpu->cpu_vendor == VENDOR_INTEL);

  return is_intel ? get_uarch_from_cpuid_intel(ef, f, em, m, s)
                  : get_uarch_from_cpuid_amd(ef, f, em, m, s);
}
// Returns false when the detected microarchitecture is UARCH_ICE_LAKE and
// true for every other microarchitecture.
bool vpus_are_AVX512(struct cpuInfo* cpu) {
  if(cpu->arch->uarch == UARCH_ICE_LAKE) {
    return false;
  }
  return true;
}
/*
 * Returns the number of vector units assumed for 'cpu': always 1 for AMD,
 * 2 for the microarchitectures listed below, and 1 for anything else.
 */
int get_number_of_vpus(struct cpuInfo* cpu) {
  int vpus = 1;

  // AMD is never looked up in the table (cpu->arch is not touched for it)
  if(cpu->cpu_vendor != VENDOR_AMD) {
    switch(cpu->arch->uarch) {
      case UARCH_HASWELL:
      case UARCH_BROADWELL:
      case UARCH_SKYLAKE:
      case UARCH_CASCADE_LAKE:
      case UARCH_KABY_LAKE:
      case UARCH_COFFE_LAKE:
      case UARCH_PALM_COVE:
      case UARCH_KNIGHTS_LANDING:
      case UARCH_KNIGHTS_MILL:
      case UARCH_ICE_LAKE:
        vpus = 2;
        break;
      default:
        break;
    }
  }

  return vpus;
}
// Returns the microarchitecture name stored in cpu->arch. The pointer is the
// one held by the struct itself (no copy is made), which free_uarch_struct
// releases.
char* get_str_uarch(struct cpuInfo* cpu) {
  return cpu->arch->uarch_str;
}
/*
 * Builds a heap-allocated string with the manufacturing process of 'cpu':
 *   - "Unknown" when the process is not known (UNK, or the 0 stored for an
 *     unrecognised microarchitecture)
 *   - "<n>nm" up to 100 nanometers
 *   - "<x.xx>um" above 100 nanometers
 *
 * Fixes over the previous version:
 *   - UNK (-1) was read as an unsigned value, so sprintf wrote a huge number
 *     past the end of the 7-byte buffer.
 *   - Nanometers were divided by 100 instead of 1000, so 180nm was printed
 *     as "1.80um" rather than "0.18um".
 *   - Every write is now bounded by snprintf.
 *
 * The caller owns the returned buffer.
 */
char* get_str_process(struct cpuInfo* cpu) {
  // Widest outputs are "Unknown" (7) and "0.80um" (6); leave some headroom
  const size_t size = 16;
  char* str = malloc(sizeof(char) * size);
  int32_t process = cpu->arch->process;

  if(process <= 0)
    snprintf(str, size, "Unknown");
  else if(process > 100)
    snprintf(str, size, "%.2fum", (double)process/1000);
  else
    snprintf(str, size, "%dnm", (int)process);
  return str;
}
// Releases a struct uarch: first the name buffer it owns, then the struct.
void free_uarch_struct(struct uarch* arch) {
  free(arch->uarch_str);
  free(arch);
}

17
src/uarch.h Normal file
View File

@@ -0,0 +1,17 @@
/*
 * Microarchitecture detection: maps a cpuid signature to a microarchitecture
 * and exposes the properties derived from it.
 */
#ifndef __UARCH__
#define __UARCH__

// <stdbool.h> is needed for the 'bool' return type below; it is included here
// so this header does not depend on what its includers pulled in first.
#include <stdbool.h>
#include <stdint.h>

#include "cpuid.h"

// Opaque record; its layout is private to uarch.c
struct uarch;

// Decodes extended family (ef), family (f), extended model (em), model (m)
// and stepping (s) into a newly allocated struct uarch
struct uarch* get_uarch_from_cpuid(struct cpuInfo* cpu, uint32_t ef, uint32_t f, uint32_t em, uint32_t m, int s);

// Queries on the microarchitecture stored in 'cpu'
bool vpus_are_AVX512(struct cpuInfo* cpu);
int get_number_of_vpus(struct cpuInfo* cpu);
char* get_str_uarch(struct cpuInfo* cpu);
char* get_str_process(struct cpuInfo* cpu);

// Releases a struct uarch returned by get_uarch_from_cpuid
void free_uarch_struct(struct uarch* arch);

#endif

View File

@@ -6,7 +6,7 @@
#include <errno.h> #include <errno.h>
#include "global.h" #include "global.h"
#include "standart.h" #include "cpuid.h"
#define _PATH_SYS_SYSTEM "/sys/devices/system" #define _PATH_SYS_SYSTEM "/sys/devices/system"
#define _PATH_SYS_CPU _PATH_SYS_SYSTEM"/cpu" #define _PATH_SYS_CPU _PATH_SYS_SYSTEM"/cpu"
@@ -24,7 +24,7 @@ long get_freq_from_file(char* path) {
if(fd == -1) { if(fd == -1) {
perror("open"); perror("open");
printBug("Could not open '%s'", path); printBug("Could not open '%s'", path);
return UNKNOWN; return UNKNOWN_FREQ;
} }
//File exists, read it //File exists, read it
@@ -45,7 +45,7 @@ long get_freq_from_file(char* path) {
perror("strtol"); perror("strtol");
printBug("Failed parsing '%s' file. Read data was: '%s'", path, buf); printBug("Failed parsing '%s' file. Read data was: '%s'", path, buf);
free(buf); free(buf);
return UNKNOWN; return UNKNOWN_FREQ;
} }
// We will be getting the frequency in KHz // We will be getting the frequency in KHz
@@ -53,7 +53,7 @@ long get_freq_from_file(char* path) {
// greater than 10 GHz or less than 100 MHz // greater than 10 GHz or less than 100 MHz
if(ret > 10000 * 1000 || ret < 100 * 1000) { if(ret > 10000 * 1000 || ret < 100 * 1000) {
printBug("Invalid data was read from file '%s': %ld\n", path, ret); printBug("Invalid data was read from file '%s': %ld\n", path, ret);
return UNKNOWN; return UNKNOWN_FREQ;
} }
free(buf); free(buf);