Compare commits

...

48 Commits
v0.98 ... v0.99

Author SHA1 Message Date
Dr-Noob
cae701dbd1 [v0.99] Update version 2021-08-07 11:29:08 +02:00
Dr-Noob
d0ec0d8c0f [v0.98][Refactoring] Simplify parse_color 2021-08-07 10:47:15 +02:00
Dr-Noob
5737f1ecaf [v0.98][Refactoring] Do not use hv_present in get_freq_from_file 2021-08-07 10:38:32 +02:00
Dr-Noob
fba69daee0 [v0.98][Refactoring] Simplify x86 get_str_topology 2021-08-07 10:27:41 +02:00
Dr-Noob
2e08b10652 [v0.98][Refactoring] Use array of colors instead of fixed structure of colors in args 2021-08-07 10:01:34 +02:00
Dr-Noob
a03f296390 [v0.98] Fix bug in get_str_peak_performance and always show unknown pp when freq is also unknown 2021-08-07 08:58:49 +02:00
Dr-Noob
2b21326167 [v0.98][Refactoring] Use printWarn + strerror(errno) instead of perror. Use fallback in ppc in case total_cores cannot be retrieved 2021-08-07 08:45:37 +02:00
Dr-Noob
c24dd7cbb6 [v0.98][Refactoring] Use int for peak performance, which makes code cleaner 2021-08-06 11:04:29 +02:00
Dr-Noob
6953d8dda5 [v0.98][Refactoring] Unify the use of get_str_peak_performance 2021-08-06 10:26:07 +02:00
Dr-Noob
7e1dde3c71 [v0.98][PPC] Check if udev functions failed 2021-08-06 09:38:04 +02:00
Dr-Noob
44d4b3b553 [v0.98][Refactoring] Unify the use of init_topology_struct and init_cache_struct 2021-08-05 20:01:32 +02:00
Dr-Noob
6ab6afc974 [v0.98][Refactoring] Unify the use of unknown string 2021-08-05 19:07:09 +02:00
Dr-Noob
6e8a9612ad [v0.98] Fix little bug in get_str_cache_two (spotted by #90) and simplfy get_str_cache_one 2021-08-05 16:16:16 +02:00
Dr-Noob
ee57646f9e [v0.98][PPC] Update ppc peak performance taking into account slices in POWER9 2021-08-05 15:47:18 +02:00
Dr-Noob
921e815470 [v0.98][PPC] Add part number detection using linux device tree 2021-08-05 10:06:16 +02:00
Dr-Noob
bcdd5267b2 [v0.98][PPC] Fix ppc compilation after adding emalloc wrapper 2021-08-05 09:19:50 +02:00
Dr-Noob
6b6f8f504f [v0.98][PPC] Improve ppc detection arch in Makefile as suggested by a macrumors user 2021-08-05 09:17:41 +02:00
Dr-Noob
c4f6ba7c55 [v0.98] Fix compilation in different platforms 2021-08-04 23:33:44 +02:00
Dr-Noob
051a37862c [v0.98] Add compiler flags (in a different target, "strict") to detect programming errors, as suggested by #90 and #76 2021-08-04 23:17:12 +02:00
Dr-Noob
eac97bf721 [v0.98] Use malloc/calloc wrapper that exits when alloc fails, as suggested by #90 2021-08-04 10:01:32 +02:00
Dr-Noob
3a636c101b [v0.98] Use unsigned integers in bit operations as suggested by #76 2021-08-03 23:54:49 +02:00
Dr-Noob
c0263c0378 [v0.98][PPC] Fix bug in which altivec was not detected for POWER9 2021-08-03 13:43:34 +02:00
Dr-Noob
7802505c19 [v0.98][PPC] Add stepping for POWER9 CPUs 2021-08-02 09:08:08 +02:00
Dr-Noob
868903638d [v0.98] Add PPC to supported architectures in README. Add basic programming documentation for PPC 2021-08-02 08:50:26 +02:00
Dr-Noob
aa7eaa882f [v0.98][PPC] Various fixes. Implement debug option 2021-07-31 23:46:29 +02:00
Dr-Noob
55df725e38 [v0.98][PPC] Forgot to use cache level in get_num_caches_by_level 2021-07-31 23:24:12 +02:00
Dr-Noob
f744b72e27 [v0.98][PPC] Retrieve num caches from udev instead of guessing 2021-07-31 23:18:38 +02:00
Dr-Noob
18744c69f7 [v0.98][PPC] Dont display name (it was always unknown anyway) 2021-07-31 18:30:31 +02:00
Dr-Noob
2180fb1c26 [v0.98][PPC] Add cache detection using udev and use it for ppc 2021-07-31 18:26:47 +02:00
Dr-Noob
4d1d14d2a7 [v0.98][PPC] Add altivec detection and peak performance output 2021-07-31 17:43:02 +02:00
Dr-Noob
faac972107 [v0.98][PPC] Add max frequency detection 2021-07-31 17:01:06 +02:00
Dr-Noob
d953d9a4f0 [v0.98][PPC] Relation between uarch and process and str. Added a few more uarchs 2021-07-31 16:42:16 +02:00
Dr-Noob
53fa2511b9 [v0.98][PPC] Obtain microarchitecture using pvr (better!) 2021-07-31 15:58:57 +02:00
Dr-Noob
9b483d2db5 [v0.98][PPC] Obtain microarchitecture using getauxval 2021-07-31 09:50:38 +02:00
Dr-Noob
af22b2e186 [v0.98][PPC] Added IBM color scheme 2021-07-27 22:23:39 +02:00
Dr-Noob
897d05e976 [v0.98][PPC] Added bars to the IBM logo 2021-07-27 22:12:13 +02:00
Dr-Noob
8ba5b66983 [v0.98][PPC] Add IBM ascii art 2021-07-27 22:06:37 +02:00
Dr-Noob
3870527732 [v0.98][PPC] Refactor PowerPC udev functions 2021-07-27 21:37:13 +02:00
Dr-Noob
135cc9d504 [v0.98][PPC] Basic support for topology detection 2021-07-27 21:34:08 +02:00
Dr-Noob
f4aa335af1 [v0.98][PPC] Start PowerPC port. It just compiles but nothing is displayed 2021-07-27 20:26:17 +02:00
Dr-Noob
7afb6fd0fe [v0.98] Add exynosXXXX string to exynos SoCs detection 2021-07-27 11:15:04 +02:00
Dr-Noob
bb502250c6 [v0.98] Update ryzen uarch table. Add bash script to decode CPUID 2021-07-27 10:06:59 +02:00
Dr-Noob
5ae8db272d [v0.98] Remove schedule from action to avoid unnecessary workflow runs and fix README ToC 2021-07-26 13:10:19 +02:00
Dr-Noob
cb49d4bbab [v0.98] Disable PR in github 2021-07-26 13:06:47 +02:00
Dr-Noob
15035b9423 [v0.98] Add FreeBSD support to README 2021-06-20 23:29:25 +02:00
Dr-Noob
c1a029e26f [v0.98] Merge RPi branch to fix SoC detection issue 2021-06-20 23:15:24 +02:00
Dr-Noob
5c3f49c580 [v0.98] Patch to fix the compilation error reported by #93 2021-06-19 00:03:13 +02:00
Dr-Noob
d8dbbc8dd8 [v0.98] Detect RPi SoC using revision codes, according to #91 2021-06-16 16:01:25 +01:00
37 changed files with 2123 additions and 1025 deletions

36
.github/workflows/lockdown.yml vendored Normal file
View File

@@ -0,0 +1,36 @@
name: 'Disable PR in cpufetch'
on:
issues:
types: opened
pull_request:
types: opened
permissions:
issues: write
pull-requests: write
jobs:
action:
runs-on: ubuntu-latest
steps:
- uses: dessant/repo-lockdown@v2
with:
github-token: ${{ github.token }}
exclude-issue-created-before: ''
exclude-issue-labels: ''
issue-labels: ''
issue-comment: ''
skip-closed-issue-comment: false
close-issue: false
lock-issue: true
issue-lock-reason: ''
exclude-pr-created-before: ''
exclude-pr-labels: ''
pr-labels: ''
pr-comment: 'cpufetch does not accept pull requests, see [the contributing guidelines](https://github.com/Dr-Noob/cpufetch/blob/master/CONTRIBUTING.md) for details'
skip-closed-pr-comment: false
close-pr: true
lock-pr: false
pr-lock-reason: ''
process-only: 'prs'

View File

@@ -3,49 +3,50 @@
<!-- START doctoc generated TOC please keep comment here to allow auto update -->
<!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->
- [Creating a pull request](#creating-a-pull-request)
- [Case 1: I found a bug in cpufetch](#case-1-i-found-a-bug-in-cpufetch)
- [Case 2: I have an idea for a new feature in cpufetch / I want to suggest a change in cpufetch](#case-2-i-have-an-idea-for-a-new-feature-in-cpufetch--i-want-to-suggest-a-change-in-cpufetch)
- [Case 3: I want to make changes to the Makefile](#case-3-i-want-to-make-changes-to-the-makefile)
- [Creating an issue](#creating-an-issue)
- [cpufetch fails / crashes with a segmentation fault / ends without any output](#cpufetch-fails--crashes-with-a-segmentation-fault--ends-without-any-output)
- [Option 1 (best)](#option-1-best)
- [Option 2 (use this option if you can't work with option 1)](#option-2-use-this-option-if-you-cant-work-with-option-1)
- [1. cpufetch does not accept pull requests](#1-cpufetch-does-not-accept-pull-requests)
- [2. Creating an issue](#2-creating-an-issue)
- [2.1: I found a bug in cpufetch (the program provides incorrect / invalid information)](#21-i-found-a-bug-in-cpufetch-the-program-provides-incorrect--invalid-information)
- [2.2: I found a bug in cpufetch (the program crashes / does not work properly)](#22-i-found-a-bug-in-cpufetch-the-program-crashes--does-not-work-properly)
- [Stacktrace option 1 (best)](#stacktrace-option-1-best)
- [Stacktrace option 2 (use this option if option 1 does not work)](#stacktrace-option-2-use-this-option-if-option-1-does-not-work)
- [2.3: I have an idea for a new feature in cpufetch / I want to suggest a change in cpufetch](#23-i-have-an-idea-for-a-new-feature-in-cpufetch--i-want-to-suggest-a-change-in-cpufetch)
<!-- END doctoc generated TOC please keep comment here to allow auto update -->
Thanks for your interest in contributing to cpufetch! Please, read this page carefully to understand how to contribute to cpufetch.
## Creating a pull request
## 1. cpufetch does not accept pull requests
cpufetch is a small project, and I enjoy developing it. There are for sure some bugs and exciting features to add, but I prefer to make these
changes myself. For that reason, you should always use the issues page to report anything related to cpufetch. In the rare case that there is
a concise bug or feature that I am unable to implement myself, I will enable pull requests for this.
Thanks for your interest in contributing to cpufetch!
## 2. Creating an issue
What kind of contribution are you going to propose?
### 2.1: I found a bug in cpufetch (the program provides incorrect / invalid information)
In the github issue **you must include**:
- Exact CPU model.
- Operating system.
- The output of `cpufetch`.
- The output of `cpufetch --debug`.
### Case 1: I found a bug in cpufetch
If you found a bug, please don't open a pull request; open an issue instead, even if you know the solution. I appreciate people finding bugs, but I generally prefer to fix them myself.
### 2.2: I found a bug in cpufetch (the program crashes / does not work properly)
- Exact CPU model.
- Operating system.
- The output of `cpufetch`.
- The output of `cpufetch --debug`.
- A stacktrace (if program crashes):
### Case 2: I have an idea for a new feature in cpufetch / I want to suggest a change in cpufetch
Great! Make a PR and make sure to explain how did you implement your new feature on the github conversation page.
### Case 3: I want to make changes to the Makefile
Don't open a pull request; open an issue instead and suggest your changes in the Makefile there. Except for extraordinary cases, I will not accept changes in the Makefile.
## Creating an issue
If you are going to report a bug or problem, always report the CPU model and OS. If possible, also paste the output of `cpufetch` and `cpufetch --debug`.
### cpufetch fails / crashes with a segmentation fault / ends without any output
You need to provide additional information in the github issue:
#### Option 1 (best)
#### Stacktrace option 1 (best)
1. Build cpufetch with debug symbols (`make clean; make debug`).
2. Install valgrind (if it is not already installed)
3. Run cpufetch with valgrind (`valgrind ./cpufetch`)
4. Paste the complete output (preferably on a platform like pastebin)
#### Option 2 (use this option if you can't work with option 1)
#### Stacktrace option 2 (use this option if option 1 does not work)
1. Build cpufetch with debug symbols (`make clean; make debug`).
2. Install gdb (if it is not already installed)
3. Debug cpufetch with gdb (`gdb cpufetch`)
4. Run cpufetch (just type `r` inside the gdb console)
5. Paste the complete output (preferably on a platform like pastebin)
### 2.3: I have an idea for a new feature in cpufetch / I want to suggest a change in cpufetch
Just explain the feature in the issue and include references (links) to relevant sources if appropriate.

View File

@@ -1,6 +1,6 @@
CC ?= gcc
CFLAGS+=-Wall -Wextra -pedantic -fstack-protector-all -pedantic -std=c99
CFLAGS+=-Wall -Wextra -pedantic -fstack-protector-all -pedantic
SANITY_FLAGS=-Wfloat-equal -Wshadow -Wpointer-arith
PREFIX ?= /usr
@@ -12,16 +12,22 @@ COMMON_HDR = $(SRC_COMMON)ascii.h $(SRC_COMMON)cpu.h $(SRC_COMMON)udev.h $(SRC_C
ifneq ($(OS),Windows_NT)
arch := $(shell uname -m)
ifeq ($(arch), $(filter $(arch), x86_64 i686))
ifeq ($(arch), $(filter $(arch), x86_64 amd64 i686))
SRC_DIR=src/x86/
SOURCE += $(COMMON_SRC) $(SRC_DIR)cpuid.c $(SRC_DIR)apic.c $(SRC_DIR)cpuid_asm.c $(SRC_DIR)uarch.c
HEADERS += $(COMMON_HDR) $(SRC_DIR)cpuid.h $(SRC_DIR)apic.h $(SRC_DIR)cpuid_asm.h $(SRC_DIR)uarch.h
CFLAGS += -DARCH_X86
CFLAGS += -DARCH_X86 -std=c99
else ifeq ($(arch), $(filter $(arch), ppc64le ppc64 ppcle ppc))
SRC_DIR=src/ppc/
SOURCE += $(COMMON_SRC) $(SRC_DIR)ppc.c $(SRC_DIR)uarch.c $(SRC_DIR)udev.c
HEADERS += $(COMMON_HDR) $(SRC_DIR)ppc.h $(SRC_DIR)uarch.h $(SRC_DIR)udev.c
CFLAGS += -DARCH_PPC -std=gnu99
else
# Assume ARM
SRC_DIR=src/arm/
SOURCE += $(COMMON_SRC) $(SRC_DIR)midr.c $(SRC_DIR)uarch.c $(SRC_DIR)soc.c $(SRC_DIR)udev.c
HEADERS += $(COMMON_HDR) $(SRC_DIR)midr.h $(SRC_DIR)uarch.h $(SRC_DIR)soc.h $(SRC_DIR)udev.c $(SRC_DIR)socs.h
CFLAGS += -DARCH_ARM -Wno-unused-parameter
CFLAGS += -DARCH_ARM -Wno-unused-parameter -std=c99
endif
OUTPUT=cpufetch
@@ -31,7 +37,7 @@ else
SOURCE += $(COMMON_SRC) $(SRC_DIR)cpuid.c $(SRC_DIR)apic.c $(SRC_DIR)cpuid_asm.c $(SRC_DIR)uarch.c
HEADERS += $(COMMON_HDR) $(SRC_DIR)cpuid.h $(SRC_DIR)apic.h $(SRC_DIR)cpuid_asm.h $(SRC_DIR)uarch.h
CFLAGS += -DARCH_X86
SANITY_FLAGS += -Wno-pedantic-ms-format
SANITY_FLAGS += -Wno-pedantic-ms-format -std=c99
OUTPUT=cpufetch.exe
endif
@@ -44,6 +50,9 @@ debug: $(OUTPUT)
static: CFLAGS += -static -O3
static: $(OUTPUT)
strict: CFLAGS += -O3 -Werror -fsanitize=undefined -D_FORTIFY_SOURCE=2
strict: $(OUTPUT)
$(OUTPUT): Makefile $(SOURCE) $(HEADERS)
$(CC) $(CFLAGS) $(SANITY_FLAGS) $(SOURCE) -o $(OUTPUT)

View File

@@ -23,7 +23,7 @@
- [1. Support](#1-support)
- [2. Installation](#2-installation)
- [2.1 Available packages](#21-available-packages)
- [2.1 Installing from a package](#21-installing-from-a-package)
- [2.2 Building from source (Linux/Windows/macOS)](#22-building-from-source-linuxwindowsmacos)
- [2.3 Android](#23-android)
- [3. Examples](#3-examples)
@@ -32,18 +32,24 @@
- [4. Colors and style](#4-colors-and-style)
- [5. Implementation](#5-implementation)
- [6. Bugs or improvements](#6-bugs-or-improvements)
- [7. Acknowledgements](#7-acknowledgements)
<!-- END doctoc generated TOC please keep comment here to allow auto update -->
# 1. Support
cpufetch supports x86, x86_64 (Intel and AMD) and ARM.
| Platform | x86_64 | ARM | Notes |
|:---------:|:------------------------:|:-------------------:|:-----------------:|
| GNU/Linux | :heavy_check_mark: | :heavy_check_mark: | Best support |
| Windows | :heavy_check_mark: | :x: | Some information may be missing. <br> Colors will be used if supported |
| Android | :heavy_exclamation_mark: | :heavy_check_mark: | Some information may be missing. <br> Not tested under x86_64 |
| macOS | :heavy_check_mark: | :x: | Some information may be missing. <br> Apple M1 support may be added <br> in the future (see [#47](https://github.com/Dr-Noob/cpufetch/issues/47))|
cpufetch supports the following architectures:
- x86 / x86_64
- ARM
- PowerPC
| OS | x86_64 / x86 | ARM | PowerPC | Notes |
|:---------:|:------------------------:|:-------------------:|:------------------:|:----------:|
| GNU/Linux | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | Best support |
| Windows | :heavy_check_mark: | :x: | :x: | Some information may be missing. <br> Colors will be used if supported |
| Android | :heavy_exclamation_mark: | :heavy_check_mark: | :x: | Some information may be missing. <br> Not tested under x86_64 |
| macOS | :heavy_check_mark: | :x: | :x: | Some information may be missing. <br> Apple M1 support may be added <br> in the future (see [#47](https://github.com/Dr-Noob/cpufetch/issues/47))|
| FreeBSD | :heavy_check_mark: | :x: | :x: | Some information may be missing. |
# 2. Installation
## 2.1 Installing from a package
@@ -102,10 +108,12 @@ specifying Intel or AMD, or specifying the colors in RGB format:
In the case of setting the colors using RGB, 4 colors must be given in with the format: ``[R,G,B:R,G,B:R,G,B:R,G,B]``. These colors correspond to CPU art color (2 colors) and for the text colors (following 2). Thus, you can customize all the colors.
# 5. Implementation
See [cpufetch programming documentation](https://github.com/Dr-Noob/cpufetch/blob/master/doc/README.md).
See [cpufetch programming documentation](https://github.com/Dr-Noob/cpufetch/blob/master/doc/README.md).
# 6. Bugs or improvements
There are many open issues in github (see [issues](https://github.com/Dr-Noob/cpufetch/issues)). Feel free to open a new one report an issue or propose any improvement in `cpufetch`
See [cpufetch contributing guidelines](https://github.com/Dr-Noob/cpufetch/blob/master/CONTRIBUTING.md).
I would like to thank [Gonzalocl](https://github.com/Gonzalocl) and [OdnetninI](https://github.com/OdnetninI) for their help, running `cpufetch` in many different CPUs they have access to, which makes it easier to debug and check the correctness of `cpufetch`.
# 7. Acknowledgements
Special thanks to [Gonzalocl](https://github.com/Gonzalocl) and [OdnetninI](https://github.com/OdnetninI). They tested `cpufetch` in its early days on the many different CPUs they have access to, which made it easier to debug and check the correctness of `cpufetch`.
Special thanks also to the fellow contributors and everyone interested in the project!

View File

@@ -37,6 +37,9 @@ Use Intel default color scheme
* "amd":
Use AMD default color scheme
.TP
* "ibm":
Use IBM default color scheme
.TP
* "arm":
Use ARM default color scheme
.TP

26
doc/DOCUMENTATION_PPC.md Normal file
View File

@@ -0,0 +1,26 @@
### 2. How to get CPU microarchitecture?
__Involved code: [get_uarch_from_pvr (uarch.c)](https://github.com/Dr-Noob/cpufetch/blob/master/src/ppc/uarch.c)__
Microarchitecture is deduced from the PVR register, which is read using the `mfpvr` instruction. The correspondence between the PVR and the specific microarchitecture has been implemented using the values in `arch/powerpc/kernel/cputable.c` in the Linux kernel. Some of them have been removed. The manufacturing process has been queried by searching on the internet.
### 3. How to get CPU topology?
__Involved code: [get_topology_info (ppc.c)](https://github.com/Dr-Noob/cpufetch/blob/master/src/ppc/ppc.c)__
The total number of cores is queried using `sysconf(_SC_NPROCESSORS_ONLN)`. Then, with the number of sockets and the number of physical cores, we can calculate the number of threads per core.
The number of sockets is queried using `/sys/devices/system/cpu/cpu*/topology/physical_package_id`. Once this file has been read for all of the cores, a simple custom algorithm is used to determine the number of sockets.
The number of physical cores is queried using `/sys/devices/system/cpu/cpu*/topology/core_id`. Again, a custom algorithm is used to determine the number of physical cores.
### 4. How to get the frequency?
Frequency is read directly from `/sys/devices/system/cpu/cpu*/cpufreq/cpuinfo_max_freq`
### 5. How to get cache size and topology?
Cache size is retrieved directly from Linux (using `/sys/devices/system/cpu/cpu0/cache/index*/size`).
To find the cache topology, the files `/sys/devices/system/cpu/cpu0/cache/index*/shared_cpu_map` are used, and a custom algorithm determines how many caches there are at each level.
_NOTE_: To avoid Linux dependencies at this point, it looks like it is possible to derive the cache size and topology from the microarchitecture. For example, in the POWER9 architecture, wikichip assumes that all the POWER9 CPUs have the same cache size for each core and topology [[1](#references)].
#### References
- [1] [POWER9 - wikichip](https://en.wikichip.org/wiki/ibm/microarchitectures/power9)

View File

@@ -1,13 +1,14 @@
# cpufetch programming documentation (v0.94)
# cpufetch programming documentation (v0.98)
This documentation explains how cpufetch works internally and all the design decisions I made. This document intends to be useful for me in the future, for everyone interested in the project, and for anyone who is trying to obtain any specific information from the CPU. In this way, this can be used as a manual or a page that collects interesting material in this area.
### 1. Basics
cpufetch works for __x86_64__ (Intel and AMD) and __ARM__ CPUs. However, cpufetch is expected to work better on x86_64, because the codebase is older and has been tested much more than the ARM version. Other kinds of x86_64 CPU are not supported (I don't think supporting other CPUs may pay off). Depending on the architecture, cpufetch choose certain files to be compiled. A summarized tree of the source code of cpufetch is shown below.
cpufetch works for __x86_64__ (Intel and AMD), __ARM__ and __PowerPC__ CPUs. However, cpufetch is expected to work better on x86_64, because the codebase is older and has been tested much more than the ARM and PowerPC versions. Depending on the architecture, cpufetch chooses certain files to be compiled. A summarized tree of the source code of cpufetch is shown below.
```
cpufetch/
├── doc
│   ├── DOCUMENTATION_ARM.md
│   ├── DOCUMENTATION_PPC.md
│   ├── DOCUMENTATION_X86.md
│   └── README.md
├── Makefile
@@ -19,17 +20,22 @@ cpufetch/
│   └── other files ...
├── common/
│   └── common files ...
├── ppc/
│   ├── ppc.c
│   ├── ppc.h
│   └── other files ...
└── x86/
├── cpuid.c
├── cpuid.h
└── other files ...
```
Source code is divided into three directories:
Source code is divided into four directories:
- `common/`: Source code shared between x86 and ARM
- `arm/`: ARM dependant source code
- `x86/`: x86_64 dependant source code
- `common/`: Source code shared between all architectures
- `arm/`: ARM source code
- `ppc/`: PowerPC source code
- `x86/`: x86 source code
##### 1.1 Basics (x86_64)
@@ -50,10 +56,14 @@ struct cpuInfo {
To use any CPUID leaf, cpufetch always needs to check that it is supported in the current CPU.
##### 1.2 Basics (ARM)
In ARM, __cpufetch works using the MIDR register and Linux filesystem__. MIDR (Main ID Register) is read from `/proc/cpuinfo`. It allows the detection of the microarchitecture of the cores. Furthermore, Linux filesystem `/sys/devices/system/cpu/` is used to fetch the number of cores and other information. This is the main reason to explain __why `cpufetch` only works on Linux kernel based systems.__
In ARM, __cpufetch works using the MIDR register and Linux filesystem__. MIDR (Main ID Register) is read from `/proc/cpuinfo`. It allows the detection of the microarchitecture of the cores. Furthermore, Linux filesystem `/sys/devices/system/cpu/` is used to fetch the number of cores and other information. This is the main reason to explain __why `cpufetch` for ARM only works on Linux systems.__
##### 1.3 Documentation organization
The rest of the documentation is divided into x86 and ARM architectures since each one needs different implementations:
##### 1.3 Basics (PowerPC)
In PowerPC, __cpufetch works using the PVR register and Linux filesystem__. PVR (Processor Version Register) is read using assembly and it is used to identify the microarchitecture of the CPU. Linux is also used to query the rest of the information, like the CPU topology, frequency, etc. This is the main reason to explain __why `cpufetch` for PowerPC only works on Linux systems.__
##### 1.4 Documentation organization
The rest of the documentation is divided in specific files for each architecture, since each one needs different implementations:
- [DOCUMENTATION_X86.md](https://github.com/Dr-Noob/cpufetch/blob/master/doc/DOCUMENTATION_X86.md)
- [DOCUMENTATION_ARM.md](https://github.com/Dr-Noob/cpufetch/blob/master/doc/DOCUMENTATION_ARM.md)
- [DOCUMENTATION_PPC.md](https://github.com/Dr-Noob/cpufetch/blob/master/doc/DOCUMENTATION_PPC.md)
- [DOCUMENTATION_X86.md](https://github.com/Dr-Noob/cpufetch/blob/master/doc/DOCUMENTATION_X86.md)

View File

@@ -13,29 +13,8 @@
#include "uarch.h"
#include "soc.h"
#define STRING_UNKNOWN "Unknown"
void init_cache_struct(struct cache* cach) {
cach->L1i = malloc(sizeof(struct cach));
cach->L1d = malloc(sizeof(struct cach));
cach->L2 = malloc(sizeof(struct cach));
cach->L3 = malloc(sizeof(struct cach));
cach->cach_arr = malloc(sizeof(struct cach*) * 4);
cach->cach_arr[0] = cach->L1i;
cach->cach_arr[1] = cach->L1d;
cach->cach_arr[2] = cach->L2;
cach->cach_arr[3] = cach->L3;
cach->max_cache_level = 0;
cach->L1i->exists = false;
cach->L1d->exists = false;
cach->L2->exists = false;
cach->L3->exists = false;
}
struct cache* get_cache_info(struct cpuInfo* cpu) {
struct cache* cach = malloc(sizeof(struct cache));
struct cache* cach = emalloc(sizeof(struct cache));
init_cache_struct(cach);
cach->max_cache_level = 2;
@@ -49,19 +28,17 @@ struct cache* get_cache_info(struct cpuInfo* cpu) {
}
struct frequency* get_frequency_info(uint32_t core) {
struct frequency* freq = malloc(sizeof(struct frequency));
struct frequency* freq = emalloc(sizeof(struct frequency));
freq->base = UNKNOWN_FREQ;
freq->max = get_max_freq_from_file(core, false);
freq->max = get_max_freq_from_file(core);
return freq;
}
struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach, uint32_t* midr_array, int socket_idx, int ncores) {
struct topology* topo = malloc(sizeof(struct topology));
topo->cach = cach;
topo->total_cores = 0;
struct topology* topo = emalloc(sizeof(struct topology));
init_topology_struct(topo, cach);
int sockets_seen = 0;
int first_core_idx = 0;
@@ -84,6 +61,27 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach, uint
return topo;
}
/*
 * Computes the aggregated peak performance of the CPU list that starts at
 * `cpu` (cpu->num_cpus entries, chained through next_cpu).
 *
 * Returns -1 as soon as get_freq() reports UNKNOWN_FREQ for any entry.
 * Otherwise returns the sum, over all entries, of
 *   total_cores * (get_freq(freq) * 1000000)
 * multiplied by 4 when the head entry reports NEON support.
 */
int64_t get_peak_performance(struct cpuInfo* cpu) {
  struct cpuInfo* node = cpu;
  int idx = 0;

  // Refuse to compute anything unless every frequency is known
  while(idx < cpu->num_cpus) {
    if(get_freq(node->freq) == UNKNOWN_FREQ) {
      return -1;
    }
    node = node->next_cpu;
    idx++;
  }

  int64_t total = 0;
  node = cpu;
  for(idx = 0; idx < cpu->num_cpus; idx++) {
    total += node->topo->total_cores * (get_freq(node->freq) * 1000000);
    node = node->next_cpu;
  }

  // NOTE(review): the x4 factor presumably models NEON vector width -- confirm
  if(cpu->feat->NEON) {
    total *= 4;
  }

  return total;
}
/*
 * Tells whether the cores at positions c1pos and c2pos are considered the
 * same kind of core: both their MIDR values and their frequencies must match.
 */
bool cores_are_equal(int c1pos, int c2pos, uint32_t* midr_array, int32_t* freq_array) {
  if(midr_array[c1pos] != midr_array[c2pos]) {
    return false;
  }
  return freq_array[c1pos] == freq_array[c2pos];
}
@@ -128,7 +126,7 @@ void init_cpu_info(struct cpuInfo* cpu) {
// ARM32 https://elixir.bootlin.com/linux/latest/source/arch/arm/include/uapi/asm/hwcap.h
// ARM64 https://elixir.bootlin.com/linux/latest/source/arch/arm64/include/uapi/asm/hwcap.h
struct features* get_features_info() {
struct features* feat = malloc(sizeof(struct features));
struct features* feat = emalloc(sizeof(struct features));
bool *ptr = &(feat->AES);
for(uint32_t i = 0; i < sizeof(struct features)/sizeof(bool); i++, ptr++) {
*ptr = false;
@@ -169,14 +167,14 @@ struct features* get_features_info() {
}
struct cpuInfo* get_cpu_info() {
struct cpuInfo* cpu = malloc(sizeof(struct cpuInfo));
struct cpuInfo* cpu = emalloc(sizeof(struct cpuInfo));
init_cpu_info(cpu);
int ncores = get_ncores_from_cpuinfo();
bool success = false;
int32_t* freq_array = malloc(sizeof(uint32_t) * ncores);
uint32_t* midr_array = malloc(sizeof(uint32_t) * ncores);
uint32_t* ids_array = malloc(sizeof(uint32_t) * ncores);
int32_t* freq_array = emalloc(sizeof(uint32_t) * ncores);
uint32_t* midr_array = emalloc(sizeof(uint32_t) * ncores);
uint32_t* ids_array = emalloc(sizeof(uint32_t) * ncores);
for(int i=0; i < ncores; i++) {
midr_array[i] = get_midr_from_cpuinfo(i, &success);
@@ -186,7 +184,7 @@ struct cpuInfo* get_cpu_info() {
midr_array[i] = midr_array[0];
}
freq_array[i] = get_max_freq_from_file(i, false);
freq_array[i] = get_max_freq_from_file(i);
if(freq_array[i] == UNKNOWN_FREQ) {
printWarn("Unable to fetch max frequency for core %d. This is probably because the core is offline", i);
freq_array[i] = freq_array[0];
@@ -199,7 +197,7 @@ struct cpuInfo* get_cpu_info() {
int tmp_midr_idx = 0;
for(uint32_t i=0; i < sockets; i++) {
if(i > 0) {
ptr->next_cpu = malloc(sizeof(struct cpuInfo));
ptr->next_cpu = emalloc(sizeof(struct cpuInfo));
ptr = ptr->next_cpu;
init_cpu_info(ptr);
@@ -218,57 +216,25 @@ struct cpuInfo* get_cpu_info() {
}
cpu->num_cpus = sockets;
cpu->hv = malloc(sizeof(struct hypervisor));
cpu->hv = emalloc(sizeof(struct hypervisor));
cpu->hv->present = false;
cpu->soc = get_soc();
cpu->peak_performance = get_peak_performance(cpu);
return cpu;
}
char* get_str_topology(struct cpuInfo* cpu, struct topology* topo, bool dual_socket) {
uint32_t size = 3+7+1;
char* string = malloc(sizeof(char)*size);
char* string = emalloc(sizeof(char)*size);
snprintf(string, size, "%d cores", topo->total_cores);
return string;
}
char* get_str_peak_performance(struct cpuInfo* cpu) {
//7 for GFLOP/s and 6 for digits,eg 412.14
uint32_t size = 7+6+1+1;
assert(strlen(STRING_UNKNOWN)+1 <= size);
char* string = malloc(sizeof(char)*size);
struct cpuInfo* ptr = cpu;
//First check we have consistent data
for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
if(get_freq(ptr->freq) == UNKNOWN_FREQ) {
snprintf(string, strlen(STRING_UNKNOWN)+1, STRING_UNKNOWN);
return string;
}
}
double flops = 0.0;
ptr = cpu;
for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
flops += ptr->topo->total_cores * (get_freq(ptr->freq) * 1000000);
}
if(cpu->feat->NEON) flops = flops * 4;
if(flops >= (double)1000000000000.0)
snprintf(string,size,"%.2f TFLOP/s",flops/1000000000000);
else if(flops >= 1000000000.0)
snprintf(string,size,"%.2f GFLOP/s",flops/1000000000);
else
snprintf(string,size,"%.2f MFLOP/s",flops/1000000);
return string;
}
char* get_str_features(struct cpuInfo* cpu) {
struct features* feat = cpu->feat;
char* string = malloc(sizeof(char) * 25);
char* string = emalloc(sizeof(char) * 25);
uint32_t len = 0;
if(feat->NEON) {
@@ -306,7 +272,7 @@ void print_debug(struct cpuInfo* cpu) {
for(int i=0; i < ncores; i++) {
printf("[Core %d] ", i);
long freq = get_max_freq_from_file(i, false);
long freq = get_max_freq_from_file(i);
uint32_t midr = get_midr_from_cpuinfo(i, &success);
if(!success) {
printWarn("Unable to fetch MIDR for core %d. This is probably because the core is offline", i);
@@ -317,7 +283,7 @@ void print_debug(struct cpuInfo* cpu) {
}
if(freq == UNKNOWN_FREQ) {
printWarn("Unable to fetch max frequency for core %d. This is probably because the core is offline", i);
printf("%ld MHz\n", get_max_freq_from_file(0, false));
printf("%ld MHz\n", get_max_freq_from_file(0));
}
else {
printf("%ld MHz\n", freq);

View File

@@ -7,7 +7,6 @@ struct cpuInfo* get_cpu_info();
uint32_t get_nsockets(struct topology* topo);
char* get_str_topology(struct cpuInfo* cpu, struct topology* topo, bool dual_socket);
char* get_str_peak_performance(struct cpuInfo* cpu);
char* get_str_features(struct cpuInfo* cpu);
void print_debug(struct cpuInfo* cpu);

View File

@@ -9,7 +9,7 @@
#include "../common/global.h"
#define min(a,b) (((a)<(b))?(a):(b))
#define STRING_UNKNOWN "Unknown"
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
static char* soc_trademark_string[] = {
[SOC_VENDOR_SNAPDRAGON] = "Snapdragon ",
@@ -19,12 +19,19 @@ static char* soc_trademark_string[] = {
[SOC_VENDOR_BROADCOM] = "Broadcom BCM",
};
static char* soc_rpi_string[] = {
"BCM2835",
"BCM2836",
"BCM2837",
"BCM2711"
};
void fill_soc(struct system_on_chip* soc, char* soc_name, SOC soc_model, int32_t process) {
soc->soc_model = soc_model;
soc->soc_vendor = get_soc_vendor_from_soc(soc_model);
soc->process = process;
int len = strlen(soc_name) + strlen(soc_trademark_string[soc->soc_vendor]) + 1;
soc->soc_name = malloc(sizeof(char) * len);
soc->soc_name = emalloc(sizeof(char) * len);
memset(soc->soc_name, 0, sizeof(char) * len);
sprintf(soc->soc_name, "%s%s", soc_trademark_string[soc->soc_vendor], soc_name);
}
@@ -45,7 +52,7 @@ bool match_soc(struct system_on_chip* soc, char* raw_name, char* expected_name,
char* toupperstr(char* str) {
int len = strlen(str) + 1;
char* ret = malloc(sizeof(char) * len);
char* ret = emalloc(sizeof(char) * len);
memset(ret, 0, sizeof(char) * len);
for(int i=0; i < len; i++) {
@@ -59,6 +66,12 @@ char* toupperstr(char* str) {
#define SOC_EQ(raw_name, expected_name, soc_name, soc_model, soc, process) \
else if (match_soc(soc, raw_name, expected_name, soc_name, soc_model, process)) return true;
#define SOC_END else { return false; }
// Exynos special define
//
// SOC_EXY_EQ tries to match raw_name against two candidate codenames built
// from soc_name: first "exynos<soc_name>", then "universal<soc_name>".
// Each candidate is written into tmpsoc with sprintf and handed to match_soc
// as the expected name; on a match the ENCLOSING function returns true.
//
// Usage notes:
// - tmpsoc is a caller-provided scratch buffer that is overwritten twice.
//   sprintf does no bounds checking, so it must hold at least
//   strlen("universal") + strlen(soc_name) + 1 bytes.
// - The macro expands to several statements (it is not wrapped in
//   do { } while(0)), so it must not be used as the body of an unbraced
//   if/else/loop.
// - soc_name is expanded more than once; pass an expression without side effects.
#define SOC_EXY_EQ(raw_name, tmpsoc, soc_name, soc_model, soc, process) \
sprintf(tmpsoc, "exynos%s", soc_name); \
if (match_soc(soc, raw_name, tmpsoc, soc_name, soc_model, process)) return true; \
sprintf(tmpsoc, "universal%s", soc_name); \
if (match_soc(soc, raw_name, tmpsoc, soc_name, soc_model, process)) return true;
// https://en.wikipedia.org/wiki/Raspberry_Pi
// http://phonedb.net/index.php?m=processor&id=562&c=broadcom_bcm21663
@@ -130,45 +143,54 @@ bool match_hisilicon(char* soc_name, struct system_on_chip* soc) {
bool match_exynos(char* soc_name, struct system_on_chip* soc) {
char* tmp;
if((tmp = strstr(soc_name, "universal")) == NULL)
return false;
if((tmp = strstr(soc_name, "universal")) != NULL);
else if((tmp = strstr(soc_name, "exynos")) != NULL);
else return false;
// Because exynos are recently using "exynosXXXX" instead
// of "universalXXXX" as codenames, SOC_EXY_EQ will check for
// both cases, since it seems that there are some SoCs that
// can appear with both codenames
// Used by SOC_EXY_EQ
char tmpsoc[14];
SOC_START
// universalXXXX //
SOC_EQ(tmp, "universal3475", "3475", SOC_EXYNOS_3475, soc, 28)
SOC_EQ(tmp, "universal4210", "4210", SOC_EXYNOS_4210, soc, 45)
SOC_EQ(tmp, "universal4212", "4212", SOC_EXYNOS_4212, soc, 32)
SOC_EQ(tmp, "universal4412", "4412", SOC_EXYNOS_4412, soc, 32)
SOC_EQ(tmp, "universal5250", "5250", SOC_EXYNOS_5250, soc, 32)
SOC_EQ(tmp, "universal5410", "5410", SOC_EXYNOS_5410, soc, 28)
SOC_EQ(tmp, "universal5420", "5420", SOC_EXYNOS_5420, soc, 28)
SOC_EQ(tmp, "universal5422", "5422", SOC_EXYNOS_5422, soc, 28)
SOC_EQ(tmp, "universal5430", "5430", SOC_EXYNOS_5430, soc, 20)
SOC_EQ(tmp, "universal5433", "5433", SOC_EXYNOS_5433, soc, 20)
SOC_EQ(tmp, "universal5260", "5260", SOC_EXYNOS_5260, soc, 28)
SOC_EQ(tmp, "universal7270", "7270", SOC_EXYNOS_7270, soc, 14)
SOC_EQ(tmp, "universal7420", "7420", SOC_EXYNOS_7420, soc, 14)
SOC_EQ(tmp, "universal7570", "7570", SOC_EXYNOS_7570, soc, 14)
SOC_EQ(tmp, "universal7870", "7870", SOC_EXYNOS_7870, soc, 14)
SOC_EQ(tmp, "universal7872", "7872", SOC_EXYNOS_7872, soc, 14)
SOC_EQ(tmp, "universal7880", "7880", SOC_EXYNOS_7880, soc, 14)
SOC_EQ(tmp, "universal7884", "7884", SOC_EXYNOS_7884, soc, 14)
SOC_EQ(tmp, "universal7885", "7885", SOC_EXYNOS_7885, soc, 14)
SOC_EQ(tmp, "universal7904", "7904", SOC_EXYNOS_7904, soc, 14)
SOC_EQ(tmp, "universal8890", "8890", SOC_EXYNOS_8890, soc, 14)
SOC_EQ(tmp, "universal8895", "8895", SOC_EXYNOS_8895, soc, 10)
SOC_EQ(tmp, "universal9110", "9110", SOC_EXYNOS_9110, soc, 14)
SOC_EQ(tmp, "universal9609", "9609", SOC_EXYNOS_9609, soc, 10)
SOC_EQ(tmp, "universal9610", "9610", SOC_EXYNOS_9610, soc, 10)
SOC_EQ(tmp, "universal9611", "9611", SOC_EXYNOS_9611, soc, 10)
SOC_EQ(tmp, "universal9810", "9810", SOC_EXYNOS_9810, soc, 10)
SOC_EQ(tmp, "universal9820", "9820", SOC_EXYNOS_9820, soc, 8)
SOC_EQ(tmp, "universal9825", "9825", SOC_EXYNOS_9825, soc, 7)
// New exynos. Dont know if they will work //
SOC_EQ(tmp, "universal1080", "1080", SOC_EXYNOS_1080, soc, 5)
SOC_EQ(tmp, "universal990", "990", SOC_EXYNOS_990, soc, 7)
SOC_EQ(tmp, "universal980", "980", SOC_EXYNOS_980, soc, 8)
SOC_EQ(tmp, "universal880", "880", SOC_EXYNOS_880, soc, 8)
SOC_EXY_EQ(tmp, tmpsoc, "3475", SOC_EXYNOS_3475, soc, 28)
SOC_EXY_EQ(tmp, tmpsoc, "4210", SOC_EXYNOS_4210, soc, 45)
SOC_EXY_EQ(tmp, tmpsoc, "4212", SOC_EXYNOS_4212, soc, 32)
SOC_EXY_EQ(tmp, tmpsoc, "4412", SOC_EXYNOS_4412, soc, 32)
SOC_EXY_EQ(tmp, tmpsoc, "5250", SOC_EXYNOS_5250, soc, 32)
SOC_EXY_EQ(tmp, tmpsoc, "5410", SOC_EXYNOS_5410, soc, 28)
SOC_EXY_EQ(tmp, tmpsoc, "5420", SOC_EXYNOS_5420, soc, 28)
SOC_EXY_EQ(tmp, tmpsoc, "5422", SOC_EXYNOS_5422, soc, 28)
SOC_EXY_EQ(tmp, tmpsoc, "5430", SOC_EXYNOS_5430, soc, 20)
SOC_EXY_EQ(tmp, tmpsoc, "5433", SOC_EXYNOS_5433, soc, 20)
SOC_EXY_EQ(tmp, tmpsoc, "5260", SOC_EXYNOS_5260, soc, 28)
SOC_EXY_EQ(tmp, tmpsoc, "7270", SOC_EXYNOS_7270, soc, 14)
SOC_EXY_EQ(tmp, tmpsoc, "7420", SOC_EXYNOS_7420, soc, 14)
SOC_EXY_EQ(tmp, tmpsoc, "7570", SOC_EXYNOS_7570, soc, 14)
SOC_EXY_EQ(tmp, tmpsoc, "7570", SOC_EXYNOS_7570, soc, 14)
SOC_EXY_EQ(tmp, tmpsoc, "7870", SOC_EXYNOS_7870, soc, 14)
SOC_EXY_EQ(tmp, tmpsoc, "7870", SOC_EXYNOS_7870, soc, 14)
SOC_EXY_EQ(tmp, tmpsoc, "7872", SOC_EXYNOS_7872, soc, 14)
SOC_EXY_EQ(tmp, tmpsoc, "7880", SOC_EXYNOS_7880, soc, 14)
SOC_EXY_EQ(tmp, tmpsoc, "7884", SOC_EXYNOS_7884, soc, 14)
SOC_EXY_EQ(tmp, tmpsoc, "7885", SOC_EXYNOS_7885, soc, 14)
SOC_EXY_EQ(tmp, tmpsoc, "7904", SOC_EXYNOS_7904, soc, 14)
SOC_EXY_EQ(tmp, tmpsoc, "8890", SOC_EXYNOS_8890, soc, 14)
SOC_EXY_EQ(tmp, tmpsoc, "8895", SOC_EXYNOS_8895, soc, 10)
SOC_EXY_EQ(tmp, tmpsoc, "9110", SOC_EXYNOS_9110, soc, 14)
SOC_EXY_EQ(tmp, tmpsoc, "9609", SOC_EXYNOS_9609, soc, 10)
SOC_EXY_EQ(tmp, tmpsoc, "9610", SOC_EXYNOS_9610, soc, 10)
SOC_EXY_EQ(tmp, tmpsoc, "9611", SOC_EXYNOS_9611, soc, 10)
SOC_EXY_EQ(tmp, tmpsoc, "9810", SOC_EXYNOS_9810, soc, 10)
SOC_EXY_EQ(tmp, tmpsoc, "9820", SOC_EXYNOS_9820, soc, 8)
SOC_EXY_EQ(tmp, tmpsoc, "9825", SOC_EXYNOS_9825, soc, 7)
SOC_EXY_EQ(tmp, tmpsoc, "1080", SOC_EXYNOS_1080, soc, 5)
SOC_EXY_EQ(tmp, tmpsoc, "990", SOC_EXYNOS_990, soc, 7)
SOC_EXY_EQ(tmp, tmpsoc, "980", SOC_EXYNOS_980, soc, 8)
SOC_EXY_EQ(tmp, tmpsoc, "880", SOC_EXYNOS_880, soc, 8)
SOC_END
}
@@ -481,7 +503,7 @@ struct system_on_chip* guess_soc_from_android(struct system_on_chip* soc) {
property_len = android_property_get("ro.mediatek.platform", (char *) &tmp);
if(property_len > 0) {
soc->raw_name = malloc(sizeof(char) * (property_len + 1));
soc->raw_name = emalloc(sizeof(char) * (property_len + 1));
strncpy(soc->raw_name, tmp, property_len + 1);
soc->raw_name[property_len] = '\0';
soc->soc_vendor = SOC_VENDOR_UNKNOWN;
@@ -490,7 +512,7 @@ struct system_on_chip* guess_soc_from_android(struct system_on_chip* soc) {
property_len = android_property_get("ro.product.board", (char *) &tmp);
if(property_len > 0) {
soc->raw_name = malloc(sizeof(char) * (property_len + 1));
soc->raw_name = emalloc(sizeof(char) * (property_len + 1));
strncpy(soc->raw_name, tmp, property_len + 1);
soc->raw_name[property_len] = '\0';
soc->soc_vendor = SOC_VENDOR_UNKNOWN;
@@ -502,7 +524,7 @@ struct system_on_chip* guess_soc_from_android(struct system_on_chip* soc) {
#endif
struct system_on_chip* guess_soc_from_cpuinfo(struct system_on_chip* soc) {
char* tmp = get_hardware_from_cpuinfo(&strlen);
char* tmp = get_hardware_from_cpuinfo();
if(tmp != NULL) {
soc->raw_name = tmp;
@@ -512,12 +534,69 @@ struct system_on_chip* guess_soc_from_cpuinfo(struct system_on_chip* soc) {
return soc;
}
/*
 * Converts a single hexadecimal digit character to its numeric value.
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F'; returns -1 for any other character.
 */
int hex2int(char c) {
  int value = -1;

  if ('a' <= c && c <= 'f') {
    value = 10 + (c - 'a');
  }
  else if ('A' <= c && c <= 'F') {
    value = 10 + (c - 'A');
  }
  else if ('0' <= c && c <= '9') {
    value = c - '0';
  }

  return value;
}
// https://www.raspberrypi.org/documentation/hardware/raspberrypi/revision-codes/README.md
/*
 * Tries to identify the SoC of a Raspberry Pi from the "Revision" field of
 * cpuinfo. Only 6-character revision codes are handled: the third hex digit
 * (the PPPP field of the revision code) is used as an index into
 * soc_rpi_string and the resulting name is handed to match_broadcom.
 *
 * On any failure a message is printed and soc is returned as received.
 * This function itself does not set soc->raw_name.
 */
struct system_on_chip* guess_soc_raspbery_pi(struct system_on_chip* soc) {
  char* revision = get_revision_from_cpuinfo();

  if(revision == NULL) {
    printWarn("[RPi] Couldn't find revision field in cpuinfo");
    return soc;
  }

  if(strlen(revision) != 6) {
    printWarn("[RPi] Found invalid RPi revision code: '%s'", revision);
    free(revision);
    return soc;
  }

  // Only the PPPP digit is needed from here on, so the heap-allocated
  // revision string can be released right away
  char pppp_chr = revision[2];
  free(revision);

  int arr_size = ARRAY_SIZE(soc_rpi_string);
  int pppp = hex2int(pppp_chr);
  if(pppp == -1) {
    // pppp_chr is a single character, so it must be printed with %c
    printErr("[RPi] Found invalid RPi PPPP code: %c", pppp_chr);
    return soc;
  }

  // Valid indexes are 0 .. arr_size-1
  if(pppp >= arr_size) {
    printErr("[RPi] Found invalid RPi PPPP code: %d while max is %d", pppp, arr_size - 1);
    return soc;
  }

  match_broadcom(soc_rpi_string[pppp], soc);
  return soc;
}
struct system_on_chip* get_soc() {
struct system_on_chip* soc = malloc(sizeof(struct system_on_chip));
struct system_on_chip* soc = emalloc(sizeof(struct system_on_chip));
soc->raw_name = NULL;
soc->soc_vendor = SOC_VENDOR_UNKNOWN;
soc->process = UNKNOWN;
bool isRPi = is_raspberry_pi();
if(isRPi) {
soc = guess_soc_raspbery_pi(soc);
if(soc->soc_vendor == SOC_VENDOR_UNKNOWN) {
printWarn("SoC detection failed using revision code");
}
else {
return soc;
}
}
soc = guess_soc_from_cpuinfo(soc);
if(soc->soc_vendor == SOC_VENDOR_UNKNOWN) {
if(soc->raw_name != NULL)
@@ -534,7 +613,7 @@ struct system_on_chip* get_soc() {
}
if(soc->raw_name == NULL) {
soc->raw_name = malloc(sizeof(char) * (strlen(STRING_UNKNOWN)+1));
soc->raw_name = emalloc(sizeof(char) * (strlen(STRING_UNKNOWN)+1));
snprintf(soc->raw_name, strlen(STRING_UNKNOWN)+1, STRING_UNKNOWN);
}
@@ -555,14 +634,13 @@ char* get_str_process(struct system_on_chip* soc) {
char* str;
if(soc->process == UNKNOWN) {
str = malloc(sizeof(char) * (strlen(STRING_UNKNOWN)+1));
str = emalloc(sizeof(char) * (strlen(STRING_UNKNOWN)+1));
snprintf(str, strlen(STRING_UNKNOWN)+1, STRING_UNKNOWN);
}
else {
str = malloc(sizeof(char) * 5);
str = emalloc(sizeof(char) * 5);
memset(str, 0, sizeof(char) * 5);
snprintf(str, 5, "%dnm", soc->process);
}
return str;
}

View File

@@ -7,8 +7,6 @@
#include "uarch.h"
#include "../common/global.h"
#define STRING_UNKNOWN "Unknown"
// Data not available
#define NA -1
@@ -176,10 +174,10 @@ void fill_uarch(struct uarch* arch, struct cpuInfo* cpu, char* str, MICROARCH u,
arch->isa = isas_uarch[arch->uarch];
cpu->cpu_vendor = vendor;
arch->uarch_str = malloc(sizeof(char) * (strlen(str)+1));
arch->uarch_str = emalloc(sizeof(char) * (strlen(str)+1));
strcpy(arch->uarch_str, str);
arch->isa_str = malloc(sizeof(char) * (strlen(isas_string[arch->isa])+1));
arch->isa_str = emalloc(sizeof(char) * (strlen(isas_string[arch->isa])+1));
strcpy(arch->isa_str, isas_string[arch->isa]);
}
@@ -191,7 +189,7 @@ void fill_uarch(struct uarch* arch, struct cpuInfo* cpu, char* str, MICROARCH u,
* - https://elixir.bootlin.com/linux/latest/source/arch/arm/include/asm/cputype.h
*/
struct uarch* get_uarch_from_midr(uint32_t midr, struct cpuInfo* cpu) {
struct uarch* arch = malloc(sizeof(struct uarch));
struct uarch* arch = emalloc(sizeof(struct uarch));
uint32_t im = midr_get_implementer(midr);
uint32_t p = midr_get_part(midr);
uint32_t v = midr_get_variant(midr);
@@ -298,4 +296,3 @@ void free_uarch_struct(struct uarch* arch) {
free(arch->uarch_str);
free(arch);
}

View File

@@ -1,6 +1,8 @@
#include "../common/global.h"
#include "udev.h"
#include "midr.h"
#define _PATH_DEVICETREE_MODEL "/sys/firmware/devicetree/base/model"
#define _PATH_CPUS_PRESENT _PATH_SYS_SYSTEM _PATH_SYS_CPU "/present"
#define _PATH_CPUINFO "/proc/cpuinfo"
//#define _PATH_CPUINFO "cpuinfo_debug"
@@ -11,6 +13,7 @@
#define CPUINFO_CPU_PART_STR "CPU part\t: "
#define CPUINFO_CPU_REVISION_STR "CPU revision\t: "
#define CPUINFO_HARDWARE_STR "Hardware\t: "
#define CPUINFO_REVISION_STR "Revision\t: "
#define CPUINFO_CPU_STRING "processor"
@@ -24,7 +27,7 @@ int get_ncores_from_cpuinfo() {
int filelen;
char* buf;
if((buf = read_file(_PATH_CPUS_PRESENT, &filelen)) == NULL) {
perror("open");
printWarn("read_file: %s: %s\n", _PATH_CPUS_PRESENT, strerror(errno));
return UNKNOWN;
}
@@ -47,7 +50,7 @@ int get_ncores_from_cpuinfo() {
errno = 0;
ncores = strtol(ncores_str, &end, 10) + 1;
if(errno != 0) {
perror("strtol");
printWarn("strtol: %s:\n", strerror(errno));
return UNKNOWN;
}
@@ -65,7 +68,7 @@ long parse_cpuinfo_field(char* buf, char* field_str, int field_base) {
errno = 0;
long ret = strtol(tmp, &end, field_base);
if(errno != 0) {
perror("strtol");
printWarn("strtol: %s:\n", strerror(errno));
return -1;
}
@@ -78,7 +81,7 @@ uint32_t get_midr_from_cpuinfo(uint32_t core, bool* success) {
char* buf;
*success = true;
if((buf = read_file(_PATH_CPUINFO, &filelen)) == NULL) {
perror("open");
printWarn("read_file: %s: %s\n", _PATH_CPUINFO, strerror(errno));
*success = false;
return 0;
}
@@ -105,35 +108,35 @@ uint32_t get_midr_from_cpuinfo(uint32_t core, bool* success) {
long ret;
if ((ret = parse_cpuinfo_field(tmp, CPUINFO_CPU_IMPLEMENTER_STR, 16)) < 0) {
printf("Failed parsing cpu_implementer\n");
printBug("get_midr_from_cpuinfo: Failed parsing cpu_implementer\n");
*success = false;
return 0;
}
cpu_implementer = (uint32_t) ret;
if ((ret = parse_cpuinfo_field(tmp, CPUINFO_CPU_ARCHITECTURE_STR, 10)) < 0) {
printf("Failed parsing cpu_architecture\n");
printBug("get_midr_from_cpuinfo: Failed parsing cpu_architecture\n");
*success = false;
return 0;
}
cpu_architecture = (uint32_t) 0xF; // Why?
if ((ret = parse_cpuinfo_field(tmp, CPUINFO_CPU_VARIANT_STR, 16)) < 0) {
printf("Failed parsing cpu_variant\n");
printBug("get_midr_from_cpuinfo: Failed parsing cpu_variant\n");
*success = false;
return 0;
}
cpu_variant = (uint32_t) ret;
if ((ret = parse_cpuinfo_field(tmp, CPUINFO_CPU_PART_STR, 16)) < 0) {
printf("Failed parsing cpu_part\n");
printBug("get_midr_from_cpuinfo: Failed parsing cpu_part\n");
*success = false;
return 0;
}
cpu_part = (uint32_t) ret;
if ((ret = parse_cpuinfo_field(tmp, CPUINFO_CPU_REVISION_STR, 10)) < 0) {
printf("Failed parsing cpu_revision\n");
printBug("get_midr_from_cpuinfo: Failed parsing cpu_revision\n");
*success = false;
return 0;
}
@@ -148,24 +151,45 @@ uint32_t get_midr_from_cpuinfo(uint32_t core, bool* success) {
return midr;
}
char* get_hardware_from_cpuinfo() {
char* get_field_from_cpuinfo(char* CPUINFO_FIELD) {
int filelen;
char* buf;
if((buf = read_file(_PATH_CPUINFO, &filelen)) == NULL) {
perror("open");
printWarn("read_file: %s: %s:\n", _PATH_CPUINFO, strerror(errno));
return NULL;
}
char* tmp1 = strstr(buf, CPUINFO_HARDWARE_STR);
char* tmp1 = strstr(buf, CPUINFO_FIELD);
if(tmp1 == NULL) return NULL;
tmp1 = tmp1 + strlen(CPUINFO_HARDWARE_STR);
tmp1 = tmp1 + strlen(CPUINFO_FIELD);
char* tmp2 = strstr(tmp1, "\n");
int strlen = (1 + (tmp2-tmp1));
char* hardware = malloc(sizeof(char) * strlen);
char* hardware = emalloc(sizeof(char) * strlen);
memset(hardware, 0, sizeof(char) * strlen);
strncpy(hardware, tmp1, tmp2-tmp1);
return hardware;
}
// Returns the text that follows the "Hardware" label in cpuinfo, as produced
// by get_field_from_cpuinfo, or NULL if that function returns NULL.
char* get_hardware_from_cpuinfo() {
return get_field_from_cpuinfo(CPUINFO_HARDWARE_STR);
}
// Returns the text that follows the "Revision" label in cpuinfo, as produced
// by get_field_from_cpuinfo, or NULL if that function returns NULL.
char* get_revision_from_cpuinfo() {
return get_field_from_cpuinfo(CPUINFO_REVISION_STR);
}
/*
 * Tells whether the device tree model string contains "Raspberry Pi".
 * Returns false when the model file cannot be read.
 */
bool is_raspberry_pi() {
  int filelen;
  char* model = read_file(_PATH_DEVICETREE_MODEL, &filelen);

  if(model == NULL) return false;

  // NOTE(review): model is not released here; if read_file hands over
  // ownership of a heap buffer this leaks it - confirm against read_file
  return strstr(model, "Raspberry Pi") != NULL;
}

View File

@@ -7,6 +7,8 @@
int get_ncores_from_cpuinfo();
uint32_t get_midr_from_cpuinfo(uint32_t core, bool* success);
char* get_hardware_from_cpuinfo();
char* get_revision_from_cpuinfo();
bool is_raspberry_pi();
#endif

View File

@@ -5,8 +5,11 @@
#include "args.h"
#include "global.h"
#define NUM_COLORS 4
#define COLOR_STR_INTEL "intel"
#define COLOR_STR_AMD "amd"
#define COLOR_STR_IBM "ibm"
#define COLOR_STR_ARM "arm"
static const char *SYTLES_STR_LIST[] = {
@@ -24,7 +27,7 @@ struct args_struct {
bool verbose_flag;
bool version_flag;
STYLE style;
struct colors* colors;
struct color** colors;
};
const char args_chr[] = {
@@ -53,7 +56,7 @@ STYLE get_style() {
return args.style;
}
struct colors* get_colors() {
struct color** get_colors() {
return args.colors;
}
@@ -99,83 +102,62 @@ STYLE parse_style(char* style) {
return i;
}
void free_colors_struct(struct colors* cs) {
free(cs->c1);
free(cs->c2);
free(cs->c3);
free(cs->c4);
void free_colors_struct(struct color** cs) {
for(int i=0; i < NUM_COLORS; i++) {
free(cs[i]);
}
free(cs);
}
bool parse_color(char* optarg_str, struct colors** cs) {
*cs = malloc(sizeof(struct colors));
(*cs)->c1 = malloc(sizeof(struct color));
(*cs)->c2 = malloc(sizeof(struct color));
(*cs)->c3 = malloc(sizeof(struct color));
(*cs)->c4 = malloc(sizeof(struct color));
struct color** c1 = &((*cs)->c1);
struct color** c2 = &((*cs)->c2);
struct color** c3 = &((*cs)->c3);
struct color** c4 = &((*cs)->c4);
bool parse_color(char* optarg_str, struct color*** cs) {
for(int i=0; i < NUM_COLORS; i++) {
(*cs)[i] = emalloc(sizeof(struct color));
}
struct color** c = *cs;
int32_t ret;
char* str_to_parse = NULL;
bool free_ptr;
char* color_to_copy = NULL;
bool free_ptr = true;
if(strcmp(optarg_str, COLOR_STR_INTEL) == 0) {
str_to_parse = malloc(sizeof(char) * 46);
strcpy(str_to_parse, COLOR_DEFAULT_INTEL);
free_ptr = true;
}
else if(strcmp(optarg_str, COLOR_STR_AMD) == 0) {
str_to_parse = malloc(sizeof(char) * 44);
strcpy(str_to_parse, COLOR_DEFAULT_AMD);
free_ptr = true;
}
else if(strcmp(optarg_str, COLOR_STR_ARM) == 0) {
str_to_parse = malloc(sizeof(char) * 46);
strcpy(str_to_parse, COLOR_DEFAULT_ARM);
free_ptr = true;
}
if(strcmp(optarg_str, COLOR_STR_INTEL) == 0) color_to_copy = COLOR_DEFAULT_INTEL;
else if(strcmp(optarg_str, COLOR_STR_AMD) == 0) color_to_copy = COLOR_DEFAULT_AMD;
else if(strcmp(optarg_str, COLOR_STR_IBM) == 0) color_to_copy = COLOR_DEFAULT_IBM;
else if(strcmp(optarg_str, COLOR_STR_ARM) == 0) color_to_copy = COLOR_DEFAULT_ARM;
else {
str_to_parse = optarg_str;
free_ptr = false;
}
if(str_to_parse == NULL) {
str_to_parse = emalloc(sizeof(char) * (strlen(color_to_copy) + 1));
strcpy(str_to_parse, color_to_copy);
}
ret = sscanf(str_to_parse, "%d,%d,%d:%d,%d,%d:%d,%d,%d:%d,%d,%d",
&(*c1)->R, &(*c1)->G, &(*c1)->B,
&(*c2)->R, &(*c2)->G, &(*c2)->B,
&(*c3)->R, &(*c3)->G, &(*c3)->B,
&(*c4)->R, &(*c4)->G, &(*c4)->B);
&c[0]->R, &c[0]->G, &c[0]->B,
&c[1]->R, &c[1]->G, &c[1]->B,
&c[2]->R, &c[2]->G, &c[2]->B,
&c[3]->R, &c[3]->G, &c[3]->B);
if(ret != 12) {
printErr("Expected to read 12 values for color but read %d", ret);
return false;
}
//TODO: Refactor c1->R c2->R ... to c[i]->R
if((*c1)->R < 0 || (*c1)->R > 255) {
printErr("Red in color 1 is invalid. Must be in range (0, 255)");
for(int i=0; i < NUM_COLORS; i++) {
if(c[i]->R < 0 || c[i]->R > 255) {
printErr("Red in color %d is invalid: %d; must be in range (0, 255)", i+1, c[i]->R);
return false;
}
if((*c1)->G < 0 || (*c1)->G > 255) {
printErr("Green in color 1 is invalid. Must be in range (0, 255)");
if(c[i]->G < 0 || c[i]->G > 255) {
printErr("Green in color %d is invalid: %d; must be in range (0, 255)", i+1, c[i]->G);
return false;
}
if((*c1)->B < 0 || (*c1)->B > 255) {
printErr("Blue in color 1 is invalid. Must be in range (0, 255)");
if(c[i]->B < 0 || c[i]->B > 255) {
printErr("Blue in color %d is invalid: %d; must be in range (0, 255)", i+1, c[i]->B);
return false;
}
if((*c2)->R < 0 || (*c2)->R > 255) {
printErr("Red in color 2 is invalid. Must be in range (0, 255)");
return false;
}
if((*c2)->G < 0 || (*c2)->G > 255) {
printErr("Green in color 2 is invalid. Must be in range (0, 255)");
return false;
}
if((*c2)->B < 0 || (*c2)->B > 255) {
printErr("Blue in color 2 is invalid. Must be in range (0, 255)");
return false;
}
if(free_ptr) free (str_to_parse);
@@ -186,7 +168,7 @@ bool parse_color(char* optarg_str, struct colors** cs) {
char* build_short_options() {
const char *c = args_chr;
int len = sizeof(args_chr) / sizeof(args_chr[0]);
char* str = (char *) malloc(sizeof(char) * (len*2 + 1));
char* str = (char *) emalloc(sizeof(char) * (len*2 + 1));
memset(str, 0, sizeof(char) * (len*2 + 1));
#ifdef ARCH_X86
@@ -238,8 +220,8 @@ bool parse_args(int argc, char* argv[]) {
return false;
}
color_flag = true;
args.colors = emalloc(sizeof(struct color *) * NUM_COLORS);
if(!parse_color(optarg, &args.colors)) {
printErr("Color parsing failed");
return false;
}
}

View File

@@ -10,13 +10,6 @@ struct color {
int32_t B;
};
struct colors {
struct color* c1;
struct color* c2;
struct color* c3;
struct color* c4;
};
enum {
STYLE_EMPTY,
STYLE_FANCY,
@@ -48,8 +41,8 @@ bool show_raw();
bool show_debug();
bool show_version();
bool verbose_enabled();
void free_colors_struct(struct colors* cs);
struct colors* get_colors();
void free_colors_struct(struct color** cs);
struct color** get_colors();
STYLE get_style();
#endif

View File

@@ -133,14 +133,14 @@
#define BROADCOM_ASCII \
" \
################ \
######################### \
############################### \
########################## \
################################ \
################@@@@################ \
################@@@@@@################ \
#################@@@@@@################ \
#################@@@@@@################# \
#################@@@@@@@@################# \
#################@@@@@@@@################# \
#################@@@@##@@@@################ \
################@@@@##@@@@################ \
################@@@@##@@@@################ \
###############@@@@####@@@@############### \
@@@@@@@@@@####@@@@####@@@@####@@@@@@@@@@ \
@@ -172,6 +172,29 @@
\
"
// jp2a --height=17 ibm.jpg
#define IBM_ASCII \
" \
\
\
############ ################ ########## ########## \
\
############ ################## ############ ############ \
\
###### ###### ###### #################### \
\
###### ############## #################### \
\
###### ###### ###### ##### ###### ##### \
\
############ ################## ######### #### ######### \
\
############ ################ ######### ## ######### \
\
\
"
#define UNKNOWN_ASCII \
" \
\
@@ -202,6 +225,7 @@ static const char* ASCII_ARRAY [] = {
EXYNOS_ASCII,
KIRIN_ASCII,
BROADCOM_ASCII,
IBM_ASCII,
UNKNOWN_ASCII
};

View File

@@ -9,11 +9,13 @@
#ifdef ARCH_X86
#include "../x86/uarch.h"
#include "../x86/apic.h"
#elif ARCH_PPC
#include "../ppc/uarch.h"
#elif ARCH_ARM
#include "../arm/uarch.h"
#endif
#define STRING_UNKNOWN "Unknown"
#define STRING_YES "Yes"
#define STRING_NO "No"
#define STRING_NONE "None"
@@ -30,13 +32,13 @@ int64_t get_freq(struct frequency* freq) {
return freq->max;
}
#ifdef ARCH_X86
#if defined(ARCH_X86) || defined(ARCH_PPC)
char* get_str_cpu_name(struct cpuInfo* cpu) {
return cpu->cpu_name;
}
char* get_str_sockets(struct topology* topo) {
char* string = malloc(sizeof(char) * 2);
char* string = emalloc(sizeof(char) * 2);
int32_t sanity_ret = snprintf(string, 2, "%d", topo->sockets);
if(sanity_ret < 0) {
printBug("get_str_sockets: snprintf returned a negative value for input: '%d'", topo->sockets);
@@ -51,71 +53,58 @@ uint32_t get_nsockets(struct topology* topo) {
#endif
int32_t get_value_as_smallest_unit(char ** str, uint32_t value) {
int32_t sanity_ret;
*str = malloc(sizeof(char)* 11); //8 for digits, 2 for units
int32_t ret;
int max_len = 10; // Max is 8 for digits, 2 for units
*str = emalloc(sizeof(char)* (max_len + 1));
if(value/1024 >= 1024)
sanity_ret = snprintf(*str, 10,"%.4g"STRING_MEGABYTES, (double)value/(1<<20));
ret = snprintf(*str, max_len, "%.4g"STRING_MEGABYTES, (double)value/(1<<20));
else
sanity_ret = snprintf(*str, 10,"%.4g"STRING_KILOBYTES, (double)value/(1<<10));
ret = snprintf(*str, max_len, "%.4g"STRING_KILOBYTES, (double)value/(1<<10));
return sanity_ret;
return ret;
}
// String functions
char* get_str_cache_two(int32_t cache_size, uint32_t physical_cores) {
// 4 for digits, 2 for units, 2 for ' (', 3 digits, 2 for units and 7 for ' Total)'
uint32_t max_size = 4+2 + 2 + 4+2 + 7 + 1;
int32_t sanity_ret;
char* string = malloc(sizeof(char) * max_size);
char* tmp1;
char* tmp2;
int32_t tmp1_len = get_value_as_smallest_unit(&tmp1, cache_size);
int32_t tmp2_len = get_value_as_smallest_unit(&tmp2, cache_size * physical_cores);
// tmp1_len for first output, 2 for ' (', tmp2_len for second output and 7 for ' Total)'
uint32_t size = tmp1_len + 2 + tmp2_len + 7 + 1;
char* string = emalloc(sizeof(char) * size);
if(tmp1_len < 0) {
printBug("get_value_as_smallest_unit: snprintf returned a negative value for input: %d\n", cache_size);
printBug("get_value_as_smallest_unit: snprintf failed for input: %d\n", cache_size);
return NULL;
}
if(tmp2_len < 0) {
printBug("get_value_as_smallest_unit: snprintf returned a negative value for input: %d\n", cache_size * physical_cores);
printBug("get_value_as_smallest_unit: snprintf failed for input: %d\n", cache_size * physical_cores);
return NULL;
}
uint32_t size = tmp1_len + 2 + tmp2_len + 7 + 1;
sanity_ret = snprintf(string, size, "%s (%s Total)", tmp1, tmp2);
if(sanity_ret < 0) {
printBug("get_str_cache_two: snprintf returned a negative value for input: '%s' and '%s'\n", tmp1, tmp2);
if(snprintf(string, size, "%s (%s Total)", tmp1, tmp2) < 0) {
printBug("get_str_cache_two: snprintf failed for input: '%s' and '%s'\n", tmp1, tmp2);
return NULL;
}
free(tmp1);
free(tmp2);
return string;
}
char* get_str_cache_one(int32_t cache_size) {
// 4 for digits, 2 for units, 2 for ' (', 3 digits, 2 for units and 7 for ' Total)'
uint32_t max_size = 4+2 + 1;
int32_t sanity_ret;
char* string = malloc(sizeof(char) * max_size);
char* tmp;
int32_t tmp_len = get_value_as_smallest_unit(&tmp, cache_size);
char* string;
int32_t str_len = get_value_as_smallest_unit(&string, cache_size);
if(tmp_len < 0) {
printBug("get_value_as_smallest_unit: snprintf returned a negative value for input: %d", cache_size);
if(str_len < 0) {
printBug("get_value_as_smallest_unit: snprintf failed for input: %d", cache_size);
return NULL;
}
uint32_t size = tmp_len + 1;
sanity_ret = snprintf(string, size, "%s", tmp);
if(sanity_ret < 0) {
printBug("get_str_cache_one: snprintf returned a negative value for input: '%s'", tmp);
return NULL;
}
free(tmp);
return string;
}
@@ -149,7 +138,7 @@ char* get_str_freq(struct frequency* freq) {
//Max 3 digits and 3 for '(M/G)Hz' plus 1 for '\0'
uint32_t size = (5+1+3+1);
assert(strlen(STRING_UNKNOWN)+1 <= size);
char* string = malloc(sizeof(char)*size);
char* string = emalloc(sizeof(char)*size);
memset(string, 0, sizeof(char)*size);
if(freq->max == UNKNOWN_FREQ || freq->max < 0)
@@ -162,6 +151,64 @@ char* get_str_freq(struct frequency* freq) {
return string;
}
/*
 * Builds a newly allocated, human readable string for a peak performance
 * value given in FLOP/s. A value of -1 yields STRING_UNKNOWN; otherwise the
 * value is printed with two decimals, scaled to MFLOP/s, GFLOP/s or TFLOP/s.
 */
char* get_str_peak_performance(int64_t flops) {
  if(flops == -1) {
    size_t unknown_len = strlen(STRING_UNKNOWN) + 1;
    char* unknown = emalloc(sizeof(char) * unknown_len);
    memcpy(unknown, STRING_UNKNOWN, unknown_len);
    return unknown;
  }

  // 7 for digits (e.g, XXXX.XX), 1 for the space, 7 for XFLOP/s, 1 for '\0'
  const uint32_t buf_len = 7+1+7+1;
  const double value = (double) flops;
  char* buf = ecalloc(buf_len, sizeof(char));

  // Pick the largest unit that keeps the printed number >= 1
  if(value < 1000000000.0) {
    snprintf(buf, buf_len, "%.2f MFLOP/s", value/1000000);
  }
  else if(value < 1000000000000.0) {
    snprintf(buf, buf_len, "%.2f GFLOP/s", value/1000000000);
  }
  else {
    snprintf(buf, buf_len, "%.2f TFLOP/s", value/1000000000000);
  }

  return buf;
}
/*
 * Resets a topology struct: attaches the given cache struct and zeroes the
 * core/socket counters available for the target architecture. On x86 an
 * apic struct is also allocated (its contents are not initialized here).
 */
void init_topology_struct(struct topology* topo, struct cache* cach) {
  topo->cach = cach;
  topo->total_cores = 0;

#if defined(ARCH_X86) || defined(ARCH_PPC)
  // Fields shared by x86 and PowerPC
  topo->sockets = 0;
  topo->smt_supported = 0;
  topo->logical_cores = 0;
  topo->physical_cores = 0;
#endif

#ifdef ARCH_X86
  // Fields that only exist on x86
  topo->apic = emalloc(sizeof(struct apic));
  topo->smt_available = 0;
#endif
}
/*
 * Allocates the four cache level structs (L1i, L1d, L2, L3) of cach together
 * with cach_arr, which references them in that order. Every level is marked
 * as not existing and max_cache_level is set to 0; no other field of the
 * level structs is initialized here.
 */
void init_cache_struct(struct cache* cach) {
  cach->cach_arr = emalloc(sizeof(struct cach*) * 4);

  for(int i=0; i < 4; i++) {
    cach->cach_arr[i] = emalloc(sizeof(struct cach));
    cach->cach_arr[i]->exists = false;
  }

  // Named shortcuts to the entries of cach_arr
  cach->L1i = cach->cach_arr[0];
  cach->L1d = cach->cach_arr[1];
  cach->L2  = cach->cach_arr[2];
  cach->L3  = cach->cach_arr[3];

  cach->max_cache_level = 0;
}
void free_cache_struct(struct cache* cach) {
for(int i=0; i < 4; i++) free(cach->cach_arr[i]);
free(cach->cach_arr);

View File

@@ -69,14 +69,16 @@ struct cache {
struct topology {
int32_t total_cores;
struct cache* cach;
#ifdef ARCH_X86
#if defined(ARCH_X86) || defined(ARCH_PPC)
uint32_t physical_cores;
uint32_t logical_cores;
uint32_t smt_available; // Number of SMT that is currently enabled
uint32_t smt_supported; // Number of SMT that CPU supports (equal to smt_available if SMT is enabled)
uint32_t sockets;
uint32_t smt_supported; // Number of SMT that CPU supports (equal to smt_available if SMT is enabled)
#ifdef ARCH_X86
uint32_t smt_available; // Number of SMT that is currently enabled
struct apic* apic;
#endif
#endif
};
struct features {
@@ -95,6 +97,8 @@ struct features {
bool FMA3;
bool FMA4;
bool SHA;
#elif ARCH_PPC
bool altivec;
#elif ARCH_ARM
bool NEON;
bool SHA1;
@@ -111,16 +115,22 @@ struct cpuInfo {
struct cache* cach;
struct topology* topo;
struct features* feat;
int64_t peak_performance;
#ifdef ARCH_X86
#if defined(ARCH_X86) || defined(ARCH_PPC)
// CPU name from model
char* cpu_name;
#endif
#ifdef ARCH_X86
// Max cpuids levels
uint32_t maxLevels;
// Max cpuids extended levels
uint32_t maxExtendedLevels;
// Topology Extensions (AMD only)
bool topology_extensions;
#elif ARCH_PPC
uint32_t pvr;
#elif ARCH_ARM
// Main ID register
uint32_t midr;
@@ -136,7 +146,7 @@ struct cpuInfo {
#endif
};
#ifdef ARCH_X86
#if defined(ARCH_X86) || defined(ARCH_PPC)
char* get_str_cpu_name(struct cpuInfo* cpu);
char* get_str_sockets(struct topology* topo);
uint32_t get_nsockets(struct topology* topo);
@@ -152,6 +162,10 @@ char* get_str_l1d(struct cache* cach);
char* get_str_l2(struct cache* cach);
char* get_str_l3(struct cache* cach);
char* get_str_freq(struct frequency* freq);
char* get_str_peak_performance(int64_t flops);
void init_topology_struct(struct topology* topo, struct cache* cach);
void init_cache_struct(struct cache* cach);
void free_cache_struct(struct cache* cach);
void free_freq_struct(struct frequency* freq);

View File

@@ -1,5 +1,9 @@
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include "global.h"
#ifdef _WIN32
@@ -53,7 +57,7 @@ void printBug(const char *fmt, ...) {
vsnprintf(buffer,buffer_size, fmt, args);
va_end(args);
fprintf(stderr,RED "[ERROR]: "RESET "%s\n",buffer);
#ifdef ARCH_X86
#if defined(ARCH_X86) || defined(ARCH_PPC)
fprintf(stderr,"Please, create a new issue with this error message and the output of 'cpufetch --debug' in https://github.com/Dr-Noob/cpufetch/issues\n");
#elif ARCH_ARM
fprintf(stderr,"Please, create a new issue with this error message, your smartphone/computer model and the output of 'cpufetch --debug' in https://github.com/Dr-Noob/cpufetch/issues\n");
@@ -68,3 +72,25 @@ void set_log_level(bool verbose) {
int max(int a, int b) {
return a > b ? a : b;
}
/*
 * malloc wrapper that never returns NULL: if the allocation fails, an error
 * is printed (with the strerror text for errno) and the program exits with
 * status 1.
 */
void* emalloc(size_t size) {
  void* mem = malloc(size);

  if(mem != NULL) return mem;

  printErr("malloc failed: %s", strerror(errno));
  exit(1);
}
/*
 * calloc wrapper that never returns NULL: if the allocation fails, an error
 * is printed (with the strerror text for errno) and the program exits with
 * status 1.
 */
void* ecalloc(size_t nmemb, size_t size) {
  void* mem = calloc(nmemb, size);

  if(mem != NULL) return mem;

  printErr("calloc failed: %s", strerror(errno));
  exit(1);
}

View File

@@ -2,11 +2,16 @@
#define __GLOBAL__
#include <stdbool.h>
#include <stddef.h>
#define STRING_UNKNOWN "Unknown"
void set_log_level(bool verbose);
void printWarn(const char *fmt, ...);
void printErr(const char *fmt, ...);
void printBug(const char *fmt, ...);
int max(int a, int b);
void* emalloc(size_t size);
void* ecalloc(size_t nmemb, size_t size);
#endif

View File

@@ -9,6 +9,9 @@
#ifdef ARCH_X86
static const char* ARCH_STR = "x86_64 build";
#include "../x86/cpuid.h"
#elif ARCH_PPC
static const char* ARCH_STR = "PowerPC build";
#include "../ppc/ppc.h"
#elif ARCH_ARM
static const char* ARCH_STR = "ARM build";
#include "../arm/midr.h"
@@ -20,6 +23,8 @@
#else
static const char* OS_STR = "Linux";
#endif
#elif __FreeBSD__
static const char* OS_STR = "FreeBSD";
#elif _WIN32
static const char* OS_STR = "Windows";
#elif defined __APPLE__ || __MACH__
@@ -28,7 +33,7 @@
static const char* OS_STR = "Unknown OS";
#endif
static const char* VERSION = "0.98";
static const char* VERSION = "0.99";
void print_help(char *argv[]) {
const char **t = args_str;
@@ -56,8 +61,9 @@ void print_help(char *argv[]) {
printf("\nCOLORS: \n");
printf(" * \"intel\": Use Intel default color scheme \n");
printf(" * \"amd\": Use AMD default color scheme \n");
printf(" * \"ibm\", Use IBM default color scheme \n");
printf(" * \"arm\": Use ARM default color scheme \n");
printf(" * custom: If color argument do not match \"intel\", \"amd\" or \"arm\", a custom scheme can be specified.\n");
printf(" * custom: If color argument do not match \"intel\", \"amd\", \"ibm\" or \"arm\", a custom scheme can be specified.\n");
printf(" 4 colors must be given in RGB with the format: R,G,B:R,G,B:...\n");
printf(" The first 2 colors are the CPU art color and the next 2 colors are the text colors\n");

View File

@@ -11,6 +11,9 @@
#ifdef ARCH_X86
#include "../x86/uarch.h"
#include "../x86/cpuid.h"
#elif ARCH_PPC
#include "../ppc/uarch.h"
#include "../ppc/ppc.h"
#else
#include "../arm/uarch.h"
#include "../arm/midr.h"
@@ -45,7 +48,7 @@
#define COLOR_RESET "\x1b[m"
enum {
#ifdef ARCH_X86
#if defined(ARCH_X86) || defined(ARCH_PPC)
ATTRIBUTE_NAME,
#elif ARCH_ARM
ATTRIBUTE_SOC,
@@ -61,6 +64,8 @@ enum {
#ifdef ARCH_X86
ATTRIBUTE_AVX,
ATTRIBUTE_FMA,
#elif ARCH_PPC
ATTRIBUTE_ALTIVEC,
#elif ARCH_ARM
ATTRIBUTE_FEATURES,
#endif
@@ -72,9 +77,9 @@ enum {
};
static const char* ATTRIBUTE_FIELDS [] = {
#ifdef ARCH_X86
#if defined(ARCH_X86) || defined(ARCH_PPC)
"Name:",
#elif ARCH_ARM
#elif ARCH_ARM
"SoC:",
"",
#endif
@@ -88,7 +93,9 @@ static const char* ATTRIBUTE_FIELDS [] = {
#ifdef ARCH_X86
"AVX:",
"FMA:",
#elif ARCH_ARM
#elif ARCH_PPC
"Altivec: ",
#elif defined(ARCH_ARM)
"Features: ",
#endif
"L1i Size:",
@@ -129,7 +136,7 @@ void setAttribute(struct ascii* art, int type, char* value) {
}
char* rgb_to_ansi(struct color* c, bool background, bool bold) {
char* str = malloc(sizeof(char) * 100);
char* str = emalloc(sizeof(char) * 100);
if(background) {
snprintf(str, 44, "\x1b[48;2;%.3d;%.3d;%.3dm", c->R, c->G, c->B);
}
@@ -143,15 +150,15 @@ char* rgb_to_ansi(struct color* c, bool background, bool bold) {
return str;
}
struct ascii* set_ascii(VENDOR vendor, STYLE style, struct colors* cs) {
struct ascii* set_ascii(VENDOR vendor, STYLE style, struct color** cs) {
char *COL_FANCY_1, *COL_FANCY_2, *COL_FANCY_3, *COL_FANCY_4, *COL_RETRO_1, *COL_RETRO_2, *COL_RETRO_3, *COL_RETRO_4;
struct ascii* art = malloc(sizeof(struct ascii));
struct ascii* art = emalloc(sizeof(struct ascii));
art->n_attributes_set = 0;
art->additional_spaces = 0;
art->vendor = vendor;
art->attributes = malloc(sizeof(struct attribute *) * MAX_ATTRIBUTES);
art->attributes = emalloc(sizeof(struct attribute *) * MAX_ATTRIBUTES);
for(uint32_t i=0; i < MAX_ATTRIBUTES; i++) {
art->attributes[i] = malloc(sizeof(struct attribute));
art->attributes[i] = emalloc(sizeof(struct attribute));
art->attributes[i]->type = 0;
art->attributes[i]->value = NULL;
}
@@ -176,6 +183,12 @@ struct ascii* set_ascii(VENDOR vendor, STYLE style, struct colors* cs) {
printBug("Invalid CPU vendor in set_ascii (%d)", art->vendor);
return NULL;
}
#elif ARCH_PPC
COL_FANCY_1 = COLOR_BG_CYAN;
COL_FANCY_2 = COLOR_BG_WHITE;
COL_FANCY_3 = COLOR_FG_CYAN;
COL_FANCY_4 = COLOR_FG_WHITE;
art->ascii_chars[0] = '#';
#elif ARCH_ARM
if(art->vendor == SOC_VENDOR_SNAPDRAGON) {
COL_FANCY_1 = COLOR_BG_RED;
@@ -265,10 +278,10 @@ struct ascii* set_ascii(VENDOR vendor, STYLE style, struct colors* cs) {
break;
case STYLE_FANCY:
if(cs != NULL) {
COL_FANCY_1 = rgb_to_ansi(cs->c1, true, true);
COL_FANCY_2 = rgb_to_ansi(cs->c2, true, true);
COL_FANCY_3 = rgb_to_ansi(cs->c3, false, true);
COL_FANCY_4 = rgb_to_ansi(cs->c4, false, true);
COL_FANCY_1 = rgb_to_ansi(cs[0], true, true);
COL_FANCY_2 = rgb_to_ansi(cs[1], true, true);
COL_FANCY_3 = rgb_to_ansi(cs[2], false, true);
COL_FANCY_4 = rgb_to_ansi(cs[3], false, true);
}
art->ascii_chars[0] = ' ';
art->ascii_chars[1] = ' ';
@@ -285,10 +298,10 @@ struct ascii* set_ascii(VENDOR vendor, STYLE style, struct colors* cs) {
break;
case STYLE_RETRO:
if(cs != NULL) {
COL_RETRO_1 = rgb_to_ansi(cs->c1, false, true);
COL_RETRO_2 = rgb_to_ansi(cs->c2, false, true);
COL_RETRO_3 = rgb_to_ansi(cs->c3, false, true);
COL_RETRO_4 = rgb_to_ansi(cs->c4, false, true);
COL_RETRO_1 = rgb_to_ansi(cs[0], false, true);
COL_RETRO_2 = rgb_to_ansi(cs[1], false, true);
COL_RETRO_3 = rgb_to_ansi(cs[2], false, true);
COL_RETRO_4 = rgb_to_ansi(cs[3], false, true);
}
strcpy(art->color1_ascii,COL_RETRO_1);
strcpy(art->color2_ascii,COL_RETRO_2);
@@ -315,6 +328,8 @@ struct ascii* set_ascii(VENDOR vendor, STYLE style, struct colors* cs) {
strcpy(tmp, AMD_ASCII);
else
strcpy(tmp, UNKNOWN_ASCII);
#elif ARCH_PPC
strcpy(tmp, IBM_ASCII);
#elif ARCH_ARM
if(art->vendor == SOC_VENDOR_SNAPDRAGON)
strcpy(tmp, SNAPDRAGON_ASCII);
@@ -426,7 +441,7 @@ void print_ascii(struct ascii* art) {
}
bool print_cpufetch_x86(struct cpuInfo* cpu, STYLE s, struct colors* cs) {
bool print_cpufetch_x86(struct cpuInfo* cpu, STYLE s, struct color** cs) {
struct ascii* art = set_ascii(get_cpu_vendor(cpu), s, cs);
if(art == NULL)
return false;
@@ -441,12 +456,11 @@ bool print_cpufetch_x86(struct cpuInfo* cpu, STYLE s, struct colors* cs) {
char* avx = get_str_avx(cpu);
char* fma = get_str_fma(cpu);
char* l1i = get_str_l1i(cpu->cach);
char* l1d = get_str_l1d(cpu->cach);
char* l2 = get_str_l2(cpu->cach);
char* l3 = get_str_l3(cpu->cach);
char* pp = get_str_peak_performance(cpu,cpu->topo,get_freq(cpu->freq));
char* pp = get_str_peak_performance(cpu->peak_performance);
setAttribute(art,ATTRIBUTE_NAME,cpu_name);
if(cpu->hv->present) {
@@ -507,6 +521,102 @@ bool print_cpufetch_x86(struct cpuInfo* cpu, STYLE s, struct colors* cs) {
}
#endif
#ifdef ARCH_PPC
/*
 * Prints row n of the logo stored in art->art.
 * Cells holding '#' are printed as art->ascii_chars[0] surrounded by
 * art->color1_ascii and art->reset; every other cell is printed verbatim.
 */
void print_algorithm_ppc(struct ascii* art, int n) {
  int col = 0;

  while(col < LINE_SIZE) {
    char cell = art->art[n][col];
    col++;

    if(cell != '#') {
      printf("%c", cell);
      continue;
    }
    printf("%s%c%s", art->color1_ascii, art->ascii_chars[0], art->reset);
  }
}
/*
 * Prints the logo line by line and, vertically centred next to it, the
 * attributes stored in art->attributes (label, padding, value).
 *
 * art: logo, colors and the list of attributes already set
 * la:  length of the longest attribute label, used to align the values
 */
void print_ascii_ppc(struct ascii* art, uint32_t la) {
  int attr_to_print = 0;
  // Empty lines above and below the attribute block
  uint32_t space_up = (NUMBER_OF_LINES - art->n_attributes_set)/2;
  uint32_t space_down = NUMBER_OF_LINES - art->n_attributes_set - space_up;

  printf("\n");
  for(uint32_t n=0; n < NUMBER_OF_LINES; n++) {
    print_algorithm_ppc(art, n);

    // 'n >= space_up' instead of 'n > space_up-1': space_up is unsigned, so
    // the subtraction wrapped around when space_up was 0 and no attribute
    // was ever printed in that case
    if(n >= space_up && n < NUMBER_OF_LINES-space_down) {
      int attr_type = art->attributes[attr_to_print]->type;
      char* attr_value = art->attributes[attr_to_print]->value;
      attr_to_print++;

      // The '*' field width of printf must be passed as an int
      int space_right = (int) (1 + (la - strlen(ATTRIBUTE_FIELDS[attr_type])));
      printf("%s%s%s%*s%s%s%s\n", art->color1_text, ATTRIBUTE_FIELDS[attr_type], art->reset, space_right, "", art->color2_text, attr_value, art->reset);
    }
    else printf("\n");
  }
  printf("\n");
}
/*
 * PowerPC entry point for printing: computes the length of the longest
 * attribute label and hands it to print_ascii_ppc.
 */
void print_ascii(struct ascii* art) {
  print_ascii_ppc(art, longest_attribute_length(art));
}
/*
 * Collects every printable field of a PowerPC CPU, registers them as
 * attributes (in display order) and prints the result.
 *
 * Returns false when set_ascii fails or when more attributes were set than
 * NUMBER_OF_LINES; true once the output has been printed.
 */
bool print_cpufetch_ppc(struct cpuInfo* cpu, STYLE s, struct color** cs) {
  struct ascii* art = set_ascii(get_cpu_vendor(cpu), s, cs);
  if(art == NULL) {
    return false;
  }

  // Build all strings up front
  char* str_uarch      = get_str_uarch(cpu);
  char* str_process    = get_str_process(cpu);
  char* str_sockets    = get_str_sockets(cpu->topo);
  char* str_freq       = get_str_freq(cpu->freq);
  char* str_name       = get_str_cpu_name(cpu);
  char* str_cores      = get_str_topology(cpu->topo, false);
  char* str_cores_dual = get_str_topology(cpu->topo, true);
  char* str_altivec    = get_str_altivec(cpu);
  char* str_l1i        = get_str_l1i(cpu->cach);
  char* str_l1d        = get_str_l1d(cpu->cach);
  char* str_l2         = get_str_l2(cpu->cach);
  char* str_l3         = get_str_l3(cpu->cach);
  char* str_pp         = get_str_peak_performance(cpu->peak_performance);

  // The name is optional
  if(str_name != NULL) {
    setAttribute(art, ATTRIBUTE_NAME, str_name);
  }
  setAttribute(art, ATTRIBUTE_UARCH, str_uarch);
  setAttribute(art, ATTRIBUTE_TECHNOLOGY, str_process);
  setAttribute(art, ATTRIBUTE_FREQUENCY, str_freq);

  // With several sockets, the per-socket core count is framed by the socket
  // count and the whole-system core count
  const bool multi_socket = get_nsockets(cpu->topo) > 1;
  if(multi_socket) {
    setAttribute(art, ATTRIBUTE_SOCKETS, str_sockets);
  }
  setAttribute(art, ATTRIBUTE_NCORES, str_cores);
  if(multi_socket) {
    setAttribute(art, ATTRIBUTE_NCORES_DUAL, str_cores_dual);
  }

  setAttribute(art, ATTRIBUTE_ALTIVEC, str_altivec);
  setAttribute(art, ATTRIBUTE_L1i, str_l1i);
  setAttribute(art, ATTRIBUTE_L1d, str_l1d);
  setAttribute(art, ATTRIBUTE_L2, str_l2);
  // L3 is only shown when a string was produced for it
  if(str_l3 != NULL) {
    setAttribute(art, ATTRIBUTE_L3, str_l3);
  }
  setAttribute(art, ATTRIBUTE_PEAK, str_pp);

  if(art->n_attributes_set <= NUMBER_OF_LINES) {
    print_ascii(art);
    return true;
  }

  printBug("The number of attributes set is bigger than the max that can be displayed");
  return false;
}
#endif
#ifdef ARCH_ARM
void print_algorithm_snapd_mtk(struct ascii* art, int n) {
for(int i=0; i < LINE_SIZE; i++) {
@@ -617,7 +727,7 @@ void print_ascii(struct ascii* art) {
}
bool print_cpufetch_arm(struct cpuInfo* cpu, STYLE s, struct colors* cs) {
bool print_cpufetch_arm(struct cpuInfo* cpu, STYLE s, struct color** cs) {
struct ascii* art = set_ascii(get_soc_vendor(cpu->soc), s, cs);
if(art == NULL)
return false;
@@ -665,7 +775,7 @@ bool print_cpufetch_arm(struct cpuInfo* cpu, STYLE s, struct colors* cs) {
* in the future
*/
char* cpu_num = malloc(sizeof(char) * 9);
char* cpu_num = emalloc(sizeof(char) * 9);
sprintf(cpu_num, "CPU %d:", i+1);
setAttribute(art, ATTRIBUTE_CPU_NUM, cpu_num);
setAttribute(art, ATTRIBUTE_UARCH, uarch);
@@ -676,7 +786,7 @@ bool print_cpufetch_arm(struct cpuInfo* cpu, STYLE s, struct colors* cs) {
}
}
}
char* pp = get_str_peak_performance(cpu);
char* pp = get_str_peak_performance(cpu->peak_performance);
setAttribute(art,ATTRIBUTE_PEAK,pp);
if(art->n_attributes_set > NUMBER_OF_LINES) {
@@ -702,7 +812,7 @@ bool print_cpufetch_arm(struct cpuInfo* cpu, STYLE s, struct colors* cs) {
}
#endif
bool print_cpufetch(struct cpuInfo* cpu, STYLE s, struct colors* cs) {
bool print_cpufetch(struct cpuInfo* cpu, STYLE s, struct color** cs) {
// Sanity check of ASCII arts
int len = sizeof(ASCII_ARRAY) / sizeof(ASCII_ARRAY[0]);
for(int i=0; i < len; i++) {
@@ -715,6 +825,8 @@ bool print_cpufetch(struct cpuInfo* cpu, STYLE s, struct colors* cs) {
#ifdef ARCH_X86
return print_cpufetch_x86(cpu, s, cs);
#elif ARCH_PPC
return print_cpufetch_ppc(cpu, s, cs);
#elif ARCH_ARM
return print_cpufetch_arm(cpu, s, cs);
#endif

View File

@@ -7,18 +7,21 @@ typedef int STYLE;
#ifdef ARCH_X86
#include "../x86/cpuid.h"
#else
#elif ARCH_PPC
#include "../ppc/ppc.h"
#elif ARCH_ARM
#include "../arm/midr.h"
#endif
#define COLOR_DEFAULT_INTEL "15,125,194:230,230,230:40,150,220:230,230,230"
#define COLOR_DEFAULT_AMD "250,250,250:0,154,102:250,250,250:0,154,102"
#define COLOR_DEFAULT_IBM "92,119,172:92,119,172:240,240,240:92,119,172"
#define COLOR_DEFAULT_ARM "0,145,189:0,145,189:240,240,240:0,145,189"
#ifdef ARCH_X86
void print_levels(struct cpuInfo* cpu);
#endif
bool print_cpufetch(struct cpuInfo* cpu, STYLE s, struct colors* cs);
bool print_cpufetch(struct cpuInfo* cpu, STYLE s, struct color** cs);
#endif

View File

@@ -13,7 +13,7 @@ char* read_file(char* path, int* len) {
int bytes_read = 0;
int offset = 0;
int block = 128;
char* buf = malloc(sizeof(char)*DEFAULT_FILE_SIZE);
char* buf = emalloc(sizeof(char)*DEFAULT_FILE_SIZE);
memset(buf, 0, sizeof(char)*DEFAULT_FILE_SIZE);
while ( (bytes_read = read(fd, buf+offset, block)) > 0 ) {
@@ -28,15 +28,11 @@ char* read_file(char* path, int* len) {
return buf;
}
long get_freq_from_file(char* path, bool hv_present) {
long get_freq_from_file(char* path) {
int filelen;
char* buf;
if((buf = read_file(path, &filelen)) == NULL) {
if(hv_present)
printWarn("Could not open '%s' (HV is present)", path);
else
printWarn("Could not open '%s'", path);
return UNKNOWN_FREQ;
}
@@ -44,8 +40,7 @@ long get_freq_from_file(char* path, bool hv_present) {
errno = 0;
long ret = strtol(buf, &end, 10);
if(errno != 0) {
perror("strtol");
printBug("Failed parsing '%s' file. Read data was: '%s'", path, buf);
printBug("strtol: %s", strerror(errno));
free(buf);
return UNKNOWN_FREQ;
}
@@ -63,14 +58,139 @@ long get_freq_from_file(char* path, bool hv_present) {
return ret/1000;
}
long get_max_freq_from_file(uint32_t core, bool hv_present) {
char path[_PATH_FREQUENCY_MAX_LEN];
sprintf(path, "%s%s/cpu%d%s%s", _PATH_SYS_SYSTEM, _PATH_SYS_CPU, core, _PATH_FREQUENCY, _PATH_FREQUENCY_MAX);
return get_freq_from_file(path, hv_present);
/*
 * Reads a cache size from the file at 'path' and returns it multiplied by
 * 1024. Returns -1 if the file cannot be read or strtol reports an error.
 */
long get_cache_size_from_file(char* path) {
  int filelen;
  char* buf = read_file(path, &filelen);

  if(buf == NULL) {
    printWarn("Could not open '%s'", path);
    return -1;
  }

  // Terminate the text right after the bytes read. A trailing unit suffix
  // (e.g. 'K') is not stripped here: strtol simply stops at the first
  // character that is not part of the number
  buf[filelen] = '\0';

  char* end;
  long size_bytes = -1;

  errno = 0;
  long parsed = strtol(buf, &end, 10);
  if(errno == 0) {
    size_bytes = parsed * 1024;
  }
  else {
    printBug("strtol: %s", strerror(errno));
  }

  free(buf);
  return size_bytes;
}
long get_min_freq_from_file(uint32_t core, bool hv_present) {
long get_max_freq_from_file(uint32_t core) {
char path[_PATH_FREQUENCY_MAX_LEN];
sprintf(path, "%s%s/cpu%d%s%s", _PATH_SYS_SYSTEM, _PATH_SYS_CPU, core, _PATH_FREQUENCY, _PATH_FREQUENCY_MAX);
return get_freq_from_file(path);
}
long get_min_freq_from_file(uint32_t core) {
char path[_PATH_FREQUENCY_MAX_LEN];
sprintf(path, "%s%s/cpu%d%s%s", _PATH_SYS_SYSTEM, _PATH_SYS_CPU, core, _PATH_FREQUENCY, _PATH_FREQUENCY_MIN);
return get_freq_from_file(path, hv_present);
return get_freq_from_file(path);
}
/*
 * Builds the path of the size file of one cache directory of 'core'
 * (system path + cpu path + "/cpu<core>" + cache_dir + size file name) and
 * returns what get_cache_size_from_file reports for it.
 * snprintf bounds the write to the path buffer and %u matches the unsigned
 * core number.
 */
static long cache_size_for_core(uint32_t core, const char* cache_dir) {
  char path[_PATH_CACHE_MAX_LEN];
  snprintf(path, sizeof(path), "%s%s/cpu%u%s%s", _PATH_SYS_SYSTEM, _PATH_SYS_CPU, (unsigned int) core, cache_dir, _PATH_CACHE_SIZE);
  return get_cache_size_from_file(path);
}

// L1 instruction cache size of 'core' (see get_cache_size_from_file for the return value)
long get_l1i_cache_size(uint32_t core) {
  return cache_size_for_core(core, _PATH_CACHE_L1I);
}

// L1 data cache size of 'core' (see get_cache_size_from_file for the return value)
long get_l1d_cache_size(uint32_t core) {
  return cache_size_for_core(core, _PATH_CACHE_L1D);
}

// L2 cache size of 'core' (see get_cache_size_from_file for the return value)
long get_l2_cache_size(uint32_t core) {
  return cache_size_for_core(core, _PATH_CACHE_L2);
}

// L3 cache size of 'core' (see get_cache_size_from_file for the return value)
long get_l3_cache_size(uint32_t core) {
  return cache_size_for_core(core, _PATH_CACHE_L3);
}
/*
 * Counts how many different shared-CPU masks are found in the given files;
 * each distinct mask is taken as one cache instance.
 *
 * paths:     one file path per core, each containing a hexadecimal mask
 * num_paths: number of entries in paths
 *
 * Returns the number of distinct masks, or -1 if a file cannot be read, is
 * longer than expected or cannot be parsed. All temporary buffers are
 * released on every path.
 */
int get_num_caches_from_files(char** paths, int num_paths) {
  const int SHARED_MAP_MAX_LEN = 8 + 1;
  int num_caches = -1;
  uint32_t* shared_maps = emalloc(sizeof(uint32_t) * num_paths);
  uint32_t* unique_shared_maps = NULL;

  // 1. Read cpu_shared_map from every core
  for(int i=0; i < num_paths; i++) {
    int filelen;
    char* buf = read_file(paths[i], &filelen);

    if(buf == NULL) {
      printWarn("Could not open '%s'", paths[i]);
      goto cleanup;
    }
    if(filelen > SHARED_MAP_MAX_LEN) {
      printBug("Shared map length is %d while the max is be %d", filelen, SHARED_MAP_MAX_LEN);
      free(buf);
      goto cleanup;
    }

    errno = 0;
    long ret = strtol(buf, NULL, 16);
    int parse_errno = errno; // saved before free() can touch errno
    free(buf);

    if(parse_errno != 0) {
      printBug("strtol: %s", strerror(parse_errno));
      goto cleanup;
    }
    shared_maps[i] = (uint32_t) ret;
  }

  // 2. Count number of different masks; this is the number of caches.
  // Only the slots already filled are compared, so a mask whose value is 0
  // is counted like any other instead of matching an unused slot
  unique_shared_maps = emalloc(sizeof(uint32_t) * num_paths);
  num_caches = 0;
  for(int i=0; i < num_paths; i++) {
    bool found = false;
    for(int j=0; j < num_caches && !found; j++) {
      if(shared_maps[i] == unique_shared_maps[j]) found = true;
    }
    if(!found) {
      unique_shared_maps[num_caches] = shared_maps[i];
      num_caches++;
    }
  }

cleanup:
  free(unique_shared_maps);
  free(shared_maps);
  return num_caches;
}
/*
 * Returns the number of caches of the requested level, computed by
 * get_num_caches_from_files over the shared map file of every core.
 *
 * level: 0 = L1i, 1 = L1d, 2 = L2, 3 = L3
 *
 * Returns -1 for any other level or when the files cannot be processed.
 */
int get_num_caches_by_level(struct cpuInfo* cpu, uint32_t level) {
  char* cache_path = NULL;

  // Validate the level before allocating anything, so the error path has
  // nothing to release
  switch(level) {
    case 0: cache_path = _PATH_CACHE_L1I; break;
    case 1: cache_path = _PATH_CACHE_L1D; break;
    case 2: cache_path = _PATH_CACHE_L2;  break;
    case 3: cache_path = _PATH_CACHE_L3;  break;
    default:
      printBug("Found invalid cache level to inspect: %d\n", level);
      return -1;
  }

  int total_cores = cpu->topo->total_cores;
  char** paths = emalloc(sizeof(char *) * total_cores);

  for(int i=0; i < total_cores; i++) {
    paths[i] = emalloc(sizeof(char) * _PATH_CACHE_MAX_LEN);
    snprintf(paths[i], _PATH_CACHE_MAX_LEN, "%s%s/cpu%d%s%s", _PATH_SYS_SYSTEM, _PATH_SYS_CPU, i, cache_path, _PATH_CACHE_SHARED_MAP);
  }

  int ret = get_num_caches_from_files(paths, total_cores);

  for(int i=0; i < total_cores; i++)
    free(paths[i]);
  free(paths);

  return ret;
}

View File

@@ -17,12 +17,24 @@
#define _PATH_FREQUENCY "/cpufreq"
#define _PATH_FREQUENCY_MAX "/cpuinfo_max_freq"
#define _PATH_FREQUENCY_MIN "/cpuinfo_min_freq"
#define _PATH_CACHE_L1D "/cache/index0"
#define _PATH_CACHE_L1I "/cache/index1"
#define _PATH_CACHE_L2 "/cache/index2"
#define _PATH_CACHE_L3 "/cache/index3"
#define _PATH_CACHE_SIZE "/size"
#define _PATH_CACHE_SHARED_MAP "/shared_cpu_map"
#define _PATH_FREQUENCY_MAX_LEN 100
#define _PATH_CACHE_MAX_LEN 200
#define DEFAULT_FILE_SIZE 4096
char* read_file(char* path, int* len);
long get_max_freq_from_file(uint32_t core, bool hv_present);
long get_min_freq_from_file(uint32_t core, bool hv_present);
long get_max_freq_from_file(uint32_t core);
long get_min_freq_from_file(uint32_t core);
long get_l1i_cache_size(uint32_t core);
long get_l1d_cache_size(uint32_t core);
long get_l2_cache_size(uint32_t core);
long get_l3_cache_size(uint32_t core);
int get_num_caches_by_level(struct cpuInfo* cpu, uint32_t level);
#endif

223
src/ppc/ppc.c Normal file
View File

@@ -0,0 +1,223 @@
#include <stdio.h>
#include <stdlib.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <assert.h>
#include "ppc.h"
#include "uarch.h"
#include "udev.h"
#include "../common/udev.h"
#include "../common/global.h"
/*
 * Allocates and fills the cache description of the CPU.
 * Sizes are read for core 0 only; for every level whose size is positive the
 * level is flagged as existing and the number of caches of that level is
 * queried with get_num_caches_by_level (0 = L1i, 1 = L1d, 2 = L2, 3 = L3).
 * get_num_caches_by_level reads cpu->topo->total_cores, so cpu->topo must be
 * set before calling this function.
 */
struct cache* get_cache_info(struct cpuInfo* cpu) {
struct cache* cach = emalloc(sizeof(struct cache));
init_cache_struct(cach);
// Sizes of the four cache levels as reported for core 0
cach->L1i->size = get_l1i_cache_size(0);
cach->L1d->size = get_l1d_cache_size(0);
cach->L2->size = get_l2_cache_size(0);
cach->L3->size = get_l3_cache_size(0);
// A non-positive size (e.g. the -1 error value) leaves the level untouched
if(cach->L1i->size > 0) {
cach->L1i->exists = true;
cach->L1i->num_caches = get_num_caches_by_level(cpu, 0);
cach->max_cache_level = 1;
}
if(cach->L1d->size > 0) {
cach->L1d->exists = true;
cach->L1d->num_caches = get_num_caches_by_level(cpu, 1);
cach->max_cache_level = 2;
}
if(cach->L2->size > 0) {
cach->L2->exists = true;
cach->L2->num_caches = get_num_caches_by_level(cpu, 2);
cach->max_cache_level = 3;
}
if(cach->L3->size > 0) {
cach->L3->exists = true;
cach->L3->num_caches = get_num_caches_by_level(cpu, 3);
cach->max_cache_level = 4;
}
return cach;
}
// Returns how many different values appear in ids[0..n-1]
static int count_distinct_ids(const int* ids, int n) {
  int* seen = emalloc(sizeof(int) * n);
  int distinct = 0;

  for(int i=0; i < n; i++) {
    bool known = false;
    // Only compare against the slots filled so far: no sentinel value is
    // needed, so every id (including negative ones) is handled
    for(int j=0; j < distinct && !known; j++) {
      known = (seen[j] == ids[i]);
    }
    if(!known) {
      seen[distinct] = ids[i];
      distinct++;
    }
  }

  free(seen);
  return distinct;
}

/*
 * Allocates and fills the topology of the system:
 *  - total_cores:    online processors reported by sysconf (1 as fallback)
 *  - sockets:        number of distinct package ids
 *  - physical_cores: distinct core ids, per socket
 *  - logical_cores:  total_cores, per socket
 *  - smt_supported:  logical_cores / physical_cores
 *
 * cach is only forwarded to init_topology_struct.
 */
struct topology* get_topology_info(struct cache* cach) {
  struct topology* topo = emalloc(sizeof(struct topology));
  init_topology_struct(topo, cach);

  // 1. Total cores detection
  long online = sysconf(_SC_NPROCESSORS_ONLN);
  if(online < 1) {
    printWarn("sysconf(_SC_NPROCESSORS_ONLN): %s", strerror(errno));
    online = 1; // fallback
  }
  topo->total_cores = online;
  int total = (int) online;

  // To find physical cores, we use the total and core_ids
  // To find number of sockets, we use package_ids
  int* core_ids = emalloc(sizeof(int) * total);
  int* package_ids = emalloc(sizeof(int) * total);

  if(!fill_core_ids_from_sys(core_ids, total)) {
    printWarn("fill_core_ids_from_sys failed, output may be incomplete/invalid");
    for(int i=0; i < total; i++) core_ids[i] = 0;
  }

  if(!fill_package_ids_from_sys(package_ids, total)) {
    printWarn("fill_package_ids_from_sys failed, output may be incomplete/invalid");
    for(int i=0; i < total; i++) package_ids[i] = 0;
  }

  // 2. Socket detection
  // Distinct ids are counted instead of using the id as an array index:
  // an id that is negative or not smaller than the number of cores would
  // otherwise be written outside of the array
  topo->sockets = count_distinct_ids(package_ids, total);

  // 3. Physical cores detection
  int distinct_cores = count_distinct_ids(core_ids, total);

  topo->physical_cores = distinct_cores / topo->sockets; // only count cores on one socket
  if(topo->physical_cores < 1) {
    // Fewer distinct core ids than sockets (e.g. the fallback above):
    // keep at least one core so the division below is well defined
    topo->physical_cores = 1;
  }
  topo->logical_cores = topo->total_cores / topo->sockets; // only count threads on one socket
  topo->smt_supported = topo->logical_cores / topo->physical_cores;

  free(core_ids);
  free(package_ids);

  return topo;
}
// Executes the 'mfpvr' instruction and returns the 32-bit value it produces
// (get_cpu_info stores it in cpu->pvr and it is later matched against the
// PVR table to find the microarchitecture).
// NOTE(review): only assembles for PowerPC targets; presumably the OS
// allows or emulates this instruction from user space - confirm.
static inline uint32_t mfpvr() {
uint32_t pvr;
// %0 is the single output operand: a general register ("=r") copied to pvr
asm ("mfpvr %0"
: "=r"(pvr));
return pvr;
}
// Looks up the microarchitecture that corresponds to the PVR stored in cpu
struct uarch* get_cpu_uarch(struct cpuInfo* cpu) {
  struct uarch* detected = get_uarch_from_pvr(cpu->pvr);
  return detected;
}
/*
 * Allocates a frequency struct whose 'max' and 'base' fields hold,
 * respectively, the maximum and the minimum frequency read for core 0.
 */
struct frequency* get_frequency_info() {
  struct frequency* result = emalloc(sizeof(struct frequency));

  result->max = get_max_freq_from_file(0);
  result->base = get_min_freq_from_file(0);

  return result;
}
/*
 * Estimates the peak performance in FLOP/s:
 *   PP(SP) = N_CORES * FREQUENCY * 4 (if altivec)
 * where N_CORES spans all sockets and freq is multiplied by 1000000.
 * On POWER9 the result is further scaled by (threads per core / 2).
 * The formula is an estimate (the original author was not sure about it).
 *
 * Returns -1 when freq is UNKNOWN_FREQ.
 */
int64_t get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t freq) {
  // First check we have consistent data
  if(freq == UNKNOWN_FREQ) {
    return -1;
  }

  int64_t flops = topo->physical_cores * topo->sockets * (freq * 1000000);

  if(cpu->feat->altivec) flops = flops * 4;

  // POWER9 has the concept called "slices". Each SMT4 core has two super-slices,
  // and each super-slice is capable of doing two FLOPS per cycle. In the case of
  // SMT8, it has 4 super-slices, thus four FLOPS per cycle.
  if(is_power9(cpu->arch) && topo->physical_cores > 0) {
    int threads_per_core = topo->logical_cores / topo->physical_cores;
    int slice_factor = threads_per_core / 2;
    // With fewer than two threads per core the integer division yields 0,
    // which used to wipe out the whole estimation; the scaling is skipped
    // in that case.
    // NOTE(review): the thread count is only a proxy for the slice count.
    if(slice_factor > 1) {
      flops = flops * slice_factor;
    }
  }

  return flops;
}
/*
 * Allocates and fills the description of the CPU: name (read from the
 * device tree file), PVR, microarchitecture, frequency, topology, caches,
 * altivec support and peak performance.
 *
 * Returns NULL if the topology or the cache information is not available.
 */
struct cpuInfo* get_cpu_info() {
  struct cpuInfo* cpu = emalloc(sizeof(struct cpuInfo));
  struct features* feat = emalloc(sizeof(struct features));
  cpu->feat = feat;

  // Clear every feature flag at once (all-zero bytes read as false)
  memset(feat, 0, sizeof(struct features));

  int len;
  char* path = emalloc(sizeof(char) * (strlen(_PATH_DT) + strlen(_PATH_DT_PART) + 1));
  sprintf(path, "%s%s", _PATH_DT, _PATH_DT_PART);

  cpu->cpu_name = read_file(path, &len);
  free(path); // only needed to read the file

  cpu->pvr = mfpvr();
  cpu->arch = get_cpu_uarch(cpu);
  cpu->freq = get_frequency_info();

  // The cache information is computed after the topology (it needs the
  // number of cores), so a well defined NULL is passed here instead of the
  // uninitialized field.
  // NOTE(review): if the topology keeps this pointer, it should be updated
  // once cpu->cach is known - confirm against init_topology_struct.
  cpu->cach = NULL;
  cpu->topo = get_topology_info(cpu->cach);
  if(cpu->topo == NULL) {
    return NULL;
  }

  cpu->cach = get_cache_info(cpu);
  if(cpu->cach == NULL) {
    return NULL;
  }

  feat->altivec = has_altivec(cpu->arch);
  cpu->peak_performance = get_peak_performance(cpu, cpu->topo, get_freq(cpu->freq));

  return cpu;
}
// Returns a newly allocated "Yes" or "No" depending on cpu->feat->altivec
char* get_str_altivec(struct cpuInfo* cpu) {
  const char* answer = "No";
  if(cpu->feat->altivec) {
    answer = "Yes";
  }

  char* string = ecalloc(4, sizeof(char));
  strcpy(string, answer);
  return string;
}
/*
 * Returns a newly allocated string describing the core count:
 *   "<C> cores (<T> threads)" when topo->smt_supported > 1
 *   "<C> cores"               otherwise
 * With dual_socket the counts are multiplied by the number of sockets.
 * The buffer is sized from the formatted length, so counts of any width
 * are printed in full.
 */
char* get_str_topology(struct topology* topo, bool dual_socket) {
  int cores = topo->physical_cores;
  int threads = topo->logical_cores;

  if(dual_socket) {
    cores = (int) (topo->physical_cores * topo->sockets);
    threads = (int) (topo->logical_cores * topo->sockets);
  }

  char* string;
  if(topo->smt_supported > 1) {
    // snprintf with a NULL buffer only reports the length needed
    int len = snprintf(NULL, 0, "%d cores (%d threads)", cores, threads);
    if(len < 0) len = 0;
    string = emalloc(sizeof(char) * ((size_t) len + 1));
    snprintf(string, (size_t) len + 1, "%d cores (%d threads)", cores, threads);
  }
  else {
    int len = snprintf(NULL, 0, "%d cores", cores);
    if(len < 0) len = 0;
    string = emalloc(sizeof(char) * ((size_t) len + 1));
    snprintf(string, (size_t) len + 1, "%d cores", cores);
  }

  return string;
}
// Debug output: prints the raw PVR value of the CPU as 8 uppercase hex digits
void print_debug(struct cpuInfo* cpu) {
printf("PVR: 0x%.8X\n", cpu->pvr);
}

11
src/ppc/ppc.h Normal file
View File

@@ -0,0 +1,11 @@
// Public interface of the PowerPC backend.
// The include guard uses a project-specific name: identifiers starting with
// two underscores are reserved for the implementation, and the previous name
// may already be predefined by some PowerPC compilers, which would make the
// whole header be skipped.
#ifndef CPUFETCH_PPC_H
#define CPUFETCH_PPC_H
#include "../common/cpu.h"
// Builds the description of the CPU; returns NULL on failure
struct cpuInfo* get_cpu_info();
// Returns a newly allocated "Yes"/"No" string for altivec support
char* get_str_altivec(struct cpuInfo* cpu);
// Returns a newly allocated string with the core (and thread) count;
// dual_socket selects the count over all sockets
char* get_str_topology(struct topology* topo, bool dual_socket);
// Prints the raw PVR value
void print_debug(struct cpuInfo* cpu);
#endif

28
src/ppc/pvr_kern_to_cpufetch.sh Executable file
View File

@@ -0,0 +1,28 @@
#!/bin/bash
# This script takes as input cputable.c from linux kernel
# and generates a valid output for cpufetch in src/ppc/uarch.c
#
# Usage: pvr_kern_to_cpufetch.sh [path/to/cputable.c]
# Without argument, the default location below is used.
#
# Every generated line matches the 5 arguments of the CHECK_UARCH macro of
# uarch.c. UARCH_POWERX is a placeholder: it must be replaced by hand with
# the right UARCH_* value for each entry.
CPUTABLE_PATH="${1:-linux-5.13.7/arch/powerpc/kernel/cputable.c}"
if [ ! -r "$CPUTABLE_PATH" ]
then
  echo "Cannot read '$CPUTABLE_PATH'" >&2
  exit 1
fi
# One PVR value / mask per line, in the order they appear in the table
raw_values=$(grep '\.pvr_value' "$CPUTABLE_PATH" | grep -oP "= .*," | cut -d' ' -f2 | tr -d ',')
raw_masks=$(grep '\.pvr_mask' "$CPUTABLE_PATH" | grep -oE "0x........")
raw_v_len=$(echo "$raw_values" | wc -l)
raw_m_len=$(echo "$raw_masks" | wc -l)
# Values and masks are paired by position, so both lists must be equally long
if [ "$raw_v_len" -ne "$raw_m_len" ]
then
  echo "Lengths do not match!"
  echo "values length: $raw_v_len"
  echo "masks length: $raw_m_len"
  exit 1
fi
IFS=$'\n' read -r -d ' ' -a values <<< "$raw_values"
IFS=$'\n' read -r -d ' ' -a masks <<< "$raw_masks"
for i in "${!values[@]}"
do
  echo ' CHECK_UARCH(arch, pvr, '"${masks[i]}"', '"${values[i]}"', UARCH_POWERX)'
done

286
src/ppc/uarch.c Normal file
View File

@@ -0,0 +1,286 @@
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/auxv.h>
#include <errno.h>
#include "uarch.h"
#include "../common/global.h"
// Identifier of a microarchitecture: holds one of the UARCH_* values below
typedef uint32_t MICROARCH;
// Data not available
#define NA -1
// Unknown manufacturing process
#define UNK -1
// Microarchitectures known to this file. UARCH_UNKNOWN is the first entry
// (value 0) and is used when the PVR does not match any table entry
enum {
UARCH_UNKNOWN,
UARCH_PPC604,
UARCH_PPCG3,
UARCH_PPCG4,
UARCH_PPC405,
UARCH_PPC603,
UARCH_PPC440,
UARCH_PPC470,
UARCH_PPC970,
UARCH_PPC970FX,
UARCH_PPC970MP,
UARCH_CELLBE,
UARCH_POWER5,
UARCH_POWER5PLUS,
UARCH_POWER6,
UARCH_POWER7,
UARCH_POWER7PLUS,
UARCH_POWER8,
UARCH_POWER9,
UARCH_POWER9_DD20,
UARCH_POWER9_DD21,
UARCH_POWER9_DD22,
UARCH_POWER9_DD23,
UARCH_POWER10,
};
// Description of a microarchitecture, filled by fill_uarch
struct uarch {
// One of the UARCH_* values
MICROARCH uarch;
// Heap-allocated printable name (released by free_uarch_struct)
char* uarch_str;
int32_t process; // measured in nanometers
};
// The macros below build 'if / else if / else' chains: *_START opens the
// chain with a branch that is never taken, every CHECK_UARCH / FILL_UARCH
// adds one 'else if' and *_END closes it with the fallback branch.
// Macro arguments are parenthesized so that any expression can be passed.
#define UARCH_START if (false) {}
// Selects 'uarch' for 'arch' when the masked PVR equals pvr_value
#define CHECK_UARCH(arch, cpu_pvr, pvr_mask, pvr_value, uarch) \
  else if (((cpu_pvr) & (pvr_mask)) == (pvr_value)) fill_uarch((arch), (uarch));
// Fallback: expects variables named 'pvr' and 'arch' in the enclosing scope
#define UARCH_END else { printBug("Unknown microarchitecture detected: 0x%.8X", pvr); fill_uarch(arch, UARCH_UNKNOWN); }
#define FILL_START if (false) {}
// Sets the locals 'fill', 'str' and 'process' of the enclosing function
// when u equals uarch
#define FILL_UARCH(u, uarch, uarch_str, uarch_process) \
  else if((u) == (uarch)) { fill = true; str = (uarch_str); process = (uarch_process); }
// Fallback: expects a variable named 'u' in the enclosing scope
#define FILL_END else { printBug("Found invalid microarchitecture: %d", u); }
/*
 * Fills 'arch' with the id, the printable name (heap copy) and the
 * manufacturing process (in nanometers, or UNK) of microarchitecture 'u'.
 * If 'u' is not a known id, a bug message is printed and 'arch' is filled
 * as UARCH_UNKNOWN, so that no field is ever left uninitialized.
 */
void fill_uarch(struct uarch* arch, MICROARCH u) {
  arch->uarch = u;
  char* str = NULL;
  int32_t process = UNK;
  bool fill = false;

  FILL_START
  FILL_UARCH(arch->uarch, UARCH_UNKNOWN,     STRING_UNKNOWN,   UNK)
  FILL_UARCH(arch->uarch, UARCH_PPC604,      "PowerPC 604",    500)
  FILL_UARCH(arch->uarch, UARCH_PPCG3,       "PowerPC G3",     UNK) // varies
  FILL_UARCH(arch->uarch, UARCH_PPCG4,       "PowerPC G4",     UNK) // varies
  FILL_UARCH(arch->uarch, UARCH_PPC405,      "PowerPC 405",    UNK)
  FILL_UARCH(arch->uarch, UARCH_PPC603,      "PowerPC 603",    UNK) // varies
  FILL_UARCH(arch->uarch, UARCH_PPC440,      "PowerPC 440",    UNK)
  FILL_UARCH(arch->uarch, UARCH_PPC470,      "PowerPC 470",    45)  // strange...
  FILL_UARCH(arch->uarch, UARCH_PPC970,      "PowerPC 970",    130)
  FILL_UARCH(arch->uarch, UARCH_PPC970FX,    "PowerPC 970FX",  90)
  FILL_UARCH(arch->uarch, UARCH_PPC970MP,    "PowerPC 970MP",  90)
  FILL_UARCH(arch->uarch, UARCH_CELLBE,      "Cell BE",        UNK) // varies depending on manufacturer
  FILL_UARCH(arch->uarch, UARCH_POWER5,      "POWER5",         130)
  FILL_UARCH(arch->uarch, UARCH_POWER5PLUS,  "POWER5+",        90)
  FILL_UARCH(arch->uarch, UARCH_POWER6,      "POWER6",         65)
  FILL_UARCH(arch->uarch, UARCH_POWER7,      "POWER7",         45)
  FILL_UARCH(arch->uarch, UARCH_POWER7PLUS,  "POWER7+",        32)
  FILL_UARCH(arch->uarch, UARCH_POWER8,      "POWER8",         22)
  FILL_UARCH(arch->uarch, UARCH_POWER9,      "POWER9",         14)
  FILL_UARCH(arch->uarch, UARCH_POWER9_DD20, "POWER9 (DD2.0)", 14)
  FILL_UARCH(arch->uarch, UARCH_POWER9_DD21, "POWER9 (DD2.1)", 14)
  FILL_UARCH(arch->uarch, UARCH_POWER9_DD22, "POWER9 (DD2.2)", 14)
  FILL_UARCH(arch->uarch, UARCH_POWER9_DD23, "POWER9 (DD2.3)", 14)
  FILL_UARCH(arch->uarch, UARCH_POWER10,     "POWER10",        7)
  FILL_END

  if(!fill) {
    // Invalid id (already reported by FILL_END): fall back to the unknown
    // entry so uarch_str can be printed and freed safely later on
    arch->uarch = UARCH_UNKNOWN;
    str = STRING_UNKNOWN;
    process = UNK;
  }

  arch->uarch_str = emalloc(sizeof(char) * (strlen(str)+1));
  strcpy(arch->uarch_str, str);
  arch->process = process;
}
/*
* PVR masks/values from arch/powerpc/kernel/cputable.c (Linux kernel)
* This list may be incorrect, incomplete or overly simplified,
* especially in the case of 32-bit entries
*/
/*
 * Returns a newly allocated uarch struct describing the microarchitecture
 * that corresponds to 'pvr'.
 * The entries are checked from top to bottom and the first one for which
 * (pvr & mask) == value wins, so the order of the lines matters: entries
 * with a more specific mask are placed before the generic entry of the same
 * family. If nothing matches, UARCH_END reports it and fills the struct as
 * UARCH_UNKNOWN.
 */
struct uarch* get_uarch_from_pvr(uint32_t pvr) {
struct uarch* arch = emalloc(sizeof(struct uarch));
UARCH_START
// 64 bit
CHECK_UARCH(arch, pvr, 0xffff0000, 0x00390000, UARCH_PPC970)
CHECK_UARCH(arch, pvr, 0xffff0000, 0x003c0000, UARCH_PPC970FX)
CHECK_UARCH(arch, pvr, 0xffffffff, 0x00440100, UARCH_PPC970MP)
CHECK_UARCH(arch, pvr, 0xffff0000, 0x00440000, UARCH_PPC970MP)
CHECK_UARCH(arch, pvr, 0xffff0000, 0x003a0000, UARCH_POWER5)
CHECK_UARCH(arch, pvr, 0xffffff00, 0x003b0300, UARCH_POWER5PLUS)
CHECK_UARCH(arch, pvr, 0xffff0000, 0x003b0000, UARCH_POWER5)
CHECK_UARCH(arch, pvr, 0xffffffff, 0x0f000001, UARCH_POWER5)
CHECK_UARCH(arch, pvr, 0xffff0000, 0x003e0000, UARCH_POWER6)
CHECK_UARCH(arch, pvr, 0xffffffff, 0x0f000002, UARCH_POWER6)
CHECK_UARCH(arch, pvr, 0xffffffff, 0x0f000003, UARCH_POWER7)
CHECK_UARCH(arch, pvr, 0xffffffff, 0x0f000004, UARCH_POWER8)
CHECK_UARCH(arch, pvr, 0xffffffff, 0x0f000005, UARCH_POWER9)
CHECK_UARCH(arch, pvr, 0xffffffff, 0x0f000006, UARCH_POWER10)
CHECK_UARCH(arch, pvr, 0xffff0000, 0x003f0000, UARCH_POWER7)
CHECK_UARCH(arch, pvr, 0xffff0000, 0x004A0000, UARCH_POWER7PLUS)
CHECK_UARCH(arch, pvr, 0xffff0000, 0x004b0000, UARCH_POWER8)
CHECK_UARCH(arch, pvr, 0xffff0000, 0x004c0000, UARCH_POWER8)
CHECK_UARCH(arch, pvr, 0xffff0000, 0x004d0000, UARCH_POWER8)
CHECK_UARCH(arch, pvr, 0xffffefff, 0x004e0200, UARCH_POWER9_DD20)
CHECK_UARCH(arch, pvr, 0xffffefff, 0x004e0201, UARCH_POWER9_DD21)
CHECK_UARCH(arch, pvr, 0xffffefff, 0x004e0202, UARCH_POWER9_DD22)
CHECK_UARCH(arch, pvr, 0xffffefff, 0x004e0203, UARCH_POWER9_DD23)
CHECK_UARCH(arch, pvr, 0xffff0000, 0x00800000, UARCH_POWER10)
CHECK_UARCH(arch, pvr, 0xffff0000, 0x00700000, UARCH_CELLBE)
// 32 bit
CHECK_UARCH(arch, pvr, 0xffff0000, 0x00040000, UARCH_PPC604)
CHECK_UARCH(arch, pvr, 0xfffff000, 0x00090000, UARCH_PPC604)
CHECK_UARCH(arch, pvr, 0xffff0000, 0x00090000, UARCH_PPC604)
CHECK_UARCH(arch, pvr, 0xffff0000, 0x000a0000, UARCH_PPC604)
CHECK_UARCH(arch, pvr, 0xffffffff, 0x00084202, UARCH_PPCG3)
CHECK_UARCH(arch, pvr, 0xfffffff0, 0x00080100, UARCH_PPCG3)
CHECK_UARCH(arch, pvr, 0xfffffff0, 0x00082200, UARCH_PPCG3)
CHECK_UARCH(arch, pvr, 0xfffffff0, 0x00082210, UARCH_PPCG3)
CHECK_UARCH(arch, pvr, 0xffffffff, 0x00083214, UARCH_PPCG3)
CHECK_UARCH(arch, pvr, 0xfffff0e0, 0x00087000, UARCH_PPCG3)
CHECK_UARCH(arch, pvr, 0xfffff000, 0x00083000, UARCH_PPCG3)
CHECK_UARCH(arch, pvr, 0xffffff00, 0x70000100, UARCH_PPCG3)
CHECK_UARCH(arch, pvr, 0xffffffff, 0x70000200, UARCH_PPCG3)
CHECK_UARCH(arch, pvr, 0xffff0000, 0x70000000, UARCH_PPCG3)
CHECK_UARCH(arch, pvr, 0xffff0000, 0x70020000, UARCH_PPCG3)
CHECK_UARCH(arch, pvr, 0xffff0000, 0x00080000, UARCH_PPCG3)
CHECK_UARCH(arch, pvr, 0xffffffff, 0x000c1101, UARCH_PPCG4)
CHECK_UARCH(arch, pvr, 0xffff0000, 0x000c0000, UARCH_PPCG4)
CHECK_UARCH(arch, pvr, 0xffff0000, 0x800c0000, UARCH_PPCG4)
CHECK_UARCH(arch, pvr, 0xffffffff, 0x80000200, UARCH_PPCG4)
CHECK_UARCH(arch, pvr, 0xffffffff, 0x80000201, UARCH_PPCG4)
CHECK_UARCH(arch, pvr, 0xffff0000, 0x80000000, UARCH_PPCG4)
CHECK_UARCH(arch, pvr, 0xffffff00, 0x80010100, UARCH_PPCG4)
CHECK_UARCH(arch, pvr, 0xffffffff, 0x80010200, UARCH_PPCG4)
CHECK_UARCH(arch, pvr, 0xffff0000, 0x80010000, UARCH_PPCG4)
CHECK_UARCH(arch, pvr, 0xffffffff, 0x80020100, UARCH_PPCG4)
CHECK_UARCH(arch, pvr, 0xffffffff, 0x80020101, UARCH_PPCG4)
CHECK_UARCH(arch, pvr, 0xffff0000, 0x80020000, UARCH_PPCG4)
CHECK_UARCH(arch, pvr, 0xffff0000, 0x80030000, UARCH_PPCG4)
CHECK_UARCH(arch, pvr, 0xffff0000, 0x80040000, UARCH_PPCG4)
CHECK_UARCH(arch, pvr, 0xffff0000, 0x00030000, UARCH_PPC603)
CHECK_UARCH(arch, pvr, 0xffff0000, 0x00060000, UARCH_PPC603)
CHECK_UARCH(arch, pvr, 0xffff0000, 0x00070000, UARCH_PPC603)
CHECK_UARCH(arch, pvr, 0x7fff0000, 0x00810000, UARCH_PPC603)
CHECK_UARCH(arch, pvr, 0x7fff0000, 0x00820000, UARCH_PPC603)
CHECK_UARCH(arch, pvr, 0x7fff0000, 0x00830000, UARCH_PPC603)
CHECK_UARCH(arch, pvr, 0x7fff0000, 0x00840000, UARCH_PPC603)
CHECK_UARCH(arch, pvr, 0x7fff0000, 0x00850000, UARCH_PPC603)
CHECK_UARCH(arch, pvr, 0x7fff0000, 0x00860000, UARCH_PPC603)
CHECK_UARCH(arch, pvr, 0xffff0000, 0x41810000, UARCH_PPC405)
CHECK_UARCH(arch, pvr, 0xffff0000, 0x41610000, UARCH_PPC405)
CHECK_UARCH(arch, pvr, 0xffff0000, 0x40B10000, UARCH_PPC405)
CHECK_UARCH(arch, pvr, 0xffff0000, 0x41410000, UARCH_PPC405)
CHECK_UARCH(arch, pvr, 0xffff0000, 0x50910000, UARCH_PPC405)
CHECK_UARCH(arch, pvr, 0xffff0000, 0x51510000, UARCH_PPC405)
CHECK_UARCH(arch, pvr, 0xffff0000, 0x41F10000, UARCH_PPC405)
CHECK_UARCH(arch, pvr, 0xffff0000, 0x51210000, UARCH_PPC405)
CHECK_UARCH(arch, pvr, 0xffff000f, 0x12910007, UARCH_PPC405)
CHECK_UARCH(arch, pvr, 0xffff000f, 0x1291000d, UARCH_PPC405)
CHECK_UARCH(arch, pvr, 0xffff000f, 0x1291000f, UARCH_PPC405)
CHECK_UARCH(arch, pvr, 0xffff000f, 0x12910003, UARCH_PPC405)
CHECK_UARCH(arch, pvr, 0xffff000f, 0x12910005, UARCH_PPC405)
CHECK_UARCH(arch, pvr, 0xffff000f, 0x12910001, UARCH_PPC405)
CHECK_UARCH(arch, pvr, 0xffff000f, 0x12910009, UARCH_PPC405)
CHECK_UARCH(arch, pvr, 0xffff000f, 0x1291000b, UARCH_PPC405)
CHECK_UARCH(arch, pvr, 0xffff000f, 0x12910000, UARCH_PPC405)
CHECK_UARCH(arch, pvr, 0xffff000f, 0x12910002, UARCH_PPC405)
CHECK_UARCH(arch, pvr, 0xffff0000, 0x41510000, UARCH_PPC405)
// NOTE(review): the value below has bits set outside of its mask
// (0x7ff11432 & 0xffff0000 != 0x7ff11432), so this entry can never match
CHECK_UARCH(arch, pvr, 0xffff0000, 0x7ff11432, UARCH_PPC405)
CHECK_UARCH(arch, pvr, 0xf0000fff, 0x40000850, UARCH_PPC440)
CHECK_UARCH(arch, pvr, 0xf0000fff, 0x40000858, UARCH_PPC440)
CHECK_UARCH(arch, pvr, 0xf0000fff, 0x400008d3, UARCH_PPC440)
CHECK_UARCH(arch, pvr, 0xf0000ff7, 0x400008d4, UARCH_PPC440)
CHECK_UARCH(arch, pvr, 0xf0000fff, 0x400008db, UARCH_PPC440)
CHECK_UARCH(arch, pvr, 0xf0000ffb, 0x200008D0, UARCH_PPC440)
CHECK_UARCH(arch, pvr, 0xf0000ffb, 0x200008D8, UARCH_PPC440)
CHECK_UARCH(arch, pvr, 0xf0000fff, 0x40000440, UARCH_PPC440)
CHECK_UARCH(arch, pvr, 0xf0000fff, 0x40000481, UARCH_PPC440)
CHECK_UARCH(arch, pvr, 0xf0000fff, 0x50000850, UARCH_PPC440)
CHECK_UARCH(arch, pvr, 0xf0000fff, 0x50000851, UARCH_PPC440)
CHECK_UARCH(arch, pvr, 0xf0000fff, 0x50000892, UARCH_PPC440)
CHECK_UARCH(arch, pvr, 0xf0000fff, 0x50000894, UARCH_PPC440)
CHECK_UARCH(arch, pvr, 0xfff00fff, 0x53200891, UARCH_PPC440)
CHECK_UARCH(arch, pvr, 0xfff00fff, 0x53400890, UARCH_PPC440)
CHECK_UARCH(arch, pvr, 0xfff00fff, 0x53400891, UARCH_PPC440)
CHECK_UARCH(arch, pvr, 0xffff0006, 0x13020002, UARCH_PPC440)
CHECK_UARCH(arch, pvr, 0xffff0007, 0x13020004, UARCH_PPC440)
CHECK_UARCH(arch, pvr, 0xffff0006, 0x13020000, UARCH_PPC440)
CHECK_UARCH(arch, pvr, 0xffff0007, 0x13020005, UARCH_PPC440)
CHECK_UARCH(arch, pvr, 0xffffff00, 0x13541800, UARCH_PPC440)
CHECK_UARCH(arch, pvr, 0xfffffff0, 0x12C41C80, UARCH_PPC440)
CHECK_UARCH(arch, pvr, 0xffffffff, 0x11a52080, UARCH_PPC470)
CHECK_UARCH(arch, pvr, 0xffff0000, 0x7ff50000, UARCH_PPC470)
CHECK_UARCH(arch, pvr, 0xffff0000, 0x00050000, UARCH_PPC470)
CHECK_UARCH(arch, pvr, 0xffff0000, 0x11a50000, UARCH_PPC470)
UARCH_END
return arch;
}
/*
 * Tells whether arch->uarch is one of the microarchitectures this file
 * lists as AltiVec-capable (PPC970FX/MP, Cell BE and POWER6 through POWER10).
 * Every other value yields false.
 * arch must be non-NULL: it is dereferenced unconditionally.
 */
bool has_altivec(struct uarch* arch) {
  bool altivec = false;

  switch(arch->uarch) {
    case UARCH_PPC970FX:
    case UARCH_PPC970MP:
    case UARCH_CELLBE:
    case UARCH_POWER6:
    case UARCH_POWER7:
    case UARCH_POWER7PLUS:
    case UARCH_POWER8:
    case UARCH_POWER9:
    case UARCH_POWER9_DD20:
    case UARCH_POWER9_DD21:
    case UARCH_POWER9_DD22:
    case UARCH_POWER9_DD23:
    case UARCH_POWER10:
      altivec = true;
      break;
    default:
      // Anything not listed above is reported as lacking AltiVec
      break;
  }

  return altivec;
}
/*
 * Returns true when arch->uarch is the generic POWER9 identifier or any
 * of its DD2.x revisions (DD20, DD21, DD22, DD23), false otherwise.
 * arch must be non-NULL: it is dereferenced unconditionally.
 */
bool is_power9(struct uarch* arch) {
  switch(arch->uarch) {
    case UARCH_POWER9:
    case UARCH_POWER9_DD20:
    case UARCH_POWER9_DD21:
    case UARCH_POWER9_DD22:
    case UARCH_POWER9_DD23:
      return true;
    default:
      return false;
  }
}
/*
 * Returns the microarchitecture name stored in cpu->arch.
 * The pointer returned is the stored string itself, not a copy;
 * free_uarch_struct() releases it together with the uarch structure.
 */
char* get_str_uarch(struct cpuInfo* cpu) {
  char* uarch_name = cpu->arch->uarch_str;
  return uarch_name;
}
/*
 * Builds a newly allocated, human-readable string for the manufacturing
 * process stored in cpu->arch->process:
 *   - UNK             -> STRING_UNKNOWN
 *   - process > 100   -> "<process/100>um" with two decimals
 *   - 0 < process     -> "<process>nm"
 *   - anything else   -> STRING_UNKNOWN, and a bug report is printed
 * The caller owns the returned buffer.
 */
char* get_str_process(struct cpuInfo* cpu) {
  // The buffer must fit every branch, not only STRING_UNKNOWN: the widest
  // output is "%.2fum" for INT32_MAX/100 ("21474836.47um", 13 chars + NUL).
  size_t size = strlen(STRING_UNKNOWN) + 1;
  if(size < 16) {
    size = 16;
  }

  char* str = emalloc(sizeof(char) * size);
  int32_t process = cpu->arch->process;

  if(process == UNK) {
    snprintf(str, size, "%s", STRING_UNKNOWN);
  }
  else if(process > 100) {
    // Bounded write: the original unbounded sprintf could overflow the buffer
    snprintf(str, size, "%.2fum", (double)process/100);
  }
  else if(process > 0){
    snprintf(str, size, "%dnm", process);
  }
  else {
    snprintf(str, size, "%s", STRING_UNKNOWN);
    printBug("Found invalid process: '%d'", process);
  }

  return str;
}
/*
 * Releases a uarch structure: first the name string it holds, then the
 * structure itself. arch must be non-NULL because it is dereferenced
 * to reach the string.
 */
void free_uarch_struct(struct uarch* arch) {
  char* uarch_name = arch->uarch_str;

  free(uarch_name);
  free(arch);
}

16
src/ppc/uarch.h Normal file
View File

@@ -0,0 +1,16 @@
/* Public interface for PowerPC microarchitecture detection. */
#ifndef __UARCH__
#define __UARCH__
#include <stdint.h>
#include "ppc.h"
/* Opaque to users of this header; only pointers to it are exchanged here. */
struct uarch;
/* Returns the uarch structure selected for the given pvr value
 * (presumably the Processor Version Register - confirm). */
struct uarch* get_uarch_from_pvr(uint32_t pvr);
/* True when arch is one of the microarchitectures listed as AltiVec-capable. */
bool has_altivec(struct uarch* arch);
/* True when arch is POWER9 or one of its DD2.x revisions. */
bool is_power9(struct uarch* arch);
/* Returns the microarchitecture name stored in cpu->arch (not a copy). */
char* get_str_uarch(struct cpuInfo* cpu);
/* Returns a newly allocated string describing the manufacturing process. */
char* get_str_process(struct cpuInfo* cpu);
/* Frees the name string held by arch and then arch itself. */
void free_uarch_struct(struct uarch* arch);
#endif

40
src/ppc/udev.c Normal file
View File

@@ -0,0 +1,40 @@
#include <errno.h>
#include "../common/global.h"
#include "udev.h"
#define _PATH_TOPO_CORE_ID "topology/core_id"
#define _PATH_TOPO_PACKAGE_ID "topology/physical_package_id"
/*
 * For every cpu index in [0, total_cores), reads the file
 *   _PATH_SYS_SYSTEM _PATH_SYS_CPU "/cpu<i>/" SYS_PATH
 * and stores the decimal integer it contains in core_ids[i].
 *
 * Returns true when all entries were filled. Returns false (after
 * printing a warning) as soon as a path does not fit the local buffer,
 * a file cannot be read, or its contents cannot be parsed as a number;
 * entries after the failing index are left untouched.
 */
bool fill_array_from_sys(int *core_ids, int total_cores, char* SYS_PATH) {
  int filelen;
  char* buf;
  char* end;
  char path[128];

  for(int i=0; i < total_cores; i++) {
    // Bounded formatting: refuse to continue rather than overflow path
    int len = snprintf(path, sizeof(path), "%s%s/cpu%d/%s", _PATH_SYS_SYSTEM, _PATH_SYS_CPU, i, SYS_PATH);
    if(len < 0 || (size_t) len >= sizeof(path)) {
      printWarn("fill_array_from_sys: path for cpu%d is too long", i);
      return false;
    }

    if((buf = read_file(path, &filelen)) == NULL) {
      printWarn("fill_array_from_sys: %s: %s", path, strerror(errno));
      return false;
    }

    // strtol reports range errors only through errno, so clear it first
    errno = 0;
    long value = strtol(buf, &end, 10);
    if(errno != 0) {
      // Warn before free() so errno still describes the strtol failure
      printWarn("fill_array_from_sys: %s: %s", path, strerror(errno));
      free(buf);
      return false;
    }
    if(end == buf) {
      // No digits consumed: do not silently record a 0
      printWarn("fill_array_from_sys: %s: no number found", path);
      free(buf);
      return false;
    }

    free(buf);
    core_ids[i] = (int) value;
  }

  return true;
}
/*
 * Fills core_ids[0..total_cores-1] with the value read from each cpu's
 * "topology/core_id" file. Returns whatever fill_array_from_sys() reports:
 * true on success, false if any entry could not be read.
 */
bool fill_core_ids_from_sys(int *core_ids, int total_cores) {
  bool filled = fill_array_from_sys(core_ids, total_cores, _PATH_TOPO_CORE_ID);
  return filled;
}
/*
 * Fills package_ids[0..total_cores-1] with the value read from each cpu's
 * "topology/physical_package_id" file. Returns whatever
 * fill_array_from_sys() reports: true on success, false otherwise.
 */
bool fill_package_ids_from_sys(int* package_ids, int total_cores) {
  bool filled = fill_array_from_sys(package_ids, total_cores, _PATH_TOPO_PACKAGE_ID);
  return filled;
}

11
src/ppc/udev.h Normal file
View File

@@ -0,0 +1,11 @@
/* PowerPC-specific additions on top of the common udev helpers. */
#ifndef __UDEV_PPC__
#define __UDEV_PPC__
#include "../common/udev.h"
/* Device-tree node of the processor, and the entry below it holding the
 * part number (presumably concatenated by the reader - confirm against users). */
#define _PATH_DT "/proc/device-tree/vpd/root-node-vpd@a000/enclosure@1e00/backplane@800/processor@1000"
#define _PATH_DT_PART "/part-number"
/* Fills core_ids[i] with the core id of cpu i; returns false on failure. */
bool fill_core_ids_from_sys(int *core_ids, int total_cores);
/* Fills package_ids[i] with the physical package id of cpu i; returns false on failure. */
bool fill_package_ids_from_sys(int* package_ids, int total_cores);
#endif

View File

@@ -4,6 +4,9 @@
#elif defined __linux__
#define _GNU_SOURCE
#include <sched.h>
#elif defined __FreeBSD__
#include <sys/param.h>
#include <sys/cpuset.h>
#elif defined __APPLE__
#define UNUSED(x) (void)(x)
#endif
@@ -13,6 +16,7 @@
#include <string.h>
#include <stdio.h>
#include <unistd.h>
#include <errno.h>
#include "apic.h"
#include "cpuid_asm.h"
@@ -24,7 +28,7 @@
*/
unsigned char bit_scan_reverse(uint32_t* index, uint64_t mask) {
for(uint64_t i = (8 * sizeof(uint64_t)); i > 0; i--) {
if((mask & (1LL << (i-1))) != 0) {
if((mask & (1ULL << (i-1))) != 0) {
*index = (uint64_t) (i-1);
break;
}
@@ -76,12 +80,21 @@ bool bind_to_cpu(int cpu_id) {
HANDLE process = GetCurrentProcess();
DWORD_PTR processAffinityMask = 1 << cpu_id;
return SetProcessAffinityMask(process, processAffinityMask);
#else
#elif defined __linux__
cpu_set_t currentCPU;
CPU_ZERO(&currentCPU);
CPU_SET(cpu_id, &currentCPU);
if (sched_setaffinity (0, sizeof(currentCPU), &currentCPU) == -1) {
perror("sched_setaffinity");
printWarn("sched_setaffinity: %s", strerror(errno));
return false;
}
return true;
#elif defined __FreeBSD__
cpuset_t currentCPU;
CPU_ZERO(&currentCPU);
CPU_SET(cpu_id, &currentCPU);
if(cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, sizeof(cpuset_t), &currentCPU) == -1) {
printWarn("cpuset_setaffinity: %s", strerror(errno));
return false;
}
return true;
@@ -196,9 +209,9 @@ uint32_t max_apic_id_size(uint32_t** cache_id_apic, struct topology* topo) {
bool build_topo_from_apic(uint32_t* apic_pkg, uint32_t* apic_smt, uint32_t** cache_id_apic, struct topology* topo) {
uint32_t size = max_apic_id_size(cache_id_apic, topo);
uint32_t* sockets = malloc(sizeof(uint32_t) * size);
uint32_t* smt = malloc(sizeof(uint32_t) * size);
uint32_t* apic_id = malloc(sizeof(uint32_t) * size);
uint32_t* sockets = emalloc(sizeof(uint32_t) * size);
uint32_t* smt = emalloc(sizeof(uint32_t) * size);
uint32_t* apic_id = emalloc(sizeof(uint32_t) * size);
uint32_t num_caches = 0;
memset(sockets, 0, sizeof(uint32_t) * size);
@@ -313,12 +326,12 @@ bool fill_apic_ids(uint32_t* apic_ids, int n, bool x2apic_id) {
bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) {
uint32_t apic_id;
uint32_t* apic_ids = malloc(sizeof(uint32_t) * topo->total_cores);
uint32_t* apic_pkg = malloc(sizeof(uint32_t) * topo->total_cores);
uint32_t* apic_core = malloc(sizeof(uint32_t) * topo->total_cores);
uint32_t* apic_smt = malloc(sizeof(uint32_t) * topo->total_cores);
uint32_t** cache_smt_id_apic = malloc(sizeof(uint32_t*) * topo->total_cores);
uint32_t** cache_id_apic = malloc(sizeof(uint32_t*) * topo->total_cores);
uint32_t* apic_ids = emalloc(sizeof(uint32_t) * topo->total_cores);
uint32_t* apic_pkg = emalloc(sizeof(uint32_t) * topo->total_cores);
uint32_t* apic_core = emalloc(sizeof(uint32_t) * topo->total_cores);
uint32_t* apic_smt = emalloc(sizeof(uint32_t) * topo->total_cores);
uint32_t** cache_smt_id_apic = emalloc(sizeof(uint32_t*) * topo->total_cores);
uint32_t** cache_id_apic = emalloc(sizeof(uint32_t*) * topo->total_cores);
bool x2apic_id;
if(cpu->maxLevels >= 0x0000000B) {
@@ -337,11 +350,11 @@ bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) {
}
for(int i=0; i < topo->total_cores; i++) {
cache_smt_id_apic[i] = malloc(sizeof(uint32_t) * (topo->cach->max_cache_level));
cache_id_apic[i] = malloc(sizeof(uint32_t) * (topo->cach->max_cache_level));
cache_smt_id_apic[i] = emalloc(sizeof(uint32_t) * (topo->cach->max_cache_level));
cache_id_apic[i] = emalloc(sizeof(uint32_t) * (topo->cach->max_cache_level));
}
topo->apic->cache_select_mask = malloc(sizeof(uint32_t) * (topo->cach->max_cache_level));
topo->apic->cache_id_apic = malloc(sizeof(uint32_t) * (topo->cach->max_cache_level));
topo->apic->cache_select_mask = emalloc(sizeof(uint32_t) * (topo->cach->max_cache_level));
topo->apic->cache_id_apic = emalloc(sizeof(uint32_t) * (topo->cach->max_cache_level));
if(x2apic_id) {
if(!fill_topo_masks_x2apic(topo))

View File

@@ -37,11 +37,9 @@ static char *hv_vendors_name[] = {
[HV_VENDOR_VMWARE] = "VMware",
[HV_VENDOR_XEN] = "Xen",
[HV_VENDOR_PARALLELS] = "Parallels",
[HV_VENDOR_INVALID] = "Unknown"
[HV_VENDOR_INVALID] = STRING_UNKNOWN
};
#define STRING_UNKNOWN "Unknown"
#define HYPERVISOR_NAME_MAX_LENGTH 17
#define MASK 0xFF
@@ -51,36 +49,6 @@ static char *hv_vendors_name[] = {
* cpuid amd: https://www.amd.com/system/files/TechDocs/25481.pdf
*/
/*
 * Puts a topology structure into its initial state: attaches the given
 * cache description, allocates the APIC sub-structure and zeroes every
 * core/SMT/socket counter. The malloc result is stored unchecked.
 */
void init_topology_struct(struct topology* topo, struct cache* cach) {
  // Linked sub-structures
  topo->cach = cach;
  topo->apic = malloc(sizeof(struct apic));

  // Socket and SMT information, not yet detected
  topo->sockets = 0;
  topo->smt_supported = 0;
  topo->smt_available = 0;

  // Core counters, not yet detected
  topo->logical_cores = 0;
  topo->physical_cores = 0;
  topo->total_cores = 0;
}
/*
 * Allocates the four cache level descriptors (L1i, L1d, L2, L3), exposes
 * them through cach_arr in that order, marks each one as not existing
 * and resets max_cache_level to 0. Allocation results are used unchecked.
 */
void init_cache_struct(struct cache* cach) {
  cach->L1i = malloc(sizeof(struct cach));
  cach->L1d = malloc(sizeof(struct cach));
  cach->L2 = malloc(sizeof(struct cach));
  cach->L3 = malloc(sizeof(struct cach));

  // cach_arr gives index-based access to the same four descriptors
  cach->cach_arr = malloc(sizeof(struct cach*) * 4);
  cach->cach_arr[0] = cach->L1i;
  cach->cach_arr[1] = cach->L1d;
  cach->cach_arr[2] = cach->L2;
  cach->cach_arr[3] = cach->L3;

  cach->max_cache_level = 0;

  // No level has been detected yet
  for(int level = 0; level < 4; level++) {
    cach->cach_arr[level]->exists = false;
  }
}
void get_name_cpuid(char* name, uint32_t reg1, uint32_t reg2, uint32_t reg3) {
uint32_t c = 0;
@@ -107,7 +75,7 @@ char* get_str_cpu_name_internal() {
uint32_t edx = 0;
uint32_t c = 0;
char * name = malloc(sizeof(char) * CPU_NAME_MAX_LENGTH);
char * name = emalloc(sizeof(char) * CPU_NAME_MAX_LENGTH);
memset(name, 0, CPU_NAME_MAX_LENGTH);
for(int i=0; i < 3; i++) {
@@ -165,8 +133,51 @@ struct uarch* get_cpu_uarch(struct cpuInfo* cpu) {
return get_uarch_from_cpuid(cpu, efamily, family, emodel, model, (int)stepping);
}
/*
 * Estimates the single precision peak performance of the CPU.
 *
 * PP = PeakPerformance
 * SP = SinglePrecision
 *
 * PP(SP) =
 * N_CORES *
 * FREQUENCY *
 * 2(Two vector units) *
 * 2(If cpu has fma) *
 * 16(If AVX512), 8(If AVX), 4(If SSE) *
 *
 * freq is multiplied by 1000000 before use (presumably it is given in
 * MHz - confirm against get_freq).
 * Returns the estimate, or -1 when it cannot be computed because the
 * frequency is unknown or no topology is available.
 */
int64_t get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t freq) {
  // First, check we have consistent data. The caller invokes this function
  // before checking that topology detection succeeded, so topo may be NULL.
  if(topo == NULL || freq == UNKNOWN_FREQ) {
    return -1;
  }

  struct features* feat = cpu->feat;
  int vpus = get_number_of_vpus(cpu);
  int64_t flops = topo->physical_cores * topo->sockets * (freq*1000000) * vpus;

  if(feat->FMA3 || feat->FMA4)
    flops = flops*2;

  // Ice Lake has AVX512, but it has 1 VPU for AVX512, while
  // it has 2 for AVX2. If this is a Ice Lake CPU, we are computing
  // the peak performance supposing AVX2, not AVX512
  if(feat->AVX512 && vpus_are_AVX512(cpu))
    flops = flops*16;
  else if(feat->AVX || feat->AVX2)
    flops = flops*8;
  else if(feat->SSE)
    flops = flops*4;

  // See https://sites.utexas.edu/jdm4372/2018/01/22/a-peculiar-
  // throughput-limitation-on-intels-xeon-phi-x200-knights-landing/
  if(is_knights_landing(cpu))
    flops = flops * 6 / 7;

  return flops;
}
struct hypervisor* get_hp_info(bool hv_present) {
struct hypervisor* hv = malloc(sizeof(struct hypervisor));
struct hypervisor* hv = emalloc(sizeof(struct hypervisor));
if(!hv_present) {
hv->present = false;
return hv;
@@ -206,8 +217,8 @@ struct hypervisor* get_hp_info(bool hv_present) {
}
struct cpuInfo* get_cpu_info() {
struct cpuInfo* cpu = malloc(sizeof(struct cpuInfo));
struct features* feat = malloc(sizeof(struct features));
struct cpuInfo* cpu = emalloc(sizeof(struct cpuInfo));
struct features* feat = emalloc(sizeof(struct features));
cpu->feat = feat;
bool *ptr = &(feat->AES);
@@ -251,20 +262,20 @@ struct cpuInfo* get_cpu_info() {
if (cpu->maxLevels >= 0x00000001){
eax = 0x00000001;
cpuid(&eax, &ebx, &ecx, &edx);
feat->SSE = (edx & ((int)1 << 25)) != 0;
feat->SSE2 = (edx & ((int)1 << 26)) != 0;
feat->SSE3 = (ecx & ((int)1 << 0)) != 0;
feat->SSE = (edx & (1U << 25)) != 0;
feat->SSE2 = (edx & (1U << 26)) != 0;
feat->SSE3 = (ecx & (1U << 0)) != 0;
feat->SSSE3 = (ecx & ((int)1 << 9)) != 0;
feat->SSE4_1 = (ecx & ((int)1 << 19)) != 0;
feat->SSE4_2 = (ecx & ((int)1 << 20)) != 0;
feat->SSSE3 = (ecx & (1U << 9)) != 0;
feat->SSE4_1 = (ecx & (1U << 19)) != 0;
feat->SSE4_2 = (ecx & (1U << 20)) != 0;
feat->AES = (ecx & ((int)1 << 25)) != 0;
feat->AES = (ecx & (1U << 25)) != 0;
feat->AVX = (ecx & ((int)1 << 28)) != 0;
feat->FMA3 = (ecx & ((int)1 << 12)) != 0;
feat->AVX = (ecx & (1U << 28)) != 0;
feat->FMA3 = (ecx & (1U << 12)) != 0;
bool hv_present = (ecx & ((int)1 << 31)) != 0;
bool hv_present = (ecx & (1U << 31)) != 0;
if((cpu->hv = get_hp_info(hv_present)) == NULL)
return NULL;
}
@@ -276,16 +287,16 @@ struct cpuInfo* get_cpu_info() {
eax = 0x00000007;
ecx = 0x00000000;
cpuid(&eax, &ebx, &ecx, &edx);
feat->AVX2 = (ebx & ((int)1 << 5)) != 0;
feat->SHA = (ebx & ((int)1 << 29)) != 0;
feat->AVX512 = (((ebx & ((int)1 << 16)) != 0) ||
((ebx & ((int)1 << 28)) != 0) ||
((ebx & ((int)1 << 26)) != 0) ||
((ebx & ((int)1 << 27)) != 0) ||
((ebx & ((int)1 << 31)) != 0) ||
((ebx & ((int)1 << 30)) != 0) ||
((ebx & ((int)1 << 17)) != 0) ||
((ebx & ((int)1 << 21)) != 0));
feat->AVX2 = (ebx & (1U << 5)) != 0;
feat->SHA = (ebx & (1U << 29)) != 0;
feat->AVX512 = (((ebx & (1U << 16)) != 0) ||
((ebx & (1U << 28)) != 0) ||
((ebx & (1U << 26)) != 0) ||
((ebx & (1U << 27)) != 0) ||
((ebx & (1U << 31)) != 0) ||
((ebx & (1U << 30)) != 0) ||
((ebx & (1U << 17)) != 0) ||
((ebx & (1U << 21)) != 0));
}
else {
printWarn("Can't read features information from cpuid (needed level is 0x%.8X, max is 0x%.8X)", 0x00000007, cpu->maxLevels);
@@ -294,8 +305,8 @@ struct cpuInfo* get_cpu_info() {
if (cpu->maxExtendedLevels >= 0x80000001){
eax = 0x80000001;
cpuid(&eax, &ebx, &ecx, &edx);
feat->SSE4a = (ecx & ((int)1 << 6)) != 0;
feat->FMA4 = (ecx & ((int)1 << 16)) != 0;
feat->SSE4a = (ecx & (1U << 6)) != 0;
feat->FMA4 = (ecx & (1U << 16)) != 0;
}
else {
printWarn("Can't read features information from cpuid (needed extended level is 0x%.8X, max is 0x%.8X)", 0x80000001, cpu->maxExtendedLevels);
@@ -305,8 +316,8 @@ struct cpuInfo* get_cpu_info() {
cpu->cpu_name = get_str_cpu_name_internal();
}
else {
cpu->cpu_name = malloc(sizeof(char)*8);
sprintf(cpu->cpu_name,"Unknown");
cpu->cpu_name = emalloc(sizeof(char) * (strlen(STRING_UNKNOWN) + 1));
strcpy(cpu->cpu_name, STRING_UNKNOWN);
printWarn("Can't read cpu name from cpuid (needed extended level is 0x%.8X, max is 0x%.8X)", 0x80000004, cpu->maxExtendedLevels);
}
@@ -321,6 +332,7 @@ struct cpuInfo* get_cpu_info() {
cpu->freq = get_frequency_info(cpu);
cpu->cach = get_cache_info(cpu);
cpu->topo = get_topology_info(cpu, cpu->cach);
cpu->peak_performance = get_peak_performance(cpu, cpu->topo, get_freq(cpu->freq));
if(cpu->cach == NULL || cpu->topo == NULL) {
return NULL;
@@ -405,7 +417,7 @@ bool get_cache_topology_amd(struct cpuInfo* cpu, struct topology* topo) {
// Main reference: https://software.intel.com/content/www/us/en/develop/articles/intel-64-architecture-processor-topology-enumeration.html
// Very interesting resource: https://wiki.osdev.org/Detecting_CPU_Topology_(80x86)
struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) {
struct topology* topo = malloc(sizeof(struct topology));
struct topology* topo = emalloc(sizeof(struct topology));
init_topology_struct(topo, cach);
uint32_t eax = 0;
@@ -423,7 +435,7 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) {
topo->total_cores = info.dwNumberOfProcessors;
#else
if((topo->total_cores = sysconf(_SC_NPROCESSORS_ONLN)) == -1) {
perror("sysconf");
printWarn("sysconf(_SC_NPROCESSORS_ONLN): %s", strerror(errno));
topo->total_cores = topo->logical_cores; // fallback
}
#endif
@@ -598,7 +610,7 @@ struct cache* get_cache_info_general(struct cache* cach, uint32_t level) {
}
struct cache* get_cache_info(struct cpuInfo* cpu) {
struct cache* cach = malloc(sizeof(struct cache));
struct cache* cach = emalloc(sizeof(struct cache));
init_cache_struct(cach);
uint32_t level;
@@ -637,7 +649,7 @@ struct cache* get_cache_info(struct cpuInfo* cpu) {
}
struct frequency* get_frequency_info(struct cpuInfo* cpu) {
struct frequency* freq = malloc(sizeof(struct frequency));
struct frequency* freq = emalloc(sizeof(struct frequency));
if(cpu->maxLevels < 0x00000016) {
#if defined (_WIN32) || defined (__APPLE__)
@@ -647,7 +659,7 @@ struct frequency* get_frequency_info(struct cpuInfo* cpu) {
#else
printWarn("Can't read frequency information from cpuid (needed level is 0x%.8X, max is 0x%.8X). Using udev", 0x00000016, cpu->maxLevels);
freq->base = UNKNOWN_FREQ;
freq->max = get_max_freq_from_file(0, cpu->hv->present);
freq->max = get_max_freq_from_file(0);
if(freq->max == 0) {
printWarn("Read max CPU frequency from udev and got 0 MHz");
@@ -671,122 +683,55 @@ struct frequency* get_frequency_info(struct cpuInfo* cpu) {
freq->base = UNKNOWN_FREQ;
}
if(freq->max == 0) {
printWarn("Read max CPU frequency from CPUID and got 0 MHz. Using udev");
freq->max = get_max_freq_from_file(0, cpu->hv->present);
printWarn("Read max CPU frequency from CPUID and got 0 MHz");
#ifdef __linux__
printWarn("Using udev to detect frequency");
freq->max = get_max_freq_from_file(0);
if(freq->max == 0) {
printWarn("Read max CPU frequency from udev and got 0 MHz");
freq->max = UNKNOWN_FREQ;
}
#else
freq->max = UNKNOWN_FREQ;
#endif
}
}
return freq;
}
/*** STRING FUNCTIONS ***/
/*
 * Returns a newly allocated string with the single precision peak
 * performance (PP) of the CPU, or STRING_UNKNOWN if freq is UNKNOWN_FREQ.
 *
 * PP(SP) =
 *   N_CORES *
 *   FREQUENCY *
 *   2(Two vector units) *
 *   2(If cpu has fma) *
 *   16(If AVX512), 8(If AVX), 4(If SSE)
 *
 * The value is printed with two decimals in TFLOP/s, GFLOP/s or MFLOP/s
 * depending on its magnitude. The caller owns the returned buffer.
 */
char* get_str_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t freq) {
  // 7 chars for the unit (e.g. "GFLOP/s"), 6 for the digits (e.g. "412.14"),
  // 1 for the separating space and 1 for the terminator
  uint32_t size = 7+6+1+1;
  assert(strlen(STRING_UNKNOWN)+1 <= size);
  char* string = malloc(sizeof(char)*size);

  // Without a frequency there is nothing to compute
  if(freq == UNKNOWN_FREQ) {
    snprintf(string,strlen(STRING_UNKNOWN)+1,STRING_UNKNOWN);
    return string;
  }

  struct features* feat = cpu->feat;
  double flops = topo->physical_cores * topo->sockets * (freq*1000000);
  int vpu_count = get_number_of_vpus(cpu);

  flops = flops * vpu_count;

  if(feat->FMA3 || feat->FMA4)
    flops = flops*2;

  // Ice Lake has AVX512, but it has 1 VPU for AVX512, while
  // it has 2 for AVX2. If this is a Ice Lake CPU, we are computing
  // the peak performance supposing AVX2, not AVX512
  int lanes = 1;
  if(feat->AVX512 && vpus_are_AVX512(cpu))
    lanes = 16;
  else if(feat->AVX || feat->AVX2)
    lanes = 8;
  else if(feat->SSE)
    lanes = 4;
  flops = flops * lanes;

  // See https://sites.utexas.edu/jdm4372/2018/01/22/a-peculiar-
  // throughput-limitation-on-intels-xeon-phi-x200-knights-landing/
  if(is_knights_landing(cpu))
    flops = flops * 6 / 7;

  // Pick the largest unit that keeps the integer part non-zero
  const double tera = 1000000000000.0;
  const double giga = 1000000000.0;
  const double mega = 1000000.0;

  if(flops >= tera) {
    snprintf(string,size,"%.2f TFLOP/s",flops/tera);
  }
  else if(flops >= giga) {
    snprintf(string,size,"%.2f GFLOP/s",flops/giga);
  }
  else {
    snprintf(string,size,"%.2f MFLOP/s",flops/mega);
  }

  return string;
}
// TODO: Refactoring
// STRING FUNCTIONS
char* get_str_topology(struct cpuInfo* cpu, struct topology* topo, bool dual_socket) {
int topo_sockets = dual_socket ? topo->sockets : 1;
char* string;
if(topo->smt_supported > 1) {
//3 for digits, 21 for ' cores (SMT disabled)' which is the longest possible output
uint32_t size = 3+21+1;
string = malloc(sizeof(char)*size);
if(dual_socket) {
// 4 for digits, 21 for ' cores (SMT disabled)' which is the longest possible output
uint32_t max_size = 4+21+1;
string = emalloc(sizeof(char) * max_size);
if(topo->smt_available > 1)
snprintf(string, size, "%d cores (%d threads)",topo->physical_cores * topo->sockets, topo->logical_cores * topo->sockets);
snprintf(string, max_size, "%d cores (%d threads)", topo->physical_cores * topo_sockets, topo->logical_cores * topo_sockets);
else {
if(cpu->cpu_vendor == CPU_VENDOR_AMD)
snprintf(string, size, "%d cores (SMT disabled)",topo->physical_cores * topo->sockets);
snprintf(string, max_size, "%d cores (SMT disabled)", topo->physical_cores * topo_sockets);
else
snprintf(string, size, "%d cores (HT disabled)",topo->physical_cores * topo->sockets);
snprintf(string, max_size, "%d cores (HT disabled)", topo->physical_cores * topo_sockets);
}
}
else {
if(topo->smt_available > 1)
snprintf(string, size, "%d cores (%d threads)",topo->physical_cores,topo->logical_cores);
else {
if(cpu->cpu_vendor == CPU_VENDOR_AMD)
snprintf(string, size, "%d cores (SMT disabled)",topo->physical_cores);
else
snprintf(string, size, "%d cores (HT disabled)",topo->physical_cores);
}
}
}
else {
uint32_t size = 3+7+1;
string = malloc(sizeof(char)*size);
if(dual_socket)
snprintf(string, size, "%d cores",topo->physical_cores * topo->sockets);
else
snprintf(string, size, "%d cores",topo->physical_cores);
uint32_t max_size = 4+7+1;
string = emalloc(sizeof(char) * max_size);
snprintf(string, max_size, "%d cores",topo->physical_cores * topo_sockets);
}
return string;
}
char* get_str_avx(struct cpuInfo* cpu) {
//If all AVX are available, it will use up to 15
char* string = malloc(sizeof(char)*17+1);
char* string = emalloc(sizeof(char)*17+1);
if(!cpu->feat->AVX)
snprintf(string,2+1,"No");
else if(!cpu->feat->AVX2)
@@ -808,7 +753,7 @@ char* get_str_sse(struct cpuInfo* cpu) {
uint32_t SSE4a_sl = 6;
uint32_t SSE4_1_sl = 7;
uint32_t SSE4_2_sl = 7;
char* string = malloc(sizeof(char)*SSE_sl+SSE2_sl+SSE3_sl+SSSE3_sl+SSE4a_sl+SSE4_1_sl+SSE4_2_sl+1);
char* string = emalloc(sizeof(char)*SSE_sl+SSE2_sl+SSE3_sl+SSSE3_sl+SSE4a_sl+SSE4_1_sl+SSE4_2_sl+1);
if(cpu->feat->SSE) {
snprintf(string+last,SSE_sl+1,"SSE,");
@@ -845,7 +790,7 @@ char* get_str_sse(struct cpuInfo* cpu) {
}
char* get_str_fma(struct cpuInfo* cpu) {
char* string = malloc(sizeof(char)*9+1);
char* string = emalloc(sizeof(char)*9+1);
if(!cpu->feat->FMA3)
snprintf(string,2+1,"No");
else if(!cpu->feat->FMA4)

View File

@@ -12,7 +12,6 @@ char* get_str_avx(struct cpuInfo* cpu);
char* get_str_sse(struct cpuInfo* cpu);
char* get_str_fma(struct cpuInfo* cpu);
char* get_str_topology(struct cpuInfo* cpu, struct topology* topo, bool dual_socket);
char* get_str_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t freq);
void print_debug(struct cpuInfo* cpu);
void print_raw(struct cpuInfo* cpu);

View File

@@ -38,8 +38,6 @@
typedef uint32_t MICROARCH;
#define STRING_UNKNOWN "Unknown"
// Data not available
#define NA -1
@@ -121,10 +119,10 @@ struct uarch {
#define UARCH_START if (false) {}
#define CHECK_UARCH(arch, ef_, f_, em_, m_, s_, str, uarch, process) \
else if (ef_ == ef && f_ == f && (em_ == NA || em_ == em) && (m_ == NA || m_ == m) && (s_ == NA || s_ == s)) fill_uarch(arch, str, uarch, process);
#define UARCH_END else { printBug("Unknown microarchitecture detected: M=0x%.8X EM=0x%.8X F=0x%.8X EF=0x%.8X S=0x%.8X", m, em, f, ef, s); fill_uarch(arch, "Unknown", UARCH_UNKNOWN, 0); }
#define UARCH_END else { printBug("Unknown microarchitecture detected: M=0x%.8X EM=0x%.8X F=0x%.8X EF=0x%.8X S=0x%.8X", m, em, f, ef, s); fill_uarch(arch, STRING_UNKNOWN, UARCH_UNKNOWN, 0); }
void fill_uarch(struct uarch* arch, char* str, MICROARCH u, uint32_t process) {
arch->uarch_str = malloc(sizeof(char) * (strlen(str)+1));
arch->uarch_str = emalloc(sizeof(char) * (strlen(str)+1));
strcpy(arch->uarch_str, str);
arch->uarch = u;
arch->process= process;
@@ -132,7 +130,7 @@ void fill_uarch(struct uarch* arch, char* str, MICROARCH u, uint32_t process) {
// Inspired by Todd Allen's decode_uarch_intel
struct uarch* get_uarch_from_cpuid_intel(uint32_t ef, uint32_t f, uint32_t em, uint32_t m, int s) {
struct uarch* arch = malloc(sizeof(struct uarch));
struct uarch* arch = emalloc(sizeof(struct uarch));
// EF: Extended Family //
// F: Family //
@@ -255,7 +253,7 @@ struct uarch* get_uarch_from_cpuid_intel(uint32_t ef, uint32_t f, uint32_t em, u
// Inspired by Todd Allen's decode_uarch_amd
struct uarch* get_uarch_from_cpuid_amd(uint32_t ef, uint32_t f, uint32_t em, uint32_t m, int s) {
struct uarch* arch = malloc(sizeof(struct uarch));
struct uarch* arch = emalloc(sizeof(struct uarch));
// EF: Extended Family //
// F: Family //
@@ -347,8 +345,10 @@ struct uarch* get_uarch_from_cpuid_amd(uint32_t ef, uint32_t f, uint32_t em, uin
CHECK_UARCH(arch, 8, 15, 1, 8, NA, "Zen+", UARCH_ZEN_PLUS, 12) // found only on en.wikichip.org
CHECK_UARCH(arch, 8, 15, 3, 1, NA, "Zen 2", UARCH_ZEN2, 7) // found only on en.wikichip.org
CHECK_UARCH(arch, 8, 15, 6, 0, NA, "Zen 2", UARCH_ZEN2, 7) // undocumented, geekbench.com example
CHECK_UARCH(arch, 8, 15, 7, 1, NA, "Zen 2", UARCH_ZEN2, 7) // undocumented, but samples from Steven Noonan
CHECK_UARCH(arch, 10, 15, NA, NA, NA, "Zen 3", UARCH_ZEN3, 7) // undocumented, LX*
CHECK_UARCH(arch, 8, 15, 6, 8, NA, "Zen 2", UARCH_ZEN2, 7) // found on instlatx64
CHECK_UARCH(arch, 8, 15, 7, 1, NA, "Zen 2", UARCH_ZEN2, 7) // samples from Steven Noonan and instlatx64
CHECK_UARCH(arch, 10, 15, 2, 1, NA, "Zen 3", UARCH_ZEN3, 7) // instlatx64
CHECK_UARCH(arch, 10, 15, 5, 0, NA, "Zen 3", UARCH_ZEN3, 7) // instlatx64
UARCH_END
return arch;
@@ -404,7 +404,7 @@ char* get_str_uarch(struct cpuInfo* cpu) {
}
char* get_str_process(struct cpuInfo* cpu) {
char* str = malloc(sizeof(char) * (strlen(STRING_UNKNOWN)+1));
char* str = emalloc(sizeof(char) * (strlen(STRING_UNKNOWN)+1));
int32_t process = cpu->arch->process;
if(process == UNK) {

23
src/x86/uarch_decode.sh Executable file
View File

@@ -0,0 +1,23 @@
#!/bin/bash -u
# Splits an x86 CPUID signature into the fields matched by the CHECK_UARCH
# table in uarch.c: extended family (EF), family (F), extended model (EM),
# model (M) and stepping (S).
#
# Usage: uarch_decode.sh [CPUID]
#   CPUID  signature in hexadecimal (0x...) or decimal form.
#          Defaults to 0x00A50F00 when omitted.

CPUID="${1:-0x00A50F00}"

# Reject anything that is not a plain number before doing arithmetic on it
number_re='^(0[xX][0-9a-fA-F]+|0|[1-9][0-9]*)$'
if ! [[ "${CPUID}" =~ ${number_re} ]]; then
  printf 'Usage: %s [CPUID]\n' "$0" >&2
  exit 1
fi

efamily=$(( (CPUID >> 20) & 0xFF ))
family=$(( (CPUID >> 8) & 0xF ))
emodel=$(( (CPUID >> 16) & 0xF ))
model=$(( (CPUID >> 4) & 0xF ))
stepping=$(( CPUID & 0xF ))

printf 'CPUID: 0x%.8X\n' "${CPUID}"
printf -- '- EF = 0x%X (%d)\n' "${efamily}" "${efamily}"
printf -- '- F = 0x%X (%d)\n' "${family}" "${family}"
printf -- '- EM = 0x%X (%d)\n' "${emodel}" "${emodel}"
printf -- '- M = 0x%X (%d)\n' "${model}" "${model}"
printf -- '- S = 0x%X (%d)\n' "${stepping}" "${stepping}"

# Optional lookup of the matching table entry (kept disabled, as in the
# original script):
#EF=$efamily
#F=$family
#EM=$emodel
#M=$model
#S=$stepping
#grep -E "\s*CHECK_UARCH\(arch,\s*${EF},\s*${F},\s*(${EM}|NA),\s*(${M}|NA),\s*(${S}|NA)" uarch.c