mirror of
https://github.com/Dr-Noob/cpufetch.git
synced 2026-03-24 23:40:39 +01:00
[v1.04][ARM] Experimental new backend for computing peak performance, considering the number of VPUs and width (previously this was highly simplified)
This commit is contained in:
@@ -80,26 +80,21 @@ int64_t get_peak_performance(struct cpuInfo* cpu) {
|
||||
}
|
||||
}
|
||||
|
||||
int64_t flops = 0;
|
||||
int64_t total_flops = 0;
|
||||
ptr = cpu;
|
||||
|
||||
if(cpu->soc->soc_vendor == SOC_VENDOR_APPLE) {
|
||||
// Special case for M1/M2
|
||||
// First we find the E cores, then the P
|
||||
// M1 have 2 (E cores) or 4 (P cores) FMA units
|
||||
// Source: https://dougallj.github.io/applecpu/firestorm-simd.html
|
||||
flops += ptr->topo->total_cores * (get_freq(ptr->freq) * 1000000) * 2 * 4 * 2;
|
||||
ptr = ptr->next_cpu;
|
||||
flops += ptr->topo->total_cores * (get_freq(ptr->freq) * 1000000) * 2 * 4 * 4;
|
||||
}
|
||||
else {
|
||||
for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
|
||||
flops += ptr->topo->total_cores * (get_freq(ptr->freq) * 1000000);
|
||||
}
|
||||
if(cpu->feat->NEON) flops = flops * 4;
|
||||
int vpus = get_number_of_vpus(ptr);
|
||||
int vpus_width = get_vpus_width(ptr);
|
||||
bool has_fma = has_fma_support(ptr);
|
||||
|
||||
int64_t flops = ptr->topo->total_cores * get_freq(ptr->freq) * 1000000 * vpus * (vpus_width/32);
|
||||
if(has_fma) flops = flops * 2;
|
||||
|
||||
total_flops += flops;
|
||||
}
|
||||
|
||||
return flops;
|
||||
return total_flops;
|
||||
}
|
||||
|
||||
uint32_t fill_ids_from_midr(uint32_t* midr_array, int32_t* freq_array, uint32_t* ids_array, int len) {
|
||||
|
||||
@@ -72,6 +72,7 @@ enum {
|
||||
UARCH_CORTEX_X2,
|
||||
UARCH_NEOVERSE_N1,
|
||||
UARCH_NEOVERSE_E1,
|
||||
UARCH_NEOVERSE_V1,
|
||||
UARCH_SCORPION,
|
||||
UARCH_KRAIT,
|
||||
UARCH_KYRO,
|
||||
@@ -146,6 +147,7 @@ static const ISA isas_uarch[] = {
|
||||
[UARCH_CORTEX_X2] = ISA_ARMv9_A,
|
||||
[UARCH_NEOVERSE_N1] = ISA_ARMv8_2_A,
|
||||
[UARCH_NEOVERSE_E1] = ISA_ARMv8_2_A,
|
||||
[UARCH_NEOVERSE_V1] = ISA_ARMv8_4_A,
|
||||
[UARCH_BRAHMA_B15] = ISA_ARMv7_A, // Same as Cortex-A15
|
||||
[UARCH_BRAHMA_B53] = ISA_ARMv8_A, // Same as Cortex-A53
|
||||
[UARCH_THUNDERX] = ISA_ARMv8_A,
|
||||
@@ -254,6 +256,7 @@ struct uarch* get_uarch_from_midr(uint32_t midr, struct cpuInfo* cpu) {
|
||||
CHECK_UARCH(arch, cpu, 'A', 0xD0C, NA, NA, "Neoverse N1", UARCH_NEOVERSE_N1, CPU_VENDOR_ARM)
|
||||
CHECK_UARCH(arch, cpu, 'A', 0xD0D, NA, NA, "Cortex-A77", UARCH_CORTEX_A77, CPU_VENDOR_ARM)
|
||||
CHECK_UARCH(arch, cpu, 'A', 0xD0E, NA, NA, "Cortex-A76", UARCH_CORTEX_A76, CPU_VENDOR_ARM)
|
||||
CHECK_UARCH(arch, cpu, 'A', 0xD40, NA, NA, "Neoverse V1", UARCH_NEOVERSE_V1, CPU_VENDOR_ARM)
|
||||
CHECK_UARCH(arch, cpu, 'A', 0xD41, NA, NA, "Cortex-A78", UARCH_CORTEX_A78, CPU_VENDOR_ARM)
|
||||
CHECK_UARCH(arch, cpu, 'A', 0xD44, NA, NA, "Cortex-X1", UARCH_CORTEX_X1, CPU_VENDOR_ARM)
|
||||
CHECK_UARCH(arch, cpu, 'A', 0xD46, NA, NA, "Cortex‑A510", UARCH_CORTEX_A510, CPU_VENDOR_ARM)
|
||||
@@ -325,6 +328,96 @@ struct uarch* get_uarch_from_midr(uint32_t midr, struct cpuInfo* cpu) {
|
||||
return arch;
|
||||
}
|
||||
|
||||
bool is_ARMv8_or_newer(struct cpuInfo* cpu) {
|
||||
return cpu->arch->isa == ISA_ARMv8_A ||
|
||||
cpu->arch->isa == ISA_ARMv8_A_AArch32 ||
|
||||
cpu->arch->isa == ISA_ARMv8_1_A ||
|
||||
cpu->arch->isa == ISA_ARMv8_2_A ||
|
||||
cpu->arch->isa == ISA_ARMv8_3_A ||
|
||||
cpu->arch->isa == ISA_ARMv8_4_A ||
|
||||
cpu->arch->isa == ISA_ARMv8_5_A ||
|
||||
cpu->arch->isa == ISA_ARMv9_A;
|
||||
}
|
||||
|
||||
bool has_fma_support(struct cpuInfo* cpu) {
|
||||
// Arm A64 Instruction Set Architecture
|
||||
// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions
|
||||
return is_ARMv8_or_newer(cpu);
|
||||
}
|
||||
|
||||
int get_vpus_width(struct cpuInfo* cpu) {
|
||||
// If the CPU has NEON, width can be 64 or 128 [1].
|
||||
// In >= ARMv8, NEON are 128 bits width [2]
|
||||
// If the CPU has SVE/SVE2, width can be between 128-2048 [3],
|
||||
// so we must check the exact width depending on
|
||||
// the exact chip (Neoverse V1 uses 256b implementations.)
|
||||
//
|
||||
// [1] https://en.wikipedia.org/wiki/ARM_architecture_family#Advanced_SIMD_(Neon)
|
||||
// [2] https://developer.arm.com/documentation/102474/0100/Fundamentals-of-Armv8-Neon-technology
|
||||
// [3] https://www.anandtech.com/show/16640/arm-announces-neoverse-v1-n2-platforms-cpus-cmn700-mesh/5
|
||||
|
||||
MICROARCH ua = cpu->arch->uarch;
|
||||
switch(ua) {
|
||||
case UARCH_NEOVERSE_V1:
|
||||
return 256;
|
||||
default:
|
||||
if(cpu->feat->NEON) {
|
||||
if(is_ARMv8_or_newer(cpu)) {
|
||||
return 128;
|
||||
}
|
||||
else {
|
||||
return 64;
|
||||
}
|
||||
}
|
||||
else {
|
||||
return 32;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int get_number_of_vpus(struct cpuInfo* cpu) {
|
||||
MICROARCH ua = cpu->arch->uarch;
|
||||
|
||||
switch(ua) {
|
||||
case UARCH_FIRESTORM: // [https://dougallj.github.io/applecpu/firestorm-simd.html]
|
||||
case UARCH_AVALANCHE: // [https://en.wikipedia.org/wiki/Comparison_of_ARM_processors]
|
||||
case UARCH_CORTEX_X1: // [https://www.anandtech.com/show/15813/arm-cortex-a78-cortex-x1-cpu-ip-diverging/3]
|
||||
case UARCH_CORTEX_X2: // [https://www.anandtech.com/show/16693/arm-announces-mobile-armv9-cpu-microarchitectures-cortexx2-cortexa710-cortexa510/2]
|
||||
case UARCH_NEOVERSE_V1: // [https://en.wikichip.org/wiki/arm_holdings/microarchitectures/neoverse_v1]
|
||||
return 4;
|
||||
case UARCH_EXYNOS_M3: // [https://www.anandtech.com/show/12361/samsung-exynos-m3-architecture]
|
||||
case UARCH_EXYNOS_M4: // [https://en.wikichip.org/wiki/samsung/microarchitectures/m4#Block_Diagram]
|
||||
case UARCH_EXYNOS_M5: // [https://en.wikichip.org/wiki/samsung/microarchitectures/m5]
|
||||
return 3;
|
||||
case UARCH_ICESTORM: // [https://dougallj.github.io/applecpu/icestorm-simd.html]
|
||||
case UARCH_BLIZZARD: // [https://en.wikipedia.org/wiki/Comparison_of_ARM_processors]
|
||||
case UARCH_CORTEX_A57: // [https://www.anandtech.com/show/8718/the-samsung-galaxy-note-4-exynos-review/5]
|
||||
case UARCH_CORTEX_A72: // [https://www.anandtech.com/show/10347/arm-cortex-a73-artemis-unveiled/2]
|
||||
case UARCH_CORTEX_A73: // [https://www.anandtech.com/show/10347/arm-cortex-a73-artemis-unveiled/2]
|
||||
case UARCH_CORTEX_A75: // [https://www.anandtech.com/show/11441/dynamiq-and-arms-new-cpus-cortex-a75-a55/3]
|
||||
case UARCH_CORTEX_A76: // [https://www.anandtech.com/show/12785/arm-cortex-a76-cpu-unveiled-7nm-powerhouse/3]
|
||||
case UARCH_CORTEX_A77: // [https://fuse.wikichip.org/news/2339/arm-unveils-cortex-a77-emphasizes-single-thread-performance]
|
||||
case UARCH_CORTEX_A78: // [https://fuse.wikichip.org/news/3536/arm-unveils-the-cortex-a78-when-less-is-more]
|
||||
case UARCH_EXYNOS_M1: // [https://www.anandtech.com/show/12361/samsung-exynos-m3-architecture]
|
||||
case UARCH_EXYNOS_M2: // [https://www.anandtech.com/show/12361/samsung-exynos-m3-architecture]
|
||||
case UARCH_NEOVERSE_N1: // [https://en.wikichip.org/wiki/arm_holdings/microarchitectures/neoverse_n1#Individual_Core]
|
||||
return 2;
|
||||
case UARCH_NEOVERSE_E1: // [https://www.anandtech.com/show/13959/arm-announces-neoverse-n1-platform/5]
|
||||
// A510 is integrated as part of a Complex. Normally, each complex would incorporate two Cortex-A510 cores.
|
||||
// Each complex incorporates a single VPU with 2 ports, so for each A510 there is theoretically 1 port.
|
||||
case UARCH_CORTEX_A510: // [https://en.wikichip.org/wiki/arm_holdings/microarchitectures/cortex-a510#Vector_Processing_Unit_.28VPU.29]
|
||||
// Not sure about this one since there is no explicit information saying it has one, but they never state it has more either.
|
||||
case UARCH_CORTEX_A710: // [Arm Cortex‑A710 Core Technical Reference Manual]
|
||||
return 1;
|
||||
default:
|
||||
// ARMv6
|
||||
// ARMv7
|
||||
// Remaining UARCH_CORTEX_AXX
|
||||
// Old Snapdragon (e.g., Scorpion, Krait, etc)
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
char* get_str_uarch(struct cpuInfo* cpu) {
|
||||
return cpu->arch->uarch_str;
|
||||
}
|
||||
|
||||
@@ -6,6 +6,9 @@
|
||||
#include "midr.h"
|
||||
|
||||
struct uarch* get_uarch_from_midr(uint32_t midr, struct cpuInfo* cpu);
|
||||
int get_number_of_vpus(struct cpuInfo* cpu);
|
||||
int get_vpus_width(struct cpuInfo* cpu);
|
||||
bool has_fma_support(struct cpuInfo* cpu);
|
||||
char* get_str_uarch(struct cpuInfo* cpu);
|
||||
void free_uarch_struct(struct uarch* arch);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user