Fix #23. I tried fetching the cache topology on AMD but could not find a proper way to do it, so the code falls back to the approach used two commits ago. cpufetch has to guess cache sizes except for L3, which can be fetched. Since I have tried many different approaches, the code needs to be refactored.

This commit is contained in:
Dr-Noob
2020-08-30 12:12:25 +02:00
parent 69cc08759a
commit dae0f678ad
5 changed files with 96 additions and 64 deletions

View File

@@ -48,22 +48,15 @@ uint32_t create_mask(uint32_t num_entries, uint32_t *mask_width) {
return (1 << i) -1; return (1 << i) -1;
} }
uint32_t get_apic_id(bool x2apic_id) { uint32_t get_apic_id() {
uint32_t eax = 0; uint32_t eax = 0;
uint32_t ebx = 0; uint32_t ebx = 0;
uint32_t ecx = 0; uint32_t ecx = 0;
uint32_t edx = 0; uint32_t edx = 0;
if(x2apic_id) { eax = 0x00000001;
eax = 0x0000000B; cpuid(&eax, &ebx, &ecx, &edx);
cpuid(&eax, &ebx, &ecx, &edx); return (ebx >> 24);
return edx;
}
else {
eax = 0x00000001;
cpuid(&eax, &ebx, &ecx, &edx);
return (ebx >> 24);
}
} }
bool bind_to_cpu(int cpu_id) { bool bind_to_cpu(int cpu_id) {
@@ -172,6 +165,52 @@ bool fill_topo_masks_x2apic(struct topology** topo) {
return true; return true;
} }
/*
 * Returns the number of last-level (L3) caches on an AMD CPU.
 *
 * Executes CPUID leaf 0x8000001D with ECX = 3 and takes the 12-bit field
 * in EAX bits 25:14, plus one, as the divisor (presumably the number of
 * logical processors sharing that cache, per AMD's CPUID specification
 * -- confirm). The result is topo->logical_cores divided by that value,
 * narrowed to uint8_t on return.
 */
uint8_t get_number_llc_amd(struct topology* topo) {
  uint32_t eax = 0x8000001D;
  uint32_t ebx = 0;
  uint32_t ecx = 3; // LLC Level
  uint32_t edx = 0;

  cpuid(&eax, &ebx, &ecx, &edx);

  // The field is stored as "count - 1", hence the +1.
  uint32_t sharers = ((eax >> 14) & 0xfff) + 1;

  return topo->logical_cores / sharers;
}
/*
 * Fills in the number of caches per level for an AMD CPU.
 *
 * Despite its name, this function sets num_caches, not cache sizes.
 * L1i, L1d and L2 are assumed to exist once per physical core; the L3
 * count is obtained from get_number_llc_amd().
 */
void guess_cach_sizes_amd(struct topology** topo) {
  struct topology* t = *topo;

  t->cach->L1i->num_caches = t->physical_cores;
  t->cach->L1d->num_caches = t->physical_cores;
  t->cach->L2->num_caches = t->physical_cores;
  t->cach->L3->num_caches = get_number_llc_amd(t);
}
/*
 * Topology entry point for AMD CPUs.
 *
 * Currently this only fills in the per-level cache counts through
 * guess_cach_sizes_amd() and always returns true.
 *
 * An earlier, disabled experiment read CPUID leaf 0x8000001E (node id
 * from ECX[7:0], core id from EBX[7:0], threads per core from
 * EBX[15:8] + 1) and tried an extended-topology detection call. It is
 * not used: the original author's note states that AMD does not support
 * CPUID 0xB or 0x1F to query topology.
 */
bool get_topology_amd(struct topology** topo) {
  guess_cach_sizes_amd(topo);

  return true;
}
bool arr_contains_value(uint32_t* arr, uint32_t value, uint32_t arr_size) { bool arr_contains_value(uint32_t* arr, uint32_t value, uint32_t arr_size) {
for(uint32_t i=0; i < arr_size; i++) { for(uint32_t i=0; i < arr_size; i++) {
if(arr[i] == value) return true; if(arr[i] == value) return true;
@@ -267,14 +306,17 @@ bool get_cache_topology_from_apic(struct topology** topo) {
return true; return true;
} }
bool get_topology_from_apic(uint32_t cpuid_max_levels, struct topology** topo) { bool get_topology_from_apic(struct cpuInfo* cpu, struct topology** topo) {
if(cpu->cpu_vendor == VENDOR_AMD)
return get_topology_amd(topo);
uint32_t apic_id; uint32_t apic_id;
uint32_t* apic_pkg = malloc(sizeof(uint32_t) * (*topo)->total_cores); uint32_t* apic_pkg = malloc(sizeof(uint32_t) * (*topo)->total_cores);
uint32_t* apic_core = malloc(sizeof(uint32_t) * (*topo)->total_cores); uint32_t* apic_core = malloc(sizeof(uint32_t) * (*topo)->total_cores);
uint32_t* apic_smt = malloc(sizeof(uint32_t) * (*topo)->total_cores); uint32_t* apic_smt = malloc(sizeof(uint32_t) * (*topo)->total_cores);
uint32_t** cache_smt_id_apic = malloc(sizeof(uint32_t*) * (*topo)->total_cores); uint32_t** cache_smt_id_apic = malloc(sizeof(uint32_t*) * (*topo)->total_cores);
uint32_t** cache_id_apic = malloc(sizeof(uint32_t*) * (*topo)->total_cores); uint32_t** cache_id_apic = malloc(sizeof(uint32_t*) * (*topo)->total_cores);
bool x2apic_id = cpuid_max_levels >= 0x0000000B; bool x2apic_id = cpu->maxLevels >= 0x0000000B;
for(int i=0; i < (*topo)->total_cores; i++) { for(int i=0; i < (*topo)->total_cores; i++) {
cache_smt_id_apic[i] = malloc(sizeof(uint32_t) * ((*topo)->cach->max_cache_level)); cache_smt_id_apic[i] = malloc(sizeof(uint32_t) * ((*topo)->cach->max_cache_level));
@@ -299,7 +341,7 @@ bool get_topology_from_apic(uint32_t cpuid_max_levels, struct topology** topo) {
printErr("Failed binding to CPU %d", i); printErr("Failed binding to CPU %d", i);
return false; return false;
} }
apic_id = get_apic_id(x2apic_id); apic_id = get_apic_id();
apic_pkg[i] = (apic_id & (*topo)->apic->pkg_mask) >> (*topo)->apic->pkg_mask_shift; apic_pkg[i] = (apic_id & (*topo)->apic->pkg_mask) >> (*topo)->apic->pkg_mask_shift;
apic_core[i] = (apic_id & (*topo)->apic->core_mask) >> (*topo)->apic->smt_mask_width; apic_core[i] = (apic_id & (*topo)->apic->core_mask) >> (*topo)->apic->smt_mask_width;
@@ -331,7 +373,7 @@ bool get_topology_from_apic(uint32_t cpuid_max_levels, struct topology** topo) {
bool ret = build_topo_from_apic(apic_pkg, apic_smt, cache_id_apic, topo); bool ret = build_topo_from_apic(apic_pkg, apic_smt, cache_id_apic, topo);
// Assumption: If we cant get smt_available, we assume it is equal to smt_supported... // Assumption: If we cant get smt_available, we assume it is equal to smt_supported...
if(!x2apic_id) (*topo)->smt_supported = (*topo)->smt_available; if (!x2apic_id) (*topo)->smt_supported = (*topo)->smt_available;
//TODO: free //TODO: free
@@ -348,8 +390,7 @@ uint32_t is_smt_enabled(struct topology* topo) {
return false; return false;
} }
id = get_apic_id(false) & 1; // get the last bit id = get_apic_id(false) & 1; // get the last bit
printf("0x%.8X %d\n", get_apic_id(false), id); if(id == 1) return 2; // We assume there isn't any AMD CPU with more than 2th per core
//if(id == 1) return 2; // We assume there isn't any AMD CPU with more than 2th per core
} }
return 1; return 1;

View File

@@ -14,7 +14,7 @@ struct apic {
uint32_t* cache_id_apic; uint32_t* cache_id_apic;
}; };
bool get_topology_from_apic(uint32_t cpuid_max_levels, struct topology** topo); bool get_topology_from_apic(struct cpuInfo* cpu, struct topology** topo);
uint32_t is_smt_enabled(struct topology* topo); uint32_t is_smt_enabled(struct topology* topo);
#endif #endif

View File

@@ -37,31 +37,6 @@
* cpuid amd: https://www.amd.com/system/files/TechDocs/25481.pdf * cpuid amd: https://www.amd.com/system/files/TechDocs/25481.pdf
*/ */
// Describes the detected CPU: instruction-set extension flags, vendor,
// name string and the highest CPUID leaves available.
struct cpuInfo {
// One flag per instruction-set extension (presumably true when the CPU
// reports support for it -- the code that fills these is not shown here).
bool AVX;
bool AVX2;
bool AVX512;
bool SSE;
bool SSE2;
bool SSE3;
bool SSSE3;
bool SSE4a;
bool SSE4_1;
bool SSE4_2;
bool FMA3;
bool FMA4;
bool AES;
bool SHA;
// Vendor identifier; compared against VENDOR_INTEL / VENDOR_AMD by callers.
VENDOR cpu_vendor;
char* cpu_name;
// Highest standard CPUID leaf; callers compare it with leaf numbers
// such as 0x00000004 and 0x0000000B before querying them.
uint32_t maxLevels;
// Highest extended CPUID leaf; callers compare it with leaf numbers
// such as 0x80000008 and 0x8000001D before querying them.
uint32_t maxExtendedLevels;
};
struct frequency { struct frequency {
int64_t base; int64_t base;
int64_t max; int64_t max;
@@ -287,7 +262,7 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) {
switch(cpu->cpu_vendor) { switch(cpu->cpu_vendor) {
case VENDOR_INTEL: case VENDOR_INTEL:
if (cpu->maxLevels >= 0x00000004) { if (cpu->maxLevels >= 0x00000004) {
get_topology_from_apic(cpu->maxLevels, &topo); get_topology_from_apic(cpu, &topo);
} }
else { else {
printErr("Can't read topology information from cpuid (needed level is 0x%.8X, max is 0x%.8X)", 0x00000001, cpu->maxLevels); printErr("Can't read topology information from cpuid (needed level is 0x%.8X, max is 0x%.8X)", 0x00000001, cpu->maxLevels);
@@ -297,7 +272,7 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) {
topo->smt_supported = 1; topo->smt_supported = 1;
} }
break; break;
case VENDOR_AMD: case VENDOR_AMD:
if (cpu->maxExtendedLevels >= 0x80000008) { if (cpu->maxExtendedLevels >= 0x80000008) {
eax = 0x80000008; eax = 0x80000008;
cpuid(&eax, &ebx, &ecx, &edx); cpuid(&eax, &ebx, &ecx, &edx);
@@ -319,6 +294,7 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) {
topo->logical_cores = 1; topo->logical_cores = 1;
topo->smt_supported = 1; topo->smt_supported = 1;
} }
if (cpu->maxLevels >= 0x0000000B) { if (cpu->maxLevels >= 0x0000000B) {
topo->smt_available = is_smt_enabled(topo); topo->smt_available = is_smt_enabled(topo);
} }
@@ -331,9 +307,14 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) {
if(topo->smt_supported > 1) if(topo->smt_supported > 1)
topo->sockets = topo->total_cores / topo->smt_supported / topo->physical_cores; // Idea borrowed from lscpu topo->sockets = topo->total_cores / topo->smt_supported / topo->physical_cores; // Idea borrowed from lscpu
else else
topo->sockets = topo->total_cores / topo->physical_cores; topo->sockets = topo->total_cores / topo->physical_cores;
if (cpu->maxExtendedLevels >= 0x8000001D) {
get_topology_from_apic(cpu, &topo);
}
break; break;
default: default:
printBug("Cant get topology because VENDOR is empty"); printBug("Cant get topology because VENDOR is empty");
return NULL; return NULL;
@@ -342,20 +323,6 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) {
return topo; return topo;
} }
/*
 * Returns the number of last-level (L3) caches on an AMD CPU.
 *
 * Executes CPUID leaf 0x8000001D with ECX = 3 and takes the 12-bit field
 * in EAX bits 25:14, plus one, as the divisor (presumably the number of
 * logical processors sharing that cache, per AMD's CPUID specification
 * -- confirm). The result is topo->logical_cores divided by that value,
 * narrowed to uint8_t on return.
 */
uint8_t get_number_llc_amd(struct topology* topo) {
  uint32_t eax = 0x8000001D;
  uint32_t ebx = 0;
  uint32_t ecx = 3; // LLC Level
  uint32_t edx = 0;

  cpuid(&eax, &ebx, &ecx, &edx);

  // The field is stored as "count - 1", hence the +1.
  uint32_t sharers = ((eax >> 14) & 0xfff) + 1;

  return topo->logical_cores / sharers;
}
struct cache* get_cache_info(struct cpuInfo* cpu) { struct cache* get_cache_info(struct cpuInfo* cpu) {
struct cache* cach = malloc(sizeof(struct cache)); struct cache* cach = malloc(sizeof(struct cache));
cach->L1i = malloc(sizeof(struct cach)); cach->L1i = malloc(sizeof(struct cach));

View File

@@ -10,9 +10,35 @@
#define UNKNOWN -1 #define UNKNOWN -1
struct cpuInfo; typedef int32_t VENDOR;
struct frequency; struct frequency;
// Describes the detected CPU: instruction-set extension flags, vendor,
// name string and the highest CPUID leaves available.
struct cpuInfo {
// One flag per instruction-set extension (presumably true when the CPU
// reports support for it -- the code that fills these is not shown here).
bool AVX;
bool AVX2;
bool AVX512;
bool SSE;
bool SSE2;
bool SSE3;
bool SSSE3;
bool SSE4a;
bool SSE4_1;
bool SSE4_2;
bool FMA3;
bool FMA4;
bool AES;
bool SHA;
// Vendor identifier; compared against VENDOR_INTEL / VENDOR_AMD by callers.
VENDOR cpu_vendor;
char* cpu_name;
// Highest standard CPUID leaf; callers compare it with leaf numbers
// such as 0x00000004 and 0x0000000B before querying them.
uint32_t maxLevels;
// Highest extended CPUID leaf; callers compare it with leaf numbers
// such as 0x80000008 and 0x8000001D before querying them.
uint32_t maxExtendedLevels;
};
struct cach { struct cach {
int32_t size; int32_t size;
uint8_t num_caches; uint8_t num_caches;
@@ -40,8 +66,6 @@ struct topology {
struct cache* cach; struct cache* cach;
}; };
typedef int32_t VENDOR;
struct cpuInfo* get_cpu_info(); struct cpuInfo* get_cpu_info();
VENDOR get_cpu_vendor(struct cpuInfo* cpu); VENDOR get_cpu_vendor(struct cpuInfo* cpu);
uint32_t get_nsockets(struct topology* topo); uint32_t get_nsockets(struct topology* topo);

View File

@@ -6,7 +6,7 @@
#include "cpuid.h" #include "cpuid.h"
#include "global.h" #include "global.h"
static const char* VERSION = "0.61"; static const char* VERSION = "0.62";
void print_help(char *argv[]) { void print_help(char *argv[]) {
printf("Usage: %s [--version] [--help] [--levels] [--style fancy|retro|legacy] [--color 'R,G,B:R,G,B:R,G,B:R,G,B']\n\ printf("Usage: %s [--version] [--help] [--levels] [--style fancy|retro|legacy] [--color 'R,G,B:R,G,B:R,G,B:R,G,B']\n\