Fix #23. I tried fetching the cache topology on AMD but could not find a proper way, so the code falls back to the approach from two commits ago: cpufetch has to guess the cache sizes, except for L3, which can be fetched. Since I have been trying many different approaches, the code needs to be refactored.

This commit is contained in:
Dr-Noob
2020-08-30 12:12:25 +02:00
parent 69cc08759a
commit dae0f678ad
5 changed files with 96 additions and 64 deletions

View File

@@ -48,22 +48,15 @@ uint32_t create_mask(uint32_t num_entries, uint32_t *mask_width) {
return (1 << i) -1;
}
uint32_t get_apic_id(bool x2apic_id) {
uint32_t get_apic_id() {
uint32_t eax = 0;
uint32_t ebx = 0;
uint32_t ecx = 0;
uint32_t edx = 0;
if(x2apic_id) {
eax = 0x0000000B;
cpuid(&eax, &ebx, &ecx, &edx);
return edx;
}
else {
eax = 0x00000001;
cpuid(&eax, &ebx, &ecx, &edx);
return (ebx >> 24);
}
eax = 0x00000001;
cpuid(&eax, &ebx, &ecx, &edx);
return (ebx >> 24);
}
bool bind_to_cpu(int cpu_id) {
@@ -172,6 +165,52 @@ bool fill_topo_masks_x2apic(struct topology** topo) {
return true;
}
// Returns the number of last-level (L3) caches in the system.
// Queries AMD's cache-properties leaf (CPUID Fn8000_001D) for cache
// index 3 and derives the LLC count from how many logical cores
// share one cache instance.
uint8_t get_number_llc_amd(struct topology* topo) {
  uint32_t a = 0x8000001D;
  uint32_t b = 0;
  uint32_t c = 3; // cache index of the LLC (L3)
  uint32_t d = 0;

  cpuid(&a, &b, &c, &d);

  // EAX[25:14] holds "logical processors sharing this cache" minus one.
  uint32_t sharing = 1 + ((a >> 14) & 0xfff);

  // One LLC per group of 'sharing' logical cores.
  return topo->logical_cores / sharing;
}
// Estimates the per-level cache counts for AMD CPUs.
// Since the real cache topology cannot be fetched reliably (issue #23),
// assume one private L1i/L1d/L2 per physical core; only the L3 count is
// actually derived from cpuid (see get_number_llc_amd).
void guess_cach_sizes_amd(struct topology** topo) {
  struct topology* t = *topo;

  t->cach->L1i->num_caches = t->physical_cores;
  t->cach->L1d->num_caches = t->physical_cores;
  t->cach->L2->num_caches = t->physical_cores;
  t->cach->L3->num_caches = get_number_llc_amd(t);
}
// Fills in the cache topology for AMD CPUs.
//
// AMD does not support CPUID leaves 0xB/0x1F (extended topology
// enumeration), and querying leaf 0x8000001E (node id / core id /
// siblings) did not yield a reliable core mapping either (see issue #23),
// so instead of walking the full topology we only estimate the per-level
// cache counts. Always returns true.
bool get_topology_amd(struct topology** topo) {
  guess_cach_sizes_amd(topo);
  return true;
}
bool arr_contains_value(uint32_t* arr, uint32_t value, uint32_t arr_size) {
for(uint32_t i=0; i < arr_size; i++) {
if(arr[i] == value) return true;
@@ -267,14 +306,17 @@ bool get_cache_topology_from_apic(struct topology** topo) {
return true;
}
bool get_topology_from_apic(uint32_t cpuid_max_levels, struct topology** topo) {
bool get_topology_from_apic(struct cpuInfo* cpu, struct topology** topo) {
if(cpu->cpu_vendor == VENDOR_AMD)
return get_topology_amd(topo);
uint32_t apic_id;
uint32_t* apic_pkg = malloc(sizeof(uint32_t) * (*topo)->total_cores);
uint32_t* apic_core = malloc(sizeof(uint32_t) * (*topo)->total_cores);
uint32_t* apic_smt = malloc(sizeof(uint32_t) * (*topo)->total_cores);
uint32_t** cache_smt_id_apic = malloc(sizeof(uint32_t*) * (*topo)->total_cores);
uint32_t** cache_id_apic = malloc(sizeof(uint32_t*) * (*topo)->total_cores);
bool x2apic_id = cpuid_max_levels >= 0x0000000B;
bool x2apic_id = cpu->maxLevels >= 0x0000000B;
for(int i=0; i < (*topo)->total_cores; i++) {
cache_smt_id_apic[i] = malloc(sizeof(uint32_t) * ((*topo)->cach->max_cache_level));
@@ -299,7 +341,7 @@ bool get_topology_from_apic(uint32_t cpuid_max_levels, struct topology** topo) {
printErr("Failed binding to CPU %d", i);
return false;
}
apic_id = get_apic_id(x2apic_id);
apic_id = get_apic_id();
apic_pkg[i] = (apic_id & (*topo)->apic->pkg_mask) >> (*topo)->apic->pkg_mask_shift;
apic_core[i] = (apic_id & (*topo)->apic->core_mask) >> (*topo)->apic->smt_mask_width;
@@ -331,7 +373,7 @@ bool get_topology_from_apic(uint32_t cpuid_max_levels, struct topology** topo) {
bool ret = build_topo_from_apic(apic_pkg, apic_smt, cache_id_apic, topo);
// Assumption: If we cant get smt_available, we assume it is equal to smt_supported...
if(!x2apic_id) (*topo)->smt_supported = (*topo)->smt_available;
if (!x2apic_id) (*topo)->smt_supported = (*topo)->smt_available;
//TODO: free
@@ -348,8 +390,7 @@ uint32_t is_smt_enabled(struct topology* topo) {
return false;
}
id = get_apic_id(false) & 1; // get the last bit
printf("0x%.8X %d\n", get_apic_id(false), id);
//if(id == 1) return 2; // We assume there isn't any AMD CPU with more than 2th per core
if(id == 1) return 2; // We assume there isn't any AMD CPU with more than 2th per core
}
return 1;

View File

@@ -14,7 +14,7 @@ struct apic {
uint32_t* cache_id_apic;
};
bool get_topology_from_apic(uint32_t cpuid_max_levels, struct topology** topo);
bool get_topology_from_apic(struct cpuInfo* cpu, struct topology** topo);
uint32_t is_smt_enabled(struct topology* topo);
#endif

View File

@@ -37,31 +37,6 @@
* cpuid amd: https://www.amd.com/system/files/TechDocs/25481.pdf
*/
struct cpuInfo {
bool AVX;
bool AVX2;
bool AVX512;
bool SSE;
bool SSE2;
bool SSE3;
bool SSSE3;
bool SSE4a;
bool SSE4_1;
bool SSE4_2;
bool FMA3;
bool FMA4;
bool AES;
bool SHA;
VENDOR cpu_vendor;
char* cpu_name;
// Max cpuids levels
uint32_t maxLevels;
// Max cpuids extended levels
uint32_t maxExtendedLevels;
};
struct frequency {
int64_t base;
int64_t max;
@@ -287,7 +262,7 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) {
switch(cpu->cpu_vendor) {
case VENDOR_INTEL:
if (cpu->maxLevels >= 0x00000004) {
get_topology_from_apic(cpu->maxLevels, &topo);
get_topology_from_apic(cpu, &topo);
}
else {
printErr("Can't read topology information from cpuid (needed level is 0x%.8X, max is 0x%.8X)", 0x00000001, cpu->maxLevels);
@@ -297,7 +272,7 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) {
topo->smt_supported = 1;
}
break;
case VENDOR_AMD:
case VENDOR_AMD:
if (cpu->maxExtendedLevels >= 0x80000008) {
eax = 0x80000008;
cpuid(&eax, &ebx, &ecx, &edx);
@@ -319,6 +294,7 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) {
topo->logical_cores = 1;
topo->smt_supported = 1;
}
if (cpu->maxLevels >= 0x0000000B) {
topo->smt_available = is_smt_enabled(topo);
}
@@ -331,9 +307,14 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) {
if(topo->smt_supported > 1)
topo->sockets = topo->total_cores / topo->smt_supported / topo->physical_cores; // Idea borrowed from lscpu
else
topo->sockets = topo->total_cores / topo->physical_cores;
topo->sockets = topo->total_cores / topo->physical_cores;
if (cpu->maxExtendedLevels >= 0x8000001D) {
get_topology_from_apic(cpu, &topo);
}
break;
default:
printBug("Cant get topology because VENDOR is empty");
return NULL;
@@ -342,20 +323,6 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach) {
return topo;
}
uint8_t get_number_llc_amd(struct topology* topo) {
uint32_t eax = 0x8000001D;
uint32_t ebx = 0;
uint32_t ecx = 3; // LLC Level
uint32_t edx = 0;
uint32_t num_sharing_cache = 0;
cpuid(&eax, &ebx, &ecx, &edx);
num_sharing_cache = ((eax >> 14) & 0xfff) + 1;
return topo->logical_cores / num_sharing_cache;
}
struct cache* get_cache_info(struct cpuInfo* cpu) {
struct cache* cach = malloc(sizeof(struct cache));
cach->L1i = malloc(sizeof(struct cach));

View File

@@ -10,9 +10,35 @@
#define UNKNOWN -1
struct cpuInfo;
typedef int32_t VENDOR;
struct frequency;
// CPU identification and ISA-extension feature flags, filled in via cpuid.
struct cpuInfo {
// SIMD extension support flags (one per ISA extension).
bool AVX;
bool AVX2;
bool AVX512;
bool SSE;
bool SSE2;
bool SSE3;
bool SSSE3;
bool SSE4a;
bool SSE4_1;
bool SSE4_2;
// Fused multiply-add variants (FMA4 is AMD-only).
bool FMA3;
bool FMA4;
// Hardware crypto/hash instruction support.
bool AES;
bool SHA;
// CPU manufacturer (e.g. VENDOR_INTEL / VENDOR_AMD).
VENDOR cpu_vendor;
// Marketing name of the CPU — presumably the cpuid brand string; heap-allocated.
char* cpu_name;
// Max cpuids levels
uint32_t maxLevels;
// Max cpuids extended levels
uint32_t maxExtendedLevels;
};
struct cach {
int32_t size;
uint8_t num_caches;
@@ -40,8 +66,6 @@ struct topology {
struct cache* cach;
};
typedef int32_t VENDOR;
struct cpuInfo* get_cpu_info();
VENDOR get_cpu_vendor(struct cpuInfo* cpu);
uint32_t get_nsockets(struct topology* topo);

View File

@@ -6,7 +6,7 @@
#include "cpuid.h"
#include "global.h"
static const char* VERSION = "0.61";
static const char* VERSION = "0.62";
void print_help(char *argv[]) {
printf("Usage: %s [--version] [--help] [--levels] [--style fancy|retro|legacy] [--color 'R,G,B:R,G,B:R,G,B:R,G,B']\n\