[v1.05] Continue merging measure-freq #220

- [v1.05][X86] Show SSE if AVX/FMA is not supported
- [v1.05][X86] Do not stop if cach is NULL and check for non-NULL cache in get_topology_info functions
- [v1.05][X86] Fix bug where the number of cpus was not set if NULL was returned inside the loop. Ensure topo is not NULL in get_peak_performance. Fall back to UNKNOWN_DATA when we have no information about topology
This commit is contained in:
Dr-Noob
2024-07-05 08:37:54 +01:00
parent d4cadbd807
commit b019256515
3 changed files with 41 additions and 18 deletions

View File

@@ -61,6 +61,7 @@ enum {
ATTRIBUTE_NCORES,
ATTRIBUTE_NCORES_DUAL,
#ifdef ARCH_X86
ATTRIBUTE_SSE,
ATTRIBUTE_AVX,
ATTRIBUTE_FMA,
#elif ARCH_PPC
@@ -96,6 +97,7 @@ static const char* ATTRIBUTE_FIELDS [] = {
"Cores:",
"Cores (Total):",
#ifdef ARCH_X86
"SSE:",
"AVX:",
"FMA:",
#elif ARCH_PPC
@@ -131,6 +133,7 @@ static const char* ATTRIBUTE_FIELDS_SHORT [] = {
"Cores:",
"Cores (Total):",
#ifdef ARCH_X86
"SSE:",
"AVX:",
"FMA:",
#elif ARCH_PPC
@@ -591,6 +594,7 @@ bool print_cpufetch_x86(struct cpuInfo* cpu, STYLE s, struct color** cs, struct
for(int i = 0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
char* max_frequency = get_str_freq(ptr->freq);
char* avx = get_str_avx(ptr);
char* sse = get_str_sse(ptr);
char* fma = get_str_fma(ptr);
char* cpu_num = emalloc(sizeof(char) * 9);
@@ -625,8 +629,17 @@ bool print_cpufetch_x86(struct cpuInfo* cpu, STYLE s, struct color** cs, struct
setAttribute(art, ATTRIBUTE_NCORES, n_cores);
}
}
setAttribute(art, ATTRIBUTE_AVX, avx);
setAttribute(art, ATTRIBUTE_FMA, fma);
// Show the most modern vector instructions.
// If AVX is supported show it, otherwise show SSE
if (strcmp(avx, "No") == 0) {
setAttribute(art, ATTRIBUTE_SSE, sse);
}
else {
setAttribute(art, ATTRIBUTE_AVX, avx);
setAttribute(art, ATTRIBUTE_FMA, fma);
}
if(l1i != NULL) setAttribute(art, ATTRIBUTE_L1i, l1i);
if(l1d != NULL) setAttribute(art, ATTRIBUTE_L1d, l1d);
if(l2 != NULL) setAttribute(art, ATTRIBUTE_L2, l2);

View File

@@ -369,6 +369,11 @@ bool fill_apic_ids(uint32_t* apic_ids, int first_core, int n, bool x2apic_id) {
}
bool get_topology_from_apic(struct cpuInfo* cpu, struct topology* topo) {
if (topo->cach == NULL) {
printWarn("get_topology_from_apic: cach is NULL");
return false;
}
uint32_t apic_id;
uint32_t* apic_ids = emalloc(sizeof(uint32_t) * topo->total_cores_module);
uint32_t* apic_pkg = emalloc(sizeof(uint32_t) * topo->total_cores_module);

View File

@@ -218,7 +218,7 @@ int64_t get_peak_performance(struct cpuInfo* cpu, bool accurate_pp) {
#endif
//First, check we have consistent data
if(freq == UNKNOWN_DATA || topo->logical_cores == UNKNOWN_DATA) {
if(freq == UNKNOWN_DATA || topo == NULL || topo->logical_cores == UNKNOWN_DATA) {
return -1;
}
@@ -451,7 +451,7 @@ struct cpuInfo* get_cpu_info(void) {
cpu->cach = NULL;
cpu->feat = NULL;
uint32_t modules = 1;
cpu->num_cpus = 1;
uint32_t eax = 0;
uint32_t ebx = 0;
uint32_t ecx = 0;
@@ -507,12 +507,12 @@ struct cpuInfo* get_cpu_info(void) {
cpu->hybrid_flag = (edx >> 15) & 0x1;
}
if(cpu->hybrid_flag) modules = 2;
if(cpu->hybrid_flag) cpu->num_cpus = 2;
struct cpuInfo* ptr = cpu;
for(uint32_t i=0; i < modules; i++) {
for(uint32_t i=0; i < cpu->num_cpus; i++) {
int32_t first_core;
set_cpu_module(i, modules, &first_core);
set_cpu_module(i, cpu->num_cpus, &first_core);
if(i > 0) {
ptr->next_cpu = emalloc(sizeof(struct cpuInfo));
@@ -547,11 +547,7 @@ struct cpuInfo* get_cpu_info(void) {
cpu->cpu_name = infer_cpu_name_from_uarch(cpu->arch);
}
// If any field of the struct is NULL,
// return early, as next functions
// require non NULL fields in cach and topo
ptr->cach = get_cache_info(ptr);
if(ptr->cach == NULL) return cpu;
if(cpu->hybrid_flag) {
ptr->topo = get_topology_info(ptr, ptr->cach, i);
@@ -559,16 +555,23 @@ struct cpuInfo* get_cpu_info(void) {
else {
ptr->topo = get_topology_info(ptr, ptr->cach, -1);
}
if(cpu->topo == NULL) return cpu;
// If topo is NULL, return early, as get_peak_performance
// requires non-NULL topology.
if(ptr->topo == NULL) return cpu;
}
cpu->num_cpus = modules;
cpu->peak_performance = get_peak_performance(cpu, accurate_pp());
return cpu;
}
bool get_cache_topology_amd(struct cpuInfo* cpu, struct topology* topo) {
if (topo->cach == NULL) {
printWarn("get_cache_topology_amd: cach is NULL");
return false;
}
if(cpu->maxExtendedLevels >= 0x8000001D && cpu->topology_extensions) {
uint32_t i, eax, ebx, ecx, edx, num_sharing_cache, cache_type, cache_level;
@@ -644,10 +647,12 @@ bool get_cache_topology_amd(struct cpuInfo* cpu, struct topology* topo) {
#ifdef __linux__
void get_topology_from_udev(struct topology* topo) {
// TODO: To be improved in the future
topo->total_cores = get_ncores_from_cpuinfo();
topo->logical_cores = topo->total_cores;
topo->physical_cores = topo->total_cores;
// TODO: To be improved in the future
// Conservative setting as we only know the total
// number of cores.
topo->logical_cores = UNKNOWN_DATA;
topo->physical_cores = UNKNOWN_DATA;
topo->smt_available = 1;
topo->smt_supported = 1;
topo->sockets = 1;
@@ -711,8 +716,8 @@ struct topology* get_topology_info(struct cpuInfo* cpu, struct cache* cach, int
}
else {
printWarn("Can't read topology information from cpuid (needed level is 0x%.8X, max is 0x%.8X)", 0x00000001, cpu->maxLevels);
topo->physical_cores = 1;
topo->logical_cores = 1;
topo->physical_cores = UNKNOWN_DATA;
topo->logical_cores = UNKNOWN_DATA;
topo->smt_available = 1;
topo->smt_supported = 1;
}