From 1eb1a5246e8b21eff298cfb641ba7a01e1173f20 Mon Sep 17 00:00:00 2001 From: Dr-Noob Date: Sat, 5 Nov 2022 18:17:38 +0000 Subject: [PATCH] [v1.02][x86] Extending peakperf computation to hybid cores --- src/x86/cpuid.c | 88 +++++++++++++++++++++++++++---------------------- src/x86/uarch.c | 1 + 2 files changed, 50 insertions(+), 39 deletions(-) diff --git a/src/x86/cpuid.c b/src/x86/cpuid.c index ca59789..5863454 100644 --- a/src/x86/cpuid.c +++ b/src/x86/cpuid.c @@ -179,7 +179,7 @@ struct uarch* get_cpu_uarch(struct cpuInfo* cpu) { return get_uarch_from_cpuid(cpu, eax, efamily, family, emodel, model, (int)stepping); } -int64_t get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t max_freq, bool accurate_pp) { +int64_t get_peak_performance(struct cpuInfo* cpu, bool accurate_pp) { /* * PP = PeakPerformance * SP = SinglePrecision @@ -192,46 +192,56 @@ int64_t get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t * 16(If AVX512), 8(If AVX), 4(If SSE) * */ - int64_t freq; -#ifdef __linux__ - if(accurate_pp) - freq = measure_frequency(cpu); - else - freq = max_freq; -#else - // Silence compiler warning - (void)(accurate_pp); - freq = max_freq; -#endif + struct cpuInfo* ptr = cpu; + int64_t total_flops = 0; - //First, check we have consistent data - if(freq == UNKNOWN_DATA || topo->logical_cores == UNKNOWN_DATA) { - return -1; + for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) { + struct topology* topo = ptr->topo; + int64_t max_freq = get_freq(ptr->freq); + + int64_t freq; + #ifdef __linux__ + if(accurate_pp) + freq = measure_frequency(ptr); + else + freq = max_freq; + #else + // Silence compiler warning + (void)(accurate_pp); + freq = max_freq; + #endif + + //First, check we have consistent data + if(freq == UNKNOWN_DATA || topo->logical_cores == UNKNOWN_DATA) { + return -1; + } + + struct features* feat = ptr->feat; + int vpus = get_number_of_vpus(ptr); + int64_t flops = topo->physical_cores * topo->sockets * (freq*1000000) * vpus; + + if(feat->FMA3 || feat->FMA4) + flops = flops*2; + + // Ice Lake has AVX512, but it has 1 VPU for AVX512, while + // it has 2 for AVX2. If this is a Ice Lake CPU, we are computing + // the peak performance supposing AVX2, not AVX512 + if(feat->AVX512 && vpus_are_AVX512(ptr)) + flops = flops*16; + else if(feat->AVX || feat->AVX2) + flops = flops*8; + else if(feat->SSE) + flops = flops*4; + + // See https://sites.utexas.edu/jdm4372/2018/01/22/a-peculiar- + // throughput-limitation-on-intels-xeon-phi-x200-knights-landing/ + if(is_knights_landing(ptr)) + flops = flops * 6 / 7; + + total_flops += flops; } - struct features* feat = cpu->feat; - int vpus = get_number_of_vpus(cpu); - int64_t flops = topo->physical_cores * topo->sockets * (freq*1000000) * vpus; - - if(feat->FMA3 || feat->FMA4) - flops = flops*2; - - // Ice Lake has AVX512, but it has 1 VPU for AVX512, while - // it has 2 for AVX2. If this is a Ice Lake CPU, we are computing - // the peak performance supposing AVX2, not AVX512 - if(feat->AVX512 && vpus_are_AVX512(cpu)) - flops = flops*16; - else if(feat->AVX || feat->AVX2) - flops = flops*8; - else if(feat->SSE) - flops = flops*4; - - // See https://sites.utexas.edu/jdm4372/2018/01/22/a-peculiar- - // throughput-limitation-on-intels-xeon-phi-x200-knights-landing/ - if(is_knights_landing(cpu)) - flops = flops * 6 / 7; - - return flops; + return total_flops; } struct hypervisor* get_hp_info(bool hv_present) { @@ -498,7 +508,7 @@ struct cpuInfo* get_cpu_info() { } cpu->num_cpus = modules; - cpu->peak_performance = get_peak_performance(cpu, cpu->topo, get_freq(cpu->freq), accurate_pp()); + cpu->peak_performance = get_peak_performance(cpu, accurate_pp()); return cpu; } diff --git a/src/x86/uarch.c b/src/x86/uarch.c index 85c531a..0c25565 100644 --- a/src/x86/uarch.c +++ b/src/x86/uarch.c @@ -419,6 +419,7 @@ int get_number_of_vpus(struct cpuInfo* cpu) { case UARCH_ICE_LAKE: case UARCH_TIGER_LAKE: + case UARCH_ALDER_LAKE: // AMD case UARCH_ZEN2: