[v1.02][x86] Extending peakperf computation to hybrid cores

This commit is contained in:
Dr-Noob
2022-11-05 18:17:38 +00:00
parent cec91a1e4d
commit 1eb1a5246e
2 changed files with 50 additions and 39 deletions

View File

@@ -179,7 +179,7 @@ struct uarch* get_cpu_uarch(struct cpuInfo* cpu) {
return get_uarch_from_cpuid(cpu, eax, efamily, family, emodel, model, (int)stepping); return get_uarch_from_cpuid(cpu, eax, efamily, family, emodel, model, (int)stepping);
} }
int64_t get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t max_freq, bool accurate_pp) { int64_t get_peak_performance(struct cpuInfo* cpu, bool accurate_pp) {
/* /*
* PP = PeakPerformance * PP = PeakPerformance
* SP = SinglePrecision * SP = SinglePrecision
@@ -192,25 +192,32 @@ int64_t get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t
* 16(If AVX512), 8(If AVX), 4(If SSE) * * 16(If AVX512), 8(If AVX), 4(If SSE) *
*/ */
struct cpuInfo* ptr = cpu;
int64_t total_flops = 0;
for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
struct topology* topo = ptr->topo;
int64_t max_freq = get_freq(ptr->freq);
int64_t freq; int64_t freq;
#ifdef __linux__ #ifdef __linux__
if(accurate_pp) if(accurate_pp)
freq = measure_frequency(cpu); freq = measure_frequency(ptr);
else else
freq = max_freq; freq = max_freq;
#else #else
// Silence compiler warning // Silence compiler warning
(void)(accurate_pp); (void)(accurate_pp);
freq = max_freq; freq = max_freq;
#endif #endif
//First, check we have consistent data //First, check we have consistent data
if(freq == UNKNOWN_DATA || topo->logical_cores == UNKNOWN_DATA) { if(freq == UNKNOWN_DATA || topo->logical_cores == UNKNOWN_DATA) {
return -1; return -1;
} }
struct features* feat = cpu->feat; struct features* feat = ptr->feat;
int vpus = get_number_of_vpus(cpu); int vpus = get_number_of_vpus(ptr);
int64_t flops = topo->physical_cores * topo->sockets * (freq*1000000) * vpus; int64_t flops = topo->physical_cores * topo->sockets * (freq*1000000) * vpus;
if(feat->FMA3 || feat->FMA4) if(feat->FMA3 || feat->FMA4)
@@ -219,7 +226,7 @@ int64_t get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t
// Ice Lake has AVX512, but it has 1 VPU for AVX512, while // Ice Lake has AVX512, but it has 1 VPU for AVX512, while
// it has 2 for AVX2. If this is a Ice Lake CPU, we are computing // it has 2 for AVX2. If this is a Ice Lake CPU, we are computing
// the peak performance supposing AVX2, not AVX512 // the peak performance supposing AVX2, not AVX512
if(feat->AVX512 && vpus_are_AVX512(cpu)) if(feat->AVX512 && vpus_are_AVX512(ptr))
flops = flops*16; flops = flops*16;
else if(feat->AVX || feat->AVX2) else if(feat->AVX || feat->AVX2)
flops = flops*8; flops = flops*8;
@@ -228,10 +235,13 @@ int64_t get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t
// See https://sites.utexas.edu/jdm4372/2018/01/22/a-peculiar- // See https://sites.utexas.edu/jdm4372/2018/01/22/a-peculiar-
// throughput-limitation-on-intels-xeon-phi-x200-knights-landing/ // throughput-limitation-on-intels-xeon-phi-x200-knights-landing/
if(is_knights_landing(cpu)) if(is_knights_landing(ptr))
flops = flops * 6 / 7; flops = flops * 6 / 7;
return flops; total_flops += flops;
}
return total_flops;
} }
struct hypervisor* get_hp_info(bool hv_present) { struct hypervisor* get_hp_info(bool hv_present) {
@@ -498,7 +508,7 @@ struct cpuInfo* get_cpu_info() {
} }
cpu->num_cpus = modules; cpu->num_cpus = modules;
cpu->peak_performance = get_peak_performance(cpu, cpu->topo, get_freq(cpu->freq), accurate_pp()); cpu->peak_performance = get_peak_performance(cpu, accurate_pp());
return cpu; return cpu;
} }

View File

@@ -419,6 +419,7 @@ int get_number_of_vpus(struct cpuInfo* cpu) {
case UARCH_ICE_LAKE: case UARCH_ICE_LAKE:
case UARCH_TIGER_LAKE: case UARCH_TIGER_LAKE:
case UARCH_ALDER_LAKE:
// AMD // AMD
case UARCH_ZEN2: case UARCH_ZEN2: