mirror of
https://github.com/Dr-Noob/cpufetch.git
synced 2026-03-25 16:00:39 +01:00
[v1.02][x86] Extending peakperf computation to hybrid cores
This commit is contained in:
@@ -179,7 +179,7 @@ struct uarch* get_cpu_uarch(struct cpuInfo* cpu) {
|
|||||||
return get_uarch_from_cpuid(cpu, eax, efamily, family, emodel, model, (int)stepping);
|
return get_uarch_from_cpuid(cpu, eax, efamily, family, emodel, model, (int)stepping);
|
||||||
}
|
}
|
||||||
|
|
||||||
int64_t get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t max_freq, bool accurate_pp) {
|
int64_t get_peak_performance(struct cpuInfo* cpu, bool accurate_pp) {
|
||||||
/*
|
/*
|
||||||
* PP = PeakPerformance
|
* PP = PeakPerformance
|
||||||
* SP = SinglePrecision
|
* SP = SinglePrecision
|
||||||
@@ -192,10 +192,17 @@ int64_t get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t
|
|||||||
* 16(If AVX512), 8(If AVX), 4(If SSE) *
|
* 16(If AVX512), 8(If AVX), 4(If SSE) *
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
struct cpuInfo* ptr = cpu;
|
||||||
|
int64_t total_flops = 0;
|
||||||
|
|
||||||
|
for(int i=0; i < cpu->num_cpus; ptr = ptr->next_cpu, i++) {
|
||||||
|
struct topology* topo = ptr->topo;
|
||||||
|
int64_t max_freq = get_freq(ptr->freq);
|
||||||
|
|
||||||
int64_t freq;
|
int64_t freq;
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
if(accurate_pp)
|
if(accurate_pp)
|
||||||
freq = measure_frequency(cpu);
|
freq = measure_frequency(ptr);
|
||||||
else
|
else
|
||||||
freq = max_freq;
|
freq = max_freq;
|
||||||
#else
|
#else
|
||||||
@@ -209,8 +216,8 @@ int64_t get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct features* feat = cpu->feat;
|
struct features* feat = ptr->feat;
|
||||||
int vpus = get_number_of_vpus(cpu);
|
int vpus = get_number_of_vpus(ptr);
|
||||||
int64_t flops = topo->physical_cores * topo->sockets * (freq*1000000) * vpus;
|
int64_t flops = topo->physical_cores * topo->sockets * (freq*1000000) * vpus;
|
||||||
|
|
||||||
if(feat->FMA3 || feat->FMA4)
|
if(feat->FMA3 || feat->FMA4)
|
||||||
@@ -219,7 +226,7 @@ int64_t get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t
|
|||||||
// Ice Lake has AVX512, but it has 1 VPU for AVX512, while
|
// Ice Lake has AVX512, but it has 1 VPU for AVX512, while
|
||||||
// it has 2 for AVX2. If this is a Ice Lake CPU, we are computing
|
// it has 2 for AVX2. If this is a Ice Lake CPU, we are computing
|
||||||
// the peak performance supposing AVX2, not AVX512
|
// the peak performance supposing AVX2, not AVX512
|
||||||
if(feat->AVX512 && vpus_are_AVX512(cpu))
|
if(feat->AVX512 && vpus_are_AVX512(ptr))
|
||||||
flops = flops*16;
|
flops = flops*16;
|
||||||
else if(feat->AVX || feat->AVX2)
|
else if(feat->AVX || feat->AVX2)
|
||||||
flops = flops*8;
|
flops = flops*8;
|
||||||
@@ -228,10 +235,13 @@ int64_t get_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64_t
|
|||||||
|
|
||||||
// See https://sites.utexas.edu/jdm4372/2018/01/22/a-peculiar-
|
// See https://sites.utexas.edu/jdm4372/2018/01/22/a-peculiar-
|
||||||
// throughput-limitation-on-intels-xeon-phi-x200-knights-landing/
|
// throughput-limitation-on-intels-xeon-phi-x200-knights-landing/
|
||||||
if(is_knights_landing(cpu))
|
if(is_knights_landing(ptr))
|
||||||
flops = flops * 6 / 7;
|
flops = flops * 6 / 7;
|
||||||
|
|
||||||
return flops;
|
total_flops += flops;
|
||||||
|
}
|
||||||
|
|
||||||
|
return total_flops;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct hypervisor* get_hp_info(bool hv_present) {
|
struct hypervisor* get_hp_info(bool hv_present) {
|
||||||
@@ -498,7 +508,7 @@ struct cpuInfo* get_cpu_info() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
cpu->num_cpus = modules;
|
cpu->num_cpus = modules;
|
||||||
cpu->peak_performance = get_peak_performance(cpu, cpu->topo, get_freq(cpu->freq), accurate_pp());
|
cpu->peak_performance = get_peak_performance(cpu, accurate_pp());
|
||||||
|
|
||||||
return cpu;
|
return cpu;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -419,6 +419,7 @@ int get_number_of_vpus(struct cpuInfo* cpu) {
|
|||||||
|
|
||||||
case UARCH_ICE_LAKE:
|
case UARCH_ICE_LAKE:
|
||||||
case UARCH_TIGER_LAKE:
|
case UARCH_TIGER_LAKE:
|
||||||
|
case UARCH_ALDER_LAKE:
|
||||||
|
|
||||||
// AMD
|
// AMD
|
||||||
case UARCH_ZEN2:
|
case UARCH_ZEN2:
|
||||||
|
|||||||
Reference in New Issue
Block a user