[v0.98][PPC] Update ppc peak performance taking into account slices in POWER9

This commit is contained in:
Dr-Noob
2021-08-05 15:47:18 +02:00
parent 921e815470
commit ee57646f9e
3 changed files with 17 additions and 0 deletions

View File

@@ -215,6 +215,14 @@ char* get_str_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64
double flops = topo->physical_cores * topo->sockets * (freq*1000000); double flops = topo->physical_cores * topo->sockets * (freq*1000000);
if(feat->altivec) flops = flops*4; if(feat->altivec) flops = flops*4;
// POWER9 cores are organized into "slices". Each SMT4 core has two super-slices,
// and each super-slice is capable of doing two FLOPS per cycle. An SMT8 core
// has four super-slices, thus four FLOPS per cycle. The peak is therefore
// scaled by threads_per_core / 2 (x2 for SMT4, x4 for SMT8).
if(is_power9(cpu->arch)) {
int threads_per_core = topo->logical_cores / topo->physical_cores;
flops = flops * (threads_per_core / 2);
}
if(flops >= (double)1000000000000.0) if(flops >= (double)1000000000000.0)
snprintf(string,size,"%.2f TFLOP/s",flops/1000000000000); snprintf(string,size,"%.2f TFLOP/s",flops/1000000000000);
else if(flops >= 1000000000.0) else if(flops >= 1000000000.0)

View File

@@ -249,6 +249,14 @@ bool has_altivec(struct uarch* arch) {
} }
} }
// Reports whether the given microarchitecture is one of the POWER9 variants.
//
// Returns true when arch->uarch matches UARCH_POWER9 or any of the
// UARCH_POWER9_DD20 / DD21 / DD22 / DD23 identifiers, and false otherwise.
// arch is dereferenced unconditionally, so callers must pass a non-NULL pointer.
bool is_power9(struct uarch* arch) {
  switch(arch->uarch) {
    case UARCH_POWER9:
    case UARCH_POWER9_DD20:
    case UARCH_POWER9_DD21:
    case UARCH_POWER9_DD22:
    case UARCH_POWER9_DD23:
      return true;
    default:
      return false;
  }
}
char* get_str_uarch(struct cpuInfo* cpu) { char* get_str_uarch(struct cpuInfo* cpu) {
return cpu->arch->uarch_str; return cpu->arch->uarch_str;
} }

View File

@@ -8,6 +8,7 @@ struct uarch;
struct uarch* get_uarch_from_pvr(uint32_t pvr); struct uarch* get_uarch_from_pvr(uint32_t pvr);
bool has_altivec(struct uarch* arch); bool has_altivec(struct uarch* arch);
bool is_power9(struct uarch* arch);
char* get_str_uarch(struct cpuInfo* cpu); char* get_str_uarch(struct cpuInfo* cpu);
char* get_str_process(struct cpuInfo* cpu); char* get_str_process(struct cpuInfo* cpu);
void free_uarch_struct(struct uarch* arch); void free_uarch_struct(struct uarch* arch);