diff --git a/src/ppc/ppc.c b/src/ppc/ppc.c
index 66fc397..1a69b5b 100644
--- a/src/ppc/ppc.c
+++ b/src/ppc/ppc.c
@@ -215,6 +215,18 @@ char* get_str_peak_performance(struct cpuInfo* cpu, struct topology* topo, int64
   double flops = topo->physical_cores * topo->sockets * (freq*1000000);
   if(feat->altivec)
     flops = flops*4;
+  // POWER9 has the concept called "slices". Each SMT4 core has two super-slices,
+  // and each super-slice is capable of doing two FLOPS per cycle. In the case of
+  // SMT8, it has 4 super-slices, thus four FLOPS per cycle.
+  if(is_power9(cpu->arch) && topo->physical_cores > 0) {
+    int threads_per_core = topo->logical_cores / topo->physical_cores;
+    // threads_per_core / 2 is an integer division: with fewer than two threads
+    // per core (logical == physical) it would be 0 and wipe out the estimate,
+    // so only scale when the resulting multiplier is at least 1.
+    if(threads_per_core >= 2)
+      flops = flops * (threads_per_core / 2);
+  }
+
   if(flops >= (double)1000000000000.0)
     snprintf(string,size,"%.2f TFLOP/s",flops/1000000000000);
   else if(flops >= 1000000000.0)
diff --git a/src/ppc/uarch.c b/src/ppc/uarch.c
index 4be62f2..61eda1d 100644
--- a/src/ppc/uarch.c
+++ b/src/ppc/uarch.c
@@ -249,6 +249,16 @@ bool has_altivec(struct uarch* arch) {
   }
 }
 
+// Returns true when arch->uarch is UARCH_POWER9 or one of the
+// UARCH_POWER9_DD20 .. UARCH_POWER9_DD23 identifiers, false otherwise.
+bool is_power9(struct uarch* arch) {
+  return arch->uarch == UARCH_POWER9 ||
+         arch->uarch == UARCH_POWER9_DD20 ||
+         arch->uarch == UARCH_POWER9_DD21 ||
+         arch->uarch == UARCH_POWER9_DD22 ||
+         arch->uarch == UARCH_POWER9_DD23;
+}
+
 char* get_str_uarch(struct cpuInfo* cpu) {
   return cpu->arch->uarch_str;
 }
diff --git a/src/ppc/uarch.h b/src/ppc/uarch.h
index 7ad20d8..f7c45ce 100644
--- a/src/ppc/uarch.h
+++ b/src/ppc/uarch.h
@@ -8,6 +8,7 @@ struct uarch;
 
 struct uarch* get_uarch_from_pvr(uint32_t pvr);
 bool has_altivec(struct uarch* arch);
+bool is_power9(struct uarch* arch);
 char* get_str_uarch(struct cpuInfo* cpu);
 char* get_str_process(struct cpuInfo* cpu);
 void free_uarch_struct(struct uarch* arch);