freebsd-skq/usr.sbin/pmcstudy/pmcstudy.c
rrs 7d03b5290e Fix several typos and bugs within pmcstudy. Also highlight the one SB test
that is failing (and is likely a problem in the actual PMC defintions). Add
to this also the -A option to run all canned tests.

Sponsored by:	Netflix Inc.
2015-12-10 01:52:29 +00:00

2955 lines
82 KiB
C

/*-
* Copyright (c) 2014, 2015 Netflix Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer,
* in this position and unchanged.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/types.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <strings.h>
#include <sys/errno.h>
#include <signal.h>
#include <sys/wait.h>
#include <getopt.h>
#include "eval_expr.h"
__FBSDID("$FreeBSD$");
static int max_pmc_counters = 1;
static int run_all = 0;
#define MAX_COUNTER_SLOTS 1024
#define MAX_NLEN 64
#define MAX_CPU 64
static int verbose = 0;
extern char **environ;
extern struct expression *master_exp;
struct expression *master_exp=NULL;
#define PMC_INITIAL_ALLOC 512
extern char **valid_pmcs;
char **valid_pmcs = NULL;
extern int valid_pmc_cnt;
int valid_pmc_cnt=0;
extern int pmc_allocated_cnt;
int pmc_allocated_cnt=0;
/*
* The following two varients on popen and pclose with
* the cavet that they get you the PID so that you
* can supply it to pclose so it can send a SIGTERM
* to the process.
*/
static FILE *
my_popen(const char *command, const char *dir, pid_t *p_pid)
{
FILE *io_out, *io_in;
int pdesin[2], pdesout[2];
char *argv[4];
pid_t pid;
char cmd[4];
char cmd2[1024];
char arg1[4];
if ((strcmp(dir, "r") != 0) &&
(strcmp(dir, "w") != 0)) {
errno = EINVAL;
return(NULL);
}
if (pipe(pdesin) < 0)
return (NULL);
if (pipe(pdesout) < 0) {
(void)close(pdesin[0]);
(void)close(pdesin[1]);
return (NULL);
}
strcpy(cmd, "sh");
strcpy(arg1, "-c");
strcpy(cmd2, command);
argv[0] = cmd;
argv[1] = arg1;
argv[2] = cmd2;
argv[3] = NULL;
switch (pid = fork()) {
case -1: /* Error. */
(void)close(pdesin[0]);
(void)close(pdesin[1]);
(void)close(pdesout[0]);
(void)close(pdesout[1]);
return (NULL);
/* NOTREACHED */
case 0: /* Child. */
/* Close out un-used sides */
(void)close(pdesin[1]);
(void)close(pdesout[0]);
/* Now prepare the stdin of the process */
close(0);
(void)dup(pdesin[0]);
(void)close(pdesin[0]);
/* Now prepare the stdout of the process */
close(1);
(void)dup(pdesout[1]);
/* And lets do stderr just in case */
close(2);
(void)dup(pdesout[1]);
(void)close(pdesout[1]);
/* Now run it */
execve("/bin/sh", argv, environ);
exit(127);
/* NOTREACHED */
}
/* Parent; assume fdopen can't fail. */
/* Store the pid */
*p_pid = pid;
if (strcmp(dir, "r") != 0) {
io_out = fdopen(pdesin[1], "w");
(void)close(pdesin[0]);
(void)close(pdesout[0]);
(void)close(pdesout[1]);
return(io_out);
} else {
/* Prepare the input stream */
io_in = fdopen(pdesout[0], "r");
(void)close(pdesout[1]);
(void)close(pdesin[0]);
(void)close(pdesin[1]);
return (io_in);
}
}
/*
* pclose --
* Pclose returns -1 if stream is not associated with a `popened' command,
* if already `pclosed', or waitpid returns an error.
*/
static void
my_pclose(FILE *io, pid_t the_pid)
{
int pstat;
pid_t pid;
/*
* Find the appropriate file pointer and remove it from the list.
*/
(void)fclose(io);
/* Die if you are not dead! */
kill(the_pid, SIGTERM);
do {
pid = wait4(the_pid, &pstat, 0, (struct rusage *)0);
} while (pid == -1 && errno == EINTR);
}
struct counters {
struct counters *next_cpu;
char counter_name[MAX_NLEN]; /* Name of counter */
int cpu; /* CPU we are on */
int pos; /* Index we are filling to. */
uint64_t vals[MAX_COUNTER_SLOTS]; /* Last 64 entries */
uint64_t sum; /* Summary of entries */
};
extern struct counters *glob_cpu[MAX_CPU];
struct counters *glob_cpu[MAX_CPU];
extern struct counters *cnts;
struct counters *cnts=NULL;
extern int ncnts;
int ncnts=0;
extern int (*expression)(struct counters *, int);
int (*expression)(struct counters *, int);
static const char *threshold=NULL;
static const char *command;
struct cpu_entry {
const char *name;
const char *thresh;
const char *command;
int (*func)(struct counters *, int);
int counters_required;
};
struct cpu_type {
char cputype[32];
int number;
struct cpu_entry *ents;
void (*explain)(const char *name);
};
extern struct cpu_type the_cpu;
struct cpu_type the_cpu;
static void
explain_name_sb(const char *name)
{
const char *mythresh;
if (strcmp(name, "allocstall1") == 0) {
printf("Examine PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW / CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh > .05";
} else if (strcmp(name, "allocstall2") == 0) {
printf("Examine PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh > .05";
} else if (strcmp(name, "br_miss") == 0) {
printf("Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh >= .2";
} else if (strcmp(name, "splitload") == 0) {
printf("Examine MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh >= .1";
} else if (strcmp(name, "splitstore") == 0) {
printf("Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n");
mythresh = "thresh >= .01";
} else if (strcmp(name, "contested") == 0) {
printf("Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh >= .05";
} else if (strcmp(name, "blockstorefwd") == 0) {
printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh >= .05";
} else if (strcmp(name, "cache2") == 0) {
printf("Examine ((MEM_LOAD_RETIRED.L3_HIT * 26) + \n");
printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + \n");
printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P\n");
printf("**Note we have it labeled MEM_LOAD_UOPS_RETIRED.LLC_HIT not MEM_LOAD_RETIRED.L3_HIT\n");
mythresh = "thresh >= .2";
} else if (strcmp(name, "cache1") == 0) {
printf("Examine (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh >= .2";
} else if (strcmp(name, "dtlbmissload") == 0) {
printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n");
printf(" / CPU_CLK_UNHALTED.THREAD_P)\n");
mythresh = "thresh >= .1";
} else if (strcmp(name, "frontendstall") == 0) {
printf("Examine IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4)\n");
mythresh = "thresh >= .15";
} else if (strcmp(name, "clears") == 0) {
printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n");
printf(" MACHINE_CLEARS.SMC + \n");
printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh >= .02";
} else if (strcmp(name, "microassist") == 0) {
printf("Examine IDQ.MS_CYCLES / (CPU_CLK_UNHALTED.THREAD_P * 4)\n");
printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n");
mythresh = "thresh >= .05";
} else if (strcmp(name, "aliasing_4k") == 0) {
printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh >= .1";
} else if (strcmp(name, "fpassist") == 0) {
printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n");
mythresh = "look for a excessive value";
} else if (strcmp(name, "otherassistavx") == 0) {
printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "look for a excessive value";
} else if (strcmp(name, "otherassistsse") == 0) {
printf("Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "look for a excessive value";
} else if (strcmp(name, "eff1") == 0) {
printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n");
mythresh = "thresh < .9";
} else if (strcmp(name, "eff2") == 0) {
printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n");
mythresh = "thresh > 1.0";
} else if (strcmp(name, "dtlbmissstore") == 0) {
printf("Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n");
printf(" / CPU_CLK_UNHALTED.THREAD_P)\n");
mythresh = "thresh >= .05";
} else {
printf("Unknown name:%s\n", name);
mythresh = "unknown entry";
}
printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
static void
explain_name_ib(const char *name)
{
const char *mythresh;
if (strcmp(name, "br_miss") == 0) {
printf("Examine ((BR_MISP_RETIRED.ALL_BRANCHES /(BR_MISP_RETIRED.ALL_BRANCHES +\n");
printf(" MACHINE_CLEAR.COUNT) * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES)\n");
printf("/ (4 * CPU_CLK_UNHALTED.THREAD))))\n");
mythresh = "thresh >= .2";
} else if (strcmp(name, "eff1") == 0) {
printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n");
mythresh = "thresh < .9";
} else if (strcmp(name, "eff2") == 0) {
printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n");
mythresh = "thresh > 1.0";
} else if (strcmp(name, "cache1") == 0) {
printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh >= .2";
} else if (strcmp(name, "cache2") == 0) {
printf("Examine (MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh >= .2";
} else if (strcmp(name, "itlbmiss") == 0) {
printf("Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh > .05";
} else if (strcmp(name, "icachemiss") == 0) {
printf("Examine (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh > .05";
} else if (strcmp(name, "lcpstall") == 0) {
printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh > .05";
} else if (strcmp(name, "datashare") == 0) {
printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh > .05";
} else if (strcmp(name, "blockstorefwd") == 0) {
printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh >= .05";
} else if (strcmp(name, "splitload") == 0) {
printf("Examine ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) *\n");
printf(" LD_BLOCKS.NO_SR)/CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh >= .1";
} else if (strcmp(name, "splitstore") == 0) {
printf("Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n");
mythresh = "thresh >= .01";
} else if (strcmp(name, "aliasing_4k") == 0) {
printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh >= .1";
} else if (strcmp(name, "dtlbmissload") == 0) {
printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n");
printf(" / CPU_CLK_UNHALTED.THREAD_P)\n");
mythresh = "thresh >= .1";
} else if (strcmp(name, "dtlbmissstore") == 0) {
printf("Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n");
printf(" / CPU_CLK_UNHALTED.THREAD_P)\n");
mythresh = "thresh >= .05";
} else if (strcmp(name, "contested") == 0) {
printf("Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh >= .05";
} else if (strcmp(name, "clears") == 0) {
printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n");
printf(" MACHINE_CLEARS.SMC + \n");
printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh >= .02";
} else if (strcmp(name, "microassist") == 0) {
printf("Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n");
printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n");
mythresh = "thresh >= .05";
} else if (strcmp(name, "fpassist") == 0) {
printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n");
mythresh = "look for a excessive value";
} else if (strcmp(name, "otherassistavx") == 0) {
printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "look for a excessive value";
} else if (strcmp(name, "otherassistsse") == 0) {
printf("Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "look for a excessive value";
} else {
printf("Unknown name:%s\n", name);
mythresh = "unknown entry";
}
printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
static void
explain_name_has(const char *name)
{
const char *mythresh;
if (strcmp(name, "eff1") == 0) {
printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n");
mythresh = "thresh < .75";
} else if (strcmp(name, "eff2") == 0) {
printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n");
mythresh = "thresh > 1.0";
} else if (strcmp(name, "itlbmiss") == 0) {
printf("Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh > .05";
} else if (strcmp(name, "icachemiss") == 0) {
printf("Examine (36 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh > .05";
} else if (strcmp(name, "lcpstall") == 0) {
printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh > .05";
} else if (strcmp(name, "cache1") == 0) {
printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh >= .2";
} else if (strcmp(name, "cache2") == 0) {
printf("Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \n");
printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + \n");
printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))\n");
printf(" / CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh >= .2";
} else if (strcmp(name, "contested") == 0) {
printf("Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh >= .05";
} else if (strcmp(name, "datashare") == 0) {
printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh > .05";
} else if (strcmp(name, "blockstorefwd") == 0) {
printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh >= .05";
} else if (strcmp(name, "splitload") == 0) {
printf("Examine (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh >= .1";
} else if (strcmp(name, "splitstore") == 0) {
printf("Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n");
mythresh = "thresh >= .01";
} else if (strcmp(name, "aliasing_4k") == 0) {
printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh >= .1";
} else if (strcmp(name, "dtlbmissload") == 0) {
printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n");
printf(" / CPU_CLK_UNHALTED.THREAD_P)\n");
mythresh = "thresh >= .1";
} else if (strcmp(name, "br_miss") == 0) {
printf("Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD\n");
mythresh = "thresh >= .2";
} else if (strcmp(name, "clears") == 0) {
printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n");
printf(" MACHINE_CLEARS.SMC + \n");
printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh >= .02";
} else if (strcmp(name, "microassist") == 0) {
printf("Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n");
printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n");
mythresh = "thresh >= .05";
} else if (strcmp(name, "fpassist") == 0) {
printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n");
mythresh = "look for a excessive value";
} else if (strcmp(name, "otherassistavx") == 0) {
printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "look for a excessive value";
} else if (strcmp(name, "otherassistsse") == 0) {
printf("Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "look for a excessive value";
} else {
printf("Unknown name:%s\n", name);
mythresh = "unknown entry";
}
printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
static struct counters *
find_counter(struct counters *base, const char *name)
{
struct counters *at;
int len;
at = base;
len = strlen(name);
while(at) {
if (strncmp(at->counter_name, name, len) == 0) {
return(at);
}
at = at->next_cpu;
}
printf("Can't find counter %s\n", name);
printf("We have:\n");
at = base;
while(at) {
printf("- %s\n", at->counter_name);
at = at->next_cpu;
}
exit(-1);
}
static int
allocstall1(struct counters *cpu, int pos)
{
/* 1 - PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW/CPU_CLK_UNHALTED.THREAD_P (thresh > .05)*/
int ret;
struct counters *partial;
struct counters *unhalt;
double un, par, res;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
partial = find_counter(cpu, "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW");
if (pos != -1) {
par = partial->vals[pos] * 1.0;
un = unhalt->vals[pos] * 1.0;
} else {
par = partial->sum * 1.0;
un = unhalt->sum * 1.0;
}
res = par/un;
ret = printf("%1.3f", res);
return(ret);
}
static int
allocstall2(struct counters *cpu, int pos)
{
/* 2 - PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
int ret;
struct counters *partial;
struct counters *unhalt;
double un, par, res;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
partial = find_counter(cpu, "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP");
if (pos != -1) {
par = partial->vals[pos] * 1.0;
un = unhalt->vals[pos] * 1.0;
} else {
par = partial->sum * 1.0;
un = unhalt->sum * 1.0;
}
res = par/un;
ret = printf("%1.3f", res);
return(ret);
}
static int
br_mispredict(struct counters *cpu, int pos)
{
struct counters *brctr;
struct counters *unhalt;
int ret;
/* 3 - (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
double br, un, con, res;
con = 20.0;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
if (pos != -1) {
br = brctr->vals[pos] * 1.0;
un = unhalt->vals[pos] * 1.0;
} else {
br = brctr->sum * 1.0;
un = unhalt->sum * 1.0;
}
res = (con * br)/un;
ret = printf("%1.3f", res);
return(ret);
}
static int
br_mispredictib(struct counters *cpu, int pos)
{
struct counters *brctr;
struct counters *unhalt;
struct counters *clear, *clear2, *clear3;
struct counters *uops;
struct counters *recv;
struct counters *iss;
/* "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",*/
int ret;
/*
* (BR_MISP_RETIRED.ALL_BRANCHES /
* (BR_MISP_RETIRED.ALL_BRANCHES +
* MACHINE_CLEAR.COUNT) *
* ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) / (4 * CPU_CLK_UNHALTED.THREAD)))
*
*/
double br, cl, cl2, cl3, uo, re, un, con, res, is;
con = 4.0;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
clear = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
clear2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
clear3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
iss = find_counter(cpu, "UOPS_ISSUED.ANY");
recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES");
if (pos != -1) {
br = brctr->vals[pos] * 1.0;
cl = clear->vals[pos] * 1.0;
cl2 = clear2->vals[pos] * 1.0;
cl3 = clear3->vals[pos] * 1.0;
uo = uops->vals[pos] * 1.0;
re = recv->vals[pos] * 1.0;
is = iss->vals[pos] * 1.0;
un = unhalt->vals[pos] * 1.0;
} else {
br = brctr->sum * 1.0;
cl = clear->sum * 1.0;
cl2 = clear2->sum * 1.0;
cl3 = clear3->sum * 1.0;
uo = uops->sum * 1.0;
re = recv->sum * 1.0;
is = iss->sum * 1.0;
un = unhalt->sum * 1.0;
}
res = (br/(br + cl + cl2 + cl3) * ((is - uo + con * re) / (con * un)));
ret = printf("%1.3f", res);
return(ret);
}
static int
br_mispredict_broad(struct counters *cpu, int pos)
{
struct counters *brctr;
struct counters *unhalt;
struct counters *clear;
struct counters *uops;
struct counters *uops_ret;
struct counters *recv;
int ret;
double br, cl, uo, uo_r, re, con, un, res;
con = 4.0;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
clear = find_counter(cpu, "MACHINE_CLEARS.CYCLES");
uops = find_counter(cpu, "UOPS_ISSUED.ANY");
uops_ret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES");
if (pos != -1) {
un = unhalt->vals[pos] * 1.0;
br = brctr->vals[pos] * 1.0;
cl = clear->vals[pos] * 1.0;
uo = uops->vals[pos] * 1.0;
uo_r = uops_ret->vals[pos] * 1.0;
re = recv->vals[pos] * 1.0;
} else {
un = unhalt->sum * 1.0;
br = brctr->sum * 1.0;
cl = clear->sum * 1.0;
uo = uops->sum * 1.0;
uo_r = uops_ret->sum * 1.0;
re = recv->sum * 1.0;
}
res = br / (br + cl) * (uo - uo_r + con * re) / (un * con);
ret = printf("%1.3f", res);
return(ret);
}
static int
splitloadib(struct counters *cpu, int pos)
{
int ret;
struct counters *mem;
struct counters *l1d, *ldblock;
struct counters *unhalt;
double un, memd, res, l1, ldb;
/*
* ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) * LD_BLOCKS.NO_SR) / CPU_CLK_UNHALTED.THREAD_P
* "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
*/
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L1_MISS");
l1d = find_counter(cpu, "L1D_PEND_MISS.PENDING");
ldblock = find_counter(cpu, "LD_BLOCKS.NO_SR");
if (pos != -1) {
memd = mem->vals[pos] * 1.0;
l1 = l1d->vals[pos] * 1.0;
ldb = ldblock->vals[pos] * 1.0;
un = unhalt->vals[pos] * 1.0;
} else {
memd = mem->sum * 1.0;
l1 = l1d->sum * 1.0;
ldb = ldblock->sum * 1.0;
un = unhalt->sum * 1.0;
}
res = ((l1 / memd) * ldb)/un;
ret = printf("%1.3f", res);
return(ret);
}
static int
splitload(struct counters *cpu, int pos)
{
int ret;
struct counters *mem;
struct counters *unhalt;
double con, un, memd, res;
/* 4 - (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/
con = 5.0;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
mem = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_LOADS");
if (pos != -1) {
memd = mem->vals[pos] * 1.0;
un = unhalt->vals[pos] * 1.0;
} else {
memd = mem->sum * 1.0;
un = unhalt->sum * 1.0;
}
res = (memd * con)/un;
ret = printf("%1.3f", res);
return(ret);
}
static int
splitload_sb(struct counters *cpu, int pos)
{
int ret;
struct counters *mem;
struct counters *unhalt;
double con, un, memd, res;
/* 4 - (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/
con = 5.0;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
mem = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_LOADS");
if (pos != -1) {
memd = mem->vals[pos] * 1.0;
un = unhalt->vals[pos] * 1.0;
} else {
memd = mem->sum * 1.0;
un = unhalt->sum * 1.0;
}
res = (memd * con)/un;
ret = printf("%1.3f", res);
return(ret);
}
static int
splitstore_sb(struct counters *cpu, int pos)
{
/* 5 - MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES (thresh > 0.01) */
int ret;
struct counters *mem_split;
struct counters *mem_stores;
double memsplit, memstore, res;
mem_split = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_STORES");
mem_stores = find_counter(cpu, "MEM_UOP_RETIRED.ALL_STORES");
if (pos != -1) {
memsplit = mem_split->vals[pos] * 1.0;
memstore = mem_stores->vals[pos] * 1.0;
} else {
memsplit = mem_split->sum * 1.0;
memstore = mem_stores->sum * 1.0;
}
res = memsplit/memstore;
ret = printf("%1.3f", res);
return(ret);
}
static int
splitstore(struct counters *cpu, int pos)
{
/* 5 - MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES (thresh > 0.01) */
int ret;
struct counters *mem_split;
struct counters *mem_stores;
double memsplit, memstore, res;
mem_split = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_STORES");
mem_stores = find_counter(cpu, "MEM_UOPS_RETIRED.ALL_STORES");
if (pos != -1) {
memsplit = mem_split->vals[pos] * 1.0;
memstore = mem_stores->vals[pos] * 1.0;
} else {
memsplit = mem_split->sum * 1.0;
memstore = mem_stores->sum * 1.0;
}
res = memsplit/memstore;
ret = printf("%1.3f", res);
return(ret);
}
static int
contested(struct counters *cpu, int pos)
{
/* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
int ret;
struct counters *mem;
struct counters *unhalt;
double con, un, memd, res;
con = 60.0;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
if (pos != -1) {
memd = mem->vals[pos] * 1.0;
un = unhalt->vals[pos] * 1.0;
} else {
memd = mem->sum * 1.0;
un = unhalt->sum * 1.0;
}
res = (memd * con)/un;
ret = printf("%1.3f", res);
return(ret);
}
static int
contested_has(struct counters *cpu, int pos)
{
/* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
int ret;
struct counters *mem;
struct counters *unhalt;
double con, un, memd, res;
con = 84.0;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
if (pos != -1) {
memd = mem->vals[pos] * 1.0;
un = unhalt->vals[pos] * 1.0;
} else {
memd = mem->sum * 1.0;
un = unhalt->sum * 1.0;
}
res = (memd * con)/un;
ret = printf("%1.3f", res);
return(ret);
}
static int
contestedbroad(struct counters *cpu, int pos)
{
/* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
int ret;
struct counters *mem;
struct counters *mem2;
struct counters *unhalt;
double con, un, memd, memtoo, res;
con = 84.0;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
mem2 = find_counter(cpu,"MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS");
if (pos != -1) {
memd = mem->vals[pos] * 1.0;
memtoo = mem2->vals[pos] * 1.0;
un = unhalt->vals[pos] * 1.0;
} else {
memd = mem->sum * 1.0;
memtoo = mem2->sum * 1.0;
un = unhalt->sum * 1.0;
}
res = ((memd * con) + memtoo)/un;
ret = printf("%1.3f", res);
return(ret);
}
static int
blockstoreforward(struct counters *cpu, int pos)
{
/* 7 - (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .05)*/
int ret;
struct counters *ldb;
struct counters *unhalt;
double con, un, ld, res;
con = 13.0;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
ldb = find_counter(cpu, "LD_BLOCKS_STORE_FORWARD");
if (pos != -1) {
ld = ldb->vals[pos] * 1.0;
un = unhalt->vals[pos] * 1.0;
} else {
ld = ldb->sum * 1.0;
un = unhalt->sum * 1.0;
}
res = (ld * con)/un;
ret = printf("%1.3f", res);
return(ret);
}
static int
cache2(struct counters *cpu, int pos)
{
/* ** Suspect ***
* 8 - ((MEM_LOAD_RETIRED.L3_HIT * 26) + (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) +
* (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
*/
int ret;
struct counters *mem1, *mem2, *mem3;
struct counters *unhalt;
double con1, con2, con3, un, me_1, me_2, me_3, res;
con1 = 26.0;
con2 = 43.0;
con3 = 60.0;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
/* Call for MEM_LOAD_RETIRED.L3_HIT possibly MEM_LOAD_UOPS_RETIRED.LLC_HIT ?*/
mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
if (pos != -1) {
me_1 = mem1->vals[pos] * 1.0;
me_2 = mem2->vals[pos] * 1.0;
me_3 = mem3->vals[pos] * 1.0;
un = unhalt->vals[pos] * 1.0;
} else {
me_1 = mem1->sum * 1.0;
me_2 = mem2->sum * 1.0;
me_3 = mem3->sum * 1.0;
un = unhalt->sum * 1.0;
}
res = ((me_1 * con1) + (me_2 * con2) + (me_3 * con3))/un;
ret = printf("%1.3f", res);
return(ret);
}
static int
datasharing(struct counters *cpu, int pos)
{
/*
* (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
*/
int ret;
struct counters *mem;
struct counters *unhalt;
double con, res, me, un;
con = 43.0;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
if (pos != -1) {
me = mem->vals[pos] * 1.0;
un = unhalt->vals[pos] * 1.0;
} else {
me = mem->sum * 1.0;
un = unhalt->sum * 1.0;
}
res = (me * con)/un;
ret = printf("%1.3f", res);
return(ret);
}
static int
datasharing_has(struct counters *cpu, int pos)
{
/*
* (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
*/
int ret;
struct counters *mem;
struct counters *unhalt;
double con, res, me, un;
con = 72.0;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
if (pos != -1) {
me = mem->vals[pos] * 1.0;
un = unhalt->vals[pos] * 1.0;
} else {
me = mem->sum * 1.0;
un = unhalt->sum * 1.0;
}
res = (me * con)/un;
ret = printf("%1.3f", res);
return(ret);
}
static int
cache2ib(struct counters *cpu, int pos)
{
/*
* (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
*/
int ret;
struct counters *mem;
struct counters *unhalt;
double con, un, me, res;
con = 29.0;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
if (pos != -1) {
me = mem->vals[pos] * 1.0;
un = unhalt->vals[pos] * 1.0;
} else {
me = mem->sum * 1.0;
un = unhalt->sum * 1.0;
}
res = (con * me)/un;
ret = printf("%1.3f", res);
return(ret);
}
static int
cache2has(struct counters *cpu, int pos)
{
/*
* Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \
* (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) +
* (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))
* / CPU_CLK_UNHALTED.THREAD_P
*/
int ret;
struct counters *mem1, *mem2, *mem3;
struct counters *unhalt;
double con1, con2, con3, un, me1, me2, me3, res;
con1 = 36.0;
con2 = 72.0;
con3 = 84.0;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
if (pos != -1) {
me1 = mem1->vals[pos] * 1.0;
me2 = mem2->vals[pos] * 1.0;
me3 = mem3->vals[pos] * 1.0;
un = unhalt->vals[pos] * 1.0;
} else {
me1 = mem1->sum * 1.0;
me2 = mem2->sum * 1.0;
me3 = mem3->sum * 1.0;
un = unhalt->sum * 1.0;
}
res = ((me1 * con1) + (me2 * con2) + (me3 * con3))/un;
ret = printf("%1.3f", res);
return(ret);
}
static int
cache2broad(struct counters *cpu, int pos)
{
/*
* (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
*/
int ret;
struct counters *mem;
struct counters *unhalt;
double con, un, me, res;
con = 36.0;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_HIT");
if (pos != -1) {
me = mem->vals[pos] * 1.0;
un = unhalt->vals[pos] * 1.0;
} else {
me = mem->sum * 1.0;
un = unhalt->sum * 1.0;
}
res = (con * me)/un;
ret = printf("%1.3f", res);
return(ret);
}
static int
cache1(struct counters *cpu, int pos)
{
/* 9 - (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
int ret;
struct counters *mem;
struct counters *unhalt;
double con, un, me, res;
con = 180.0;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
mem = find_counter(cpu, "MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS");
if (pos != -1) {
me = mem->vals[pos] * 1.0;
un = unhalt->vals[pos] * 1.0;
} else {
me = mem->sum * 1.0;
un = unhalt->sum * 1.0;
}
res = (me * con)/un;
ret = printf("%1.3f", res);
return(ret);
}
static int
cache1ib(struct counters *cpu, int pos)
{
/* 9 - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
int ret;
struct counters *mem;
struct counters *unhalt;
double con, un, me, res;
con = 180.0;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM");
if (pos != -1) {
me = mem->vals[pos] * 1.0;
un = unhalt->vals[pos] * 1.0;
} else {
me = mem->sum * 1.0;
un = unhalt->sum * 1.0;
}
res = (me * con)/un;
ret = printf("%1.3f", res);
return(ret);
}
static int
cache1broad(struct counters *cpu, int pos)
{
/* 9 - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
int ret;
struct counters *mem;
struct counters *unhalt;
double con, un, me, res;
con = 180.0;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_MISS");
if (pos != -1) {
me = mem->vals[pos] * 1.0;
un = unhalt->vals[pos] * 1.0;
} else {
me = mem->sum * 1.0;
un = unhalt->sum * 1.0;
}
res = (me * con)/un;
ret = printf("%1.3f", res);
return(ret);
}
static int
dtlb_missload(struct counters *cpu, int pos)
{
/* 10 - ((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t >=.1) */
int ret;
struct counters *dtlb_m, *dtlb_d;
struct counters *unhalt;
double con, un, d1, d2, res;
con = 7.0;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
dtlb_m = find_counter(cpu, "DTLB_LOAD_MISSES.STLB_HIT");
dtlb_d = find_counter(cpu, "DTLB_LOAD_MISSES.WALK_DURATION");
if (pos != -1) {
d1 = dtlb_m->vals[pos] * 1.0;
d2 = dtlb_d->vals[pos] * 1.0;
un = unhalt->vals[pos] * 1.0;
} else {
d1 = dtlb_m->sum * 1.0;
d2 = dtlb_d->sum * 1.0;
un = unhalt->sum * 1.0;
}
res = ((d1 * con) + d2)/un;
ret = printf("%1.3f", res);
return(ret);
}
static int
dtlb_missstore(struct counters *cpu, int pos)
{
/*
* ((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION) /
* CPU_CLK_UNHALTED.THREAD_P (t >= .1)
*/
int ret;
struct counters *dtsb_m, *dtsb_d;
struct counters *unhalt;
double con, un, d1, d2, res;
con = 7.0;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
dtsb_m = find_counter(cpu, "DTLB_STORE_MISSES.STLB_HIT");
dtsb_d = find_counter(cpu, "DTLB_STORE_MISSES.WALK_DURATION");
if (pos != -1) {
d1 = dtsb_m->vals[pos] * 1.0;
d2 = dtsb_d->vals[pos] * 1.0;
un = unhalt->vals[pos] * 1.0;
} else {
d1 = dtsb_m->sum * 1.0;
d2 = dtsb_d->sum * 1.0;
un = unhalt->sum * 1.0;
}
res = ((d1 * con) + d2)/un;
ret = printf("%1.3f", res);
return(ret);
}
static int
itlb_miss(struct counters *cpu, int pos)
{
/* ITLB_MISSES.WALK_DURATION / CPU_CLK_UNTHREAD_P IB */
int ret;
struct counters *itlb;
struct counters *unhalt;
double un, d1, res;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
if (pos != -1) {
d1 = itlb->vals[pos] * 1.0;
un = unhalt->vals[pos] * 1.0;
} else {
d1 = itlb->sum * 1.0;
un = unhalt->sum * 1.0;
}
res = d1/un;
ret = printf("%1.3f", res);
return(ret);
}
static int
itlb_miss_broad(struct counters *cpu, int pos)
{
/* (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNTHREAD_P */
int ret;
struct counters *itlb;
struct counters *unhalt;
struct counters *four_k;
double un, d1, res, k;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
four_k = find_counter(cpu, "ITLB_MISSES.STLB_HIT_4K");
if (pos != -1) {
d1 = itlb->vals[pos] * 1.0;
un = unhalt->vals[pos] * 1.0;
k = four_k->vals[pos] * 1.0;
} else {
d1 = itlb->sum * 1.0;
un = unhalt->sum * 1.0;
k = four_k->sum * 1.0;
}
res = (7.0 * k + d1)/un;
ret = printf("%1.3f", res);
return(ret);
}
static int
icache_miss(struct counters *cpu, int pos)
{
/* (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P IB */
int ret;
struct counters *itlb, *icache;
struct counters *unhalt;
double un, d1, ic, res;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
icache = find_counter(cpu, "ICACHE.IFETCH_STALL");
if (pos != -1) {
d1 = itlb->vals[pos] * 1.0;
ic = icache->vals[pos] * 1.0;
un = unhalt->vals[pos] * 1.0;
} else {
d1 = itlb->sum * 1.0;
ic = icache->sum * 1.0;
un = unhalt->sum * 1.0;
}
res = (ic-d1)/un;
ret = printf("%1.3f", res);
return(ret);
}
static int
icache_miss_has(struct counters *cpu, int pos)
{
/* (36 * ICACHE.MISSES) / CPU_CLK_UNHALTED.THREAD_P */
int ret;
struct counters *icache;
struct counters *unhalt;
double un, con, ic, res;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
icache = find_counter(cpu, "ICACHE.MISSES");
con = 36.0;
if (pos != -1) {
ic = icache->vals[pos] * 1.0;
un = unhalt->vals[pos] * 1.0;
} else {
ic = icache->sum * 1.0;
un = unhalt->sum * 1.0;
}
res = (con * ic)/un;
ret = printf("%1.3f", res);
return(ret);
}
static int
lcp_stall(struct counters *cpu, int pos)
{
/* ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P IB */
int ret;
struct counters *ild;
struct counters *unhalt;
double un, d1, res;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
ild = find_counter(cpu, "ILD_STALL.LCP");
if (pos != -1) {
d1 = ild->vals[pos] * 1.0;
un = unhalt->vals[pos] * 1.0;
} else {
d1 = ild->sum * 1.0;
un = unhalt->sum * 1.0;
}
res = d1/un;
ret = printf("%1.3f", res);
return(ret);
}
static int
frontendstall(struct counters *cpu, int pos)
{
/* 12 - IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4) (thresh >= .15) */
int ret;
struct counters *idq;
struct counters *unhalt;
double con, un, id, res;
con = 4.0;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
idq = find_counter(cpu, "IDQ_UOPS_NOT_DELIVERED.CORE");
if (pos != -1) {
id = idq->vals[pos] * 1.0;
un = unhalt->vals[pos] * 1.0;
} else {
id = idq->sum * 1.0;
un = unhalt->sum * 1.0;
}
res = id/(un * con);
ret = printf("%1.3f", res);
return(ret);
}
static int
clears(struct counters *cpu, int pos)
{
/* 13 - ((MACHINE_CLEARS.MEMORY_ORDERING + MACHINE_CLEARS.SMC + MACHINE_CLEARS.MASKMOV ) * 100 )
* / CPU_CLK_UNHALTED.THREAD_P (thresh >= .02)*/
int ret;
struct counters *clr1, *clr2, *clr3;
struct counters *unhalt;
double con, un, cl1, cl2, cl3, res;
con = 100.0;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
if (pos != -1) {
cl1 = clr1->vals[pos] * 1.0;
cl2 = clr2->vals[pos] * 1.0;
cl3 = clr3->vals[pos] * 1.0;
un = unhalt->vals[pos] * 1.0;
} else {
cl1 = clr1->sum * 1.0;
cl2 = clr2->sum * 1.0;
cl3 = clr3->sum * 1.0;
un = unhalt->sum * 1.0;
}
res = ((cl1 + cl2 + cl3) * con)/un;
ret = printf("%1.3f", res);
return(ret);
}
static int
clears_broad(struct counters *cpu, int pos)
{
int ret;
struct counters *clr1, *clr2, *clr3, *cyc;
struct counters *unhalt;
double con, un, cl1, cl2, cl3, cy, res;
con = 100.0;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
cyc = find_counter(cpu, "MACHINE_CLEARS.CYCLES");
if (pos != -1) {
cl1 = clr1->vals[pos] * 1.0;
cl2 = clr2->vals[pos] * 1.0;
cl3 = clr3->vals[pos] * 1.0;
cy = cyc->vals[pos] * 1.0;
un = unhalt->vals[pos] * 1.0;
} else {
cl1 = clr1->sum * 1.0;
cl2 = clr2->sum * 1.0;
cl3 = clr3->sum * 1.0;
cy = cyc->sum * 1.0;
un = unhalt->sum * 1.0;
}
/* Formula not listed but extrapulated to add the cy ?? */
res = ((cl1 + cl2 + cl3 + cy) * con)/un;
ret = printf("%1.3f", res);
return(ret);
}
static int
microassist(struct counters *cpu, int pos)
{
/* 14 - IDQ.MS_CYCLES / CPU_CLK_UNHALTED.THREAD_P (thresh > .05) */
int ret;
struct counters *idq;
struct counters *unhalt;
double un, id, res, con;
con = 4.0;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
idq = find_counter(cpu, "IDQ.MS_UOPS");
if (pos != -1) {
id = idq->vals[pos] * 1.0;
un = unhalt->vals[pos] * 1.0;
} else {
id = idq->sum * 1.0;
un = unhalt->sum * 1.0;
}
res = id/(un * con);
ret = printf("%1.3f", res);
return(ret);
}
static int
microassist_broad(struct counters *cpu, int pos)
{
int ret;
struct counters *idq;
struct counters *unhalt;
struct counters *uopiss;
struct counters *uopret;
double un, id, res, con, uoi, uor;
con = 4.0;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
idq = find_counter(cpu, "IDQ.MS_UOPS");
uopiss = find_counter(cpu, "UOPS_ISSUED.ANY");
uopret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
if (pos != -1) {
id = idq->vals[pos] * 1.0;
un = unhalt->vals[pos] * 1.0;
uoi = uopiss->vals[pos] * 1.0;
uor = uopret->vals[pos] * 1.0;
} else {
id = idq->sum * 1.0;
un = unhalt->sum * 1.0;
uoi = uopiss->sum * 1.0;
uor = uopret->sum * 1.0;
}
res = (uor/uoi) * (id/(un * con));
ret = printf("%1.3f", res);
return(ret);
}
static int
aliasing(struct counters *cpu, int pos)
{
/* 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
int ret;
struct counters *ld;
struct counters *unhalt;
double un, lds, con, res;
con = 5.0;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS");
if (pos != -1) {
lds = ld->vals[pos] * 1.0;
un = unhalt->vals[pos] * 1.0;
} else {
lds = ld->sum * 1.0;
un = unhalt->sum * 1.0;
}
res = (lds * con)/un;
ret = printf("%1.3f", res);
return(ret);
}
static int
aliasing_broad(struct counters *cpu, int pos)
{
/* 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
int ret;
struct counters *ld;
struct counters *unhalt;
double un, lds, con, res;
con = 7.0;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS");
if (pos != -1) {
lds = ld->vals[pos] * 1.0;
un = unhalt->vals[pos] * 1.0;
} else {
lds = ld->sum * 1.0;
un = unhalt->sum * 1.0;
}
res = (lds * con)/un;
ret = printf("%1.3f", res);
return(ret);
}
static int
fpassists(struct counters *cpu, int pos)
{
/* 16 - FP_ASSIST.ANY/INST_RETIRED.ANY_P */
int ret;
struct counters *fp;
struct counters *inst;
double un, fpd, res;
inst = find_counter(cpu, "INST_RETIRED.ANY_P");
fp = find_counter(cpu, "FP_ASSIST.ANY");
if (pos != -1) {
fpd = fp->vals[pos] * 1.0;
un = inst->vals[pos] * 1.0;
} else {
fpd = fp->sum * 1.0;
un = inst->sum * 1.0;
}
res = fpd/un;
ret = printf("%1.3f", res);
return(ret);
}
static int
otherassistavx(struct counters *cpu, int pos)
{
/* 17 - (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/
int ret;
struct counters *oth;
struct counters *unhalt;
double un, ot, con, res;
con = 75.0;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
oth = find_counter(cpu, "OTHER_ASSISTS.AVX_TO_SSE");
if (pos != -1) {
ot = oth->vals[pos] * 1.0;
un = unhalt->vals[pos] * 1.0;
} else {
ot = oth->sum * 1.0;
un = unhalt->sum * 1.0;
}
res = (ot * con)/un;
ret = printf("%1.3f", res);
return(ret);
}
static int
otherassistsse(struct counters *cpu, int pos)
{
int ret;
struct counters *oth;
struct counters *unhalt;
double un, ot, con, res;
/* 18 (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/
con = 75.0;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
oth = find_counter(cpu, "OTHER_ASSISTS.SSE_TO_AVX");
if (pos != -1) {
ot = oth->vals[pos] * 1.0;
un = unhalt->vals[pos] * 1.0;
} else {
ot = oth->sum * 1.0;
un = unhalt->sum * 1.0;
}
res = (ot * con)/un;
ret = printf("%1.3f", res);
return(ret);
}
static int
efficiency1(struct counters *cpu, int pos)
{
int ret;
struct counters *uops;
struct counters *unhalt;
double un, ot, con, res;
/* 19 (UOPS_RETIRED.RETIRE_SLOTS/(4*CPU_CLK_UNHALTED.THREAD_P) look if thresh < .9*/
con = 4.0;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
if (pos != -1) {
ot = uops->vals[pos] * 1.0;
un = unhalt->vals[pos] * 1.0;
} else {
ot = uops->sum * 1.0;
un = unhalt->sum * 1.0;
}
res = ot/(con * un);
ret = printf("%1.3f", res);
return(ret);
}
static int
efficiency2(struct counters *cpu, int pos)
{
int ret;
struct counters *uops;
struct counters *unhalt;
double un, ot, res;
/* 20 - CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P good if > 1. (comp factor)*/
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
uops = find_counter(cpu, "INST_RETIRED.ANY_P");
if (pos != -1) {
ot = uops->vals[pos] * 1.0;
un = unhalt->vals[pos] * 1.0;
} else {
ot = uops->sum * 1.0;
un = unhalt->sum * 1.0;
}
res = un/ot;
ret = printf("%1.3f", res);
return(ret);
}
#define SANDY_BRIDGE_COUNT 20
static struct cpu_entry sandy_bridge[SANDY_BRIDGE_COUNT] = {
/*01*/ { "allocstall1", "thresh > .05",
"pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW -w 1",
allocstall1, 2 },
/* -- not defined for SB right (partial-rat_stalls) 02*/
{ "allocstall2", "thresh > .05",
"pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP -w 1",
allocstall2, 2 },
/*03*/ { "br_miss", "thresh >= .2",
"pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
br_mispredict, 2 },
/*04*/ { "splitload", "thresh >= .1",
"pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1",
splitload_sb, 2 },
/* 05*/ { "splitstore", "thresh >= .01",
"pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
splitstore_sb, 2 },
/*06*/ { "contested", "thresh >= .05",
"pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
contested, 2 },
/*07*/ { "blockstorefwd", "thresh >= .05",
"pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
blockstoreforward, 2 },
/*08*/ { "cache2", "thresh >= .2",
"pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
cache2, 4 },
/*09*/ { "cache1", "thresh >= .2",
"pmcstat -s MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
cache1, 2 },
/*10*/ { "dtlbmissload", "thresh >= .1",
"pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
dtlb_missload, 3 },
/*11*/ { "dtlbmissstore", "thresh >= .05",
"pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
dtlb_missstore, 3 },
/*12*/ { "frontendstall", "thresh >= .15",
"pmcstat -s IDQ_UOPS_NOT_DELIVERED.CORE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
frontendstall, 2 },
/*13*/ { "clears", "thresh >= .02",
"pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
clears, 4 },
/*14*/ { "microassist", "thresh >= .05",
"pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
microassist, 2 },
/*15*/ { "aliasing_4k", "thresh >= .1",
"pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
aliasing, 2 },
/*16*/ { "fpassist", "look for a excessive value",
"pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
fpassists, 2 },
/*17*/ { "otherassistavx", "look for a excessive value",
"pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
otherassistavx, 2},
/*18*/ { "otherassistsse", "look for a excessive value",
"pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
otherassistsse, 2 },
/*19*/ { "eff1", "thresh < .9",
"pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
efficiency1, 2 },
/*20*/ { "eff2", "thresh > 1.0",
"pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
efficiency2, 2 },
};
#define IVY_BRIDGE_COUNT 21
static struct cpu_entry ivy_bridge[IVY_BRIDGE_COUNT] = {
/*1*/ { "eff1", "thresh < .75",
"pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
efficiency1, 2 },
/*2*/ { "eff2", "thresh > 1.0",
"pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
efficiency2, 2 },
/*3*/ { "itlbmiss", "thresh > .05",
"pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
itlb_miss, 2 },
/*4*/ { "icachemiss", "thresh > .05",
"pmcstat -s ICACHE.IFETCH_STALL -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
icache_miss, 3 },
/*5*/ { "lcpstall", "thresh > .05",
"pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
lcp_stall, 2 },
/*6*/ { "cache1", "thresh >= .2",
"pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
cache1ib, 2 },
/*7*/ { "cache2", "thresh >= .2",
"pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
cache2ib, 2 },
/*8*/ { "contested", "thresh >= .05",
"pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
contested, 2 },
/*9*/ { "datashare", "thresh >= .05",
"pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
datasharing, 2 },
/*10*/ { "blockstorefwd", "thresh >= .05",
"pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
blockstoreforward, 2 },
/*11*/ { "splitload", "thresh >= .1",
"pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
splitloadib, 4 },
/*12*/ { "splitstore", "thresh >= .01",
"pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1",
splitstore, 2 },
/*13*/ { "aliasing_4k", "thresh >= .1",
"pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
aliasing, 2 },
/*14*/ { "dtlbmissload", "thresh >= .1",
"pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
dtlb_missload , 3},
/*15*/ { "dtlbmissstore", "thresh >= .05",
"pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
dtlb_missstore, 3 },
/*16*/ { "br_miss", "thresh >= .2",
"pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
br_mispredictib, 8 },
/*17*/ { "clears", "thresh >= .02",
"pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
clears, 4 },
/*18*/ { "microassist", "thresh >= .05",
"pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
microassist, 2 },
/*19*/ { "fpassist", "look for a excessive value",
"pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
fpassists, 2 },
/*20*/ { "otherassistavx", "look for a excessive value",
"pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
otherassistavx , 2},
/*21*/ { "otherassistsse", "look for a excessive value",
"pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
otherassistsse, 2 },
};
#define HASWELL_COUNT 20
static struct cpu_entry haswell[HASWELL_COUNT] = {
/*1*/ { "eff1", "thresh < .75",
"pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
efficiency1, 2 },
/*2*/ { "eff2", "thresh > 1.0",
"pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
efficiency2, 2 },
/*3*/ { "itlbmiss", "thresh > .05",
"pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
itlb_miss, 2 },
/*4*/ { "icachemiss", "thresh > .05",
"pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1",
icache_miss_has, 2 },
/*5*/ { "lcpstall", "thresh > .05",
"pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
lcp_stall, 2 },
/*6*/ { "cache1", "thresh >= .2",
"pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
cache1ib, 2 },
/*7*/ { "cache2", "thresh >= .2",
"pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
cache2has, 4 },
/*8*/ { "contested", "thresh >= .05",
"pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
contested_has, 2 },
/*9*/ { "datashare", "thresh >= .05",
"pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
datasharing_has, 2 },
/*10*/ { "blockstorefwd", "thresh >= .05",
"pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
blockstoreforward, 2 },
/*11*/ { "splitload", "thresh >= .1",
"pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOPS_RETIRED.SPLIT_LOADS -w 1",
splitload , 2},
/*12*/ { "splitstore", "thresh >= .01",
"pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1",
splitstore, 2 },
/*13*/ { "aliasing_4k", "thresh >= .1",
"pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
aliasing, 2 },
/*14*/ { "dtlbmissload", "thresh >= .1",
"pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
dtlb_missload, 3 },
/*15*/ { "br_miss", "thresh >= .2",
"pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
br_mispredict, 2 },
/*16*/ { "clears", "thresh >= .02",
"pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
clears, 4 },
/*17*/ { "microassist", "thresh >= .05",
"pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
microassist, 2 },
/*18*/ { "fpassist", "look for a excessive value",
"pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
fpassists, 2 },
/*19*/ { "otherassistavx", "look for a excessive value",
"pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
otherassistavx, 2 },
/*20*/ { "otherassistsse", "look for a excessive value",
"pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
otherassistsse, 2 },
};
static void
explain_name_broad(const char *name)
{
const char *mythresh;
if (strcmp(name, "eff1") == 0) {
printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n");
mythresh = "thresh < .75";
} else if (strcmp(name, "eff2") == 0) {
printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n");
mythresh = "thresh > 1.0";
} else if (strcmp(name, "itlbmiss") == 0) {
printf("Examine (7 * ITLB_MISSES_STLB_HIT_4K + ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh > .05";
} else if (strcmp(name, "icachemiss") == 0) {
printf("Examine ( 36.0 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P ??? may not be right \n");
mythresh = "thresh > .05";
} else if (strcmp(name, "lcpstall") == 0) {
printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh > .05";
} else if (strcmp(name, "cache1") == 0) {
printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh >= .1";
} else if (strcmp(name, "cache2") == 0) {
printf("Examine (36.0 * MEM_LOAD_UOPS_RETIRED.L3_HIT / CPU_CLK_UNHALTED.THREAD_P)\n");
mythresh = "thresh >= .2";
} else if (strcmp(name, "contested") == 0) {
printf("Examine ((MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS)/ CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh >= .05";
} else if (strcmp(name, "datashare") == 0) {
printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh > .05";
} else if (strcmp(name, "blockstorefwd") == 0) {
printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh >= .05";
} else if (strcmp(name, "aliasing_4k") == 0) {
printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 7) / CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh >= .1";
} else if (strcmp(name, "dtlbmissload") == 0) {
printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n");
printf(" / CPU_CLK_UNHALTED.THREAD_P)\n");
mythresh = "thresh >= .1";
} else if (strcmp(name, "br_miss") == 0) {
printf("Examine BR_MISP_RETIRED.ALL_BRANCHS_PS / (BR_MISP_RETIED.ALL_BRANCHES_PS + MACHINE_CLEARS.COUNT) *\n");
printf(" (UOPS_ISSUEDF.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) /\n");
printf("CPU_CLK_UNHALTED.THREAD * 4)\n");
mythresh = "thresh >= .2";
} else if (strcmp(name, "clears") == 0) {
printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n");
printf(" MACHINE_CLEARS.SMC + \n");
printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh >= .02";
} else if (strcmp(name, "fpassist") == 0) {
printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n");
mythresh = "look for a excessive value";
} else if (strcmp(name, "otherassistavx") == 0) {
printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "look for a excessive value";
} else if (strcmp(name, "microassist") == 0) {
printf("Examine (UOPS_RETIRED.RETIRE_SLOTS/UOPS_ISSUED.ANY) * (IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n");
printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n");
mythresh = "thresh >= .05";
} else {
printf("Unknown name:%s\n", name);
mythresh = "unknown entry";
}
printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
#define BROADWELL_COUNT 17
static struct cpu_entry broadwell[BROADWELL_COUNT] = {
/*1*/ { "eff1", "thresh < .75",
"pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
efficiency1, 2 },
/*2*/ { "eff2", "thresh > 1.0",
"pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
efficiency2, 2 },
/*3*/ { "itlbmiss", "thresh > .05",
"pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -s ITLB_MISSES.STLB_HIT_4K -w 1",
itlb_miss_broad, 3 },
/*4*/ { "icachemiss", "thresh > .05",
"pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1",
icache_miss_has, 2 },
/*5*/ { "lcpstall", "thresh > .05",
"pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
lcp_stall, 2 },
/*6*/ { "cache1", "thresh >= .1",
"pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
cache1broad, 2 },
/*7*/ { "cache2", "thresh >= .2",
"pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
cache2broad, 2 },
/*8*/ { "contested", "thresh >= .05",
"pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS -w 1",
contestedbroad, 2 },
/*9*/ { "datashare", "thresh >= .05",
"pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
datasharing_has, 2 },
/*10*/ { "blockstorefwd", "thresh >= .05",
"pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
blockstoreforward, 2 },
/*11*/ { "aliasing_4k", "thresh >= .1",
"pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
aliasing_broad, 2 },
/*12*/ { "dtlbmissload", "thresh >= .1",
"pmcstat -s DTLB_LOAD_MISSES.STLB_HIT_4K -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
dtlb_missload, 3 },
/*13*/ { "br_miss", "thresh >= .2",
"pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.CYCLES -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
br_mispredict_broad, 7 },
/*14*/ { "clears", "thresh >= .02",
"pmcstat -s MACHINE_CLEARS.CYCLES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
clears_broad, 5 },
/*15*/ { "fpassist", "look for a excessive value",
"pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
fpassists, 2 },
/*16*/ { "otherassistavx", "look for a excessive value",
"pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
otherassistavx, 2 },
/*17*/ { "microassist", "thresh >= .2",
"pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -w 1",
microassist_broad, 4 },
};
static void
set_sandybridge(void)
{
strcpy(the_cpu.cputype, "SandyBridge PMC");
the_cpu.number = SANDY_BRIDGE_COUNT;
the_cpu.ents = sandy_bridge;
the_cpu.explain = explain_name_sb;
}
static void
set_ivybridge(void)
{
strcpy(the_cpu.cputype, "IvyBridge PMC");
the_cpu.number = IVY_BRIDGE_COUNT;
the_cpu.ents = ivy_bridge;
the_cpu.explain = explain_name_ib;
}
static void
set_haswell(void)
{
strcpy(the_cpu.cputype, "HASWELL PMC");
the_cpu.number = HASWELL_COUNT;
the_cpu.ents = haswell;
the_cpu.explain = explain_name_has;
}
static void
set_broadwell(void)
{
strcpy(the_cpu.cputype, "HASWELL PMC");
the_cpu.number = BROADWELL_COUNT;
the_cpu.ents = broadwell;
the_cpu.explain = explain_name_broad;
}
static int
set_expression(const char *name)
{
int found = 0, i;
for(i=0 ; i< the_cpu.number; i++) {
if (strcmp(name, the_cpu.ents[i].name) == 0) {
found = 1;
expression = the_cpu.ents[i].func;
command = the_cpu.ents[i].command;
threshold = the_cpu.ents[i].thresh;
if (the_cpu.ents[i].counters_required > max_pmc_counters) {
printf("Test %s requires that the CPU have %d counters and this CPU has only %d\n",
the_cpu.ents[i].name,
the_cpu.ents[i].counters_required, max_pmc_counters);
printf("Sorry this test can not be run\n");
if (run_all == 0) {
exit(-1);
} else {
return(-1);
}
}
break;
}
}
if (!found) {
printf("For CPU type %s we have no expression:%s\n",
the_cpu.cputype, name);
exit(-1);
}
return(0);
}
static int
validate_expression(char *name)
{
int i, found;
found = 0;
for(i=0 ; i< the_cpu.number; i++) {
if (strcmp(name, the_cpu.ents[i].name) == 0) {
found = 1;
break;
}
}
if (!found) {
return(-1);
}
return (0);
}
static void
do_expression(struct counters *cpu, int pos)
{
if (expression == NULL)
return;
(*expression)(cpu, pos);
}
static void
process_header(int idx, char *p)
{
struct counters *up;
int i, len, nlen;
/*
* Given header element idx, at p in
* form 's/NN/nameof'
* process the entry to pull out the name and
* the CPU number.
*/
if (strncmp(p, "s/", 2)) {
printf("Check -- invalid header no s/ in %s\n",
p);
return;
}
up = &cnts[idx];
up->cpu = strtol(&p[2], NULL, 10);
len = strlen(p);
for (i=2; i<len; i++) {
if (p[i] == '/') {
nlen = strlen(&p[(i+1)]);
if (nlen < (MAX_NLEN-1)) {
strcpy(up->counter_name, &p[(i+1)]);
} else {
strncpy(up->counter_name, &p[(i+1)], (MAX_NLEN-1));
}
}
}
}
static void
build_counters_from_header(FILE *io)
{
char buffer[8192], *p;
int i, len, cnt;
size_t mlen;
/* We have a new start, lets
* setup our headers and cpus.
*/
if (fgets(buffer, sizeof(buffer), io) == NULL) {
printf("First line can't be read from file err:%d\n", errno);
return;
}
/*
* Ok output is an array of counters. Once
* we start to read the values in we must
* put them in there slot to match there CPU and
* counter being updated. We create a mass array
* of the counters, filling in the CPU and
* counter name.
*/
/* How many do we get? */
len = strlen(buffer);
for (i=0, cnt=0; i<len; i++) {
if (strncmp(&buffer[i], "s/", 2) == 0) {
cnt++;
for(;i<len;i++) {
if (buffer[i] == ' ')
break;
}
}
}
mlen = sizeof(struct counters) * cnt;
cnts = malloc(mlen);
ncnts = cnt;
if (cnts == NULL) {
printf("No memory err:%d\n", errno);
return;
}
memset(cnts, 0, mlen);
for (i=0, cnt=0; i<len; i++) {
if (strncmp(&buffer[i], "s/", 2) == 0) {
p = &buffer[i];
for(;i<len;i++) {
if (buffer[i] == ' ') {
buffer[i] = 0;
break;
}
}
process_header(cnt, p);
cnt++;
}
}
if (verbose)
printf("We have %d entries\n", cnt);
}
extern int max_to_collect;
int max_to_collect = MAX_COUNTER_SLOTS;
static int
read_a_line(FILE *io)
{
char buffer[8192], *p, *stop;
int pos, i;
if (fgets(buffer, sizeof(buffer), io) == NULL) {
return(0);
}
p = buffer;
for (i=0; i<ncnts; i++) {
pos = cnts[i].pos;
cnts[i].vals[pos] = strtol(p, &stop, 0);
cnts[i].pos++;
cnts[i].sum += cnts[i].vals[pos];
p = stop;
}
return (1);
}
extern int cpu_count_out;
int cpu_count_out=0;
static void
print_header(void)
{
int i, cnt, printed_cnt;
printf("*********************************\n");
for(i=0, cnt=0; i<MAX_CPU; i++) {
if (glob_cpu[i]) {
cnt++;
}
}
cpu_count_out = cnt;
for(i=0, printed_cnt=0; i<MAX_CPU; i++) {
if (glob_cpu[i]) {
printf("CPU%d", i);
printed_cnt++;
}
if (printed_cnt == cnt) {
printf("\n");
break;
} else {
printf("\t");
}
}
}
static void
lace_cpus_together(void)
{
int i, j, lace_cpu;
struct counters *cpat, *at;
for(i=0; i<ncnts; i++) {
cpat = &cnts[i];
if (cpat->next_cpu) {
/* Already laced in */
continue;
}
lace_cpu = cpat->cpu;
if (lace_cpu >= MAX_CPU) {
printf("CPU %d to big\n", lace_cpu);
continue;
}
if (glob_cpu[lace_cpu] == NULL) {
glob_cpu[lace_cpu] = cpat;
} else {
/* Already processed this cpu */
continue;
}
/* Ok look forward for cpu->cpu and link in */
for(j=(i+1); j<ncnts; j++) {
at = &cnts[j];
if (at->next_cpu) {
continue;
}
if (at->cpu == lace_cpu) {
/* Found one */
cpat->next_cpu = at;
cpat = at;
}
}
}
}
static void
process_file(char *filename)
{
FILE *io;
int i;
int line_at, not_done;
pid_t pid_of_command=0;
if (filename == NULL) {
io = my_popen(command, "r", &pid_of_command);
} else {
io = fopen(filename, "r");
if (io == NULL) {
printf("Can't process file %s err:%d\n",
filename, errno);
return;
}
}
build_counters_from_header(io);
if (cnts == NULL) {
/* Nothing we can do */
printf("Nothing to do -- no counters built\n");
if (io) {
fclose(io);
}
return;
}
lace_cpus_together();
print_header();
if (verbose) {
for (i=0; i<ncnts; i++) {
printf("Counter:%s cpu:%d index:%d\n",
cnts[i].counter_name,
cnts[i].cpu, i);
}
}
line_at = 0;
not_done = 1;
while(not_done) {
if (read_a_line(io)) {
line_at++;
} else {
break;
}
if (line_at >= max_to_collect) {
not_done = 0;
}
if (filename == NULL) {
int cnt;
/* For the ones we dynamically open we print now */
for(i=0, cnt=0; i<MAX_CPU; i++) {
do_expression(glob_cpu[i], (line_at-1));
cnt++;
if (cnt == cpu_count_out) {
printf("\n");
break;
} else {
printf("\t");
}
}
}
}
if (filename) {
fclose(io);
} else {
my_pclose(io, pid_of_command);
}
}
#if defined(__amd64__)
#define cpuid(in,a,b,c,d)\
asm("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (in));
static __inline void
do_cpuid(u_int ax, u_int cx, u_int *p)
{
__asm __volatile("cpuid"
: "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
: "0" (ax), "c" (cx) );
}
#else
#define cpuid(in, a, b, c, d)
static __inline void
do_cpuid(u_int ax, u_int cx, u_int *p)
{
}
#endif
static void
get_cpuid_set(void)
{
unsigned long eax, ebx, ecx, edx;
int model;
pid_t pid_of_command=0;
size_t sz, len;
FILE *io;
char linebuf[1024], *str;
u_int reg[4];
eax = ebx = ecx = edx = 0;
cpuid(0, eax, ebx, ecx, edx);
if (ebx == 0x68747541) {
printf("AMD processors are not supported by this program\n");
printf("Sorry\n");
exit(0);
} else if (ebx == 0x6972794) {
printf("Cyrix processors are not supported by this program\n");
printf("Sorry\n");
exit(0);
} else if (ebx == 0x756e6547) {
printf("Genuine Intel\n");
} else {
printf("Unknown processor type 0x%lx Only Intel AMD64 types are supported by this routine!\n", ebx);
exit(0);
}
cpuid(1, eax, ebx, ecx, edx);
model = (((eax & 0xF0000) >> 12) | ((eax & 0xF0) >> 4));
printf("CPU model is 0x%x id:0x%lx\n", model, eax);
switch (eax & 0xF00) {
case 0x500: /* Pentium family processors */
printf("Intel Pentium P5\n");
goto not_supported;
break;
case 0x600: /* Pentium Pro, Celeron, Pentium II & III */
switch (model) {
case 0x1:
printf("Intel Pentium P6\n");
goto not_supported;
break;
case 0x3:
case 0x5:
printf("Intel PII\n");
goto not_supported;
break;
case 0x6: case 0x16:
printf("Intel CL\n");
goto not_supported;
break;
case 0x7: case 0x8: case 0xA: case 0xB:
printf("Intel PIII\n");
goto not_supported;
break;
case 0x9: case 0xD:
printf("Intel PM\n");
goto not_supported;
break;
case 0xE:
printf("Intel CORE\n");
goto not_supported;
break;
case 0xF:
printf("Intel CORE2\n");
goto not_supported;
break;
case 0x17:
printf("Intel CORE2EXTREME\n");
goto not_supported;
break;
case 0x1C: /* Per Intel document 320047-002. */
printf("Intel ATOM\n");
goto not_supported;
break;
case 0x1A:
case 0x1E: /*
* Per Intel document 253669-032 9/2009,
* pages A-2 and A-57
*/
case 0x1F: /*
* Per Intel document 253669-032 9/2009,
* pages A-2 and A-57
*/
printf("Intel COREI7\n");
goto not_supported;
break;
case 0x2E:
printf("Intel NEHALEM\n");
goto not_supported;
break;
case 0x25: /* Per Intel document 253669-033US 12/2009. */
case 0x2C: /* Per Intel document 253669-033US 12/2009. */
printf("Intel WESTMERE\n");
goto not_supported;
break;
case 0x2F: /* Westmere-EX, seen in wild */
printf("Intel WESTMERE\n");
goto not_supported;
break;
case 0x2A: /* Per Intel document 253669-039US 05/2011. */
printf("Intel SANDYBRIDGE\n");
set_sandybridge();
break;
case 0x2D: /* Per Intel document 253669-044US 08/2012. */
printf("Intel SANDYBRIDGE_XEON\n");
set_sandybridge();
break;
case 0x3A: /* Per Intel document 253669-043US 05/2012. */
printf("Intel IVYBRIDGE\n");
set_ivybridge();
break;
case 0x3E: /* Per Intel document 325462-045US 01/2013. */
printf("Intel IVYBRIDGE_XEON\n");
set_ivybridge();
break;
case 0x3F: /* Per Intel document 325462-045US 09/2014. */
printf("Intel HASWELL (Xeon)\n");
set_haswell();
break;
case 0x3C: /* Per Intel document 325462-045US 01/2013. */
case 0x45:
case 0x46:
printf("Intel HASWELL\n");
set_haswell();
break;
case 0x4e:
case 0x5e:
printf("Intel SKY-LAKE\n");
goto not_supported;
break;
case 0x3D:
case 0x47:
printf("Intel BROADWELL\n");
set_broadwell();
break;
case 0x4f:
case 0x56:
printf("Intel BROADWEL (Xeon)\n");
set_broadwell();
break;
case 0x4D:
/* Per Intel document 330061-001 01/2014. */
printf("Intel ATOM_SILVERMONT\n");
goto not_supported;
break;
default:
printf("Intel model 0x%x is not known -- sorry\n",
model);
goto not_supported;
break;
}
break;
case 0xF00: /* P4 */
printf("Intel unknown model %d\n", model);
goto not_supported;
break;
}
do_cpuid(0xa, 0, reg);
max_pmc_counters = (reg[3] & 0x0000000f) + 1;
printf("We have %d PMC counters to work with\n", max_pmc_counters);
/* Ok lets load the list of all known PMC's */
io = my_popen("/usr/sbin/pmccontrol -L", "r", &pid_of_command);
if (valid_pmcs == NULL) {
/* Likely */
pmc_allocated_cnt = PMC_INITIAL_ALLOC;
sz = sizeof(char *) * pmc_allocated_cnt;
valid_pmcs = malloc(sz);
if (valid_pmcs == NULL) {
printf("No memory allocation fails at startup?\n");
exit(-1);
}
memset(valid_pmcs, 0, sz);
}
while (fgets(linebuf, sizeof(linebuf), io) != NULL) {
if (linebuf[0] != '\t') {
/* sometimes headers ;-) */
continue;
}
len = strlen(linebuf);
if (linebuf[(len-1)] == '\n') {
/* Likely */
linebuf[(len-1)] = 0;
}
str = &linebuf[1];
len = strlen(str) + 1;
valid_pmcs[valid_pmc_cnt] = malloc(len);
if (valid_pmcs[valid_pmc_cnt] == NULL) {
printf("No memory2 allocation fails at startup?\n");
exit(-1);
}
memset(valid_pmcs[valid_pmc_cnt], 0, len);
strcpy(valid_pmcs[valid_pmc_cnt], str);
valid_pmc_cnt++;
if (valid_pmc_cnt >= pmc_allocated_cnt) {
/* Got to expand -- unlikely */
char **more;
sz = sizeof(char *) * (pmc_allocated_cnt * 2);
more = malloc(sz);
if (more == NULL) {
printf("No memory3 allocation fails at startup?\n");
exit(-1);
}
memset(more, 0, sz);
memcpy(more, valid_pmcs, sz);
pmc_allocated_cnt *= 2;
free(valid_pmcs);
valid_pmcs = more;
}
}
my_pclose(io, pid_of_command);
return;
not_supported:
printf("Not supported\n");
exit(-1);
}
static void
explain_all(void)
{
int i;
printf("For CPU's of type %s the following expressions are available:\n",the_cpu.cputype);
printf("-------------------------------------------------------------\n");
for(i=0; i<the_cpu.number; i++){
printf("For -e %s ", the_cpu.ents[i].name);
(*the_cpu.explain)(the_cpu.ents[i].name);
printf("----------------------------\n");
}
}
static void
test_for_a_pmc(const char *pmc, int out_so_far)
{
FILE *io;
pid_t pid_of_command=0;
char my_command[1024];
char line[1024];
char resp[1024];
int len, llen, i;
if (out_so_far < 50) {
len = 50 - out_so_far;
for(i=0; i<len; i++) {
printf(" ");
}
}
sprintf(my_command, "/usr/sbin/pmcstat -w .25 -c 0 -s %s", pmc);
io = my_popen(my_command, "r", &pid_of_command);
if (io == NULL) {
printf("Failed -- popen fails\n");
return;
}
/* Setup what we expect */
len = sprintf(resp, "%s", pmc);
if (fgets(line, sizeof(line), io) == NULL) {
printf("Failed -- no output from pmstat\n");
goto out;
}
llen = strlen(line);
if (line[(llen-1)] == '\n') {
line[(llen-1)] = 0;
llen--;
}
for(i=2; i<(llen-len); i++) {
if (strncmp(&line[i], "ERROR", 5) == 0) {
printf("Failed %s\n", line);
goto out;
} else if (strncmp(&line[i], resp, len) == 0) {
int j, k;
if (fgets(line, sizeof(line), io) == NULL) {
printf("Failed -- no second output from pmstat\n");
goto out;
}
len = strlen(line);
for (j=0; j<len; j++) {
if (line[j] == ' ') {
j++;
} else {
break;
}
}
printf("Pass");
len = strlen(&line[j]);
if (len < 20) {
for(k=0; k<(20-len); k++) {
printf(" ");
}
}
if (len) {
printf("%s", &line[j]);
} else {
printf("\n");
}
goto out;
}
}
printf("Failed -- '%s' not '%s'\n", line, resp);
out:
my_pclose(io, pid_of_command);
}
static int
add_it_to(char **vars, int cur_cnt, char *name)
{
int i;
size_t len;
for(i=0; i<cur_cnt; i++) {
if (strcmp(vars[i], name) == 0) {
/* Already have */
return(0);
}
}
if (vars[cur_cnt] != NULL) {
printf("Cur_cnt:%d filled with %s??\n",
cur_cnt, vars[cur_cnt]);
exit(-1);
}
/* Ok its new */
len = strlen(name) + 1;
vars[cur_cnt] = malloc(len);
if (vars[cur_cnt] == NULL) {
printf("No memory %s\n", __FUNCTION__);
exit(-1);
}
memset(vars[cur_cnt], 0, len);
strcpy(vars[cur_cnt], name);
return(1);
}
static char *
build_command_for_exp(struct expression *exp)
{
/*
* Build the pmcstat command to handle
* the passed in expression.
* /usr/sbin/pmcstat -w 1 -s NNN -s QQQ
* where NNN and QQQ represent the PMC's in the expression
* uniquely..
*/
char forming[1024];
int cnt_pmc, alloced_pmcs, i;
struct expression *at;
char **vars, *cmd;
size_t mal;
alloced_pmcs = cnt_pmc = 0;
/* first how many do we have */
at = exp;
while (at) {
if (at->type == TYPE_VALUE_PMC) {
cnt_pmc++;
}
at = at->next;
}
if (cnt_pmc == 0) {
printf("No PMC's in your expression -- nothing to do!!\n");
exit(0);
}
mal = cnt_pmc * sizeof(char *);
vars = malloc(mal);
if (vars == NULL) {
printf("No memory\n");
exit(-1);
}
memset(vars, 0, mal);
at = exp;
while (at) {
if (at->type == TYPE_VALUE_PMC) {
if(add_it_to(vars, alloced_pmcs, at->name)) {
alloced_pmcs++;
}
}
at = at->next;
}
/* Now we have a unique list in vars so create our command */
mal = 23; /* "/usr/sbin/pmcstat -w 1" + \0 */
for(i=0; i<alloced_pmcs; i++) {
mal += strlen(vars[i]) + 4; /* var + " -s " */
}
cmd = malloc((mal+2));
if (cmd == NULL) {
printf("%s out of mem\n", __FUNCTION__);
exit(-1);
}
memset(cmd, 0, (mal+2));
strcpy(cmd, "/usr/sbin/pmcstat -w 1");
at = exp;
for(i=0; i<alloced_pmcs; i++) {
sprintf(forming, " -s %s", vars[i]);
strcat(cmd, forming);
free(vars[i]);
vars[i] = NULL;
}
free(vars);
return(cmd);
}
static int
user_expr(struct counters *cpu, int pos)
{
int ret;
double res;
struct counters *var;
struct expression *at;
at = master_exp;
while (at) {
if (at->type == TYPE_VALUE_PMC) {
var = find_counter(cpu, at->name);
if (var == NULL) {
printf("%s:Can't find counter %s?\n", __FUNCTION__, at->name);
exit(-1);
}
if (pos != -1) {
at->value = var->vals[pos] * 1.0;
} else {
at->value = var->sum * 1.0;
}
}
at = at->next;
}
res = run_expr(master_exp, 1, NULL);
ret = printf("%1.3f", res);
return(ret);
}
static void
set_manual_exp(struct expression *exp)
{
expression = user_expr;
command = build_command_for_exp(exp);
threshold = "User defined threshold";
}
static void
run_tests(void)
{
int i, lenout;
printf("Running tests on %d PMC's this may take some time\n", valid_pmc_cnt);
printf("------------------------------------------------------------------------\n");
for(i=0; i<valid_pmc_cnt; i++) {
lenout = printf("%s", valid_pmcs[i]);
fflush(stdout);
test_for_a_pmc(valid_pmcs[i], lenout);
}
}
static void
list_all(void)
{
int i, cnt, j;
printf("PMC Abbreviation\n");
printf("--------------------------------------------------------------\n");
for(i=0; i<valid_pmc_cnt; i++) {
cnt = printf("%s", valid_pmcs[i]);
for(j=cnt; j<52; j++) {
printf(" ");
}
printf("%%%d\n", i);
}
}
int
main(int argc, char **argv)
{
int i, j, cnt;
char *filename=NULL;
const char *name=NULL;
int help_only = 0;
int test_mode = 0;
int test_at = 0;
get_cpuid_set();
memset(glob_cpu, 0, sizeof(glob_cpu));
while ((i = getopt(argc, argv, "ALHhvm:i:?e:TE:")) != -1) {
switch (i) {
case 'A':
run_all = 1;
break;
case 'L':
list_all();
return(0);
case 'H':
printf("**********************************\n");
explain_all();
printf("**********************************\n");
return(0);
break;
case 'T':
test_mode = 1;
break;
case 'E':
master_exp = parse_expression(optarg);
if (master_exp) {
set_manual_exp(master_exp);
}
break;
case 'e':
if (validate_expression(optarg)) {
printf("Unknown expression %s\n", optarg);
return(0);
}
name = optarg;
set_expression(optarg);
break;
case 'm':
max_to_collect = strtol(optarg, NULL, 0);
if (max_to_collect > MAX_COUNTER_SLOTS) {
/* You can't collect more than max in array */
max_to_collect = MAX_COUNTER_SLOTS;
}
break;
case 'v':
verbose++;
break;
case 'h':
help_only = 1;
break;
case 'i':
filename = optarg;
break;
case '?':
default:
use:
printf("Use %s [ -i inputfile -v -m max_to_collect -e expr -E -h -? -H]\n",
argv[0]);
printf("-i inputfile -- use source as inputfile not stdin (if stdin collect)\n");
printf("-v -- verbose dump debug type things -- you don't want this\n");
printf("-m N -- maximum to collect is N measurments\n");
printf("-e expr-name -- Do expression expr-name\n");
printf("-E 'your expression' -- Do your expression\n");
printf("-h -- Don't do the expression I put in -e xxx just explain what it does and exit\n");
printf("-H -- Don't run anything, just explain all canned expressions\n");
printf("-T -- Test all PMC's defined by this processor\n");
printf("-A -- Run all canned tests\n");
return(0);
break;
};
}
if ((run_all == 0) && (name == NULL) && (filename == NULL) &&
(test_mode == 0) && (master_exp == NULL)) {
printf("Without setting an expression we cannot dynamically gather information\n");
printf("you must supply a filename (and you probably want verbosity)\n");
goto use;
}
if (run_all && max_to_collect > 10) {
max_to_collect = 3;
}
if (test_mode) {
run_tests();
return(0);
}
printf("*********************************\n");
if ((master_exp == NULL) && name) {
(*the_cpu.explain)(name);
} else if (master_exp) {
printf("Examine your expression ");
print_exp(master_exp);
printf("User defined threshold\n");
}
if (help_only) {
return(0);
}
if (run_all) {
more:
name = the_cpu.ents[test_at].name;
printf("***Test %s (threshold %s)****\n", name, the_cpu.ents[test_at].thresh);
test_at++;
if (set_expression(name) == -1) {
if (test_at >= the_cpu.number) {
goto done;
} else
goto more;
}
}
process_file(filename);
if (verbose >= 2) {
for (i=0; i<ncnts; i++) {
printf("Counter:%s cpu:%d index:%d\n",
cnts[i].counter_name,
cnts[i].cpu, i);
for(j=0; j<cnts[i].pos; j++) {
printf(" val - %ld\n", (long int)cnts[i].vals[j]);
}
printf(" sum - %ld\n", (long int)cnts[i].sum);
}
}
if (expression == NULL) {
return(0);
}
if (max_to_collect > 1) {
for(i=0, cnt=0; i<MAX_CPU; i++) {
if (glob_cpu[i]) {
do_expression(glob_cpu[i], -1);
cnt++;
if (cnt == cpu_count_out) {
printf("\n");
break;
} else {
printf("\t");
}
}
}
}
if (run_all && (test_at < the_cpu.number)) {
memset(glob_cpu, 0, sizeof(glob_cpu));
ncnts = 0;
printf("*********************************\n");
goto more;
} else if (run_all) {
done:
printf("*********************************\n");
}
return(0);
}