rrs 28ddb599ff Update the manual page to be a bit more sane. It still
may need some work, but at least now it will read properly.
(gnn you are still on the hook to help out with this
like you committed).

Sponsored by:	Netflix.
2015-03-26 15:40:47 +00:00

2439 lines
67 KiB
C

/*-
* Copyright (c) 2014, 2015 Netflix Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer,
* in this position and unchanged.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/types.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <strings.h>
#include <sys/errno.h>
#include <signal.h>
#include <sys/wait.h>
#include <getopt.h>
#include "eval_expr.h"
__FBSDID("$FreeBSD$");
/* Number of slots in the vals[] array of each struct counters. */
#define MAX_COUNTER_SLOTS 1024
/* Size in bytes of the counter_name buffer in struct counters. */
#define MAX_NLEN 64
/* Number of entries in the glob_cpu[] array. */
#define MAX_CPU 64
/* Verbosity setting, initially 0.  NOTE(review): its readers are outside this section. */
static int verbose = 0;
/* Process environment; my_popen() hands it to execve(). */
extern char **environ;
/* NOTE(review): struct expression is declared in eval_expr.h; its use is not visible here. */
extern struct expression *master_exp;
struct expression *master_exp=NULL;
/* NOTE(review): presumably the initial allocation count for valid_pmcs -- confirm. */
#define PMC_INITIAL_ALLOC 512
/*
* NOTE(review): presumably a table of PMC names with its used and allocated
* entry counts; the code maintaining them is outside this section -- confirm.
*/
extern char **valid_pmcs;
char **valid_pmcs = NULL;
extern int valid_pmc_cnt;
int valid_pmc_cnt=0;
extern int pmc_allocated_cnt;
int pmc_allocated_cnt=0;
/*
* The following are two variants of popen and pclose, with
* the caveat that they give you the PID so that you
* can supply it to pclose, which can then send a SIGTERM
* to the process.
*/
/*
 * my_popen --
 *	Run `command' through "/bin/sh -c" with the child's stdin attached to
 *	one pipe and its stdout and stderr attached to another, and return a
 *	stdio stream for the parent's end of one of them.
 *
 *	command	shell command line; must fit in the 1024 byte argv buffer.
 *	dir	"r" returns a stream reading the child's stdout/stderr,
 *		"w" returns a stream writing to the child's stdin.
 *	p_pid	receives the child's pid on success so that the caller can
 *		hand it to my_pclose().
 *
 *	Returns NULL on failure.  errno is EINVAL for a bad `dir', E2BIG for
 *	an over-long command, otherwise whatever the failing call left.
 */
static FILE *
my_popen(const char *command, const char *dir, pid_t *p_pid)
{
	FILE *stream;
	int pdesin[2], pdesout[2];
	int keep_fd, pstat, reading, saved_errno;
	char *argv[4];
	pid_t pid;
	char cmd[] = "sh";
	char arg1[] = "-c";
	char cmd2[1024];
	size_t cmdlen;

	if ((strcmp(dir, "r") != 0) &&
	    (strcmp(dir, "w") != 0)) {
		errno = EINVAL;
		return (NULL);
	}
	reading = (strcmp(dir, "r") == 0);

	/*
	 * execve() wants writable argv strings, so the command is copied.
	 * Refuse anything that would overflow the copy instead of writing
	 * past the end of cmd2.
	 */
	cmdlen = strlen(command);
	if (cmdlen >= sizeof(cmd2)) {
		errno = E2BIG;
		return (NULL);
	}
	memcpy(cmd2, command, cmdlen + 1);

	if (pipe(pdesin) < 0)
		return (NULL);
	if (pipe(pdesout) < 0) {
		(void)close(pdesin[0]);
		(void)close(pdesin[1]);
		return (NULL);
	}
	argv[0] = cmd;
	argv[1] = arg1;
	argv[2] = cmd2;
	argv[3] = NULL;

	switch (pid = fork()) {
	case -1:			/* Error. */
		(void)close(pdesin[0]);
		(void)close(pdesin[1]);
		(void)close(pdesout[0]);
		(void)close(pdesout[1]);
		return (NULL);
		/* NOTREACHED */
	case 0:				/* Child. */
		/* Close out un-used sides */
		(void)close(pdesin[1]);
		(void)close(pdesout[0]);
		/*
		 * dup2() places the pipe ends on the exact descriptors we
		 * want, whatever descriptors the parent happened to have open.
		 */
		if (pdesin[0] != STDIN_FILENO) {
			(void)dup2(pdesin[0], STDIN_FILENO);
			(void)close(pdesin[0]);
		}
		(void)dup2(pdesout[1], STDOUT_FILENO);
		/* And lets do stderr just in case */
		(void)dup2(pdesout[1], STDERR_FILENO);
		if (pdesout[1] != STDOUT_FILENO && pdesout[1] != STDERR_FILENO)
			(void)close(pdesout[1]);
		/* Now run it */
		execve("/bin/sh", argv, environ);
		/*
		 * _exit(), not exit(): the child must not flush the stdio
		 * buffers it inherited from the parent.
		 */
		_exit(127);
		/* NOTREACHED */
	}

	/* Parent: keep only the descriptor backing the returned stream. */
	if (reading) {
		keep_fd = pdesout[0];
		(void)close(pdesout[1]);
		(void)close(pdesin[0]);
		(void)close(pdesin[1]);
	} else {
		keep_fd = pdesin[1];
		(void)close(pdesin[0]);
		(void)close(pdesout[0]);
		(void)close(pdesout[1]);
	}
	stream = fdopen(keep_fd, reading ? "r" : "w");
	if (stream == NULL) {
		/* Do not leak the descriptor or leave the child behind. */
		saved_errno = errno;
		(void)close(keep_fd);
		(void)kill(pid, SIGTERM);
		while (waitpid(pid, &pstat, 0) == -1 && errno == EINTR)
			continue;
		errno = saved_errno;
		return (NULL);
	}
	/* Store the pid */
	*p_pid = pid;
	return (stream);
}
/*
 * my_pclose --
 *	Close a stream obtained from my_popen(), send SIGTERM to the child
 *	identified by the_pid and wait until that child has been collected.
 *	Unlike pclose(3) nothing is returned; the child's exit status is
 *	read and discarded.
 *
 *	io	stream to close; NULL is tolerated.
 *	the_pid	pid stored by my_popen(); values <= 0 are ignored.
 */
static void
my_pclose(FILE *io, pid_t the_pid)
{
	int pstat;
	pid_t pid;

	if (io != NULL)
		(void)fclose(io);
	/*
	 * kill(2) interprets 0 and negative pids as process groups, so a
	 * pid that was never filled in must not be signalled.
	 */
	if (the_pid <= 0)
		return;
	/* Die if you are not dead! */
	(void)kill(the_pid, SIGTERM);
	/* Retry the wait when it is interrupted by a signal. */
	do {
		pid = wait4(the_pid, &pstat, 0, (struct rusage *)0);
	} while (pid == -1 && errno == EINTR);
}
/*
* Recorded values of one named counter.  Entries are chained through
* next_cpu and looked up by name with find_counter().
*/
struct counters {
struct counters *next_cpu; /* Next entry in the chain walked by find_counter() */
char counter_name[MAX_NLEN]; /* Name of counter */
int cpu; /* CPU we are on */
int pos; /* Index we are filling to. */
uint64_t vals[MAX_COUNTER_SLOTS]; /* Recorded values; the metric functions index this with their pos argument */
uint64_t sum; /* Summary of entries; read by the metric functions when their pos argument is -1 */
};
/*
* NOTE(review): presumably one counter chain per CPU (MAX_CPU entries); the
* code that fills this array is outside this section -- confirm.
*/
extern struct counters *glob_cpu[MAX_CPU];
struct counters *glob_cpu[MAX_CPU];
/* NOTE(review): presumably the allocated counter entries and how many there are -- confirm. */
extern struct counters *cnts;
struct counters *cnts=NULL;
extern int ncnts;
int ncnts=0;
/*
* Function pointer with the same signature as the metric functions in this
* file (a counter chain and a value index).
* NOTE(review): presumably the metric currently selected -- confirm.
*/
extern int (*expression)(struct counters *, int);
int (*expression)(struct counters *, int);
/* NOTE(review): both are assigned and read outside this section. */
static const char *threshold=NULL;
static const char *command;
/*
* One named metric.
* NOTE(review): the tables built from this struct are outside this section,
* so the field notes below are inferred from names and types -- confirm.
*/
struct cpu_entry {
const char *name; /* Metric name; presumably what the explain_name_*() functions are given */
const char *thresh; /* Presumably the threshold text for the metric */
const char *command; /* Presumably the command used to collect the counters */
int (*func)(struct counters *, int); /* Same signature as the metric functions (e.g. allocstall1) */
};
/*
* Description of one CPU family.
* NOTE(review): how these fields are filled in is outside this section.
*/
struct cpu_type {
char cputype[32]; /* CPU type name */
int number; /* Presumably the number of elements in ents -- confirm */
struct cpu_entry *ents; /* Metric entries for this CPU type */
void (*explain)(const char *name); /* Same signature as the explain_name_*() functions */
};
/* NOTE(review): presumably describes the CPU type in use; it is set outside this section. */
extern struct cpu_type the_cpu;
struct cpu_type the_cpu;
/*
 * explain_name_sb --
 *	Print, on stdout, the counter formula behind the metric called `name'
 *	followed by a sentence giving the value range worth investigating.
 *	An unrecognised name is reported and gets the "unknown entry" text.
 *	NOTE(review): the _sb suffix presumably stands for Sandy Bridge --
 *	confirm against the code that installs this callback.
 */
static void
explain_name_sb(const char *name)
{
	const char *mythresh;

	if (strcmp(name, "allocstall1") == 0) {
		printf("Examine PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh > .05";
	} else if (strcmp(name, "allocstall2") == 0) {
		printf("Examine PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh > .05";
	} else if (strcmp(name, "br_miss") == 0) {
		printf("Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .2";
	} else if (strcmp(name, "splitload") == 0) {
		/* The opening parenthesis used to be missing from this formula. */
		printf("Examine (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .1";
	} else if (strcmp(name, "splitstore") == 0) {
		printf("Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n");
		mythresh = "thresh >= .01";
	} else if (strcmp(name, "contested") == 0) {
		printf("Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .05";
	} else if (strcmp(name, "blockstorefwd") == 0) {
		printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .05";
	} else if (strcmp(name, "cache2") == 0) {
		printf("Examine ((MEM_LOAD_RETIRED.L3_HIT * 26) + \n");
		printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + \n");
		printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P\n");
		printf("**Note we have it labeled MEM_LOAD_UOPS_RETIRED.LLC_HIT not MEM_LOAD_RETIRED.L3_HIT\n");
		mythresh = "thresh >= .2";
	} else if (strcmp(name, "cache1") == 0) {
		printf("Examine (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .2";
	} else if (strcmp(name, "dtlbmissload") == 0) {
		printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n");
		printf(" / CPU_CLK_UNHALTED.THREAD_P)\n");
		mythresh = "thresh >= .1";
	} else if (strcmp(name, "frontendstall") == 0) {
		printf("Examine IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4)\n");
		mythresh = "thresh >= .15";
	} else if (strcmp(name, "clears") == 0) {
		printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n");
		printf(" MACHINE_CLEARS.SMC + \n");
		printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .02";
	} else if (strcmp(name, "microassist") == 0) {
		printf("Examine IDQ.MS_CYCLES / (CPU_CLK_UNHALTED.THREAD_P * 4)\n");
		printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n");
		mythresh = "thresh >= .05";
	} else if (strcmp(name, "aliasing_4k") == 0) {
		printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .1";
	} else if (strcmp(name, "fpassist") == 0) {
		printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n");
		mythresh = "look for a excessive value";
	} else if (strcmp(name, "otherassistavx") == 0) {
		printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "look for a excessive value";
	} else if (strcmp(name, "otherassistsse") == 0) {
		printf("Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "look for a excessive value";
	} else if (strcmp(name, "eff1") == 0) {
		printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n");
		mythresh = "thresh < .9";
	} else if (strcmp(name, "eff2") == 0) {
		printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n");
		mythresh = "thresh > 1.0";
	} else if (strcmp(name, "dtlbmissstore") == 0) {
		printf("Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n");
		printf(" / CPU_CLK_UNHALTED.THREAD_P)\n");
		mythresh = "thresh >= .05";
	} else {
		printf("Unknown name:%s\n", name);
		mythresh = "unknown entry";
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
/*
 * explain_name_ib --
 *	Print, on stdout, the counter formula behind the metric called `name'
 *	followed by a sentence giving the value range worth investigating.
 *	An unrecognised name is reported and gets the "unknown entry" text.
 *	NOTE(review): the _ib suffix presumably stands for Ivy Bridge --
 *	confirm against the code that installs this callback.
 */
static void
explain_name_ib(const char *name)
{
	const char *mythresh;

	if (strcmp(name, "br_miss") == 0) {
		printf("Examine ((BR_MISP_RETIRED.ALL_BRANCHES /(BR_MISP_RETIRED.ALL_BRANCHES +\n");
		printf(" MACHINE_CLEAR.COUNT) * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES)\n");
		printf("/ (4 * CPU_CLK_UNHALTED.THREAD))))\n");
		mythresh = "thresh >= .2";
	} else if (strcmp(name, "eff1") == 0) {
		printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n");
		mythresh = "thresh < .9";
	} else if (strcmp(name, "eff2") == 0) {
		printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n");
		mythresh = "thresh > 1.0";
	} else if (strcmp(name, "cache1") == 0) {
		printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .2";
	} else if (strcmp(name, "cache2") == 0) {
		/*
		 * Matches what cache2ib() computes: 29 * LLC_HIT / THREAD_P.
		 * The text used to omit the factor and had an unbalanced
		 * parenthesis.
		 */
		printf("Examine (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .2";
	} else if (strcmp(name, "itlbmiss") == 0) {
		printf("Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh > .05";
	} else if (strcmp(name, "icachemiss") == 0) {
		printf("Examine (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh > .05";
	} else if (strcmp(name, "lcpstall") == 0) {
		printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh > .05";
	} else if (strcmp(name, "datashare") == 0) {
		printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh > .05";
	} else if (strcmp(name, "blockstorefwd") == 0) {
		printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .05";
	} else if (strcmp(name, "splitload") == 0) {
		printf("Examine ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) *\n");
		printf(" LD_BLOCKS.NO_SR)/CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .1";
	} else if (strcmp(name, "splitstore") == 0) {
		printf("Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n");
		mythresh = "thresh >= .01";
	} else if (strcmp(name, "aliasing_4k") == 0) {
		printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .1";
	} else if (strcmp(name, "dtlbmissload") == 0) {
		printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n");
		printf(" / CPU_CLK_UNHALTED.THREAD_P)\n");
		mythresh = "thresh >= .1";
	} else if (strcmp(name, "dtlbmissstore") == 0) {
		printf("Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n");
		printf(" / CPU_CLK_UNHALTED.THREAD_P)\n");
		mythresh = "thresh >= .05";
	} else if (strcmp(name, "contested") == 0) {
		printf("Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .05";
	} else if (strcmp(name, "clears") == 0) {
		printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n");
		printf(" MACHINE_CLEARS.SMC + \n");
		printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "thresh >= .02";
	} else if (strcmp(name, "microassist") == 0) {
		printf("Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n");
		printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n");
		mythresh = "thresh >= .05";
	} else if (strcmp(name, "fpassist") == 0) {
		printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n");
		mythresh = "look for a excessive value";
	} else if (strcmp(name, "otherassistavx") == 0) {
		printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "look for a excessive value";
	} else if (strcmp(name, "otherassistsse") == 0) {
		printf("Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n");
		mythresh = "look for a excessive value";
	} else {
		printf("Unknown name:%s\n", name);
		mythresh = "unknown entry";
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
/*
 * explain_name_has --
 *	Look `name' up in a table of metric descriptions and print the
 *	matching formula text, then the closing sentence carrying the
 *	threshold hint.  A name that is not in the table is reported and
 *	gets the "unknown entry" hint instead.
 */
static void
explain_name_has(const char *name)
{
	static const struct {
		const char *metric;	/* name compared against the argument */
		const char *formula;	/* text printed before the hint */
		const char *limit;	/* text substituted into the hint */
	} notes[] = {
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .75" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "itlbmiss",
		  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine (36 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "cache2",
		  "Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \n"
		  " (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + \n"
		  " (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))\n"
		  " / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "splitload",
		  "Examine (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  " / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "br_miss",
		  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD\n",
		  "thresh >= .2" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  " MACHINE_CLEARS.SMC + \n"
		  " MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
	};
	const size_t count = sizeof(notes) / sizeof(notes[0]);
	const char *limit = "unknown entry";
	size_t i;

	/* Stop at the first table row whose name matches. */
	for (i = 0; i < count; i++) {
		if (strcmp(name, notes[i].metric) == 0)
			break;
	}
	if (i < count) {
		printf("%s", notes[i].formula);
		limit = notes[i].limit;
	} else {
		printf("Unknown name:%s\n", name);
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", limit);
}
/*
 * find_counter --
 *	Walk the next_cpu chain that starts at `base' and return the entry
 *	for the counter called `name'.
 *
 *	An entry whose counter_name is exactly `name' is preferred.  When
 *	there is none, the first entry whose counter_name begins with `name'
 *	is returned, which is the historical behaviour (allocstall2(), for
 *	instance, looks up "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP", apparently
 *	relying on a longer stored name).  Preferring the exact entry stops
 *	a lookup of "...XSNP_HIT" from being answered with "...XSNP_HITM"
 *	just because of the order of the chain.
 *
 *	Does not return when nothing matches: the available names are listed
 *	on stdout and the process exits with status -1.
 */
static struct counters *
find_counter(struct counters *base, const char *name)
{
	struct counters *at, *prefix_hit;
	size_t len;

	prefix_hit = NULL;
	len = strlen(name);
	for (at = base; at != NULL; at = at->next_cpu) {
		if (strncmp(at->counter_name, name, len) != 0)
			continue;
		/* All of `name' matched; a terminator right after it means identical. */
		if (len < sizeof(at->counter_name) &&
		    at->counter_name[len] == '\0')
			return (at);
		if (prefix_hit == NULL)
			prefix_hit = at;
	}
	if (prefix_hit != NULL)
		return (prefix_hit);

	printf("Can't find counter %s\n", name);
	printf("We have:\n");
	for (at = base; at != NULL; at = at->next_cpu)
		printf("- %s\n", at->counter_name);
	exit(-1);
}
/*
 * allocstall1 --
 *	PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW / CPU_CLK_UNHALTED.THREAD_P
 *
 *	pos selects one slot of vals[]; pos == -1 uses the sums instead.
 *	Prints the ratio with three decimals and returns printf()'s result.
 */
static int
allocstall1(struct counters *cpu, int pos)
{
	struct counters *clk = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	struct counters *lea = find_counter(cpu, "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW");
	const int use_sum = (pos == -1);
	double stalls = (use_sum ? lea->sum : lea->vals[pos]) * 1.0;
	double cycles = (use_sum ? clk->sum : clk->vals[pos]) * 1.0;
	double ratio = stalls / cycles;

	return (printf("%1.3f", ratio));
}
/*
 * allocstall2 --
 *	PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP (as looked up) /
 *	CPU_CLK_UNHALTED.THREAD_P
 *
 *	pos selects one slot of vals[]; pos == -1 uses the sums instead.
 *	Prints the ratio with three decimals and returns printf()'s result.
 */
static int
allocstall2(struct counters *cpu, int pos)
{
	struct counters *clk = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	struct counters *merge = find_counter(cpu, "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP");
	double stalls, cycles, ratio;

	if (pos == -1) {
		stalls = merge->sum * 1.0;
		cycles = clk->sum * 1.0;
	} else {
		stalls = merge->vals[pos] * 1.0;
		cycles = clk->vals[pos] * 1.0;
	}
	ratio = stalls / cycles;
	return (printf("%1.3f", ratio));
}
/*
 * br_mispredict --
 *	(20 * BR_MISP_RETIRED.ALL_BRANCHES) / CPU_CLK_UNHALTED.THREAD_P
 *
 *	pos selects one slot of vals[]; pos == -1 uses the sums instead.
 *	Prints the result with three decimals and returns printf()'s result.
 */
static int
br_mispredict(struct counters *cpu, int pos)
{
	const double weight = 20.0;
	struct counters *clk = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	struct counters *misp = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
	const int use_sum = (pos == -1);
	double branches = (use_sum ? misp->sum : misp->vals[pos]) * 1.0;
	double cycles = (use_sum ? clk->sum : clk->vals[pos]) * 1.0;
	double score = (weight * branches) / cycles;

	return (printf("%1.3f", score));
}
/*
 * br_mispredictib --
 *	BR_MISP_RETIRED.ALL_BRANCHES /
 *	    (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.MEMORY_ORDERING +
 *	     MACHINE_CLEARS.SMC + MACHINE_CLEARS.MASKMOV) *
 *	((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS +
 *	  4 * INT_MISC.RECOVERY_CYCLES) / (4 * CPU_CLK_UNHALTED.THREAD_P))
 *
 *	pos selects one slot of vals[]; pos == -1 uses the sums instead.
 *	Prints the result with three decimals and returns printf()'s result.
 */
static int
br_mispredictib(struct counters *cpu, int pos)
{
	const double width = 4.0;
	struct counters *clk = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	struct counters *misp = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
	struct counters *clr_order = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
	struct counters *clr_smc = find_counter(cpu, "MACHINE_CLEARS.SMC");
	struct counters *clr_mask = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
	struct counters *slots = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
	struct counters *issue = find_counter(cpu, "UOPS_ISSUED.ANY");
	struct counters *recover = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES");
	double branches, order, smc, mask, retired, recovery, issued, cycles;
	double score;

	if (pos == -1) {
		branches = misp->sum * 1.0;
		order = clr_order->sum * 1.0;
		smc = clr_smc->sum * 1.0;
		mask = clr_mask->sum * 1.0;
		retired = slots->sum * 1.0;
		recovery = recover->sum * 1.0;
		issued = issue->sum * 1.0;
		cycles = clk->sum * 1.0;
	} else {
		branches = misp->vals[pos] * 1.0;
		order = clr_order->vals[pos] * 1.0;
		smc = clr_smc->vals[pos] * 1.0;
		mask = clr_mask->vals[pos] * 1.0;
		retired = slots->vals[pos] * 1.0;
		recovery = recover->vals[pos] * 1.0;
		issued = issue->vals[pos] * 1.0;
		cycles = clk->vals[pos] * 1.0;
	}
	score = (branches / (branches + order + smc + mask) *
	    ((issued - retired + width * recovery) / (width * cycles)));
	return (printf("%1.3f", score));
}
/*
 * splitloadib --
 *	((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) *
 *	 LD_BLOCKS.NO_SR) / CPU_CLK_UNHALTED.THREAD_P
 *
 *	pos selects one slot of vals[]; pos == -1 uses the sums instead.
 *	Prints the result with three decimals and returns printf()'s result.
 */
static int
splitloadib(struct counters *cpu, int pos)
{
	struct counters *clk = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	struct counters *l1miss = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L1_MISS");
	struct counters *pend = find_counter(cpu, "L1D_PEND_MISS.PENDING");
	struct counters *nosr = find_counter(cpu, "LD_BLOCKS.NO_SR");
	const int use_sum = (pos == -1);
	double misses = (use_sum ? l1miss->sum : l1miss->vals[pos]) * 1.0;
	double pending = (use_sum ? pend->sum : pend->vals[pos]) * 1.0;
	double blocked = (use_sum ? nosr->sum : nosr->vals[pos]) * 1.0;
	double cycles = (use_sum ? clk->sum : clk->vals[pos]) * 1.0;
	double score = ((pending / misses) * blocked) / cycles;

	return (printf("%1.3f", score));
}
/*
 * splitload --
 *	(MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P
 *
 *	pos selects one slot of vals[]; pos == -1 uses the sums instead.
 *	Prints the result with three decimals and returns printf()'s result.
 */
static int
splitload(struct counters *cpu, int pos)
{
	const double weight = 5.0;
	struct counters *clk = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	struct counters *split = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_LOADS");
	double loads, cycles, score;

	if (pos == -1) {
		loads = split->sum * 1.0;
		cycles = clk->sum * 1.0;
	} else {
		loads = split->vals[pos] * 1.0;
		cycles = clk->vals[pos] * 1.0;
	}
	score = (loads * weight) / cycles;
	return (printf("%1.3f", score));
}
/*
 * splitstore --
 *	MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES
 *
 *	pos selects one slot of vals[]; pos == -1 uses the sums instead.
 *	Prints the ratio with three decimals and returns printf()'s result.
 */
static int
splitstore(struct counters *cpu, int pos)
{
	struct counters *split = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_STORES");
	struct counters *all = find_counter(cpu, "MEM_UOP_RETIRED.ALL_STORES");
	const int use_sum = (pos == -1);
	double n_split = (use_sum ? split->sum : split->vals[pos]) * 1.0;
	double n_all = (use_sum ? all->sum : all->vals[pos]) * 1.0;
	double ratio = n_split / n_all;

	return (printf("%1.3f", ratio));
}
/*
 * contested --
 *	(MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) /
 *	CPU_CLK_UNHALTED.THREAD_P
 *
 *	pos selects one slot of vals[]; pos == -1 uses the sums instead.
 *	Prints the result with three decimals and returns printf()'s result.
 */
static int
contested(struct counters *cpu, int pos)
{
	const double weight = 60.0;
	struct counters *clk = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	struct counters *hitm = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
	double hits, cycles, score;

	if (pos == -1) {
		hits = hitm->sum * 1.0;
		cycles = clk->sum * 1.0;
	} else {
		hits = hitm->vals[pos] * 1.0;
		cycles = clk->vals[pos] * 1.0;
	}
	score = (hits * weight) / cycles;
	return (printf("%1.3f", score));
}
/*
 * contested_has --
 *	(MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) /
 *	CPU_CLK_UNHALTED.THREAD_P
 *
 *	pos selects one slot of vals[]; pos == -1 uses the sums instead.
 *	Prints the result with three decimals and returns printf()'s result.
 */
static int
contested_has(struct counters *cpu, int pos)
{
	const double weight = 84.0;
	struct counters *clk = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	struct counters *hitm = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
	const int use_sum = (pos == -1);
	double hits = (use_sum ? hitm->sum : hitm->vals[pos]) * 1.0;
	double cycles = (use_sum ? clk->sum : clk->vals[pos]) * 1.0;
	double score = (hits * weight) / cycles;

	return (printf("%1.3f", score));
}
/*
 * blockstoreforward --
 *	(LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P
 *
 *	pos selects one slot of vals[]; pos == -1 uses the sums instead.
 *	Prints the result with three decimals and returns printf()'s result.
 */
static int
blockstoreforward(struct counters *cpu, int pos)
{
	const double weight = 13.0;
	struct counters *clk = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	struct counters *fwd = find_counter(cpu, "LD_BLOCKS_STORE_FORWARD");
	double blocks, cycles, score;

	if (pos == -1) {
		blocks = fwd->sum * 1.0;
		cycles = clk->sum * 1.0;
	} else {
		blocks = fwd->vals[pos] * 1.0;
		cycles = clk->vals[pos] * 1.0;
	}
	score = (blocks * weight) / cycles;
	return (printf("%1.3f", score));
}
/*
 * cache2 --
 *	((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 26) +
 *	 (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) +
 *	 (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) /
 *	CPU_CLK_UNHALTED.THREAD_P
 *
 *	The first term stands in for MEM_LOAD_RETIRED.L3_HIT, which is
 *	looked up here as MEM_LOAD_UOPS_RETIRED.LLC_HIT.
 *	pos selects one slot of vals[]; pos == -1 uses the sums instead.
 *	Prints the result with three decimals and returns printf()'s result.
 */
static int
cache2(struct counters *cpu, int pos)
{
	const double w_hit = 26.0, w_xsnp = 43.0, w_hitm = 60.0;
	struct counters *clk = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	struct counters *llc = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
	struct counters *xsnp = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
	struct counters *hitm = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
	const int use_sum = (pos == -1);
	double n_llc = (use_sum ? llc->sum : llc->vals[pos]) * 1.0;
	double n_xsnp = (use_sum ? xsnp->sum : xsnp->vals[pos]) * 1.0;
	double n_hitm = (use_sum ? hitm->sum : hitm->vals[pos]) * 1.0;
	double cycles = (use_sum ? clk->sum : clk->vals[pos]) * 1.0;
	double score = ((n_llc * w_hit) + (n_xsnp * w_xsnp) + (n_hitm * w_hitm)) / cycles;

	return (printf("%1.3f", score));
}
/*
 * datasharing --
 *	(MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) /
 *	CPU_CLK_UNHALTED.THREAD_P
 *
 *	pos selects one slot of vals[]; pos == -1 uses the sums instead.
 *	Prints the result with three decimals and returns printf()'s result.
 */
static int
datasharing(struct counters *cpu, int pos)
{
	const double weight = 43.0;
	struct counters *clk = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	struct counters *xsnp = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
	double hits, cycles, score;

	if (pos == -1) {
		hits = xsnp->sum * 1.0;
		cycles = clk->sum * 1.0;
	} else {
		hits = xsnp->vals[pos] * 1.0;
		cycles = clk->vals[pos] * 1.0;
	}
	score = (hits * weight) / cycles;
	return (printf("%1.3f", score));
}
/*
 * datasharing_has --
 *	(MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) /
 *	CPU_CLK_UNHALTED.THREAD_P
 *
 *	pos selects one slot of vals[]; pos == -1 uses the sums instead.
 *	Prints the result with three decimals and returns printf()'s result.
 */
static int
datasharing_has(struct counters *cpu, int pos)
{
	const double weight = 72.0;
	struct counters *clk = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	struct counters *xsnp = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
	const int use_sum = (pos == -1);
	double hits = (use_sum ? xsnp->sum : xsnp->vals[pos]) * 1.0;
	double cycles = (use_sum ? clk->sum : clk->vals[pos]) * 1.0;
	double score = (hits * weight) / cycles;

	return (printf("%1.3f", score));
}
/*
 * cache2ib --
 *	(29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT) / CPU_CLK_UNHALTED.THREAD_P
 *
 *	pos selects one slot of vals[]; pos == -1 uses the sums instead.
 *	Prints the result with three decimals and returns printf()'s result.
 */
static int
cache2ib(struct counters *cpu, int pos)
{
	const double weight = 29.0;
	struct counters *clk = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	struct counters *llc = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
	double hits, cycles, score;

	if (pos == -1) {
		hits = llc->sum * 1.0;
		cycles = clk->sum * 1.0;
	} else {
		hits = llc->vals[pos] * 1.0;
		cycles = clk->vals[pos] * 1.0;
	}
	score = (weight * hits) / cycles;
	return (printf("%1.3f", score));
}
/*
 * cache2has --
 *	((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) +
 *	 (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) +
 *	 (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84)) /
 *	CPU_CLK_UNHALTED.THREAD_P
 *
 *	pos selects one slot of vals[]; pos == -1 uses the sums instead.
 *	Prints the result with three decimals and returns printf()'s result.
 */
static int
cache2has(struct counters *cpu, int pos)
{
	const double w_hit = 36.0, w_xsnp = 72.0, w_hitm = 84.0;
	struct counters *clk = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	struct counters *llc = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
	struct counters *xsnp = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
	struct counters *hitm = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
	double n_llc, n_xsnp, n_hitm, cycles, score;

	if (pos == -1) {
		n_llc = llc->sum * 1.0;
		n_xsnp = xsnp->sum * 1.0;
		n_hitm = hitm->sum * 1.0;
		cycles = clk->sum * 1.0;
	} else {
		n_llc = llc->vals[pos] * 1.0;
		n_xsnp = xsnp->vals[pos] * 1.0;
		n_hitm = hitm->vals[pos] * 1.0;
		cycles = clk->vals[pos] * 1.0;
	}
	score = ((n_llc * w_hit) + (n_xsnp * w_xsnp) + (n_hitm * w_hitm)) / cycles;
	return (printf("%1.3f", score));
}
/*
 * cache1 --
 *	(MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) /
 *	CPU_CLK_UNHALTED.THREAD_P
 *
 *	pos selects one slot of vals[]; pos == -1 uses the sums instead.
 *	Prints the result with three decimals and returns printf()'s result.
 */
static int
cache1(struct counters *cpu, int pos)
{
	const double weight = 180.0;
	struct counters *clk = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	struct counters *miss = find_counter(cpu, "MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS");
	const int use_sum = (pos == -1);
	double misses = (use_sum ? miss->sum : miss->vals[pos]) * 1.0;
	double cycles = (use_sum ? clk->sum : clk->vals[pos]) * 1.0;
	double score = (misses * weight) / cycles;

	return (printf("%1.3f", score));
}
/*
 * cache1ib --
 *	(MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) /
 *	CPU_CLK_UNHALTED.THREAD_P
 *
 *	pos selects one slot of vals[]; pos == -1 uses the sums instead.
 *	Prints the result with three decimals and returns printf()'s result.
 */
static int
cache1ib(struct counters *cpu, int pos)
{
	const double weight = 180.0;
	struct counters *clk = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	struct counters *dram = find_counter(cpu, "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM");
	double misses, cycles, score;

	if (pos == -1) {
		misses = dram->sum * 1.0;
		cycles = clk->sum * 1.0;
	} else {
		misses = dram->vals[pos] * 1.0;
		cycles = clk->vals[pos] * 1.0;
	}
	score = (misses * weight) / cycles;
	return (printf("%1.3f", score));
}
/*
 * dtlb_missload --
 *	((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION) /
 *	CPU_CLK_UNHALTED.THREAD_P
 *
 *	pos selects one slot of vals[]; pos == -1 uses the sums instead.
 *	Prints the result with three decimals and returns printf()'s result.
 */
static int
dtlb_missload(struct counters *cpu, int pos)
{
	const double weight = 7.0;
	struct counters *clk = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	struct counters *stlb = find_counter(cpu, "DTLB_LOAD_MISSES.STLB_HIT");
	struct counters *walk = find_counter(cpu, "DTLB_LOAD_MISSES.WALK_DURATION");
	const int use_sum = (pos == -1);
	double hits = (use_sum ? stlb->sum : stlb->vals[pos]) * 1.0;
	double walking = (use_sum ? walk->sum : walk->vals[pos]) * 1.0;
	double cycles = (use_sum ? clk->sum : clk->vals[pos]) * 1.0;
	double score = ((hits * weight) + walking) / cycles;

	return (printf("%1.3f", score));
}
/*
 * dtlb_missstore --
 *	((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION) /
 *	CPU_CLK_UNHALTED.THREAD_P
 *
 *	pos selects one slot of vals[]; pos == -1 uses the sums instead.
 *	Prints the result with three decimals and returns printf()'s result.
 */
static int
dtlb_missstore(struct counters *cpu, int pos)
{
	const double weight = 7.0;
	struct counters *clk = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	struct counters *stlb = find_counter(cpu, "DTLB_STORE_MISSES.STLB_HIT");
	struct counters *walk = find_counter(cpu, "DTLB_STORE_MISSES.WALK_DURATION");
	double hits, walking, cycles, score;

	if (pos == -1) {
		hits = stlb->sum * 1.0;
		walking = walk->sum * 1.0;
		cycles = clk->sum * 1.0;
	} else {
		hits = stlb->vals[pos] * 1.0;
		walking = walk->vals[pos] * 1.0;
		cycles = clk->vals[pos] * 1.0;
	}
	score = ((hits * weight) + walking) / cycles;
	return (printf("%1.3f", score));
}
/*
 * itlb_miss --
 *	ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P
 *
 *	pos selects one slot of vals[]; pos == -1 uses the sums instead.
 *	Prints the ratio with three decimals and returns printf()'s result.
 */
static int
itlb_miss(struct counters *cpu, int pos)
{
	struct counters *clk = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	struct counters *walk = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
	const int use_sum = (pos == -1);
	double walking = (use_sum ? walk->sum : walk->vals[pos]) * 1.0;
	double cycles = (use_sum ? clk->sum : clk->vals[pos]) * 1.0;
	double ratio = walking / cycles;

	return (printf("%1.3f", ratio));
}
/*
 * icache_miss --
 *	(ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION) /
 *	CPU_CLK_UNHALTED.THREAD_P
 *
 *	pos selects one slot of vals[]; pos == -1 uses the sums instead.
 *	Prints the result with three decimals and returns printf()'s result.
 */
static int
icache_miss(struct counters *cpu, int pos)
{
	struct counters *clk = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	struct counters *walk = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
	struct counters *fetch = find_counter(cpu, "ICACHE.IFETCH_STALL");
	double walking, stalled, cycles, score;

	if (pos == -1) {
		walking = walk->sum * 1.0;
		stalled = fetch->sum * 1.0;
		cycles = clk->sum * 1.0;
	} else {
		walking = walk->vals[pos] * 1.0;
		stalled = fetch->vals[pos] * 1.0;
		cycles = clk->vals[pos] * 1.0;
	}
	score = (stalled - walking) / cycles;
	return (printf("%1.3f", score));
}
/*
 * icache_miss_has --
 *	(36 * ICACHE.MISSES) / CPU_CLK_UNHALTED.THREAD_P
 *
 *	pos selects one slot of vals[]; pos == -1 uses the sums instead.
 *	Prints the result with three decimals and returns printf()'s result.
 */
static int
icache_miss_has(struct counters *cpu, int pos)
{
	const double weight = 36.0;
	struct counters *clk = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	struct counters *miss = find_counter(cpu, "ICACHE.MISSES");
	const int use_sum = (pos == -1);
	double misses = (use_sum ? miss->sum : miss->vals[pos]) * 1.0;
	double cycles = (use_sum ? clk->sum : clk->vals[pos]) * 1.0;
	double score = (weight * misses) / cycles;

	return (printf("%1.3f", score));
}
static int
lcp_stall(struct counters *cpu, int pos)
{
	/*
	 * Length-changing-prefix stall cost (IB):
	 *   ILD_STALL.LCP / CPU_CLK_UNHALTED.THREAD_P
	 *
	 * pos == -1 uses the running sums, otherwise sample vals[pos].
	 * Returns what printf returns for the "%1.3f" output.
	 */
	struct counters *cycles, *lcp;
	double cyc, stalls, ratio;

	cycles = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	lcp = find_counter(cpu, "ILD_STALL.LCP");
	if (pos == -1) {
		stalls = (double)lcp->sum;
		cyc = (double)cycles->sum;
	} else {
		stalls = (double)lcp->vals[pos];
		cyc = (double)cycles->vals[pos];
	}
	ratio = stalls / cyc;
	return (printf("%1.3f", ratio));
}
static int
frontendstall(struct counters *cpu, int pos)
{
	/*
	 * Expression 12 (thresh >= .15):
	 *   IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4)
	 *
	 * pos == -1 uses the running sums, otherwise sample vals[pos].
	 * Returns what printf returns for the "%1.3f" output.
	 */
	struct counters *cycles, *undelivered;
	double cyc, uops, ratio;

	cycles = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	undelivered = find_counter(cpu, "IDQ_UOPS_NOT_DELIVERED.CORE");
	if (pos == -1) {
		uops = (double)undelivered->sum;
		cyc = (double)cycles->sum;
	} else {
		uops = (double)undelivered->vals[pos];
		cyc = (double)cycles->vals[pos];
	}
	ratio = uops / (cyc * 4.0);
	return (printf("%1.3f", ratio));
}
static int
clears(struct counters *cpu, int pos)
{
	/*
	 * Expression 13 (thresh >= .02):
	 *   ((MACHINE_CLEARS.MEMORY_ORDERING + MACHINE_CLEARS.SMC +
	 *     MACHINE_CLEARS.MASKMOV) * 100) / CPU_CLK_UNHALTED.THREAD_P
	 *
	 * pos == -1 uses the running sums, otherwise sample vals[pos].
	 * Returns what printf returns for the "%1.3f" output.
	 */
	struct counters *cycles, *mem_order, *smc, *maskmov;
	double cyc, ordering, selfmod, mask, ratio;

	cycles = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	mem_order = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
	smc = find_counter(cpu, "MACHINE_CLEARS.SMC");
	maskmov = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
	if (pos == -1) {
		ordering = (double)mem_order->sum;
		selfmod = (double)smc->sum;
		mask = (double)maskmov->sum;
		cyc = (double)cycles->sum;
	} else {
		ordering = (double)mem_order->vals[pos];
		selfmod = (double)smc->vals[pos];
		mask = (double)maskmov->vals[pos];
		cyc = (double)cycles->vals[pos];
	}
	ratio = ((ordering + selfmod + mask) * 100.0) / cyc;
	return (printf("%1.3f", ratio));
}
static int
microassist(struct counters *cpu, int pos)
{
	/*
	 * Expression 14 (thresh > .05), as computed here:
	 *   IDQ.MS_UOPS / (CPU_CLK_UNHALTED.THREAD_P * 4)
	 *
	 * NOTE(review): earlier commentary described this metric as
	 * IDQ.MS_CYCLES / CPU_CLK_UNHALTED.THREAD_P, without the divide
	 * by 4 -- confirm which formula is intended.
	 *
	 * pos == -1 uses the running sums, otherwise sample vals[pos].
	 * Returns what printf returns for the "%1.3f" output.
	 */
	struct counters *cycles, *ms;
	double cyc, ms_cnt, ratio;

	cycles = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	ms = find_counter(cpu, "IDQ.MS_UOPS");
	if (pos == -1) {
		ms_cnt = (double)ms->sum;
		cyc = (double)cycles->sum;
	} else {
		ms_cnt = (double)ms->vals[pos];
		cyc = (double)cycles->vals[pos];
	}
	ratio = ms_cnt / (cyc * 4.0);
	return (printf("%1.3f", ratio));
}
static int
aliasing(struct counters *cpu, int pos)
{
	/*
	 * Expression 15 (thresh > .1):
	 *   (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) /
	 *       CPU_CLK_UNHALTED.THREAD_P
	 *
	 * pos == -1 uses the running sums, otherwise sample vals[pos].
	 * Returns what printf returns for the "%1.3f" output.
	 */
	struct counters *cycles, *alias;
	double cyc, blocked, ratio;

	cycles = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	alias = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS");
	if (pos == -1) {
		blocked = (double)alias->sum;
		cyc = (double)cycles->sum;
	} else {
		blocked = (double)alias->vals[pos];
		cyc = (double)cycles->vals[pos];
	}
	ratio = (blocked * 5.0) / cyc;
	return (printf("%1.3f", ratio));
}
static int
fpassists(struct counters *cpu, int pos)
{
	/*
	 * Expression 16:
	 *   FP_ASSIST.ANY / INST_RETIRED.ANY_P
	 *
	 * pos == -1 uses the running sums, otherwise sample vals[pos].
	 * Returns what printf returns for the "%1.3f" output.
	 */
	struct counters *retired, *assists;
	double insts, fp_cnt, ratio;

	retired = find_counter(cpu, "INST_RETIRED.ANY_P");
	assists = find_counter(cpu, "FP_ASSIST.ANY");
	if (pos == -1) {
		fp_cnt = (double)assists->sum;
		insts = (double)retired->sum;
	} else {
		fp_cnt = (double)assists->vals[pos];
		insts = (double)retired->vals[pos];
	}
	ratio = fp_cnt / insts;
	return (printf("%1.3f", ratio));
}
static int
otherassistavx(struct counters *cpu, int pos)
{
	/*
	 * Expression 17 (thresh .1):
	 *   (OTHER_ASSISTS.AVX_TO_SSE * 75) / CPU_CLK_UNHALTED.THREAD_P
	 *
	 * pos == -1 uses the running sums, otherwise sample vals[pos].
	 * Returns what printf returns for the "%1.3f" output.
	 */
	struct counters *cycles, *assist;
	double cyc, transitions, ratio;

	cycles = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	assist = find_counter(cpu, "OTHER_ASSISTS.AVX_TO_SSE");
	if (pos == -1) {
		transitions = (double)assist->sum;
		cyc = (double)cycles->sum;
	} else {
		transitions = (double)assist->vals[pos];
		cyc = (double)cycles->vals[pos];
	}
	ratio = (transitions * 75.0) / cyc;
	return (printf("%1.3f", ratio));
}
static int
otherassistsse(struct counters *cpu, int pos)
{
	/*
	 * Expression 18 (thresh .1):
	 *   (OTHER_ASSISTS.SSE_TO_AVX * 75) / CPU_CLK_UNHALTED.THREAD_P
	 *
	 * pos == -1 uses the running sums, otherwise sample vals[pos].
	 * Returns what printf returns for the "%1.3f" output.
	 */
	struct counters *cycles, *assist;
	double cyc, transitions, ratio;

	cycles = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	assist = find_counter(cpu, "OTHER_ASSISTS.SSE_TO_AVX");
	if (pos == -1) {
		transitions = (double)assist->sum;
		cyc = (double)cycles->sum;
	} else {
		transitions = (double)assist->vals[pos];
		cyc = (double)cycles->vals[pos];
	}
	ratio = (transitions * 75.0) / cyc;
	return (printf("%1.3f", ratio));
}
static int
efficiency1(struct counters *cpu, int pos)
{
	/*
	 * Expression 19 (look if thresh < .9):
	 *   UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD_P)
	 *
	 * pos == -1 uses the running sums, otherwise sample vals[pos].
	 * Returns what printf returns for the "%1.3f" output.
	 */
	struct counters *cycles, *slots;
	double cyc, retired, ratio;

	cycles = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	slots = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
	if (pos == -1) {
		retired = (double)slots->sum;
		cyc = (double)cycles->sum;
	} else {
		retired = (double)slots->vals[pos];
		cyc = (double)cycles->vals[pos];
	}
	ratio = retired / (4.0 * cyc);
	return (printf("%1.3f", ratio));
}
static int
efficiency2(struct counters *cpu, int pos)
{
	/*
	 * Expression 20 (comp factor, good if > 1.):
	 *   CPU_CLK_UNHALTED.THREAD_P / INST_RETIRED.ANY_P
	 *
	 * pos == -1 uses the running sums, otherwise sample vals[pos].
	 * Returns what printf returns for the "%1.3f" output.
	 */
	struct counters *cycles, *retired;
	double cyc, insts, ratio;

	cycles = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
	retired = find_counter(cpu, "INST_RETIRED.ANY_P");
	if (pos == -1) {
		insts = (double)retired->sum;
		cyc = (double)cycles->sum;
	} else {
		insts = (double)retired->vals[pos];
		cyc = (double)cycles->vals[pos];
	}
	ratio = cyc / insts;
	return (printf("%1.3f", ratio));
}
/*
 * Canned expressions offered when a Sandy Bridge CPU is detected.
 *
 * Each initializer supplies four items in order; judging by how the
 * table is consumed, they are the expression name (matched against the
 * -e argument), a threshold hint string, the pmcstat command that
 * collects the needed counters, and the function that evaluates and
 * prints the expression.
 *
 * SANDY_BRIDGE_COUNT sizes the array and is also stored as the entry
 * count by set_sandybridge(), so it must be kept equal to the number
 * of initializers below.
 */
#define SANDY_BRIDGE_COUNT 20
static struct cpu_entry sandy_bridge[SANDY_BRIDGE_COUNT] = {
/*01*/ { "allocstall1", "thresh > .05",
"pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW -w 1",
allocstall1 },
/*02*/ { "allocstall2", "thresh > .05",
"pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES -w 1",
allocstall2 },
/*03*/ { "br_miss", "thresh >= .2",
"pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
br_mispredict },
/*04*/ { "splitload", "thresh >= .1",
"pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1",
splitload },
/*05*/ { "splitstore", "thresh >= .01",
"pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
splitstore },
/*06*/ { "contested", "thresh >= .05",
"pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
contested },
/*07*/ { "blockstorefwd", "thresh >= .05",
"pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
blockstoreforward },
/*08*/ { "cache2", "thresh >= .2",
"pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
cache2 },
/*09*/ { "cache1", "thresh >= .2",
"pmcstat -s MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
cache1 },
/*10*/ { "dtlbmissload", "thresh >= .1",
"pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
dtlb_missload },
/*11*/ { "dtlbmissstore", "thresh >= .05",
"pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
dtlb_missstore },
/*12*/ { "frontendstall", "thresh >= .15",
"pmcstat -s IDQ_UOPS_NOT_DELIVERED.CORE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
frontendstall },
/*13*/ { "clears", "thresh >= .02",
"pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
clears },
/*14*/ { "microassist", "thresh >= .05",
"pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
microassist },
/*15*/ { "aliasing_4k", "thresh >= .1",
"pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
aliasing },
/*16*/ { "fpassist", "look for a excessive value",
"pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
fpassists },
/*17*/ { "otherassistavx", "look for a excessive value",
"pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
otherassistavx },
/*18*/ { "otherassistsse", "look for a excessive value",
"pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
otherassistsse },
/*19*/ { "eff1", "thresh < .9",
"pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
efficiency1 },
/*20*/ { "eff2", "thresh > 1.0",
"pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
efficiency2 },
};
/*
 * Canned expressions offered when an Ivy Bridge CPU is detected.
 *
 * Each initializer supplies four items in order; judging by how the
 * table is consumed, they are the expression name (matched against the
 * -e argument), a threshold hint string, the pmcstat command that
 * collects the needed counters, and the function that evaluates and
 * prints the expression.
 *
 * IVY_BRIDGE_COUNT sizes the array and is also stored as the entry
 * count by set_ivybridge(), so it must be kept equal to the number of
 * initializers below.
 */
#define IVY_BRIDGE_COUNT 21
static struct cpu_entry ivy_bridge[IVY_BRIDGE_COUNT] = {
/*1*/ { "eff1", "thresh < .75",
"pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
efficiency1 },
/*2*/ { "eff2", "thresh > 1.0",
"pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
efficiency2 },
/*3*/ { "itlbmiss", "thresh > .05",
"pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
itlb_miss },
/*4*/ { "icachemiss", "thresh > .05",
"pmcstat -s ICACHE.IFETCH_STALL -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
icache_miss },
/*5*/ { "lcpstall", "thresh > .05",
"pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
lcp_stall },
/*6*/ { "cache1", "thresh >= .2",
"pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
cache1ib },
/*7*/ { "cache2", "thresh >= .2",
"pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
cache2ib },
/*8*/ { "contested", "thresh >= .05",
"pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
contested },
/*9*/ { "datashare", "thresh >= .05",
"pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
datasharing },
/*10*/ { "blockstorefwd", "thresh >= .05",
"pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
blockstoreforward },
/*11*/ { "splitload", "thresh >= .1",
"pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
splitloadib },
/*12*/ { "splitstore", "thresh >= .01",
"pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
splitstore },
/*13*/ { "aliasing_4k", "thresh >= .1",
"pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
aliasing },
/*14*/ { "dtlbmissload", "thresh >= .1",
"pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
dtlb_missload },
/*15*/ { "dtlbmissstore", "thresh >= .05",
"pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
dtlb_missstore },
/*16*/ { "br_miss", "thresh >= .2",
"pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
br_mispredictib },
/*17*/ { "clears", "thresh >= .02",
"pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
clears },
/*18*/ { "microassist", "thresh >= .05",
"pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
microassist },
/*19*/ { "fpassist", "look for a excessive value",
"pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
fpassists },
/*20*/ { "otherassistavx", "look for a excessive value",
"pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
otherassistavx },
/*21*/ { "otherassistsse", "look for a excessive value",
"pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
otherassistsse },
};
/*
 * Canned expressions offered when a Haswell CPU is detected.
 *
 * Each initializer supplies four items in order; judging by how the
 * table is consumed, they are the expression name (matched against the
 * -e argument), a threshold hint string, the pmcstat command that
 * collects the needed counters, and the function that evaluates and
 * prints the expression.
 *
 * HASWELL_COUNT sizes the array and is also stored as the entry count
 * by set_haswell(), so it must be kept equal to the number of
 * initializers below.
 */
#define HASWELL_COUNT 20
static struct cpu_entry haswell[HASWELL_COUNT] = {
	/*1*/ { "eff1", "thresh < .75",
		"pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
		efficiency1 },
	/*2*/ { "eff2", "thresh > 1.0",
		"pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
		efficiency2 },
	/*3*/ { "itlbmiss", "thresh > .05",
		"pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
		itlb_miss },
	/* The second event must be introduced with "-s" (was "--s"). */
	/*4*/ { "icachemiss", "thresh > .05",
		"pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1",
		icache_miss_has },
	/*5*/ { "lcpstall", "thresh > .05",
		"pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
		lcp_stall },
	/*6*/ { "cache1", "thresh >= .2",
		"pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
		cache1ib },
	/*7*/ { "cache2", "thresh >= .2",
		"pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
		cache2has },
	/*8*/ { "contested", "thresh >= .05",
		"pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
		contested_has },
	/*9*/ { "datashare", "thresh >= .05",
		"pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
		datasharing_has },
	/*10*/ { "blockstorefwd", "thresh >= .05",
		"pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
		blockstoreforward },
	/*11*/ { "splitload", "thresh >= .1",
		"pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1",
		splitload },
	/*12*/ { "splitstore", "thresh >= .01",
		"pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
		splitstore },
	/*13*/ { "aliasing_4k", "thresh >= .1",
		"pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
		aliasing },
	/*14*/ { "dtlbmissload", "thresh >= .1",
		"pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
		dtlb_missload },
	/*15*/ { "br_miss", "thresh >= .2",
		"pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
		br_mispredict },
	/*16*/ { "clears", "thresh >= .02",
		"pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
		clears },
	/*17*/ { "microassist", "thresh >= .05",
		"pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
		microassist },
	/*18*/ { "fpassist", "look for a excessive value",
		"pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
		fpassists },
	/*19*/ { "otherassistavx", "look for a excessive value",
		"pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
		otherassistavx },
	/*20*/ { "otherassistsse", "look for a excessive value",
		"pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
		otherassistsse },
};
static void
set_sandybridge(void)
{
	/* Point the global CPU descriptor at the Sandy Bridge tables. */
	strcpy(the_cpu.cputype, "SandyBridge PMC");
	the_cpu.explain = explain_name_sb;
	the_cpu.ents = sandy_bridge;
	the_cpu.number = SANDY_BRIDGE_COUNT;
}
static void
set_ivybridge(void)
{
	/* Point the global CPU descriptor at the Ivy Bridge tables. */
	strcpy(the_cpu.cputype, "IvyBridge PMC");
	the_cpu.explain = explain_name_ib;
	the_cpu.ents = ivy_bridge;
	the_cpu.number = IVY_BRIDGE_COUNT;
}
static void
set_haswell(void)
{
	/* Point the global CPU descriptor at the Haswell tables. */
	strcpy(the_cpu.cputype, "HASWELL PMC");
	the_cpu.explain = explain_name_has;
	the_cpu.ents = haswell;
	the_cpu.number = HASWELL_COUNT;
}
static void
set_expression(char *name)
{
	/*
	 * Select the canned expression called 'name' for the detected
	 * CPU: install its evaluator, pmcstat command and threshold text
	 * in the file-level 'expression', 'command' and 'threshold'.
	 * When no entry carries that name a message is printed and the
	 * program exits with -1.
	 */
	int idx;

	for (idx = 0; idx < the_cpu.number; idx++) {
		if (strcmp(name, the_cpu.ents[idx].name) != 0)
			continue;
		expression = the_cpu.ents[idx].func;
		command = the_cpu.ents[idx].command;
		threshold = the_cpu.ents[idx].thresh;
		return;
	}
	printf("For CPU type %s we have no expression:%s\n",
	    the_cpu.cputype, name);
	exit(-1);
}
static int
validate_expression(char *name)
{
	/*
	 * Report whether 'name' is one of the canned expressions of the
	 * detected CPU: 0 when an entry with that name exists, -1 when
	 * none does.
	 */
	int idx;

	idx = 0;
	while (idx < the_cpu.number) {
		if (strcmp(name, the_cpu.ents[idx].name) == 0)
			return (0);
		idx++;
	}
	return (-1);
}
static void
do_expression(struct counters *cpu, int pos)
{
	/*
	 * Run the currently selected evaluator, if any, on one CPU's
	 * counter chain.  The evaluator's return value is not used.
	 */
	if (expression != NULL)
		(*expression)(cpu, pos);
}
static void
process_header(int idx, char *p)
{
	/*
	 * Fill in cnts[idx] from one header token of the form
	 * 's/NN/nameof': NN becomes the CPU number and the text that
	 * follows a '/' becomes the counter name (at most MAX_NLEN-1
	 * characters are copied).  Every '/' from offset 2 onward
	 * triggers a copy, so the text after the last '/' is what
	 * remains.  A token that does not begin with "s/" is reported
	 * and ignored.
	 */
	struct counters *slot;
	char *cur, *name;
	int nlen;

	if (strncmp(p, "s/", 2) != 0) {
		printf("Check -- invalid header no s/ in %s\n",
		    p);
		return;
	}
	slot = &cnts[idx];
	slot->cpu = strtol(p + 2, NULL, 10);
	for (cur = p + 2; *cur != '\0'; cur++) {
		if (*cur != '/')
			continue;
		name = cur + 1;
		nlen = strlen(name);
		if (nlen < (MAX_NLEN-1))
			strcpy(slot->counter_name, name);
		else
			strncpy(slot->counter_name, name, (MAX_NLEN-1));
	}
}
static void
build_counters_from_header(FILE *io)
{
char buffer[8192], *p;
int i, len, cnt;
size_t mlen;
/* We have a new start, lets
* setup our headers and cpus.
*/
if (fgets(buffer, sizeof(buffer), io) == NULL) {
printf("First line can't be read from file err:%d\n", errno);
return;
}
/*
* Ok output is an array of counters. Once
* we start to read the values in we must
* put them in there slot to match there CPU and
* counter being updated. We create a mass array
* of the counters, filling in the CPU and
* counter name.
*/
/* How many do we get? */
len = strlen(buffer);
for (i=0, cnt=0; i<len; i++) {
if (strncmp(&buffer[i], "s/", 2) == 0) {
cnt++;
for(;i<len;i++) {
if (buffer[i] == ' ')
break;
}
}
}
mlen = sizeof(struct counters) * cnt;
cnts = malloc(mlen);
ncnts = cnt;
if (cnts == NULL) {
printf("No memory err:%d\n", errno);
return;
}
memset(cnts, 0, mlen);
for (i=0, cnt=0; i<len; i++) {
if (strncmp(&buffer[i], "s/", 2) == 0) {
p = &buffer[i];
for(;i<len;i++) {
if (buffer[i] == ' ') {
buffer[i] = 0;
break;
}
}
process_header(cnt, p);
cnt++;
}
}
if (verbose)
printf("We have %d entries\n", cnt);
}
extern int max_to_collect;
int max_to_collect = MAX_COUNTER_SLOTS;
static int
read_a_line(FILE *io)
{
char buffer[8192], *p, *stop;
int pos, i;
if (fgets(buffer, sizeof(buffer), io) == NULL) {
return(0);
}
p = buffer;
for (i=0; i<ncnts; i++) {
pos = cnts[i].pos;
cnts[i].vals[pos] = strtol(p, &stop, 0);
cnts[i].pos++;
cnts[i].sum += cnts[i].vals[pos];
p = stop;
}
return (1);
}
extern int cpu_count_out;
int cpu_count_out=0;
static void
print_header(void)
{
	/*
	 * Print the column banner: a row of stars, then "CPUn" for each
	 * populated glob_cpu[] slot separated by tabs.  The number of
	 * populated slots is also saved in cpu_count_out.  A tab is
	 * emitted after every index visited until the last populated
	 * slot has been printed, at which point the line is ended.
	 */
	int cpu, active, shown;

	printf("*********************************\n");
	active = 0;
	for (cpu = 0; cpu < MAX_CPU; cpu++) {
		if (glob_cpu[cpu])
			active++;
	}
	cpu_count_out = active;

	shown = 0;
	for (cpu = 0; cpu < MAX_CPU; cpu++) {
		if (glob_cpu[cpu]) {
			printf("CPU%d", cpu);
			shown++;
		}
		if (shown == active) {
			printf("\n");
			return;
		}
		printf("\t");
	}
}
static void
lace_cpus_together(void)
{
	/*
	 * Chain together, through next_cpu, all counters in 'cnts' that
	 * belong to the same CPU, and record the first counter of each
	 * chain in glob_cpu[cpu].  Counters whose CPU number is MAX_CPU
	 * or larger are reported and left unlinked.
	 */
	struct counters *tail, *cand;
	int head_idx, scan, cpu_id;

	for (head_idx = 0; head_idx < ncnts; head_idx++) {
		tail = &cnts[head_idx];
		if (tail->next_cpu != NULL) {
			/* Already part of some chain. */
			continue;
		}
		cpu_id = tail->cpu;
		if (cpu_id >= MAX_CPU) {
			printf("CPU %d to big\n", cpu_id);
			continue;
		}
		if (glob_cpu[cpu_id] != NULL) {
			/* This CPU's chain was built earlier. */
			continue;
		}
		glob_cpu[cpu_id] = tail;
		/* Append every later, still unlinked counter of this CPU. */
		for (scan = head_idx + 1; scan < ncnts; scan++) {
			cand = &cnts[scan];
			if (cand->next_cpu == NULL && cand->cpu == cpu_id) {
				tail->next_cpu = cand;
				tail = cand;
			}
		}
	}
}
static void
process_file(char *filename)
{
	/*
	 * Collect counter samples either from 'filename' or, when
	 * filename is NULL, from the output of the selected 'command'
	 * run through my_popen().
	 *
	 * The first line is turned into the counter array, the counters
	 * are laced per CPU and the banner is printed.  Sample lines are
	 * then read until input ends or max_to_collect lines have been
	 * taken.  In the live (command) case the selected expression is
	 * evaluated and printed for each line as it arrives.
	 */
	FILE *io;
	pid_t child = 0;
	int idx, lines_read, shown;

	if (filename != NULL) {
		io = fopen(filename, "r");
		if (io == NULL) {
			printf("Can't process file %s err:%d\n",
			    filename, errno);
			return;
		}
	} else {
		io = my_popen(command, "r", &child);
		if (io == NULL) {
			printf("Can't popen the command %s\n", command);
			return;
		}
	}

	build_counters_from_header(io);
	if (cnts == NULL) {
		/* No header, no counters: only the stream needs closing. */
		printf("Nothing to do -- no counters built\n");
		goto done;
	}
	lace_cpus_together();
	print_header();
	if (verbose) {
		for (idx = 0; idx < ncnts; idx++) {
			printf("Counter:%s cpu:%d index:%d\n",
			    cnts[idx].counter_name,
			    cnts[idx].cpu, idx);
		}
	}

	lines_read = 0;
	do {
		if (!read_a_line(io))
			break;
		lines_read++;
		if (filename == NULL) {
			/* Live collection: print this sample right away. */
			shown = 0;
			for (idx = 0; idx < MAX_CPU; idx++) {
				do_expression(glob_cpu[idx], (lines_read - 1));
				shown++;
				if (shown == cpu_count_out) {
					printf("\n");
					break;
				}
				printf("\t");
			}
		}
	} while (lines_read < max_to_collect);

done:
	if (filename) {
		fclose(io);
	} else {
		my_pclose(io, child);
	}
}
#if defined(__amd64__)
#define cpuid(in,a,b,c,d)\
  asm("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (in));
#else
#define cpuid(in, a, b, c, d)
#endif
/*
 * Identify the processor and prepare the program for it.
 *
 * CPUID leaf 0 is checked for the vendor; anything other than
 * "GenuineIntel" prints a message and exits with 0.  Leaf 1 then gives
 * the family and model: Sandy Bridge, Ivy Bridge and Haswell models
 * install their expression tables, every other model or family prints
 * what was found and exits with -1 through the not_supported label.
 *
 * On builds where cpuid() expands to nothing the registers keep their
 * initial zero value, which lands in the "Unknown processor type" exit.
 *
 * Finally the output of "/usr/sbin/pmccontrol -L" is read and each
 * tab-indented line is saved (without the tab and newline) in the
 * growable 'valid_pmcs' array; valid_pmc_cnt counts the entries.
 */
static void
get_cpuid_set(void)
{
	unsigned long eax, ebx, ecx, edx;
	int model;
	pid_t pid_of_command=0;
	size_t sz, old_sz, len;
	FILE *io;
	char linebuf[1024], *str;

	eax = ebx = ecx = edx = 0;
	cpuid(0, eax, ebx, ecx, edx);
	if (ebx == 0x68747541) {
		printf("AMD processors are not supported by this program\n");
		printf("Sorry\n");
		exit(0);
	} else if (ebx == 0x69727943) {
		/* "Cyri" -- the constant used to be one hex digit short. */
		printf("Cyrix processors are not supported by this program\n");
		printf("Sorry\n");
		exit(0);
	} else if (ebx == 0x756e6547) {
		printf("Genuine Intel\n");
	} else {
		printf("Unknown processor type 0x%lx Only Intel AMD64 types are supported by this routine!\n", ebx);
		exit(0);
	}
	cpuid(1, eax, ebx, ecx, edx);
	/* Extended model (bits 19:16) becomes the high nibble of the model. */
	model = (((eax & 0xF0000) >> 12) | ((eax & 0xF0) >> 4));
	printf("CPU model is 0x%x id:0x%lx\n", model, eax);
	switch (eax & 0xF00) {
	case 0x500:	/* Pentium family processors */
		printf("Intel Pentium P5\n");
		goto not_supported;
	case 0x600:	/* Pentium Pro, Celeron, Pentium II & III */
		switch (model) {
		case 0x1:
			printf("Intel Pentium P6\n");
			goto not_supported;
		case 0x3:
		case 0x5:
			printf("Intel PII\n");
			goto not_supported;
		case 0x6: case 0x16:
			printf("Intel CL\n");
			goto not_supported;
		case 0x7: case 0x8: case 0xA: case 0xB:
			printf("Intel PIII\n");
			goto not_supported;
		case 0x9: case 0xD:
			printf("Intel PM\n");
			goto not_supported;
		case 0xE:
			printf("Intel CORE\n");
			goto not_supported;
		case 0xF:
			printf("Intel CORE2\n");
			goto not_supported;
		case 0x17:
			printf("Intel CORE2EXTREME\n");
			goto not_supported;
		case 0x1C:	/* Per Intel document 320047-002. */
			printf("Intel ATOM\n");
			goto not_supported;
		case 0x1A:
		case 0x1E:	/*
				 * Per Intel document 253669-032 9/2009,
				 * pages A-2 and A-57
				 */
		case 0x1F:	/*
				 * Per Intel document 253669-032 9/2009,
				 * pages A-2 and A-57
				 */
			printf("Intel COREI7\n");
			goto not_supported;
		case 0x2E:
			printf("Intel NEHALEM\n");
			goto not_supported;
		case 0x25:	/* Per Intel document 253669-033US 12/2009. */
		case 0x2C:	/* Per Intel document 253669-033US 12/2009. */
			printf("Intel WESTMERE\n");
			goto not_supported;
		case 0x2F:	/* Westmere-EX, seen in wild */
			printf("Intel WESTMERE\n");
			goto not_supported;
		case 0x2A:	/* Per Intel document 253669-039US 05/2011. */
			printf("Intel SANDYBRIDGE\n");
			set_sandybridge();
			break;
		case 0x2D:	/* Per Intel document 253669-044US 08/2012. */
			printf("Intel SANDYBRIDGE_XEON\n");
			set_sandybridge();
			break;
		case 0x3A:	/* Per Intel document 253669-043US 05/2012. */
			printf("Intel IVYBRIDGE\n");
			set_ivybridge();
			break;
		case 0x3E:	/* Per Intel document 325462-045US 01/2013. */
			printf("Intel IVYBRIDGE_XEON\n");
			set_ivybridge();
			break;
		case 0x3F:	/* Per Intel document 325462-045US 09/2014. */
			printf("Intel HASWELL (Xeon)\n");
			set_haswell();
			break;
		case 0x3C:	/* Per Intel document 325462-045US 01/2013. */
		case 0x45:
		case 0x46:
			printf("Intel HASWELL\n");
			set_haswell();
			break;
		case 0x4D:
			/* Per Intel document 330061-001 01/2014. */
			printf("Intel ATOM_SILVERMONT\n");
			goto not_supported;
		default:
			printf("Intel model 0x%x is not known -- sorry\n",
			    model);
			goto not_supported;
		}
		break;
	case 0xF00:	/* P4 */
		printf("Intel unknown model %d\n", model);
		goto not_supported;
	default:
		/*
		 * No table was installed for this family, so carrying on
		 * would leave the CPU descriptor unset.
		 */
		printf("Intel family 0x%lx is not known -- sorry\n",
		    (eax & 0xF00) >> 8);
		goto not_supported;
	}
	/* Ok lets load the list of all known PMC's */
	io = my_popen("/usr/sbin/pmccontrol -L", "r", &pid_of_command);
	if (io == NULL) {
		printf("Can't popen the command /usr/sbin/pmccontrol -L\n");
		exit(-1);
	}
	if (valid_pmcs == NULL) {
		/* Likely */
		pmc_allocated_cnt = PMC_INITIAL_ALLOC;
		sz = sizeof(char *) * pmc_allocated_cnt;
		valid_pmcs = malloc(sz);
		if (valid_pmcs == NULL) {
			printf("No memory allocation fails at startup?\n");
			exit(-1);
		}
		memset(valid_pmcs, 0, sz);
	}
	while (fgets(linebuf, sizeof(linebuf), io) != NULL) {
		if (linebuf[0] != '\t') {
			/* sometimes headers ;-) */
			continue;
		}
		/* len is at least 1 here because linebuf[0] is a tab. */
		len = strlen(linebuf);
		if (linebuf[(len-1)] == '\n') {
			/* Likely */
			linebuf[(len-1)] = 0;
		}
		str = &linebuf[1];
		len = strlen(str) + 1;
		valid_pmcs[valid_pmc_cnt] = malloc(len);
		if (valid_pmcs[valid_pmc_cnt] == NULL) {
			printf("No memory2 allocation fails at startup?\n");
			exit(-1);
		}
		memset(valid_pmcs[valid_pmc_cnt], 0, len);
		strcpy(valid_pmcs[valid_pmc_cnt], str);
		valid_pmc_cnt++;
		if (valid_pmc_cnt >= pmc_allocated_cnt) {
			/* Got to expand -- unlikely */
			char **more;

			old_sz = sizeof(char *) * pmc_allocated_cnt;
			sz = old_sz * 2;
			more = malloc(sz);
			if (more == NULL) {
				printf("No memory3 allocation fails at startup?\n");
				exit(-1);
			}
			memset(more, 0, sz);
			/*
			 * Only the old array's bytes exist to be copied;
			 * copying the new size would read past its end.
			 */
			memcpy(more, valid_pmcs, old_sz);
			pmc_allocated_cnt *= 2;
			free(valid_pmcs);
			valid_pmcs = more;
		}
	}
	my_pclose(io, pid_of_command);
	return;
not_supported:
	printf("Not supported\n");
	exit(-1);
}
static void
explain_all(void)
{
	/*
	 * Walk the expression table of the detected CPU and print, for
	 * each entry, its -e name followed by whatever the CPU's explain
	 * callback prints for that name.
	 */
	int idx;

	printf("For CPU's of type %s the following expressions are available:\n",the_cpu.cputype);
	printf("-------------------------------------------------------------\n");
	idx = 0;
	while (idx < the_cpu.number) {
		printf("For -e %s ", the_cpu.ents[idx].name);
		(*the_cpu.explain)(the_cpu.ents[idx].name);
		printf("----------------------------\n");
		idx++;
	}
}
static void
test_for_a_pmc(const char *pmc, int out_so_far)
{
	/*
	 * Try one PMC by running
	 *   /usr/sbin/pmcstat -w .25 -c 0 -s <pmc>
	 * and print a verdict.  'out_so_far' is the number of characters
	 * the caller already printed on this line; the verdict is padded
	 * out to column 50.
	 *
	 * The first output line must mention the PMC name (searching
	 * starts at offset 2); the test passes when it does and a second
	 * line can be read, in which case that line is echoed after
	 * "Pass" with its leading spaces removed.  An "ERROR" in the
	 * first line, missing output, or a missing name is reported as
	 * a failure.
	 */
	FILE *io;
	pid_t pid_of_command=0;
	char my_command[1024];
	char line[1024];
	char resp[1024];
	int len, llen, i;

	if (out_so_far < 50) {
		len = 50 - out_so_far;
		for (i = 0; i < len; i++) {
			printf(" ");
		}
	}
	/* Bounded formatting: the name must not overrun the buffers. */
	snprintf(my_command, sizeof(my_command),
	    "/usr/sbin/pmcstat -w .25 -c 0 -s %s", pmc);
	io = my_popen(my_command, "r", &pid_of_command);
	if (io == NULL) {
		printf("Failed -- popen fails\n");
		return;
	}
	/* Setup what we expect */
	snprintf(resp, sizeof(resp), "%s", pmc);
	len = strlen(resp);
	if (fgets(line, sizeof(line), io) == NULL) {
		printf("Failed -- no output from pmstat\n");
		goto out;
	}
	llen = strlen(line);
	if (llen > 0 && line[(llen-1)] == '\n') {
		line[(llen-1)] = 0;
		llen--;
	}
	/* "<=" so that a name sitting at the very end still matches. */
	for (i = 2; i <= (llen-len); i++) {
		if (strncmp(&line[i], "ERROR", 5) == 0) {
			printf("Failed %s\n", line);
			goto out;
		} else if (strncmp(&line[i], resp, len) == 0) {
			int j, k;

			if (fgets(line, sizeof(line), io) == NULL) {
				printf("Failed -- no second output from pmstat\n");
				goto out;
			}
			/*
			 * Skip every leading space, one at a time, so no
			 * character of the value is lost.
			 */
			j = 0;
			while (line[j] == ' ') {
				j++;
			}
			printf("Pass");
			len = strlen(&line[j]);
			if (len < 20) {
				for (k = 0; k < (20-len); k++) {
					printf(" ");
				}
			}
			if (len) {
				printf("%s", &line[j]);
			} else {
				printf("\n");
			}
			goto out;
		}
	}
	printf("Failed -- '%s' not '%s'\n", line, resp);
out:
	my_pclose(io, pid_of_command);
}
static int
add_it_to(char **vars, int cur_cnt, char *name)
{
	/*
	 * Add a private copy of 'name' to the string list 'vars' unless
	 * one of its first 'cur_cnt' entries already equals it.
	 *
	 * Returns 1 when the name was stored in vars[cur_cnt] and 0 when
	 * it was already present.  The slot at cur_cnt must be NULL; if
	 * it is not, or if memory runs out, the program exits with -1.
	 */
	char *copy;
	size_t bytes;
	int idx;

	idx = 0;
	while (idx < cur_cnt) {
		if (strcmp(vars[idx], name) == 0) {
			/* Already have */
			return (0);
		}
		idx++;
	}
	if (vars[cur_cnt] != NULL) {
		printf("Cur_cnt:%d filled with %s??\n",
		    cur_cnt, vars[cur_cnt]);
		exit(-1);
	}
	/* Ok its new */
	bytes = strlen(name) + 1;
	copy = malloc(bytes);
	if (copy == NULL) {
		printf("No memory %s\n", __FUNCTION__);
		exit(-1);
	}
	memcpy(copy, name, bytes);
	vars[cur_cnt] = copy;
	return (1);
}
static char *
build_command_for_exp(struct expression *exp)
{
	/*
	 * Build the pmcstat command to handle
	 * the passed in expression.
	 * /usr/sbin/pmcstat -w 1 -s NNN -s QQQ
	 * where NNN and QQQ represent the PMC's in the expression
	 * uniquely..
	 *
	 * Returns a malloc'ed command string.  If the expression names
	 * no PMC a message is printed and the program exits with 0; on
	 * allocation failure it exits with -1.
	 */
	int cnt_pmc, alloced_pmcs, i;
	struct expression *at;
	char **vars, *cmd;
	size_t mal;

	alloced_pmcs = cnt_pmc = 0;
	/* first how many do we have */
	for (at = exp; at != NULL; at = at->next) {
		if (at->type == TYPE_VALUE_PMC) {
			cnt_pmc++;
		}
	}
	if (cnt_pmc == 0) {
		printf("No PMC's in your expression -- nothing to do!!\n");
		exit(0);
	}
	/* Room for every PMC node; duplicates simply leave slots unused. */
	mal = cnt_pmc * sizeof(char *);
	vars = malloc(mal);
	if (vars == NULL) {
		printf("No memory\n");
		exit(-1);
	}
	memset(vars, 0, mal);
	for (at = exp; at != NULL; at = at->next) {
		if (at->type == TYPE_VALUE_PMC) {
			if (add_it_to(vars, alloced_pmcs, at->name)) {
				alloced_pmcs++;
			}
		}
	}
	/* Now we have a unique list in vars so create our command */
	mal = 23; /* "/usr/sbin/pmcstat -w 1" + \0 */
	for (i = 0; i < alloced_pmcs; i++) {
		mal += strlen(vars[i]) + 4;	/* var + " -s " */
	}
	cmd = malloc((mal+2));
	if (cmd == NULL) {
		printf("%s out of mem\n", __FUNCTION__);
		exit(-1);
	}
	memset(cmd, 0, (mal+2));
	strcpy(cmd, "/usr/sbin/pmcstat -w 1");
	/*
	 * Append straight into cmd, which was sized for exactly these
	 * pieces; no fixed-size staging buffer that a long PMC name
	 * could overflow.
	 */
	for (i = 0; i < alloced_pmcs; i++) {
		strcat(cmd, " -s ");
		strcat(cmd, vars[i]);
		free(vars[i]);
		vars[i] = NULL;
	}
	free(vars);
	return(cmd);
}
static int
user_expr(struct counters *cpu, int pos)
{
	/*
	 * Evaluator for a user supplied (-E) expression.  Every PMC node
	 * of master_exp is loaded with that counter's value for this CPU
	 * (sample vals[pos], or the running sum when pos is -1), the
	 * expression is run and its result printed with "%1.3f".
	 *
	 * Returns printf's return value.  A PMC that cannot be found on
	 * this CPU is reported and the program exits with -1.
	 */
	struct expression *node;
	struct counters *ctr;
	double result;

	for (node = master_exp; node != NULL; node = node->next) {
		if (node->type != TYPE_VALUE_PMC)
			continue;
		ctr = find_counter(cpu, node->name);
		if (ctr == NULL) {
			printf("%s:Can't find counter %s?\n", __FUNCTION__, node->name);
			exit(-1);
		}
		if (pos == -1)
			node->value = ctr->sum * 1.0;
		else
			node->value = ctr->vals[pos] * 1.0;
	}
	result = run_expr(master_exp, 1, NULL);
	return (printf("%1.3f", result));
}
static void
set_manual_exp(struct expression *exp)
{
	/*
	 * Install a user supplied expression: user_expr becomes the
	 * evaluator, the pmcstat command is generated from the PMCs the
	 * expression names, and the threshold text is a fixed note.
	 */
	char *generated;

	expression = user_expr;
	generated = build_command_for_exp(exp);
	command = generated;
	threshold = "User defined threshold";
}
static void
run_tests(void)
{
	/*
	 * Try every PMC name held in valid_pmcs: print the name, flush
	 * so it shows up before the test starts, then let
	 * test_for_a_pmc() print the verdict on the same line.
	 */
	int idx, name_width;

	printf("Running tests on %d PMC's this may take some time\n", valid_pmc_cnt);
	printf("------------------------------------------------------------------------\n");
	idx = 0;
	while (idx < valid_pmc_cnt) {
		name_width = printf("%s", valid_pmcs[idx]);
		fflush(stdout);
		test_for_a_pmc(valid_pmcs[idx], name_width);
		idx++;
	}
}
static void
list_all(void)
{
	/*
	 * Print every PMC name held in valid_pmcs, padded with spaces to
	 * column 52, followed by its abbreviation: a '%' and the name's
	 * index in the list.
	 */
	int idx, width;

	printf("PMC Abbreviation\n");
	printf("--------------------------------------------------------------\n");
	for (idx = 0; idx < valid_pmc_cnt; idx++) {
		width = printf("%s", valid_pmcs[idx]);
		if (width < 52)
			printf("%*s", 52 - width, "");
		printf("%%%d\n", idx);
	}
}
/*
 * Print the command line summary.  prog is the program name shown in
 * the synopsis line.
 */
static void
main_usage(const char *prog)
{
	printf("Use %s [ -i inputfile -v -m max_to_collect -e expr -E -h -? -H]\n",
	    prog);
	printf("-i inputfile -- use source as inputfile not stdin (if stdin collect)\n");
	printf("-v -- verbose dump debug type things -- you don't want this\n");
	printf("-m N -- maximum to collect is N measurments\n");
	printf("-e expr-name -- Do expression expr-name\n");
	printf("-E 'your expression' -- Do your expression\n");
	printf("-h -- Don't do the expression I put in -e xxx just explain what it does and exit\n");
	printf("-H -- Don't run anything, just explain all canned expressions\n");
	printf("-T -- Test all PMC's defined by this processor\n");
}

/*
 * Program entry point: parse the options, then list, explain or test
 * PMCs, or process the input and print the selected expression once
 * per CPU that has an entry in glob_cpu.
 *
 * Options handled here:
 *   -L       list all valid PMCs with their abbreviations and exit
 *   -H       explain all canned expressions and exit
 *   -T       run the PMC tests and exit
 *   -E expr  parse a user expression and, if parsing yields one, install it
 *   -e name  select a canned expression by name (must validate)
 *   -m N     maximum to collect, clamped to MAX_COUNTER_SLOTS
 *   -v       increase verbosity
 *   -h       explain the selected expression, then exit without processing
 *   -i file  file name handed to process_file()
 *
 * Every return path of main itself yields 0, including usage errors.
 */
int
main(int argc, char **argv)
{
	int i, j, cnt;
	char *filename = NULL;
	char *name = NULL;
	int help_only = 0;
	int test_mode = 0;

	get_cpuid_set();
	memset(glob_cpu, 0, sizeof(glob_cpu));
	while ((i = getopt(argc, argv, "LHhvm:i:?e:TE:")) != -1) {
		switch (i) {
		case 'L':
			list_all();
			return (0);
		case 'H':
			printf("**********************************\n");
			explain_all();
			printf("**********************************\n");
			return (0);
		case 'T':
			test_mode = 1;
			break;
		case 'E':
			master_exp = parse_expression(optarg);
			if (master_exp) {
				set_manual_exp(master_exp);
			}
			break;
		case 'e':
			if (validate_expression(optarg)) {
				printf("Unknown expression %s\n", optarg);
				return (0);
			}
			name = optarg;
			set_expression(optarg);
			break;
		case 'm':
			max_to_collect = strtol(optarg, NULL, 0);
			if (max_to_collect > MAX_COUNTER_SLOTS) {
				/* You can't collect more than max in array */
				max_to_collect = MAX_COUNTER_SLOTS;
			}
			break;
		case 'v':
			verbose++;
			break;
		case 'h':
			help_only = 1;
			break;
		case 'i':
			filename = optarg;
			break;
		case '?':
		default:
			main_usage(argv[0]);
			return (0);
		}
	}
	if ((name == NULL) && (filename == NULL) && (test_mode == 0) && (master_exp == NULL)) {
		printf("Without setting an expression we cannot dynamically gather information\n");
		printf("you must supply a filename (and you probably want verbosity)\n");
		main_usage(argv[0]);
		return (0);
	}
	if (test_mode) {
		run_tests();
		return (0);
	}
	printf("*********************************\n");
	if (master_exp != NULL) {
		printf("Examine your expression ");
		print_exp(master_exp);
		printf("User defined threshold\n");
	} else if (name != NULL) {
		/*
		 * Only explain when -e actually named an expression.  With
		 * just -i there is no name, and the explain callback must
		 * not be handed a NULL pointer.
		 */
		(*the_cpu.explain)(name);
	}
	if (help_only) {
		return (0);
	}
	process_file(filename);
	if (verbose >= 2) {
		/* Dump every counter with its samples and its sum. */
		for (i = 0; i < ncnts; i++) {
			printf("Counter:%s cpu:%d index:%d\n",
			    cnts[i].counter_name,
			    cnts[i].cpu, i);
			for (j = 0; j < cnts[i].pos; j++) {
				printf(" val - %ld\n", (long int)cnts[i].vals[j]);
			}
			printf(" sum - %ld\n", (long int)cnts[i].sum);
		}
	}
	if (expression == NULL) {
		/* Nothing selected to evaluate (file-only run). */
		return (0);
	}
	/*
	 * One result per populated CPU slot, tab separated; the line is
	 * ended once cpu_count_out results have been printed.
	 */
	for (i = 0, cnt = 0; i < MAX_CPU; i++) {
		if (glob_cpu[i]) {
			do_expression(glob_cpu[i], -1);
			cnt++;
			if (cnt == cpu_count_out) {
				printf("\n");
				break;
			} else {
				printf("\t");
			}
		}
	}
	return (0);
}