099a5eb8d8
PR: 201594 Sponsored by: The FreeBSD Foundation
2433 lines
67 KiB
C
2433 lines
67 KiB
C
/*-
|
|
* Copyright (c) 2014, 2015 Netflix Inc.
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer,
|
|
* in this position and unchanged.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* 3. The name of the author may not be used to endorse or promote products
|
|
* derived from this software without specific prior written permission
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
|
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
|
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
|
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
|
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
#include <sys/types.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <unistd.h>
|
|
#include <string.h>
|
|
#include <strings.h>
|
|
#include <sys/errno.h>
|
|
#include <signal.h>
|
|
#include <sys/wait.h>
|
|
#include <getopt.h>
|
|
#include "eval_expr.h"
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
/* Array sizes used by struct counters and the glob_cpu[] table below. */
#define MAX_COUNTER_SLOTS	1024	/* elements in counters.vals[] */
#define MAX_NLEN		64	/* bytes in counters.counter_name[] */
#define MAX_CPU			64	/* elements in glob_cpu[] */

static int verbose = 0;

extern char **environ;

/*
 * Each global below is declared extern immediately before it is defined.
 */
extern struct expression *master_exp;
struct expression *master_exp = NULL;

#define PMC_INITIAL_ALLOC	512

extern char **valid_pmcs;
char **valid_pmcs = NULL;

extern int valid_pmc_cnt;
int valid_pmc_cnt = 0;

extern int pmc_allocated_cnt;
int pmc_allocated_cnt = 0;
|
|
|
|
/*
 * The following two functions are variants of popen() and pclose(),
 * with the caveat that my_popen() hands you the PID of the child so
 * that you can supply it to my_pclose(), which sends a SIGTERM to the
 * process.
 */

/*
 * my_popen --
 *	Run "command" via "/bin/sh -c" in a child process whose stdin,
 *	stdout and stderr are attached to pipes.
 *
 *	dir must be "r" or "w".  With "r" the returned stream reads what the
 *	child writes to stdout/stderr and the child's stdin sees EOF; with
 *	"w" the returned stream feeds the child's stdin and the read side of
 *	the child's output pipe is closed.
 *
 *	On success the stream is returned and *p_pid holds the child's PID.
 *	On failure NULL is returned with errno set (EINVAL for a bad dir).
 *	If the stream cannot be created after the fork, the child is
 *	terminated and reaped before NULL is returned, so *p_pid must not be
 *	used in that case.
 */
static FILE *
my_popen(const char *command, const char *dir, pid_t *p_pid)
{
	FILE *io;
	int pdesin[2], pdesout[2];
	int keep_fd, reading, saved_errno;
	pid_t pid;

	reading = (strcmp(dir, "r") == 0);
	if (!reading && strcmp(dir, "w") != 0) {
		errno = EINVAL;
		return (NULL);
	}
	if (pipe(pdesin) < 0)
		return (NULL);

	if (pipe(pdesout) < 0) {
		saved_errno = errno;
		(void)close(pdesin[0]);
		(void)close(pdesin[1]);
		errno = saved_errno;
		return (NULL);
	}

	switch (pid = fork()) {
	case -1:			/* Error. */
		saved_errno = errno;
		(void)close(pdesin[0]);
		(void)close(pdesin[1]);
		(void)close(pdesout[0]);
		(void)close(pdesout[1]);
		errno = saved_errno;
		return (NULL);
		/* NOTREACHED */
	case 0:				/* Child. */
		/* Close out un-used sides */
		(void)close(pdesin[1]);
		(void)close(pdesout[0]);
		/* Now prepare the stdin of the process */
		if (pdesin[0] != STDIN_FILENO) {
			(void)dup2(pdesin[0], STDIN_FILENO);
			(void)close(pdesin[0]);
		}
		/* Now prepare the stdout of the process */
		(void)dup2(pdesout[1], STDOUT_FILENO);
		/* And lets do stderr just in case */
		(void)dup2(pdesout[1], STDERR_FILENO);
		if (pdesout[1] > STDERR_FILENO)
			(void)close(pdesout[1]);
		/*
		 * Now run it.  The command string is handed to the shell
		 * directly, so no fixed-size copy (and no length limit) is
		 * involved; execl() passes the current environment along.
		 */
		execl("/bin/sh", "sh", "-c", command, (char *)NULL);
		/* Use _exit() so the parent's stdio buffers are not flushed twice. */
		_exit(127);
		/* NOTREACHED */
	}

	/* Parent: store the pid and drop the descriptors we do not keep. */
	*p_pid = pid;
	(void)close(pdesin[0]);
	(void)close(pdesout[1]);
	if (reading) {
		(void)close(pdesin[1]);
		keep_fd = pdesout[0];
		io = fdopen(keep_fd, "r");
	} else {
		(void)close(pdesout[0]);
		keep_fd = pdesin[1];
		io = fdopen(keep_fd, "w");
	}
	if (io == NULL) {
		/* Do not leak the descriptor or leave the child behind. */
		saved_errno = errno;
		(void)close(keep_fd);
		(void)kill(pid, SIGTERM);
		while (waitpid(pid, NULL, 0) == -1 && errno == EINTR)
			continue;
		errno = saved_errno;
	}
	return (io);
}
|
|
|
|
/*
 * my_pclose --
 *	Counterpart of my_popen().  Closes the stream, sends SIGTERM to the
 *	process identified by the_pid and then waits for that process,
 *	retrying the wait whenever it fails with EINTR.  Nothing is
 *	returned; the exit status of the child is discarded.
 */
static void
my_pclose(FILE *io, pid_t the_pid)
{
	int status;
	pid_t reaped;

	(void)fclose(io);

	/* Die if you are not dead! */
	kill(the_pid, SIGTERM);

	for (;;) {
		reaped = wait4(the_pid, &status, 0, (struct rusage *)0);
		if (reaped != -1 || errno != EINTR)
			break;
	}
}
|
|
|
|
struct counters {
|
|
struct counters *next_cpu;
|
|
char counter_name[MAX_NLEN]; /* Name of counter */
|
|
int cpu; /* CPU we are on */
|
|
int pos; /* Index we are filling to. */
|
|
uint64_t vals[MAX_COUNTER_SLOTS]; /* Last 64 entries */
|
|
uint64_t sum; /* Summary of entries */
|
|
};
|
|
|
|
/*
 * Global counter state.  As elsewhere in this file, each global is
 * declared extern and then defined.
 */
extern struct counters *glob_cpu[MAX_CPU];
struct counters *glob_cpu[MAX_CPU];

extern struct counters *cnts;
struct counters *cnts = NULL;

extern int ncnts;
int ncnts = 0;

/* Function pointer with the same signature as the per-metric routines. */
extern int (*expression)(struct counters *, int);
int (*expression)(struct counters *, int);

static const char *threshold = NULL;
static const char *command;
|
|
|
|
/*
 * One named entry: three strings plus a function pointer taking a counter
 * chain and a position, the signature shared by the metric routines in
 * this file.
 */
struct cpu_entry {
	const char	*name;
	const char	*thresh;
	const char	*command;
	int		(*func)(struct counters *, int);
};
|
|
|
|
|
|
/*
 * Description of a CPU type: a name, a count, a pointer to cpu_entry
 * records and an "explain" callback taking an entry name (the
 * explain_name_*() routines in this file have that signature).
 */
struct cpu_type {
	char			cputype[32];
	int			number;
	struct cpu_entry	*ents;
	void			(*explain)(const char *name);
};

extern struct cpu_type the_cpu;
struct cpu_type the_cpu;
|
|
|
|
/*
 * explain_name_sb --
 *	Print the formula text and the threshold hint associated with the
 *	metric called "name".  An unrecognised name prints "Unknown name:"
 *	and uses the hint "unknown entry".  The final line, which embeds the
 *	hint, is always printed.
 *
 *	The "splitload" formula text carries its opening parenthesis, so the
 *	expression shown to the user is balanced.
 */
static void
explain_name_sb(const char *name)
{
	static const struct {
		const char *name;	/* metric name to match */
		const char *formula;	/* text printed verbatim */
		const char *thresh;	/* hint for the closing line */
	} tbl[] = {
		{ "allocstall1",
		  "Examine PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "allocstall2",
		  "Examine PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "br_miss",
		  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "splitload",
		  "Examine (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "cache2",
		  "Examine ((MEM_LOAD_RETIRED.L3_HIT * 26) + \n"
		  " (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + \n"
		  " (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P\n"
		  "**Note we have it labeled MEM_LOAD_UOPS_RETIRED.LLC_HIT not MEM_LOAD_RETIRED.L3_HIT\n",
		  "thresh >= .2" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  " / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "frontendstall",
		  "Examine IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4)\n",
		  "thresh >= .15" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  " MACHINE_CLEARS.SMC + \n"
		  " MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .9" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "dtlbmissstore",
		  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
		  " / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .05" },
	};
	const char *mythresh;
	size_t i, n;

	n = sizeof(tbl) / sizeof(tbl[0]);
	for (i = 0; i < n && strcmp(name, tbl[i].name) != 0; i++)
		continue;

	if (i < n) {
		/* The formula text contains no conversions; emit it as-is. */
		fputs(tbl[i].formula, stdout);
		mythresh = tbl[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
		mythresh = "unknown entry";
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
|
|
|
|
/*
 * explain_name_ib --
 *	Print the formula text and the threshold hint associated with the
 *	metric called "name".  An unrecognised name prints "Unknown name:"
 *	and uses the hint "unknown entry".  The final line, which embeds the
 *	hint, is always printed.
 *
 *	The "cache2" text shows the factor of 29 that cache2ib() applies to
 *	MEM_LOAD_UOPS_RETIRED.LLC_HIT, and its parentheses are balanced.
 */
static void
explain_name_ib(const char *name)
{
	static const struct {
		const char *name;	/* metric name to match */
		const char *formula;	/* text printed verbatim */
		const char *thresh;	/* hint for the closing line */
	} tbl[] = {
		{ "br_miss",
		  "Examine ((BR_MISP_RETIRED.ALL_BRANCHES /(BR_MISP_RETIRED.ALL_BRANCHES +\n"
		  " MACHINE_CLEAR.COUNT) * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES)\n"
		  "/ (4 * CPU_CLK_UNHALTED.THREAD))))\n",
		  "thresh >= .2" },
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .9" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "cache2",
		  "Examine (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "itlbmiss",
		  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "splitload",
		  "Examine ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) *\n"
		  " LD_BLOCKS.NO_SR)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  " / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "dtlbmissstore",
		  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
		  " / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .05" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  " MACHINE_CLEARS.SMC + \n"
		  " MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
	};
	const char *mythresh;
	size_t i, n;

	n = sizeof(tbl) / sizeof(tbl[0]);
	for (i = 0; i < n && strcmp(name, tbl[i].name) != 0; i++)
		continue;

	if (i < n) {
		/* The formula text contains no conversions; emit it as-is. */
		fputs(tbl[i].formula, stdout);
		mythresh = tbl[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
		mythresh = "unknown entry";
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
|
|
|
|
|
|
/*
 * explain_name_has --
 *	Print the formula text and the threshold hint associated with the
 *	metric called "name".  An unrecognised name prints "Unknown name:"
 *	and uses the hint "unknown entry".  The final line, which embeds the
 *	hint, is always printed.
 */
static void
explain_name_has(const char *name)
{
	static const struct {
		const char *name;	/* metric name to match */
		const char *formula;	/* text printed verbatim */
		const char *thresh;	/* hint for the closing line */
	} tbl[] = {
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .75" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "itlbmiss",
		  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine (36 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "cache2",
		  "Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \n"
		  " (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + \n"
		  " (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))\n"
		  " / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "splitload",
		  "Examine (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  " / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "br_miss",
		  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD\n",
		  "thresh >= .2" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  " MACHINE_CLEARS.SMC + \n"
		  " MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
	};
	const char *mythresh;
	size_t i, n;

	n = sizeof(tbl) / sizeof(tbl[0]);
	for (i = 0; i < n && strcmp(name, tbl[i].name) != 0; i++)
		continue;

	if (i < n) {
		/* The formula text contains no conversions; emit it as-is. */
		fputs(tbl[i].formula, stdout);
		mythresh = tbl[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
		mythresh = "unknown entry";
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
|
|
|
|
|
|
static struct counters *
|
|
find_counter(struct counters *base, const char *name)
|
|
{
|
|
struct counters *at;
|
|
int len;
|
|
|
|
at = base;
|
|
len = strlen(name);
|
|
while(at) {
|
|
if (strncmp(at->counter_name, name, len) == 0) {
|
|
return(at);
|
|
}
|
|
at = at->next_cpu;
|
|
}
|
|
printf("Can't find counter %s\n", name);
|
|
printf("We have:\n");
|
|
at = base;
|
|
while(at) {
|
|
printf("- %s\n", at->counter_name);
|
|
at = at->next_cpu;
|
|
}
|
|
exit(-1);
|
|
}
|
|
|
|
static int
|
|
allocstall1(struct counters *cpu, int pos)
|
|
{
|
|
/* 1 - PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW/CPU_CLK_UNHALTED.THREAD_P (thresh > .05)*/
|
|
int ret;
|
|
struct counters *partial;
|
|
struct counters *unhalt;
|
|
double un, par, res;
|
|
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
|
|
partial = find_counter(cpu, "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW");
|
|
if (pos != -1) {
|
|
par = partial->vals[pos] * 1.0;
|
|
un = unhalt->vals[pos] * 1.0;
|
|
} else {
|
|
par = partial->sum * 1.0;
|
|
un = unhalt->sum * 1.0;
|
|
}
|
|
res = par/un;
|
|
ret = printf("%1.3f", res);
|
|
return(ret);
|
|
}
|
|
|
|
static int
|
|
allocstall2(struct counters *cpu, int pos)
|
|
{
|
|
/* 2 - PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
|
|
int ret;
|
|
struct counters *partial;
|
|
struct counters *unhalt;
|
|
double un, par, res;
|
|
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
|
|
partial = find_counter(cpu, "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP");
|
|
if (pos != -1) {
|
|
par = partial->vals[pos] * 1.0;
|
|
un = unhalt->vals[pos] * 1.0;
|
|
} else {
|
|
par = partial->sum * 1.0;
|
|
un = unhalt->sum * 1.0;
|
|
}
|
|
res = par/un;
|
|
ret = printf("%1.3f", res);
|
|
return(ret);
|
|
}
|
|
|
|
static int
|
|
br_mispredict(struct counters *cpu, int pos)
|
|
{
|
|
struct counters *brctr;
|
|
struct counters *unhalt;
|
|
int ret;
|
|
/* 3 - (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
|
|
double br, un, con, res;
|
|
con = 20.0;
|
|
|
|
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
|
|
brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
|
|
if (pos != -1) {
|
|
br = brctr->vals[pos] * 1.0;
|
|
un = unhalt->vals[pos] * 1.0;
|
|
} else {
|
|
br = brctr->sum * 1.0;
|
|
un = unhalt->sum * 1.0;
|
|
}
|
|
res = (con * br)/un;
|
|
ret = printf("%1.3f", res);
|
|
return(ret);
|
|
}
|
|
|
|
static int
|
|
br_mispredictib(struct counters *cpu, int pos)
|
|
{
|
|
struct counters *brctr;
|
|
struct counters *unhalt;
|
|
struct counters *clear, *clear2, *clear3;
|
|
struct counters *uops;
|
|
struct counters *recv;
|
|
struct counters *iss;
|
|
/* "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",*/
|
|
int ret;
|
|
/*
|
|
* (BR_MISP_RETIRED.ALL_BRANCHES /
|
|
* (BR_MISP_RETIRED.ALL_BRANCHES +
|
|
* MACHINE_CLEAR.COUNT) *
|
|
* ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) / (4 * CPU_CLK_UNHALTED.THREAD)))
|
|
*
|
|
*/
|
|
double br, cl, cl2, cl3, uo, re, un, con, res, is;
|
|
con = 4.0;
|
|
|
|
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
|
|
brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
|
|
clear = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
|
|
clear2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
|
|
clear3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
|
|
uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
|
|
iss = find_counter(cpu, "UOPS_ISSUED.ANY");
|
|
recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES");
|
|
if (pos != -1) {
|
|
br = brctr->vals[pos] * 1.0;
|
|
cl = clear->vals[pos] * 1.0;
|
|
cl2 = clear2->vals[pos] * 1.0;
|
|
cl3 = clear3->vals[pos] * 1.0;
|
|
uo = uops->vals[pos] * 1.0;
|
|
re = recv->vals[pos] * 1.0;
|
|
is = iss->vals[pos] * 1.0;
|
|
un = unhalt->vals[pos] * 1.0;
|
|
} else {
|
|
br = brctr->sum * 1.0;
|
|
cl = clear->sum * 1.0;
|
|
cl2 = clear2->sum * 1.0;
|
|
cl3 = clear3->sum * 1.0;
|
|
uo = uops->sum * 1.0;
|
|
re = recv->sum * 1.0;
|
|
is = iss->sum * 1.0;
|
|
un = unhalt->sum * 1.0;
|
|
}
|
|
res = (br/(br + cl + cl2 + cl3) * ((is - uo + con * re) / (con * un)));
|
|
ret = printf("%1.3f", res);
|
|
return(ret);
|
|
}
|
|
|
|
static int
|
|
splitloadib(struct counters *cpu, int pos)
|
|
{
|
|
int ret;
|
|
struct counters *mem;
|
|
struct counters *l1d, *ldblock;
|
|
struct counters *unhalt;
|
|
double un, memd, res, l1, ldb;
|
|
/*
|
|
* ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) * LD_BLOCKS.NO_SR) / CPU_CLK_UNHALTED.THREAD_P
|
|
* "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
|
|
*/
|
|
|
|
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
|
|
mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L1_MISS");
|
|
l1d = find_counter(cpu, "L1D_PEND_MISS.PENDING");
|
|
ldblock = find_counter(cpu, "LD_BLOCKS.NO_SR");
|
|
if (pos != -1) {
|
|
memd = mem->vals[pos] * 1.0;
|
|
l1 = l1d->vals[pos] * 1.0;
|
|
ldb = ldblock->vals[pos] * 1.0;
|
|
un = unhalt->vals[pos] * 1.0;
|
|
} else {
|
|
memd = mem->sum * 1.0;
|
|
l1 = l1d->sum * 1.0;
|
|
ldb = ldblock->sum * 1.0;
|
|
un = unhalt->sum * 1.0;
|
|
}
|
|
res = ((l1 / memd) * ldb)/un;
|
|
ret = printf("%1.3f", res);
|
|
return(ret);
|
|
}
|
|
|
|
static int
|
|
splitload(struct counters *cpu, int pos)
|
|
{
|
|
int ret;
|
|
struct counters *mem;
|
|
struct counters *unhalt;
|
|
double con, un, memd, res;
|
|
/* 4 - (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/
|
|
|
|
con = 5.0;
|
|
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
|
|
mem = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_LOADS");
|
|
if (pos != -1) {
|
|
memd = mem->vals[pos] * 1.0;
|
|
un = unhalt->vals[pos] * 1.0;
|
|
} else {
|
|
memd = mem->sum * 1.0;
|
|
un = unhalt->sum * 1.0;
|
|
}
|
|
res = (memd * con)/un;
|
|
ret = printf("%1.3f", res);
|
|
return(ret);
|
|
}
|
|
|
|
static int
|
|
splitstore(struct counters *cpu, int pos)
|
|
{
|
|
/* 5 - MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES (thresh > 0.01) */
|
|
int ret;
|
|
struct counters *mem_split;
|
|
struct counters *mem_stores;
|
|
double memsplit, memstore, res;
|
|
mem_split = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_STORES");
|
|
mem_stores = find_counter(cpu, "MEM_UOP_RETIRED.ALL_STORES");
|
|
if (pos != -1) {
|
|
memsplit = mem_split->vals[pos] * 1.0;
|
|
memstore = mem_stores->vals[pos] * 1.0;
|
|
} else {
|
|
memsplit = mem_split->sum * 1.0;
|
|
memstore = mem_stores->sum * 1.0;
|
|
}
|
|
res = memsplit/memstore;
|
|
ret = printf("%1.3f", res);
|
|
return(ret);
|
|
}
|
|
|
|
|
|
static int
|
|
contested(struct counters *cpu, int pos)
|
|
{
|
|
/* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
|
|
int ret;
|
|
struct counters *mem;
|
|
struct counters *unhalt;
|
|
double con, un, memd, res;
|
|
|
|
con = 60.0;
|
|
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
|
|
mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
|
|
if (pos != -1) {
|
|
memd = mem->vals[pos] * 1.0;
|
|
un = unhalt->vals[pos] * 1.0;
|
|
} else {
|
|
memd = mem->sum * 1.0;
|
|
un = unhalt->sum * 1.0;
|
|
}
|
|
res = (memd * con)/un;
|
|
ret = printf("%1.3f", res);
|
|
return(ret);
|
|
}
|
|
|
|
static int
|
|
contested_has(struct counters *cpu, int pos)
|
|
{
|
|
/* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
|
|
int ret;
|
|
struct counters *mem;
|
|
struct counters *unhalt;
|
|
double con, un, memd, res;
|
|
|
|
con = 84.0;
|
|
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
|
|
mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
|
|
if (pos != -1) {
|
|
memd = mem->vals[pos] * 1.0;
|
|
un = unhalt->vals[pos] * 1.0;
|
|
} else {
|
|
memd = mem->sum * 1.0;
|
|
un = unhalt->sum * 1.0;
|
|
}
|
|
res = (memd * con)/un;
|
|
ret = printf("%1.3f", res);
|
|
return(ret);
|
|
}
|
|
|
|
|
|
static int
|
|
blockstoreforward(struct counters *cpu, int pos)
|
|
{
|
|
/* 7 - (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .05)*/
|
|
int ret;
|
|
struct counters *ldb;
|
|
struct counters *unhalt;
|
|
double con, un, ld, res;
|
|
|
|
con = 13.0;
|
|
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
|
|
ldb = find_counter(cpu, "LD_BLOCKS_STORE_FORWARD");
|
|
if (pos != -1) {
|
|
ld = ldb->vals[pos] * 1.0;
|
|
un = unhalt->vals[pos] * 1.0;
|
|
} else {
|
|
ld = ldb->sum * 1.0;
|
|
un = unhalt->sum * 1.0;
|
|
}
|
|
res = (ld * con)/un;
|
|
ret = printf("%1.3f", res);
|
|
return(ret);
|
|
}
|
|
|
|
static int
|
|
cache2(struct counters *cpu, int pos)
|
|
{
|
|
/* ** Suspect ***
|
|
* 8 - ((MEM_LOAD_RETIRED.L3_HIT * 26) + (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) +
|
|
* (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
|
|
*/
|
|
int ret;
|
|
struct counters *mem1, *mem2, *mem3;
|
|
struct counters *unhalt;
|
|
double con1, con2, con3, un, me_1, me_2, me_3, res;
|
|
|
|
con1 = 26.0;
|
|
con2 = 43.0;
|
|
con3 = 60.0;
|
|
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
|
|
/* Call for MEM_LOAD_RETIRED.L3_HIT possibly MEM_LOAD_UOPS_RETIRED.LLC_HIT ?*/
|
|
mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
|
|
mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
|
|
mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
|
|
if (pos != -1) {
|
|
me_1 = mem1->vals[pos] * 1.0;
|
|
me_2 = mem2->vals[pos] * 1.0;
|
|
me_3 = mem3->vals[pos] * 1.0;
|
|
un = unhalt->vals[pos] * 1.0;
|
|
} else {
|
|
me_1 = mem1->sum * 1.0;
|
|
me_2 = mem2->sum * 1.0;
|
|
me_3 = mem3->sum * 1.0;
|
|
un = unhalt->sum * 1.0;
|
|
}
|
|
res = ((me_1 * con1) + (me_2 * con2) + (me_3 * con3))/un;
|
|
ret = printf("%1.3f", res);
|
|
return(ret);
|
|
}
|
|
|
|
static int
|
|
datasharing(struct counters *cpu, int pos)
|
|
{
|
|
/*
|
|
* (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
|
|
*/
|
|
int ret;
|
|
struct counters *mem;
|
|
struct counters *unhalt;
|
|
double con, res, me, un;
|
|
|
|
con = 43.0;
|
|
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
|
|
mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
|
|
if (pos != -1) {
|
|
me = mem->vals[pos] * 1.0;
|
|
un = unhalt->vals[pos] * 1.0;
|
|
} else {
|
|
me = mem->sum * 1.0;
|
|
un = unhalt->sum * 1.0;
|
|
}
|
|
res = (me * con)/un;
|
|
ret = printf("%1.3f", res);
|
|
return(ret);
|
|
|
|
}
|
|
|
|
|
|
static int
|
|
datasharing_has(struct counters *cpu, int pos)
|
|
{
|
|
/*
|
|
* (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
|
|
*/
|
|
int ret;
|
|
struct counters *mem;
|
|
struct counters *unhalt;
|
|
double con, res, me, un;
|
|
|
|
con = 72.0;
|
|
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
|
|
mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
|
|
if (pos != -1) {
|
|
me = mem->vals[pos] * 1.0;
|
|
un = unhalt->vals[pos] * 1.0;
|
|
} else {
|
|
me = mem->sum * 1.0;
|
|
un = unhalt->sum * 1.0;
|
|
}
|
|
res = (me * con)/un;
|
|
ret = printf("%1.3f", res);
|
|
return(ret);
|
|
|
|
}
|
|
|
|
|
|
static int
|
|
cache2ib(struct counters *cpu, int pos)
|
|
{
|
|
/*
|
|
* (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
|
|
*/
|
|
int ret;
|
|
struct counters *mem;
|
|
struct counters *unhalt;
|
|
double con, un, me, res;
|
|
|
|
con = 29.0;
|
|
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
|
|
mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
|
|
if (pos != -1) {
|
|
me = mem->vals[pos] * 1.0;
|
|
un = unhalt->vals[pos] * 1.0;
|
|
} else {
|
|
me = mem->sum * 1.0;
|
|
un = unhalt->sum * 1.0;
|
|
}
|
|
res = (con * me)/un;
|
|
ret = printf("%1.3f", res);
|
|
return(ret);
|
|
}
|
|
|
|
static int
|
|
cache2has(struct counters *cpu, int pos)
|
|
{
|
|
/*
|
|
* Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \
|
|
* (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) +
|
|
* (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))
|
|
* / CPU_CLK_UNHALTED.THREAD_P
|
|
*/
|
|
int ret;
|
|
struct counters *mem1, *mem2, *mem3;
|
|
struct counters *unhalt;
|
|
double con1, con2, con3, un, me1, me2, me3, res;
|
|
|
|
con1 = 36.0;
|
|
con2 = 72.0;
|
|
con3 = 84.0;
|
|
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
|
|
mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
|
|
mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
|
|
mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
|
|
if (pos != -1) {
|
|
me1 = mem1->vals[pos] * 1.0;
|
|
me2 = mem2->vals[pos] * 1.0;
|
|
me3 = mem3->vals[pos] * 1.0;
|
|
un = unhalt->vals[pos] * 1.0;
|
|
} else {
|
|
me1 = mem1->sum * 1.0;
|
|
me2 = mem2->sum * 1.0;
|
|
me3 = mem3->sum * 1.0;
|
|
un = unhalt->sum * 1.0;
|
|
}
|
|
res = ((me1 * con1) + (me2 * con2) + (me3 * con3))/un;
|
|
ret = printf("%1.3f", res);
|
|
return(ret);
|
|
}
|
|
|
|
static int
|
|
cache1(struct counters *cpu, int pos)
|
|
{
|
|
/* 9 - (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
|
|
int ret;
|
|
struct counters *mem;
|
|
struct counters *unhalt;
|
|
double con, un, me, res;
|
|
|
|
con = 180.0;
|
|
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
|
|
mem = find_counter(cpu, "MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS");
|
|
if (pos != -1) {
|
|
me = mem->vals[pos] * 1.0;
|
|
un = unhalt->vals[pos] * 1.0;
|
|
} else {
|
|
me = mem->sum * 1.0;
|
|
un = unhalt->sum * 1.0;
|
|
}
|
|
res = (me * con)/un;
|
|
ret = printf("%1.3f", res);
|
|
return(ret);
|
|
}
|
|
|
|
static int
|
|
cache1ib(struct counters *cpu, int pos)
|
|
{
|
|
/* 9 - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
|
|
int ret;
|
|
struct counters *mem;
|
|
struct counters *unhalt;
|
|
double con, un, me, res;
|
|
|
|
con = 180.0;
|
|
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
|
|
mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM");
|
|
if (pos != -1) {
|
|
me = mem->vals[pos] * 1.0;
|
|
un = unhalt->vals[pos] * 1.0;
|
|
} else {
|
|
me = mem->sum * 1.0;
|
|
un = unhalt->sum * 1.0;
|
|
}
|
|
res = (me * con)/un;
|
|
ret = printf("%1.3f", res);
|
|
return(ret);
|
|
}
|
|
|
|
|
|
static int
|
|
dtlb_missload(struct counters *cpu, int pos)
|
|
{
|
|
/* 10 - ((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t >=.1) */
|
|
int ret;
|
|
struct counters *dtlb_m, *dtlb_d;
|
|
struct counters *unhalt;
|
|
double con, un, d1, d2, res;
|
|
|
|
con = 7.0;
|
|
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
|
|
dtlb_m = find_counter(cpu, "DTLB_LOAD_MISSES.STLB_HIT");
|
|
dtlb_d = find_counter(cpu, "DTLB_LOAD_MISSES.WALK_DURATION");
|
|
if (pos != -1) {
|
|
d1 = dtlb_m->vals[pos] * 1.0;
|
|
d2 = dtlb_d->vals[pos] * 1.0;
|
|
un = unhalt->vals[pos] * 1.0;
|
|
} else {
|
|
d1 = dtlb_m->sum * 1.0;
|
|
d2 = dtlb_d->sum * 1.0;
|
|
un = unhalt->sum * 1.0;
|
|
}
|
|
res = ((d1 * con) + d2)/un;
|
|
ret = printf("%1.3f", res);
|
|
return(ret);
|
|
}
|
|
|
|
static int
|
|
dtlb_missstore(struct counters *cpu, int pos)
|
|
{
|
|
/*
|
|
* ((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION) /
|
|
* CPU_CLK_UNHALTED.THREAD_P (t >= .1)
|
|
*/
|
|
int ret;
|
|
struct counters *dtsb_m, *dtsb_d;
|
|
struct counters *unhalt;
|
|
double con, un, d1, d2, res;
|
|
|
|
con = 7.0;
|
|
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
|
|
dtsb_m = find_counter(cpu, "DTLB_STORE_MISSES.STLB_HIT");
|
|
dtsb_d = find_counter(cpu, "DTLB_STORE_MISSES.WALK_DURATION");
|
|
if (pos != -1) {
|
|
d1 = dtsb_m->vals[pos] * 1.0;
|
|
d2 = dtsb_d->vals[pos] * 1.0;
|
|
un = unhalt->vals[pos] * 1.0;
|
|
} else {
|
|
d1 = dtsb_m->sum * 1.0;
|
|
d2 = dtsb_d->sum * 1.0;
|
|
un = unhalt->sum * 1.0;
|
|
}
|
|
res = ((d1 * con) + d2)/un;
|
|
ret = printf("%1.3f", res);
|
|
return(ret);
|
|
}
|
|
|
|
static int
|
|
itlb_miss(struct counters *cpu, int pos)
|
|
{
|
|
/* ITLB_MISSES.WALK_DURATION / CPU_CLK_UNTHREAD_P IB */
|
|
int ret;
|
|
struct counters *itlb;
|
|
struct counters *unhalt;
|
|
double un, d1, res;
|
|
|
|
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
|
|
itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
|
|
if (pos != -1) {
|
|
d1 = itlb->vals[pos] * 1.0;
|
|
un = unhalt->vals[pos] * 1.0;
|
|
} else {
|
|
d1 = itlb->sum * 1.0;
|
|
un = unhalt->sum * 1.0;
|
|
}
|
|
res = d1/un;
|
|
ret = printf("%1.3f", res);
|
|
return(ret);
|
|
}
|
|
|
|
static int
|
|
icache_miss(struct counters *cpu, int pos)
|
|
{
|
|
/* (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P IB */
|
|
|
|
int ret;
|
|
struct counters *itlb, *icache;
|
|
struct counters *unhalt;
|
|
double un, d1, ic, res;
|
|
|
|
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
|
|
itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
|
|
icache = find_counter(cpu, "ICACHE.IFETCH_STALL");
|
|
if (pos != -1) {
|
|
d1 = itlb->vals[pos] * 1.0;
|
|
ic = icache->vals[pos] * 1.0;
|
|
un = unhalt->vals[pos] * 1.0;
|
|
} else {
|
|
d1 = itlb->sum * 1.0;
|
|
ic = icache->sum * 1.0;
|
|
un = unhalt->sum * 1.0;
|
|
}
|
|
res = (ic-d1)/un;
|
|
ret = printf("%1.3f", res);
|
|
return(ret);
|
|
|
|
}
|
|
|
|
static int
|
|
icache_miss_has(struct counters *cpu, int pos)
|
|
{
|
|
/* (36 * ICACHE.MISSES) / CPU_CLK_UNHALTED.THREAD_P */
|
|
|
|
int ret;
|
|
struct counters *icache;
|
|
struct counters *unhalt;
|
|
double un, con, ic, res;
|
|
|
|
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
|
|
icache = find_counter(cpu, "ICACHE.MISSES");
|
|
con = 36.0;
|
|
if (pos != -1) {
|
|
ic = icache->vals[pos] * 1.0;
|
|
un = unhalt->vals[pos] * 1.0;
|
|
} else {
|
|
ic = icache->sum * 1.0;
|
|
un = unhalt->sum * 1.0;
|
|
}
|
|
res = (con * ic)/un;
|
|
ret = printf("%1.3f", res);
|
|
return(ret);
|
|
|
|
}
|
|
|
|
static int
|
|
lcp_stall(struct counters *cpu, int pos)
|
|
{
|
|
/* ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P IB */
|
|
int ret;
|
|
struct counters *ild;
|
|
struct counters *unhalt;
|
|
double un, d1, res;
|
|
|
|
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
|
|
ild = find_counter(cpu, "ILD_STALL.LCP");
|
|
if (pos != -1) {
|
|
d1 = ild->vals[pos] * 1.0;
|
|
un = unhalt->vals[pos] * 1.0;
|
|
} else {
|
|
d1 = ild->sum * 1.0;
|
|
un = unhalt->sum * 1.0;
|
|
}
|
|
res = d1/un;
|
|
ret = printf("%1.3f", res);
|
|
return(ret);
|
|
|
|
}
|
|
|
|
|
|
static int
|
|
frontendstall(struct counters *cpu, int pos)
|
|
{
|
|
/* 12 - IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4) (thresh >= .15) */
|
|
int ret;
|
|
struct counters *idq;
|
|
struct counters *unhalt;
|
|
double con, un, id, res;
|
|
|
|
con = 4.0;
|
|
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
|
|
idq = find_counter(cpu, "IDQ_UOPS_NOT_DELIVERED.CORE");
|
|
if (pos != -1) {
|
|
id = idq->vals[pos] * 1.0;
|
|
un = unhalt->vals[pos] * 1.0;
|
|
} else {
|
|
id = idq->sum * 1.0;
|
|
un = unhalt->sum * 1.0;
|
|
}
|
|
res = id/(un * con);
|
|
ret = printf("%1.3f", res);
|
|
return(ret);
|
|
}
|
|
|
|
static int
|
|
clears(struct counters *cpu, int pos)
|
|
{
|
|
/* 13 - ((MACHINE_CLEARS.MEMORY_ORDERING + MACHINE_CLEARS.SMC + MACHINE_CLEARS.MASKMOV ) * 100 )
|
|
* / CPU_CLK_UNHALTED.THREAD_P (thresh >= .02)*/
|
|
|
|
int ret;
|
|
struct counters *clr1, *clr2, *clr3;
|
|
struct counters *unhalt;
|
|
double con, un, cl1, cl2, cl3, res;
|
|
|
|
con = 100.0;
|
|
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
|
|
clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
|
|
clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
|
|
clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
|
|
|
|
if (pos != -1) {
|
|
cl1 = clr1->vals[pos] * 1.0;
|
|
cl2 = clr2->vals[pos] * 1.0;
|
|
cl3 = clr3->vals[pos] * 1.0;
|
|
un = unhalt->vals[pos] * 1.0;
|
|
} else {
|
|
cl1 = clr1->sum * 1.0;
|
|
cl2 = clr2->sum * 1.0;
|
|
cl3 = clr3->sum * 1.0;
|
|
un = unhalt->sum * 1.0;
|
|
}
|
|
res = ((cl1 + cl2 + cl3) * con)/un;
|
|
ret = printf("%1.3f", res);
|
|
return(ret);
|
|
}
|
|
|
|
static int
|
|
microassist(struct counters *cpu, int pos)
|
|
{
|
|
/* 14 - IDQ.MS_CYCLES / CPU_CLK_UNHALTED.THREAD_P (thresh > .05) */
|
|
int ret;
|
|
struct counters *idq;
|
|
struct counters *unhalt;
|
|
double un, id, res, con;
|
|
|
|
con = 4.0;
|
|
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
|
|
idq = find_counter(cpu, "IDQ.MS_UOPS");
|
|
if (pos != -1) {
|
|
id = idq->vals[pos] * 1.0;
|
|
un = unhalt->vals[pos] * 1.0;
|
|
} else {
|
|
id = idq->sum * 1.0;
|
|
un = unhalt->sum * 1.0;
|
|
}
|
|
res = id/(un * con);
|
|
ret = printf("%1.3f", res);
|
|
return(ret);
|
|
}
|
|
|
|
|
|
static int
|
|
aliasing(struct counters *cpu, int pos)
|
|
{
|
|
/* 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
|
|
int ret;
|
|
struct counters *ld;
|
|
struct counters *unhalt;
|
|
double un, lds, con, res;
|
|
|
|
con = 5.0;
|
|
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
|
|
ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS");
|
|
if (pos != -1) {
|
|
lds = ld->vals[pos] * 1.0;
|
|
un = unhalt->vals[pos] * 1.0;
|
|
} else {
|
|
lds = ld->sum * 1.0;
|
|
un = unhalt->sum * 1.0;
|
|
}
|
|
res = (lds * con)/un;
|
|
ret = printf("%1.3f", res);
|
|
return(ret);
|
|
}
|
|
|
|
static int
|
|
fpassists(struct counters *cpu, int pos)
|
|
{
|
|
/* 16 - FP_ASSIST.ANY/INST_RETIRED.ANY_P */
|
|
int ret;
|
|
struct counters *fp;
|
|
struct counters *inst;
|
|
double un, fpd, res;
|
|
|
|
inst = find_counter(cpu, "INST_RETIRED.ANY_P");
|
|
fp = find_counter(cpu, "FP_ASSIST.ANY");
|
|
if (pos != -1) {
|
|
fpd = fp->vals[pos] * 1.0;
|
|
un = inst->vals[pos] * 1.0;
|
|
} else {
|
|
fpd = fp->sum * 1.0;
|
|
un = inst->sum * 1.0;
|
|
}
|
|
res = fpd/un;
|
|
ret = printf("%1.3f", res);
|
|
return(ret);
|
|
}
|
|
|
|
static int
|
|
otherassistavx(struct counters *cpu, int pos)
|
|
{
|
|
/* 17 - (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/
|
|
int ret;
|
|
struct counters *oth;
|
|
struct counters *unhalt;
|
|
double un, ot, con, res;
|
|
|
|
con = 75.0;
|
|
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
|
|
oth = find_counter(cpu, "OTHER_ASSISTS.AVX_TO_SSE");
|
|
if (pos != -1) {
|
|
ot = oth->vals[pos] * 1.0;
|
|
un = unhalt->vals[pos] * 1.0;
|
|
} else {
|
|
ot = oth->sum * 1.0;
|
|
un = unhalt->sum * 1.0;
|
|
}
|
|
res = (ot * con)/un;
|
|
ret = printf("%1.3f", res);
|
|
return(ret);
|
|
}
|
|
|
|
static int
|
|
otherassistsse(struct counters *cpu, int pos)
|
|
{
|
|
|
|
int ret;
|
|
struct counters *oth;
|
|
struct counters *unhalt;
|
|
double un, ot, con, res;
|
|
|
|
/* 18 (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/
|
|
con = 75.0;
|
|
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
|
|
oth = find_counter(cpu, "OTHER_ASSISTS.SSE_TO_AVX");
|
|
if (pos != -1) {
|
|
ot = oth->vals[pos] * 1.0;
|
|
un = unhalt->vals[pos] * 1.0;
|
|
} else {
|
|
ot = oth->sum * 1.0;
|
|
un = unhalt->sum * 1.0;
|
|
}
|
|
res = (ot * con)/un;
|
|
ret = printf("%1.3f", res);
|
|
return(ret);
|
|
}
|
|
|
|
static int
|
|
efficiency1(struct counters *cpu, int pos)
|
|
{
|
|
|
|
int ret;
|
|
struct counters *uops;
|
|
struct counters *unhalt;
|
|
double un, ot, con, res;
|
|
|
|
/* 19 (UOPS_RETIRED.RETIRE_SLOTS/(4*CPU_CLK_UNHALTED.THREAD_P) look if thresh < .9*/
|
|
con = 4.0;
|
|
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
|
|
uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
|
|
if (pos != -1) {
|
|
ot = uops->vals[pos] * 1.0;
|
|
un = unhalt->vals[pos] * 1.0;
|
|
} else {
|
|
ot = uops->sum * 1.0;
|
|
un = unhalt->sum * 1.0;
|
|
}
|
|
res = ot/(con * un);
|
|
ret = printf("%1.3f", res);
|
|
return(ret);
|
|
}
|
|
|
|
static int
|
|
efficiency2(struct counters *cpu, int pos)
|
|
{
|
|
|
|
int ret;
|
|
struct counters *uops;
|
|
struct counters *unhalt;
|
|
double un, ot, res;
|
|
|
|
/* 20 - CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P good if > 1. (comp factor)*/
|
|
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
|
|
uops = find_counter(cpu, "INST_RETIRED.ANY_P");
|
|
if (pos != -1) {
|
|
ot = uops->vals[pos] * 1.0;
|
|
un = unhalt->vals[pos] * 1.0;
|
|
} else {
|
|
ot = uops->sum * 1.0;
|
|
un = unhalt->sum * 1.0;
|
|
}
|
|
res = un/ot;
|
|
ret = printf("%1.3f", res);
|
|
return(ret);
|
|
}
|
|
|
|
#define SANDY_BRIDGE_COUNT 20
|
|
static struct cpu_entry sandy_bridge[SANDY_BRIDGE_COUNT] = {
|
|
/*01*/ { "allocstall1", "thresh > .05",
|
|
"pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW -w 1",
|
|
allocstall1 },
|
|
/*02*/ { "allocstall2", "thresh > .05",
|
|
"pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES -w 1",
|
|
allocstall2 },
|
|
/*03*/ { "br_miss", "thresh >= .2",
|
|
"pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
|
|
br_mispredict },
|
|
/*04*/ { "splitload", "thresh >= .1",
|
|
"pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1",
|
|
splitload },
|
|
/*05*/ { "splitstore", "thresh >= .01",
|
|
"pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
|
|
splitstore },
|
|
/*06*/ { "contested", "thresh >= .05",
|
|
"pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
contested },
|
|
/*07*/ { "blockstorefwd", "thresh >= .05",
|
|
"pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
blockstoreforward },
|
|
/*08*/ { "cache2", "thresh >= .2",
|
|
"pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
cache2 },
|
|
/*09*/ { "cache1", "thresh >= .2",
|
|
"pmcstat -s MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
cache1 },
|
|
/*10*/ { "dtlbmissload", "thresh >= .1",
|
|
"pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
dtlb_missload },
|
|
/*11*/ { "dtlbmissstore", "thresh >= .05",
|
|
"pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
dtlb_missstore },
|
|
/*12*/ { "frontendstall", "thresh >= .15",
|
|
"pmcstat -s IDQ_UOPS_NOT_DELIVERED.CORE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
frontendstall },
|
|
/*13*/ { "clears", "thresh >= .02",
|
|
"pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
clears },
|
|
/*14*/ { "microassist", "thresh >= .05",
|
|
"pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
microassist },
|
|
/*15*/ { "aliasing_4k", "thresh >= .1",
|
|
"pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
aliasing },
|
|
/*16*/ { "fpassist", "look for a excessive value",
|
|
"pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
|
|
fpassists },
|
|
/*17*/ { "otherassistavx", "look for a excessive value",
|
|
"pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
otherassistavx },
|
|
/*18*/ { "otherassistsse", "look for a excessive value",
|
|
"pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
otherassistsse },
|
|
/*19*/ { "eff1", "thresh < .9",
|
|
"pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
efficiency1 },
|
|
/*20*/ { "eff2", "thresh > 1.0",
|
|
"pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
efficiency2 },
|
|
};
|
|
|
|
|
|
#define IVY_BRIDGE_COUNT 21
|
|
static struct cpu_entry ivy_bridge[IVY_BRIDGE_COUNT] = {
|
|
/*1*/ { "eff1", "thresh < .75",
|
|
"pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
efficiency1 },
|
|
/*2*/ { "eff2", "thresh > 1.0",
|
|
"pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
efficiency2 },
|
|
/*3*/ { "itlbmiss", "thresh > .05",
|
|
"pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
itlb_miss },
|
|
/*4*/ { "icachemiss", "thresh > .05",
|
|
"pmcstat -s ICACHE.IFETCH_STALL -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
icache_miss },
|
|
/*5*/ { "lcpstall", "thresh > .05",
|
|
"pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
lcp_stall },
|
|
/*6*/ { "cache1", "thresh >= .2",
|
|
"pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
cache1ib },
|
|
/*7*/ { "cache2", "thresh >= .2",
|
|
"pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
cache2ib },
|
|
/*8*/ { "contested", "thresh >= .05",
|
|
"pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
contested },
|
|
/*9*/ { "datashare", "thresh >= .05",
|
|
"pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
datasharing },
|
|
/*10*/ { "blockstorefwd", "thresh >= .05",
|
|
"pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
blockstoreforward },
|
|
/*11*/ { "splitload", "thresh >= .1",
|
|
"pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
|
|
splitloadib },
|
|
/*12*/ { "splitstore", "thresh >= .01",
|
|
"pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
|
|
splitstore },
|
|
/*13*/ { "aliasing_4k", "thresh >= .1",
|
|
"pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
aliasing },
|
|
/*14*/ { "dtlbmissload", "thresh >= .1",
|
|
"pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
dtlb_missload },
|
|
/*15*/ { "dtlbmissstore", "thresh >= .05",
|
|
"pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
dtlb_missstore },
|
|
/*16*/ { "br_miss", "thresh >= .2",
|
|
"pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
|
|
br_mispredictib },
|
|
/*17*/ { "clears", "thresh >= .02",
|
|
"pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
clears },
|
|
/*18*/ { "microassist", "thresh >= .05",
|
|
"pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
microassist },
|
|
/*19*/ { "fpassist", "look for a excessive value",
|
|
"pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
|
|
fpassists },
|
|
/*20*/ { "otherassistavx", "look for a excessive value",
|
|
"pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
otherassistavx },
|
|
/*21*/ { "otherassistsse", "look for a excessive value",
|
|
"pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
otherassistsse },
|
|
};
|
|
|
|
#define HASWELL_COUNT 20
|
|
static struct cpu_entry haswell[HASWELL_COUNT] = {
|
|
/*1*/ { "eff1", "thresh < .75",
|
|
"pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
efficiency1 },
|
|
/*2*/ { "eff2", "thresh > 1.0",
|
|
"pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
efficiency2 },
|
|
/*3*/ { "itlbmiss", "thresh > .05",
|
|
"pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
itlb_miss },
|
|
/*4*/ { "icachemiss", "thresh > .05",
|
|
"pmcstat -s ICACHE.MISSES --s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
icache_miss_has },
|
|
/*5*/ { "lcpstall", "thresh > .05",
|
|
"pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
lcp_stall },
|
|
/*6*/ { "cache1", "thresh >= .2",
|
|
"pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
cache1ib },
|
|
/*7*/ { "cache2", "thresh >= .2",
|
|
"pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
cache2has },
|
|
/*8*/ { "contested", "thresh >= .05",
|
|
"pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
contested_has },
|
|
/*9*/ { "datashare", "thresh >= .05",
|
|
"pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
datasharing_has },
|
|
/*10*/ { "blockstorefwd", "thresh >= .05",
|
|
"pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
blockstoreforward },
|
|
/*11*/ { "splitload", "thresh >= .1",
|
|
"pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1",
|
|
splitload },
|
|
/*12*/ { "splitstore", "thresh >= .01",
|
|
"pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
|
|
splitstore },
|
|
/*13*/ { "aliasing_4k", "thresh >= .1",
|
|
"pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
aliasing },
|
|
/*14*/ { "dtlbmissload", "thresh >= .1",
|
|
"pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
dtlb_missload },
|
|
/*15*/ { "br_miss", "thresh >= .2",
|
|
"pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
|
|
br_mispredict },
|
|
/*16*/ { "clears", "thresh >= .02",
|
|
"pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
clears },
|
|
/*17*/ { "microassist", "thresh >= .05",
|
|
"pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
microassist },
|
|
/*18*/ { "fpassist", "look for a excessive value",
|
|
"pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
|
|
fpassists },
|
|
/*19*/ { "otherassistavx", "look for a excessive value",
|
|
"pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
otherassistavx },
|
|
/*20*/ { "otherassistsse", "look for a excessive value",
|
|
"pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
|
|
otherassistsse },
|
|
};
|
|
|
|
|
|
static void
|
|
set_sandybridge(void)
|
|
{
|
|
strcpy(the_cpu.cputype, "SandyBridge PMC");
|
|
the_cpu.number = SANDY_BRIDGE_COUNT;
|
|
the_cpu.ents = sandy_bridge;
|
|
the_cpu.explain = explain_name_sb;
|
|
}
|
|
|
|
static void
|
|
set_ivybridge(void)
|
|
{
|
|
strcpy(the_cpu.cputype, "IvyBridge PMC");
|
|
the_cpu.number = IVY_BRIDGE_COUNT;
|
|
the_cpu.ents = ivy_bridge;
|
|
the_cpu.explain = explain_name_ib;
|
|
}
|
|
|
|
|
|
static void
|
|
set_haswell(void)
|
|
{
|
|
strcpy(the_cpu.cputype, "HASWELL PMC");
|
|
the_cpu.number = HASWELL_COUNT;
|
|
the_cpu.ents = haswell;
|
|
the_cpu.explain = explain_name_has;
|
|
}
|
|
|
|
static void
|
|
set_expression(char *name)
|
|
{
|
|
int found = 0, i;
|
|
for(i=0 ; i< the_cpu.number; i++) {
|
|
if (strcmp(name, the_cpu.ents[i].name) == 0) {
|
|
found = 1;
|
|
expression = the_cpu.ents[i].func;
|
|
command = the_cpu.ents[i].command;
|
|
threshold = the_cpu.ents[i].thresh;
|
|
break;
|
|
}
|
|
}
|
|
if (!found) {
|
|
printf("For CPU type %s we have no expression:%s\n",
|
|
the_cpu.cputype, name);
|
|
exit(-1);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static int
|
|
validate_expression(char *name)
|
|
{
|
|
int i, found;
|
|
|
|
found = 0;
|
|
for(i=0 ; i< the_cpu.number; i++) {
|
|
if (strcmp(name, the_cpu.ents[i].name) == 0) {
|
|
found = 1;
|
|
break;
|
|
}
|
|
}
|
|
if (!found) {
|
|
return(-1);
|
|
}
|
|
return (0);
|
|
}
|
|
|
|
static void
|
|
do_expression(struct counters *cpu, int pos)
|
|
{
|
|
if (expression == NULL)
|
|
return;
|
|
(*expression)(cpu, pos);
|
|
}
|
|
|
|
static void
|
|
process_header(int idx, char *p)
|
|
{
|
|
struct counters *up;
|
|
int i, len, nlen;
|
|
/*
|
|
* Given header element idx, at p in
|
|
* form 's/NN/nameof'
|
|
* process the entry to pull out the name and
|
|
* the CPU number.
|
|
*/
|
|
if (strncmp(p, "s/", 2)) {
|
|
printf("Check -- invalid header no s/ in %s\n",
|
|
p);
|
|
return;
|
|
}
|
|
up = &cnts[idx];
|
|
up->cpu = strtol(&p[2], NULL, 10);
|
|
len = strlen(p);
|
|
for (i=2; i<len; i++) {
|
|
if (p[i] == '/') {
|
|
nlen = strlen(&p[(i+1)]);
|
|
if (nlen < (MAX_NLEN-1)) {
|
|
strcpy(up->counter_name, &p[(i+1)]);
|
|
} else {
|
|
strncpy(up->counter_name, &p[(i+1)], (MAX_NLEN-1));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static void
|
|
build_counters_from_header(FILE *io)
|
|
{
|
|
char buffer[8192], *p;
|
|
int i, len, cnt;
|
|
size_t mlen;
|
|
|
|
/* We have a new start, lets
|
|
* setup our headers and cpus.
|
|
*/
|
|
if (fgets(buffer, sizeof(buffer), io) == NULL) {
|
|
printf("First line can't be read from file err:%d\n", errno);
|
|
return;
|
|
}
|
|
/*
|
|
* Ok output is an array of counters. Once
|
|
* we start to read the values in we must
|
|
* put them in there slot to match there CPU and
|
|
* counter being updated. We create a mass array
|
|
* of the counters, filling in the CPU and
|
|
* counter name.
|
|
*/
|
|
/* How many do we get? */
|
|
len = strlen(buffer);
|
|
for (i=0, cnt=0; i<len; i++) {
|
|
if (strncmp(&buffer[i], "s/", 2) == 0) {
|
|
cnt++;
|
|
for(;i<len;i++) {
|
|
if (buffer[i] == ' ')
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
mlen = sizeof(struct counters) * cnt;
|
|
cnts = malloc(mlen);
|
|
ncnts = cnt;
|
|
if (cnts == NULL) {
|
|
printf("No memory err:%d\n", errno);
|
|
return;
|
|
}
|
|
memset(cnts, 0, mlen);
|
|
for (i=0, cnt=0; i<len; i++) {
|
|
if (strncmp(&buffer[i], "s/", 2) == 0) {
|
|
p = &buffer[i];
|
|
for(;i<len;i++) {
|
|
if (buffer[i] == ' ') {
|
|
buffer[i] = 0;
|
|
break;
|
|
}
|
|
}
|
|
process_header(cnt, p);
|
|
cnt++;
|
|
}
|
|
}
|
|
if (verbose)
|
|
printf("We have %d entries\n", cnt);
|
|
}
|
|
extern int max_to_collect;
|
|
int max_to_collect = MAX_COUNTER_SLOTS;
|
|
|
|
static int
|
|
read_a_line(FILE *io)
|
|
{
|
|
char buffer[8192], *p, *stop;
|
|
int pos, i;
|
|
|
|
if (fgets(buffer, sizeof(buffer), io) == NULL) {
|
|
return(0);
|
|
}
|
|
p = buffer;
|
|
for (i=0; i<ncnts; i++) {
|
|
pos = cnts[i].pos;
|
|
cnts[i].vals[pos] = strtol(p, &stop, 0);
|
|
cnts[i].pos++;
|
|
cnts[i].sum += cnts[i].vals[pos];
|
|
p = stop;
|
|
}
|
|
return (1);
|
|
}
|
|
|
|
/*
 * Number of CPUs that have counters; set by print_header() and used by
 * process_file() to decide where each output row ends.
 */
extern int cpu_count_out;
int cpu_count_out = 0;
|
|
|
|
static void
|
|
print_header(void)
|
|
{
|
|
int i, cnt, printed_cnt;
|
|
|
|
printf("*********************************\n");
|
|
for(i=0, cnt=0; i<MAX_CPU; i++) {
|
|
if (glob_cpu[i]) {
|
|
cnt++;
|
|
}
|
|
}
|
|
cpu_count_out = cnt;
|
|
for(i=0, printed_cnt=0; i<MAX_CPU; i++) {
|
|
if (glob_cpu[i]) {
|
|
printf("CPU%d", i);
|
|
printed_cnt++;
|
|
}
|
|
if (printed_cnt == cnt) {
|
|
printf("\n");
|
|
break;
|
|
} else {
|
|
printf("\t");
|
|
}
|
|
}
|
|
}
|
|
|
|
static void
|
|
lace_cpus_together(void)
|
|
{
|
|
int i, j, lace_cpu;
|
|
struct counters *cpat, *at;
|
|
|
|
for(i=0; i<ncnts; i++) {
|
|
cpat = &cnts[i];
|
|
if (cpat->next_cpu) {
|
|
/* Already laced in */
|
|
continue;
|
|
}
|
|
lace_cpu = cpat->cpu;
|
|
if (lace_cpu >= MAX_CPU) {
|
|
printf("CPU %d to big\n", lace_cpu);
|
|
continue;
|
|
}
|
|
if (glob_cpu[lace_cpu] == NULL) {
|
|
glob_cpu[lace_cpu] = cpat;
|
|
} else {
|
|
/* Already processed this cpu */
|
|
continue;
|
|
}
|
|
/* Ok look forward for cpu->cpu and link in */
|
|
for(j=(i+1); j<ncnts; j++) {
|
|
at = &cnts[j];
|
|
if (at->next_cpu) {
|
|
continue;
|
|
}
|
|
if (at->cpu == lace_cpu) {
|
|
/* Found one */
|
|
cpat->next_cpu = at;
|
|
cpat = at;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
static void
|
|
process_file(char *filename)
|
|
{
|
|
FILE *io;
|
|
int i;
|
|
int line_at, not_done;
|
|
pid_t pid_of_command=0;
|
|
|
|
if (filename == NULL) {
|
|
io = my_popen(command, "r", &pid_of_command);
|
|
} else {
|
|
io = fopen(filename, "r");
|
|
if (io == NULL) {
|
|
printf("Can't process file %s err:%d\n",
|
|
filename, errno);
|
|
return;
|
|
}
|
|
}
|
|
build_counters_from_header(io);
|
|
if (cnts == NULL) {
|
|
/* Nothing we can do */
|
|
printf("Nothing to do -- no counters built\n");
|
|
if (io) {
|
|
fclose(io);
|
|
}
|
|
return;
|
|
}
|
|
lace_cpus_together();
|
|
print_header();
|
|
if (verbose) {
|
|
for (i=0; i<ncnts; i++) {
|
|
printf("Counter:%s cpu:%d index:%d\n",
|
|
cnts[i].counter_name,
|
|
cnts[i].cpu, i);
|
|
}
|
|
}
|
|
line_at = 0;
|
|
not_done = 1;
|
|
while(not_done) {
|
|
if (read_a_line(io)) {
|
|
line_at++;
|
|
} else {
|
|
break;
|
|
}
|
|
if (line_at >= max_to_collect) {
|
|
not_done = 0;
|
|
}
|
|
if (filename == NULL) {
|
|
int cnt;
|
|
/* For the ones we dynamically open we print now */
|
|
for(i=0, cnt=0; i<MAX_CPU; i++) {
|
|
do_expression(glob_cpu[i], (line_at-1));
|
|
cnt++;
|
|
if (cnt == cpu_count_out) {
|
|
printf("\n");
|
|
break;
|
|
} else {
|
|
printf("\t");
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if (filename) {
|
|
fclose(io);
|
|
} else {
|
|
my_pclose(io, pid_of_command);
|
|
}
|
|
}
|
|
/*
 * cpuid(in, a, b, c, d): on amd64 execute the CPUID instruction with
 * "in" in %eax and store %eax/%ebx/%ecx/%edx into a/b/c/d.  On any
 * other architecture the macro expands to nothing, so the outputs keep
 * whatever values the caller gave them.
 */
#if defined(__amd64__)
#define cpuid(in, a, b, c, d) \
	asm("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (in));
#else
#define cpuid(in, a, b, c, d)
#endif
|
|
|
|
static void
|
|
get_cpuid_set(void)
|
|
{
|
|
unsigned long eax, ebx, ecx, edx;
|
|
int model;
|
|
pid_t pid_of_command=0;
|
|
size_t sz, len;
|
|
FILE *io;
|
|
char linebuf[1024], *str;
|
|
|
|
eax = ebx = ecx = edx = 0;
|
|
|
|
cpuid(0, eax, ebx, ecx, edx);
|
|
if (ebx == 0x68747541) {
|
|
printf("AMD processors are not supported by this program\n");
|
|
printf("Sorry\n");
|
|
exit(0);
|
|
} else if (ebx == 0x6972794) {
|
|
printf("Cyrix processors are not supported by this program\n");
|
|
printf("Sorry\n");
|
|
exit(0);
|
|
} else if (ebx == 0x756e6547) {
|
|
printf("Genuine Intel\n");
|
|
} else {
|
|
printf("Unknown processor type 0x%lx Only Intel AMD64 types are supported by this routine!\n", ebx);
|
|
exit(0);
|
|
}
|
|
cpuid(1, eax, ebx, ecx, edx);
|
|
model = (((eax & 0xF0000) >> 12) | ((eax & 0xF0) >> 4));
|
|
printf("CPU model is 0x%x id:0x%lx\n", model, eax);
|
|
switch (eax & 0xF00) {
|
|
case 0x500: /* Pentium family processors */
|
|
printf("Intel Pentium P5\n");
|
|
goto not_supported;
|
|
break;
|
|
case 0x600: /* Pentium Pro, Celeron, Pentium II & III */
|
|
switch (model) {
|
|
case 0x1:
|
|
printf("Intel Pentium P6\n");
|
|
goto not_supported;
|
|
break;
|
|
case 0x3:
|
|
case 0x5:
|
|
printf("Intel PII\n");
|
|
goto not_supported;
|
|
break;
|
|
case 0x6: case 0x16:
|
|
printf("Intel CL\n");
|
|
goto not_supported;
|
|
break;
|
|
case 0x7: case 0x8: case 0xA: case 0xB:
|
|
printf("Intel PIII\n");
|
|
goto not_supported;
|
|
break;
|
|
case 0x9: case 0xD:
|
|
printf("Intel PM\n");
|
|
goto not_supported;
|
|
break;
|
|
case 0xE:
|
|
printf("Intel CORE\n");
|
|
goto not_supported;
|
|
break;
|
|
case 0xF:
|
|
printf("Intel CORE2\n");
|
|
goto not_supported;
|
|
break;
|
|
case 0x17:
|
|
printf("Intel CORE2EXTREME\n");
|
|
goto not_supported;
|
|
break;
|
|
case 0x1C: /* Per Intel document 320047-002. */
|
|
printf("Intel ATOM\n");
|
|
goto not_supported;
|
|
break;
|
|
case 0x1A:
|
|
case 0x1E: /*
|
|
* Per Intel document 253669-032 9/2009,
|
|
* pages A-2 and A-57
|
|
*/
|
|
case 0x1F: /*
|
|
* Per Intel document 253669-032 9/2009,
|
|
* pages A-2 and A-57
|
|
*/
|
|
printf("Intel COREI7\n");
|
|
goto not_supported;
|
|
break;
|
|
case 0x2E:
|
|
printf("Intel NEHALEM\n");
|
|
goto not_supported;
|
|
break;
|
|
case 0x25: /* Per Intel document 253669-033US 12/2009. */
|
|
case 0x2C: /* Per Intel document 253669-033US 12/2009. */
|
|
printf("Intel WESTMERE\n");
|
|
goto not_supported;
|
|
break;
|
|
case 0x2F: /* Westmere-EX, seen in wild */
|
|
printf("Intel WESTMERE\n");
|
|
goto not_supported;
|
|
break;
|
|
case 0x2A: /* Per Intel document 253669-039US 05/2011. */
|
|
printf("Intel SANDYBRIDGE\n");
|
|
set_sandybridge();
|
|
break;
|
|
case 0x2D: /* Per Intel document 253669-044US 08/2012. */
|
|
printf("Intel SANDYBRIDGE_XEON\n");
|
|
set_sandybridge();
|
|
break;
|
|
case 0x3A: /* Per Intel document 253669-043US 05/2012. */
|
|
printf("Intel IVYBRIDGE\n");
|
|
set_ivybridge();
|
|
break;
|
|
case 0x3E: /* Per Intel document 325462-045US 01/2013. */
|
|
printf("Intel IVYBRIDGE_XEON\n");
|
|
set_ivybridge();
|
|
break;
|
|
case 0x3F: /* Per Intel document 325462-045US 09/2014. */
|
|
printf("Intel HASWELL (Xeon)\n");
|
|
set_haswell();
|
|
break;
|
|
case 0x3C: /* Per Intel document 325462-045US 01/2013. */
|
|
case 0x45:
|
|
case 0x46:
|
|
printf("Intel HASWELL\n");
|
|
set_haswell();
|
|
break;
|
|
case 0x4D:
|
|
/* Per Intel document 330061-001 01/2014. */
|
|
printf("Intel ATOM_SILVERMONT\n");
|
|
goto not_supported;
|
|
break;
|
|
default:
|
|
printf("Intel model 0x%x is not known -- sorry\n",
|
|
model);
|
|
goto not_supported;
|
|
break;
|
|
}
|
|
break;
|
|
case 0xF00: /* P4 */
|
|
printf("Intel unknown model %d\n", model);
|
|
goto not_supported;
|
|
break;
|
|
}
|
|
/* Ok lets load the list of all known PMC's */
|
|
io = my_popen("/usr/sbin/pmccontrol -L", "r", &pid_of_command);
|
|
if (valid_pmcs == NULL) {
|
|
/* Likely */
|
|
pmc_allocated_cnt = PMC_INITIAL_ALLOC;
|
|
sz = sizeof(char *) * pmc_allocated_cnt;
|
|
valid_pmcs = malloc(sz);
|
|
if (valid_pmcs == NULL) {
|
|
printf("No memory allocation fails at startup?\n");
|
|
exit(-1);
|
|
}
|
|
memset(valid_pmcs, 0, sz);
|
|
}
|
|
|
|
while (fgets(linebuf, sizeof(linebuf), io) != NULL) {
|
|
if (linebuf[0] != '\t') {
|
|
/* sometimes headers ;-) */
|
|
continue;
|
|
}
|
|
len = strlen(linebuf);
|
|
if (linebuf[(len-1)] == '\n') {
|
|
/* Likely */
|
|
linebuf[(len-1)] = 0;
|
|
}
|
|
str = &linebuf[1];
|
|
len = strlen(str) + 1;
|
|
valid_pmcs[valid_pmc_cnt] = malloc(len);
|
|
if (valid_pmcs[valid_pmc_cnt] == NULL) {
|
|
printf("No memory2 allocation fails at startup?\n");
|
|
exit(-1);
|
|
}
|
|
memset(valid_pmcs[valid_pmc_cnt], 0, len);
|
|
strcpy(valid_pmcs[valid_pmc_cnt], str);
|
|
valid_pmc_cnt++;
|
|
if (valid_pmc_cnt >= pmc_allocated_cnt) {
|
|
/* Got to expand -- unlikely */
|
|
char **more;
|
|
|
|
sz = sizeof(char *) * (pmc_allocated_cnt * 2);
|
|
more = malloc(sz);
|
|
if (more == NULL) {
|
|
printf("No memory3 allocation fails at startup?\n");
|
|
exit(-1);
|
|
}
|
|
memset(more, 0, sz);
|
|
memcpy(more, valid_pmcs, sz);
|
|
pmc_allocated_cnt *= 2;
|
|
free(valid_pmcs);
|
|
valid_pmcs = more;
|
|
}
|
|
}
|
|
my_pclose(io, pid_of_command);
|
|
return;
|
|
not_supported:
|
|
printf("Not supported\n");
|
|
exit(-1);
|
|
}
|
|
|
|
static void
|
|
explain_all(void)
|
|
{
|
|
int i;
|
|
printf("For CPU's of type %s the following expressions are available:\n",the_cpu.cputype);
|
|
printf("-------------------------------------------------------------\n");
|
|
for(i=0; i<the_cpu.number; i++){
|
|
printf("For -e %s ", the_cpu.ents[i].name);
|
|
(*the_cpu.explain)(the_cpu.ents[i].name);
|
|
printf("----------------------------\n");
|
|
}
|
|
}
|
|
|
|
/*
 * Run pmcstat briefly against a single PMC and report whether it works.
 *
 * pmc        - name of the PMC handed to "pmcstat -s".
 * out_so_far - number of characters the caller already printed on the
 *              current output line; the verdict is padded out to
 *              column 50 based on it.
 *
 * The first line read from pmcstat is searched for either "ERROR" or
 * the PMC name.  When the name is found "Pass" is printed followed by
 * the second line of output (leading blanks removed); in every other
 * case a "Failed" message is printed.
 */
static void
test_for_a_pmc(const char *pmc, int out_so_far)
{
	FILE *io;
	pid_t pid_of_command=0;
	char my_command[1024];
	char line[1024];
	int len, llen, i;

	if (out_so_far < 50) {
		len = 50 - out_so_far;
		for(i=0; i<len; i++) {
			printf(" ");
		}
	}
	/* Bounded formatting: refuse a name that would not fit the buffer. */
	len = snprintf(my_command, sizeof(my_command),
	    "/usr/sbin/pmcstat -w .25 -c 0 -s %s", pmc);
	if ((len < 0) || ((size_t)len >= sizeof(my_command))) {
		printf("Failed -- PMC name too long\n");
		return;
	}
	io = my_popen(my_command, "r", &pid_of_command);
	if (io == NULL) {
		printf("Failed -- popen fails\n");
		return;
	}
	/* Setup what we expect: the PMC name itself */
	len = (int)strlen(pmc);
	if (fgets(line, sizeof(line), io) == NULL) {
		printf("Failed -- no output from pmstat\n");
		goto out;
	}
	llen = strlen(line);
	if ((llen > 0) && (line[(llen-1)] == '\n')) {
		line[(llen-1)] = 0;
		llen--;
	}
	for(i=2; i<(llen-len); i++) {
		if (strncmp(&line[i], "ERROR", 5) == 0) {
			printf("Failed %s\n", line);
			goto out;
		} else if (strncmp(&line[i], pmc, len) == 0) {
			int j, k;

			if (fgets(line, sizeof(line), io) == NULL) {
				printf("Failed -- no second output from pmstat\n");
				goto out;
			}
			/*
			 * Skip the leading blanks one at a time.  fgets()
			 * always NUL terminates, so the scan cannot run
			 * past the end of the string.
			 */
			j = 0;
			while (line[j] == ' ') {
				j++;
			}
			printf("Pass");
			len = strlen(&line[j]);
			if (len < 20) {
				for(k=0; k<(20-len); k++) {
					printf(" ");
				}
			}
			if (len) {
				/* The value line still carries its own newline. */
				printf("%s", &line[j]);
			} else {
				printf("\n");
			}
			goto out;
		}
	}
	printf("Failed -- '%s' not '%s'\n", line, pmc);
out:
	my_pclose(io, pid_of_command);
}
|
|
|
|
/*
 * Store a private copy of name in vars[cur_cnt] unless one of the first
 * cur_cnt entries already holds an identical string.
 *
 * Returns 1 when the name was added and 0 when it was already present.
 * The program exits if the target slot is unexpectedly occupied or if
 * the copy cannot be allocated.
 */
static int
add_it_to(char **vars, int cur_cnt, char *name)
{
	char **scan, **slot;
	size_t nbytes;

	slot = &vars[cur_cnt];
	for (scan = vars; scan < slot; scan++) {
		if (strcmp(*scan, name) == 0) {
			/* Already have */
			return (0);
		}
	}
	if (*slot != NULL) {
		printf("Cur_cnt:%d filled with %s??\n", cur_cnt, *slot);
		exit(-1);
	}
	/* Ok its new, duplicate it including the terminating NUL */
	nbytes = strlen(name) + 1;
	*slot = malloc(nbytes);
	if (*slot == NULL) {
		printf("No memory %s\n", __FUNCTION__);
		exit(-1);
	}
	memcpy(*slot, name, nbytes);
	return (1);
}
|
|
|
|
static char *
|
|
build_command_for_exp(struct expression *exp)
|
|
{
|
|
/*
|
|
* Build the pmcstat command to handle
|
|
* the passed in expression.
|
|
* /usr/sbin/pmcstat -w 1 -s NNN -s QQQ
|
|
* where NNN and QQQ represent the PMC's in the expression
|
|
* uniquely..
|
|
*/
|
|
char forming[1024];
|
|
int cnt_pmc, alloced_pmcs, i;
|
|
struct expression *at;
|
|
char **vars, *cmd;
|
|
size_t mal;
|
|
|
|
alloced_pmcs = cnt_pmc = 0;
|
|
/* first how many do we have */
|
|
at = exp;
|
|
while (at) {
|
|
if (at->type == TYPE_VALUE_PMC) {
|
|
cnt_pmc++;
|
|
}
|
|
at = at->next;
|
|
}
|
|
if (cnt_pmc == 0) {
|
|
printf("No PMC's in your expression -- nothing to do!!\n");
|
|
exit(0);
|
|
}
|
|
mal = cnt_pmc * sizeof(char *);
|
|
vars = malloc(mal);
|
|
if (vars == NULL) {
|
|
printf("No memory\n");
|
|
exit(-1);
|
|
}
|
|
memset(vars, 0, mal);
|
|
at = exp;
|
|
while (at) {
|
|
if (at->type == TYPE_VALUE_PMC) {
|
|
if(add_it_to(vars, alloced_pmcs, at->name)) {
|
|
alloced_pmcs++;
|
|
}
|
|
}
|
|
at = at->next;
|
|
}
|
|
/* Now we have a unique list in vars so create our command */
|
|
mal = 23; /* "/usr/sbin/pmcstat -w 1" + \0 */
|
|
for(i=0; i<alloced_pmcs; i++) {
|
|
mal += strlen(vars[i]) + 4; /* var + " -s " */
|
|
}
|
|
cmd = malloc((mal+2));
|
|
if (cmd == NULL) {
|
|
printf("%s out of mem\n", __FUNCTION__);
|
|
exit(-1);
|
|
}
|
|
memset(cmd, 0, (mal+2));
|
|
strcpy(cmd, "/usr/sbin/pmcstat -w 1");
|
|
at = exp;
|
|
for(i=0; i<alloced_pmcs; i++) {
|
|
sprintf(forming, " -s %s", vars[i]);
|
|
strcat(cmd, forming);
|
|
free(vars[i]);
|
|
vars[i] = NULL;
|
|
}
|
|
free(vars);
|
|
return(cmd);
|
|
}
|
|
|
|
static int
|
|
user_expr(struct counters *cpu, int pos)
|
|
{
|
|
int ret;
|
|
double res;
|
|
struct counters *var;
|
|
struct expression *at;
|
|
|
|
at = master_exp;
|
|
while (at) {
|
|
if (at->type == TYPE_VALUE_PMC) {
|
|
var = find_counter(cpu, at->name);
|
|
if (var == NULL) {
|
|
printf("%s:Can't find counter %s?\n", __FUNCTION__, at->name);
|
|
exit(-1);
|
|
}
|
|
if (pos != -1) {
|
|
at->value = var->vals[pos] * 1.0;
|
|
} else {
|
|
at->value = var->sum * 1.0;
|
|
}
|
|
}
|
|
at = at->next;
|
|
}
|
|
res = run_expr(master_exp, 1, NULL);
|
|
ret = printf("%1.3f", res);
|
|
return(ret);
|
|
}
|
|
|
|
|
|
static void
|
|
set_manual_exp(struct expression *exp)
|
|
{
|
|
expression = user_expr;
|
|
command = build_command_for_exp(exp);
|
|
threshold = "User defined threshold";
|
|
}
|
|
|
|
static void
|
|
run_tests(void)
|
|
{
|
|
int i, lenout;
|
|
printf("Running tests on %d PMC's this may take some time\n", valid_pmc_cnt);
|
|
printf("------------------------------------------------------------------------\n");
|
|
for(i=0; i<valid_pmc_cnt; i++) {
|
|
lenout = printf("%s", valid_pmcs[i]);
|
|
fflush(stdout);
|
|
test_for_a_pmc(valid_pmcs[i], lenout);
|
|
}
|
|
}
|
|
static void
|
|
list_all(void)
|
|
{
|
|
int i, cnt, j;
|
|
printf("PMC Abbreviation\n");
|
|
printf("--------------------------------------------------------------\n");
|
|
for(i=0; i<valid_pmc_cnt; i++) {
|
|
cnt = printf("%s", valid_pmcs[i]);
|
|
for(j=cnt; j<52; j++) {
|
|
printf(" ");
|
|
}
|
|
printf("%%%d\n", i);
|
|
}
|
|
}
|
|
|
|
|
|
int
|
|
main(int argc, char **argv)
|
|
{
|
|
int i, j, cnt;
|
|
char *filename=NULL;
|
|
char *name=NULL;
|
|
int help_only = 0;
|
|
int test_mode = 0;
|
|
|
|
get_cpuid_set();
|
|
memset(glob_cpu, 0, sizeof(glob_cpu));
|
|
while ((i = getopt(argc, argv, "LHhvm:i:?e:TE:")) != -1) {
|
|
switch (i) {
|
|
case 'L':
|
|
list_all();
|
|
return(0);
|
|
case 'H':
|
|
printf("**********************************\n");
|
|
explain_all();
|
|
printf("**********************************\n");
|
|
return(0);
|
|
break;
|
|
case 'T':
|
|
test_mode = 1;
|
|
break;
|
|
case 'E':
|
|
master_exp = parse_expression(optarg);
|
|
if (master_exp) {
|
|
set_manual_exp(master_exp);
|
|
}
|
|
break;
|
|
case 'e':
|
|
if (validate_expression(optarg)) {
|
|
printf("Unknown expression %s\n", optarg);
|
|
return(0);
|
|
}
|
|
name = optarg;
|
|
set_expression(optarg);
|
|
break;
|
|
case 'm':
|
|
max_to_collect = strtol(optarg, NULL, 0);
|
|
if (max_to_collect > MAX_COUNTER_SLOTS) {
|
|
/* You can't collect more than max in array */
|
|
max_to_collect = MAX_COUNTER_SLOTS;
|
|
}
|
|
break;
|
|
case 'v':
|
|
verbose++;
|
|
break;
|
|
case 'h':
|
|
help_only = 1;
|
|
break;
|
|
case 'i':
|
|
filename = optarg;
|
|
break;
|
|
case '?':
|
|
default:
|
|
use:
|
|
printf("Use %s [ -i inputfile -v -m max_to_collect -e expr -E -h -? -H]\n",
|
|
argv[0]);
|
|
printf("-i inputfile -- use source as inputfile not stdin (if stdin collect)\n");
|
|
printf("-v -- verbose dump debug type things -- you don't want this\n");
|
|
printf("-m N -- maximum to collect is N measurments\n");
|
|
printf("-e expr-name -- Do expression expr-name\n");
|
|
printf("-E 'your expression' -- Do your expression\n");
|
|
printf("-h -- Don't do the expression I put in -e xxx just explain what it does and exit\n");
|
|
printf("-H -- Don't run anything, just explain all canned expressions\n");
|
|
printf("-T -- Test all PMC's defined by this processor\n");
|
|
return(0);
|
|
break;
|
|
};
|
|
}
|
|
if ((name == NULL) && (filename == NULL) && (test_mode == 0) && (master_exp == NULL)) {
|
|
printf("Without setting an expression we cannot dynamically gather information\n");
|
|
printf("you must supply a filename (and you probably want verbosity)\n");
|
|
goto use;
|
|
}
|
|
if (test_mode) {
|
|
run_tests();
|
|
return(0);
|
|
}
|
|
printf("*********************************\n");
|
|
if (master_exp == NULL) {
|
|
(*the_cpu.explain)(name);
|
|
} else {
|
|
printf("Examine your expression ");
|
|
print_exp(master_exp);
|
|
printf("User defined threshold\n");
|
|
}
|
|
if (help_only) {
|
|
return(0);
|
|
}
|
|
process_file(filename);
|
|
if (verbose >= 2) {
|
|
for (i=0; i<ncnts; i++) {
|
|
printf("Counter:%s cpu:%d index:%d\n",
|
|
cnts[i].counter_name,
|
|
cnts[i].cpu, i);
|
|
for(j=0; j<cnts[i].pos; j++) {
|
|
printf(" val - %ld\n", (long int)cnts[i].vals[j]);
|
|
}
|
|
printf(" sum - %ld\n", (long int)cnts[i].sum);
|
|
}
|
|
}
|
|
if (expression == NULL) {
|
|
return(0);
|
|
}
|
|
for(i=0, cnt=0; i<MAX_CPU; i++) {
|
|
if (glob_cpu[i]) {
|
|
do_expression(glob_cpu[i], -1);
|
|
cnt++;
|
|
if (cnt == cpu_count_out) {
|
|
printf("\n");
|
|
break;
|
|
} else {
|
|
printf("\t");
|
|
}
|
|
}
|
|
}
|
|
return(0);
|
|
}
|