diff --git a/usr.sbin/pmcstudy/pmcstudy.c b/usr.sbin/pmcstudy/pmcstudy.c index 731dc0e99a5f..06ded9723fca 100644 --- a/usr.sbin/pmcstudy/pmcstudy.c +++ b/usr.sbin/pmcstudy/pmcstudy.c @@ -45,20 +45,20 @@ static int verbose = 0; extern char **environ; extern struct expression *master_exp; -struct expression *master_exp=NULL; +struct expression *master_exp = NULL; #define PMC_INITIAL_ALLOC 512 extern char **valid_pmcs; char **valid_pmcs = NULL; extern int valid_pmc_cnt; -int valid_pmc_cnt=0; +int valid_pmc_cnt = 0; extern int pmc_allocated_cnt; -int pmc_allocated_cnt=0; +int pmc_allocated_cnt = 0; /* * The following two varients on popen and pclose with * the cavet that they get you the PID so that you - * can supply it to pclose so it can send a SIGTERM + * can supply it to pclose so it can send a SIGTERM * to the process. */ static FILE * @@ -75,7 +75,7 @@ my_popen(const char *command, const char *dir, pid_t *p_pid) if ((strcmp(dir, "r") != 0) && (strcmp(dir, "w") != 0)) { errno = EINVAL; - return(NULL); + return (NULL); } if (pipe(pdesin) < 0) return (NULL); @@ -94,14 +94,14 @@ my_popen(const char *command, const char *dir, pid_t *p_pid) argv[3] = NULL; switch (pid = fork()) { - case -1: /* Error. */ + case -1: /* Error. */ (void)close(pdesin[0]); (void)close(pdesin[1]); (void)close(pdesout[0]); (void)close(pdesout[1]); return (NULL); /* NOTREACHED */ - case 0: /* Child. */ + case 0: /* Child. */ /* Close out un-used sides */ (void)close(pdesin[1]); (void)close(pdesout[0]); @@ -129,8 +129,8 @@ my_popen(const char *command, const char *dir, pid_t *p_pid) (void)close(pdesin[0]); (void)close(pdesout[0]); (void)close(pdesout[1]); - return(io_out); - } else { + return (io_out); + } else { /* Prepare the input stream */ io_in = fdopen(pdesout[0], "r"); (void)close(pdesout[1]); @@ -146,7 +146,7 @@ my_popen(const char *command, const char *dir, pid_t *p_pid) * if already `pclosed', or waitpid returns an error. */ static void -my_pclose(FILE *io, pid_t the_pid) +my_pclose(FILE * io, pid_t the_pid) { int pstat; pid_t pid; @@ -164,33 +164,33 @@ my_pclose(FILE *io, pid_t the_pid) struct counters { struct counters *next_cpu; - char counter_name[MAX_NLEN]; /* Name of counter */ - int cpu; /* CPU we are on */ - int pos; /* Index we are filling to. */ + char counter_name[MAX_NLEN]; /* Name of counter */ + int cpu; /* CPU we are on */ + int pos; /* Index we are filling to. */ uint64_t vals[MAX_COUNTER_SLOTS]; /* Last 64 entries */ - uint64_t sum; /* Summary of entries */ + uint64_t sum; /* Summary of entries */ }; extern struct counters *glob_cpu[MAX_CPU]; struct counters *glob_cpu[MAX_CPU]; extern struct counters *cnts; -struct counters *cnts=NULL; +struct counters *cnts = NULL; extern int ncnts; -int ncnts=0; +int ncnts = 0; -extern int (*expression)(struct counters *, int); -int (*expression)(struct counters *, int); +extern int (*expression) (struct counters *, int); +int (*expression) (struct counters *, int); -static const char *threshold=NULL; +static const char *threshold = NULL; static const char *command; struct cpu_entry { const char *name; const char *thresh; const char *command; - int (*func)(struct counters *, int); + int (*func) (struct counters *, int); }; @@ -198,7 +198,7 @@ struct cpu_type { char cputype[32]; int number; struct cpu_entry *ents; - void (*explain)(const char *name); + void (*explain) (const char *name); }; extern struct cpu_type the_cpu; struct cpu_type the_cpu; @@ -207,6 +207,7 @@ static void explain_name_sb(const char *name) { const char *mythresh; + if (strcmp(name, "allocstall1") == 0) { printf("Examine PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW / CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh > .05"; @@ -278,7 +279,7 @@ explain_name_sb(const char *name) } else { printf("Unknown name:%s\n", name); mythresh = "unknown entry"; - } + } printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); } @@ -286,6 +287,7 @@ static void explain_name_ib(const char *name) { const char *mythresh; + if (strcmp(name, "br_miss") == 0) { printf("Examine ((BR_MISP_RETIRED.ALL_BRANCHES /(BR_MISP_RETIRED.ALL_BRANCHES +\n"); printf(" MACHINE_CLEAR.COUNT) * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES)\n"); @@ -305,7 +307,7 @@ explain_name_ib(const char *name) mythresh = "thresh >= .2"; } else if (strcmp(name, "itlbmiss") == 0) { printf("Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n"); - mythresh = "thresh > .05"; + mythresh = "thresh > .05"; } else if (strcmp(name, "icachemiss") == 0) { printf("Examine (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh > .05"; @@ -360,7 +362,7 @@ explain_name_ib(const char *name) } else { printf("Unknown name:%s\n", name); mythresh = "unknown entry"; - } + } printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); } @@ -369,6 +371,7 @@ static void explain_name_has(const char *name) { const char *mythresh; + if (strcmp(name, "eff1") == 0) { printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); mythresh = "thresh < .75"; @@ -377,7 +380,7 @@ explain_name_has(const char *name) mythresh = "thresh > 1.0"; } else if (strcmp(name, "itlbmiss") == 0) { printf("Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n"); - mythresh = "thresh > .05"; + mythresh = "thresh > .05"; } else if (strcmp(name, "icachemiss") == 0) { printf("Examine (36 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh > .05"; @@ -439,7 +442,7 @@ explain_name_has(const char *name) } else { printf("Unknown name:%s\n", name); mythresh = "unknown entry"; - } + } printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); } @@ -451,16 +454,16 @@ find_counter(struct counters *base, const char *name) at = base; len = strlen(name); - while(at) { + while (at) { if (strncmp(at->counter_name, name, len) == 0) { - return(at); + return (at); } at = at->next_cpu; } printf("Can't find counter %s\n", name); printf("We have:\n"); at = base; - while(at) { + while (at) { printf("- %s\n", at->counter_name); at = at->next_cpu; } @@ -475,6 +478,7 @@ allocstall1(struct counters *cpu, int pos) struct counters *partial; struct counters *unhalt; double un, par, res; + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); partial = find_counter(cpu, "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW"); if (pos != -1) { @@ -484,9 +488,9 @@ allocstall1(struct counters *cpu, int pos) par = partial->sum * 1.0; un = unhalt->sum * 1.0; } - res = par/un; + res = par / un; ret = printf("%1.3f", res); - return(ret); + return (ret); } static int @@ -497,6 +501,7 @@ allocstall2(struct counters *cpu, int pos) struct counters *partial; struct counters *unhalt; double un, par, res; + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); partial = find_counter(cpu, "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP"); if (pos != -1) { @@ -506,9 +511,9 @@ allocstall2(struct counters *cpu, int pos) par = partial->sum * 1.0; un = unhalt->sum * 1.0; } - res = par/un; + res = par / un; ret = printf("%1.3f", res); - return(ret); + return (ret); } static int @@ -517,12 +522,14 @@ br_mispredict(struct counters *cpu, int pos) struct counters *brctr; struct counters *unhalt; int ret; + /* 3 - (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ double br, un, con, res; + con = 20.0; - + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); - brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES"); + brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES"); if (pos != -1) { br = brctr->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; @@ -530,9 +537,9 @@ br_mispredict(struct counters *cpu, int pos) br = brctr->sum * 1.0; un = unhalt->sum * 1.0; } - res = (con * br)/un; - ret = printf("%1.3f", res); - return(ret); + res = (con * br) / un; + ret = printf("%1.3f", res); + return (ret); } static int @@ -542,22 +549,25 @@ br_mispredictib(struct counters *cpu, int pos) struct counters *unhalt; struct counters *clear, *clear2, *clear3; struct counters *uops; - struct counters *recv; + struct counters *recv; struct counters *iss; + /* "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",*/ int ret; - /* - * (BR_MISP_RETIRED.ALL_BRANCHES / - * (BR_MISP_RETIRED.ALL_BRANCHES + - * MACHINE_CLEAR.COUNT) * - * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) / (4 * CPU_CLK_UNHALTED.THREAD))) - * + + /* + * (BR_MISP_RETIRED.ALL_BRANCHES / (BR_MISP_RETIRED.ALL_BRANCHES + + * MACHINE_CLEAR.COUNT) * ((UOPS_ISSUED.ANY - + * UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) / (4 * + * CPU_CLK_UNHALTED.THREAD))) + * */ double br, cl, cl2, cl3, uo, re, un, con, res, is; + con = 4.0; - + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); - brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES"); + brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES"); clear = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING"); clear2 = find_counter(cpu, "MACHINE_CLEARS.SMC"); clear3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV"); @@ -583,9 +593,9 @@ br_mispredictib(struct counters *cpu, int pos) is = iss->sum * 1.0; un = unhalt->sum * 1.0; } - res = (br/(br + cl + cl2 + cl3) * ((is - uo + con * re) / (con * un))); - ret = printf("%1.3f", res); - return(ret); + res = (br / (br + cl + cl2 + cl3) * ((is - uo + con * re) / (con * un))); + ret = printf("%1.3f", res); + return (ret); } static int @@ -601,9 +611,9 @@ br_mispredict_broad(struct counters *cpu, int pos) double br, cl, uo, uo_r, re, con, un, res; con = 4.0; - + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); - brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES"); + brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES"); clear = find_counter(cpu, "MACHINE_CLEARS.CYCLES"); uops = find_counter(cpu, "UOPS_ISSUED.ANY"); uops_ret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS"); @@ -625,8 +635,8 @@ br_mispredict_broad(struct counters *cpu, int pos) re = recv->sum * 1.0; } res = br / (br + cl) * (uo - uo_r + con * re) / (un * con); - ret = printf("%1.3f", res); - return(ret); + ret = printf("%1.3f", res); + return (ret); } static int @@ -637,9 +647,12 @@ splitloadib(struct counters *cpu, int pos) struct counters *l1d, *ldblock; struct counters *unhalt; double un, memd, res, l1, ldb; - /* - * ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) * LD_BLOCKS.NO_SR) / CPU_CLK_UNHALTED.THREAD_P - * "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1", + + /* + * ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) * + * LD_BLOCKS.NO_SR) / CPU_CLK_UNHALTED.THREAD_P "pmcstat -s + * CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s + * MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1", */ unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); @@ -657,9 +670,9 @@ splitloadib(struct counters *cpu, int pos) ldb = ldblock->sum * 1.0; un = unhalt->sum * 1.0; } - res = ((l1 / memd) * ldb)/un; + res = ((l1 / memd) * ldb) / un; ret = printf("%1.3f", res); - return(ret); + return (ret); } static int @@ -669,6 +682,7 @@ splitload(struct counters *cpu, int pos) struct counters *mem; struct counters *unhalt; double con, un, memd, res; + /* 4 - (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/ con = 5.0; @@ -681,19 +695,23 @@ splitload(struct counters *cpu, int pos) memd = mem->sum * 1.0; un = unhalt->sum * 1.0; } - res = (memd * con)/un; + res = (memd * con) / un; ret = printf("%1.3f", res); - return(ret); + return (ret); } static int splitstore(struct counters *cpu, int pos) { - /* 5 - MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES (thresh > 0.01) */ + /* + * 5 - MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES + * (thresh > 0.01) + */ int ret; struct counters *mem_split; struct counters *mem_stores; double memsplit, memstore, res; + mem_split = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_STORES"); mem_stores = find_counter(cpu, "MEM_UOP_RETIRED.ALL_STORES"); if (pos != -1) { @@ -703,16 +721,19 @@ splitstore(struct counters *cpu, int pos) memsplit = mem_split->sum * 1.0; memstore = mem_stores->sum * 1.0; } - res = memsplit/memstore; + res = memsplit / memstore; ret = printf("%1.3f", res); - return(ret); + return (ret); } static int contested(struct counters *cpu, int pos) { - /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ + /* + * 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / + * CPU_CLK_UNHALTED.THREAD_P (thresh >.05) + */ int ret; struct counters *mem; struct counters *unhalt; @@ -728,15 +749,18 @@ contested(struct counters *cpu, int pos) memd = mem->sum * 1.0; un = unhalt->sum * 1.0; } - res = (memd * con)/un; + res = (memd * con) / un; ret = printf("%1.3f", res); - return(ret); + return (ret); } static int contested_has(struct counters *cpu, int pos) { - /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ + /* + * 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / + * CPU_CLK_UNHALTED.THREAD_P (thresh >.05) + */ int ret; struct counters *mem; struct counters *unhalt; @@ -752,15 +776,18 @@ contested_has(struct counters *cpu, int pos) memd = mem->sum * 1.0; un = unhalt->sum * 1.0; } - res = (memd * con)/un; + res = (memd * con) / un; ret = printf("%1.3f", res); - return(ret); + return (ret); } static int contestedbroad(struct counters *cpu, int pos) { - /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ + /* + * 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / + * CPU_CLK_UNHALTED.THREAD_P (thresh >.05) + */ int ret; struct counters *mem; struct counters *mem2; @@ -770,7 +797,7 @@ contestedbroad(struct counters *cpu, int pos) con = 84.0; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); - mem2 = find_counter(cpu,"MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS"); + mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS"); if (pos != -1) { memd = mem->vals[pos] * 1.0; @@ -781,16 +808,19 @@ contestedbroad(struct counters *cpu, int pos) memtoo = mem2->sum * 1.0; un = unhalt->sum * 1.0; } - res = ((memd * con) + memtoo)/un; + res = ((memd * con) + memtoo) / un; ret = printf("%1.3f", res); - return(ret); + return (ret); } static int blockstoreforward(struct counters *cpu, int pos) { - /* 7 - (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .05)*/ + /* + * 7 - (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P + * (thresh >= .05) + */ int ret; struct counters *ldb; struct counters *unhalt; @@ -806,17 +836,19 @@ blockstoreforward(struct counters *cpu, int pos) ld = ldb->sum * 1.0; un = unhalt->sum * 1.0; } - res = (ld * con)/un; + res = (ld * con) / un; ret = printf("%1.3f", res); - return(ret); + return (ret); } static int cache2(struct counters *cpu, int pos) { - /* ** Suspect *** - * 8 - ((MEM_LOAD_RETIRED.L3_HIT * 26) + (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + - * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P (thresh >.2) + /* + * ** Suspect *** 8 - ((MEM_LOAD_RETIRED.L3_HIT * 26) + + * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + + * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / + * CPU_CLK_UNHALTED.THREAD_P (thresh >.2) */ int ret; struct counters *mem1, *mem2, *mem3; @@ -842,16 +874,17 @@ cache2(struct counters *cpu, int pos) me_3 = mem3->sum * 1.0; un = unhalt->sum * 1.0; } - res = ((me_1 * con1) + (me_2 * con2) + (me_3 * con3))/un; + res = ((me_1 * con1) + (me_2 * con2) + (me_3 * con3)) / un; ret = printf("%1.3f", res); - return(ret); + return (ret); } static int datasharing(struct counters *cpu, int pos) { - /* - * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2) + /* + * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ + * CPU_CLK_UNHALTED.THREAD_P (thresh >.2) */ int ret; struct counters *mem; @@ -868,9 +901,9 @@ datasharing(struct counters *cpu, int pos) me = mem->sum * 1.0; un = unhalt->sum * 1.0; } - res = (me * con)/un; + res = (me * con) / un; ret = printf("%1.3f", res); - return(ret); + return (ret); } @@ -878,8 +911,9 @@ datasharing(struct counters *cpu, int pos) static int datasharing_has(struct counters *cpu, int pos) { - /* - * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2) + /* + * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ + * CPU_CLK_UNHALTED.THREAD_P (thresh >.2) */ int ret; struct counters *mem; @@ -896,9 +930,9 @@ datasharing_has(struct counters *cpu, int pos) me = mem->sum * 1.0; un = unhalt->sum * 1.0; } - res = (me * con)/un; + res = (me * con) / un; ret = printf("%1.3f", res); - return(ret); + return (ret); } @@ -906,8 +940,9 @@ datasharing_has(struct counters *cpu, int pos) static int cache2ib(struct counters *cpu, int pos) { - /* - * (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2) + /* + * (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P + * (thresh >.2) */ int ret; struct counters *mem; @@ -924,9 +959,9 @@ cache2ib(struct counters *cpu, int pos) me = mem->sum * 1.0; un = unhalt->sum * 1.0; } - res = (con * me)/un; + res = (con * me) / un; ret = printf("%1.3f", res); - return(ret); + return (ret); } static int @@ -934,9 +969,9 @@ cache2has(struct counters *cpu, int pos) { /* * Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \ - * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + - * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84)) - * / CPU_CLK_UNHALTED.THREAD_P + * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + + * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84)) / + * CPU_CLK_UNHALTED.THREAD_P */ int ret; struct counters *mem1, *mem2, *mem3; @@ -961,16 +996,17 @@ cache2has(struct counters *cpu, int pos) me3 = mem3->sum * 1.0; un = unhalt->sum * 1.0; } - res = ((me1 * con1) + (me2 * con2) + (me3 * con3))/un; + res = ((me1 * con1) + (me2 * con2) + (me3 * con3)) / un; ret = printf("%1.3f", res); - return(ret); + return (ret); } static int cache2broad(struct counters *cpu, int pos) { - /* - * (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2) + /* + * (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P + * (thresh >.2) */ int ret; struct counters *mem; @@ -987,16 +1023,19 @@ cache2broad(struct counters *cpu, int pos) me = mem->sum * 1.0; un = unhalt->sum * 1.0; } - res = (con * me)/un; + res = (con * me) / un; ret = printf("%1.3f", res); - return(ret); + return (ret); } static int cache1(struct counters *cpu, int pos) { - /* 9 - (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ + /* + * 9 - (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / + * CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) + */ int ret; struct counters *mem; struct counters *unhalt; @@ -1012,15 +1051,18 @@ cache1(struct counters *cpu, int pos) me = mem->sum * 1.0; un = unhalt->sum * 1.0; } - res = (me * con)/un; + res = (me * con) / un; ret = printf("%1.3f", res); - return(ret); + return (ret); } static int cache1ib(struct counters *cpu, int pos) { - /* 9 - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ + /* + * 9 - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / + * CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) + */ int ret; struct counters *mem; struct counters *unhalt; @@ -1036,16 +1078,19 @@ cache1ib(struct counters *cpu, int pos) me = mem->sum * 1.0; un = unhalt->sum * 1.0; } - res = (me * con)/un; + res = (me * con) / un; ret = printf("%1.3f", res); - return(ret); + return (ret); } static int cache1broad(struct counters *cpu, int pos) { - /* 9 - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ + /* + * 9 - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / + * CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) + */ int ret; struct counters *mem; struct counters *unhalt; @@ -1061,16 +1106,20 @@ cache1broad(struct counters *cpu, int pos) me = mem->sum * 1.0; un = unhalt->sum * 1.0; } - res = (me * con)/un; + res = (me * con) / un; ret = printf("%1.3f", res); - return(ret); + return (ret); } static int dtlb_missload(struct counters *cpu, int pos) { - /* 10 - ((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t >=.1) */ + /* + * 10 - ((DTLB_LOAD_MISSES.STLB_HIT * 7) + + * DTLB_LOAD_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t + * >=.1) + */ int ret; struct counters *dtlb_m, *dtlb_d; struct counters *unhalt; @@ -1089,39 +1138,40 @@ dtlb_missload(struct counters *cpu, int pos) d2 = dtlb_d->sum * 1.0; un = unhalt->sum * 1.0; } - res = ((d1 * con) + d2)/un; + res = ((d1 * con) + d2) / un; ret = printf("%1.3f", res); - return(ret); + return (ret); } static int dtlb_missstore(struct counters *cpu, int pos) { - /* - * ((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION) / - * CPU_CLK_UNHALTED.THREAD_P (t >= .1) + /* + * ((DTLB_STORE_MISSES.STLB_HIT * 7) + + * DTLB_STORE_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t + * >= .1) */ - int ret; - struct counters *dtsb_m, *dtsb_d; - struct counters *unhalt; - double con, un, d1, d2, res; + int ret; + struct counters *dtsb_m, *dtsb_d; + struct counters *unhalt; + double con, un, d1, d2, res; - con = 7.0; - unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); - dtsb_m = find_counter(cpu, "DTLB_STORE_MISSES.STLB_HIT"); - dtsb_d = find_counter(cpu, "DTLB_STORE_MISSES.WALK_DURATION"); - if (pos != -1) { - d1 = dtsb_m->vals[pos] * 1.0; - d2 = dtsb_d->vals[pos] * 1.0; - un = unhalt->vals[pos] * 1.0; - } else { - d1 = dtsb_m->sum * 1.0; - d2 = dtsb_d->sum * 1.0; - un = unhalt->sum * 1.0; - } - res = ((d1 * con) + d2)/un; - ret = printf("%1.3f", res); - return(ret); + con = 7.0; + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + dtsb_m = find_counter(cpu, "DTLB_STORE_MISSES.STLB_HIT"); + dtsb_d = find_counter(cpu, "DTLB_STORE_MISSES.WALK_DURATION"); + if (pos != -1) { + d1 = dtsb_m->vals[pos] * 1.0; + d2 = dtsb_d->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + } else { + d1 = dtsb_m->sum * 1.0; + d2 = dtsb_d->sum * 1.0; + un = unhalt->sum * 1.0; + } + res = ((d1 * con) + d2) / un; + ret = printf("%1.3f", res); + return (ret); } static int @@ -1142,16 +1192,19 @@ itlb_miss(struct counters *cpu, int pos) d1 = itlb->sum * 1.0; un = unhalt->sum * 1.0; } - res = d1/un; + res = d1 / un; ret = printf("%1.3f", res); - return(ret); + return (ret); } static int itlb_miss_broad(struct counters *cpu, int pos) { - /* (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNTHREAD_P */ + /* + * (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION) / + * CPU_CLK_UNTHREAD_P + */ int ret; struct counters *itlb; struct counters *unhalt; @@ -1170,16 +1223,19 @@ itlb_miss_broad(struct counters *cpu, int pos) un = unhalt->sum * 1.0; k = four_k->sum * 1.0; } - res = (7.0 * k + d1)/un; + res = (7.0 * k + d1) / un; ret = printf("%1.3f", res); - return(ret); + return (ret); } static int icache_miss(struct counters *cpu, int pos) { - /* (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P IB */ + /* + * (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION) / + * CPU_CLK_UNHALTED.THREAD_P IB + */ int ret; struct counters *itlb, *icache; @@ -1198,9 +1254,9 @@ icache_miss(struct counters *cpu, int pos) ic = icache->sum * 1.0; un = unhalt->sum * 1.0; } - res = (ic-d1)/un; + res = (ic - d1) / un; ret = printf("%1.3f", res); - return(ret); + return (ret); } @@ -1224,16 +1280,16 @@ icache_miss_has(struct counters *cpu, int pos) ic = icache->sum * 1.0; un = unhalt->sum * 1.0; } - res = (con * ic)/un; + res = (con * ic) / un; ret = printf("%1.3f", res); - return(ret); + return (ret); } static int lcp_stall(struct counters *cpu, int pos) { - /* ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P IB */ + /* ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P IB */ int ret; struct counters *ild; struct counters *unhalt; @@ -1248,9 +1304,9 @@ lcp_stall(struct counters *cpu, int pos) d1 = ild->sum * 1.0; un = unhalt->sum * 1.0; } - res = d1/un; + res = d1 / un; ret = printf("%1.3f", res); - return(ret); + return (ret); } @@ -1258,7 +1314,10 @@ lcp_stall(struct counters *cpu, int pos) static int frontendstall(struct counters *cpu, int pos) { - /* 12 - IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4) (thresh >= .15) */ + /* + * 12 - IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * + * 4) (thresh >= .15) + */ int ret; struct counters *idq; struct counters *unhalt; @@ -1274,17 +1333,20 @@ frontendstall(struct counters *cpu, int pos) id = idq->sum * 1.0; un = unhalt->sum * 1.0; } - res = id/(un * con); + res = id / (un * con); ret = printf("%1.3f", res); - return(ret); + return (ret); } static int clears(struct counters *cpu, int pos) { - /* 13 - ((MACHINE_CLEARS.MEMORY_ORDERING + MACHINE_CLEARS.SMC + MACHINE_CLEARS.MASKMOV ) * 100 ) - * / CPU_CLK_UNHALTED.THREAD_P (thresh >= .02)*/ - + /* + * 13 - ((MACHINE_CLEARS.MEMORY_ORDERING + MACHINE_CLEARS.SMC + + * MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P + * (thresh >= .02) + */ + int ret; struct counters *clr1, *clr2, *clr3; struct counters *unhalt; @@ -1295,7 +1357,7 @@ clears(struct counters *cpu, int pos) clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING"); clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC"); clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV"); - + if (pos != -1) { cl1 = clr1->vals[pos] * 1.0; cl2 = clr2->vals[pos] * 1.0; @@ -1307,9 +1369,9 @@ clears(struct counters *cpu, int pos) cl3 = clr3->sum * 1.0; un = unhalt->sum * 1.0; } - res = ((cl1 + cl2 + cl3) * con)/un; + res = ((cl1 + cl2 + cl3) * con) / un; ret = printf("%1.3f", res); - return(ret); + return (ret); } static int @@ -1340,9 +1402,9 @@ clears_broad(struct counters *cpu, int pos) un = unhalt->sum * 1.0; } /* Formula not listed but extrapulated to add the cy ?? */ - res = ((cl1 + cl2 + cl3 + cy) * con)/un; + res = ((cl1 + cl2 + cl3 + cy) * con) / un; ret = printf("%1.3f", res); - return(ret); + return (ret); } static int @@ -1364,9 +1426,9 @@ microassist(struct counters *cpu, int pos) id = idq->sum * 1.0; un = unhalt->sum * 1.0; } - res = id/(un * con); + res = id / (un * con); ret = printf("%1.3f", res); - return(ret); + return (ret); } static int @@ -1395,16 +1457,19 @@ microassist_broad(struct counters *cpu, int pos) uoi = uopiss->sum * 1.0; uor = uopret->sum * 1.0; } - res = (uor/uoi) * (id/(un * con)); + res = (uor / uoi) * (id / (un * con)); ret = printf("%1.3f", res); - return(ret); + return (ret); } static int aliasing(struct counters *cpu, int pos) { - /* 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */ - int ret; + /* + * 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / + * CPU_CLK_UNHALTED.THREAD_P (thresh > .1) + */ + int ret; struct counters *ld; struct counters *unhalt; double un, lds, con, res; @@ -1419,16 +1484,19 @@ aliasing(struct counters *cpu, int pos) lds = ld->sum * 1.0; un = unhalt->sum * 1.0; } - res = (lds * con)/un; + res = (lds * con) / un; ret = printf("%1.3f", res); - return(ret); + return (ret); } static int aliasing_broad(struct counters *cpu, int pos) { - /* 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */ - int ret; + /* + * 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / + * CPU_CLK_UNHALTED.THREAD_P (thresh > .1) + */ + int ret; struct counters *ld; struct counters *unhalt; double un, lds, con, res; @@ -1443,9 +1511,9 @@ aliasing_broad(struct counters *cpu, int pos) lds = ld->sum * 1.0; un = unhalt->sum * 1.0; } - res = (lds * con)/un; + res = (lds * con) / un; ret = printf("%1.3f", res); - return(ret); + return (ret); } @@ -1453,7 +1521,7 @@ static int fpassists(struct counters *cpu, int pos) { /* 16 - FP_ASSIST.ANY/INST_RETIRED.ANY_P */ - int ret; + int ret; struct counters *fp; struct counters *inst; double un, fpd, res; @@ -1467,16 +1535,19 @@ fpassists(struct counters *cpu, int pos) fpd = fp->sum * 1.0; un = inst->sum * 1.0; } - res = fpd/un; + res = fpd / un; ret = printf("%1.3f", res); - return(ret); + return (ret); } static int otherassistavx(struct counters *cpu, int pos) { - /* 17 - (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/ - int ret; + /* + * 17 - (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P + * thresh .1 + */ + int ret; struct counters *oth; struct counters *unhalt; double un, ot, con, res; @@ -1491,21 +1562,24 @@ otherassistavx(struct counters *cpu, int pos) ot = oth->sum * 1.0; un = unhalt->sum * 1.0; } - res = (ot * con)/un; + res = (ot * con) / un; ret = printf("%1.3f", res); - return(ret); + return (ret); } static int otherassistsse(struct counters *cpu, int pos) { - int ret; + int ret; struct counters *oth; struct counters *unhalt; double un, ot, con, res; - /* 18 (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/ + /* + * 18 (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P + * thresh .1 + */ con = 75.0; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); oth = find_counter(cpu, "OTHER_ASSISTS.SSE_TO_AVX"); @@ -1516,21 +1590,24 @@ otherassistsse(struct counters *cpu, int pos) ot = oth->sum * 1.0; un = unhalt->sum * 1.0; } - res = (ot * con)/un; + res = (ot * con) / un; ret = printf("%1.3f", res); - return(ret); + return (ret); } static int efficiency1(struct counters *cpu, int pos) { - int ret; + int ret; struct counters *uops; struct counters *unhalt; double un, ot, con, res; - /* 19 (UOPS_RETIRED.RETIRE_SLOTS/(4*CPU_CLK_UNHALTED.THREAD_P) look if thresh < .9*/ + /* + * 19 (UOPS_RETIRED.RETIRE_SLOTS/(4*CPU_CLK_UNHALTED.THREAD_P) look + * if thresh < .9 + */ con = 4.0; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS"); @@ -1541,21 +1618,24 @@ efficiency1(struct counters *cpu, int pos) ot = uops->sum * 1.0; un = unhalt->sum * 1.0; } - res = ot/(con * un); + res = ot / (con * un); ret = printf("%1.3f", res); - return(ret); + return (ret); } static int efficiency2(struct counters *cpu, int pos) { - int ret; + int ret; struct counters *uops; struct counters *unhalt; double un, ot, res; - /* 20 - CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P good if > 1. (comp factor)*/ + /* + * 20 - CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P good if > 1. + * (comp factor) + */ unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); uops = find_counter(cpu, "INST_RETIRED.ANY_P"); if (pos != -1) { @@ -1565,205 +1645,205 @@ efficiency2(struct counters *cpu, int pos) ot = uops->sum * 1.0; un = unhalt->sum * 1.0; } - res = un/ot; + res = un / ot; ret = printf("%1.3f", res); - return(ret); + return (ret); } -#define SANDY_BRIDGE_COUNT 20 +#define SANDY_BRIDGE_COUNT 20 static struct cpu_entry sandy_bridge[SANDY_BRIDGE_COUNT] = { -/*01*/ { "allocstall1", "thresh > .05", - "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW -w 1", - allocstall1 }, -/*02*/ { "allocstall2", "thresh > .05", - "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES -w 1", - allocstall2 }, -/*03*/ { "br_miss", "thresh >= .2", - "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1", - br_mispredict }, -/*04*/ { "splitload", "thresh >= .1", - "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1", - splitload }, -/*05*/ { "splitstore", "thresh >= .01", - "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1", - splitstore }, -/*06*/ { "contested", "thresh >= .05", - "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", - contested }, -/*07*/ { "blockstorefwd", "thresh >= .05", - "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", - blockstoreforward }, -/*08*/ { "cache2", "thresh >= .2", - "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", - cache2 }, -/*09*/ { "cache1", "thresh >= .2", - "pmcstat -s MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1", - cache1 }, -/*10*/ { "dtlbmissload", "thresh >= .1", - "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", - dtlb_missload }, -/*11*/ { "dtlbmissstore", "thresh >= .05", - "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", - dtlb_missstore }, -/*12*/ { "frontendstall", "thresh >= .15", - "pmcstat -s IDQ_UOPS_NOT_DELIVERED.CORE -s CPU_CLK_UNHALTED.THREAD_P -w 1", - frontendstall }, -/*13*/ { "clears", "thresh >= .02", - "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", - clears }, -/*14*/ { "microassist", "thresh >= .05", - "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", - microassist }, -/*15*/ { "aliasing_4k", "thresh >= .1", - "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", - aliasing }, -/*16*/ { "fpassist", "look for a excessive value", - "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", - fpassists }, -/*17*/ { "otherassistavx", "look for a excessive value", - "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", - otherassistavx }, -/*18*/ { "otherassistsse", "look for a excessive value", - "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", - otherassistsse }, -/*19*/ { "eff1", "thresh < .9", - "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", - efficiency1 }, -/*20*/ { "eff2", "thresh > 1.0", - "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", - efficiency2 }, + /*01*/ {"allocstall1", "thresh > .05", + "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW -w 1", + allocstall1}, + /*02*/ {"allocstall2", "thresh > .05", + "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES -w 1", + allocstall2}, + /*03*/ {"br_miss", "thresh >= .2", + "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1", + br_mispredict}, + /*04*/ {"splitload", "thresh >= .1", + "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1", + splitload}, + /*05*/ {"splitstore", "thresh >= .01", + "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1", + splitstore}, + /*06*/ {"contested", "thresh >= .05", + "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", + contested}, + /*07*/ {"blockstorefwd", "thresh >= .05", + "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", + blockstoreforward}, + /*08*/ {"cache2", "thresh >= .2", + "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", + cache2}, + /*09*/ {"cache1", "thresh >= .2", + "pmcstat -s MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1", + cache1}, + /*10*/ {"dtlbmissload", "thresh >= .1", + "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", + dtlb_missload}, + /*11*/ {"dtlbmissstore", "thresh >= .05", + "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", + dtlb_missstore}, + /*12*/ {"frontendstall", "thresh >= .15", + "pmcstat -s IDQ_UOPS_NOT_DELIVERED.CORE -s CPU_CLK_UNHALTED.THREAD_P -w 1", + frontendstall}, + /*13*/ {"clears", "thresh >= .02", + "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", + clears}, + /*14*/ {"microassist", "thresh >= .05", + "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", + microassist}, + /*15*/ {"aliasing_4k", "thresh >= .1", + "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", + aliasing}, + /*16*/ {"fpassist", "look for a excessive value", + "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", + fpassists}, + /*17*/ {"otherassistavx", "look for a excessive value", + "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", + otherassistavx}, + /*18*/ {"otherassistsse", "look for a excessive value", + "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", + otherassistsse}, + /*19*/ {"eff1", "thresh < .9", + "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", + efficiency1}, + /*20*/ {"eff2", "thresh > 1.0", + "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", + efficiency2}, }; #define IVY_BRIDGE_COUNT 21 static struct cpu_entry ivy_bridge[IVY_BRIDGE_COUNT] = { -/*1*/ { "eff1", "thresh < .75", - "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", - efficiency1 }, -/*2*/ { "eff2", "thresh > 1.0", - "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", - efficiency2 }, -/*3*/ { "itlbmiss", "thresh > .05", - "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", - itlb_miss }, -/*4*/ { "icachemiss", "thresh > .05", - "pmcstat -s ICACHE.IFETCH_STALL -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", - icache_miss }, -/*5*/ { "lcpstall", "thresh > .05", - "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1", - lcp_stall }, -/*6*/ { "cache1", "thresh >= .2", - "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1", - cache1ib }, -/*7*/ { "cache2", "thresh >= .2", - "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", - cache2ib }, -/*8*/ { "contested", "thresh >= .05", - "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", - contested }, -/*9*/ { "datashare", "thresh >= .05", - "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", - datasharing }, -/*10*/ { "blockstorefwd", "thresh >= .05", - "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", - blockstoreforward }, -/*11*/ { "splitload", "thresh >= .1", - "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1", - splitloadib }, -/*12*/ { "splitstore", "thresh >= .01", - "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1", - splitstore }, -/*13*/ { "aliasing_4k", "thresh >= .1", - "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", - aliasing }, -/*14*/ { "dtlbmissload", "thresh >= .1", - "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", - dtlb_missload }, -/*15*/ { "dtlbmissstore", "thresh >= .05", - "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", - dtlb_missstore }, -/*16*/ { "br_miss", "thresh >= .2", - "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1", - br_mispredictib }, -/*17*/ { "clears", "thresh >= .02", - "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", - clears }, -/*18*/ { "microassist", "thresh >= .05", - "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", - microassist }, -/*19*/ { "fpassist", "look for a excessive value", - "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", - fpassists }, -/*20*/ { "otherassistavx", "look for a excessive value", - "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", - otherassistavx }, -/*21*/ { "otherassistsse", "look for a excessive value", - "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", - otherassistsse }, + /*1*/ {"eff1", "thresh < .75", + "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", + efficiency1}, + /*2*/ {"eff2", "thresh > 1.0", + "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", + efficiency2}, + /*3*/ {"itlbmiss", "thresh > .05", + "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", + itlb_miss}, + /*4*/ {"icachemiss", "thresh > .05", + "pmcstat -s ICACHE.IFETCH_STALL -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", + icache_miss}, + /*5*/ {"lcpstall", "thresh > .05", + "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1", + lcp_stall}, + /*6*/ {"cache1", "thresh >= .2", + "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1", + cache1ib}, + /*7*/ {"cache2", "thresh >= .2", + "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", + cache2ib}, + /*8*/ {"contested", "thresh >= .05", + "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", + contested}, + /*9*/ {"datashare", "thresh >= .05", + "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", + datasharing}, + /*10*/ {"blockstorefwd", "thresh >= .05", + "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", + blockstoreforward}, + /*11*/ {"splitload", "thresh >= .1", + "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1", + splitloadib}, + /*12*/ {"splitstore", "thresh >= .01", + "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1", + splitstore}, + /*13*/ {"aliasing_4k", "thresh >= .1", + "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", + aliasing}, + /*14*/ {"dtlbmissload", "thresh >= .1", + "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", + dtlb_missload}, + /*15*/ {"dtlbmissstore", "thresh >= .05", + "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", + dtlb_missstore}, + /*16*/ {"br_miss", "thresh >= .2", + "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1", + br_mispredictib}, + /*17*/ {"clears", "thresh >= .02", + "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", + clears}, + /*18*/ {"microassist", "thresh >= .05", + "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", + microassist}, + /*19*/ {"fpassist", "look for a excessive value", + "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", + fpassists}, + /*20*/ {"otherassistavx", "look for a excessive value", + "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", + otherassistavx}, + /*21*/ {"otherassistsse", "look for a excessive value", + "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", + otherassistsse}, }; #define HASWELL_COUNT 20 static struct cpu_entry haswell[HASWELL_COUNT] = { -/*1*/ { "eff1", "thresh < .75", - "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", - efficiency1 }, -/*2*/ { "eff2", "thresh > 1.0", - "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", - efficiency2 }, -/*3*/ { "itlbmiss", "thresh > .05", - "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", - itlb_miss }, -/*4*/ { "icachemiss", "thresh > .05", - "pmcstat -s ICACHE.MISSES --s CPU_CLK_UNHALTED.THREAD_P -w 1", - icache_miss_has }, -/*5*/ { "lcpstall", "thresh > .05", - "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1", - lcp_stall }, -/*6*/ { "cache1", "thresh >= .2", - "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1", - cache1ib }, -/*7*/ { "cache2", "thresh >= .2", - "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", - cache2has }, -/*8*/ { "contested", "thresh >= .05", - "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", - contested_has }, -/*9*/ { "datashare", "thresh >= .05", - "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", - datasharing_has }, -/*10*/ { "blockstorefwd", "thresh >= .05", - "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", - blockstoreforward }, -/*11*/ { "splitload", "thresh >= .1", - "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1", - splitload }, -/*12*/ { "splitstore", "thresh >= .01", - "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1", - splitstore }, -/*13*/ { "aliasing_4k", "thresh >= .1", - "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", - aliasing }, -/*14*/ { "dtlbmissload", "thresh >= .1", - "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", - dtlb_missload }, -/*15*/ { "br_miss", "thresh >= .2", - "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1", - br_mispredict }, -/*16*/ { "clears", "thresh >= .02", - "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", - clears }, -/*17*/ { "microassist", "thresh >= .05", - "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", - microassist }, -/*18*/ { "fpassist", "look for a excessive value", - "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", - fpassists }, -/*19*/ { "otherassistavx", "look for a excessive value", - "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", - otherassistavx }, -/*20*/ { "otherassistsse", "look for a excessive value", - "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", - otherassistsse }, + /*1*/ {"eff1", "thresh < .75", + "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", + efficiency1}, + /*2*/ {"eff2", "thresh > 1.0", + "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", + efficiency2}, + /*3*/ {"itlbmiss", "thresh > .05", + "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", + itlb_miss}, + /*4*/ {"icachemiss", "thresh > .05", + "pmcstat -s ICACHE.MISSES --s CPU_CLK_UNHALTED.THREAD_P -w 1", + icache_miss_has}, + /*5*/ {"lcpstall", "thresh > .05", + "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1", + lcp_stall}, + /*6*/ {"cache1", "thresh >= .2", + "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1", + cache1ib}, + /*7*/ {"cache2", "thresh >= .2", + "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", + cache2has}, + /*8*/ {"contested", "thresh >= .05", + "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", + contested_has}, + /*9*/ {"datashare", "thresh >= .05", + "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", + datasharing_has}, + /*10*/ {"blockstorefwd", "thresh >= .05", + "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", + blockstoreforward}, + /*11*/ {"splitload", "thresh >= .1", + "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1", + splitload}, + /*12*/ {"splitstore", "thresh >= .01", + "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1", + splitstore}, + /*13*/ {"aliasing_4k", "thresh >= .1", + "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", + aliasing}, + /*14*/ {"dtlbmissload", "thresh >= .1", + "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", + dtlb_missload}, + /*15*/ {"br_miss", "thresh >= .2", + "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1", + br_mispredict}, + /*16*/ {"clears", "thresh >= .02", + "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", + clears}, + /*17*/ {"microassist", "thresh >= .05", + "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", + microassist}, + /*18*/ {"fpassist", "look for a excessive value", + "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", + fpassists}, + /*19*/ {"otherassistavx", "look for a excessive value", + "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", + otherassistavx}, + /*20*/ {"otherassistsse", "look for a excessive value", + "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", + otherassistsse}, }; @@ -1771,6 +1851,7 @@ static void explain_name_broad(const char *name) { const char *mythresh; + if (strcmp(name, "eff1") == 0) { printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); mythresh = "thresh < .75"; @@ -1779,7 +1860,7 @@ explain_name_broad(const char *name) mythresh = "thresh > 1.0"; } else if (strcmp(name, "itlbmiss") == 0) { printf("Examine (7 * ITLB_MISSES_STLB_HIT_4K + ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n"); - mythresh = "thresh > .05"; + mythresh = "thresh > .05"; } else if (strcmp(name, "icachemiss") == 0) { printf("Examine ( 36.0 * CACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P ??? may not be right \n"); mythresh = "thresh > .05"; @@ -1832,64 +1913,64 @@ explain_name_broad(const char *name) } else { printf("Unknown name:%s\n", name); mythresh = "unknown entry"; - } + } printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); } #define BROADWELL_COUNT 17 static struct cpu_entry broadwell[BROADWELL_COUNT] = { -/*1*/ { "eff1", "thresh < .75", - "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", - efficiency1 }, -/*2*/ { "eff2", "thresh > 1.0", - "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", - efficiency2 }, -/*3*/ { "itlbmiss", "thresh > .05", - "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -s ITLB_MISSES.STLB_HIT_4K -w 1", - itlb_miss_broad }, -/*4*/ { "icachemiss", "thresh > .05", - "pmcstat -s ICACHE.MISSES --s CPU_CLK_UNHALTED.THREAD_P -w 1", - icache_miss_has }, -/*5*/ { "lcpstall", "thresh > .05", - "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1", - lcp_stall }, -/*6*/ { "cache1", "thresh >= .1", - "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1", - cache1broad }, -/*7*/ { "cache2", "thresh >= .2", - "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", - cache2broad }, -/*8*/ { "contested", "thresh >= .05", - "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS -w 1", - contestedbroad }, -/*9*/ { "datashare", "thresh >= .05", - "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", - datasharing_has }, -/*10*/ { "blockstorefwd", "thresh >= .05", - "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", - blockstoreforward }, -/*11*/ { "aliasing_4k", "thresh >= .1", - "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", - aliasing_broad }, -/*12*/ { "dtlbmissload", "thresh >= .1", - "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", - dtlb_missload }, -/*13*/ { "br_miss", "thresh >= .2", - "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.CYCLES -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1", - br_mispredict_broad }, -/*14*/ { "clears", "thresh >= .02", - "pmcstat -s MACHINE_CLEARS.CYCLES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", - clears_broad }, -/*15*/ { "fpassist", "look for a excessive value", - "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", - fpassists }, -/*16*/ { "otherassistavx", "look for a excessive value", - "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", - otherassistavx }, -/*17*/ { "microassist", "thresh >= .2", - "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -w 1", - microassist_broad }, + /*1*/ {"eff1", "thresh < .75", + "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", + efficiency1}, + /*2*/ {"eff2", "thresh > 1.0", + "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", + efficiency2}, + /*3*/ {"itlbmiss", "thresh > .05", + "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -s ITLB_MISSES.STLB_HIT_4K -w 1", + itlb_miss_broad}, + /*4*/ {"icachemiss", "thresh > .05", + "pmcstat -s ICACHE.MISSES --s CPU_CLK_UNHALTED.THREAD_P -w 1", + icache_miss_has}, + /*5*/ {"lcpstall", "thresh > .05", + "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1", + lcp_stall}, + /*6*/ {"cache1", "thresh >= .1", + "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1", + cache1broad}, + /*7*/ {"cache2", "thresh >= .2", + "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", + cache2broad}, + /*8*/ {"contested", "thresh >= .05", + "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS -w 1", + contestedbroad}, + /*9*/ {"datashare", "thresh >= .05", + "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", + datasharing_has}, + /*10*/ {"blockstorefwd", "thresh >= .05", + "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", + blockstoreforward}, + /*11*/ {"aliasing_4k", "thresh >= .1", + "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", + aliasing_broad}, + /*12*/ {"dtlbmissload", "thresh >= .1", + "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", + dtlb_missload}, + /*13*/ {"br_miss", "thresh >= .2", + "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.CYCLES -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1", + br_mispredict_broad}, + /*14*/ {"clears", "thresh >= .02", + "pmcstat -s MACHINE_CLEARS.CYCLES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", + clears_broad}, + /*15*/ {"fpassist", "look for a excessive value", + "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", + fpassists}, + /*16*/ {"otherassistavx", "look for a excessive value", + "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", + otherassistavx}, + /*17*/ {"microassist", "thresh >= .2", + "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -w 1", + microassist_broad}, }; static void @@ -1933,7 +2014,8 @@ static void set_expression(char *name) { int found = 0, i; - for(i=0 ; i< the_cpu.number; i++) { + + for (i = 0; i < the_cpu.number; i++) { if (strcmp(name, the_cpu.ents[i].name) == 0) { found = 1; expression = the_cpu.ents[i].func; @@ -1944,25 +2026,25 @@ set_expression(char *name) } if (!found) { printf("For CPU type %s we have no expression:%s\n", - the_cpu.cputype, name); + the_cpu.cputype, name); exit(-1); } } static int -validate_expression(char *name) +validate_expression(char *name) { int i, found; found = 0; - for(i=0 ; i< the_cpu.number; i++) { + for (i = 0; i < the_cpu.number; i++) { if (strcmp(name, the_cpu.ents[i].name) == 0) { found = 1; break; } } if (!found) { - return(-1); + return (-1); } return (0); } @@ -1970,9 +2052,9 @@ validate_expression(char *name) static void do_expression(struct counters *cpu, int pos) { - if (expression == NULL) + if (expression == NULL) return; - (*expression)(cpu, pos); + (*expression) (cpu, pos); } static void @@ -1980,60 +2062,57 @@ process_header(int idx, char *p) { struct counters *up; int i, len, nlen; - /* - * Given header element idx, at p in - * form 's/NN/nameof' - * process the entry to pull out the name and - * the CPU number. + + /* + * Given header element idx, at p in form 's/NN/nameof' process the + * entry to pull out the name and the CPU number. */ if (strncmp(p, "s/", 2)) { printf("Check -- invalid header no s/ in %s\n", - p); + p); return; } up = &cnts[idx]; up->cpu = strtol(&p[2], NULL, 10); len = strlen(p); - for (i=2; icounter_name, &p[(i+1)]); + nlen = strlen(&p[(i + 1)]); + if (nlen < (MAX_NLEN - 1)) { + strcpy(up->counter_name, &p[(i + 1)]); } else { - strncpy(up->counter_name, &p[(i+1)], (MAX_NLEN-1)); + strncpy(up->counter_name, &p[(i + 1)], (MAX_NLEN - 1)); } } } } static void -build_counters_from_header(FILE *io) +build_counters_from_header(FILE * io) { char buffer[8192], *p; int i, len, cnt; size_t mlen; - /* We have a new start, lets - * setup our headers and cpus. + /* + * We have a new start, lets setup our headers and cpus. */ if (fgets(buffer, sizeof(buffer), io) == NULL) { printf("First line can't be read from file err:%d\n", errno); return; } /* - * Ok output is an array of counters. Once - * we start to read the values in we must - * put them in there slot to match there CPU and - * counter being updated. We create a mass array - * of the counters, filling in the CPU and - * counter name. + * Ok output is an array of counters. Once we start to read the + * values in we must put them in there slot to match there CPU and + * counter being updated. We create a mass array of the counters, + * filling in the CPU and counter name. */ /* How many do we get? */ len = strlen(buffer); - for (i=0, cnt=0; inext_cpu) { /* Already laced in */ @@ -2139,7 +2218,7 @@ lace_cpus_together(void) continue; } /* Ok look forward for cpu->cpu and link in */ - for(j=(i+1); jnext_cpu) { continue; @@ -2160,9 +2239,9 @@ process_file(char *filename) FILE *io; int i; int line_at, not_done; - pid_t pid_of_command=0; + pid_t pid_of_command = 0; - if (filename == NULL) { + if (filename == NULL) { io = my_popen(command, "r", &pid_of_command); if (io == NULL) { printf("Can't popen the command %s\n", command); @@ -2172,7 +2251,7 @@ process_file(char *filename) io = fopen(filename, "r"); if (io == NULL) { printf("Can't process file %s err:%d\n", - filename, errno); + filename, errno); return; } } @@ -2190,15 +2269,15 @@ process_file(char *filename) lace_cpus_together(); print_header(); if (verbose) { - for (i=0; itype == TYPE_VALUE_PMC) { - if(add_it_to(vars, alloced_pmcs, at->name)) { + if (add_it_to(vars, alloced_pmcs, at->name)) { alloced_pmcs++; } } at = at->next; } /* Now we have a unique list in vars so create our command */ - mal = 23; /* "/usr/sbin/pmcstat -w 1" + \0 */ - for(i=0; i= 2) { - for (i=0; i