histogram: move nvme/perf histogram code to a common header

Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: Ia0083365b2da63cb38aebb9f7bbc02f4dfd1ae94

Reviewed-on: https://review.gerrithub.io/365263
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
Jim Harris 2017-06-13 08:48:55 -07:00
parent ad20a6ddc2
commit 5044e4f65a
2 changed files with 231 additions and 127 deletions

examples/nvme/perf/perf.c

@@ -43,6 +43,7 @@
#include "spdk/queue.h"
#include "spdk/string.h"
#include "spdk/nvme_intel.h"
#include "spdk/histogram_data.h"
#if HAVE_LIBAIO
#include <libaio.h>
@@ -81,41 +82,6 @@ struct ns_entry {
char name[1024];
};
/*
* Latency tracking is done with ranges of bucket arrays. The bucket
* for any given I/O is determined solely by the TSC delta - any
* translation to microseconds is only done after the test is finished
* and statistics are printed.
*
* Each range has a number of buckets determined by NUM_BUCKETS_PER_RANGE
* which is 128. The buckets in ranges 0 and 1 each map to one specific
* TSC delta. The buckets in subsequent ranges each map to twice as many
* TSC deltas as buckets in the range before it:
*
* Range 0: 1 TSC each - 128 buckets cover 0 to 127 (2^7-1)
* Range 1: 1 TSC each - 128 buckets cover 128 to 255 (2^8-1)
* Range 2: 2 TSC each - 128 buckets cover 256 to 511 (2^9-1)
* Range 3: 4 TSC each - 128 buckets cover 512 to 1023 (2^10-1)
* Range 4: 8 TSC each - 128 buckets cover 1024 to 2047 (2^11-1)
* Range 5: 16 TSC each - 128 buckets cover 2048 to 4095 (2^12-1)
* ...
* Range 55: 2^54 TSC each - 128 buckets cover 2^61 to 2^62-1
* Range 56: 2^55 TSC each - 128 buckets cover 2^62 to 2^63-1
* Range 57: 2^56 TSC each - 128 buckets cover 2^63 to 2^64-1
*
* On a 2.3GHz processor, this strategy results in 50ns buckets in the
* 7-14us range (sweet spot for Intel Optane SSD latency testing).
*
* Buckets can be made more granular by increasing BUCKET_SHIFT. This
* comes at the cost of additional storage per namespace context to
* store the bucket data.
*/
#define BUCKET_SHIFT 7
#define BUCKET_LSB (64 - BUCKET_SHIFT)
#define NUM_BUCKETS_PER_RANGE (1ULL << BUCKET_SHIFT)
#define BUCKET_MASK (NUM_BUCKETS_PER_RANGE - 1)
#define NUM_BUCKET_RANGES (BUCKET_LSB + 1)
static const double g_latency_cutoffs[] = {
0.01,
0.10,
@@ -157,7 +123,7 @@ struct ns_worker_ctx {
struct ns_worker_ctx *next;
uint64_t bucket[NUM_BUCKET_RANGES][NUM_BUCKETS_PER_RANGE];
struct spdk_histogram_data histogram;
};
struct perf_task {
@@ -215,63 +181,6 @@ static int g_aio_optind; /* Index of first AIO filename in argv */
static void
task_complete(struct perf_task *task);
static uint32_t
get_bucket_range(uint64_t tsc)
{
uint32_t clz, range;
assert(tsc != 0);
clz = __builtin_clzll(tsc);
if (clz <= BUCKET_LSB) {
range = BUCKET_LSB - clz;
} else {
range = 0;
}
return range;
}
static uint32_t
get_bucket_index(uint64_t tsc, uint32_t range)
{
uint32_t shift;
if (range == 0) {
shift = 0;
} else {
shift = range - 1;
}
return (tsc >> shift) & BUCKET_MASK;
}
static double
get_us_from_bucket(uint32_t range, uint32_t index)
{
uint64_t tsc;
index += 1;
if (range > 0) {
tsc = 1ULL << (range + BUCKET_SHIFT - 1);
tsc += (uint64_t)index << (range - 1);
} else {
tsc = index;
}
return (double)tsc * 1000 * 1000 / g_tsc_rate;
}
static void
track_latency(struct ns_worker_ctx *ns_ctx, uint64_t tsc)
{
uint32_t range = get_bucket_range(tsc);
uint32_t index = get_bucket_index(tsc, range);
ns_ctx->bucket[range][index]++;
}
static void
register_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns)
{
@@ -607,7 +516,7 @@ task_complete(struct perf_task *task)
ns_ctx->max_tsc = tsc_diff;
}
if (g_latency_sw_tracking_level > 0) {
track_latency(ns_ctx, tsc_diff);
spdk_histogram_data_tally(&ns_ctx->histogram, tsc_diff);
}
rte_mempool_put(task_pool, task);
@@ -792,6 +701,41 @@ static void usage(char *program_name)
printf("\t[-i shared memory group ID]\n");
}
static void
check_cutoff(void *ctx, uint64_t start, uint64_t end, uint64_t count,
uint64_t total, uint64_t so_far)
{
double so_far_pct;
double **cutoff = ctx;
if (count == 0) {
return;
}
so_far_pct = (double)so_far / total;
while (so_far_pct >= **cutoff && **cutoff > 0) {
printf("%8.4f%% : %9.3fus\n", **cutoff * 100, (double)end * 1000 * 1000 / g_tsc_rate);
(*cutoff)++;
}
}
static void
print_bucket(void *ctx, uint64_t start, uint64_t end, uint64_t count,
uint64_t total, uint64_t so_far)
{
double so_far_pct;
if (count == 0) {
return;
}
so_far_pct = (double)so_far * 100 / total;
printf("%9.3f - %9.3f: %9.4f%% (%9ju)\n",
(double)start * 1000 * 1000 / g_tsc_rate,
(double)end * 1000 * 1000 / g_tsc_rate,
so_far_pct, count);
}
static void
print_performance(void)
{
@@ -858,27 +802,13 @@ print_performance(void)
while (worker) {
ns_ctx = worker->ns_ctx;
while (ns_ctx) {
uint64_t i, j, so_far = 0;
double so_far_pct = 0, bucket = 0;
const double *cutoff = g_latency_cutoffs;
printf("Summary latency data for %-43.43s from core %u:\n", ns_ctx->entry->name, worker->lcore);
printf("=================================================================================\n");
for (i = 0; i < NUM_BUCKET_RANGES; i++) {
for (j = 0; j < NUM_BUCKETS_PER_RANGE; j++) {
so_far += ns_ctx->bucket[i][j];
so_far_pct = (double)so_far / total_io_completed;
bucket = get_us_from_bucket(i, j);
if (ns_ctx->bucket[i][j] == 0) {
continue;
}
while (so_far_pct >= *cutoff && *cutoff > 0) {
printf("%8.4f%% : %9.3fus\n", *cutoff * 100, bucket);
cutoff++;
}
}
}
spdk_histogram_data_iterate(&ns_ctx->histogram, check_cutoff, &cutoff);
printf("\n");
ns_ctx = ns_ctx->next;
}
@@ -893,27 +823,11 @@ print_performance(void)
while (worker) {
ns_ctx = worker->ns_ctx;
while (ns_ctx) {
uint64_t i, j, so_far = 0;
float so_far_pct = 0;
double last_bucket, bucket = 0;
printf("Latency histogram for %-43.43s from core %u:\n", ns_ctx->entry->name, worker->lcore);
printf("==============================================================================\n");
printf(" Range in us Cumulative IO count\n");
for (i = 0; i < NUM_BUCKET_RANGES; i++) {
for (j = 0; j < NUM_BUCKETS_PER_RANGE; j++) {
so_far += ns_ctx->bucket[i][j];
so_far_pct = (float)so_far * 100 / total_io_completed;
last_bucket = bucket;
bucket = get_us_from_bucket(i, j);
if (ns_ctx->bucket[i][j] == 0) {
continue;
}
printf("%9.3f - %9.3f: %9.4f%% (%9ju)\n",
last_bucket, bucket, so_far_pct, ns_ctx->bucket[i][j]);
}
}
spdk_histogram_data_iterate(&ns_ctx->histogram, print_bucket, NULL);
printf("\n");
ns_ctx = ns_ctx->next;
}
@@ -1376,6 +1290,7 @@ associate_workers_with_ns(void)
ns_ctx->min_tsc = UINT64_MAX;
ns_ctx->entry = entry;
ns_ctx->next = worker->ns_ctx;
spdk_histogram_data_reset(&ns_ctx->histogram);
worker->ns_ctx = ns_ctx;
worker = worker->next;

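Taken together, the perf.c hunks above replace the open-coded bucket arrays with a three-call pattern: reset the histogram once, tally raw TSC deltas on the hot path, and iterate with a callback at print time. Below is a minimal self-contained sketch of that pattern, assuming the new header (shown next) is on the include path; the tsc_rate constant and fake_deltas values are placeholder inputs for illustration, not code from this commit, and print_bucket mirrors the callback the diff adds to perf.c.

```c
#include <stdio.h>
#include <inttypes.h>
#include "spdk/histogram_data.h"

/* Placeholder timestamp frequency; perf.c reads the real rate into g_tsc_rate. */
static uint64_t tsc_rate = 2300000000ULL;

static void
print_bucket(void *ctx, uint64_t start, uint64_t end, uint64_t count,
	     uint64_t total, uint64_t so_far)
{
	if (count == 0) {
		return;
	}
	/* Translate TSC tick boundaries to microseconds only at print time,
	 * exactly as the comment in the header prescribes. */
	printf("%9.3f - %9.3f: %9.4f%% (%9ju)\n",
	       (double)start * 1000 * 1000 / tsc_rate,
	       (double)end * 1000 * 1000 / tsc_rate,
	       (double)so_far * 100 / total, count);
}

int
main(void)
{
	struct spdk_histogram_data histogram;
	/* Arbitrary sample latencies in TSC ticks, standing in for measured I/O deltas. */
	uint64_t fake_deltas[] = {1500, 3000, 3000, 12000};
	size_t i;

	spdk_histogram_data_reset(&histogram);
	for (i = 0; i < sizeof(fake_deltas) / sizeof(fake_deltas[0]); i++) {
		spdk_histogram_data_tally(&histogram, fake_deltas[i]);
	}
	spdk_histogram_data_iterate(&histogram, print_bucket, NULL);
	return 0;
}
```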
include/spdk/histogram_data.h

@@ -0,0 +1,189 @@
/*-
* BSD LICENSE
*
* Copyright (c) Intel Corporation.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
* \file
* Generic histogram library
*/
#ifndef _SPDK_HISTOGRAM_DATA_H_
#define _SPDK_HISTOGRAM_DATA_H_
#include "spdk/stdinc.h"
#ifdef __cplusplus
extern "C" {
#endif
#define SPDK_HISTOGRAM_BUCKET_SHIFT 7
#define SPDK_HISTOGRAM_BUCKET_LSB (64 - SPDK_HISTOGRAM_BUCKET_SHIFT)
#define SPDK_HISTOGRAM_NUM_BUCKETS_PER_RANGE (1ULL << SPDK_HISTOGRAM_BUCKET_SHIFT)
#define SPDK_HISTOGRAM_BUCKET_MASK (SPDK_HISTOGRAM_NUM_BUCKETS_PER_RANGE - 1)
#define SPDK_HISTOGRAM_NUM_BUCKET_RANGES (SPDK_HISTOGRAM_BUCKET_LSB + 1)
/*
* SPDK histograms are implemented using ranges of bucket arrays. The most common usage
* model is using TSC datapoints to capture an I/O latency histogram. For this usage model,
* the histogram tracks only TSC deltas - any translation to microseconds is done by the
* histogram user calling spdk_histogram_data_iterate() to iterate over the buckets to perform
* the translations.
*
* Each range has a number of buckets determined by SPDK_HISTOGRAM_NUM_BUCKETS_PER_RANGE
* which is 128. The buckets in ranges 0 and 1 each map to one specific datapoint value.
* The buckets in subsequent ranges each map to twice as many datapoint values as buckets
* in the range before it:
*
* Range 0: 1 value each - 128 buckets cover 0 to 127 (2^7-1)
* Range 1: 1 value each - 128 buckets cover 128 to 255 (2^8-1)
* Range 2: 2 values each - 128 buckets cover 256 to 511 (2^9-1)
* Range 3: 4 values each - 128 buckets cover 512 to 1023 (2^10-1)
* Range 4: 8 values each - 128 buckets cover 1024 to 2047 (2^11-1)
* Range 5: 16 values each - 128 buckets cover 2048 to 4095 (2^12-1)
* ...
* Range 55: 2^54 values each - 128 buckets cover 2^61 to 2^62-1
* Range 56: 2^55 values each - 128 buckets cover 2^62 to 2^63-1
* Range 57: 2^56 values each - 128 buckets cover 2^63 to 2^64-1
*
* On a 2.3GHz processor, this strategy results in 50ns buckets in the 7-14us range (sweet
* spot for Intel Optane SSD latency testing).
*
* Buckets can be made more granular by increasing SPDK_HISTOGRAM_BUCKET_SHIFT. This
* comes at the cost of additional storage per namespace context to store the bucket data.
*/
struct spdk_histogram_data {
uint64_t bucket[SPDK_HISTOGRAM_NUM_BUCKET_RANGES][SPDK_HISTOGRAM_NUM_BUCKETS_PER_RANGE];
};
static inline void
spdk_histogram_data_reset(struct spdk_histogram_data *histogram)
{
memset(histogram, 0, sizeof(*histogram));
}
static inline uint32_t
__spdk_histogram_data_get_bucket_range(uint64_t datapoint)
{
uint32_t clz, range;
assert(datapoint != 0);
clz = __builtin_clzll(datapoint);
if (clz <= SPDK_HISTOGRAM_BUCKET_LSB) {
range = SPDK_HISTOGRAM_BUCKET_LSB - clz;
} else {
range = 0;
}
return range;
}
static inline uint32_t
__spdk_histogram_data_get_bucket_index(uint64_t datapoint, uint32_t range)
{
uint32_t shift;
if (range == 0) {
shift = 0;
} else {
shift = range - 1;
}
return (datapoint >> shift) & SPDK_HISTOGRAM_BUCKET_MASK;
}
static inline void
spdk_histogram_data_tally(struct spdk_histogram_data *histogram, uint64_t datapoint)
{
uint32_t range = __spdk_histogram_data_get_bucket_range(datapoint);
uint32_t index = __spdk_histogram_data_get_bucket_index(datapoint, range);
histogram->bucket[range][index]++;
}
static inline uint64_t
__spdk_histogram_data_get_bucket_start(uint32_t range, uint32_t index)
{
uint64_t bucket;
index += 1;
if (range > 0) {
bucket = 1ULL << (range + SPDK_HISTOGRAM_BUCKET_SHIFT - 1);
bucket += (uint64_t)index << (range - 1);
} else {
bucket = index;
}
return bucket;
}
typedef void (*spdk_histogram_data_fn)(void *ctx, uint64_t start, uint64_t end, uint64_t count,
uint64_t total, uint64_t so_far);
static inline void
spdk_histogram_data_iterate(const struct spdk_histogram_data *histogram,
spdk_histogram_data_fn fn, void *ctx)
{
uint64_t i, j, count, so_far, total;
uint64_t bucket, last_bucket;
total = 0;
for (i = 0; i < SPDK_HISTOGRAM_NUM_BUCKET_RANGES; i++) {
for (j = 0; j < SPDK_HISTOGRAM_NUM_BUCKETS_PER_RANGE; j++) {
total += histogram->bucket[i][j];
}
}
so_far = 0;
bucket = 0;
for (i = 0; i < SPDK_HISTOGRAM_NUM_BUCKET_RANGES; i++) {
for (j = 0; j < SPDK_HISTOGRAM_NUM_BUCKETS_PER_RANGE; j++) {
count = histogram->bucket[i][j];
so_far += count;
last_bucket = bucket;
bucket = __spdk_histogram_data_get_bucket_start(i, j);
fn(ctx, last_bucket, bucket, count, total, so_far);
}
}
}
#ifdef __cplusplus
}
#endif
#endif
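To make the range/bucket arithmetic in the header's comment concrete, here is a small standalone check of the math. The sample datapoint of 3000 ticks is arbitrary, the expected values are worked out from the formulas above rather than taken from the commit, and the double-underscore helpers are the header's internal functions, called directly only for demonstration.

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include "spdk/histogram_data.h"

int
main(void)
{
	/* A datapoint of 3000 ticks has its highest set bit at bit 11, so
	 * __builtin_clzll(3000) == 52 and the range works out to
	 * SPDK_HISTOGRAM_BUCKET_LSB - 52 == 57 - 52 == 5, matching the table
	 * above (range 5 covers 2048 to 4095 in 16-tick buckets). */
	uint64_t datapoint = 3000;
	uint32_t range = __spdk_histogram_data_get_bucket_range(datapoint);
	uint32_t index = __spdk_histogram_data_get_bucket_index(datapoint, range);

	assert(range == 5);
	/* index = (3000 >> (range - 1)) & 127 = 187 & 127 = 59, so this
	 * datapoint lands in bucket[5][59], which spans [2992, 3008). */
	assert(index == 59);
	/* The iterate helper reports the bucket's exclusive upper boundary:
	 * 2^11 + (59 + 1) * 16 = 3008. */
	assert(__spdk_histogram_data_get_bucket_start(range, index) == 3008);

	printf("3000 ticks -> range %u, bucket %u\n", range, index);
	return 0;
}
```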