histogram: move nvme/perf histogram code to a common header
Signed-off-by: Jim Harris <james.r.harris@intel.com> Change-Id: Ia0083365b2da63cb38aebb9f7bbc02f4dfd1ae94 Reviewed-on: https://review.gerrithub.io/365263 Tested-by: SPDK Automated Test System <sys_sgsw@intel.com> Reviewed-by: Ben Walker <benjamin.walker@intel.com> Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
This commit is contained in:
parent
ad20a6ddc2
commit
5044e4f65a
@ -43,6 +43,7 @@
|
||||
#include "spdk/queue.h"
|
||||
#include "spdk/string.h"
|
||||
#include "spdk/nvme_intel.h"
|
||||
#include "spdk/histogram_data.h"
|
||||
|
||||
#if HAVE_LIBAIO
|
||||
#include <libaio.h>
|
||||
@ -81,41 +82,6 @@ struct ns_entry {
|
||||
char name[1024];
|
||||
};
|
||||
|
||||
/*
|
||||
* Latency tracking is done with ranges of bucket arrays. The bucket
|
||||
* for any given I/O is determined solely by the TSC delta - any
|
||||
* translation to microseconds is only done after the test is finished
|
||||
* and statistics are printed.
|
||||
*
|
||||
* Each range has a number of buckets determined by NUM_BUCKETS_PER_RANGE
|
||||
* which is 128. The buckets in ranges 0 and 1 each map to one specific
|
||||
* TSC delta. The buckets in subsequent ranges each map to twice as many
|
||||
* TSC deltas as buckets in the range before it:
|
||||
*
|
||||
* Range 0: 1 TSC each - 128 buckets cover 0 to 127 (2^7-1)
|
||||
* Range 1: 1 TSC each - 128 buckets cover 128 to 255 (2^8-1)
|
||||
* Range 2: 2 TSC each - 128 buckets cover 256 to 511 (2^9-1)
|
||||
* Range 3: 4 TSC each - 128 buckets cover 512 to 1023 (2^10-1)
|
||||
* Range 4: 8 TSC each - 128 buckets cover 1024 to 2047 (2^11-1)
|
||||
* Range 5: 16 TSC each - 128 buckets cover 2048 to 4095 (2^12-1)
|
||||
* ...
|
||||
* Range 55: 2^54 TSC each - 128 buckets cover 2^61 to 2^62-1
|
||||
* Range 56: 2^55 TSC each - 128 buckets cover 2^62 to 2^63-1
|
||||
* Range 57: 2^56 TSC each - 128 buckets cover 2^63 to 2^64-1
|
||||
*
|
||||
* On a 2.3GHz processor, this strategy results in 50ns buckets in the
|
||||
* 7-14us range (sweet spot for Intel Optane SSD latency testing).
|
||||
*
|
||||
* Buckets can be made more granular by increasing BUCKET_SHIFT. This
|
||||
* comes at the cost of additional storage per namespace context to
|
||||
* store the bucket data.
|
||||
*/
|
||||
/* log2 of the number of buckets in each range. */
#define BUCKET_SHIFT 7

/* Index of the last range; ranges are numbered 0..BUCKET_LSB. */
#define BUCKET_LSB (64 - BUCKET_SHIFT)

/* Number of buckets in every range (2^BUCKET_SHIFT). */
#define NUM_BUCKETS_PER_RANGE (1ULL << BUCKET_SHIFT)

/* Mask that extracts a bucket index from a (shifted) TSC delta. */
#define BUCKET_MASK (NUM_BUCKETS_PER_RANGE - 1)

/* Total number of ranges needed to cover a 64-bit TSC delta. */
#define NUM_BUCKET_RANGES (BUCKET_LSB + 1)
|
||||
|
||||
static const double g_latency_cutoffs[] = {
|
||||
0.01,
|
||||
0.10,
|
||||
@ -157,7 +123,7 @@ struct ns_worker_ctx {
|
||||
|
||||
struct ns_worker_ctx *next;
|
||||
|
||||
uint64_t bucket[NUM_BUCKET_RANGES][NUM_BUCKETS_PER_RANGE];
|
||||
struct spdk_histogram_data histogram;
|
||||
};
|
||||
|
||||
struct perf_task {
|
||||
@ -215,63 +181,6 @@ static int g_aio_optind; /* Index of first AIO filename in argv */
|
||||
static void
|
||||
task_complete(struct perf_task *task);
|
||||
|
||||
static uint32_t
|
||||
get_bucket_range(uint64_t tsc)
|
||||
{
|
||||
uint32_t clz, range;
|
||||
|
||||
assert(tsc != 0);
|
||||
|
||||
clz = __builtin_clzll(tsc);
|
||||
|
||||
if (clz <= BUCKET_LSB) {
|
||||
range = BUCKET_LSB - clz;
|
||||
} else {
|
||||
range = 0;
|
||||
}
|
||||
|
||||
return range;
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
get_bucket_index(uint64_t tsc, uint32_t range)
|
||||
{
|
||||
uint32_t shift;
|
||||
|
||||
if (range == 0) {
|
||||
shift = 0;
|
||||
} else {
|
||||
shift = range - 1;
|
||||
}
|
||||
|
||||
return (tsc >> shift) & BUCKET_MASK;
|
||||
}
|
||||
|
||||
static double
|
||||
get_us_from_bucket(uint32_t range, uint32_t index)
|
||||
{
|
||||
uint64_t tsc;
|
||||
|
||||
index += 1;
|
||||
if (range > 0) {
|
||||
tsc = 1ULL << (range + BUCKET_SHIFT - 1);
|
||||
tsc += (uint64_t)index << (range - 1);
|
||||
} else {
|
||||
tsc = index;
|
||||
}
|
||||
|
||||
return (double)tsc * 1000 * 1000 / g_tsc_rate;
|
||||
}
|
||||
|
||||
static void
|
||||
track_latency(struct ns_worker_ctx *ns_ctx, uint64_t tsc)
|
||||
{
|
||||
uint32_t range = get_bucket_range(tsc);
|
||||
uint32_t index = get_bucket_index(tsc, range);
|
||||
|
||||
ns_ctx->bucket[range][index]++;
|
||||
}
|
||||
|
||||
static void
|
||||
register_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns)
|
||||
{
|
||||
@ -607,7 +516,7 @@ task_complete(struct perf_task *task)
|
||||
ns_ctx->max_tsc = tsc_diff;
|
||||
}
|
||||
if (g_latency_sw_tracking_level > 0) {
|
||||
track_latency(ns_ctx, tsc_diff);
|
||||
spdk_histogram_data_tally(&ns_ctx->histogram, tsc_diff);
|
||||
}
|
||||
rte_mempool_put(task_pool, task);
|
||||
|
||||
@ -792,6 +701,41 @@ static void usage(char *program_name)
|
||||
printf("\t[-i shared memory group ID]\n");
|
||||
}
|
||||
|
||||
static void
|
||||
check_cutoff(void *ctx, uint64_t start, uint64_t end, uint64_t count,
|
||||
uint64_t total, uint64_t so_far)
|
||||
{
|
||||
double so_far_pct;
|
||||
double **cutoff = ctx;
|
||||
|
||||
if (count == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
so_far_pct = (double)so_far / total;
|
||||
while (so_far_pct >= **cutoff && **cutoff > 0) {
|
||||
printf("%8.4f%% : %9.3fus\n", **cutoff * 100, (double)end * 1000 * 1000 / g_tsc_rate);
|
||||
(*cutoff)++;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
print_bucket(void *ctx, uint64_t start, uint64_t end, uint64_t count,
|
||||
uint64_t total, uint64_t so_far)
|
||||
{
|
||||
double so_far_pct;
|
||||
|
||||
if (count == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
so_far_pct = (double)so_far * 100 / total;
|
||||
printf("%9.3f - %9.3f: %9.4f%% (%9ju)\n",
|
||||
(double)start * 1000 * 1000 / g_tsc_rate,
|
||||
(double)end * 1000 * 1000 / g_tsc_rate,
|
||||
so_far_pct, count);
|
||||
}
|
||||
|
||||
static void
|
||||
print_performance(void)
|
||||
{
|
||||
@ -858,27 +802,13 @@ print_performance(void)
|
||||
while (worker) {
|
||||
ns_ctx = worker->ns_ctx;
|
||||
while (ns_ctx) {
|
||||
uint64_t i, j, so_far = 0;
|
||||
double so_far_pct = 0, bucket = 0;
|
||||
const double *cutoff = g_latency_cutoffs;
|
||||
|
||||
printf("Summary latency data for %-43.43s from core %u:\n", ns_ctx->entry->name, worker->lcore);
|
||||
printf("=================================================================================\n");
|
||||
|
||||
for (i = 0; i < NUM_BUCKET_RANGES; i++) {
|
||||
for (j = 0; j < NUM_BUCKETS_PER_RANGE; j++) {
|
||||
so_far += ns_ctx->bucket[i][j];
|
||||
so_far_pct = (double)so_far / total_io_completed;
|
||||
bucket = get_us_from_bucket(i, j);
|
||||
if (ns_ctx->bucket[i][j] == 0) {
|
||||
continue;
|
||||
}
|
||||
while (so_far_pct >= *cutoff && *cutoff > 0) {
|
||||
printf("%8.4f%% : %9.3fus\n", *cutoff * 100, bucket);
|
||||
cutoff++;
|
||||
}
|
||||
}
|
||||
}
|
||||
spdk_histogram_data_iterate(&ns_ctx->histogram, check_cutoff, &cutoff);
|
||||
|
||||
printf("\n");
|
||||
ns_ctx = ns_ctx->next;
|
||||
}
|
||||
@ -893,27 +823,11 @@ print_performance(void)
|
||||
while (worker) {
|
||||
ns_ctx = worker->ns_ctx;
|
||||
while (ns_ctx) {
|
||||
uint64_t i, j, so_far = 0;
|
||||
float so_far_pct = 0;
|
||||
double last_bucket, bucket = 0;
|
||||
|
||||
printf("Latency histogram for %-43.43s from core %u:\n", ns_ctx->entry->name, worker->lcore);
|
||||
printf("==============================================================================\n");
|
||||
printf(" Range in us Cumulative IO count\n");
|
||||
|
||||
for (i = 0; i < NUM_BUCKET_RANGES; i++) {
|
||||
for (j = 0; j < NUM_BUCKETS_PER_RANGE; j++) {
|
||||
so_far += ns_ctx->bucket[i][j];
|
||||
so_far_pct = (float)so_far * 100 / total_io_completed;
|
||||
last_bucket = bucket;
|
||||
bucket = get_us_from_bucket(i, j);
|
||||
if (ns_ctx->bucket[i][j] == 0) {
|
||||
continue;
|
||||
}
|
||||
printf("%9.3f - %9.3f: %9.4f%% (%9ju)\n",
|
||||
last_bucket, bucket, so_far_pct, ns_ctx->bucket[i][j]);
|
||||
}
|
||||
}
|
||||
spdk_histogram_data_iterate(&ns_ctx->histogram, print_bucket, NULL);
|
||||
printf("\n");
|
||||
ns_ctx = ns_ctx->next;
|
||||
}
|
||||
@ -1376,6 +1290,7 @@ associate_workers_with_ns(void)
|
||||
ns_ctx->min_tsc = UINT64_MAX;
|
||||
ns_ctx->entry = entry;
|
||||
ns_ctx->next = worker->ns_ctx;
|
||||
spdk_histogram_data_reset(&ns_ctx->histogram);
|
||||
worker->ns_ctx = ns_ctx;
|
||||
|
||||
worker = worker->next;
|
||||
|
189
include/spdk/histogram_data.h
Normal file
189
include/spdk/histogram_data.h
Normal file
@ -0,0 +1,189 @@
|
||||
/*-
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright (c) Intel Corporation.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* * Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file
|
||||
* Generic histogram library
|
||||
*/
|
||||
|
||||
#ifndef _SPDK_HISTOGRAM_DATA_H_
|
||||
#define _SPDK_HISTOGRAM_DATA_H_
|
||||
|
||||
#include "spdk/stdinc.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/** log2 of the number of buckets in each range. */
#define SPDK_HISTOGRAM_BUCKET_SHIFT		7

/** Index of the last range; ranges are numbered 0..SPDK_HISTOGRAM_BUCKET_LSB. */
#define SPDK_HISTOGRAM_BUCKET_LSB		(64 - SPDK_HISTOGRAM_BUCKET_SHIFT)

/** Number of buckets in every range (2^SPDK_HISTOGRAM_BUCKET_SHIFT). */
#define SPDK_HISTOGRAM_NUM_BUCKETS_PER_RANGE	(1ULL << SPDK_HISTOGRAM_BUCKET_SHIFT)

/** Mask that extracts a bucket index from a (shifted) datapoint. */
#define SPDK_HISTOGRAM_BUCKET_MASK		(SPDK_HISTOGRAM_NUM_BUCKETS_PER_RANGE - 1)

/** Total number of ranges needed to cover a 64-bit datapoint. */
#define SPDK_HISTOGRAM_NUM_BUCKET_RANGES	(SPDK_HISTOGRAM_BUCKET_LSB + 1)
|
||||
|
||||
/*
|
||||
* SPDK histograms are implemented using ranges of bucket arrays. The most common usage
|
||||
* model is using TSC datapoints to capture an I/O latency histogram. For this usage model,
|
||||
* the histogram tracks only TSC deltas - any translation to microseconds is done by the
|
||||
* histogram user calling spdk_histogram_data_iterate() to iterate over the buckets to perform
|
||||
* the translations.
|
||||
*
|
||||
* Each range has a number of buckets determined by SPDK_HISTOGRAM_NUM_BUCKETS_PER_RANGE
|
||||
* which is 128. The buckets in ranges 0 and 1 each map to one specific datapoint value.
|
||||
* The buckets in subsequent ranges each map to twice as many datapoint values as buckets
|
||||
* in the range before it:
|
||||
*
|
||||
* Range 0: 1 value each - 128 buckets cover 0 to 127 (2^7-1)
|
||||
* Range 1: 1 value each - 128 buckets cover 128 to 255 (2^8-1)
|
||||
* Range 2: 2 values each - 128 buckets cover 256 to 511 (2^9-1)
|
||||
* Range 3: 4 values each - 128 buckets cover 512 to 1023 (2^10-1)
|
||||
* Range 4: 8 values each - 128 buckets cover 1024 to 2047 (2^11-1)
|
||||
* Range 5: 16 values each - 128 buckets cover 2048 to 4095 (2^12-1)
|
||||
* ...
|
||||
* Range 55: 2^54 values each - 128 buckets cover 2^61 to 2^62-1
|
||||
* Range 56: 2^55 values each - 128 buckets cover 2^62 to 2^63-1
|
||||
* Range 57: 2^56 values each - 128 buckets cover 2^63 to 2^64-1
|
||||
*
|
||||
* On a 2.3GHz processor, this strategy results in 50ns buckets in the 7-14us range (sweet
|
||||
* spot for Intel Optane SSD latency testing).
|
||||
*
|
||||
* Buckets can be made more granular by increasing SPDK_HISTOGRAM_BUCKET_SHIFT. This
* comes at the cost of additional storage per histogram to store the bucket data.
|
||||
*/
|
||||
|
||||
struct spdk_histogram_data {
|
||||
|
||||
uint64_t bucket[SPDK_HISTOGRAM_NUM_BUCKET_RANGES][SPDK_HISTOGRAM_NUM_BUCKETS_PER_RANGE];
|
||||
|
||||
};
|
||||
|
||||
static inline void
|
||||
spdk_histogram_data_reset(struct spdk_histogram_data *histogram)
|
||||
{
|
||||
memset(histogram, 0, sizeof(*histogram));
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
__spdk_histogram_data_get_bucket_range(uint64_t datapoint)
|
||||
{
|
||||
uint32_t clz, range;
|
||||
|
||||
assert(datapoint != 0);
|
||||
|
||||
clz = __builtin_clzll(datapoint);
|
||||
|
||||
if (clz <= SPDK_HISTOGRAM_BUCKET_LSB) {
|
||||
range = SPDK_HISTOGRAM_BUCKET_LSB - clz;
|
||||
} else {
|
||||
range = 0;
|
||||
}
|
||||
|
||||
return range;
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
__spdk_histogram_data_get_bucket_index(uint64_t datapoint, uint32_t range)
|
||||
{
|
||||
uint32_t shift;
|
||||
|
||||
if (range == 0) {
|
||||
shift = 0;
|
||||
} else {
|
||||
shift = range - 1;
|
||||
}
|
||||
|
||||
return (datapoint >> shift) & SPDK_HISTOGRAM_BUCKET_MASK;
|
||||
}
|
||||
|
||||
static inline void
|
||||
spdk_histogram_data_tally(struct spdk_histogram_data *histogram, uint64_t datapoint)
|
||||
{
|
||||
uint32_t range = __spdk_histogram_data_get_bucket_range(datapoint);
|
||||
uint32_t index = __spdk_histogram_data_get_bucket_index(datapoint, range);
|
||||
|
||||
histogram->bucket[range][index]++;
|
||||
}
|
||||
|
||||
static inline uint64_t
|
||||
__spdk_histogram_data_get_bucket_start(uint32_t range, uint32_t index)
|
||||
{
|
||||
uint64_t bucket;
|
||||
|
||||
index += 1;
|
||||
if (range > 0) {
|
||||
bucket = 1ULL << (range + SPDK_HISTOGRAM_BUCKET_SHIFT - 1);
|
||||
bucket += (uint64_t)index << (range - 1);
|
||||
} else {
|
||||
bucket = index;
|
||||
}
|
||||
|
||||
return bucket;
|
||||
}
|
||||
|
||||
/**
 * Callback invoked by spdk_histogram_data_iterate() once per bucket.
 *
 * \param ctx Opaque pointer supplied to spdk_histogram_data_iterate().
 * \param start Lower boundary of the bucket, in datapoint units.
 * \param end Upper boundary of the bucket, in datapoint units.
 * \param count Number of datapoints tallied in this bucket (may be 0).
 * \param total Number of datapoints tallied in the whole histogram.
 * \param so_far Cumulative number of datapoints up to and including this bucket.
 */
typedef void (*spdk_histogram_data_fn)(void *ctx,
				       uint64_t start, uint64_t end,
				       uint64_t count, uint64_t total,
				       uint64_t so_far);
|
||||
|
||||
static inline void
|
||||
spdk_histogram_data_iterate(const struct spdk_histogram_data *histogram,
|
||||
spdk_histogram_data_fn fn, void *ctx)
|
||||
{
|
||||
uint64_t i, j, count, so_far, total;
|
||||
uint64_t bucket, last_bucket;
|
||||
|
||||
total = 0;
|
||||
|
||||
for (i = 0; i < SPDK_HISTOGRAM_NUM_BUCKET_RANGES; i++) {
|
||||
for (j = 0; j < SPDK_HISTOGRAM_NUM_BUCKETS_PER_RANGE; j++) {
|
||||
total += histogram->bucket[i][j];
|
||||
}
|
||||
}
|
||||
|
||||
so_far = 0;
|
||||
bucket = 0;
|
||||
|
||||
for (i = 0; i < SPDK_HISTOGRAM_NUM_BUCKET_RANGES; i++) {
|
||||
for (j = 0; j < SPDK_HISTOGRAM_NUM_BUCKETS_PER_RANGE; j++) {
|
||||
count = histogram->bucket[i][j];
|
||||
so_far += count;
|
||||
last_bucket = bucket;
|
||||
bucket = __spdk_histogram_data_get_bucket_start(i, j);
|
||||
fn(ctx, last_bucket, bucket, count, total, so_far);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user