// numam/storage/birb.cc — SPDK-based block-device IO latency benchmark ("birb").
#include <sys/endian.h>
#include <sys/errno.h>
#include <sys/types.h>
#include <getopt.h>
#include <pthread.h>
#include <pthread_np.h>
#include <threads.h>
#include <unistd.h>
#include <cerrno>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <chrono>
#include <list>
#include <set>
#include "rte_lcore.h"
#include "spdk/cpuset.h"
#include "spdk/stdinc.h"
#include "spdk/thread.h"
#include "spdk/bdev.h"
#include "spdk/env.h"
#include "spdk/event.h"
#include "spdk/log.h"
#include "spdk/string.h"
#include "spdk/bdev_zone.h"
#include "gen.h"
#include "ntr.h"
#include "defs.h"
#include "nm.h"
#include "storage/io_gen.hh"
// Current time in nanoseconds since the high_resolution_clock epoch.
static inline uint64_t get_cur_ts_nano()
{
    auto now = std::chrono::high_resolution_clock::now();
    auto elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(now.time_since_epoch());
    return static_cast<uint64_t>(elapsed.count());
}
/*
 * Housekeeping state passed between SPDK events and callbacks
 * (structure adapted from SPDK's hello_world example).
 */
// Fixed buffer sizes for the CLI-provided strings below.
static constexpr unsigned long MAX_SPEC_LEN = 32;
static constexpr unsigned long MAX_BDEV_NAME_LEN = 32;
static constexpr unsigned long MAX_OUTPUT_FILE_LEN = 256;
// Run-time configuration, populated from the command line by parse_arg().
struct options_t {
    // args
    int verbosity = NTR_LEVEL_DEFAULT;      // log verbosity (NOTE(review): never updated by -V; confirm intended)
    int num_threads = 1;                    // derived from cpumask popcount
    unsigned long cpumask = 1;              // hex bitmask of cores to run workers on (-a)
    char pattern_spec[MAX_SPEC_LEN];        // IO pattern spec, e.g. "M,100" (-P)
    char ia_spec[MAX_SPEC_LEN];             // inter-arrival distribution spec (-I)
    unsigned int time = 5;                  // total run time in seconds (-t)
    unsigned int warmup = 2;                // warm-up seconds excluded from stats (-w)
    unsigned int queue_depth = 1;           // per-thread outstanding IO limit (-Q)
    char bdev_name[MAX_BDEV_NAME_LEN];      // SPDK bdev to open (-D)
    char output_file[MAX_OUTPUT_FILE_LEN] = "output.txt"; // latency output path (-o)
    unsigned long req_size = 4096;          // per-IO size in bytes (-b)
    unsigned long rps = 0;                  // aggregate requests per second target (-q)
};
// Counters the main thread polls on; incremented by messages sent from
// worker SPDK threads (cb_notify_main_init / cb_notify_main_stop).
struct main_thread_cb_vars {
    uint32_t worker_thread_init_cnt;  // workers that finished initialization
    uint32_t worker_thread_stop_cnt;  // workers that finished shutdown
};
// Per-worker state reachable from SPDK message/completion callbacks via the
// thread-local cb_vars pointer.
struct worker_thread_cb_vars {
    uint32_t worker_start;                      // set to 1 by main to begin IO issue
    uint32_t worker_stop;                       // set to 1 by main to drain and exit
    struct thread_context * ctx;                // owning worker's context
    std::list<struct io_request *> * free_ios;  // recycled IO request slots
};
// Thread-local: points at main_thread_cb_vars on the main thread and at
// worker_thread_cb_vars on worker threads; callbacks cast it accordingly.
static __thread void * cb_vars;
// Global run-time options (filled in by parse_arg before the app starts).
static struct options_t options;
// One completed IO: submit/complete timestamps (ns) used for latency output.
struct io_record {
    uint64_t start_ts;
    uint64_t end_ts;
};
// One in-flight IO slot; queue_depth of these are preallocated per worker.
struct io_request {
    uint64_t start_ts;          // submission timestamp (ns)
    io_generator_opcode op;     // IOGEN_READ or write
    char * user_buf;            // user-space buffer (reads are copied here)
    char * dma_buf;             // DMA-able buffer handed to SPDK
};
// Per-worker context, written by main before thread start and treated as
// read-only by the worker thread afterwards (except io_records).
struct thread_context {
    unsigned int tid;                   // logical worker id (0..num_threads-1)
    unsigned int coreid;                // core this worker is pinned to
    unsigned int sockid;                // NUMA socket of coreid (for allocations)
    pthread_t sys_thread;               // underlying OS thread
    char thread_name[32];               // "birb_wrk_<tid>"
    struct spdk_thread *main_thread;    // target for notify-main messages
    struct spdk_bdev *bdev;             // opened device (shared)
    struct spdk_bdev_desc *bdev_desc;   // descriptor (shared)
    struct spdk_thread *s_thread;       // this worker's SPDK thread
    const char * ia_gen_desc;           // inter-arrival generator spec string
    unsigned long start_region_offset;  // byte offset of this worker's region
    unsigned long start_region_length;  // byte length of this worker's region
    unsigned int read_pct;              // percentage of reads in the pattern
    io_generator_address_mode addr_mode; // monotonic vs uniform-random addressing
    std::list<io_record *> *io_records; // completed-IO latency records (worker-written)
};
/*
 * Log the effective run-time configuration at INFO level.
 * Reads only the global `options`; no side effects beyond logging.
 */
static void dump_options()
{
    // BUGFIX: queue_depth/time/warmup are unsigned int, so they must be
    // printed with %u (passing them to %d is a format-specifier mismatch).
    ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: Options:\n"
        " bdev name: %s\n"
        " worker threads: 0x%lx\n"
        " number of threads: %d\n"
        " IO request size: %lu\n"
        " IO requests per second: %lu\n"
        " IO pattern: %s\n"
        " IO queue depth: %u\n"
        " inter-arrival dist: %s\n"
        " run time: %u\n"
        " warmup time: %u\n"
        " output file: %s\n",
        options.bdev_name,
        options.cpumask,
        options.num_threads,
        options.req_size,
        options.rps,
        options.pattern_spec,
        options.queue_depth,
        options.ia_spec,
        options.time,
        options.warmup,
        options.output_file
    );
}
/*
 * Print the help text for the custom command-line flags to stdout.
 */
static void usage()
{
    fputs(
        " -V(VV): verbose mode\n"
        " -D: bdev name\n"
        " -a: worker threads spec (0x3 = spawn 2 threads on core 1 & 2)\n"
        " -b: IO request size\n"
        " -q: IO requests per second\n"
        " -P: IO request pattern\n"
        " -Q: IO request queue depth\n"
        " -I: inter-arrival time distribution\n"
        " -t: total run time\n"
        " -w: warm up time\n"
        " -o: latency response output file\n",
        stdout);
}
/*
 * Callback handed to spdk_app_parse_args() for each custom option.
 * `c` is the option character, `arg` its argument (SPDK passes optarg).
 * Returns 0 on success, EINVAL on unknown flag or invalid value.
 */
static int parse_arg(int c, char *arg)
{
    switch (c) {
    case 'V':
        // each -V raises the log verbosity by one level
        ntr_set_level(NTR_DEP_USER1,
            ntr_get_level(NTR_DEP_USER1) + 1);
        break;
    case 'D':
        // BUGFIX: strncpy with the full buffer size does not guarantee
        // NUL-termination; snprintf always terminates (when size > 0).
        snprintf(options.bdev_name, MAX_BDEV_NAME_LEN, "%s", arg);
        break;
    case 'a':
        // consistently use `arg` (the parameter) instead of mixing in optarg
        options.cpumask = strtoull(arg, nullptr, 16);
        options.num_threads = cmask_get_num_cpus(options.cpumask);
        if (options.num_threads == 0) {
            fprintf(stderr, "must run at least one thread\n");
            return EINVAL;
        }
        break;
    case 'b':
        options.req_size = strtoull(arg, nullptr, 10);
        break;
    case 'q':
        options.rps = strtoull(arg, nullptr, 10);
        break;
    case 'Q':
        options.queue_depth = strtoull(arg, nullptr, 10);
        break;
    case 'P':
        snprintf(options.pattern_spec, MAX_SPEC_LEN, "%s", arg);
        break;
    case 'I':
        snprintf(options.ia_spec, MAX_SPEC_LEN, "%s", arg);
        break;
    case 't':
        options.time = strtoull(arg, nullptr, 10);
        break;
    case 'w':
        options.warmup = strtoull(arg, nullptr, 10);
        break;
    case 'o':
        snprintf(options.output_file, MAX_OUTPUT_FILE_LEN, "%s", arg);
        break;
    case 'h':
    default:
        return EINVAL;
    }
    return 0;
}
/*
 * IO completion callback; runs on the issuing worker's SPDK thread.
 * On success, records a latency sample and (for reads) copies the DMA
 * buffer into the user buffer. In all cases the io_request slot is
 * recycled onto the worker's free list so the submit loop can reuse it.
 */
static void
worker_io_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
    // cb_vars is thread-local; on workers it points at worker_thread_cb_vars
    auto vars = (struct worker_thread_cb_vars *)cb_vars;
    auto req = (struct io_request *)cb_arg;
    spdk_bdev_free_io(bdev_io);
    uint64_t end_ts = get_cur_ts_nano();
    if (!success) {
        // XXX: print warning for errors for now
        ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "thread %d <worker_read_complete>: io request failed\n", vars->ctx->tid);
    } else {
        auto rec = new struct io_record;
        rec->start_ts = req->start_ts;
        rec->end_ts = end_ts;
        vars->ctx->io_records->push_back(rec);
        if (req->op == IOGEN_READ) {
            // surface read data to "user space" to mimic a real consumer
            memcpy(req->user_buf, req->dma_buf, options.req_size);
        }
        // NOTE(review): log tag says "worker_read_complete" even for writes
        ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d <worker_read_complete>: completed io request type %d\n", vars->ctx->tid, req->op);
    }
    vars->free_ios->push_back(req);
}
/*
 * Bdev event callback required by spdk_bdev_open_ext(). This tool does not
 * handle hot-remove/resize events — just log the event type and ignore it.
 */
static void
bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev * bdev UNUSED,
    void * event_ctx UNUSED)
{
    ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "unsupported bdev event: type %d\n", type);
}
static void
cb_notify_main_init(void * arg)
{
auto * ctx = (struct thread_context *)arg;
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "cb_notify_main_init: from thread %d to main.\n", ctx->tid);
auto * vars = (struct main_thread_cb_vars *) cb_vars;
vars->worker_thread_init_cnt++;
}
static void
cb_notify_main_stop(void * arg)
{
auto * ctx = (struct thread_context *)arg;
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "cb_notify_main_stop: from thread %d to main.\n", ctx->tid);
auto * vars = (struct main_thread_cb_vars *) cb_vars;
vars->worker_thread_stop_cnt++;
}
static void
cb_notify_worker_start(void * arg)
{
auto * ctx = (struct thread_context *)arg;
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "cb_notify_worker_start: from main to thread %d.\n", ctx->tid);
auto * vars = (struct worker_thread_cb_vars *) cb_vars;
vars->worker_start = 1;
}
static void
cb_notify_worker_stop(void * arg)
{
auto * ctx = (struct thread_context *)arg;
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "cb_notify_worker_stop: from main to thread %d.\n", ctx->tid);
auto * vars = (struct worker_thread_cb_vars *) cb_vars;
vars->worker_stop = 1;
}
/*
 * Reset the main thread's progress counters before workers are spawned.
 */
static void
main_thread_cb_vars_init(struct main_thread_cb_vars * vars)
{
    vars->worker_thread_stop_cnt = 0;
    vars->worker_thread_init_cnt = 0;
}
/*
 * Initialize a worker's callback-visible state: clear both flags and wire
 * up the pointers the SPDK callbacks need (context + free-slot list).
 */
static void
worker_thread_cb_vars_init(struct worker_thread_cb_vars * vars, struct thread_context * ctx,
    std::list<struct io_request *> * free_ios)
{
    vars->ctx = ctx;
    vars->free_ios = free_ios;
    vars->worker_start = 0;
    vars->worker_stop = 0;
}
/*
 * Worker thread body (one OS thread per core in the cpumask).
 * Lifecycle: allocate queue_depth IO slots -> notify main (init done) ->
 * wait for start flag -> open-loop issue IOs paced by the inter-arrival
 * generator until the stop flag is set and all IOs have drained -> clean
 * up and notify main (stopped). Returns nullptr; errors are reported via
 * spdk_app_stop(rc).
 */
static void *
worker_thread_main(void * arg)
{
    int rc = 0;
    struct worker_thread_cb_vars vars;
    auto *ctx = (struct thread_context *)arg;
    struct spdk_io_channel *io_channel = nullptr;
    const unsigned long buf_align = spdk_bdev_get_buf_align(ctx->bdev);
    std::list<struct io_request *> free_ios;
    Generator * ia_gen = nullptr;
    io_generator * io_gen = nullptr;
    struct io_generator_ctx io_ctx;
    uint64_t next_ts;
    uint64_t a_offset;
    ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: init...\n", ctx->tid);
    // associate current thread with spdk thread
    spdk_set_thread(ctx->s_thread);
    // create io request objects (queue_depth slots, recycled on completion)
    for (unsigned int i = 0; i < options.queue_depth; i++) {
        auto dma_buf = (char *)spdk_dma_zmalloc_socket(options.req_size, buf_align, NULL, ctx->sockid);
        auto user_buf = (char *)nm_malloc(ctx->sockid, options.req_size);
        if (dma_buf == nullptr || user_buf == nullptr) {
            // NOTE(review): if exactly one of the two allocations succeeded,
            // that one buffer is leaked here — confirm acceptable on OOM path
            ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: could not allocate buffers!\n", ctx->tid);
            rc = ENOMEM;
            goto cleanup;
        }
        auto io_req = new struct io_request;
        io_req->dma_buf = dma_buf;
        io_req->user_buf = user_buf;
        free_ios.push_back(io_req);
    }
    // init thread local states so SPDK callbacks can find this worker's state
    worker_thread_cb_vars_init(&vars, ctx, &free_ios);
    cb_vars = &vars;
    // obtain io channel (per-thread handle for submitting IO to the bdev)
    io_channel = spdk_bdev_get_io_channel(ctx->bdev_desc);
    if (io_channel == nullptr) {
        ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: could not create bdev I/O channel!\n", ctx->tid);
        rc = EINVAL;
        goto cleanup;
    }
    // inter-arrival time generator; lambda is this thread's share of total rps
    ia_gen = createGenerator(ctx->ia_gen_desc);
    if (ia_gen == nullptr) {
        ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: could not allocate ia generator!\n", ctx->tid);
        rc = EINVAL;
        goto cleanup;
    }
    ia_gen->set_lambda((double)options.rps / (double)(options.num_threads));
    io_gen = new io_generator(options.req_size, ctx->start_region_length, ctx->read_pct, ctx->addr_mode);
    if (io_gen == nullptr) {
        // NOTE(review): plain `new` throws rather than returning nullptr, so
        // this branch is effectively dead; message also says "ia generator"
        ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: could not allocate ia generator!\n", ctx->tid);
        rc = EINVAL;
        goto cleanup;
    }
    ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: init complete.\n", ctx->tid);
    // tell main we are ready, then spin-poll until main raises worker_start
    if ((rc = spdk_thread_send_msg(ctx->main_thread, cb_notify_main_init, ctx)) != 0) {
        ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: could not send message %d\n", ctx->tid, rc);
        goto cleanup;
    }
    ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: waiting for start...\n", ctx->tid);
    while (vars.worker_start != 1) {
        spdk_thread_poll(spdk_get_thread(), 0, 0);
    }
    ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: started...\n", ctx->tid);
    /* random delay 0-100 ms */
    // NOTE(review): rand() * nm_get_uptime_ns() can overflow int before the
    // modulo; result is still a bounded delay, but confirm this is intended
    usleep(((rand() * nm_get_uptime_ns()) % 100) * 1000);
    next_ts = get_cur_ts_nano();
    // main issue loop: poll completions, and while running, submit a new IO
    // whenever a free slot exists and the pacing deadline has passed
    while (true) {
        spdk_thread_poll(spdk_get_thread(), 0, 0);
        if (vars.worker_stop != 0) {
            // stop requested: exit only once every slot has completed (drained)
            if (free_ios.size() >= options.queue_depth) {
                break;
            }
        } else {
            if (!free_ios.empty()) {
                auto io_req = free_ios.front();
                uint64_t cur_ts = get_cur_ts_nano();
                // open-loop pacing: issue when wall clock passes next_ts
                if (cur_ts >= next_ts) {
                    io_gen->issue(&io_ctx, io_req->dma_buf);
                    // translate generator-relative offset to absolute device offset
                    a_offset = io_ctx.offset + ctx->start_region_offset;
                    ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: issuing IO type %d at offset 0x%lx size 0x%lx...\n", ctx->tid, io_ctx.op, a_offset, io_ctx.size);
                    io_req->start_ts = cur_ts;
                    io_req->op = io_ctx.op;
                    if(io_ctx.op == IOGEN_READ) {
                        rc = spdk_bdev_read(ctx->bdev_desc, io_channel, io_req->dma_buf,
                            a_offset, io_ctx.size, worker_io_complete, io_req);
                    } else {
                        rc = spdk_bdev_write(ctx->bdev_desc, io_channel, io_req->dma_buf,
                            a_offset, io_ctx.size, worker_io_complete, io_req);
                    }
                    if (rc != 0) {
                        // submit failed (e.g. queue full): keep the slot and retry later
                        ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "thread %d: failed to issue io %d, retrying...", ctx->tid, rc);
                    } else {
                        free_ios.pop_front();
                        // advance the deadline by a fresh inter-arrival sample
                        next_ts = next_ts + ia_gen->generate() * S2NS;
                    }
                }
            }
        }
    }
cleanup:
    // free all recycled IO slots (all are back on the list once drained)
    while (!free_ios.empty()) {
        auto req = free_ios.front();
        free_ios.pop_front();
        spdk_dma_free(req->dma_buf);
        nm_free(ctx->sockid, req->user_buf);
    }
    if (io_channel != nullptr) {
        spdk_put_io_channel(io_channel);
    }
    if (ia_gen != nullptr) {
        delete ia_gen;
    }
    if (io_gen != nullptr) {
        delete io_gen;
    }
    ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: stopped...\n", ctx->tid);
    if (rc != 0) {
        // NOTE(review): stops the whole app before notifying main below —
        // confirm main's stop-count wait cannot deadlock on this path
        spdk_app_stop(rc);
    }
    if ((rc = spdk_thread_send_msg(ctx->main_thread, cb_notify_main_stop, ctx)) != 0) {
        ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: could not send message %d\n", ctx->tid, rc);
    }
    return nullptr;
}
/*
 * Parse a pattern spec of the form "<mode>,<read_pct>", e.g. "M,70".
 * Mode "M" selects monotonically increasing addresses; anything else
 * selects uniform-random. Mutates `pattern` in place (strtok).
 * BUGFIX: the original dereferenced strtok() results without NULL checks,
 * which is undefined behavior on an empty or comma-less spec; malformed
 * input now falls back to uniform-random / 0% reads.
 */
static void
parse_pattern(char * pattern, int * read_pct, io_generator_address_mode * addr_mode)
{
    // defaults used when the spec is missing pieces
    *addr_mode = IOGEN_ADDR_UNIFORM_RANDOM;
    *read_pct = 0;
    char * token = strtok(pattern, ",");
    if (token == nullptr) {
        return;
    }
    if (strcmp(token, "M") == 0) {
        *addr_mode = IOGEN_ADDR_MONOTONIC_INCREASING;
    }
    token = strtok(nullptr, ",");
    if (token != nullptr) {
        *read_pct = strtoull(token, nullptr, 10);
    }
}
/*
 * Log a comma-separated list of every bdev currently registered with SPDK.
 */
static void
print_all_bdev()
{
    ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: All registered block devices: ");
    for (struct spdk_bdev * dev = spdk_bdev_first(); dev != NULL; dev = spdk_bdev_next(dev)) {
        ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "%s, ", spdk_bdev_get_name(dev));
    }
}
static void
birb_main(void * arg1 UNUSED)
{
int rc = 0;
struct spdk_bdev * bdev;
struct spdk_bdev_desc * bdev_desc;
std::list<struct thread_context *> worker_threads;
std::ofstream output_file;
uint64_t total_reqs = 0;
io_generator_address_mode addr_mode = IOGEN_ADDR_MONOTONIC_INCREASING;
int read_pct = 0;
struct main_thread_cb_vars vars;
main_thread_cb_vars_init(&vars);
cb_vars = &vars;
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: successfully started the application\n");
dump_options();
/* process spec */
parse_pattern(options.pattern_spec, &read_pct, &addr_mode);
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: read percent %d, address mode %d\n", read_pct, addr_mode);
/*
* There can be many bdevs configured, but this application will only use
* the one input by the user at runtime.
*
* Open the bdev by calling spdk_bdev_open_ext() with its name.
* The function will return a descriptor
*/
print_all_bdev();
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: opening block device %s\n", options.bdev_name);
rc = spdk_bdev_open_ext(options.bdev_name, true, bdev_event_cb, NULL, &bdev_desc);
if (rc != 0) {
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to open bdev: %d\n", rc);
spdk_app_stop(rc);
return;
}
output_file.open(options.output_file, std::ofstream::out);
if (!output_file) {
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to open output file %s\n", options.output_file);
spdk_app_stop(EINVAL);
return;
}
/* A bdev pointer is valid while the bdev is opened. */
bdev = spdk_bdev_desc_get_bdev(bdev_desc);
const uint32_t blk_size = spdk_bdev_get_block_size(bdev);
const unsigned long bdev_capacity = blk_size * spdk_bdev_get_num_blocks(bdev);
const unsigned long per_thread_cap = bdev_capacity / options.num_threads;
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: bdev block size %d bytes, # blocks %lu, per_thread_cap %lu\n", blk_size, spdk_bdev_get_num_blocks(bdev), per_thread_cap);
unsigned long record_cutoff_time = 0;
unsigned long current_s = 0;
/* create worker threads */
unsigned int tid = 0;
int cur_core = cmask_get_next_cpu(&options.cpumask);
struct spdk_cpuset * cpuset = spdk_cpuset_alloc();
if (cpuset == NULL) {
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to alloc cpuset\n");
rc = ENOMEM;
goto end;
}
while(cur_core != NEXT_CPU_NULL) {
auto * ctx = new struct thread_context;
memset(ctx, 0, sizeof(struct thread_context));
if (ctx == NULL) {
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to alloc thread ctx.\n");
spdk_app_stop(ENOMEM);
return;
}
ctx->tid = tid++;
ctx->main_thread = spdk_get_thread();
snprintf(ctx->thread_name, 32, "birb_wrk_%d", ctx->tid);
ctx->sockid = rte_lcore_to_socket_id(cur_core);
ctx->coreid = cur_core;
ctx->bdev = bdev;
ctx->bdev_desc = bdev_desc;
ctx->io_records = new std::list<struct io_record *>();
ctx->start_region_length = per_thread_cap;
ctx->start_region_offset = per_thread_cap * ctx->tid;
ctx->ia_gen_desc = options.ia_spec;
ctx->addr_mode = addr_mode;
ctx->read_pct = read_pct;
// create spdk thread
spdk_cpuset_zero(cpuset);
spdk_cpuset_set_cpu(cpuset, cur_core, true);
ctx->s_thread = spdk_thread_create(ctx->thread_name, cpuset);
if (ctx->s_thread == nullptr) {
rc = ENOMEM;
goto end;
}
// create sys thread
pthread_attr_t attr;
cpuset_t scpuset;
CPU_ZERO(&scpuset);
CPU_SET(cur_core, &scpuset);
pthread_attr_init(&attr);
pthread_attr_setaffinity_np(&attr, sizeof(cpuset_t), &scpuset);
rc = pthread_create(&ctx->sys_thread, NULL, worker_thread_main, ctx);
if (rc != 0) {
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to create sys thread: %d\n", rc);
spdk_app_stop(EINVAL);
return;
}
worker_threads.push_back(ctx);
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: created worker thread %d on core %d socket %d offset 0x%lx length %ld\n", ctx->tid, cur_core, ctx->sockid,
ctx->start_region_offset,
ctx->start_region_length);
cur_core = cmask_get_next_cpu(&options.cpumask);
}
spdk_cpuset_free(cpuset);
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "main: waiting for worker thread preinit...\n");
while(vars.worker_thread_init_cnt < (uint32_t)options.num_threads) {
spdk_thread_poll(spdk_get_thread(), 0, 0);
}
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "main: starting worker threads...\n");
for (struct thread_context * tctx : worker_threads) {
rc = spdk_thread_send_msg(tctx->s_thread, cb_notify_worker_start, tctx);
if (rc != 0) {
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to send message %d\n", rc);
goto end;
}
}
while(current_s < options.time) {
if (current_s >= options.warmup && record_cutoff_time == 0) {
record_cutoff_time = get_cur_ts_nano();
}
usleep(1 * S2US);
current_s++;
}
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "main: stopping worker threads...\n");
for (struct thread_context * tctx : worker_threads) {
rc = spdk_thread_send_msg(tctx->s_thread, cb_notify_worker_stop, tctx);
if (rc != 0) {
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to send message %d\n", rc);
goto end;
}
}
while(vars.worker_thread_stop_cnt < (uint32_t)options.num_threads) {
spdk_thread_poll(spdk_get_thread(), 0, 0);
}
// keep stats
for (struct thread_context * tctx : worker_threads) {
for (struct io_record * r : *tctx->io_records) {
if (r->start_ts >= record_cutoff_time) {
output_file << r->end_ts - r->start_ts << std::endl;
total_reqs++;
}
}
}
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: total requests: %lu, bytes per second: %lu\n", total_reqs, total_reqs * options.req_size / (options.time - options.warmup));
end:
output_file.close();
for (struct thread_context * tctx : worker_threads) {
for (struct io_record * r : *tctx->io_records) {
delete r;
}
spdk_thread_destroy(tctx->s_thread);
delete tctx->io_records;
delete tctx;
}
if (bdev_desc != nullptr) {
spdk_bdev_close(bdev_desc);
}
exit(0);
spdk_app_stop(rc);
return;
}
/*
 * Process entry point: initialize logging, parse SPDK built-in plus custom
 * CLI flags, then boot the SPDK app framework which runs birb_main() on
 * the reactor. Returns the rc propagated through spdk_app_stop().
 */
int
main(int argc, char **argv)
{
    struct spdk_app_opts opts = {};
    int rc = 0;
    ntr_init();
    // default log level; each -V raises it by one inside parse_arg
    ntr_set_level(NTR_DEP_USER1, NTR_LEVEL_INFO);
    /* Set default values in opts structure. */
    spdk_app_opts_init(&opts, sizeof(opts));
    opts.name = "birb";
    /*
     * Parse built-in SPDK command line parameters as well
     * as our custom one(s).
     */
    if ((rc = spdk_app_parse_args(argc, argv, &opts, "VD:a:b:q:Q:P:I:t:w:o:", NULL, parse_arg,
        usage)) != SPDK_APP_PARSE_ARGS_SUCCESS) {
        exit(rc);
    }
    // NOTE(review): options.verbosity is never changed by parse_arg (-V
    // adjusts the ntr level directly), so nm_init always receives the
    // default — confirm this is intended
    nm_init(options.verbosity);
    /*
     * spdk_app_start() initializes the SPDK framework, calls birb_main(),
     * and then blocks until spdk_app_stop() is called (or, on an
     * initialization error, returns rc without calling birb_main()).
     */
    rc = spdk_app_start(&opts, birb_main, NULL);
    if (rc) {
        SPDK_ERRLOG("ERROR starting application\n");
    }
    /* At this point either spdk_app_stop() was called, or spdk_app_start()
     * failed because of internal error.
     */
    /* Gracefully close out all of the SPDK subsystems. */
    spdk_app_fini();
    return rc;
}