#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "rte_lcore.h" #include "spdk/cpuset.h" #include "spdk/stdinc.h" #include "spdk/thread.h" #include "spdk/env.h" #include "spdk/event.h" #include "spdk/log.h" #include "spdk/string.h" #include "gen.hh" #include "ntr.h" #include "defs.hh" #include "nm.hh" #include "storage/io_gen.hh" #include "storage/drivers/driver.hh" #include "storage/drivers/bdev.hh" #include "storage/drivers/nvme.hh" static inline uint64_t get_cur_ts_nano() { return std::chrono::duration_cast (std::chrono::high_resolution_clock::now().time_since_epoch()).count(); } /* * We'll use this struct to gather housekeeping hello_context to pass between * our events and callbacks. */ static constexpr unsigned long MAX_SPEC_LEN = 32; static constexpr unsigned long MAX_DEV_NAME_LEN = 32; static constexpr unsigned long MAX_OUTPUT_FILE_LEN = 256; struct options_t { // args int verbosity = NTR_LEVEL_DEFAULT; int num_threads = 1; unsigned long cpumask = 1; char pattern_spec[MAX_SPEC_LEN] = "R,100"; char ia_spec[MAX_SPEC_LEN] = "fixed"; unsigned int time = 5; unsigned int warmup = 2; unsigned int queue_depth = 1; char dev_name[MAX_DEV_NAME_LEN] = "Malloc0"; char driver_name[MAX_DEV_NAME_LEN] = "bdev"; unsigned int read_pct = 0; io_generator_address_mode addr_mode = IOGEN_ADDR_UNIFORM_RANDOM; char output_file[MAX_OUTPUT_FILE_LEN] = "output.txt"; unsigned long req_size = 4096; unsigned long rps = 0; }; struct main_thread_cb_vars { uint32_t worker_thread_init_cnt; uint32_t worker_thread_stop_cnt; }; struct worker_thread_cb_vars { uint32_t worker_start; uint32_t worker_stop; struct thread_context * ctx; std::list * free_ios; }; static __thread void * cb_vars; static struct options_t options; struct io_record { uint64_t start_ts; uint64_t end_ts; }; struct io_request { uint64_t start_ts; io_generator_opcode op; char * user_buf; char * dma_buf; }; struct thread_context { unsigned int tid; unsigned int coreid; unsigned int sockid; pthread_t sys_thread; struct spdk_thread * main_thread; birb_driver * driver; unsigned long start_region_offset; unsigned long start_region_length; /* modified by worker threads */ struct spdk_thread * sp_thread; std::list *io_records; uint64_t overhead_avg; uint32_t overhead_cnt; uint64_t overhead_max; uint64_t overhead_min; }; static void dump_options() { ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: Options:\n" " dev name: %s\n" " driver name: %s\n" " worker threads: 0x%lx\n" " number of threads: %d\n" " IO request size: %lu\n" " IO requests per second: %lu\n" " IO pattern: %s\n" " IO queue depth: %d\n" " IO addressing mode: %d\n" " read percent: %u\n" " inter-arrival dist: %s\n" " run time: %d\n" " warmup time: %d\n" " output file: %s\n", options.dev_name, options.driver_name, options.cpumask, options.num_threads, options.req_size, options.rps, options.pattern_spec, options.queue_depth, options.addr_mode, options.read_pct, options.ia_spec, options.time, options.warmup, options.output_file ); } static void usage() { fprintf(stdout, " -V(VV): verbose mode\n" " -D: dev name\n" " -k: driver to use (default bdev)\n" " -a: worker threads spec (0x3 = spawn 2 threads on core 1 & 2)\n" " -b: IO request size\n" " -q: IO requests per second\n" " -P: IO request pattern\n" " -Q: IO request queue depth\n" " -I: inter-arrival time distribution\n" " -t: total run time\n" " -w: warm up time\n" " -o: latency response output file\n"); } static int parse_arg(int c, char *arg) { switch (c) { case 'V': ntr_set_level(NTR_DEP_USER1, ntr_get_level(NTR_DEP_USER1) + 1); break; case 'D': strncpy(options.dev_name, arg, MAX_DEV_NAME_LEN); break; case 'k': strncpy(options.driver_name, arg, MAX_DEV_NAME_LEN); break; case 'a': options.cpumask = strtoull(optarg, nullptr, 16); options.num_threads = cmask_get_num_cpus( options.cpumask); if (options.num_threads == 0) { fprintf(stderr, "must run at least one thread\n"); return EINVAL; } break; case 'b': options.req_size = strtoull( optarg, nullptr, 10); break; case 'q': options.rps = strtoull( optarg, nullptr, 10); break; case 'Q': options.queue_depth = strtoull( optarg, nullptr, 10); break; case 'P': strncpy(options.pattern_spec, optarg, MAX_SPEC_LEN); break; case 'I': strncpy(options.ia_spec, optarg, MAX_SPEC_LEN); break; case 't': options.time = strtoull( optarg, nullptr, 10); break; case 'w': options.warmup = strtoull( optarg, nullptr, 10); break; case 'o': strncpy(options.output_file, optarg, MAX_OUTPUT_FILE_LEN); break; case 'h': default: return EINVAL; } return 0; } static birb_driver * birb_create_driver(const char * driver_name, void * context) { if (strcmp(driver_name, "bdev") == 0) { return new birb_bdev_driver(reinterpret_cast(context)); } else if (strcmp(driver_name, "nvme") == 0) { return new birb_nvme_driver(reinterpret_cast(context)); } else { return nullptr; } } static birb_driver_thread_context * birb_create_thread_context(birb_driver * driver) { if (driver->get_type() == birb_driver::BIRB_DRV_BDEV) { return new birb_bdev_thread_context(dynamic_cast(driver)); } else if (driver->get_type() == birb_driver::BIRB_DRV_NVME) { return new birb_nvme_thread_context(dynamic_cast(driver)); } else { return nullptr; } } static void birb_destroy_driver(birb_driver * drv) { delete drv; } static void birb_destroy_thread_context(birb_driver_thread_context * ctx) { delete ctx; } /* * Callback function for io completion. */ static void worker_io_complete(bool success, void *cb_arg) { auto vars = (struct worker_thread_cb_vars *)cb_vars; auto req = (struct io_request *)cb_arg; uint64_t end_ts = get_cur_ts_nano(); if (!success) { // XXX: print warning for errors for now ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "thread %d : io request failed\n", vars->ctx->tid); } else { auto rec = new struct io_record; rec->start_ts = req->start_ts; rec->end_ts = end_ts; vars->ctx->io_records->push_back(rec); if (req->op == IOGEN_READ) { memcpy(req->user_buf, req->dma_buf, options.req_size); } ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d : completed io request type %d\n", vars->ctx->tid, req->op); } vars->free_ios->push_back(req); } static void cb_notify_main_init(void * arg) { auto * ctx = (struct thread_context *)arg; ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "cb_notify_main_init: from thread %d to main.\n", ctx->tid); auto * vars = (struct main_thread_cb_vars *) cb_vars; vars->worker_thread_init_cnt++; } static void cb_notify_main_stop(void * arg) { auto * ctx = (struct thread_context *)arg; ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "cb_notify_main_stop: from thread %d to main.\n", ctx->tid); auto * vars = (struct main_thread_cb_vars *) cb_vars; vars->worker_thread_stop_cnt++; } static void cb_notify_worker_start(void * arg) { auto * ctx = (struct thread_context *)arg; ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "cb_notify_worker_start: from main to thread %d.\n", ctx->tid); auto * vars = (struct worker_thread_cb_vars *) cb_vars; vars->worker_start = 1; } static void cb_notify_worker_stop(void * arg) { auto * ctx = (struct thread_context *)arg; ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "cb_notify_worker_stop: from main to thread %d.\n", ctx->tid); auto * vars = (struct worker_thread_cb_vars *) cb_vars; vars->worker_stop = 1; } static void main_thread_cb_vars_init(struct main_thread_cb_vars * vars) { vars->worker_thread_init_cnt = 0; vars->worker_thread_stop_cnt = 0; } static void worker_thread_cb_vars_init(struct worker_thread_cb_vars * vars, struct thread_context * ctx, std::list * free_ios) { vars->worker_start = 0; vars->worker_stop = 0; vars->ctx = ctx; vars->free_ios = free_ios; } static void * worker_thread_main(void * arg) { int rc = 0; constexpr static unsigned int SPDK_THREAD_NAME_SZ = 16; struct worker_thread_cb_vars vars; auto *ctx = (struct thread_context *)arg; birb_driver_thread_context * driver_thread_ctx; std::list free_ios; char spdk_thread_name[SPDK_THREAD_NAME_SZ]; struct spdk_cpuset * cpuset; Generator * ia_gen = nullptr; io_generator * io_gen = nullptr; struct io_generator_ctx io_ctx; uint64_t next_ts; uint64_t a_offset; uint64_t last_loop_ts = 0; ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: init...\n", ctx->tid); ctx->overhead_avg = 0; ctx->overhead_cnt = 0; ctx->overhead_max = 0; ctx->overhead_min = UINT64_MAX; // create spdk thread cpuset = spdk_cpuset_alloc(); if (cpuset == nullptr) { ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to alloc cpuset\n"); rc = ENOMEM; goto cleanup; } spdk_cpuset_zero(cpuset); spdk_cpuset_set_cpu(cpuset, ctx->coreid, true); snprintf(spdk_thread_name, SPDK_THREAD_NAME_SZ, "birb_worker_%u", ctx->tid); ctx->sp_thread = spdk_thread_create(spdk_thread_name, cpuset); if (ctx->sp_thread == nullptr) { rc = ENOMEM; goto cleanup; } spdk_set_thread(ctx->sp_thread); // create thread context driver_thread_ctx = birb_create_thread_context(ctx->driver); if (driver_thread_ctx == nullptr || driver_thread_ctx->get_status() != birb_driver::BIRB_SUCCESS) { ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: could not create thread context!\n", ctx->tid); rc = EINVAL; goto cleanup; } // create io request objects for (unsigned int i = 0; i < options.queue_depth; i++) { auto dma_buf = (char *)spdk_dma_zmalloc_socket(options.req_size, ctx->driver->get_align(), NULL, ctx->sockid); auto user_buf = (char *)nm_malloc(ctx->sockid, options.req_size); if (dma_buf == nullptr || user_buf == nullptr) { ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: could not allocate buffers!\n", ctx->tid); rc = ENOMEM; goto cleanup; } auto io_req = new struct io_request; io_req->dma_buf = dma_buf; io_req->user_buf = user_buf; free_ios.push_back(io_req); } // init thread local states worker_thread_cb_vars_init(&vars, ctx, &free_ios); cb_vars = &vars; ia_gen = createGenerator(options.ia_spec); if (ia_gen == nullptr) { ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: could not allocate ia generator!\n", ctx->tid); rc = EINVAL; goto cleanup; } ia_gen->set_lambda((double)options.rps / (double)(options.num_threads)); io_gen = new io_generator(options.req_size, ctx->start_region_length, options.read_pct, options.addr_mode); if (io_gen == nullptr) { ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: could not allocate ia generator!\n", ctx->tid); rc = EINVAL; goto cleanup; } ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: init complete.\n", ctx->tid); if ((rc = spdk_thread_send_msg(ctx->main_thread, cb_notify_main_init, ctx)) != 0) { ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: could not send message %d\n", ctx->tid, rc); goto cleanup; } ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: waiting for start...\n", ctx->tid); while (vars.worker_start != 1) { spdk_thread_poll(spdk_get_thread(), 0, 0); } ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: started...\n", ctx->tid); /* random delay 0-100 us */ usleep(nm_get_uptime_ns() % 100); next_ts = get_cur_ts_nano(); while (true) { uint64_t cur_loop_ts = get_cur_ts_nano(); if (last_loop_ts > 0) { uint64_t overhead = cur_loop_ts - last_loop_ts; if (ctx->overhead_max < overhead) { ctx->overhead_max = overhead; } if (ctx->overhead_min > overhead) { ctx->overhead_min = overhead; } ctx->overhead_avg = ctx->overhead_avg * ctx->overhead_cnt + overhead; ctx->overhead_cnt++; ctx->overhead_avg /= ctx->overhead_cnt; } last_loop_ts = cur_loop_ts; spdk_thread_poll(spdk_get_thread(), 0, 0); driver_thread_ctx->poll(); if (vars.worker_stop != 0) { if (free_ios.size() >= options.queue_depth) { break; } } else { if (!free_ios.empty()) { auto io_req = free_ios.front(); uint64_t cur_ts = get_cur_ts_nano(); if (cur_ts >= next_ts) { io_gen->issue(&io_ctx, io_req->dma_buf); a_offset = io_ctx.offset + ctx->start_region_offset; ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: issuing IO type %d at offset 0x%lx size 0x%lx...\n", ctx->tid, io_ctx.op, a_offset, io_ctx.size); io_req->start_ts = cur_ts; io_req->op = io_ctx.op; if(io_ctx.op == IOGEN_READ) { rc = driver_thread_ctx->read(a_offset, io_ctx.size, io_req->dma_buf, worker_io_complete, io_req); } else { rc = driver_thread_ctx->write(a_offset, io_ctx.size, io_req->dma_buf, worker_io_complete, io_req); } if (rc != 0) { ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "thread %d: failed to issue io %d, retrying...", ctx->tid, rc); } else { free_ios.pop_front(); next_ts = next_ts + ia_gen->generate() * S2NS; } } } } } cleanup: while (!free_ios.empty()) { auto req = free_ios.front(); free_ios.pop_front(); spdk_dma_free(req->dma_buf); nm_free(ctx->sockid, req->user_buf); } if (ia_gen != nullptr) { delete ia_gen; } if (io_gen != nullptr) { delete io_gen; } if (cpuset != nullptr) { spdk_cpuset_free(cpuset); } if (driver_thread_ctx != nullptr) { birb_destroy_thread_context(driver_thread_ctx); } if (rc == 0) { if ((rc = spdk_thread_send_msg(ctx->main_thread, cb_notify_main_stop, ctx)) != 0) { ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: could not send message %d\n", ctx->tid, rc); } } spdk_thread_exit(ctx->sp_thread); while (!spdk_thread_is_exited(ctx->sp_thread)) { spdk_thread_poll(ctx->sp_thread, 0, 0); }; if (ctx->sp_thread != nullptr) { spdk_set_thread(nullptr); spdk_thread_destroy(ctx->sp_thread); } ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: stopped...\n", ctx->tid); if (rc != 0) { spdk_app_stop(rc); } return nullptr; } static void parse_pattern(char * pattern, unsigned int * read_pct, io_generator_address_mode * addr_mode) { char * token = strtok(pattern, ","); if (strcmp(token, "M") == 0) { *addr_mode = IOGEN_ADDR_MONOTONIC_INCREASING; } else { *addr_mode = IOGEN_ADDR_UNIFORM_RANDOM; } token = strtok(nullptr, ","); *read_pct = strtoull(token, nullptr, 10); } static void birb_main(void * arg1 UNUSED) { int rc = 0; std::list worker_threads; std::ofstream output_file; struct main_thread_cb_vars vars; birb_driver * drv = nullptr; unsigned long record_cutoff_time = 0; unsigned long current_s = 0; unsigned int total_reqs = 0; unsigned int tid = 0; unsigned long per_thread_cap = 0; int cur_core; /* initialize driver */ ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: initializing device driver for device %s\n", options.dev_name); drv = birb_create_driver(options.driver_name, options.dev_name); if (drv == nullptr || drv->get_status() != birb_driver::BIRB_SUCCESS) { ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to create device driver.\n"); rc = EINVAL; goto end; } per_thread_cap = drv->get_capacity() / options.num_threads; ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: initialized device with capacity %zu bytes ~= %zu MB\n", drv->get_capacity(), drv->get_capacity() / 1024 / 1024); /* misc init */ main_thread_cb_vars_init(&vars); cb_vars = &vars; parse_pattern(options.pattern_spec, &options.read_pct, &options.addr_mode); dump_options(); output_file.open(options.output_file, std::ofstream::out); if (!output_file) { ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to open output file %s\n", options.output_file); rc = EINVAL; goto end; } cur_core = cmask_get_next_cpu(&options.cpumask); while(cur_core != NEXT_CPU_NULL) { auto * ctx = new struct thread_context; memset(ctx, 0, sizeof(struct thread_context)); if (ctx == NULL) { ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to alloc thread ctx.\n"); spdk_app_stop(ENOMEM); return; } ctx->tid = tid++; ctx->driver = drv; ctx->main_thread = spdk_get_thread(); ctx->sockid = rte_lcore_to_socket_id(cur_core); ctx->coreid = cur_core; ctx->io_records = new std::list(); ctx->start_region_length = per_thread_cap; ctx->start_region_offset = per_thread_cap * ctx->tid; // create sys thread pthread_attr_t attr; cpuset_t scpuset; CPU_ZERO(&scpuset); CPU_SET(cur_core, &scpuset); pthread_attr_init(&attr); pthread_attr_setaffinity_np(&attr, sizeof(cpuset_t), &scpuset); rc = pthread_create(&ctx->sys_thread, &attr, worker_thread_main, ctx); if (rc != 0) { ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to create sys thread: %d\n", rc); rc = EINVAL; goto end; } worker_threads.push_back(ctx); ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: created worker thread %d on core %d socket %d offset 0x%lx length %ld\n", ctx->tid, cur_core, ctx->sockid, ctx->start_region_offset, ctx->start_region_length); cur_core = cmask_get_next_cpu(&options.cpumask); } ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "main: waiting for worker thread init...\n"); while(vars.worker_thread_init_cnt < (uint32_t)options.num_threads) { spdk_thread_poll(spdk_get_thread(), 0, 0); } ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "main: starting worker threads...\n"); for (struct thread_context * tctx : worker_threads) { rc = spdk_thread_send_msg(tctx->sp_thread, cb_notify_worker_start, tctx); if (rc != 0) { ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to send message %d\n", rc); goto end; } } /* main event loop */ while(current_s < options.time) { if (current_s >= options.warmup && record_cutoff_time == 0) { record_cutoff_time = get_cur_ts_nano(); } usleep(1 * S2US); current_s++; } ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "main: stopping worker threads...\n"); for (struct thread_context * tctx : worker_threads) { rc = spdk_thread_send_msg(tctx->sp_thread, cb_notify_worker_stop, tctx); if (rc != 0) { ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to send message %d\n", rc); goto end; } } while(vars.worker_thread_stop_cnt < (uint32_t)options.num_threads) { spdk_thread_poll(spdk_get_thread(), 0, 0); } // keep stats for (struct thread_context * tctx : worker_threads) { uint64_t last_ts = 0; uint64_t processed = 0; for (struct io_record * r : *tctx->io_records) { if (r->start_ts >= record_cutoff_time) { if (r->end_ts > last_ts) { last_ts = r->end_ts; } processed++; output_file << r->end_ts - r->start_ts << std::endl; total_reqs++; } } ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: thread %d processed requests: %lu, last request %lu. Overhead - avg %lu min %lu max %lu\n", tctx->tid, processed, last_ts, tctx->overhead_avg, tctx->overhead_min, tctx->overhead_max); } ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: total requests: %u, bytes per second: %lu\n", total_reqs, total_reqs * options.req_size / (options.time - options.warmup)); end: if (drv != nullptr) { birb_destroy_driver(drv); } output_file.close(); for (struct thread_context * tctx : worker_threads) { for (struct io_record * r : *tctx->io_records) { delete r; } delete tctx->io_records; delete tctx; } exit(0); spdk_app_stop(rc); return; } int main(int argc, char **argv) { struct spdk_app_opts opts = {}; int rc = 0; ntr_init(); ntr_set_level(NTR_DEP_USER1, NTR_LEVEL_INFO); /* Set default values in opts structure. */ spdk_app_opts_init(&opts, sizeof(opts)); opts.name = "birb"; /* * Parse built-in SPDK command line parameters as well * as our custom one(s). */ if ((rc = spdk_app_parse_args(argc, argv, &opts, "VD:k:a:b:q:Q:P:I:t:w:o:", NULL, parse_arg, usage)) != SPDK_APP_PARSE_ARGS_SUCCESS) { exit(rc); } nm_init(options.verbosity); /* * spdk_app_start() will initialize the SPDK framework, call hello_start(), * and then block until spdk_app_stop() is called (or if an initialization * error occurs, spdk_app_start() will return with rc even without calling * hello_start(). */ rc = spdk_app_start(&opts, birb_main, NULL); if (rc) { SPDK_ERRLOG("ERROR starting application\n"); } /* At this point either spdk_app_stop() was called, or spdk_app_start() * failed because of internal error. */ /* Gracefully close out all of the SPDK subsystems. */ spdk_app_fini(); return rc; }