#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "gen.hh" #include "ntr.h" #include "defs.hh" #include "nm.hh" #include "storage/io_gen.hh" static inline uint64_t get_cur_ts_nano() { return std::chrono::duration_cast (std::chrono::high_resolution_clock::now().time_since_epoch()).count(); } /* * We'll use this struct to gather housekeeping hello_context to pass between * our events and callbacks. */ static constexpr unsigned long MAX_SPEC_LEN = 32; static constexpr unsigned long MAX_DEV_NAME_LEN = 32; static constexpr unsigned long MAX_OUTPUT_FILE_LEN = 256; struct options_t { // args int verbosity = NTR_LEVEL_DEFAULT; int num_threads = 1; unsigned long cpumask = 1; char pattern_spec[MAX_SPEC_LEN] = "R,100"; char ia_spec[MAX_SPEC_LEN] = "fixed"; unsigned int time = 5; unsigned int warmup = 2; unsigned int queue_depth = 1; char dev_name[MAX_DEV_NAME_LEN] = "Malloc0"; char driver_name[MAX_DEV_NAME_LEN] = "bdev"; unsigned int read_pct = 0; io_generator_address_mode addr_mode = IOGEN_ADDR_UNIFORM_RANDOM; char output_file[MAX_OUTPUT_FILE_LEN] = "output.txt"; unsigned long req_size = 4096; unsigned long rps = 0; }; std::atomic worker_thread_init_cnt(0); std::atomic worker_thread_stop_cnt(0); std::atomic worker_start(0); std::atomic worker_stop(0); static struct options_t options; struct io_record { uint64_t start_ts; uint64_t end_ts; }; struct io_request { uint64_t start_ts; io_generator_opcode op; char * user_buf; char * dma_buf; struct aiocb aio; }; struct thread_context { unsigned int tid; unsigned int coreid; unsigned int sockid; pthread_t sys_thread; int disk_fd; unsigned long start_region_offset; unsigned long start_region_length; /* modified by worker threads */ std::list *io_records; uint64_t overhead_avg; uint32_t overhead_cnt; uint64_t overhead_max; uint64_t overhead_min; }; static void dump_options() { ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: Options:\n" " dev name: %s\n" " driver name: %s\n" " worker threads: 0x%lx\n" " number of threads: %d\n" " IO request size: %lu\n" " IO requests per second: %lu\n" " IO pattern: %s\n" " IO queue depth: %d\n" " IO addressing mode: %d\n" " read percent: %u\n" " inter-arrival dist: %s\n" " run time: %d\n" " warmup time: %d\n" " output file: %s\n", options.dev_name, options.driver_name, options.cpumask, options.num_threads, options.req_size, options.rps, options.pattern_spec, options.queue_depth, options.addr_mode, options.read_pct, options.ia_spec, options.time, options.warmup, options.output_file ); } static void usage() { fprintf(stdout, " -V(VV): verbose mode\n" " -D: dev name\n" " -k: driver to use (default bdev)\n" " -a: worker threads spec (0x3 = spawn 2 threads on core 1 & 2)\n" " -b: IO request size\n" " -q: IO requests per second\n" " -P: IO request pattern\n" " -Q: IO request queue depth\n" " -I: inter-arrival time distribution\n" " -t: total run time\n" " -w: warm up time\n" " -o: latency response output file\n"); } static void * worker_thread_main(void * arg) { int rc = 0; auto *ctx = (struct thread_context *)arg; std::list free_ios; std::list prog_ios; Generator * ia_gen = nullptr; io_generator * io_gen = nullptr; struct io_generator_ctx io_ctx; uint64_t next_ts; uint64_t a_offset; uint64_t last_loop_ts = 0; ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: init...\n", ctx->tid); // create io request objects for (unsigned int i = 0; i < options.queue_depth; i++) { auto buf = (char *)nm_malloc(ctx->sockid, options.req_size); auto user_buf = (char *)nm_malloc(ctx->sockid, options.req_size); if (buf == nullptr || user_buf == nullptr) { ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: could not allocate buffers!\n", ctx->tid); rc = ENOMEM; goto cleanup; } auto io_req = new struct io_request; io_req->dma_buf = buf; io_req->user_buf = user_buf; io_req->aio.aio_fildes = ctx->disk_fd; io_req->aio.aio_nbytes = options.req_size; io_req->aio.aio_buf = buf; io_req->aio.aio_sigevent.sigev_notify = SIGEV_NONE; io_req->aio.aio_reqprio = 0; free_ios.push_back(io_req); } // init thread local states ia_gen = createGenerator(options.ia_spec); if (ia_gen == nullptr) { ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: could not allocate ia generator!\n", ctx->tid); rc = EINVAL; goto cleanup; } ia_gen->set_lambda((double)options.rps / (double)(options.num_threads)); io_gen = new io_generator(options.req_size, ctx->start_region_length, options.read_pct, options.addr_mode); if (io_gen == nullptr) { ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: could not allocate ia generator!\n", ctx->tid); rc = EINVAL; goto cleanup; } ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: init complete.\n", ctx->tid); worker_thread_init_cnt.fetch_add(1); ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: waiting for start...\n", ctx->tid); while (worker_start.load() == 0) {} ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: started...\n", ctx->tid); /* random delay 0-100 us */ usleep(nm_get_uptime_ns() % 100); next_ts = get_cur_ts_nano(); while (true) { uint64_t cur_ts = get_cur_ts_nano(); if (last_loop_ts > 0) { uint64_t overhead = cur_ts - last_loop_ts; if (ctx->overhead_max < overhead) { ctx->overhead_max = overhead; } if (ctx->overhead_min > overhead) { ctx->overhead_min = overhead; } ctx->overhead_avg = ctx->overhead_avg * ctx->overhead_cnt + overhead; ctx->overhead_cnt++; ctx->overhead_avg /= ctx->overhead_cnt; } last_loop_ts = cur_ts; // process io completion auto itr = prog_ios.begin(); while (itr != prog_ios.end()) { int err; struct io_request * ioreq = *itr; if ((err = aio_error(&ioreq->aio)) != EINPROGRESS) { if (err == 0) { auto rec = new struct io_record; rec->start_ts = ioreq->start_ts; rec->end_ts = cur_ts; ctx->io_records->push_back(rec); if (ioreq->op == IOGEN_READ) { memcpy(ioreq->user_buf, ioreq->dma_buf, options.req_size); } ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d : completed io request type %d\n", ctx->tid, ioreq->op); } else { ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "thread %d: aio failed with %d...\n", ctx->tid, err); } if (aio_return(&ioreq->aio) == -1) { ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: aio_return failed with %d...\n", ctx->tid, errno); exit(errno); } /* cleanup */ itr = prog_ios.erase(itr); free_ios.push_back(ioreq); } else { ++itr; } } if (worker_stop.load() == 1) { if (free_ios.size() >= options.queue_depth) { break; } } else { if (!free_ios.empty()) { auto io_req = free_ios.front(); cur_ts = get_cur_ts_nano(); if (cur_ts >= next_ts) { io_gen->issue(&io_ctx, io_req->dma_buf); a_offset = io_ctx.offset + ctx->start_region_offset; ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: issuing IO type %d at offset 0x%lx size 0x%lx...\n", ctx->tid, io_ctx.op, a_offset, io_ctx.size); io_req->start_ts = cur_ts; io_req->op = io_ctx.op; io_req->aio.aio_offset = a_offset; if(io_ctx.op == IOGEN_READ) { rc = aio_read(&io_req->aio); } else { rc = aio_write(&io_req->aio); } if (rc != 0) { ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "thread %d: failed to issue io %d, retrying...\n", ctx->tid, errno); } else { free_ios.pop_front(); prog_ios.push_back(io_req); next_ts = next_ts + ia_gen->generate() * S2NS; } } } } } cleanup: while (!free_ios.empty()) { auto req = free_ios.front(); free_ios.pop_front(); nm_free(ctx->sockid, req->dma_buf); nm_free(ctx->sockid, req->user_buf); } if (ia_gen != nullptr) { delete ia_gen; } if (io_gen != nullptr) { delete io_gen; } worker_thread_stop_cnt.fetch_add(1); ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: stopped...\n", ctx->tid); return nullptr; } static void parse_pattern(char * pattern, unsigned int * read_pct, io_generator_address_mode * addr_mode) { char * token = strtok(pattern, ","); if (strcmp(token, "M") == 0) { *addr_mode = IOGEN_ADDR_MONOTONIC_INCREASING; } else { *addr_mode = IOGEN_ADDR_UNIFORM_RANDOM; } token = strtok(nullptr, ","); *read_pct = strtoull(token, nullptr, 10); } static void birb_main() { int rc = 0; std::list worker_threads; std::ofstream output_file; unsigned long record_cutoff_time = 0; unsigned long current_s = 0; unsigned int total_reqs = 0; unsigned int tid = 0; unsigned long per_thread_cap = 0; int cur_core; int disk_fd; off_t disk_size; u_int disk_sec_size; /* initialize driver */ ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: initializing device driver for device %s\n", options.dev_name); disk_fd = open(options.dev_name, O_RDWR | O_DIRECT); if (disk_fd == -1) { ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to open device - %d\n", errno); exit(errno); } rc = ioctl(disk_fd, DIOCGMEDIASIZE, &disk_size); if (rc == -1) { ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to obtain disk size - %d\n", errno); exit(errno); } rc = ioctl(disk_fd, DIOCGSECTORSIZE, &disk_sec_size); if (rc == -1) { ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to obtain disk sector size - %d\n", errno); exit(errno); } per_thread_cap = disk_size / options.num_threads; ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: initialized device with capacity %zu bytes ~= %zu MB, sector %u bytes\n", disk_size, disk_size / 1024 / 1024, disk_sec_size); parse_pattern(options.pattern_spec, &options.read_pct, &options.addr_mode); dump_options(); output_file.open(options.output_file, std::ofstream::out); if (!output_file) { ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to open output file %s\n", options.output_file); rc = EINVAL; goto end; } cur_core = cmask_get_next_cpu(&options.cpumask); while(cur_core != NEXT_CPU_NULL) { auto * ctx = new struct thread_context; memset(ctx, 0, sizeof(struct thread_context)); if (ctx == NULL) { ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to alloc thread ctx.\n"); exit(ENOMEM); } ctx->tid = tid++; ctx->sockid = nm_get_node_from_core(cur_core); ctx->coreid = cur_core; ctx->io_records = new std::list(); ctx->start_region_length = per_thread_cap; ctx->start_region_offset = per_thread_cap * ctx->tid; ctx->disk_fd = disk_fd; // create sys thread pthread_attr_t attr; cpuset_t scpuset; CPU_ZERO(&scpuset); CPU_SET(cur_core, &scpuset); pthread_attr_init(&attr); pthread_attr_setaffinity_np(&attr, sizeof(cpuset_t), &scpuset); rc = pthread_create(&ctx->sys_thread, &attr, worker_thread_main, ctx); if (rc != 0) { ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to create sys thread: %d\n", rc); rc = EINVAL; goto end; } worker_threads.push_back(ctx); ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: created worker thread %d on core %d socket %d offset 0x%lx length %ld\n", ctx->tid, cur_core, ctx->sockid, ctx->start_region_offset, ctx->start_region_length); cur_core = cmask_get_next_cpu(&options.cpumask); } ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "main: waiting for worker thread init...\n"); while(worker_thread_init_cnt.load() < options.num_threads) { } ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "main: starting worker threads...\n"); worker_start.store(1); /* main event loop */ while(current_s < options.time) { if (current_s >= options.warmup && record_cutoff_time == 0) { record_cutoff_time = get_cur_ts_nano(); } usleep(1 * S2US); current_s++; } ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "main: stopping worker threads...\n"); worker_stop.store(1); while(worker_thread_stop_cnt.load() < options.num_threads) { } // keep stats for (struct thread_context * tctx : worker_threads) { uint64_t last_ts = 0; uint64_t processed = 0; for (struct io_record * r : *tctx->io_records) { if (r->start_ts >= record_cutoff_time) { if (r->end_ts > last_ts) { last_ts = r->end_ts; } processed++; output_file << r->end_ts - r->start_ts << std::endl; total_reqs++; } } ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: thread %d processed requests: %lu, last request %lu. Overhead - avg %lu min %lu max %lu\n", tctx->tid, processed, last_ts, tctx->overhead_avg, tctx->overhead_min, tctx->overhead_max); } ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: total requests: %u, bytes per second: %lu\n", total_reqs, total_reqs * options.req_size / (options.time - options.warmup)); end: if (disk_fd != -1) { close(disk_fd); } output_file.close(); for (struct thread_context * tctx : worker_threads) { for (struct io_record * r : *tctx->io_records) { delete r; } delete tctx->io_records; delete tctx; } return; } int main(int argc, char **argv) { ntr_init(); ntr_set_level(NTR_DEP_USER1, NTR_LEVEL_INFO); int c; while (( c = getopt(argc, argv, "VD:k:a:b:q:Q:P:I:t:w:o:")) != -1) { switch (c) { case 'V': ntr_set_level(NTR_DEP_USER1, ntr_get_level(NTR_DEP_USER1) + 1); break; case 'D': strncpy(options.dev_name, optarg, MAX_DEV_NAME_LEN); break; case 'k': strncpy(options.driver_name, optarg, MAX_DEV_NAME_LEN); break; case 'a': options.cpumask = strtoull(optarg, nullptr, 16); options.num_threads = cmask_get_num_cpus( options.cpumask); if (options.num_threads == 0) { fprintf(stderr, "must run at least one thread\n"); return EINVAL; } break; case 'b': options.req_size = strtoull( optarg, nullptr, 10); break; case 'q': options.rps = strtoull( optarg, nullptr, 10); break; case 'Q': options.queue_depth = strtoull( optarg, nullptr, 10); break; case 'P': strncpy(options.pattern_spec, optarg, MAX_SPEC_LEN); break; case 'I': strncpy(options.ia_spec, optarg, MAX_SPEC_LEN); break; case 't': options.time = strtoull( optarg, nullptr, 10); break; case 'w': options.warmup = strtoull( optarg, nullptr, 10); break; case 'o': strncpy(options.output_file, optarg, MAX_OUTPUT_FILE_LEN); break; case 'h': usage(); exit(0); default: usage(); exit(EINVAL); } } nm_init(options.verbosity); birb_main(); return 0; }