587 lines
15 KiB
C++
587 lines
15 KiB
C++
#include <sys/endian.h>
|
|
#include <sys/errno.h>
|
|
#include <sys/signal.h>
|
|
#include <sys/types.h>
|
|
#include <fcntl.h>
|
|
#include <getopt.h>
|
|
#include <pthread.h>
|
|
#include <pthread_np.h>
|
|
#include <threads.h>
|
|
#include <unistd.h>
|
|
#include <aio.h>
|
|
#include <getopt.h>
|
|
#include <sys/ioctl.h>
|
|
#include <sys/disk.h>
|
|
|
|
#include <cerrno>
|
|
#include <cstddef>
|
|
#include <cstdio>
|
|
#include <cstdlib>
|
|
#include <cstring>
|
|
#include <fstream>
|
|
#include <chrono>
|
|
#include <list>
|
|
#include <set>
|
|
|
|
#include "gen.hh"
|
|
#include "ntr.h"
|
|
#include "defs.hh"
|
|
#include "nm.hh"
|
|
#include "storage/io_gen.hh"
|
|
|
|
/*
 * Returns the current timestamp in nanoseconds.
 *
 * The value is read from std::chrono::steady_clock, which is guaranteed to be
 * monotonic, so the difference between two readings (request latency, loop
 * overhead) can never go backwards when the wall clock is adjusted.
 * high_resolution_clock gives no such guarantee. The result is only
 * meaningful relative to other values returned by this function.
 */
static inline uint64_t get_cur_ts_nano()
{
	const auto since_epoch = std::chrono::steady_clock::now().time_since_epoch();

	return std::chrono::duration_cast<std::chrono::nanoseconds>(since_epoch).count();
}
|
|
|
|
/*
 * Command-line configuration: the size limits of the fixed-length string
 * options, followed by the options structure itself.
 */
|
|
static constexpr unsigned long MAX_SPEC_LEN = 32;
|
|
static constexpr unsigned long MAX_DEV_NAME_LEN = 32;
|
|
static constexpr unsigned long MAX_OUTPUT_FILE_LEN = 256;
|
|
struct options_t {
|
|
// args
|
|
int verbosity = NTR_LEVEL_DEFAULT;
|
|
int num_threads = 1;
|
|
unsigned long cpumask = 1;
|
|
char pattern_spec[MAX_SPEC_LEN] = "R,100";
|
|
char ia_spec[MAX_SPEC_LEN] = "fixed";
|
|
|
|
unsigned int time = 5;
|
|
unsigned int warmup = 2;
|
|
unsigned int queue_depth = 1;
|
|
char dev_name[MAX_DEV_NAME_LEN] = "Malloc0";
|
|
char driver_name[MAX_DEV_NAME_LEN] = "bdev";
|
|
unsigned int read_pct = 0;
|
|
io_generator_address_mode addr_mode = IOGEN_ADDR_UNIFORM_RANDOM;
|
|
|
|
char output_file[MAX_OUTPUT_FILE_LEN] = "output.txt";
|
|
|
|
unsigned long req_size = 4096;
|
|
unsigned long rps = 0;
|
|
};
|
|
|
|
|
|
std::atomic<int> worker_thread_init_cnt(0);
|
|
std::atomic<int> worker_thread_stop_cnt(0);
|
|
std::atomic<int> worker_start(0);
|
|
std::atomic<int> worker_stop(0);
|
|
static struct options_t options;
|
|
|
|
/*
 * Latency sample of one successfully completed IO. Both fields are
 * get_cur_ts_nano() timestamps in nanoseconds.
 */
struct io_record {
	uint64_t start_ts;	/* when the request was issued */
	uint64_t end_ts;	/* when the completion was observed */
};
|
|
|
|
struct io_request {
|
|
uint64_t start_ts;
|
|
io_generator_opcode op;
|
|
char * user_buf;
|
|
char * dma_buf;
|
|
struct aiocb aio;
|
|
};
|
|
|
|
struct thread_context {
|
|
unsigned int tid;
|
|
unsigned int coreid;
|
|
unsigned int sockid;
|
|
pthread_t sys_thread;
|
|
int disk_fd;
|
|
|
|
unsigned long start_region_offset;
|
|
unsigned long start_region_length;
|
|
|
|
/* modified by worker threads */
|
|
std::list<io_record *> *io_records;
|
|
uint64_t overhead_avg;
|
|
uint32_t overhead_cnt;
|
|
uint64_t overhead_max;
|
|
uint64_t overhead_min;
|
|
};
|
|
|
|
static void dump_options()
|
|
{
|
|
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: Options:\n"
|
|
" dev name: %s\n"
|
|
" driver name: %s\n"
|
|
" worker threads: 0x%lx\n"
|
|
" number of threads: %d\n"
|
|
" IO request size: %lu\n"
|
|
" IO requests per second: %lu\n"
|
|
" IO pattern: %s\n"
|
|
" IO queue depth: %d\n"
|
|
" IO addressing mode: %d\n"
|
|
" read percent: %u\n"
|
|
" inter-arrival dist: %s\n"
|
|
" run time: %d\n"
|
|
" warmup time: %d\n"
|
|
" output file: %s\n",
|
|
options.dev_name,
|
|
options.driver_name,
|
|
options.cpumask,
|
|
options.num_threads,
|
|
options.req_size,
|
|
options.rps,
|
|
options.pattern_spec,
|
|
options.queue_depth,
|
|
options.addr_mode,
|
|
options.read_pct,
|
|
options.ia_spec,
|
|
options.time,
|
|
options.warmup,
|
|
options.output_file
|
|
);
|
|
}
|
|
|
|
/* Writes the command-line option summary to standard output. */
static void usage()
{
	static const char help_text[] =
	    " -V(VV): verbose mode\n"
	    " -D: dev name\n"
	    " -k: driver to use (default bdev)\n"
	    " -a: worker threads spec (0x3 = spawn 2 threads on core 1 & 2)\n"
	    " -b: IO request size\n"
	    " -q: IO requests per second\n"
	    " -P: IO request pattern\n"
	    " -Q: IO request queue depth\n"
	    " -I: inter-arrival time distribution\n"
	    " -t: total run time\n"
	    " -w: warm up time\n"
	    " -o: latency response output file\n";

	/* the text contains no conversion specifiers, so fputs is equivalent */
	fputs(help_text, stdout);
}
|
|
|
|
static void *
|
|
worker_thread_main(void * arg)
|
|
{
|
|
int rc = 0;
|
|
|
|
auto *ctx = (struct thread_context *)arg;
|
|
std::list<struct io_request *> free_ios;
|
|
std::list<struct io_request *> prog_ios;
|
|
|
|
Generator * ia_gen = nullptr;
|
|
io_generator * io_gen = nullptr;
|
|
|
|
struct io_generator_ctx io_ctx;
|
|
uint64_t next_ts;
|
|
uint64_t a_offset;
|
|
uint64_t last_loop_ts = 0;
|
|
|
|
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: init...\n", ctx->tid);
|
|
|
|
// create io request objects
|
|
for (unsigned int i = 0; i < options.queue_depth; i++) {
|
|
auto buf = (char *)nm_malloc(ctx->sockid, options.req_size);
|
|
auto user_buf = (char *)nm_malloc(ctx->sockid, options.req_size);
|
|
|
|
if (buf == nullptr || user_buf == nullptr) {
|
|
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: could not allocate buffers!\n", ctx->tid);
|
|
rc = ENOMEM;
|
|
goto cleanup;
|
|
}
|
|
|
|
auto io_req = new struct io_request;
|
|
io_req->dma_buf = buf;
|
|
io_req->user_buf = user_buf;
|
|
io_req->aio.aio_fildes = ctx->disk_fd;
|
|
io_req->aio.aio_nbytes = options.req_size;
|
|
io_req->aio.aio_buf = buf;
|
|
io_req->aio.aio_sigevent.sigev_notify = SIGEV_NONE;
|
|
io_req->aio.aio_reqprio = 0;
|
|
|
|
free_ios.push_back(io_req);
|
|
}
|
|
|
|
// init thread local states
|
|
ia_gen = createGenerator(options.ia_spec);
|
|
if (ia_gen == nullptr) {
|
|
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: could not allocate ia generator!\n", ctx->tid);
|
|
rc = EINVAL;
|
|
goto cleanup;
|
|
}
|
|
ia_gen->set_lambda((double)options.rps / (double)(options.num_threads));
|
|
|
|
io_gen = new io_generator(options.req_size, ctx->start_region_length, options.read_pct, options.addr_mode);
|
|
if (io_gen == nullptr) {
|
|
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: could not allocate ia generator!\n", ctx->tid);
|
|
rc = EINVAL;
|
|
goto cleanup;
|
|
}
|
|
|
|
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: init complete.\n", ctx->tid);
|
|
|
|
worker_thread_init_cnt.fetch_add(1);
|
|
|
|
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: waiting for start...\n", ctx->tid);
|
|
|
|
while (worker_start.load() == 0) {}
|
|
|
|
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: started...\n", ctx->tid);
|
|
|
|
/* random delay 0-100 ms */
|
|
usleep(((rand() * nm_get_uptime_ns()) % 100) * 1000);
|
|
|
|
next_ts = get_cur_ts_nano();
|
|
|
|
while (true) {
|
|
uint64_t cur_loop_ts = get_cur_ts_nano();
|
|
if (last_loop_ts > 0) {
|
|
uint64_t overhead = cur_loop_ts - last_loop_ts;
|
|
if (ctx->overhead_max < overhead) {
|
|
ctx->overhead_max = overhead;
|
|
}
|
|
|
|
if (ctx->overhead_min > overhead) {
|
|
ctx->overhead_min = overhead;
|
|
}
|
|
|
|
ctx->overhead_avg = ctx->overhead_avg * ctx->overhead_cnt + ctx->overhead_avg;
|
|
ctx->overhead_cnt++;
|
|
ctx->overhead_avg /= ctx->overhead_cnt;
|
|
}
|
|
last_loop_ts = cur_loop_ts;
|
|
|
|
// process io completion
|
|
uint64_t cur_ts = get_cur_ts_nano();
|
|
auto itr = prog_ios.begin();
|
|
while (itr != prog_ios.end()) {
|
|
int err;
|
|
struct io_request * ioreq = *itr;
|
|
if ((err = aio_error(&ioreq->aio)) != EINPROGRESS) {
|
|
if (err == 0) {
|
|
auto rec = new struct io_record;
|
|
rec->start_ts = ioreq->start_ts;
|
|
rec->end_ts = cur_ts;
|
|
|
|
ctx->io_records->push_back(rec);
|
|
if (ioreq->op == IOGEN_READ) {
|
|
memcpy(ioreq->user_buf, ioreq->dma_buf, options.req_size);
|
|
}
|
|
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d <worker_io_complete>: completed io request type %d\n", ctx->tid, ioreq->op);
|
|
|
|
} else {
|
|
ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "thread %d: aio failed with %d...\n", ctx->tid, err);
|
|
}
|
|
|
|
if (aio_return(&ioreq->aio) == -1) {
|
|
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "thread %d: aio_return failed with %d...\n", ctx->tid, errno);
|
|
exit(errno);
|
|
}
|
|
|
|
/* cleanup */
|
|
itr = prog_ios.erase(itr);
|
|
free_ios.push_back(ioreq);
|
|
} else {
|
|
++itr;
|
|
}
|
|
}
|
|
|
|
if (worker_stop.load() == 1) {
|
|
if (free_ios.size() >= options.queue_depth) {
|
|
break;
|
|
}
|
|
} else {
|
|
if (!free_ios.empty()) {
|
|
auto io_req = free_ios.front();
|
|
|
|
cur_ts = get_cur_ts_nano();
|
|
|
|
if (cur_ts >= next_ts) {
|
|
io_gen->issue(&io_ctx, io_req->dma_buf);
|
|
|
|
a_offset = io_ctx.offset + ctx->start_region_offset;
|
|
|
|
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: issuing IO type %d at offset 0x%lx size 0x%lx...\n", ctx->tid, io_ctx.op, a_offset, io_ctx.size);
|
|
|
|
io_req->start_ts = cur_ts;
|
|
io_req->op = io_ctx.op;
|
|
io_req->aio.aio_offset = a_offset;
|
|
|
|
if(io_ctx.op == IOGEN_READ) {
|
|
io_req->aio.aio_lio_opcode = LIO_READ;
|
|
rc = aio_read(&io_req->aio);
|
|
} else {
|
|
io_req->aio.aio_lio_opcode = LIO_WRITE;
|
|
rc = aio_write(&io_req->aio);
|
|
}
|
|
|
|
if (rc != 0) {
|
|
ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "thread %d: failed to issue io %d, retrying...\n", ctx->tid, errno);
|
|
} else {
|
|
free_ios.pop_front();
|
|
next_ts = next_ts + ia_gen->generate() * S2NS;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
cleanup:
|
|
while (!free_ios.empty()) {
|
|
auto req = free_ios.front();
|
|
free_ios.pop_front();
|
|
nm_free(ctx->sockid, req->dma_buf);
|
|
nm_free(ctx->sockid, req->user_buf);
|
|
}
|
|
|
|
if (ia_gen != nullptr) {
|
|
delete ia_gen;
|
|
}
|
|
|
|
if (io_gen != nullptr) {
|
|
delete io_gen;
|
|
}
|
|
|
|
worker_thread_stop_cnt.fetch_add(1);
|
|
|
|
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: stopped...\n", ctx->tid);
|
|
|
|
return nullptr;
|
|
}
|
|
|
|
|
|
static void
|
|
parse_pattern(char * pattern, unsigned int * read_pct, io_generator_address_mode * addr_mode)
|
|
{
|
|
char * token = strtok(pattern, ",");
|
|
|
|
if (strcmp(token, "M") == 0) {
|
|
*addr_mode = IOGEN_ADDR_MONOTONIC_INCREASING;
|
|
} else {
|
|
*addr_mode = IOGEN_ADDR_UNIFORM_RANDOM;
|
|
}
|
|
|
|
token = strtok(nullptr, ",");
|
|
*read_pct = strtoull(token, nullptr, 10);
|
|
}
|
|
|
|
static void
|
|
birb_main()
|
|
{
|
|
int rc = 0;
|
|
std::list<struct thread_context *> worker_threads;
|
|
std::ofstream output_file;
|
|
|
|
unsigned long record_cutoff_time = 0;
|
|
unsigned long current_s = 0;
|
|
unsigned int total_reqs = 0;
|
|
unsigned int tid = 0;
|
|
unsigned long per_thread_cap = 0;
|
|
int cur_core;
|
|
int disk_fd;
|
|
off_t disk_size;
|
|
u_int disk_sec_size;
|
|
|
|
/* initialize driver */
|
|
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: initializing device driver for device %s\n", options.dev_name);
|
|
disk_fd = open(options.dev_name, O_RDWR | O_DIRECT);
|
|
if (disk_fd == -1) {
|
|
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to open device - %d\n", errno);
|
|
exit(errno);
|
|
}
|
|
|
|
rc = ioctl(disk_fd, DIOCGMEDIASIZE, &disk_size);
|
|
if (rc == -1) {
|
|
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to obtain disk size - %d\n", errno);
|
|
exit(errno);
|
|
}
|
|
|
|
rc = ioctl(disk_fd, DIOCGSECTORSIZE, &disk_sec_size);
|
|
if (rc == -1) {
|
|
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to obtain disk sector size - %d\n", errno);
|
|
exit(errno);
|
|
}
|
|
|
|
per_thread_cap = disk_size / options.num_threads;
|
|
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: initialized device with capacity %zu bytes ~= %zu MB, sector %u bytes\n", disk_size, disk_size / 1024 / 1024, disk_sec_size);
|
|
|
|
parse_pattern(options.pattern_spec, &options.read_pct, &options.addr_mode);
|
|
dump_options();
|
|
|
|
output_file.open(options.output_file, std::ofstream::out);
|
|
if (!output_file) {
|
|
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to open output file %s\n", options.output_file);
|
|
rc = EINVAL;
|
|
goto end;
|
|
}
|
|
|
|
cur_core = cmask_get_next_cpu(&options.cpumask);
|
|
while(cur_core != NEXT_CPU_NULL) {
|
|
auto * ctx = new struct thread_context;
|
|
memset(ctx, 0, sizeof(struct thread_context));
|
|
|
|
if (ctx == NULL) {
|
|
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to alloc thread ctx.\n");
|
|
exit(ENOMEM);
|
|
}
|
|
|
|
ctx->tid = tid++;
|
|
|
|
ctx->sockid = nm_get_node_from_core(cur_core);
|
|
ctx->coreid = cur_core;
|
|
ctx->io_records = new std::list<struct io_record *>();
|
|
ctx->start_region_length = per_thread_cap;
|
|
ctx->start_region_offset = per_thread_cap * ctx->tid;
|
|
|
|
// create sys thread
|
|
pthread_attr_t attr;
|
|
cpuset_t scpuset;
|
|
CPU_ZERO(&scpuset);
|
|
CPU_SET(cur_core, &scpuset);
|
|
pthread_attr_init(&attr);
|
|
pthread_attr_setaffinity_np(&attr, sizeof(cpuset_t), &scpuset);
|
|
rc = pthread_create(&ctx->sys_thread, nullptr, worker_thread_main, ctx);
|
|
if (rc != 0) {
|
|
ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to create sys thread: %d\n", rc);
|
|
rc = EINVAL;
|
|
goto end;
|
|
}
|
|
worker_threads.push_back(ctx);
|
|
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: created worker thread %d on core %d socket %d offset 0x%lx length %ld\n", ctx->tid, cur_core, ctx->sockid,
|
|
ctx->start_region_offset,
|
|
ctx->start_region_length);
|
|
|
|
cur_core = cmask_get_next_cpu(&options.cpumask);
|
|
}
|
|
|
|
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "main: waiting for worker thread init...\n");
|
|
while(worker_thread_init_cnt.load() < options.num_threads) {
|
|
}
|
|
|
|
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "main: starting worker threads...\n");
|
|
worker_start.store(1);
|
|
|
|
/* main event loop */
|
|
while(current_s < options.time) {
|
|
if (current_s >= options.warmup && record_cutoff_time == 0) {
|
|
record_cutoff_time = get_cur_ts_nano();
|
|
}
|
|
usleep(1 * S2US);
|
|
current_s++;
|
|
}
|
|
|
|
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "main: stopping worker threads...\n");
|
|
worker_stop.store(1);
|
|
|
|
while(worker_thread_stop_cnt.load() < options.num_threads) {
|
|
}
|
|
|
|
// keep stats
|
|
for (struct thread_context * tctx : worker_threads) {
|
|
uint64_t last_ts = 0;
|
|
uint64_t processed = 0;
|
|
for (struct io_record * r : *tctx->io_records) {
|
|
if (r->start_ts >= record_cutoff_time) {
|
|
if (r->end_ts > last_ts) {
|
|
last_ts = r->end_ts;
|
|
}
|
|
|
|
processed++;
|
|
output_file << r->end_ts - r->start_ts << std::endl;
|
|
total_reqs++;
|
|
}
|
|
}
|
|
|
|
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: thread %d processed requests: %lu, last request %lu. Overhead - avg %lu min %lu max %lu\n",
|
|
tctx->tid, processed, last_ts, tctx->overhead_avg, tctx->overhead_min, tctx->overhead_max);
|
|
}
|
|
|
|
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: total requests: %u, bytes per second: %lu\n",
|
|
total_reqs, total_reqs * options.req_size / (options.time - options.warmup));
|
|
|
|
end:
|
|
if (disk_fd != -1) {
|
|
close(disk_fd);
|
|
}
|
|
|
|
output_file.close();
|
|
|
|
for (struct thread_context * tctx : worker_threads) {
|
|
for (struct io_record * r : *tctx->io_records) {
|
|
delete r;
|
|
}
|
|
delete tctx->io_records;
|
|
delete tctx;
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
int
|
|
main(int argc, char **argv)
|
|
{
|
|
ntr_init();
|
|
ntr_set_level(NTR_DEP_USER1, NTR_LEVEL_INFO);
|
|
|
|
int c;
|
|
while (( c = getopt(argc, argv, "VD:k:a:b:q:Q:P:I:t:w:o:")) != -1)
|
|
{
|
|
switch (c) {
|
|
case 'V':
|
|
ntr_set_level(NTR_DEP_USER1,
|
|
ntr_get_level(NTR_DEP_USER1) + 1);
|
|
break;
|
|
case 'D':
|
|
strncpy(options.dev_name, optarg, MAX_DEV_NAME_LEN);
|
|
break;
|
|
case 'k':
|
|
strncpy(options.driver_name, optarg, MAX_DEV_NAME_LEN);
|
|
break;
|
|
case 'a':
|
|
options.cpumask = strtoull(optarg, nullptr, 16);
|
|
options.num_threads = cmask_get_num_cpus(
|
|
options.cpumask);
|
|
|
|
if (options.num_threads == 0) {
|
|
fprintf(stderr,
|
|
"must run at least one thread\n");
|
|
return EINVAL;
|
|
}
|
|
break;
|
|
case 'b':
|
|
options.req_size = strtoull(
|
|
optarg, nullptr, 10);
|
|
break;
|
|
case 'q':
|
|
options.rps = strtoull(
|
|
optarg, nullptr, 10);
|
|
break;
|
|
case 'Q':
|
|
options.queue_depth = strtoull(
|
|
optarg, nullptr, 10);
|
|
break;
|
|
case 'P':
|
|
strncpy(options.pattern_spec, optarg, MAX_SPEC_LEN);
|
|
break;
|
|
case 'I':
|
|
strncpy(options.ia_spec, optarg, MAX_SPEC_LEN);
|
|
break;
|
|
case 't':
|
|
options.time = strtoull(
|
|
optarg, nullptr, 10);
|
|
break;
|
|
case 'w':
|
|
options.warmup = strtoull(
|
|
optarg, nullptr, 10);
|
|
break;
|
|
case 'o':
|
|
strncpy(options.output_file, optarg, MAX_OUTPUT_FILE_LEN);
|
|
break;
|
|
case 'h':
|
|
usage();
|
|
exit(0);
|
|
default:
|
|
usage();
|
|
exit(EINVAL);
|
|
}
|
|
}
|
|
|
|
nm_init(options.verbosity);
|
|
birb_main();
|
|
|
|
return 0;
|
|
}
|