From a716583b196fca6f35a04f7ca498796d1a9f8b0f Mon Sep 17 00:00:00 2001 From: quackerd Date: Wed, 25 May 2022 06:55:01 -0400 Subject: [PATCH] update various components for new machines --- CMakeLists.txt | 13 ++- inc/defs.hh | 25 +++++ inc/nm.hh | 20 ++-- inc/storage/drivers/nvme.hh | 1 + libnm/alloc.cc | 2 +- libnm/loadgen.cc | 101 ++++++++++------- libnm/nm.cc | 7 +- net/cat.cc | 21 ++-- net/khat.cc | 75 +++++++------ net/rat.cc | 40 +++---- scripts/compile.sh | 38 ------- scripts/libs/libtc.py | 55 ++++++---- scripts/run.py | 75 ++++++------- scripts/{compile_dpdk.sh => setup_dpdk.sh} | 8 +- scripts/setup_program.sh | 36 ++++++ scripts/storage/parse.py | 122 +++++++++++++++------ scripts/storage/test_posix.sh | 19 ++++ scripts/storage/test_spdk_bdev.sh | 19 ++++ scripts/storage/test_spdk_nvme.sh | 19 ++++ storage/birb.cc | 8 +- storage/birb_posix.cc | 18 ++- storage/drivers/nvme.cc | 8 ++ test/ts.cc | 84 ++++++++++++++ util/memloadgen.cc | 28 +++-- 24 files changed, 551 insertions(+), 291 deletions(-) delete mode 100755 scripts/compile.sh rename scripts/{compile_dpdk.sh => setup_dpdk.sh} (61%) create mode 100755 scripts/setup_program.sh create mode 100644 scripts/storage/test_posix.sh create mode 100644 scripts/storage/test_spdk_bdev.sh create mode 100644 scripts/storage/test_spdk_nvme.sh create mode 100644 test/ts.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index da800d5..7fa69e9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -57,10 +57,15 @@ target_compile_options(birb PRIVATE ${CC_FLAGS} ${SPDK_CFLAGS} ${UUID_CFLAGS}) target_link_directories(birb PRIVATE ${SPDK_LIBRARY_DIRS} ${SPDK_SYS_STATIC_LIBRARY_DIRS} ${UUID_LIBRARY_DIRS}) target_link_libraries(birb PRIVATE pthread nm ntr gen -Wl,--whole-archive ${SPDK_LIBRARIES} -Wl,--no-whole-archive ${SPDK_SYS_STATIC_LIBRARIES}) -add_executable(posix EXCLUDE_FROM_ALL storage/birb_posix.cc storage/io_gen.cc) -target_compile_options(posix PRIVATE ${CC_FLAGS}) -target_link_libraries(posix PRIVATE pthread nm ntr gen) +add_executable(birb_posix EXCLUDE_FROM_ALL storage/birb_posix.cc storage/io_gen.cc) +target_compile_options(birb_posix PRIVATE ${CC_FLAGS}) +target_link_libraries(birb_posix PRIVATE pthread nm ntr gen) add_executable(memloadgen util/memloadgen.cc) target_link_libraries(memloadgen PRIVATE pthread nm ntr) -target_compile_options(memloadgen PRIVATE ${CC_FLAGS}) \ No newline at end of file +target_compile_options(memloadgen PRIVATE ${CC_FLAGS}) + +add_executable(test_ts test/ts.cc) +set_target_properties(test_ts PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/test) +target_link_libraries(test_ts PRIVATE nm) +target_compile_options(test_ts PRIVATE ${CC_FLAGS}) diff --git a/inc/defs.hh b/inc/defs.hh index 4441902..8a07b13 100644 --- a/inc/defs.hh +++ b/inc/defs.hh @@ -1,8 +1,13 @@ #pragma once +#include #include #include #include +#include +#include +#include +#include #define DISALLOW_EVIL_CONSTRUCTORS(TypeName) \ TypeName(const TypeName &) = delete; \ @@ -30,5 +35,25 @@ cmask_get_num_cpus(const uint64_t mask) return _mm_popcnt_u64(mask); } +static inline uint64_t +get_uptime() +{ + struct timespec tp; + clock_gettime(CLOCK_MONOTONIC, &tp); + return (tp.tv_sec * S2NS + tp.tv_nsec); +} + +static inline void +cpulist_to_cpuset(char * cpulist, cpuset_t * cpuset) +{ + char * cpu = strtok(cpulist, ","); + CPU_ZERO(cpuset); + + while (cpu != nullptr) { + CPU_SET(atoi(cpu), cpuset); + cpu = strtok(nullptr, ","); + } +} + #define ATTR_UNUSED __attribute__((unused)) diff --git a/inc/nm.hh b/inc/nm.hh index 11cbe51..388b92e 100644 --- a/inc/nm.hh +++ b/inc/nm.hh @@ -1,10 +1,13 @@ #pragma once +#include #include "gen.hh" #include "defs.hh" #include #include +#include +#include constexpr static unsigned int NM_LEVEL_NUMA = 0; constexpr static unsigned int NM_LEVEL_CPU = 1; @@ -53,13 +56,13 @@ nm_get_node_from_core(int coreid) class memload_generator { private: DISALLOW_EVIL_CONSTRUCTORS(memload_generator); - constexpr static uint32_t FROM_REGION_CNT = 0x2; // 2 regions struct thread_info { pthread_t pthr; - std::atomic num_trans; - Generator *ia_gen; + uint32_t iter; + uint32_t array_size; std::atomic init_status; std::atomic *state; + std::atomic num_trans; constexpr static int INIT_START = 0; constexpr static int INIT_SUCCESS = 1; constexpr static int INIT_FAILED = 2; @@ -67,9 +70,9 @@ class memload_generator { void *to_region; uint32_t to_domainid; uint32_t from_domainid; + uint64_t begin_ts; + uint64_t stop_ts; }; - constexpr static uint32_t TRANSACTION_SZ = 0x2000; // transaction sz must >= region_sz - constexpr static uint32_t REGION_SZ = 0x2000 * 0x2000; // 64MB per core std::vector thr_infos; std::atomic state; @@ -77,17 +80,18 @@ class memload_generator { constexpr static uint32_t STATE_START = 1; constexpr static uint32_t STATE_STOP = 2; - uint64_t begin_ts; - uint64_t stop_ts; + uint32_t array_size; + uint32_t iteration; static void *worker_thrd(void *_tinfo); public: - memload_generator(uint64_t from_cmask, uint64_t to_cmask, uint64_t bps, + memload_generator(uint64_t from_cmask, uint64_t to_cmask, uint32_t array_sz, uint32_t iteration, bool *success); void start(); void stop(); uint64_t get_bps(); + bool check_done(); ~memload_generator(); }; diff --git a/inc/storage/drivers/nvme.hh b/inc/storage/drivers/nvme.hh index 9dc9f7f..f5c386c 100644 --- a/inc/storage/drivers/nvme.hh +++ b/inc/storage/drivers/nvme.hh @@ -23,6 +23,7 @@ private: spdk_nvme_ctrlr ** ctrlr; spdk_nvme_ns ** ns; const char * dev_name; + int valid; }; DISALLOW_EVIL_CONSTRUCTORS(birb_nvme_driver); diff --git a/libnm/alloc.cc b/libnm/alloc.cc index 6142187..ebed2cb 100644 --- a/libnm/alloc.cc +++ b/libnm/alloc.cc @@ -50,7 +50,7 @@ nm_alloc_init() for (unsigned int j = 0; j < MEM_REGION_NUM; j++) { if ((nm_mem_regions[i][j] = mmap(nullptr, MEM_OBJ_NUM * MEM_OBJ_SIZE, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_ALIGNED_SUPER | MAP_NOCORE | MAP_PRIVATE, + MAP_ANON | MAP_ALIGNED_SUPER | MAP_NOCORE | MAP_PRIVATE | MAP_NOSYNC, -1, 0)) == MAP_FAILED) { if (nm_get_verbose() > 0) { fprintf(stdout, "libnm: mmap failed with %d\n", errno); diff --git a/libnm/loadgen.cc b/libnm/loadgen.cc index 9dbf9fe..c0d64dd 100644 --- a/libnm/loadgen.cc +++ b/libnm/loadgen.cc @@ -1,10 +1,13 @@ #include "nmp.hh" +#include #include #include +#include #include #include #include +#include void * memload_generator::worker_thrd(void *_tinfo) @@ -19,8 +22,13 @@ memload_generator::worker_thrd(void *_tinfo) tid, tinfo->from_domainid, tinfo->to_domainid); } - tinfo->from_region = nm_malloc(tinfo->from_domainid, REGION_SZ * FROM_REGION_CNT); - tinfo->to_region = nm_malloc(tinfo->to_domainid, REGION_SZ * FROM_REGION_CNT); + if (tinfo->from_region == nullptr) { + tinfo->from_region = nm_malloc(tinfo->from_domainid, tinfo->array_size); + } + + if (tinfo->to_region == nullptr) { + tinfo->to_region = nm_malloc(tinfo->to_domainid, tinfo->array_size); + } if (tinfo->from_region == nullptr || tinfo->to_region == nullptr) { tinfo->init_status.store(thread_info::INIT_FAILED); @@ -32,16 +40,15 @@ memload_generator::worker_thrd(void *_tinfo) } // populate the region with 1/2/3s - for(uint i = 0; i < FROM_REGION_CNT; i++) { - memset((char*)tinfo->from_region + i * REGION_SZ, i + 1, REGION_SZ); - } + memset((char*)tinfo->from_region, 1, tinfo->array_size); + memset((char*)tinfo->to_region, 2, tinfo->array_size); tinfo->init_status.store(thread_info::INIT_SUCCESS); if (nm_get_verbose() > 0) { fprintf(stdout, - "memload_generator : init finished, waiting for start...\n", - tid); + "memload_generator : init finished, from: %p, to: %p, waiting for start...\n", + tid, tinfo->from_region, tinfo->to_region); } while (tinfo->state->load() == STATE_READY) { @@ -51,26 +58,22 @@ memload_generator::worker_thrd(void *_tinfo) fprintf( stdout, "memload_generator : running...\n", tid); } - - uint64_t offset = 0; - uint64_t next_ts = nm_get_uptime_ns(); + tinfo->begin_ts = nm_get_uptime_ns(); while (tinfo->state->load() == STATE_START) { - // generate traffic - uint64_t now = nm_get_uptime_ns(); - - if (now >= next_ts) { - next_ts = next_ts + tinfo->ia_gen->generate() * S2NS; - uint64_t to_offset = offset % REGION_SZ; - uint64_t from_offset = offset % (REGION_SZ * FROM_REGION_CNT); - memcpy((char *)tinfo->to_region + to_offset, (char *)tinfo->from_region + from_offset, TRANSACTION_SZ); - offset += TRANSACTION_SZ; - + if (tinfo->num_trans.load() < tinfo->iter) { + // generate traffic + memcpy((char *)tinfo->to_region, (char *)tinfo->from_region, tinfo->array_size); tinfo->num_trans.fetch_add(1); + if (tinfo->num_trans.load() >= tinfo->iter) { + tinfo->stop_ts = nm_get_uptime_ns(); + } + } else { + usleep(1000 * 100); } } - nm_free(tinfo->from_domainid, tinfo->from_region); - nm_free(tinfo->to_domainid, tinfo->to_region); + //nm_free(tinfo->from_domainid, tinfo->from_region); + //nm_free(tinfo->to_domainid, tinfo->to_region); if (nm_get_verbose() > 0) { fprintf( @@ -80,10 +83,12 @@ memload_generator::worker_thrd(void *_tinfo) } memload_generator::memload_generator( - uint64_t from_cmask, uint64_t to_cmask, uint64_t bps, bool *success) + uint64_t from_cmask, uint64_t to_cmask, uint32_t array_sz, uint32_t iteration, bool *success) { *success = false; state.store(STATE_READY); + this->array_size = array_sz; + this->iteration = iteration; int nextcore; int to_coreid = cmask_get_next_cpu(&to_cmask); int num_cores = cmask_get_num_cpus(from_cmask); @@ -93,17 +98,22 @@ memload_generator::memload_generator( return; } - while ((nextcore = cmask_get_next_cpu(&from_cmask)) != NEXT_CPU_NULL) { + void * from = nm_malloc(nm_obj_get_id(nm_obj_find_parent(nm_obj_from_id(NM_LEVEL_CORE, to_coreid), NM_LEVEL_NUMA)), array_sz); + nextcore = cmask_get_next_cpu(&from_cmask); + void * to = nm_malloc(nm_obj_get_id(nm_obj_find_parent(nm_obj_from_id(NM_LEVEL_CORE, nextcore), NM_LEVEL_NUMA)), array_sz); + while (nextcore != NEXT_CPU_NULL) { auto *info = new struct thread_info; pthread_attr_t attr; - info->ia_gen = createGenerator("exponential"); - info->ia_gen->set_lambda(((double)(bps) / (double)num_cores) / - (double)(REGION_SZ)); info->num_trans.store(0); info->to_domainid = nm_obj_get_id(nm_obj_find_parent(nm_obj_from_id(NM_LEVEL_CORE, to_coreid), NM_LEVEL_NUMA)); info->from_domainid = nm_obj_get_id(nm_obj_find_parent(nm_obj_from_id(NM_LEVEL_CORE, nextcore), NM_LEVEL_NUMA)); info->init_status.store(thread_info::INIT_START); info->state = &state; + info->iter = this->iteration; + info->array_size = this->array_size; + + info->from_region = from; + info->to_region = to; CPU_ZERO(&cpuset); CPU_SET(nextcore, &cpuset); @@ -118,6 +128,7 @@ memload_generator::memload_generator( } thr_infos.push_back(info); + nextcore = cmask_get_next_cpu(&from_cmask); } if (nm_get_verbose() > 0) { @@ -153,31 +164,44 @@ memload_generator::start() { if (this->state.load() == STATE_READY) { state.store(STATE_START); - begin_ts = nm_get_uptime_ns(); } } void memload_generator::stop() { - if (this->state.load() != STATE_STOP) { - stop_ts = nm_get_uptime_ns(); + if (this->state.load() == STATE_START) { state.store(STATE_STOP); } } +bool +memload_generator::check_done() +{ + bool done = true; + for (auto i : thr_infos) { + done = done && (i->num_trans.load() >= this->iteration); + } + if (done) { + stop(); + } + return done; +} + uint64_t memload_generator::get_bps() { - uint64_t now = state.load() == STATE_STOP ? stop_ts : - nm_get_uptime_ns(); - uint64_t total_transactions = 0; - for (auto i : thr_infos) { - total_transactions += i->num_trans.load(); + if (this->state.load() == STATE_STOP) { + uint64_t total_transactions = 0; + uint64_t total_time = 0; + for (auto i : thr_infos) { + total_transactions += i->num_trans.load(); + total_time = (i->stop_ts - i->begin_ts) + total_time; + } + return (double)(total_transactions * this->array_size) / ((double)total_time / thr_infos.size() / S2NS); + } else { + return 0; } - - return (double)(TRANSACTION_SZ * total_transactions) / - (double)((now - begin_ts) / (S2NS)); } memload_generator::~memload_generator() @@ -188,7 +212,6 @@ memload_generator::~memload_generator() for (auto i : thr_infos) { pthread_join(i->pthr, nullptr); - delete i->ia_gen; delete i; } } diff --git a/libnm/nm.cc b/libnm/nm.cc index a5d4981..a487245 100644 --- a/libnm/nm.cc +++ b/libnm/nm.cc @@ -25,14 +25,17 @@ uint64_t nm_get_uptime_ns() { unsigned int dummy; - return nm_tsc2ns(__rdtscp(&dummy)); + _mm_lfence(); + uint64_t tsc = __rdtscp(&dummy); + _mm_lfence(); + return nm_tsc2ns(tsc); } uint64_t nm_tsc2ns(uint64_t tsc) { return (uint64_t)( - (double)tsc / (double)sysctl_tsc_freq * (double)1000000000ul); + (double)tsc / (double)sysctl_tsc_freq * S2NS); } // 0 on success diff --git a/net/cat.cc b/net/cat.cc index afb4586..6c60f98 100644 --- a/net/cat.cc +++ b/net/cat.cc @@ -22,7 +22,7 @@ #include #include -constexpr static unsigned int MBUF_MAX_COUNT = 65536; +#define MBUF_MAX_COUNT (rte_lcore_count() * 4096) constexpr static unsigned int MBUF_CACHE_SIZE = 512; constexpr static unsigned int RX_RING_SIZE = 1024; constexpr static unsigned int TX_RING_SIZE = 1024; @@ -61,7 +61,7 @@ struct options_t { unsigned int master_mode { 0 }; struct net_spec server_spec { }; - uint64_t cpu_mask { 0x4 }; // 2nd core + cpuset_t cpu_set = CPUSET_T_INITIALIZER(0x2); // 2nd core std::vector slaves; uint32_t pkt_loss_failure_threshold { 0 }; uint32_t pkt_loss_time_ms { UINT32_MAX }; @@ -748,14 +748,14 @@ dump_options() " run time = %d\n" " warmup time = %d\n" " output file = %s\n" - " cpu mask = 0x%lx\n" + " number of threads = %d\n" " interarrival dist = %s\n" " target qps = %d\n" " host IP = 0x%x\n" " pkt loss time = %u\n" " pkt loss failure threshold = %u\n", ntr_get_level(NTR_DEP_USER1) - NTR_LEVEL_WARNING, options.run_time, - options.warmup_time, options.output, options.cpu_mask, + options.warmup_time, options.output, CPU_COUNT(&options.cpu_set), options.ia_gen_str, options.target_qps, options.s_host_spec.ip, options.pkt_loss_time_ms, options.pkt_loss_failure_threshold); @@ -858,8 +858,7 @@ main(int argc, char *argv[]) sizeof(options.output) - 1); break; case 'A': - options.cpu_mask = strtoull( - optarg, nullptr, 16); + cpulist_to_cpuset(optarg, &options.cpu_set); break; case 'i': strncpy(options.ia_gen_str, optarg, @@ -959,11 +958,12 @@ main(int argc, char *argv[]) options.s_host_spec.mac_addr.addr_bytes[3], options.s_host_spec.mac_addr.addr_bytes[4], options.s_host_spec.mac_addr.addr_bytes[5]); - uint64_t cmask = options.cpu_mask; - const int16_t core_id = cmask_get_next_cpu(&cmask); - if (core_id == NEXT_CPU_NULL) { - rte_exit(EXIT_FAILURE, "invalid cpu mask 0x%lx\n", cmask); + + int core_id = CPU_FFS(&options.cpu_set); + if (core_id == 0) { + rte_exit(EXIT_FAILURE, "invalid cpu list\n"); } + core_id--; sleep(INIT_DELAY); ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, @@ -1009,7 +1009,6 @@ main(int argc, char *argv[]) << it->srv_sw_rx << ',' << it->srv_sw_tx << ',' << it->srv_hw_rx << ',' << it->srv_hw_tx << std::endl; - printf("Writing ... datapt %p", it); } delete it; } diff --git a/net/khat.cc b/net/khat.cc index 989218c..63dc393 100644 --- a/net/khat.cc +++ b/net/khat.cc @@ -15,10 +15,15 @@ #include "net/util.hh" #include +#include +#include #include #include +#include +#include +#include -constexpr static unsigned int MBUF_MAX_COUNT = 65536; +#define MBUF_MAX_COUNT (rte_lcore_count() * 4096) constexpr static unsigned int MBUF_CACHE_SIZE = 512; constexpr static unsigned int RX_RING_SIZE = 2048; constexpr static unsigned int TX_RING_SIZE = 2048; @@ -76,16 +81,15 @@ struct probe_state_t { struct options_t { // config int num_threads { 1 }; - uint64_t cpuset { 0x4 }; // 2nd core - uint64_t memmask { 0x0 }; // same socket as the NIC + cpuset_t cpu_set = CPUSET_T_INITIALIZER(0x2); // 2nd core char mempool_name_buf[MEMPOOL_NAME_BUF_LEN]; bool jumbo_frame_enabled { false }; // setting this to true changes mbuf size and mtu int port_mtu { MAX_STANDARD_MTU }; int thread_cacheline_cnt = { 128 }; bool mlg_enabled { false }; uint64_t mlg_bps { 0 }; - uint64_t mlg_cmask { 0 }; - uint64_t mlg_dmask { 0 }; + cpuset_t mlg_cset = CPUSET_T_INITIALIZER(0x2); + cpuset_t mlg_dset = CPUSET_T_INITIALIZER(0x1); memload_generator *mlg { nullptr }; // states uint16_t s_portid { 0 }; @@ -539,7 +543,6 @@ usage() " -v(vv): verbose mode\n" " -h: seek help\n" " -A: cpu mask for worker threads\n" - " -M: mempool socket affinity mask\n" " -m: enable memory load generator(MLG)\n" " -b: MLG bytes per second\n" " -x: MLG thread affinity mask\n" @@ -556,15 +559,12 @@ dump_options() "main: khat configuration:\n" " verbosity: +%d\n" " thread count: %d\n" - " cpu mask: 0x%lx\n" - " mempool mask: 0x%lx\n" " ip: 0x%x\n" - " MLG: %s [bps: %ld, threads: 0x%lx, domain: 0x%lx]\n" + " MLG: %s [bps: %ld, thread cnt: %d, domain: %ld]\n" " jumbo frame: %d\n", - ntr_get_level(NTR_DEP_USER1) - NTR_LEVEL_WARNING, - options.num_threads, options.cpuset, options.memmask, + ntr_get_level(NTR_DEP_USER1) - NTR_LEVEL_WARNING, options.num_threads, options.s_host_spec.ip, options.mlg_enabled ? "on" : "off", - options.mlg_bps, options.mlg_cmask, options.mlg_dmask, options.jumbo_frame_enabled); + options.mlg_bps, CPU_COUNT(&options.mlg_cset), CPU_FFS(&options.mlg_dset) - 1, options.jumbo_frame_enabled); } int @@ -590,7 +590,7 @@ main(int argc, char *argv[]) { int c; // parse arguments - while ((c = getopt(argc, argv, "hvA:M:H:mb:X:x:J")) != -1) { + while ((c = getopt(argc, argv, "hvA:H:mb:X:x:J")) != -1) { switch (c) { case 'v': ntr_set_level(NTR_DEP_USER1, @@ -600,17 +600,13 @@ main(int argc, char *argv[]) usage(); rte_exit(EXIT_SUCCESS, "\n"); case 'A': - options.cpuset = strtoull(optarg, nullptr, 16); - options.num_threads = cmask_get_num_cpus( - options.cpuset); + cpulist_to_cpuset(optarg, &options.cpu_set); + options.num_threads = CPU_COUNT(&options.cpu_set); if (options.num_threads == 0) { rte_exit(EXIT_FAILURE, "must run at least one thread\n"); } break; - case 'M': - options.memmask = strtoull(optarg, nullptr, 16); - break; case 'H': if (str_to_netspec( optarg, &options.s_host_spec) != 0) { @@ -626,12 +622,10 @@ main(int argc, char *argv[]) options.mlg_bps = strtoull(optarg, nullptr, 10); break; case 'X': - options.mlg_dmask = strtoull( - optarg, nullptr, 16); + cpulist_to_cpuset(optarg, &options.mlg_dset); break; case 'x': - options.mlg_cmask = strtoull( - optarg, nullptr, 16); + cpulist_to_cpuset(optarg, &options.mlg_cset); break; case 'J': options.jumbo_frame_enabled = true; @@ -657,14 +651,14 @@ main(int argc, char *argv[]) dump_options(); // init mlg - if (options.mlg_enabled) { - bool success = false; - options.mlg = new memload_generator(options.mlg_cmask, - options.mlg_dmask, options.mlg_bps, &success); - if (!success) { - rte_exit(EXIT_FAILURE, "failed to init mlg\n"); - } - } + // if (options.mlg_enabled) { + // // bool success = false; + // // options.mlg = new memload_generator(options.mlg_cmask, + // // options.mlg_dmask, options.mlg_bps, &success); + // // if (!success) { + // // rte_exit(EXIT_FAILURE, "failed to init mlg\n"); + // // } + // } // register dynamic field PROBE_FLAG_OFFSET = rte_mbuf_dynfield_register( @@ -702,7 +696,7 @@ main(int argc, char *argv[]) snprintf(options.mempool_name_buf, MEMPOOL_NAME_BUF_LEN, "khat_mempool_%d", nodeid); mbuf_pool = rte_pktmbuf_pool_create(options.mempool_name_buf, - MBUF_MAX_COUNT * nb_ports, MBUF_CACHE_SIZE, 0, + MBUF_MAX_COUNT, MBUF_CACHE_SIZE, 0, options.jumbo_frame_enabled ? RTE_MBUF_DEFAULT_BUF_SIZE + (MAX_JUMBO_MTU - MAX_STANDARD_MTU) : RTE_MBUF_DEFAULT_BUF_SIZE, nodeid); @@ -717,20 +711,25 @@ main(int argc, char *argv[]) } // init threads - uint64_t cpuset = options.cpuset; - for (int i = 0; i < options.num_threads; i++) { - uint32_t lcore_id = cmask_get_next_cpu(&cpuset); + uint32_t cpu_idx = CPU_FFS(&options.cpu_set); + uint32_t tid = 0; + while(cpu_idx != 0) { + uint32_t lcore_id = cpu_idx - 1; uint32_t node_id = rte_lcore_to_socket_id(lcore_id); auto *tinfo = (struct thread_info *)nm_malloc(node_id, sizeof(struct thread_info)); tinfo->cache_lines = nm_malloc(node_id, CACHELINE_SIZE * options.thread_cacheline_cnt); tinfo->load_buffer = nm_malloc(node_id, THREAD_LOAD_BUFFER_SZ); - tinfo->tid = i; + tinfo->tid = tid; tinfo->lcore_id = lcore_id; tinfo->node_id = node_id; options.s_thr_info.push_back(tinfo); ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, - "main: thread %d assigned to cpu %d, node %d\n", tinfo->tid, - tinfo->lcore_id, nm_get_node_from_core(lcore_id)); + "main: thread %d assigned to cpu %d, node %d\n", tinfo->tid, + tinfo->lcore_id, nm_get_node_from_core(lcore_id)); + + tid++; + CPU_CLR(cpu_idx - 1, &options.cpu_set); + cpu_idx = CPU_FFS(&options.cpu_set); } if (port_init(portid, mbuf_pool) != 0) { diff --git a/net/rat.cc b/net/rat.cc index f138757..baa100b 100644 --- a/net/rat.cc +++ b/net/rat.cc @@ -1,3 +1,4 @@ +#include #include #include #include @@ -22,7 +23,7 @@ #include #include -constexpr static unsigned int MBUF_MAX_COUNT = 65536; +#define MBUF_MAX_COUNT (rte_lcore_count() * 4096) constexpr static unsigned int MBUF_CACHE_SIZE = 512; constexpr static unsigned int RX_RING_SIZE = 2048; constexpr static unsigned int TX_RING_SIZE = 2048; @@ -92,11 +93,11 @@ struct options_t { char ia_gen[256] { "fixed" }; char ld_gen[256] { "fixed:0" }; uint32_t target_qps { 0 }; - uint32_t depth = 1; + uint32_t depth { 1 }; struct net_spec server_spec { }; - uint64_t cpu_mask { 0x4 }; // 1 thread @ core 2 - uint32_t pkt_loss_delay_ms = UINT32_MAX; + cpuset_t cpu_set = CPUSET_T_INITIALIZER(0x2); // 1 thread @ core 2 + uint32_t pkt_loss_delay_ms { UINT32_MAX }; bool jumbo_frame_enabled { false }; int pkt_pad_sz { 0 }; int port_mtu { MAX_STANDARD_MTU }; @@ -685,7 +686,6 @@ dump_options() " run time = %d\n" " num threads = %d\n" " rage quit time = %ul\n" - " cpu mask = 0x%lx\n" " slave mode = %d\n" " interarrival dist = %s\n" " workload dist = %s\n" @@ -696,7 +696,7 @@ dump_options() " jumbo frame = %d\n" " packet pad size = %d\n", ntr_get_level(NTR_DEP_USER1) - NTR_LEVEL_WARNING, options.run_time, - options.s_num_threads, options.rage_quit_time, options.cpu_mask, + options.s_num_threads, options.rage_quit_time, options.slave_mode, options.ia_gen, options.ld_gen, options.target_qps, options.s_host_spec.ip, options.depth, options.pkt_loss_delay_ms, options.jumbo_frame_enabled, @@ -775,14 +775,10 @@ main(int argc, char *argv[]) STATE_SYNC; // set state to wait for SYNC break; case 'A': - options.cpu_mask = strtoull( - optarg, nullptr, 16); - options.s_num_threads = cmask_get_num_cpus( - options.cpu_mask); + cpulist_to_cpuset(optarg, &options.cpu_set); + options.s_num_threads = CPU_COUNT(&options.cpu_set); if (options.s_num_threads == 0) { - rte_exit(EXIT_FAILURE, - "invalid cpu mask 0x%lx\n", - options.cpu_mask); + rte_exit(EXIT_FAILURE, "invalid cpu mask %s\n", optarg); } break; case 'i': @@ -881,8 +877,10 @@ main(int argc, char *argv[]) } options.mbuf_pool = mbuf_pool; - uint64_t cmask = options.cpu_mask; - for (unsigned int i = 0; i < options.s_num_threads; i++) { + unsigned int cpuset_idx = CPU_FFS(&options.cpu_set); + unsigned int tid = 0; + while(cpuset_idx != 0) { + unsigned int lcore_id = cpuset_idx - 1; tinfo = new thread_info; tinfo->ia_gen = createGenerator(options.ia_gen); tinfo->load_gen = createGenerator(options.ld_gen); @@ -892,11 +890,15 @@ main(int argc, char *argv[]) } tinfo->ia_gen->set_lambda((double)options.target_qps / (double)(options.s_num_threads)); - tinfo->id = i; - tinfo->lcore_id = cmask_get_next_cpu(&cmask); - tinfo->rxqid = i; - tinfo->txqid = i; + tinfo->id = tid; + tinfo->lcore_id = lcore_id; + tinfo->rxqid = tid; + tinfo->txqid = tid; options.s_thr_info.push_back(tinfo); + + tid++; + CPU_CLR(lcore_id, &options.cpu_set); + cpuset_idx = CPU_FFS(&options.cpu_set); } if (port_init(portid, mbuf_pool) != 0) { diff --git a/scripts/compile.sh b/scripts/compile.sh deleted file mode 100755 index e5ea8f9..0000000 --- a/scripts/compile.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/sh -test_dir="/numam.d" -root="$(dirname "$0")/.." -servers="skylake2.rcs.uwaterloo.ca skylake3.rcs.uwaterloo.ca skylake6.rcs.uwaterloo.ca skylake7.rcs.uwaterloo.ca skylake8.rcs.uwaterloo.ca" -rsync_flags="-vchr" -ssh_args="-o StrictHostKeyChecking=no -p77" - -user=$1 - -if [ -z $user ] -then - user=$(whoami) -fi - -echo "USER: $user" - -compile() { - # separate these functions because we might change kernel (reboot) without needing to recompile - echo "====================$1====================" - echo "Syncing directories..." - ssh $(echo $ssh_args $user@$1) "sudo mkdir -p $test_dir" - ssh $(echo $ssh_args $user@$1) "sudo chmod 777 $test_dir" - rsync $(echo $rsync_flags) -e 'ssh -p 77' $root/ $user@$1:$test_dir/ - echo "Compiling..." - ssh $(echo $ssh_args $user@$1) "sudo pkg install -y hwloc2 cmake; sudo mkdir -p $test_dir/build; cd $test_dir/build; sudo rm -rf *; sudo cmake ../; sudo make clean all -j8" & - wait - echo "$1 Done." - echo "" -} - -i=0 -for server in $servers -do - i=$(expr $i + 1) - compile "$server" & -done - -wait \ No newline at end of file diff --git a/scripts/libs/libtc.py b/scripts/libs/libtc.py index 88d00ec..839ea08 100644 --- a/scripts/libs/libtc.py +++ b/scripts/libs/libtc.py @@ -85,28 +85,25 @@ def remote_exec(srv, cmd, blocking=True, check=True): return sub -def scan_stderr(p, exclude = None): - for err in p.stderr: - fail = True - err = err.decode() - err = err.strip() +def check_stderr(p, sel, exclude = []):# -> tuple[bool, list[str]]: + max_stderr_rd = 10 + err = [] + while sel.poll(1) and max_stderr_rd > 0: + err.append(p.stderr.readline().decode().strip()) + max_stderr_rd = max_stderr_rd - 1 - #print(err) - if len(err) == 0: + good = True + for e in err: + if len(e) == 0: continue - if exclude != None: - for exc in exclude: - if (exc != None) and (re.match(exc, err) != None): - fail = False - break - - if fail: - log_print("Error detected: " + err) - return False + for exc in exclude: + if not (exc in err): + good = False + break - return True + return good, err # stderr threads errthr_objs = [] @@ -116,12 +113,21 @@ errthr_failed = False def errthr_get_failed(): return errthr_failed -def thr_check_stderr(p : sp.Popen, exclude): +def thr_check_stderr(p : sp.Popen, name: str, exclude): global errthr_failed + sel = select.poll() + sel.register(p.stderr, select.POLLIN) + local_failed = False while(not errthr_sigstop): - if not scan_stderr(p, exclude=exclude): - errthr_failed = True - time.sleep(0.5 + random.uniform(-0.1, 0.1)) + if (not local_failed): + status, err = check_stderr(p, sel, exclude=exclude) + if not status: + errthr_failed = True + local_failed = True + log_print("Error detected in \"" + name + "\":\n") + for e in err: + log_print(" " + e + "\n") + time.sleep(random.uniform(0.001, 0.1)) def errthr_start(): global errthr_sigstop @@ -129,12 +135,13 @@ def errthr_start(): errthr_sigstop = False errthr_failed = False for thr in errthr_objs: + thr.daemon = True thr.start() -def errthr_create(cp, exclude = None): +def errthr_create(cp, name, exclude = None): global errthr_objs - for p in cp: - errthr_objs.append(Thread(target = thr_check_stderr, args=(p, exclude))) + for i in range(len(cp)): + errthr_objs.append(Thread(target = thr_check_stderr, args=(cp[i], name[i], exclude))) def errthr_stop(): global errthr_objs diff --git a/scripts/run.py b/scripts/run.py index 39fc3f6..c8b3e92 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -13,15 +13,15 @@ import libpar as par import libtc as tc # load_gen -loadgen_load = "fixed:16" +loadgen_load = "fixed:0" # pkt_pad jumbo_frame_threshold = 1518 pkt_pads = [ - "0", - # "256", - # "512", - # "1024", + "0", + "256", + "512", + "1024", #"1518", #"2048", #"4096", @@ -30,11 +30,11 @@ pkt_pads = [ ] pkt_pads_depth = {} -pkt_pads_depth["0"] = "0" +pkt_pads_depth["0"] = "32" pkt_pads_depth["256"] = "16" pkt_pads_depth["512"] = "8" pkt_pads_depth["1024"] = "4" -pkt_pads_depth["1518"] = "6" +pkt_pads_depth["1518"] = "4" pkt_pads_depth["2048"] = "2" pkt_pads_depth["4096"] = "1" pkt_pads_depth["8192"] = "1" @@ -80,41 +80,25 @@ root_dir = os.path.join(file_dir,"..") sample_filename = "sample.txt" affinity = [ - #"0x20", - #"0x2000000", - # "0xA0", - # "0xAA0", - # "0xAAAA0", - # "0xAAAAA", - # "0xAAAAAA", - # "0x2000000", - # "0xA000000", - # "0xAA000000", - # "0xAAAA000000", - # "0xAAAAA000000", - # "0xAAAAAA000000", - "0x2000002", - "0xA00000A", - "0xAA0000AA", - "0xAAAA00AAAA", - "0xAAAAAAAAAAAA" + #"1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33,35,37,39,41,43,45,47", + "49,51,53,55,57,59,61,63,65,67,69,71,73,75,77,79,81,83,85,87,89,91,93,95" ] master = ["skylake2.rcs.uwaterloo.ca"] master_spec = ["192.168.123.10@3c:15:fb:c9:f3:36"] -master_cpumask = "0x8" # 1 thread +master_cpumask = "2" # 1 thread -server = ["skylake3.rcs.uwaterloo.ca"] -server_spec = ["192.168.123.9@3c:15:fb:c9:f3:4b"] +server = ["icelake2-int.rcs.uwaterloo.ca"] +server_spec = ["192.168.123.9@3c:ec:ef:61:39:f3"] -clients = ["skylake6.rcs.uwaterloo.ca", "skylake7.rcs.uwaterloo.ca"] # "skylake8.rcs.uwaterloo.ca"] -client_spec = ["192.168.123.11@3c:15:fb:62:9b:2f", "192.168.123.12@3c:15:fb:c9:f3:44"] # "192.168.123.13@3c:15:fb:62:9c:be"] -client_cpumask = "0xAAAAAA" +clients = ["skylake3.rcs.uwaterloo.ca", "skylake5.rcs.uwaterloo.ca", "skylake6.rcs.uwaterloo.ca"]# "skylake7.rcs.uwaterloo.ca", "skylake8.rcs.uwaterloo.ca"] +client_spec = ["192.168.123.12@3c:15:fb:c9:f3:4b", "192.168.123.13@3c:15:fb:c9:f3:28", "192.168.123.14@3c:15:fb:62:9b:2f"] #, "192.168.123.13@3c:15:fb:62:9c:be"] +client_cpumask = "1,3,5,7,9,11,13,15,17,19,21,23" client_rage_quit = 1000 #1s -warmup = 10 -duration = 10 -cooldown = 0 +warmup = 5 +duration = 20 +cooldown = 5 cacheline = 0 SSH_PARAM = "-o StrictHostKeyChecking=no -p77" SSH_USER = "oscar" @@ -147,7 +131,7 @@ def get_client_str(): return ret def calc_client_ld(ld : int): - return 0 if ld == 0 else ((ld - master_qps) / len(clients)) + return 0 if ld == 0 else (int)((ld - master_qps) / len(clients)) def run_exp(affinity : str, ld : int, pkt_pad : str, aff_idx : int): while True: @@ -169,6 +153,7 @@ def run_exp(affinity : str, ld : int, pkt_pad : str, aff_idx : int): # start clients tc.log_print("Starting clients...") sclt = [] + sclt_name = [] for i in range(len(clients)): client_cmd = "sudo " + test_dir + "/rat --log-level lib.eal:err -- -S -A " + client_cpumask + \ " -i exponential " + \ @@ -184,8 +169,9 @@ def run_exp(affinity : str, ld : int, pkt_pad : str, aff_idx : int): client_cmd += " -J " tc.log_print(client_cmd) sclt.append(tc.remote_exec([clients[i]], client_cmd, blocking=False)[0]) + sclt_name.append(clients[i]) - time.sleep(1) + time.sleep(5) # start master tc.log_print("Starting master...") master_cmd = "sudo " + test_dir + "/cat --log-level lib.eal:err -- " + \ @@ -205,19 +191,24 @@ def run_exp(affinity : str, ld : int, pkt_pad : str, aff_idx : int): p = sp[0] # launch stderr monitoring thread - exclude = None - tc.errthr_create(sp, exclude) + exclude = ["Pseudo-terminal"] + tc.errthr_create([p], master, exclude) if not client_only: - tc.errthr_create(ssrv, exclude) - tc.errthr_create(sclt, exclude) + tc.errthr_create(ssrv, server, exclude) + tc.errthr_create(sclt, sclt_name, exclude) tc.errthr_start() success = False cur = 0 + # selec = select.poll() + # selec.register(p.stdout, select.POLLIN) while True: # either failed or timeout # we use failure detection to save time for long durations - if tc.errthr_get_failed() or cur >= int(warmup + duration) * 3 : + if tc.errthr_get_failed() or cur >= (warmup + duration) * 3: break + + # while selec.poll(1): + # print(p.stdout.readline()) if p.poll() != None: success = True @@ -293,7 +284,7 @@ def main(): elif opt in ('-c'): client_only=True - tc.init("~/results.d/numam/" + output_dirname + "_" + datetime.datetime.now().strftime('%Y%m%d%H%M%S')) + tc.init("~/results.d/numam_neo/" + output_dirname + "_" + datetime.datetime.now().strftime('%Y%m%d%H%M%S')) tc.log_print("Configuration:\n" + \ "hostfile: " + ("None" if hostfile == None else hostfile) + "\n" \ diff --git a/scripts/compile_dpdk.sh b/scripts/setup_dpdk.sh similarity index 61% rename from scripts/compile_dpdk.sh rename to scripts/setup_dpdk.sh index ae2fc1f..a287dc5 100755 --- a/scripts/compile_dpdk.sh +++ b/scripts/setup_dpdk.sh @@ -1,14 +1,12 @@ #!/bin/sh dpdk_dir="/dpdk" root="$(dirname "$0")/.." -servers="skylake2.rcs.uwaterloo.ca skylake3.rcs.uwaterloo.ca skylake6.rcs.uwaterloo.ca skylake7.rcs.uwaterloo.ca skylake8.rcs.uwaterloo.ca" -rsync_flags="-vchr" +servers="skylake3.rcs.uwaterloo.ca" # skylake5.rcs.uwaterloo.ca skylake6.rcs.uwaterloo.ca skylake7.rcs.uwaterloo.ca skylake8.rcs.uwaterloo.ca" +rsync_flags="-az" ssh_args="-o StrictHostKeyChecking=no -p77" user=$1 -iface="ixl0" - if [ -z $user ] then @@ -21,7 +19,7 @@ compile() { # separate these functions because we might change kernel (reboot) without needing to recompile echo "====================$1====================" #ssh $(echo $ssh_args $user@$1) "sudo reboot" - ssh $(echo $ssh_args $user@$1) "sudo sh -c \"mkdir -p $dpdk_dir; cd $dpdk_dir; git clone https://git.quacker.org/d/numam-dpdk; cd numam-dpdk; git reset --hard; git clean -f; rm -rf build; meson -Denable_kmods=true build; cd build; ninja install\"" + ssh $(echo $ssh_args $user@$1) "sudo sh -c \"rm -rf $dpdk_dir; mkdir -p $dpdk_dir; cd $dpdk_dir; git clone https://git.quacker.org/d/numam-dpdk; cd numam-dpdk; git checkout releases-13.0; meson -Denable_kmods=true build; cd build; ninja install\"" wait echo "$1 Done." echo "" diff --git a/scripts/setup_program.sh b/scripts/setup_program.sh new file mode 100755 index 0000000..58c27d4 --- /dev/null +++ b/scripts/setup_program.sh @@ -0,0 +1,36 @@ +#!/bin/sh +dpdk_dir="/numam.d" +root="$(dirname "$0")/.." +servers="icelake2-int.rcs.uwaterloo.ca" # skylake7.rcs.uwaterloo.ca skylake8.rcs.uwaterloo.ca icelake2-int.rcs.uwaterloo.ca" +rsync_flags="-rv -e \"ssh -p77\"" +ssh_args="-o StrictHostKeyChecking=no -p77" + +user=$1 + + +if [ -z $user ] +then + user=$(whoami) +fi + +echo "USER: $user" + +compile() { + # separate these functions because we might change kernel (reboot) without needing to recompile + echo "====================$1====================" + ssh $(echo $ssh_args $user@$1) "sudo sh -c \"rm -rf $dpdk_dir; mkdir $dpdk_dir; chmod 777 $dpdk_dir\"" + rsync -rv -e "ssh -p77" $root/ $user@$1:$dpdk_dir/ + ssh $(echo $ssh_args $user@$1) "sudo sh -c \"cd $dpdk_dir; rm -rf build; mkdir build; cd build; cmake ../; make -j8 khat cat rat\"" + wait + echo "$1 Done." + echo "" +} + +i=0 +for server in $servers +do + i=$(expr $i + 1) + compile "$server" & +done + +wait \ No newline at end of file diff --git a/scripts/storage/parse.py b/scripts/storage/parse.py index 3ca3e8b..aa7f0f1 100644 --- a/scripts/storage/parse.py +++ b/scripts/storage/parse.py @@ -9,50 +9,104 @@ import getopt import math import concurrent.futures as CF +columns = [ + ("Req per second", "rps", ".2f"), + ("Bytes per second", "bps", ".2f"), + ("Average Latency", "lat_avg", ".2f"), + ("50th Latency", "lat_50", ".0f"), + ("95th Latency", "lat_95", ".0f"), + ("99th Latency", "lat_99", ".0f"), + ("Latency stddev", "lat_std", ".2f") +] + +TIME = 30 +REQ_SZ = 4096 + +class DatObj: + def __init__(self, raw : list, time : int, req_sz : int): + self.raw = raw + self.rps = len(raw) / time + self.bps = self.rps * req_sz + self.lat_avg = np.average(self.raw) + self.lat_99 = np.percentile(self.raw, 99) + self.lat_95 = np.percentile(self.raw, 95) + self.lat_50 = np.percentile(self.raw, 50) + self.lat_std = np.std(self.raw) + +def parse_file(lines : list, time : int, req_sz : int) -> DatObj : + raw = [] + for line in lines: + if len(line) > 0: + raw.append(int(line)) + return DatObj(raw, time, req_sz) + +def output_col(): + ret = "Benchmark" + for name,_,_ in columns: + ret = ret + "," + name + "," + name + " (NUMA)" + "," + "% change" + return ret + +def get_attr_or_none(obj, attr): + if (obj != None): + val = getattr(obj, attr) + else: + val = None + return val + +def output_objs(name: str, obj : DatObj, obj_numa : DatObj): + ret = name + for _, attr, fmt in columns: + val = get_attr_or_none(obj, attr) + val_numa = get_attr_or_none(obj_numa, attr) + + ret = ret + "," + (format(val, fmt) if val != None else "N/A") + ret = ret + "," + (format(val_numa, fmt) if val_numa != None else "N/A") + + if val == None or val_numa == None: + ret = ret + "," + "N/A" + else: + ret = ret + "," + format((val_numa - val) / val * 100, ".2f") + "%" + return ret + +def process_file(f : str, obj_map): + with open(f, "r") as fp: + lines = fp.readlines() + + bench_name = os.path.basename(f) + obj_map[bench_name] = parse_file(lines, TIME, REQ_SZ) + print("Processed file " + f + ". Benchmark name: " + bench_name) + +def process_dir(path : str, obj_map): + files = [os.path.abspath(os.path.join(path, x)) for x in os.listdir(path)] + for f in files: + if (".sh" in f): + continue + if (os.path.isfile(f)): + process_file(f, obj_map) + def main(): datdir = None - options = getopt.getopt(sys.argv[1:], 'f:')[0] + options = getopt.getopt(sys.argv[1:], 'd:')[0] for opt, arg in options: - if opt in ('-f'): + if opt in ('-d'): datdir = arg if datdir == None: - raise Exception("Must specify -f parameter") + raise Exception("Must specify -d parameter") - with open(datdir) as file: - lines = file.readlines() + obj_map = dict() + process_dir(datdir, obj_map) - datapts = [] - for line in lines: - if len(line) > 0: - datapts.append(int(line)) - - - runtime = 10 - req = len(datapts) - blk_size = 4096 - bps = blk_size * req - - - - avg_lat = np.average(datapts) - tail99_lat = np.percentile(datapts, 99) - tail95_lat = np.percentile(datapts, 95) - med_lat = np.percentile(datapts, 50) - std_dev = np.std(datapts) - - print("Runtime: " + str(runtime) + "s\n" - "Requests: " + str(req) + "\n" - "Request size: " + str(blk_size) + " bytes\n" - "Request per second: " + str(int(req/runtime)) + "\n" - "Bytes per second: " + str(bps) + " bytes = " + str(int(bps/1024/1024)) + " MB\n" - "Average Latency: " + str(int(avg_lat)) + "\n" - "99th Latency: " + str(int(tail99_lat)) + "\n" - "95th Latency: " + str(int(tail95_lat)) + "\n" - "50th Latency: " + str(int(med_lat)) + "\n" - "stddev: " + str(std_dev) + "\n") + with open("results.csv", "w") as f: + f.write(output_col()) + f.write("\n") + for bench in obj_map: + if bench.endswith("_numa"): + continue + f.write(output_objs(bench, obj_map[bench], obj_map.get(bench+"_numa"))) + f.write("\n") if __name__ == "__main__": main() \ No newline at end of file diff --git a/scripts/storage/test_posix.sh b/scripts/storage/test_posix.sh new file mode 100644 index 0000000..1ea28d4 --- /dev/null +++ b/scripts/storage/test_posix.sh @@ -0,0 +1,19 @@ +# rand_read +sudo /numam/code/build/birb_posix -m 0x2 -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D /dev/nvd0 -P R,100 -Q 3 -o rand_read +sudo /numam/code/build/birb_posix -m 0x2 -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D /dev/nvd0 -P R,100 -Q 3 -o rand_read_numa + +# rand_write +sudo /numam/code/build/birb_posix -m 0x2 -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D /dev/nvd0 -P R,0 -Q 3 -o rand_write +sudo /numam/code/build/birb_posix -m 0x2 -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D /dev/nvd0 -P R,0 -Q 3 -o rand_write_numa + +# mono_read +sudo /numam/code/build/birb_posix -m 0x2 -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D /dev/nvd0 -P M,100 -Q 3 -o mono_read +sudo /numam/code/build/birb_posix -m 0x2 -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D /dev/nvd0 -P M,100 -Q 3 -o mono_read_numa + +# mono_write +sudo /numam/code/build/birb_posix -m 0x2 -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D /dev/nvd0 -P M,0 -Q 3 -o mono_write +sudo /numam/code/build/birb_posix -m 0x2 -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D /dev/nvd0 -P M,0 -Q 3 -o mono_write_numa + +# mixed +sudo /numam/code/build/birb_posix -m 0x2 -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D /dev/nvd0 -P R,70 -Q 3 -o mixed_read +sudo /numam/code/build/birb_posix -m 0x2 -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D /dev/nvd0 -P R,70 -Q 3 -o mixed_read_numa \ No newline at end of file diff --git a/scripts/storage/test_spdk_bdev.sh b/scripts/storage/test_spdk_bdev.sh new file mode 100644 index 0000000..3b28845 --- /dev/null +++ b/scripts/storage/test_spdk_bdev.sh @@ -0,0 +1,19 @@ +# rand_read +sudo /numam/code/build/birb -m 0xAAAAAA -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D Nvme0n1 -P R,100 -Q 3 -o rand_read -k bdev +sudo /numam/code/build/birb -m 0xAAAAAA000000 -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D Nvme0n1 -P R,100 -Q 3 -o rand_read_numa -k bdev + +# rand_write +sudo /numam/code/build/birb -m 0xAAAAAA -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D Nvme0n1 -P R,0 -Q 3 -o rand_write -k bdev +sudo /numam/code/build/birb -m 0xAAAAAA000000 -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D Nvme0n1 -P R,0 -Q 3 -o rand_write_numa -k bdev + +# mono_read +sudo /numam/code/build/birb -m 0xAAAAAA -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D Nvme0n1 -P M,100 -Q 3 -o mono_read -k bdev +sudo /numam/code/build/birb -m 0xAAAAAA000000 -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D Nvme0n1 -P M,100 -Q 3 -o mono_read_numa -k bdev + +# mono_write +sudo /numam/code/build/birb -m 0xAAAAAA -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D Nvme0n1 -P M,0 -Q 3 -o mono_write -k bdev +sudo /numam/code/build/birb -m 0xAAAAAA000000 -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D Nvme0n1 -P M,0 -Q 3 -o mono_write_numa -k bdev + +# mixed +sudo /numam/code/build/birb -m 0xAAAAAA -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D Nvme0n1 -P R,70 -Q 3 -o mixed_read -k bdev +sudo /numam/code/build/birb -m 0xAAAAAA000000 -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D Nvme0n1 -P R,70 -Q 3 -o mixed_read_numa -k bdev \ No newline at end of file diff --git a/scripts/storage/test_spdk_nvme.sh b/scripts/storage/test_spdk_nvme.sh new file mode 100644 index 0000000..3b28845 --- /dev/null +++ b/scripts/storage/test_spdk_nvme.sh @@ -0,0 +1,19 @@ +# rand_read +sudo /numam/code/build/birb -m 0xAAAAAA -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D Nvme0n1 -P R,100 -Q 3 -o rand_read -k bdev +sudo /numam/code/build/birb -m 0xAAAAAA000000 -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D Nvme0n1 -P R,100 -Q 3 -o rand_read_numa -k bdev + +# rand_write +sudo /numam/code/build/birb -m 0xAAAAAA -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D Nvme0n1 -P R,0 -Q 3 -o rand_write -k bdev +sudo /numam/code/build/birb -m 0xAAAAAA000000 -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D Nvme0n1 -P R,0 -Q 3 -o rand_write_numa -k bdev + +# mono_read +sudo /numam/code/build/birb -m 0xAAAAAA -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D Nvme0n1 -P M,100 -Q 3 -o mono_read -k bdev +sudo /numam/code/build/birb -m 0xAAAAAA000000 -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D Nvme0n1 -P M,100 -Q 3 -o mono_read_numa -k bdev + +# mono_write +sudo /numam/code/build/birb -m 0xAAAAAA -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D Nvme0n1 -P M,0 -Q 3 -o mono_write -k bdev +sudo /numam/code/build/birb -m 0xAAAAAA000000 -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D Nvme0n1 -P M,0 -Q 3 -o mono_write_numa -k bdev + +# mixed +sudo /numam/code/build/birb -m 0xAAAAAA -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555 -b 4096 -q 0 -D Nvme0n1 -P R,70 -Q 3 -o mixed_read -k bdev +sudo /numam/code/build/birb -m 0xAAAAAA000000 -c /numam/nvme.json -t 35 -w 5 -I fixed -a 0x555555000000 -b 4096 -q 0 -D Nvme0n1 -P R,70 -Q 3 -o mixed_read_numa -k bdev \ No newline at end of file diff --git a/storage/birb.cc b/storage/birb.cc index dd4a86c..9b99eac 100644 --- a/storage/birb.cc +++ b/storage/birb.cc @@ -460,8 +460,8 @@ worker_thread_main(void * arg) ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: started...\n", ctx->tid); - /* random delay 0-100 ms */ - usleep(((rand() * nm_get_uptime_ns()) % 100) * 1000); + /* random delay 0-100 us */ + usleep(nm_get_uptime_ns() % 100); next_ts = get_cur_ts_nano(); @@ -477,7 +477,7 @@ worker_thread_main(void * arg) ctx->overhead_min = overhead; } - ctx->overhead_avg = ctx->overhead_avg * ctx->overhead_cnt + ctx->overhead_avg; + ctx->overhead_avg = ctx->overhead_avg * ctx->overhead_cnt + overhead; ctx->overhead_cnt++; ctx->overhead_avg /= ctx->overhead_cnt; } @@ -657,7 +657,7 @@ birb_main(void * arg1 UNUSED) CPU_SET(cur_core, &scpuset); pthread_attr_init(&attr); pthread_attr_setaffinity_np(&attr, sizeof(cpuset_t), &scpuset); - rc = pthread_create(&ctx->sys_thread, nullptr, worker_thread_main, ctx); + rc = pthread_create(&ctx->sys_thread, &attr, worker_thread_main, ctx); if (rc != 0) { ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to create sys thread: %d\n", rc); rc = EINVAL; diff --git a/storage/birb_posix.cc b/storage/birb_posix.cc index 46da5c3..de657e5 100644 --- a/storage/birb_posix.cc +++ b/storage/birb_posix.cc @@ -221,15 +221,15 @@ worker_thread_main(void * arg) ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, "thread %d: started...\n", ctx->tid); - /* random delay 0-100 ms */ - usleep(((rand() * nm_get_uptime_ns()) % 100) * 1000); + /* random delay 0-100 us */ + usleep(nm_get_uptime_ns() % 100); next_ts = get_cur_ts_nano(); while (true) { - uint64_t cur_loop_ts = get_cur_ts_nano(); + uint64_t cur_ts = get_cur_ts_nano(); if (last_loop_ts > 0) { - uint64_t overhead = cur_loop_ts - last_loop_ts; + uint64_t overhead = cur_ts - last_loop_ts; if (ctx->overhead_max < overhead) { ctx->overhead_max = overhead; } @@ -238,14 +238,13 @@ worker_thread_main(void * arg) ctx->overhead_min = overhead; } - ctx->overhead_avg = ctx->overhead_avg * ctx->overhead_cnt + ctx->overhead_avg; + ctx->overhead_avg = ctx->overhead_avg * ctx->overhead_cnt + overhead; ctx->overhead_cnt++; ctx->overhead_avg /= ctx->overhead_cnt; } - last_loop_ts = cur_loop_ts; + last_loop_ts = cur_ts; // process io completion - uint64_t cur_ts = get_cur_ts_nano(); auto itr = prog_ios.begin(); while (itr != prog_ios.end()) { int err; @@ -301,10 +300,8 @@ worker_thread_main(void * arg) io_req->aio.aio_offset = a_offset; if(io_ctx.op == IOGEN_READ) { - io_req->aio.aio_lio_opcode = LIO_READ; rc = aio_read(&io_req->aio); } else { - io_req->aio.aio_lio_opcode = LIO_WRITE; rc = aio_write(&io_req->aio); } @@ -312,6 +309,7 @@ worker_thread_main(void * arg) ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "thread %d: failed to issue io %d, retrying...\n", ctx->tid, errno); } else { free_ios.pop_front(); + prog_ios.push_back(io_req); next_ts = next_ts + ia_gen->generate() * S2NS; } } @@ -434,7 +432,7 @@ birb_main() CPU_SET(cur_core, &scpuset); pthread_attr_init(&attr); pthread_attr_setaffinity_np(&attr, sizeof(cpuset_t), &scpuset); - rc = pthread_create(&ctx->sys_thread, nullptr, worker_thread_main, ctx); + rc = pthread_create(&ctx->sys_thread, &attr, worker_thread_main, ctx); if (rc != 0) { ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "main: failed to create sys thread: %d\n", rc); rc = EINVAL; diff --git a/storage/drivers/nvme.cc b/storage/drivers/nvme.cc index 65ec3ef..7e5108e 100644 --- a/storage/drivers/nvme.cc +++ b/storage/drivers/nvme.cc @@ -39,6 +39,7 @@ birb_nvme_driver::attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *t *ctx->ns = ns; *ctx->ctrlr = ctrlr; + ctx->valid = 1; } bool @@ -65,6 +66,7 @@ birb_nvme_driver::birb_nvme_driver(const char * dev_name) : status(BIRB_FAIL), ctx.ctrlr = &this->ctrlr; ctx.ns = &this->ns; ctx.dev_name = dev_name; + ctx.valid = 0; spdk_nvme_trid_populate_transport(&trid, SPDK_NVME_TRANSPORT_PCIE); snprintf(trid.subnqn, sizeof(trid.subnqn), "%s", SPDK_NVMF_DISCOVERY_NQN); @@ -75,6 +77,12 @@ birb_nvme_driver::birb_nvme_driver(const char * dev_name) : status(BIRB_FAIL), goto end; } + if (ctx.valid != 1) { + rc = EINVAL; + ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "birb_nvme_driver: could not find device: %s\n", dev_name); + goto end; + } + if (spdk_nvme_ns_get_csi(this->ns) == SPDK_NVME_CSI_ZNS) { ntr(NTR_DEP_USER1, NTR_LEVEL_ERROR, "birb_nvme_driver: zoned nvme namespace is unsupported\n"); spdk_nvme_detach(this->ctrlr); diff --git a/test/ts.cc b/test/ts.cc new file mode 100644 index 0000000..e961642 --- /dev/null +++ b/test/ts.cc @@ -0,0 +1,84 @@ +#include +#include "defs.hh" +#include "nm.hh" +#include + +void test(const char * case_name, struct timespec * ts) +{ + uint64_t slow; + uint64_t fast; + + slow = get_uptime(); + if (nanosleep(ts, nullptr) != 0) { + perror("nanosleep() interrupted!"); + exit(-1); + } + slow = get_uptime() - slow; + + fast = nm_get_uptime_ns(); + if (nanosleep(ts, nullptr) != 0) { + perror("nanosleep() interrupted!"); + exit(-1); + } + fast = nm_get_uptime_ns() - fast; + + printf("%s: get_uptime(): %lu, nm_get_uptime_ns(): %lu\n", case_name, slow, fast); +} + + +int main() +{ + struct timespec ts; + nm_init(0); + // 1s + ts.tv_nsec = 0; + ts.tv_sec = 1; + test("1s", &ts); + + // 100ms + ts.tv_nsec = 100000000; + ts.tv_sec = 0; + test("100ms", &ts); + + // 10ms + ts.tv_nsec = 10000000; + ts.tv_sec = 0; + test("10ms", &ts); + + // 1ms + ts.tv_nsec = 1000000; + ts.tv_sec = 0; + test("1ms", &ts); + + // 100us + ts.tv_nsec = 100000; + ts.tv_sec = 0; + test("100us", &ts); + + // 10us + ts.tv_nsec = 10000; + ts.tv_sec = 0; + test("10us", &ts); + + // 1us + ts.tv_nsec = 1000; + ts.tv_sec = 0; + test("1us", &ts); + + // 100ns + ts.tv_nsec = 100; + ts.tv_sec = 0; + test("100ns", &ts); + + // 10ns + ts.tv_nsec = 10; + ts.tv_sec = 0; + test("10ns", &ts); + + // 1ns + ts.tv_nsec = 1; + ts.tv_sec = 0; + test("1ns", &ts); + + return 0; +} \ No newline at end of file diff --git a/util/memloadgen.cc b/util/memloadgen.cc index e3ea944..c17edb9 100644 --- a/util/memloadgen.cc +++ b/util/memloadgen.cc @@ -1,4 +1,5 @@ #include "nm.hh" +#include #include "ntr.h" #include #include @@ -20,13 +21,14 @@ int main(int argc, char * argv[]) ntr_init(); ntr_set_level(NTR_DEP_USER1, NTR_LEVEL_WARNING); - unsigned long long mlg_bps = 0; + unsigned long long mlg_arrsz = 0; + uint32_t mlg_iter = -1; unsigned long long mlg_dmask = 0; unsigned long long mlg_cmask = 0; { int c; // parse arguments - while ((c = getopt(argc, argv, "hb:X:x:v")) != -1) { + while ((c = getopt(argc, argv, "hb:X:x:vi:")) != -1) { switch (c) { case 'v': ntr_set_level(NTR_DEP_USER1, @@ -36,7 +38,10 @@ int main(int argc, char * argv[]) usage(); exit(0); case 'b': - mlg_bps = strtoull(optarg, nullptr, 10); + mlg_arrsz = strtoull(optarg, nullptr, 10); + break; + case 'i': + mlg_iter = strtoul(optarg, nullptr, 10); break; case 'X': mlg_dmask = strtoull( @@ -53,7 +58,7 @@ int main(int argc, char * argv[]) } } - ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "MLG: [bps: %lld, threads: 0x%llx, domain: 0x%llx]\n", mlg_bps, mlg_cmask, mlg_dmask); + ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "MLG: [size: %llu, iter: %u, threads: 0x%llx, domain: 0x%llx]\n", mlg_arrsz, mlg_iter, mlg_cmask, mlg_dmask); // init nm if (nm_init(ntr_get_level(NTR_DEP_USER1) - NTR_LEVEL_WARNING) != 0) { @@ -62,18 +67,17 @@ int main(int argc, char * argv[]) } bool success = false; - memload_generator * mgen = new memload_generator(mlg_cmask, mlg_dmask, mlg_bps, &success); + memload_generator * mgen = new memload_generator(mlg_cmask, mlg_dmask, mlg_arrsz, mlg_iter, &success); mgen->start(); - - while(true) - { - unsigned long bps = mgen->get_bps(); - ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, - "main: MLG bps = %ld ~= %ldM\n", bps, bps / 1024 / 1024); - sleep(1); + while(!mgen->check_done()) { + usleep(10000); } + unsigned long bps = mgen->get_bps(); + ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, + "main: MLG bps = %ld ~= %ldM\n", bps, bps / 1024 / 1024); + return 0; }