diff --git a/CMakeLists.txt b/CMakeLists.txt index 8e2b94a..46cbf43 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -73,7 +73,7 @@ target_compile_options(birb_posix PRIVATE ${CC_FLAGS}) target_link_libraries(birb_posix PRIVATE pthread ntr gen) add_executable(memloadgen util/memloadgen.cc) -target_link_libraries(memloadgen PRIVATE pthread gen ntr ${TOPO_LINK_LIBRARIES}) +target_link_libraries(memloadgen PRIVATE pthread gen ntr nms ${TOPO_LINK_LIBRARIES}) target_compile_options(memloadgen PRIVATE ${CC_FLAGS} ${TOPO_CFLAGS}) add_executable(nms_test tests/nms_test.c) diff --git a/inc/gen.hh b/inc/gen.hh index a4c8a3e..868a845 100644 --- a/inc/gen.hh +++ b/inc/gen.hh @@ -19,6 +19,7 @@ #include #include +#include #include #include "defs.hh" @@ -297,9 +298,7 @@ Generator *createFacebookIA(); class memload_generator { public: struct memload_generator_options { - constexpr static size_t ITERATION_MAX = -1; size_t chunk_size {512 * 1024 * 1024}; - size_t iteration {ITERATION_MAX}; int verbose {0}; bool shared_buffer {true}; }; @@ -318,18 +317,25 @@ class memload_generator { // stat keeping std::atomic num_trans; - uint64_t begin_ts; - uint64_t end_ts; + std::atomic * state; + std::atomic * init_num; }; std::vector thr_infos; - std::atomic end; - struct memload_generator_options opts; + std::atomic state; + std::atomic init_num; + static constexpr int STATE_RUN = 0; + static constexpr int STATE_RDY = 1; + static constexpr int STATE_END = 2; + static constexpr int STATE_INIT = 3; + static void *worker_thrd(void *_tinfo); + struct memload_generator_options opts; public: memload_generator(cpuset_t * threads, cpuset_t * target_domain, struct memload_generator_options * opt, bool *success); - uint64_t get_bps(); - bool check_done(); + uint64_t get_transactions(); + bool start(); + bool stop(); ~memload_generator(); }; diff --git a/inc/net/pkt.hh b/inc/net/pkt.hh index babcaba..16baf35 100644 --- a/inc/net/pkt.hh +++ b/inc/net/pkt.hh @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -152,10 +153,14 @@ str_to_netspec(char *str, struct net_spec *out) } constexpr static uint16_t PKT_TYPE_LOAD = 0; +constexpr static uint32_t LOAD_TYPE_CPU = 0; // arg0 = cpu time in us. arg1 = unused +constexpr static uint32_t LOAD_TYPE_MEM = 1; // arg0 = which thread to access. arg1 = how many cachelines to access +constexpr static uint32_t LOAD_TYPE_MAX = LOAD_TYPE_MEM + 1; struct pkt_payload_load { uint32_t epoch; - uint32_t which; // which word (thread) - uint32_t load; // how many cache lines + uint32_t type; // type of load + uint32_t arg0; + uint32_t arg1; }; constexpr static uint16_t PKT_TYPE_PROBE = 1; diff --git a/libgen/loadgen.cc b/libgen/loadgen.cc index bc79d90..fc6a3ed 100644 --- a/libgen/loadgen.cc +++ b/libgen/loadgen.cc @@ -20,17 +20,29 @@ memload_generator::worker_thrd(void *_tinfo) long tid; thr_self(&tid); + // wait for other threads to init + tinfo->init_num->fetch_add(1); + if (tinfo->opts->verbose) { fprintf( stdout, "memload_generator : running...\n", tid); } - tinfo->begin_ts = topo_uptime_ns(); - while (tinfo->num_trans.load() < tinfo->opts->iteration) { - memcpy((char *)tinfo->from_buffer, (char *)tinfo->to_buffer, tinfo->opts->chunk_size); - tinfo->end_ts = topo_uptime_ns(); - tinfo->num_trans.fetch_add(1); - } + while(true) { + switch (tinfo->state->load()) { + case STATE_RUN: + memcpy((char *)tinfo->from_buffer, (char *)tinfo->to_buffer, tinfo->opts->chunk_size); + tinfo->num_trans.fetch_add(1); + break; + case STATE_END: + goto end; + case STATE_RDY: + case STATE_INIT: + default: + break; + } + } +end: if (tinfo->opts->verbose) { fprintf( stdout, "memload_generator : exiting...\n", tid); @@ -41,7 +53,8 @@ memload_generator::worker_thrd(void *_tinfo) memload_generator::memload_generator(cpuset_t * threads, cpuset_t * target_domain, struct memload_generator_options * opt, bool *success) { *success = false; - end.store(0); + state.store(STATE_INIT); + init_num.store(0); std::memcpy(&this->opts, opt, sizeof(memload_generator_options)); int nextcore = CPU_FFS(threads) - 1; @@ -64,10 +77,9 @@ memload_generator::memload_generator(cpuset_t * threads, cpuset_t * target_domai cpuset_t cpuset; pthread_attr_t attr; - info->stop = &this->end; + info->state = &this->state; + info->init_num = &this->init_num; info->num_trans.store(0); - info->begin_ts = 0; - info->end_ts = 0; info->opts = &this->opts; if (opt->shared_buffer) { info->from_buffer = local_buffer; @@ -96,6 +108,10 @@ memload_generator::memload_generator(cpuset_t * threads, cpuset_t * target_domai tid++; } + while((uint)init_num.load() != thr_infos.size()); + + state.store(STATE_RDY); + *success = true; if (opts.verbose) { fprintf(stdout, @@ -105,31 +121,40 @@ memload_generator::memload_generator(cpuset_t * threads, cpuset_t * target_domai } bool -memload_generator::check_done() +memload_generator::start() { - bool done = true; - for (auto i : thr_infos) { - done = done && (i->num_trans.load() >= this->opts.iteration); + if (this->state.load() == STATE_RDY) { + this->state.store(memload_generator::STATE_RUN); + return true; } - return done; + return false; +} + +bool +memload_generator::stop() +{ + if (this->state.load() == STATE_RUN) { + this->state.store(memload_generator::STATE_RDY); + return true; + } + return false; } uint64_t -memload_generator::get_bps() +memload_generator::get_transactions() { uint64_t total_transactions = 0; - uint64_t total_time = 0; for (auto i : thr_infos) { total_transactions += i->num_trans.load(); - total_time = (i->end_ts - i->begin_ts) + total_time; } - return (double)(total_transactions * this->opts.chunk_size) / ((double)total_time / thr_infos.size() / S2NS); + return total_transactions; } memload_generator::~memload_generator() { + this->state.store(STATE_END); for (auto i : thr_infos) { - // XXX: free + // XXX: nms_free regions pthread_join(i->pthr, NULL); delete i; } diff --git a/net/cat.cc b/net/cat.cc index 938efe3..4e10804 100644 --- a/net/cat.cc +++ b/net/cat.cc @@ -79,6 +79,7 @@ struct options_t { std::atomic s_slave_recved { 0 }; std::atomic s_slave_loss { 0 }; uint32_t s_state { STATE_WAIT }; + bool s_hwtimestamp { true }; Generator *s_iagen { nullptr }; std::vector s_data; @@ -121,22 +122,30 @@ rx_add_timestamp(uint16_t port, uint16_t qidx __rte_unused, ->epoch); if (options.s_last_datapt != nullptr && options.s_last_datapt->epoch == epoch) { - if ((ret = rte_eth_timesync_read_rx_timestamp( - port, &ts, pkts[i]->timesync & 0x3)) == - 0) { - // has hw rx timestamp - options.s_last_datapt->clt_hw_rx = - ts.tv_sec * S2NS + ts.tv_nsec; - options.s_last_datapt->clt_sw_rx = now; - ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, - "rx_add_timestamp: tagged packet %p with sw: %lu hw: %lu.\n", - (void *)pkts[i], now, - options.s_last_datapt->clt_hw_rx); + if (options.s_hwtimestamp) { + if ((ret = rte_eth_timesync_read_rx_timestamp( + port, &ts, pkts[i]->timesync & 0x3)) == + 0) { + // has hw rx timestamp + options.s_last_datapt->clt_hw_rx = + ts.tv_sec * S2NS + ts.tv_nsec; + options.s_last_datapt->clt_sw_rx = now; + ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, + "rx_add_timestamp: tagged packet %p with sw: %lu hw: %lu.\n", + (void *)pkts[i], now, + options.s_last_datapt->clt_hw_rx); + } else { + rte_exit(EXIT_FAILURE, + "rx_add_timestamp: packet %p not tagged - hw ts not " + "available - %d.\n", + (void *)pkts[i], ret); + } } else { - rte_exit(EXIT_FAILURE, - "rx_add_timestamp: packet %p not tagged - hw ts not " - "available - %d.\n", - (void *)pkts[i], ret); + options.s_last_datapt->clt_sw_rx = now; + options.s_last_datapt->clt_hw_rx = 0; + ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, + "rx_add_timestamp: tagged packet %p with sw: %lu hw: (disabled).\n", + (void *)pkts[i], now); } } else { ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, @@ -472,17 +481,22 @@ pkt_loop() } if (options.s_state == STATE_SENT) { - // check if hw ts is read + // check if hw tx ts is read if (!read_tx) { int ret; struct timespec ts; - if ((ret = rte_eth_timesync_read_tx_timestamp( - options.portid, &ts)) == 0) { - ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, - "locore_main: read hw tx timestamp %lu.\n", - (ts.tv_nsec + ts.tv_sec * S2NS)); - options.s_last_datapt->clt_hw_tx = - ts.tv_nsec + ts.tv_sec * S2NS; + if (options.s_hwtimestamp) { + if ((ret = rte_eth_timesync_read_tx_timestamp( + options.portid, &ts)) == 0) { + ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, + "locore_main: read hw tx timestamp %lu.\n", + (ts.tv_nsec + ts.tv_sec * S2NS)); + options.s_last_datapt->clt_hw_tx = + ts.tv_nsec + ts.tv_sec * S2NS; + read_tx = true; + } + } else { + options.s_last_datapt->clt_hw_tx = 0; read_tx = true; } } @@ -847,6 +861,12 @@ main(int argc, char *argv[]) struct mem_conf mconf; portconf_get(options.portid, &pconf); + if (!pconf.timesync) { + options.s_hwtimestamp = false; + ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, + "main: timesync disabled. hw timestamp unavailable.\n "); + } + dconf.mtu = MAX_STANDARD_MTU; dconf.num_threads = 1; dconf.portid = options.portid; diff --git a/net/khat.cc b/net/khat.cc index a4d0bf1..10602b0 100644 --- a/net/khat.cc +++ b/net/khat.cc @@ -8,6 +8,8 @@ #include #include +#include +#include #include @@ -23,10 +25,11 @@ #include "ntr.h" -#include "gen.hh" +//#include "gen.hh" #include "net/netsup.hh" #include "net/pkt.hh" #include "nms.h" +#include "rte_byteorder.h" constexpr static unsigned int BURST_SIZE = 32; constexpr static unsigned int CACHELINE_SIZE = 64; @@ -71,13 +74,7 @@ struct options_t { false }; // setting this to true changes mbuf size and mtu int port_mtu { MAX_STANDARD_MTU }; - int thread_cacheline_cnt = { 128 }; - bool mlg_enabled { false }; - uint64_t mlg_arr_sz { 0 }; - cpuset_t mlg_cset = CPUSET_T_INITIALIZER(0x2); - cpuset_t mlg_dset = CPUSET_T_INITIALIZER(0x1); - int mlg_shared_buffer { 0 }; - memload_generator *mlg { nullptr }; + int thread_cacheline_cnt = { 1600 }; // 100MB data per thread uint16_t portid { 0 }; // states @@ -218,6 +215,31 @@ tx_add_timestamp(uint16_t port __rte_unused, uint16_t qidx __rte_unused, return nb_pkts; } +static void +worker_cpu_load(unsigned long us) +{ + uint64_t now = topo_uptime_ns(); + while(true) { + uint64_t cur = topo_uptime_ns(); + if (cur - now >= us * 1000) { + break; + } + } +} + +static void +worker_memory_load(int tid, uint32_t which, uint32_t load) +{ + uint32_t start_cacheline = which % (options.thread_cacheline_cnt * options.s_thr_info.size()); + uint32_t thrd = start_cacheline / options.thread_cacheline_cnt; + uint32_t start = start_cacheline % options.thread_cacheline_cnt; + struct thread_info * cur = options.s_thr_info.at(tid); + struct thread_info * tgt = options.s_thr_info.at(thrd); + for (uint32_t i = 0; i < load; i++) { + *(uint32_t *)cur->load_buffer = *(uint32_t *)((char *)tgt->cache_lines + ((start + i) % options.thread_cacheline_cnt) * CACHELINE_SIZE); + } +} + static int locore_main(void *ti) { @@ -331,23 +353,24 @@ locore_main(void *ti) // perform the load auto pld = (struct pkt_payload_load *) pkt_data->payload; - uint32_t which = rte_be_to_cpu_32(pld->which); - uint32_t load = rte_be_to_cpu_32(pld->load); - uint32_t start_cacheline = which % - (options.thread_cacheline_cnt * - options.s_thr_info.size()); - uint32_t thrd = start_cacheline / - options.thread_cacheline_cnt; - uint32_t start = start_cacheline % - options.thread_cacheline_cnt; - for (uint j = 0; j < load; j++) { - *(uint32_t *)tinfo->load_buffer = - (start + j) % - options.thread_cacheline_cnt; - } + + uint32_t load_type = rte_be_to_cpu_32(pld->type); + uint32_t load_arg0 = rte_be_to_cpu_32(pld->arg0); + uint32_t load_arg1 = rte_be_to_cpu_32(pld->arg1); + ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, - "locore_main : LOAD @ thread %d, start %d, load %d\n", - tinfo->tid, thrd, start, load); + "locore_main : LOAD type %d, arg0 %d, arg1 %d\n", + tinfo->tid, load_type, load_arg0, load_arg1); + + if (load_type == LOAD_TYPE_CPU) { + worker_cpu_load(load_arg0); + } else if (load_type == LOAD_TYPE_MEM) { + worker_memory_load(tinfo->tid, load_arg0, load_arg1); + } else { + ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, + "locore_main : unknown LOAD type %d, ignoring...", tinfo->tid, load_type); + break; + } // reply pkt_hdr_to_netspec(pkt_data, &src, @@ -473,13 +496,10 @@ dump_options() " verbosity: +%d\n" " thread count: %d\n" " ip: 0x%x\n" - " MLG: %s [arr_sz: %ld, thread cnt: %d, domain: %ld]\n" " jumbo frame: %d\n" " port id: %d\n", ntr_get_level(NTR_DEP_USER1) - NTR_LEVEL_WARNING, options.num_threads, options.s_host_spec.ip, - options.mlg_enabled ? "on" : "off", options.mlg_arr_sz, - CPU_COUNT(&options.mlg_cset), CPU_FFS(&options.mlg_dset) - 1, options.jumbo_frame_enabled, options.portid); } @@ -506,7 +526,7 @@ main(int argc, char *argv[]) { int c; // parse arguments - while ((c = getopt(argc, argv, "hvA:H:mb:X:x:JSp:")) != -1) { + while ((c = getopt(argc, argv, "hvA:H:Jp:")) != -1) { switch (c) { case 'v': ntr_set_level(NTR_DEP_USER1, @@ -532,26 +552,10 @@ main(int argc, char *argv[]) } has_host_spec = true; break; - case 'm': - options.mlg_enabled = true; - break; - case 'b': - options.mlg_arr_sz = strtoull(optarg, nullptr, - 10); - break; - case 'X': - cpulist_to_cpuset(optarg, &options.mlg_dset); - break; - case 'x': - cpulist_to_cpuset(optarg, &options.mlg_cset); - break; case 'J': options.jumbo_frame_enabled = true; options.port_mtu = MAX_JUMBO_MTU; break; - case 'S': - options.mlg_shared_buffer = 1; - break; case 'p': options.portid = atoi(optarg); break; @@ -600,6 +604,7 @@ main(int argc, char *argv[]) if (!pconf.timesync) { ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING, "main: timesync disabled. hw timestamp unavailable.\n "); + options.s_hwtimestamp = false; } dconf.mtu = options.port_mtu; dconf.num_threads = options.num_threads; @@ -675,31 +680,8 @@ main(int argc, char *argv[]) } } - // init mlg - if (options.mlg_enabled) { - bool success = false; - memload_generator::memload_generator_options opts; - opts.chunk_size = options.mlg_arr_sz; - opts.iteration = - memload_generator::memload_generator_options::ITERATION_MAX; - opts.shared_buffer = options.mlg_shared_buffer; - opts.verbose = (ntr_get_level(NTR_DEP_USER1) - - NTR_LEVEL_WARNING) != 0; - options.mlg = new memload_generator(&options.mlg_cset, - &options.mlg_dset, &opts, &success); - if (!success) { - rte_exit(EXIT_FAILURE, "failed to init mlg\n"); - } - } - while (true) { usleep(S2US); - if (options.mlg_enabled) { - uint64_t bps = options.mlg->get_bps(); - ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG, - "main: MLG bps = %ld ~= %ldM\n", bps, - bps / 1024 / 1024); - } } // shouldn't get here @@ -714,8 +696,6 @@ main(int argc, char *argv[]) } } - delete options.mlg; - dpdk_cleanup(&dconf); return 0; diff --git a/net/libnetsup/portconf.cc b/net/libnetsup/portconf.cc index 83c9480..92031bd 100644 --- a/net/libnetsup/portconf.cc +++ b/net/libnetsup/portconf.cc @@ -15,7 +15,7 @@ static struct port_conf port_confs[] = { .rxoffload = RTE_ETH_RX_OFFLOAD_RSS_HASH | RTE_ETH_RX_OFFLOAD_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_IPV4_CKSUM, .txoffload = RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE | RTE_ETH_TX_OFFLOAD_UDP_CKSUM | RTE_ETH_TX_OFFLOAD_IPV4_CKSUM, .rss_hf = RTE_ETH_RSS_FRAG_IPV4 | RTE_ETH_RSS_NONFRAG_IPV4_UDP | RTE_ETH_RSS_NONFRAG_IPV4_OTHER | RTE_ETH_RSS_L2_PAYLOAD, - .timesync = true + .timesync = false }, { .driver_name = "net_ice", @@ -23,6 +23,13 @@ static struct port_conf port_confs[] = { .txoffload = RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE | RTE_ETH_TX_OFFLOAD_UDP_CKSUM | RTE_ETH_TX_OFFLOAD_IPV4_CKSUM, .rss_hf = RTE_ETH_RSS_FRAG_IPV4 | RTE_ETH_RSS_NONFRAG_IPV4_UDP | RTE_ETH_RSS_NONFRAG_IPV4_OTHER | RTE_ETH_RSS_L2_PAYLOAD, .timesync = true + }, + { + .driver_name = "net_ixgbe", + .rxoffload = RTE_ETH_RX_OFFLOAD_RSS_HASH | RTE_ETH_RX_OFFLOAD_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_IPV4_CKSUM, + .txoffload = RTE_ETH_TX_OFFLOAD_UDP_CKSUM | RTE_ETH_TX_OFFLOAD_IPV4_CKSUM, + .rss_hf = RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_NONFRAG_IPV4_UDP, + .timesync = true } }; diff --git a/net/rat.cc b/net/rat.cc index 4536349..188de05 100644 --- a/net/rat.cc +++ b/net/rat.cc @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -63,22 +64,14 @@ struct thread_info { std::atomic lost_pkts { 0 }; Generator *ia_gen { nullptr }; - Generator *load_gen { nullptr }; - std::random_device which_rd; - std::mt19937 which_rng; - std::uniform_int_distribution which_dice; + Generator *load_gen0 { nullptr }; + Generator *load_gen1 { nullptr }; std::mutex mtx; // this lock protects data shared between worker threads, i.e.: std::list recved_epochs; - thread_info() - : which_rd() - , which_rng(which_rd()) - , which_dice(std::uniform_int_distribution(0, UINT32_MAX)) - { - which_rng.seed(topo_uptime_ns()); - } + thread_info() = default; }; constexpr static int STATE_SYNC = 0; // waiting for SYNC @@ -86,13 +79,16 @@ constexpr static int STATE_SYNC_ACK = 1; // Waiting for sending SYNC_ACK constexpr static int STATE_RUNNING = 2; // Running constexpr static int STATE_FIN = 3; // FIN received +constexpr static int WORKLOAD_MAX_ARGS = 2; + struct options_t { unsigned int run_time { 5 }; // parameters int slave_mode { 0 }; uint32_t rage_quit_time { UINT32_MAX }; - char ia_gen[256] { "fixed" }; - char ld_gen[256] { "fixed:0" }; + char ia_gen[256] { "fixed:0" }; + char load_gen[WORKLOAD_MAX_ARGS][256] = {{"fixed:0"}, {"fixed:0"}}; + uint32_t workload_type {LOAD_TYPE_CPU}; uint32_t target_qps { 0 }; uint32_t depth { 1 }; struct net_spec server_spec { }; @@ -499,10 +495,9 @@ pkt_loop(struct thread_info *tinfo) } pld_load = (struct pkt_payload_load *)pkt_data->payload; - pld_load->load = rte_cpu_to_be_32( - tinfo->load_gen->generate()); - pld_load->which = rte_cpu_to_be_32( - tinfo->which_dice(tinfo->which_rng)); + pld_load->type = rte_cpu_to_be_32(options.workload_type); + pld_load->arg0 = rte_cpu_to_be_32((uint32_t)tinfo->load_gen0->generate()); + pld_load->arg1 = rte_cpu_to_be_32((uint32_t)tinfo->load_gen1->generate()); unsigned int epoch = epoch_mk(tinfo->id, cur_epoch); pld_load->epoch = rte_cpu_to_be_32(epoch); cur_epoch++; @@ -590,7 +585,9 @@ dump_options() " rage quit time = %ul\n" " slave mode = %d\n" " interarrival dist = %s\n" - " workload dist = %s\n" + " workload type = %d\n" + " workload arg0 = %s\n" + " workload arg1 = %s\n" " qps = %d\n" " host IP = 0x%x\n" " depth = %u\n" @@ -600,7 +597,7 @@ dump_options() " portid = %d\n", ntr_get_level(NTR_DEP_USER1) - NTR_LEVEL_WARNING, options.run_time, options.s_num_threads, options.rage_quit_time, options.slave_mode, - options.ia_gen, options.ld_gen, options.target_qps, + options.ia_gen, options.workload_type, options.load_gen[0], options.load_gen[1], options.target_qps, options.s_host_spec.ip, options.depth, options.pkt_loss_delay_ms, options.jumbo_frame_enabled, options.pkt_pad_sz, options.portid); } @@ -617,7 +614,9 @@ usage() " -S: slave(rat) mode\n" " -A: affinity mask\n" " -i: inter-arrival time distribution\n" - " -w: workload distribution\n" + " -w: workload type\n" + " -w (repeated): workload arg0 distribution\n" + " -w (repeated): workload arg1 distribution\n" " -r: rage quit time (in ms)\n" " -q: target QPS\n" " -H: host net spec\n" @@ -649,6 +648,7 @@ main(int argc, char *argv[]) ntr_set_level(NTR_DEP_USER1, NTR_LEVEL_WARNING); { int c; + int num_of_ws = 0; // parse arguments while ((c = getopt(argc, argv, "vht:s:SA:i:w:r:q:H:D:l:JP:p:")) != -1) { @@ -689,8 +689,17 @@ main(int argc, char *argv[]) sizeof(options.ia_gen) - 1); break; case 'w': - strncpy(options.ld_gen, optarg, - sizeof(options.ld_gen) - 1); + if (num_of_ws == 0) { + options.workload_type = strtol(optarg, NULL, 10); + if (options.workload_type >= LOAD_TYPE_MAX) { + rte_exit(EXIT_FAILURE, + "invalid workload type %s\n", optarg); + } + } else if (num_of_ws <= WORKLOAD_MAX_ARGS) { + strncpy(options.load_gen[num_of_ws - 1], optarg, 255); + } + + num_of_ws++; break; case 'r': options.rage_quit_time = strtol(optarg, nullptr, @@ -818,8 +827,9 @@ main(int argc, char *argv[]) unsigned int lcore_id = cpuset_idx - 1; tinfo = new thread_info; tinfo->ia_gen = createGenerator(options.ia_gen); - tinfo->load_gen = createGenerator(options.ld_gen); - if (tinfo->ia_gen == nullptr || tinfo->load_gen == nullptr) { + tinfo->load_gen0 = createGenerator(options.load_gen[0]); + tinfo->load_gen1 = createGenerator(options.load_gen[1]); + if (tinfo->ia_gen == nullptr || tinfo->load_gen0 == nullptr || tinfo->load_gen1 == nullptr) { rte_exit(EXIT_FAILURE, "invalid ia_gen or ld_gen string\n"); } @@ -886,7 +896,8 @@ main(int argc, char *argv[]) qps, total_recv, total_loss); for (auto each : options.s_thr_info) { - delete each->load_gen; + delete each->load_gen0; + delete each->load_gen1; delete each->ia_gen; delete each; } diff --git a/scripts/iperf.py b/scripts/iperf.py new file mode 100644 index 0000000..aaa52af --- /dev/null +++ b/scripts/iperf.py @@ -0,0 +1,310 @@ +from http import server +import subprocess as sp +import time +import os +import datetime +import sys +import getopt + +import libpar as par +import libtc as tc +import libmechspec as mechspec + +LOG_FILEPATH = "/iperflogs" +BIN_PATH = "/iperftls" +MLG_PATH = "/numam/build/bin/memloadgen" +EXE_PATH = BIN_PATH + "/src/iperf3" +SSL_CERT = "/certs/server.crt" +SSL_PKEY = "/certs/server.key" +#SERVER = ["skylake2.rcs.uwaterloo.ca"] +#CLIENTS = ["skylake1.rcs.uwaterloo.ca", "skylake3.rcs.uwaterloo.ca", "skylake6.rcs.uwaterloo.ca", "skylake7.rcs.uwaterloo.ca"] +SERVER_PORT_START = 8050 + +SERVER = ["icelake2-int.rcs.uwaterloo.ca"] +CLIENTS = ["icelake1-int.rcs.uwaterloo.ca", "milan1-int.rcs.uwaterloo.ca", "milan2-int.rcs.uwaterloo.ca"] + +SERVER_DAT = ["192.168.100.102"] +CLIENTS_DAT = ["192.168.100.101", "192.168.100.103", "192.168.100.104"] + +MEMLOAD_BLKSZ = 1024*1024*512 + + +# paths +file_dir : str = os.path.dirname(os.path.realpath(__file__)) +root_dir : str = os.path.join(file_dir,"..") + +class Conf: + def __init__(self, affinity, sendfile, tls, ktls, memloadgen, filepath): + self.affinity = affinity + self.ktls = ktls + self.sendfile = sendfile + self.tls = tls + self.memloadgen = memloadgen + self.filepath = filepath + + def to_string(self): + return f"affinity.{self.affinity}_sendfile.{self.sendfile}_tls.{self.tls}_ktls.{self.ktls}_memloadgen.{self.memloadgen != None}_filepath.{self.filepath.replace('/','-')}" + +class ArgTypes: + def __init__(self): + self.all = [[]] + + def add_arg(self, arg : list[list[any]]): + new_all = [] + for val in arg: + for exst in self.all: + tmp = exst.copy() + tmp.extend(val) + new_all.append(tmp) + self.all = new_all + + def get_fields(self) -> list[list[any]]: + return self.all + + +#SERVER_AFFINITY = ["25,27,29,31,33,35,37,39,41,43,45,47"] +#SERVER_AFFINITY = ["1,3,5,7,9,11,13,15,17,19,21,23"] +#SERVER_AFFINITY = ["1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33,35,37,39,41,43,45,47"] +CLIENTS_AFFINITY = ["1,3,5,7", "1,3,5,7", "1,3,5,7"] +arg_types : ArgTypes = ArgTypes() +# affinity +arg_types.add_arg([["1,3,5,7,9,11,13,15,17,19,21,23"]]) +#arg_types.add_arg([["49,51,53,55,57,59,61,63,65,67,69,71"]]) +# sendfile/tls/ktls +arg_types.add_arg([ + [False, False, False], + [True, False, False], + [False, True, False], + [False, True, True], + [True, True, True], + ]) +# memloadgen +arg_types.add_arg([[["73,75,77,79,81,83,85,87,89,91,93,95_0", "25,27,29,31,33,35,37,39,41,43,45,47_1"]], [None]] ) +#arg_types.add_arg([[None]] ) +# filepath +arg_types.add_arg([["/mnt/zroot/large_file_#p"], ["/nvdimm/large_file_#p"]]) +#arg_types.add_arg([["/tmpfs/large_file_#p"]]) + +def parse_comma_list(input : str): + return input.split(",") + +def setup_all(): + setup_cmd : str = f''' sudo pkg install -y openssl-devel vim curl wget gmake cmake openssl-devel llvm gcc; + sudo pkg remove -y iperf iperf3; + sudo rm -rf { BIN_PATH }; + sudo mkdir -p { BIN_PATH }; + sudo git clone https://git.quacker.org/d/iperf3-tls { BIN_PATH }; + cd { BIN_PATH }; + sudo ./configure; + sudo make -j8 ''' + ssrv : list[tuple[str, sp.Popen]] = [] + tc.log_print(f"Setting up {SERVER[0]}...") + ssrv.append((SERVER[0], tc.remote_exec(SERVER, setup_cmd, blocking=False, check=False)[0])) + + for s in CLIENTS: + tc.log_print(f"Setting up {s}...") + ssrv.append((s, tc.remote_exec([s], setup_cmd, blocking=False, check=False)[0])) + + for p in ssrv: + _ , stderr = p[1].communicate() + if p[1].returncode != 0: + print(f"\n{ p[0] } failed. stderr:\n{stderr.decode()}\n") + else: + print(f"\n{ p[0] } succeeded\n") + + + +def stop_all(): + # stop clients + tc.log_print("Stopping clients...") + tc.remote_exec(CLIENTS, "sudo killall -9 iperf3; sudo killall -9 memloadgen", check=False) + + tc.log_print("Stopping server...") + tc.remote_exec(SERVER, "sudo killall -9 iperf3; sudo killall -9 memloadgen", check=False) + +def prepare_logdir(): + tc.log_print("Preparing server log directory...") + prep_cmd = "sudo rm -rf " + LOG_FILEPATH + tc.log_print(prep_cmd) + tc.remote_exec(SERVER, prep_cmd, check=False) + + time.sleep(0.1) + + prep_cmd = "sudo mkdir -p " + LOG_FILEPATH + tc.log_print(prep_cmd) + tc.remote_exec(SERVER, prep_cmd, check=True) + +def run_exp(conf : Conf): + stop_all() + while True: + prepare_logdir() + + ssrvs=[] + ssrv_names=[] + cur_srv_proc = 0 + + smlg = [] + smlg_names = [] + if conf.memloadgen != None: + for emem in conf.memloadgen: + mlg_cmd = "sudo " + mlg_cpu = emem.split("_")[0] + mlg_dom = emem.split("_")[1] + mlg_cmd += f"{MLG_PATH} -b {MEMLOAD_BLKSZ} -i -1 -s {mlg_cpu} -d {mlg_dom}" + + tc.log_print("Starting memloadgen...") + tc.log_print(mlg_cmd) + smlg.append(tc.remote_exec(SERVER, mlg_cmd, blocking=False)[0]) + smlg_names.append("memloadgen") + time.sleep(0.1) + + for eaff in parse_comma_list(conf.affinity): + server_cmd = "sudo " + server_cmd += f"{EXE_PATH} -s -p " + str(SERVER_PORT_START + cur_srv_proc) + \ + " -F " + conf.filepath.replace("#p", str(cur_srv_proc)) + \ + " -A " + eaff + \ + " -J --logfile " + LOG_FILEPATH + "/" + eaff + ".txt" + if conf.tls: + server_cmd += f" --enable-ssl-over-tcp --ssl-certificate {SSL_CERT} --ssl-private-key {SSL_PKEY}" + if conf.ktls: + server_cmd += " --enable-ssl-ktls" + # start server + tc.log_print("Starting server proc " + str(cur_srv_proc) + "...") + tc.log_print(server_cmd) + ssrv = tc.remote_exec(SERVER, server_cmd, blocking=False)[0] + ssrvs.append(ssrv) + ssrv_names.append("Server " + str(cur_srv_proc)) + cur_srv_proc = cur_srv_proc + 1 + time.sleep(0.1) + + time.sleep(5) + # start clients + tc.log_print("Starting clients...") + sclts = [] + sclt_names = [] + clt_number = 0 + for i in range(len(CLIENTS)): + client_aff = CLIENTS_AFFINITY[i] + for eaff in parse_comma_list(client_aff): + client_cmd = f"sudo {EXE_PATH} -c " + SERVER_DAT[0] + \ + " -p " + str(SERVER_PORT_START + clt_number) + \ + " --connect-timeout 1000" + \ + " -A " + eaff + \ + " -t 30" + \ + " -P 4" + \ + " -R" + \ + " -N" + \ + " -4" + \ + " -O 10" + \ + " -J --logfile /dev/null" + if conf.tls: + client_cmd += f" --enable-ssl-over-tcp --ssl-certificate {SSL_CERT} --ssl-private-key {SSL_PKEY}" + if conf.ktls: + client_cmd += " --enable-ssl-ktls" + if conf.sendfile: + client_cmd += " -Z" + + tc.log_print(CLIENTS[i] + ":\n" + client_cmd) + sclts.append(tc.remote_exec([CLIENTS[i]], client_cmd, blocking=False)[0]) + sclt_names.append(CLIENTS[i] + "@" + eaff) + clt_number = clt_number + 1 + time.sleep(0.1) + # launch stderr monitoring thread + exclude = ["Pseudo-terminal"] + tc.errthr_create(sclts, sclt_names, exclude) + tc.errthr_create(ssrvs, ssrv_names, exclude) + if (conf.memloadgen != None): + tc.errthr_create(smlg, smlg_names, exclude) + tc.errthr_start() + cur = 0 + # selec = select.poll() + # selec.register(p.stdout, select.POLLIN) + success = False + while not success: + success = False + # either failed or timeout + # we use failure detection to save time for long durations + if tc.errthr_get_failed(): + break + + # while selec.poll(1): + # print(p.stdout.readline()) + + success = True + for p in sclts: + if p.poll() == None: + success = False + break + + time.sleep(1) + cur = cur + 1 + + stop_all() + tc.errthr_stop() + tc.log_print("Cooling down...") + time.sleep(5) + + if success: + flush_netresult() + break + + +def flush_netresult(): + tc.log_print("Keeping results...") + # copy log directory back to machine + log_output = tc.get_odir() + os.makedirs(log_output, exist_ok=True) + scp_cmd = "scp -P77 -r " + tc.get_ssh_user() + "@" + SERVER[0] + ":" + LOG_FILEPATH + " " + log_output + "/" + tc.log_print(scp_cmd) + sp.check_call(scp_cmd, shell=True) + + # parse results + log_output = log_output + "/" + os.path.basename(LOG_FILEPATH) + logs = os.listdir(log_output) + logs_bytes = [] + tc.log_print("Processing " + str(len(logs)) + " logs ...") + for log in logs: + if os.path.isfile(log_output + "/" + log): + with open(log_output + "/" + log, "r") as f: + logs_bytes.append(f.read()) + parser = par.iperf_json_parser(logs_bytes) + tc.log_print("Aggregated throughput: " + "{:.2f}".format(parser.aggregate_egress_bps / 8.0) + " B/s " + \ + "{:.2f}".format(parser.aggregate_egress_bps / 8.0 / 1024.0 / 1024.0) + " MB/s " + \ + "{:.2f}".format(parser.aggregate_egress_bps / 8.0 / 1024.0 / 1024.0 / 1024.0) + " GB/s") + + +def main(): + tc.set_ssh_param("-o StrictHostKeyChecking=no -p77") + tc.set_ssh_user("oscar") + output_dirname = "run" + + options = getopt.getopt(sys.argv[1:], 'sS')[0] + for opt, arg in options: + if opt in ('-s'): + stop_all() + return + elif opt in ('-S'): + setup_all() + return + + tc.init("~/results.d/iperf3/" + output_dirname + "_" + datetime.datetime.now().strftime('%Y%m%d%H%M%S')) + cpcmd = "cp " + __file__ + " " + tc.get_odir() + "/" + tc.log_print(cpcmd) + sp.check_call(cpcmd, shell=True) + + args = arg_types.get_fields() + confs : list[Conf] = [] + for arg in args: + confs.append(Conf(*arg)) + + print(f"{len(confs)} configs to run:") + for conf in confs: + print(conf.to_string()) + + for conf in confs: + tc.begin(conf.to_string()) + run_exp(conf) + tc.end() + + stop_all() +main() \ No newline at end of file diff --git a/scripts/libs/libmechspec.py b/scripts/libs/libmechspec.py new file mode 100644 index 0000000..fbcb000 --- /dev/null +++ b/scripts/libs/libmechspec.py @@ -0,0 +1,25 @@ + +class NetSpec: + def __init__(self, fqdn, ip, mac) -> None: + self.mac = mac + self.ip = ip + self.fqdn = fqdn + self.netspec = ip + "@" + mac + + +class LabNetSpecs: + def __init__(self) -> None: + self.SKYLAKE1_10G = NetSpec(fqdn = "skylake1.rcs.uwaterloo.ca",ip = "192.168.123.11", mac = "") + self.SKYLAKE2_10G = NetSpec(fqdn = "skylake2.rcs.uwaterloo.ca",ip = "192.168.123.12", mac = "3c:15:fb:c9:f3:36") + self.SKYLAKE3_10G = NetSpec(fqdn = "skylake3.rcs.uwaterloo.ca",ip = "192.168.123.13", mac = "3c:15:fb:c9:f3:4b") + self.SKYLAKE4_10G = NetSpec(fqdn = "skylake4.rcs.uwaterloo.ca",ip = "192.168.123.14", mac = "") + self.SKYLAKE5_10G = NetSpec(fqdn = "skylake5.rcs.uwaterloo.ca",ip = "192.168.123.15", mac = "3c:15:fb:c9:f3:28") + self.SKYLAKE6_10G = NetSpec(fqdn = "skylake6.rcs.uwaterloo.ca",ip = "192.168.123.16", mac = "3c:15:fb:62:9b:2f") + self.SKYLAKE7_10G = NetSpec(fqdn = "skylake7.rcs.uwaterloo.ca",ip = "192.168.123.17", mac = "3c:15:fb:c9:f3:44") + self.SKYLAKE8_10G = NetSpec(fqdn = "skylake8.rcs.uwaterloo.ca",ip = "192.168.123.18", mac = "3c:15:fb:62:9c:be") + self.MILAN1_100G = NetSpec(fqdn = "milan1-int.rcs.uwaterloo.ca",ip = "192.168.123.19", mac = "") + self.MILAN1_10G = NetSpec(fqdn = "milan1-int.rcs.uwaterloo.ca",ip = "192.168.123.19", mac = "a0:42:3f:4d:cb:bc") + self.ICELAKE2_100G = NetSpec(fqdn = "icelake2-int.rcs.uwaterloo.ca",ip = "192.168.123.20", mac = "") + self.ICELAKE2_10G = NetSpec(fqdn = "icelake2-int.rcs.uwaterloo.ca",ip = "192.168.123.20", mac = "") + +LAB = LabNetSpecs() \ No newline at end of file diff --git a/scripts/libs/libpar.py b/scripts/libs/libpar.py index 9b740f2..75b0b53 100644 --- a/scripts/libs/libpar.py +++ b/scripts/libs/libpar.py @@ -1,6 +1,42 @@ import json import numpy as np +class iperf_json_parser: + def __init__(self, inputs): + self.aggregate_egress_bps = 0 + self.jsonobjs = [] + for input in inputs: + jsobj = json.loads(input) + self.jsonobjs.append(jsobj) + each_bps = jsobj['end']['sum_sent']['bits_per_second'] + self.aggregate_egress_bps += each_bps + + +class pmc_parser: + def __init__(self, input): + self.raw = input + lines = input.split('\n') + if len(lines) < 2: + raise Exception("Invalid pmc file format") + + spec = lines[0].strip() + if (spec[0] != '#'): + raise Exception("Invalid pmc file spec line: \"" + lines[0] + "\"") + spec = spec.split(' ') + self.cores = len(spec) - 1 + elements = spec[1].split('/') + if (len(elements) != 3): + raise Exception("Invalid pmc file spec line: \"" + lines[0] + "\"") + self.counter = elements[2].strip() + + last_line = lines[-1] + elements = last_line.split(' ') + total = 0 + for e in elements: + if (len(e) > 0): + total += int(e) + self.count = total + class khat_parser: class pt: def __init__(self): diff --git a/scripts/libs/libtc.py b/scripts/libs/libtc.py index 9cf1e80..576ea51 100644 --- a/scripts/libs/libtc.py +++ b/scripts/libs/libtc.py @@ -66,11 +66,19 @@ def set_ssh_param(para): global ssh_param ssh_param = para +def get_ssh_param(): + global ssh_param + return ssh_param + ssh_user = None def set_ssh_user(user): global ssh_user ssh_user = user +def get_ssh_user(): + global ssh_user + return ssh_user + def remote_exec(srv, cmd, blocking=True, check=True): sub = [] for s in srv: diff --git a/scripts/netexp.py b/scripts/netexp.py new file mode 100644 index 0000000..2c1097e --- /dev/null +++ b/scripts/netexp.py @@ -0,0 +1,269 @@ +import time +import subprocess as sp + +import libpar as par +import libtc as tc +import libmechspec as mechspec + +class NetExpResult: + def __init__(self): + self.parser = None + self.pmc_parser = None + self.sample = None + + +class NetExpConf: + def __init__(self): + self.root_dir = "" + + self.enable_client_only = False + self.enable_memgen = False + + self.memgen_affinity = "" + self.memgen_iteration = -1 + self.memgen_size = 512 * 1024 * 1024 + self.memgen_tgtdom = 1 + + self.srv_affinity = "" + self.srv_mechspec = None + self.srv_port = 0 + + self.clt_qps = 0 + self.clt_mechspecs = [] + self.clt_affinity = "1" + self.clt_wrkld = 0 + self.clt_wrkarg0 = "fixed:0" + self.clt_wrkarg1 = "fixed:0" + self.clt_pkt_loss_lat = 1000 + self.clt_rage_quit_lat = 1000 + self.clt_port = 0 + self.clt_pkt_pad = 0 + self.clt_pkt_depth = 1 + self.clt_ia = "exponential" + + self.mst_mechspec = None + self.mst_affinity = "2" + self.mst_qps = 100 + self.mst_port = 0 + self.mst_pkt_loss_lat = 1000 + self.mst_pkt_loss_max = 1000 + self.mst_duration = 10 + self.mst_warmup = 5 + self.mst_ia = "exponential" + + self.enable_pmc = False + self.pmc_counters = [] + self.pmc_mode = 0 # 0 = sampling + self.pmc_sampling_rate = 8192 + self.pmc_counting_interval = 0.1 + + def __build_fqdn_arr(self, ns): + ret = [] + for n in ns: + ret.append(n.fqdn) + return ret + + def get_pmc_str(self): + ret = "" + for counter in self.pmc_counters: + ret = ret + counter + "," + return ret[:-1] + + def calc_client_qps(self): + return 0 if self.clt_qps == 0 else (int)((self.clt_qps - self.mst_qps) / len(self.clt_mechspecs)) + + def finalize_mechspecs(self): + self.clt_fqdns = self.__build_fqdn_arr(self.clt_mechspecs) + self.srv_fqdns = self.__build_fqdn_arr([self.srv_mechspec]) + self.mst_fqdns = self.__build_fqdn_arr([self.mst_mechspec]) + +__SAMPLE_FN = "sample.txt.tmp" +__PMC_FN = "pmc.txt.tmp" + +def __keep_result(conf : NetExpConf): + result = NetExpResult() + + target_scp_fn = tc.get_odir() + "/" + __SAMPLE_FN + scpcmd = "scp -P77 " + tc.get_ssh_user() + "@" + conf.mst_mechspec.fqdn + ":" + conf.root_dir + "/" + __SAMPLE_FN + " " + target_scp_fn + tc.log_print(scpcmd) + sp.check_call(scpcmd, shell=True) + + result.parser = par.khat_parser() + with open(target_scp_fn, "r") as f: + result.sample = f.read() + result.parser.parse(result.sample) + + rmcmd = "rm " + target_scp_fn + tc.log_print(rmcmd) + sp.check_call(rmcmd, shell=True) + + if conf.enable_pmc: + target_pmc_fn = tc.get_odir() + "/" + __PMC_FN + + pmcscpcmd = "scp -P77 " + tc.get_ssh_user() + "@" + conf.srv_mechspec.fqdn + ":" + conf.root_dir + "/" + __PMC_FN + " " + target_pmc_fn + tc.log_print(pmcscpcmd) + sp.check_call(pmcscpcmd, shell=True) + + if conf.pmc_mode == 0: + pmcproccmd = "sudo pmcstat -R " + conf.root_dir + "/" + __PMC_FN + " -m " + conf.root_dir + "/" + __PMC_FN + ".proc" + tc.log_print(pmcproccmd) + tc.remote_exec(conf.srv_fqdns, pmcproccmd) + + pmcscpcmd = "scp -P77 " + tc.get_ssh_user() + "@" + conf.srv_mechspec.fqdn + ":" + conf.root_dir + "/" + __PMC_FN + ".proc" + " " + target_pmc_fn + ".proc" + tc.log_print(pmcscpcmd) + sp.check_call(pmcscpcmd, shell=True) + + if conf.pmc_mode != 0: + with open(target_pmc_fn, "r") as f: + result.pmc_parser = par.pmc_parser(f.read()) + else: + with open(target_pmc_fn, "rb") as f: + with open(target_pmc_fn + ".proc", "r") as g: + result.pmc_parser = [f.read(), g.read()] + + rmcmd = "rm " + target_pmc_fn + ".proc" + tc.log_print(rmcmd) + sp.check_call(rmcmd, shell=True) + + rmcmd = "rm " + target_pmc_fn + tc.log_print(rmcmd) + sp.check_call(rmcmd, shell=True) + + return result + +def stop_all(conf : NetExpConf): + # stop clients + tc.log_print("Stopping clients...") + tc.remote_exec(conf.clt_fqdns, "sudo killall -9 rat; sudo killall -9 cat; sudo killall -9 khat; sudo killall -9 memloadgen", check=False) + + # stop master + tc.log_print("Stopping master...") + tc.remote_exec(conf.mst_fqdns, "sudo killall -9 rat; sudo killall -9 cat; sudo killall -9 khat; sudo killall -9 memloadgen", check=False) + + if not conf.enable_client_only: + # stop server + tc.log_print("Stopping server...") + tc.remote_exec(conf.srv_fqdns, "sudo killall -9 rat; sudo killall -9 cat; sudo killall -9 khat; sudo killall -9 memloadgen", check=False) + + if conf.enable_pmc: + tc.log_print("Stopping server PMC...") + tc.remote_exec(conf.srv_fqdns, "sudo killall -9 pmcstat", check=False) + + +def run(conf : NetExpConf): + stop_all(conf) + while True: + server_cmd = "sudo " + if conf.enable_pmc: + if conf.pmc_mode != 0: + pmc_cmd = "sudo pmcstat -C -w " + str(conf.pmc_counting_interval) + " -s " + conf.get_pmc_str() + " -o " + conf.root_dir + "/" + __PMC_FN + else: + pmc_cmd = "sudo pmcstat -n " + str(conf.pmc_sampling_rate) + " -S " + conf.get_pmc_str() + " -O " + conf.root_dir + "/" + __PMC_FN + tc.log_print("Starting server PMC...") + tc.log_print(pmc_cmd) + spmc = tc.remote_exec(conf.srv_fqdns, pmc_cmd, blocking=False) + + server_cmd += conf.root_dir + "/khat --log-level lib.eal:err -- -A " + conf.srv_affinity + \ + " -H " + conf.srv_mechspec.netspec + " -p " + str(conf.srv_port) + if int(conf.clt_pkt_pad) > 1518: + server_cmd += " -J " + if conf.enable_client_only: + ssrv = None + tc.log_print(server_cmd) + else: + # start server + tc.log_print("Starting server...") + tc.log_print(server_cmd) + ssrv = tc.remote_exec(conf.srv_fqdns, server_cmd, blocking=False) + + if conf.enable_memgen: + memgen_cmd = "sudo " + conf.root_dir + "/memloadgen -b " + str(conf.memgen_size) + " -s " + conf.memgen_affinity + \ + " -i " + str(conf.memgen_iteration) + " -d " + str(conf.memgen_tgtdom) + tc.log_print("Starting memloadgen...") + tc.log_print(memgen_cmd) + smem = tc.remote_exec(conf.srv_fqdns, memgen_cmd, blocking=False) + + # start clients + tc.log_print("Starting clients...") + sclt = [] + sclt_name = [] + for i in range(len(conf.clt_fqdns)): + client_cmd = "sudo " + conf.root_dir + "/rat --log-level lib.eal:err -- -S -A " + conf.clt_affinity + \ + " -i " + conf.clt_ia + \ + " -q " + str(conf.calc_client_qps()) + \ + " -H " + conf.clt_mechspecs[i].netspec + \ + " -s " + conf.srv_mechspec.netspec + \ + " -r " + str(conf.clt_rage_quit_lat) + \ + " -l " + str(conf.clt_pkt_loss_lat) + \ + " -w " + str(conf.clt_wrkld) + \ + " -w " + str(conf.clt_wrkarg0) + \ + " -w " + str(conf.clt_wrkarg1) + \ + " -P " + str(conf.clt_pkt_pad) + \ + " -D " + str(conf.clt_pkt_depth) + \ + " -p " + str(conf.clt_port) + if int(conf.clt_pkt_pad) > 1518: + client_cmd += " -J " + tc.log_print(client_cmd) + sclt.append(tc.remote_exec([conf.clt_fqdns[i]], client_cmd, blocking=False)[0]) + sclt_name.append(conf.clt_fqdns[i]) + + time.sleep(5) + # start master + tc.remote_exec + tc.log_print("Starting master...") + master_cmd = "sudo " + conf.root_dir + "/cat --log-level lib.eal:err -- " + \ + " -s " + conf.srv_mechspec.netspec + \ + " -o " + conf.root_dir + "/" + __SAMPLE_FN + \ + " -t " + str(conf.mst_duration) + \ + " -T " + str(conf.mst_warmup) + \ + " -i " + conf.mst_ia + \ + " -q " + str(conf.mst_qps) + \ + " -l " + str(conf.mst_pkt_loss_lat) + \ + " -L " + str(conf.mst_pkt_loss_max) + \ + " -A " + conf.mst_affinity + \ + " -H " + conf.mst_mechspec.netspec + \ + " -p " + str(conf.mst_port) + for clt in conf.clt_mechspecs: + master_cmd += " -S " + clt.netspec + tc.log_print(master_cmd) + sp = tc.remote_exec(conf.mst_fqdns, master_cmd, blocking=False) + p = sp[0] + + # launch stderr monitoring thread + exclude = ["Pseudo-terminal", "ice_", "i40e_"] + tc.errthr_create([p], conf.mst_fqdns, exclude) + if not conf.enable_client_only: + tc.errthr_create(ssrv, conf.srv_fqdns, exclude) + tc.errthr_create(sclt, sclt_name, exclude) + if conf.enable_memgen: + tc.errthr_create(smem, ["memloadgen"], exclude) + if conf.enable_pmc: + tc.errthr_create(spmc, ["pmcstat"], exclude) + tc.errthr_start() + success = False + cur = 0 + # selec = select.poll() + # selec.register(p.stdout, select.POLLIN) + while True: + # either failed or timeout + # we use failure detection to save time for long durations + if tc.errthr_get_failed() or cur >= (conf.mst_warmup + conf.mst_duration) * 3: + break + + # while selec.poll(1): + # print(p.stdout.readline()) + + if p.poll() != None: + success = True + break + + time.sleep(1) + cur = cur + 1 + + stop_all(conf) + tc.errthr_stop() + tc.log_print("Cooling down...") + time.sleep(5) + + if success: + return __keep_result(conf) diff --git a/scripts/run.py b/scripts/run.py index f78740b..2e25093 100755 --- a/scripts/run.py +++ b/scripts/run.py @@ -1,3 +1,5 @@ +from cgi import test +from site import abs_paths import subprocess as sp import time import select @@ -11,329 +13,213 @@ import re import libpar as par import libtc as tc +import libmechspec as mechspec +import netexp -# load_gen -loadgen_load = "fixed:0" +only_max_qps = True +# [[counter names], counting mode (0 = sampling, 1 = counting)] +pmc_counters = [ + #"", + # [["mem_load_l3_miss_retired.local_dram"], 1], + # [["mem_load_l3_miss_retired.remote_dram"], 1], + # [["mem_load_l3_miss_retired.remote_hitm"], 1], + # [["mem_load_l3_miss_retired.remote_fwd"], 1] + # [["mem_trans_retired.load_latency_gt_8"], 0], + # [["mem_trans_retired.load_latency_gt_16"], 0], + # [["mem_trans_retired.load_latency_gt_32"], 0], + # [["mem_trans_retired.load_latency_gt_64"], 0], + # [["mem_trans_retired.load_latency_gt_128"], 0], + # [["mem_trans_retired.load_latency_gt_256"], 0], + # [["mem_trans_retired.load_latency_gt_512"], 0], + + [["mem_trans_retired.load_latency_gt_8", ""], 0], +] # pkt_pad -jumbo_frame_threshold = 1518 -pkt_pads = [ - #"9018", - #1518", - "0", - "256", - "512", - "1024" - #"2048", - #"4096", - #"8192" +clt_pkt_pads = [ + 0, + # 256, + # 512, + # 1024, + # 2048, + # 4096, + # 8192 ] -pkt_pads_depth = {} -pkt_pads_depth["0"] = "32" -pkt_pads_depth["256"] = "16" -pkt_pads_depth["512"] = "8" -pkt_pads_depth["1024"] = "4" -pkt_pads_depth["1518"] = "4" -pkt_pads_depth["2048"] = "2" -pkt_pads_depth["4096"] = "1" -pkt_pads_depth["8192"] = "1" -pkt_pads_depth["9018"] = "1" +clt_pkt_pads_depth = {} +clt_pkt_pads_depth[0] = 8 +clt_pkt_pads_depth[256] = 6 +clt_pkt_pads_depth[512] = 6 +clt_pkt_pads_depth[1024] = 4 +clt_pkt_pads_depth[1518] = 4 +clt_pkt_pads_depth[2048] = 2 +clt_pkt_pads_depth[4096] = 2 +clt_pkt_pads_depth[8192] = 1 +clt_pkt_pads_depth[9018] = 1 -# memgen -enable_memgen = False -memgen_mask = [ - "0xFFFFFF000000", - "0x000000FFFFFF", - "0xFFFFFF000000", - "0xFFFFFF000000", - "0xFFFFFF000000", - "0xFFFFFF000000", - "0xFFFFFF000000", - "0x000000FFFFFF", - "0x000000FFFFFF", - "0x000000FFFFFF", - "0x000000FFFFFF", - "0x000000FFFFFF", +# clt_load +clt_wrkld = [ + [0, "fixed:0", "fixed:0"], + # [0, "uniform:1000", "fixed:0"], + # [0, "uniform:100", "fixed:0"], + # [0, "uniform:10", "fixed:0"], + # [1, "uniform:480", "uniform:1024"], + # [1, "uniform:480", "uniform:256"], + # [1, "uniform:480", "uniform:64"] ] -memgen_target = [ - "0xA", - "0xA000000", - "0xA", - "0xA", - "0xA", - "0xA", - "0xA", - "0xA000000", - "0xA000000", - "0xA000000", - "0xA000000", - "0xA000000", -] - - # paths -test_dir = "/numam.d/build/bin" file_dir = os.path.dirname(os.path.realpath(__file__)) root_dir = os.path.join(file_dir,"..") -sample_filename = "sample.txt" -affinity = [ - "1,3,5,7,9,11,13,15,17,19,21,23", - "65,67,69,71,73,75,77,79,81,83,85,87,89,91,93,95,97,99,101,103,105,107,109,111,113,115,117,119,121,123,125,127", - "1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33,35,37,39,41,43,45,47,49,51,53,55,57,59,61,63", - "1,3,5,7,9,11,13,15", - "17,19,21,23,25,27,29,31", - "33,35,37,39,41,43,45,47", - "49,51,53,55,57,59,61,63" +# [srv_affinity, OPTIONAL( memgen_affinity, iteration, buffer_size, target_dom )] +server_affinity = [ + ["1,3,5,7,9,11,13,15,17,19,21,23"], + ["25,27,29,31,33,35,37,39,41,43,45,47"], + #["1,3,5,7,9,11,13,15,17,19,21,23", "26,28,30,32,34,36,38,40,42,44,46", -1, 512*1024*1024, 0], + #["25,27,29,31,33,35,37,39,41,43,45,47", "2,4,6,8,10,12,14,16,18,20,22", -1, 512*1024*1024, 1], + + # "65,67,69,71,73,75,77,79,81,83,85,87,89,91,93,95,97,99,101,103,105,107,109,111,113,115,117,119,121,123,125,127", + # "1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33,35,37,39,41,43,45,47,49,51,53,55,57,59,61,63", + # "1,3,5,7,9,11,13,15", + # "17,19,21,23,25,27,29,31", + # "33,35,37,39,41,43,45,47", + # "49,51,53,55,57,59,61,63" ] -server = ["skylake5.rcs.uwaterloo.ca"] -server_spec = ["192.168.123.13@3c:15:fb:c9:f3:28"] -master_cpumask = "2" # 1 thread +def flush_netresult(conf : netexp.NetExpConf, result : netexp.NetExpResult): + sample_out = tc.get_odir() + "/" + str(result.parser.qps) + ".txt" -master = ["icelake2-int.rcs.uwaterloo.ca"] -master_spec = ["192.168.123.9@40:a6:b7:7c:86:10"] -#server = ["milan1-int.rcs.uwaterloo.ca"] -#server_spec = ["192.168.123.9@00:07:43:54:37:08"] + with open(sample_out, "w") as f: + f.write(result.sample) -clients = ["skylake2.rcs.uwaterloo.ca"]#, "skylake5.rcs.uwaterloo.ca", "skylake6.rcs.uwaterloo.ca"]# "skylake7.rcs.uwaterloo.ca", "skylake8.rcs.uwaterloo.ca"] -client_spec = ["192.168.123.12@3c:15:fb:c9:f3:36"]#, "192.168.123.13@3c:15:fb:c9:f3:28", "192.168.123.14@3c:15:fb:62:9b:2f"] #, "192.168.123.13@3c:15:fb:62:9c:be"] -client_cpumask = "1,3,5,7,9,11,13,15,17,19,21,23" - -client_rage_quit = 1000 #1s -warmup = 5 -duration = 20 -cooldown = 5 -cacheline = 0 -SSH_PARAM = "-o StrictHostKeyChecking=no -p77" -SSH_USER = "oscar" -master_qps = 100 -master_pkt_loss = 100 -master_pkt_loss_failure = -1 - -hostfile = None -lockstat = False -client_only = False - -def stop_all(): - # stop clients - tc.log_print("Stopping clients...") - tc.remote_exec(clients, "sudo killall -9 rat; sudo killall -9 cat; sudo killall -9 khat", check=False) - - if not client_only: - # stop server - tc.log_print("Stopping server...") - tc.remote_exec(server, "sudo killall -9 rat; sudo killall -9 cat; sudo killall -9 khat", check=False) - - # stop master - tc.log_print("Stopping master...") - tc.remote_exec(master, "sudo killall -9 rat; sudo killall -9 cat; sudo killall -9 khat", check=False) - -def get_client_str(): - ret = "" - for client in client_spec: - ret += " -S " + client + " " - return ret - -def calc_client_ld(ld : int): - return 0 if ld == 0 else (int)((ld - master_qps) / len(clients)) - -def run_exp(affinity : str, ld : int, pkt_pad : str, aff_idx : int): - while True: - server_cmd = "sudo " + test_dir + "/khat --log-level lib.eal:err -- -A " + affinity + \ - " -H " + server_spec[0] - if int(pkt_pad) > jumbo_frame_threshold: - server_cmd += " -J " - if enable_memgen: - server_cmd += " -m -b 0 -X " + memgen_target[aff_idx] + " -x " + memgen_mask[aff_idx] - if client_only: - ssrv = None - tc.log_print(server_cmd) + if conf.enable_pmc: + pmc_out = tc.get_odir() + "/" + str(result.parser.qps) + ".pmc" + if conf.pmc_mode != 0: + with open(pmc_out, "w") as f: + f.write(result.pmc_parser.raw) else: - # start server - tc.log_print("Starting server...") - tc.log_print(server_cmd) - ssrv = tc.remote_exec(server, server_cmd, blocking=False) + with open(pmc_out, "wb") as f: + f.write(result.pmc_parser[0]) + with open(pmc_out + "_parsed", "w") as g: + g.write(result.pmc_parser[1]) - # start clients - tc.log_print("Starting clients...") - sclt = [] - sclt_name = [] - for i in range(len(clients)): - client_cmd = "sudo " + test_dir + "/rat --log-level lib.eal:err -- -S -A " + client_cpumask + \ - " -i exponential " + \ - " -q " + str(calc_client_ld(ld)) + \ - " -H " + client_spec[i] + \ - " -s " + server_spec[0] + \ - " -r " + str(client_rage_quit) + \ - " -l 100 " + \ - " -w " + loadgen_load + \ - " -P " + pkt_pad + \ - " -D " + pkt_pads_depth[pkt_pad] - if int(pkt_pad) > jumbo_frame_threshold: - client_cmd += " -J " - tc.log_print(client_cmd) - sclt.append(tc.remote_exec([clients[i]], client_cmd, blocking=False)[0]) - sclt_name.append(clients[i]) - - time.sleep(5) - # start master - tc.log_print("Starting master...") - master_cmd = "sudo " + test_dir + "/cat --log-level lib.eal:err -- " + \ - " -s " + server_spec[0] + \ - " -o " + test_dir + "/" + sample_filename + \ - " -t " + str(duration) + \ - " -T " + str(warmup) + \ - " -i exponential" + \ - " -q " + str(master_qps) + \ - " -l " + str(master_pkt_loss) + \ - " -L " + str(master_pkt_loss_failure) + \ - " -A " + master_cpumask + \ - " -H " + master_spec[0] + \ - get_client_str() - tc.log_print(master_cmd) - sp = tc.remote_exec(master, master_cmd, blocking=False) - p = sp[0] - - # launch stderr monitoring thread - exclude = ["Pseudo-terminal", "ice_", "i40e_"] - tc.errthr_create([p], master, exclude) - if not client_only: - tc.errthr_create(ssrv, server, exclude) - tc.errthr_create(sclt, sclt_name, exclude) - tc.errthr_start() - success = False - cur = 0 - # selec = select.poll() - # selec.register(p.stdout, select.POLLIN) - while True: - # either failed or timeout - # we use failure detection to save time for long durations - if tc.errthr_get_failed() or cur >= (warmup + duration) * 3: - break - - # while selec.poll(1): - # print(p.stdout.readline()) - - if p.poll() != None: - success = True - break - - time.sleep(1) - cur = cur + 1 - - stop_all() - tc.errthr_stop() - print("Cooling down...") - time.sleep(cooldown) - - if success: - return - -def keep_results(qps_only : bool = False): - success = True - target_scp_fn = tc.get_odir() + "/sample.txt" - scpcmd = "scp -P77 oscar@" + master[0] + ":" + test_dir + "/" + sample_filename + " " + target_scp_fn - tc.log_print(scpcmd) - sp.check_call(scpcmd, shell=True) - - parser = par.khat_parser() - with open(target_scp_fn, "r") as f: - parser.parse(f.read()) - - target_fn = tc.get_odir() + "/" + str(parser.qps) + ".txt" - mvcmd = "mv " + target_scp_fn + " " + target_fn - tc.log_print(mvcmd) - sp.check_call(mvcmd, shell=True) - - tc.log_print("=== Summary - qps: " + str(parser.qps) + " master loss: " + str(float(parser.master_loss) / float(parser.master_recv + parser.master_loss) * 100.00) + " slave loss: " + str(float(parser.slave_loss) / float(parser.slave_recv + parser.slave_loss) * 100.0) + "%" ) - if not qps_only: - try: - tc.log_print("=== Server HW:") - tc.log_print(par.mutilate_data.build_mut_output(parser.srv_hwlat, [parser.qps]) + "\n") - tc.log_print("=== Server SW:") - tc.log_print(par.mutilate_data.build_mut_output(parser.srv_swlat, [parser.qps]) + "\n") - tc.log_print("=== Client HW:") - tc.log_print(par.mutilate_data.build_mut_output(parser.clt_hwlat, [parser.qps]) + "\n") - tc.log_print("=== Client SW:") - tc.log_print(par.mutilate_data.build_mut_output(parser.clt_swlat, [parser.qps]) + "\n") - except: - success = False - - if not success: - rmcmd = "rm " + target_fn - tc.log_print(rmcmd) - sp.check_call(rmcmd, shell=True) - return (parser.qps if success else -1) + tc.log_print("=== Summary - qps: " + str(result.parser.qps) + " master loss: " + str(float(result.parser.master_loss) / float(result.parser.master_recv + result.parser.master_loss) * 100.00) + "% slave loss: " + str(float(result.parser.slave_loss) / float(result.parser.slave_recv + result.parser.slave_loss) * 100.0) + "%" ) + tc.log_print("=== Server HW:") + tc.log_print(par.mutilate_data.build_mut_output(result.parser.srv_hwlat, [result.parser.qps]) + "\n") + tc.log_print("=== Server SW:") + tc.log_print(par.mutilate_data.build_mut_output(result.parser.srv_swlat, [result.parser.qps]) + "\n") + tc.log_print("=== Client HW:") + tc.log_print(par.mutilate_data.build_mut_output(result.parser.clt_hwlat, [result.parser.qps]) + "\n") + tc.log_print("=== Client SW:") + tc.log_print(par.mutilate_data.build_mut_output(result.parser.clt_swlat, [result.parser.qps]) + "\n") + if conf.enable_pmc: + if conf.pmc_mode != 0: + tc.log_print("=== PMC:") + tc.log_print("counter: " + result.pmc_parser.counter + " count: " + str(result.pmc_parser.count) + " cores: " + str(result.pmc_parser.cores)) def main(): - global hostfile - global server - global master - global clients - global client_only - - tc.set_ssh_param(SSH_PARAM) - tc.set_ssh_user(SSH_USER) + tc.set_ssh_param("-o StrictHostKeyChecking=no -p77") + tc.set_ssh_user("oscar") output_dirname = "run" - options = getopt.getopt(sys.argv[1:], 'h:so:c')[0] + conf = netexp.NetExpConf() + conf.srv_mechspec = mechspec.LAB.SKYLAKE3_10G + conf.clt_mechspecs = [mechspec.LAB.SKYLAKE6_10G, mechspec.LAB.SKYLAKE5_10G, mechspec.LAB.SKYLAKE7_10G, mechspec.LAB.SKYLAKE8_10G] + conf.mst_mechspec = mechspec.LAB.SKYLAKE2_10G + conf.finalize_mechspecs() + conf.root_dir = "/numam.d/build/bin" + + # server fixed configs + conf.srv_port = 0 + + # client fixed configs + conf.clt_ia = "exponential" + conf.clt_affinity = "1,3,5,7,9,11,13,15,17,19,21,23" + conf.clt_port = 0 + conf.clt_pkt_loss_lat = 5000 + conf.clt_rage_quit_lat = 5000 + + # master fixed configs + conf.mst_port = 0 + conf.mst_warmup = 5 + conf.mst_duration = 20 + conf.mst_qps = 100 + conf.mst_ia = "exponential" + conf.mst_pkt_loss_lat = 5000 + conf.mst_pkt_loss_max = 100 + conf.mst_affinity = "2" + + # pmc stuff + conf.pmc_sampling_rate = 4096 + conf.pmc_counting_interval = 0.1 + + options = getopt.getopt(sys.argv[1:], 'sc')[0] for opt, arg in options: - if opt in ('-h'): - hostfile = arg - elif opt in ('-s'): - stop_all() + if opt in ('-s'): + netexp.stop_all(conf) return - elif opt in ('-o'): - output_dirname = arg elif opt in ('-c'): - client_only=True + conf.enable_client_only=True tc.init("~/results.d/numam_neo/" + output_dirname + "_" + datetime.datetime.now().strftime('%Y%m%d%H%M%S')) + cpcmd = "cp " + __file__ + " " + tc.get_odir() + "/" + tc.log_print(cpcmd) + sp.check_call(cpcmd, shell=True) - tc.log_print("Configuration:\n" + \ - "hostfile: " + ("None" if hostfile == None else hostfile) + "\n" \ - "client only: " + str(client_only) + "\n" + \ - "output: " + output_dirname) + for eaff in server_affinity: + conf.srv_affinity = eaff[0] + conf.enable_memgen = False + if len(eaff) > 1: + conf.enable_memgen = True + conf.memgen_affinity = eaff[1] + conf.memgen_iteration = eaff[2] + conf.memgen_size = eaff[3] + conf.memgen_tgtdom = eaff[4] + for epad in clt_pkt_pads: + conf.clt_pkt_pad = 0 + conf.clt_pkt_depth = clt_pkt_pads_depth[conf.clt_pkt_pad] + for eload in clt_wrkld: + conf.clt_wrkld = eload[0] + conf.clt_wrkarg0 = eload[1] + conf.clt_wrkarg1 = eload[2] + for epmc in pmc_counters: + conf.enable_pmc = False + if len(epmc) > 0: + conf.enable_pmc = True + conf.pmc_counters = epmc[0] + conf.pmc_mode = epmc[1] - if hostfile != None: - hosts = tc.parse_hostfile(hostfile) - server = tc.process_hostnames(server, hosts) - clients = tc.process_hostnames(clients, hosts) - master = tc.process_hostnames(master, hosts) + test_name = "affinity" + eaff[0] + "_pad" + str(epad) + "_load" + str(eload[0]) + "," + str(eload[1]) + "," + str(eload[2]) + if (conf.enable_memgen): + test_name += "_memload" + str(eaff[1]) + "," + str(eaff[2]) + "," + str(eaff[3]) + "," + str(eaff[4]) + if (conf.enable_pmc): + test_name += "_pmc" + str(epmc[1]) + "_" + conf.get_pmc_str() + tc.begin(test_name) + + conf.clt_qps = 0 + tc.log_print("============ " + test_name + " QPS: MAX ============") + result : netexp.NetExpResult = netexp.run(conf) + flush_netresult(conf, result) + max_qps = result.parser.qps - stop_all() + if conf.enable_client_only: + return - for i in range(0, len(affinity)): - for epad in pkt_pads: - eaff = affinity[i] - - tc.begin(eaff + "_" + epad) - - tc.log_print("============ Affinity: " + str(eaff) + " PktPad: " + epad + " Load: MAX" + " ============") - run_exp(eaff, 0, epad, i) - max_qps = keep_results(qps_only=True) - stop_all() - - if client_only: - break - - finish = (int)(max_qps - max(master_qps, 0.01 * max_qps)) - step = (int)(finish / 10) - cur_load = step - while cur_load <= finish: - tc.log_print("============ Affinity: " + str(eaff) + " PktPad: " + epad + " Load: " + str(cur_load) + " ============") - - run_exp(eaff, cur_load, epad, i) - - if keep_results() != -1: - # increment only on success - cur_load += step - tc.log_print("") + if only_max_qps: + continue + finish = (int)(max_qps - max(conf.mst_qps, 0.01 * max_qps)) + step = (int)(finish / 10) + cur_qps = step + while cur_qps <= finish: + tc.log_print("============ " + test_name + " QPS: " + str(cur_qps) + " ============") + conf.clt_qps = cur_qps + result : netexp.NetExpResult = netexp.run(conf) + flush_netresult(result) + cur_qps += step + tc.log_print("") tc.end() - stop_all() + netexp.stop_all(conf) main() \ No newline at end of file diff --git a/scripts/setup_dpdk.sh b/scripts/setup_dpdk.sh index 4023eb6..b45a57f 100755 --- a/scripts/setup_dpdk.sh +++ b/scripts/setup_dpdk.sh @@ -2,7 +2,7 @@ dpdk_dir="/dpdk" libtopo_dir="/libtopo" root="$(dirname "$0")/.." -servers="skylake2.rcs.uwaterloo.ca skylake3.rcs.uwaterloo.ca skylake5.rcs.uwaterloo.ca skylake6.rcs.uwaterloo.ca icelake2-int.rcs.uwaterloo.ca milan1-int.rcs.uwaterloo.ca" +servers="skylake8.rcs.uwaterloo.ca" rsync_flags="-az" ssh_args="-o StrictHostKeyChecking=no -p77" @@ -20,7 +20,7 @@ compile() { # separate these functions because we might change kernel (reboot) without needing to recompile echo "====================$1====================" ssh $(echo $ssh_args $user@$1) "sudo sh -c \"sudo rm -rf $libtopo_dir; sudo rm -rf /usr/local/include/libtopo; sudo rm -rf /usr/local/lib/libtopo;sudo mkdir -p $libtopo_dir; sudo chmod 777 $libtopo_dir; cd $libtopo_dir; git clone https://git.quacker.org/d/libtopo; cd libtopo; mkdir build; cd build; cmake ../; sudo make install\"" - ssh $(echo $ssh_args $user@$1) "sudo sh -c \"sudo pkg install -y meson pkgconf py38-pyelftools; sudo rm -rf $dpdk_dir; sudo mkdir -p $dpdk_dir; sudo chmod 777 $dpdk_dir; cd $dpdk_dir; git clone https://git.quacker.org/d/numam-dpdk; cd numam-dpdk; git checkout migration; CC=gcc CXX=g++ meson -Denable_kmods=true build; cd build; ninja install\"" + ssh $(echo $ssh_args $user@$1) "sudo sh -c \"sudo pkg install -y meson pkgconf py39-pyelftools; sudo rm -rf $dpdk_dir; sudo mkdir -p $dpdk_dir; sudo chmod 777 $dpdk_dir; cd $dpdk_dir; git clone https://git.quacker.org/d/numam-dpdk; cd numam-dpdk; git checkout migration; CC=gcc CXX=g++ meson -Denable_kmods=true build; cd build; ninja install\"" wait echo "$1 Done." echo "" diff --git a/scripts/setup_program.sh b/scripts/setup_program.sh index ac7497f..cb8628c 100755 --- a/scripts/setup_program.sh +++ b/scripts/setup_program.sh @@ -1,7 +1,9 @@ #!/bin/sh dpdk_dir="/numam.d" root="$(dirname "$0")/.." -servers="skylake2.rcs.uwaterloo.ca skylake3.rcs.uwaterloo.ca skylake5.rcs.uwaterloo.ca skylake6.rcs.uwaterloo.ca icelake2-int.rcs.uwaterloo.ca milan1-int.rcs.uwaterloo.ca" +servers="icelake1-int.rcs.uwaterloo.ca" +#icelake2-int.rcs.uwaterloo.ca" +#servers="skylake2.rcs.uwaterloo.ca skylake3.rcs.uwaterloo.ca skylake5.rcs.uwaterloo.ca skylake6.rcs.uwaterloo.ca skylake7.rcs.uwaterloo.ca skylake8.rcs.uwaterloo.ca icelake2-int.rcs.uwaterloo.ca milan1-int.rcs.uwaterloo.ca" rsync_flags="-rv -e \"ssh -p77\"" ssh_args="-o StrictHostKeyChecking=no -p77" @@ -20,7 +22,7 @@ compile() { echo "====================$1====================" ssh $(echo $ssh_args $user@$1) "sudo sh -c \"rm -rf $dpdk_dir; mkdir $dpdk_dir; chmod 777 $dpdk_dir\"" rsync -rv -e "ssh -p77" $root/ $user@$1:$dpdk_dir/ - ssh $(echo $ssh_args $user@$1) "sudo sh -c \"cd $dpdk_dir; rm -rf build; mkdir build; cd build; cmake ../; make -j8 khat cat rat\"" + ssh $(echo $ssh_args $user@$1) "sudo sh -c \"cd $dpdk_dir; rm -rf build; mkdir build; cd build; cmake ../; make -j8 khat cat rat memloadgen\"" wait echo "$1 Done." echo "" diff --git a/scripts/test.py b/scripts/test.py new file mode 100644 index 0000000..a3876ee --- /dev/null +++ b/scripts/test.py @@ -0,0 +1,10 @@ +import libpar as par + +path = "/home/quackerd/results.d/numam_neo/run_20220807093909/affinity1,3,5,7,9,11,13,15,17,19,21,23_pad0_load0,fixed:0,fixed:0_pmc.mem_load_l3_miss_retired.local_dram/pmc.txt.tmp" + +with open(path, "r") as f: + p = par.pmc_parser(f.read()) + +print(str(p.cores)) +print(str(p.count)) +print(str(p.counter)) \ No newline at end of file diff --git a/scripts/tmp.py b/scripts/tmp.py new file mode 100644 index 0000000..4ecf4cf --- /dev/null +++ b/scripts/tmp.py @@ -0,0 +1,21 @@ +import time +import subprocess as sp + +import libpar as par +import libtc as tc +import libmechspec as mechspec +import numpy as np + +FILE : str = "/home/quackerd/1028361.txt" + +parser = par.khat_parser() +with open(FILE, "r") as f: + parser.parse(f.read()) + + +re = [] +for i in range(0, len(parser.clt_swlat)): + ertt = parser.clt_swlat[i] - parser.srv_swlat[i] + re.append(ertt) + +print("Median: " + str(np.percentile(re, 50))) \ No newline at end of file diff --git a/util/memloadgen.cc b/util/memloadgen.cc index 7e9c9ab..7dba8dd 100644 --- a/util/memloadgen.cc +++ b/util/memloadgen.cc @@ -1,6 +1,10 @@ #include "gen.hh" #include +#include +#include +#include #include "ntr.h" +#include "nms.h" #include #include #include @@ -11,32 +15,39 @@ usage() fprintf(stdout, "Usage:\n" " -v: verbose mode\n" - " -b: MLG bytes per second\n" - " -x: MLG thread affinity mask\n" - " -X: MLG target domain affinity mask\n" - " -S: shared buffer\n" - " -i: iterations\n"); + " -b: memory block size\n" + " -d: destination domain index\n" + " -s: worker threads cpu list\n" + " -S: enable shared memory block\n" + " -t: time to run\n" + " -o: output file path\n"); fflush(stdout); } +static char output_file[256] = "memloadgen_samples.txt"; + int main(int argc, char * argv[]) { ntr_init(); ntr_set_level(NTR_DEP_USER1, NTR_LEVEL_WARNING); - size_t arr_sz = 0; - uint32_t iter = -1; + size_t arr_sz = 1024 * 1024; + uint32_t time = -1; cpuset_t threads; + CPU_ZERO(&threads); + CPU_SET(0, &threads); + int shared_buffer = 0; cpuset_t domain_mask; + CPU_ZERO(&domain_mask); + CPU_SET(0, &domain_mask); { int c; // parse arguments - while ((c = getopt(argc, argv, "hb:X:x:vi:S")) != -1) { + while ((c = getopt(argc, argv, "vhb:d:s:So:t:")) != -1) { switch (c) { case 'v': - ntr_set_level(NTR_DEP_USER1, - ntr_get_level(NTR_DEP_USER1) + 1); + ntr_set_level(NTR_DEP_USER1, ntr_get_level(NTR_DEP_USER1) + 1); break; case 'h': usage(); @@ -44,18 +55,20 @@ int main(int argc, char * argv[]) case 'b': arr_sz = strtoull(optarg, nullptr, 10); break; - case 'i': - iter = strtoul(optarg, nullptr, 10); - break; - case 'X': + case 'd': cpulist_to_cpuset(optarg, &domain_mask); break; - case 'x': + case 's': cpulist_to_cpuset(optarg, &threads); break; case 'S': shared_buffer = 1; break; + case 'o': + strncpy(output_file, optarg, 256); + break; + case 't': + time = (uint32_t)strtol(optarg, nullptr, 10); default: usage(); exit(0); @@ -63,31 +76,54 @@ int main(int argc, char * argv[]) } } - ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "MLG: [size: %ld iter: %u, # threads: 0x%d, domain: 0x%ld]\n", arr_sz, iter, CPU_COUNT(&threads), CPU_FFS(&domain_mask) - 1); + ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "MLG: [size: %ld, # threads: 0x%d, domain: 0x%ld]\n", arr_sz, CPU_COUNT(&threads), CPU_FFS(&domain_mask) - 1); // init topo - if (topo_init(ntr_get_level(NTR_DEP_USER1) - NTR_LEVEL_WARNING) != 0) { + if (topo_init(ntr_get_level(NTR_DEP_USER1) != NTR_LEVEL_DEFAULT)) { fprintf(stderr, "libtopo init failed!\n"); exit(1); } + // init + if (nms_init(ntr_get_level(NTR_DEP_USER1) != NTR_LEVEL_DEFAULT)) { + fprintf(stderr, "libnms init failed!\n"); + exit(1); + } + bool success = false; memload_generator::memload_generator_options opts; opts.chunk_size = arr_sz; - opts.iteration = iter; opts.shared_buffer = shared_buffer; - opts.verbose = 1; + opts.verbose = ntr_get_level(NTR_DEP_USER1) != NTR_LEVEL_DEFAULT; + + std::ofstream ofile; + ofile.open(output_file, std::ios::out | std::ios::trunc); auto mgen = new memload_generator(&threads, &domain_mask, &opts, &success); - while(!mgen->check_done()) { - usleep(10000); + if (!mgen->start()) { + fprintf(stderr, "failed to start memloadgen!\n"); + exit(1); } - unsigned long bps = mgen->get_bps(); - ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, - "main: MLG bps = %ld ~= %ldM\n", bps, bps / 1024 / 1024); + uint64_t prev_ts = topo_uptime_ns(); + uint64_t prev_trans = mgen->get_transactions(); + uint32_t cur_time = 0; + while(cur_time < time) { + usleep(S2US); + uint64_t cur_ts = topo_uptime_ns(); + uint64_t trans = mgen->get_transactions(); + uint64_t bps = (uint64_t)((double)((trans - prev_trans) * arr_sz) / ((double)(cur_ts - prev_ts) / (double)S2NS)); + + ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "main: MLG bps = %ld ~= %ldM\n", bps, bps / 1024 / 1024); + ofile << bps << std::endl; + prev_ts = cur_ts; + prev_trans = trans; + cur_time++; + } + mgen->stop(); delete mgen; + ofile.close(); return 0; }