// numam/net/rat.cc

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_config.h>
#include <rte_eal.h>
#include <rte_ethdev.h>
#include <rte_ether.h>
#include <rte_launch.h>
#include <rte_lcore.h>
#include <rte_mbuf.h>
#include <unistd.h>

#include "gen.hh"
#include "nm.hh"
#include "ntr.h"

#include "net/pkt.hh"
#include "net/util.hh"

#include <atomic>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <list>
#include <map>
#include <mutex>
#include <random>
#include <vector>
// Number of mbufs requested for the packet pool created in main().
constexpr static unsigned int MBUF_MAX_COUNT = 65536;
// Cache size argument passed to rte_pktmbuf_pool_create() in main().
constexpr static unsigned int MBUF_CACHE_SIZE = 512;
// Requested RX descriptor count per queue; port_init() lets the driver adjust it.
constexpr static unsigned int RX_RING_SIZE = 2048;
// Requested TX descriptor count per queue; port_init() lets the driver adjust it.
constexpr static unsigned int TX_RING_SIZE = 2048;
// Size of the rx/tx mbuf arrays, i.e. the most packets handled per burst call.
constexpr static unsigned int BURST_SIZE = 32;
// Value-initialized port configuration that port_init() copies and then customizes.
static const struct rte_eth_conf port_conf_default {
};
/*
 * Packs a thread id and a per-thread sequence number into a single epoch
 * value: the id is stored above bit 24, the sequence number in the low
 * 24 bits.
 *
 * The sequence number is truncated to 24 bits. Without the mask a counter
 * that has run past 2^24 would spill into the id field, and
 * epoch_get_id() would then report the wrong thread.
 */
static unsigned int
epoch_mk(unsigned int id, unsigned int epoch)
{
	return (id << 24) | (epoch & 0x00FFFFFF);
}
/* Returns the thread id part of a packed epoch, i.e. everything above the low 24 bits. */
static unsigned int
epoch_get_id(unsigned int epoch)
{
	constexpr unsigned int id_shift = 24;
	const unsigned int thread_id = epoch >> id_shift;

	return thread_id;
}
/* Returns the sequence-number part of a packed epoch (its low 24 bits). */
static unsigned int
epoch_get_epoch(unsigned int epoch)
{
	constexpr unsigned int seq_mask = (1u << 24) - 1u;

	return epoch & seq_mask;
}
// A packed epoch together with a timestamp. pkt_loop() creates one record per
// packet it sends (ts = time of the send iteration) and one per load response
// it receives (ts = time of the receive iteration).
struct epoch_info {
// Packed id + sequence number, as built by epoch_mk().
unsigned int epoch;
// nm_get_uptime_ns() reading taken by the loop iteration that created the record.
uint64_t ts;
};
struct thread_info {
unsigned int id { 0 };
unsigned int lcore_id { 0 };
unsigned int rxqid { 0 };
unsigned int txqid { 0 };
// this field is read by the stat collecting thread
2021-02-23 18:12:27 +00:00
std::atomic<int> recved_pkts { 0 };
std::atomic<int> lost_pkts { 0 };
Generator *ia_gen { nullptr };
Generator *load_gen { nullptr };
2021-03-04 06:54:13 +00:00
std::random_device which_rd;
std::mt19937 which_rng;
std::uniform_int_distribution<uint32_t> which_dice;
std::mutex
mtx; // this lock protects data shared between worker threads, i.e.:
std::list<struct epoch_info *> recved_epochs;
2021-03-04 06:54:13 +00:00
thread_info() : which_rd(), which_rng(which_rd()), which_dice(std::uniform_int_distribution<uint32_t>(0, UINT32_MAX))
{
which_rng.seed(nm_get_uptime_ns());
}
};
// Values of options.s_state. In slave mode the state starts at STATE_SYNC and
// proto_loop() moves it SYNC -> SYNC_ACK -> RUNNING; otherwise it starts at
// STATE_RUNNING. It becomes STATE_FIN when pkt_loop() receives a FIN from the
// master or when main() decides the run time has elapsed.
constexpr static int STATE_SYNC = 0; // waiting for SYNC
constexpr static int STATE_SYNC_ACK = 1; // Waiting for sending SYNC_ACK
constexpr static int STATE_RUNNING = 2; // Running
constexpr static int STATE_FIN = 3; // FIN received
// Command-line parameters plus the process-wide runtime state shared by
// main() and the worker threads. A single instance (`options`) exists.
struct options_t {
// -t: number of one-second ticks main() waits before setting STATE_FIN
// (only used when not in slave mode).
unsigned int run_time { 5 };
// parameters
// -S: 1 when running as a slave; workers then run proto_loop() first.
int slave_mode { 0 };
// -r: pkt_loop() exits the process when no response arrived for this many ms.
uint32_t rage_quit_time { UINT32_MAX };
// -i / -w: generator description strings handed to createGenerator().
char ia_gen[256] { "fixed" };
char ld_gen[256] { "fixed:0" };
// -q: total target QPS; main() divides it evenly between the worker threads.
uint32_t target_qps { 0 };
// -D: max packets a worker keeps in flight (0 on the command line maps to UINT32_MAX).
uint32_t depth = 1;
// -s: destination of the load packets.
struct net_spec server_spec {
};
// -A: bit mask of the cores to run workers on.
uint64_t cpu_mask { 0x4 }; // 1 thread @ core 2
// -l: a sent packet is counted as lost after this many ms without a response
// (0 on the command line maps to UINT32_MAX).
uint32_t pkt_loss_delay_ms = UINT32_MAX;
// -J: enables the jumbo frame RX offload and switches port_mtu to MAX_JUMBO_MTU.
bool jumbo_frame_enabled { false };
// -P: pad size passed to alloc_pkt_hdr() for load packets.
int pkt_pad_sz { 0 };
int port_mtu { MAX_STANDARD_MTU };
// states
// Number of worker threads, derived from cpu_mask when -A is given.
unsigned int s_num_threads { 1 }; // 1 thread
struct rte_mempool *mbuf_pool { nullptr };
// -H: this host; its mac_addr is filled in from the port in main().
struct net_spec s_host_spec {
};
// The master ("cat"); filled in from the SYNC packet in proto_loop().
struct net_spec s_master_spec {
};
// Connection used for SYNC_ACK / FIN_ACK packets. It stores the addresses of
// the two specs above, so those members must stay declared before this one.
struct conn_spec s_master_cspec {
.src = &s_host_spec, .src_port = DEFAULT_RAT_PORT,
.dst = &s_master_spec, .dst_port = DEFAULT_RAT_PORT,
};
uint16_t s_portid { 0 };
// One entry per worker, indexed by thread id; allocated and freed in main().
std::vector<struct thread_info *> s_thr_info;
std::atomic<int> s_state { STATE_RUNNING }; // default non master mode
// states for qps
// Timestamp stored by each worker right before it enters pkt_loop();
// calc_stats() uses it as the start of the measurement interval.
std::atomic<uint64_t> s_ts_begin { 0 };
};
static struct options_t options;
static inline void
calc_stats(
2021-02-23 18:12:27 +00:00
uint64_t now, uint32_t *qps, uint32_t *recved_pkt, uint32_t *total_loss)
{
2021-02-23 18:12:27 +00:00
uint32_t recv = 0;
uint32_t loss = 0;
for (auto i : options.s_thr_info) {
2021-02-23 18:12:27 +00:00
recv += i->recved_pkts.load();
loss += i->lost_pkts.load();
}
2021-02-23 18:12:27 +00:00
if (recved_pkt != nullptr) {
*recved_pkt = recv;
}
if (total_loss != nullptr) {
*total_loss = loss;
}
if (qps != nullptr) {
2021-02-23 18:12:27 +00:00
*qps = (uint32_t)((double)(recv) /
((double)(now - options.s_ts_begin.load()) / (double)S2NS));
}
}
static void
proto_loop(struct thread_info *tinfo)
{
struct rte_mbuf *tx_buf;
struct rte_mbuf *rx_bufs[BURST_SIZE];
struct pkt_hdr *pkt_data;
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
"proto_loop <thread %d>: waiting for SYNC from cat\n", tinfo->id);
while (options.s_state.load() == STATE_SYNC) {
const uint16_t nb_rx = rte_eth_rx_burst(
options.s_portid, tinfo->rxqid, rx_bufs, BURST_SIZE);
if (nb_rx > 0) {
for (int i = 0; i < nb_rx; i++) {
struct pkt_hdr *each = check_valid_packet(
rx_bufs[i], &options.s_host_spec.mac_addr);
if (each != nullptr) {
uint16_t type = rte_be_to_cpu_16(
each->type);
if (type == PKT_TYPE_SYNC) {
int expected = STATE_SYNC;
ntr(NTR_DEP_USER1,
NTR_LEVEL_INFO,
"proto_loop <thread %d>: received SYNC from cat\n",
tinfo->id);
if (!options.s_state
.compare_exchange_strong(
expected,
STATE_SYNC_ACK)) {
// someone barged in,
// listen to that guy
ntr(NTR_DEP_USER1,
NTR_LEVEL_WARNING,
"proto_loop <thread %d>: failed to cmpxchg sync_recv.\n",
tinfo->id);
} else {
pkt_hdr_to_netspec(each,
&options
.s_master_spec,
nullptr, nullptr,
nullptr);
if (alloc_pkt_hdr(
options
.mbuf_pool,
PKT_TYPE_SYNC_ACK,
&options
.s_master_cspec,
0,
&tx_buf,
&pkt_data) !=
0) {
rte_exit(
EXIT_FAILURE,
"failed to alloc pkt hdr\n");
}
tx_burst_all(options.s_portid, tinfo->txqid, &tx_buf, 1);
expected =
STATE_SYNC_ACK;
// we've done our job,
// set off the threads
if (!options.s_state
.compare_exchange_strong(
expected,
STATE_RUNNING)) {
rte_exit(
EXIT_FAILURE,
"state unexpectedly changed\n");
}
ntr(NTR_DEP_USER1,
NTR_LEVEL_INFO,
"proto_loop <thread %d>: sent SYNC_ACK to cat\n",
tinfo->id);
}
} else {
ntr(NTR_DEP_USER1,
NTR_LEVEL_DEBUG,
"proto_loop <thread %d>: ignoring invalid packet %p type %d.\n",
tinfo->id,
(void *)rx_bufs[i], type);
}
} else {
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
"proto_loop <thread %d>: ignoring invalid packet %p.\n",
tinfo->id, (void *)rx_bufs[i]);
}
rte_pktmbuf_free(rx_bufs[i]);
}
}
}
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
"proto_loop <thread %d>: exiting loop...\n", tinfo->id);
}
static void
pkt_loop(struct thread_info *tinfo)
{
struct rte_mbuf *tx_bufs[BURST_SIZE];
struct rte_mbuf *rx_bufs[BURST_SIZE];
std::vector<struct epoch_info *> recved_epochs;
std::map<unsigned int, struct epoch_info *> sent_epochs;
uint64_t cur_epoch = 0;
uint64_t next_ts;
uint64_t last_recv_ts = 0;
struct conn_spec srv_cspec;
rdport_generator src_port_gen(MIN_RANDOM_PORT);
rdport_generator dst_port_gen(MIN_RANDOM_PORT);
srv_cspec.src = &options.s_host_spec;
srv_cspec.dst = &options.server_spec;
next_ts = nm_get_uptime_ns();
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "pkt_loop <thread %d>: entering\n",
tinfo->id);
while (options.s_state.load() == STATE_RUNNING) {
uint64_t now = nm_get_uptime_ns();
// always pop incoming packets
const uint16_t nb_rx = rte_eth_rx_burst(
options.s_portid, tinfo->rxqid, rx_bufs, BURST_SIZE);
if (nb_rx > 0) {
for (int i = 0; i < nb_rx; i++) {
struct pkt_hdr *each = check_valid_packet(
rx_bufs[i], &options.s_host_spec.mac_addr);
if (each == nullptr) {
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
"pkt_loop <thread %d>: ignoring invalid packet %p.\n",
tinfo->id, (void *)rx_bufs[i]);
rte_pktmbuf_free(rx_bufs[i]);
continue;
}
uint16_t type = rte_be_to_cpu_16(each->type);
NTR_PKT(NTR_DEP_USER1, NTR_LEVEL_DEBUG, each,
"locore_main <thread %d>: ", tinfo->id);
struct pkt_payload_epoch *pld_epoch;
struct epoch_info *einfo;
uint32_t epoch;
uint32_t id;
struct thread_info *other_t;
int int_expected = STATE_RUNNING;
switch (type) {
case PKT_TYPE_LOAD_RESP:
pld_epoch = (struct pkt_payload_epoch *)
each->payload;
epoch = rte_be_to_cpu_32(
pld_epoch->epoch);
id = epoch_get_id(epoch);
// printf("Load resp size : %d\n", rx_bufs[i]->data_len);
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
"pkt_loop <thread %d>: packet %p epoch 0x%x id %d.\n",
tinfo->id, (void *)rx_bufs[i],
epoch, id);
if (id >= options.s_num_threads) {
ntr(NTR_DEP_USER1,
NTR_LEVEL_WARNING,
"pkt_loop <thread %d>: packet %p invalid id %d.\n",
tinfo->id,
(void *)rx_bufs[i], id);
break;
}
einfo = new struct epoch_info;
einfo->epoch = epoch;
einfo->ts = now;
other_t = options.s_thr_info.at(id);
other_t->mtx.lock();
other_t->recved_epochs.push_back(einfo);
other_t->mtx.unlock();
break;
case PKT_TYPE_FIN:
if (rte_is_same_ether_addr(
&each->eth_hdr.s_addr,
&options.s_master_spec
.mac_addr)) {
ntr(NTR_DEP_USER1,
NTR_LEVEL_DEBUG,
"pkt_loop <thread %d>: recved FIN from cat.\n",
tinfo->id);
// master told us to stop!
if (!options.s_state
.compare_exchange_strong(
int_expected,
STATE_FIN)) {
ntr(NTR_DEP_USER1,
NTR_LEVEL_WARNING,
"pkt_loop <thread %d>: failed to cmpxchg state.\n",
tinfo->id);
}
uint32_t qps;
2021-02-23 18:12:27 +00:00
uint32_t total_recv;
uint32_t total_loss;
calc_stats(now, &qps,
2021-02-23 18:12:27 +00:00
&total_recv, &total_loss);
struct pkt_hdr *pkt_hdr;
if (alloc_pkt_hdr(
options.mbuf_pool,
PKT_TYPE_FIN_ACK,
&options.s_master_cspec,
0,
&tx_bufs[0],
&pkt_hdr) != 0) {
rte_exit(EXIT_FAILURE,
"failed to allocate pkt hdr\n");
}
auto pld_qps =
(struct pkt_payload_qps *)
pkt_hdr->payload;
pld_qps->qps = rte_cpu_to_be_32(
qps);
2021-02-23 18:12:27 +00:00
pld_qps->recved_pkts =
rte_cpu_to_be_32(
2021-02-23 18:12:27 +00:00
total_recv);
pld_qps->lost_pkts =
rte_cpu_to_be_32(
total_loss);
tx_burst_all(options.s_portid, tinfo->txqid, &tx_bufs[0], 1);
options.s_state.store(
STATE_FIN);
ntr(NTR_DEP_USER1,
NTR_LEVEL_DEBUG,
"pkt_loop <thread %d>: sent FIN_ACK to cat. QPS = %d.\n",
tinfo->id, qps);
} else {
ntr(NTR_DEP_USER1,
NTR_LEVEL_WARNING,
"pkt_loop <thread %d>: invalid FIN packet from a different cat.\n",
tinfo->id);
}
break;
default:
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
"pkt_loop: ignoring packet %p with unknown type %d.\n",
(void *)rx_bufs[i], type);
}
rte_pktmbuf_free(rx_bufs[i]);
}
}
// dequeue receved epochs
struct epoch_info *einfo;
tinfo->mtx.lock();
while (!tinfo->recved_epochs.empty()) {
// only dequeue, process later
einfo = tinfo->recved_epochs.front();
tinfo->recved_epochs.pop_front();
// XXX: might call into the allocator
// otherwise we need to have an array and do batching
// => complex code and don't think it's worth it
recved_epochs.push_back(einfo);
}
tinfo->mtx.unlock();
if (!recved_epochs.empty())
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
"pkt_loop <thread %d>: dequeued %lu received epochs\n",
tinfo->id, recved_epochs.size());
// process epochs
while (!recved_epochs.empty()) {
einfo = recved_epochs.back();
recved_epochs.pop_back();
auto it = sent_epochs.find(einfo->epoch);
if (it != sent_epochs.end()) {
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
"pkt_loop <thread %d>: received epoch 0x%x\n",
tinfo->id, epoch_get_epoch(einfo->epoch));
if (einfo->ts > last_recv_ts) {
last_recv_ts = einfo->ts;
}
delete it->second;
sent_epochs.erase(it);
2021-02-23 18:12:27 +00:00
tinfo->recved_pkts.fetch_add(1);
} else {
// we recved an epoch we never sent
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
"pkt_loop <thread %d>: received epoch 0x%x but never sent it. Packet loss?\n",
tinfo->id, einfo->epoch);
}
delete einfo;
}
// handle packet loss
for (auto it = sent_epochs.begin(); it != sent_epochs.end();) {
einfo = it->second;
if (now - einfo->ts >
options.pkt_loss_delay_ms * MS2NS) {
// timed out
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
"pkt_loop <thread %d>: epoch 0x%x is lost after not receiving for too long\n",
tinfo->id, einfo->epoch);
delete it->second;
it = sent_epochs.erase(it);
tinfo->lost_pkts.fetch_add(1);
} else {
++it;
}
}
// check to send the next packet
uint32_t total_send = 0;
while (now >= next_ts && sent_epochs.size() < options.depth &&
total_send < BURST_SIZE) {
struct pkt_payload_load *pld_load;
struct pkt_hdr *pkt_data;
next_ts += (int)(tinfo->ia_gen->generate() * S2NS);
// change dst port for every packet for RSS
srv_cspec.dst_port = dst_port_gen.next();
srv_cspec.src_port = src_port_gen.next();
if (alloc_pkt_hdr(options.mbuf_pool, PKT_TYPE_LOAD,
&srv_cspec, options.pkt_pad_sz, &tx_bufs[total_send],
&pkt_data) != 0) {
rte_exit(EXIT_FAILURE,
"failed to allocate pkt hdr\n");
}
pld_load = (struct pkt_payload_load *)pkt_data->payload;
pld_load->load = rte_cpu_to_be_32(
tinfo->load_gen->generate());
2021-03-04 06:54:13 +00:00
pld_load->which = rte_cpu_to_be_32(tinfo->which_dice(tinfo->which_rng));
unsigned int epoch = epoch_mk(tinfo->id, cur_epoch);
pld_load->epoch = rte_cpu_to_be_32(epoch);
cur_epoch++;
einfo = new struct epoch_info;
einfo->epoch = epoch;
einfo->ts = now;
sent_epochs.insert({ epoch, einfo });
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
"pkt_loop <thread %d>: sending packet %p with epoch 0x%x\n",
tinfo->id, (void *)tx_bufs[total_send], epoch);
total_send++;
}
tx_burst_all(options.s_portid, tinfo->txqid, tx_bufs, total_send);
// if (total_send > 0) {
// const uint16_t nb_tx = rte_eth_tx_burst(
// options.s_portid, tinfo->txqid, tx_bufs,
// total_send);
// if (nb_tx != total_send) {
// rte_exit(
// EXIT_FAILURE, "failed to send packet\n");
// }
// }
// check rage quit only when we have sent a packet
if (last_recv_ts == 0) {
last_recv_ts = nm_get_uptime_ns();
}
if (nm_get_uptime_ns() - last_recv_ts >
options.rage_quit_time * MS2NS) {
rte_exit(EXIT_FAILURE,
"rat: thread %d waiting too long for resp. I QUIT!!\n",
tinfo->id);
}
}
// clean up
for (auto it = sent_epochs.begin(); it != sent_epochs.end();) {
delete it->second;
++it;
}
ntr(NTR_DEP_USER1, NTR_LEVEL_DEBUG,
"pkt_loop <thread %d>: exiting loop...\n", tinfo->id);
}
static int
locore_main(void *tif)
{
auto tinfo = (struct thread_info *)tif;
uint32_t core_id = rte_lcore_id();
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
"locore_main <thread %d>: running on core %d...\n", tinfo->id,
core_id);
if (rte_eth_dev_socket_id(options.s_portid) > 0 &&
rte_eth_dev_socket_id(options.s_portid) != (int)rte_socket_id()) {
ntr(NTR_DEP_USER1, NTR_LEVEL_WARNING,
"locore_main <thread %d>: WARNING, port %d is on remote NUMA node to "
"polling thread.\n\tPerformance will "
"not be optimal.\n",
tinfo->id, options.s_portid);
}
if (options.slave_mode == 1) {
// perform rat protocol
proto_loop(tinfo);
}
// wait for the primary thread sending SYNC_ACK
while (options.s_state.load() != STATE_RUNNING) {
}
// store the current timestamp
options.s_ts_begin.store(nm_get_uptime_ns());
pkt_loop(tinfo);
ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "locore_main <thread %d>: exited\n",
tinfo->id);
return 0;
}
static int
port_init(uint16_t portid, struct rte_mempool *mbuf_pool)
{
struct rte_eth_dev_info dev_info {
};
struct rte_eth_conf port_conf = port_conf_default;
struct rte_eth_txconf txconf {
};
struct rte_eth_rxconf rxconf {
};
uint16_t nb_rxd = RX_RING_SIZE;
uint16_t nb_txd = TX_RING_SIZE;
if (!rte_eth_dev_is_valid_port(portid)) {
return -1;
}
int ret = rte_eth_dev_info_get(portid, &dev_info);
if (ret != 0) {
return ret;
}
port_conf.rxmode.max_rx_pkt_len = mtu_to_pkt_size(options.port_mtu);;
port_conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
port_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_NONFRAG_IPV4_UDP |
ETH_RSS_L2_PAYLOAD | ETH_RSS_NONFRAG_IPV4_TCP;
port_conf.rx_adv_conf.rss_conf.rss_key = nullptr;
port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_UDP_CKSUM;
port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_IPV4_CKSUM;
port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_RSS_HASH;
port_conf.txmode.offloads |= DEV_TX_OFFLOAD_UDP_CKSUM;
port_conf.txmode.offloads |= DEV_TX_OFFLOAD_IPV4_CKSUM;
port_conf.txmode.offloads |= DEV_TX_OFFLOAD_MBUF_FAST_FREE;
if (options.jumbo_frame_enabled) {
port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
}
/* Configure the Ethernet device. */
ret = rte_eth_dev_configure(
portid, options.s_num_threads, options.s_num_threads, &port_conf);
if (ret != 0)
return ret;
ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, &nb_txd);
if (ret != 0)
return ret;
/* Allocate and set up 1 RX queue per thread . */
rxconf = dev_info.default_rxconf;
if (options.jumbo_frame_enabled) {
rxconf.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
}
rxconf.offloads = port_conf.rxmode.offloads;
for (uint32_t i = 0; i < options.s_num_threads; i++) {
ret = rte_eth_rx_queue_setup(portid,
options.s_thr_info.at(i)->rxqid, nb_rxd,
rte_eth_dev_socket_id(portid), &rxconf, mbuf_pool);
if (ret < 0)
return ret;
}
txconf = dev_info.default_txconf;
txconf.offloads = port_conf.txmode.offloads;
/* Allocate and set up 1 TX queue per Ethernet port. */
for (uint32_t i = 0; i < options.s_num_threads; i++) {
ret = rte_eth_tx_queue_setup(portid,
options.s_thr_info.at(i)->txqid, nb_txd,
rte_eth_dev_socket_id(portid), &txconf);
if (ret < 0)
return ret;
}
// set mtu
ret = rte_eth_dev_set_mtu(portid, options.port_mtu);
if (ret != 0)
return ret;
ret = rte_eth_dev_start(portid);
if (ret < 0)
return ret;
/* Display the port MAC address. */
struct rte_ether_addr addr {
};
ret = rte_eth_macaddr_get(portid, &addr);
// no promiscuous mode required
return ret;
}
/*
 * Logs the effective configuration at INFO level.
 * The verbosity is printed as the number of levels above NTR_LEVEL_WARNING.
 */
static void
dump_options()
{
	// unsigned fields use %u; the former "%ul" printed the value followed
	// by a literal 'l'
	ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
	    "Configuration:\n"
	    "        verbosity = +%d\n"
	    "        run time = %u\n"
	    "        num threads = %u\n"
	    "        rage quit time = %u\n"
	    "        cpu mask = 0x%lx\n"
	    "        slave mode = %d\n"
	    "        interarrival dist = %s\n"
	    "        workload dist = %s\n"
	    "        qps = %u\n"
	    "        host IP = 0x%x\n"
	    "        depth = %u\n"
	    "        packet loss time threshold = %u\n"
	    "        jumbo frame = %d\n"
	    "        packet pad size = %d\n",
	    ntr_get_level(NTR_DEP_USER1) - NTR_LEVEL_WARNING, options.run_time,
	    options.s_num_threads, options.rage_quit_time, options.cpu_mask,
	    options.slave_mode, options.ia_gen, options.ld_gen,
	    options.target_qps, options.s_host_spec.ip, options.depth,
	    options.pkt_loss_delay_ms, options.jumbo_frame_enabled,
	    options.pkt_pad_sz);
}
/* Writes the list of supported command-line switches to stdout. */
static void
usage()
{
	static const char help_text[] = "Usage:\n"
					"    -v(vv): verbose mode\n"
					"    -h: display the information\n"
					"    -t: run time\n"
					"    -s: server net spec\n"
					"    -S: slave(rat) mode\n"
					"    -A: affinity mask\n"
					"    -i: inter-arrival time distribution\n"
					"    -w: workload distribution\n"
					"    -r: rage quit time (in ms)\n"
					"    -q: target QPS\n"
					"    -H: host net spec\n"
					"    -D: max number of packets in flight\n"
					"    -l: packet loss time threshold\n"
					"    -J: enable jumbo frame\n"
					"    -P: pad load packets to this size\n";

	// the text contains no conversion specifiers, so it is written verbatim
	fputs(help_text, stdout);
}
/*
 * Program entry point:
 *   1. initializes the EAL and parses the application options that follow
 *      the EAL arguments;
 *   2. picks the first available port, creates the mbuf pool and one
 *      thread_info per worker, and configures the port;
 *   3. launches one worker per selected lcore;
 *   4. waits for STATE_FIN (set by a worker in slave mode, or set here after
 *      run_time seconds otherwise), joins the workers and logs the totals.
 */
int
main(int argc, char *argv[])
{
	unsigned int nb_ports;
	struct rte_mempool *mbuf_pool;
	struct thread_info *tinfo;
	bool has_host_spec = false;

	ntr_init();

	// init dpdk
	int ret = rte_eal_init(argc, argv);
	if (ret < 0) {
		rte_exit(EXIT_FAILURE, "rte_eal_init failed!\n");
	}

	// skip the arguments consumed by the EAL
	argc -= ret;
	argv += ret;

	// set warning level
	ntr_set_level(NTR_DEP_USER1, NTR_LEVEL_WARNING);
	{
		int c;
		// parse arguments
		while ((c = getopt(
			    argc, argv, "vht:s:SA:i:w:r:q:H:D:l:JP:")) != -1) {
			switch (c) {
			case 'v':
				ntr_set_level(NTR_DEP_USER1,
				    ntr_get_level(NTR_DEP_USER1) + 1);
				break;
			case 'h':
				usage();
				rte_exit(EXIT_SUCCESS, "\n");
			case 't':
				options.run_time = strtol(optarg, nullptr, 10);
				break;
			case 's':
				if (str_to_netspec(
					optarg, &options.server_spec) != 0) {
					rte_exit(EXIT_FAILURE,
					    "invalid server net spec\n");
				}
				break;
			case 'S':
				options.slave_mode = 1;
				options.s_state =
				    STATE_SYNC; // set state to wait for SYNC
				break;
			case 'A':
				options.cpu_mask = strtoull(
				    optarg, nullptr, 16);
				options.s_num_threads = cmask_get_num_cpus(
				    options.cpu_mask);
				if (options.s_num_threads == 0) {
					rte_exit(EXIT_FAILURE,
					    "invalid cpu mask 0x%lx\n",
					    options.cpu_mask);
				}
				break;
			case 'i':
				// the buffer is zero-filled beyond its default
				// value, so copying size-1 bytes keeps it
				// terminated
				strncpy(options.ia_gen, optarg,
				    sizeof(options.ia_gen) - 1);
				break;
			case 'w':
				strncpy(options.ld_gen, optarg,
				    sizeof(options.ld_gen) - 1);
				break;
			case 'r':
				options.rage_quit_time = strtol(
				    optarg, nullptr, 10);
				break;
			case 'q':
				options.target_qps = strtol(
				    optarg, nullptr, 10);
				break;
			case 'H':
				has_host_spec = true;
				if (str_to_netspec(
					optarg, &options.s_host_spec) != 0) {
					rte_exit(EXIT_FAILURE,
					    "invalid host net spec.\n");
				}
				break;
			case 'D':
				options.depth = strtol(optarg, nullptr, 10);
				if (options.depth == 0) {
					// 0 means "no limit"
					options.depth = UINT32_MAX;
				}
				break;
			case 'l':
				options.pkt_loss_delay_ms = strtol(
				    optarg, nullptr, 10);
				if (options.pkt_loss_delay_ms == 0) {
					// 0 means "never time out"
					options.pkt_loss_delay_ms = UINT32_MAX;
				}
				break;
			case 'J':
				options.jumbo_frame_enabled = true;
				options.port_mtu = MAX_JUMBO_MTU;
				break;
			case 'P':
				options.pkt_pad_sz = strtol(
				    optarg, nullptr, 10);
				break;
			default:
				usage();
				// getopt() returns '?' here; the offending
				// option character is in optopt
				rte_exit(EXIT_FAILURE,
				    "unknown argument: %c\n", optopt);
			}
		}
	}

	if (options.pkt_pad_sz < 0) {
		rte_exit(EXIT_FAILURE, "pkt_pad_sz must not be negative\n");
	}

	if (options.pkt_pad_sz != 0 &&
	    options.pkt_pad_sz > mtu_to_pkt_size(options.port_mtu)) {
		rte_exit(EXIT_FAILURE, "pkt_pad_sz is too large for mtu %d\n",
		    options.port_mtu);
	}

	if (!has_host_spec) {
		rte_exit(EXIT_FAILURE, "Must specify host IP.\n");
	}

	// init nm
	if (nm_init(ntr_get_level(NTR_DEP_USER1) - NTR_LEVEL_WARNING) != 0) {
		rte_exit(EXIT_FAILURE, "nm init failed!\n");
	}

	dump_options();

	nb_ports = rte_eth_dev_count_avail();
	if (nb_ports == 0) {
		rte_exit(EXIT_FAILURE, "number of ports must be > 0\n");
	}

	uint16_t portid = rte_eth_find_next(0);
	if (portid == RTE_MAX_ETHPORTS) {
		rte_exit(EXIT_FAILURE, "cannot find an available port\n");
	}
	options.s_portid = portid;

	if (rte_eth_macaddr_get(portid, &options.s_host_spec.mac_addr) != 0) {
		rte_exit(EXIT_FAILURE, "cannot get mac address of port %d\n",
		    portid);
	}

	// create a mbuf memory pool on the socket; with jumbo frames the data
	// room grows by the MTU difference
	mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", MBUF_MAX_COUNT,
	    MBUF_CACHE_SIZE, 0,
	    options.jumbo_frame_enabled ?
		      RTE_MBUF_DEFAULT_BUF_SIZE + (MAX_JUMBO_MTU - MAX_STANDARD_MTU) :
		      RTE_MBUF_DEFAULT_BUF_SIZE,
	    rte_eth_dev_socket_id(options.s_portid));
	if (mbuf_pool == nullptr) {
		rte_exit(EXIT_FAILURE, "cannot create mbuf pool\n");
	}
	options.mbuf_pool = mbuf_pool;

	// one thread_info per worker; thread i uses rx/tx queue i
	uint64_t cmask = options.cpu_mask;
	for (unsigned int i = 0; i < options.s_num_threads; i++) {
		tinfo = new thread_info;
		tinfo->ia_gen = createGenerator(options.ia_gen);
		tinfo->load_gen = createGenerator(options.ld_gen);
		if (tinfo->ia_gen == nullptr || tinfo->load_gen == nullptr) {
			rte_exit(
			    EXIT_FAILURE, "invalid ia_gen or ld_gen string\n");
		}
		// the target QPS is split evenly between the workers
		tinfo->ia_gen->set_lambda((double)options.target_qps /
		    (double)(options.s_num_threads));
		tinfo->id = i;
		tinfo->lcore_id = cmask_get_next_cpu(&cmask);
		tinfo->rxqid = i;
		tinfo->txqid = i;
		options.s_thr_info.push_back(tinfo);
	}

	if (port_init(portid, mbuf_pool) != 0) {
		rte_exit(EXIT_FAILURE, "cannot init port %d\n", portid);
	}

	ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
	    "Configured port %d with mac addr %x:%x:%x:%x:%x:%x\n", portid,
	    options.s_host_spec.mac_addr.addr_bytes[0],
	    options.s_host_spec.mac_addr.addr_bytes[1],
	    options.s_host_spec.mac_addr.addr_bytes[2],
	    options.s_host_spec.mac_addr.addr_bytes[3],
	    options.s_host_spec.mac_addr.addr_bytes[4],
	    options.s_host_spec.mac_addr.addr_bytes[5]);

	sleep(INIT_DELAY);

	for (unsigned int i = 0; i < options.s_num_threads; i++) {
		tinfo = options.s_thr_info.at(i);
		ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
		    "main: launching thread %d on locore %d\n", tinfo->id,
		    tinfo->lcore_id);
		if (rte_eal_remote_launch(locore_main,
			(void *)options.s_thr_info.at(i),
			tinfo->lcore_id) != 0) {
			rte_exit(EXIT_FAILURE,
			    "failed to launch function on locore %d\n",
			    tinfo->lcore_id);
		}
	}

	// poor man's timer
	uint32_t second = 0;
	// this loop exit is signaled by SYNC_FIN in slave mode and by itself in
	// non slave mode
	while (options.s_state.load() != STATE_FIN) {
		if (options.slave_mode != 1) {
			if (second >= options.run_time) {
				options.s_state.store(STATE_FIN);
				break;
			}
			usleep(1 * S2US);
			second++;
		}
	}

	for (unsigned int i = 0; i < options.s_num_threads; i++) {
		tinfo = options.s_thr_info.at(i);
		ntr(NTR_DEP_USER1, NTR_LEVEL_INFO,
		    "main: waiting for locore %d...\n", tinfo->lcore_id);
		if (rte_eal_wait_lcore(tinfo->lcore_id) != 0) {
			rte_exit(EXIT_FAILURE, "failed to wait for locore %d\n",
			    tinfo->lcore_id);
		}
	}

	uint32_t qps;
	uint32_t total_recv;
	uint32_t total_loss;

	calc_stats(nm_get_uptime_ns(), &qps, &total_recv, &total_loss);
	ntr(NTR_DEP_USER1, NTR_LEVEL_INFO, "qps = %u, recv = %u, loss = %u\n",
	    qps, total_recv, total_loss);

	// all workers have been joined, so their state can be released
	for (auto each : options.s_thr_info) {
		delete each->load_gen;
		delete each->ia_gen;
		delete each;
	}

	// clean up
	rte_eth_dev_stop(portid);

	return 0;
}