examples/l3fwd-power: enable one-shot Rx interrupt and polling switch

This patch demonstrates how to handle per-Rx-queue interrupts in a NAPI-like
implementation in userspace. The worker thread mainly runs in polling mode
and switches to interrupt mode only if no packets are received over several
consecutive polls. The worker thread returns to polling mode immediately
once it receives an interrupt notification caused by incoming packets.
The sample keeps running in polling mode if the bound PMD does not support
Rx interrupts yet. Currently only ixgbe (PF/VF) and igb support them.

Signed-off-by: Danny Zhou <danny.zhou@intel.com>
Signed-off-by: Cunming Liang <cunming.liang@intel.com>
Author: Cunming Liang, 2015-07-20 11:02:29 +08:00, committed by Thomas Monjalon
parent c3cd3de0ab
commit aee3bc79cc

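In outline, the patch reshapes the sample's main_loop() into the following
state machine: poll while traffic flows, arm one-shot Rx interrupts and block
when polls come back empty, resume polling on wake-up. A condensed sketch, not
the literal code — poll_all_rx_queues(), all_queues_idle() and
max_zero_poll_count() are hypothetical stand-ins for the per-queue bookkeeping
the real loop does inline:

/* Condensed sketch of the NAPI-like mode switch in main_loop(). */
while (1) {
start_rx:
	poll_all_rx_queues(qconf);	/* rte_eth_rx_burst() per queue */
	if (!all_queues_idle(qconf))
		continue;		/* traffic seen: stay in polling mode */

	idle_hint = power_idle_heuristic(max_zero_poll_count(qconf));
	if (idle_hint < SUSPEND_THRESHOLD) {
		rte_delay_us(idle_hint);	/* brief pause, no context switch */
	} else if (intr_en) {
		turn_on_intr(qconf);		/* re-arm one-shot Rx interrupts */
		sleep_until_rx_interrupt(qconf->n_rx_queue);
		goto start_rx;			/* woken by traffic: poll again */
	}
}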

@@ -74,12 +74,14 @@
 #include <rte_string_fns.h>
 #include <rte_timer.h>
 #include <rte_power.h>
+#include <rte_eal.h>
+#include <rte_spinlock.h>
 
 #define RTE_LOGTYPE_L3FWD_POWER RTE_LOGTYPE_USER1
 
 #define MAX_PKT_BURST 32
 
-#define MIN_ZERO_POLL_COUNT 5
+#define MIN_ZERO_POLL_COUNT 10
 
 /* around 100ms at 2 Ghz */
 #define TIMER_RESOLUTION_CYCLES 200000000ULL
@@ -153,6 +155,9 @@ static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
 /* ethernet addresses of ports */
 static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
 
+/* spinlock per port to serialize Rx interrupt control */
+static rte_spinlock_t locks[RTE_MAX_ETHPORTS];
+
 /* mask of enabled ports */
 static uint32_t enabled_port_mask = 0;
 /* Ports set in promiscuous mode off by default. */
@@ -185,6 +190,9 @@ struct lcore_rx_queue {
 #define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS
 #define MAX_RX_QUEUE_PER_PORT 128
+
+#define MAX_RX_QUEUE_INTERRUPT_PER_PORT 16
+
 #define MAX_LCORE_PARAMS 1024
 
 struct lcore_params {
 	uint8_t port_id;
@@ -223,11 +231,17 @@ static struct rte_eth_conf port_conf = {
 	.rx_adv_conf = {
 		.rss_conf = {
 			.rss_key = NULL,
-			.rss_hf = ETH_RSS_IP,
+			.rss_hf = ETH_RSS_UDP,
 		},
 	},
 	.txmode = {
-		.mq_mode = ETH_DCB_NONE,
+		.mq_mode = ETH_MQ_TX_NONE,
+	},
+	.intr_conf = {
+		.lsc = 1,
+#ifdef RTE_NEXT_ABI
+		.rxq = 1,
+#endif
 	},
 };
 
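With .rxq = 1 the PMD wires each Rx queue to an interrupt vector; in this
release the field sits behind the RTE_NEXT_ABI guard, so a build without that
option leaves the sample as a pure poller. Support is also probed at runtime:
if the bound PMD cannot honor per-queue interrupts, the control call fails and
the sample stays in polling mode. A sketch of that fallback, mirroring what
event_register() and the intr_en flag implement further down:

/* Fall back to pure polling when Rx interrupts are unavailable. */
int intr_en = 0;

if (rte_eth_dev_rx_intr_ctl_q(portid, queueid, RTE_EPOLL_PER_THREAD,
			      RTE_INTR_EVENT_ADD, NULL) == 0)
	intr_en = 1;	/* queue armed on this lcore's epoll fd */
else
	intr_en = 0;	/* e.g. a PMD other than ixgbe/igb */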
@@ -399,19 +413,22 @@ power_timer_cb(__attribute__((unused)) struct rte_timer *tim,
 	/* accumulate total execution time in us when callback is invoked */
 	sleep_time_ratio = (float)(stats[lcore_id].sleep_time) /
 					(float)SCALING_PERIOD;
-
 	/**
 	 * check whether need to scale down frequency a step if it sleep a lot.
 	 */
-	if (sleep_time_ratio >= SCALING_DOWN_TIME_RATIO_THRESHOLD)
-		rte_power_freq_down(lcore_id);
+	if (sleep_time_ratio >= SCALING_DOWN_TIME_RATIO_THRESHOLD) {
+		if (rte_power_freq_down)
+			rte_power_freq_down(lcore_id);
+	}
 	else if ( (unsigned)(stats[lcore_id].nb_rx_processed /
-			stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST)
+			stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST) {
 		/**
 		 * scale down a step if average packet per iteration less
 		 * than expectation.
 		 */
-		rte_power_freq_down(lcore_id);
+		if (rte_power_freq_down)
+			rte_power_freq_down(lcore_id);
+	}
 
 	/**
 	 * initialize another timer according to current frequency to ensure
@@ -712,22 +729,20 @@ l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid,
 }
 
-#define SLEEP_GEAR1_THRESHOLD  100
-#define SLEEP_GEAR2_THRESHOLD  1000
+#define MINIMUM_SLEEP_TIME         1
+#define SUSPEND_THRESHOLD          300
 
 static inline uint32_t
 power_idle_heuristic(uint32_t zero_rx_packet_count)
 {
-	/* If zero count is less than 100, use it as the sleep time in us */
-	if (zero_rx_packet_count < SLEEP_GEAR1_THRESHOLD)
-		return zero_rx_packet_count;
-	/* If zero count is less than 1000, sleep time should be 100 us */
-	else if ((zero_rx_packet_count >= SLEEP_GEAR1_THRESHOLD) &&
-			(zero_rx_packet_count < SLEEP_GEAR2_THRESHOLD))
-		return SLEEP_GEAR1_THRESHOLD;
-	/* If zero count is greater than 1000, sleep time should be 1000 us */
-	else if (zero_rx_packet_count >= SLEEP_GEAR2_THRESHOLD)
-		return SLEEP_GEAR2_THRESHOLD;
+	/* If zero count is below the threshold, sleep 1us */
+	if (zero_rx_packet_count < SUSPEND_THRESHOLD)
+		return MINIMUM_SLEEP_TIME;
+	/* Otherwise hint a sleep long enough to cover the
+	 * minimum latency of switching from C3/C6 to C0
+	 */
+	else
+		return SUSPEND_THRESHOLD;
 
 	return 0;
 }
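The new heuristic collapses the old three-gear sleep table into two regimes.
A standalone check of its outputs (plain C, constants copied from the hunk
above):

#include <stdint.h>
#include <stdio.h>

#define MINIMUM_SLEEP_TIME	1	/* us */
#define SUSPEND_THRESHOLD	300

static uint32_t
power_idle_heuristic(uint32_t zero_rx_packet_count)
{
	if (zero_rx_packet_count < SUSPEND_THRESHOLD)
		return MINIMUM_SLEEP_TIME;
	return SUSPEND_THRESHOLD;
}

int main(void)
{
	printf("%u\n", power_idle_heuristic(10));	/* 1: short pause */
	printf("%u\n", power_idle_heuristic(299));	/* 1: short pause */
	printf("%u\n", power_idle_heuristic(300));	/* 300: suspend hint */
	return 0;
}

A return of SUSPEND_THRESHOLD is what later steers main_loop() into the
interrupt path, since its idle branch tests lcore_idle_hint < SUSPEND_THRESHOLD.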
@@ -767,6 +782,84 @@ power_freq_scaleup_heuristic(unsigned lcore_id,
 	return FREQ_CURRENT;
 }
 
+/**
+ * force polling thread sleep until one-shot rx interrupt triggers
+ * @param num
+ *  number of Rx queues to wait on
+ * @return
+ *  0 on success
+ */
+static int
+sleep_until_rx_interrupt(int num)
+{
+	struct rte_epoll_event event[num];
+	int n, i;
+	uint8_t port_id, queue_id;
+	void *data;
+
+	RTE_LOG(INFO, L3FWD_POWER,
+		"lcore %u sleeps until interrupt triggers\n",
+		rte_lcore_id());
+
+	n = rte_epoll_wait(RTE_EPOLL_PER_THREAD, event, num, -1);
+	for (i = 0; i < n; i++) {
+		data = event[i].epdata.data;
+		port_id = ((uintptr_t)data) >> CHAR_BIT;
+		queue_id = ((uintptr_t)data) &
+			RTE_LEN2MASK(CHAR_BIT, uint8_t);
+		RTE_LOG(INFO, L3FWD_POWER,
+			"lcore %u is woken up from rx interrupt on"
+			" port %d queue %d\n",
+			rte_lcore_id(), port_id, queue_id);
+	}
+
+	return 0;
+}
+
+static void turn_on_intr(struct lcore_conf *qconf)
+{
+	int i;
+	struct lcore_rx_queue *rx_queue;
+	uint8_t port_id, queue_id;
+
+	for (i = 0; i < qconf->n_rx_queue; ++i) {
+		rx_queue = &(qconf->rx_queue_list[i]);
+		port_id = rx_queue->port_id;
+		queue_id = rx_queue->queue_id;
+
+		rte_spinlock_lock(&(locks[port_id]));
+		rte_eth_dev_rx_intr_enable(port_id, queue_id);
+		rte_spinlock_unlock(&(locks[port_id]));
+	}
+}
+
+static int event_register(struct lcore_conf *qconf)
+{
+	struct lcore_rx_queue *rx_queue;
+	uint8_t portid, queueid;
+	uint32_t data;
+	int ret;
+	int i;
+
+	for (i = 0; i < qconf->n_rx_queue; ++i) {
+		rx_queue = &(qconf->rx_queue_list[i]);
+		portid = rx_queue->port_id;
+		queueid = rx_queue->queue_id;
+		data = portid << CHAR_BIT | queueid;
+
+		ret = rte_eth_dev_rx_intr_ctl_q(portid, queueid,
+						RTE_EPOLL_PER_THREAD,
+						RTE_INTR_EVENT_ADD,
+						(void *)((uintptr_t)data));
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 /* main processing loop */
 static int
 main_loop(__attribute__((unused)) void *dummy)
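event_register() packs the port and queue ids into the epoll user data (high
byte port, low byte queue), and sleep_until_rx_interrupt() unpacks them on
wake-up. The round trip, as a self-contained check (0xff stands in for
RTE_LEN2MASK(CHAR_BIT, uint8_t)):

#include <limits.h>	/* CHAR_BIT */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint8_t portid = 3, queueid = 7;

	/* encode as in event_register() */
	uint32_t data = portid << CHAR_BIT | queueid;
	void *cookie = (void *)(uintptr_t)data;	/* stored in epdata.data */

	/* decode as in sleep_until_rx_interrupt() */
	uint8_t port = (uintptr_t)cookie >> CHAR_BIT;
	uint8_t queue = (uintptr_t)cookie & 0xff;

	printf("port %u queue %u\n", port, queue);	/* port 3 queue 7 */
	return 0;
}

Note the one-byte-per-field encoding caps both ids at 255, well above the
sample's MAX_RX_QUEUE_INTERRUPT_PER_PORT of 16.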
@@ -780,9 +873,9 @@ main_loop(__attribute__((unused)) void *dummy)
 	struct lcore_conf *qconf;
 	struct lcore_rx_queue *rx_queue;
 	enum freq_scale_hint_t lcore_scaleup_hint;
-
 	uint32_t lcore_rx_idle_count = 0;
 	uint32_t lcore_idle_hint = 0;
+	int intr_en = 0;
 
 	const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
@@ -799,13 +892,18 @@ main_loop(__attribute__((unused)) void *dummy)
 	RTE_LOG(INFO, L3FWD_POWER, "entering main loop on lcore %u\n", lcore_id);
 
 	for (i = 0; i < qconf->n_rx_queue; i++) {
 		portid = qconf->rx_queue_list[i].port_id;
 		queueid = qconf->rx_queue_list[i].queue_id;
 		RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%hhu "
 			"rxqueueid=%hhu\n", lcore_id, portid, queueid);
 	}
 
+	/* add into event wait list */
+	if (event_register(qconf) == 0)
+		intr_en = 1;
+	else
+		RTE_LOG(INFO, L3FWD_POWER, "RX interrupt won't be enabled.\n");
+
 	while (1) {
 		stats[lcore_id].nb_iteration_looped++;
@@ -840,6 +938,7 @@ main_loop(__attribute__((unused)) void *dummy)
 			prev_tsc_power = cur_tsc_power;
 		}
 
+start_rx:
 		/*
 		 * Read packet from RX queues
 		 */
@@ -853,6 +952,7 @@ main_loop(__attribute__((unused)) void *dummy)
 			nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst,
 								MAX_PKT_BURST);
 			stats[lcore_id].nb_rx_processed += nb_rx;
+
 			if (unlikely(nb_rx == 0)) {
 				/**
@@ -915,10 +1015,13 @@ main_loop(__attribute__((unused)) void *dummy)
 						rx_queue->freq_up_hint;
 			}
 
-			if (lcore_scaleup_hint == FREQ_HIGHEST)
-				rte_power_freq_max(lcore_id);
-			else if (lcore_scaleup_hint == FREQ_HIGHER)
-				rte_power_freq_up(lcore_id);
+			if (lcore_scaleup_hint == FREQ_HIGHEST) {
+				if (rte_power_freq_max)
+					rte_power_freq_max(lcore_id);
+			} else if (lcore_scaleup_hint == FREQ_HIGHER) {
+				if (rte_power_freq_up)
+					rte_power_freq_up(lcore_id);
+			}
 		} else {
 			/**
 			 * All Rx queues empty in recent consecutive polls,
@@ -933,16 +1036,23 @@ main_loop(__attribute__((unused)) void *dummy)
 					lcore_idle_hint = rx_queue->idle_hint;
 			}
 
-			if ( lcore_idle_hint < SLEEP_GEAR1_THRESHOLD)
+			if (lcore_idle_hint < SUSPEND_THRESHOLD)
 				/**
 				 * execute "pause" instruction to avoid context
-				 * switch for short sleep.
+				 * switch, which generally takes hundreds of
+				 * microseconds, for short sleeps.
 				 */
 				rte_delay_us(lcore_idle_hint);
-			else
-				/* long sleep force runing thread to suspend */
-				usleep(lcore_idle_hint);
-
+			else {
+				/* suspend until rx interrupt triggers */
+				if (intr_en) {
+					turn_on_intr(qconf);
+					sleep_until_rx_interrupt(
+						qconf->n_rx_queue);
+				}
+				/* start receiving packets immediately */
+				goto start_rx;
+			}
 			stats[lcore_id].sleep_time += lcore_idle_hint;
 		}
 	}
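Two costs drive the branch above; a condensed restatement with the trade-offs
spelled out in comments (same logic as the hunk — the goto in the real code
makes the sleep_time update short-path-only, expressed here by moving it into
the first branch):

if (lcore_idle_hint < SUSPEND_THRESHOLD) {
	/* Short idle: spin on "pause" for ~1us. A real sleep would
	 * force a context switch costing hundreds of microseconds. */
	rte_delay_us(lcore_idle_hint);
	stats[lcore_id].sleep_time += lcore_idle_hint;
} else {
	/* Long idle: re-arm the one-shot interrupts, then block in
	 * rte_epoll_wait() until a packet arrives. On wake-up, jump
	 * straight back to the rx burst loop. */
	if (intr_en) {
		turn_on_intr(qconf);
		sleep_until_rx_interrupt(qconf->n_rx_queue);
	}
	goto start_rx;
}

When interrupts are unavailable (intr_en == 0), the else branch degenerates to
an immediate re-poll, which is how the sample "keeps running in polling mode"
as the commit message promises.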
@@ -1273,7 +1383,7 @@ setup_hash(int socketid)
 	char s[64];
 
 	/* create ipv4 hash */
-	snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid);
+	rte_snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid);
 	ipv4_l3fwd_hash_params.name = s;
 	ipv4_l3fwd_hash_params.socket_id = socketid;
 	ipv4_l3fwd_lookup_struct[socketid] =
@@ -1283,7 +1393,7 @@ setup_hash(int socketid)
 			"socket %d\n", socketid);
 
 	/* create ipv6 hash */
-	snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid);
+	rte_snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid);
 	ipv6_l3fwd_hash_params.name = s;
 	ipv6_l3fwd_hash_params.socket_id = socketid;
 	ipv6_l3fwd_lookup_struct[socketid] =
@@ -1477,6 +1587,7 @@ main(int argc, char **argv)
 	unsigned lcore_id;
 	uint64_t hz;
 	uint32_t n_tx_queue, nb_lcores;
+	uint32_t dev_rxq_num, dev_txq_num;
 	uint8_t portid, nb_rx_queue, queue, socketid;
 
 	/* catch SIGINT and restore cpufreq governor to ondemand */
@@ -1526,10 +1637,19 @@ main(int argc, char **argv)
 		printf("Initializing port %d ... ", portid );
 		fflush(stdout);
 
+		rte_eth_dev_info_get(portid, &dev_info);
+		dev_rxq_num = dev_info.max_rx_queues;
+		dev_txq_num = dev_info.max_tx_queues;
+
 		nb_rx_queue = get_port_n_rx_queues(portid);
+		if (nb_rx_queue > dev_rxq_num)
+			rte_exit(EXIT_FAILURE,
+				"Cannot configure non-existent rxq: "
+				"port=%d\n", portid);
+
 		n_tx_queue = nb_lcores;
-		if (n_tx_queue > MAX_TX_QUEUE_PER_PORT)
-			n_tx_queue = MAX_TX_QUEUE_PER_PORT;
+		if (n_tx_queue > dev_txq_num)
+			n_tx_queue = dev_txq_num;
 		printf("Creating queues: nb_rxq=%d nb_txq=%u... ",
 			nb_rx_queue, (unsigned)n_tx_queue );
 		ret = rte_eth_dev_configure(portid, nb_rx_queue,
@@ -1553,6 +1673,9 @@ main(int argc, char **argv)
 			if (rte_lcore_is_enabled(lcore_id) == 0)
 				continue;
 
+			if (queueid >= dev_txq_num)
+				continue;
+
 			if (numa_on)
 				socketid = \
 					(uint8_t)rte_lcore_to_socket_id(lcore_id);
@@ -1587,8 +1710,9 @@ main(int argc, char **argv)
 		/* init power management library */
 		ret = rte_power_init(lcore_id);
 		if (ret)
-			rte_exit(EXIT_FAILURE, "Power management library "
-				"initialization failed on core%u\n", lcore_id);
+			rte_log(RTE_LOG_ERR, RTE_LOGTYPE_POWER,
+				"Power management library initialization "
+				"failed on core%u", lcore_id);
 
 		/* init timer structures for each enabled lcore */
 		rte_timer_init(&power_timers[lcore_id]);
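Demoting the rte_power_init() failure from rte_exit() to a log line is what
lets the sample run where cpufreq scaling is unavailable (e.g. in a VM, or on
a kernel without the acpi-cpufreq driver). The rte_power_freq_* symbols are
function pointers that stay unset when init fails, which is why the callers in
the earlier hunks now test them before use. A sketch of the pattern under that
assumption:

/* Frequency scaling is now optional: a failed init only logs, and the
 * rte_power_* hooks are function pointers left unset in that case. */
ret = rte_power_init(lcore_id);
if (ret)
	RTE_LOG(ERR, POWER,
		"Power library init failed on core %u, scaling disabled\n",
		lcore_id);

if (rte_power_freq_down)	/* guard: NULL when init failed */
	rte_power_freq_down(lcore_id);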
@@ -1636,7 +1760,6 @@ main(int argc, char **argv)
 		if (ret < 0)
 			rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, "
 						"port=%d\n", ret, portid);
-
 		/*
 		 * If enabled, put device in promiscuous mode.
 		 * This allows IO forwarding mode to forward packets
@@ -1645,6 +1768,8 @@ main(int argc, char **argv)
 		 */
 		if (promiscuous_on)
 			rte_eth_promiscuous_enable(portid);
+		/* initialize spinlock for each port */
+		rte_spinlock_init(&(locks[portid]));
 	}
 
 	check_all_ports_link_status((uint8_t)nb_ports, enabled_port_mask);