examples/l3fwd: fix exact match performance
It seems that for most use cases the previous hash_multi_lookup method provides better performance; moreover, sequential lookup can cause a significant performance drop. This patch makes the previously optional hash_multi_lookup method the default. It also provides some minor optimizations, such as draining queues only on the TX ports that are in use. Fixes:94c54b4158
("examples/l3fwd: rework exact-match") Fixes:dc81ebbaca
("lpm: extend IPv4 next hop field") Fixes:64d3955de1
("examples/l3fwd: fix ARM build") Reported-by: Qian Xu <qian.q.xu@intel.com> Signed-off-by: Tomasz Kulasek <tomaszx.kulasek@intel.com>
This commit is contained in:
parent
fdefa0a932
commit
52c97adc1f
@ -40,6 +40,10 @@
|
||||
|
||||
#define RTE_LOGTYPE_L3FWD RTE_LOGTYPE_USER1
|
||||
|
||||
#if !defined(NO_HASH_MULTI_LOOKUP) && defined(RTE_MACHINE_CPUFLAG_NEON)
|
||||
#define NO_HASH_MULTI_LOOKUP 1
|
||||
#endif
|
||||
|
||||
#define MAX_PKT_BURST 32
|
||||
#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */
|
||||
|
||||
@ -86,6 +90,8 @@ struct lcore_rx_queue {
|
||||
struct lcore_conf {
|
||||
uint16_t n_rx_queue;
|
||||
struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];
|
||||
uint16_t n_tx_port;
|
||||
uint16_t tx_port_id[RTE_MAX_ETHPORTS];
|
||||
uint16_t tx_queue_id[RTE_MAX_ETHPORTS];
|
||||
struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS];
|
||||
void *ipv4_lookup_struct;
|
||||
|
@ -250,7 +250,7 @@ em_mask_key(void *key, xmm_t mask)
|
||||
|
||||
return _mm_and_si128(data, mask);
|
||||
}
|
||||
#elif defined(__ARM_NEON)
|
||||
#elif defined(RTE_MACHINE_CPUFLAG_NEON)
|
||||
static inline xmm_t
|
||||
em_mask_key(void *key, xmm_t mask)
|
||||
{
|
||||
@ -320,7 +320,7 @@ em_get_ipv6_dst_port(void *ipv6_hdr, uint8_t portid, void *lookup_struct)
|
||||
* buffer optimization i.e. ENABLE_MULTI_BUFFER_OPTIMIZE=1.
|
||||
*/
|
||||
#if defined(__SSE4_1__)
|
||||
#ifndef HASH_MULTI_LOOKUP
|
||||
#if defined(NO_HASH_MULTI_LOOKUP)
|
||||
#include "l3fwd_em_sse.h"
|
||||
#else
|
||||
#include "l3fwd_em_hlm_sse.h"
|
||||
@ -568,8 +568,8 @@ em_main_loop(__attribute__((unused)) void *dummy)
|
||||
diff_tsc = cur_tsc - prev_tsc;
|
||||
if (unlikely(diff_tsc > drain_tsc)) {
|
||||
|
||||
for (i = 0; i < qconf->n_rx_queue; i++) {
|
||||
portid = qconf->rx_queue_list[i].port_id;
|
||||
for (i = 0; i < qconf->n_tx_port; ++i) {
|
||||
portid = qconf->tx_port_id[i];
|
||||
if (qconf->tx_mbufs[portid].len == 0)
|
||||
continue;
|
||||
send_burst(qconf,
|
||||
|
@ -34,17 +34,9 @@
|
||||
#ifndef __L3FWD_EM_HLM_SSE_H__
|
||||
#define __L3FWD_EM_HLM_SSE_H__
|
||||
|
||||
/**
|
||||
* @file
|
||||
* This is an optional implementation of packet classification in Exact-Match
|
||||
* path using rte_hash_lookup_multi method from previous implementation.
|
||||
* While sequential classification seems to be faster, it's disabled by default
|
||||
* and can be enabled with HASH_LOOKUP_MULTI global define in compilation time.
|
||||
*/
|
||||
|
||||
#include "l3fwd_sse.h"
|
||||
|
||||
static inline void
|
||||
static inline __attribute__((always_inline)) void
|
||||
em_get_dst_port_ipv4x8(struct lcore_conf *qconf, struct rte_mbuf *m[8],
|
||||
uint8_t portid, uint32_t dst_port[8])
|
||||
{
|
||||
@ -168,7 +160,7 @@ get_ipv6_5tuple(struct rte_mbuf *m0, __m128i mask0,
|
||||
key->xmm[2] = _mm_and_si128(tmpdata2, mask1);
|
||||
}
|
||||
|
||||
static inline void
|
||||
static inline __attribute__((always_inline)) void
|
||||
em_get_dst_port_ipv6x8(struct lcore_conf *qconf, struct rte_mbuf *m[8],
|
||||
uint8_t portid, uint32_t dst_port[8])
|
||||
{
|
||||
@ -322,17 +314,17 @@ l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
|
||||
|
||||
} else {
|
||||
dst_port[j] = em_get_dst_port(qconf, pkts_burst[j], portid);
|
||||
dst_port[j+1] = em_get_dst_port(qconf, pkts_burst[j], portid);
|
||||
dst_port[j+2] = em_get_dst_port(qconf, pkts_burst[j], portid);
|
||||
dst_port[j+3] = em_get_dst_port(qconf, pkts_burst[j], portid);
|
||||
dst_port[j+4] = em_get_dst_port(qconf, pkts_burst[j], portid);
|
||||
dst_port[j+5] = em_get_dst_port(qconf, pkts_burst[j], portid);
|
||||
dst_port[j+6] = em_get_dst_port(qconf, pkts_burst[j], portid);
|
||||
dst_port[j+7] = em_get_dst_port(qconf, pkts_burst[j], portid);
|
||||
dst_port[j+1] = em_get_dst_port(qconf, pkts_burst[j+1], portid);
|
||||
dst_port[j+2] = em_get_dst_port(qconf, pkts_burst[j+2], portid);
|
||||
dst_port[j+3] = em_get_dst_port(qconf, pkts_burst[j+3], portid);
|
||||
dst_port[j+4] = em_get_dst_port(qconf, pkts_burst[j+4], portid);
|
||||
dst_port[j+5] = em_get_dst_port(qconf, pkts_burst[j+5], portid);
|
||||
dst_port[j+6] = em_get_dst_port(qconf, pkts_burst[j+6], portid);
|
||||
dst_port[j+7] = em_get_dst_port(qconf, pkts_burst[j+7], portid);
|
||||
}
|
||||
}
|
||||
|
||||
for (; j < n; j++)
|
||||
for (; j < nb_rx; j++)
|
||||
dst_port[j] = em_get_dst_port(qconf, pkts_burst[j], portid);
|
||||
|
||||
send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
|
||||
|
@ -34,6 +34,15 @@
|
||||
#ifndef __L3FWD_EM_SSE_H__
|
||||
#define __L3FWD_EM_SSE_H__
|
||||
|
||||
/**
|
||||
* @file
|
||||
* This is an optional implementation of packet classification in Exact-Match
|
||||
* path using sequential packet classification method.
|
||||
* Because hash lookup multi seems to provide better performance, this
|
||||
* sequential method is disabled by default and can be enabled with the
|
||||
* NO_HASH_MULTI_LOOKUP global define at compilation time.
|
||||
*/
|
||||
|
||||
#include "l3fwd_sse.h"
|
||||
|
||||
static inline __attribute__((always_inline)) uint16_t
|
||||
|
@ -159,8 +159,8 @@ lpm_main_loop(__attribute__((unused)) void *dummy)
|
||||
diff_tsc = cur_tsc - prev_tsc;
|
||||
if (unlikely(diff_tsc > drain_tsc)) {
|
||||
|
||||
for (i = 0; i < qconf->n_rx_queue; i++) {
|
||||
portid = qconf->rx_queue_list[i].port_id;
|
||||
for (i = 0; i < qconf->n_tx_port; ++i) {
|
||||
portid = qconf->tx_port_id[i];
|
||||
if (qconf->tx_mbufs[portid].len == 0)
|
||||
continue;
|
||||
send_burst(qconf,
|
||||
|
@ -791,6 +791,7 @@ main(int argc, char **argv)
|
||||
unsigned lcore_id;
|
||||
uint32_t n_tx_queue, nb_lcores;
|
||||
uint8_t portid, nb_rx_queue, queue, socketid;
|
||||
uint8_t nb_tx_port;
|
||||
|
||||
/* init EAL */
|
||||
ret = rte_eal_init(argc, argv);
|
||||
@ -830,6 +831,7 @@ main(int argc, char **argv)
|
||||
rte_exit(EXIT_FAILURE, "check_port_config failed\n");
|
||||
|
||||
nb_lcores = rte_lcore_count();
|
||||
nb_tx_port = 0;
|
||||
|
||||
/* Setup function pointers for lookup method. */
|
||||
setup_l3fwd_lookup_tables();
|
||||
@ -906,8 +908,13 @@ main(int argc, char **argv)
|
||||
qconf = &lcore_conf[lcore_id];
|
||||
qconf->tx_queue_id[portid] = queueid;
|
||||
queueid++;
|
||||
|
||||
qconf->n_tx_port = nb_tx_port;
|
||||
qconf->tx_port_id[qconf->n_tx_port] = portid;
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
nb_tx_port++;
|
||||
}
|
||||
|
||||
for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
|
||||
|
Loading…
Reference in New Issue
Block a user