examples/l3fwd: fix using packet type blindly

As an example of using ptype info, l3fwd first needs to call the
rte_eth_dev_get_supported_ptypes() API to check whether the device and/or
its PMD driver will parse and fill in the needed packet types; if not,
use the newly added option, --parse-ptype, to analyze them in software
in an Rx callback.

As the EXACT_MATCH mode uses the 5-tuple to calculate the hash,
we narrow down its scope to:
  a. ip packets with no extensions, and
  b. L4 payload should be either tcp or udp.

Note: this patch does not completely solve the issue, "cannot run
l3fwd on virtio or other devices", because hw_ip_checksum may not be
supported by the devices. Currently we can:
  a. remove this requirement, or
  b. wait for virtio front end (pmd) to support it.

Signed-off-by: Jianfeng Tan <jianfeng.tan@intel.com>
Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
This commit is contained in:
Jianfeng Tan 2016-03-25 08:47:46 +08:00 committed by Thomas Monjalon
parent a6b450805b
commit 71a7e2424e
9 changed files with 280 additions and 10 deletions

View File

@ -412,6 +412,11 @@ Examples
other than IPv4 or IPv6, the mbuf was not released, and caused
a memory leak.
* **l3fwd: Fixed using packet type blindly.**
l3fwd made use of packet type information without even querying whether devices or PMDs
really set it. For those that don't set ptypes, an option was added to parse them in software.
* **examples/vhost: Fixed frequent mbuf allocation failure.**
vhost-switch often fails to allocate mbuf when dequeue from vring because it

View File

@ -92,7 +92,7 @@ The application has a number of command line options:
.. code-block:: console
./build/l3fwd [EAL options] -- -p PORTMASK [-P] --config(port,queue,lcore)[,(port,queue,lcore)] [--enable-jumbo [--max-pkt-len PKTLEN]] [--no-numa][--hash-entry-num][--ipv6]
./build/l3fwd [EAL options] -- -p PORTMASK [-P] --config(port,queue,lcore)[,(port,queue,lcore)] [--enable-jumbo [--max-pkt-len PKTLEN]] [--no-numa][--hash-entry-num][--ipv6] [--parse-ptype]
where,
@ -113,6 +113,8 @@ where,
* --ipv6: optional, set it if running ipv6 packets
* --parse-ptype: optional, set it to analyze the packet type in software
For example, consider a dual processor socket platform where cores 0-7 and 16-23 appear on socket 0, while cores 8-15 and 24-31 appear on socket 1.
Let's say that the programmer wants to use memory from both NUMA nodes, the platform has only two ports, one connected to each NUMA node,
and the programmer wants to use two cores from each processor socket to do the packet processing.
@ -334,6 +336,8 @@ The key code snippet of simple_ipv4_fwd_4pkts() is shown below:
The simple_ipv6_fwd_4pkts() function is similar to the simple_ipv4_fwd_4pkts() function.
Known issue: IP packets with extensions or IP packets which are not TCP/UDP do not work well in this mode.
Packet Forwarding for LPM-based Lookups
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

View File

@ -205,6 +205,20 @@ setup_lpm(const int socketid);
void
setup_hash(const int socketid);
/* Return 1 if the port reports every packet type exact-match mode needs
 * (IPv4/IPv6 "EXT" L3 types plus TCP and UDP L4 types), 0 otherwise.
 */
int
em_check_ptype(int portid);
/* Return 1 if the port reports both IPv4 and IPv6 L3 packet types,
 * 0 otherwise.
 */
int
lpm_check_ptype(int portid);
/* Rx callback for exact-match mode: fills m->packet_type in software for
 * each of the nb_pkts mbufs in pkts[] and returns nb_pkts unchanged.
 * port, queue, max_pkts and user_param are not used.
 */
uint16_t
em_cb_parse_ptype(uint8_t port, uint16_t queue, struct rte_mbuf *pkts[],
uint16_t nb_pkts, uint16_t max_pkts, void *user_param);
/* Rx callback for LPM mode: fills m->packet_type in software for each of
 * the nb_pkts mbufs in pkts[] and returns nb_pkts unchanged.
 * port, queue, max_pkts and user_param are not used.
 */
uint16_t
lpm_cb_parse_ptype(uint8_t port, uint16_t queue, struct rte_mbuf *pkts[],
uint16_t nb_pkts, uint16_t max_pkts, void *user_param);
int
em_main_loop(__attribute__((unused)) void *dummy);

View File

@ -42,6 +42,7 @@
#include <errno.h>
#include <getopt.h>
#include <stdbool.h>
#include <netinet/in.h>
#include <rte_debug.h>
#include <rte_ether.h>
@ -519,6 +520,114 @@ populate_ipv6_many_flow_into_table(const struct rte_hash *h,
printf("Hash: Adding 0x%x keys\n", nr_flow);
}
/*
 * Check whether a port reports the packet types exact-match mode relies on.
 *
 * Exact-match forwarding only handles:
 *   1. IP packets without extension;
 *   2. L4 payload that is either TCP or UDP.
 *
 * The port's supported L3/L4 packet types are queried and the list is
 * searched for RTE_PTYPE_L3_IPV4_EXT, RTE_PTYPE_L3_IPV6_EXT,
 * RTE_PTYPE_L4_TCP and RTE_PTYPE_L4_UDP. A message is printed for each
 * missing type; the L4 messages are only printed when both L3 types are
 * present.
 *
 * Returns 1 when all four types are reported, 0 otherwise (including when
 * the query itself yields no types).
 */
int
em_check_ptype(int portid)
{
	const uint32_t mask = RTE_PTYPE_L3_MASK | RTE_PTYPE_L4_MASK;
	int has_ipv4_ext = 0;
	int has_ipv6_ext = 0;
	int has_tcp = 0;
	int has_udp = 0;
	int count;
	int idx;

	/* First call only asks how many entries would be returned. */
	count = rte_eth_dev_get_supported_ptypes(portid, mask, NULL, 0);
	if (count <= 0)
		return 0;

	uint32_t supported[count];

	count = rte_eth_dev_get_supported_ptypes(portid, mask, supported,
						 count);
	for (idx = 0; idx < count; idx++) {
		const uint32_t ptype = supported[idx];

		if (ptype == RTE_PTYPE_L3_IPV4_EXT)
			has_ipv4_ext = 1;
		else if (ptype == RTE_PTYPE_L3_IPV6_EXT)
			has_ipv6_ext = 1;
		else if (ptype == RTE_PTYPE_L4_TCP)
			has_tcp = 1;
		else if (ptype == RTE_PTYPE_L4_UDP)
			has_udp = 1;
	}

	if (!has_ipv4_ext)
		printf("port %d cannot parse RTE_PTYPE_L3_IPV4_EXT\n", portid);
	if (!has_ipv6_ext)
		printf("port %d cannot parse RTE_PTYPE_L3_IPV6_EXT\n", portid);
	if (!(has_ipv4_ext && has_ipv6_ext))
		return 0;

	if (!has_tcp)
		printf("port %d cannot parse RTE_PTYPE_L4_TCP\n", portid);
	if (!has_udp)
		printf("port %d cannot parse RTE_PTYPE_L4_UDP\n", portid);

	return (has_tcp && has_udp) ? 1 : 0;
}
static inline void
em_parse_ptype(struct rte_mbuf *m)
{
struct ether_hdr *eth_hdr;
uint32_t packet_type = RTE_PTYPE_UNKNOWN;
uint16_t ether_type;
void *l3;
int hdr_len;
struct ipv4_hdr *ipv4_hdr;
struct ipv6_hdr *ipv6_hdr;
eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
ether_type = eth_hdr->ether_type;
l3 = (uint8_t *)eth_hdr + sizeof(struct ether_hdr);
if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
ipv4_hdr = (struct ipv4_hdr *)l3;
hdr_len = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
IPV4_IHL_MULTIPLIER;
if (hdr_len == sizeof(struct ipv4_hdr)) {
packet_type |= RTE_PTYPE_L3_IPV4;
if (ipv4_hdr->next_proto_id == IPPROTO_TCP)
packet_type |= RTE_PTYPE_L4_TCP;
else if (ipv4_hdr->next_proto_id == IPPROTO_UDP)
packet_type |= RTE_PTYPE_L4_UDP;
} else
packet_type |= RTE_PTYPE_L3_IPV4_EXT;
} else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
ipv6_hdr = (struct ipv6_hdr *)l3;
if (ipv6_hdr->proto == IPPROTO_TCP)
packet_type |= RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP;
else if (ipv6_hdr->proto == IPPROTO_UDP)
packet_type |= RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP;
else
packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
}
m->packet_type = packet_type;
}
/*
 * Rx callback for exact-match mode.
 *
 * Runs em_parse_ptype() on each of the nb_pkts mbufs in pkts[] so that
 * their packet_type field is filled in by software. The port, queue,
 * max_pkts and user_param arguments are ignored.
 *
 * Returns nb_pkts unchanged.
 */
uint16_t
em_cb_parse_ptype(uint8_t port __rte_unused, uint16_t queue __rte_unused,
		  struct rte_mbuf *pkts[], uint16_t nb_pkts,
		  uint16_t max_pkts __rte_unused,
		  void *user_param __rte_unused)
{
	unsigned idx = 0;

	while (idx < nb_pkts) {
		em_parse_ptype(pkts[idx]);
		idx++;
	}

	return nb_pkts;
}
/* main processing loop */
int
em_main_loop(__attribute__((unused)) void *dummy)

View File

@ -41,10 +41,14 @@ l3fwd_em_simple_forward(struct rte_mbuf *m, uint8_t portid,
struct ether_hdr *eth_hdr;
struct ipv4_hdr *ipv4_hdr;
uint8_t dst_port;
uint32_t tcp_or_udp;
uint32_t l3_ptypes;
eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
tcp_or_udp = m->packet_type & (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP);
l3_ptypes = m->packet_type & RTE_PTYPE_L3_MASK;
if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) {
if (tcp_or_udp && (l3_ptypes == RTE_PTYPE_L3_IPV4)) {
/* Handle IPv4 headers.*/
ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
sizeof(struct ether_hdr));
@ -56,7 +60,7 @@ l3fwd_em_simple_forward(struct rte_mbuf *m, uint8_t portid,
return;
}
#endif
dst_port = em_get_ipv4_dst_port(ipv4_hdr, portid,
dst_port = em_get_ipv4_dst_port(ipv4_hdr, portid,
qconf->ipv4_lookup_struct);
if (dst_port >= RTE_MAX_ETHPORTS ||
@ -75,7 +79,7 @@ l3fwd_em_simple_forward(struct rte_mbuf *m, uint8_t portid,
ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr);
send_single_packet(qconf, m, dst_port);
} else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
} else if (tcp_or_udp && (l3_ptypes == RTE_PTYPE_L3_IPV6)) {
/* Handle IPv6 headers.*/
struct ipv6_hdr *ipv6_hdr;

View File

@ -239,8 +239,13 @@ em_get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt,
uint8_t next_hop;
struct ipv4_hdr *ipv4_hdr;
struct ipv6_hdr *ipv6_hdr;
uint32_t tcp_or_udp;
uint32_t l3_ptypes;
if (RTE_ETH_IS_IPV4_HDR(pkt->packet_type)) {
tcp_or_udp = pkt->packet_type & (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP);
l3_ptypes = pkt->packet_type & RTE_PTYPE_L3_MASK;
if (tcp_or_udp && (l3_ptypes == RTE_PTYPE_L3_IPV4)) {
/* Handle IPv4 headers.*/
ipv4_hdr = rte_pktmbuf_mtod_offset(pkt, struct ipv4_hdr *,
@ -255,7 +260,7 @@ em_get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt,
return next_hop;
} else if (RTE_ETH_IS_IPV6_HDR(pkt->packet_type)) {
} else if (tcp_or_udp && (l3_ptypes == RTE_PTYPE_L3_IPV6)) {
/* Handle IPv6 headers.*/
ipv6_hdr = rte_pktmbuf_mtod_offset(pkt, struct ipv6_hdr *,
@ -304,11 +309,15 @@ l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
pkts_burst[j+6]->packet_type &
pkts_burst[j+7]->packet_type;
if (pkt_type & RTE_PTYPE_L3_IPV4) {
uint32_t l3_type = pkt_type & RTE_PTYPE_L3_MASK;
uint32_t tcp_or_udp = pkt_type &
(RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP);
if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV4)) {
em_get_dst_port_ipv4x8(qconf, &pkts_burst[j], portid, &dst_port[j]);
} else if (pkt_type & RTE_PTYPE_L3_IPV6) {
} else if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV6)) {
em_get_dst_port_ipv6x8(qconf, &pkts_burst[j], portid, &dst_port[j]);

View File

@ -52,8 +52,13 @@ em_get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt,
uint8_t next_hop;
struct ipv4_hdr *ipv4_hdr;
struct ipv6_hdr *ipv6_hdr;
uint32_t tcp_or_udp;
uint32_t l3_ptypes;
if (RTE_ETH_IS_IPV4_HDR(pkt->packet_type)) {
tcp_or_udp = pkt->packet_type & (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP);
l3_ptypes = pkt->packet_type & RTE_PTYPE_L3_MASK;
if (tcp_or_udp && (l3_ptypes == RTE_PTYPE_L3_IPV4)) {
/* Handle IPv4 headers.*/
ipv4_hdr = rte_pktmbuf_mtod_offset(pkt, struct ipv4_hdr *,
@ -68,7 +73,7 @@ em_get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt,
return next_hop;
} else if (RTE_ETH_IS_IPV6_HDR(pkt->packet_type)) {
} else if (tcp_or_udp && (l3_ptypes == RTE_PTYPE_L3_IPV6)) {
/* Handle IPv6 headers.*/
ipv6_hdr = rte_pktmbuf_mtod_offset(pkt, struct ipv6_hdr *,

View File

@ -277,6 +277,71 @@ setup_lpm(const int socketid)
}
}
/*
 * Check whether a port reports the L3 packet types LPM mode relies on.
 *
 * The port's supported L3 packet types are queried; an entry counts as
 * IPv4 (resp. IPv6) when it has any bit of RTE_PTYPE_L3_IPV4 (resp.
 * RTE_PTYPE_L3_IPV6) set. A message is printed for each family that is
 * missing.
 *
 * Returns 1 when both families are reported, 0 otherwise (including when
 * the query itself yields no types).
 */
int
lpm_check_ptype(int portid)
{
	const uint32_t mask = RTE_PTYPE_L3_MASK;
	int seen_ipv4 = 0;
	int seen_ipv6 = 0;
	int count;
	int idx;

	/* First call only asks how many entries would be returned. */
	count = rte_eth_dev_get_supported_ptypes(portid, mask, NULL, 0);
	if (count <= 0)
		return 0;

	uint32_t supported[count];

	count = rte_eth_dev_get_supported_ptypes(portid, mask, supported,
						 count);
	for (idx = 0; idx < count; idx++) {
		if ((supported[idx] & RTE_PTYPE_L3_IPV4) != 0)
			seen_ipv4 = 1;
		if ((supported[idx] & RTE_PTYPE_L3_IPV6) != 0)
			seen_ipv6 = 1;
	}

	if (!seen_ipv4)
		printf("port %d cannot parse RTE_PTYPE_L3_IPV4\n", portid);
	if (!seen_ipv6)
		printf("port %d cannot parse RTE_PTYPE_L3_IPV6\n", portid);

	return (seen_ipv4 && seen_ipv6) ? 1 : 0;
}
/*
 * Software packet-type classifier for LPM mode.
 *
 * Looks only at the EtherType of the Ethernet header at the start of the
 * mbuf data and overwrites m->packet_type with
 * RTE_PTYPE_L3_IPV4_EXT_UNKNOWN for IPv4, RTE_PTYPE_L3_IPV6_EXT_UNKNOWN
 * for IPv6, or RTE_PTYPE_UNKNOWN for anything else.
 */
static inline void
lpm_parse_ptype(struct rte_mbuf *m)
{
	struct ether_hdr *eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
	/* Kept in network byte order; compared against BE constants below. */
	uint16_t type_be = eth->ether_type;
	uint32_t ptype;

	if (type_be == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
		ptype = RTE_PTYPE_UNKNOWN | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
	else if (type_be == rte_cpu_to_be_16(ETHER_TYPE_IPv6))
		ptype = RTE_PTYPE_UNKNOWN | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
	else
		ptype = RTE_PTYPE_UNKNOWN;

	m->packet_type = ptype;
}
/*
 * Rx callback for LPM mode.
 *
 * Runs lpm_parse_ptype() on each of the nb_pkts mbufs in pkts[] so that
 * their packet_type field is filled in by software. The port, queue,
 * max_pkts and user_param arguments are ignored.
 *
 * Returns nb_pkts unchanged.
 */
uint16_t
lpm_cb_parse_ptype(uint8_t port __rte_unused, uint16_t queue __rte_unused,
		   struct rte_mbuf *pkts[], uint16_t nb_pkts,
		   uint16_t max_pkts __rte_unused,
		   void *user_param __rte_unused)
{
	unsigned idx = 0;

	while (idx < nb_pkts) {
		lpm_parse_ptype(pkts[idx]);
		idx++;
	}

	return nb_pkts;
}
/* Return ipv4/ipv6 lpm fwd lookup struct. */
void *
lpm_get_ipv4_l3fwd_lookup_struct(const int socketid)

View File

@ -103,6 +103,8 @@ static int l3fwd_lpm_on;
static int l3fwd_em_on;
static int numa_on = 1; /**< NUMA is enabled by default. */
static int parse_ptype; /**< Parse packet type using rx callback, and */
/**< disabled by default */
/* Global variables. */
@ -172,6 +174,8 @@ static struct rte_mempool * pktmbuf_pool[NB_SOCKETS];
struct l3fwd_lkp_mode {
void (*setup)(int);
int (*check_ptype)(int);
rte_rx_callback_fn cb_parse_ptype;
int (*main_loop)(void *);
void* (*get_ipv4_lookup_struct)(int);
void* (*get_ipv6_lookup_struct)(int);
@ -181,6 +185,8 @@ static struct l3fwd_lkp_mode l3fwd_lkp;
static struct l3fwd_lkp_mode l3fwd_em_lkp = {
.setup = setup_hash,
.check_ptype = em_check_ptype,
.cb_parse_ptype = em_cb_parse_ptype,
.main_loop = em_main_loop,
.get_ipv4_lookup_struct = em_get_ipv4_l3fwd_lookup_struct,
.get_ipv6_lookup_struct = em_get_ipv6_l3fwd_lookup_struct,
@ -188,6 +194,8 @@ static struct l3fwd_lkp_mode l3fwd_em_lkp = {
static struct l3fwd_lkp_mode l3fwd_lpm_lkp = {
.setup = setup_lpm,
.check_ptype = lpm_check_ptype,
.cb_parse_ptype = lpm_cb_parse_ptype,
.main_loop = lpm_main_loop,
.get_ipv4_lookup_struct = lpm_get_ipv4_l3fwd_lookup_struct,
.get_ipv6_lookup_struct = lpm_get_ipv6_l3fwd_lookup_struct,
@ -461,6 +469,7 @@ parse_eth_dest(const char *optarg)
#define CMD_LINE_OPT_IPV6 "ipv6"
#define CMD_LINE_OPT_ENABLE_JUMBO "enable-jumbo"
#define CMD_LINE_OPT_HASH_ENTRY_NUM "hash-entry-num"
#define CMD_LINE_OPT_PARSE_PTYPE "parse-ptype"
/*
* This expression is used to calculate the number of mbufs needed
@ -491,6 +500,7 @@ parse_args(int argc, char **argv)
{CMD_LINE_OPT_IPV6, 0, 0, 0},
{CMD_LINE_OPT_ENABLE_JUMBO, 0, 0, 0},
{CMD_LINE_OPT_HASH_ENTRY_NUM, 1, 0, 0},
{CMD_LINE_OPT_PARSE_PTYPE, 0, 0, 0},
{NULL, 0, 0, 0}
};
@ -617,6 +627,14 @@ parse_args(int argc, char **argv)
return -1;
}
}
if (!strncmp(lgopts[option_index].name,
CMD_LINE_OPT_PARSE_PTYPE,
sizeof(CMD_LINE_OPT_PARSE_PTYPE))) {
printf("soft parse-ptype is enabled\n");
parse_ptype = 1;
}
break;
default:
@ -784,6 +802,28 @@ signal_handler(int signum)
}
}
/*
 * Make sure packet type information will be available on one Rx queue.
 *
 * When the parse_ptype flag is set, the active lookup mode's software
 * parser is installed as an Rx callback on (portid, queueid); a non-NULL
 * result from rte_eth_add_rx_callback() is treated as success. Otherwise
 * the lookup mode's check_ptype() hook decides whether the port already
 * reports the required packet types.
 *
 * Returns 1 on success, 0 on failure (after printing the reason).
 */
static int
prepare_ptype_parser(uint8_t portid, uint16_t queueid)
{
	if (!parse_ptype) {
		/* Rely on the port itself: verify it reports what we need. */
		if (l3fwd_lkp.check_ptype(portid))
			return 1;

		printf("port %d cannot parse packet type, please add --%s\n",
		       portid, CMD_LINE_OPT_PARSE_PTYPE);
		return 0;
	}

	printf("Port %d: softly parse packet type info\n", portid);
	if (!rte_eth_add_rx_callback(portid, queueid,
				     l3fwd_lkp.cb_parse_ptype, NULL)) {
		printf("Failed to add rx callback: port=%d\n", portid);
		return 0;
	}

	return 1;
}
int
main(int argc, char **argv)
{
@ -977,6 +1017,21 @@ main(int argc, char **argv)
rte_eth_promiscuous_enable(portid);
}
printf("\n");
for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
if (rte_lcore_is_enabled(lcore_id) == 0)
continue;
qconf = &lcore_conf[lcore_id];
for (queue = 0; queue < qconf->n_rx_queue; ++queue) {
portid = qconf->rx_queue_list[queue].port_id;
queueid = qconf->rx_queue_list[queue].queue_id;
if (prepare_ptype_parser(portid, queueid) == 0)
rte_exit(EXIT_FAILURE, "ptype check fails\n");
}
}
check_all_ports_link_status((uint8_t)nb_ports, enabled_port_mask);
ret = 0;