gro: support VxLAN GRO

This patch adds a framework that allows GRO on tunneled packets.
Furthermore, it leverages that framework to provide GRO support for
VxLAN-encapsulated packets. Supported VxLAN packets must have an outer
IPv4 header, and contain an inner TCP/IPv4 packet.

VxLAN GRO doesn't check if input packets have correct checksums and
doesn't update checksums for output packets. Additionally, it assumes
the packets are complete (i.e., MF==0 && frag_off==0), when IP
fragmentation is possible (i.e., DF==0).

Signed-off-by: Jiayu Hu <jiayu.hu@intel.com>
Reviewed-by: Junjie Chen <junjie.j.chen@intel.com>
Tested-by: Lei Yao <lei.a.yao@intel.com>
This commit is contained in:
Jiayu Hu 2018-01-10 22:03:12 +08:00 committed by Thomas Monjalon
parent b52b61f046
commit 9e0b9d2ec0
8 changed files with 918 additions and 152 deletions

View File

@ -57,7 +57,9 @@ assumes the packets are complete (i.e., MF==0 && frag_off==0), when IP
fragmentation is possible (i.e., DF==0). Additionally, it complies with
RFC 6864 to process the IPv4 ID field.
Currently, the GRO library provides GRO supports for TCP/IPv4 packets.
Currently, the GRO library provides GRO support for TCP/IPv4 packets and
VxLAN packets which contain an outer IPv4 header and an inner TCP/IPv4
packet.
Two Sets of API
---------------
@ -108,7 +110,8 @@ Reassembly Algorithm
The reassembly algorithm is used for reassembling packets. In the GRO
library, different GRO types can use different algorithms. In this
section, we will introduce an algorithm, which is used by TCP/IPv4 GRO.
section, we will introduce an algorithm, which is used by TCP/IPv4 GRO
and VxLAN GRO.
Challenges
~~~~~~~~~~
@ -185,6 +188,30 @@ Header fields deciding if two packets are neighbors include:
- IPv4 ID. The IPv4 ID fields of the packets, whose DF bit is 0, should
be increased by 1.
VxLAN GRO
---------
The table structure used by VxLAN GRO, which is in charge of processing
VxLAN packets with an outer IPv4 header and inner TCP/IPv4 packet, is
similar to that of TCP/IPv4 GRO. The difference is that the header fields
used to define a VxLAN flow include:
- outer source and destination: Ethernet and IP address, UDP port
- VxLAN header (VNI and flag)
- inner source and destination: Ethernet and IP address, TCP port
Header fields deciding if packets are neighbors include:
- outer IPv4 ID. The IPv4 ID fields of the packets, whose DF bit in the
outer IPv4 header is 0, should be increased by 1.
- inner TCP sequence number
- inner IPv4 ID. The IPv4 ID fields of the packets, whose DF bit in the
inner IPv4 header is 0, should be increased by 1.
.. note::
We comply with RFC 6864 to process the IPv4 ID field. Specifically,
we check IPv4 ID fields for the packets whose DF bit is 0 and

View File

@ -17,6 +17,7 @@ LIBABIVER := 1
# source files
SRCS-$(CONFIG_RTE_LIBRTE_GRO) += rte_gro.c
SRCS-$(CONFIG_RTE_LIBRTE_GRO) += gro_tcp4.c
SRCS-$(CONFIG_RTE_LIBRTE_GRO) += gro_vxlan_tcp4.c
# install this header file
SYMLINK-$(CONFIG_RTE_LIBRTE_GRO)-include += rte_gro.h

View File

@ -6,8 +6,6 @@
#include <rte_mbuf.h>
#include <rte_cycles.h>
#include <rte_ethdev.h>
#include <rte_ip.h>
#include <rte_tcp.h>
#include "gro_tcp4.h"
@ -74,109 +72,6 @@ gro_tcp4_tbl_destroy(void *tbl)
rte_free(tcp_tbl);
}
/*
* merge two TCP/IPv4 packets without updating checksums.
* If cmp is larger than 0, append the new packet to the
* original packet. Otherwise, pre-pend the new packet to
* the original packet.
*/
static inline int
merge_two_tcp4_packets(struct gro_tcp4_item *item,
struct rte_mbuf *pkt,
int cmp,
uint32_t sent_seq,
uint16_t ip_id)
{
struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
uint16_t hdr_len;
if (cmp > 0) {
pkt_head = item->firstseg;
pkt_tail = pkt;
} else {
pkt_head = pkt;
pkt_tail = item->firstseg;
}
/* check if the IPv4 packet length is greater than the max value */
hdr_len = pkt_head->l2_len + pkt_head->l3_len + pkt_head->l4_len;
if (unlikely(pkt_head->pkt_len - pkt_head->l2_len + pkt_tail->pkt_len -
hdr_len > MAX_IPV4_PKT_LENGTH))
return 0;
/* remove the packet header for the tail packet */
rte_pktmbuf_adj(pkt_tail, hdr_len);
/* chain two packets together */
if (cmp > 0) {
item->lastseg->next = pkt;
item->lastseg = rte_pktmbuf_lastseg(pkt);
/* update IP ID to the larger value */
item->ip_id = ip_id;
} else {
lastseg = rte_pktmbuf_lastseg(pkt);
lastseg->next = item->firstseg;
item->firstseg = pkt;
/* update sent_seq to the smaller value */
item->sent_seq = sent_seq;
}
item->nb_merged++;
/* update mbuf metadata for the merged packet */
pkt_head->nb_segs += pkt_tail->nb_segs;
pkt_head->pkt_len += pkt_tail->pkt_len;
return 1;
}
/*
* Check if two TCP/IPv4 packets are neighbors.
*/
static inline int
check_seq_option(struct gro_tcp4_item *item,
struct tcp_hdr *tcph,
uint32_t sent_seq,
uint16_t ip_id,
uint16_t tcp_hl,
uint16_t tcp_dl,
uint8_t is_atomic)
{
struct rte_mbuf *pkt_orig = item->firstseg;
struct ipv4_hdr *iph_orig;
struct tcp_hdr *tcph_orig;
uint16_t len, tcp_hl_orig;
iph_orig = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt_orig, char *) +
pkt_orig->l2_len);
tcph_orig = (struct tcp_hdr *)((char *)iph_orig + pkt_orig->l3_len);
tcp_hl_orig = pkt_orig->l4_len;
/* Check if TCP option fields equal */
len = RTE_MAX(tcp_hl, tcp_hl_orig) - sizeof(struct tcp_hdr);
if ((tcp_hl != tcp_hl_orig) ||
((len > 0) && (memcmp(tcph + 1, tcph_orig + 1,
len) != 0)))
return 0;
/* Don't merge packets whose DF bits are different */
if (unlikely(item->is_atomic ^ is_atomic))
return 0;
/* check if the two packets are neighbors */
len = pkt_orig->pkt_len - pkt_orig->l2_len - pkt_orig->l3_len -
tcp_hl_orig;
if ((sent_seq == item->sent_seq + len) && (is_atomic ||
(ip_id == item->ip_id + 1)))
/* append the new packet */
return 1;
else if ((sent_seq + tcp_dl == item->sent_seq) && (is_atomic ||
(ip_id + item->nb_merged == item->ip_id)))
/* pre-pend the new packet */
return -1;
return 0;
}
static inline uint32_t
find_an_empty_item(struct gro_tcp4_tbl *tbl)
{
@ -279,21 +174,6 @@ insert_new_flow(struct gro_tcp4_tbl *tbl,
return flow_idx;
}
/*
* Check if two TCP/IPv4 packets belong to the same flow.
*/
static inline int
is_same_tcp4_flow(struct tcp4_flow_key k1, struct tcp4_flow_key k2)
{
return (is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr) &&
is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr) &&
(k1.ip_src_addr == k2.ip_src_addr) &&
(k1.ip_dst_addr == k2.ip_dst_addr) &&
(k1.recv_ack == k2.recv_ack) &&
(k1.src_port == k2.src_port) &&
(k1.dst_port == k2.dst_port));
}
/*
* update the packet length for the flushed packet.
*/
@ -407,11 +287,11 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
prev_idx = cur_idx;
do {
cmp = check_seq_option(&(tbl->items[cur_idx]), tcp_hdr,
sent_seq, ip_id, pkt->l4_len, tcp_dl,
sent_seq, ip_id, pkt->l4_len, tcp_dl, 0,
is_atomic);
if (cmp) {
if (merge_two_tcp4_packets(&(tbl->items[cur_idx]),
pkt, cmp, sent_seq, ip_id))
pkt, cmp, sent_seq, ip_id, 0))
return 1;
/*
* Fail to merge the two packets, as the packet

View File

@ -5,6 +5,9 @@
#ifndef _GRO_TCP4_H_
#define _GRO_TCP4_H_
#include <rte_ip.h>
#include <rte_tcp.h>
#define INVALID_ARRAY_INDEX 0xffffffffUL
#define GRO_TCP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
@ -172,4 +175,127 @@ uint16_t gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl,
* The number of packets in the table
*/
uint32_t gro_tcp4_tbl_pkt_count(void *tbl);
/*
 * Tell whether two TCP/IPv4 flow keys describe the same flow.
 *
 * Returns 1 when the Ethernet addresses, IPv4 addresses, TCP
 * acknowledgment number and TCP ports of both keys are equal, and 0
 * otherwise.
 */
static inline int
is_same_tcp4_flow(struct tcp4_flow_key k1, struct tcp4_flow_key k2)
{
	/* Layer 2: source and destination Ethernet addresses. */
	if (!is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr))
		return 0;
	if (!is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr))
		return 0;

	/* Layer 3: IPv4 addresses. */
	if (k1.ip_src_addr != k2.ip_src_addr)
		return 0;
	if (k1.ip_dst_addr != k2.ip_dst_addr)
		return 0;

	/* Layer 4: acknowledgment number and ports. */
	if (k1.recv_ack != k2.recv_ack)
		return 0;
	if (k1.src_port != k2.src_port)
		return 0;

	return k1.dst_port == k2.dst_port;
}
/*
 * Merge two TCP/IPv4 packets without updating checksums.
 * If cmp is larger than 0, append the new packet to the
 * original packet. Otherwise, pre-pend the new packet to
 * the original packet.
 *
 * item:      table item holding the already stored packet chain
 * pkt:       the new packet to merge
 * cmp:       result of the neighbor check (> 0 append, otherwise pre-pend)
 * sent_seq:  TCP sequence number of the new packet
 * ip_id:     IPv4 ID of the new packet
 * l2_offset: number of bytes in front of the headers described by
 *            l2_len/l3_len/l4_len; callers in this library pass 0 for
 *            plain TCP/IPv4 and outer_l2_len + outer_l3_len for VxLAN
 *
 * Returns 1 if the packets were merged, or 0 if the merged packet would
 * exceed MAX_IPV4_PKT_LENGTH (nothing is modified in that case).
 */
static inline int
merge_two_tcp4_packets(struct gro_tcp4_item *item,
		struct rte_mbuf *pkt,
		int cmp,
		uint32_t sent_seq,
		uint16_t ip_id,
		uint16_t l2_offset)
{
	struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
	uint16_t hdr_len, l2_len;

	if (cmp > 0) {
		pkt_head = item->firstseg;
		pkt_tail = pkt;
	} else {
		pkt_head = pkt;
		pkt_tail = item->firstseg;
	}

	/* check if the IPv4 packet length is greater than the max value */
	hdr_len = l2_offset + pkt_head->l2_len + pkt_head->l3_len +
		pkt_head->l4_len;
	/*
	 * For tunneled packets the length limit applies to the outermost
	 * IPv4 packet, so only the outer Ethernet header is excluded.
	 */
	l2_len = l2_offset > 0 ? pkt_head->outer_l2_len : pkt_head->l2_len;
	if (unlikely(pkt_head->pkt_len - l2_len + pkt_tail->pkt_len -
				hdr_len > MAX_IPV4_PKT_LENGTH))
		return 0;

	/* remove the packet header for the tail packet */
	rte_pktmbuf_adj(pkt_tail, hdr_len);

	/* chain two packets together */
	if (cmp > 0) {
		item->lastseg->next = pkt;
		item->lastseg = rte_pktmbuf_lastseg(pkt);
		/* update IP ID to the larger value */
		item->ip_id = ip_id;
	} else {
		lastseg = rte_pktmbuf_lastseg(pkt);
		lastseg->next = item->firstseg;
		item->firstseg = pkt;
		/* update sent_seq to the smaller value */
		item->sent_seq = sent_seq;
		/*
		 * item->ip_id is deliberately left untouched: the neighbor
		 * check (ip_id + nb_merged == item->ip_id) relies on it
		 * holding the largest IPv4 ID of the merged packet.
		 */
	}
	item->nb_merged++;

	/* update MBUF metadata for the merged packet */
	pkt_head->nb_segs += pkt_tail->nb_segs;
	pkt_head->pkt_len += pkt_tail->pkt_len;

	return 1;
}
/*
 * Check if two TCP/IPv4 packets are neighbors.
 *
 * item:      table item holding the stored (possibly merged) packet
 * tcph:      TCP header of the new packet
 * sent_seq:  TCP sequence number of the new packet
 * ip_id:     IPv4 ID of the new packet
 * tcp_hl:    TCP header length of the new packet
 * tcp_dl:    TCP payload length of the new packet
 * l2_offset: number of bytes in front of the headers described by
 *            l2_len/l3_len/l4_len of the stored packet
 * is_atomic: non-zero if the IPv4 ID of the new packet is ignored
 *
 * Returns 1 if the new packet should be appended to the stored one,
 * -1 if it should be pre-pended, and 0 if they are not neighbors.
 */
static inline int
check_seq_option(struct gro_tcp4_item *item,
		struct tcp_hdr *tcph,
		uint32_t sent_seq,
		uint16_t ip_id,
		uint16_t tcp_hl,
		uint16_t tcp_dl,
		uint16_t l2_offset,
		uint8_t is_atomic)
{
	struct rte_mbuf *pkt_orig = item->firstseg;
	struct ipv4_hdr *iph_orig;
	struct tcp_hdr *tcph_orig;
	uint16_t len, tcp_hl_orig;

	iph_orig = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt_orig, char *) +
			l2_offset + pkt_orig->l2_len);
	tcph_orig = (struct tcp_hdr *)((char *)iph_orig + pkt_orig->l3_len);
	tcp_hl_orig = pkt_orig->l4_len;

	/* Check if TCP option fields equal */
	len = RTE_MAX(tcp_hl, tcp_hl_orig) - sizeof(struct tcp_hdr);
	if ((tcp_hl != tcp_hl_orig) || ((len > 0) &&
				(memcmp(tcph + 1, tcph_orig + 1,
					len) != 0)))
		return 0;

	/* Don't merge packets whose DF bits are different */
	if (unlikely(item->is_atomic ^ is_atomic))
		return 0;

	/* check if the two packets are neighbors */
	len = pkt_orig->pkt_len - l2_offset - pkt_orig->l2_len -
		pkt_orig->l3_len - tcp_hl_orig;
	/*
	 * The IPv4 ID is a 16-bit counter, so the additions are truncated
	 * to 16 bits; otherwise integer promotion would make the wraparound
	 * from 65535 to 0 look like a non-consecutive ID.
	 */
	if ((sent_seq == item->sent_seq + len) && (is_atomic ||
				(ip_id == (uint16_t)(item->ip_id + 1))))
		/* append the new packet */
		return 1;
	else if ((sent_seq + tcp_dl == item->sent_seq) && (is_atomic ||
				((uint16_t)(ip_id + item->nb_merged) ==
				 item->ip_id)))
		/* pre-pend the new packet */
		return -1;

	return 0;
}
#endif

View File

@ -0,0 +1,494 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2018 Intel Corporation
*/
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_cycles.h>
#include <rte_ethdev.h>
#include <rte_udp.h>
#include "gro_vxlan_tcp4.h"
/*
 * Create a VxLAN reassembly table on the given socket.
 *
 * The number of entries is max_flow_num * max_item_per_flow, capped at
 * GRO_VXLAN_TCP4_TBL_MAX_ITEM_NUM; both the item array and the flow array
 * are sized with that number. Returns the table, or NULL if the entry
 * number is zero or a memory allocation fails.
 */
void *
gro_vxlan_tcp4_tbl_create(uint16_t socket_id,
		uint16_t max_flow_num,
		uint16_t max_item_per_flow)
{
	struct gro_vxlan_tcp4_tbl *tbl;
	size_t size;
	uint32_t entries_num, i;

	/*
	 * Multiply in 32-bit unsigned arithmetic: two uint16_t operands
	 * would otherwise be promoted to int and could overflow it.
	 */
	entries_num = (uint32_t)max_flow_num * max_item_per_flow;
	entries_num = RTE_MIN(entries_num, GRO_VXLAN_TCP4_TBL_MAX_ITEM_NUM);

	if (entries_num == 0)
		return NULL;

	tbl = rte_zmalloc_socket(__func__,
			sizeof(struct gro_vxlan_tcp4_tbl),
			RTE_CACHE_LINE_SIZE,
			socket_id);
	if (tbl == NULL)
		return NULL;

	size = sizeof(struct gro_vxlan_tcp4_item) * entries_num;
	tbl->items = rte_zmalloc_socket(__func__,
			size,
			RTE_CACHE_LINE_SIZE,
			socket_id);
	if (tbl->items == NULL) {
		rte_free(tbl);
		return NULL;
	}
	tbl->max_item_num = entries_num;

	size = sizeof(struct gro_vxlan_tcp4_flow) * entries_num;
	tbl->flows = rte_zmalloc_socket(__func__,
			size,
			RTE_CACHE_LINE_SIZE,
			socket_id);
	if (tbl->flows == NULL) {
		rte_free(tbl->items);
		rte_free(tbl);
		return NULL;
	}

	/* Mark every flow slot as empty. */
	for (i = 0; i < entries_num; i++)
		tbl->flows[i].start_index = INVALID_ARRAY_INDEX;
	tbl->max_flow_num = entries_num;

	return tbl;
}
/*
 * Release a VxLAN reassembly table together with its item and flow
 * arrays. A NULL table is accepted.
 */
void
gro_vxlan_tcp4_tbl_destroy(void *tbl)
{
	struct gro_vxlan_tcp4_tbl *table = tbl;

	if (table != NULL) {
		/* Free the arrays before the structure that owns them. */
		rte_free(table->items);
		rte_free(table->flows);
	}

	rte_free(table);
}
/*
 * Return the index of the first unused item slot (one whose firstseg is
 * NULL), or INVALID_ARRAY_INDEX if every slot is in use.
 */
static inline uint32_t
find_an_empty_item(struct gro_vxlan_tcp4_tbl *tbl)
{
	const uint32_t capacity = tbl->max_item_num;
	uint32_t idx = 0;

	while (idx < capacity) {
		if (tbl->items[idx].inner_item.firstseg == NULL)
			return idx;
		idx++;
	}

	return INVALID_ARRAY_INDEX;
}
/*
 * Return the index of the first unused flow slot (one whose start_index
 * is INVALID_ARRAY_INDEX), or INVALID_ARRAY_INDEX if none is free.
 */
static inline uint32_t
find_an_empty_flow(struct gro_vxlan_tcp4_tbl *tbl)
{
	const uint32_t capacity = tbl->max_flow_num;
	uint32_t idx = 0;

	while (idx < capacity) {
		if (tbl->flows[idx].start_index == INVALID_ARRAY_INDEX)
			return idx;
		idx++;
	}

	return INVALID_ARRAY_INDEX;
}
static inline uint32_t
insert_new_item(struct gro_vxlan_tcp4_tbl *tbl,
struct rte_mbuf *pkt,
uint64_t start_time,
uint32_t prev_idx,
uint32_t sent_seq,
uint16_t outer_ip_id,
uint16_t ip_id,
uint8_t outer_is_atomic,
uint8_t is_atomic)
{
uint32_t item_idx;
item_idx = find_an_empty_item(tbl);
if (unlikely(item_idx == INVALID_ARRAY_INDEX))
return INVALID_ARRAY_INDEX;
tbl->items[item_idx].inner_item.firstseg = pkt;
tbl->items[item_idx].inner_item.lastseg = rte_pktmbuf_lastseg(pkt);
tbl->items[item_idx].inner_item.start_time = start_time;
tbl->items[item_idx].inner_item.next_pkt_idx = INVALID_ARRAY_INDEX;
tbl->items[item_idx].inner_item.sent_seq = sent_seq;
tbl->items[item_idx].inner_item.ip_id = ip_id;
tbl->items[item_idx].inner_item.nb_merged = 1;
tbl->items[item_idx].inner_item.is_atomic = is_atomic;
tbl->items[item_idx].outer_ip_id = outer_ip_id;
tbl->items[item_idx].outer_is_atomic = outer_is_atomic;
tbl->item_num++;
/* If the previous packet exists, chain the new one with it. */
if (prev_idx != INVALID_ARRAY_INDEX) {
tbl->items[item_idx].inner_item.next_pkt_idx =
tbl->items[prev_idx].inner_item.next_pkt_idx;
tbl->items[prev_idx].inner_item.next_pkt_idx = item_idx;
}
return item_idx;
}
static inline uint32_t
delete_item(struct gro_vxlan_tcp4_tbl *tbl,
uint32_t item_idx,
uint32_t prev_item_idx)
{
uint32_t next_idx = tbl->items[item_idx].inner_item.next_pkt_idx;
/* NULL indicates an empty item. */
tbl->items[item_idx].inner_item.firstseg = NULL;
tbl->item_num--;
if (prev_item_idx != INVALID_ARRAY_INDEX)
tbl->items[prev_item_idx].inner_item.next_pkt_idx = next_idx;
return next_idx;
}
static inline uint32_t
insert_new_flow(struct gro_vxlan_tcp4_tbl *tbl,
struct vxlan_tcp4_flow_key *src,
uint32_t item_idx)
{
struct vxlan_tcp4_flow_key *dst;
uint32_t flow_idx;
flow_idx = find_an_empty_flow(tbl);
if (unlikely(flow_idx == INVALID_ARRAY_INDEX))
return INVALID_ARRAY_INDEX;
dst = &(tbl->flows[flow_idx].key);
ether_addr_copy(&(src->inner_key.eth_saddr),
&(dst->inner_key.eth_saddr));
ether_addr_copy(&(src->inner_key.eth_daddr),
&(dst->inner_key.eth_daddr));
dst->inner_key.ip_src_addr = src->inner_key.ip_src_addr;
dst->inner_key.ip_dst_addr = src->inner_key.ip_dst_addr;
dst->inner_key.recv_ack = src->inner_key.recv_ack;
dst->inner_key.src_port = src->inner_key.src_port;
dst->inner_key.dst_port = src->inner_key.dst_port;
dst->vxlan_hdr.vx_flags = src->vxlan_hdr.vx_flags;
dst->vxlan_hdr.vx_vni = src->vxlan_hdr.vx_vni;
ether_addr_copy(&(src->outer_eth_saddr), &(dst->outer_eth_saddr));
ether_addr_copy(&(src->outer_eth_daddr), &(dst->outer_eth_daddr));
dst->outer_ip_src_addr = src->outer_ip_src_addr;
dst->outer_ip_dst_addr = src->outer_ip_dst_addr;
dst->outer_src_port = src->outer_src_port;
dst->outer_dst_port = src->outer_dst_port;
tbl->flows[flow_idx].start_index = item_idx;
tbl->flow_num++;
return flow_idx;
}
/*
 * Tell whether two VxLAN flow keys describe the same flow.
 *
 * Returns 1 when the outer Ethernet/IPv4/UDP fields, the VxLAN flags and
 * VNI, and the inner TCP/IPv4 key all match, and 0 otherwise.
 */
static inline int
is_same_vxlan_tcp4_flow(struct vxlan_tcp4_flow_key k1,
		struct vxlan_tcp4_flow_key k2)
{
	/* Outer Ethernet addresses. */
	if (!is_same_ether_addr(&k1.outer_eth_saddr, &k2.outer_eth_saddr))
		return 0;
	if (!is_same_ether_addr(&k1.outer_eth_daddr, &k2.outer_eth_daddr))
		return 0;

	/* Outer IPv4 addresses. */
	if (k1.outer_ip_src_addr != k2.outer_ip_src_addr)
		return 0;
	if (k1.outer_ip_dst_addr != k2.outer_ip_dst_addr)
		return 0;

	/* Outer UDP ports. */
	if (k1.outer_src_port != k2.outer_src_port)
		return 0;
	if (k1.outer_dst_port != k2.outer_dst_port)
		return 0;

	/* VxLAN header. */
	if (k1.vxlan_hdr.vx_flags != k2.vxlan_hdr.vx_flags)
		return 0;
	if (k1.vxlan_hdr.vx_vni != k2.vxlan_hdr.vx_vni)
		return 0;

	/* Finally the inner TCP/IPv4 flow. */
	return is_same_tcp4_flow(k1.inner_key, k2.inner_key) != 0;
}
/*
 * Check if a new VxLAN packet and a stored one are neighbors, taking
 * both the inner TCP/IPv4 packet and the outer IPv4 ID into account.
 *
 * Returns 1 if the new packet should be appended to the stored one,
 * -1 if it should be pre-pended, and 0 if they can't be merged.
 */
static inline int
check_vxlan_seq_option(struct gro_vxlan_tcp4_item *item,
		struct tcp_hdr *tcp_hdr,
		uint32_t sent_seq,
		uint16_t outer_ip_id,
		uint16_t ip_id,
		uint16_t tcp_hl,
		uint16_t tcp_dl,
		uint8_t outer_is_atomic,
		uint8_t is_atomic)
{
	struct rte_mbuf *pkt = item->inner_item.firstseg;
	int cmp;
	uint16_t l2_offset;

	/* Don't merge packets whose outer DF bits are different. */
	if (unlikely(item->outer_is_atomic ^ outer_is_atomic))
		return 0;

	/* The inner headers start after the outer Ethernet and IPv4 headers. */
	l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
	cmp = check_seq_option(&item->inner_item, tcp_hdr, sent_seq, ip_id,
			tcp_hl, tcp_dl, l2_offset, is_atomic);
	/*
	 * The outer IPv4 ID is a 16-bit counter, so the additions are
	 * truncated to 16 bits to treat the wraparound from 65535 to 0 as
	 * consecutive.
	 */
	if ((cmp > 0) && (outer_is_atomic ||
				(outer_ip_id ==
				 (uint16_t)(item->outer_ip_id + 1))))
		/* Append the new packet. */
		return 1;
	else if ((cmp < 0) && (outer_is_atomic ||
				((uint16_t)(outer_ip_id +
					item->inner_item.nb_merged) ==
				 item->outer_ip_id)))
		/* Prepend the new packet. */
		return -1;

	return 0;
}
/*
 * Merge a new VxLAN packet into a stored one by delegating to the
 * TCP/IPv4 merge routine, then record the outer IPv4 ID.
 *
 * Returns 1 if the packets were merged and 0 otherwise.
 */
static inline int
merge_two_vxlan_tcp4_packets(struct gro_vxlan_tcp4_item *item,
		struct rte_mbuf *pkt,
		int cmp,
		uint32_t sent_seq,
		uint16_t outer_ip_id,
		uint16_t ip_id)
{
	int merged;

	merged = merge_two_tcp4_packets(&item->inner_item, pkt, cmp,
			sent_seq, ip_id,
			pkt->outer_l2_len + pkt->outer_l3_len);
	if (!merged)
		return 0;

	/* Only an appended packet carries a larger outer IPv4 ID. */
	if (cmp > 0)
		item->outer_ip_id = outer_ip_id;

	return 1;
}
/*
 * Rewrite the length fields of the outer IPv4 header, the outer UDP
 * header and the inner IPv4 header of the item's packet so that they
 * match the current pkt_len. Checksums are not touched.
 */
static inline void
update_vxlan_header(struct gro_vxlan_tcp4_item *item)
{
	struct rte_mbuf *head = item->inner_item.firstseg;
	char *outer_ip = rte_pktmbuf_mtod(head, char *) + head->outer_l2_len;
	char *outer_udp = outer_ip + head->outer_l3_len;
	/* The inner IPv4 header is located l2_len bytes past the UDP header. */
	char *inner_ip = outer_udp + head->l2_len;
	uint16_t remaining;

	/* Outer IPv4: everything after the outer Ethernet header. */
	remaining = head->pkt_len - head->outer_l2_len;
	((struct ipv4_hdr *)outer_ip)->total_length =
		rte_cpu_to_be_16(remaining);

	/* Outer UDP: everything after the outer IPv4 header. */
	remaining -= head->outer_l3_len;
	((struct udp_hdr *)outer_udp)->dgram_len =
		rte_cpu_to_be_16(remaining);

	/* Inner IPv4: everything after the l2_len bytes that follow. */
	remaining -= head->l2_len;
	((struct ipv4_hdr *)inner_ip)->total_length =
		rte_cpu_to_be_16(remaining);
}
/*
 * Try to merge a VxLAN packet (outer IPv4, inner TCP/IPv4) with the
 * packets stored in the table, or store it if no neighbor is found.
 *
 * Returns a positive value if the packet was merged, zero if it was
 * stored in the table without merging, and a negative value if it can't
 * be processed (TCP flags other than ACK set, no payload, or no free
 * space in the table).
 */
int32_t
gro_vxlan_tcp4_reassemble(struct rte_mbuf *pkt,
		struct gro_vxlan_tcp4_tbl *tbl,
		uint64_t start_time)
{
	struct ether_hdr *outer_eth_hdr, *eth_hdr;
	struct ipv4_hdr *outer_ipv4_hdr, *ipv4_hdr;
	struct tcp_hdr *tcp_hdr;
	struct udp_hdr *udp_hdr;
	struct vxlan_hdr *vxlan_hdr;
	uint32_t sent_seq;
	int32_t tcp_dl;
	uint16_t frag_off, outer_ip_id, ip_id;
	uint8_t outer_is_atomic, is_atomic;

	struct vxlan_tcp4_flow_key key;
	uint32_t cur_idx, prev_idx, item_idx;
	uint32_t i, max_flow_num, remaining_flow_num;
	int cmp;
	uint16_t hdr_len;
	uint8_t find;

	outer_eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
	outer_ipv4_hdr = (struct ipv4_hdr *)((char *)outer_eth_hdr +
			pkt->outer_l2_len);
	udp_hdr = (struct udp_hdr *)((char *)outer_ipv4_hdr +
			pkt->outer_l3_len);
	vxlan_hdr = (struct vxlan_hdr *)((char *)udp_hdr +
			sizeof(struct udp_hdr));
	eth_hdr = (struct ether_hdr *)((char *)vxlan_hdr +
			sizeof(struct vxlan_hdr));
	/* The inner IPv4 header is located l2_len bytes past the UDP header. */
	ipv4_hdr = (struct ipv4_hdr *)((char *)udp_hdr + pkt->l2_len);
	tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);

	/*
	 * Don't process the packet which has FIN, SYN, RST, PSH, URG,
	 * ECE or CWR set.
	 */
	if (tcp_hdr->tcp_flags != TCP_ACK_FLAG)
		return -1;

	hdr_len = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len +
		pkt->l3_len + pkt->l4_len;
	/*
	 * Don't process the packet whose payload length is less than or
	 * equal to 0. The subtraction is done in a signed type so that a
	 * packet shorter than its declared headers yields a negative value
	 * instead of wrapping to a large positive one.
	 */
	tcp_dl = (int32_t)pkt->pkt_len - hdr_len;
	if (tcp_dl <= 0)
		return -1;

	/*
	 * Save IPv4 ID for the packet whose DF bit is 0. For the packet
	 * whose DF bit is 1, IPv4 ID is ignored.
	 */
	frag_off = rte_be_to_cpu_16(outer_ipv4_hdr->fragment_offset);
	outer_is_atomic = (frag_off & IPV4_HDR_DF_FLAG) == IPV4_HDR_DF_FLAG;
	outer_ip_id = outer_is_atomic ? 0 :
		rte_be_to_cpu_16(outer_ipv4_hdr->packet_id);
	frag_off = rte_be_to_cpu_16(ipv4_hdr->fragment_offset);
	is_atomic = (frag_off & IPV4_HDR_DF_FLAG) == IPV4_HDR_DF_FLAG;
	ip_id = is_atomic ? 0 : rte_be_to_cpu_16(ipv4_hdr->packet_id);

	sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);

	/* Build the flow key from the inner, VxLAN and outer headers. */
	ether_addr_copy(&(eth_hdr->s_addr), &(key.inner_key.eth_saddr));
	ether_addr_copy(&(eth_hdr->d_addr), &(key.inner_key.eth_daddr));
	key.inner_key.ip_src_addr = ipv4_hdr->src_addr;
	key.inner_key.ip_dst_addr = ipv4_hdr->dst_addr;
	key.inner_key.recv_ack = tcp_hdr->recv_ack;
	key.inner_key.src_port = tcp_hdr->src_port;
	key.inner_key.dst_port = tcp_hdr->dst_port;

	key.vxlan_hdr.vx_flags = vxlan_hdr->vx_flags;
	key.vxlan_hdr.vx_vni = vxlan_hdr->vx_vni;

	ether_addr_copy(&(outer_eth_hdr->s_addr), &(key.outer_eth_saddr));
	ether_addr_copy(&(outer_eth_hdr->d_addr), &(key.outer_eth_daddr));
	key.outer_ip_src_addr = outer_ipv4_hdr->src_addr;
	key.outer_ip_dst_addr = outer_ipv4_hdr->dst_addr;
	key.outer_src_port = udp_hdr->src_port;
	key.outer_dst_port = udp_hdr->dst_port;

	/* Search for a matched flow. */
	max_flow_num = tbl->max_flow_num;
	remaining_flow_num = tbl->flow_num;
	find = 0;
	for (i = 0; i < max_flow_num && remaining_flow_num; i++) {
		if (tbl->flows[i].start_index != INVALID_ARRAY_INDEX) {
			if (is_same_vxlan_tcp4_flow(tbl->flows[i].key, key)) {
				find = 1;
				break;
			}
			remaining_flow_num--;
		}
	}

	/*
	 * Can't find a matched flow. Insert a new flow and store the
	 * packet into the flow.
	 */
	if (find == 0) {
		item_idx = insert_new_item(tbl, pkt, start_time,
				INVALID_ARRAY_INDEX, sent_seq, outer_ip_id,
				ip_id, outer_is_atomic, is_atomic);
		if (item_idx == INVALID_ARRAY_INDEX)
			return -1;
		if (insert_new_flow(tbl, &key, item_idx) ==
				INVALID_ARRAY_INDEX) {
			/*
			 * Fail to insert a new flow, so
			 * delete the inserted packet.
			 */
			delete_item(tbl, item_idx, INVALID_ARRAY_INDEX);
			return -1;
		}
		return 0;
	}

	/* Check all packets in the flow and try to find a neighbor. */
	cur_idx = tbl->flows[i].start_index;
	prev_idx = cur_idx;
	do {
		cmp = check_vxlan_seq_option(&(tbl->items[cur_idx]), tcp_hdr,
				sent_seq, outer_ip_id, ip_id, pkt->l4_len,
				(uint16_t)tcp_dl, outer_is_atomic, is_atomic);
		if (cmp) {
			if (merge_two_vxlan_tcp4_packets(&(tbl->items[cur_idx]),
						pkt, cmp, sent_seq,
						outer_ip_id, ip_id))
				return 1;
			/*
			 * Can't merge two packets, as the packet
			 * length will be greater than the max value.
			 * Insert the packet into the flow.
			 */
			if (insert_new_item(tbl, pkt, start_time, prev_idx,
						sent_seq, outer_ip_id,
						ip_id, outer_is_atomic,
						is_atomic) ==
					INVALID_ARRAY_INDEX)
				return -1;
			return 0;
		}
		prev_idx = cur_idx;
		cur_idx = tbl->items[cur_idx].inner_item.next_pkt_idx;
	} while (cur_idx != INVALID_ARRAY_INDEX);

	/* Can't find neighbor. Insert the packet into the flow. */
	if (insert_new_item(tbl, pkt, start_time, prev_idx, sent_seq,
				outer_ip_id, ip_id, outer_is_atomic,
				is_atomic) == INVALID_ARRAY_INDEX)
		return -1;

	return 0;
}
/*
 * Flush packets whose start time is not later than flush_timestamp.
 *
 * Flushed packets are removed from the table and stored in 'out'; the
 * length fields of packets made of more than one original packet are
 * updated first. At most nb_out packets are flushed.
 *
 * Returns the number of packets written to 'out'.
 */
uint16_t
gro_vxlan_tcp4_tbl_timeout_flush(struct gro_vxlan_tcp4_tbl *tbl,
		uint64_t flush_timestamp,
		struct rte_mbuf **out,
		uint16_t nb_out)
{
	uint16_t k = 0;
	uint32_t i, j;
	uint32_t max_flow_num = tbl->max_flow_num;

	/*
	 * Without room in 'out' nothing may be flushed: the capacity check
	 * in the loop only runs after a packet has already been stored.
	 */
	if (unlikely(nb_out == 0))
		return 0;

	for (i = 0; i < max_flow_num; i++) {
		if (unlikely(tbl->flow_num == 0))
			return k;

		j = tbl->flows[i].start_index;
		while (j != INVALID_ARRAY_INDEX) {
			if (tbl->items[j].inner_item.start_time <=
					flush_timestamp) {
				out[k++] = tbl->items[j].inner_item.firstseg;
				if (tbl->items[j].inner_item.nb_merged > 1)
					update_vxlan_header(&(tbl->items[j]));
				/*
				 * Delete the item and get the next packet
				 * index.
				 */
				j = delete_item(tbl, j, INVALID_ARRAY_INDEX);
				tbl->flows[i].start_index = j;
				/* The flow becomes empty with its last item. */
				if (j == INVALID_ARRAY_INDEX)
					tbl->flow_num--;

				if (unlikely(k == nb_out))
					return k;
			} else
				/*
				 * The left packets in the flow won't be
				 * timeout. Go to check other flows.
				 */
				break;
		}
	}
	return k;
}
uint32_t
gro_vxlan_tcp4_tbl_pkt_count(void *tbl)
{
struct gro_vxlan_tcp4_tbl *gro_tbl = tbl;
if (gro_tbl)
return gro_tbl->item_num;
return 0;
}

View File

@ -0,0 +1,156 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2018 Intel Corporation
*/
#ifndef _GRO_VXLAN_TCP4_H_
#define _GRO_VXLAN_TCP4_H_
#include "gro_tcp4.h"
#define GRO_VXLAN_TCP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
/*
 * Header fields representing a VxLAN flow. Two packets are treated as
 * belonging to the same flow only when all of these fields are equal.
 */
struct vxlan_tcp4_flow_key {
/* Flow key of the inner TCP/IPv4 packet */
struct tcp4_flow_key inner_key;
/* VxLAN header; its vx_flags and vx_vni fields are part of the key */
struct vxlan_hdr vxlan_hdr;
/* Outer Ethernet source and destination addresses */
struct ether_addr outer_eth_saddr;
struct ether_addr outer_eth_daddr;
/* Outer IPv4 source and destination addresses, as stored in the header */
uint32_t outer_ip_src_addr;
uint32_t outer_ip_dst_addr;
/* Outer UDP ports */
uint16_t outer_src_port;
uint16_t outer_dst_port;
};
/* One entry of the flow array: a flow key plus the head of its item list. */
struct gro_vxlan_tcp4_flow {
/* Header fields identifying the flow */
struct vxlan_tcp4_flow_key key;
/*
 * The index of the first packet in the flow. INVALID_ARRAY_INDEX
 * indicates an empty flow.
 */
uint32_t start_index;
};
/*
 * One entry of the item array: the TCP/IPv4 item state extended with
 * the state of the outer IPv4 header.
 */
struct gro_vxlan_tcp4_item {
/* State of the stored packet; a NULL firstseg marks an unused entry */
struct gro_tcp4_item inner_item;
/* IPv4 ID in the outer IPv4 header */
uint16_t outer_ip_id;
/* Indicate if outer IPv4 ID can be ignored */
uint8_t outer_is_atomic;
};
/*
 * VxLAN (with an outer IPv4 header and an inner TCP/IPv4 packet)
 * reassembly table structure. The item array and the flow array are
 * separate arrays; a flow refers to its items by array index.
 */
struct gro_vxlan_tcp4_tbl {
/* item array */
struct gro_vxlan_tcp4_item *items;
/* flow array */
struct gro_vxlan_tcp4_flow *flows;
/* current item number */
uint32_t item_num;
/* current flow number */
uint32_t flow_num;
/* the maximum item number, i.e. the number of entries in 'items' */
uint32_t max_item_num;
/* the maximum flow number, i.e. the number of entries in 'flows' */
uint32_t max_flow_num;
};
/**
 * This function creates a VxLAN reassembly table for VxLAN packets
 * which have an outer IPv4 header and an inner TCP/IPv4 packet.
 *
 * The table is sized for max_flow_num * max_item_per_flow entries,
 * limited to GRO_VXLAN_TCP4_TBL_MAX_ITEM_NUM.
 *
 * @param socket_id
 *  Socket index for allocating the table
 * @param max_flow_num
 *  The maximum number of flows in the table
 * @param max_item_per_flow
 *  The maximum number of packets per flow
 *
 * @return
 *  - Return the table pointer on success.
 *  - Return NULL on failure.
 */
void *gro_vxlan_tcp4_tbl_create(uint16_t socket_id,
uint16_t max_flow_num,
uint16_t max_item_per_flow);
/**
 * This function destroys a VxLAN reassembly table, including its item
 * and flow arrays.
 *
 * @param tbl
 *  Pointer pointing to the VxLAN reassembly table. NULL is accepted.
 */
void gro_vxlan_tcp4_tbl_destroy(void *tbl);
/**
 * This function merges a VxLAN packet which has an outer IPv4 header and
 * an inner TCP/IPv4 packet. It doesn't process the packet, whose TCP
 * header has SYN, FIN, RST, PSH, CWR, ECE or URG bit set, or which
 * doesn't have payload.
 *
 * This function doesn't check if the packet has correct checksums and
 * doesn't re-calculate checksums for the merged packet. Additionally,
 * it assumes the packets are complete (i.e., MF==0 && frag_off==0), when
 * IP fragmentation is possible (i.e., DF==0). It returns the packet, if
 * the packet has invalid parameters (e.g. SYN bit is set) or there is no
 * available space in the table.
 *
 * @param pkt
 *  Packet to reassemble
 * @param tbl
 *  Pointer pointing to the VxLAN reassembly table
 * @param start_time
 *  The time when the packet is inserted into the table
 *
 * @return
 *  - Return a positive value if the packet is merged.
 *  - Return zero if the packet isn't merged but stored in the table.
 *  - Return a negative value for invalid parameters or no available
 *    space in the table.
 */
int32_t gro_vxlan_tcp4_reassemble(struct rte_mbuf *pkt,
struct gro_vxlan_tcp4_tbl *tbl,
uint64_t start_time);
/**
 * This function flushes timeout packets in the VxLAN reassembly table,
 * and without updating checksums. For a flushed packet that was built
 * from more than one packet, the length fields of the outer IPv4, outer
 * UDP and inner IPv4 headers are updated before it is returned.
 *
 * @param tbl
 *  Pointer pointing to a VxLAN GRO table
 * @param flush_timestamp
 *  This function flushes packets which are inserted into the table
 *  before or at the flush_timestamp.
 * @param out
 *  Pointer array used to keep flushed packets
 * @param nb_out
 *  The element number in 'out'. It also determines the maximum number of
 *  packets that can be flushed finally.
 *
 * @return
 *  The number of flushed packets
 */
uint16_t gro_vxlan_tcp4_tbl_timeout_flush(struct gro_vxlan_tcp4_tbl *tbl,
uint64_t flush_timestamp,
struct rte_mbuf **out,
uint16_t nb_out);
/**
 * This function returns the number of the packets in a VxLAN
 * reassembly table.
 *
 * @param tbl
 *  Pointer pointing to the VxLAN reassembly table. If it is NULL, the
 *  function returns 0.
 *
 * @return
 *  The number of packets in the table
 */
uint32_t gro_vxlan_tcp4_tbl_pkt_count(void *tbl);
#endif

View File

@ -9,6 +9,7 @@
#include "rte_gro.h"
#include "gro_tcp4.h"
#include "gro_vxlan_tcp4.h"
typedef void *(*gro_tbl_create_fn)(uint16_t socket_id,
uint16_t max_flow_num,
@ -17,15 +18,28 @@ typedef void (*gro_tbl_destroy_fn)(void *tbl);
typedef uint32_t (*gro_tbl_pkt_count_fn)(void *tbl);
static gro_tbl_create_fn tbl_create_fn[RTE_GRO_TYPE_MAX_NUM] = {
gro_tcp4_tbl_create, NULL};
gro_tcp4_tbl_create, gro_vxlan_tcp4_tbl_create, NULL};
static gro_tbl_destroy_fn tbl_destroy_fn[RTE_GRO_TYPE_MAX_NUM] = {
gro_tcp4_tbl_destroy, NULL};
gro_tcp4_tbl_destroy, gro_vxlan_tcp4_tbl_destroy,
NULL};
static gro_tbl_pkt_count_fn tbl_pkt_count_fn[RTE_GRO_TYPE_MAX_NUM] = {
gro_tcp4_tbl_pkt_count, NULL};
gro_tcp4_tbl_pkt_count, gro_vxlan_tcp4_tbl_pkt_count,
NULL};
/*
 * True for packets with an IPv4 header whose L4 packet type is exactly
 * TCP. The L4 packet type is an enumerated field rather than a set of
 * independent bits, so it is extracted with its mask before comparing;
 * testing the TCP bit pattern alone also matches other L4 types that
 * share that bit.
 */
#define IS_IPV4_TCP_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \
		(((ptype) & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP))
/*
 * True for VxLAN packets with an outer IPv4/UDP header and an inner
 * TCP/IPv4 packet. Every packet type field is an enumerated value, so
 * each one is extracted with its mask and compared for equality; in
 * particular the inner L3 type must be one of the IPv4 variants, which
 * a plain bit test can't distinguish from the IPv6 ones.
 */
#define IS_IPV4_VXLAN_TCP4_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \
		(((ptype) & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP) && \
		(((ptype) & RTE_PTYPE_TUNNEL_MASK) == \
		 RTE_PTYPE_TUNNEL_VXLAN) && \
		(((ptype) & RTE_PTYPE_INNER_L4_MASK) == \
		 RTE_PTYPE_INNER_L4_TCP) && \
		((((ptype) & RTE_PTYPE_INNER_L3_MASK) == \
		  RTE_PTYPE_INNER_L3_IPV4) || \
		 (((ptype) & RTE_PTYPE_INNER_L3_MASK) == \
		  RTE_PTYPE_INNER_L3_IPV4_EXT) || \
		 (((ptype) & RTE_PTYPE_INNER_L3_MASK) == \
		  RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN)))
/*
* GRO context structure. It keeps the table structures, which are
* used to merge packets, for different GRO types. Before using
@ -109,12 +123,20 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts,
struct gro_tcp4_flow tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
struct gro_tcp4_item tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {{0} };
/* Allocate a reassembly table for VXLAN GRO */
struct gro_vxlan_tcp4_tbl vxlan_tbl;
struct gro_vxlan_tcp4_flow vxlan_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
struct gro_vxlan_tcp4_item vxlan_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {
{{0}, 0, 0} };
struct rte_mbuf *unprocess_pkts[nb_pkts];
uint32_t item_num;
int32_t ret;
uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts;
uint8_t do_tcp4_gro = 0, do_vxlan_gro = 0;
if (unlikely((param->gro_types & RTE_GRO_TCP_IPV4) == 0))
if (unlikely((param->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
RTE_GRO_TCP_IPV4)) == 0))
return nb_pkts;
/* Get the maximum number of packets */
@ -122,22 +144,47 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts,
param->max_item_per_flow));
item_num = RTE_MIN(item_num, RTE_GRO_MAX_BURST_ITEM_NUM);
for (i = 0; i < item_num; i++)
tcp_flows[i].start_index = INVALID_ARRAY_INDEX;
if (param->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) {
for (i = 0; i < item_num; i++)
vxlan_flows[i].start_index = INVALID_ARRAY_INDEX;
tcp_tbl.flows = tcp_flows;
tcp_tbl.items = tcp_items;
tcp_tbl.flow_num = 0;
tcp_tbl.item_num = 0;
tcp_tbl.max_flow_num = item_num;
tcp_tbl.max_item_num = item_num;
vxlan_tbl.flows = vxlan_flows;
vxlan_tbl.items = vxlan_items;
vxlan_tbl.flow_num = 0;
vxlan_tbl.item_num = 0;
vxlan_tbl.max_flow_num = item_num;
vxlan_tbl.max_item_num = item_num;
do_vxlan_gro = 1;
}
if (param->gro_types & RTE_GRO_TCP_IPV4) {
for (i = 0; i < item_num; i++)
tcp_flows[i].start_index = INVALID_ARRAY_INDEX;
tcp_tbl.flows = tcp_flows;
tcp_tbl.items = tcp_items;
tcp_tbl.flow_num = 0;
tcp_tbl.item_num = 0;
tcp_tbl.max_flow_num = item_num;
tcp_tbl.max_item_num = item_num;
do_tcp4_gro = 1;
}
for (i = 0; i < nb_pkts; i++) {
if (IS_IPV4_TCP_PKT(pkts[i]->packet_type)) {
/*
* The timestamp is ignored, since all packets
* will be flushed from the tables.
*/
/*
* The timestamp is ignored, since all packets
* will be flushed from the tables.
*/
if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
do_vxlan_gro) {
ret = gro_vxlan_tcp4_reassemble(pkts[i], &vxlan_tbl, 0);
if (ret > 0)
/* Merge successfully */
nb_after_gro--;
else if (ret < 0)
unprocess_pkts[unprocess_num++] = pkts[i];
} else if (IS_IPV4_TCP_PKT(pkts[i]->packet_type) &&
do_tcp4_gro) {
ret = gro_tcp4_reassemble(pkts[i], &tcp_tbl, 0);
if (ret > 0)
/* merge successfully */
@ -149,8 +196,16 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts,
}
if (nb_after_gro < nb_pkts) {
i = 0;
/* Flush all packets from the tables */
i = gro_tcp4_tbl_timeout_flush(&tcp_tbl, 0, pkts, nb_pkts);
if (do_vxlan_gro) {
i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tbl,
0, pkts, nb_pkts);
}
if (do_tcp4_gro) {
i += gro_tcp4_tbl_timeout_flush(&tcp_tbl, 0,
&pkts[i], nb_pkts - i);
}
/* Copy unprocessed packets */
if (unprocess_num > 0) {
memcpy(&pkts[i], unprocess_pkts,
@ -169,18 +224,33 @@ rte_gro_reassemble(struct rte_mbuf **pkts,
{
struct rte_mbuf *unprocess_pkts[nb_pkts];
struct gro_ctx *gro_ctx = ctx;
void *tcp_tbl;
void *tcp_tbl, *vxlan_tbl;
uint64_t current_time;
uint16_t i, unprocess_num = 0;
uint8_t do_tcp4_gro, do_vxlan_gro;
if (unlikely((gro_ctx->gro_types & RTE_GRO_TCP_IPV4) == 0))
if (unlikely((gro_ctx->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
RTE_GRO_TCP_IPV4)) == 0))
return nb_pkts;
tcp_tbl = gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX];
vxlan_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];
do_tcp4_gro = (gro_ctx->gro_types & RTE_GRO_TCP_IPV4) ==
RTE_GRO_TCP_IPV4;
do_vxlan_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) ==
RTE_GRO_IPV4_VXLAN_TCP_IPV4;
current_time = rte_rdtsc();
for (i = 0; i < nb_pkts; i++) {
if (IS_IPV4_TCP_PKT(pkts[i]->packet_type)) {
if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
do_vxlan_gro) {
if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tbl,
current_time) < 0)
unprocess_pkts[unprocess_num++] = pkts[i];
} else if (IS_IPV4_TCP_PKT(pkts[i]->packet_type) &&
do_tcp4_gro) {
if (gro_tcp4_reassemble(pkts[i], tcp_tbl,
current_time) < 0)
unprocess_pkts[unprocess_num++] = pkts[i];
@ -204,18 +274,27 @@ rte_gro_timeout_flush(void *ctx,
{
struct gro_ctx *gro_ctx = ctx;
uint64_t flush_timestamp;
uint16_t num = 0;
gro_types = gro_types & gro_ctx->gro_types;
flush_timestamp = rte_rdtsc() - timeout_cycles;
if (gro_types & RTE_GRO_TCP_IPV4) {
return gro_tcp4_tbl_timeout_flush(
gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX],
flush_timestamp,
out, max_nb_out);
if (gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) {
num = gro_vxlan_tcp4_tbl_timeout_flush(gro_ctx->tbls[
RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX],
flush_timestamp, out, max_nb_out);
max_nb_out -= num;
}
return 0;
/* If no available space in 'out', stop flushing. */
if ((gro_types & RTE_GRO_TCP_IPV4) && max_nb_out > 0) {
num += gro_tcp4_tbl_timeout_flush(
gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX],
flush_timestamp,
&out[num], max_nb_out);
}
return num;
}
uint64_t

View File

@ -23,12 +23,15 @@ extern "C" {
*/
#define RTE_GRO_TYPE_MAX_NUM 64
/**< the max number of supported GRO types */
#define RTE_GRO_TYPE_SUPPORT_NUM 1
#define RTE_GRO_TYPE_SUPPORT_NUM 2
/**< the number of currently supported GRO types */
#define RTE_GRO_TCP_IPV4_INDEX 0
#define RTE_GRO_TCP_IPV4 (1ULL << RTE_GRO_TCP_IPV4_INDEX)
/**< TCP/IPv4 GRO flag */
#define RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX 1
#define RTE_GRO_IPV4_VXLAN_TCP_IPV4 (1ULL << RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX)
/**< VxLAN GRO flag. */
/**
* Structure used to create GRO context objects or used to pass