Yi Yang 1ca5e67408 gro: support UDP/IPv4
UDP/IPv4 GRO can help improve VM-to-VM UDP performance
when UFO or GSO is enabled in the VM. GRO must be supported
if UFO or GSO is enabled; otherwise, performance cannot
improve much if only GSO is present.

With this enabled in DPDK, OVS DPDK can leverage it
to improve VM-to-VM UDP performance: it will reassemble
UDP fragments immediately after they are received from
a physical NIC. It is very helpful in the OVS DPDK VLAN use
case.

Signed-off-by: Yi Yang <yangyi01@inspur.com>
Acked-by: Jiayu Hu <jiayu.hu@intel.com>
2020-10-06 21:51:03 +02:00

434 lines
10 KiB
C

/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2020 Inspur Corporation
*/
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_cycles.h>
#include <rte_ethdev.h>
#include "gro_udp4.h"
/**
 * Allocate and initialise a UDP/IPv4 reassembly table.
 *
 * The table owns two arrays, one of items (stored packets) and one of
 * flows. Both are sized to max_flow_num * max_item_per_flow entries,
 * capped at GRO_UDP4_TBL_MAX_ITEM_NUM. Every flow starts out empty.
 *
 * @param socket_id
 *  Socket identifier forwarded to rte_zmalloc_socket() for each allocation.
 * @param max_flow_num
 *  Requested number of flows.
 * @param max_item_per_flow
 *  Requested number of items per flow.
 * @return
 *  The new table, or NULL when the computed entry count is zero or an
 *  allocation fails. Release it with gro_udp4_tbl_destroy().
 */
void *
gro_udp4_tbl_create(uint16_t socket_id,
		uint16_t max_flow_num,
		uint16_t max_item_per_flow)
{
	struct gro_udp4_tbl *tbl;
	size_t size;
	uint32_t entries_num, i;

	/*
	 * Widen before multiplying. Two uint16_t operands are promoted to
	 * signed int, and a large product overflows it, which is undefined
	 * behaviour. The product of two 16-bit values always fits in 32 bits.
	 */
	entries_num = (uint32_t)max_flow_num * max_item_per_flow;
	entries_num = RTE_MIN(entries_num, GRO_UDP4_TBL_MAX_ITEM_NUM);
	if (entries_num == 0)
		return NULL;

	tbl = rte_zmalloc_socket(__func__,
			sizeof(struct gro_udp4_tbl),
			RTE_CACHE_LINE_SIZE,
			socket_id);
	if (tbl == NULL)
		return NULL;

	size = sizeof(struct gro_udp4_item) * entries_num;
	tbl->items = rte_zmalloc_socket(__func__,
			size,
			RTE_CACHE_LINE_SIZE,
			socket_id);
	if (tbl->items == NULL) {
		rte_free(tbl);
		return NULL;
	}
	tbl->max_item_num = entries_num;

	size = sizeof(struct gro_udp4_flow) * entries_num;
	tbl->flows = rte_zmalloc_socket(__func__,
			size,
			RTE_CACHE_LINE_SIZE,
			socket_id);
	if (tbl->flows == NULL) {
		rte_free(tbl->items);
		rte_free(tbl);
		return NULL;
	}

	/* INVALID_ARRAY_INDEX indicates an empty flow */
	for (i = 0; i < entries_num; i++)
		tbl->flows[i].start_index = INVALID_ARRAY_INDEX;
	tbl->max_flow_num = entries_num;

	return tbl;
}
/*
 * Free a reassembly table together with its item and flow arrays.
 * When tbl is NULL, only rte_free(NULL) is called.
 */
void
gro_udp4_tbl_destroy(void *tbl)
{
	struct gro_udp4_tbl *table = tbl;

	if (table != NULL) {
		/* The arrays hang off the table, so release them first. */
		rte_free(table->items);
		rte_free(table->flows);
	}

	rte_free(table);
}
/*
 * Return the index of the first unused item slot (firstseg == NULL),
 * or INVALID_ARRAY_INDEX when every slot is occupied.
 */
static inline uint32_t
find_an_empty_item(struct gro_udp4_tbl *tbl)
{
	const uint32_t limit = tbl->max_item_num;
	uint32_t idx = 0;

	while (idx < limit) {
		if (tbl->items[idx].firstseg == NULL)
			return idx;
		idx++;
	}

	return INVALID_ARRAY_INDEX;
}
/*
 * Return the index of the first unused flow slot, i.e. one whose
 * start_index is INVALID_ARRAY_INDEX, or INVALID_ARRAY_INDEX when every
 * flow slot is in use.
 */
static inline uint32_t
find_an_empty_flow(struct gro_udp4_tbl *tbl)
{
	const uint32_t limit = tbl->max_flow_num;
	uint32_t idx = 0;

	while (idx < limit) {
		if (tbl->flows[idx].start_index == INVALID_ARRAY_INDEX)
			return idx;
		idx++;
	}

	return INVALID_ARRAY_INDEX;
}
static inline uint32_t
insert_new_item(struct gro_udp4_tbl *tbl,
struct rte_mbuf *pkt,
uint64_t start_time,
uint32_t prev_idx,
uint16_t frag_offset,
uint8_t is_last_frag)
{
uint32_t item_idx;
item_idx = find_an_empty_item(tbl);
if (unlikely(item_idx == INVALID_ARRAY_INDEX))
return INVALID_ARRAY_INDEX;
tbl->items[item_idx].firstseg = pkt;
tbl->items[item_idx].lastseg = rte_pktmbuf_lastseg(pkt);
tbl->items[item_idx].start_time = start_time;
tbl->items[item_idx].next_pkt_idx = INVALID_ARRAY_INDEX;
tbl->items[item_idx].frag_offset = frag_offset;
tbl->items[item_idx].is_last_frag = is_last_frag;
tbl->items[item_idx].nb_merged = 1;
tbl->item_num++;
/* if the previous packet exists, chain them together. */
if (prev_idx != INVALID_ARRAY_INDEX) {
tbl->items[item_idx].next_pkt_idx =
tbl->items[prev_idx].next_pkt_idx;
tbl->items[prev_idx].next_pkt_idx = item_idx;
}
return item_idx;
}
static inline uint32_t
delete_item(struct gro_udp4_tbl *tbl, uint32_t item_idx,
uint32_t prev_item_idx)
{
uint32_t next_idx = tbl->items[item_idx].next_pkt_idx;
/* NULL indicates an empty item */
tbl->items[item_idx].firstseg = NULL;
tbl->item_num--;
if (prev_item_idx != INVALID_ARRAY_INDEX)
tbl->items[prev_item_idx].next_pkt_idx = next_idx;
return next_idx;
}
static inline uint32_t
insert_new_flow(struct gro_udp4_tbl *tbl,
struct udp4_flow_key *src,
uint32_t item_idx)
{
struct udp4_flow_key *dst;
uint32_t flow_idx;
flow_idx = find_an_empty_flow(tbl);
if (unlikely(flow_idx == INVALID_ARRAY_INDEX))
return INVALID_ARRAY_INDEX;
dst = &(tbl->flows[flow_idx].key);
rte_ether_addr_copy(&(src->eth_saddr), &(dst->eth_saddr));
rte_ether_addr_copy(&(src->eth_daddr), &(dst->eth_daddr));
dst->ip_src_addr = src->ip_src_addr;
dst->ip_dst_addr = src->ip_dst_addr;
dst->ip_id = src->ip_id;
tbl->flows[flow_idx].start_index = item_idx;
tbl->flow_num++;
return flow_idx;
}
/*
 * Refresh the IPv4 header of a packet that is about to be flushed:
 * rewrite total_length from the mbuf's packet length and, when the item
 * holds the last fragment, clear the MF (more fragments) flag.
 */
static inline void
update_header(struct gro_udp4_item *item)
{
	struct rte_mbuf *head = item->firstseg;
	struct rte_ipv4_hdr *ip;
	uint16_t frag_field;

	/* The IPv4 header sits l2_len bytes into the packet data. */
	ip = (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(head, char *) +
			head->l2_len);
	ip->total_length = rte_cpu_to_be_16(head->pkt_len -
			head->l2_len);

	if (!item->is_last_frag)
		return;

	/* Clear MF bit if it is last fragment */
	frag_field = rte_be_to_cpu_16(ip->fragment_offset);
	ip->fragment_offset =
		rte_cpu_to_be_16(frag_field & ~RTE_IPV4_HDR_MF_FLAG);
}
/**
 * Try to merge an IPv4 fragment into the reassembly table.
 *
 * The flow of a packet is identified by its Ethernet source/destination
 * addresses, IPv4 source/destination addresses and IPv4 packet id. Items
 * of one flow are kept in a chain ordered by fragment offset.
 *
 * Trailing bytes beyond the length announced by the IPv4 header are
 * trimmed from the mbuf before the packet is merged or stored.
 *
 * @param pkt
 *  Packet to process; l2_len and l3_len must already be set.
 * @param tbl
 *  Reassembly table.
 * @param start_time
 *  Timestamp recorded on the item if the packet is stored as a new item.
 * @return
 *  - 1 if the packet was merged into an existing item.
 *  - 0 if the packet was stored in the table as a new item.
 *  - -1 if the packet was not taken: it is not a fragment, its lengths
 *    are inconsistent, or the table has no free item or flow slot.
 */
int32_t
gro_udp4_reassemble(struct rte_mbuf *pkt,
		struct gro_udp4_tbl *tbl,
		uint64_t start_time)
{
	struct rte_ether_hdr *eth_hdr;
	struct rte_ipv4_hdr *ipv4_hdr;
	uint16_t ip_dl;
	uint16_t ip_id, hdr_len;
	uint16_t frag_offset = 0;
	uint8_t is_last_frag;
	struct udp4_flow_key key;
	uint32_t cur_idx, prev_idx, item_idx;
	uint32_t i, max_flow_num, remaining_flow_num;
	uint32_t frame_len, pad_len;
	int cmp;
	uint8_t find;

	eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);
	ipv4_hdr = (struct rte_ipv4_hdr *)((char *)eth_hdr + pkt->l2_len);
	hdr_len = pkt->l2_len + pkt->l3_len;

	/*
	 * Don't process non-fragment packet.
	 */
	if (!is_ipv4_fragment(ipv4_hdr))
		return -1;

	/*
	 * Don't process the packet whose payload length is less than or
	 * equal to 0.
	 */
	if (pkt->pkt_len <= hdr_len)
		return -1;

	ip_dl = rte_be_to_cpu_16(ipv4_hdr->total_length);
	if (ip_dl <= pkt->l3_len)
		return -1;

	/*
	 * update_header() and gro_udp4_merge_items() derive lengths from
	 * pkt_len, while the neighbor check below uses the length from the
	 * IPv4 header. Make the two agree: refuse packets shorter than the
	 * header claims and trim any trailing padding, so padding bytes are
	 * never merged into the datagram.
	 */
	frame_len = (uint32_t)ip_dl + pkt->l2_len;
	if (pkt->pkt_len < frame_len)
		return -1;
	pad_len = pkt->pkt_len - frame_len;
	if (pad_len != 0 && (pad_len > UINT16_MAX ||
			rte_pktmbuf_trim(pkt, (uint16_t)pad_len) != 0))
		return -1;

	ip_dl -= pkt->l3_len;
	ip_id = rte_be_to_cpu_16(ipv4_hdr->packet_id);
	frag_offset = rte_be_to_cpu_16(ipv4_hdr->fragment_offset);
	/* The fragment with the MF flag cleared is the last one. */
	is_last_frag = ((frag_offset & RTE_IPV4_HDR_MF_FLAG) == 0) ? 1 : 0;
	/* The header stores the offset in 8-byte units; convert to bytes. */
	frag_offset = (uint16_t)(frag_offset & RTE_IPV4_HDR_OFFSET_MASK) << 3;

	rte_ether_addr_copy(&(eth_hdr->s_addr), &(key.eth_saddr));
	rte_ether_addr_copy(&(eth_hdr->d_addr), &(key.eth_daddr));
	key.ip_src_addr = ipv4_hdr->src_addr;
	key.ip_dst_addr = ipv4_hdr->dst_addr;
	key.ip_id = ip_id;

	/*
	 * Search for a matched flow. The scan stops early once every flow
	 * currently in use has been compared.
	 */
	max_flow_num = tbl->max_flow_num;
	remaining_flow_num = tbl->flow_num;
	find = 0;
	for (i = 0; i < max_flow_num && remaining_flow_num; i++) {
		if (tbl->flows[i].start_index != INVALID_ARRAY_INDEX) {
			if (is_same_udp4_flow(tbl->flows[i].key, key)) {
				find = 1;
				break;
			}
			remaining_flow_num--;
		}
	}

	/*
	 * Fail to find a matched flow. Insert a new flow and store the
	 * packet into the flow.
	 */
	if (find == 0) {
		item_idx = insert_new_item(tbl, pkt, start_time,
				INVALID_ARRAY_INDEX, frag_offset,
				is_last_frag);
		if (unlikely(item_idx == INVALID_ARRAY_INDEX))
			return -1;
		if (insert_new_flow(tbl, &key, item_idx) ==
				INVALID_ARRAY_INDEX) {
			/*
			 * Fail to insert a new flow, so delete the
			 * stored packet.
			 */
			delete_item(tbl, item_idx, INVALID_ARRAY_INDEX);
			return -1;
		}
		return 0;
	}

	/*
	 * Check all packets in the flow and try to find a neighbor for
	 * the input packet.
	 */
	cur_idx = tbl->flows[i].start_index;
	prev_idx = cur_idx;
	do {
		cmp = udp4_check_neighbor(&(tbl->items[cur_idx]),
				frag_offset, ip_dl, 0);
		if (cmp) {
			if (merge_two_udp4_packets(&(tbl->items[cur_idx]),
						pkt, cmp, frag_offset,
						is_last_frag, 0))
				return 1;
			/*
			 * Fail to merge the two packets, as the packet
			 * length is greater than the max value. Store
			 * the packet into the flow.
			 */
			if (insert_new_item(tbl, pkt, start_time, prev_idx,
						frag_offset, is_last_frag) ==
					INVALID_ARRAY_INDEX)
				return -1;
			return 0;
		}

		/* Ensure inserted items are ordered by frag_offset */
		if (frag_offset < tbl->items[cur_idx].frag_offset)
			break;

		prev_idx = cur_idx;
		cur_idx = tbl->items[cur_idx].next_pkt_idx;
	} while (cur_idx != INVALID_ARRAY_INDEX);

	/* Fail to find a neighbor, so store the packet into the flow. */
	if (cur_idx == tbl->flows[i].start_index) {
		/* Insert it before the first packet of the flow */
		item_idx = insert_new_item(tbl, pkt, start_time,
				INVALID_ARRAY_INDEX, frag_offset,
				is_last_frag);
		if (unlikely(item_idx == INVALID_ARRAY_INDEX))
			return -1;
		tbl->items[item_idx].next_pkt_idx = cur_idx;
		tbl->flows[i].start_index = item_idx;
	} else {
		/* Insert it after prev_idx, the last item visited before it. */
		if (insert_new_item(tbl, pkt, start_time, prev_idx,
					frag_offset, is_last_frag)
				== INVALID_ARRAY_INDEX)
			return -1;
	}

	return 0;
}
/*
 * Fold the items that follow start_idx in its chain into the item at
 * start_idx, for as long as the next item is a neighbor and the merge
 * succeeds. Each merged item is removed from the table and the chain of
 * start_idx is relinked past it.
 *
 * Always returns 0.
 */
static int
gro_udp4_merge_items(struct gro_udp4_tbl *tbl,
		uint32_t start_idx)
{
	uint16_t frag_offset;
	uint8_t is_last_frag;
	/*
	 * Unsigned, matching how gro_udp4_reassemble() passes the payload
	 * length to udp4_check_neighbor(). A signed 16-bit value cannot hold
	 * payloads above 32767 bytes.
	 */
	uint16_t ip_dl;
	struct rte_mbuf *pkt;
	int cmp;
	uint32_t item_idx;
	uint16_t hdr_len;

	item_idx = tbl->items[start_idx].next_pkt_idx;
	while (item_idx != INVALID_ARRAY_INDEX) {
		pkt = tbl->items[item_idx].firstseg;
		hdr_len = pkt->l2_len + pkt->l3_len;
		ip_dl = pkt->pkt_len - hdr_len;
		frag_offset = tbl->items[item_idx].frag_offset;
		is_last_frag = tbl->items[item_idx].is_last_frag;

		cmp = udp4_check_neighbor(&(tbl->items[start_idx]),
				frag_offset, ip_dl, 0);
		/* Stop at the first item that is not a neighbor. */
		if (cmp == 0)
			break;

		/* Stop as well when the two packets cannot be merged. */
		if (!merge_two_udp4_packets(&(tbl->items[start_idx]),
					pkt, cmp, frag_offset,
					is_last_frag, 0))
			break;

		/* The item now lives inside start_idx; drop it and relink. */
		item_idx = delete_item(tbl, item_idx, INVALID_ARRAY_INDEX);
		tbl->items[start_idx].next_pkt_idx = item_idx;
	}

	return 0;
}
/**
 * Flush timed-out packets from the table.
 *
 * For every flow, items are taken from the head of the flow's chain while
 * their start_time is not later than flush_timestamp. Before an item is
 * emitted, the items that follow it are merged into it where possible,
 * and its IPv4 header is refreshed if it holds more than one packet.
 *
 * @param tbl
 *  Reassembly table.
 * @param flush_timestamp
 *  Items whose start_time is less than or equal to this value are flushed.
 * @param out
 *  Array receiving the flushed packets.
 * @param nb_out
 *  Capacity of out. Nothing is flushed when it is 0.
 * @return
 *  Number of packets written to out.
 */
uint16_t
gro_udp4_tbl_timeout_flush(struct gro_udp4_tbl *tbl,
		uint64_t flush_timestamp,
		struct rte_mbuf **out,
		uint16_t nb_out)
{
	uint16_t k = 0;
	uint32_t i, j;
	uint32_t max_flow_num = tbl->max_flow_num;

	/*
	 * out[] is written before k is compared with nb_out below, so an
	 * empty output array has to be rejected up front. Otherwise the
	 * first flushed packet would land past the end of out and the
	 * k == nb_out bound would never trigger.
	 */
	if (unlikely(nb_out == 0))
		return 0;

	for (i = 0; i < max_flow_num; i++) {
		if (unlikely(tbl->flow_num == 0))
			return k;

		j = tbl->flows[i].start_index;
		while (j != INVALID_ARRAY_INDEX) {
			/*
			 * Flushing packets does not strictly follow
			 * timestamp. It does not flush left packets of
			 * the flow this time once it finds one item
			 * whose start_time is greater than
			 * flush_timestamp. So go to check other flows.
			 */
			if (tbl->items[j].start_time > flush_timestamp)
				break;

			gro_udp4_merge_items(tbl, j);
			out[k++] = tbl->items[j].firstseg;
			if (tbl->items[j].nb_merged > 1)
				update_header(&(tbl->items[j]));
			/*
			 * Delete the packet and get the next
			 * packet in the flow.
			 */
			j = delete_item(tbl, j, INVALID_ARRAY_INDEX);
			tbl->flows[i].start_index = j;
			/* The flow is gone once its last item is flushed. */
			if (j == INVALID_ARRAY_INDEX)
				tbl->flow_num--;

			if (unlikely(k == nb_out))
				return k;
		}
	}

	return k;
}
/*
 * Return the number of items currently held in the table, or 0 when tbl
 * is NULL.
 */
uint32_t
gro_udp4_tbl_pkt_count(void *tbl)
{
	struct gro_udp4_tbl *table = tbl;

	if (table == NULL)
		return 0;

	return table->item_num;
}