mbuf: generic support for TCP segmentation offload
Some of the NICs supported by DPDK can accelerate TCP traffic by using segmentation offload. The application prepares a packet with a valid TCP header and a size of up to 64K, and delegates the segmentation to the NIC. Implement the generic part of TCP segmentation offload in rte_mbuf. It introduces 2 new fields in rte_mbuf: l4_len (length of L4 header in bytes) and tso_segsz (MSS of packets). To delegate the TCP segmentation to the hardware, the user has to: - set the PKT_TX_TCP_SEG flag in mbuf->ol_flags (this flag implies PKT_TX_TCP_CKSUM) - set the flag PKT_TX_IPV4 or PKT_TX_IPV6 - set PKT_TX_IP_CKSUM if it's IPv4, and set the IP checksum to 0 in the packet - fill the mbuf offload information: l2_len, l3_len, l4_len, tso_segsz - calculate the pseudo header checksum without taking ip_len into account, and set it in the TCP header, for instance by using rte_ipv4_phdr_cksum(ip_hdr, ol_flags) The API is inspired by ixgbe hardware (the next commit adds the support for ixgbe), but it seems generic enough to be used for other hw/drivers in the future. This commit also reworks the way l2_len and l3_len are used in the igb and ixgbe drivers, as l2_l3_len is no longer available in the mbuf. Signed-off-by: Mirek Walukiewicz <miroslaw.walukiewicz@intel.com> Signed-off-by: Olivier Matz <olivier.matz@6wind.com> Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
This commit is contained in:
parent
6006818cfb
commit
4199fdea60
@ -408,7 +408,7 @@ testpmd_mbuf_ctor(struct rte_mempool *mp,
|
||||
mb->ol_flags = 0;
|
||||
mb->data_off = RTE_PKTMBUF_HEADROOM;
|
||||
mb->nb_segs = 1;
|
||||
mb->l2_l3_len = 0;
|
||||
mb->tx_offload = 0;
|
||||
mb->vlan_tci = 0;
|
||||
mb->hash.rss = 0;
|
||||
}
|
||||
|
@ -302,7 +302,7 @@ mcast_out_pkt(struct rte_mbuf *pkt, int use_clone)
|
||||
/* copy metadata from source packet*/
|
||||
hdr->port = pkt->port;
|
||||
hdr->vlan_tci = pkt->vlan_tci;
|
||||
hdr->l2_l3_len = pkt->l2_l3_len;
|
||||
hdr->tx_offload = pkt->tx_offload;
|
||||
hdr->hash = pkt->hash;
|
||||
|
||||
hdr->ol_flags = pkt->ol_flags;
|
||||
|
@ -241,6 +241,7 @@ const char *rte_get_tx_ol_flag_name(uint64_t mask)
|
||||
case PKT_TX_UDP_CKSUM: return "PKT_TX_UDP_CKSUM";
|
||||
case PKT_TX_IEEE1588_TMST: return "PKT_TX_IEEE1588_TMST";
|
||||
case PKT_TX_VXLAN_CKSUM: return "PKT_TX_VXLAN_CKSUM";
|
||||
case PKT_TX_TCP_SEG: return "PKT_TX_TCP_SEG";
|
||||
default: return NULL;
|
||||
}
|
||||
}
|
||||
|
@ -2,6 +2,7 @@
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
|
||||
* Copyright 2014 6WIND S.A.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -132,6 +133,20 @@ extern "C" {
|
||||
|
||||
#define PKT_TX_VLAN_PKT (1ULL << 55) /**< TX packet is a 802.1q VLAN packet. */
|
||||
|
||||
/**
|
||||
* TCP segmentation offload. To enable this offload feature for a
|
||||
* packet to be transmitted on hardware supporting TSO:
|
||||
* - set the PKT_TX_TCP_SEG flag in mbuf->ol_flags (this flag implies
|
||||
* PKT_TX_TCP_CKSUM)
|
||||
* - if it's IPv4, set the PKT_TX_IP_CKSUM flag and write the IP checksum
|
||||
* to 0 in the packet
|
||||
* - fill the mbuf offload information: l2_len, l3_len, l4_len, tso_segsz
|
||||
* - calculate the pseudo header checksum without taking ip_len into account,
|
||||
* and set it in the TCP header. Refer to rte_ipv4_phdr_cksum() and
|
||||
* rte_ipv6_phdr_cksum() that can be used as helpers.
|
||||
*/
|
||||
#define PKT_TX_TCP_SEG (1ULL << 49)
|
||||
|
||||
/* Use final bit of flags to indicate a control mbuf */
|
||||
#define CTRL_MBUF_FLAG (1ULL << 63) /**< Mbuf contains control data */
|
||||
|
||||
@ -242,22 +257,18 @@ struct rte_mbuf {
|
||||
|
||||
/* fields to support TX offloads */
|
||||
union {
|
||||
uint16_t l2_l3_len; /**< combined l2/l3 lengths as single var */
|
||||
uint64_t tx_offload; /**< combined for easy fetch */
|
||||
struct {
|
||||
uint16_t l3_len:9; /**< L3 (IP) Header Length. */
|
||||
uint16_t l2_len:7; /**< L2 (MAC) Header Length. */
|
||||
};
|
||||
};
|
||||
uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
|
||||
uint64_t l3_len:9; /**< L3 (IP) Header Length. */
|
||||
uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
|
||||
uint64_t tso_segsz:16; /**< TCP TSO segment size */
|
||||
|
||||
/* fields for TX offloading of tunnels */
|
||||
union {
|
||||
uint16_t inner_l2_l3_len;
|
||||
/**< combined inner l2/l3 lengths as single var */
|
||||
struct {
|
||||
uint16_t inner_l3_len:9;
|
||||
/**< inner L3 (IP) Header Length. */
|
||||
uint16_t inner_l2_len:7;
|
||||
/**< inner L2 (MAC) Header Length. */
|
||||
/* fields for TX offloading of tunnels */
|
||||
uint64_t inner_l3_len:9; /**< inner L3 (IP) Hdr Length. */
|
||||
uint64_t inner_l2_len:7; /**< inner L2 (MAC) Hdr Length. */
|
||||
|
||||
/* uint64_t unused:8; */
|
||||
};
|
||||
};
|
||||
} __rte_cache_aligned;
|
||||
@ -609,8 +620,7 @@ static inline void rte_pktmbuf_reset(struct rte_mbuf *m)
|
||||
{
|
||||
m->next = NULL;
|
||||
m->pkt_len = 0;
|
||||
m->l2_l3_len = 0;
|
||||
m->inner_l2_l3_len = 0;
|
||||
m->tx_offload = 0;
|
||||
m->vlan_tci = 0;
|
||||
m->nb_segs = 1;
|
||||
m->port = 0xff;
|
||||
@ -679,8 +689,7 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *md)
|
||||
mi->data_len = md->data_len;
|
||||
mi->port = md->port;
|
||||
mi->vlan_tci = md->vlan_tci;
|
||||
mi->l2_l3_len = md->l2_l3_len;
|
||||
mi->inner_l2_l3_len = md->inner_l2_l3_len;
|
||||
mi->tx_offload = md->tx_offload;
|
||||
mi->hash = md->hash;
|
||||
|
||||
mi->next = NULL;
|
||||
|
@ -81,6 +81,7 @@
|
||||
|
||||
#include <rte_memcpy.h>
|
||||
#include <rte_byteorder.h>
|
||||
#include <rte_mbuf.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@ -312,13 +313,21 @@ rte_ipv4_cksum(const struct ipv4_hdr *ipv4_hdr)
|
||||
*
|
||||
* The checksum field must be set to 0 by the caller.
|
||||
*
|
||||
* Depending on the ol_flags, the pseudo-header checksum expected by the
|
||||
* drivers is not the same. For instance, when TSO is enabled, the IP
|
||||
* payload length must not be included in the pseudo-header checksum.
|
||||
*
|
||||
* When ol_flags is 0, it computes the standard pseudo-header checksum.
|
||||
*
|
||||
* @param ipv4_hdr
|
||||
* The pointer to the contiguous IPv4 header.
|
||||
* @param ol_flags
|
||||
* The ol_flags of the associated mbuf.
|
||||
* @return
|
||||
* The non-complemented checksum to set in the L4 header.
|
||||
*/
|
||||
static inline uint16_t
|
||||
rte_ipv4_phdr_cksum(const struct ipv4_hdr *ipv4_hdr)
|
||||
rte_ipv4_phdr_cksum(const struct ipv4_hdr *ipv4_hdr, uint64_t ol_flags)
|
||||
{
|
||||
struct ipv4_psd_header {
|
||||
uint32_t src_addr; /* IP address of source host. */
|
||||
@ -332,9 +341,13 @@ rte_ipv4_phdr_cksum(const struct ipv4_hdr *ipv4_hdr)
|
||||
psd_hdr.dst_addr = ipv4_hdr->dst_addr;
|
||||
psd_hdr.zero = 0;
|
||||
psd_hdr.proto = ipv4_hdr->next_proto_id;
|
||||
psd_hdr.len = rte_cpu_to_be_16(
|
||||
(uint16_t)(rte_be_to_cpu_16(ipv4_hdr->total_length)
|
||||
- sizeof(struct ipv4_hdr)));
|
||||
if (ol_flags & PKT_TX_TCP_SEG) {
|
||||
psd_hdr.len = 0;
|
||||
} else {
|
||||
psd_hdr.len = rte_cpu_to_be_16(
|
||||
(uint16_t)(rte_be_to_cpu_16(ipv4_hdr->total_length)
|
||||
- sizeof(struct ipv4_hdr)));
|
||||
}
|
||||
return rte_raw_cksum((const char *)&psd_hdr, sizeof(psd_hdr));
|
||||
}
|
||||
|
||||
@ -361,7 +374,7 @@ rte_ipv4_udptcp_cksum(const struct ipv4_hdr *ipv4_hdr, const void *l4_hdr)
|
||||
sizeof(struct ipv4_hdr);
|
||||
|
||||
cksum = rte_raw_cksum(l4_hdr, l4_len);
|
||||
cksum += rte_ipv4_phdr_cksum(ipv4_hdr);
|
||||
cksum += rte_ipv4_phdr_cksum(ipv4_hdr, 0);
|
||||
|
||||
cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff);
|
||||
cksum = (~cksum) & 0xffff;
|
||||
@ -386,13 +399,21 @@ struct ipv6_hdr {
|
||||
/**
|
||||
* Process the pseudo-header checksum of an IPv6 header.
|
||||
*
|
||||
* Depending on the ol_flags, the pseudo-header checksum expected by the
|
||||
* drivers is not the same. For instance, when TSO is enabled, the IPv6
|
||||
* payload length must not be included in the pseudo-header checksum.
|
||||
*
|
||||
* When ol_flags is 0, it computes the standard pseudo-header checksum.
|
||||
*
|
||||
* @param ipv6_hdr
|
||||
* The pointer to the contiguous IPv6 header.
|
||||
* @param ol_flags
|
||||
* The ol_flags of the associated mbuf.
|
||||
* @return
|
||||
* The non-complemented checksum to set in the L4 header.
|
||||
*/
|
||||
static inline uint16_t
|
||||
rte_ipv6_phdr_cksum(const struct ipv6_hdr *ipv6_hdr)
|
||||
rte_ipv6_phdr_cksum(const struct ipv6_hdr *ipv6_hdr, uint64_t ol_flags)
|
||||
{
|
||||
struct ipv6_psd_header {
|
||||
uint8_t src_addr[16]; /* IP address of source host. */
|
||||
@ -404,7 +425,11 @@ rte_ipv6_phdr_cksum(const struct ipv6_hdr *ipv6_hdr)
|
||||
rte_memcpy(&psd_hdr.src_addr, ipv6_hdr->src_addr,
|
||||
sizeof(ipv6_hdr->src_addr) + sizeof(ipv6_hdr->dst_addr));
|
||||
psd_hdr.proto = (ipv6_hdr->proto << 24);
|
||||
psd_hdr.len = ipv6_hdr->payload_len;
|
||||
if (ol_flags & PKT_TX_TCP_SEG) {
|
||||
psd_hdr.len = 0;
|
||||
} else {
|
||||
psd_hdr.len = ipv6_hdr->payload_len;
|
||||
}
|
||||
|
||||
return rte_raw_cksum((const char *)&psd_hdr, sizeof(psd_hdr));
|
||||
}
|
||||
|
@ -367,6 +367,13 @@ eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
|
||||
struct rte_mbuf *tx_pkt;
|
||||
struct rte_mbuf *m_seg;
|
||||
union igb_vlan_macip vlan_macip_lens;
|
||||
union {
|
||||
uint16_t u16;
|
||||
struct {
|
||||
uint16_t l3_len:9;
|
||||
uint16_t l2_len:7;
|
||||
};
|
||||
} l2_l3_len;
|
||||
uint64_t buf_dma_addr;
|
||||
uint32_t olinfo_status;
|
||||
uint32_t cmd_type_len;
|
||||
@ -404,8 +411,10 @@ eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
|
||||
tx_last = (uint16_t) (tx_id + tx_pkt->nb_segs - 1);
|
||||
|
||||
ol_flags = tx_pkt->ol_flags;
|
||||
l2_l3_len.l2_len = tx_pkt->l2_len;
|
||||
l2_l3_len.l3_len = tx_pkt->l3_len;
|
||||
vlan_macip_lens.f.vlan_tci = tx_pkt->vlan_tci;
|
||||
vlan_macip_lens.f.l2_l3_len = tx_pkt->l2_l3_len;
|
||||
vlan_macip_lens.f.l2_l3_len = l2_l3_len.u16;
|
||||
tx_ol_req = ol_flags & IGB_TX_OFFLOAD_MASK;
|
||||
|
||||
/* If a Context Descriptor needs to be built. */
|
||||
|
@ -546,6 +546,13 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
|
||||
struct rte_mbuf *tx_pkt;
|
||||
struct rte_mbuf *m_seg;
|
||||
union ixgbe_vlan_macip vlan_macip_lens;
|
||||
union {
|
||||
uint16_t u16;
|
||||
struct {
|
||||
uint16_t l3_len:9;
|
||||
uint16_t l2_len:7;
|
||||
};
|
||||
} l2_l3_len;
|
||||
uint64_t buf_dma_addr;
|
||||
uint32_t olinfo_status;
|
||||
uint32_t cmd_type_len;
|
||||
@ -588,8 +595,10 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
|
||||
/* If hardware offload required */
|
||||
tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
|
||||
if (tx_ol_req) {
|
||||
l2_l3_len.l2_len = tx_pkt->l2_len;
|
||||
l2_l3_len.l3_len = tx_pkt->l3_len;
|
||||
vlan_macip_lens.f.vlan_tci = tx_pkt->vlan_tci;
|
||||
vlan_macip_lens.f.l2_l3_len = tx_pkt->l2_l3_len;
|
||||
vlan_macip_lens.f.l2_l3_len = l2_l3_len.u16;
|
||||
|
||||
/* Check whether a new context must be built or the existing one can be reused. */
|
||||
ctx = what_advctx_update(txq, tx_ol_req,
|
||||
|
Loading…
x
Reference in New Issue
Block a user