numam-dpdk/lib/librte_ip_frag/rte_ipv4_reassembly.c

174 lines
4.8 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2010-2014 Intel Corporation
*/
#include <stddef.h>
#include <rte_debug.h>
#include "ip_frag_common.h"
/*
* Reassemble fragments into one packet.
*/
struct rte_mbuf *
ipv4_frag_reassemble(struct ip_frag_pkt *fp)
{
struct rte_ipv4_hdr *ip_hdr;
struct rte_mbuf *m, *prev;
uint32_t i, n, ofs, first_len;
uint32_t curr_idx = 0;
first_len = fp->frags[IP_FIRST_FRAG_IDX].len;
n = fp->last_idx - 1;
/*start from the last fragment. */
m = fp->frags[IP_LAST_FRAG_IDX].mb;
ofs = fp->frags[IP_LAST_FRAG_IDX].ofs;
curr_idx = IP_LAST_FRAG_IDX;
while (ofs != first_len) {
prev = m;
for (i = n; i != IP_FIRST_FRAG_IDX && ofs != first_len; i--) {
/* previous fragment found. */
if(fp->frags[i].ofs + fp->frags[i].len == ofs) {
RTE_ASSERT(curr_idx != i);
/* adjust start of the last fragment data. */
rte_pktmbuf_adj(m,
(uint16_t)(m->l2_len + m->l3_len));
rte_pktmbuf_chain(fp->frags[i].mb, m);
/* this mbuf should not be accessed directly */
fp->frags[curr_idx].mb = NULL;
curr_idx = i;
/* update our last fragment and offset. */
m = fp->frags[i].mb;
ofs = fp->frags[i].ofs;
}
}
/* error - hole in the packet. */
if (m == prev) {
return NULL;
}
}
/* chain with the first fragment. */
rte_pktmbuf_adj(m, (uint16_t)(m->l2_len + m->l3_len));
rte_pktmbuf_chain(fp->frags[IP_FIRST_FRAG_IDX].mb, m);
fp->frags[curr_idx].mb = NULL;
m = fp->frags[IP_FIRST_FRAG_IDX].mb;
fp->frags[IP_FIRST_FRAG_IDX].mb = NULL;
/* update ipv4 header for the reassembled packet */
ip_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *, m->l2_len);
ip_hdr->total_length = rte_cpu_to_be_16((uint16_t)(fp->total_size +
mbuf: flatten struct vlan_macip The vlan_macip structure combined a vlan tag id with l2 and l3 headers lengths for tracking offloads. However, this structure was only used as a unit by the e1000 and ixgbe drivers, not generally. This patch removes the structure from the mbuf header and places the fields into the mbuf structure directly at the required point, without any net effect on the structure layout. This allows us to treat the vlan tags and header length fields as separate for future mbuf changes. The drivers which were written to use the combined structure still do so, using a driver-local definition of it. Reduce perf regression caused by splitting vlan_macip field. This is done by providing a single uint16_t value to allow writing/clearing the l2 and l3 lengths together. There is still a small perf hit to the slow path TX due to the reads from vlan_tci and l2/l3 lengths being separated. (<5% in my tests with testpmd with no extra params). Unfortunately, this cannot be eliminated, without restoring the vlan tags and l2/l3 lengths as a combined 32-bit field. This would prevent us from ever looking to move those fields about and is an artificial tie that applies only for performance in igb and ixgbe drivers. Therefore, this patch keeps the vlan_tci field separate from the lengths as the best solution going forward. Signed-off-by: Bruce Richardson <bruce.richardson@intel.com> Acked-by: Olivier Matz <olivier.matz@6wind.com> Acked-by: Pablo de Lara <pablo.de.lara.guarch@intel.com>
2014-09-09 14:40:56 +00:00
m->l3_len));
ip_hdr->fragment_offset = (uint16_t)(ip_hdr->fragment_offset &
net: add rte prefix to IP defines Add 'RTE_' prefix to defines: - rename IPv4( as RTE_IPv4(. - rename IPV4_MAX_PKT_LEN as RTE_IPV4_MAX_PKT_LEN. - rename IPV4_HDR_IHL_MASK as RTE_IPV4_HDR_IHL_MASK. - rename IPV4_IHL_MULTIPLIER as RTE_IPV4_IHL_MULTIPLIER. - rename IPV4_HDR_DF_SHIFT as RTE_IPV4_HDR_DF_SHIFT. - rename IPV4_HDR_MF_SHIFT as RTE_IPV4_HDR_MF_SHIFT. - rename IPV4_HDR_FO_SHIFT as RTE_IPV4_HDR_FO_SHIFT. - rename IPV4_HDR_DF_FLAG as RTE_IPV4_HDR_DF_FLAG. - rename IPV4_HDR_MF_FLAG as RTE_IPV4_HDR_MF_FLAG. - rename IPV4_HDR_OFFSET_MASK as RTE_IPV4_HDR_OFFSET_MASK. - rename IPV4_HDR_OFFSET_UNITS as RTE_IPV4_HDR_OFFSET_UNITS. - rename IPV4_ANY as RTE_IPV4_ANY. - rename IPV4_LOOPBACK as RTE_IPV4_LOOPBACK. - rename IPV4_BROADCAST as RTE_IPV4_BROADCAST. - rename IPV4_ALLHOSTS_GROUP as RTE_IPV4_ALLHOSTS_GROUP. - rename IPV4_ALLRTRS_GROUP as RTE_IPV4_ALLRTRS_GROUP. - rename IPV4_MAX_LOCAL_GROUP as RTE_IPV4_MAX_LOCAL_GROUP. - rename IPV4_MIN_MCAST as RTE_IPV4_MIN_MCAST. - rename IPV4_MAX_MCAST as RTE_IPV4_MAX_MCAST. - rename IS_IPV4_MCAST as RTE_IS_IPV4_MCAST. - rename IPV6_HDR_FL_SHIFT as RTE_IPV6_HDR_FL_SHIFT. - rename IPV6_HDR_TC_SHIFT as RTE_IPV6_HDR_TC_SHIFT. - rename IPV6_HDR_FL_MASK as RTE_IPV6_HDR_FL_MASK. - rename IPV6_HDR_TC_MASK as RTE_IPV6_HDR_TC_MASK. Signed-off-by: Olivier Matz <olivier.matz@6wind.com> Reviewed-by: Stephen Hemminger <stephen@networkplumber.org> Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com> Reviewed-by: Ferruh Yigit <ferruh.yigit@intel.com>
2019-05-21 16:13:11 +00:00
rte_cpu_to_be_16(RTE_IPV4_HDR_DF_FLAG));
ip_hdr->hdr_checksum = 0;
return m;
}
/*
* Process new mbuf with fragment of IPV4 packet.
* Incoming mbuf should have it's l2_len/l3_len fields setuped correclty.
* @param tbl
* Table where to lookup/add the fragmented packet.
* @param mb
* Incoming mbuf with IPV4 fragment.
* @param tms
* Fragment arrival timestamp.
* @param ip_hdr
* Pointer to the IPV4 header inside the fragment.
* @return
* Pointer to mbuf for reassembled packet, or NULL if:
* - an error occurred.
* - not all fragments of the packet are collected yet.
*/
struct rte_mbuf *
rte_ipv4_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, uint64_t tms,
struct rte_ipv4_hdr *ip_hdr)
{
struct ip_frag_pkt *fp;
struct ip_frag_key key;
const unaligned_uint64_t *psd;
uint16_t flag_offset, ip_ofs, ip_flag;
int32_t ip_len;
ip_frag: remove padding length of fragment In some situations, we would get several ip fragments, which total data length is less than min_ip_len(64) and padding with zeros. We simulated intermediate fragments by modifying the MTU. To illustrate the problem, we simplify the packet format and ignore the impact of the packet header.In namespace2, a packet whose data length is 1520 is sent. When the packet passes tap2, the packet is divided into two fragments: fragment A and B, similar to (1520 = 1510 + 10). When the packet passes tap3, the larger fragment packet A is divided into two fragments A1 and A2, similar to (1510 = 1500 + 10). Finally, the bond interface receives three fragments: A1, A2, and B (1520 = 1500 + 10 + 10). One fragmented packet A2 is smaller than the minimum Ethernet frame length, so it needs to be padded. |---------------------------------------------------| | HOST | | |--------------| |----------------------------| | | | ns2 | | |--------------| | | | | |--------| | | |--------| |--------| | | | | | tap1 | | | | tap2 | ns1| tap3 | | | | | |mtu=1510| | | |mtu=1510| |mtu=1500| | | | |--|1.1.1.1 |--| |--|1.1.1.2 |----|2.1.1.1 |--| | | |--------| |--------| |--------| | | | | | | | |-----------------| | | | | | | |--------| | | | bond | | |--------------------------------------|mtu=1500|---| |--------| When processing the preceding packets above, DPDK would aggregate fragmented packets A2 and B. And error packets are generated, which padding(zero) is displayed in the middle of the packet. A2 + B: 0000 fa 16 3e 9f fb 82 fa 47 b2 57 dc 20 08 00 45 00 0010 00 33 b4 66 00 ba 3f 01 c1 a5 01 01 01 01 02 01 0020 01 02 c0 c1 c2 c3 c4 c5 c6 c7 00 00 00 00 00 00 0030 00 00 00 00 00 00 00 00 00 00 00 00 c8 c9 ca cb 0040 cc cd ce cf d0 d1 d2 d3 d4 d5 d6 d7 d8 d9 da db 0050 dc dd de df e0 e1 e2 e3 e4 e5 e6 So, we would calculate the length of padding, and remove the padding in pkt_len and data_len before aggregation. And also we have the fix for both ipv4 and ipv6. Fixes: 7f0983ee331c ("ip_frag: check fragment length of incoming packet") Cc: stable@dpdk.org Signed-off-by: Yicai Lu <luyicai@huawei.com> Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
2020-12-16 13:36:30 +00:00
int32_t trim;
flag_offset = rte_be_to_cpu_16(ip_hdr->fragment_offset);
net: add rte prefix to IP defines Add 'RTE_' prefix to defines: - rename IPv4( as RTE_IPv4(. - rename IPV4_MAX_PKT_LEN as RTE_IPV4_MAX_PKT_LEN. - rename IPV4_HDR_IHL_MASK as RTE_IPV4_HDR_IHL_MASK. - rename IPV4_IHL_MULTIPLIER as RTE_IPV4_IHL_MULTIPLIER. - rename IPV4_HDR_DF_SHIFT as RTE_IPV4_HDR_DF_SHIFT. - rename IPV4_HDR_MF_SHIFT as RTE_IPV4_HDR_MF_SHIFT. - rename IPV4_HDR_FO_SHIFT as RTE_IPV4_HDR_FO_SHIFT. - rename IPV4_HDR_DF_FLAG as RTE_IPV4_HDR_DF_FLAG. - rename IPV4_HDR_MF_FLAG as RTE_IPV4_HDR_MF_FLAG. - rename IPV4_HDR_OFFSET_MASK as RTE_IPV4_HDR_OFFSET_MASK. - rename IPV4_HDR_OFFSET_UNITS as RTE_IPV4_HDR_OFFSET_UNITS. - rename IPV4_ANY as RTE_IPV4_ANY. - rename IPV4_LOOPBACK as RTE_IPV4_LOOPBACK. - rename IPV4_BROADCAST as RTE_IPV4_BROADCAST. - rename IPV4_ALLHOSTS_GROUP as RTE_IPV4_ALLHOSTS_GROUP. - rename IPV4_ALLRTRS_GROUP as RTE_IPV4_ALLRTRS_GROUP. - rename IPV4_MAX_LOCAL_GROUP as RTE_IPV4_MAX_LOCAL_GROUP. - rename IPV4_MIN_MCAST as RTE_IPV4_MIN_MCAST. - rename IPV4_MAX_MCAST as RTE_IPV4_MAX_MCAST. - rename IS_IPV4_MCAST as RTE_IS_IPV4_MCAST. - rename IPV6_HDR_FL_SHIFT as RTE_IPV6_HDR_FL_SHIFT. - rename IPV6_HDR_TC_SHIFT as RTE_IPV6_HDR_TC_SHIFT. - rename IPV6_HDR_FL_MASK as RTE_IPV6_HDR_FL_MASK. - rename IPV6_HDR_TC_MASK as RTE_IPV6_HDR_TC_MASK. Signed-off-by: Olivier Matz <olivier.matz@6wind.com> Reviewed-by: Stephen Hemminger <stephen@networkplumber.org> Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com> Reviewed-by: Ferruh Yigit <ferruh.yigit@intel.com>
2019-05-21 16:13:11 +00:00
ip_ofs = (uint16_t)(flag_offset & RTE_IPV4_HDR_OFFSET_MASK);
ip_flag = (uint16_t)(flag_offset & RTE_IPV4_HDR_MF_FLAG);
psd = (unaligned_uint64_t *)&ip_hdr->src_addr;
/* use first 8 bytes only */
key.src_dst[0] = psd[0];
key.id = ip_hdr->packet_id;
key.key_len = IPV4_KEYLEN;
net: add rte prefix to IP defines Add 'RTE_' prefix to defines: - rename IPv4( as RTE_IPv4(. - rename IPV4_MAX_PKT_LEN as RTE_IPV4_MAX_PKT_LEN. - rename IPV4_HDR_IHL_MASK as RTE_IPV4_HDR_IHL_MASK. - rename IPV4_IHL_MULTIPLIER as RTE_IPV4_IHL_MULTIPLIER. - rename IPV4_HDR_DF_SHIFT as RTE_IPV4_HDR_DF_SHIFT. - rename IPV4_HDR_MF_SHIFT as RTE_IPV4_HDR_MF_SHIFT. - rename IPV4_HDR_FO_SHIFT as RTE_IPV4_HDR_FO_SHIFT. - rename IPV4_HDR_DF_FLAG as RTE_IPV4_HDR_DF_FLAG. - rename IPV4_HDR_MF_FLAG as RTE_IPV4_HDR_MF_FLAG. - rename IPV4_HDR_OFFSET_MASK as RTE_IPV4_HDR_OFFSET_MASK. - rename IPV4_HDR_OFFSET_UNITS as RTE_IPV4_HDR_OFFSET_UNITS. - rename IPV4_ANY as RTE_IPV4_ANY. - rename IPV4_LOOPBACK as RTE_IPV4_LOOPBACK. - rename IPV4_BROADCAST as RTE_IPV4_BROADCAST. - rename IPV4_ALLHOSTS_GROUP as RTE_IPV4_ALLHOSTS_GROUP. - rename IPV4_ALLRTRS_GROUP as RTE_IPV4_ALLRTRS_GROUP. - rename IPV4_MAX_LOCAL_GROUP as RTE_IPV4_MAX_LOCAL_GROUP. - rename IPV4_MIN_MCAST as RTE_IPV4_MIN_MCAST. - rename IPV4_MAX_MCAST as RTE_IPV4_MAX_MCAST. - rename IS_IPV4_MCAST as RTE_IS_IPV4_MCAST. - rename IPV6_HDR_FL_SHIFT as RTE_IPV6_HDR_FL_SHIFT. - rename IPV6_HDR_TC_SHIFT as RTE_IPV6_HDR_TC_SHIFT. - rename IPV6_HDR_FL_MASK as RTE_IPV6_HDR_FL_MASK. - rename IPV6_HDR_TC_MASK as RTE_IPV6_HDR_TC_MASK. Signed-off-by: Olivier Matz <olivier.matz@6wind.com> Reviewed-by: Stephen Hemminger <stephen@networkplumber.org> Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com> Reviewed-by: Ferruh Yigit <ferruh.yigit@intel.com>
2019-05-21 16:13:11 +00:00
ip_ofs *= RTE_IPV4_HDR_OFFSET_UNITS;
ip_len = rte_be_to_cpu_16(ip_hdr->total_length) - mb->l3_len;
ip_frag: remove padding length of fragment In some situations, we would get several ip fragments, which total data length is less than min_ip_len(64) and padding with zeros. We simulated intermediate fragments by modifying the MTU. To illustrate the problem, we simplify the packet format and ignore the impact of the packet header.In namespace2, a packet whose data length is 1520 is sent. When the packet passes tap2, the packet is divided into two fragments: fragment A and B, similar to (1520 = 1510 + 10). When the packet passes tap3, the larger fragment packet A is divided into two fragments A1 and A2, similar to (1510 = 1500 + 10). Finally, the bond interface receives three fragments: A1, A2, and B (1520 = 1500 + 10 + 10). One fragmented packet A2 is smaller than the minimum Ethernet frame length, so it needs to be padded. |---------------------------------------------------| | HOST | | |--------------| |----------------------------| | | | ns2 | | |--------------| | | | | |--------| | | |--------| |--------| | | | | | tap1 | | | | tap2 | ns1| tap3 | | | | | |mtu=1510| | | |mtu=1510| |mtu=1500| | | | |--|1.1.1.1 |--| |--|1.1.1.2 |----|2.1.1.1 |--| | | |--------| |--------| |--------| | | | | | | | |-----------------| | | | | | | |--------| | | | bond | | |--------------------------------------|mtu=1500|---| |--------| When processing the preceding packets above, DPDK would aggregate fragmented packets A2 and B. And error packets are generated, which padding(zero) is displayed in the middle of the packet. A2 + B: 0000 fa 16 3e 9f fb 82 fa 47 b2 57 dc 20 08 00 45 00 0010 00 33 b4 66 00 ba 3f 01 c1 a5 01 01 01 01 02 01 0020 01 02 c0 c1 c2 c3 c4 c5 c6 c7 00 00 00 00 00 00 0030 00 00 00 00 00 00 00 00 00 00 00 00 c8 c9 ca cb 0040 cc cd ce cf d0 d1 d2 d3 d4 d5 d6 d7 d8 d9 da db 0050 dc dd de df e0 e1 e2 e3 e4 e5 e6 So, we would calculate the length of padding, and remove the padding in pkt_len and data_len before aggregation. And also we have the fix for both ipv4 and ipv6. Fixes: 7f0983ee331c ("ip_frag: check fragment length of incoming packet") Cc: stable@dpdk.org Signed-off-by: Yicai Lu <luyicai@huawei.com> Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
2020-12-16 13:36:30 +00:00
trim = mb->pkt_len - (ip_len + mb->l3_len + mb->l2_len);
IP_FRAG_LOG(DEBUG, "%s:%d:\n"
ip_frag: remove padding length of fragment In some situations, we would get several ip fragments, which total data length is less than min_ip_len(64) and padding with zeros. We simulated intermediate fragments by modifying the MTU. To illustrate the problem, we simplify the packet format and ignore the impact of the packet header.In namespace2, a packet whose data length is 1520 is sent. When the packet passes tap2, the packet is divided into two fragments: fragment A and B, similar to (1520 = 1510 + 10). When the packet passes tap3, the larger fragment packet A is divided into two fragments A1 and A2, similar to (1510 = 1500 + 10). Finally, the bond interface receives three fragments: A1, A2, and B (1520 = 1500 + 10 + 10). One fragmented packet A2 is smaller than the minimum Ethernet frame length, so it needs to be padded. |---------------------------------------------------| | HOST | | |--------------| |----------------------------| | | | ns2 | | |--------------| | | | | |--------| | | |--------| |--------| | | | | | tap1 | | | | tap2 | ns1| tap3 | | | | | |mtu=1510| | | |mtu=1510| |mtu=1500| | | | |--|1.1.1.1 |--| |--|1.1.1.2 |----|2.1.1.1 |--| | | |--------| |--------| |--------| | | | | | | | |-----------------| | | | | | | |--------| | | | bond | | |--------------------------------------|mtu=1500|---| |--------| When processing the preceding packets above, DPDK would aggregate fragmented packets A2 and B. And error packets are generated, which padding(zero) is displayed in the middle of the packet. A2 + B: 0000 fa 16 3e 9f fb 82 fa 47 b2 57 dc 20 08 00 45 00 0010 00 33 b4 66 00 ba 3f 01 c1 a5 01 01 01 01 02 01 0020 01 02 c0 c1 c2 c3 c4 c5 c6 c7 00 00 00 00 00 00 0030 00 00 00 00 00 00 00 00 00 00 00 00 c8 c9 ca cb 0040 cc cd ce cf d0 d1 d2 d3 d4 d5 d6 d7 d8 d9 da db 0050 dc dd de df e0 e1 e2 e3 e4 e5 e6 So, we would calculate the length of padding, and remove the padding in pkt_len and data_len before aggregation. And also we have the fix for both ipv4 and ipv6. Fixes: 7f0983ee331c ("ip_frag: check fragment length of incoming packet") Cc: stable@dpdk.org Signed-off-by: Yicai Lu <luyicai@huawei.com> Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
2020-12-16 13:36:30 +00:00
"mbuf: %p, tms: %" PRIu64 ", key: <%" PRIx64 ", %#x>"
"ofs: %u, len: %d, padding: %d, flags: %#x\n"
"tbl: %p, max_cycles: %" PRIu64 ", entry_mask: %#x, "
"max_entries: %u, use_entries: %u\n\n",
__func__, __LINE__,
ip_frag: remove padding length of fragment In some situations, we would get several ip fragments, which total data length is less than min_ip_len(64) and padding with zeros. We simulated intermediate fragments by modifying the MTU. To illustrate the problem, we simplify the packet format and ignore the impact of the packet header.In namespace2, a packet whose data length is 1520 is sent. When the packet passes tap2, the packet is divided into two fragments: fragment A and B, similar to (1520 = 1510 + 10). When the packet passes tap3, the larger fragment packet A is divided into two fragments A1 and A2, similar to (1510 = 1500 + 10). Finally, the bond interface receives three fragments: A1, A2, and B (1520 = 1500 + 10 + 10). One fragmented packet A2 is smaller than the minimum Ethernet frame length, so it needs to be padded. |---------------------------------------------------| | HOST | | |--------------| |----------------------------| | | | ns2 | | |--------------| | | | | |--------| | | |--------| |--------| | | | | | tap1 | | | | tap2 | ns1| tap3 | | | | | |mtu=1510| | | |mtu=1510| |mtu=1500| | | | |--|1.1.1.1 |--| |--|1.1.1.2 |----|2.1.1.1 |--| | | |--------| |--------| |--------| | | | | | | | |-----------------| | | | | | | |--------| | | | bond | | |--------------------------------------|mtu=1500|---| |--------| When processing the preceding packets above, DPDK would aggregate fragmented packets A2 and B. And error packets are generated, which padding(zero) is displayed in the middle of the packet. A2 + B: 0000 fa 16 3e 9f fb 82 fa 47 b2 57 dc 20 08 00 45 00 0010 00 33 b4 66 00 ba 3f 01 c1 a5 01 01 01 01 02 01 0020 01 02 c0 c1 c2 c3 c4 c5 c6 c7 00 00 00 00 00 00 0030 00 00 00 00 00 00 00 00 00 00 00 00 c8 c9 ca cb 0040 cc cd ce cf d0 d1 d2 d3 d4 d5 d6 d7 d8 d9 da db 0050 dc dd de df e0 e1 e2 e3 e4 e5 e6 So, we would calculate the length of padding, and remove the padding in pkt_len and data_len before aggregation. And also we have the fix for both ipv4 and ipv6. Fixes: 7f0983ee331c ("ip_frag: check fragment length of incoming packet") Cc: stable@dpdk.org Signed-off-by: Yicai Lu <luyicai@huawei.com> Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
2020-12-16 13:36:30 +00:00
mb, tms, key.src_dst[0], key.id, ip_ofs, ip_len, trim, ip_flag,
tbl, tbl->max_cycles, tbl->entry_mask, tbl->max_entries,
tbl->use_entries);
/* check that fragment length is greater then zero. */
if (ip_len <= 0) {
IP_FRAG_MBUF2DR(dr, mb);
return NULL;
}
ip_frag: remove padding length of fragment In some situations, we would get several ip fragments, which total data length is less than min_ip_len(64) and padding with zeros. We simulated intermediate fragments by modifying the MTU. To illustrate the problem, we simplify the packet format and ignore the impact of the packet header.In namespace2, a packet whose data length is 1520 is sent. When the packet passes tap2, the packet is divided into two fragments: fragment A and B, similar to (1520 = 1510 + 10). When the packet passes tap3, the larger fragment packet A is divided into two fragments A1 and A2, similar to (1510 = 1500 + 10). Finally, the bond interface receives three fragments: A1, A2, and B (1520 = 1500 + 10 + 10). One fragmented packet A2 is smaller than the minimum Ethernet frame length, so it needs to be padded. |---------------------------------------------------| | HOST | | |--------------| |----------------------------| | | | ns2 | | |--------------| | | | | |--------| | | |--------| |--------| | | | | | tap1 | | | | tap2 | ns1| tap3 | | | | | |mtu=1510| | | |mtu=1510| |mtu=1500| | | | |--|1.1.1.1 |--| |--|1.1.1.2 |----|2.1.1.1 |--| | | |--------| |--------| |--------| | | | | | | | |-----------------| | | | | | | |--------| | | | bond | | |--------------------------------------|mtu=1500|---| |--------| When processing the preceding packets above, DPDK would aggregate fragmented packets A2 and B. And error packets are generated, which padding(zero) is displayed in the middle of the packet. A2 + B: 0000 fa 16 3e 9f fb 82 fa 47 b2 57 dc 20 08 00 45 00 0010 00 33 b4 66 00 ba 3f 01 c1 a5 01 01 01 01 02 01 0020 01 02 c0 c1 c2 c3 c4 c5 c6 c7 00 00 00 00 00 00 0030 00 00 00 00 00 00 00 00 00 00 00 00 c8 c9 ca cb 0040 cc cd ce cf d0 d1 d2 d3 d4 d5 d6 d7 d8 d9 da db 0050 dc dd de df e0 e1 e2 e3 e4 e5 e6 So, we would calculate the length of padding, and remove the padding in pkt_len and data_len before aggregation. And also we have the fix for both ipv4 and ipv6. Fixes: 7f0983ee331c ("ip_frag: check fragment length of incoming packet") Cc: stable@dpdk.org Signed-off-by: Yicai Lu <luyicai@huawei.com> Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
2020-12-16 13:36:30 +00:00
if (unlikely(trim > 0))
rte_pktmbuf_trim(mb, trim);
/* try to find/add entry into the fragment's table. */
if ((fp = ip_frag_find(tbl, dr, &key, tms)) == NULL) {
IP_FRAG_MBUF2DR(dr, mb);
return NULL;
}
IP_FRAG_LOG(DEBUG, "%s:%d:\n"
"tbl: %p, max_entries: %u, use_entries: %u\n"
"ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, start: %" PRIu64
", total_size: %u, frag_size: %u, last_idx: %u\n\n",
__func__, __LINE__,
tbl, tbl->max_entries, tbl->use_entries,
fp, fp->key.src_dst[0], fp->key.id, fp->start,
fp->total_size, fp->frag_size, fp->last_idx);
/* process the fragmented packet. */
mb = ip_frag_process(fp, dr, mb, ip_ofs, ip_len, ip_flag);
ip_frag_inuse(tbl, fp);
IP_FRAG_LOG(DEBUG, "%s:%d:\n"
"mbuf: %p\n"
"tbl: %p, max_entries: %u, use_entries: %u\n"
"ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, start: %" PRIu64
", total_size: %u, frag_size: %u, last_idx: %u\n\n",
__func__, __LINE__, mb,
tbl, tbl->max_entries, tbl->use_entries,
fp, fp->key.src_dst[0], fp->key.id, fp->start,
fp->total_size, fp->frag_size, fp->last_idx);
return mb;
}