diff --git a/app/test/test_ipfrag.c b/app/test/test_ipfrag.c
index baff5ed083..dc62b0e547 100644
--- a/app/test/test_ipfrag.c
+++ b/app/test/test_ipfrag.c
@@ -29,10 +29,72 @@ test_ipfrag(void)
 #define NUM_MBUFS 128
 #define BURST 32
 
+/*
+ * IPv4 option areas (wire order) used both to build the input packets and
+ * as the expected option bytes of the fragments produced from them.
+ *
+ * Option type 0x07 (length 0x0b) has the copied flag (0x80) clear, so it
+ * is expected in the first fragment only. Option type 0x83 (length 0x07)
+ * has the copied flag set, so it is expected in every fragment. Trailing
+ * 0x00 bytes are End-of-Option-List padding up to a 4-byte boundary.
+ */
+
+/* Fragment at offset 0; a copied and a non-copied option are present. */
+static uint8_t expected_first_frag_ipv4_opts_copied[] = {
+	0x07, 0x0b, 0x04, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x83,
+	0x07, 0x04, 0xc0, 0xa8,
+	0xe3, 0x96, 0x00, 0x00,
+};
+
+/* Fragment at a non-zero offset; only the copied option is kept. */
+static uint8_t expected_sub_frag_ipv4_opts_copied[] = {
+	0x83, 0x07, 0x04, 0xc0,
+	0xa8, 0xe3, 0x96, 0x00,
+};
+
+/* Fragment at offset 0; only a non-copied option is present. */
+static uint8_t expected_first_frag_ipv4_opts_nocopied[] = {
+	0x07, 0x0b, 0x04, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+};
+
+/*
+ * Fragment at a non-zero offset and no copied option: no option bytes.
+ * A zero-length array (GNU C extension) is used so that sizeof() is 0.
+ */
+static uint8_t expected_sub_frag_ipv4_opts_nocopied[0];
+
+/* Option area of one fragment, together with how it is classified. */
+struct test_opt_data {
+	bool is_first_frag;	/**< fragment offset is 0 */
+	bool opt_copied;	/**< option set includes a copied option */
+	uint16_t len;		/**< number of valid bytes in data */
+	uint8_t data[RTE_IPV4_HDR_OPT_MAX_LEN];	/**< option bytes */
+};
+
 static struct rte_mempool *pkt_pool,
 			  *direct_pool,
 			  *indirect_pool;
 
+/*
+ * Write the len bytes at hex into str as two lowercase hexadecimal digits
+ * each, followed by a terminating NUL. str needs 2 * len + 1 bytes.
+ */
+static inline void
+hex_to_str(uint8_t *hex, uint16_t len, char *str)
+{
+	int i;
+
+	for (i = 0; i < len; i++) {
+		sprintf(str, "%02x", hex[i]);
+		str += 2;
+	}
+	*str = 0;
+}
+
 static int
 setup_buf_pool(void)
 {
@@ -99,23 +161,79 @@ static void ut_teardown(void)
 {
 }
 
+/*
+ * Fill expected_opt->len and expected_opt->data with the option area
+ * expected in a fragment, chosen by whether the fragment is at offset 0
+ * (is_first_frag) and whether the option set includes a copied option
+ * (opt_copied). The other members of expected_opt are left untouched.
+ */
+static inline void
+test_get_ipv4_opt(bool is_first_frag, bool opt_copied,
+	struct test_opt_data *expected_opt)
+{
+	if (is_first_frag) {
+		if (opt_copied) {
+			expected_opt->len =
+				sizeof(expected_first_frag_ipv4_opts_copied);
+			rte_memcpy(expected_opt->data,
+				expected_first_frag_ipv4_opts_copied,
+				sizeof(expected_first_frag_ipv4_opts_copied));
+		} else {
+			expected_opt->len =
+				sizeof(expected_first_frag_ipv4_opts_nocopied);
+			rte_memcpy(expected_opt->data,
+				expected_first_frag_ipv4_opts_nocopied,
+				sizeof(expected_first_frag_ipv4_opts_nocopied));
+		}
+	} else {
+		if (opt_copied) {
+			expected_opt->len =
+				sizeof(expected_sub_frag_ipv4_opts_copied);
+			rte_memcpy(expected_opt->data,
+				expected_sub_frag_ipv4_opts_copied,
+				sizeof(expected_sub_frag_ipv4_opts_copied));
+		} else {
+			expected_opt->len =
+				sizeof(expected_sub_frag_ipv4_opts_nocopied);
+			rte_memcpy(expected_opt->data,
+				expected_sub_frag_ipv4_opts_nocopied,
+				sizeof(expected_sub_frag_ipv4_opts_nocopied));
+		}
+	}
+}
+
+/*
+ * Build an IPv4 packet with s payload bytes in mbuf b. When have_opt is
+ * set, the option area selected by is_first_frag and opt_copied (see
+ * test_get_ipv4_opt()) follows the fixed header and is accounted for in
+ * the IHL field and in the packet and total lengths.
+ */
 static void
-v4_allocate_packet_of(struct rte_mbuf *b, int fill,
-		      size_t s, int df, uint8_t mf, uint16_t off,
-		      uint8_t ttl, uint8_t proto, uint16_t pktid)
+v4_allocate_packet_of(struct rte_mbuf *b, int fill, size_t s,
+	int df, uint8_t mf, uint16_t off, uint8_t ttl, uint8_t proto,
+	uint16_t pktid, bool have_opt, bool is_first_frag, bool opt_copied)
 {
 	/* Create a packet, 2k bytes long */
 	b->data_off = 0;
 	char *data = rte_pktmbuf_mtod(b, char *);
-	rte_be16_t fragment_offset = 0;	/**< fragmentation offset */
+	rte_be16_t fragment_offset = 0;	/* fragmentation offset */
+	uint16_t iph_len;
+	struct test_opt_data opt;
 
-	memset(data, fill, sizeof(struct rte_ipv4_hdr) + s);
+	opt.len = 0;
+
+	if (have_opt)
+		test_get_ipv4_opt(is_first_frag, opt_copied, &opt);
+
+	iph_len = sizeof(struct rte_ipv4_hdr) + opt.len;
+	memset(data, fill, iph_len + s);
 
 	struct rte_ipv4_hdr *hdr = (struct rte_ipv4_hdr *)data;
 
-	hdr->version_ihl = 0x45; /* standard IP header... */
+	hdr->version_ihl = 0x40; /* version 4; IHL is added below */
+	hdr->version_ihl += (iph_len / 4); /* IHL counts 4-byte words */
 	hdr->type_of_service = 0;
-	b->pkt_len = s + sizeof(struct rte_ipv4_hdr);
+	b->pkt_len = s + iph_len;
 	b->data_len = b->pkt_len;
 	hdr->total_length = rte_cpu_to_be_16(b->pkt_len);
 	hdr->packet_id = rte_cpu_to_be_16(pktid);
@@ -142,6 +260,9 @@ v4_allocate_packet_of(struct rte_mbuf *b, int fill,
 	hdr->hdr_checksum = 0;
 	hdr->src_addr = rte_cpu_to_be_32(0x8080808);
 	hdr->dst_addr = rte_cpu_to_be_32(0x8080404);
+
+	/* Options (possibly none) directly follow the fixed header. */
+	rte_memcpy(hdr + 1, opt.data, opt.len);
 }
 
 static void
@@ -198,6 +319,53 @@ test_get_offset(struct rte_mbuf **mb, int32_t len,
 	}
 }
 
+/*
+ * For each of the num fragments in mb, store in the matching opt[] entry
+ * the option bytes that follow the fixed IPv4 header, whether the fragment
+ * offset is 0, and the opt_copied value given by the caller. Only done
+ * when ipv is 4. If the header length does not yield an option length
+ * within RTE_IPV4_HDR_OPT_MAX_LEN, an all-EOL area of maximum length is
+ * stored instead.
+ */
+static inline void
+test_get_frag_opt(struct rte_mbuf **mb, int32_t num,
+	struct test_opt_data *opt, int ipv, bool opt_copied)
+{
+	int32_t i;
+
+	for (i = 0; i < num; i++) {
+		if (ipv == 4) {
+			struct rte_ipv4_hdr *iph =
+			    rte_pktmbuf_mtod(mb[i], struct rte_ipv4_hdr *);
+			uint16_t header_len = (iph->version_ihl &
+				RTE_IPV4_HDR_IHL_MASK) *
+				RTE_IPV4_IHL_MULTIPLIER;
+			uint16_t opt_len = header_len -
+				sizeof(struct rte_ipv4_hdr);
+
+			opt->opt_copied = opt_copied;
+
+			if ((rte_be_to_cpu_16(iph->fragment_offset) &
+				    RTE_IPV4_HDR_OFFSET_MASK) == 0)
+				opt->is_first_frag = true;
+			else
+				opt->is_first_frag = false;
+
+			if (likely(opt_len <= RTE_IPV4_HDR_OPT_MAX_LEN)) {
+				char *iph_opt = rte_pktmbuf_mtod_offset(mb[i],
+				    char *, sizeof(struct rte_ipv4_hdr));
+				opt->len = opt_len;
+				rte_memcpy(opt->data, iph_opt, opt_len);
+			} else {
+				opt->len = RTE_IPV4_HDR_OPT_MAX_LEN;
+				memset(opt->data, RTE_IPV4_HDR_OPT_EOL,
+				    sizeof(opt->data));
+			}
+			opt++;
+		}
+	}
+}
+
 static int
 test_ip_frag(void)
 {
@@ -217,32 +385,52 @@ test_ip_frag(void)
 		uint16_t pkt_id;
 		int expected_frags;
 		uint16_t expected_fragment_offset[BURST];
+		bool have_opt;		/* input packet carries IPv4 options */
+		bool is_first_frag;	/* input options are those of offset 0 */
+		bool opt_copied;	/* input options include a copied one */
 	} tests[] = {
 		{4, 1280, 1400, 0, 0, 0, 64, IPPROTO_ICMP, RND_ID, 2,
-		 {0x2000, 0x009D}},
+		 {0x2000, 0x009D}, false},
 		{4, 1280, 1400, 0, 0, 0, 64, IPPROTO_ICMP, 0, 2,
-		 {0x2000, 0x009D}},
+		 {0x2000, 0x009D}, false},
 		{4, 600, 1400, 0, 0, 0, 64, IPPROTO_ICMP, RND_ID, 3,
-		 {0x2000, 0x2048, 0x0090}},
+		 {0x2000, 0x2048, 0x0090}, false},
 		{4, 4, 1400, 0, 0, 0, 64, IPPROTO_ICMP, RND_ID, -EINVAL},
 		{4, 600, 1400, 1, 0, 0, 64, IPPROTO_ICMP, RND_ID, -ENOTSUP},
 		{4, 600, 1400, 0, 0, 0, 0, IPPROTO_ICMP, RND_ID, 3,
-		 {0x2000, 0x2048, 0x0090}},
+		 {0x2000, 0x2046, 0x008C}, true, true, true},
+		/* The first fragment, options include a copied one */
+		{4, 68, 104, 0, 1, 0, 0, IPPROTO_ICMP, RND_ID, 5,
+		 {0x2000, 0x2003, 0x2006, 0x2009, 0x200C}, true, true, true},
+		/* The middle fragment, options include a copied one */
 		{4, 68, 104, 0, 1, 13, 0, IPPROTO_ICMP, RND_ID, 3,
-		 {0x200D, 0x2013, 0x2019}},
-
+		 {0x200D, 0x2012, 0x2017}, true, false, true},
+		/* The last fragment, options include a copied one */
+		{4, 68, 104, 0, 0, 26, 0, IPPROTO_ICMP, RND_ID, 3,
+		 {0x201A, 0x201F, 0x0024}, true, false, true},
+		/* The first fragment, no copied option */
+		{4, 68, 104, 0, 1, 0, 0, IPPROTO_ICMP, RND_ID, 4,
+		 {0x2000, 0x2004, 0x2008, 0x200C}, true, true, false},
+		/* The middle fragment, no copied option */
+		{4, 68, 104, 0, 1, 13, 0, IPPROTO_ICMP, RND_ID, 3,
+		 {0x200D, 0x2013, 0x2019}, true, false, false},
+		/* The last fragment, no copied option */
+		{4, 68, 104, 0, 0, 26, 0, IPPROTO_ICMP, RND_ID, 3,
+		 {0x201A, 0x2020, 0x0026}, true, false, false},
 		{6, 1280, 1400, 0, 0, 0, 64, IPPROTO_ICMP, RND_ID, 2,
-		 {0x0001, 0x04D0}},
+		 {0x0001, 0x04D0}, false},
 		{6, 1300, 1400, 0, 0, 0, 64, IPPROTO_ICMP, RND_ID, 2,
-		 {0x0001, 0x04E0}},
+		 {0x0001, 0x04E0}, false},
 		{6, 4, 1400, 0, 0, 0, 64, IPPROTO_ICMP, RND_ID, -EINVAL},
 		{6, 1300, 1400, 0, 0, 0, 0, IPPROTO_ICMP, RND_ID, 2,
-		 {0x0001, 0x04E0}},
+		 {0x0001, 0x04E0}, false},
 	};
 
 	for (i = 0; i < RTE_DIM(tests); i++) {
 		int32_t len = 0;
 		uint16_t fragment_offset[BURST];
+		struct test_opt_data opt_res[BURST];
+		struct test_opt_data opt_exp;
 		uint16_t pktid = tests[i].pkt_id;
 		struct rte_mbuf *pkts_out[BURST];
 		struct rte_mbuf *b = rte_pktmbuf_alloc(pkt_pool);
@@ -261,7 +449,10 @@ test_ip_frag(void)
 					      tests[i].set_of,
 					      tests[i].ttl,
 					      tests[i].proto,
-					      pktid);
+					      pktid,
+					      tests[i].have_opt,
+					      tests[i].is_first_frag,
+					      tests[i].opt_copied);
 		} else if (tests[i].ipv == 6) {
 			v6_allocate_packet_of(b, 0x41414141,
 					      tests[i].pkt_size,
@@ -286,17 +477,20 @@ test_ip_frag(void)
 		if (len > 0) {
 			test_get_offset(pkts_out, len,
 			    fragment_offset, tests[i].ipv);
+			if (tests[i].have_opt)
+				test_get_frag_opt(pkts_out, len, opt_res,
+					tests[i].ipv, tests[i].opt_copied);
 			test_free_fragments(pkts_out, len);
 		}
 
-		printf("%zd: checking %d with %d\n", i, len,
+		printf("[check frag number]%zd: checking %d with %d\n", i, len,
 		       tests[i].expected_frags);
 		RTE_TEST_ASSERT_EQUAL(len, tests[i].expected_frags,
 				      "Failed case %zd.\n", i);
 
 		if (len > 0) {
 			for (j = 0; j < (size_t)len; j++) {
-				printf("%zd-%zd: checking %d with %d\n",
+				printf("[check offset]%zd-%zd: checking %d with %d\n",
 				    i, j, fragment_offset[j],
 				    rte_cpu_to_be_16(
 					tests[i].expected_fragment_offset[j]));
@@ -305,6 +499,37 @@ test_ip_frag(void)
 					tests[i].expected_fragment_offset[j]),
 				    "Failed case %zd.\n", i);
 			}
+
+			/* Compare option bytes through their printed hex dumps. */
+			if (tests[i].have_opt && (tests[i].ipv == 4)) {
+				for (j = 0; j < (size_t)len; j++) {
+					char opt_res_str[2 *
+						RTE_IPV4_HDR_OPT_MAX_LEN + 1];
+					char opt_exp_str[2 *
+						RTE_IPV4_HDR_OPT_MAX_LEN + 1];
+
+					test_get_ipv4_opt(
+						opt_res[j].is_first_frag,
+						opt_res[j].opt_copied,
+						&opt_exp);
+					hex_to_str(opt_res[j].data,
+						opt_res[j].len,
+						opt_res_str);
+					hex_to_str(opt_exp.data,
+						opt_exp.len,
+						opt_exp_str);
+
+					printf(
+						"[check ipv4 option]%zd-%zd: checking (len:%u)%s with (len:%u)%s\n",
+						i, j,
+						opt_res[j].len, opt_res_str,
+						opt_exp.len, opt_exp_str);
+					RTE_TEST_ASSERT_SUCCESS(
+						strcmp(opt_res_str,
+							opt_exp_str),
+						"Failed case %zd.\n", i);
+				}
+			}
 		}
 
 	}
diff --git a/lib/ip_frag/rte_ipv4_fragmentation.c b/lib/ip_frag/rte_ipv4_fragmentation.c
index 669682a0cf..a19f6fda64 100644
--- a/lib/ip_frag/rte_ipv4_fragmentation.c
+++ b/lib/ip_frag/rte_ipv4_fragmentation.c
@@ -20,6 +20,9 @@
 
 #define	IPV4_HDR_FO_ALIGN			(1 << RTE_IPV4_HDR_FO_SHIFT)
 
+/* Maximum IPv4 header length in bytes, options included. */
+#define IPV4_HDR_MAX_LEN 60
+
 static inline void __fill_ipv4hdr_frag(struct rte_ipv4_hdr *dst,
 		const struct rte_ipv4_hdr *src, uint16_t header_len,
 		uint16_t len, uint16_t fofs, uint16_t dofs, uint32_t mf)
@@ -39,6 +42,65 @@ static inline void __free_fragments(struct rte_mbuf *mb[], uint32_t num)
 		rte_pktmbuf_free(mb[i]);
 }
 
+/*
+ * Build the IPv4 header used for every fragment after the first one.
+ *
+ * The fixed header is copied from iph to ipopt_frag_hdr, followed by only
+ * those options of iph whose type has the copied flag set. The result is
+ * padded with End-of-Option-List bytes to a multiple of
+ * RTE_IPV4_IHL_MULTIPLIER and the IHL field of the copy is updated.
+ * Parsing stops at an End-of-Option-List byte or a malformed length.
+ *
+ * iph: fixed IPv4 header followed by ipopt_len bytes of options.
+ * ipopt_len: size in bytes of the option area of iph.
+ * ipopt_frag_hdr: output buffer, large enough for the fixed header plus
+ *   ipopt_len bytes rounded up to RTE_IPV4_IHL_MULTIPLIER.
+ * Returns the size in bytes of the option area written (padding included).
+ */
+static inline uint16_t __create_ipopt_frag_hdr(uint8_t *iph,
+	uint16_t ipopt_len, uint8_t *ipopt_frag_hdr)
+{
+	uint16_t len = ipopt_len;
+	struct rte_ipv4_hdr *iph_opt = (struct rte_ipv4_hdr *)ipopt_frag_hdr;
+
+	ipopt_len = 0;
+	rte_memcpy(ipopt_frag_hdr, iph, sizeof(struct rte_ipv4_hdr));
+	ipopt_frag_hdr += sizeof(struct rte_ipv4_hdr);
+
+	uint8_t *p_opt = iph + sizeof(struct rte_ipv4_hdr);
+
+	while (len > 0) {
+		if (unlikely(*p_opt == RTE_IPV4_HDR_OPT_NOP)) {
+			len--;
+			p_opt++;
+			continue;
+		} else if (unlikely(*p_opt == RTE_IPV4_HDR_OPT_EOL))
+			break;
+
+		/* A length byte must exist and describe an option that fits. */
+		if (unlikely(len < 2 || p_opt[1] < 2 || p_opt[1] > len))
+			break;
+
+		if (RTE_IPV4_HDR_OPT_COPIED(*p_opt)) {
+			rte_memcpy(ipopt_frag_hdr + ipopt_len,
+				p_opt, p_opt[1]);
+			ipopt_len += p_opt[1];
+		}
+
+		len -= p_opt[1];
+		p_opt += p_opt[1];
+	}
+
+	len = RTE_ALIGN_CEIL(ipopt_len, RTE_IPV4_IHL_MULTIPLIER);
+	memset(ipopt_frag_hdr + ipopt_len,
+		RTE_IPV4_HDR_OPT_EOL, len - ipopt_len);
+	ipopt_len = len;
+	iph_opt->ihl = (sizeof(struct rte_ipv4_hdr) + ipopt_len) /
+		RTE_IPV4_IHL_MULTIPLIER;
+
+	return ipopt_len;
+}
+
 /**
  * IPv4 fragmentation.
  *
@@ -74,6 +136,8 @@ rte_ipv4_fragment_packet(struct rte_mbuf *pkt_in,
 	uint32_t more_in_segs;
 	uint16_t fragment_offset, flag_offset, frag_size, header_len;
 	uint16_t frag_bytes_remaining;
+	uint8_t ipopt_frag_hdr[IPV4_HDR_MAX_LEN];
+	uint16_t ipopt_len;
 
 	/*
 	 * Formal parameter checking.
@@ -116,6 +180,11 @@ rte_ipv4_fragment_packet(struct rte_mbuf *pkt_in,
 	out_pkt_pos = 0;
 	fragment_offset = 0;
 
+	/* Length of the option area; reject one that cannot be valid. */
+	ipopt_len = header_len - sizeof(struct rte_ipv4_hdr);
+	if (unlikely(ipopt_len > RTE_IPV4_HDR_OPT_MAX_LEN))
+		return -EINVAL;
+
 	more_in_segs = 1;
 	while (likely(more_in_segs)) {
 		struct rte_mbuf *out_pkt = NULL, *out_seg_prev = NULL;
@@ -186,10 +255,26 @@ rte_ipv4_fragment_packet(struct rte_mbuf *pkt_in,
 		    (uint16_t)out_pkt->pkt_len,
 		    flag_offset, fragment_offset, more_in_segs);
 
-		fragment_offset = (uint16_t)(fragment_offset +
-		    out_pkt->pkt_len - header_len);
+		/*
+		 * When the fragment just written is the first one (both
+		 * offsets are 0) and options exist, it carried the complete
+		 * option area. Later fragments use a reduced header copy that
+		 * holds only the options with the copied flag set.
+		 */
+		if (unlikely((fragment_offset == 0) && (ipopt_len) &&
+			    ((flag_offset & RTE_IPV4_HDR_OFFSET_MASK) == 0))) {
+			ipopt_len = __create_ipopt_frag_hdr((uint8_t *)in_hdr,
+				ipopt_len, ipopt_frag_hdr);
+			fragment_offset = (uint16_t)(fragment_offset +
+				out_pkt->pkt_len - header_len);
+			out_pkt->l3_len = header_len;
 
-		out_pkt->l3_len = header_len;
+			header_len = sizeof(struct rte_ipv4_hdr) + ipopt_len;
+			in_hdr = (struct rte_ipv4_hdr *)ipopt_frag_hdr;
+		} else {
+			fragment_offset = (uint16_t)(fragment_offset +
+				out_pkt->pkt_len - header_len);
+			out_pkt->l3_len = header_len;
+		}
 
 		/* Write the fragment to the output list */
 		pkts_out[out_pkt_pos] = out_pkt;
diff --git a/lib/net/rte_ip.h b/lib/net/rte_ip.h
index 534f401d26..b502481670 100644
--- a/lib/net/rte_ip.h
+++ b/lib/net/rte_ip.h
@@ -97,6 +97,12 @@ struct rte_ipv4_hdr {
 
 #define	RTE_IPV4_HDR_OFFSET_UNITS	8
 
+/* IPv4 options */
+#define RTE_IPV4_HDR_OPT_EOL       0 /**< End of Option List type */
+#define RTE_IPV4_HDR_OPT_NOP       1 /**< No Operation type (one byte) */
+#define RTE_IPV4_HDR_OPT_COPIED(v) ((v) & 0x80) /**< non-zero if copied flag set */
+#define RTE_IPV4_HDR_OPT_MAX_LEN   40 /**< largest option area, in bytes */
+
 /*
  * IPv4 address types
  */