diff --git a/doc/guides/sample_app_ug/ipsec_secgw.rst b/doc/guides/sample_app_ug/ipsec_secgw.rst index ac118c1a21..ad2d79e757 100644 --- a/doc/guides/sample_app_ug/ipsec_secgw.rst +++ b/doc/guides/sample_app_ug/ipsec_secgw.rst @@ -69,7 +69,6 @@ Constraints * No AH mode. * Supported algorithms: AES-CBC, AES-CTR, AES-GCM, 3DES-CBC, HMAC-SHA1 and NULL. * Each SA must be handle by a unique lcore (*1 RX queue per port*). -* No chained mbufs. Compiling the Application ------------------------- @@ -98,6 +97,8 @@ The application has a number of command line options:: --single-sa SAIDX --rxoffload MASK --txoffload MASK + --mtu MTU + --reassemble NUM -f CONFIG_FILE_PATH Where: @@ -111,9 +112,13 @@ Where: * ``-u PORTMASK``: hexadecimal bitmask of unprotected ports -* ``-j FRAMESIZE``: *optional*. Enables jumbo frames with the maximum size - specified as FRAMESIZE. If an invalid value is provided as FRAMESIZE - then the default value 9000 is used. +* ``-j FRAMESIZE``: *optional*. data buffer size (in bytes), + in other words maximum data size for one segment. + Packets with length bigger then FRAMESIZE still can be received, + but will be segmented. + Default value: RTE_MBUF_DEFAULT_BUF_SIZE (2176) + Minimum value: RTE_MBUF_DEFAULT_BUF_SIZE (2176) + Maximum value: UINT16_MAX (65535). * ``-l``: enables code-path that uses librte_ipsec. @@ -144,6 +149,15 @@ Where: allows user to disable some of the TX HW offload capabilities. By default all HW TX offloads are enabled. +* ``--mtu MTU``: MTU value (in bytes) on all attached ethernet ports. + Outgoing packets with length bigger then MTU will be fragmented. + Incoming packets with length bigger then MTU will be discarded. + Default value: 1500. + +* ``--reassemble NUM``: max number of entries in reassemble fragment table. + Zero value disables reassembly functionality. + Default value: 0. + * ``-f CONFIG_FILE_PATH``: the full path of text-based file containing all configuration items for running the application (See Configuration file syntax section below). ``-f CONFIG_FILE_PATH`` **must** be specified. diff --git a/examples/ipsec-secgw/ipsec-secgw.c b/examples/ipsec-secgw/ipsec-secgw.c index 33c4389640..b1ecbb975d 100644 --- a/examples/ipsec-secgw/ipsec-secgw.c +++ b/examples/ipsec-secgw/ipsec-secgw.c @@ -42,6 +42,7 @@ #include #include #include +#include #include "ipsec.h" #include "parser.h" @@ -110,6 +111,11 @@ static uint16_t nb_txd = IPSEC_SECGW_TX_DESC_DEFAULT; (addr)->addr_bytes[4], (addr)->addr_bytes[5], \ 0, 0) +#define FRAG_TBL_BUCKET_ENTRIES 4 +#define FRAG_TTL_MS (10 * MS_PER_S) + +#define MTU_TO_FRAMELEN(x) ((x) + RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN) + /* port/source ethernet addr and destination ethernet addr */ struct ethaddr_info { uint64_t src, dst; @@ -127,6 +133,8 @@ struct ethaddr_info ethaddr_tbl[RTE_MAX_ETHPORTS] = { #define CMD_LINE_OPT_CRYPTODEV_MASK "cryptodev_mask" #define CMD_LINE_OPT_RX_OFFLOAD "rxoffload" #define CMD_LINE_OPT_TX_OFFLOAD "txoffload" +#define CMD_LINE_OPT_REASSEMBLE "reassemble" +#define CMD_LINE_OPT_MTU "mtu" enum { /* long options mapped to a short option */ @@ -140,6 +148,8 @@ enum { CMD_LINE_OPT_CRYPTODEV_MASK_NUM, CMD_LINE_OPT_RX_OFFLOAD_NUM, CMD_LINE_OPT_TX_OFFLOAD_NUM, + CMD_LINE_OPT_REASSEMBLE_NUM, + CMD_LINE_OPT_MTU_NUM, }; static const struct option lgopts[] = { @@ -148,6 +158,7 @@ static const struct option lgopts[] = { {CMD_LINE_OPT_CRYPTODEV_MASK, 1, 0, CMD_LINE_OPT_CRYPTODEV_MASK_NUM}, {CMD_LINE_OPT_RX_OFFLOAD, 1, 0, CMD_LINE_OPT_RX_OFFLOAD_NUM}, {CMD_LINE_OPT_TX_OFFLOAD, 1, 0, CMD_LINE_OPT_TX_OFFLOAD_NUM}, + {CMD_LINE_OPT_REASSEMBLE, 1, 0, CMD_LINE_OPT_REASSEMBLE_NUM}, {NULL, 0, 0, 0} }; @@ -160,7 +171,6 @@ static int32_t numa_on = 1; /**< NUMA is enabled by default. */ static uint32_t nb_lcores; static uint32_t single_sa; static uint32_t single_sa_idx; -static uint32_t frame_size; /* * RX/TX HW offload capabilities to enable/use on ethernet ports. @@ -169,6 +179,13 @@ static uint32_t frame_size; static uint64_t dev_rx_offload = UINT64_MAX; static uint64_t dev_tx_offload = UINT64_MAX; +/* + * global values that determine multi-seg policy + */ +static uint32_t frag_tbl_sz; +static uint32_t frame_buf_size = RTE_MBUF_DEFAULT_BUF_SIZE; +static uint32_t mtu_size = RTE_ETHER_MTU; + /* application wide librte_ipsec/SA parameters */ struct app_sa_prm app_sa_prm = {.enable = 0}; @@ -205,6 +222,12 @@ struct lcore_conf { struct ipsec_ctx outbound; struct rt_ctx *rt4_ctx; struct rt_ctx *rt6_ctx; + struct { + struct rte_ip_frag_tbl *tbl; + struct rte_mempool *pool_dir; + struct rte_mempool *pool_indir; + struct rte_ip_frag_death_row dr; + } frag; } __rte_cache_aligned; static struct lcore_conf lcore_conf[RTE_MAX_LCORE]; @@ -230,6 +253,18 @@ static struct rte_eth_conf port_conf = { static struct socket_ctx socket_ctx[NB_SOCKETS]; +/* + * Determine is multi-segment support required: + * - either frame buffer size is smaller then mtu + * - or reassmeble support is requested + */ +static int +multi_seg_required(void) +{ + return (MTU_TO_FRAMELEN(mtu_size) + RTE_PKTMBUF_HEADROOM > + frame_buf_size || frag_tbl_sz != 0); +} + static inline void adjust_ipv4_pktlen(struct rte_mbuf *m, const struct rte_ipv4_hdr *iph, uint32_t l2_len) @@ -451,9 +486,52 @@ send_burst(struct lcore_conf *qconf, uint16_t n, uint16_t port) return 0; } +/* + * Helper function to fragment and queue for TX one packet. + */ +static inline uint32_t +send_fragment_packet(struct lcore_conf *qconf, struct rte_mbuf *m, + uint16_t port, uint8_t proto) +{ + struct buffer *tbl; + uint32_t len, n; + int32_t rc; + + tbl = qconf->tx_mbufs + port; + len = tbl->len; + + /* free space for new fragments */ + if (len + RTE_LIBRTE_IP_FRAG_MAX_FRAG >= RTE_DIM(tbl->m_table)) { + send_burst(qconf, len, port); + len = 0; + } + + n = RTE_DIM(tbl->m_table) - len; + + if (proto == IPPROTO_IP) + rc = rte_ipv4_fragment_packet(m, tbl->m_table + len, + n, mtu_size, qconf->frag.pool_dir, + qconf->frag.pool_indir); + else + rc = rte_ipv6_fragment_packet(m, tbl->m_table + len, + n, mtu_size, qconf->frag.pool_dir, + qconf->frag.pool_indir); + + if (rc >= 0) + len += rc; + else + RTE_LOG(ERR, IPSEC, + "%s: failed to fragment packet with size %u, " + "error code: %d\n", + __func__, m->pkt_len, rte_errno); + + rte_pktmbuf_free(m); + return len; +} + /* Enqueue a single packet, and send burst if queue is filled */ static inline int32_t -send_single_packet(struct rte_mbuf *m, uint16_t port) +send_single_packet(struct rte_mbuf *m, uint16_t port, uint8_t proto) { uint32_t lcore_id; uint16_t len; @@ -463,8 +541,14 @@ send_single_packet(struct rte_mbuf *m, uint16_t port) qconf = &lcore_conf[lcore_id]; len = qconf->tx_mbufs[port].len; - qconf->tx_mbufs[port].m_table[len] = m; - len++; + + if (m->pkt_len <= mtu_size) { + qconf->tx_mbufs[port].m_table[len] = m; + len++; + + /* need to fragment the packet */ + } else + len = send_fragment_packet(qconf, m, port, proto); /* enough pkts to be sent */ if (unlikely(len == MAX_PKT_BURST)) { @@ -818,7 +902,7 @@ route4_pkts(struct rt_ctx *rt_ctx, struct rte_mbuf *pkts[], uint8_t nb_pkts) rte_pktmbuf_free(pkts[i]); continue; } - send_single_packet(pkts[i], pkt_hop & 0xff); + send_single_packet(pkts[i], pkt_hop & 0xff, IPPROTO_IP); } } @@ -870,7 +954,7 @@ route6_pkts(struct rt_ctx *rt_ctx, struct rte_mbuf *pkts[], uint8_t nb_pkts) rte_pktmbuf_free(pkts[i]); continue; } - send_single_packet(pkts[i], pkt_hop & 0xff); + send_single_packet(pkts[i], pkt_hop & 0xff, IPPROTO_IPV6); } } @@ -1036,6 +1120,8 @@ main_loop(__attribute__((unused)) void *dummy) qconf->outbound.session_pool = socket_ctx[socket_id].session_pool; qconf->outbound.session_priv_pool = socket_ctx[socket_id].session_priv_pool; + qconf->frag.pool_dir = socket_ctx[socket_id].mbuf_pool; + qconf->frag.pool_indir = socket_ctx[socket_id].mbuf_pool_indir; if (qconf->nb_rx_queue == 0) { RTE_LOG(DEBUG, IPSEC, "lcore %u has nothing to do\n", @@ -1182,12 +1268,14 @@ print_usage(const char *prgname) " [--cryptodev_mask MASK]" " [--" CMD_LINE_OPT_RX_OFFLOAD " RX_OFFLOAD_MASK]" " [--" CMD_LINE_OPT_TX_OFFLOAD " TX_OFFLOAD_MASK]" + " [--" CMD_LINE_OPT_REASSEMBLE " REASSEMBLE_TABLE_SIZE]" + " [--" CMD_LINE_OPT_MTU " MTU]" "\n\n" " -p PORTMASK: Hexadecimal bitmask of ports to configure\n" " -P : Enable promiscuous mode\n" " -u PORTMASK: Hexadecimal bitmask of unprotected ports\n" - " -j FRAMESIZE: Enable jumbo frame with 'FRAMESIZE' as maximum\n" - " packet size\n" + " -j FRAMESIZE: Data buffer size, minimum (and default)\n" + " value: RTE_MBUF_DEFAULT_BUF_SIZE\n" " -l enables code-path that uses librte_ipsec\n" " -w REPLAY_WINDOW_SIZE specifies IPsec SQN replay window\n" " size for each SA\n" @@ -1205,6 +1293,13 @@ print_usage(const char *prgname) " --" CMD_LINE_OPT_TX_OFFLOAD ": bitmask of the TX HW offload capabilities to enable/use\n" " (DEV_TX_OFFLOAD_*)\n" + " --" CMD_LINE_OPT_REASSEMBLE " NUM" + ": max number of entries in reassemble(fragment) table\n" + " (zero (default value) disables reassembly)\n" + " --" CMD_LINE_OPT_MTU " MTU" + ": MTU value on all ports (default value: 1500)\n" + " outgoing packets with bigger size will be fragmented\n" + " incoming packets with bigger size will be discarded\n" "\n", prgname); } @@ -1375,21 +1470,16 @@ parse_args(int32_t argc, char **argv) f_present = 1; break; case 'j': - { - int32_t size = parse_decimal(optarg); - if (size <= 1518) { - printf("Invalid jumbo frame size\n"); - if (size < 0) { - print_usage(prgname); - return -1; - } - printf("Using default value 9000\n"); - frame_size = 9000; - } else { - frame_size = size; - } + ret = parse_decimal(optarg); + if (ret < RTE_MBUF_DEFAULT_BUF_SIZE || + ret > UINT16_MAX) { + printf("Invalid frame buffer size value: %s\n", + optarg); + print_usage(prgname); + return -1; } - printf("Enabled jumbo frames size %u\n", frame_size); + frame_buf_size = ret; + printf("Custom frame buffer size %u\n", frame_buf_size); break; case 'l': app_sa_prm.enable = 1; @@ -1457,6 +1547,26 @@ parse_args(int32_t argc, char **argv) return -1; } break; + case CMD_LINE_OPT_REASSEMBLE_NUM: + ret = parse_decimal(optarg); + if (ret < 0) { + printf("Invalid argument for \'%s\': %s\n", + CMD_LINE_OPT_REASSEMBLE, optarg); + print_usage(prgname); + return -1; + } + frag_tbl_sz = ret; + break; + case CMD_LINE_OPT_MTU_NUM: + ret = parse_decimal(optarg); + if (ret < 0 || ret > RTE_IPV4_MAX_PKT_LEN) { + printf("Invalid argument for \'%s\': %s\n", + CMD_LINE_OPT_MTU, optarg); + print_usage(prgname); + return -1; + } + mtu_size = ret; + break; default: print_usage(prgname); return -1; @@ -1468,6 +1578,16 @@ parse_args(int32_t argc, char **argv) return -1; } + /* check do we need to enable multi-seg support */ + if (multi_seg_required()) { + /* legacy mode doesn't support multi-seg */ + app_sa_prm.enable = 1; + printf("frame buf size: %u, mtu: %u, " + "number of reassemble entries: %u\n" + "multi-segment support is required\n", + frame_buf_size, mtu_size, frag_tbl_sz); + } + print_app_sa_prm(&app_sa_prm); if (optind >= 0) @@ -1685,6 +1805,9 @@ cryptodevs_init(void) int16_t cdev_id, port_id; struct rte_hash_parameters params = { 0 }; + const uint64_t mseg_flag = multi_seg_required() ? + RTE_CRYPTODEV_FF_IN_PLACE_SGL : 0; + params.entries = CDEV_MAP_ENTRIES; params.key_len = sizeof(struct cdev_key); params.hash_func = rte_jhash; @@ -1753,6 +1876,12 @@ cryptodevs_init(void) rte_cryptodev_info_get(cdev_id, &cdev_info); + if ((mseg_flag & cdev_info.feature_flags) != mseg_flag) + rte_exit(EXIT_FAILURE, + "Device %hd does not support \'%s\' feature\n", + cdev_id, + rte_cryptodev_get_feature_name(mseg_flag)); + if (nb_lcore_params > cdev_info.max_nb_queue_pairs) max_nb_qps = cdev_info.max_nb_queue_pairs; else @@ -1882,6 +2011,7 @@ cryptodevs_init(void) static void port_init(uint16_t portid, uint64_t req_rx_offloads, uint64_t req_tx_offloads) { + uint32_t frame_size; struct rte_eth_dev_info dev_info; struct rte_eth_txconf *txconf; uint16_t nb_tx_queue, nb_rx_queue; @@ -1920,9 +2050,14 @@ port_init(uint16_t portid, uint64_t req_rx_offloads, uint64_t req_tx_offloads) printf("Creating queues: nb_rx_queue=%d nb_tx_queue=%u...\n", nb_rx_queue, nb_tx_queue); - if (frame_size) { - local_port_conf.rxmode.max_rx_pkt_len = frame_size; + frame_size = MTU_TO_FRAMELEN(mtu_size); + if (frame_size > local_port_conf.rxmode.max_rx_pkt_len) local_port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME; + local_port_conf.rxmode.max_rx_pkt_len = frame_size; + + if (multi_seg_required()) { + local_port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_SCATTER; + local_port_conf.txmode.offloads |= DEV_TX_OFFLOAD_MULTI_SEGS; } local_port_conf.rxmode.offloads |= req_rx_offloads; @@ -2043,16 +2178,25 @@ static void pool_init(struct socket_ctx *ctx, int32_t socket_id, uint32_t nb_mbuf) { char s[64]; - uint32_t buff_size = frame_size ? (frame_size + RTE_PKTMBUF_HEADROOM) : - RTE_MBUF_DEFAULT_BUF_SIZE; - + int32_t ms; snprintf(s, sizeof(s), "mbuf_pool_%d", socket_id); ctx->mbuf_pool = rte_pktmbuf_pool_create(s, nb_mbuf, MEMPOOL_CACHE_SIZE, ipsec_metadata_size(), - buff_size, - socket_id); - if (ctx->mbuf_pool == NULL) + frame_buf_size, socket_id); + + /* + * if multi-segment support is enabled, then create a pool + * for indirect mbufs. + */ + ms = multi_seg_required(); + if (ms != 0) { + snprintf(s, sizeof(s), "mbuf_pool_indir_%d", socket_id); + ctx->mbuf_pool_indir = rte_pktmbuf_pool_create(s, nb_mbuf, + MEMPOOL_CACHE_SIZE, 0, 0, socket_id); + } + + if (ctx->mbuf_pool == NULL || (ms != 0 && ctx->mbuf_pool_indir == NULL)) rte_exit(EXIT_FAILURE, "Cannot init mbuf pool on socket %d\n", socket_id); else @@ -2114,6 +2258,140 @@ inline_ipsec_event_callback(uint16_t port_id, enum rte_eth_event_type type, return -1; } +static uint16_t +rx_callback(__rte_unused uint16_t port, __rte_unused uint16_t queue, + struct rte_mbuf *pkt[], uint16_t nb_pkts, + __rte_unused uint16_t max_pkts, void *user_param) +{ + uint64_t tm; + uint32_t i, k; + struct lcore_conf *lc; + struct rte_mbuf *mb; + struct rte_ether_hdr *eth; + + lc = user_param; + k = 0; + tm = 0; + + for (i = 0; i != nb_pkts; i++) { + + mb = pkt[i]; + eth = rte_pktmbuf_mtod(mb, struct rte_ether_hdr *); + if (eth->ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4)) { + + struct rte_ipv4_hdr *iph; + + iph = (struct rte_ipv4_hdr *)(eth + 1); + if (rte_ipv4_frag_pkt_is_fragmented(iph)) { + + mb->l2_len = sizeof(*eth); + mb->l3_len = sizeof(*iph); + tm = (tm != 0) ? tm : rte_rdtsc(); + mb = rte_ipv4_frag_reassemble_packet( + lc->frag.tbl, &lc->frag.dr, + mb, tm, iph); + + if (mb != NULL) { + /* fix ip cksum after reassemble. */ + iph = rte_pktmbuf_mtod_offset(mb, + struct rte_ipv4_hdr *, + mb->l2_len); + iph->hdr_checksum = 0; + iph->hdr_checksum = rte_ipv4_cksum(iph); + } + } + } else if (eth->ether_type == + rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6)) { + + struct rte_ipv6_hdr *iph; + struct ipv6_extension_fragment *fh; + + iph = (struct rte_ipv6_hdr *)(eth + 1); + fh = rte_ipv6_frag_get_ipv6_fragment_header(iph); + if (fh != NULL) { + mb->l2_len = sizeof(*eth); + mb->l3_len = (uintptr_t)fh - (uintptr_t)iph + + sizeof(*fh); + tm = (tm != 0) ? tm : rte_rdtsc(); + mb = rte_ipv6_frag_reassemble_packet( + lc->frag.tbl, &lc->frag.dr, + mb, tm, iph, fh); + if (mb != NULL) + /* fix l3_len after reassemble. */ + mb->l3_len = mb->l3_len - sizeof(*fh); + } + } + + pkt[k] = mb; + k += (mb != NULL); + } + + /* some fragments were encountered, drain death row */ + if (tm != 0) + rte_ip_frag_free_death_row(&lc->frag.dr, 0); + + return k; +} + + +static int +reassemble_lcore_init(struct lcore_conf *lc, uint32_t cid) +{ + int32_t sid; + uint32_t i; + uint64_t frag_cycles; + const struct lcore_rx_queue *rxq; + const struct rte_eth_rxtx_callback *cb; + + /* create fragment table */ + sid = rte_lcore_to_socket_id(cid); + frag_cycles = (rte_get_tsc_hz() + MS_PER_S - 1) / + MS_PER_S * FRAG_TTL_MS; + + lc->frag.tbl = rte_ip_frag_table_create(frag_tbl_sz, + FRAG_TBL_BUCKET_ENTRIES, frag_tbl_sz, frag_cycles, sid); + if (lc->frag.tbl == NULL) { + printf("%s(%u): failed to create fragment table of size: %u, " + "error code: %d\n", + __func__, cid, frag_tbl_sz, rte_errno); + return -ENOMEM; + } + + /* setup reassemble RX callbacks for all queues */ + for (i = 0; i != lc->nb_rx_queue; i++) { + + rxq = lc->rx_queue_list + i; + cb = rte_eth_add_rx_callback(rxq->port_id, rxq->queue_id, + rx_callback, lc); + if (cb == NULL) { + printf("%s(%u): failed to install RX callback for " + "portid=%u, queueid=%u, error code: %d\n", + __func__, cid, + rxq->port_id, rxq->queue_id, rte_errno); + return -ENOMEM; + } + } + + return 0; +} + +static int +reassemble_init(void) +{ + int32_t rc; + uint32_t i, lc; + + rc = 0; + for (i = 0; i != nb_lcore_params; i++) { + lc = lcore_params[i].lcore_id; + rc = reassemble_lcore_init(lcore_conf + lc, lc); + if (rc != 0) + break; + } + + return rc; +} + int32_t main(int32_t argc, char **argv) { @@ -2208,6 +2486,13 @@ main(int32_t argc, char **argv) RTE_ETH_EVENT_IPSEC, inline_ipsec_event_callback, NULL); } + /* fragment reassemble is enabled */ + if (frag_tbl_sz != 0) { + ret = reassemble_init(); + if (ret != 0) + rte_exit(EXIT_FAILURE, "failed at reassemble init"); + } + check_all_ports_link_status(enabled_port_mask); /* launch per-lcore init on every lcore */ diff --git a/examples/ipsec-secgw/ipsec.h b/examples/ipsec-secgw/ipsec.h index 88bd24dc14..6e48466cc7 100644 --- a/examples/ipsec-secgw/ipsec.h +++ b/examples/ipsec-secgw/ipsec.h @@ -203,6 +203,7 @@ struct socket_ctx { struct rt_ctx *rt_ip4; struct rt_ctx *rt_ip6; struct rte_mempool *mbuf_pool; + struct rte_mempool *mbuf_pool_indir; struct rte_mempool *session_pool; struct rte_mempool *session_priv_pool; }; diff --git a/examples/ipsec-secgw/meson.build b/examples/ipsec-secgw/meson.build index 81c146ebc8..9ece345cff 100644 --- a/examples/ipsec-secgw/meson.build +++ b/examples/ipsec-secgw/meson.build @@ -6,7 +6,7 @@ # To build this example as a standalone application with an already-installed # DPDK instance, use 'make' -deps += ['security', 'lpm', 'acl', 'hash', 'ipsec'] +deps += ['security', 'lpm', 'acl', 'hash', 'ip_frag', 'ipsec'] allow_experimental_apis = true sources = files( 'esp.c', 'ipsec.c', 'ipsec_process.c', 'ipsec-secgw.c',