numam-dpdk/examples/ipsec-secgw/ipsec_worker.h
Nithin Dabilpuram 0d76e22d11 examples/ipsec-secgw: add poll mode worker for inline proto
Add a separate worker thread for the case where all SAs are of
type inline protocol offload and librte_ipsec is enabled, to make
that path more optimal. The current default worker supports all
kinds of SAs, which leads to a lot of per-packet checks and
branching on the SA type, of which there are five.

Also make provision for choosing different poll mode workers for
different combinations of SA types, with the default being the
existing poll mode worker that supports all SA types.

Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com>
Acked-by: Akhil Goyal <gakhil@marvell.com>
2022-06-01 16:26:34 +02:00
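The selection described above (a specialized worker only when every SA is inline protocol offload) can be illustrated with a minimal sketch; wrkr_select() and its boolean parameters are assumptions for illustration, not the actual ipsec-secgw code:

#include <stdbool.h>

typedef void (*ipsec_worker_fn_t)(void);

/* Real entry points, declared further down in this header */
void ipsec_poll_mode_worker(void);      /* default: supports every SA type */
void ipsec_poll_mode_wrkr_inl_pr(void); /* all SAs are inline protocol offload */

/* Hypothetical selector: pick the specialized worker only when all SAs
 * are inline protocol offload and librte_ipsec is enabled; otherwise
 * fall back to the default worker.
 */
static ipsec_worker_fn_t
wrkr_select(bool all_sa_inline_proto, bool librte_ipsec_enabled)
{
	if (all_sa_inline_proto && librte_ipsec_enabled)
		return ipsec_poll_mode_wrkr_inl_pr;
	return ipsec_poll_mode_worker;
}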


/* SPDX-License-Identifier: BSD-3-Clause
* Copyright (C) 2020 Marvell International Ltd.
*/
#ifndef _IPSEC_WORKER_H_
#define _IPSEC_WORKER_H_
#include <rte_acl.h>
#include <rte_ethdev.h>
#include <rte_lpm.h>
#include <rte_lpm6.h>
#include "ipsec.h"
/* Configure how many packets ahead to prefetch, when reading packets */
#define PREFETCH_OFFSET 3
#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */
enum pkt_type {
PKT_TYPE_PLAIN_IPV4 = 1,
PKT_TYPE_IPSEC_IPV4,
PKT_TYPE_PLAIN_IPV6,
PKT_TYPE_IPSEC_IPV6,
PKT_TYPE_INVALID
};
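/* Per-packet processing outcome */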
enum {
PKT_DROPPED = 0,
PKT_FORWARDED,
PKT_POSTED /* for lookaside case */
};
struct route_table {
struct rt_ctx *rt4_ctx;
struct rt_ctx *rt6_ctx;
};
/*
* Config required by the event mode worker with Tx internal port
*/
struct lcore_conf_ev_tx_int_port_wrkr {
struct ipsec_ctx inbound;
struct ipsec_ctx outbound;
struct route_table rt;
} __rte_cache_aligned;
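/*
* Poll mode worker entry points. The default worker handles every SA type;
* the _inl_pr variants are specialized for the case where all SAs use
* inline protocol offload.
*/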
void ipsec_poll_mode_worker(void);
void ipsec_poll_mode_wrkr_inl_pr(void);
void ipsec_poll_mode_wrkr_inl_pr_ss(void);
int ipsec_launch_one_lcore(void *args);
/*
* Helper routine for inline and CPU (synchronous) processing.
* This is just to satisfy inbound_sa_check() and get_hop_for_offload_pkt();
* it should be removed in the future.
*/
static inline void
prep_process_group(void *sa, struct rte_mbuf *mb[], uint32_t cnt)
{
uint32_t j;
struct ipsec_mbuf_metadata *priv;
for (j = 0; j != cnt; j++) {
priv = get_priv(mb[j]);
priv->sa = sa;
/* Set up TSO-related fields if TSO is enabled */
if (priv->sa->mss) {
uint32_t ptype = mb[j]->packet_type;
/* only TCP is supported */
if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP) {
mb[j]->tso_segsz = priv->sa->mss;
if ((IS_TUNNEL(priv->sa->flags))) {
mb[j]->outer_l3_len = mb[j]->l3_len;
mb[j]->outer_l2_len = mb[j]->l2_len;
mb[j]->ol_flags |=
RTE_MBUF_F_TX_TUNNEL_ESP;
if (RTE_ETH_IS_IPV4_HDR(ptype))
mb[j]->ol_flags |=
RTE_MBUF_F_TX_OUTER_IP_CKSUM;
}
mb[j]->l4_len = sizeof(struct rte_tcp_hdr);
mb[j]->ol_flags |= (RTE_MBUF_F_TX_TCP_SEG |
RTE_MBUF_F_TX_TCP_CKSUM);
if (RTE_ETH_IS_IPV4_HDR(ptype))
mb[j]->ol_flags |=
RTE_MBUF_F_TX_OUTER_IPV4;
else
mb[j]->ol_flags |=
RTE_MBUF_F_TX_OUTER_IPV6;
}
}
}
}
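/* Trim the mbuf so pkt_len matches the IPv4 total_length plus l2_len */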
static __rte_always_inline void
adjust_ipv4_pktlen(struct rte_mbuf *m, const struct rte_ipv4_hdr *iph,
uint32_t l2_len)
{
uint32_t plen, trim;
plen = rte_be_to_cpu_16(iph->total_length) + l2_len;
if (plen < m->pkt_len) {
trim = m->pkt_len - plen;
rte_pktmbuf_trim(m, trim);
}
}
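/* Trim the mbuf so pkt_len matches the IPv6 payload_len plus headers */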
static __rte_always_inline void
adjust_ipv6_pktlen(struct rte_mbuf *m, const struct rte_ipv6_hdr *iph,
uint32_t l2_len)
{
uint32_t plen, trim;
plen = rte_be_to_cpu_16(iph->payload_len) + sizeof(*iph) + l2_len;
if (plen < m->pkt_len) {
trim = m->pkt_len - plen;
rte_pktmbuf_trim(m, trim);
}
}
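/*
* Classify one received packet as plain IPv4/IPv6 or IPsec (ESP), strip the
* Ethernet header, pre-compute tx_offload, and, for inline-processed packets,
* save the SA pointer retrieved from the security session userdata in the
* mbuf private data.
*/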
static __rte_always_inline void
prepare_one_packet(struct rte_security_ctx *ctx, struct rte_mbuf *pkt,
struct ipsec_traffic *t)
{
uint32_t ptype = pkt->packet_type;
const struct rte_ether_hdr *eth;
const struct rte_ipv4_hdr *iph4;
const struct rte_ipv6_hdr *iph6;
uint32_t tun_type, l3_type;
uint64_t tx_offload;
uint16_t l3len;
tun_type = ptype & RTE_PTYPE_TUNNEL_MASK;
l3_type = ptype & RTE_PTYPE_L3_MASK;
eth = rte_pktmbuf_mtod(pkt, const struct rte_ether_hdr *);
if (RTE_ETH_IS_IPV4_HDR(l3_type)) {
iph4 = (const struct rte_ipv4_hdr *)rte_pktmbuf_adj(pkt,
RTE_ETHER_HDR_LEN);
adjust_ipv4_pktlen(pkt, iph4, 0);
if (tun_type == RTE_PTYPE_TUNNEL_ESP) {
t->ipsec.pkts[(t->ipsec.num)++] = pkt;
} else {
t->ip4.data[t->ip4.num] = &iph4->next_proto_id;
t->ip4.pkts[(t->ip4.num)++] = pkt;
}
tx_offload = sizeof(*iph4) << RTE_MBUF_L2_LEN_BITS;
} else if (RTE_ETH_IS_IPV6_HDR(l3_type)) {
int next_proto;
size_t ext_len;
uint8_t *p;
/* get protocol type */
iph6 = (const struct rte_ipv6_hdr *)rte_pktmbuf_adj(pkt,
RTE_ETHER_HDR_LEN);
adjust_ipv6_pktlen(pkt, iph6, 0);
l3len = sizeof(struct ip6_hdr);
if (tun_type == RTE_PTYPE_TUNNEL_ESP) {
t->ipsec.pkts[(t->ipsec.num)++] = pkt;
} else {
t->ip6.data[t->ip6.num] = &iph6->proto;
t->ip6.pkts[(t->ip6.num)++] = pkt;
}
/* Determine l3 header size up to ESP extension by walking
* through extension headers.
*/
if (l3_type == RTE_PTYPE_L3_IPV6_EXT ||
l3_type == RTE_PTYPE_L3_IPV6_EXT_UNKNOWN) {
p = rte_pktmbuf_mtod(pkt, uint8_t *);
next_proto = iph6->proto;
while (next_proto != IPPROTO_ESP &&
l3len < pkt->data_len &&
(next_proto = rte_ipv6_get_next_ext(p + l3len,
next_proto, &ext_len)) >= 0)
l3len += ext_len;
/* Drop pkt when IPv6 header exceeds first seg size */
if (unlikely(l3len > pkt->data_len)) {
free_pkts(&pkt, 1);
return;
}
}
tx_offload = l3len << RTE_MBUF_L2_LEN_BITS;
} else {
/* Unknown/Unsupported type, drop the packet */
RTE_LOG(ERR, IPSEC, "Unsupported packet type 0x%x\n",
rte_be_to_cpu_16(eth->ether_type));
free_pkts(&pkt, 1);
return;
}
if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP)
tx_offload |= (sizeof(struct rte_tcp_hdr) <<
(RTE_MBUF_L2_LEN_BITS + RTE_MBUF_L3_LEN_BITS));
else if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP)
tx_offload |= (sizeof(struct rte_udp_hdr) <<
(RTE_MBUF_L2_LEN_BITS + RTE_MBUF_L3_LEN_BITS));
pkt->tx_offload = tx_offload;
/* Check if the packet has been processed inline. For inline protocol
* processed packets, the metadata in the mbuf can be used to identify
* the security processing done on the packet. The metadata will be
* used to retrieve the application registered userdata associated
* with the security session.
*/
if (ctx && pkt->ol_flags & RTE_MBUF_F_RX_SEC_OFFLOAD) {
struct ipsec_sa *sa;
struct ipsec_mbuf_metadata *priv;
/* Retrieve the userdata registered. Here, the userdata
* registered is the SA pointer.
*/
sa = (struct ipsec_sa *)rte_security_get_userdata(ctx,
*rte_security_dynfield(pkt));
if (sa == NULL) {
/* userdata could not be retrieved */
return;
}
/* Save SA as priv member in mbuf. This will be used in the
* IPsec selector (SP-SA) check.
*/
priv = get_priv(pkt);
priv->sa = sa;
}
}
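/* Classify a burst of received packets into the ipsec/ip4/ip6 traffic groups */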
static __rte_always_inline void
prepare_traffic(struct rte_security_ctx *ctx, struct rte_mbuf **pkts,
struct ipsec_traffic *t, uint16_t nb_pkts)
{
int32_t i;
t->ipsec.num = 0;
t->ip4.num = 0;
t->ip6.num = 0;
for (i = 0; i < (nb_pkts - PREFETCH_OFFSET); i++) {
rte_prefetch0(rte_pktmbuf_mtod(pkts[i + PREFETCH_OFFSET],
void *));
prepare_one_packet(ctx, pkts[i], t);
}
/* Process the remaining packets */
for (; i < nb_pkts; i++)
prepare_one_packet(ctx, pkts[i], t);
}
/* Send burst of packets on an output interface */
static __rte_always_inline int32_t
send_burst(struct lcore_conf *qconf, uint16_t n, uint16_t port)
{
struct rte_mbuf **m_table;
int32_t ret;
uint16_t queueid;
queueid = qconf->tx_queue_id[port];
m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table;
ret = rte_eth_tx_burst(port, queueid, m_table, n);
core_stats_update_tx(ret);
if (unlikely(ret < n)) {
do {
free_pkts(&m_table[ret], 1);
} while (++ret < n);
}
return 0;
}
/*
* Helper function to fragment and queue for TX one packet.
*/
static __rte_always_inline uint32_t
send_fragment_packet(struct lcore_conf *qconf, struct rte_mbuf *m,
uint16_t port, uint8_t proto)
{
struct rte_ether_hdr *ethhdr;
struct rte_ipv4_hdr *ip;
struct rte_mbuf *pkt;
struct buffer *tbl;
uint32_t len, n, i;
int32_t rc;
tbl = qconf->tx_mbufs + port;
len = tbl->len;
/* free space for new fragments */
if (len + RTE_LIBRTE_IP_FRAG_MAX_FRAG >= RTE_DIM(tbl->m_table)) {
send_burst(qconf, len, port);
len = 0;
}
n = RTE_DIM(tbl->m_table) - len;
/* Strip the ethernet header that was prepended earlier */
rte_pktmbuf_adj(m, RTE_ETHER_HDR_LEN);
if (proto == IPPROTO_IP)
rc = rte_ipv4_fragment_packet(m, tbl->m_table + len,
n, mtu_size, m->pool, qconf->frag.pool_indir);
else
rc = rte_ipv6_fragment_packet(m, tbl->m_table + len,
n, mtu_size, m->pool, qconf->frag.pool_indir);
if (rc < 0) {
RTE_LOG(ERR, IPSEC,
"%s: failed to fragment packet with size %u, "
"error code: %d\n",
__func__, m->pkt_len, rte_errno);
rc = 0;
}
i = len;
len += rc;
for (; i < len; i++) {
pkt = tbl->m_table[i];
/* Update Ethernet header */
ethhdr = (struct rte_ether_hdr *)
rte_pktmbuf_prepend(pkt, RTE_ETHER_HDR_LEN);
pkt->l2_len = RTE_ETHER_HDR_LEN;
if (proto == IPPROTO_IP) {
ethhdr->ether_type =
rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
/* Update minimum offload data */
pkt->l3_len = sizeof(struct rte_ipv4_hdr);
pkt->ol_flags |= qconf->outbound.ipv4_offloads;
ip = (struct rte_ipv4_hdr *)(ethhdr + 1);
ip->hdr_checksum = 0;
/* calculate IPv4 cksum in SW */
if ((pkt->ol_flags & RTE_MBUF_F_TX_IP_CKSUM) == 0)
ip->hdr_checksum = rte_ipv4_cksum(ip);
} else {
ethhdr->ether_type =
rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
/* Update minimum offload data */
pkt->l3_len = sizeof(struct rte_ipv6_hdr);
pkt->ol_flags |= qconf->outbound.ipv6_offloads;
}
memcpy(&ethhdr->src_addr, &ethaddr_tbl[port].src,
sizeof(struct rte_ether_addr));
memcpy(&ethhdr->dst_addr, &ethaddr_tbl[port].dst,
sizeof(struct rte_ether_addr));
}
free_pkts(&m, 1);
return len;
}
/* Enqueue a single packet, and send burst if queue is filled */
static __rte_always_inline int32_t
send_single_packet(struct rte_mbuf *m, uint16_t port, uint8_t proto)
{
uint32_t lcore_id;
uint16_t len;
struct lcore_conf *qconf;
lcore_id = rte_lcore_id();
qconf = &lcore_conf[lcore_id];
len = qconf->tx_mbufs[port].len;
/* L2 header is already part of packet */
if (m->pkt_len - RTE_ETHER_HDR_LEN <= mtu_size) {
qconf->tx_mbufs[port].m_table[len] = m;
len++;
/* need to fragment the packet */
} else if (frag_tbl_sz > 0)
len = send_fragment_packet(qconf, m, port, proto);
else
free_pkts(&m, 1);
/* enough pkts to be sent */
if (unlikely(len == MAX_PKT_BURST)) {
send_burst(qconf, MAX_PKT_BURST, port);
len = 0;
}
qconf->tx_mbufs[port].len = len;
return 0;
}
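/*
* Inbound SPD lookup: classify plain packets against the security policy
* (ACL) context. BYPASS packets pass through unchanged, DISCARD packets are
* dropped, and PROTECT packets are kept only if they match the expected
* inbound SA.
*/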
static __rte_always_inline void
inbound_sp_sa(struct sp_ctx *sp, struct sa_ctx *sa, struct traffic_type *ip,
uint16_t lim, struct ipsec_spd_stats *stats)
{
struct rte_mbuf *m;
uint32_t i, j, res, sa_idx;
if (ip->num == 0 || sp == NULL)
return;
rte_acl_classify((struct rte_acl_ctx *)sp, ip->data, ip->res,
ip->num, DEFAULT_MAX_CATEGORIES);
j = 0;
for (i = 0; i < ip->num; i++) {
m = ip->pkts[i];
res = ip->res[i];
if (res == BYPASS) {
ip->pkts[j++] = m;
stats->bypass++;
continue;
}
if (res == DISCARD) {
free_pkts(&m, 1);
stats->discard++;
continue;
}
/* Only check SPI match for processed IPSec packets */
if (i < lim && ((m->ol_flags & RTE_MBUF_F_RX_SEC_OFFLOAD) == 0)) {
stats->discard++;
free_pkts(&m, 1);
continue;
}
sa_idx = res - 1;
if (!inbound_sa_check(sa, m, sa_idx)) {
stats->discard++;
free_pkts(&m, 1);
continue;
}
ip->pkts[j++] = m;
stats->protect++;
}
ip->num = j;
}
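/*
* For offloaded packets the egress port comes from the SA saved in mbuf
* private data instead of an LPM lookup. The return value mimics the LPM
* lookup result format (IPv4: port with RTE_LPM_LOOKUP_SUCCESS set, 0 on
* failure; IPv6: port, or -1 on failure).
*/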
static __rte_always_inline int32_t
get_hop_for_offload_pkt(struct rte_mbuf *pkt, int is_ipv6)
{
struct ipsec_mbuf_metadata *priv;
struct ipsec_sa *sa;
priv = get_priv(pkt);
sa = priv->sa;
if (unlikely(sa == NULL)) {
RTE_LOG(ERR, IPSEC, "SA not saved in private data\n");
goto fail;
}
if (is_ipv6)
return sa->portid;
/* else */
return (sa->portid | RTE_LPM_LOOKUP_SUCCESS);
fail:
if (is_ipv6)
return -1;
/* else */
return 0;
}
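/*
* Route a burst of IPv4 packets: LPM lookup for non-offloaded packets,
* SA-provided port for offloaded ones, then prepend the Ethernet header
* and queue each packet for transmission.
*/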
static __rte_always_inline void
route4_pkts(struct rt_ctx *rt_ctx, struct rte_mbuf *pkts[],
uint8_t nb_pkts, uint64_t tx_offloads, bool ip_cksum)
{
uint32_t hop[MAX_PKT_BURST * 2];
uint32_t dst_ip[MAX_PKT_BURST * 2];
struct rte_ether_hdr *ethhdr;
int32_t pkt_hop = 0;
uint16_t i, offset;
uint16_t lpm_pkts = 0;
unsigned int lcoreid = rte_lcore_id();
struct rte_mbuf *pkt;
uint16_t port;
if (nb_pkts == 0)
return;
/* Need to do an LPM lookup for non-inline packets. Inline packets will
* have port ID in the SA
*/
for (i = 0; i < nb_pkts; i++) {
pkt = pkts[i];
if (!(pkt->ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD)) {
/* Security offload not enabled. So an LPM lookup is
* required to get the hop
*/
offset = offsetof(struct ip, ip_dst);
dst_ip[lpm_pkts] = *rte_pktmbuf_mtod_offset(pkt,
uint32_t *, offset);
dst_ip[lpm_pkts] = rte_be_to_cpu_32(dst_ip[lpm_pkts]);
lpm_pkts++;
}
}
rte_lpm_lookup_bulk((struct rte_lpm *)rt_ctx, dst_ip, hop, lpm_pkts);
lpm_pkts = 0;
for (i = 0; i < nb_pkts; i++) {
pkt = pkts[i];
if (pkt->ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD) {
/* Read hop from the SA */
pkt_hop = get_hop_for_offload_pkt(pkt, 0);
} else {
/* Need to use hop returned by lookup */
pkt_hop = hop[lpm_pkts++];
}
if ((pkt_hop & RTE_LPM_LOOKUP_SUCCESS) == 0) {
core_statistics[lcoreid].lpm4.miss++;
free_pkts(&pkt, 1);
continue;
}
port = pkt_hop & 0xff;
/* Update minimum offload data */
pkt->l3_len = sizeof(struct rte_ipv4_hdr);
pkt->l2_len = RTE_ETHER_HDR_LEN;
pkt->ol_flags |= RTE_MBUF_F_TX_IPV4;
/* Update Ethernet header */
ethhdr = (struct rte_ether_hdr *)
rte_pktmbuf_prepend(pkt, RTE_ETHER_HDR_LEN);
if (ip_cksum) {
struct rte_ipv4_hdr *ip;
pkt->ol_flags |= tx_offloads;
ip = (struct rte_ipv4_hdr *)(ethhdr + 1);
ip->hdr_checksum = 0;
/* calculate IPv4 cksum in SW */
if ((pkt->ol_flags & RTE_MBUF_F_TX_IP_CKSUM) == 0)
ip->hdr_checksum = rte_ipv4_cksum(ip);
}
ethhdr->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
memcpy(&ethhdr->src_addr, &ethaddr_tbl[port].src,
sizeof(struct rte_ether_addr));
memcpy(&ethhdr->dst_addr, &ethaddr_tbl[port].dst,
sizeof(struct rte_ether_addr));
send_single_packet(pkt, port, IPPROTO_IP);
}
}
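/*
* Route a burst of IPv6 packets: LPM6 lookup for non-offloaded packets,
* SA-provided port for offloaded ones, then prepend the Ethernet header
* and queue each packet for transmission.
*/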
static __rte_always_inline void
route6_pkts(struct rt_ctx *rt_ctx, struct rte_mbuf *pkts[], uint8_t nb_pkts)
{
int32_t hop[MAX_PKT_BURST * 2];
uint8_t dst_ip[MAX_PKT_BURST * 2][16];
struct rte_ether_hdr *ethhdr;
uint8_t *ip6_dst;
int32_t pkt_hop = 0;
uint16_t i, offset;
uint16_t lpm_pkts = 0;
unsigned int lcoreid = rte_lcore_id();
struct rte_mbuf *pkt;
uint16_t port;
if (nb_pkts == 0)
return;
/* Need to do an LPM lookup for non-inline packets. Inline packets will
* have port ID in the SA
*/
for (i = 0; i < nb_pkts; i++) {
pkt = pkts[i];
if (!(pkt->ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD)) {
/* Security offload not enabled. So an LPM lookup is
* required to get the hop
*/
offset = offsetof(struct ip6_hdr, ip6_dst);
ip6_dst = rte_pktmbuf_mtod_offset(pkt, uint8_t *,
offset);
memcpy(&dst_ip[lpm_pkts][0], ip6_dst, 16);
lpm_pkts++;
}
}
rte_lpm6_lookup_bulk_func((struct rte_lpm6 *)rt_ctx, dst_ip, hop,
lpm_pkts);
lpm_pkts = 0;
for (i = 0; i < nb_pkts; i++) {
pkt = pkts[i];
if (pkt->ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD) {
/* Read hop from the SA */
pkt_hop = get_hop_for_offload_pkt(pkt, 1);
} else {
/* Need to use hop returned by lookup */
pkt_hop = hop[lpm_pkts++];
}
if (pkt_hop == -1) {
core_statistics[lcoreid].lpm6.miss++;
free_pkts(&pkt, 1);
continue;
}
port = pkt_hop & 0xff;
/* Update minimum offload data */
pkt->ol_flags |= RTE_MBUF_F_TX_IPV6;
pkt->l3_len = sizeof(struct ip6_hdr);
pkt->l2_len = RTE_ETHER_HDR_LEN;
/* Update Ethernet header */
ethhdr = (struct rte_ether_hdr *)
rte_pktmbuf_prepend(pkt, RTE_ETHER_HDR_LEN);
ethhdr->ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
memcpy(&ethhdr->src_addr, &ethaddr_tbl[port].src,
sizeof(struct rte_ether_addr));
memcpy(&ethhdr->dst_addr, &ethaddr_tbl[port].dst,
sizeof(struct rte_ether_addr));
send_single_packet(pkt, port, IPPROTO_IPV6);
}
}
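/* Flush any packets buffered for transmission on all ports */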
static __rte_always_inline void
drain_tx_buffers(struct lcore_conf *qconf)
{
struct buffer *buf;
uint32_t portid;
for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
buf = &qconf->tx_mbufs[portid];
if (buf->len == 0)
continue;
send_burst(qconf, buf->len, portid);
buf->len = 0;
}
}
#endif /* _IPSEC_WORKER_H_ */