PFIL_MEMPTR for ipfw link level hook

With the new pfil(9) KPI it is possible to pass a void pointer with a length, instead of an mbuf pointer, to a packet filter. Until this commit no filters supported that, so pfil ran such packets through a shim function, pfil_fake_mbuf(). Now the ipfw(4) hook named "default-link", which is instantiated when the net.link.ether.ipfw sysctl is on, supports processing pointer/length packets natively.

- ip_fw_args now has a union for either an mbuf or a void *, and if the flags carry a non-zero length, then we use the void *.
- Throughout ipfw_chk() we handle the mem and mbuf cases differently.
- ether_header goes away from args. It is now ipfw_chk()'s responsibility to parse the Ethernet header.
- ipfw_log() now uses different bpf APIs to log packets.

Although ipfw_chk() is now capable of processing pointer/length packets, this commit adds support for the link level hook only; see ipfw_check_frame(). Potentially the IP processing hook ipfw_check_packet() can be improved too, but that requires more changes since the hook supports more complex actions: NAT, divert, etc.

Reviewed by: ae
Differential Revision: https://reviews.freebsd.org/D19357
svn path=/head/; revision=345166
2019-03-14 22:52:16 +00:00 · 2019-03-14 22:52:16 +00:00 · f355cb3e6f · 2020-12-20 02:59:44 +00:00
commit f355cb3e6f
parent dc0fa4f712
5 changed files with 177 additions and 113 deletions
--- a/sys/netpfil/ipfw/ip_fw2.c
+++ b/sys/netpfil/ipfw/ip_fw2.c
@ -1258,7 +1258,6 @@ jump_linear(struct ip_fw_chain *chain, struct ip_fw *f, int num,
 *
 *	args->m	(in/out) The packet; we set to NULL when/if we nuke it.
 *		Starts with the IP header.
- *	args->eh (in)	Mac header if present, NULL for layer3 packet.
 *	args->L3offset	Number of bytes bypassed if we came from L2.
 *			e.g. often sizeof(eh)  ** NOTYET **
 *	args->ifp	Incoming or outgoing interface.
@ -1297,23 +1296,19 @@ ipfw_chk(struct ip_fw_args *args)
 	 * the implementation of the various instructions to make sure
 	 * that they still work.
 	 *
-	 * args->eh	The MAC header. It is non-null for a layer2
-	 *	packet, it is NULL for a layer-3 packet.
-	 * **notyet**
-	 * args->L3offset Offset in the packet to the L3 (IP or equiv.) header.
-	 *
 	 * m | args->m	Pointer to the mbuf, as received from the caller.
 	 *	It may change if ipfw_chk() does an m_pullup, or if it
 	 *	consumes the packet because it calls send_reject().
 	 *	XXX This has to change, so that ipfw_chk() never modifies
 	 *	or consumes the buffer.
-	 * ip	is the beginning of the ip(4 or 6) header.
-	 *	Calculated by adding the L3offset to the start of data.
-	 *	(Until we start using L3offset, the packet is
-	 *	supposed to start with the ip header).
+	 *	OR
+	 * args->mem	Pointer to contiguous memory chunk.
+	 * ip	Is the beginning of the ip(4 or 6) header.
+	 * eh	Ethernet header in case if input is Layer2.
 	 */
-	struct mbuf *m = args->m;
-	struct ip *ip = mtod(m, struct ip *);
+	struct mbuf *m;
+	struct ip *ip;
+	struct ether_header *eh;

 	/*
 	 * For rules which contain uid/gid or jail constraints, cache
@ -1370,7 +1365,6 @@ ipfw_chk(struct ip_fw_args *args)
 	struct in_addr src_ip, dst_ip;		/* NOTE: network format	*/
 	int iplen = 0;
 	int pktlen;
-	uint16_t etype;			/* Host order stored ether type */

 	struct ipfw_dyn_info dyn_info;
 	struct ip_fw *q = NULL;
@ -1394,14 +1388,45 @@ ipfw_chk(struct ip_fw_args *args)

 	int done = 0;		/* flag to exit the outer loop */
 	IPFW_RLOCK_TRACKER;
+	bool mem;

-	if (m->m_flags & M_SKIP_FIREWALL || (! V_ipfw_vnet_ready))
-		return (IP_FW_PASS);	/* accept */
+	if ((mem = (args->flags & IPFW_ARGS_LENMASK))) {
+		if (args->flags & IPFW_ARGS_ETHER) {
+			eh = (struct ether_header *)args->mem;
+			if (eh->ether_type == htons(ETHERTYPE_VLAN))
+				ip = (struct ip *)
+				    ((struct ether_vlan_header *)eh + 1);
+			else
+				ip = (struct ip *)(eh + 1);
+		} else {
+			eh = NULL;
+			ip = (struct ip *)args->mem;
+		}
+		pktlen = IPFW_ARGS_LENGTH(args->flags);
+		args->f_id.fib = args->ifp->if_fib;	/* best guess */
+	} else {
+		m = args->m;
+		if (m->m_flags & M_SKIP_FIREWALL || (! V_ipfw_vnet_ready))
+			return (IP_FW_PASS);	/* accept */
+		if (args->flags & IPFW_ARGS_ETHER) {
+	                /* We need some amount of data to be contiguous. */
+			if (m->m_len < min(m->m_pkthdr.len, max_protohdr) &&
+			    (args->m = m = m_pullup(m, min(m->m_pkthdr.len,
+			    max_protohdr))) == NULL)
+				goto pullup_failed;
+			eh = mtod(m, struct ether_header *);
+			ip = (struct ip *)(eh + 1);
+		} else {
+			eh = NULL;
+			ip = mtod(m, struct ip *);
+		}
+		pktlen = m->m_pkthdr.len;
+		args->f_id.fib = M_GETFIB(m); /* mbuf not altered */
+	}

 	dst_ip.s_addr = 0;		/* make sure it is initialized */
 	src_ip.s_addr = 0;		/* make sure it is initialized */
 	src_port = dst_port = 0;
-	pktlen = m->m_pkthdr.len;

 	DYN_INFO_INIT(&dyn_info);
 /*
@ -1411,28 +1436,41 @@ ipfw_chk(struct ip_fw_args *args)
 * this way).
 */
 #define PULLUP_TO(_len, p, T)	PULLUP_LEN(_len, p, sizeof(T))
+#define	EHLEN	(eh != NULL ? ((char *)ip - (char *)eh) : 0)
 #define PULLUP_LEN(_len, p, T)					\
 do {								\
-	int x = (_len) + T;					\
-	if ((m)->m_len < x) {					\
-		args->m = m = m_pullup(m, x);			\
-		if (m == NULL)					\
-			goto pullup_failed;			\
+	int x = (_len) + T + EHLEN;				\
+	if (mem) {						\
+		MPASS(pktlen >= x);				\
+		p = (char *)args->mem + (_len) + EHLEN;		\
+	} else {						\
+		if (__predict_false((m)->m_len < x)) {		\
+			args->m = m = m_pullup(m, x);		\
+			if (m == NULL)				\
+				goto pullup_failed;		\
+		}						\
+		p = mtod(m, char *) + (_len) + EHLEN;		\
+	}							\
+} while (0)
+/*
+ * In case pointers got stale after pullups, update them.
+ */
+#define	UPDATE_POINTERS()					\
+do {								\
+	if (!mem) {						\
+		if (eh != NULL) {				\
+			eh = mtod(m, struct ether_header *);	\
+			ip = (struct ip *)(eh + 1);		\
+		} else						\
+			ip = mtod(m, struct ip *);		\
+		args->m = m;					\
 	}							\
-	p = (mtod(m, char *) + (_len));				\
 } while (0)
-
-	/*
-	 * if we have an ether header,
-	 */
-	if (args->flags & IPFW_ARGS_ETHER)
-		etype = ntohs(args->eh->ether_type);
-	else
-		etype = 0;

 	/* Identify IP packets and fill up variables. */
 	if (pktlen >= sizeof(struct ip6_hdr) &&
-	    (etype == 0 || etype == ETHERTYPE_IPV6) && ip->ip_v == 6) {
+	    (eh == NULL || eh->ether_type == htons(ETHERTYPE_IPV6)) &&
+	    ip->ip_v == 6) {
 		struct ip6_hdr *ip6 = (struct ip6_hdr *)ip;

 		is_ipv6 = 1;
@ -1609,7 +1647,7 @@ do {								\
 				break;
 			} /*switch */
 		}
-		ip = mtod(m, struct ip *);
+		UPDATE_POINTERS();
 		ip6 = (struct ip6_hdr *)ip;
 		args->f_id.addr_type = 6;
 		args->f_id.src_ip6 = ip6->ip6_src;
@ -1617,7 +1655,8 @@ do {								\
 		args->f_id.flow_id6 = ntohl(ip6->ip6_flow);
 		iplen = ntohs(ip6->ip6_plen) + sizeof(*ip6);
 	} else if (pktlen >= sizeof(struct ip) &&
-	    (etype == 0 || etype == ETHERTYPE_IP) && ip->ip_v == 4) {
+	    (eh == NULL || eh->ether_type == htons(ETHERTYPE_IP)) &&
+	    ip->ip_v == 4) {
 		is_ipv4 = 1;
 		args->flags |= IPFW_ARGS_IP4;
 		hlen = ip->ip_hl << 2;
@ -1675,7 +1714,7 @@ do {								\
 			}
 		}

-		ip = mtod(m, struct ip *);
+		UPDATE_POINTERS();
 		args->f_id.addr_type = 4;
 		args->f_id.src_ip = ntohl(src_ip.s_addr);
 		args->f_id.dst_ip = ntohl(dst_ip.s_addr);
@ -1692,7 +1731,6 @@ do {								\
 	args->f_id.proto = proto;
 	args->f_id.src_port = src_port = ntohs(src_port);
 	args->f_id.dst_port = dst_port = ntohs(dst_port);
-	args->f_id.fib = M_GETFIB(m);

 	IPFW_PF_RLOCK(chain);
 	if (! V_ipfw_vnet_ready) { /* shutting down, leave NOW. */
@ -1720,7 +1758,7 @@ do {								\
 		oif = NULL;
 	} else {
 		MPASS(args->flags & IPFW_ARGS_OUT);
-		iif = m->m_pkthdr.rcvif;
+		iif = mem ? NULL : m->m_pkthdr.rcvif;
 		oif = args->ifp;
 	}

@ -1840,7 +1878,7 @@ do {								\
 						((ipfw_insn_mac *)cmd)->addr;
 					u_int32_t *mask = (u_int32_t *)
 						((ipfw_insn_mac *)cmd)->mask;
-					u_int32_t *hdr = (u_int32_t *)args->eh;
+					u_int32_t *hdr = (u_int32_t *)eh;

 					match =
 					    ( want[0] == (hdr[0] & mask[0]) &&
@ -1857,8 +1895,11 @@ do {								\

 					for (i = cmdlen - 1; !match && i>0;
 					    i--, p += 2)
-						match = (etype >= p[0] &&
-						    etype <= p[1]);
+						match =
+						    (ntohs(eh->ether_type) >=
+						    p[0] &&
+						    ntohs(eh->ether_type) <=
+						    p[1]);
 				}
 				break;

@ -2332,7 +2373,7 @@ do {								\
 			}

 			case O_LOG:
-				ipfw_log(chain, f, hlen, args, m,
+				ipfw_log(chain, f, hlen, args,
 				    offset | ip6f_mf, tablearg, ip);
 				match = 1;
 				break;
--- a/sys/netpfil/ipfw/ip_fw_bpf.c
+++ b/sys/netpfil/ipfw/ip_fw_bpf.c
@ -160,6 +160,28 @@ ipfwlog_clone_create(struct if_clone *ifc, int unit, caddr_t params)
 	return (0);
 }

+void
+ipfw_bpf_tap(u_char *pkt, u_int pktlen)
+{
+	LOGIF_RLOCK_TRACKER;
+
+	LOGIF_RLOCK();
+	if (V_log_if != NULL)
+		BPF_TAP(V_log_if, pkt, pktlen);
+	LOGIF_RUNLOCK();
+}
+
+void
+ipfw_bpf_mtap(struct mbuf *m)
+{
+	LOGIF_RLOCK_TRACKER;
+
+	LOGIF_RLOCK();
+	if (V_log_if != NULL)
+		BPF_MTAP(V_log_if, m);
+	LOGIF_RUNLOCK();
+}
+
 void
 ipfw_bpf_mtap2(void *data, u_int dlen, struct mbuf *m)
 {
--- a/sys/netpfil/ipfw/ip_fw_log.c
+++ b/sys/netpfil/ipfw/ip_fw_log.c
@ -99,30 +99,32 @@ __FBSDID("$FreeBSD$");
 */
 void
 ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen,
-    struct ip_fw_args *args, struct mbuf *m,
-    u_short offset, uint32_t tablearg, struct ip *ip)
+    struct ip_fw_args *args, u_short offset, uint32_t tablearg, struct ip *ip)
 {
 	char *action;
 	int limit_reached = 0;
 	char action2[92], proto[128], fragment[32];

 	if (V_fw_verbose == 0) {
-		if (args->flags & IPFW_ARGS_ETHER) /* layer2, use orig hdr */
-			ipfw_bpf_mtap2(args->eh, ETHER_HDR_LEN, m);
+		if (args->flags & IPFW_ARGS_LENMASK)
+			ipfw_bpf_tap(args->mem, IPFW_ARGS_LENGTH(args->flags));
+		else if (args->flags & IPFW_ARGS_ETHER)
+			/* layer2, use orig hdr */
+			ipfw_bpf_mtap(args->m);
 		else {
 			/* Add fake header. Later we will store
 			 * more info in the header.
 			 */
 			if (ip->ip_v == 4)
 				ipfw_bpf_mtap2("DDDDDDSSSSSS\x08\x00",
-				    ETHER_HDR_LEN, m);
+				    ETHER_HDR_LEN, args->m);
 			else if (ip->ip_v == 6)
 				ipfw_bpf_mtap2("DDDDDDSSSSSS\x86\xdd",
-				    ETHER_HDR_LEN, m);
+				    ETHER_HDR_LEN, args->m);
 			else
 				/* Obviously bogus EtherType. */
 				ipfw_bpf_mtap2("DDDDDDSSSSSS\xff\xff",
-				    ETHER_HDR_LEN, m);
+				    ETHER_HDR_LEN, args->m);
 		}
 		return;
 	}
--- a/sys/netpfil/ipfw/ip_fw_pfil.c
+++ b/sys/netpfil/ipfw/ip_fw_pfil.c
@ -328,69 +328,50 @@ ipfw_check_packet(struct mbuf **m0, struct ifnet *ifp, int flags,
 * ipfw processing for ethernet packets (in and out).
 */
 static pfil_return_t
-ipfw_check_frame(struct mbuf **m0, struct ifnet *ifp, int flags,
+ipfw_check_frame(pfil_packet_t p, struct ifnet *ifp, int flags,
    void *ruleset __unused, struct inpcb *inp)
 {
 	struct ip_fw_args args;
-	struct ether_header save_eh;
-	struct ether_header *eh;
-	struct m_tag *mtag;
-	struct mbuf *m;
 	pfil_return_t ret;
-	int i;
+	bool mem, realloc;
+	int ipfw;

-	args.flags = IPFW_ARGS_ETHER;
+	if (flags & PFIL_MEMPTR) {
+		mem = true;
+		realloc = false;
+		args.flags = PFIL_LENGTH(flags) | IPFW_ARGS_ETHER;
+		args.mem = p.mem;
+	} else {
+		mem = realloc = false;
+		args.flags = IPFW_ARGS_ETHER;
+	}
 	args.flags |= (flags & PFIL_IN) ? IPFW_ARGS_IN : IPFW_ARGS_OUT;
-again:
-	/* fetch start point from rule, if any.  remove the tag if present. */
-	mtag = m_tag_locate(*m0, MTAG_IPFW_RULE, 0, NULL);
-	if (mtag != NULL) {
-		args.rule = *((struct ipfw_rule_ref *)(mtag+1));
-		m_tag_delete(*m0, mtag);
-		if (args.rule.info & IPFW_ONEPASS)
-			return (0);
-		args.flags |= IPFW_ARGS_REF;
-	}
-
-	/* I need some amt of data to be contiguous */
-	m = *m0;
-	i = min(m->m_pkthdr.len, max_protohdr);
-	if (m->m_len < i) {
-		m = m_pullup(m, i);
-		if (m == NULL) {
-			*m0 = m;
-			return (0);
-		}
-	}
-	eh = mtod(m, struct ether_header *);
-	save_eh = *eh;			/* save copy for restore below */
-	m_adj(m, ETHER_HDR_LEN);	/* strip ethernet header */
-
-	args.m = m;		/* the packet we are looking at		*/
 	args.ifp = ifp;
-	args.eh = &save_eh;	/* MAC header for bridged/MAC packets	*/
-	args.inp = inp;	/* used by ipfw uid/gid/jail rules	*/
-	i = ipfw_chk(&args);
-	m = args.m;
-	if (m != NULL) {
+	args.inp = inp;
+
+again:
+	if (!mem) {
 		/*
-		 * Restore Ethernet header, as needed, in case the
-		 * mbuf chain was replaced by ipfw.
+		 * Fetch start point from rule, if any.
+		 * Remove the tag if present.
 		 */
-		M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT);
-		if (m == NULL) {
-			*m0 = NULL;
-			return (0);
+		struct m_tag *mtag;
+
+		mtag = m_tag_locate(*p.m, MTAG_IPFW_RULE, 0, NULL);
+		if (mtag != NULL) {
+			args.rule = *((struct ipfw_rule_ref *)(mtag+1));
+			m_tag_delete(*p.m, mtag);
+			if (args.rule.info & IPFW_ONEPASS)
+				return (PFIL_PASS);
+			args.flags |= IPFW_ARGS_REF;
 		}
-		if (eh != mtod(m, struct ether_header *))
-			bcopy(&save_eh, mtod(m, struct ether_header *),
-				ETHER_HDR_LEN);
+		args.m = *p.m;
 	}
-	*m0 = m;
+
+	ipfw = ipfw_chk(&args);

 	ret = PFIL_PASS;
-	/* Check result of ipfw_chk() */
-	switch (i) {
+	switch (ipfw) {
 	case IP_FW_PASS:
 		break;

@ -403,9 +384,16 @@ ipfw_check_frame(struct mbuf **m0, struct ifnet *ifp, int flags,
 			ret = PFIL_DROPPED;
 			break;
 		}
-		*m0 = NULL;
+		if (mem) {
+			if (pfil_realloc(&p, flags, ifp) != 0) {
+				ret = PFIL_DROPPED;
+				break;
+			}
+			mem = false;
+			realloc = true;
+		}
 		MPASS(args.flags & IPFW_ARGS_REF);
-		ip_dn_io_ptr(&m, &args);
+		ip_dn_io_ptr(p.m, &args);
 		return (PFIL_CONSUMED);

 	case IP_FW_NGTEE:
@ -414,9 +402,17 @@ ipfw_check_frame(struct mbuf **m0, struct ifnet *ifp, int flags,
 			ret = PFIL_DROPPED;
 			break;
 		}
+		if (mem) {
+			if (pfil_realloc(&p, flags, ifp) != 0) {
+				ret = PFIL_DROPPED;
+				break;
+			}
+			mem = false;
+			realloc = true;
+		}
 		MPASS(args.flags & IPFW_ARGS_REF);
-		(void )ng_ipfw_input_p(m0, &args, i == IP_FW_NGTEE);
-		if (i == IP_FW_NGTEE) /* ignore errors for NGTEE */
+		(void )ng_ipfw_input_p(p.m, &args, ipfw == IP_FW_NGTEE);
+		if (ipfw == IP_FW_NGTEE) /* ignore errors for NGTEE */
 			goto again;	/* continue with packet */
 		ret = PFIL_CONSUMED;
 		break;
@ -425,12 +421,15 @@ ipfw_check_frame(struct mbuf **m0, struct ifnet *ifp, int flags,
 		KASSERT(0, ("%s: unknown retval", __func__));
 	}

-	if (ret != PFIL_PASS) {
-		if (*m0)
-			FREE_PKT(*m0);
-		*m0 = NULL;
+	if (!mem && ret != PFIL_PASS) {
+		if (*p.m)
+			FREE_PKT(*p.m);
+		*p.m = NULL;
 	}

+	if (realloc && ret == PFIL_PASS)
+		ret = PFIL_REALLOCED;
+
 	return (ret);
 }

@ -545,7 +544,7 @@ ipfw_hook(int onoff, int pf)
 	pfil_hook_t *h;

 	pha.pa_version = PFIL_VERSION;
-	pha.pa_flags = PFIL_IN | PFIL_OUT;
+	pha.pa_flags = PFIL_IN | PFIL_OUT | PFIL_MEMPTR;
 	pha.pa_modname = "ipfw";
 	pha.pa_ruleset = NULL;

--- a/sys/netpfil/ipfw/ip_fw_private.h
+++ b/sys/netpfil/ipfw/ip_fw_private.h
@ -111,14 +111,11 @@ struct ip_fw_args {
 	struct inpcb		*inp;
 	union {
 		/*
-		 * We don't support forwarding on layer2, thus we can
-		 * keep eh pointer in this union.
 		 * next_hop[6] pointers can be used to point to next hop
 		 * stored in rule's opcode to avoid copying into hopstore.
 		 * Also, it is expected that all 0x1-0x10 flags are mutually
 		 * exclusive.
 		 */
-		struct ether_header	*eh;	/* for bridged packets	*/
 		struct sockaddr_in	*next_hop;
 		struct sockaddr_in6	*next_hop6;
 		/* ipfw next hop storage */
@ -129,8 +126,10 @@ struct ip_fw_args {
 			uint16_t	sin6_port;
 		} hopstore6;
 	};
-
-	struct mbuf		*m;	/* the mbuf chain		*/
+	union {
+		struct mbuf	*m;	/* the mbuf chain		*/
+		void		*mem;	/* or memory pointer		*/
+	};
 	struct ipfw_flow_id	f_id;	/* grabbed from IP header	*/
 };

@ -164,10 +163,11 @@ struct ip_fw_chain;

 void ipfw_bpf_init(int);
 void ipfw_bpf_uninit(int);
+void ipfw_bpf_tap(u_char *, u_int);
+void ipfw_bpf_mtap(struct mbuf *);
 void ipfw_bpf_mtap2(void *, u_int, struct mbuf *);
 void ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen,
-    struct ip_fw_args *args, struct mbuf *m,
-    u_short offset, uint32_t tablearg, struct ip *ip);
+    struct ip_fw_args *args, u_short offset, uint32_t tablearg, struct ip *ip);
 VNET_DECLARE(u_int64_t, norule_counter);
 #define	V_norule_counter	VNET(norule_counter)
 VNET_DECLARE(int, verbose_limit);