Introduce ip_fastforward and remove ip_flow.

Short description of ip_fastforward: it adds full direct process-to-completion IPv4 forwarding code; handles IP fragmentation, including hardware support (ip_flow did not); sends ICMP needfrag to the source if DF is set (ip_flow did not); supports ipfw and ipfilter (ip_flow did not); supports divert, ipfw fwd and ipfilter NAT (ip_flow did not); and returns anything it can't handle back to normal ip_input. Enable with sysctl -w net.inet.ip.fastforwarding=1. Reviewed by: sam (mentor)
svn path=/head/; revision=122702
2003-11-14 21:02:22 +00:00 · 2003-11-14 21:02:22 +00:00 · 9188b4a169 · 2020-12-20 02:59:44 +00:00
commit 9188b4a169
parent f9d801d6f7
14 changed files with 846 additions and 460 deletions
--- a/sys/conf/files
+++ b/sys/conf/files
@ -1448,7 +1448,7 @@ netinet/ip_ecn.c	optional inet
 netinet/ip_ecn.c	optional inet6
 netinet/ip_encap.c	optional inet
 netinet/ip_encap.c	optional inet6
-netinet/ip_flow.c	optional inet
+netinet/ip_fastfwd.c	optional inet
 netinet/ip_fw2.c	optional ipfirewall
 netinet/ip_icmp.c	optional inet
 netinet/ip_input.c	optional inet
--- a/sys/net/if_arcsubr.c
+++ b/sys/net/if_arcsubr.c
@ -543,14 +543,14 @@ arc_input(ifp, m)
 #ifdef INET
 	case ARCTYPE_IP:
 		m_adj(m, ARC_HDRNEWLEN);
-		if (ipflow_fastforward(m))
+		if (ip_fastforward(m))
 			return;
 		isr = NETISR_IP;
 		break;

 	case ARCTYPE_IP_OLD:
 		m_adj(m, ARC_HDRLEN);
-		if (ipflow_fastforward(m))
+		if (ip_fastforward(m))
 			return;
 		isr = NETISR_IP;
 		break;
--- a/sys/net/if_ef.c
+++ b/sys/net/if_ef.c
@ -253,7 +253,7 @@ ef_inputEII(struct mbuf *m, struct ether_header *eh, u_short ether_type)
 #endif
 #ifdef INET
 	case ETHERTYPE_IP:
-		if (ipflow_fastforward(m))
+		if (ip_fastforward(m))
 			return (0);
 		isr = NETISR_IP;
 		break;
--- a/sys/net/if_ethersubr.c
+++ b/sys/net/if_ethersubr.c
@ -714,7 +714,7 @@ ether_demux(struct ifnet *ifp, struct mbuf *m)
 	switch (ether_type) {
 #ifdef INET
 	case ETHERTYPE_IP:
-		if (ipflow_fastforward(m))
+		if (ip_fastforward(m))
 			return;
 		isr = NETISR_IP;
 		break;
--- a/sys/net/if_fddisubr.c
+++ b/sys/net/if_fddisubr.c
@ -471,7 +471,7 @@ fddi_input(ifp, m)
 		switch (type) {
 #ifdef INET
 		case ETHERTYPE_IP:
-			if (ipflow_fastforward(m))
+			if (ip_fastforward(m))
 				return;
 			isr = NETISR_IP;
 			break;
--- a/sys/net/if_iso88025subr.c
+++ b/sys/net/if_iso88025subr.c
@ -556,7 +556,7 @@ iso88025_input(ifp, m)
 #ifdef INET
 		case ETHERTYPE_IP:
 			th->iso88025_shost[0] &= ~(TR_RII); 
-			if (ipflow_fastforward(m))
+			if (ip_fastforward(m))
 				return;
 			isr = NETISR_IP;
 			break;
--- a/sys/net/if_ppp.c
+++ b/sys/net/if_ppp.c
@ -1537,7 +1537,7 @@ ppp_inproc(sc, m)
 	m->m_pkthdr.len -= PPP_HDRLEN;
 	m->m_data += PPP_HDRLEN;
 	m->m_len -= PPP_HDRLEN;
-	if (ipflow_fastforward(m))
+	if (ip_fastforward(m))
 	    return;
 	isr = NETISR_IP;
 	break;
--- a/sys/netinet/in_var.h
+++ b/sys/netinet/in_var.h
@ -230,9 +230,7 @@ void	in_rtqdrain(void);
 void	ip_input(struct mbuf *);
 int	in_ifadown(struct ifaddr *ifa, int);
 void	in_ifscrub(struct ifnet *, struct in_ifaddr *);
-int	ipflow_fastforward(struct mbuf *);
-void	ipflow_create(const struct route *, struct mbuf *);
-void	ipflow_slowtimo(void);
+int	ip_fastforward(struct mbuf *);

 #endif /* _KERNEL */

--- a/sys/netinet/ip_fastfwd.c
+++ b/sys/netinet/ip_fastfwd.c
@ -0,0 +1,806 @@
+/*
+ * Copyright (c) 2003 Andre Oppermann, Internet Business Solutions AG
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ *    products derived from this software without specific prior written
+ *    permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * ip_fastforward gets its speed from processing the forwarded packet to
+ * completion (if_output on the other side) without any queues or netisr's.
+ * The receiving interface DMAs the packet into memory, the upper half of the
+ * driver calls ip_fastforward, we do our routing table lookup and directly
+ * send it off to the outgoing interface which DMAs the packet to the
+ * network card. The only part of the packet we touch with the CPU is the
+ * IP header. We are essentially limited by bus bandwidth and how fast the
+ * network card/driver can set up receives and transmits.
+ *
+ * We handle basic errors, ip header errors, checksum errors,
+ * destination unreachable, fragmentation and fragmentation needed and
+ * report them via icmp to the sender.
+ *
+ * Anything that is not pure IPv4 unicast forwarding falls back to
+ * the normal ip_input processing path. We should only be called from
+ * interfaces connected to the outside world.
+ *
+ * Firewalling is fully supported including divert, ipfw fwd and ipfilter
+ * ipnat and address rewrite.
+ *
+ * IPSEC is not supported if this host is a tunnel broker. IPSEC is
+ * supported for connections to/from local host.
+ *
+ * We try to do the least expensive (in CPU ops) checks and operations
+ * first to catch junk with as little overhead as possible.
+ * 
+ * We take full advantage of hardware support for ip checksum and
+ * fragmentation offloading.
+ *
+ * We don't do ICMP redirect in the fast forwarding path. I have had my own
+ * cases where two core routers with Zebra routing suite would send millions
+ * of ICMP redirects to connected hosts if the router to dest was not the
+ * default gateway. In one case it was filling the routing table of a host
+ * with close to 300'000 cloned redirect entries until it ran out of kernel
+ * memory. However the networking code proved very robust and it didn't
+ * crash or otherwise misbehave.
+ */
+
+/*
+ * Many thanks to Matt Thomas of NetBSD for basic structure of ip_flow.c which
+ * is being followed here.
+ */
+
+#include "opt_ipfw.h"
+#include "opt_ipdn.h"
+#include "opt_ipdivert.h"
+#include "opt_ipfilter.h"
+#include "opt_ipstealth.h"
+#include "opt_mac.h"
+#include "opt_pfil_hooks.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/mac.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+
+#include <net/pfil.h>
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/if_var.h>
+#include <net/if_dl.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_icmp.h>
+
+#include <machine/in_cksum.h>
+
+#include <netinet/ip_fw.h>
+#include <netinet/ip_dummynet.h>
+
+/*
+ * Run-time switch for the fast path, exported as the read/write sysctl
+ * net.inet.ip.fastforwarding.  It starts out as 0 and ip_fastforward()
+ * returns 0 for every packet while it stays zero.
+ */
+static int ipfastforward_active = 0;
+SYSCTL_INT(_net_inet_ip, OID_AUTO, fastforwarding, CTLFLAG_RW,
+    &ipfastforward_active, 0, "Enable fast IP forwarding");
+
+/*
+ * Try to forward a packet based on the destination address.
+ * This is a fast path optimized for the plain forwarding case.
+ *
+ * m is the received packet; it must carry a packet header and its data
+ * must start at the IP header.
+ *
+ * Returns 1 if the packet was handled (and consumed) here: forwarded,
+ * dropped, answered with an ICMP error or handed to dummynet/divert.
+ * Returns 0 if the packet should be delivered to ip_input for full
+ * processing; ip_len and ip_off are back in network byte order then.
+ *
+ * NOTE(review): m is passed by value, so a caller that gets 0 back cannot
+ * see a new chain head created in here (presumably m_pullup() may return
+ * one, and the MT_TAGs prepended below certainly do).  Fixing that needs
+ * an interface change -- confirm against the callers.
+ */
+int
+ip_fastforward(struct mbuf *m)
+{
+	struct ip *ip;
+	struct mbuf *m0 = NULL;
+#ifdef IPDIVERT
+	struct ip *tip;
+	struct mbuf *teem = NULL;
+#endif
+	struct mbuf *tag = NULL;
+	struct route ro;
+	struct sockaddr_in *dst = NULL;
+	struct in_ifaddr *ia = NULL;
+	struct ifaddr *ifa = NULL;
+	struct ifnet *ifp = NULL;
+	struct ip_fw_args args;
+	in_addr_t odest, dest;
+	u_short sum;
+	int error = 0;
+	int hlen, ipfw, mtu;
+
+	/*
+	 * Are we active and forwarding packets?
+	 */
+	if (!ipfastforward_active || !ipforwarding)
+		return 0;
+
+	/*
+	 * If there is any MT_TAG we fall back to ip_input because we can't
+	 * handle TAGs here. Should never happen as we get directly called
+	 * from the interface input routines.
+	 */
+	if (m->m_type == MT_TAG) {
+		KASSERT(0, ("%s: packet with MT_TAG not expected", __func__));
+		return 0;
+	}
+
+	M_ASSERTVALID(m);
+	M_ASSERTPKTHDR(m);
+
+	/*
+	 * Step 1: check for packet drop conditions (and sanity checks)
+	 */
+
+	/*
+	 * Is entire packet big enough?
+	 */
+	if (m->m_pkthdr.len < sizeof(struct ip)) {
+		ipstat.ips_tooshort++;
+		goto drop;
+	}
+
+	/*
+	 * Is first mbuf large enough for ip header and is header present?
+	 */
+	if (m->m_len < sizeof (struct ip) &&
+	   (m = m_pullup(m, sizeof (struct ip))) == 0) {
+		ipstat.ips_toosmall++;
+		goto drop;
+	}
+
+	ip = mtod(m, struct ip *);
+
+	/*
+	 * Is it IPv4?
+	 */
+	if (ip->ip_v != IPVERSION) {
+		ipstat.ips_badvers++;
+		goto drop;
+	}
+
+	/*
+	 * Is IP header length correct and is it in first mbuf?
+	 */
+	hlen = ip->ip_hl << 2;
+	if (hlen < sizeof(struct ip)) {	/* minimum header length */
+		ipstat.ips_badlen++;
+		goto drop;
+	}
+	if (hlen > m->m_len) {
+		if ((m = m_pullup(m, hlen)) == 0) {
+			ipstat.ips_badhlen++;
+			goto drop;
+		}
+		ip = mtod(m, struct ip *);
+	}
+
+	/*
+	 * Checksum correct?
+	 */
+	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED)
+		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
+	else {
+		if (hlen == sizeof(struct ip))
+			sum = in_cksum_hdr(ip);
+		else
+			sum = in_cksum(m, hlen);
+	}
+	if (sum) {
+		ipstat.ips_badsum++;
+		goto drop;
+	}
+	m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID);
+
+	/*
+	 * Convert to host representation
+	 */
+	ip->ip_len = ntohs(ip->ip_len);
+	ip->ip_off = ntohs(ip->ip_off);
+
+	/*
+	 * Is IP length longer than packet we have got?
+	 */
+	if (m->m_pkthdr.len < ip->ip_len) {
+		ipstat.ips_tooshort++;
+		goto drop;
+	}
+
+	/*
+	 * Is packet longer than IP header tells us? If yes, truncate packet.
+	 * The m_adj() argument is negative here, i.e. ip_len minus the
+	 * larger packet length.
+	 */
+	if (m->m_pkthdr.len > ip->ip_len) {
+		if (m->m_len == m->m_pkthdr.len) {
+			m->m_len = ip->ip_len;
+			m->m_pkthdr.len = ip->ip_len;
+		} else
+			m_adj(m, ip->ip_len - m->m_pkthdr.len);
+	}
+
+	/*
+	 * Is packet from or to 127/8?
+	 */
+	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
+	    (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
+		ipstat.ips_badaddr++;
+		goto drop;
+	}
+
+	/*
+	 * Step 2: fallback conditions to normal ip_input path processing
+	 */
+
+	/*
+	 * Only IP packets without options
+	 */
+	if (ip->ip_hl != (sizeof(struct ip) >> 2))
+		goto fallback;
+
+	/*
+	 * Only unicast IP, not from loopback, no L2 or IP broadcast,
+	 * no multicast, no INADDR_ANY
+	 *
+	 * XXX: Probably some of these checks could be direct drop
+	 * conditions.  However it is not clear whether there are some
+	 * hacks or obscure behaviours which make it necessary to
+	 * let ip_input handle it.  We play safe here and let ip_input
+	 * deal with it until it is proven that we can directly drop it.
+	 */
+	if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) ||
+	    ntohl(ip->ip_src.s_addr) == (u_long)INADDR_BROADCAST ||
+	    ntohl(ip->ip_dst.s_addr) == (u_long)INADDR_BROADCAST ||
+	    IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
+	    IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
+	    ip->ip_dst.s_addr == INADDR_ANY )
+		goto fallback;
+
+	/*
+	 * Is it for a local address on this host?
+	 */
+	LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) {
+		if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr)
+			goto fallback;
+	}
+
+	/*
+	 * Or is it for a local IP broadcast address on this host?
+	 */
+	if (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) {
+		TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) {
+			if (ifa->ifa_addr->sa_family != AF_INET)
+				continue;
+			ia = ifatoia(ifa);
+			if (ia->ia_netbroadcast.s_addr == ip->ip_dst.s_addr)
+				goto fallback;
+			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
+			    ip->ip_dst.s_addr)
+				goto fallback;
+		}
+	}
+	ipstat.ips_total++;
+
+	/*
+	 * Several exits below (forwardlocal, droptoours) test ro.ro_rt and
+	 * args.next_hop, and some of them are reached before any route
+	 * lookup or firewall run has happened.  Start both from a known
+	 * state so those tests never read indeterminate stack contents.
+	 */
+	bzero(&ro, sizeof(ro));
+	args.next_hop = NULL;
+
+	/*
+	 * Step 3: incoming packet firewall processing
+	 */
+
+	odest = dest = ip->ip_dst.s_addr;
+#ifdef PFIL_HOOKS
+	/*
+	 * Run through list of ipfilter hooks for input packets
+	 */
+	if (pfil_run_hooks(&inet_pfil_hook, &m, m->m_pkthdr.rcvif, PFIL_IN) ||
+	    m == NULL)
+		return 1;
+
+	M_ASSERTVALID(m);
+	M_ASSERTPKTHDR(m);
+
+	ip = mtod(m, struct ip *);	/* m may have changed by pfil hook */
+	dest = ip->ip_dst.s_addr;
+#endif
+
+	/*
+	 * Run through ipfw for input packets
+	 */
+	if (fw_enable && IPFW_LOADED) {
+		bzero(&args, sizeof(args));
+		args.m = m;
+
+		ipfw = ip_fw_chk_ptr(&args);
+		m = args.m;
+
+		/*
+		 * Packet denied, drop it.  This test comes before the
+		 * assertions because m is allowed to be NULL here.
+		 */
+		if ((ipfw & IP_FW_PORT_DENY_FLAG) || m == NULL)
+			goto drop;
+
+		M_ASSERTVALID(m);
+		M_ASSERTPKTHDR(m);
+
+		/*
+		 * Send packet to the appropriate pipe
+		 */
+		if (DUMMYNET_LOADED && (ipfw & IP_FW_PORT_DYNT_FLAG) != 0) {
+			ip_dn_io_ptr(m, ipfw & 0xffff, DN_TO_IP_IN, &args);
+			return 1;
+		}
+#ifdef IPDIVERT
+		/*
+		 * Divert packet
+		 */
+		if (ipfw != 0 && (ipfw & IP_FW_PORT_DYNT_FLAG) == 0) {
+			/*
+			 * See if this is a fragment
+			 */
+			if (ip->ip_off & (IP_MF | IP_OFFMASK)) {
+				MGETHDR(tag, M_DONTWAIT, MT_TAG);
+				if (tag == NULL)
+					goto drop;
+				tag->m_flags = PACKET_TAG_DIVERT;
+				tag->m_data = (caddr_t)(u_long)args.divert_rule;
+				tag->m_next = m;
+				/* XXX: really bloody hack, see ip_input */
+				tag->m_nextpkt = (struct mbuf *)1;
+				m = tag;
+				tag = NULL;
+
+				goto droptoours;
+			}
+			/*
+			 * Tee packet
+			 */
+			if ((ipfw & IP_FW_PORT_TEE_FLAG) != 0)
+				teem = m_dup(m, M_DONTWAIT);
+			else
+				teem = m;
+			if (teem == NULL)
+				goto passin;
+
+			/*
+			 * Delayed checksums are not compatible
+			 */
+			if (teem->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
+				in_delayed_cksum(teem);
+				teem->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
+			}
+			/*
+			 * Restore packet header fields to original values
+			 */
+			tip = mtod(teem, struct ip *);
+			tip->ip_len = htons(tip->ip_len);
+			tip->ip_off = htons(tip->ip_off);
+			/*
+			 * Deliver packet to divert input routine
+			 */
+			divert_packet(teem, 0, ipfw & 0xffff, args.divert_rule);
+			/*
+			 * If this was not tee, we are done
+			 */
+			if ((ipfw & IP_FW_PORT_TEE_FLAG) == 0)
+				return 1;
+			/* Continue if it was tee */
+			goto passin;
+		}
+#endif
+		if (ipfw == 0 && args.next_hop != NULL) {
+			dest = args.next_hop->sin_addr.s_addr;
+			goto passin;
+		}
+		/*
+		 * Let through or not?  No route is held yet, so a plain
+		 * drop is enough.
+		 */
+		if (ipfw != 0)
+			goto drop;
+	}
+passin:
+	ip = mtod(m, struct ip *);	/* if m changed during fw processing */
+
+	/*
+	 * Destination address changed?
+	 */
+	if (odest != dest) {
+		/*
+		 * Is it now for a local address on this host?
+		 */
+		LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) {
+			if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr)
+				goto forwardlocal;
+		}
+		/*
+		 * Go on with new destination address
+		 */
+	}
+
+	/*
+	 * Step 4: decrement TTL and look up route
+	 */
+
+	/*
+	 * Check TTL
+	 */
+#ifdef IPSTEALTH
+	if (!ipstealth) {
+#endif
+	if (ip->ip_ttl <= IPTTLDEC) {
+		icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, NULL, NULL);
+		return 1;
+	}
+
+	/*
+	 * Decrement the TTL and incrementally change the checksum.
+	 * Don't bother doing this with hw checksum offloading.
+	 */
+	ip->ip_ttl -= IPTTLDEC;
+	if (ip->ip_sum >= (u_int16_t) ~htons(IPTTLDEC << 8))
+		ip->ip_sum -= ~htons(IPTTLDEC << 8);
+	else
+		ip->ip_sum += htons(IPTTLDEC << 8);
+#ifdef IPSTEALTH
+	}
+#endif
+
+	/*
+	 * Find route to destination.  ro is still all zeroes from the
+	 * bzero() ahead of step 3, so only the destination is filled in.
+	 */
+	dst = (struct sockaddr_in *)&ro.ro_dst;
+	dst->sin_family = AF_INET;
+	dst->sin_len = sizeof(*dst);
+	dst->sin_addr.s_addr = dest;
+	rtalloc_ign(&ro, (RTF_PRCLONING | RTF_CLONING));
+
+	/*
+	 * Route there and interface still up?
+	 */
+	if (ro.ro_rt &&
+	    (ro.ro_rt->rt_flags & RTF_UP) &&
+	    (ro.ro_rt->rt_ifp->if_flags & IFF_UP)) {
+		ia = ifatoia(ro.ro_rt->rt_ifa);
+		ifp = ro.ro_rt->rt_ifp;
+		if (ro.ro_rt->rt_flags & RTF_GATEWAY)
+			dst = (struct sockaddr_in *)ro.ro_rt->rt_gateway;
+	} else {
+		ipstat.ips_noroute++;
+		ipstat.ips_cantforward++;
+		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, NULL, NULL);
+		if (ro.ro_rt)
+			RTFREE(ro.ro_rt);
+		return 1;
+	}
+
+	/*
+	 * Step 5: outgoing firewall packet processing
+	 *
+	 * A route reference is held from here on; every exit below has
+	 * to release it with RTFREE().
+	 */
+
+#ifdef PFIL_HOOKS
+	/*
+	 * Run through list of hooks for output packets.
+	 */
+	if (pfil_run_hooks(&inet_pfil_hook, &m, ifp, PFIL_OUT) || m == NULL) {
+		RTFREE(ro.ro_rt);
+		return 1;
+	}
+
+	M_ASSERTVALID(m);
+	M_ASSERTPKTHDR(m);
+
+	ip = mtod(m, struct ip *);
+	dest = ip->ip_dst.s_addr;
+#endif
+	if (fw_enable && IPFW_LOADED && !args.next_hop) {
+		bzero(&args, sizeof(args));
+		args.m = m;
+		args.oif = ifp;
+
+		ipfw = ip_fw_chk_ptr(&args);
+		m = args.m;
+
+		/* As on input: test for NULL before asserting on m */
+		if ((ipfw & IP_FW_PORT_DENY_FLAG) || m == NULL) {
+			RTFREE(ro.ro_rt);
+			goto drop;
+		}
+
+		M_ASSERTVALID(m);
+		M_ASSERTPKTHDR(m);
+
+		if (DUMMYNET_LOADED && (ipfw & IP_FW_PORT_DYNT_FLAG) != 0) {
+			/*
+			 * XXX note: if the ifp or rt entry are deleted
+			 * while a pkt is in dummynet, we are in trouble!
+			 */
+			args.ro = &ro;		/* dummynet does not save it */
+			args.dst = dst;
+
+			ip_dn_io_ptr(m, ipfw & 0xffff, DN_TO_IP_OUT, &args);
+			RTFREE(ro.ro_rt);
+			return 1;
+		}
+#ifdef IPDIVERT
+		if (ipfw != 0 && (ipfw & IP_FW_PORT_DYNT_FLAG) == 0) {
+			/*
+			 * See if this is a fragment
+			 */
+			if (ip->ip_off & (IP_MF | IP_OFFMASK)) {
+				MGETHDR(tag, M_DONTWAIT, MT_TAG);
+				if (tag == NULL) {
+					RTFREE(ro.ro_rt);
+					goto drop;
+				}
+				tag->m_flags = PACKET_TAG_DIVERT;
+				/* same cast as on the input side */
+				tag->m_data = (caddr_t)(u_long)args.divert_rule;
+				tag->m_next = m;
+				/* XXX: really bloody hack, see ip_input */
+				tag->m_nextpkt = (struct mbuf *)1;
+				m = tag;
+				tag = NULL;
+
+				goto droptoours;
+			}
+			/*
+			 * Tee packet
+			 */
+			if ((ipfw & IP_FW_PORT_TEE_FLAG) != 0)
+				teem = m_dup(m, M_DONTWAIT);
+			else
+				teem = m;
+			if (teem == NULL)
+				goto passout;
+
+			/*
+			 * Delayed checksums are not compatible with divert
+			 */
+			if (teem->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
+				in_delayed_cksum(teem);
+				teem->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
+			}
+			/*
+			 * Restore packet header fields to original values
+			 */
+			tip = mtod(teem, struct ip *);
+			tip->ip_len = htons(tip->ip_len);
+			tip->ip_off = htons(tip->ip_off);
+			/*
+			 * Deliver packet to divert input routine
+			 */
+			divert_packet(teem, 0, ipfw & 0xffff, args.divert_rule);
+			/*
+			 * If this was not tee, we are done
+			 */
+			if ((ipfw & IP_FW_PORT_TEE_FLAG) == 0) {
+				RTFREE(ro.ro_rt);
+				return 1;
+			}
+			/* Continue if it was tee */
+			goto passout;
+		}
+#endif
+		if (ipfw == 0 && args.next_hop != NULL) {
+			dest = args.next_hop->sin_addr.s_addr;
+			goto passout;
+		}
+		/*
+		 * Let through or not?  Unlike on input a route is held
+		 * at this point and must be released before dropping.
+		 */
+		if (ipfw != 0) {
+			RTFREE(ro.ro_rt);
+			goto drop;
+		}
+	}
+passout:
+	ip = mtod(m, struct ip *);
+
+	/*
+	 * Destination address changed?
+	 */
+	if (odest != dest) {
+		/*
+		 * Is it now for a local address on this host?
+		 */
+		LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) {
+			if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr) {
+forwardlocal:
+				if (args.next_hop) {
+					/* XXX leak */
+					MGETHDR(tag, M_DONTWAIT, MT_TAG);
+					if (tag == NULL) {
+						if (ro.ro_rt)
+							RTFREE(ro.ro_rt);
+						goto drop;
+					}
+					tag->m_flags = PACKET_TAG_IPFORWARD;
+					tag->m_data = (caddr_t)args.next_hop;
+					tag->m_next = m;
+					/* XXX: really bloody hack,
+					 * see ip_input */
+					tag->m_nextpkt = (struct mbuf *)1;
+					m = tag;
+					tag = NULL;
+				}
+#ifdef IPDIVERT
+droptoours:	/* Used for DIVERT */
+#endif
+				MGETHDR(tag, M_DONTWAIT, MT_TAG);
+				if (tag == NULL) {
+					if (ro.ro_rt)
+						RTFREE(ro.ro_rt);
+					goto drop;
+				}
+				tag->m_flags = PACKET_TAG_IPFASTFWD_OURS;
+				tag->m_data = NULL;
+				tag->m_next = m;
+				/* XXX: really bloody hack, see ip_input */
+				tag->m_nextpkt = (struct mbuf *)1;
+				m = tag;
+				tag = NULL;
+
+				/* ip still points to the real packet */
+				ip->ip_len = htons(ip->ip_len);
+				ip->ip_off = htons(ip->ip_off);
+
+				/*
+				 * Return packet for processing by ip_input
+				 */
+				if (ro.ro_rt)
+					RTFREE(ro.ro_rt);
+				return 0;
+			}
+		}
+		/*
+		 * Redo route lookup with new destination address
+		 */
+		RTFREE(ro.ro_rt);
+		bzero(&ro, sizeof(ro));
+		dst = (struct sockaddr_in *)&ro.ro_dst;
+		dst->sin_family = AF_INET;
+		dst->sin_len = sizeof(*dst);
+		dst->sin_addr.s_addr = dest;
+		rtalloc_ign(&ro, (RTF_PRCLONING | RTF_CLONING));
+
+		/*
+		 * Route there and interface still up?
+		 */
+		if (ro.ro_rt &&
+		    (ro.ro_rt->rt_flags & RTF_UP) &&
+		    (ro.ro_rt->rt_ifp->if_flags & IFF_UP)) {
+			ia = ifatoia(ro.ro_rt->rt_ifa);
+			ifp = ro.ro_rt->rt_ifp;
+			if (ro.ro_rt->rt_flags & RTF_GATEWAY)
+				dst = (struct sockaddr_in *)ro.ro_rt->rt_gateway;
+		} else {
+			ipstat.ips_noroute++;
+			ipstat.ips_cantforward++;
+			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST,
+				NULL, NULL);
+			if (ro.ro_rt)
+				RTFREE(ro.ro_rt);
+			return 1;
+		}
+	}
+
+	/*
+	 * Step 6: send off the packet
+	 */
+
+	/*
+	 * Check if packet fits MTU or if hardware will fragment for us
+	 */
+	if (ro.ro_rt->rt_rmx.rmx_mtu)
+		mtu = min(ro.ro_rt->rt_rmx.rmx_mtu, ifp->if_mtu);
+	else
+		mtu = ifp->if_mtu;
+
+	if (ip->ip_len <= mtu ||
+	    ((ifp->if_hwassist & CSUM_FRAGMENT) &&
+	    (ip->ip_off & IP_DF) == 0)) {
+		/*
+		 * Restore packet header fields to original values
+		 */
+		ip->ip_len = htons(ip->ip_len);
+		ip->ip_off = htons(ip->ip_off);
+		/*
+		 * Account the packet to the outgoing interface address
+		 * before the send: m must not be touched any more once
+		 * it has been given to if_output.
+		 */
+		if (ia) {
+			ia->ia_ifa.if_opackets++;
+			ia->ia_ifa.if_obytes += m->m_pkthdr.len;
+		}
+		/*
+		 * Send off the packet via outgoing interface
+		 */
+		error = (*ifp->if_output)(ifp, m,
+				(struct sockaddr *)dst, ro.ro_rt);
+	} else {
+		/*
+		 * Handle EMSGSIZE with icmp reply
+		 * needfrag for TCP MTU discovery
+		 */
+		if (ip->ip_off & IP_DF) {
+			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
+				NULL, ifp);
+			ipstat.ips_cantfrag++;
+			RTFREE(ro.ro_rt);
+			return 1;
+		} else {
+			/*
+			 * We have to fragment the packet
+			 */
+			m->m_pkthdr.csum_flags |= CSUM_IP;
+			if (ip_fragment(ip, &m, mtu, ifp->if_hwassist,
+					(~ifp->if_hwassist & CSUM_DELAY_IP))) {
+				RTFREE(ro.ro_rt);
+				goto drop;
+			}
+			KASSERT(m != NULL, ("null mbuf and no error"));
+			/*
+			 * Send off the fragments via outgoing interface.
+			 * The fragments are chained through m_nextpkt; m0
+			 * always remembers the one after the current one.
+			 */
+			do {
+				m0 = m->m_nextpkt;
+				m->m_nextpkt = NULL;
+
+				error = (*ifp->if_output)(ifp, m,
+					(struct sockaddr *)dst, ro.ro_rt);
+				if (error)
+					break;
+			} while ((m = m0) != NULL);
+			if (error) {
+				/*
+				 * Reclaim remaining fragments.  The one
+				 * that failed has already been handed to
+				 * if_output, so start with its successor.
+				 */
+				for (m = m0; m; m = m0) {
+					m0 = m->m_nextpkt;
+					m->m_nextpkt = NULL;
+					m_freem(m);
+				}
+			} else
+				ipstat.ips_fragmented++;
+		}
+	}
+
+	if (error != 0)
+		ipstat.ips_odropped++;
+	else {
+		ro.ro_rt->rt_rmx.rmx_pksent++;
+		ipstat.ips_forward++;
+		ipstat.ips_fastforward++;
+	}
+	RTFREE(ro.ro_rt);
+	return 1;
+
+fallback:
+	/*
+	 * Return packet back to netisr for slow processing, with ip_len
+	 * and ip_off restored to network byte order.  Only reached from
+	 * step 2, i.e. after the conversion to host order and before any
+	 * route has been looked up.
+	 */
+	ip->ip_len = htons(ip->ip_len);
+	ip->ip_off = htons(ip->ip_off);
+	return 0;
+
+drop:
+	if (m)
+		m_freem(m);
+	return 1;
+}
--- a/sys/netinet/ip_flow.c
+++ b/sys/netinet/ip_flow.c
@ -1,377 +0,0 @@
-/*-
- * Copyright (c) 1998 The NetBSD Foundation, Inc.
- * All rights reserved.
- *
- * This code is derived from software contributed to The NetBSD Foundation
- * by the 3am Software Foundry ("3am").  It was developed by Matt Thomas.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the NetBSD
- *	Foundation, Inc. and its contributors.
- * 4. Neither the name of The NetBSD Foundation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/lock.h>
-#include <sys/malloc.h>
-#include <sys/mbuf.h>
-#include <sys/mutex.h>
-#include <sys/protosw.h>
-#include <sys/socket.h>
-#include <sys/kernel.h>
-
-#include <sys/sysctl.h>
-
-#include <net/if.h>
-#include <net/route.h>
-
-#include <netinet/in.h>
-#include <netinet/in_systm.h>
-#include <netinet/ip.h>
-#include <netinet/in_var.h>
-#include <netinet/ip_var.h>
-#include <netinet/ip_flow.h>
-
-#define	IPFLOW_TIMER		(5 * PR_SLOWHZ)
-#define IPFLOW_HASHBITS		6	/* should not be a multiple of 8 */
-#define	IPFLOW_HASHSIZE		(1 << IPFLOW_HASHBITS)
-#if IPFLOW_HASHSIZE > 255
-#error "make ipf_hash larger"
-#endif
-static struct ipflow_head ipflows[IPFLOW_HASHSIZE];
-static int ipflow_inuse;
-#define	IPFLOW_MAX		256
-
-/*
- * Each flow list has a lock that guards updates to the list and to
- * all entries on the list.  Flow entries hold the hash index for
- * finding the head of the list so the lock can be found quickly.
- *
- * ipflow_inuse holds a count of the number of flow entries present.
- * This is used to bound the size of the table.  When IPFLOW_MAX entries
- * are present and an additional entry is needed one is chosen for
- * replacement.  We could use atomic ops for this counter but having it
- * inconsistent doesn't appear to be a problem.
- */
-#define	IPFLOW_HEAD_LOCK(_ipfh)		mtx_lock(&(_ipfh)->ipfh_mtx)
-#define	IPFLOW_HEAD_UNLOCK(_ipfh)	mtx_unlock(&(_ipfh)->ipfh_mtx)
-#define	IPFLOW_LOCK(_ipf) \
-	IPFLOW_HEAD_LOCK(&ipflows[(_ipf)->ipf_hash])
-#define	IPFLOW_UNLOCK(_ipf) \
-	IPFLOW_HEAD_UNLOCK(&ipflows[(_ipf)->ipf_hash])
-
-static int ipflow_active = 0;
-SYSCTL_INT(_net_inet_ip, IPCTL_FASTFORWARDING, fastforwarding, CTLFLAG_RW,
-    &ipflow_active, 0, "Enable flow-based IP forwarding");
-
-static MALLOC_DEFINE(M_IPFLOW, "ip_flow", "IP flow");
-
-static unsigned
-ipflow_hash(struct in_addr dst, struct in_addr src, unsigned tos)
-{
-	unsigned hash = tos;
-	int idx;
-	for (idx = 0; idx < 32; idx += IPFLOW_HASHBITS)
-		hash += (dst.s_addr >> (32 - idx)) + (src.s_addr >> idx);
-	return hash & (IPFLOW_HASHSIZE-1);
-}
-
-static struct ipflow *
-ipflow_lookup(const struct ip *ip)
-{
-	unsigned hash;
-	struct ipflow_head *head;
-	struct ipflow *ipf;
-
-	hash = ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos);
-	head = &ipflows[hash];
-
-	IPFLOW_HEAD_LOCK(head);
-	LIST_FOREACH(ipf, &head->ipfh_head, ipf_next) {
-		if (ip->ip_dst.s_addr == ipf->ipf_dst.s_addr
-		    && ip->ip_src.s_addr == ipf->ipf_src.s_addr
-		    && ip->ip_tos == ipf->ipf_tos) {
-			/* NB: return head locked */
-			return ipf;
-		}
-	}
-	IPFLOW_HEAD_UNLOCK(head);
-	return NULL;
-}
-
-int
-ipflow_fastforward(struct mbuf *m)
-{
-	struct ip *ip;
-	struct ipflow *ipf;
-	struct rtentry *rt;
-	struct sockaddr *dst;
-	int error;
-
-	/*
-	 * Are we forwarding packets?  Big enough for an IP packet?
-	 */
-	if (!ipforwarding || !ipflow_active || m->m_len < sizeof(struct ip))
-		return 0;
-	/*
-	 * IP header with no option and valid version and length
-	 */
-	ip = mtod(m, struct ip *);
-	if (ip->ip_v != IPVERSION || ip->ip_hl != (sizeof(struct ip) >> 2)
-	    || ntohs(ip->ip_len) > m->m_pkthdr.len)
-		return 0;
-	/*
-	 * Find a flow.
-	 */
-	if ((ipf = ipflow_lookup(ip)) == NULL)
-		return 0;
-
-	/*
-	 * Route and interface still up?
-	 */
-	rt = ipf->ipf_ro.ro_rt;
-	if ((rt->rt_flags & RTF_UP) == 0 || (rt->rt_ifp->if_flags & IFF_UP) == 0) {
-		IPFLOW_UNLOCK(ipf);
-		return 0;
-	}
-
-	/*
-	 * Packet size OK?  TTL?
-	 */
-	if (m->m_pkthdr.len > rt->rt_ifp->if_mtu || ip->ip_ttl <= IPTTLDEC) {
-		IPFLOW_UNLOCK(ipf);
-		return 0;
-	}
-
-	/*
-	 * Everything checks out and so we can forward this packet.
-	 * Modify the TTL and incrementally change the checksum.
-	 */
-	ip->ip_ttl -= IPTTLDEC;
-	if (ip->ip_sum >= htons(0xffff - (IPTTLDEC << 8))) {
-		ip->ip_sum += htons(IPTTLDEC << 8) + 1;
-	} else {
-		ip->ip_sum += htons(IPTTLDEC << 8);
-	}
-
-	/*
-	 * Send the packet on its way.  All we can get back is ENOBUFS
-	 */
-	ipf->ipf_uses++;
-	ipf->ipf_timer = IPFLOW_TIMER;
-
-	if (rt->rt_flags & RTF_GATEWAY)
-		dst = rt->rt_gateway;
-	else
-		dst = &ipf->ipf_ro.ro_dst;
-	if ((error = (*rt->rt_ifp->if_output)(rt->rt_ifp, m, dst, rt)) != 0) {
-		if (error == ENOBUFS)
-			ipf->ipf_dropped++;
-		else
-			ipf->ipf_errors++;
-	}
-	IPFLOW_UNLOCK(ipf);
-	return 1;
-}
-
-static void
-ipflow_addstats(struct ipflow *ipf)
-{
-	ipf->ipf_ro.ro_rt->rt_use += ipf->ipf_uses;
-	ipstat.ips_cantforward += ipf->ipf_errors + ipf->ipf_dropped;
-	ipstat.ips_forward += ipf->ipf_uses;
-	ipstat.ips_fastforward += ipf->ipf_uses;
-}
-
-/*
- * XXX the locking here makes reaping an entry very expensive...
- */
-static struct ipflow *
-ipflow_reap(void)
-{
-	struct ipflow *victim = NULL;
-	struct ipflow *ipf;
-	int idx;
-
-	for (idx = 0; idx < IPFLOW_HASHSIZE; idx++) {
-		struct ipflow_head *head = &ipflows[idx];
-
-		IPFLOW_HEAD_LOCK(head);
-		LIST_FOREACH(ipf, &head->ipfh_head, ipf_next) {
-			/*
-			 * If this no longer points to a valid route
-			 * reclaim it.
-			 */
-			if ((ipf->ipf_ro.ro_rt->rt_flags & RTF_UP) == 0)
-				goto done;
-			/*
-			 * choose the one that's been least recently used
-			 * or has had the least uses in the last 1.5 
-			 * intervals.
-			 */
-			if (victim == NULL)
-				victim = ipf;
-			else if (ipf->ipf_timer < victim->ipf_timer
-			    || (ipf->ipf_timer == victim->ipf_timer
-				&& ipf->ipf_last_uses + ipf->ipf_uses <
-				    victim->ipf_last_uses + victim->ipf_uses)) {
-				if (victim->ipf_hash != ipf->ipf_hash)
-					IPFLOW_UNLOCK(victim);
-				victim = ipf;
-			}
-		}
-		if (victim && victim->ipf_hash != idx)
-			IPFLOW_HEAD_UNLOCK(head);
-	}
-	ipf = victim;
-    done:
-	/*
-	 * Remove the entry from the flow table.
-	 */
-	LIST_REMOVE(ipf, ipf_next);
-	IPFLOW_UNLOCK(ipf);
-
-	ipflow_addstats(ipf);
-	RTFREE(ipf->ipf_ro.ro_rt);
-	return ipf;
-}
-
-static void
-ipflow_free(struct ipflow *ipf)
-{
-	/*
-	 * Remove the flow from the hash table.
-	 */
-	LIST_REMOVE(ipf, ipf_next);
-
-	ipflow_addstats(ipf);
-	RTFREE(ipf->ipf_ro.ro_rt);
-	ipflow_inuse--;
-	free(ipf, M_IPFLOW);
-}
-
-void
-ipflow_slowtimo(void)
-{
-	struct ipflow *ipf;
-	int idx;
-
-	for (idx = 0; idx < IPFLOW_HASHSIZE; idx++) {
-		struct ipflow_head *head = &ipflows[idx];
-
-		IPFLOW_HEAD_LOCK(head);
-		ipf = LIST_FIRST(&head->ipfh_head);
-		while (ipf != NULL) {
-			struct ipflow *next_ipf = LIST_NEXT(ipf, ipf_next);
-			if (--ipf->ipf_timer == 0) {
-				ipflow_free(ipf);
-			} else {
-				ipf->ipf_last_uses = ipf->ipf_uses;
-				ipf->ipf_ro.ro_rt->rt_use += ipf->ipf_uses;
-				ipstat.ips_forward += ipf->ipf_uses;
-				ipstat.ips_fastforward += ipf->ipf_uses;
-				ipf->ipf_uses = 0;
-			}
-			ipf = next_ipf;
-		}
-		IPFLOW_HEAD_UNLOCK(head);
-	}
-}
-
-void
-ipflow_create(const struct route *ro, struct mbuf *m)
-{
-	const struct ip *const ip = mtod(m, struct ip *);
-	struct ipflow *ipf;
-
-	/*
-	 * Don't create cache entries for ICMP messages.
-	 */
-	if (!ipflow_active || ip->ip_p == IPPROTO_ICMP)
-		return;
-	/*
-	 * See if an existing flow struct exists.  If so remove it from it's
-	 * list and free the old route.  If not, try to malloc a new one
-	 * (if we aren't at our limit).
-	 */
-	ipf = ipflow_lookup(ip);
-	if (ipf == NULL) {
-		if (ipflow_inuse == IPFLOW_MAX) {
-			ipf = ipflow_reap();
-		} else {
-			ipf = (struct ipflow *) malloc(sizeof(*ipf), M_IPFLOW,
-						       M_NOWAIT);
-			if (ipf == NULL)
-				return;
-			ipflow_inuse++;
-		}
-		bzero((caddr_t) ipf, sizeof(*ipf));
-
-		ipf->ipf_hash = ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos);
-		ipf->ipf_dst = ip->ip_dst;
-		ipf->ipf_src = ip->ip_src;
-		ipf->ipf_tos = ip->ip_tos;
-
-		IPFLOW_LOCK(ipf);
-	} else {
-		LIST_REMOVE(ipf, ipf_next);
-
-		ipflow_addstats(ipf);		/* add stats to old route */
-		RTFREE(ipf->ipf_ro.ro_rt);	/* clear reference */
-		ipf->ipf_uses = ipf->ipf_last_uses = 0;
-		ipf->ipf_errors = ipf->ipf_dropped = 0;
-	}
-
-	/*
-	 * Fill in the updated information.
-	 */
-	ipf->ipf_ro = *ro;
-	RT_LOCK(ro->ro_rt);
-	RT_ADDREF(ro->ro_rt);
-	RT_UNLOCK(ro->ro_rt);
-	ipf->ipf_timer = IPFLOW_TIMER;
-	/*
-	 * Insert into the approriate bucket of the flow table.
-	 */
-	LIST_INSERT_HEAD(&ipflows[ipf->ipf_hash].ipfh_head, ipf, ipf_next);
-	IPFLOW_UNLOCK(ipf);
-}
-
-static void
-ipflow_init(void)
-{
-	int idx;
-
-	for (idx = 0; idx < IPFLOW_HASHSIZE; idx++) {
-		struct ipflow_head *head = &ipflows[idx];
-		LIST_INIT(&head->ipfh_head);
-		mtx_init(&head->ipfh_mtx, "ipflow list head", NULL, MTX_DEF);
-	}
-}
-SYSINIT(ipflow, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, ipflow_init, 0);
--- a/sys/netinet/ip_flow.h
+++ b/sys/netinet/ip_flow.h
@ -1,64 +0,0 @@
-/*-
- * Copyright (c) 1998 The NetBSD Foundation, Inc.
- * All rights reserved.
- *
- * This code is derived from software contributed to The NetBSD Foundation
- * by the 3am Software Foundry ("3am").  It was developed by Matt Thomas.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *	This product includes software developed by the NetBSD
- *	Foundation, Inc. and its contributors.
- * 4. Neither the name of The NetBSD Foundation nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _NETINET_IP_FLOW_H
-#define _NETINET_IP_FLOW_H
-
-struct ipflow {
-	LIST_ENTRY(ipflow) ipf_next;	/* next ipflow in bucket */
-	struct in_addr ipf_dst;		/* destination address */
-	struct in_addr ipf_src;		/* source address */
-
-	/* NB: this assumes the size of the list head hash table is <=256 */
-	u_int8_t ipf_hash;		/* index in list head table */
-	u_int8_t ipf_tos;		/* type-of-service */
-	struct route ipf_ro;		/* associated route entry */
-	u_long ipf_uses;		/* number of uses in this period */
-
-	int ipf_timer;			/* remaining lifetime of this entry */
-	u_long ipf_dropped;		/* ENOBUFS returned by if_output */
-	u_long ipf_errors;		/* other errors returned by if_output */
-	u_long ipf_last_uses;		/* number of uses in last period */
-};
-
-struct ipflow_head {
-	LIST_HEAD(ipflowhead, ipflow) ipfh_head;
-	struct mtx ipfh_mtx;
-};
-
-#endif
--- a/sys/netinet/ip_input.c
+++ b/sys/netinet/ip_input.c
@ -360,11 +360,12 @@ ip_forward_cacheinval(void)
 void
 ip_input(struct mbuf *m)
 {
-	struct ip *ip;
+	struct ip *ip = NULL;
 	struct ipq *fp;
 	struct in_ifaddr *ia = NULL;
 	struct ifaddr *ifa;
-	int    i, hlen, checkif;
+	int    i, checkif, hlen = 0;
+	int    ours = 0;
 	u_short sum;
 	struct in_addr pkt_dst;
 	u_int32_t divert_info = 0;		/* packet divert/tee info */
@ -387,8 +388,18 @@ ip_input(struct mbuf *m)
 	args.divert_rule = 0;			/* divert cookie */
 	args.next_hop = NULL;

-	/* Grab info from MT_TAG mbufs prepended to the chain.	*/
-	for (; m && m->m_type == MT_TAG; m = m->m_next) {
+	/*
+	 * Grab info from MT_TAG mbufs prepended to the chain.
+	 *
+	 * XXX: This is ugly.  These pseudo mbuf prepend tags should really
+	 * be real m_tags.  Previously they were always allocated on the
+	 * caller's stack, so we didn't have to free them.  Now with
+	 * ip_fastforward they are true mbufs and we have to free them,
+	 * otherwise we have a leak.  Must rewrite ipfw to use m_tags.
+	 */
+	for (; m && m->m_type == MT_TAG;) {
+		struct mbuf *m0;
+
 		switch(m->_m_tag_id) {
 		default:
 			printf("ip_input: unrecognised MT_TAG tag %d\n",
@ -406,11 +417,24 @@ ip_input(struct mbuf *m)
 		case PACKET_TAG_IPFORWARD:
 			args.next_hop = (struct sockaddr_in *)m->m_hdr.mh_data;
 			break;
+
+		case PACKET_TAG_IPFASTFWD_OURS:
+			ours = 1;
+			break;
 		}
+
+		m0 = m;
+		m = m->m_next;
+		/* XXX: ip_fastforward marks its tag mbufs this way */
+		if (m0->m_nextpkt == (struct mbuf *)1)
+			m_free(m0);
 	}

 	M_ASSERTPKTHDR(m);

+	if (ours)		/* ip_fastforward firewall changed dest to local */
+		goto ours;
+
 	if (args.rule) {	/* dummynet already filtered us */
 		ip = mtod(m, struct ip *);
 		hlen = ip->ip_hl << 2;
@ -1350,7 +1374,6 @@ ip_slowtimo()
 		}
 	}
 	IPQ_UNLOCK();
-	ipflow_slowtimo();
 	splx(s);
 }

@ -1980,10 +2003,8 @@ ip_forward(struct mbuf *m, struct route *ro,
 		if (type)
 			ipstat.ips_redirectsent++;
 		else {
-			if (mcopy) {
-				ipflow_create(ro, mcopy);
+			if (mcopy)
 				m_freem(mcopy);
-			}
 			return;
 		}
 	}
--- a/sys/netinet/ip_output.c
+++ b/sys/netinet/ip_output.c
@ -920,6 +920,7 @@ ip_output(struct mbuf *m0, struct mbuf *opt, struct route *ro,
 				tag.mh_flags = PACKET_TAG_IPFORWARD;
 				tag.mh_data = (caddr_t)args.next_hop;
 				tag.mh_next = m;
+				tag.mh_nextpkt = NULL;

 				if (m->m_pkthdr.rcvif == NULL)
 					m->m_pkthdr.rcvif = ifunit("lo0");
--- a/sys/sys/mbuf.h
+++ b/sys/sys/mbuf.h
@ -558,6 +558,7 @@ struct	mbuf	*m_split(struct mbuf *, int, int);
 #define	PACKET_TAG_DIVERT			17 /* divert info */
 #define	PACKET_TAG_IPFORWARD			18 /* ipforward info */
 #define	PACKET_TAG_MACLABEL	(19 | MTAG_PERSISTENT) /* MAC label */
+#define	PACKET_TAG_IPFASTFWD_OURS		20 /* IP fastforward dropback */

 /* Packet tag routines */
 struct	m_tag 	*m_tag_alloc(u_int32_t, int, int, int);