freebsd-dev/sys/netinet/ip_gre.c

/* $NetBSD: ip_gre.c,v 1.29 2003/09/05 23:02:43 itojun Exp $ */
/*-
* Copyright (c) 1998 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Heiko W.Rupp <hwr@pilhuhn.de>
*
* IPv6-over-GRE contributed by Gert Doering <gert@greenie.muc.de>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
 * De-encapsulate tunneled packets and send them on.
 * The output half is in net/if_gre.[ch].
 * This currently handles IPPROTO_GRE and IPPROTO_MOBILE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/protosw.h>
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/kernel.h>
#include <sys/syslog.h>
#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/netisr.h>
#include <net/route.h>
#include <net/raw_cb.h>
#ifdef INET
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/ip_gre.h>
#include <machine/in_cksum.h>
#else
#error "ip_gre requires INET"
#endif
/* Needs IP headers. */
#include <net/if_gre.h>
#include <machine/stdarg.h>
#if 1
void gre_inet_ntoa(struct in_addr in); /* XXX */
#endif
static struct gre_softc *gre_lookup(struct mbuf *, u_int8_t);
static struct mbuf *gre_input2(struct mbuf *, int, u_char);
/*
* De-encapsulate a packet and feed it back through ip input (this
* routine is called whenever IP gets a packet with proto type
* IPPROTO_GRE and a local destination address).
* This really is simple
*/
void
gre_input(struct mbuf *m, int off)
{
int proto;
proto = (mtod(m, struct ip *))->ip_p;
m = gre_input2(m, off, proto);
/*
	 * If no matching tunnel that is up is found, we inject
	 * the mbuf into the raw IP socket to see if anyone picks it up.
*/
if (m != NULL)
rip_input(m, off);
}
/*
* Decapsulate. Does the real work and is called from gre_input()
 * (above). Returns the mbuf if the packet has not yet been processed,
* and NULL if it needs no further processing. proto is the protocol
* number of the "calling" foo_input() routine.
*/
static struct mbuf *
gre_input2(struct mbuf *m, int hlen, u_char proto)
{
struct greip *gip;
int isr;
struct gre_softc *sc;
u_int16_t flags;
u_int32_t af;
if ((sc = gre_lookup(m, proto)) == NULL) {
/* No matching tunnel or tunnel is down. */
return (m);
}
if (m->m_len < sizeof(*gip)) {
m = m_pullup(m, sizeof(*gip));
if (m == NULL)
return (NULL);
}
gip = mtod(m, struct greip *);
GRE2IFP(sc)->if_ipackets++;
GRE2IFP(sc)->if_ibytes += m->m_pkthdr.len;
switch (proto) {
case IPPROTO_GRE:
hlen += sizeof(struct gre_h);
		/* Process GRE flags, as the packet can be of variable length. */
		flags = ntohs(gip->gi_flags);
		/*
		 * The Checksum and Offset fields share a single 32-bit
		 * word that is present if either bit is set.
		 */
if ((flags & GRE_CP) | (flags & GRE_RP))
hlen += 4;
/* We don't support routing fields (variable length) */
if (flags & GRE_RP)
return (m);
if (flags & GRE_KP)
hlen += 4;
if (flags & GRE_SP)
hlen += 4;
switch (ntohs(gip->gi_ptype)) { /* ethertypes */
case WCCP_PROTOCOL_TYPE:
if (sc->wccp_ver == WCCP_V2)
hlen += 4;
/* FALLTHROUGH */
case ETHERTYPE_IP: /* shouldn't need a schednetisr(), */
			isr = NETISR_IP;	/* as we are in ip_input */
af = AF_INET;
break;
#ifdef INET6
case ETHERTYPE_IPV6:
isr = NETISR_IPV6;
af = AF_INET6;
break;
#endif
default:
/* Others not yet supported. */
return (m);
}
break;
default:
/* Others not yet supported. */
return (m);
}
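	/*
	 * Sanity check: the computed header length must fit within the
	 * packet, otherwise drop it.
	 */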
if (hlen > m->m_pkthdr.len) {
m_freem(m);
return (NULL);
}
/* Unlike NetBSD, in FreeBSD m_adj() adjusts m->m_pkthdr.len as well */
m_adj(m, hlen);
if (bpf_peers_present(GRE2IFP(sc)->if_bpf)) {
bpf_mtap2(GRE2IFP(sc)->if_bpf, &af, sizeof(af), m);
}
if ((GRE2IFP(sc)->if_flags & IFF_MONITOR) != 0) {
m_freem(m);
		return (NULL);
}
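	/* Deliver the inner packet as if it arrived on the gre interface. */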
m->m_pkthdr.rcvif = GRE2IFP(sc);
netisr_queue(isr, m);
/* Packet is done, no further processing needed. */
return (NULL);
}
/*
 * Input routine for IPPROTO_MOBILE.
 * This is a little bit different from the other modes, as the
 * encapsulating header was not prepended, but instead inserted
 * between the IP header and the payload.
*/
void
gre_mobile_input(struct mbuf *m, int hlen)
{
struct ip *ip;
struct mobip_h *mip;
struct gre_softc *sc;
int msiz;
if ((sc = gre_lookup(m, IPPROTO_MOBILE)) == NULL) {
/* No matching tunnel or tunnel is down. */
m_freem(m);
return;
}
if (m->m_len < sizeof(*mip)) {
m = m_pullup(m, sizeof(*mip));
if (m == NULL)
return;
}
ip = mtod(m, struct ip *);
mip = mtod(m, struct mobip_h *);
GRE2IFP(sc)->if_ipackets++;
GRE2IFP(sc)->if_ibytes += m->m_pkthdr.len;
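	/*
	 * The S bit in the minimal encapsulation header selects the long
	 * form, which also carries the original source address.
	 */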
if (ntohs(mip->mh.proto) & MOB_H_SBIT) {
msiz = MOB_H_SIZ_L;
mip->mi.ip_src.s_addr = mip->mh.osrc;
} else
msiz = MOB_H_SIZ_S;
if (m->m_len < (ip->ip_hl << 2) + msiz) {
m = m_pullup(m, (ip->ip_hl << 2) + msiz);
if (m == NULL)
return;
ip = mtod(m, struct ip *);
mip = mtod(m, struct mobip_h *);
}
mip->mi.ip_dst.s_addr = mip->mh.odst;
mip->mi.ip_p = (ntohs(mip->mh.proto) >> 8);
if (gre_in_cksum((u_int16_t *)&mip->mh, msiz) != 0) {
m_freem(m);
return;
}
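	/*
	 * Strip the minimal encapsulation header by sliding the payload
	 * up against the (rewritten) IP header.
	 */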
bcopy((caddr_t)(ip) + (ip->ip_hl << 2) + msiz, (caddr_t)(ip) +
(ip->ip_hl << 2), m->m_len - msiz - (ip->ip_hl << 2));
m->m_len -= msiz;
m->m_pkthdr.len -= msiz;
/*
* On FreeBSD, rip_input() supplies us with ip->ip_len
	 * decreased by the length of the IP header; however, ip_input()
	 * expects it to be the full size of the IP packet, so adjust accordingly.
*/
ip->ip_len = htons(ntohs(ip->ip_len) + sizeof(struct ip) - msiz);
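	/* The IP header was rewritten above, so recompute its checksum. */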
ip->ip_sum = 0;
ip->ip_sum = in_cksum(m, (ip->ip_hl << 2));
if (bpf_peers_present(GRE2IFP(sc)->if_bpf)) {
u_int32_t af = AF_INET;
bpf_mtap2(GRE2IFP(sc)->if_bpf, &af, sizeof(af), m);
}
if ((GRE2IFP(sc)->if_flags & IFF_MONITOR) != 0) {
m_freem(m);
return;
}
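	/* Deliver the reconstructed packet as if it arrived on the gre interface. */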
m->m_pkthdr.rcvif = GRE2IFP(sc);
netisr_queue(NETISR_IP, m);
}
/*
* Find the gre interface associated with our src/dst/proto set.
*
* XXXRW: Need some sort of drain/refcount mechanism so that the softc
* reference remains valid after it's returned from gre_lookup(). Right
* now, I'm thinking it should be reference-counted with a gre_dropref()
* when the caller is done with the softc. This is complicated by how
 * to handle destroying the gre softc; probably using a gre_drain() in
 * in_gre.c during destroy.  (A hypothetical, non-compiled sketch of the
 * reference-count idea appears after gre_lookup() below.)
*/
static struct gre_softc *
gre_lookup(struct mbuf *m, u_int8_t proto)
{
struct ip *ip = mtod(m, struct ip *);
struct gre_softc *sc;
mtx_lock(&gre_mtx);
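	/*
	 * A tunnel matches when its configured endpoints mirror the outer
	 * source/destination addresses, the encapsulation protocol matches,
	 * and the interface is up.
	 */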
	LIST_FOREACH(sc, &gre_softc_list, sc_list) {
if ((sc->g_dst.s_addr == ip->ip_src.s_addr) &&
(sc->g_src.s_addr == ip->ip_dst.s_addr) &&
(sc->g_proto == proto) &&
((GRE2IFP(sc)->if_flags & IFF_UP) != 0)) {
mtx_unlock(&gre_mtx);
return (sc);
}
}
mtx_unlock(&gre_mtx);
return (NULL);
}
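
#if 0
/*
 * Hypothetical sketch only (kept out of the build): one possible shape
 * for the reference-count scheme suggested in the XXXRW comment above
 * gre_lookup().  The sc_refcnt field, the M_GRE malloc type and
 * gre_dropref() itself are illustrative assumptions and do not exist in
 * this file; gre_lookup() would take a reference while still holding
 * gre_mtx, and callers would release it with gre_dropref() when done
 * with the softc.
 */
#include <sys/malloc.h>
#include <sys/refcount.h>

static void
gre_dropref(struct gre_softc *sc)
{

	/* Reclaim the softc once the last reference has been dropped. */
	if (refcount_release(&sc->sc_refcnt))
		free(sc, M_GRE);
}
#endif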