freebsd-dev/sys/netinet/ip_options.c
Marko Zec 8b615593fc Step 1.5 of importing the network stack virtualization infrastructure
from the vimage project, as per plan established at devsummit 08/08:
http://wiki.freebsd.org/Image/Notes200808DevSummit

Introduce INIT_VNET_*() initializer macros, VNET_FOREACH() iterator
macros, and CURVNET_SET() context setting macros, all currently
resolving to NOPs.

Prepare for virtualization of selected SYSCTL objects by introducing a
family of SYSCTL_V_*() macros, currently resolving to their global
counterparts, i.e. SYSCTL_V_INT() == SYSCTL_INT().

Move selected #defines from sys/sys/vimage.h to newly introduced header
files specific to virtualized subsystems (sys/net/vnet.h,
sys/netinet/vinet.h etc.).

All the changes are verified to have zero functional impact at this
point in time by doing MD5 comparision between pre- and post-change
object files(*).

(*) netipsec/keysock.c did not validate depending on compile time options.

Implemented by:	julian, bz, brooks, zec
Reviewed by:	julian, bz, brooks, kris, rwatson, ...
Approved by:	julian (mentor)
Obtained from:	//depot/projects/vimage-commit2/...
X-MFC after:	never
Sponsored by:	NLnet Foundation, The FreeBSD Foundation
2008-10-02 15:37:58 +00:00

685 lines
18 KiB
C

/*
* Copyright (c) 1982, 1986, 1988, 1993
* The Regents of the University of California.
* Copyright (c) 2005 Andre Oppermann, Internet Business Solutions AG.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_ipstealth.h"
#include "opt_mac.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/kernel.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
#include <sys/vimage.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <net/netisr.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
#include <netinet/ip_options.h>
#include <netinet/ip_icmp.h>
#include <machine/in_cksum.h>
#include <sys/socketvar.h>
#include <security/mac/mac_framework.h>
static int ip_dosourceroute = 0;
SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute, CTLFLAG_RW,
&ip_dosourceroute, 0, "Enable forwarding source routed IP packets");
static int ip_acceptsourceroute = 0;
SYSCTL_INT(_net_inet_ip, IPCTL_ACCEPTSOURCEROUTE, accept_sourceroute,
CTLFLAG_RW, &ip_acceptsourceroute, 0,
"Enable accepting source routed IP packets");
int ip_doopts = 1; /* 0 = ignore, 1 = process, 2 = reject */
SYSCTL_INT(_net_inet_ip, OID_AUTO, process_options, CTLFLAG_RW,
&ip_doopts, 0, "Enable IP options processing ([LS]SRR, RR, TS)");
static void save_rte(struct mbuf *m, u_char *, struct in_addr);
/*
* Do option processing on a datagram, possibly discarding it if bad options
* are encountered, or forwarding it if source-routed.
*
* The pass argument is used when operating in the IPSTEALTH mode to tell
* what options to process: [LS]SRR (pass 0) or the others (pass 1). The
* reason for as many as two passes is that when doing IPSTEALTH, non-routing
* options should be processed only if the packet is for us.
*
* Returns 1 if packet has been forwarded/freed, 0 if the packet should be
* processed further.
*/
int
ip_dooptions(struct mbuf *m, int pass)
{
INIT_VNET_INET(curvnet);
struct ip *ip = mtod(m, struct ip *);
u_char *cp;
struct in_ifaddr *ia;
int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
struct in_addr *sin, dst;
n_time ntime;
struct sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
/* Ignore or reject packets with IP options. */
if (ip_doopts == 0)
return 0;
else if (ip_doopts == 2) {
type = ICMP_UNREACH;
code = ICMP_UNREACH_FILTER_PROHIB;
goto bad;
}
dst = ip->ip_dst;
cp = (u_char *)(ip + 1);
cnt = (ip->ip_hl << 2) - sizeof (struct ip);
for (; cnt > 0; cnt -= optlen, cp += optlen) {
opt = cp[IPOPT_OPTVAL];
if (opt == IPOPT_EOL)
break;
if (opt == IPOPT_NOP)
optlen = 1;
else {
if (cnt < IPOPT_OLEN + sizeof(*cp)) {
code = &cp[IPOPT_OLEN] - (u_char *)ip;
goto bad;
}
optlen = cp[IPOPT_OLEN];
if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) {
code = &cp[IPOPT_OLEN] - (u_char *)ip;
goto bad;
}
}
switch (opt) {
default:
break;
/*
* Source routing with record. Find interface with current
* destination address. If none on this machine then drop if
* strictly routed, or do nothing if loosely routed. Record
* interface address and bring up next address component. If
* strictly routed make sure next address is on directly
* accessible net.
*/
case IPOPT_LSRR:
case IPOPT_SSRR:
#ifdef IPSTEALTH
if (V_ipstealth && pass > 0)
break;
#endif
if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
code = &cp[IPOPT_OLEN] - (u_char *)ip;
goto bad;
}
if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
code = &cp[IPOPT_OFFSET] - (u_char *)ip;
goto bad;
}
ipaddr.sin_addr = ip->ip_dst;
ia = (struct in_ifaddr *)
ifa_ifwithaddr((struct sockaddr *)&ipaddr);
if (ia == NULL) {
if (opt == IPOPT_SSRR) {
type = ICMP_UNREACH;
code = ICMP_UNREACH_SRCFAIL;
goto bad;
}
if (!ip_dosourceroute)
goto nosourcerouting;
/*
* Loose routing, and not at next destination
* yet; nothing to do except forward.
*/
break;
}
off--; /* 0 origin */
if (off > optlen - (int)sizeof(struct in_addr)) {
/*
* End of source route. Should be for us.
*/
if (!ip_acceptsourceroute)
goto nosourcerouting;
save_rte(m, cp, ip->ip_src);
break;
}
#ifdef IPSTEALTH
if (V_ipstealth)
goto dropit;
#endif
if (!ip_dosourceroute) {
if (V_ipforwarding) {
char buf[16]; /* aaa.bbb.ccc.ddd\0 */
/*
* Acting as a router, so generate
* ICMP
*/
nosourcerouting:
strcpy(buf, inet_ntoa(ip->ip_dst));
log(LOG_WARNING,
"attempted source route from %s to %s\n",
inet_ntoa(ip->ip_src), buf);
type = ICMP_UNREACH;
code = ICMP_UNREACH_SRCFAIL;
goto bad;
} else {
/*
* Not acting as a router, so
* silently drop.
*/
#ifdef IPSTEALTH
dropit:
#endif
V_ipstat.ips_cantforward++;
m_freem(m);
return (1);
}
}
/*
* locate outgoing interface
*/
(void)memcpy(&ipaddr.sin_addr, cp + off,
sizeof(ipaddr.sin_addr));
if (opt == IPOPT_SSRR) {
#define INA struct in_ifaddr *
#define SA struct sockaddr *
if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == NULL)
ia = (INA)ifa_ifwithnet((SA)&ipaddr);
} else
/* XXX MRT 0 for routing */
ia = ip_rtaddr(ipaddr.sin_addr, M_GETFIB(m));
if (ia == NULL) {
type = ICMP_UNREACH;
code = ICMP_UNREACH_SRCFAIL;
goto bad;
}
ip->ip_dst = ipaddr.sin_addr;
(void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
sizeof(struct in_addr));
cp[IPOPT_OFFSET] += sizeof(struct in_addr);
/*
* Let ip_intr's mcast routing check handle mcast pkts
*/
forward = !IN_MULTICAST(ntohl(ip->ip_dst.s_addr));
break;
case IPOPT_RR:
#ifdef IPSTEALTH
if (V_ipstealth && pass == 0)
break;
#endif
if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
code = &cp[IPOPT_OFFSET] - (u_char *)ip;
goto bad;
}
if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
code = &cp[IPOPT_OFFSET] - (u_char *)ip;
goto bad;
}
/*
* If no space remains, ignore.
*/
off--; /* 0 origin */
if (off > optlen - (int)sizeof(struct in_addr))
break;
(void)memcpy(&ipaddr.sin_addr, &ip->ip_dst,
sizeof(ipaddr.sin_addr));
/*
* Locate outgoing interface; if we're the
* destination, use the incoming interface (should be
* same).
*/
if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == NULL &&
(ia = ip_rtaddr(ipaddr.sin_addr, M_GETFIB(m))) == NULL) {
type = ICMP_UNREACH;
code = ICMP_UNREACH_HOST;
goto bad;
}
(void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
sizeof(struct in_addr));
cp[IPOPT_OFFSET] += sizeof(struct in_addr);
break;
case IPOPT_TS:
#ifdef IPSTEALTH
if (V_ipstealth && pass == 0)
break;
#endif
code = cp - (u_char *)ip;
if (optlen < 4 || optlen > 40) {
code = &cp[IPOPT_OLEN] - (u_char *)ip;
goto bad;
}
if ((off = cp[IPOPT_OFFSET]) < 5) {
code = &cp[IPOPT_OLEN] - (u_char *)ip;
goto bad;
}
if (off > optlen - (int)sizeof(int32_t)) {
cp[IPOPT_OFFSET + 1] += (1 << 4);
if ((cp[IPOPT_OFFSET + 1] & 0xf0) == 0) {
code = &cp[IPOPT_OFFSET] - (u_char *)ip;
goto bad;
}
break;
}
off--; /* 0 origin */
sin = (struct in_addr *)(cp + off);
switch (cp[IPOPT_OFFSET + 1] & 0x0f) {
case IPOPT_TS_TSONLY:
break;
case IPOPT_TS_TSANDADDR:
if (off + sizeof(n_time) +
sizeof(struct in_addr) > optlen) {
code = &cp[IPOPT_OFFSET] - (u_char *)ip;
goto bad;
}
ipaddr.sin_addr = dst;
ia = (INA)ifaof_ifpforaddr((SA)&ipaddr,
m->m_pkthdr.rcvif);
if (ia == NULL)
continue;
(void)memcpy(sin, &IA_SIN(ia)->sin_addr,
sizeof(struct in_addr));
cp[IPOPT_OFFSET] += sizeof(struct in_addr);
off += sizeof(struct in_addr);
break;
case IPOPT_TS_PRESPEC:
if (off + sizeof(n_time) +
sizeof(struct in_addr) > optlen) {
code = &cp[IPOPT_OFFSET] - (u_char *)ip;
goto bad;
}
(void)memcpy(&ipaddr.sin_addr, sin,
sizeof(struct in_addr));
if (ifa_ifwithaddr((SA)&ipaddr) == NULL)
continue;
cp[IPOPT_OFFSET] += sizeof(struct in_addr);
off += sizeof(struct in_addr);
break;
default:
code = &cp[IPOPT_OFFSET + 1] - (u_char *)ip;
goto bad;
}
ntime = iptime();
(void)memcpy(cp + off, &ntime, sizeof(n_time));
cp[IPOPT_OFFSET] += sizeof(n_time);
}
}
if (forward && V_ipforwarding) {
ip_forward(m, 1);
return (1);
}
return (0);
bad:
icmp_error(m, type, code, 0, 0);
V_ipstat.ips_badoptions++;
return (1);
}
/*
* Save incoming source route for use in replies, to be picked up later by
* ip_srcroute if the receiver is interested.
*/
static void
save_rte(struct mbuf *m, u_char *option, struct in_addr dst)
{
unsigned olen;
struct ipopt_tag *opts;
opts = (struct ipopt_tag *)m_tag_get(PACKET_TAG_IPOPTIONS,
sizeof(struct ipopt_tag), M_NOWAIT);
if (opts == NULL)
return;
olen = option[IPOPT_OLEN];
if (olen > sizeof(opts->ip_srcrt) - (1 + sizeof(dst))) {
m_tag_free((struct m_tag *)opts);
return;
}
bcopy(option, opts->ip_srcrt.srcopt, olen);
opts->ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
opts->ip_srcrt.dst = dst;
m_tag_prepend(m, (struct m_tag *)opts);
}
/*
* Retrieve incoming source route for use in replies, in the same form used
* by setsockopt. The first hop is placed before the options, will be
* removed later.
*/
struct mbuf *
ip_srcroute(struct mbuf *m0)
{
struct in_addr *p, *q;
struct mbuf *m;
struct ipopt_tag *opts;
opts = (struct ipopt_tag *)m_tag_find(m0, PACKET_TAG_IPOPTIONS, NULL);
if (opts == NULL)
return (NULL);
if (opts->ip_nhops == 0)
return (NULL);
m = m_get(M_DONTWAIT, MT_DATA);
if (m == NULL)
return (NULL);
#define OPTSIZ (sizeof(opts->ip_srcrt.nop) + sizeof(opts->ip_srcrt.srcopt))
/* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
m->m_len = opts->ip_nhops * sizeof(struct in_addr) +
sizeof(struct in_addr) + OPTSIZ;
/*
* First, save first hop for return route.
*/
p = &(opts->ip_srcrt.route[opts->ip_nhops - 1]);
*(mtod(m, struct in_addr *)) = *p--;
/*
* Copy option fields and padding (nop) to mbuf.
*/
opts->ip_srcrt.nop = IPOPT_NOP;
opts->ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
(void)memcpy(mtod(m, caddr_t) + sizeof(struct in_addr),
&(opts->ip_srcrt.nop), OPTSIZ);
q = (struct in_addr *)(mtod(m, caddr_t) +
sizeof(struct in_addr) + OPTSIZ);
#undef OPTSIZ
/*
* Record return path as an IP source route, reversing the path
* (pointers are now aligned).
*/
while (p >= opts->ip_srcrt.route) {
*q++ = *p--;
}
/*
* Last hop goes to final destination.
*/
*q = opts->ip_srcrt.dst;
m_tag_delete(m0, (struct m_tag *)opts);
return (m);
}
/*
* Strip out IP options, at higher level protocol in the kernel. Second
* argument is buffer to which options will be moved, and return value is
* their length.
*
* XXX should be deleted; last arg currently ignored.
*/
void
ip_stripoptions(struct mbuf *m, struct mbuf *mopt)
{
int i;
struct ip *ip = mtod(m, struct ip *);
caddr_t opts;
int olen;
olen = (ip->ip_hl << 2) - sizeof (struct ip);
opts = (caddr_t)(ip + 1);
i = m->m_len - (sizeof (struct ip) + olen);
bcopy(opts + olen, opts, (unsigned)i);
m->m_len -= olen;
if (m->m_flags & M_PKTHDR)
m->m_pkthdr.len -= olen;
ip->ip_v = IPVERSION;
ip->ip_hl = sizeof(struct ip) >> 2;
}
/*
* Insert IP options into preformed packet. Adjust IP destination as
* required for IP source routing, as indicated by a non-zero in_addr at the
* start of the options.
*
* XXX This routine assumes that the packet has no options in place.
*/
struct mbuf *
ip_insertoptions(struct mbuf *m, struct mbuf *opt, int *phlen)
{
struct ipoption *p = mtod(opt, struct ipoption *);
struct mbuf *n;
struct ip *ip = mtod(m, struct ip *);
unsigned optlen;
optlen = opt->m_len - sizeof(p->ipopt_dst);
if (optlen + ip->ip_len > IP_MAXPACKET) {
*phlen = 0;
return (m); /* XXX should fail */
}
if (p->ipopt_dst.s_addr)
ip->ip_dst = p->ipopt_dst;
if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
MGETHDR(n, M_DONTWAIT, MT_DATA);
if (n == NULL) {
*phlen = 0;
return (m);
}
M_MOVE_PKTHDR(n, m);
n->m_pkthdr.rcvif = NULL;
n->m_pkthdr.len += optlen;
m->m_len -= sizeof(struct ip);
m->m_data += sizeof(struct ip);
n->m_next = m;
m = n;
m->m_len = optlen + sizeof(struct ip);
m->m_data += max_linkhdr;
bcopy(ip, mtod(m, void *), sizeof(struct ip));
} else {
m->m_data -= optlen;
m->m_len += optlen;
m->m_pkthdr.len += optlen;
bcopy(ip, mtod(m, void *), sizeof(struct ip));
}
ip = mtod(m, struct ip *);
bcopy(p->ipopt_list, ip + 1, optlen);
*phlen = sizeof(struct ip) + optlen;
ip->ip_v = IPVERSION;
ip->ip_hl = *phlen >> 2;
ip->ip_len += optlen;
return (m);
}
/*
* Copy options from ip to jp, omitting those not copied during
* fragmentation.
*/
int
ip_optcopy(struct ip *ip, struct ip *jp)
{
u_char *cp, *dp;
int opt, optlen, cnt;
cp = (u_char *)(ip + 1);
dp = (u_char *)(jp + 1);
cnt = (ip->ip_hl << 2) - sizeof (struct ip);
for (; cnt > 0; cnt -= optlen, cp += optlen) {
opt = cp[0];
if (opt == IPOPT_EOL)
break;
if (opt == IPOPT_NOP) {
/* Preserve for IP mcast tunnel's LSRR alignment. */
*dp++ = IPOPT_NOP;
optlen = 1;
continue;
}
KASSERT(cnt >= IPOPT_OLEN + sizeof(*cp),
("ip_optcopy: malformed ipv4 option"));
optlen = cp[IPOPT_OLEN];
KASSERT(optlen >= IPOPT_OLEN + sizeof(*cp) && optlen <= cnt,
("ip_optcopy: malformed ipv4 option"));
/* Bogus lengths should have been caught by ip_dooptions. */
if (optlen > cnt)
optlen = cnt;
if (IPOPT_COPIED(opt)) {
bcopy(cp, dp, optlen);
dp += optlen;
}
}
for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
*dp++ = IPOPT_EOL;
return (optlen);
}
/*
* Set up IP options in pcb for insertion in output packets. Store in mbuf
* with pointer in pcbopt, adding pseudo-option with destination address if
* source routed.
*/
int
ip_pcbopts(struct inpcb *inp, int optname, struct mbuf *m)
{
int cnt, optlen;
u_char *cp;
struct mbuf **pcbopt;
u_char opt;
INP_WLOCK_ASSERT(inp);
pcbopt = &inp->inp_options;
/* turn off any old options */
if (*pcbopt)
(void)m_free(*pcbopt);
*pcbopt = 0;
if (m == NULL || m->m_len == 0) {
/*
* Only turning off any previous options.
*/
if (m != NULL)
(void)m_free(m);
return (0);
}
if (m->m_len % sizeof(int32_t))
goto bad;
/*
* IP first-hop destination address will be stored before actual
* options; move other options back and clear it when none present.
*/
if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
goto bad;
cnt = m->m_len;
m->m_len += sizeof(struct in_addr);
cp = mtod(m, u_char *) + sizeof(struct in_addr);
bcopy(mtod(m, void *), cp, (unsigned)cnt);
bzero(mtod(m, void *), sizeof(struct in_addr));
for (; cnt > 0; cnt -= optlen, cp += optlen) {
opt = cp[IPOPT_OPTVAL];
if (opt == IPOPT_EOL)
break;
if (opt == IPOPT_NOP)
optlen = 1;
else {
if (cnt < IPOPT_OLEN + sizeof(*cp))
goto bad;
optlen = cp[IPOPT_OLEN];
if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
goto bad;
}
switch (opt) {
default:
break;
case IPOPT_LSRR:
case IPOPT_SSRR:
/*
* User process specifies route as:
*
* ->A->B->C->D
*
* D must be our final destination (but we can't
* check that since we may not have connected yet).
* A is first hop destination, which doesn't appear
* in actual IP option, but is stored before the
* options.
*/
/* XXX-BZ PRIV_NETINET_SETHDROPTS? */
if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
goto bad;
m->m_len -= sizeof(struct in_addr);
cnt -= sizeof(struct in_addr);
optlen -= sizeof(struct in_addr);
cp[IPOPT_OLEN] = optlen;
/*
* Move first hop before start of options.
*/
bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
sizeof(struct in_addr));
/*
* Then copy rest of options back
* to close up the deleted entry.
*/
bcopy((&cp[IPOPT_OFFSET+1] + sizeof(struct in_addr)),
&cp[IPOPT_OFFSET+1],
(unsigned)cnt - (IPOPT_MINOFF - 1));
break;
}
}
if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
goto bad;
*pcbopt = m;
return (0);
bad:
(void)m_free(m);
return (EINVAL);
}