freebsd-dev/sys/netinet6/in6_proto.c

651 lines
22 KiB
C
Raw Normal View History

/*-
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the project nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
2007-12-10 16:03:40 +00:00
*
* $KAME: in6_proto.c,v 1.91 2001/05/27 13:28:35 itojun Exp $
*/
/*-
* Copyright (c) 1982, 1986, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)in_proto.c 8.1 (Berkeley) 6/10/93
*/
2007-12-10 16:03:40 +00:00
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
#include "opt_ipstealth.h"
#include "opt_sctp.h"
#include "opt_mpath.h"
#include "opt_route.h"
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
Conditionally compile out V_ globals while instantiating the appropriate container structures, depending on VIMAGE_GLOBALS compile time option. Make VIMAGE_GLOBALS a new compile-time option, which by default will not be defined, resulting in instatiations of global variables selected for V_irtualization (enclosed in #ifdef VIMAGE_GLOBALS blocks) to be effectively compiled out. Instantiate new global container structures to hold V_irtualized variables: vnet_net_0, vnet_inet_0, vnet_inet6_0, vnet_ipsec_0, vnet_netgraph_0, and vnet_gif_0. Update the VSYM() macro so that depending on VIMAGE_GLOBALS the V_ macros resolve either to the original globals, or to fields inside container structures, i.e. effectively #ifdef VIMAGE_GLOBALS #define V_rt_tables rt_tables #else #define V_rt_tables vnet_net_0._rt_tables #endif Update SYSCTL_V_*() macros to operate either on globals or on fields inside container structs. Extend the internal kldsym() lookups with the ability to resolve selected fields inside the virtualization container structs. This applies only to the fields which are explicitly registered for kldsym() visibility via VNET_MOD_DECLARE() and vnet_mod_register(), currently this is done only in sys/net/if.c. Fix a few broken instances of MODULE_GLOBAL() macro use in SCTP code, and modify the MODULE_GLOBAL() macro to resolve to V_ macros, which in turn result in proper code being generated depending on VIMAGE_GLOBALS. De-virtualize local static variables in sys/contrib/pf/net/pf_subr.c which were prematurely V_irtualized by automated V_ prepending scripts during earlier merging steps. PF virtualization will be done separately, most probably after next PF import. Convert a few variable initializations at instantiation to initialization in init functions, most notably in ipfw. Also convert TUNABLE_INT() initializers for V_ variables to TUNABLE_FETCH_INT() in initializer functions. Discussed at: devsummit Strassburg Reviewed by: bz, julian Approved by: julian (mentor) Obtained from: //depot/projects/vimage-commit2/... X-MFC after: never Sponsored by: NLnet Foundation, The FreeBSD Foundation
2008-12-10 23:12:39 +00:00
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/radix.h>
#include <net/route.h>
#ifdef RADIX_MPATH
#include <net/radix_mpath.h>
#endif
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip_encap.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet/icmp6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet6/tcp6_var.h>
#include <netinet6/raw_ip6.h>
#include <netinet6/udp6_var.h>
#include <netinet6/pim6_var.h>
#include <netinet6/nd6.h>
#ifdef SCTP
#include <netinet/in_pcb.h>
#include <netinet/sctp_pcb.h>
#include <netinet/sctp.h>
#include <netinet/sctp_var.h>
#include <netinet6/sctp6_var.h>
#endif /* SCTP */
#ifdef IPSEC
#include <netipsec/ipsec.h>
#include <netipsec/ipsec6.h>
#endif /* IPSEC */
#include <netinet6/ip6protosw.h>
/*
* TCP/IP protocol family: IP6, ICMP6, UDP, TCP.
*/
FEATURE(inet6, "Internet Protocol version 6");
extern struct domain inet6domain;
2005-08-10 06:38:46 +00:00
static struct pr_usrreqs nousrreqs;
#define PR_LISTEN 0
#define PR_ABRTACPTDIS 0
/* Spacer for loadable protocols. */
#define IP6PROTOSPACER \
{ \
.pr_domain = &inet6domain, \
.pr_protocol = PROTO_SPACER, \
.pr_usrreqs = &nousrreqs \
}
struct protosw inet6sw[] = {
{
.pr_type = 0,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_IPV6,
.pr_init = ip6_init,
#ifdef VIMAGE
.pr_destroy = ip6_destroy,
#endif
.pr_slowtimo = frag6_slowtimo,
.pr_drain = frag6_drain,
.pr_usrreqs = &nousrreqs,
},
{
.pr_type = SOCK_DGRAM,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_UDP,
.pr_flags = PR_ATOMIC|PR_ADDR,
.pr_input = udp6_input,
.pr_ctlinput = udp6_ctlinput,
.pr_ctloutput = ip6_ctloutput,
#ifndef INET /* Do not call initialization twice. */
.pr_init = udp_init,
#endif
.pr_usrreqs = &udp6_usrreqs,
},
{
.pr_type = SOCK_STREAM,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_TCP,
.pr_flags = PR_CONNREQUIRED|PR_WANTRCVD|PR_LISTEN,
.pr_input = tcp6_input,
.pr_ctlinput = tcp6_ctlinput,
.pr_ctloutput = tcp_ctloutput,
#ifndef INET /* don't call initialization and timeout routines twice */
.pr_init = tcp_init,
.pr_slowtimo = tcp_slowtimo,
#endif
.pr_drain = tcp_drain,
.pr_usrreqs = &tcp6_usrreqs,
},
#ifdef SCTP
{
.pr_type = SOCK_SEQPACKET,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_SCTP,
.pr_flags = PR_WANTRCVD,
.pr_input = sctp6_input,
.pr_ctlinput = sctp6_ctlinput,
.pr_ctloutput = sctp_ctloutput,
.pr_drain = sctp_drain,
#ifndef INET /* Do not call initialization twice. */
.pr_init = sctp_init,
#endif
.pr_usrreqs = &sctp6_usrreqs
},
{
.pr_type = SOCK_STREAM,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_SCTP,
.pr_flags = PR_WANTRCVD,
.pr_input = sctp6_input,
2014-04-07 01:55:53 +00:00
.pr_ctlinput = sctp6_ctlinput,
.pr_ctloutput = sctp_ctloutput,
.pr_drain = sctp_drain,
.pr_usrreqs = &sctp6_usrreqs
},
#endif /* SCTP */
{
.pr_type = SOCK_DGRAM,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_UDPLITE,
.pr_flags = PR_ATOMIC|PR_ADDR,
.pr_input = udp6_input,
.pr_ctlinput = udplite6_ctlinput,
.pr_ctloutput = udp_ctloutput,
#ifndef INET /* Do not call initialization twice. */
.pr_init = udplite_init,
#endif
.pr_usrreqs = &udp6_usrreqs,
},
{
.pr_type = SOCK_RAW,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_RAW,
.pr_flags = PR_ATOMIC|PR_ADDR,
.pr_input = rip6_input,
.pr_output = rip6_output,
.pr_ctlinput = rip6_ctlinput,
.pr_ctloutput = rip6_ctloutput,
#ifndef INET /* Do not call initialization twice. */
.pr_init = rip_init,
#endif
.pr_usrreqs = &rip6_usrreqs
},
{
.pr_type = SOCK_RAW,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_ICMPV6,
.pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
.pr_input = icmp6_input,
.pr_output = rip6_output,
.pr_ctlinput = rip6_ctlinput,
.pr_ctloutput = rip6_ctloutput,
.pr_fasttimo = icmp6_fasttimo,
Bite the bullet, and make the IPv6 SSM and MLDv2 mega-commit: import from p4 bms_netdev. Summary of changes: * Connect netinet6/in6_mcast.c to build. The legacy KAME KPIs are mostly preserved. * Eliminate now dead code from ip6_output.c. Don't do mbuf bingo, we are not going to do RFC 2292 style CMSG tricks for multicast options as they are not required by any current IPv6 normative reference. * Refactor transports (UDP, raw_ip6) to do own mcast filtering. SCTP, TCP unaffected by this change. * Add ip6_msource, in6_msource structs to in6_var.h. * Hookup mld_ifinfo state to in6_ifextra, allocate from domifattach path. * Eliminate IN6_LOOKUP_MULTI(), it is no longer referenced. Kernel consumers which need this should use in6m_lookup(). * Refactor IPv6 socket group memberships to use a vector (like IPv4). * Update ifmcstat(8) for IPv6 SSM. * Add witness lock order for IN6_MULTI_LOCK. * Move IN6_MULTI_LOCK out of lower ip6_output()/ip6_input() paths. * Introduce IP6STAT_ADD/SUB/INC/DEC as per rwatson's IPv4 cleanup. * Update carp(4) for new IPv6 SSM KPIs. * Virtualize ip6_mrouter socket. Changes mostly localized to IPv6 MROUTING. * Don't do a local group lookup in MROUTING. * Kill unused KAME prototypes in6_purgemkludge(), in6_restoremkludge(). * Preserve KAME DAD timer jitter behaviour in MLDv1 compatibility mode. * Bump __FreeBSD_version to 800084. * Update UPDATING. NOTE WELL: * This code hasn't been tested against real MLDv2 queriers (yet), although the on-wire protocol has been verified in Wireshark. * There are a few unresolved issues in the socket layer APIs to do with scope ID propagation. * There is a LOR present in ip6_output()'s use of in6_setscope() which needs to be resolved. See comments in mld6.c. This is believed to be benign and can't be avoided for the moment without re-introducing an indirect netisr. This work was mostly derived from the IGMPv3 implementation, and has been sponsored by a third party.
2009-04-29 19:19:13 +00:00
.pr_slowtimo = icmp6_slowtimo,
.pr_usrreqs = &rip6_usrreqs
},
{
.pr_type = SOCK_RAW,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_DSTOPTS,
.pr_flags = PR_ATOMIC|PR_ADDR,
.pr_input = dest6_input,
.pr_usrreqs = &nousrreqs
},
{
.pr_type = SOCK_RAW,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_ROUTING,
.pr_flags = PR_ATOMIC|PR_ADDR,
.pr_input = route6_input,
.pr_usrreqs = &nousrreqs
},
{
.pr_type = SOCK_RAW,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_FRAGMENT,
.pr_flags = PR_ATOMIC|PR_ADDR,
.pr_input = frag6_input,
.pr_usrreqs = &nousrreqs
},
#ifdef IPSEC
{
.pr_type = SOCK_RAW,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_AH,
.pr_flags = PR_ATOMIC|PR_ADDR,
.pr_input = ipsec6_common_input,
.pr_usrreqs = &nousrreqs,
},
{
.pr_type = SOCK_RAW,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_ESP,
.pr_flags = PR_ATOMIC|PR_ADDR,
.pr_input = ipsec6_common_input,
.pr_ctlinput = esp6_ctlinput,
.pr_usrreqs = &nousrreqs,
},
{
.pr_type = SOCK_RAW,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_IPCOMP,
.pr_flags = PR_ATOMIC|PR_ADDR,
.pr_input = ipsec6_common_input,
.pr_usrreqs = &nousrreqs,
},
#endif /* IPSEC */
#ifdef INET
{
.pr_type = SOCK_RAW,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_IPV4,
.pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
.pr_input = encap6_input,
.pr_output = rip6_output,
.pr_ctloutput = rip6_ctloutput,
.pr_init = encap_init,
.pr_usrreqs = &rip6_usrreqs
},
#endif /* INET */
{
.pr_type = SOCK_RAW,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_IPV6,
.pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
.pr_input = encap6_input,
.pr_output = rip6_output,
.pr_ctloutput = rip6_ctloutput,
.pr_init = encap_init,
.pr_usrreqs = &rip6_usrreqs
},
{
.pr_type = SOCK_RAW,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_GRE,
.pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
.pr_input = encap6_input,
.pr_output = rip6_output,
.pr_ctloutput = rip6_ctloutput,
.pr_init = encap_init,
.pr_usrreqs = &rip6_usrreqs
},
{
.pr_type = SOCK_RAW,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_PIM,
.pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
.pr_input = encap6_input,
.pr_output = rip6_output,
.pr_ctloutput = rip6_ctloutput,
.pr_usrreqs = &rip6_usrreqs
},
/* Spacer n-times for loadable protocols. */
IP6PROTOSPACER,
IP6PROTOSPACER,
IP6PROTOSPACER,
IP6PROTOSPACER,
IP6PROTOSPACER,
IP6PROTOSPACER,
IP6PROTOSPACER,
IP6PROTOSPACER,
/* raw wildcard */
{
.pr_type = SOCK_RAW,
.pr_domain = &inet6domain,
.pr_flags = PR_ATOMIC|PR_ADDR,
.pr_input = rip6_input,
.pr_output = rip6_output,
.pr_ctloutput = rip6_ctloutput,
.pr_usrreqs = &rip6_usrreqs
},
};
2008-01-08 19:08:58 +00:00
extern int in6_inithead(void **, int);
#ifdef VIMAGE
extern int in6_detachhead(void **, int);
#endif
struct domain inet6domain = {
.dom_family = AF_INET6,
.dom_name = "internet6",
.dom_protosw = (struct protosw *)inet6sw,
.dom_protoswNPROTOSW = (struct protosw *)&inet6sw[nitems(inet6sw)],
#ifdef RADIX_MPATH
.dom_rtattach = rn6_mpath_inithead,
#else
.dom_rtattach = in6_inithead,
#endif
#ifdef VIMAGE
.dom_rtdetach = in6_detachhead,
#endif
.dom_ifattach = in6_domifattach,
Make checks for rt_mtu generic: Some virtual if drivers has (ab)used ifa ifa_rtrequest hook to enforce route MTU to be not bigger that interface MTU. While ifa_rtrequest hooking might be an option in some situation, it is not feasible to do MTU checks there: generic (or per-domain) routing code is perfectly capable of doing this. We currrently have 3 places where MTU is altered: 1) route addition. In this case domain overrides radix _addroute callback (in[6]_addroute) and all necessary checks/fixes are/can be done there. 2) route change (especially, GW change). In this case, there are no explicit per-domain calls, but one can override rte by setting ifa_rtrequest hook to domain handler (inet6 does this). 3) ifconfig ifaceX mtu YYYY In this case, we have no callbacks, but ip[6]_output performes runtime checks and decreases rt_mtu if necessary. Generally, the goals are to be able to handle all MTU changes in control plane, not in runtime part, and properly deal with increased interface MTU. This commit changes the following: * removes hooks setting MTU from drivers side * adds proper per-doman MTU checks for case 1) * adds generic MTU check for case 2) * The latter is done by using new dom_ifmtu callback since if_mtu denotes L3 interface MTU, e.g. maximum trasmitted _packet_ size. However, IPv6 mtu might be different from if_mtu one (e.g. default 1280) for some cases, so we need an abstract way to know maximum MTU size for given interface and domain. * moves rt_setmetrics() before MTU/ifa_rtrequest hooks since it copies user-supplied data which must be checked. * removes RT_LOCK_ASSERT() from other ifa_rtrequest hooks to be able to use this functions on new non-inserted rte. More changes will follow soon. MFC after: 1 month Sponsored by: Yandex LLC
2014-11-06 13:13:09 +00:00
.dom_ifdetach = in6_domifdetach,
.dom_ifmtu = in6_domifmtu
};
VNET_DOMAIN_SET(inet6);
/*
* Internet configuration info
*/
#ifndef IPV6FORWARDING
#ifdef GATEWAY6
#define IPV6FORWARDING 1 /* forward IP6 packets not for us */
#else
#define IPV6FORWARDING 0 /* don't forward IP6 packets not for us */
#endif /* GATEWAY6 */
#endif /* !IPV6FORWARDING */
#ifndef IPV6_SENDREDIRECTS
#define IPV6_SENDREDIRECTS 1
#endif
VNET_DEFINE(int, ip6_forwarding) = IPV6FORWARDING; /* act as router? */
VNET_DEFINE(int, ip6_sendredirects) = IPV6_SENDREDIRECTS;
VNET_DEFINE(int, ip6_defhlim) = IPV6_DEFHLIM;
VNET_DEFINE(int, ip6_defmcasthlim) = IPV6_DEFAULT_MULTICAST_HOPS;
VNET_DEFINE(int, ip6_accept_rtadv) = 0;
- Accept Router Advertisement messages even when net.inet6.ip6.forwarding=1. - A new per-interface knob IFF_ND6_NO_RADR and sysctl IPV6CTL_NO_RADR. This controls if accepting a route in an RA message as the default route. The default value for each interface can be set by net.inet6.ip6.no_radr. The system wide default value is 0. - A new sysctl: net.inet6.ip6.norbit_raif. This controls if setting R-bit in NA on RA accepting interfaces. The default is 0 (R-bit is set based on net.inet6.ip6.forwarding). Background: IPv6 host/router model suggests a router sends an RA and a host accepts it for router discovery. Because of that, KAME implementation does not allow accepting RAs when net.inet6.ip6.forwarding=1. Accepting RAs on a router can make the routing table confused since it can change the default router unintentionally. However, in practice there are cases where we cannot distinguish a host from a router clearly. For example, a customer edge router often works as a host against the ISP, and as a router against the LAN at the same time. Another example is a complex network configurations like an L2TP tunnel for IPv6 connection to Internet over an Ethernet link with another native IPv6 subnet. In this case, the physical interface for the native IPv6 subnet works as a host, and the pseudo-interface for L2TP works as the default IP forwarding route. Problem: Disabling processing RA messages when net.inet6.ip6.forwarding=1 and accepting them when net.inet6.ip6.forward=0 cause the following practical issues: - A router cannot perform SLAAC. It becomes a problem if a box has multiple interfaces and you want to use SLAAC on some of them, for example. A customer edge router for IPv6 Internet access service using an IPv6-over-IPv6 tunnel sometimes needs SLAAC on the physical interface for administration purpose; updating firmware and so on (link-local addresses can be used there, but GUAs by SLAAC are often used for scalability). - When a host has multiple IPv6 interfaces and it receives multiple RAs on them, controlling the default route is difficult. Router preferences defined in RFC 4191 works only when the routers on the links are under your control. Details of Implementation Changes: Router Advertisement messages will be accepted even when net.inet6.ip6.forwarding=1. More precisely, the conditions are as follow: (ACCEPT_RTADV && !NO_RADR && !ip6.forwarding) => Normal RA processing on that interface. (as IPv6 host) (ACCEPT_RTADV && (NO_RADR || ip6.forwarding)) => Accept RA but add the router to the defroute list with rtlifetime=0 unconditionally. This effectively prevents from setting the received router address as the box's default route. (!ACCEPT_RTADV) => No RA processing on that interface. ACCEPT_RTADV and NO_RADR are per-interface knob. In short, all interface are classified as "RA-accepting" or not. An RA-accepting interface always processes RA messages regardless of ip6.forwarding. The difference caused by NO_RADR or ip6.forwarding is whether the RA source address is considered as the default router or not. R-bit in NA on the RA accepting interfaces is set based on net.inet6.ip6.forwarding. While RFC 6204 W-1 rule (for CPE case) suggests a router should disable the R-bit completely even when the box has net.inet6.ip6.forwarding=1, I believe there is no technical reason with doing so. This behavior can be set by a new sysctl net.inet6.ip6.norbit_raif (the default is 0). Usage: # ifconfig fxp0 inet6 accept_rtadv => accept RA on fxp0 # ifconfig fxp0 inet6 accept_rtadv no_radr => accept RA on fxp0 but ignore default route information in it. # sysctl net.inet6.ip6.norbit_no_radr=1 => R-bit in NAs on RA accepting interfaces will always be set to 0.
2011-06-06 02:14:23 +00:00
VNET_DEFINE(int, ip6_no_radr) = 0;
VNET_DEFINE(int, ip6_norbit_raif) = 0;
VNET_DEFINE(int, ip6_rfc6204w3) = 0;
VNET_DEFINE(int, ip6_maxfragpackets); /* initialized in frag6.c:frag6_init() */
VNET_DEFINE(int, ip6_maxfrags); /* initialized in frag6.c:frag6_init() */
VNET_DEFINE(int, ip6_log_interval) = 5;
VNET_DEFINE(int, ip6_hdrnestlimit) = 15;/* How many header options will we
* process? */
VNET_DEFINE(int, ip6_dad_count) = 1; /* DupAddrDetectionTransmits */
VNET_DEFINE(int, ip6_auto_flowlabel) = 1;
VNET_DEFINE(int, ip6_use_deprecated) = 1;/* allow deprecated addr
* (RFC2462 5.5.4) */
VNET_DEFINE(int, ip6_rr_prune) = 5; /* router renumbering prefix
* walk list every 5 sec. */
VNET_DEFINE(int, ip6_mcast_pmtu) = 0; /* enable pMTU discovery for multicast? */
VNET_DEFINE(int, ip6_v6only) = 1;
VNET_DEFINE(time_t, ip6_log_time) = (time_t)0L;
#ifdef IPSTEALTH
VNET_DEFINE(int, ip6stealth) = 0;
#endif
VNET_DEFINE(int, nd6_onlink_ns_rfc4861) = 0;/* allow 'on-link' nd6 NS
* (RFC 4861) */
/* icmp6 */
/*
* BSDI4 defines these variables in in_proto.c...
* XXX: what if we don't define INET? Should we define pmtu6_expire
* or so? (jinmei@kame.net 19990310)
*/
VNET_DEFINE(int, pmtu_expire) = 60*10;
VNET_DEFINE(int, pmtu_probe) = 60*2;
/* ICMPV6 parameters */
VNET_DEFINE(int, icmp6_rediraccept) = 1;/* accept and process redirects */
VNET_DEFINE(int, icmp6_redirtimeout) = 10 * 60; /* 10 minutes */
VNET_DEFINE(int, icmp6errppslim) = 100; /* 100pps */
/* control how to respond to NI queries */
VNET_DEFINE(int, icmp6_nodeinfo) =
(ICMP6_NODEINFO_FQDNOK|ICMP6_NODEINFO_NODEADDROK);
VNET_DEFINE(int, icmp6_nodeinfo_oldmcprefix) = 1;
/*
* sysctl related items.
*/
SYSCTL_NODE(_net, PF_INET6, inet6, CTLFLAG_RW, 0,
"Internet6 Family");
/* net.inet6 */
SYSCTL_NODE(_net_inet6, IPPROTO_IPV6, ip6, CTLFLAG_RW, 0, "IP6");
SYSCTL_NODE(_net_inet6, IPPROTO_ICMPV6, icmp6, CTLFLAG_RW, 0, "ICMP6");
SYSCTL_NODE(_net_inet6, IPPROTO_UDP, udp6, CTLFLAG_RW, 0, "UDP6");
SYSCTL_NODE(_net_inet6, IPPROTO_TCP, tcp6, CTLFLAG_RW, 0, "TCP6");
#ifdef SCTP
SYSCTL_NODE(_net_inet6, IPPROTO_SCTP, sctp6, CTLFLAG_RW, 0, "SCTP6");
#endif
#ifdef IPSEC
SYSCTL_NODE(_net_inet6, IPPROTO_ESP, ipsec6, CTLFLAG_RW, 0, "IPSEC6");
#endif /* IPSEC */
/* net.inet6.ip6 */
static int
sysctl_ip6_temppltime(SYSCTL_HANDLER_ARGS)
{
int error, val;
val = V_ip6_temp_preferred_lifetime;
error = sysctl_handle_int(oidp, &val, 0, req);
if (error != 0 || !req->newptr)
return (error);
if (val < V_ip6_desync_factor + V_ip6_temp_regen_advance)
return (EINVAL);
V_ip6_temp_preferred_lifetime = val;
return (0);
}
static int
sysctl_ip6_tempvltime(SYSCTL_HANDLER_ARGS)
{
int error, val;
val = V_ip6_temp_valid_lifetime;
error = sysctl_handle_int(oidp, &val, 0, req);
if (error != 0 || !req->newptr)
return (error);
if (val < V_ip6_temp_preferred_lifetime)
return (EINVAL);
V_ip6_temp_valid_lifetime = val;
return (0);
}
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_FORWARDING, forwarding,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_forwarding), 0,
"Enable IPv6 forwarding between interfaces");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_SENDREDIRECTS, redirect,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_sendredirects), 0,
"Send a redirect message when forwarding back to a source link");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DEFHLIM, hlim,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_defhlim), 0,
"Default hop limit");
SYSCTL_VNET_PCPUSTAT(_net_inet6_ip6, IPV6CTL_STATS, stats, struct ip6stat,
ip6stat,
"IP6 statistics (struct ip6stat, netinet6/ip6_var.h)");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGPACKETS, maxfragpackets,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfragpackets), 0,
"Maximum allowed number of outstanding fragmented IPv6 packets");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_ACCEPT_RTADV, accept_rtadv,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_accept_rtadv), 0,
"Default value of per-interface flag for accepting ICMPv6 RA messages");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_NO_RADR, no_radr,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_no_radr), 0,
- Accept Router Advertisement messages even when net.inet6.ip6.forwarding=1. - A new per-interface knob IFF_ND6_NO_RADR and sysctl IPV6CTL_NO_RADR. This controls if accepting a route in an RA message as the default route. The default value for each interface can be set by net.inet6.ip6.no_radr. The system wide default value is 0. - A new sysctl: net.inet6.ip6.norbit_raif. This controls if setting R-bit in NA on RA accepting interfaces. The default is 0 (R-bit is set based on net.inet6.ip6.forwarding). Background: IPv6 host/router model suggests a router sends an RA and a host accepts it for router discovery. Because of that, KAME implementation does not allow accepting RAs when net.inet6.ip6.forwarding=1. Accepting RAs on a router can make the routing table confused since it can change the default router unintentionally. However, in practice there are cases where we cannot distinguish a host from a router clearly. For example, a customer edge router often works as a host against the ISP, and as a router against the LAN at the same time. Another example is a complex network configurations like an L2TP tunnel for IPv6 connection to Internet over an Ethernet link with another native IPv6 subnet. In this case, the physical interface for the native IPv6 subnet works as a host, and the pseudo-interface for L2TP works as the default IP forwarding route. Problem: Disabling processing RA messages when net.inet6.ip6.forwarding=1 and accepting them when net.inet6.ip6.forward=0 cause the following practical issues: - A router cannot perform SLAAC. It becomes a problem if a box has multiple interfaces and you want to use SLAAC on some of them, for example. A customer edge router for IPv6 Internet access service using an IPv6-over-IPv6 tunnel sometimes needs SLAAC on the physical interface for administration purpose; updating firmware and so on (link-local addresses can be used there, but GUAs by SLAAC are often used for scalability). - When a host has multiple IPv6 interfaces and it receives multiple RAs on them, controlling the default route is difficult. Router preferences defined in RFC 4191 works only when the routers on the links are under your control. Details of Implementation Changes: Router Advertisement messages will be accepted even when net.inet6.ip6.forwarding=1. More precisely, the conditions are as follow: (ACCEPT_RTADV && !NO_RADR && !ip6.forwarding) => Normal RA processing on that interface. (as IPv6 host) (ACCEPT_RTADV && (NO_RADR || ip6.forwarding)) => Accept RA but add the router to the defroute list with rtlifetime=0 unconditionally. This effectively prevents from setting the received router address as the box's default route. (!ACCEPT_RTADV) => No RA processing on that interface. ACCEPT_RTADV and NO_RADR are per-interface knob. In short, all interface are classified as "RA-accepting" or not. An RA-accepting interface always processes RA messages regardless of ip6.forwarding. The difference caused by NO_RADR or ip6.forwarding is whether the RA source address is considered as the default router or not. R-bit in NA on the RA accepting interfaces is set based on net.inet6.ip6.forwarding. While RFC 6204 W-1 rule (for CPE case) suggests a router should disable the R-bit completely even when the box has net.inet6.ip6.forwarding=1, I believe there is no technical reason with doing so. This behavior can be set by a new sysctl net.inet6.ip6.norbit_raif (the default is 0). Usage: # ifconfig fxp0 inet6 accept_rtadv => accept RA on fxp0 # ifconfig fxp0 inet6 accept_rtadv no_radr => accept RA on fxp0 but ignore default route information in it. # sysctl net.inet6.ip6.norbit_no_radr=1 => R-bit in NAs on RA accepting interfaces will always be set to 0.
2011-06-06 02:14:23 +00:00
"Default value of per-interface flag to control whether routers "
"sending ICMPv6 RA messages on that interface are added into the "
"default router list");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_NORBIT_RAIF, norbit_raif,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_norbit_raif), 0,
"Always set clear the R flag in ICMPv6 NA messages when accepting RA "
"on the interface");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RFC6204W3, rfc6204w3,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_rfc6204w3), 0,
"Accept the default router list from ICMPv6 RA messages even "
"when packet forwarding is enabled");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_LOG_INTERVAL, log_interval,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_log_interval), 0,
"Frequency in seconds at which to log IPv6 forwarding errors");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_HDRNESTLIMIT, hdrnestlimit,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_hdrnestlimit), 0,
"Maximum allowed number of nested protocol headers");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DAD_COUNT, dad_count,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_dad_count), 0,
"Number of ICMPv6 NS messages sent during duplicate address detection");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_AUTO_FLOWLABEL, auto_flowlabel,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_auto_flowlabel), 0,
"Provide an IPv6 flowlabel in outbound packets");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_DEFMCASTHLIM, defmcasthlim,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_defmcasthlim), 0,
"Default hop limit for multicast packets");
Build on Jeff Roberson's linker-set based dynamic per-CPU allocator (DPCPU), as suggested by Peter Wemm, and implement a new per-virtual network stack memory allocator. Modify vnet to use the allocator instead of monolithic global container structures (vinet, ...). This change solves many binary compatibility problems associated with VIMAGE, and restores ELF symbols for virtualized global variables. Each virtualized global variable exists as a "reference copy", and also once per virtual network stack. Virtualized global variables are tagged at compile-time, placing the in a special linker set, which is loaded into a contiguous region of kernel memory. Virtualized global variables in the base kernel are linked as normal, but those in modules are copied and relocated to a reserved portion of the kernel's vnet region with the help of a the kernel linker. Virtualized global variables exist in per-vnet memory set up when the network stack instance is created, and are initialized statically from the reference copy. Run-time access occurs via an accessor macro, which converts from the current vnet and requested symbol to a per-vnet address. When "options VIMAGE" is not compiled into the kernel, normal global ELF symbols will be used instead and indirection is avoided. This change restores static initialization for network stack global variables, restores support for non-global symbols and types, eliminates the need for many subsystem constructors, eliminates large per-subsystem structures that caused many binary compatibility issues both for monitoring applications (netstat) and kernel modules, removes the per-function INIT_VNET_*() macros throughout the stack, eliminates the need for vnet_symmap ksym(2) munging, and eliminates duplicate definitions of virtualized globals under VIMAGE_GLOBALS. Bump __FreeBSD_version and update UPDATING. Portions submitted by: bz Reviewed by: bz, zec Discussed with: gnn, jamie, jeff, jhb, julian, sam Suggested by: peter Approved by: re (kensmith)
2009-07-14 22:48:30 +00:00
SYSCTL_STRING(_net_inet6_ip6, IPV6CTL_KAME_VERSION, kame_version,
CTLFLAG_RD, __KAME_VERSION, 0,
"KAME version string");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_USE_DEPRECATED, use_deprecated,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_use_deprecated), 0,
"Allow the use of addresses whose preferred lifetimes have expired");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RR_PRUNE, rr_prune,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_rr_prune), 0,
""); /* XXX unused */
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_USETEMPADDR, use_tempaddr,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_use_tempaddr), 0,
"Create RFC3041 temporary addresses for autoconfigured addresses");
SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_TEMPPLTIME, temppltime,
CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW,
NULL, 0, sysctl_ip6_temppltime, "I",
"Maximum preferred lifetime for temporary addresses");
SYSCTL_PROC(_net_inet6_ip6, IPV6CTL_TEMPVLTIME, tempvltime,
CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW,
NULL, 0, sysctl_ip6_tempvltime, "I",
"Maximum valid lifetime for temporary addresses");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_V6ONLY, v6only,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_v6only), 0,
"Restrict AF_INET6 sockets to IPv6 addresses only");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_AUTO_LINKLOCAL, auto_linklocal,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_auto_linklocal), 0,
"Default value of per-interface flag for automatically adding an IPv6 "
"link-local address to interfaces when attached");
SYSCTL_VNET_PCPUSTAT(_net_inet6_ip6, IPV6CTL_RIP6STATS, rip6stats,
struct rip6stat, rip6stat,
"Raw IP6 statistics (struct rip6stat, netinet6/raw_ip6.h)");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_PREFER_TEMPADDR, prefer_tempaddr,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_prefer_tempaddr), 0,
"Prefer RFC3041 temporary addresses in source address selection");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_USE_DEFAULTZONE, use_defaultzone,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_use_defzone), 0,
"Use the default scope zone when none is specified");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MAXFRAGS, maxfrags,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_maxfrags), 0,
"Maximum allowed number of outstanding IPv6 packet fragments");
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_MCAST_PMTU, mcast_pmtu,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_mcast_pmtu), 0,
"Enable path MTU discovery for multicast packets");
#ifdef IPSTEALTH
SYSCTL_INT(_net_inet6_ip6, IPV6CTL_STEALTH, stealth, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(ip6stealth), 0,
"Forward IPv6 packets without decrementing their TTL");
#endif
/* net.inet6.icmp6 */
SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRACCEPT, rediraccept,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp6_rediraccept), 0,
"Accept ICMPv6 redirect messages");
SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRTIMEOUT, redirtimeout,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp6_redirtimeout), 0,
""); /* XXX unused */
SYSCTL_VNET_PCPUSTAT(_net_inet6_icmp6, ICMPV6CTL_STATS, stats,
struct icmp6stat, icmp6stat,
"ICMPv6 statistics (struct icmp6stat, netinet/icmp6.h)");
SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_PRUNE, nd6_prune,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_prune), 0,
"Frequency in seconds of checks for expired prefixes and routers");
SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_DELAY, nd6_delay,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_delay), 0,
"Delay in seconds before probing for reachability");
SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_UMAXTRIES, nd6_umaxtries,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_umaxtries), 0,
"Number of ICMPv6 NS messages sent during reachability detection");
SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MMAXTRIES, nd6_mmaxtries,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_mmaxtries), 0,
"Number of ICMPv6 NS messages sent during address resolution");
SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_USELOOPBACK, nd6_useloopback,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_useloopback), 0,
"Create a loopback route when configuring an IPv6 address");
SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_NODEINFO, nodeinfo,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp6_nodeinfo), 0,
"Mask of enabled RF4620 node information query types");
SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_NODEINFO_OLDMCPREFIX,
nodeinfo_oldmcprefix, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(icmp6_nodeinfo_oldmcprefix), 0,
"Join old IPv6 NI group address in draft-ietf-ipngwg-icmp-name-lookup "
"for compatibility with KAME implementation");
SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ERRPPSLIMIT, errppslimit,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp6errppslim), 0,
"Maximum number of ICMPv6 error messages per second");
SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXNUDHINT, nd6_maxnudhint,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_maxnudhint), 0,
""); /* XXX unused */
SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_DEBUG, nd6_debug,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_debug), 0,
"Log NDP debug messages");
SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_ONLINKNSRFC4861,
nd6_onlink_ns_rfc4861, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(nd6_onlink_ns_rfc4861), 0,
"Accept 'on-link' ICMPv6 NS messages in compliance with RFC 4861");