freebsd-skq/sys/netinet6/nd6.h

437 lines
15 KiB
C
Raw Normal View History

/*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the project nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
2007-12-10 16:03:40 +00:00
*
* $KAME: nd6.h,v 1.76 2001/12/18 02:10:31 itojun Exp $
* $FreeBSD$
*/
#ifndef _NETINET6_ND6_H_
#define _NETINET6_ND6_H_
/* see net/route.h, or net/if_inarp.h */
#ifndef RTF_ANNOUNCE
#define RTF_ANNOUNCE RTF_PROTO2
#endif
#include <sys/queue.h>
#include <sys/callout.h>
struct llentry;
#define ND6_LLINFO_NOSTATE -2
/*
* We don't need the WAITDELETE state any more, but we keep the definition
* in a comment line instead of removing it. This is necessary to avoid
* unintentionally reusing the value for another purpose, which might
* affect backward compatibility with old applications.
* (20000711 jinmei@kame.net)
*/
/* #define ND6_LLINFO_WAITDELETE -1 */
#define ND6_LLINFO_INCOMPLETE 0
#define ND6_LLINFO_REACHABLE 1
#define ND6_LLINFO_STALE 2
#define ND6_LLINFO_DELAY 3
#define ND6_LLINFO_PROBE 4
#define ND6_IS_LLINFO_PROBREACH(n) ((n)->ln_state > ND6_LLINFO_INCOMPLETE)
#define ND6_LLINFO_PERMANENT(n) (((n)->la_expire == 0) && ((n)->ln_state > ND6_LLINFO_INCOMPLETE))
struct nd_ifinfo {
u_int32_t linkmtu; /* LinkMTU */
u_int32_t maxmtu; /* Upper bound of LinkMTU */
u_int32_t basereachable; /* BaseReachableTime */
u_int32_t reachable; /* Reachable Time */
u_int32_t retrans; /* Retrans Timer */
u_int32_t flags; /* Flags */
int recalctm; /* BaseReacable re-calculation timer */
u_int8_t chlim; /* CurHopLimit */
u_int8_t initialized; /* Flag to see the entry is initialized */
/* the following 3 members are for privacy extension for addrconf */
u_int8_t randomseed0[8]; /* upper 64 bits of MD5 digest */
u_int8_t randomseed1[8]; /* lower 64 bits (usually the EUI64 IFID) */
u_int8_t randomid[8]; /* current random ID */
};
#define ND6_IFF_PERFORMNUD 0x1
#define ND6_IFF_ACCEPT_RTADV 0x2
#define ND6_IFF_PREFER_SOURCE 0x4 /* Not used in FreeBSD. */
#define ND6_IFF_IFDISABLED 0x8 /* IPv6 operation is disabled due to
* DAD failure. (XXX: not ND-specific)
*/
#define ND6_IFF_DONT_SET_IFROUTE 0x10
#define ND6_IFF_AUTO_LINKLOCAL 0x20
- Accept Router Advertisement messages even when net.inet6.ip6.forwarding=1. - A new per-interface knob IFF_ND6_NO_RADR and sysctl IPV6CTL_NO_RADR. This controls if accepting a route in an RA message as the default route. The default value for each interface can be set by net.inet6.ip6.no_radr. The system wide default value is 0. - A new sysctl: net.inet6.ip6.norbit_raif. This controls if setting R-bit in NA on RA accepting interfaces. The default is 0 (R-bit is set based on net.inet6.ip6.forwarding). Background: IPv6 host/router model suggests a router sends an RA and a host accepts it for router discovery. Because of that, KAME implementation does not allow accepting RAs when net.inet6.ip6.forwarding=1. Accepting RAs on a router can make the routing table confused since it can change the default router unintentionally. However, in practice there are cases where we cannot distinguish a host from a router clearly. For example, a customer edge router often works as a host against the ISP, and as a router against the LAN at the same time. Another example is a complex network configurations like an L2TP tunnel for IPv6 connection to Internet over an Ethernet link with another native IPv6 subnet. In this case, the physical interface for the native IPv6 subnet works as a host, and the pseudo-interface for L2TP works as the default IP forwarding route. Problem: Disabling processing RA messages when net.inet6.ip6.forwarding=1 and accepting them when net.inet6.ip6.forward=0 cause the following practical issues: - A router cannot perform SLAAC. It becomes a problem if a box has multiple interfaces and you want to use SLAAC on some of them, for example. A customer edge router for IPv6 Internet access service using an IPv6-over-IPv6 tunnel sometimes needs SLAAC on the physical interface for administration purpose; updating firmware and so on (link-local addresses can be used there, but GUAs by SLAAC are often used for scalability). - When a host has multiple IPv6 interfaces and it receives multiple RAs on them, controlling the default route is difficult. Router preferences defined in RFC 4191 works only when the routers on the links are under your control. Details of Implementation Changes: Router Advertisement messages will be accepted even when net.inet6.ip6.forwarding=1. More precisely, the conditions are as follow: (ACCEPT_RTADV && !NO_RADR && !ip6.forwarding) => Normal RA processing on that interface. (as IPv6 host) (ACCEPT_RTADV && (NO_RADR || ip6.forwarding)) => Accept RA but add the router to the defroute list with rtlifetime=0 unconditionally. This effectively prevents from setting the received router address as the box's default route. (!ACCEPT_RTADV) => No RA processing on that interface. ACCEPT_RTADV and NO_RADR are per-interface knob. In short, all interface are classified as "RA-accepting" or not. An RA-accepting interface always processes RA messages regardless of ip6.forwarding. The difference caused by NO_RADR or ip6.forwarding is whether the RA source address is considered as the default router or not. R-bit in NA on the RA accepting interfaces is set based on net.inet6.ip6.forwarding. While RFC 6204 W-1 rule (for CPE case) suggests a router should disable the R-bit completely even when the box has net.inet6.ip6.forwarding=1, I believe there is no technical reason with doing so. This behavior can be set by a new sysctl net.inet6.ip6.norbit_raif (the default is 0). Usage: # ifconfig fxp0 inet6 accept_rtadv => accept RA on fxp0 # ifconfig fxp0 inet6 accept_rtadv no_radr => accept RA on fxp0 but ignore default route information in it. # sysctl net.inet6.ip6.norbit_no_radr=1 => R-bit in NAs on RA accepting interfaces will always be set to 0.
2011-06-06 02:14:23 +00:00
#define ND6_IFF_NO_RADR 0x40
#define ND6_IFF_NO_PREFER_IFACE 0x80 /* XXX: not related to ND. */
#define ND6_IFF_NO_DAD 0x100
#ifdef EXPERIMENTAL
/* XXX: not related to ND. */
#define ND6_IFF_IPV6_ONLY 0x200 /* draft-ietf-6man-ipv6only-flag */
#define ND6_IFF_IPV6_ONLY_MANUAL 0x400
#define ND6_IFF_IPV6_ONLY_MASK (ND6_IFF_IPV6_ONLY|ND6_IFF_IPV6_ONLY_MANUAL)
#endif
#ifdef _KERNEL
#define ND_IFINFO(ifp) \
(((struct in6_ifextra *)(ifp)->if_afdata[AF_INET6])->nd_ifinfo)
#define IN6_LINKMTU(ifp) \
((ND_IFINFO(ifp)->linkmtu && ND_IFINFO(ifp)->linkmtu < (ifp)->if_mtu) \
? ND_IFINFO(ifp)->linkmtu \
: ((ND_IFINFO(ifp)->maxmtu && ND_IFINFO(ifp)->maxmtu < (ifp)->if_mtu) \
? ND_IFINFO(ifp)->maxmtu : (ifp)->if_mtu))
#endif
struct in6_nbrinfo {
char ifname[IFNAMSIZ]; /* if name, e.g. "en0" */
struct in6_addr addr; /* IPv6 address of the neighbor */
long asked; /* number of queries already sent for this addr */
int isrouter; /* if it acts as a router */
int state; /* reachability state */
int expire; /* lifetime for NDP state transition */
};
/* Sysctls, shared with user space. */
struct in6_defrouter {
struct sockaddr_in6 rtaddr;
u_char flags;
u_short rtlifetime;
u_long expire;
u_short if_index;
};
struct in6_prefix {
struct sockaddr_in6 prefix;
struct prf_ra raflags;
u_char prefixlen;
u_char origin;
u_int32_t vltime;
u_int32_t pltime;
time_t expire;
u_int32_t flags;
int refcnt;
u_short if_index;
u_short advrtrs; /* number of advertisement routers */
/* struct sockaddr_in6 advrtr[] */
};
#ifdef _KERNEL
struct in6_ondireq {
char ifname[IFNAMSIZ];
struct {
u_int32_t linkmtu; /* LinkMTU */
u_int32_t maxmtu; /* Upper bound of LinkMTU */
u_int32_t basereachable; /* BaseReachableTime */
u_int32_t reachable; /* Reachable Time */
u_int32_t retrans; /* Retrans Timer */
u_int32_t flags; /* Flags */
int recalctm; /* BaseReacable re-calculation timer */
u_int8_t chlim; /* CurHopLimit */
u_int8_t receivedra;
} ndi;
};
#endif
struct in6_ndireq {
char ifname[IFNAMSIZ];
struct nd_ifinfo ndi;
};
struct in6_ndifreq {
char ifname[IFNAMSIZ];
u_long ifindex;
};
/* Prefix status */
#define NDPRF_ONLINK 0x1
#define NDPRF_DETACHED 0x2
/* protocol constants */
#define MAX_RTR_SOLICITATION_DELAY 1 /* 1sec */
#define RTR_SOLICITATION_INTERVAL 4 /* 4sec */
#define MAX_RTR_SOLICITATIONS 3
#define ND6_INFINITE_LIFETIME 0xffffffff
#ifdef _KERNEL
/* node constants */
#define MAX_REACHABLE_TIME 3600000 /* msec */
#define REACHABLE_TIME 30000 /* msec */
#define RETRANS_TIMER 1000 /* msec */
#define MIN_RANDOM_FACTOR 512 /* 1024 * 0.5 */
#define MAX_RANDOM_FACTOR 1536 /* 1024 * 1.5 */
#define DEF_TEMP_VALID_LIFETIME 604800 /* 1 week */
#define DEF_TEMP_PREFERRED_LIFETIME 86400 /* 1 day */
#define TEMPADDR_REGEN_ADVANCE 5 /* sec */
#define MAX_TEMP_DESYNC_FACTOR 600 /* 10 min */
#define ND_COMPUTE_RTIME(x) \
2003-10-31 16:06:05 +00:00
(((MIN_RANDOM_FACTOR * (x >> 10)) + (arc4random() & \
((MAX_RANDOM_FACTOR - MIN_RANDOM_FACTOR) * (x >> 10)))) /1000)
struct nd_defrouter {
TAILQ_ENTRY(nd_defrouter) dr_entry;
struct in6_addr rtaddr;
u_char raflags; /* flags on RA message */
u_short rtlifetime;
u_long expire;
struct ifnet *ifp;
int installed; /* is installed into kernel routing table */
u_int refcnt;
};
struct nd_prefixctl {
struct ifnet *ndpr_ifp;
/* prefix */
struct sockaddr_in6 ndpr_prefix;
u_char ndpr_plen;
u_int32_t ndpr_vltime; /* advertised valid lifetime */
u_int32_t ndpr_pltime; /* advertised preferred lifetime */
struct prf_ra ndpr_flags;
};
LIST_HEAD(nd_prhead, nd_prefix);
struct nd_prefix {
struct ifnet *ndpr_ifp;
LIST_ENTRY(nd_prefix) ndpr_entry;
struct sockaddr_in6 ndpr_prefix; /* prefix */
struct in6_addr ndpr_mask; /* netmask derived from the prefix */
u_int32_t ndpr_vltime; /* advertised valid lifetime */
u_int32_t ndpr_pltime; /* advertised preferred lifetime */
time_t ndpr_expire; /* expiration time of the prefix */
time_t ndpr_preferred; /* preferred time of the prefix */
time_t ndpr_lastupdate; /* reception time of last advertisement */
struct prf_ra ndpr_flags;
u_int32_t ndpr_stateflags; /* actual state flags */
/* list of routers that advertise the prefix: */
LIST_HEAD(pr_rtrhead, nd_pfxrouter) ndpr_advrtrs;
u_char ndpr_plen;
int ndpr_addrcnt; /* count of derived addresses */
volatile u_int ndpr_refcnt;
};
#define ndpr_raf ndpr_flags
#define ndpr_raf_onlink ndpr_flags.onlink
#define ndpr_raf_auto ndpr_flags.autonomous
#define ndpr_raf_router ndpr_flags.router
struct nd_pfxrouter {
LIST_ENTRY(nd_pfxrouter) pfr_entry;
struct nd_defrouter *router;
};
#ifdef MALLOC_DECLARE
MALLOC_DECLARE(M_IP6NDP);
#endif
/* nd6.c */
Build on Jeff Roberson's linker-set based dynamic per-CPU allocator (DPCPU), as suggested by Peter Wemm, and implement a new per-virtual network stack memory allocator. Modify vnet to use the allocator instead of monolithic global container structures (vinet, ...). This change solves many binary compatibility problems associated with VIMAGE, and restores ELF symbols for virtualized global variables. Each virtualized global variable exists as a "reference copy", and also once per virtual network stack. Virtualized global variables are tagged at compile-time, placing the in a special linker set, which is loaded into a contiguous region of kernel memory. Virtualized global variables in the base kernel are linked as normal, but those in modules are copied and relocated to a reserved portion of the kernel's vnet region with the help of a the kernel linker. Virtualized global variables exist in per-vnet memory set up when the network stack instance is created, and are initialized statically from the reference copy. Run-time access occurs via an accessor macro, which converts from the current vnet and requested symbol to a per-vnet address. When "options VIMAGE" is not compiled into the kernel, normal global ELF symbols will be used instead and indirection is avoided. This change restores static initialization for network stack global variables, restores support for non-global symbols and types, eliminates the need for many subsystem constructors, eliminates large per-subsystem structures that caused many binary compatibility issues both for monitoring applications (netstat) and kernel modules, removes the per-function INIT_VNET_*() macros throughout the stack, eliminates the need for vnet_symmap ksym(2) munging, and eliminates duplicate definitions of virtualized globals under VIMAGE_GLOBALS. Bump __FreeBSD_version and update UPDATING. Portions submitted by: bz Reviewed by: bz, zec Discussed with: gnn, jamie, jeff, jhb, julian, sam Suggested by: peter Approved by: re (kensmith)
2009-07-14 22:48:30 +00:00
VNET_DECLARE(int, nd6_prune);
VNET_DECLARE(int, nd6_delay);
VNET_DECLARE(int, nd6_umaxtries);
VNET_DECLARE(int, nd6_mmaxtries);
VNET_DECLARE(int, nd6_useloopback);
VNET_DECLARE(int, nd6_maxnudhint);
VNET_DECLARE(int, nd6_gctimer);
VNET_DECLARE(struct nd_prhead, nd_prefix);
VNET_DECLARE(int, nd6_debug);
VNET_DECLARE(int, nd6_onlink_ns_rfc4861);
#define V_nd6_prune VNET(nd6_prune)
#define V_nd6_delay VNET(nd6_delay)
#define V_nd6_umaxtries VNET(nd6_umaxtries)
#define V_nd6_mmaxtries VNET(nd6_mmaxtries)
#define V_nd6_useloopback VNET(nd6_useloopback)
#define V_nd6_maxnudhint VNET(nd6_maxnudhint)
#define V_nd6_gctimer VNET(nd6_gctimer)
#define V_nd_prefix VNET(nd_prefix)
#define V_nd6_debug VNET(nd6_debug)
#define V_nd6_onlink_ns_rfc4861 VNET(nd6_onlink_ns_rfc4861)
/* Lock for the prefix and default router lists. */
VNET_DECLARE(struct rwlock, nd6_lock);
VNET_DECLARE(uint64_t, nd6_list_genid);
#define V_nd6_lock VNET(nd6_lock)
#define V_nd6_list_genid VNET(nd6_list_genid)
#define ND6_RLOCK() rw_rlock(&V_nd6_lock)
#define ND6_RUNLOCK() rw_runlock(&V_nd6_lock)
#define ND6_WLOCK() rw_wlock(&V_nd6_lock)
#define ND6_WUNLOCK() rw_wunlock(&V_nd6_lock)
#define ND6_TRY_UPGRADE() rw_try_upgrade(&V_nd6_lock)
#define ND6_WLOCK_ASSERT() rw_assert(&V_nd6_lock, RA_WLOCKED)
#define ND6_RLOCK_ASSERT() rw_assert(&V_nd6_lock, RA_RLOCKED)
#define ND6_LOCK_ASSERT() rw_assert(&V_nd6_lock, RA_LOCKED)
#define ND6_UNLOCK_ASSERT() rw_assert(&V_nd6_lock, RA_UNLOCKED)
/* Mutex for prefix onlink/offlink transitions. */
VNET_DECLARE(struct mtx, nd6_onlink_mtx);
#define V_nd6_onlink_mtx VNET(nd6_onlink_mtx)
#define ND6_ONLINK_LOCK() mtx_lock(&V_nd6_onlink_mtx)
#define ND6_ONLINK_TRYLOCK() mtx_trylock(&V_nd6_onlink_mtx)
#define ND6_ONLINK_UNLOCK() mtx_unlock(&V_nd6_onlink_mtx)
#define ND6_ONLINK_LOCK_ASSERT() mtx_assert(&V_nd6_onlink_mtx, MA_OWNED)
#define ND6_ONLINK_UNLOCK_ASSERT() mtx_assert(&V_nd6_onlink_mtx, MA_NOTOWNED)
#define nd6log(x) do { if (V_nd6_debug) log x; } while (/*CONSTCOND*/ 0)
/* nd6_rtr.c */
Build on Jeff Roberson's linker-set based dynamic per-CPU allocator (DPCPU), as suggested by Peter Wemm, and implement a new per-virtual network stack memory allocator. Modify vnet to use the allocator instead of monolithic global container structures (vinet, ...). This change solves many binary compatibility problems associated with VIMAGE, and restores ELF symbols for virtualized global variables. Each virtualized global variable exists as a "reference copy", and also once per virtual network stack. Virtualized global variables are tagged at compile-time, placing the in a special linker set, which is loaded into a contiguous region of kernel memory. Virtualized global variables in the base kernel are linked as normal, but those in modules are copied and relocated to a reserved portion of the kernel's vnet region with the help of a the kernel linker. Virtualized global variables exist in per-vnet memory set up when the network stack instance is created, and are initialized statically from the reference copy. Run-time access occurs via an accessor macro, which converts from the current vnet and requested symbol to a per-vnet address. When "options VIMAGE" is not compiled into the kernel, normal global ELF symbols will be used instead and indirection is avoided. This change restores static initialization for network stack global variables, restores support for non-global symbols and types, eliminates the need for many subsystem constructors, eliminates large per-subsystem structures that caused many binary compatibility issues both for monitoring applications (netstat) and kernel modules, removes the per-function INIT_VNET_*() macros throughout the stack, eliminates the need for vnet_symmap ksym(2) munging, and eliminates duplicate definitions of virtualized globals under VIMAGE_GLOBALS. Bump __FreeBSD_version and update UPDATING. Portions submitted by: bz Reviewed by: bz, zec Discussed with: gnn, jamie, jeff, jhb, julian, sam Suggested by: peter Approved by: re (kensmith)
2009-07-14 22:48:30 +00:00
VNET_DECLARE(int, nd6_defifindex);
VNET_DECLARE(int, ip6_desync_factor); /* seconds */
VNET_DECLARE(u_int32_t, ip6_temp_preferred_lifetime); /* seconds */
VNET_DECLARE(u_int32_t, ip6_temp_valid_lifetime); /* seconds */
VNET_DECLARE(int, ip6_temp_regen_advance); /* seconds */
#define V_nd6_defifindex VNET(nd6_defifindex)
#define V_ip6_desync_factor VNET(ip6_desync_factor)
#define V_ip6_temp_preferred_lifetime VNET(ip6_temp_preferred_lifetime)
#define V_ip6_temp_valid_lifetime VNET(ip6_temp_valid_lifetime)
#define V_ip6_temp_regen_advance VNET(ip6_temp_regen_advance)
union nd_opts {
struct nd_opt_hdr *nd_opt_array[16]; /* max = ND_OPT_NONCE */
struct {
struct nd_opt_hdr *zero;
struct nd_opt_hdr *src_lladdr;
struct nd_opt_hdr *tgt_lladdr;
struct nd_opt_prefix_info *pi_beg; /* multiple opts, start */
struct nd_opt_rd_hdr *rh;
struct nd_opt_mtu *mtu;
struct nd_opt_hdr *__res6;
struct nd_opt_hdr *__res7;
struct nd_opt_hdr *__res8;
struct nd_opt_hdr *__res9;
struct nd_opt_hdr *__res10;
struct nd_opt_hdr *__res11;
struct nd_opt_hdr *__res12;
struct nd_opt_hdr *__res13;
struct nd_opt_nonce *nonce;
struct nd_opt_hdr *__res15;
struct nd_opt_hdr *search; /* multiple opts */
struct nd_opt_hdr *last; /* multiple opts */
int done;
struct nd_opt_prefix_info *pi_end;/* multiple opts, end */
} nd_opt_each;
};
#define nd_opts_src_lladdr nd_opt_each.src_lladdr
#define nd_opts_tgt_lladdr nd_opt_each.tgt_lladdr
#define nd_opts_pi nd_opt_each.pi_beg
#define nd_opts_pi_end nd_opt_each.pi_end
#define nd_opts_rh nd_opt_each.rh
#define nd_opts_mtu nd_opt_each.mtu
#define nd_opts_nonce nd_opt_each.nonce
#define nd_opts_search nd_opt_each.search
#define nd_opts_last nd_opt_each.last
#define nd_opts_done nd_opt_each.done
/* XXX: need nd6_var.h?? */
/* nd6.c */
void nd6_init(void);
#ifdef VIMAGE
void nd6_destroy(void);
#endif
struct nd_ifinfo *nd6_ifattach(struct ifnet *);
Get closer to a VIMAGE network stack teardown from top to bottom rather than removing the network interfaces first. This change is rather larger and convoluted as the ordering requirements cannot be separated. Move the pfil(9) framework to SI_SUB_PROTO_PFIL, move Firewalls and related modules to their own SI_SUB_PROTO_FIREWALL. Move initialization of "physical" interfaces to SI_SUB_DRIVERS, move virtual (cloned) interfaces to SI_SUB_PSEUDO. Move Multicast to SI_SUB_PROTO_MC. Re-work parts of multicast initialisation and teardown, not taking the huge amount of memory into account if used as a module yet. For interface teardown we try to do as many of them as we can on SI_SUB_INIT_IF, but for some this makes no sense, e.g., when tunnelling over a higher layer protocol such as IP. In that case the interface has to go along (or before) the higher layer protocol is shutdown. Kernel hhooks need to go last on teardown as they may be used at various higher layers and we cannot remove them before we cleaned up the higher layers. For interface teardown there are multiple paths: (a) a cloned interface is destroyed (inside a VIMAGE or in the base system), (b) any interface is moved from a virtual network stack to a different network stack ("vmove"), or (c) a virtual network stack is being shut down. All code paths go through if_detach_internal() where we, depending on the vmove flag or the vnet state, make a decision on how much to shut down; in case we are destroying a VNET the individual protocol layers will cleanup their own parts thus we cannot do so again for each interface as we end up with, e.g., double-frees, destroying locks twice or acquiring already destroyed locks. When calling into protocol cleanups we equally have to tell them whether they need to detach upper layer protocols ("ulp") or not (e.g., in6_ifdetach()). Provide or enahnce helper functions to do proper cleanup at a protocol rather than at an interface level. Approved by: re (hrs) Obtained from: projects/vnet Reviewed by: gnn, jhb Sponsored by: The FreeBSD Foundation MFC after: 2 weeks Differential Revision: https://reviews.freebsd.org/D6747
2016-06-21 13:48:49 +00:00
void nd6_ifdetach(struct ifnet *, struct nd_ifinfo *);
int nd6_is_addr_neighbor(const struct sockaddr_in6 *, struct ifnet *);
void nd6_option_init(void *, int, union nd_opts *);
struct nd_opt_hdr *nd6_option(union nd_opts *);
int nd6_options(union nd_opts *);
struct llentry *nd6_lookup(const struct in6_addr *, int, struct ifnet *);
void nd6_setmtu(struct ifnet *);
void nd6_llinfo_setstate(struct llentry *lle, int newstate);
void nd6_timer(void *);
void nd6_purge(struct ifnet *);
Implement interface link header precomputation API. Add if_requestencap() interface method which is capable of calculating various link headers for given interface. Right now there is support for INET/INET6/ARP llheader calculation (IFENCAP_LL type request). Other types are planned to support more complex calculation (L2 multipath lagg nexthops, tunnel encap nexthops, etc..). Reshape 'struct route' to be able to pass additional data (with is length) to prepend to mbuf. These two changes permits routing code to pass pre-calculated nexthop data (like L2 header for route w/gateway) down to the stack eliminating the need for other lookups. It also brings us closer to more complex scenarios like transparently handling MPLS nexthops and tunnel interfaces. Last, but not least, it removes layering violation introduced by flowtable code (ro_lle) and simplifies handling of existing if_output consumers. ARP/ND changes: Make arp/ndp stack pre-calculate link header upon installing/updating lle record. Interface link address change are handled by re-calculating headers for all lles based on if_lladdr event. After these changes, arpresolve()/nd6_resolve() returns full pre-calculated header for supported interfaces thus simplifying if_output(). Move these lookups to separate ether_resolve_addr() function which ether returs error or fully-prepared link header. Add <arp|nd6_>resolve_addr() compat versions to return link addresses instead of pre-calculated data. BPF changes: Raw bpf writes occupied _two_ cases: AF_UNSPEC and pseudo_AF_HDRCMPLT. Despite the naming, both of there have ther header "complete". The only difference is that interface source mac has to be filled by OS for AF_UNSPEC (controlled via BIOCGHDRCMPLT). This logic has to stay inside BPF and not pollute if_output() routines. Convert BPF to pass prepend data via new 'struct route' mechanism. Note that it does not change non-optimized if_output(): ro_prepend handling is purely optional. Side note: hackish pseudo_AF_HDRCMPLT is supported for ethernet and FDDI. It is not needed for ethernet anymore. The only remaining FDDI user is dev/pdq mostly untouched since 2007. FDDI support was eliminated from OpenBSD in 2013 (sys/net/if_fddisubr.c rev 1.65). Flowtable changes: Flowtable violates layering by saving (and not correctly managing) rtes/lles. Instead of passing lle pointer, pass pointer to pre-calculated header data from that lle. Differential Revision: https://reviews.freebsd.org/D4102
2015-12-31 05:03:27 +00:00
int nd6_resolve_addr(struct ifnet *ifp, int flags, const struct sockaddr *dst,
char *desten, uint32_t *pflags);
Simplify the way of attaching IPv6 link-layer header. Problem description: How do we currently perform layer 2 resolution and header imposition: For IPv4 we have the following chain: ip_output() -> (ether|atm|whatever)_output() -> arpresolve() Lookup is done in proper place (link-layer output routine) and it is possible to provide cached lle data. For IPv6 situation is more complex: ip6_output() -> nd6_output() -> nd6_output_ifp() -> (whatever)_output() -> nd6_storelladdr() We have ip6_ouput() which calls nd6_output() instead of link output routine. nd6_output() does the following: * checks if lle exists, creates it if needed (similar to arpresolve()) * performes lle state transitions (similar to arpresolve()) * calls nd6_output_ifp() which pushes packets to link output routine along with running SeND/MAC hooks regardless of lle state (e.g. works as run-hooks placeholder). After that, iface output routine like ether_output() calls nd6_storelladdr() which performs lle lookup once again. As a result, we perform lookup twice for each outgoing packet for most types of interfaces. We also need to maintain runtime-checked table of 'nd6-free' interfaces (see nd6_need_cache()). Fix this behavior by eliminating first ND lookup. To be more specific: * make all nd6_output() consumers use nd6_output_ifp() instead * rename nd6_output[_slow]() to nd6_resolve_[slow]() * convert nd6_resolve() and nd6_resolve_slow() to arpresolve() semantics, e.g. copy L2 address to buffer instead of pushing packet towards lower layers * Make all nd6_storelladdr() users use nd6_resolve() * eliminate nd6_storelladdr() The resulting callchain is the following: ip6_output() -> nd6_output_ifp() -> (whatever)_output() -> nd6_resolve() Error handling: Currently sending packet to non-existing la results in ip6_<output|forward> -> nd6_output() -> nd6_output _lle() which returns 0. In new scenario packet is propagated to <ether|whatever>_output() -> nd6_resolve() which will return EWOULDBLOCK, and that result will be converted to 0. (And EWOULDBLOCK is actually used by IB/TOE code). Sponsored by: Yandex LLC Differential Revision: https://reviews.freebsd.org/D1469
2015-09-16 14:26:28 +00:00
int nd6_resolve(struct ifnet *, int, struct mbuf *,
const struct sockaddr *, u_char *, uint32_t *, struct llentry **);
int nd6_ioctl(u_long, caddr_t, struct ifnet *);
void nd6_cache_lladdr(struct ifnet *, struct in6_addr *,
char *, int, int, int);
void nd6_grab_holdchain(struct llentry *, struct mbuf **,
struct sockaddr_in6 *);
int nd6_flush_holdchain(struct ifnet *, struct mbuf *,
struct sockaddr_in6 *);
int nd6_add_ifa_lle(struct in6_ifaddr *);
void nd6_rem_ifa_lle(struct in6_ifaddr *, int);
Simplify the way of attaching IPv6 link-layer header. Problem description: How do we currently perform layer 2 resolution and header imposition: For IPv4 we have the following chain: ip_output() -> (ether|atm|whatever)_output() -> arpresolve() Lookup is done in proper place (link-layer output routine) and it is possible to provide cached lle data. For IPv6 situation is more complex: ip6_output() -> nd6_output() -> nd6_output_ifp() -> (whatever)_output() -> nd6_storelladdr() We have ip6_ouput() which calls nd6_output() instead of link output routine. nd6_output() does the following: * checks if lle exists, creates it if needed (similar to arpresolve()) * performes lle state transitions (similar to arpresolve()) * calls nd6_output_ifp() which pushes packets to link output routine along with running SeND/MAC hooks regardless of lle state (e.g. works as run-hooks placeholder). After that, iface output routine like ether_output() calls nd6_storelladdr() which performs lle lookup once again. As a result, we perform lookup twice for each outgoing packet for most types of interfaces. We also need to maintain runtime-checked table of 'nd6-free' interfaces (see nd6_need_cache()). Fix this behavior by eliminating first ND lookup. To be more specific: * make all nd6_output() consumers use nd6_output_ifp() instead * rename nd6_output[_slow]() to nd6_resolve_[slow]() * convert nd6_resolve() and nd6_resolve_slow() to arpresolve() semantics, e.g. copy L2 address to buffer instead of pushing packet towards lower layers * Make all nd6_storelladdr() users use nd6_resolve() * eliminate nd6_storelladdr() The resulting callchain is the following: ip6_output() -> nd6_output_ifp() -> (whatever)_output() -> nd6_resolve() Error handling: Currently sending packet to non-existing la results in ip6_<output|forward> -> nd6_output() -> nd6_output _lle() which returns 0. In new scenario packet is propagated to <ether|whatever>_output() -> nd6_resolve() which will return EWOULDBLOCK, and that result will be converted to 0. (And EWOULDBLOCK is actually used by IB/TOE code). Sponsored by: Yandex LLC Differential Revision: https://reviews.freebsd.org/D1469
2015-09-16 14:26:28 +00:00
int nd6_output_ifp(struct ifnet *, struct ifnet *, struct mbuf *,
struct sockaddr_in6 *, struct route *);
struct rib_head;
struct rib_cmd_info;
void nd6_subscription_cb(struct rib_head *rnh, struct rib_cmd_info *rc,
void *arg);
/* nd6_nbr.c */
void nd6_na_input(struct mbuf *, int, int);
void nd6_na_output(struct ifnet *, const struct in6_addr *,
const struct in6_addr *, u_long, int, struct sockaddr *);
void nd6_ns_input(struct mbuf *, int, int);
void nd6_ns_output(struct ifnet *, const struct in6_addr *,
const struct in6_addr *, const struct in6_addr *, uint8_t *);
caddr_t nd6_ifptomac(struct ifnet *);
void nd6_dad_init(void);
void nd6_dad_start(struct ifaddr *, int);
void nd6_dad_stop(struct ifaddr *);
/* nd6_rtr.c */
void nd6_rs_input(struct mbuf *, int, int);
void nd6_ra_input(struct mbuf *, int, int);
void nd6_ifnet_link_event(void *, struct ifnet *, int);
Convert rtentry field accesses into nhop field accesses. One of the goals of the new routing KPI defined in r359823 is to entirely hide`struct rtentry` from the consumers. It will allow to improve routing subsystem internals and deliver more features much faster. This commit is mostly mechanical change to eliminate direct struct rtentry field accesses. The only notable difference is AF_LINK gateway encoding. AF_LINK gw is used in routing stack for operations with interface routes and host loopback routes. In the former case it indicates _some_ non-NULL gateway, as the interface is the same as in rt_ifp in kernel and rtm_ifindex in rtsock reporting. In the latter case the interface index inside gateway was used by the IPv6 datapath to verify address scope for link-local interfaces. Kernel uses struct sockaddr_dl for this type of gateway. This structure allows for specifying rich interface data, such as mac address and interface name. However, this results in relatively large structure size - 52 bytes. Routing stack fils in only 2 fields - sdl_index and sdl_type, which reside in the first 8 bytes of the structure. In the new KPI, struct nhop_object tries to be cache-efficient, hence embodies gateway address inside the structure. In the AF_LINK case it stores stortened version of the structure - struct sockaddr_dl_short, which occupies 16 bytes. After D24340 changes, the data inside AF_LINK gateway will not be used in the kernel at all, leaving rtsock as the only potential concern. The difference in rtsock reporting: (old) got message of size 240 on Thu Apr 16 03:12:13 2020 RTM_ADD: Add Route: len 240, pid: 0, seq 0, errno 0, flags:<UP,DONE,PINNED> locks: inits: sockaddrs: <DST,GATEWAY,NETMASK> 10.0.0.0 link#5 255.255.255.0 (new) got message of size 200 on Sun Apr 19 09:46:32 2020 RTM_ADD: Add Route: len 200, pid: 0, seq 0, errno 0, flags:<UP,DONE,PINNED> locks: inits: sockaddrs: <DST,GATEWAY,NETMASK> 10.0.0.0 link#5 255.255.255.0 Note 40 bytes different (52-16 + alignment). However, gateway is still a valid AF_LINK gateway with proper data filled in. It is worth noting that these particular messages (interface routes) are mostly ignored by routing daemons: * bird/quagga/frr uses RTM_NEWADDR and ignores prefix route addition messages. * quagga/frr ignores routes without gateway More detailed overview on how rtsock messages are used by the routing daemons to reconstruct the kernel view, can be found in D22974. Differential Revision: https://reviews.freebsd.org/D24519
2020-04-23 08:04:20 +00:00
struct nd_defrouter *defrouter_lookup(const struct in6_addr *, struct ifnet *);
struct nd_defrouter *defrouter_lookup_locked(const struct in6_addr *,
struct ifnet *);
void defrouter_reset(void);
Constrain IPv6 routes to single FIBs when net.add_addr_allfibs=0 sys/netinet6/icmp6.c Use the interface's FIB for source address selection in ICMPv6 error responses. sys/netinet6/in6.c In in6_newaddrmsg, announce arrival of local addresses on the interface's FIB only. In in6_lltable_rtcheck, use a per-fib ND6 cache instead of a single cache. sys/netinet6/in6_src.c In in6_selectsrc, use the caller's fib instead of the default fib. In in6_selectsrc_socket, remove a superfluous check. sys/netinet6/nd6.c In nd6_lle_event, use the interface's fib for routing socket messages. In nd6_is_new_addr_neighbor, check all FIBs when trying to determine whether an address is a neighbor. Also, simplify the code for point to point interfaces. sys/netinet6/nd6.h sys/netinet6/nd6.c sys/netinet6/nd6_rtr.c Make defrouter_select fib-aware, and make all of its callers pass in the interface fib. sys/netinet6/nd6_nbr.c When inputting a Neighbor Solicitation packet, consider the interface fib instead of the default fib for DAD. Output NS and Neighbor Advertisement packets on the correct fib. sys/netinet6/nd6_rtr.c Allow installing the same host route on different interfaces in different FIBs. If rt_add_addr_allfibs=0, only install or delete the prefix route on the interface fib. tests/sys/netinet/fibs_test.sh Clear some expected failures, but add a skip for the newly revealed BUG217871. PR: 196361 Submitted by: Erick Turnquist <jhujhiti@adjectivism.org> Reported by: Jason Healy <jhealy@logn.net> Reviewed by: asomers MFC after: 3 weeks Sponsored by: Spectra Logic Corp Differential Revision: https://reviews.freebsd.org/D9451
2017-03-17 16:50:37 +00:00
void defrouter_select_fib(int fibnum);
void defrouter_rele(struct nd_defrouter *);
bool defrouter_remove(struct in6_addr *, struct ifnet *);
bool nd6_defrouter_list_empty(void);
void nd6_defrouter_flush_all(void);
void nd6_defrouter_purge(struct ifnet *);
void nd6_defrouter_timer(void);
void nd6_defrouter_init(void);
int nd6_prelist_add(struct nd_prefixctl *, struct nd_defrouter *,
struct nd_prefix **);
void nd6_prefix_unlink(struct nd_prefix *, struct nd_prhead *);
void nd6_prefix_del(struct nd_prefix *);
void nd6_prefix_ref(struct nd_prefix *);
void nd6_prefix_rele(struct nd_prefix *);
int nd6_prefix_offlink(struct nd_prefix *);
void pfxlist_onlink_check(void);
struct nd_prefix *nd6_prefix_lookup(struct nd_prefixctl *);
void rt6_flush(struct in6_addr *, struct ifnet *);
int nd6_setdefaultiface(int);
int in6_tmpifadd(const struct in6_ifaddr *, int, int);
#endif /* _KERNEL */
#endif /* _NETINET6_ND6_H_ */