d7c5a620e2
Run on LLNW canaries and tested by pho@ gallatin: Using a 14-core, 28-HTT single socket E5-2697 v3 with a 40GbE MLX5 based ConnectX 4-LX NIC, I see an almost 12% improvement in received packet rate, and a larger improvement in bytes delivered all the way to userspace. When the host receiving 64 streams of netperf -H $DUT -t UDP_STREAM -- -m 1, I see, using nstat -I mce0 1 before the patch: InMpps OMpps InGbs OGbs err TCP Est %CPU syscalls csw irq GBfree 4.98 0.00 4.42 0.00 4235592 33 83.80 4720653 2149771 1235 247.32 4.73 0.00 4.20 0.00 4025260 33 82.99 4724900 2139833 1204 247.32 4.72 0.00 4.20 0.00 4035252 33 82.14 4719162 2132023 1264 247.32 4.71 0.00 4.21 0.00 4073206 33 83.68 4744973 2123317 1347 247.32 4.72 0.00 4.21 0.00 4061118 33 80.82 4713615 2188091 1490 247.32 4.72 0.00 4.21 0.00 4051675 33 85.29 4727399 2109011 1205 247.32 4.73 0.00 4.21 0.00 4039056 33 84.65 4724735 2102603 1053 247.32 After the patch InMpps OMpps InGbs OGbs err TCP Est %CPU syscalls csw irq GBfree 5.43 0.00 4.20 0.00 3313143 33 84.96 5434214 1900162 2656 245.51 5.43 0.00 4.20 0.00 3308527 33 85.24 5439695 1809382 2521 245.51 5.42 0.00 4.19 0.00 3316778 33 87.54 5416028 1805835 2256 245.51 5.42 0.00 4.19 0.00 3317673 33 90.44 5426044 1763056 2332 245.51 5.42 0.00 4.19 0.00 3314839 33 88.11 5435732 1792218 2499 245.52 5.44 0.00 4.19 0.00 3293228 33 91.84 5426301 1668597 2121 245.52 Similarly, netperf reports 230Mb/s before the patch, and 270Mb/s after the patch Reviewed by: gallatin Sponsored by: Limelight Networks Differential Revision: https://reviews.freebsd.org/D15366
905 lines
24 KiB
C
905 lines
24 KiB
C
/*-
|
|
* SPDX-License-Identifier: BSD-3-Clause
|
|
*
|
|
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* 3. Neither the name of the project nor the names of its contributors
|
|
* may be used to endorse or promote products derived from this software
|
|
* without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*
|
|
* $KAME: in6_ifattach.c,v 1.118 2001/05/24 07:44:00 itojun Exp $
|
|
*/
|
|
|
|
#include <sys/cdefs.h>
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
#include <sys/param.h>
|
|
#include <sys/systm.h>
|
|
#include <sys/malloc.h>
|
|
#include <sys/socket.h>
|
|
#include <sys/sockio.h>
|
|
#include <sys/jail.h>
|
|
#include <sys/kernel.h>
|
|
#include <sys/lock.h>
|
|
#include <sys/proc.h>
|
|
#include <sys/rmlock.h>
|
|
#include <sys/syslog.h>
|
|
#include <sys/md5.h>
|
|
|
|
#include <net/if.h>
|
|
#include <net/if_var.h>
|
|
#include <net/if_dl.h>
|
|
#include <net/if_types.h>
|
|
#include <net/route.h>
|
|
#include <net/vnet.h>
|
|
|
|
#include <netinet/in.h>
|
|
#include <netinet/in_var.h>
|
|
#include <netinet/if_ether.h>
|
|
#include <netinet/in_pcb.h>
|
|
#include <netinet/ip_var.h>
|
|
#include <netinet/udp.h>
|
|
#include <netinet/udp_var.h>
|
|
|
|
#include <netinet/ip6.h>
|
|
#include <netinet6/ip6_var.h>
|
|
#include <netinet6/in6_var.h>
|
|
#include <netinet6/in6_pcb.h>
|
|
#include <netinet6/in6_ifattach.h>
|
|
#include <netinet6/ip6_var.h>
|
|
#include <netinet6/nd6.h>
|
|
#include <netinet6/mld6_var.h>
|
|
#include <netinet6/scope6_var.h>
|
|
|
|
VNET_DEFINE(unsigned long, in6_maxmtu) = 0;
|
|
|
|
#ifdef IP6_AUTO_LINKLOCAL
|
|
VNET_DEFINE(int, ip6_auto_linklocal) = IP6_AUTO_LINKLOCAL;
|
|
#else
|
|
VNET_DEFINE(int, ip6_auto_linklocal) = 1; /* enabled by default */
|
|
#endif
|
|
|
|
VNET_DEFINE(struct callout, in6_tmpaddrtimer_ch);
|
|
#define V_in6_tmpaddrtimer_ch VNET(in6_tmpaddrtimer_ch)
|
|
|
|
VNET_DECLARE(struct inpcbinfo, ripcbinfo);
|
|
#define V_ripcbinfo VNET(ripcbinfo)
|
|
|
|
static int get_rand_ifid(struct ifnet *, struct in6_addr *);
|
|
static int generate_tmp_ifid(u_int8_t *, const u_int8_t *, u_int8_t *);
|
|
static int get_ifid(struct ifnet *, struct ifnet *, struct in6_addr *);
|
|
static int in6_ifattach_linklocal(struct ifnet *, struct ifnet *);
|
|
static int in6_ifattach_loopback(struct ifnet *);
|
|
static void in6_purgemaddrs(struct ifnet *);
|
|
|
|
/*
 * Bit masks applied to the first byte of the interface identifier
 * (s6_addr[8]): the "g" (group/individual) and "u" (universal/local) bits.
 */
#define	EUI64_GBIT		0x01
#define	EUI64_UBIT		0x02

/* Turn an EUI64 into an IPv6 interface identifier by flipping the "u" bit. */
#define	EUI64_TO_IFID(in6)	\
	do {(in6)->s6_addr[8] ^= EUI64_UBIT; } while (0)

/* Predicates on an address whose lower 64 bits hold an EUI64. */
#define	EUI64_GROUP(in6)	((in6)->s6_addr[8] & EUI64_GBIT)
#define	EUI64_INDIVIDUAL(in6)	(!EUI64_GROUP(in6))
#define	EUI64_LOCAL(in6)	((in6)->s6_addr[8] & EUI64_UBIT)
#define	EUI64_UNIVERSAL(in6)	(!EUI64_LOCAL(in6))

/*
 * Predicates on an address whose lower 64 bits hold an interface
 * identifier; the sense of the "u" bit is inverted relative to EUI64.
 */
#define	IFID_LOCAL(in6)		(!EUI64_LOCAL(in6))
#define	IFID_UNIVERSAL(in6)	(!EUI64_UNIVERSAL(in6))
|
|
|
|
/*
|
|
* Generate a last-resort interface identifier, when the machine has no
|
|
* IEEE802/EUI64 address sources.
|
|
* The goal here is to get an interface identifier that is
|
|
* (1) random enough and (2) does not change across reboot.
|
|
* We currently use MD5(hostname) for it.
|
|
*
|
|
* in6 - upper 64bits are preserved
|
|
*/
|
|
static int
|
|
get_rand_ifid(struct ifnet *ifp, struct in6_addr *in6)
|
|
{
|
|
MD5_CTX ctxt;
|
|
struct prison *pr;
|
|
u_int8_t digest[16];
|
|
int hostnamelen;
|
|
|
|
pr = curthread->td_ucred->cr_prison;
|
|
mtx_lock(&pr->pr_mtx);
|
|
hostnamelen = strlen(pr->pr_hostname);
|
|
#if 0
|
|
/* we need at least several letters as seed for ifid */
|
|
if (hostnamelen < 3) {
|
|
mtx_unlock(&pr->pr_mtx);
|
|
return -1;
|
|
}
|
|
#endif
|
|
|
|
/* generate 8 bytes of pseudo-random value. */
|
|
bzero(&ctxt, sizeof(ctxt));
|
|
MD5Init(&ctxt);
|
|
MD5Update(&ctxt, pr->pr_hostname, hostnamelen);
|
|
mtx_unlock(&pr->pr_mtx);
|
|
MD5Final(digest, &ctxt);
|
|
|
|
/* assumes sizeof(digest) > sizeof(ifid) */
|
|
bcopy(digest, &in6->s6_addr[8], 8);
|
|
|
|
/* make sure to set "u" bit to local, and "g" bit to individual. */
|
|
in6->s6_addr[8] &= ~EUI64_GBIT; /* g bit to "individual" */
|
|
in6->s6_addr[8] |= EUI64_UBIT; /* u bit to "local" */
|
|
|
|
/* convert EUI64 into IPv6 interface identifier */
|
|
EUI64_TO_IFID(in6);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
generate_tmp_ifid(u_int8_t *seed0, const u_int8_t *seed1, u_int8_t *ret)
|
|
{
|
|
MD5_CTX ctxt;
|
|
u_int8_t seed[16], digest[16], nullbuf[8];
|
|
u_int32_t val32;
|
|
|
|
/* If there's no history, start with a random seed. */
|
|
bzero(nullbuf, sizeof(nullbuf));
|
|
if (bcmp(nullbuf, seed0, sizeof(nullbuf)) == 0) {
|
|
int i;
|
|
|
|
for (i = 0; i < 2; i++) {
|
|
val32 = arc4random();
|
|
bcopy(&val32, seed + sizeof(val32) * i, sizeof(val32));
|
|
}
|
|
} else
|
|
bcopy(seed0, seed, 8);
|
|
|
|
/* copy the right-most 64-bits of the given address */
|
|
/* XXX assumption on the size of IFID */
|
|
bcopy(seed1, &seed[8], 8);
|
|
|
|
if (0) { /* for debugging purposes only */
|
|
int i;
|
|
|
|
printf("generate_tmp_ifid: new randomized ID from: ");
|
|
for (i = 0; i < 16; i++)
|
|
printf("%02x", seed[i]);
|
|
printf(" ");
|
|
}
|
|
|
|
/* generate 16 bytes of pseudo-random value. */
|
|
bzero(&ctxt, sizeof(ctxt));
|
|
MD5Init(&ctxt);
|
|
MD5Update(&ctxt, seed, sizeof(seed));
|
|
MD5Final(digest, &ctxt);
|
|
|
|
/*
|
|
* RFC 3041 3.2.1. (3)
|
|
* Take the left-most 64-bits of the MD5 digest and set bit 6 (the
|
|
* left-most bit is numbered 0) to zero.
|
|
*/
|
|
bcopy(digest, ret, 8);
|
|
ret[0] &= ~EUI64_UBIT;
|
|
|
|
/*
|
|
* XXX: we'd like to ensure that the generated value is not zero
|
|
* for simplicity. If the caclculated digest happens to be zero,
|
|
* use a random non-zero value as the last resort.
|
|
*/
|
|
if (bcmp(nullbuf, ret, sizeof(nullbuf)) == 0) {
|
|
nd6log((LOG_INFO,
|
|
"generate_tmp_ifid: computed MD5 value is zero.\n"));
|
|
|
|
val32 = arc4random();
|
|
val32 = 1 + (val32 % (0xffffffff - 1));
|
|
}
|
|
|
|
/*
|
|
* RFC 3041 3.2.1. (4)
|
|
* Take the rightmost 64-bits of the MD5 digest and save them in
|
|
* stable storage as the history value to be used in the next
|
|
* iteration of the algorithm.
|
|
*/
|
|
bcopy(&digest[8], seed0, 8);
|
|
|
|
if (0) { /* for debugging purposes only */
|
|
int i;
|
|
|
|
printf("to: ");
|
|
for (i = 0; i < 16; i++)
|
|
printf("%02x", digest[i]);
|
|
printf("\n");
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Get interface identifier for the specified interface.
|
|
* XXX assumes single sockaddr_dl (AF_LINK address) per an interface
|
|
*
|
|
* in6 - upper 64bits are preserved
|
|
*/
|
|
int
|
|
in6_get_hw_ifid(struct ifnet *ifp, struct in6_addr *in6)
|
|
{
|
|
struct ifaddr *ifa;
|
|
struct sockaddr_dl *sdl;
|
|
u_int8_t *addr;
|
|
size_t addrlen;
|
|
static u_int8_t allzero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
|
|
static u_int8_t allone[8] =
|
|
{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
|
|
|
|
IF_ADDR_RLOCK(ifp);
|
|
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
|
|
if (ifa->ifa_addr->sa_family != AF_LINK)
|
|
continue;
|
|
sdl = (struct sockaddr_dl *)ifa->ifa_addr;
|
|
if (sdl == NULL)
|
|
continue;
|
|
if (sdl->sdl_alen == 0)
|
|
continue;
|
|
|
|
goto found;
|
|
}
|
|
IF_ADDR_RUNLOCK(ifp);
|
|
|
|
return -1;
|
|
|
|
found:
|
|
IF_ADDR_LOCK_ASSERT(ifp);
|
|
addr = LLADDR(sdl);
|
|
addrlen = sdl->sdl_alen;
|
|
|
|
/* get EUI64 */
|
|
switch (ifp->if_type) {
|
|
case IFT_BRIDGE:
|
|
case IFT_ETHER:
|
|
case IFT_L2VLAN:
|
|
case IFT_ATM:
|
|
case IFT_IEEE1394:
|
|
/* IEEE802/EUI64 cases - what others? */
|
|
/* IEEE1394 uses 16byte length address starting with EUI64 */
|
|
if (addrlen > 8)
|
|
addrlen = 8;
|
|
|
|
/* look at IEEE802/EUI64 only */
|
|
if (addrlen != 8 && addrlen != 6) {
|
|
IF_ADDR_RUNLOCK(ifp);
|
|
return -1;
|
|
}
|
|
|
|
/*
|
|
* check for invalid MAC address - on bsdi, we see it a lot
|
|
* since wildboar configures all-zero MAC on pccard before
|
|
* card insertion.
|
|
*/
|
|
if (bcmp(addr, allzero, addrlen) == 0) {
|
|
IF_ADDR_RUNLOCK(ifp);
|
|
return -1;
|
|
}
|
|
if (bcmp(addr, allone, addrlen) == 0) {
|
|
IF_ADDR_RUNLOCK(ifp);
|
|
return -1;
|
|
}
|
|
|
|
/* make EUI64 address */
|
|
if (addrlen == 8)
|
|
bcopy(addr, &in6->s6_addr[8], 8);
|
|
else if (addrlen == 6) {
|
|
in6->s6_addr[8] = addr[0];
|
|
in6->s6_addr[9] = addr[1];
|
|
in6->s6_addr[10] = addr[2];
|
|
in6->s6_addr[11] = 0xff;
|
|
in6->s6_addr[12] = 0xfe;
|
|
in6->s6_addr[13] = addr[3];
|
|
in6->s6_addr[14] = addr[4];
|
|
in6->s6_addr[15] = addr[5];
|
|
}
|
|
break;
|
|
|
|
case IFT_GIF:
|
|
case IFT_STF:
|
|
/*
|
|
* RFC2893 says: "SHOULD use IPv4 address as ifid source".
|
|
* however, IPv4 address is not very suitable as unique
|
|
* identifier source (can be renumbered).
|
|
* we don't do this.
|
|
*/
|
|
IF_ADDR_RUNLOCK(ifp);
|
|
return -1;
|
|
|
|
default:
|
|
IF_ADDR_RUNLOCK(ifp);
|
|
return -1;
|
|
}
|
|
|
|
/* sanity check: g bit must not indicate "group" */
|
|
if (EUI64_GROUP(in6)) {
|
|
IF_ADDR_RUNLOCK(ifp);
|
|
return -1;
|
|
}
|
|
|
|
/* convert EUI64 into IPv6 interface identifier */
|
|
EUI64_TO_IFID(in6);
|
|
|
|
/*
|
|
* sanity check: ifid must not be all zero, avoid conflict with
|
|
* subnet router anycast
|
|
*/
|
|
if ((in6->s6_addr[8] & ~(EUI64_GBIT | EUI64_UBIT)) == 0x00 &&
|
|
bcmp(&in6->s6_addr[9], allzero, 7) == 0) {
|
|
IF_ADDR_RUNLOCK(ifp);
|
|
return -1;
|
|
}
|
|
|
|
IF_ADDR_RUNLOCK(ifp);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Get interface identifier for the specified interface. If it is not
|
|
* available on ifp0, borrow interface identifier from other information
|
|
* sources.
|
|
*
|
|
* altifp - secondary EUI64 source
|
|
*/
|
|
static int
|
|
get_ifid(struct ifnet *ifp0, struct ifnet *altifp,
|
|
struct in6_addr *in6)
|
|
{
|
|
struct ifnet *ifp;
|
|
|
|
/* first, try to get it from the interface itself */
|
|
if (in6_get_hw_ifid(ifp0, in6) == 0) {
|
|
nd6log((LOG_DEBUG, "%s: got interface identifier from itself\n",
|
|
if_name(ifp0)));
|
|
goto success;
|
|
}
|
|
|
|
/* try secondary EUI64 source. this basically is for ATM PVC */
|
|
if (altifp && in6_get_hw_ifid(altifp, in6) == 0) {
|
|
nd6log((LOG_DEBUG, "%s: got interface identifier from %s\n",
|
|
if_name(ifp0), if_name(altifp)));
|
|
goto success;
|
|
}
|
|
|
|
/* next, try to get it from some other hardware interface */
|
|
IFNET_RLOCK_NOSLEEP();
|
|
TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
|
|
if (ifp == ifp0)
|
|
continue;
|
|
if (in6_get_hw_ifid(ifp, in6) != 0)
|
|
continue;
|
|
|
|
/*
|
|
* to borrow ifid from other interface, ifid needs to be
|
|
* globally unique
|
|
*/
|
|
if (IFID_UNIVERSAL(in6)) {
|
|
nd6log((LOG_DEBUG,
|
|
"%s: borrow interface identifier from %s\n",
|
|
if_name(ifp0), if_name(ifp)));
|
|
IFNET_RUNLOCK_NOSLEEP();
|
|
goto success;
|
|
}
|
|
}
|
|
IFNET_RUNLOCK_NOSLEEP();
|
|
|
|
/* last resort: get from random number source */
|
|
if (get_rand_ifid(ifp, in6) == 0) {
|
|
nd6log((LOG_DEBUG,
|
|
"%s: interface identifier generated by random number\n",
|
|
if_name(ifp0)));
|
|
goto success;
|
|
}
|
|
|
|
printf("%s: failed to get interface identifier\n", if_name(ifp0));
|
|
return -1;
|
|
|
|
success:
|
|
nd6log((LOG_INFO, "%s: ifid: %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n",
|
|
if_name(ifp0), in6->s6_addr[8], in6->s6_addr[9], in6->s6_addr[10],
|
|
in6->s6_addr[11], in6->s6_addr[12], in6->s6_addr[13],
|
|
in6->s6_addr[14], in6->s6_addr[15]));
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* altifp - secondary EUI64 source
|
|
*/
|
|
static int
|
|
in6_ifattach_linklocal(struct ifnet *ifp, struct ifnet *altifp)
|
|
{
|
|
struct in6_ifaddr *ia;
|
|
struct in6_aliasreq ifra;
|
|
struct nd_prefixctl pr0;
|
|
struct nd_prefix *pr;
|
|
int error;
|
|
|
|
/*
|
|
* configure link-local address.
|
|
*/
|
|
in6_prepare_ifra(&ifra, NULL, &in6mask64);
|
|
|
|
ifra.ifra_addr.sin6_addr.s6_addr32[0] = htonl(0xfe800000);
|
|
ifra.ifra_addr.sin6_addr.s6_addr32[1] = 0;
|
|
if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
|
|
ifra.ifra_addr.sin6_addr.s6_addr32[2] = 0;
|
|
ifra.ifra_addr.sin6_addr.s6_addr32[3] = htonl(1);
|
|
} else {
|
|
if (get_ifid(ifp, altifp, &ifra.ifra_addr.sin6_addr) != 0) {
|
|
nd6log((LOG_ERR,
|
|
"%s: no ifid available\n", if_name(ifp)));
|
|
return (-1);
|
|
}
|
|
}
|
|
if (in6_setscope(&ifra.ifra_addr.sin6_addr, ifp, NULL))
|
|
return (-1);
|
|
|
|
/* link-local addresses should NEVER expire. */
|
|
ifra.ifra_lifetime.ia6t_vltime = ND6_INFINITE_LIFETIME;
|
|
ifra.ifra_lifetime.ia6t_pltime = ND6_INFINITE_LIFETIME;
|
|
|
|
/*
|
|
* Now call in6_update_ifa() to do a bunch of procedures to configure
|
|
* a link-local address. We can set the 3rd argument to NULL, because
|
|
* we know there's no other link-local address on the interface
|
|
* and therefore we are adding one (instead of updating one).
|
|
*/
|
|
if ((error = in6_update_ifa(ifp, &ifra, NULL,
|
|
IN6_IFAUPDATE_DADDELAY)) != 0) {
|
|
/*
|
|
* XXX: When the interface does not support IPv6, this call
|
|
* would fail in the SIOCSIFADDR ioctl. I believe the
|
|
* notification is rather confusing in this case, so just
|
|
* suppress it. (jinmei@kame.net 20010130)
|
|
*/
|
|
if (error != EAFNOSUPPORT)
|
|
nd6log((LOG_NOTICE, "in6_ifattach_linklocal: failed to "
|
|
"configure a link-local address on %s "
|
|
"(errno=%d)\n",
|
|
if_name(ifp), error));
|
|
return (-1);
|
|
}
|
|
|
|
ia = in6ifa_ifpforlinklocal(ifp, 0); /* ia must not be NULL */
|
|
KASSERT(ia != NULL, ("%s: ia == NULL, ifp=%p", __func__, ifp));
|
|
|
|
ifa_free(&ia->ia_ifa);
|
|
|
|
/*
|
|
* Make the link-local prefix (fe80::%link/64) as on-link.
|
|
* Since we'd like to manage prefixes separately from addresses,
|
|
* we make an ND6 prefix structure for the link-local prefix,
|
|
* and add it to the prefix list as a never-expire prefix.
|
|
* XXX: this change might affect some existing code base...
|
|
*/
|
|
bzero(&pr0, sizeof(pr0));
|
|
pr0.ndpr_ifp = ifp;
|
|
/* this should be 64 at this moment. */
|
|
pr0.ndpr_plen = in6_mask2len(&ifra.ifra_prefixmask.sin6_addr, NULL);
|
|
pr0.ndpr_prefix = ifra.ifra_addr;
|
|
/* apply the mask for safety. (nd6_prelist_add will apply it again) */
|
|
IN6_MASK_ADDR(&pr0.ndpr_prefix.sin6_addr, &in6mask64);
|
|
/*
|
|
* Initialize parameters. The link-local prefix must always be
|
|
* on-link, and its lifetimes never expire.
|
|
*/
|
|
pr0.ndpr_raf_onlink = 1;
|
|
pr0.ndpr_raf_auto = 1; /* probably meaningless */
|
|
pr0.ndpr_vltime = ND6_INFINITE_LIFETIME;
|
|
pr0.ndpr_pltime = ND6_INFINITE_LIFETIME;
|
|
/*
|
|
* Since there is no other link-local addresses, nd6_prefix_lookup()
|
|
* probably returns NULL. However, we cannot always expect the result.
|
|
* For example, if we first remove the (only) existing link-local
|
|
* address, and then reconfigure another one, the prefix is still
|
|
* valid with referring to the old link-local address.
|
|
*/
|
|
if ((pr = nd6_prefix_lookup(&pr0)) == NULL) {
|
|
if ((error = nd6_prelist_add(&pr0, NULL, NULL)) != 0)
|
|
return (error);
|
|
} else
|
|
nd6_prefix_rele(pr);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* ifp - must be IFT_LOOP
|
|
*/
|
|
static int
|
|
in6_ifattach_loopback(struct ifnet *ifp)
|
|
{
|
|
struct in6_aliasreq ifra;
|
|
int error;
|
|
|
|
in6_prepare_ifra(&ifra, &in6addr_loopback, &in6mask128);
|
|
|
|
/*
|
|
* Always initialize ia_dstaddr (= broadcast address) to loopback
|
|
* address. Follows IPv4 practice - see in_ifinit().
|
|
*/
|
|
ifra.ifra_dstaddr.sin6_len = sizeof(struct sockaddr_in6);
|
|
ifra.ifra_dstaddr.sin6_family = AF_INET6;
|
|
ifra.ifra_dstaddr.sin6_addr = in6addr_loopback;
|
|
|
|
/* the loopback address should NEVER expire. */
|
|
ifra.ifra_lifetime.ia6t_vltime = ND6_INFINITE_LIFETIME;
|
|
ifra.ifra_lifetime.ia6t_pltime = ND6_INFINITE_LIFETIME;
|
|
|
|
/*
|
|
* We are sure that this is a newly assigned address, so we can set
|
|
* NULL to the 3rd arg.
|
|
*/
|
|
if ((error = in6_update_ifa(ifp, &ifra, NULL, 0)) != 0) {
|
|
nd6log((LOG_ERR, "in6_ifattach_loopback: failed to configure "
|
|
"the loopback address on %s (errno=%d)\n",
|
|
if_name(ifp), error));
|
|
return (-1);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* compute NI group address, based on the current hostname setting.
|
|
* see RFC 4620.
|
|
*
|
|
* when ifp == NULL, the caller is responsible for filling scopeid.
|
|
*
|
|
* If oldmcprefix == 1, FF02:0:0:0:0:2::/96 is used for NI group address
|
|
* while it is FF02:0:0:0:0:2:FF00::/104 in RFC 4620.
|
|
*/
|
|
static int
|
|
in6_nigroup0(struct ifnet *ifp, const char *name, int namelen,
|
|
struct in6_addr *in6, int oldmcprefix)
|
|
{
|
|
struct prison *pr;
|
|
const char *p;
|
|
u_char *q;
|
|
MD5_CTX ctxt;
|
|
u_int8_t digest[16];
|
|
char l;
|
|
char n[64]; /* a single label must not exceed 63 chars */
|
|
|
|
/*
|
|
* If no name is given and namelen is -1,
|
|
* we try to do the hostname lookup ourselves.
|
|
*/
|
|
if (!name && namelen == -1) {
|
|
pr = curthread->td_ucred->cr_prison;
|
|
mtx_lock(&pr->pr_mtx);
|
|
name = pr->pr_hostname;
|
|
namelen = strlen(name);
|
|
} else
|
|
pr = NULL;
|
|
if (!name || !namelen) {
|
|
if (pr != NULL)
|
|
mtx_unlock(&pr->pr_mtx);
|
|
return -1;
|
|
}
|
|
|
|
p = name;
|
|
while (p && *p && *p != '.' && p - name < namelen)
|
|
p++;
|
|
if (p == name || p - name > sizeof(n) - 1) {
|
|
if (pr != NULL)
|
|
mtx_unlock(&pr->pr_mtx);
|
|
return -1; /* label too long */
|
|
}
|
|
l = p - name;
|
|
strncpy(n, name, l);
|
|
if (pr != NULL)
|
|
mtx_unlock(&pr->pr_mtx);
|
|
n[(int)l] = '\0';
|
|
for (q = n; *q; q++) {
|
|
if ('A' <= *q && *q <= 'Z')
|
|
*q = *q - 'A' + 'a';
|
|
}
|
|
|
|
/* generate 16 bytes of pseudo-random value. */
|
|
bzero(&ctxt, sizeof(ctxt));
|
|
MD5Init(&ctxt);
|
|
MD5Update(&ctxt, &l, sizeof(l));
|
|
MD5Update(&ctxt, n, l);
|
|
MD5Final(digest, &ctxt);
|
|
|
|
bzero(in6, sizeof(*in6));
|
|
in6->s6_addr16[0] = IPV6_ADDR_INT16_MLL;
|
|
in6->s6_addr8[11] = 2;
|
|
if (oldmcprefix == 0) {
|
|
in6->s6_addr8[12] = 0xff;
|
|
/* Copy the first 24 bits of 128-bit hash into the address. */
|
|
bcopy(digest, &in6->s6_addr8[13], 3);
|
|
} else {
|
|
/* Copy the first 32 bits of 128-bit hash into the address. */
|
|
bcopy(digest, &in6->s6_addr32[3], sizeof(in6->s6_addr32[3]));
|
|
}
|
|
if (in6_setscope(in6, ifp, NULL))
|
|
return (-1); /* XXX: should not fail */
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Compute the NI group address for name using the
 * FF02:0:0:0:0:2:FF00::/104 prefix (oldmcprefix == 0).
 * Arguments and return value are those of in6_nigroup0().
 */
int
in6_nigroup(struct ifnet *ifp, const char *name, int namelen,
    struct in6_addr *in6)
{
	int rv;

	rv = in6_nigroup0(ifp, name, namelen, in6, 0);
	return (rv);
}
|
|
|
|
/*
 * Compute the NI group address for name using the older
 * FF02:0:0:0:0:2::/96 prefix (oldmcprefix == 1).
 * Arguments and return value are those of in6_nigroup0().
 */
int
in6_nigroup_oldmcprefix(struct ifnet *ifp, const char *name, int namelen,
    struct in6_addr *in6)
{
	int rv;

	rv = in6_nigroup0(ifp, name, namelen, in6, 1);
	return (rv);
}
|
|
|
|
/*
|
|
* XXX multiple loopback interface needs more care. for instance,
|
|
* nodelocal address needs to be configured onto only one of them.
|
|
* XXX multiple link-local address case
|
|
*
|
|
* altifp - secondary EUI64 source
|
|
*/
|
|
void
|
|
in6_ifattach(struct ifnet *ifp, struct ifnet *altifp)
|
|
{
|
|
struct in6_ifaddr *ia;
|
|
|
|
if (ifp->if_afdata[AF_INET6] == NULL)
|
|
return;
|
|
/*
|
|
* quirks based on interface type
|
|
*/
|
|
switch (ifp->if_type) {
|
|
case IFT_STF:
|
|
/*
|
|
* 6to4 interface is a very special kind of beast.
|
|
* no multicast, no linklocal. RFC2529 specifies how to make
|
|
* linklocals for 6to4 interface, but there's no use and
|
|
* it is rather harmful to have one.
|
|
*/
|
|
ND_IFINFO(ifp)->flags &= ~ND6_IFF_AUTO_LINKLOCAL;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
/*
|
|
* usually, we require multicast capability to the interface
|
|
*/
|
|
if ((ifp->if_flags & IFF_MULTICAST) == 0) {
|
|
nd6log((LOG_INFO, "in6_ifattach: "
|
|
"%s is not multicast capable, IPv6 not enabled\n",
|
|
if_name(ifp)));
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* assign loopback address for loopback interface.
|
|
*/
|
|
if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
|
|
/*
|
|
* check that loopback address doesn't exist yet.
|
|
*/
|
|
ia = in6ifa_ifwithaddr(&in6addr_loopback, 0);
|
|
if (ia == NULL)
|
|
in6_ifattach_loopback(ifp);
|
|
else
|
|
ifa_free(&ia->ia_ifa);
|
|
}
|
|
|
|
/*
|
|
* assign a link-local address, if there's none.
|
|
*/
|
|
if (!(ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) &&
|
|
ND_IFINFO(ifp)->flags & ND6_IFF_AUTO_LINKLOCAL) {
|
|
ia = in6ifa_ifpforlinklocal(ifp, 0);
|
|
if (ia == NULL)
|
|
in6_ifattach_linklocal(ifp, altifp);
|
|
else
|
|
ifa_free(&ia->ia_ifa);
|
|
}
|
|
|
|
/* update dynamically. */
|
|
if (V_in6_maxmtu < ifp->if_mtu)
|
|
V_in6_maxmtu = ifp->if_mtu;
|
|
}
|
|
|
|
/*
|
|
* NOTE: in6_ifdetach() does not support loopback if at this moment.
|
|
*
|
|
* When shutting down a VNET we clean up layers top-down. In that case
|
|
* upper layer protocols (ulp) are cleaned up already and locks are destroyed
|
|
* and we must not call into these cleanup functions anymore, thus purgeulp
|
|
* is set to 0 in that case by in6_ifdetach_destroy().
|
|
* The normal case of destroying a (cloned) interface still needs to cleanup
|
|
* everything related to the interface and will have purgeulp set to 1.
|
|
*/
|
|
static void
|
|
_in6_ifdetach(struct ifnet *ifp, int purgeulp)
|
|
{
|
|
struct ifaddr *ifa, *next;
|
|
|
|
if (ifp->if_afdata[AF_INET6] == NULL)
|
|
return;
|
|
|
|
/*
|
|
* nuke any of IPv6 addresses we have
|
|
* XXX: all addresses should be already removed
|
|
*/
|
|
CK_STAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) {
|
|
if (ifa->ifa_addr->sa_family != AF_INET6)
|
|
continue;
|
|
in6_purgeaddr(ifa);
|
|
}
|
|
if (purgeulp) {
|
|
in6_pcbpurgeif0(&V_udbinfo, ifp);
|
|
in6_pcbpurgeif0(&V_ulitecbinfo, ifp);
|
|
in6_pcbpurgeif0(&V_ripcbinfo, ifp);
|
|
}
|
|
/* leave from all multicast groups joined */
|
|
in6_purgemaddrs(ifp);
|
|
|
|
/*
|
|
* Remove neighbor management table.
|
|
* Enabling the nd6_purge will panic on vmove for interfaces on VNET
|
|
* teardown as the IPv6 layer is cleaned up already and the locks
|
|
* are destroyed.
|
|
*/
|
|
if (purgeulp)
|
|
nd6_purge(ifp);
|
|
}
|
|
|
|
/*
 * Detach IPv6 state from ifp, including the upper layer protocol and
 * neighbor table cleanup (purgeulp = 1).
 */
void
in6_ifdetach(struct ifnet *ifp)
{
	const int purgeulp = 1;

	_in6_ifdetach(ifp, purgeulp);
}
|
|
|
|
/*
 * Detach IPv6 state from ifp without touching the upper layer protocols
 * or the neighbor table (purgeulp = 0).
 */
void
in6_ifdetach_destroy(struct ifnet *ifp)
{
	const int purgeulp = 0;

	_in6_ifdetach(ifp, purgeulp);
}
|
|
|
|
int
|
|
in6_get_tmpifid(struct ifnet *ifp, u_int8_t *retbuf,
|
|
const u_int8_t *baseid, int generate)
|
|
{
|
|
u_int8_t nullbuf[8];
|
|
struct nd_ifinfo *ndi = ND_IFINFO(ifp);
|
|
|
|
bzero(nullbuf, sizeof(nullbuf));
|
|
if (bcmp(ndi->randomid, nullbuf, sizeof(nullbuf)) == 0) {
|
|
/* we've never created a random ID. Create a new one. */
|
|
generate = 1;
|
|
}
|
|
|
|
if (generate) {
|
|
bcopy(baseid, ndi->randomseed1, sizeof(ndi->randomseed1));
|
|
|
|
/* generate_tmp_ifid will update seedn and buf */
|
|
(void)generate_tmp_ifid(ndi->randomseed0, ndi->randomseed1,
|
|
ndi->randomid);
|
|
}
|
|
bcopy(ndi->randomid, retbuf, 8);
|
|
|
|
return (0);
|
|
}
|
|
|
|
void
|
|
in6_tmpaddrtimer(void *arg)
|
|
{
|
|
CURVNET_SET((struct vnet *) arg);
|
|
struct nd_ifinfo *ndi;
|
|
u_int8_t nullbuf[8];
|
|
struct ifnet *ifp;
|
|
|
|
callout_reset(&V_in6_tmpaddrtimer_ch,
|
|
(V_ip6_temp_preferred_lifetime - V_ip6_desync_factor -
|
|
V_ip6_temp_regen_advance) * hz, in6_tmpaddrtimer, curvnet);
|
|
|
|
bzero(nullbuf, sizeof(nullbuf));
|
|
TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
|
|
if (ifp->if_afdata[AF_INET6] == NULL)
|
|
continue;
|
|
ndi = ND_IFINFO(ifp);
|
|
if (bcmp(ndi->randomid, nullbuf, sizeof(nullbuf)) != 0) {
|
|
/*
|
|
* We've been generating a random ID on this interface.
|
|
* Create a new one.
|
|
*/
|
|
(void)generate_tmp_ifid(ndi->randomseed0,
|
|
ndi->randomseed1, ndi->randomid);
|
|
}
|
|
}
|
|
|
|
CURVNET_RESTORE();
|
|
}
|
|
|
|
static void
|
|
in6_purgemaddrs(struct ifnet *ifp)
|
|
{
|
|
struct in6_multi_head purgeinms;
|
|
struct in6_multi *inm;
|
|
struct ifmultiaddr *ifma, *next;
|
|
|
|
SLIST_INIT(&purgeinms);
|
|
IN6_MULTI_LOCK();
|
|
IN6_MULTI_LIST_LOCK();
|
|
IF_ADDR_WLOCK(ifp);
|
|
/*
|
|
* Extract list of in6_multi associated with the detaching ifp
|
|
* which the PF_INET6 layer is about to release.
|
|
*/
|
|
restart:
|
|
CK_STAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next) {
|
|
if (ifma->ifma_addr->sa_family != AF_INET6 ||
|
|
ifma->ifma_protospec == NULL)
|
|
continue;
|
|
inm = (struct in6_multi *)ifma->ifma_protospec;
|
|
in6m_rele_locked(&purgeinms, inm);
|
|
if (__predict_false(ifma6_restart)) {
|
|
ifma6_restart = false;
|
|
goto restart;
|
|
}
|
|
}
|
|
IF_ADDR_WUNLOCK(ifp);
|
|
mld_ifdetach(ifp);
|
|
IN6_MULTI_LIST_UNLOCK();
|
|
IN6_MULTI_UNLOCK();
|
|
in6m_release_list_deferred(&purgeinms);
|
|
}
|
|
|
|
void
|
|
in6_ifattach_destroy(void)
|
|
{
|
|
|
|
callout_drain(&V_in6_tmpaddrtimer_ch);
|
|
}
|
|
|
|
static void
|
|
in6_ifattach_init(void *dummy)
|
|
{
|
|
|
|
/* Timer for regeneranation of temporary addresses randomize ID. */
|
|
callout_init(&V_in6_tmpaddrtimer_ch, 0);
|
|
callout_reset(&V_in6_tmpaddrtimer_ch,
|
|
(V_ip6_temp_preferred_lifetime - V_ip6_desync_factor -
|
|
V_ip6_temp_regen_advance) * hz,
|
|
in6_tmpaddrtimer, curvnet);
|
|
}
|
|
|
|
/*
|
|
* Cheat.
|
|
* This must be after route_init(), which is now SI_ORDER_THIRD.
|
|
*/
|
|
SYSINIT(in6_ifattach_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE,
|
|
in6_ifattach_init, NULL);
|