freebsd-skq/sys/netinet/in.c

1567 lines
40 KiB
C
Raw Normal View History

/*-
1994-05-24 10:09:53 +00:00
* Copyright (c) 1982, 1986, 1991, 1993
* The Regents of the University of California. All rights reserved.
* Copyright (C) 2001 WIDE Project. All rights reserved.
1994-05-24 10:09:53 +00:00
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)in.c 8.4 (Berkeley) 1/9/95
1994-05-24 10:09:53 +00:00
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_mpath.h"
1994-05-24 10:09:53 +00:00
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sockio.h>
1994-05-24 10:09:53 +00:00
#include <sys/malloc.h>
#include <sys/priv.h>
1994-05-24 10:09:53 +00:00
#include <sys/socket.h>
#include <sys/jail.h>
1995-12-09 20:43:53 +00:00
#include <sys/kernel.h>
#include <sys/proc.h>
1995-12-09 20:43:53 +00:00
#include <sys/sysctl.h>
#include <sys/syslog.h>
1994-05-24 10:09:53 +00:00
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_llatbl.h>
#include <net/if_types.h>
1994-05-24 10:09:53 +00:00
#include <net/route.h>
#include <net/vnet.h>
1994-05-24 10:09:53 +00:00
#include <netinet/if_ether.h>
1994-05-24 10:09:53 +00:00
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/in_pcb.h>
Import rewrite of IPv4 socket multicast layer to support source-specific and protocol-independent host mode multicast. The code is written to accomodate IPv6, IGMPv3 and MLDv2 with only a little additional work. This change only pertains to FreeBSD's use as a multicast end-station and does not concern multicast routing; for an IGMPv3/MLDv2 router implementation, consider the XORP project. The work is based on Wilbert de Graaf's IGMPv3 code drop for FreeBSD 4.6, which is available at: http://www.kloosterhof.com/wilbert/igmpv3.html Summary * IPv4 multicast socket processing is now moved out of ip_output.c into a new module, in_mcast.c. * The in_mcast.c module implements the IPv4 legacy any-source API in terms of the protocol-independent source-specific API. * Source filters are lazy allocated as the common case does not use them. They are part of per inpcb state and are covered by the inpcb lock. * struct ip_mreqn is now supported to allow applications to specify multicast joins by interface index in the legacy IPv4 any-source API. * In UDP, an incoming multicast datagram only requires that the source port matches the 4-tuple if the socket was already bound by source port. An unbound socket SHOULD be able to receive multicasts sent from an ephemeral source port. * The UDP socket multicast filter mode defaults to exclusive, that is, sources present in the per-socket list will be blocked from delivery. * The RFC 3678 userland functions have been added to libc: setsourcefilter, getsourcefilter, setipv4sourcefilter, getipv4sourcefilter. * Definitions for IGMPv3 are merged but not yet used. * struct sockaddr_storage is now referenced from <netinet/in.h>. It is therefore defined there if not already declared in the same way as for the C99 types. * The RFC 1724 hack (specify 0.0.0.0/8 addresses to IP_MULTICAST_IF which are then interpreted as interface indexes) is now deprecated. 
* A patch for the Rhyolite.com routed in the FreeBSD base system is available in the -net archives. This only affects individuals running RIPv1 or RIPv2 via point-to-point and/or unnumbered interfaces. * Make IPv6 detach path similar to IPv4's in code flow; functionally same. * Bump __FreeBSD_version to 700048; see UPDATING. This work was financially supported by another FreeBSD committer. Obtained from: p4://bms_netdev Submitted by: Wilbert de Graaf (original work) Reviewed by: rwatson (locking), silence from fenner, net@ (but with encouragement)
2007-06-12 16:24:56 +00:00
#include <netinet/ip_var.h>
#include <netinet/ip_carp.h>
#include <netinet/igmp_var.h>
Build on Jeff Roberson's linker-set based dynamic per-CPU allocator (DPCPU), as suggested by Peter Wemm, and implement a new per-virtual network stack memory allocator. Modify vnet to use the allocator instead of monolithic global container structures (vinet, ...). This change solves many binary compatibility problems associated with VIMAGE, and restores ELF symbols for virtualized global variables. Each virtualized global variable exists as a "reference copy", and also once per virtual network stack. Virtualized global variables are tagged at compile-time, placing the in a special linker set, which is loaded into a contiguous region of kernel memory. Virtualized global variables in the base kernel are linked as normal, but those in modules are copied and relocated to a reserved portion of the kernel's vnet region with the help of a the kernel linker. Virtualized global variables exist in per-vnet memory set up when the network stack instance is created, and are initialized statically from the reference copy. Run-time access occurs via an accessor macro, which converts from the current vnet and requested symbol to a per-vnet address. When "options VIMAGE" is not compiled into the kernel, normal global ELF symbols will be used instead and indirection is avoided. This change restores static initialization for network stack global variables, restores support for non-global symbols and types, eliminates the need for many subsystem constructors, eliminates large per-subsystem structures that caused many binary compatibility issues both for monitoring applications (netstat) and kernel modules, removes the per-function INIT_VNET_*() macros throughout the stack, eliminates the need for vnet_symmap ksym(2) munging, and eliminates duplicate definitions of virtualized globals under VIMAGE_GLOBALS. Bump __FreeBSD_version and update UPDATING. 
Portions submitted by: bz Reviewed by: bz, zec Discussed with: gnn, jamie, jeff, jhb, julian, sam Suggested by: peter Approved by: re (kensmith)
2009-07-14 22:48:30 +00:00
#include <netinet/udp.h>
#include <netinet/udp_var.h>
2002-03-19 21:25:46 +00:00
/*
* Forward declarations for the file-local (static) helpers.
*/
static int in_mask2len(struct in_addr *);
static void in_len2mask(struct in_addr *, int);
static int in_lifaddr_ioctl(struct socket *, u_long, caddr_t,
struct ifnet *, struct thread *);
2002-03-19 21:25:46 +00:00
static void in_socktrim(struct sockaddr_in *);
static int in_ifinit(struct ifnet *, struct in_ifaddr *,
struct sockaddr_in *, int, int);
static void in_purgemaddrs(struct ifnet *);
1994-05-24 10:09:53 +00:00
/*
* Per-vnet integer exported read/write as the "no_same_prefix" sysctl
* under _net_inet_ip; accessed in this file through V_nosameprefix.
*/
static VNET_DEFINE(int, nosameprefix);
#define V_nosameprefix VNET(nosameprefix)
SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, no_same_prefix, CTLFLAG_RW,
&VNET_NAME(nosameprefix), 0,
"Refuse to create same prefixes on different interfaces");
/*
* ripcbinfo is only declared here, not defined -- presumably it lives in
* the raw IP code; confirm against its definition.
*/
VNET_DECLARE(struct inpcbinfo, ripcbinfo);
#define V_ripcbinfo VNET(ripcbinfo)
1994-05-24 10:09:53 +00:00
/*
* Return 1 if an internet address is for a ``local'' host
* (one to which we have a connection).
1994-05-24 10:09:53 +00:00
*/
int
in_localaddr(struct in_addr in)
1994-05-24 10:09:53 +00:00
{
register u_long i = ntohl(in.s_addr);
register struct in_ifaddr *ia;
IN_IFADDR_RLOCK();
TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
if ((i & ia->ia_subnetmask) == ia->ia_subnet) {
IN_IFADDR_RUNLOCK();
return (1);
}
1994-05-24 10:09:53 +00:00
}
IN_IFADDR_RUNLOCK();
1994-05-24 10:09:53 +00:00
return (0);
}
/*
 * Report whether an internet address is one of this host's own
 * addresses, i.e. it exactly equals an address found in the interface
 * address hash.
 *
 * Returns 1 when the address is configured locally, 0 otherwise.
 */
int
in_localip(struct in_addr in)
{
	struct in_ifaddr *ifaddr;
	int is_ours = 0;

	IN_IFADDR_RLOCK();
	/* Only the hash chain that `in' maps to needs to be inspected. */
	LIST_FOREACH(ifaddr, INADDR_HASH(in.s_addr), ia_hash) {
		if (IA_SIN(ifaddr)->sin_addr.s_addr != in.s_addr)
			continue;
		is_ours = 1;
		break;
	}
	IN_IFADDR_RUNLOCK();

	return (is_ours);
}
1994-05-24 10:09:53 +00:00
/*
* Determine whether an IP address is in a reserved set of addresses
* that may not be forwarded, or whether datagrams to that destination
* may be forwarded.
*/
int
in_canforward(struct in_addr in)
1994-05-24 10:09:53 +00:00
{
register u_long i = ntohl(in.s_addr);
register u_long net;
if (IN_EXPERIMENTAL(i) || IN_MULTICAST(i) || IN_LINKLOCAL(i))
1994-05-24 10:09:53 +00:00
return (0);
if (IN_CLASSA(i)) {
net = i & IN_CLASSA_NET;
if (net == 0 || net == (IN_LOOPBACKNET << IN_CLASSA_NSHIFT))
return (0);
}
return (1);
}
/*
* Trim a mask in a sockaddr
*/
static void
in_socktrim(struct sockaddr_in *ap)
1994-05-24 10:09:53 +00:00
{
register char *cplim = (char *) &ap->sin_addr;
register char *cp = (char *) (&ap->sin_addr + 1);
ap->sin_len = 0;
while (--cp >= cplim)
if (*cp) {
1994-05-24 10:09:53 +00:00
(ap)->sin_len = cp - (char *) (ap) + 1;
break;
}
}
/*
 * Convert a netmask into a prefix length.
 *
 * The mask is examined byte by byte in memory order: every leading 0xff
 * byte contributes 8 bits, and the first byte that is not 0xff
 * contributes its run of leading one bits.  Anything after the first
 * zero bit is ignored, so a non-contiguous mask yields the length of
 * its leading run of ones.
 *
 * Returns a value between 0 and 8 * sizeof(struct in_addr).
 */
static int
in_mask2len(struct in_addr *mask)
{
	const unsigned char *bytes = (const unsigned char *)mask;
	size_t nbytes, nbits;

	/* Count the leading all-ones bytes. */
	for (nbytes = 0; nbytes < sizeof(*mask); nbytes++) {
		if (bytes[nbytes] != 0xff)
			break;
	}

	/* Count the leading one bits of the first partial byte, if any. */
	nbits = 0;
	if (nbytes < sizeof(*mask)) {
		while (nbits < 8 && (bytes[nbytes] & (0x80 >> nbits)) != 0)
			nbits++;
	}

	return ((int)(nbytes * 8 + nbits));
}
/*
 * Build the netmask that corresponds to a prefix length.
 *
 * The first `len' bits of *mask, in memory order, are set to one and
 * the remaining bits to zero.  `len' is clamped to the range
 * 0 .. 8 * sizeof(struct in_addr), so an out-of-range value can neither
 * write past the end of the address nor produce a negative shift count.
 */
static void
in_len2mask(struct in_addr *mask, int len)
{
	unsigned char *bytes = (unsigned char *)mask;
	size_t idx;

	if (len < 0)
		len = 0;

	/*
	 * Every byte of the mask is written exactly once: full bytes
	 * while at least 8 bits remain, then one partial byte, then
	 * zeroes (a remaining length of 0 yields a 0x00 byte).
	 */
	for (idx = 0; idx < sizeof(*mask); idx++) {
		if (len >= 8) {
			bytes[idx] = 0xff;
			len -= 8;
		} else {
			bytes[idx] = (0xff00 >> len) & 0xff;
			len = 0;
		}
	}
}
1994-05-24 10:09:53 +00:00
/*
* Generic internet control operations (ioctl's).
*
* ifp is NULL if not an interface-specific ioctl.
*
* The address "get" requests (SIOCGIFADDR, SIOCGIFBRDADDR, SIOCGIFDSTADDR,
* SIOCGIFNETMASK) and the alias add/delete requests (SIOCAIFADDR,
* SIOCDIFADDR) are handled here.  SIOC[AGD]LIFADDR are handed to
* in_lifaddr_ioctl(), the old SIOCSIF* "set" requests are rejected with
* EINVAL, and every other command is passed to the interface's if_ioctl
* handler (EOPNOTSUPP if there is none).
*
* For the get requests `data' is read as a struct ifreq; for the
* add/delete requests it is read as a struct in_aliasreq.  When td is
* non-NULL, add and delete are subject to a priv_check().
*
* Returns 0 on success or an errno value.
*/
/* ARGSUSED */
int
in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
struct thread *td)
1994-05-24 10:09:53 +00:00
{
register struct ifreq *ifr = (struct ifreq *)data;
register struct in_ifaddr *ia, *iap;
1994-05-24 10:09:53 +00:00
register struct ifaddr *ifa;
struct in_addr allhosts_addr;
struct in_addr dst;
struct in_ifinfo *ii;
1994-05-24 10:09:53 +00:00
struct in_aliasreq *ifra = (struct in_aliasreq *)data;
int error, hostIsNew, iaIsNew, maskIsNew;
int iaIsFirst;
/* Remember the command as issued; it is compared with cmd further down. */
u_long ocmd = cmd;
/*
* Pre-10.x compat: OSIOCAIFADDR passes a shorter
* struct in_aliasreq, without ifra_vhid.
*/
if (cmd == OSIOCAIFADDR)
cmd = SIOCAIFADDR;
ia = NULL;
iaIsFirst = 0;
iaIsNew = 0;
allhosts_addr.s_addr = htonl(INADDR_ALLHOSTS_GROUP);
1994-05-24 10:09:53 +00:00
/*
* Filter out ioctls we implement directly; forward the rest on to
* in_lifaddr_ioctl() and ifp->if_ioctl().
*/
switch (cmd) {
case SIOCGIFADDR:
case SIOCGIFBRDADDR:
case SIOCGIFDSTADDR:
case SIOCGIFNETMASK:
case SIOCDIFADDR:
break;
case SIOCAIFADDR:
/*
* ifra_addr must be present and be of INET family.
* ifra_broadaddr and ifra_mask are optional.
*/
if (ifra->ifra_addr.sin_len != sizeof(struct sockaddr_in) ||
ifra->ifra_addr.sin_family != AF_INET)
return (EINVAL);
if (ifra->ifra_broadaddr.sin_len != 0 &&
2011-11-22 19:39:27 +00:00
(ifra->ifra_broadaddr.sin_len !=
sizeof(struct sockaddr_in) ||
ifra->ifra_broadaddr.sin_family != AF_INET))
return (EINVAL);
#if 0
/*
* ifconfig(8) in pre-10.x doesn't set sin_family for the
* mask. The code is disabled for the 10.x timeline, to
* make SIOCAIFADDR compatible with 9.x ifconfig(8).
* The code should be enabled in 11.x
*/
if (ifra->ifra_mask.sin_len != 0 &&
(ifra->ifra_mask.sin_len != sizeof(struct sockaddr_in) ||
ifra->ifra_mask.sin_family != AF_INET))
return (EINVAL);
#endif
break;
case SIOCSIFADDR:
case SIOCSIFBRDADDR:
case SIOCSIFDSTADDR:
case SIOCSIFNETMASK:
/* We no longer support those old commands. */
return (EINVAL);
case SIOCALIFADDR:
if (td != NULL) {
error = priv_check(td, PRIV_NET_ADDIFADDR);
if (error)
return (error);
}
if (ifp == NULL)
return (EINVAL);
return in_lifaddr_ioctl(so, cmd, data, ifp, td);
case SIOCDLIFADDR:
if (td != NULL) {
error = priv_check(td, PRIV_NET_DELIFADDR);
if (error)
return (error);
}
if (ifp == NULL)
return (EINVAL);
return in_lifaddr_ioctl(so, cmd, data, ifp, td);
case SIOCGLIFADDR:
if (ifp == NULL)
return (EINVAL);
return in_lifaddr_ioctl(so, cmd, data, ifp, td);
default:
if (ifp == NULL || ifp->if_ioctl == NULL)
return (EOPNOTSUPP);
return ((*ifp->if_ioctl)(ifp, cmd, data));
}
/* Everything handled below operates on a specific interface. */
if (ifp == NULL)
return (EADDRNOTAVAIL);
/*
* Security checks before we get involved in any work.
*/
switch (cmd) {
case SIOCAIFADDR:
if (td != NULL) {
error = priv_check(td, PRIV_NET_ADDIFADDR);
if (error)
return (error);
}
break;
case SIOCDIFADDR:
if (td != NULL) {
error = priv_check(td, PRIV_NET_DELIFADDR);
if (error)
return (error);
}
break;
}
1994-05-24 10:09:53 +00:00
/*
* Find address for this interface, if it exists.
*
* If an alias address was specified, find that one instead of the
* first one on the interface, if possible.
*
* A reference is taken on whichever address is selected; it is
* released at the out: label.
*/
dst = ((struct sockaddr_in *)&ifr->ifr_addr)->sin_addr;
IN_IFADDR_RLOCK();
LIST_FOREACH(iap, INADDR_HASH(dst.s_addr), ia_hash) {
if (iap->ia_ifp == ifp &&
iap->ia_addr.sin_addr.s_addr == dst.s_addr) {
if (td == NULL || prison_check_ip4(td->td_ucred,
&dst) == 0)
ia = iap;
break;
}
}
if (ia != NULL)
ifa_ref(&ia->ia_ifa);
IN_IFADDR_RUNLOCK();
/*
* No exact match: fall back to the first AF_INET address on the
* interface that passes prison_check_ip4() (checked only when td is set).
*/
if (ia == NULL) {
IF_ADDR_RLOCK(ifp);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
iap = ifatoia(ifa);
if (iap->ia_addr.sin_family == AF_INET) {
if (td != NULL &&
prison_check_ip4(td->td_ucred,
&iap->ia_addr.sin_addr) != 0)
continue;
ia = iap;
break;
}
}
if (ia != NULL)
ifa_ref(&ia->ia_ifa);
IF_ADDR_RUNLOCK(ifp);
}
/* Nothing was found on this interface at all. */
if (ia == NULL)
iaIsFirst = 1;
1994-05-24 10:09:53 +00:00
error = 0;
1994-05-24 10:09:53 +00:00
switch (cmd) {
case SIOCAIFADDR:
case SIOCDIFADDR:
if (ifra->ifra_addr.sin_family == AF_INET) {
struct in_ifaddr *oia;
/*
* Starting from the address selected above, walk the global
* address list for an entry on this interface that matches
* ifra_addr exactly, and move the held reference from the
* old candidate (oia) to the entry found, if they differ.
*/
IN_IFADDR_RLOCK();
for (oia = ia; ia; ia = TAILQ_NEXT(ia, ia_link)) {
if (ia->ia_ifp == ifp &&
ia->ia_addr.sin_addr.s_addr ==
ifra->ifra_addr.sin_addr.s_addr)
break;
}
if (ia != NULL && ia != oia)
ifa_ref(&ia->ia_ifa);
if (oia != NULL && ia != oia)
ifa_free(&oia->ia_ifa);
IN_IFADDR_RUNLOCK();
1995-05-30 08:16:23 +00:00
/* Adding to a point-to-point interface requires a destination. */
if ((ifp->if_flags & IFF_POINTOPOINT)
&& (cmd == SIOCAIFADDR)
&& (ifra->ifra_dstaddr.sin_addr.s_addr
== INADDR_ANY)) {
error = EDESTADDRREQ;
goto out;
}
1994-05-24 10:09:53 +00:00
}
if (cmd == SIOCDIFADDR && ia == NULL) {
error = EADDRNOTAVAIL;
goto out;
}
/*
* SIOCAIFADDR without an existing entry: allocate a new in_ifaddr
* and link it onto both the interface's and the global address list.
*/
if (ia == NULL) {
ia = (struct in_ifaddr *)
malloc(sizeof *ia, M_IFADDR, M_NOWAIT |
M_ZERO);
if (ia == NULL) {
error = ENOBUFS;
goto out;
}
2002-12-18 11:46:59 +00:00
ifa = &ia->ia_ifa;
ifa_init(ifa);
ifa->ifa_addr = (struct sockaddr *)&ia->ia_addr;
ifa->ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr;
ifa->ifa_netmask = (struct sockaddr *)&ia->ia_sockmask;
2002-12-18 11:46:59 +00:00
1994-05-24 10:09:53 +00:00
/* NOTE(review): 8 presumably spans the sockaddr_in header plus sin_addr -- confirm. */
ia->ia_sockmask.sin_len = 8;
ia->ia_sockmask.sin_family = AF_INET;
1994-05-24 10:09:53 +00:00
if (ifp->if_flags & IFF_BROADCAST) {
ia->ia_broadaddr.sin_len = sizeof(ia->ia_addr);
ia->ia_broadaddr.sin_family = AF_INET;
}
ia->ia_ifp = ifp;
ifa_ref(ifa); /* if_addrhead */
IF_ADDR_WLOCK(ifp);
TAILQ_INSERT_TAIL(&ifp->if_addrhead, ifa, ifa_link);
IF_ADDR_WUNLOCK(ifp);
ifa_ref(ifa); /* in_ifaddrhead */
IN_IFADDR_WLOCK();
TAILQ_INSERT_TAIL(&V_in_ifaddrhead, ia, ia_link);
IN_IFADDR_WUNLOCK();
iaIsNew = 1;
1994-05-24 10:09:53 +00:00
}
break;
case SIOCGIFADDR:
case SIOCGIFNETMASK:
case SIOCGIFDSTADDR:
case SIOCGIFBRDADDR:
/* The get requests need an existing address to report on. */
if (ia == NULL) {
error = EADDRNOTAVAIL;
goto out;
}
1994-05-24 10:09:53 +00:00
break;
}
/*
* Most paths in this switch return directly or via out. Only paths
* that remove the address break in order to hit common removal code.
*/
switch (cmd) {
1994-05-24 10:09:53 +00:00
case SIOCGIFADDR:
*((struct sockaddr_in *)&ifr->ifr_addr) = ia->ia_addr;
goto out;
1994-05-24 10:09:53 +00:00
case SIOCGIFBRDADDR:
if ((ifp->if_flags & IFF_BROADCAST) == 0) {
error = EINVAL;
goto out;
}
1994-05-24 10:09:53 +00:00
*((struct sockaddr_in *)&ifr->ifr_dstaddr) = ia->ia_broadaddr;
goto out;
1994-05-24 10:09:53 +00:00
case SIOCGIFDSTADDR:
if ((ifp->if_flags & IFF_POINTOPOINT) == 0) {
error = EINVAL;
goto out;
}
1994-05-24 10:09:53 +00:00
*((struct sockaddr_in *)&ifr->ifr_dstaddr) = ia->ia_dstaddr;
goto out;
1994-05-24 10:09:53 +00:00
case SIOCGIFNETMASK:
*((struct sockaddr_in *)&ifr->ifr_addr) = ia->ia_sockmask;
goto out;
1994-05-24 10:09:53 +00:00
case SIOCAIFADDR:
maskIsNew = 0;
hostIsNew = 1;
error = 0;
if (ifra->ifra_addr.sin_addr.s_addr ==
ia->ia_addr.sin_addr.s_addr)
hostIsNew = 0;
1994-05-24 10:09:53 +00:00
if (ifra->ifra_mask.sin_len) {
/*
* QL: XXX
* Need to scrub the prefix here in case
* the issued command is SIOCAIFADDR with
* the same address, but with a different
* prefix length. And if the prefix length
* is the same as before, then the call is
* un-necessarily executed here.
*/
in_ifscrub(ifp, ia, LLE_STATIC);
1994-05-24 10:09:53 +00:00
ia->ia_sockmask = ifra->ifra_mask;
ia->ia_sockmask.sin_family = AF_INET;
1994-05-24 10:09:53 +00:00
ia->ia_subnetmask =
ntohl(ia->ia_sockmask.sin_addr.s_addr);
1994-05-24 10:09:53 +00:00
maskIsNew = 1;
}
if ((ifp->if_flags & IFF_POINTOPOINT) &&
(ifra->ifra_dstaddr.sin_family == AF_INET)) {
in_ifscrub(ifp, ia, LLE_STATIC);
1994-05-24 10:09:53 +00:00
ia->ia_dstaddr = ifra->ifra_dstaddr;
maskIsNew = 1; /* We lie; but the effect's the same */
}
/*
* ifra_vhid is only read when the request was not the shorter
* OSIOCAIFADDR form (ocmd == cmd); otherwise 0 is passed.
*/
if (hostIsNew || maskIsNew)
error = in_ifinit(ifp, ia, &ifra->ifra_addr, maskIsNew,
(ocmd == cmd ? ifra->ifra_vhid : 0));
/*
* A freshly created address that failed to initialize is torn
* down again by the common removal code after this switch.
*/
if (error != 0 && iaIsNew)
break;
1994-05-24 10:09:53 +00:00
if ((ifp->if_flags & IFF_BROADCAST) &&
ifra->ifra_broadaddr.sin_len)
1994-05-24 10:09:53 +00:00
ia->ia_broadaddr = ifra->ifra_broadaddr;
if (error == 0) {
ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
/* When no address was found on entry, join the all-hosts group. */
if (iaIsFirst &&
(ifp->if_flags & IFF_MULTICAST) != 0) {
error = in_joingroup(ifp, &allhosts_addr,
NULL, &ii->ii_allhosts);
}
EVENTHANDLER_INVOKE(ifaddr_event, ifp);
}
goto out;
1994-05-24 10:09:53 +00:00
case SIOCDIFADDR:
/*
* in_ifscrub kills the interface route.
*/
in_ifscrub(ifp, ia, LLE_STATIC);
/*
* in_ifadown gets rid of all the rest of
* the routes. This is not quite the right
* thing to do, but at least if we are running
* a routing process they will come back.
*/
in_ifadown(&ia->ia_ifa, 1);
EVENTHANDLER_INVOKE(ifaddr_event, ifp);
error = 0;
1994-05-24 10:09:53 +00:00
break;
default:
panic("in_control: unsupported ioctl");
1994-05-24 10:09:53 +00:00
}
/*
* Common removal code: reached from SIOCDIFADDR and from a SIOCAIFADDR
* whose newly created address could not be initialized.
*/
if (ia->ia_ifa.ifa_carp)
(*carp_detach_p)(&ia->ia_ifa);
IF_ADDR_WLOCK(ifp);
/* Re-check that ia is still part of the list. */
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa == &ia->ia_ifa)
break;
}
if (ifa == NULL) {
/*
* If we lost the race with another thread, there is no need to
* try it again for the next loop as there is no other exit
* path between here and out.
*/
IF_ADDR_WUNLOCK(ifp);
error = EADDRNOTAVAIL;
goto out;
}
TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link);
IF_ADDR_WUNLOCK(ifp);
ifa_free(&ia->ia_ifa); /* if_addrhead */
IN_IFADDR_WLOCK();
TAILQ_REMOVE(&V_in_ifaddrhead, ia, ia_link);
LIST_REMOVE(ia, ia_hash);
IN_IFADDR_WUNLOCK();
/*
* If this is the last IPv4 address configured on this
* interface, leave the all-hosts group.
* No state-change report need be transmitted.
*/
IFP_TO_IA(ifp, iap);
if (iap == NULL) {
ii = ((struct in_ifinfo *)ifp->if_afdata[AF_INET]);
IN_MULTI_LOCK();
if (ii->ii_allhosts) {
(void)in_leavegroup_locked(ii->ii_allhosts, NULL);
ii->ii_allhosts = NULL;
}
IN_MULTI_UNLOCK();
} else
ifa_free(&iap->ia_ifa);
ifa_free(&ia->ia_ifa); /* in_ifaddrhead */
out:
/*
* Release the reference held on ia for the duration of this call:
* the one taken by the lookup above or, for a newly allocated
* address, presumably the initial one from ifa_init() -- confirm.
*/
if (ia != NULL)
ifa_free(&ia->ia_ifa);
return (error);
1994-05-24 10:09:53 +00:00
}
/*
 * SIOC[GAD]LIFADDR.
 *	SIOCGLIFADDR: get first address. (?!?)
 *	SIOCGLIFADDR with IFLR_PREFIX:
 *		get first address that matches the specified prefix.
 *	SIOCALIFADDR: add the specified address.
 *	SIOCALIFADDR with IFLR_PREFIX:
 *		EINVAL since we can't deduce hostid part of the address.
 *	SIOCDLIFADDR: delete the specified address.
 *	SIOCDLIFADDR with IFLR_PREFIX:
 *		delete the first address that matches the specified prefix.
 * return values:
 *	EINVAL on invalid parameters
 *	EADDRNOTAVAIL on prefix match failed/specified address not found
 *	other values may be returned from in_control()
 *
 * `data' is interpreted as a struct if_laddrreq.  Add and delete are
 * carried out by building a struct in_aliasreq and calling in_control()
 * with SIOCAIFADDR or SIOCDIFADDR respectively.  Both `data' and `ifp'
 * must be non-NULL; the function panics otherwise.
 */
static int
in_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data,
    struct ifnet *ifp, struct thread *td)
{
	struct if_laddrreq *iflr = (struct if_laddrreq *)data;
	struct ifaddr *ifa;

	/* sanity checks */
	if (data == NULL || ifp == NULL) {
		panic("invalid argument to in_lifaddr_ioctl");
		/*NOTREACHED*/
	}

	switch (cmd) {
	case SIOCGLIFADDR:
		/* address must be specified on GET with IFLR_PREFIX */
		if ((iflr->flags & IFLR_PREFIX) == 0)
			break;
		/*FALLTHROUGH*/
	case SIOCALIFADDR:
	case SIOCDLIFADDR:
		/* address must be specified on ADD and DELETE */
		if (iflr->addr.ss_family != AF_INET)
			return (EINVAL);
		if (iflr->addr.ss_len != sizeof(struct sockaddr_in))
			return (EINVAL);
		/* XXX need improvement */
		if (iflr->dstaddr.ss_family
		    && iflr->dstaddr.ss_family != AF_INET)
			return (EINVAL);
		if (iflr->dstaddr.ss_family
		    && iflr->dstaddr.ss_len != sizeof(struct sockaddr_in))
			return (EINVAL);
		break;
	default: /*shouldn't happen*/
		return (EOPNOTSUPP);
	}

	/* The prefix length cannot exceed the width of an IPv4 address. */
	if (sizeof(struct in_addr) * 8 < iflr->prefixlen)
		return (EINVAL);

	switch (cmd) {
	case SIOCALIFADDR:
	    {
		struct in_aliasreq ifra;

		if (iflr->flags & IFLR_PREFIX)
			return (EINVAL);

		/* copy args to in_aliasreq, perform ioctl(SIOCAIFADDR). */
		bzero(&ifra, sizeof(ifra));
		bcopy(iflr->iflr_name, ifra.ifra_name,
		    sizeof(ifra.ifra_name));

		bcopy(&iflr->addr, &ifra.ifra_addr, iflr->addr.ss_len);

		if (iflr->dstaddr.ss_family) {	/*XXX*/
			bcopy(&iflr->dstaddr, &ifra.ifra_dstaddr,
			    iflr->dstaddr.ss_len);
		}

		ifra.ifra_mask.sin_family = AF_INET;
		ifra.ifra_mask.sin_len = sizeof(struct sockaddr_in);
		in_len2mask(&ifra.ifra_mask.sin_addr, iflr->prefixlen);

		return (in_control(so, SIOCAIFADDR, (caddr_t)&ifra, ifp, td));
	    }
	case SIOCGLIFADDR:
	case SIOCDLIFADDR:
	    {
		struct in_ifaddr *ia;
		struct in_addr mask, candidate, match;
		struct sockaddr_in *sin;

		bzero(&mask, sizeof(mask));
		bzero(&match, sizeof(match));
		if (iflr->flags & IFLR_PREFIX) {
			/* lookup a prefix rather than address. */
			in_len2mask(&mask, iflr->prefixlen);

			sin = (struct sockaddr_in *)&iflr->addr;
			match.s_addr = sin->sin_addr.s_addr;
			match.s_addr &= mask.s_addr;

			/* if you set extra bits, that's wrong */
			if (match.s_addr != sin->sin_addr.s_addr)
				return (EINVAL);
		} else {
			/* on getting an address, take the 1st match */
			/* on deleting an address, do exact match */
			if (cmd != SIOCGLIFADDR) {
				in_len2mask(&mask, 32);
				sin = (struct sockaddr_in *)&iflr->addr;
				match.s_addr = sin->sin_addr.s_addr;
			}
		}

		IF_ADDR_RLOCK(ifp);
		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
			if (ifa->ifa_addr->sa_family != AF_INET)
				continue;
			/* A zero match value accepts the first AF_INET address. */
			if (match.s_addr == 0)
				break;
			/*
			 * ifa_addr is itself a pointer to the sockaddr, so
			 * it is cast directly; taking the address of the
			 * pointer field would read the wrong memory.
			 */
			sin = (struct sockaddr_in *)ifa->ifa_addr;
			candidate.s_addr = sin->sin_addr.s_addr;
			candidate.s_addr &= mask.s_addr;
			if (candidate.s_addr == match.s_addr)
				break;
		}
		/* Hold the entry so it stays valid once the lock is dropped. */
		if (ifa != NULL)
			ifa_ref(ifa);
		IF_ADDR_RUNLOCK(ifp);
		if (ifa == NULL)
			return (EADDRNOTAVAIL);
		ia = (struct in_ifaddr *)ifa;

		if (cmd == SIOCGLIFADDR) {
			/* fill in the if_laddrreq structure */
			bcopy(&ia->ia_addr, &iflr->addr, ia->ia_addr.sin_len);

			if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
				bcopy(&ia->ia_dstaddr, &iflr->dstaddr,
				    ia->ia_dstaddr.sin_len);
			} else
				bzero(&iflr->dstaddr, sizeof(iflr->dstaddr));

			iflr->prefixlen =
			    in_mask2len(&ia->ia_sockmask.sin_addr);

			iflr->flags = 0;	/*XXX*/
			ifa_free(ifa);

			return (0);
		} else {
			struct in_aliasreq ifra;

			/* fill in_aliasreq and do ioctl(SIOCDIFADDR) */
			bzero(&ifra, sizeof(ifra));
			bcopy(iflr->iflr_name, ifra.ifra_name,
			    sizeof(ifra.ifra_name));

			bcopy(&ia->ia_addr, &ifra.ifra_addr,
			    ia->ia_addr.sin_len);
			if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
				bcopy(&ia->ia_dstaddr, &ifra.ifra_dstaddr,
				    ia->ia_dstaddr.sin_len);
			}
			/*
			 * The netmask goes into ifra_mask; writing it to
			 * ifra_dstaddr would clobber the destination
			 * address copied just above.
			 */
			bcopy(&ia->ia_sockmask, &ifra.ifra_mask,
			    ia->ia_sockmask.sin_len);
			/* ifra now holds copies, so the entry can be released. */
			ifa_free(ifa);

			return (in_control(so, SIOCDIFADDR, (caddr_t)&ifra,
			    ifp, td));
		}
	    }
	}

	return (EOPNOTSUPP);	/*just for safety*/
}
1994-05-24 10:09:53 +00:00
/*
 * Delete any existing route for an interface address.
 *
 * This is a thin wrapper: the work is delegated to in_scrubprefix()
 * and its result is discarded.  The ifp argument is not used.
 */
void
in_ifscrub(struct ifnet *ifp, struct in_ifaddr *ia, u_int flags)
{

	(void)in_scrubprefix(ia, flags);
}
/*
* Initialize an interface's internet address
* and routing table entry.
*/
static int
in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin,
int masksupplied, int vhid)
1994-05-24 10:09:53 +00:00
{
register u_long i = ntohl(sin->sin_addr.s_addr);
int flags, error = 0;
1994-05-24 10:09:53 +00:00
IN_IFADDR_WLOCK();
if (ia->ia_addr.sin_family == AF_INET)
LIST_REMOVE(ia, ia_hash);
1994-05-24 10:09:53 +00:00
ia->ia_addr = *sin;
LIST_INSERT_HEAD(INADDR_HASH(ia->ia_addr.sin_addr.s_addr),
ia, ia_hash);
IN_IFADDR_WUNLOCK();
if (vhid > 0) {
if (carp_attach_p != NULL)
error = (*carp_attach_p)(&ia->ia_ifa, vhid);
else
error = EPROTONOSUPPORT;
}
if (error)
return (error);
1994-05-24 10:09:53 +00:00
/*
* Give the interface a chance to initialize
* if this is its first address,
* and to validate the address if necessary.
1994-05-24 10:09:53 +00:00
*/
if (ifp->if_ioctl != NULL &&
(error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia)) != 0)
/* LIST_REMOVE(ia, ia_hash) is done in in_control */
return (error);
1994-05-24 10:09:53 +00:00
/*
* Be compatible with network classes, if netmask isn't supplied,
* guess it based on classes.
1994-05-24 10:09:53 +00:00
*/
if (!masksupplied) {
if (IN_CLASSA(i))
ia->ia_subnetmask = IN_CLASSA_NET;
else if (IN_CLASSB(i))
ia->ia_subnetmask = IN_CLASSB_NET;
else
ia->ia_subnetmask = IN_CLASSC_NET;
1994-05-24 10:09:53 +00:00
ia->ia_sockmask.sin_addr.s_addr = htonl(ia->ia_subnetmask);
}
1994-05-24 10:09:53 +00:00
ia->ia_subnet = i & ia->ia_subnetmask;
in_socktrim(&ia->ia_sockmask);
1994-05-24 10:09:53 +00:00
/*
* Add route for the network.
*/
flags = RTF_UP;
1994-05-24 10:09:53 +00:00
ia->ia_ifa.ifa_metric = ifp->if_metric;
if (ifp->if_flags & IFF_BROADCAST) {
if (ia->ia_subnetmask == IN_RFC3021_MASK)
ia->ia_broadaddr.sin_addr.s_addr = INADDR_BROADCAST;
else
ia->ia_broadaddr.sin_addr.s_addr =
htonl(ia->ia_subnet | ~ia->ia_subnetmask);
1994-05-24 10:09:53 +00:00
} else if (ifp->if_flags & IFF_LOOPBACK) {
ia->ia_dstaddr = ia->ia_addr;
1994-05-24 10:09:53 +00:00
flags |= RTF_HOST;
} else if (ifp->if_flags & IFF_POINTOPOINT) {
if (ia->ia_dstaddr.sin_family != AF_INET)
return (0);
flags |= RTF_HOST;
}
if (!vhid && (error = in_addprefix(ia, flags)) != 0)
return (error);
if (ia->ia_addr.sin_addr.s_addr == INADDR_ANY)
return (0);
if (ifp->if_flags & IFF_POINTOPOINT &&
ia->ia_dstaddr.sin_addr.s_addr == ia->ia_addr.sin_addr.s_addr)
return (0);
/*
* add a loopback route to self
*/
if (V_useloopback && !vhid && !(ifp->if_flags & IFF_LOOPBACK)) {
struct route ia_ro;
bzero(&ia_ro, sizeof(ia_ro));
*((struct sockaddr_in *)(&ia_ro.ro_dst)) = ia->ia_addr;
rtalloc_ign_fib(&ia_ro, 0, RT_DEFAULT_FIB);
if ((ia_ro.ro_rt != NULL) && (ia_ro.ro_rt->rt_ifp != NULL) &&
(ia_ro.ro_rt->rt_ifp == V_loif)) {
RT_LOCK(ia_ro.ro_rt);
RT_ADDREF(ia_ro.ro_rt);
RTFREE_LOCKED(ia_ro.ro_rt);
} else
error = ifa_add_loopback_route((struct ifaddr *)ia,
(struct sockaddr *)&ia->ia_addr);
if (error == 0)
ia->ia_flags |= IFA_RTSELF;
if (ia_ro.ro_rt != NULL)
RTFREE(ia_ro.ro_rt);
}
1994-05-24 10:09:53 +00:00
return (error);
}
/*
 * Evaluates to RTF_HOST when the interface behind in_ifaddr `x' has
 * IFF_LOOPBACK or IFF_POINTOPOINT set, and to 0 otherwise.
 */
#define	rtinitflags(x)							\
	(((x)->ia_ifp->if_flags & (IFF_LOOPBACK | IFF_POINTOPOINT))	\
	    ? RTF_HOST : 0)
/*
* Check if we have a route for the given prefix already or add one accordingly.
*/
int
in_addprefix(struct in_ifaddr *target, int flags)
{
struct in_ifaddr *ia;
struct in_addr prefix, mask, p, m;
MFC changes relating to running multiple interfaces on different fibs but with addresses on the same subnet. MFC r266860 Fix unintended KBI change from r264905. Add _fib versions of ifa_ifwithnet() and ifa_ifwithdstaddr() The legacy functions will call the _fib() versions with RT_ALL_FIBS, preserving legacy behavior. sys/net/if_var.h sys/net/if.c Add legacy-compatible functions as described above. Ensure legacy behavior when RT_ALL_FIBS is passed as fibnum. sys/netinet/in_pcb.c sys/netinet/ip_output.c sys/netinet/ip_options.c sys/net/route.c sys/net/rtsock.c sys/netinet6/nd6.c Call with _fib() functions if we must use a specific fib, or the legacy functions otherwise. tests/sys/netinet/fibs_test.sh tests/sys/netinet/udp_dontroute.c Improve the udp_dontroute test. The bug that this test exercises is that ifa_ifwithnet() will return the wrong address, if multiple interfaces have addresses on the same subnet but with different fibs. The previous version of the test only considered one possible failure mode: that ifa_ifwithnet_fib() might fail to find any suitable address at all. The new version also checks whether ifa_ifwithnet_fib() finds the correct address by checking where the ARP request goes. MFC r264917 Style fixes, mostly trailing whitespace elimination. No functional change. MFC r264905 Fix subnet and default routes on different FIBs on the same subnet. These two bugs are closely related. The root cause is that ifa_ifwithnet does not consider FIBs when searching for an interface address. sys/net/if_var.h sys/net/if.c Add a fib argument to ifa_ifwithnet and ifa_ifwithdstadddr. Those functions will only return an address whose interface fib equals the argument. sys/net/route.c Update calls to ifa_ifwithnet and ifa_ifwithdstaddr with fib arguments. sys/netinet/in.c Update in_addprefix to consider the interface fib when adding prefixes. This will prevent it from not adding a subnet route when one already exists on a different fib. 
sys/net/rtsock.c sys/netinet/in_pcb.c sys/netinet/ip_output.c sys/netinet/ip_options.c sys/netinet6/nd6.c Add RT_DEFAULT_FIB arguments to ifa_ifwithdstaddr and ifa_ifwithnet. In some cases it there wasn't a clear specific fib number to use. In others, I was unable to test those functions so I chose RT_DEFAULT_FIB to minimize divergence from current behavior. I will fix some of the latter changes along with PR kern/187553. tests/sys/netinet/fibs_test.sh tests/sys/netinet/udp_dontroute.c tests/sys/netinet/Makefile Revert r263738. The udp_dontroute test was right all along. However, bugs kern/187550 and kern/187553 cancelled each other out when it came to this test. Because of kern/187553, ifa_ifwithnet searched the default fib instead of the requested one, but because of kern/187550, there was an applicable subnet route on the default fib. The new test added in r263738 doesn't work right, however. I can verify with dtrace that ifa_ifwithnet returned the wrong address before I applied this commit, but route(8) miraculously found the correct interface to use anyway. I don't know how. Clear expected failure messages for kern/187550 and kern/187552. MFC r263738 tests/sys/netinet/Makefile tests/sys/netinet/fibs.sh Replace fibs:udp_dontroute with fibs:src_addr_selection_by_subnet. The original test was poorly written; it was actually testing kern/167947 instead of the desired kern/187553. The root cause of the bug is that ifa_ifwithnet did not have a fib argument. The new test more directly targets that behavior. tests/sys/netinet/udp_dontroute.c Delete the auxilliary binary used by the old test
2014-06-06 20:35:40 +00:00
int error;
if ((flags & RTF_HOST) != 0) {
prefix = target->ia_dstaddr.sin_addr;
mask.s_addr = 0;
} else {
prefix = target->ia_addr.sin_addr;
mask = target->ia_sockmask.sin_addr;
prefix.s_addr &= mask.s_addr;
}
IN_IFADDR_RLOCK();
MFC changes relating to running multiple interfaces on different fibs but with addresses on the same subnet. MFC r266860 Fix unintended KBI change from r264905. Add _fib versions of ifa_ifwithnet() and ifa_ifwithdstaddr() The legacy functions will call the _fib() versions with RT_ALL_FIBS, preserving legacy behavior. sys/net/if_var.h sys/net/if.c Add legacy-compatible functions as described above. Ensure legacy behavior when RT_ALL_FIBS is passed as fibnum. sys/netinet/in_pcb.c sys/netinet/ip_output.c sys/netinet/ip_options.c sys/net/route.c sys/net/rtsock.c sys/netinet6/nd6.c Call with _fib() functions if we must use a specific fib, or the legacy functions otherwise. tests/sys/netinet/fibs_test.sh tests/sys/netinet/udp_dontroute.c Improve the udp_dontroute test. The bug that this test exercises is that ifa_ifwithnet() will return the wrong address, if multiple interfaces have addresses on the same subnet but with different fibs. The previous version of the test only considered one possible failure mode: that ifa_ifwithnet_fib() might fail to find any suitable address at all. The new version also checks whether ifa_ifwithnet_fib() finds the correct address by checking where the ARP request goes. MFC r264917 Style fixes, mostly trailing whitespace elimination. No functional change. MFC r264905 Fix subnet and default routes on different FIBs on the same subnet. These two bugs are closely related. The root cause is that ifa_ifwithnet does not consider FIBs when searching for an interface address. sys/net/if_var.h sys/net/if.c Add a fib argument to ifa_ifwithnet and ifa_ifwithdstadddr. Those functions will only return an address whose interface fib equals the argument. sys/net/route.c Update calls to ifa_ifwithnet and ifa_ifwithdstaddr with fib arguments. sys/netinet/in.c Update in_addprefix to consider the interface fib when adding prefixes. This will prevent it from not adding a subnet route when one already exists on a different fib. 
sys/net/rtsock.c sys/netinet/in_pcb.c sys/netinet/ip_output.c sys/netinet/ip_options.c sys/netinet6/nd6.c Add RT_DEFAULT_FIB arguments to ifa_ifwithdstaddr and ifa_ifwithnet. In some cases it there wasn't a clear specific fib number to use. In others, I was unable to test those functions so I chose RT_DEFAULT_FIB to minimize divergence from current behavior. I will fix some of the latter changes along with PR kern/187553. tests/sys/netinet/fibs_test.sh tests/sys/netinet/udp_dontroute.c tests/sys/netinet/Makefile Revert r263738. The udp_dontroute test was right all along. However, bugs kern/187550 and kern/187553 cancelled each other out when it came to this test. Because of kern/187553, ifa_ifwithnet searched the default fib instead of the requested one, but because of kern/187550, there was an applicable subnet route on the default fib. The new test added in r263738 doesn't work right, however. I can verify with dtrace that ifa_ifwithnet returned the wrong address before I applied this commit, but route(8) miraculously found the correct interface to use anyway. I don't know how. Clear expected failure messages for kern/187550 and kern/187552. MFC r263738 tests/sys/netinet/Makefile tests/sys/netinet/fibs.sh Replace fibs:udp_dontroute with fibs:src_addr_selection_by_subnet. The original test was poorly written; it was actually testing kern/167947 instead of the desired kern/187553. The root cause of the bug is that ifa_ifwithnet did not have a fib argument. The new test more directly targets that behavior. tests/sys/netinet/udp_dontroute.c Delete the auxilliary binary used by the old test
2014-06-06 20:35:40 +00:00
/* Look for an existing address with the same prefix, mask, and fib */
TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
if (rtinitflags(ia)) {
p = ia->ia_dstaddr.sin_addr;
if (prefix.s_addr != p.s_addr)
continue;
} else {
p = ia->ia_addr.sin_addr;
m = ia->ia_sockmask.sin_addr;
p.s_addr &= m.s_addr;
if (prefix.s_addr != p.s_addr ||
mask.s_addr != m.s_addr)
continue;
}
MFC changes relating to running multiple interfaces on different fibs but with addresses on the same subnet. MFC r266860 Fix unintended KBI change from r264905. Add _fib versions of ifa_ifwithnet() and ifa_ifwithdstaddr() The legacy functions will call the _fib() versions with RT_ALL_FIBS, preserving legacy behavior. sys/net/if_var.h sys/net/if.c Add legacy-compatible functions as described above. Ensure legacy behavior when RT_ALL_FIBS is passed as fibnum. sys/netinet/in_pcb.c sys/netinet/ip_output.c sys/netinet/ip_options.c sys/net/route.c sys/net/rtsock.c sys/netinet6/nd6.c Call with _fib() functions if we must use a specific fib, or the legacy functions otherwise. tests/sys/netinet/fibs_test.sh tests/sys/netinet/udp_dontroute.c Improve the udp_dontroute test. The bug that this test exercises is that ifa_ifwithnet() will return the wrong address, if multiple interfaces have addresses on the same subnet but with different fibs. The previous version of the test only considered one possible failure mode: that ifa_ifwithnet_fib() might fail to find any suitable address at all. The new version also checks whether ifa_ifwithnet_fib() finds the correct address by checking where the ARP request goes. MFC r264917 Style fixes, mostly trailing whitespace elimination. No functional change. MFC r264905 Fix subnet and default routes on different FIBs on the same subnet. These two bugs are closely related. The root cause is that ifa_ifwithnet does not consider FIBs when searching for an interface address. sys/net/if_var.h sys/net/if.c Add a fib argument to ifa_ifwithnet and ifa_ifwithdstadddr. Those functions will only return an address whose interface fib equals the argument. sys/net/route.c Update calls to ifa_ifwithnet and ifa_ifwithdstaddr with fib arguments. sys/netinet/in.c Update in_addprefix to consider the interface fib when adding prefixes. This will prevent it from not adding a subnet route when one already exists on a different fib. 
sys/net/rtsock.c sys/netinet/in_pcb.c sys/netinet/ip_output.c sys/netinet/ip_options.c sys/netinet6/nd6.c Add RT_DEFAULT_FIB arguments to ifa_ifwithdstaddr and ifa_ifwithnet. In some cases it there wasn't a clear specific fib number to use. In others, I was unable to test those functions so I chose RT_DEFAULT_FIB to minimize divergence from current behavior. I will fix some of the latter changes along with PR kern/187553. tests/sys/netinet/fibs_test.sh tests/sys/netinet/udp_dontroute.c tests/sys/netinet/Makefile Revert r263738. The udp_dontroute test was right all along. However, bugs kern/187550 and kern/187553 cancelled each other out when it came to this test. Because of kern/187553, ifa_ifwithnet searched the default fib instead of the requested one, but because of kern/187550, there was an applicable subnet route on the default fib. The new test added in r263738 doesn't work right, however. I can verify with dtrace that ifa_ifwithnet returned the wrong address before I applied this commit, but route(8) miraculously found the correct interface to use anyway. I don't know how. Clear expected failure messages for kern/187550 and kern/187552. MFC r263738 tests/sys/netinet/Makefile tests/sys/netinet/fibs.sh Replace fibs:udp_dontroute with fibs:src_addr_selection_by_subnet. The original test was poorly written; it was actually testing kern/167947 instead of the desired kern/187553. The root cause of the bug is that ifa_ifwithnet did not have a fib argument. The new test more directly targets that behavior. tests/sys/netinet/udp_dontroute.c Delete the auxilliary binary used by the old test
2014-06-06 20:35:40 +00:00
if (target->ia_ifp->if_fib != ia->ia_ifp->if_fib)
continue;
/*
* If we got a matching prefix route inserted by other
* interface address, we are done here.
*/
if (ia->ia_flags & IFA_ROUTE) {
#ifdef RADIX_MPATH
if (ia->ia_addr.sin_addr.s_addr ==
target->ia_addr.sin_addr.s_addr) {
IN_IFADDR_RUNLOCK();
return (EEXIST);
} else
break;
#endif
if (V_nosameprefix) {
IN_IFADDR_RUNLOCK();
return (EEXIST);
} else {
MFC changes relating to running multiple interfaces on different fibs but with addresses on the same subnet. MFC r266860 Fix unintended KBI change from r264905. Add _fib versions of ifa_ifwithnet() and ifa_ifwithdstaddr() The legacy functions will call the _fib() versions with RT_ALL_FIBS, preserving legacy behavior. sys/net/if_var.h sys/net/if.c Add legacy-compatible functions as described above. Ensure legacy behavior when RT_ALL_FIBS is passed as fibnum. sys/netinet/in_pcb.c sys/netinet/ip_output.c sys/netinet/ip_options.c sys/net/route.c sys/net/rtsock.c sys/netinet6/nd6.c Call with _fib() functions if we must use a specific fib, or the legacy functions otherwise. tests/sys/netinet/fibs_test.sh tests/sys/netinet/udp_dontroute.c Improve the udp_dontroute test. The bug that this test exercises is that ifa_ifwithnet() will return the wrong address, if multiple interfaces have addresses on the same subnet but with different fibs. The previous version of the test only considered one possible failure mode: that ifa_ifwithnet_fib() might fail to find any suitable address at all. The new version also checks whether ifa_ifwithnet_fib() finds the correct address by checking where the ARP request goes. MFC r264917 Style fixes, mostly trailing whitespace elimination. No functional change. MFC r264905 Fix subnet and default routes on different FIBs on the same subnet. These two bugs are closely related. The root cause is that ifa_ifwithnet does not consider FIBs when searching for an interface address. sys/net/if_var.h sys/net/if.c Add a fib argument to ifa_ifwithnet and ifa_ifwithdstadddr. Those functions will only return an address whose interface fib equals the argument. sys/net/route.c Update calls to ifa_ifwithnet and ifa_ifwithdstaddr with fib arguments. sys/netinet/in.c Update in_addprefix to consider the interface fib when adding prefixes. This will prevent it from not adding a subnet route when one already exists on a different fib. 
sys/net/rtsock.c sys/netinet/in_pcb.c sys/netinet/ip_output.c sys/netinet/ip_options.c sys/netinet6/nd6.c Add RT_DEFAULT_FIB arguments to ifa_ifwithdstaddr and ifa_ifwithnet. In some cases it there wasn't a clear specific fib number to use. In others, I was unable to test those functions so I chose RT_DEFAULT_FIB to minimize divergence from current behavior. I will fix some of the latter changes along with PR kern/187553. tests/sys/netinet/fibs_test.sh tests/sys/netinet/udp_dontroute.c tests/sys/netinet/Makefile Revert r263738. The udp_dontroute test was right all along. However, bugs kern/187550 and kern/187553 cancelled each other out when it came to this test. Because of kern/187553, ifa_ifwithnet searched the default fib instead of the requested one, but because of kern/187550, there was an applicable subnet route on the default fib. The new test added in r263738 doesn't work right, however. I can verify with dtrace that ifa_ifwithnet returned the wrong address before I applied this commit, but route(8) miraculously found the correct interface to use anyway. I don't know how. Clear expected failure messages for kern/187550 and kern/187552. MFC r263738 tests/sys/netinet/Makefile tests/sys/netinet/fibs.sh Replace fibs:udp_dontroute with fibs:src_addr_selection_by_subnet. The original test was poorly written; it was actually testing kern/167947 instead of the desired kern/187553. The root cause of the bug is that ifa_ifwithnet did not have a fib argument. The new test more directly targets that behavior. tests/sys/netinet/udp_dontroute.c Delete the auxilliary binary used by the old test
2014-06-06 20:35:40 +00:00
int fibnum;
fibnum = rt_add_addr_allfibs ? RT_ALL_FIBS :
target->ia_ifp->if_fib;
Merge 260488, r260508. r260488: Split rt_newaddrmsg_fib() into two different functions. Adding/deleting interface addresses involves access to 3 different subsystems, int different parts of code. Each call can fail, so reporting successful operation by rtsock in the middle of the process error-prone. Further split routing notification API and actual rtsock calls via creating public-available rt_addrmsg() / rt_routemsg() functions with "private" rtsock_* backend. r260508: Simplify inet alias handling code: if we're adding/removing alias which has the same prefix as some other alias on the same interface, use newly-added rt_addrmsg() instead of hand-rolled in_addralias_rtmsg(). This eliminates the following rtsock messages: Pinned RTM_ADD for prefix (for alias addition). Pinned RTM_DELETE for prefix (for alias withdrawal). Example (got 10.0.0.1/24 on vlan4, playing with 10.0.0.2/24): before commit, addition: got message of size 116 on Fri Jan 10 14:13:15 2014 RTM_NEWADDR: address being added to iface: len 116, metric 0, flags: sockaddrs: <NETMASK,IFP,IFA,BRD> 255.255.255.0 vlan4:8.0.27.c5.29.d4 10.0.0.2 10.0.0.255 got message of size 192 on Fri Jan 10 14:13:15 2014 RTM_ADD: Add Route: len 192, pid: 0, seq 0, errno 0, flags:<UP,PINNED> locks: inits: sockaddrs: <DST,GATEWAY,NETMASK> 10.0.0.0 10.0.0.2 (255) ffff ffff ff after commit, addition: got message of size 116 on Fri Jan 10 13:56:26 2014 RTM_NEWADDR: address being added to iface: len 116, metric 0, flags: sockaddrs: <NETMASK,IFP,IFA,BRD> 255.255.255.0 vlan4:8.0.27.c5.29.d4 14.0.0.2 14.0.0.255 before commit, wihdrawal: got message of size 192 on Fri Jan 10 13:58:59 2014 RTM_DELETE: Delete Route: len 192, pid: 0, seq 0, errno 0, flags:<UP,PINNED> locks: inits: sockaddrs: <DST,GATEWAY,NETMASK> 10.0.0.0 10.0.0.2 (255) ffff ffff ff got message of size 116 on Fri Jan 10 13:58:59 2014 RTM_DELADDR: address being removed from iface: len 116, metric 0, flags: sockaddrs: <NETMASK,IFP,IFA,BRD> 255.255.255.0 
vlan4:8.0.27.c5.29.d4 10.0.0.2 10.0.0.255 adter commit, withdrawal: got message of size 116 on Fri Jan 10 14:14:11 2014 RTM_DELADDR: address being removed from iface: len 116, metric 0, flags: sockaddrs: <NETMASK,IFP,IFA,BRD> 255.255.255.0 vlan4:8.0.27.c5.29.d4 10.0.0.2 10.0.0.255 Sending both RTM_ADD/RTM_DELETE messages to rtsock is completely wrong (and requires some hacks to keep prefix in route table on RTM_DELETE). I've tested this change with quagga (no change) and bird (*). bird alias handling is already broken in *BSD sysdep code, so nothing changes here, too. I'm going to MFC this change if there will be no complains about behavior change. While here, fix some style(9) bugs introduced by r260488 (pointed by glebius and bde).
2014-05-08 21:03:31 +00:00
rt_addrmsg(RTM_ADD, &target->ia_ifa, fibnum);
IN_IFADDR_RUNLOCK();
return (0);
}
}
}
IN_IFADDR_RUNLOCK();
/*
* No-one seem to have this prefix route, so we try to insert it.
*/
error = rtinit(&target->ia_ifa, (int)RTM_ADD, flags);
if (!error)
target->ia_flags |= IFA_ROUTE;
return (error);
}
/*
 * If there is no other address in the system that can serve a route to the
 * same prefix, remove the route.  Hand over the route to the new address
 * otherwise.
 *
 * target: the address whose prefix route is being withdrawn.
 * flags:  LLE_STATIC is tested here to decide whether the loopback route
 *         and the ARP state of the address are torn down; the value is also
 *         forwarded unchanged to lltable_prefix_free().
 *
 * Returns 0 when target does not own the prefix route (only an RTM_DELETE
 * address message is emitted in that case).  Otherwise returns the result of
 * the last rtinit() call: the RTM_ADD on the address that inherits the route,
 * or the RTM_DELETE on target when no other address can take it over.
 */
int
in_scrubprefix(struct in_ifaddr *target, u_int flags)
{
	struct in_ifaddr *ia;
	struct in_addr prefix, mask, p, m;
	int error = 0, fibnum;
	struct sockaddr_in prefix0, mask0;

	fibnum = rt_add_addr_allfibs ? RT_ALL_FIBS : target->ia_ifp->if_fib;

	/*
	 * Remove the loopback route to the interface address.
	 * The "useloopback" setting is not consulted because if the
	 * user configures an interface address, turns off this
	 * setting, and then tries to delete that interface address,
	 * checking the current setting of "useloopback" would leave
	 * that interface address loopback route untouched, which
	 * would be wrong. Therefore the interface address loopback route
	 * deletion is unconditional.
	 */
	if ((target->ia_addr.sin_addr.s_addr != INADDR_ANY) &&
	    !(target->ia_ifp->if_flags & IFF_LOOPBACK) &&
	    (target->ia_flags & IFA_RTSELF)) {
		struct route ia_ro;
		int freeit = 0;

		bzero(&ia_ro, sizeof(ia_ro));
		*((struct sockaddr_in *)(&ia_ro.ro_dst)) = target->ia_addr;
		rtalloc_ign_fib(&ia_ro, 0, 0);
		/*
		 * NOTE(review): when the lookup yields a route that is not
		 * via V_loif, nothing below releases it; presumably
		 * rtalloc_ign_fib() returned it referenced -- confirm.
		 */
		if ((ia_ro.ro_rt != NULL) && (ia_ro.ro_rt->rt_ifp != NULL) &&
		    (ia_ro.ro_rt->rt_ifp == V_loif)) {
			RT_LOCK(ia_ro.ro_rt);
			if (ia_ro.ro_rt->rt_refcnt <= 1)
				freeit = 1;
			else if (flags & LLE_STATIC) {
				RT_REMREF(ia_ro.ro_rt);
				target->ia_flags &= ~IFA_RTSELF;
			}
			RTFREE_LOCKED(ia_ro.ro_rt);
		}
		if (freeit && (flags & LLE_STATIC)) {
			error = ifa_del_loopback_route((struct ifaddr *)target,
			    (struct sockaddr *)&target->ia_addr);
			if (error == 0)
				target->ia_flags &= ~IFA_RTSELF;
		}
		if ((flags & LLE_STATIC) &&
		    !(target->ia_ifp->if_flags & IFF_NOARP))
			/* remove arp cache */
			arp_ifscrub(target->ia_ifp,
			    IA_SIN(target)->sin_addr.s_addr);
	}

	/* Work out the prefix/mask pair that identifies target's route. */
	if (rtinitflags(target)) {
		prefix = target->ia_dstaddr.sin_addr;
		mask.s_addr = 0;
	} else {
		prefix = target->ia_addr.sin_addr;
		mask = target->ia_sockmask.sin_addr;
		prefix.s_addr &= mask.s_addr;
	}

	/* target never installed the route: just announce the removal. */
	if ((target->ia_flags & IFA_ROUTE) == 0) {
		rt_addrmsg(RTM_DELETE, &target->ia_ifa, fibnum);
		return (0);
	}

	IN_IFADDR_RLOCK();
	TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
		if (rtinitflags(ia)) {
			p = ia->ia_dstaddr.sin_addr;

			if (prefix.s_addr != p.s_addr)
				continue;
		} else {
			p = ia->ia_addr.sin_addr;
			m = ia->ia_sockmask.sin_addr;
			p.s_addr &= m.s_addr;

			if (prefix.s_addr != p.s_addr ||
			    mask.s_addr != m.s_addr)
				continue;
		}

		if ((ia->ia_ifp->if_flags & IFF_UP) == 0)
			continue;

		/*
		 * If we got a matching prefix address, move IFA_ROUTE and
		 * the route itself to it.  Make sure that routing daemons
		 * get a heads-up.
		 *
		 * target itself still carries IFA_ROUTE at this point, so
		 * this test also keeps it from being picked as its own heir.
		 */
		if ((ia->ia_flags & IFA_ROUTE) == 0) {
			/* Hold ia across the unlocked rtinit() calls. */
			ifa_ref(&ia->ia_ifa);
			IN_IFADDR_RUNLOCK();
			error = rtinit(&(target->ia_ifa), (int)RTM_DELETE,
			    rtinitflags(target));
			if (error == 0)
				target->ia_flags &= ~IFA_ROUTE;
			else
				log(LOG_INFO, "in_scrubprefix: err=%d, old prefix delete failed\n",
					error);
			error = rtinit(&ia->ia_ifa, (int)RTM_ADD,
			    rtinitflags(ia) | RTF_UP);
			if (error == 0)
				ia->ia_flags |= IFA_ROUTE;
			else
				log(LOG_INFO, "in_scrubprefix: err=%d, new prefix add failed\n",
					error);
			ifa_free(&ia->ia_ifa);
			return (error);
		}
	}
	IN_IFADDR_RUNLOCK();

	/*
	 * remove all L2 entries on the given prefix
	 */
	bzero(&prefix0, sizeof(prefix0));
	prefix0.sin_len = sizeof(prefix0);
	prefix0.sin_family = AF_INET;
	prefix0.sin_addr.s_addr = target->ia_subnet;
	bzero(&mask0, sizeof(mask0));
	mask0.sin_len = sizeof(mask0);
	mask0.sin_family = AF_INET;
	mask0.sin_addr.s_addr = target->ia_subnetmask;
	lltable_prefix_free(AF_INET, (struct sockaddr *)&prefix0,
	    (struct sockaddr *)&mask0, flags);

	/*
	 * As no-one seem to have this prefix, we can remove the route.
	 */
	error = rtinit(&(target->ia_ifa), (int)RTM_DELETE, rtinitflags(target));
	if (error == 0)
		target->ia_flags &= ~IFA_ROUTE;
	else
		log(LOG_INFO, "in_scrubprefix: err=%d, prefix delete failed\n", error);
	return (error);
}
#undef rtinitflags
1994-05-24 10:09:53 +00:00
/*
* Return 1 if the address might be a local broadcast address.
*/
int
in_broadcast(struct in_addr in, struct ifnet *ifp)
1994-05-24 10:09:53 +00:00
{
register struct ifaddr *ifa;
u_long t;
if (in.s_addr == INADDR_BROADCAST ||
in.s_addr == INADDR_ANY)
return (1);
1994-05-24 10:09:53 +00:00
if ((ifp->if_flags & IFF_BROADCAST) == 0)
return (0);
1994-05-24 10:09:53 +00:00
t = ntohl(in.s_addr);
/*
* Look through the list of addresses for a match
* with a broadcast address.
*/
#define ia ((struct in_ifaddr *)ifa)
2001-03-16 20:00:53 +00:00
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
1994-05-24 10:09:53 +00:00
if (ifa->ifa_addr->sa_family == AF_INET &&
(in.s_addr == ia->ia_broadaddr.sin_addr.s_addr ||
/*
* Check for old-style (host 0) broadcast, but
* taking into account that RFC 3021 obsoletes it.
1994-05-24 10:09:53 +00:00
*/
(ia->ia_subnetmask != IN_RFC3021_MASK &&
t == ia->ia_subnet)) &&
/*
* Check for an all one subnetmask. These
* only exist when an interface gets a secondary
* address.
*/
ia->ia_subnetmask != (u_long)0xffffffff)
return (1);
1994-05-24 10:09:53 +00:00
return (0);
#undef ia
}
/*
 * IPv4 teardown hook for an interface that is going away: purge the
 * interface from the raw-IP, UDP and UDP-Lite PCB tables, then release the
 * IPv4 multicast state attached to it.
 */
void
in_ifdetach(struct ifnet *ifp)
{

	/* Protocol control blocks first ... */
	in_pcbpurgeif0(&V_ripcbinfo, ifp);
	in_pcbpurgeif0(&V_udbinfo, ifp);
	in_pcbpurgeif0(&V_ulitecbinfo, ifp);

	/* ... then the multicast memberships. */
	in_purgemaddrs(ifp);
}
/*
* Delete all IPv4 multicast address records, and associated link-layer
* multicast address records, associated with ifp.
* XXX It looks like domifdetach runs AFTER the link layer cleanup.
* XXX This should not race with ifma_protospec being set during
* a new allocation, if it does, we have bigger problems.
*/
static void
in_purgemaddrs(struct ifnet *ifp)
{
LIST_HEAD(,in_multi) purgeinms;
struct in_multi *inm, *tinm;
struct ifmultiaddr *ifma;
LIST_INIT(&purgeinms);
IN_MULTI_LOCK();
/*
* Extract list of in_multi associated with the detaching ifp
* which the PF_INET layer is about to release.
* We need to do this as IF_ADDR_LOCK() may be re-acquired
* by code further down.
*/
IF_ADDR_RLOCK(ifp);
TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
if (ifma->ifma_addr->sa_family != AF_INET ||
ifma->ifma_protospec == NULL)
continue;
#if 0
KASSERT(ifma->ifma_protospec != NULL,
("%s: ifma_protospec is NULL", __func__));
#endif
inm = (struct in_multi *)ifma->ifma_protospec;
LIST_INSERT_HEAD(&purgeinms, inm, inm_link);
}
IF_ADDR_RUNLOCK(ifp);
LIST_FOREACH_SAFE(inm, &purgeinms, inm_link, tinm) {
LIST_REMOVE(inm, inm_link);
inm_release_locked(inm);
}
igmp_ifdetach(ifp);
IN_MULTI_UNLOCK();
}
/*
 * IPv4 link-layer table entry: the generic llentry extended with the IPv4
 * address it describes.  Entries are allocated as a whole by
 * in_lltable_new() and handed around as a pointer to 'base'.
 */
struct in_llentry {
/*
 * Generic part.  Keep it as the first member: in_lltable_free() passes the
 * llentry pointer straight to free().
 */
struct llentry base;
/* Copy of the L3 address the entry was created for. */
struct sockaddr_in l3_addr4;
};
/*
 * Final disposal of an IPv4 link-layer entry; installed as the entry's
 * lle_free method by in_lltable_new().
 *
 * The entry's write lock is dropped here, so it must be held on entry; the
 * lock is then destroyed and the memory returned to M_LLTABLE.  This is
 * reached from the timer functions such as arptimer() and
 * nd6_llinfo_timer(), and the caller does the locking.  'llt' is not used.
 */
static void
in_lltable_free(struct lltable *llt, struct llentry *lle)
{

	LLE_WUNLOCK(lle);
	LLE_LOCK_DESTROY(lle);
	free(lle, M_LLTABLE);
}
/*
 * Allocate and initialise a fresh IPv4 link-layer entry for l3addr.
 *
 * The allocation is zero-filled and does not sleep (M_NOWAIT); NULL is
 * returned when it fails.  On success the generic part of the entry is
 * returned holding one reference, with in_lltable_free() as its destructor.
 * 'flags' is not used.
 */
static struct llentry *
in_lltable_new(const struct sockaddr *l3addr, u_int flags)
{
	struct in_llentry *entry;
	struct llentry *lle;

	entry = malloc(sizeof(*entry), M_LLTABLE, M_NOWAIT | M_ZERO);
	if (entry == NULL)		/* NB: caller generates msg */
		return (NULL);
	lle = &entry->base;

	/*
	 * Start out already expired: for IPv4 this will trigger
	 * "arpresolve" to generate an ARP request.
	 */
	lle->la_expire = time_uptime;
	entry->l3_addr4 = *(const struct sockaddr_in *)l3addr;
	lle->lle_refcnt = 1;
	lle->lle_free = in_lltable_free;

	/* The entry lock also serves as the lock of the entry's timer. */
	LLE_LOCK_INIT(lle);
	callout_init_rw(&lle->la_timer, &lle->lle_lock,
	    CALLOUT_RETURNUNLOCKED);

	return (lle);
}
/*
 * True when address d falls inside prefix a under mask m; all three are
 * pointers to struct sockaddr_in.
 *
 * Byte order is deliberately mixed: only d is passed through ntohl(), while
 * a and m are used as stored.  The prefix and mask must therefore be given
 * in host byte order, which is how in_scrubprefix() builds them (from
 * ia_subnet and ia_subnetmask).  Each argument is evaluated exactly once.
 */
#define IN_ARE_MASKED_ADDR_EQUAL(d, a, m) ( \
(((ntohl((d)->sin_addr.s_addr) ^ (a)->sin_addr.s_addr) & (m)->sin_addr.s_addr)) == 0 )
/*
 * Discard the link-layer entries of llt whose address lies in prefix/mask.
 *
 * prefix, mask: struct sockaddr_in values, interpreted by
 *               IN_ARE_MASKED_ADDR_EQUAL().
 * flags:        with LLE_STATIC set, static entries are removed as well;
 *               without it they are left in place.
 *
 * The whole table is scanned with the interface's AF data write lock held.
 * The value returned by llentry_free() for each removed entry is added to
 * the ARP "dropped" statistic.
 */
static void
in_lltable_prefix_free(struct lltable *llt, const struct sockaddr *prefix,
    const struct sockaddr *mask, u_int flags)
{
	const struct sockaddr_in *net = (const struct sockaddr_in *)prefix;
	const struct sockaddr_in *netmask = (const struct sockaddr_in *)mask;
	struct llentry *lle, *next;
	size_t pkts_dropped;
	int bucket;

	IF_AFDATA_WLOCK(llt->llt_ifp);
	for (bucket = 0; bucket < LLTBL_HASHTBL_SIZE; bucket++) {
		LIST_FOREACH_SAFE(lle, &llt->lle_head[bucket], lle_next, next) {
			/* Not on the prefix being purged. */
			if (!IN_ARE_MASKED_ADDR_EQUAL(satosin(L3_ADDR(lle)),
			    net, netmask))
				continue;

			/*
			 * (flags & LLE_STATIC) means deleting all entries
			 * including static ARP entries; otherwise static
			 * entries survive.
			 */
			if ((flags & LLE_STATIC) == 0 &&
			    (lle->la_flags & LLE_STATIC) != 0)
				continue;

			LLE_WLOCK(lle);
			/* A successfully stopped timer gives up its reference. */
			if (callout_stop(&lle->la_timer))
				LLE_REMREF(lle);
			pkts_dropped = llentry_free(lle);
			ARPSTAT_ADD(dropped, pkts_dropped);
		}
	}
	IF_AFDATA_WUNLOCK(llt->llt_ifp);
}
/*
 * Decide whether a link-layer entry for l3addr may be created on ifp by
 * consulting the routing table of the interface's FIB.
 *
 * ifp:    interface the entry would belong to.
 * flags:  not used.
 * l3addr: AF_INET address to validate (asserted).
 *
 * Returns 0 when the address is acceptable and EINVAL when no route exists,
 * when the route goes through a gateway that does not satisfy the special
 * case below, or when the address is not covered by the matching network
 * route.  The route obtained from rtalloc1_fib() is released with
 * RTFREE_LOCKED() on every path.
 */
static int
in_lltable_rtcheck(struct ifnet *ifp, u_int flags, const struct sockaddr *l3addr)
{
	const struct sockaddr_in *dst, *gw;
	struct rtentry *rt;
	int error;

	KASSERT(l3addr->sa_family == AF_INET,
	    ("sin_family %d", l3addr->sa_family));

	dst = (const struct sockaddr_in *)l3addr;

	/* XXX rtalloc1_fib should take a const param */
	rt = rtalloc1_fib(__DECONST(struct sockaddr *, l3addr), 0, 0,
	    ifp->if_fib);
	if (rt == NULL)
		return (EINVAL);

	error = EINVAL;

	/*
	 * If the gateway for an existing host route matches the target L3
	 * address, which is a special route inserted by some implementation
	 * such as MANET, and the interface is of the correct type, then
	 * allow for ARP to proceed.
	 *
	 * The addresses are compared through sin_addr.  Comparing the first
	 * sizeof(in_addr_t) bytes of sa_data instead would cover sin_port
	 * and only the upper half of the address, because sa_data starts at
	 * the port field of a sockaddr_in.
	 */
	if (rt->rt_flags & RTF_GATEWAY) {
		gw = (const struct sockaddr_in *)rt->rt_gateway;
		if (!(rt->rt_flags & RTF_HOST) || !rt->rt_ifp ||
		    rt->rt_ifp->if_type != IFT_ETHER ||
		    (rt->rt_ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) != 0 ||
		    gw->sin_family != AF_INET ||
		    gw->sin_addr.s_addr != dst->sin_addr.s_addr)
			goto out;
	}

	/*
	 * Make sure that at least the destination address is covered
	 * by the route. This is for handling the case where 2 or more
	 * interfaces have the same prefix. An incoming packet arrives
	 * on one interface and the corresponding outgoing packet leaves
	 * another interface.
	 */
	if (!(rt->rt_flags & RTF_HOST) && rt->rt_ifp != ifp) {
		const char *sa, *mask, *addr, *lim;
		int len;

		mask = (const char *)rt_mask(rt);
		/*
		 * Just being extra cautious to avoid some custom
		 * code getting into trouble.
		 */
		if (mask == NULL)
			goto out;

		/* Byte-wise masked comparison of route key and address. */
		sa = (const char *)rt_key(rt);
		addr = (const char *)l3addr;
		len = dst->sin_len;
		lim = addr + len;

		for ( ; addr < lim; sa++, mask++, addr++) {
			if ((*sa ^ *addr) & *mask) {
#ifdef DIAGNOSTIC
				log(LOG_INFO, "IPv4 address: \"%s\" is not on the network\n",
				    inet_ntoa(dst->sin_addr));
#endif
				goto out;
			}
		}
	}

	error = 0;
out:
	RTFREE_LOCKED(rt);
	return (error);
}
/*
 * Look up the IPv4 link-layer entry for l3addr in llt, optionally
 * creating (LLE_CREATE) or deleting (LLE_DELETE) it.
 *
 * The caller must hold the interface's AF data lock; the creation path
 * additionally asserts that it is held for writing.  Entries already
 * marked LLE_DELETED are ignored by the search.
 *
 * Return value:
 *  - NULL when no entry matches and LLE_CREATE is not set, when the
 *    route check for a new entry fails, or when allocation fails;
 *  - (void *)-1 when a matching entry exists and LLE_DELETE is set
 *    (whether or not the entry was actually marked deleted);
 *  - otherwise the found or newly created entry, write locked when
 *    LLE_EXCLUSIVE is set in flags and read locked if not.
 */
static struct llentry *
in_lltable_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr)
{
	const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr;
	struct ifnet *ifp = llt->llt_ifp;
	struct llentry *lle;
	struct llentries *lleh;
	u_int hashkey;

	IF_AFDATA_LOCK_ASSERT(ifp);
	KASSERT(l3addr->sa_family == AF_INET,
	    ("sin_family %d", l3addr->sa_family));

	/* Search the hash bucket selected by the IPv4 address. */
	hashkey = sin->sin_addr.s_addr;
	lleh = &llt->lle_head[LLATBL_HASH(hashkey, LLTBL_HASHMASK)];
	LIST_FOREACH(lle, lleh, lle_next) {
		struct sockaddr_in *sa2 = satosin(L3_ADDR(lle));

		if (lle->la_flags & LLE_DELETED)
			continue;
		if (sa2->sin_addr.s_addr == sin->sin_addr.s_addr)
			break;
	}

	if (lle == NULL) {
#ifdef DIAGNOSTIC
		if (flags & LLE_DELETE)
			log(LOG_INFO, "interface address is missing from cache = %p in delete\n", lle);
#endif
		if (!(flags & LLE_CREATE))
			return (NULL);
		IF_AFDATA_WLOCK_ASSERT(ifp);
		/*
		 * A route that covers the given address must have
		 * been installed 1st because we are doing a resolution,
		 * verify this.
		 */
		if (!(flags & LLE_IFADDR) &&
		    in_lltable_rtcheck(ifp, flags, l3addr) != 0)
			goto done;

		lle = in_lltable_new(l3addr, flags);
		if (lle == NULL) {
			log(LOG_INFO, "lla_lookup: new lle malloc failed\n");
			goto done;
		}
		lle->la_flags = flags & ~LLE_CREATE;
		/*
		 * An entry for one of the interface's own addresses gets
		 * the interface's link-layer address and is marked valid
		 * and static right away.
		 */
		if ((flags & (LLE_CREATE | LLE_IFADDR)) == (LLE_CREATE | LLE_IFADDR)) {
			bcopy(IF_LLADDR(ifp), &lle->ll_addr, ifp->if_addrlen);
			lle->la_flags |= (LLE_VALID | LLE_STATIC);
		}

		lle->lle_tbl = llt;
		lle->lle_head = lleh;
		lle->la_flags |= LLE_LINKED;
		LIST_INSERT_HEAD(lleh, lle, lle_next);
	} else if (flags & LLE_DELETE) {
		/*
		 * An interface-address entry is only deleted when the
		 * request itself carries LLE_IFADDR.
		 */
		if (!(lle->la_flags & LLE_IFADDR) || (flags & LLE_IFADDR)) {
			LLE_WLOCK(lle);
			lle->la_flags |= LLE_DELETED;
			EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_DELETED);
#ifdef DIAGNOSTIC
			log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle);
#endif
			/*
			 * Static, non-ifaddr entries are freed here; any
			 * other entry is only marked and unlocked.
			 * NOTE(review): llentry_free() presumably consumes
			 * the write lock taken above -- confirm.
			 */
			if ((lle->la_flags &
			    (LLE_STATIC | LLE_IFADDR)) == LLE_STATIC)
				llentry_free(lle);
			else
				LLE_WUNLOCK(lle);
		}
		/* Sentinel result for delete requests on a found entry. */
		lle = (void *)-1;
	}

	/*
	 * NOTE(review): LLE_IS_VALID() presumably rejects both NULL and
	 * the (void *)-1 sentinel so that only real entries are locked.
	 */
	if (LLE_IS_VALID(lle)) {
		if (flags & LLE_EXCLUSIVE)
			LLE_WLOCK(lle);
		else
			LLE_RLOCK(lle);
	}
done:
	return (lle);
}
static int
in_lltable_dump(struct lltable *llt, struct sysctl_req *wr)
{
#define SIN(lle) ((struct sockaddr_in *) L3_ADDR(lle))
struct ifnet *ifp = llt->llt_ifp;
struct llentry *lle;
/* XXX stack use */
struct {
struct rt_msghdr rtm;
struct sockaddr_in sin;
struct sockaddr_dl sdl;
} arpc;
int error, i;
LLTABLE_LOCK_ASSERT();
error = 0;
for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) {
LIST_FOREACH(lle, &llt->lle_head[i], lle_next) {
struct sockaddr_dl *sdl;
2012-08-01 09:00:26 +00:00
/* skip deleted entries */
if ((lle->la_flags & LLE_DELETED) == LLE_DELETED)
continue;
/* Skip if jailed and not a valid IP of the prison. */
if (prison_if(wr->td->td_ucred, L3_ADDR(lle)) != 0)
continue;
/*
* produce a msg made of:
* struct rt_msghdr;
* struct sockaddr_in; (IPv4)
* struct sockaddr_dl;
*/
bzero(&arpc, sizeof(arpc));
arpc.rtm.rtm_msglen = sizeof(arpc);
arpc.rtm.rtm_version = RTM_VERSION;
arpc.rtm.rtm_type = RTM_GET;
arpc.rtm.rtm_flags = RTF_UP;
arpc.rtm.rtm_addrs = RTA_DST | RTA_GATEWAY;
arpc.sin.sin_family = AF_INET;
arpc.sin.sin_len = sizeof(arpc.sin);
arpc.sin.sin_addr.s_addr = SIN(lle)->sin_addr.s_addr;
/* publish */
if (lle->la_flags & LLE_PUB)
arpc.rtm.rtm_flags |= RTF_ANNOUNCE;
sdl = &arpc.sdl;
sdl->sdl_family = AF_LINK;
sdl->sdl_len = sizeof(*sdl);
sdl->sdl_index = ifp->if_index;
sdl->sdl_type = ifp->if_type;
if ((lle->la_flags & LLE_VALID) == LLE_VALID) {
sdl->sdl_alen = ifp->if_addrlen;
bcopy(&lle->ll_addr, LLADDR(sdl), ifp->if_addrlen);
} else {
sdl->sdl_alen = 0;
bzero(LLADDR(sdl), ifp->if_addrlen);
}
arpc.rtm.rtm_rmx.rmx_expire =
lle->la_flags & LLE_STATIC ? 0 : lle->la_expire;
2008-12-26 19:45:24 +00:00
arpc.rtm.rtm_flags |= (RTF_HOST | RTF_LLDATA);
if (lle->la_flags & LLE_STATIC)
arpc.rtm.rtm_flags |= RTF_STATIC;
arpc.rtm.rtm_index = ifp->if_index;
error = SYSCTL_OUT(wr, &arpc, sizeof(arpc));
if (error)
break;
}
}
return error;
#undef SIN
}
/*
 * Allocate the per-interface IPv4 state for ifp.
 *
 * Creates the AF_INET link-layer table and, when that succeeds, installs
 * the IPv4 prefix_free/lookup/dump methods on it; then attaches IGMP
 * state.  The result of lltable_init() is stored as is, so ii_llt is
 * NULL when table creation failed.
 *
 * Returns the new struct in_ifinfo as an opaque pointer.
 */
void *
in_domifattach(struct ifnet *ifp)
{
	struct in_ifinfo *info;
	struct lltable *table;

	info = malloc(sizeof(*info), M_IFADDR, M_WAITOK | M_ZERO);

	table = lltable_init(ifp, AF_INET);
	if (table != NULL) {
		table->llt_prefix_free = in_lltable_prefix_free;
		table->llt_lookup = in_lltable_lookup;
		table->llt_dump = in_lltable_dump;
	}
	info->ii_llt = table;

	info->ii_igmp = igmp_domifattach(ifp);

	return (info);
}
/*
 * Release the per-interface IPv4 state for ifp.  aux is the struct
 * in_ifinfo that this function frees: IGMP state is detached first,
 * then the link-layer table is freed, and finally the structure itself.
 */
void
in_domifdetach(struct ifnet *ifp, void *aux)
{
	struct in_ifinfo *info = aux;

	igmp_domifdetach(ifp);
	lltable_free(info->ii_llt);
	free(info, M_IFADDR);
}