freebsd-skq/sys/netinet/ip_carp.c

2255 lines
54 KiB
C
Raw Normal View History

/*
* Copyright (c) 2002 Michael Shalayeff. All rights reserved.
* Copyright (c) 2003 Ryan McBride. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_carp.h"
#include "opt_bpf.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/time.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/signalvar.h>
#include <sys/filio.h>
#include <sys/sockio.h>
#include <sys/socket.h>
#include <sys/vnode.h>
#include <sys/vimage.h>
#include <machine/stdarg.h>
#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/fddi.h>
#include <net/iso88025.h>
#include <net/if.h>
#include <net/if_clone.h>
2005-11-14 12:50:23 +00:00
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/route.h>
#ifdef INET
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/if_ether.h>
#include <machine/in_cksum.h>
#include <netinet/vinet.h>
#endif
#ifdef INET6
#include <netinet/icmp6.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
2005-07-25 12:36:43 +00:00
#include <netinet6/scope6_var.h>
#include <netinet6/nd6.h>
#include <netinet6/vinet6.h>
#endif
#include <crypto/sha1.h>
#include <netinet/ip_carp.h>
#define CARP_IFNAME "carp"
static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces");
SYSCTL_DECL(_net_inet_carp);
/*
 * Per-instance state of one carp pseudo-interface (one virtual host).
 * Allocated zeroed by carp_clone_create() and released by
 * carp_clone_destroy().
 */
struct carp_softc {
struct ifnet *sc_ifp; /* our own (pseudo) ifnet; see SC2IFP() */
struct ifnet *sc_carpdev; /* Pointer to parent interface, NULL if detached */
struct in_ifaddr *sc_ia; /* primary iface address */
struct ip_moptions sc_imo; /* IPv4 multicast options used by ip_output() */
#ifdef INET6
struct in6_ifaddr *sc_ia6; /* primary iface address v6 */
struct ip6_moptions sc_im6o; /* IPv6 multicast options used by ip6_output() */
#endif /* INET6 */
TAILQ_ENTRY(carp_softc) sc_list; /* link in parent's carp_if vhif_vrs list */
enum { INIT = 0, BACKUP, MASTER } sc_state;
int sc_flags_backup;
int sc_suppress; /* nonzero: this instance holds a carp_suppress_preempt ref */
int sc_sendad_errors; /* advertisement send failures (saturates at INT_MAX) */
#define CARP_SENDAD_MAX_ERRORS 3
int sc_sendad_success; /* successful sends since errors hit the max */
#define CARP_SENDAD_MIN_SUCCESS 3
int sc_vhid; /* virtual host id; -1 until configured */
int sc_advskew;
int sc_naddrs;
int sc_naddrs6;
int sc_advbase; /* seconds */
int sc_init_counter; /* nonzero: sc_counter must be (re)seeded randomly */
u_int64_t sc_counter; /* advertisement counter covered by the HMAC */
/* authentication */
#define CARP_HMAC_PAD 64
unsigned char sc_key[CARP_KEY_LEN];
unsigned char sc_pad[CARP_HMAC_PAD]; /* holds opad after carp_hmac_prepare() */
SHA1_CTX sc_sha1; /* precomputed first part of the inner hash */
struct callout sc_ad_tmo; /* advertisement timeout */
struct callout sc_md_tmo; /* master down timeout */
struct callout sc_md6_tmo; /* master down timeout (presumably IPv6; confirm) */
LIST_ENTRY(carp_softc) sc_next; /* link in the global carpif_list */
};
/* Map a softc to its own (pseudo) interface. */
#define SC2IFP(sc) ((sc)->sc_ifp)
/*
 * Global count of reasons to suppress preemption.  It is bumped when an
 * instance's send-error count reaches CARP_SENDAD_MAX_ERRORS and dropped
 * again on recovery or detach; exported read-only via sysctl below.
 */
int carp_suppress_preempt = 0;
/*
 * Tunables indexed by CARPCTL_*.  The initializer is positional, so the
 * meaning of each slot depends on the CARPCTL_* values in netinet/ip_carp.h.
 */
int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */
SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW,
&carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets");
SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW,
&carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode");
/* Level > 0 enables CARP_LOG(), level > 1 additionally enables CARP_DEBUG(). */
SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW,
&carp_opts[CARPCTL_LOG], 0, "log bad carp packets");
SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW,
&carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses");
SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD,
&carp_suppress_preempt, 0, "Preemption is suppressed");
/* Packet and error counters updated on the input and output paths. */
struct carpstats carpstats;
SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
&carpstats, carpstats,
"CARP statistics (struct carpstats, netinet/ip_carp.h)");
/*
 * Per-parent-interface state, reached through the parent's if_carp pointer.
 * It is freed by carpdetach() when the last virtual host is removed.
 */
struct carp_if {
TAILQ_HEAD(, carp_softc) vhif_vrs; /* virtual hosts on this parent (sc_list) */
int vhif_nvrs; /* number of entries on vhif_vrs */
struct ifnet *vhif_ifp;
struct mtx vhif_mtx; /* protects this structure; see CARP_LOCK() */
};
/* Get carp_if from softc. Valid after carp_set_addr{,6}. */
#define SC2CIF(sc) ((struct carp_if *)(sc)->sc_carpdev->if_carp)
/* lock per carp_if queue */
#define CARP_LOCK_INIT(cif) mtx_init(&(cif)->vhif_mtx, "carp_if", \
NULL, MTX_DEF)
#define CARP_LOCK_DESTROY(cif) mtx_destroy(&(cif)->vhif_mtx)
#define CARP_LOCK_ASSERT(cif) mtx_assert(&(cif)->vhif_mtx, MA_OWNED)
#define CARP_LOCK(cif) mtx_lock(&(cif)->vhif_mtx)
#define CARP_UNLOCK(cif) mtx_unlock(&(cif)->vhif_mtx)
/*
 * Softc flavours of the lock macros.  They dereference sc->sc_carpdev via
 * SC2CIF(), so callers must know sc_carpdev is non-NULL before using them.
 */
#define CARP_SCLOCK(sc) mtx_lock(&SC2CIF(sc)->vhif_mtx)
#define CARP_SCUNLOCK(sc) mtx_unlock(&SC2CIF(sc)->vhif_mtx)
#define CARP_SCLOCK_ASSERT(sc) mtx_assert(&SC2CIF(sc)->vhif_mtx, MA_OWNED)
/* Log at LOG_INFO when the net.inet.carp.log level is above 0. */
#define CARP_LOG(...) do { \
if (carp_opts[CARPCTL_LOG] > 0) \
log(LOG_INFO, __VA_ARGS__); \
} while (0)
/* Log at LOG_DEBUG when the net.inet.carp.log level is above 1. */
#define CARP_DEBUG(...) do { \
if (carp_opts[CARPCTL_LOG] > 1) \
log(LOG_DEBUG, __VA_ARGS__); \
} while (0)
2005-02-26 10:33:14 +00:00
static void carp_hmac_prepare(struct carp_softc *);
static void carp_hmac_generate(struct carp_softc *, u_int32_t *,
unsigned char *);
static int carp_hmac_verify(struct carp_softc *, u_int32_t *,
unsigned char *);
static void carp_setroute(struct carp_softc *, int);
static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t);
static int carp_clone_create(struct if_clone *, int, caddr_t);
2005-02-26 10:33:14 +00:00
static void carp_clone_destroy(struct ifnet *);
static void carpdetach(struct carp_softc *, int);
2005-02-26 10:33:14 +00:00
static int carp_prepare_ad(struct mbuf *, struct carp_softc *,
struct carp_header *);
static void carp_send_ad_all(void);
static void carp_send_ad(void *);
static void carp_send_ad_locked(struct carp_softc *);
2005-02-26 10:33:14 +00:00
static void carp_send_arp(struct carp_softc *);
static void carp_master_down(void *);
static void carp_master_down_locked(struct carp_softc *);
2005-02-26 10:33:14 +00:00
static int carp_ioctl(struct ifnet *, u_long, caddr_t);
static int carp_looutput(struct ifnet *, struct mbuf *, struct sockaddr *,
struct rtentry *);
static void carp_start(struct ifnet *);
static void carp_setrun(struct carp_softc *, sa_family_t);
static void carp_set_state(struct carp_softc *, int);
static int carp_addrcount(struct carp_if *, struct in_ifaddr *, int);
enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING };
static void carp_multicast_cleanup(struct carp_softc *);
2005-02-26 10:33:14 +00:00
static int carp_set_addr(struct carp_softc *, struct sockaddr_in *);
static int carp_del_addr(struct carp_softc *, struct sockaddr_in *);
static void carp_carpdev_state_locked(struct carp_if *);
static void carp_sc_state_locked(struct carp_softc *);
#ifdef INET6
2005-02-26 10:33:14 +00:00
static void carp_send_na(struct carp_softc *);
static int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
static int carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *);
static void carp_multicast6_cleanup(struct carp_softc *);
#endif
static LIST_HEAD(, carp_softc) carpif_list;
static struct mtx carp_mtx;
IFC_SIMPLE_DECLARE(carp, 0);
static eventhandler_tag if_detach_event_tag;
/*
 * Checksum helper for CARP headers: returns in_cksum() over the first
 * `len' bytes of the mbuf chain `m', starting at m's current data pointer.
 */
static __inline u_int16_t
carp_cksum(struct mbuf *m, int len)
{
return (in_cksum(m, len));
}
2005-02-26 10:33:14 +00:00
static void
carp_hmac_prepare(struct carp_softc *sc)
{
u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
u_int8_t vhid = sc->sc_vhid & 0xff;
struct ifaddr *ifa;
int i, found;
#ifdef INET
struct in_addr last, cur, in;
#endif
#ifdef INET6
struct in6_addr last6, cur6, in6;
#endif
if (sc->sc_carpdev)
CARP_SCLOCK(sc);
/* XXX: possible race here */
/* compute ipad from key */
bzero(sc->sc_pad, sizeof(sc->sc_pad));
bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
for (i = 0; i < sizeof(sc->sc_pad); i++)
sc->sc_pad[i] ^= 0x36;
/* precompute first part of inner hash */
SHA1Init(&sc->sc_sha1);
SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
#ifdef INET
cur.s_addr = 0;
do {
found = 0;
last = cur;
cur.s_addr = 0xffffffff;
TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
if (ifa->ifa_addr->sa_family == AF_INET &&
ntohl(in.s_addr) > ntohl(last.s_addr) &&
ntohl(in.s_addr) < ntohl(cur.s_addr)) {
cur.s_addr = in.s_addr;
found++;
}
}
if (found)
SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur));
} while (found);
#endif /* INET */
#ifdef INET6
memset(&cur6, 0, sizeof(cur6));
do {
found = 0;
last6 = cur6;
memset(&cur6, 0xff, sizeof(cur6));
TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
if (IN6_IS_SCOPE_EMBED(&in6))
in6.s6_addr16[1] = 0;
if (ifa->ifa_addr->sa_family == AF_INET6 &&
memcmp(&in6, &last6, sizeof(in6)) > 0 &&
memcmp(&in6, &cur6, sizeof(in6)) < 0) {
cur6 = in6;
found++;
}
}
if (found)
SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6));
} while (found);
#endif /* INET6 */
/* convert ipad to opad */
for (i = 0; i < sizeof(sc->sc_pad); i++)
sc->sc_pad[i] ^= 0x36 ^ 0x5c;
if (sc->sc_carpdev)
CARP_SCUNLOCK(sc);
}
2005-02-26 10:33:14 +00:00
/*
 * Compute the 20-byte advertisement HMAC for `counter' into `md'.
 *
 * The inner hash resumes from the state stored in sc->sc_sha1 and appends
 * the counter; the outer hash covers sc->sc_pad followed by the inner
 * digest.  The softc itself is not modified.
 */
static void
carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2],
    unsigned char md[20])
{
	SHA1_CTX ctx;

	/* Inner hash: start from a private copy of the precomputed state. */
	ctx = sc->sc_sha1;
	SHA1Update(&ctx, (void *)counter, sizeof(sc->sc_counter));
	SHA1Final(md, &ctx);

	/* Outer hash: pad, then the inner digest just produced. */
	SHA1Init(&ctx);
	SHA1Update(&ctx, sc->sc_pad, sizeof(sc->sc_pad));
	SHA1Update(&ctx, md, 20);
	SHA1Final(md, &ctx);
}
2005-02-26 10:33:14 +00:00
/*
 * Check a received HMAC.  Recomputes the digest for `counter' and compares
 * it with `md'; returns 0 when they are equal and non-zero otherwise.
 * The per-parent lock must be held.
 */
static int
carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2],
    unsigned char md[20])
{
	unsigned char expected[20];
	int differs;

	CARP_SCLOCK_ASSERT(sc);

	carp_hmac_generate(sc, counter, expected);
	differs = bcmp(md, expected, sizeof(expected));

	return (differs);
}
2005-02-26 10:33:14 +00:00
/*
 * Add or delete the host routes for the IPv4 addresses of this instance.
 *
 * For every AF_INET address (and only while attached to a parent) the
 * number of MASTER instances on the parent carrying the same address is
 * counted: RTM_ADD installs the route when that count is exactly one,
 * RTM_DELETE removes it when the count is zero.  Any other cmd is a no-op.
 */
static void
carp_setroute(struct carp_softc *sc, int cmd)
{
	struct ifaddr *ifa;
	int nmaster, wanted;
	int s;

	if (sc->sc_carpdev)
		CARP_SCLOCK_ASSERT(sc);

	s = splnet();
	TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
		if (ifa->ifa_addr->sa_family != AF_INET ||
		    sc->sc_carpdev == NULL)
			continue;

		nmaster = carp_addrcount(
		    (struct carp_if *)sc->sc_carpdev->if_carp,
		    ifatoia(ifa), CARP_COUNT_MASTER);

		if (cmd == RTM_ADD)
			wanted = (nmaster == 1);
		else if (cmd == RTM_DELETE)
			wanted = (nmaster == 0);
		else
			wanted = 0;

		if (wanted)
			rtinit(ifa, cmd, RTF_UP | RTF_HOST);
	}
	splx(s);
}
2005-02-26 10:33:14 +00:00
/*
 * Clone handler: create the carp interface with the given unit number.
 *
 * Allocates a zeroed softc and an ifnet, sets the protocol defaults
 * (vhid -1 meaning "not configured yet"), allocates the IPv4 multicast
 * membership array, initialises the three callouts, attaches the interface
 * and its bpf tap, and finally links the softc into the global carpif_list.
 *
 * Returns 0 on success or ENOSPC when no ifnet could be allocated.
 */
static int
carp_clone_create(struct if_clone *ifc, int unit, caddr_t params)
{
	struct carp_softc *sc;
	struct ifnet *ifp;

	sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO);
	ifp = SC2IFP(sc) = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		free(sc, M_CARP);
		return (ENOSPC);
	}

	sc->sc_flags_backup = 0;
	sc->sc_suppress = 0;
	sc->sc_advbase = CARP_DFLTINTV;
	sc->sc_vhid = -1;	/* required setting */
	sc->sc_advskew = 0;
	sc->sc_init_counter = 1;
	sc->sc_naddrs = sc->sc_naddrs6 = 0; /* M_ZERO? */
#ifdef INET6
	sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
#endif
	/* Released again in carp_clone_destroy(). */
	sc->sc_imo.imo_membership = (struct in_multi **)malloc(
	    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP,
	    M_WAITOK);
	sc->sc_imo.imo_mfilters = NULL;
	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
	sc->sc_imo.imo_multicast_vif = -1;

	callout_init(&sc->sc_ad_tmo, CALLOUT_MPSAFE);
	callout_init(&sc->sc_md_tmo, CALLOUT_MPSAFE);
	callout_init(&sc->sc_md6_tmo, CALLOUT_MPSAFE);

	ifp->if_softc = sc;
	if_initname(ifp, CARP_IFNAME, unit);
	ifp->if_mtu = ETHERMTU;
	ifp->if_flags = IFF_LOOPBACK;
	ifp->if_ioctl = carp_ioctl;
	ifp->if_output = carp_looutput;
	ifp->if_start = carp_start;
	/* Allocated as IFT_ETHER above, presented as IFT_CARP from here on. */
	ifp->if_type = IFT_CARP;
	ifp->if_snd.ifq_maxlen = ifqmaxlen;
	ifp->if_hdrlen = 0;
	if_attach(ifp);
	bpfattach(SC2IFP(sc), DLT_NULL, sizeof(u_int32_t));

	mtx_lock(&carp_mtx);
	LIST_INSERT_HEAD(&carpif_list, sc, sc_next);
	mtx_unlock(&carp_mtx);

	return (0);
}
2005-02-26 10:33:14 +00:00
/*
 * Clone handler: destroy a carp interface.
 *
 * Detaches the instance from its parent (if any), unlinks it from the
 * global carpif_list, tears down the bpf tap and the ifnet, and frees the
 * memory allocated by carp_clone_create().
 */
static void
carp_clone_destroy(struct ifnet *ifp)
{
struct carp_softc *sc = ifp->if_softc;
/* carpdetach() asserts the parent's lock when a parent is attached. */
if (sc->sc_carpdev)
CARP_SCLOCK(sc);
carpdetach(sc, 1); /* Returns unlocked. */
mtx_lock(&carp_mtx);
LIST_REMOVE(sc, sc_next);
mtx_unlock(&carp_mtx);
bpfdetach(ifp);
if_detach(ifp);
/* The ifnet was obtained with if_alloc(IFT_ETHER), so free it as such. */
if_free_type(ifp, IFT_ETHER);
free(sc->sc_imo.imo_membership, M_CARP);
free(sc, M_CARP);
}
/*
 * This function can be called on CARP interface destroy path,
 * and in case of the removal of the underlying interface as
 * well. We differentiate these two cases. In the latter case
 * we do not cleanup our multicast memberships, since they
 * are already freed. Also, in the latter case we do not
 * release the lock on return, because the function will be
 * called once more, for another CARP instance on the same
 * interface.
 *
 * `unlock' is non-zero on the destroy path and zero when the parent
 * interface is going away.  When a parent is attached, the caller must
 * hold that parent's carp_if lock.
 */
static void
carpdetach(struct carp_softc *sc, int unlock)
{
struct carp_if *cif;
/* Stop all timers before the state they act on is torn down. */
callout_stop(&sc->sc_ad_tmo);
callout_stop(&sc->sc_md_tmo);
callout_stop(&sc->sc_md6_tmo);
/* Give back any preemption-suppression references this instance holds. */
if (sc->sc_suppress)
carp_suppress_preempt--;
sc->sc_suppress = 0;
if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
carp_suppress_preempt--;
sc->sc_sendad_errors = 0;
carp_set_state(sc, INIT);
SC2IFP(sc)->if_flags &= ~IFF_UP;
carp_setrun(sc, 0);
/*
 * Only the IPv4 cleanup is conditional on `unlock'; the IPv6 cleanup
 * below runs on both paths.
 */
if (unlock)
carp_multicast_cleanup(sc);
#ifdef INET6
carp_multicast6_cleanup(sc);
#endif
if (sc->sc_carpdev != NULL) {
cif = (struct carp_if *)sc->sc_carpdev->if_carp;
CARP_LOCK_ASSERT(cif);
TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
/*
 * Last instance on this parent: drop promiscuous mode and free the
 * carp_if, lock included.  Otherwise release the lock only when the
 * caller asked for it.
 */
if (!--cif->vhif_nvrs) {
ifpromisc(sc->sc_carpdev, 0);
sc->sc_carpdev->if_carp = NULL;
CARP_LOCK_DESTROY(cif);
free(cif, M_IFADDR);
} else if (unlock)
CARP_UNLOCK(cif);
sc->sc_carpdev = NULL;
}
}
/*
 * Detach an interface from the carp.
 *
 * Handler invoked with the interface that is going away.  Every carp
 * instance stacked on `ifp' is detached; interfaces without carp state are
 * ignored.
 */
static void
carp_ifdetach(void *arg __unused, struct ifnet *ifp)
{
	struct carp_if *cif = (struct carp_if *)ifp->if_carp;
	struct carp_softc *sc, *nextsc;

	if (cif == NULL)
		return;

	/*
	 * XXX: At the end of the loop the lock will be destroyed.
	 * carpdetach() frees cif, mutex included, when it removes the last
	 * instance, so there is deliberately no CARP_UNLOCK() here and cif
	 * must not be touched after the loop.
	 */
	CARP_LOCK(cif);
	sc = TAILQ_FIRST(&cif->vhif_vrs);
	while (sc != NULL) {
		/* Fetch the successor first: carpdetach() unlinks sc. */
		nextsc = TAILQ_NEXT(sc, sc_list);
		carpdetach(sc, 0);
		sc = nextsc;
	}
}
/*
* process input packet.
* we have rearranged checks order compared to the rfc,
* but it seems more efficient this way or not possible otherwise.
*/
void
carp_input(struct mbuf *m, int hlen)
{
struct ip *ip = mtod(m, struct ip *);
struct carp_header *ch;
int iplen, len;
carpstats.carps_ipackets++;
if (!carp_opts[CARPCTL_ALLOW]) {
m_freem(m);
return;
}
/* check if received on a valid carp interface */
if (m->m_pkthdr.rcvif->if_carp == NULL) {
carpstats.carps_badif++;
2005-02-25 11:26:39 +00:00
CARP_LOG("carp_input: packet received on non-carp "
"interface: %s\n",
m->m_pkthdr.rcvif->if_xname);
m_freem(m);
return;
}
/* verify that the IP TTL is 255. */
if (ip->ip_ttl != CARP_DFLTTL) {
carpstats.carps_badttl++;
2005-02-25 11:26:39 +00:00
CARP_LOG("carp_input: received ttl %d != 255i on %s\n",
ip->ip_ttl,
m->m_pkthdr.rcvif->if_xname);
m_freem(m);
return;
}
iplen = ip->ip_hl << 2;
if (m->m_pkthdr.len < iplen + sizeof(*ch)) {
carpstats.carps_badlen++;
CARP_LOG("carp_input: received len %zd < "
2005-02-25 11:26:39 +00:00
"sizeof(struct carp_header)\n",
m->m_len - sizeof(struct ip));
m_freem(m);
return;
}
if (iplen + sizeof(*ch) < m->m_len) {
if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) {
carpstats.carps_hdrops++;
2005-02-25 11:26:39 +00:00
CARP_LOG("carp_input: pullup failed\n");
return;
}
ip = mtod(m, struct ip *);
}
ch = (struct carp_header *)((char *)ip + iplen);
/*
* verify that the received packet length is
* equal to the CARP header
*/
len = iplen + sizeof(*ch);
if (len > m->m_pkthdr.len) {
carpstats.carps_badlen++;
2005-02-25 11:26:39 +00:00
CARP_LOG("carp_input: packet too short %d on %s\n",
m->m_pkthdr.len,
m->m_pkthdr.rcvif->if_xname);
m_freem(m);
return;
}
if ((m = m_pullup(m, len)) == NULL) {
carpstats.carps_hdrops++;
return;
}
ip = mtod(m, struct ip *);
ch = (struct carp_header *)((char *)ip + iplen);
/* verify the CARP checksum */
m->m_data += iplen;
if (carp_cksum(m, len - iplen)) {
carpstats.carps_badsum++;
2005-02-25 11:26:39 +00:00
CARP_LOG("carp_input: checksum failed on %s\n",
m->m_pkthdr.rcvif->if_xname);
m_freem(m);
return;
}
m->m_data -= iplen;
carp_input_c(m, ch, AF_INET);
}
#ifdef INET6
/*
 * IPv6 entry point.  `*mp' starts at the IPv6 header and `*offp' is the
 * offset of the CARP header within it.  `proto' is unused.
 *
 * The packet is always consumed and IPPROTO_DONE is always returned.
 * Packets are dropped, with the matching carpstats counter bumped, when
 * CARP input is disabled, the receiving interface has no carp state, the
 * hop limit is not CARP_DFLTTL, the CARP header cannot be obtained, or the
 * CARP checksum does not verify.
 */
int
carp6_input(struct mbuf **mp, int *offp, int proto)
{
	struct mbuf *m = *mp;
	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
	struct carp_header *ch;
	u_int len;

	carpstats.carps_ipackets6++;

	if (!carp_opts[CARPCTL_ALLOW]) {
		m_freem(m);
		return (IPPROTO_DONE);
	}

	/* check if received on a valid carp interface */
	if (m->m_pkthdr.rcvif->if_carp == NULL) {
		carpstats.carps_badif++;
		CARP_LOG("carp6_input: packet received on non-carp "
		    "interface: %s\n",
		    m->m_pkthdr.rcvif->if_xname);
		m_freem(m);
		return (IPPROTO_DONE);
	}

	/* verify that the IP TTL is 255 */
	if (ip6->ip6_hlim != CARP_DFLTTL) {
		carpstats.carps_badttl++;
		CARP_LOG("carp6_input: received ttl %d != 255 on %s\n",
		    ip6->ip6_hlim,
		    m->m_pkthdr.rcvif->if_xname);
		m_freem(m);
		return (IPPROTO_DONE);
	}

	/* verify that we have a complete carp packet */
	len = m->m_len;		/* saved for the log message only */
	IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
	if (ch == NULL) {
		/*
		 * m is not freed on this path; presumably IP6_EXTHDR_GET
		 * has already disposed of it -- confirm against its
		 * definition.
		 */
		carpstats.carps_badlen++;
		CARP_LOG("carp6_input: packet size %u too small\n", len);
		return (IPPROTO_DONE);
	}

	/*
	 * verify the CARP checksum
	 * The data pointer is temporarily advanced to the CARP header so
	 * that the checksum covers that header only.
	 */
	m->m_data += *offp;
	if (carp_cksum(m, sizeof(*ch))) {
		carpstats.carps_badsum++;
		CARP_LOG("carp6_input: checksum failed, on %s\n",
		    m->m_pkthdr.rcvif->if_xname);
		m_freem(m);
		return (IPPROTO_DONE);
	}
	m->m_data -= *offp;

	carp_input_c(m, ch, AF_INET6);
	return (IPPROTO_DONE);
}
#endif /* INET6 */
2005-02-26 10:33:14 +00:00
/*
 * Address-family independent part of advertisement processing.
 *
 * `m' is the received packet (consumed on every path), `ch' points at the
 * CARP header inside it and `af' is AF_INET or AF_INET6.  The function
 * looks up the instance with a matching vhid on the receiving interface,
 * taps the packet to bpf, validates version and HMAC, records the sender's
 * counter and then runs the MASTER/BACKUP state machine against the
 * advertised advbase/advskew.  The receiving interface's carp_if lock is
 * held from the lookup until just before the packet is freed.
 */
static void
carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
{
	struct ifnet *ifp = m->m_pkthdr.rcvif;
	struct carp_softc *sc;
	u_int64_t tmp_counter;
	struct timeval sc_tv, ch_tv;

	/* verify that the VHID is valid on the receiving interface */
	CARP_LOCK(ifp->if_carp);
	TAILQ_FOREACH(sc, &((struct carp_if *)ifp->if_carp)->vhif_vrs, sc_list)
		if (sc->sc_vhid == ch->carp_vhid)
			break;

	if (!sc || !((SC2IFP(sc)->if_flags & IFF_UP) &&
	    (SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))) {
		carpstats.carps_badvhid++;
		CARP_UNLOCK(ifp->if_carp);
		m_freem(m);
		return;
	}

	getmicrotime(&SC2IFP(sc)->if_lastchange);
	SC2IFP(sc)->if_ipackets++;
	SC2IFP(sc)->if_ibytes += m->m_pkthdr.len;

	if (bpf_peers_present(SC2IFP(sc)->if_bpf)) {
		uint32_t af1 = af;

		/*
		 * Only an IPv4 packet starts with a struct ip; applying the
		 * fix-up to an IPv6 packet would overwrite unrelated bytes
		 * of the IPv6 header before it is shown to bpf.
		 */
		if (af == AF_INET) {
			struct ip *ip = mtod(m, struct ip *);

			/* BPF wants net byte order */
			ip->ip_len = htons(ip->ip_len + (ip->ip_hl << 2));
			ip->ip_off = htons(ip->ip_off);
		}
		bpf_mtap2(SC2IFP(sc)->if_bpf, &af1, sizeof(af1), m);
	}

	/* verify the CARP version. */
	if (ch->carp_version != CARP_VERSION) {
		carpstats.carps_badver++;
		SC2IFP(sc)->if_ierrors++;
		CARP_UNLOCK(ifp->if_carp);
		CARP_LOG("%s: invalid version %d\n",
		    SC2IFP(sc)->if_xname,
		    ch->carp_version);
		m_freem(m);
		return;
	}

	/* verify the hash */
	if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
		carpstats.carps_badauth++;
		SC2IFP(sc)->if_ierrors++;
		CARP_UNLOCK(ifp->if_carp);
		CARP_LOG("%s: incorrect hash\n", SC2IFP(sc)->if_xname);
		m_freem(m);
		return;
	}

	/* Reassemble the 64-bit counter from its two network-order halves. */
	tmp_counter = ntohl(ch->carp_counter[0]);
	tmp_counter = tmp_counter<<32;
	tmp_counter += ntohl(ch->carp_counter[1]);

	/* XXX Replay protection goes here */

	sc->sc_init_counter = 0;
	sc->sc_counter = tmp_counter;

	/*
	 * Express our own and the sender's advertisement interval as
	 * timevals: advbase in seconds plus advskew/256 of a second.  While
	 * preemption is suppressed our skew is treated as at least 240.
	 */
	sc_tv.tv_sec = sc->sc_advbase;
	if (carp_suppress_preempt && sc->sc_advskew < 240)
		sc_tv.tv_usec = 240 * 1000000 / 256;
	else
		sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
	ch_tv.tv_sec = ch->carp_advbase;
	ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;

	switch (sc->sc_state) {
	case INIT:
		break;
	case MASTER:
		/*
		 * If we receive an advertisement from a master who's going to
		 * be more frequent than us, go into BACKUP state.
		 */
		if (timevalcmp(&sc_tv, &ch_tv, >) ||
		    timevalcmp(&sc_tv, &ch_tv, ==)) {
			callout_stop(&sc->sc_ad_tmo);
			CARP_DEBUG("%s: MASTER -> BACKUP "
			    "(more frequent advertisement received)\n",
			    SC2IFP(sc)->if_xname);
			carp_set_state(sc, BACKUP);
			carp_setrun(sc, 0);
			carp_setroute(sc, RTM_DELETE);
		}
		break;
	case BACKUP:
		/*
		 * If we're pre-empting masters who advertise slower than us,
		 * and this one claims to be slower, treat him as down.
		 */
		if (carp_opts[CARPCTL_PREEMPT] &&
		    timevalcmp(&sc_tv, &ch_tv, <)) {
			CARP_DEBUG("%s: BACKUP -> MASTER "
			    "(preempting a slower master)\n",
			    SC2IFP(sc)->if_xname);
			carp_master_down_locked(sc);
			break;
		}

		/*
		 *  If the master is going to advertise at such a low frequency
		 *  that he's guaranteed to time out, we'd might as well just
		 *  treat him as timed out now.
		 */
		sc_tv.tv_sec = sc->sc_advbase * 3;
		if (timevalcmp(&sc_tv, &ch_tv, <)) {
			CARP_DEBUG("%s: BACKUP -> MASTER "
			    "(master timed out)\n",
			    SC2IFP(sc)->if_xname);
			carp_master_down_locked(sc);
			break;
		}

		/*
		 * Otherwise, we reset the counter and wait for the next
		 * advertisement.
		 */
		carp_setrun(sc, af);
		break;
	}

	CARP_UNLOCK(ifp->if_carp);

	m_freem(m);
	return;
}
2005-02-26 10:33:14 +00:00
static int
carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
{
struct m_tag *mtag;
struct ifnet *ifp = SC2IFP(sc);
if (sc->sc_init_counter) {
/* this could also be seconds since unix epoch */
sc->sc_counter = arc4random();
sc->sc_counter = sc->sc_counter << 32;
sc->sc_counter += arc4random();
} else
sc->sc_counter++;
ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff);
ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff);
carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
/* Tag packet for carp_output */
mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), M_NOWAIT);
if (mtag == NULL) {
m_freem(m);
SC2IFP(sc)->if_oerrors++;
return (ENOMEM);
}
bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *));
m_tag_prepend(m, mtag);
return (0);
}
2005-02-26 10:33:14 +00:00
/*
 * Send an advertisement right away from every instance that is attached
 * to a parent, up and running, and currently MASTER.  The global list is
 * walked under carp_mtx, taking each instance's parent lock in turn.
 */
static void
carp_send_ad_all(void)
{
	struct carp_softc *sc;
	struct ifnet *ifp;

	mtx_lock(&carp_mtx);
	LIST_FOREACH(sc, &carpif_list, sc_next) {
		/* Unattached instances have no lock to take. */
		if (sc->sc_carpdev != NULL) {
			CARP_SCLOCK(sc);
			ifp = SC2IFP(sc);
			if ((ifp->if_flags & IFF_UP) &&
			    (ifp->if_drv_flags & IFF_DRV_RUNNING) &&
			    sc->sc_state == MASTER)
				carp_send_ad_locked(sc);
			CARP_SCUNLOCK(sc);
		}
	}
	mtx_unlock(&carp_mtx);
}
2005-02-26 10:33:14 +00:00
/*
 * Callout entry point for the advertisement timer: `v' is the softc.
 * Takes the parent's lock around carp_send_ad_locked().  The lock macros
 * dereference sc_carpdev, so the instance must be attached to a parent.
 */
static void
carp_send_ad(void *v)
{
struct carp_softc *sc = v;
CARP_SCLOCK(sc);
carp_send_ad_locked(sc);
CARP_SCUNLOCK(sc);
}
/*
 * Build and transmit one advertisement per configured address family and
 * re-arm the advertisement timer.  The parent's lock must be held.
 *
 * When the interface is no longer up and running, the advertisement is
 * sent with advbase and advskew both 255 and the timer is not re-armed.
 * Send failures are counted per instance; reaching CARP_SENDAD_MAX_ERRORS
 * bumps carp_suppress_preempt, and CARP_SENDAD_MIN_SUCCESS successful sends
 * afterwards drop it again.
 */
static void
carp_send_ad_locked(struct carp_softc *sc)
{
struct carp_header ch;
struct timeval tv;
struct carp_header *ch_ptr;
struct mbuf *m;
int len, advbase, advskew;
CARP_SCLOCK_ASSERT(sc);
/* bow out if we've lost our UPness or RUNNINGuiness */
if (!((SC2IFP(sc)->if_flags & IFF_UP) &&
(SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))) {
advbase = 255;
advskew = 255;
} else {
advbase = sc->sc_advbase;
/* While preemption is suppressed, advertise a skew of at least 240. */
if (!carp_suppress_preempt || sc->sc_advskew > 240)
advskew = sc->sc_advskew;
else
advskew = 240;
/*
 * tv is set on this branch only; every later use is guarded by
 * (advbase != 255 || advskew != 255).
 */
tv.tv_sec = advbase;
tv.tv_usec = advskew * 1000000 / 256;
}
/* Header template shared by the IPv4 and IPv6 packets below. */
ch.carp_version = CARP_VERSION;
ch.carp_type = CARP_ADVERTISEMENT;
ch.carp_vhid = sc->sc_vhid;
ch.carp_advbase = advbase;
ch.carp_advskew = advskew;
ch.carp_authlen = 7; /* XXX DEFINE */
ch.carp_pad1 = 0; /* must be zero */
ch.carp_cksum = 0;
#ifdef INET
INIT_VNET_INET(curvnet);
if (sc->sc_ia) {
struct ip *ip;
MGETHDR(m, M_DONTWAIT, MT_HEADER);
if (m == NULL) {
SC2IFP(sc)->if_oerrors++;
carpstats.carps_onomem++;
/* XXX maybe less ? */
if (advbase != 255 || advskew != 255)
callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
carp_send_ad, sc);
return;
}
len = sizeof(*ip) + sizeof(ch);
m->m_pkthdr.len = len;
m->m_pkthdr.rcvif = NULL;
m->m_len = len;
MH_ALIGN(m, m->m_len);
m->m_flags |= M_MCAST;
ip = mtod(m, struct ip *);
ip->ip_v = IPVERSION;
ip->ip_hl = sizeof(*ip) >> 2;
ip->ip_tos = IPTOS_LOWDELAY;
ip->ip_len = len;
ip->ip_id = ip_newid();
ip->ip_off = IP_DF;
ip->ip_ttl = CARP_DFLTTL;
ip->ip_p = IPPROTO_CARP;
ip->ip_sum = 0;
ip->ip_src.s_addr = sc->sc_ia->ia_addr.sin_addr.s_addr;
ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);
ch_ptr = (struct carp_header *)(&ip[1]);
bcopy(&ch, ch_ptr, sizeof(ch));
/*
 * carp_prepare_ad() frees m on failure.
 * NOTE(review): this return does not re-arm sc_ad_tmo, unlike the
 * MGETHDR failure path above -- confirm that is intended.
 */
if (carp_prepare_ad(m, sc, ch_ptr))
return;
/* Checksum the CARP header only: step over the IP header and back. */
m->m_data += sizeof(*ip);
ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip));
m->m_data -= sizeof(*ip);
getmicrotime(&SC2IFP(sc)->if_lastchange);
SC2IFP(sc)->if_opackets++;
SC2IFP(sc)->if_obytes += len;
carpstats.carps_opackets++;
if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) {
SC2IFP(sc)->if_oerrors++;
if (sc->sc_sendad_errors < INT_MAX)
sc->sc_sendad_errors++;
if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
carp_suppress_preempt++;
/*
 * First suppression reason system-wide: have every master
 * advertise immediately.  Our lock is dropped because
 * carp_send_ad_all() takes carp_mtx and each instance's lock.
 */
if (carp_suppress_preempt == 1) {
CARP_SCUNLOCK(sc);
carp_send_ad_all();
CARP_SCLOCK(sc);
}
}
sc->sc_sendad_success = 0;
} else {
if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
if (++sc->sc_sendad_success >=
CARP_SENDAD_MIN_SUCCESS) {
carp_suppress_preempt--;
sc->sc_sendad_errors = 0;
}
} else
sc->sc_sendad_errors = 0;
}
}
#endif /* INET */
#ifdef INET6
if (sc->sc_ia6) {
struct ip6_hdr *ip6;
MGETHDR(m, M_DONTWAIT, MT_HEADER);
if (m == NULL) {
SC2IFP(sc)->if_oerrors++;
carpstats.carps_onomem++;
/* XXX maybe less ? */
if (advbase != 255 || advskew != 255)
callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
carp_send_ad, sc);
return;
}
len = sizeof(*ip6) + sizeof(ch);
m->m_pkthdr.len = len;
m->m_pkthdr.rcvif = NULL;
m->m_len = len;
MH_ALIGN(m, m->m_len);
m->m_flags |= M_MCAST;
ip6 = mtod(m, struct ip6_hdr *);
bzero(ip6, sizeof(*ip6));
ip6->ip6_vfc |= IPV6_VERSION;
ip6->ip6_hlim = CARP_DFLTTL;
ip6->ip6_nxt = IPPROTO_CARP;
bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src,
sizeof(struct in6_addr));
/* set the multicast destination */
ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
ip6->ip6_dst.s6_addr8[15] = 0x12;
/* NOTE(review): this failure path returns without re-arming sc_ad_tmo. */
if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
SC2IFP(sc)->if_oerrors++;
m_freem(m);
CARP_LOG("%s: in6_setscope failed\n", __func__);
return;
}
ch_ptr = (struct carp_header *)(&ip6[1]);
bcopy(&ch, ch_ptr, sizeof(ch));
/* As for IPv4: m is already freed on failure and the timer is not re-armed. */
if (carp_prepare_ad(m, sc, ch_ptr))
return;
/* Checksum the CARP header only: step over the IPv6 header and back. */
m->m_data += sizeof(*ip6);
ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6));
m->m_data -= sizeof(*ip6);
getmicrotime(&SC2IFP(sc)->if_lastchange);
SC2IFP(sc)->if_opackets++;
SC2IFP(sc)->if_obytes += len;
carpstats.carps_opackets6++;
/* Same error/success accounting as the IPv4 path. */
if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) {
SC2IFP(sc)->if_oerrors++;
if (sc->sc_sendad_errors < INT_MAX)
sc->sc_sendad_errors++;
if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
carp_suppress_preempt++;
if (carp_suppress_preempt == 1) {
CARP_SCUNLOCK(sc);
carp_send_ad_all();
CARP_SCLOCK(sc);
}
}
sc->sc_sendad_success = 0;
} else {
if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
if (++sc->sc_sendad_success >=
CARP_SENDAD_MIN_SUCCESS) {
carp_suppress_preempt--;
sc->sc_sendad_errors = 0;
}
} else
sc->sc_sendad_errors = 0;
}
}
#endif /* INET6 */
/* Schedule the next advertisement unless we are bowing out. */
if (advbase != 255 || advskew != 255)
callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
carp_send_ad, sc);
}
/*
* Broadcast a gratuitous ARP request containing
* the virtual router MAC address for each IP address
* associated with the virtual router.
*/
2005-02-26 10:33:14 +00:00
static void
carp_send_arp(struct carp_softc *sc)
{
	struct ifaddr *addr;

	/*
	 * For every IPv4 address configured on the carp interface, hand the
	 * parent device, the address and the carp interface's link-level
	 * address to arp_ifinit2().
	 */
	TAILQ_FOREACH(addr, &SC2IFP(sc)->if_addrlist, ifa_list) {
		if (addr->ifa_addr->sa_family == AF_INET) {
			arp_ifinit2(sc->sc_carpdev, addr,
			    IF_LLADDR(sc->sc_ifp));
			DELAY(1000);	/* XXX */
		}
	}
}
#ifdef INET6
2005-02-26 10:33:14 +00:00
/*
 * Send a neighbor advertisement with the override flag set, via the parent
 * device, for each IPv6 address configured on the carp interface.
 */
static void
carp_send_na(struct carp_softc *sc)
{
	static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
	struct ifaddr *addr;
	struct in6_addr *target;

	TAILQ_FOREACH(addr, &SC2IFP(sc)->if_addrlist, ifa_list) {
		if (addr->ifa_addr->sa_family == AF_INET6) {
			target = &ifatoia6(addr)->ia_addr.sin6_addr;
			nd6_na_output(sc->sc_carpdev, &mcast, target,
			    ND_NA_FLAG_OVERRIDE, 1, NULL);
			DELAY(1000);	/* XXX */
		}
	}
}
#endif /* INET6 */
2005-02-26 10:33:14 +00:00
/*
 * Count how many addresses equal to ia's IPv4 address are configured on the
 * virtual hosts attached to cif.  Only virtual hosts selected by 'type' are
 * considered: CARP_COUNT_RUNNING selects hosts that are up and running,
 * CARP_COUNT_MASTER selects hosts in MASTER state.  The cif lock must be
 * held by the caller.
 */
static int
carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type)
{
	struct carp_softc *vh;
	struct ifaddr *ifa;
	int eligible, matches;

	CARP_LOCK_ASSERT(cif);

	matches = 0;
	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
		eligible = (type == CARP_COUNT_RUNNING &&
		    (SC2IFP(vh)->if_flags & IFF_UP) &&
		    (SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING));
		if (!eligible)
			eligible = (type == CARP_COUNT_MASTER &&
			    vh->sc_state == MASTER);
		if (!eligible)
			continue;

		TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist, ifa_list) {
			if (ifa->ifa_addr->sa_family != AF_INET)
				continue;
			if (ia->ia_addr.sin_addr.s_addr ==
			    ifatoia(ifa)->ia_addr.sin_addr.s_addr)
				matches++;
		}
	}

	return (matches);
}
/*
 * Decide whether a carp virtual host on this parent interface should answer
 * for the address 'ia'.  Returns 1 and stores the virtual host's link-level
 * address in *enaddr when it should, 0 otherwise.
 *
 * Without ARP balancing, the first up, running MASTER virtual host whose
 * interface owns 'ia' wins.  With ARP balancing, the source address selects
 * one of the running virtual hosts carrying the address, and we answer only
 * if that particular host is MASTER.
 */
int
carp_iamatch(void *v, struct in_ifaddr *ia,
    struct in_addr *isaddr, u_int8_t **enaddr)
{
	struct carp_if *cif = v;
	struct carp_softc *vh;
	struct ifaddr *ifa;
	int total, target, seen;
	int matched = 0;

	CARP_LOCK(cif);

	if (!carp_opts[CARPCTL_ARPBALANCE]) {
		TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
			if (!(SC2IFP(vh)->if_flags & IFF_UP))
				continue;
			if (!(SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING))
				continue;
			if (ia->ia_ifp != SC2IFP(vh))
				continue;
			if (vh->sc_state != MASTER)
				continue;
			*enaddr = IF_LLADDR(vh->sc_ifp);
			matched = 1;
			break;
		}
		goto out;
	}

	/*
	 * XXX proof of concept implementation.
	 * The source ip picks the virtual host that should handle the
	 * request.  If we are master of that virtual host we respond,
	 * otherwise the arp packet is simply dropped.
	 */
	total = carp_addrcount(cif, ia, CARP_COUNT_RUNNING);
	if (total == 0) {
		/* should never reach this */
		goto out;
	}

	/* this should be a hash, like pf_hash() */
	target = ntohl(isaddr->s_addr) % total;
	seen = 0;

	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
		if (!(SC2IFP(vh)->if_flags & IFF_UP) ||
		    !(SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING))
			continue;
		TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist, ifa_list) {
			if (ifa->ifa_addr->sa_family != AF_INET)
				continue;
			if (ia->ia_addr.sin_addr.s_addr !=
			    ifatoia(ifa)->ia_addr.sin_addr.s_addr)
				continue;
			if (seen == target) {
				/* This is the selected host; answer iff master. */
				if (vh->sc_state == MASTER) {
					*enaddr = IF_LLADDR(vh->sc_ifp);
					matched = 1;
				}
				goto out;
			}
			seen++;
		}
	}

out:
	CARP_UNLOCK(cif);
	return (matched);
}
#ifdef INET6
/*
 * Look for an IPv6 address equal to 'taddr' on an up, running, MASTER
 * virtual host attached to the carp_if 'v'.  Returns the matching ifaddr,
 * or NULL when there is none.
 */
struct ifaddr *
carp_iamatch6(void *v, struct in6_addr *taddr)
{
	struct carp_if *cif = v;
	struct carp_softc *vh;
	struct ifaddr *ifa;

	CARP_LOCK(cif);
	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
		/* These conditions do not depend on the individual address. */
		if (!(SC2IFP(vh)->if_flags & IFF_UP) ||
		    !(SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) ||
		    vh->sc_state != MASTER)
			continue;

		TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist, ifa_list) {
			/*
			 * The address list is not restricted to IPv6
			 * entries; only an AF_INET6 ifaddr may be
			 * reinterpreted through ifatoia6().
			 */
			if (ifa->ifa_addr->sa_family != AF_INET6)
				continue;
			if (IN6_ARE_ADDR_EQUAL(taddr,
			    &ifatoia6(ifa)->ia_addr.sin6_addr)) {
				CARP_UNLOCK(cif);
				return (ifa);
			}
		}
	}
	CARP_UNLOCK(cif);

	return (NULL);
}
/*
 * Find an up, running virtual host on the carp_if 'v' that carries the IPv6
 * address 'taddr' and return that host's link-level address (NULL if none).
 * On a match the mbuf is tagged with PACKET_TAG_CARP carrying the carp
 * ifnet pointer; if the tag cannot be allocated the link-level address is
 * still returned, just without the tag.
 */
void *
carp_macmatch6(void *v, struct mbuf *m, const struct in6_addr *taddr)
{
	struct m_tag *mtag;
	struct carp_if *cif = v;
	struct carp_softc *sc;
	struct ifaddr *ifa;

	CARP_LOCK(cif);
	TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
		/* These conditions do not depend on the individual address. */
		if (!(SC2IFP(sc)->if_flags & IFF_UP) ||
		    !(SC2IFP(sc)->if_drv_flags & IFF_DRV_RUNNING))
			continue;

		TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
			struct ifnet *ifp;

			/*
			 * The address list is not restricted to IPv6
			 * entries; only an AF_INET6 ifaddr may be
			 * reinterpreted through ifatoia6().
			 */
			if (ifa->ifa_addr->sa_family != AF_INET6)
				continue;
			if (!IN6_ARE_ADDR_EQUAL(taddr,
			    &ifatoia6(ifa)->ia_addr.sin6_addr))
				continue;

			ifp = SC2IFP(sc);
			mtag = m_tag_get(PACKET_TAG_CARP,
			    sizeof(struct ifnet *), M_NOWAIT);
			if (mtag != NULL) {
				bcopy(&ifp, (caddr_t)(mtag + 1),
				    sizeof(struct ifnet *));
				m_tag_prepend(m, mtag);
			}
			/* else: better a bit than nothing */

			CARP_UNLOCK(cif);
			return (IF_LLADDR(sc->sc_ifp));
		}
	}
	CARP_UNLOCK(cif);

	return (NULL);
}
#endif
struct ifnet *
carp_forus(void *v, void *dhost)
{
struct carp_if *cif = v;
struct carp_softc *vh;
u_int8_t *ena = dhost;
if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1)
return (NULL);
CARP_LOCK(cif);
TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list)
if ((SC2IFP(vh)->if_flags & IFF_UP) &&
(SC2IFP(vh)->if_drv_flags & IFF_DRV_RUNNING) &&
vh->sc_state == MASTER &&
!bcmp(dhost, IF_LLADDR(vh->sc_ifp), ETHER_ADDR_LEN)) {
CARP_UNLOCK(cif);
return (SC2IFP(vh));
}
CARP_UNLOCK(cif);
return (NULL);
}
2005-02-26 10:33:14 +00:00
/*
 * Master-down timer handler (armed through callout_reset() in
 * carp_setrun()): take the softc lock and run the locked variant.
 */
static void
carp_master_down(void *v)
{
	struct carp_softc *sc;

	sc = v;
	CARP_SCLOCK(sc);
	carp_master_down_locked(sc);
	CARP_SCUNLOCK(sc);
}
/*
 * Handle a master-down event with the softc lock held.  A BACKUP host
 * becomes MASTER and announces itself; a MASTER host ignores the event;
 * in INIT state the event is only reported.
 */
static void
carp_master_down_locked(struct carp_softc *sc)
{
	if (sc->sc_carpdev)
		CARP_SCLOCK_ASSERT(sc);

	if (sc->sc_state == INIT) {
		printf("%s: master_down event in INIT state\n",
		    SC2IFP(sc)->if_xname);
		return;
	}
	if (sc->sc_state != BACKUP)
		return;

	/* BACKUP: take over as master. */
	carp_set_state(sc, MASTER);
	carp_send_ad_locked(sc);
	carp_send_arp(sc);
#ifdef INET6
	carp_send_na(sc);
#endif /* INET6 */
	carp_setrun(sc, 0);
	carp_setroute(sc, RTM_ADD);
}
/*
* When in backup state, af indicates whether to reset the master down timer
* for v4 or v6. If it's set to zero, reset the ones which are already pending.
*/
2005-02-26 10:33:14 +00:00
/*
 * Re-evaluate whether the carp interface may run and (re)arm the timer that
 * belongs to its current state:
 *   - no parent device: clear RUNNING and fall back to INIT;
 *   - not up, no vhid or no addresses: clear RUNNING and delete the route;
 *   - INIT: become MASTER right away when preemption is enabled and not
 *     suppressed, otherwise become BACKUP and run again for that state;
 *   - BACKUP: stop the advertisement timer and arm the master-down timer(s)
 *     selected by 'af' (0 means: one per address family in use);
 *   - MASTER: arm the advertisement timer.
 */
static void
carp_setrun(struct carp_softc *sc, sa_family_t af)
{
	struct timeval tv;

	if (sc->sc_carpdev == NULL) {
		SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
		carp_set_state(sc, INIT);
		return;
	}
	CARP_SCLOCK_ASSERT(sc);

	if (SC2IFP(sc)->if_flags & IFF_UP &&
	    sc->sc_vhid > 0 && (sc->sc_naddrs || sc->sc_naddrs6))
		SC2IFP(sc)->if_drv_flags |= IFF_DRV_RUNNING;
	else {
		SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
		carp_setroute(sc, RTM_DELETE);
		return;
	}

	switch (sc->sc_state) {
	case INIT:
		if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) {
			carp_send_ad_locked(sc);
			carp_send_arp(sc);
#ifdef INET6
			carp_send_na(sc);
#endif /* INET6 */
			CARP_DEBUG("%s: INIT -> MASTER (preempting)\n",
			    SC2IFP(sc)->if_xname);
			carp_set_state(sc, MASTER);
			carp_setroute(sc, RTM_ADD);
		} else {
			CARP_DEBUG("%s: INIT -> BACKUP\n", SC2IFP(sc)->if_xname);
			carp_set_state(sc, BACKUP);
			carp_setroute(sc, RTM_DELETE);
			/* Run once more to arm the BACKUP timers. */
			carp_setrun(sc, 0);
		}
		break;
	case BACKUP:
		callout_stop(&sc->sc_ad_tmo);
		/* Master-down timeout: 3 * advbase seconds + advskew/256 s. */
		tv.tv_sec = 3 * sc->sc_advbase;
		tv.tv_usec = sc->sc_advskew * 1000000 / 256;
		switch (af) {
#ifdef INET
		case AF_INET:
			callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
			    carp_master_down, sc);
			break;
#endif /* INET */
#ifdef INET6
		case AF_INET6:
			callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
			    carp_master_down, sc);
			break;
#endif /* INET6 */
		default:
			if (sc->sc_naddrs)
				callout_reset(&sc->sc_md_tmo, tvtohz(&tv),
				    carp_master_down, sc);
			if (sc->sc_naddrs6)
				callout_reset(&sc->sc_md6_tmo, tvtohz(&tv),
				    carp_master_down, sc);
			break;
		}
		break;
	case MASTER:
		/* Advertisement interval: advbase seconds + advskew/256 s. */
		tv.tv_sec = sc->sc_advbase;
		tv.tv_usec = sc->sc_advskew * 1000000 / 256;
		callout_reset(&sc->sc_ad_tmo, tvtohz(&tv),
		    carp_send_ad, sc);
		break;
	}
}
/*
 * Drop every IPv4 multicast membership recorded in the softc's multicast
 * options and reset the membership count and outgoing interface.
 */
static void
carp_multicast_cleanup(struct carp_softc *sc)
{
	struct ip_moptions *imo = &sc->sc_imo;
	u_int16_t n = imo->imo_num_memberships;

	/* Clean up our own multicast memberships */
	while (n-- > 0) {
		if (imo->imo_membership[n] != NULL) {
			in_delmulti(imo->imo_membership[n]);
			imo->imo_membership[n] = NULL;
		}
	}
	KASSERT(imo->imo_mfilters == NULL,
	    ("%s: imo_mfilters != NULL", __func__));
	imo->imo_num_memberships = 0;
	imo->imo_multicast_ifp = NULL;
}
#ifdef INET6
static void
carp_multicast6_cleanup(struct carp_softc *sc)
{
struct ip6_moptions *im6o = &sc->sc_im6o;
while (!LIST_EMPTY(&im6o->im6o_memberships)) {
struct in6_multi_mship *imm =
LIST_FIRST(&im6o->im6o_memberships);
2007-01-25 17:58:16 +00:00
LIST_REMOVE(imm, i6mm_chain);
in6_leavegroup(imm);
}
im6o->im6o_multicast_ifp = NULL;
}
#endif
2005-02-26 10:33:14 +00:00
/*
 * Configure an IPv4 virtual address on the carp softc.
 *
 * A zero address only re-evaluates the run state.  Otherwise the function
 * looks for a multicast-capable interface (other than the carp interface
 * itself) with an address whose subnet contains the new one, joins the CARP
 * multicast group on it for the first address, creates the per-interface
 * carp_if if needed, inserts the softc into the interface's list of virtual
 * hosts and restarts the state machine.
 *
 * Returns 0 on success, EADDRNOTAVAIL when no suitable interface exists (or
 * it differs from the one already in use), ENOBUFS when the group cannot be
 * joined, EEXIST when another virtual host on the interface uses the same
 * vhid, or the error returned by ifpromisc().
 */
static int
carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
{
	INIT_VNET_INET(curvnet);
	struct ifnet *ifp;
	struct carp_if *cif;
	struct in_ifaddr *ia, *ia_if;
	struct ip_moptions *imo = &sc->sc_imo;
	struct in_addr addr;
	u_long iaddr = htonl(sin->sin_addr.s_addr);
	int own, error;
	int joined = 0;		/* did this call join the CARP group? */

	if (sin->sin_addr.s_addr == 0) {
		if (!(SC2IFP(sc)->if_flags & IFF_UP))
			carp_set_state(sc, INIT);
		if (sc->sc_naddrs)
			SC2IFP(sc)->if_flags |= IFF_UP;
		if (sc->sc_carpdev)
			CARP_SCLOCK(sc);
		carp_setrun(sc, 0);
		if (sc->sc_carpdev)
			CARP_SCUNLOCK(sc);
		return (0);
	}

	/* we have to do it by hands to check we won't match on us */
	ia_if = NULL; own = 0;
	TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
		/* and, yeah, we need a multicast-capable iface too */
		if (ia->ia_ifp != SC2IFP(sc) &&
		    (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
		    (iaddr & ia->ia_subnetmask) == ia->ia_subnet) {
			if (!ia_if)
				ia_if = ia;
			if (sin->sin_addr.s_addr ==
			    ia->ia_addr.sin_addr.s_addr)
				own++;
		}
	}

	if (!ia_if)
		return (EADDRNOTAVAIL);

	ia = ia_if;
	ifp = ia->ia_ifp;

	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
	    (imo->imo_multicast_ifp && imo->imo_multicast_ifp != ifp))
		return (EADDRNOTAVAIL);

	if (imo->imo_num_memberships == 0) {
		addr.s_addr = htonl(INADDR_CARP_GROUP);
		if ((imo->imo_membership[0] = in_addmulti(&addr, ifp)) == NULL)
			return (ENOBUFS);
		imo->imo_num_memberships++;
		imo->imo_multicast_ifp = ifp;
		imo->imo_multicast_ttl = CARP_DFLTTL;
		imo->imo_multicast_loop = 0;
		joined = 1;
	}

	if (!ifp->if_carp) {
		/* M_WAITOK allocations sleep instead of returning NULL. */
		cif = malloc(sizeof(*cif), M_CARP,
		    M_WAITOK|M_ZERO);
		if ((error = ifpromisc(ifp, 1))) {
			free(cif, M_CARP);
			goto cleanup;
		}

		CARP_LOCK_INIT(cif);
		CARP_LOCK(cif);
		cif->vhif_ifp = ifp;
		TAILQ_INIT(&cif->vhif_vrs);
		ifp->if_carp = cif;

	} else {
		struct carp_softc *vr;

		cif = (struct carp_if *)ifp->if_carp;
		CARP_LOCK(cif);
		TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
			if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
				CARP_UNLOCK(cif);
				error = EEXIST;
				goto cleanup;
			}
	}
	sc->sc_ia = ia;
	sc->sc_carpdev = ifp;

	{ /* XXX prevent endless loop if already in queue */
	struct carp_softc *vr, *after = NULL;
	int myself = 0;
	cif = (struct carp_if *)ifp->if_carp;

	/* XXX: cif should not change, right? So we still hold the lock */
	CARP_LOCK_ASSERT(cif);

	TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
		if (vr == sc)
			myself = 1;
		if (vr->sc_vhid < sc->sc_vhid)
			after = vr;
	}

	if (!myself) {
		/* We're trying to keep things in order */
		if (after == NULL) {
			TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
		} else {
			TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
		}
		cif->vhif_nvrs++;
	}
	}

	sc->sc_naddrs++;
	SC2IFP(sc)->if_flags |= IFF_UP;
	if (own)
		sc->sc_advskew = 0;

	carp_sc_state_locked(sc);
	carp_setrun(sc, 0);

	CARP_UNLOCK(cif);

	return (0);

cleanup:
	/*
	 * Undo only what this call did.  A membership that already existed
	 * on entry belongs to previously configured addresses and must be
	 * kept; a fresh one is dropped together with the interface binding
	 * so that a later attempt on another interface is not rejected.
	 */
	if (joined) {
		in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
		imo->imo_membership[imo->imo_num_memberships] = NULL;
		imo->imo_multicast_ifp = NULL;
	}
	return (error);
}
2005-02-26 10:33:14 +00:00
static int
carp_del_addr(struct carp_softc *sc, struct sockaddr_in *sin)
{
int error = 0;
if (!--sc->sc_naddrs) {
struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp;
struct ip_moptions *imo = &sc->sc_imo;
CARP_LOCK(cif);
callout_stop(&sc->sc_ad_tmo);
SC2IFP(sc)->if_flags &= ~IFF_UP;
SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
sc->sc_vhid = -1;
in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
imo->imo_multicast_ifp = NULL;
TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
if (!--cif->vhif_nvrs) {
sc->sc_carpdev->if_carp = NULL;
CARP_LOCK_DESTROY(cif);
free(cif, M_IFADDR);
} else {
CARP_UNLOCK(cif);
}
}
return (error);
}
#ifdef INET6
2005-02-26 10:33:14 +00:00
/*
 * Configure an IPv6 virtual address on the carp softc.
 *
 * The unspecified address only re-evaluates the run state.  Otherwise the
 * function looks for a multicast-capable interface (other than the carp
 * interface itself) with an address whose prefix contains the new one.  For
 * the first IPv6 address it joins the CARP group and a solicited-node
 * group, then creates the per-interface carp_if if needed, inserts the
 * softc into the interface's list of virtual hosts and restarts the state
 * machine.
 *
 * Returns 0 on success, EADDRNOTAVAIL when no suitable interface exists (or
 * it differs from the one already in use), EINVAL when another virtual host
 * on the interface uses the same vhid, or the error reported by
 * in6_setscope(), in6_joingroup() or ifpromisc().
 */
static int
carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
{
	INIT_VNET_INET6(curvnet);
	struct ifnet *ifp;
	struct carp_if *cif;
	struct in6_ifaddr *ia, *ia_if;
	struct ip6_moptions *im6o = &sc->sc_im6o;
	struct in6_multi_mship *imm;
	struct in6_addr in6;
	int own, error;

	if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
		if (!(SC2IFP(sc)->if_flags & IFF_UP))
			carp_set_state(sc, INIT);
		if (sc->sc_naddrs6)
			SC2IFP(sc)->if_flags |= IFF_UP;
		if (sc->sc_carpdev)
			CARP_SCLOCK(sc);
		carp_setrun(sc, 0);
		if (sc->sc_carpdev)
			CARP_SCUNLOCK(sc);
		return (0);
	}

	/* we have to do it by hands to check we won't match on us */
	ia_if = NULL; own = 0;
	for (ia = V_in6_ifaddr; ia; ia = ia->ia_next) {
		int i;

		/* Compare the address against ia under ia's prefix mask. */
		for (i = 0; i < 4; i++) {
			if ((sin6->sin6_addr.s6_addr32[i] &
			    ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
			    (ia->ia_addr.sin6_addr.s6_addr32[i] &
			    ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
				break;
		}
		/* and, yeah, we need a multicast-capable iface too */
		if (ia->ia_ifp != SC2IFP(sc) &&
		    (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
		    (i == 4)) {
			if (!ia_if)
				ia_if = ia;
			if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
			    &ia->ia_addr.sin6_addr))
				own++;
		}
	}

	if (!ia_if)
		return (EADDRNOTAVAIL);
	ia = ia_if;
	ifp = ia->ia_ifp;

	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
	    (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp))
		return (EADDRNOTAVAIL);

	if (!sc->sc_naddrs6) {
		im6o->im6o_multicast_ifp = ifp;

		/* join CARP multicast address */
		bzero(&in6, sizeof(in6));
		in6.s6_addr16[0] = htons(0xff02);
		in6.s6_addr8[15] = 0x12;
		/* Record the failure code; 'error' has no value yet here. */
		if ((error = in6_setscope(&in6, ifp, NULL)) != 0)
			goto cleanup;
		if ((imm = in6_joingroup(ifp, &in6, &error, 0)) == NULL)
			goto cleanup;
		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);

		/* join solicited multicast address */
		bzero(&in6, sizeof(in6));
		in6.s6_addr16[0] = htons(0xff02);
		in6.s6_addr32[1] = 0;
		in6.s6_addr32[2] = htonl(1);
		in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3];
		in6.s6_addr8[12] = 0xff;
		if ((error = in6_setscope(&in6, ifp, NULL)) != 0)
			goto cleanup;
		if ((imm = in6_joingroup(ifp, &in6, &error, 0)) == NULL)
			goto cleanup;
		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
	}

	if (!ifp->if_carp) {
		/* M_WAITOK allocations sleep instead of returning NULL. */
		cif = malloc(sizeof(*cif), M_CARP,
		    M_WAITOK|M_ZERO);
		if ((error = ifpromisc(ifp, 1))) {
			free(cif, M_CARP);
			goto cleanup;
		}

		CARP_LOCK_INIT(cif);
		CARP_LOCK(cif);
		cif->vhif_ifp = ifp;
		TAILQ_INIT(&cif->vhif_vrs);
		ifp->if_carp = cif;

	} else {
		struct carp_softc *vr;

		cif = (struct carp_if *)ifp->if_carp;
		CARP_LOCK(cif);
		TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
			if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
				CARP_UNLOCK(cif);
				error = EINVAL;
				goto cleanup;
			}
	}
	sc->sc_ia6 = ia;
	sc->sc_carpdev = ifp;

	{ /* XXX prevent endless loop if already in queue */
	struct carp_softc *vr, *after = NULL;
	int myself = 0;
	cif = (struct carp_if *)ifp->if_carp;
	CARP_LOCK_ASSERT(cif);

	TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
		if (vr == sc)
			myself = 1;
		if (vr->sc_vhid < sc->sc_vhid)
			after = vr;
	}

	if (!myself) {
		/* We're trying to keep things in order */
		if (after == NULL) {
			TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
		} else {
			TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
		}
		cif->vhif_nvrs++;
	}
	}

	sc->sc_naddrs6++;
	SC2IFP(sc)->if_flags |= IFF_UP;
	if (own)
		sc->sc_advskew = 0;

	carp_sc_state_locked(sc);
	carp_setrun(sc, 0);

	CARP_UNLOCK(cif);

	return (0);

cleanup:
	/*
	 * Clean up multicast memberships.  Memberships and the interface
	 * binding were set up by this call only if no IPv6 address was
	 * configured yet, so only then are they torn down again.
	 */
	if (!sc->sc_naddrs6) {
		while (!LIST_EMPTY(&im6o->im6o_memberships)) {
			imm = LIST_FIRST(&im6o->im6o_memberships);
			LIST_REMOVE(imm, i6mm_chain);
			in6_leavegroup(imm);
		}
		im6o->im6o_multicast_ifp = NULL;
	}
	return (error);
}
2005-02-26 10:33:14 +00:00
static int
carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
{
int error = 0;
if (!--sc->sc_naddrs6) {
struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp;
struct ip6_moptions *im6o = &sc->sc_im6o;
CARP_LOCK(cif);
callout_stop(&sc->sc_ad_tmo);
SC2IFP(sc)->if_flags &= ~IFF_UP;
SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
sc->sc_vhid = -1;
while (!LIST_EMPTY(&im6o->im6o_memberships)) {
struct in6_multi_mship *imm =
LIST_FIRST(&im6o->im6o_memberships);
LIST_REMOVE(imm, i6mm_chain);
in6_leavegroup(imm);
}
im6o->im6o_multicast_ifp = NULL;
TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
if (!--cif->vhif_nvrs) {
CARP_LOCK_DESTROY(cif);
sc->sc_carpdev->if_carp = NULL;
free(cif, M_IFADDR);
} else
CARP_UNLOCK(cif);
}
return (error);
}
#endif /* INET6 */
2005-02-26 10:33:14 +00:00
/*
 * ioctl handler for a carp interface.
 *
 * Handles address assignment and removal (SIOCSIFADDR, SIOCAIFADDR,
 * SIOCDIFADDR), interface flag changes (SIOCSIFFLAGS) and the carp-specific
 * get/set requests (SIOCGVH, SIOCSVH).  Any other command yields EINVAL.
 * Returns 0 on success or an errno value.
 */
static int
carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr)
{
struct carp_softc *sc = ifp->if_softc, *vr;
struct carpreq carpr;
struct ifaddr *ifa;
struct ifreq *ifr;
struct ifaliasreq *ifra;
/* 'locked' records whether the softc lock was taken and must be dropped. */
int locked = 0, error = 0;
/*
 * The same argument pointer is viewed through three types; each command
 * below picks the view(s) it uses.
 */
ifa = (struct ifaddr *)addr;
ifra = (struct ifaliasreq *)addr;
ifr = (struct ifreq *)addr;
switch (cmd) {
case SIOCSIFADDR:
switch (ifa->ifa_addr->sa_family) {
#ifdef INET
case AF_INET:
SC2IFP(sc)->if_flags |= IFF_UP;
/* The destination address is set to a copy of the address. */
bcopy(ifa->ifa_addr, ifa->ifa_dstaddr,
sizeof(struct sockaddr));
error = carp_set_addr(sc, satosin(ifa->ifa_addr));
break;
#endif /* INET */
#ifdef INET6
case AF_INET6:
SC2IFP(sc)->if_flags |= IFF_UP;
error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
break;
#endif /* INET6 */
default:
error = EAFNOSUPPORT;
break;
}
break;
case SIOCAIFADDR:
/* Family is read through 'ifa', the address itself through 'ifra'. */
switch (ifa->ifa_addr->sa_family) {
#ifdef INET
case AF_INET:
SC2IFP(sc)->if_flags |= IFF_UP;
bcopy(ifa->ifa_addr, ifa->ifa_dstaddr,
sizeof(struct sockaddr));
error = carp_set_addr(sc, satosin(&ifra->ifra_addr));
break;
#endif /* INET */
#ifdef INET6
case AF_INET6:
SC2IFP(sc)->if_flags |= IFF_UP;
error = carp_set_addr6(sc, satosin6(&ifra->ifra_addr));
break;
#endif /* INET6 */
default:
error = EAFNOSUPPORT;
break;
}
break;
case SIOCDIFADDR:
switch (ifa->ifa_addr->sa_family) {
#ifdef INET
case AF_INET:
error = carp_del_addr(sc, satosin(&ifra->ifra_addr));
break;
#endif /* INET */
#ifdef INET6
case AF_INET6:
error = carp_del_addr6(sc, satosin6(&ifra->ifra_addr));
break;
#endif /* INET6 */
default:
error = EAFNOSUPPORT;
break;
}
break;
case SIOCSIFFLAGS:
/* The softc lock is only taken once a parent device is attached. */
if (sc->sc_carpdev) {
locked = 1;
CARP_SCLOCK(sc);
}
if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) {
/* Going down: stop all timers and fall back to INIT. */
callout_stop(&sc->sc_ad_tmo);
callout_stop(&sc->sc_md_tmo);
callout_stop(&sc->sc_md6_tmo);
/* A master sends one more advertisement before leaving. */
if (sc->sc_state == MASTER)
carp_send_ad_locked(sc);
carp_set_state(sc, INIT);
carp_setrun(sc, 0);
} else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) {
/* Coming up from INIT: mark up and start the state machine. */
SC2IFP(sc)->if_flags |= IFF_UP;
carp_setrun(sc, 0);
}
break;
case SIOCSVH:
/* Changing carp parameters requires PRIV_NETINET_CARP. */
error = priv_check(curthread, PRIV_NETINET_CARP);
if (error)
break;
if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
break;
/*
 * 'error' starts at 1 and is decremented for each group of settings
 * (vhid, advbase/advskew) that is accepted.  If it is still positive
 * at the end, nothing was set and the request fails with EINVAL.
 */
error = 1;
if (sc->sc_carpdev) {
locked = 1;
CARP_SCLOCK(sc);
}
/* A requested state change is only honored outside INIT. */
if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) {
switch (carpr.carpr_state) {
case BACKUP:
callout_stop(&sc->sc_ad_tmo);
carp_set_state(sc, BACKUP);
carp_setrun(sc, 0);
carp_setroute(sc, RTM_DELETE);
break;
case MASTER:
carp_master_down_locked(sc);
break;
default:
break;
}
}
if (carpr.carpr_vhid > 0) {
if (carpr.carpr_vhid > 255) {
error = EINVAL;
break;
}
if (sc->sc_carpdev) {
struct carp_if *cif;
cif = (struct carp_if *)sc->sc_carpdev->if_carp;
/* The vhid must be unique among hosts on the same parent. */
TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
if (vr != sc &&
vr->sc_vhid == carpr.carpr_vhid) {
error = EEXIST;
/* This break only leaves the list walk. */
break;
}
if (error == EEXIST)
break;
}
sc->sc_vhid = carpr.carpr_vhid;
/* The link-level address becomes 00:00:5e:00:01:<vhid>. */
IF_LLADDR(sc->sc_ifp)[0] = 0;
IF_LLADDR(sc->sc_ifp)[1] = 0;
IF_LLADDR(sc->sc_ifp)[2] = 0x5e;
IF_LLADDR(sc->sc_ifp)[3] = 0;
IF_LLADDR(sc->sc_ifp)[4] = 1;
IF_LLADDR(sc->sc_ifp)[5] = sc->sc_vhid;
error--;
}
if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) {
if (carpr.carpr_advskew >= 255) {
error = EINVAL;
break;
}
if (carpr.carpr_advbase > 255) {
error = EINVAL;
break;
}
sc->sc_advbase = carpr.carpr_advbase;
sc->sc_advskew = carpr.carpr_advskew;
error--;
}
/* The key is copied whether or not any other setting was accepted. */
bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
if (error > 0)
error = EINVAL;
else {
error = 0;
carp_setrun(sc, 0);
}
break;
case SIOCGVH:
/* XXX: lockless read */
bzero(&carpr, sizeof(carpr));
carpr.carpr_state = sc->sc_state;
carpr.carpr_vhid = sc->sc_vhid;
carpr.carpr_advbase = sc->sc_advbase;
carpr.carpr_advskew = sc->sc_advskew;
/*
 * The key is only included when the caller holds PRIV_NETINET_CARP;
 * otherwise it stays zeroed.  The result of the privilege check is
 * then replaced by the result of copyout().
 */
error = priv_check(curthread, PRIV_NETINET_CARP);
if (error == 0)
bcopy(sc->sc_key, carpr.carpr_key,
sizeof(carpr.carpr_key));
error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
break;
default:
error = EINVAL;
}
if (locked)
CARP_SCUNLOCK(sc);
/* Runs after the lock is dropped, for every command and outcome. */
carp_hmac_prepare(sc);
return (error);
}
/*
* XXX: this is looutput. We should eventually use it from there.
*/
static int
carp_looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
struct rtentry *rt)
{
u_int32_t af;
M_ASSERTPKTHDR(m); /* check if we have the packet header */
if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
m_freem(m);
return (rt->rt_flags & RTF_BLACKHOLE ? 0 :
rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
}
ifp->if_opackets++;
ifp->if_obytes += m->m_pkthdr.len;
/* BPF writes need to be handled specially. */
if (dst->sa_family == AF_UNSPEC) {
bcopy(dst->sa_data, &af, sizeof(af));
dst->sa_family = af;
}
#if 1 /* XXX */
switch (dst->sa_family) {
case AF_INET:
case AF_INET6:
case AF_IPX:
case AF_APPLETALK:
break;
default:
printf("carp_looutput: af=%d unexpected\n", dst->sa_family);
m_freem(m);
return (EAFNOSUPPORT);
}
#endif
return(if_simloop(ifp, m, dst->sa_family, 0));
}
/*
* Start output on carp interface. This function should never be called.
*/
2005-02-26 10:33:14 +00:00
/*
 * Start routine for the carp interface.  It does no work; when built with
 * DEBUG it reports that it was called.
 */
static void
carp_start(struct ifnet *ifp)
{
#if defined(DEBUG)
	printf("%s: start called\n", ifp->if_xname);
#endif
}
int
carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
struct rtentry *rt)
{
struct m_tag *mtag;
struct carp_softc *sc;
struct ifnet *carp_ifp;
if (!sa)
return (0);
switch (sa->sa_family) {
#ifdef INET
case AF_INET:
break;
#endif /* INET */
#ifdef INET6
case AF_INET6:
break;
#endif /* INET6 */
default:
return (0);
}
mtag = m_tag_find(m, PACKET_TAG_CARP, NULL);
if (mtag == NULL)
return (0);
bcopy(mtag + 1, &carp_ifp, sizeof(struct ifnet *));
sc = carp_ifp->if_softc;
/* Set the source MAC address to Virtual Router MAC Address */
switch (ifp->if_type) {
case IFT_ETHER:
case IFT_L2VLAN: {
struct ether_header *eh;
eh = mtod(m, struct ether_header *);
eh->ether_shost[0] = 0;
eh->ether_shost[1] = 0;
eh->ether_shost[2] = 0x5e;
eh->ether_shost[3] = 0;
eh->ether_shost[4] = 1;
eh->ether_shost[5] = sc->sc_vhid;
}
break;
case IFT_FDDI: {
struct fddi_header *fh;
fh = mtod(m, struct fddi_header *);
fh->fddi_shost[0] = 0;
fh->fddi_shost[1] = 0;
fh->fddi_shost[2] = 0x5e;
fh->fddi_shost[3] = 0;
fh->fddi_shost[4] = 1;
fh->fddi_shost[5] = sc->sc_vhid;
}
break;
case IFT_ISO88025: {
struct iso88025_header *th;
th = mtod(m, struct iso88025_header *);
th->iso88025_shost[0] = 3;
th->iso88025_shost[1] = 0;
th->iso88025_shost[2] = 0x40 >> (sc->sc_vhid - 1);
th->iso88025_shost[3] = 0x40000 >> (sc->sc_vhid - 1);
th->iso88025_shost[4] = 0;
th->iso88025_shost[5] = 0;
}
break;
default:
printf("%s: carp is not supported for this interface type\n",
ifp->if_xname);
return (EOPNOTSUPP);
}
return (0);
}
2005-02-26 10:33:14 +00:00
/*
 * Record a new carp state and report the matching link state on the carp
 * interface: BACKUP is reported as down, MASTER as up, anything else as
 * unknown.  Nothing happens when the state does not change.
 */
static void
carp_set_state(struct carp_softc *sc, int state)
{
	int link_state;

	if (sc->sc_carpdev)
		CARP_SCLOCK_ASSERT(sc);

	if (sc->sc_state == state)
		return;
	sc->sc_state = state;

	if (state == BACKUP)
		link_state = LINK_STATE_DOWN;
	else if (state == MASTER)
		link_state = LINK_STATE_UP;
	else
		link_state = LINK_STATE_UNKNOWN;

	if_link_state_change(SC2IFP(sc), link_state);
}
/*
 * Re-evaluate every virtual host attached to the carp_if 'v', taking the
 * carp_if lock around the locked variant.
 */
void
carp_carpdev_state(void *v)
{
	struct carp_if *cif;

	cif = v;
	CARP_LOCK(cif);
	carp_carpdev_state_locked(cif);
	CARP_UNLOCK(cif);
}
static void
carp_carpdev_state_locked(struct carp_if *cif)
{
struct carp_softc *sc;
TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list)
carp_sc_state_locked(sc);
}
static void
carp_sc_state_locked(struct carp_softc *sc)
{
CARP_SCLOCK_ASSERT(sc);
if (sc->sc_carpdev->if_link_state != LINK_STATE_UP ||
!(sc->sc_carpdev->if_flags & IFF_UP)) {
sc->sc_flags_backup = SC2IFP(sc)->if_flags;
SC2IFP(sc)->if_flags &= ~IFF_UP;
SC2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
callout_stop(&sc->sc_ad_tmo);
callout_stop(&sc->sc_md_tmo);
callout_stop(&sc->sc_md6_tmo);
carp_set_state(sc, INIT);
carp_setrun(sc, 0);
if (!sc->sc_suppress) {
carp_suppress_preempt++;
if (carp_suppress_preempt == 1) {
CARP_SCUNLOCK(sc);
carp_send_ad_all();
CARP_SCLOCK(sc);
}
}
sc->sc_suppress = 1;
} else {
SC2IFP(sc)->if_flags |= sc->sc_flags_backup;
carp_set_state(sc, INIT);
carp_setrun(sc, 0);
if (sc->sc_suppress)
carp_suppress_preempt--;
sc->sc_suppress = 0;
}
return;
}
/*
 * Module event handler.  On load: register the interface departure handler,
 * initialize the carp mutex and interface list, and attach the cloner.  On
 * unload: undo these.  Other events are rejected with EINVAL.
 */
static int
carp_modevent(module_t mod, int type, void *data)
{
	if (type == MOD_LOAD) {
		if_detach_event_tag = EVENTHANDLER_REGISTER(
		    ifnet_departure_event, carp_ifdetach, NULL,
		    EVENTHANDLER_PRI_ANY);
		if (if_detach_event_tag == NULL)
			return (ENOMEM);
		mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF);
		LIST_INIT(&carpif_list);
		if_clone_attach(&carp_cloner);
		return (0);
	}

	if (type == MOD_UNLOAD) {
		EVENTHANDLER_DEREGISTER(ifnet_departure_event,
		    if_detach_event_tag);
		if_clone_detach(&carp_cloner);
		mtx_destroy(&carp_mtx);
		return (0);
	}

	return (EINVAL);
}
static moduledata_t carp_mod = {
"carp",
carp_modevent,
0
};
DECLARE_MODULE(carp, carp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);