A major overhaul of the CARP implementation. The ip_carp.c was started
from scratch, copying needed functionality from the old implementation on demand, with a thorough review of all code. The main change is that the interface layer has been removed from CARP. Now redundant addresses are configured directly on the interfaces they run on. The CARP configuration itself is, as before, configured and read via SIOCSVH/SIOCGVH ioctls. A new prefix created with SIOCAIFADDR or SIOCAIFADDR_IN6 may now be configured to a particular virtual host id, which makes the prefix redundant. ifconfig(8) semantics have been changed too: now one doesn't need to clone a carpXX interface, he/she should directly configure a vhid on an Ethernet interface. To supply vhid data from the kernel to an application the getifaddrs(3) function has been changed to pass ifam_data with each address. [1] The new implementation definitely closes all PRs related to carp(4) being an interface, and may close several others. It also allows running a single redundant IP per interface. Big thanks to Bjoern Zeeb for his help with the inet6 part of the patch, for the idea of using ifam_data and for several rounds of reviewing! PR: kern/117000, kern/126945, kern/126714, kern/120130, kern/117448 Reviewed by: bz Submitted by: bz [1]
This commit is contained in:
parent
b646d3f699
commit
27a36f6ac8
11
UPDATING
11
UPDATING
@ -22,6 +22,17 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 10.x IS SLOW:
|
||||
machines to maximize performance. (To disable malloc debugging, run
|
||||
ln -s aj /etc/malloc.conf.)
|
||||
|
||||
20111215:
|
||||
The carp(4) facility has been changed significantly. Configuration
|
||||
of the CARP protocol via ifconfig(8) has changed, as well as the format
|
||||
of CARP events submitted to devd(8). See manual pages
|
||||
for more information. The arpbalance feature of carp(4) is currently
|
||||
not supported.
|
||||
|
||||
Size of struct in_aliasreq, struct in6_aliasreq has changed. User
|
||||
utilities using SIOCAIFADDR, SIOCAIFADDR_IN6, e.g. ifconfig(8),
|
||||
need to be recompiled.
|
||||
|
||||
20111122:
|
||||
The acpi_wmi(4) status device /dev/wmistat has been renamed to
|
||||
/dev/wmistat0.
|
||||
|
@ -76,7 +76,7 @@ __FBSDID("$FreeBSD$");
|
||||
#define HAVE_IFM_DATA
|
||||
#endif
|
||||
|
||||
#if _BSDI_VERSION >= 199802
|
||||
#if (_BSDI_VERSION >= 199802) || (__FreeBSD_version >= 1000003)
|
||||
/* ifam_data is specific to recent versions of bsdi and FreeBSD */
|
||||
#define HAVE_IFAM_DATA
|
||||
#endif
|
||||
|
@ -84,8 +84,11 @@ in_status(int s __unused, const struct ifaddrs *ifa)
|
||||
if (ifa->ifa_flags & IFF_BROADCAST) {
|
||||
sin = (struct sockaddr_in *)ifa->ifa_broadaddr;
|
||||
if (sin != NULL && sin->sin_addr.s_addr != 0)
|
||||
printf("broadcast %s", inet_ntoa(sin->sin_addr));
|
||||
printf("broadcast %s ", inet_ntoa(sin->sin_addr));
|
||||
}
|
||||
|
||||
print_vhid(ifa, " ");
|
||||
|
||||
putchar('\n');
|
||||
}
|
||||
|
||||
|
@ -307,6 +307,8 @@ in6_status(int s __unused, const struct ifaddrs *ifa)
|
||||
printf("infty ");
|
||||
}
|
||||
|
||||
print_vhid(ifa, " ");
|
||||
|
||||
putchar('\n');
|
||||
}
|
||||
|
||||
|
@ -35,10 +35,11 @@
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <net/ethernet.h>
|
||||
#include <net/if.h>
|
||||
#include <net/if_var.h>
|
||||
#include <netinet/in.h>
|
||||
#include <netinet/in_var.h>
|
||||
#include <netinet/ip_carp.h>
|
||||
#include <net/route.h>
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
@ -52,127 +53,153 @@
|
||||
|
||||
static const char *carp_states[] = { CARP_STATES };
|
||||
|
||||
void carp_status(int s);
|
||||
void setcarp_advbase(const char *,int, int, const struct afswtch *rafp);
|
||||
void setcarp_advskew(const char *, int, int, const struct afswtch *rafp);
|
||||
void setcarp_passwd(const char *, int, int, const struct afswtch *rafp);
|
||||
void setcarp_vhid(const char *, int, int, const struct afswtch *rafp);
|
||||
static void carp_status(int s);
|
||||
static void setcarp_vhid(const char *, int, int, const struct afswtch *rafp);
|
||||
static void setcarp_callback(int, void *);
|
||||
static void setcarp_advbase(const char *,int, int, const struct afswtch *rafp);
|
||||
static void setcarp_advskew(const char *, int, int, const struct afswtch *rafp);
|
||||
static void setcarp_passwd(const char *, int, int, const struct afswtch *rafp);
|
||||
|
||||
void
|
||||
static int carpr_vhid = -1;
|
||||
static int carpr_advskew = -1;
|
||||
static int carpr_advbase = -1;
|
||||
static int carpr_state = -1;
|
||||
static unsigned char const *carpr_key;
|
||||
|
||||
static void
|
||||
carp_status(int s)
|
||||
{
|
||||
const char *state;
|
||||
struct carpreq carpr;
|
||||
struct carpreq carpr[CARP_MAXVHID];
|
||||
int i;
|
||||
|
||||
memset((char *)&carpr, 0, sizeof(struct carpreq));
|
||||
bzero(carpr, sizeof(struct carpreq) * CARP_MAXVHID);
|
||||
carpr[0].carpr_count = CARP_MAXVHID;
|
||||
ifr.ifr_data = (caddr_t)&carpr;
|
||||
|
||||
if (ioctl(s, SIOCGVH, (caddr_t)&ifr) == -1)
|
||||
return;
|
||||
|
||||
if (carpr.carpr_vhid > 0) {
|
||||
if (carpr.carpr_state > CARP_MAXSTATE)
|
||||
state = "<UNKNOWN>";
|
||||
for (i = 0; i < carpr[0].carpr_count; i++) {
|
||||
printf("\tcarp: %s vhid %d advbase %d advskew %d",
|
||||
carp_states[carpr[i].carpr_state], carpr[i].carpr_vhid,
|
||||
carpr[i].carpr_advbase, carpr[i].carpr_advskew);
|
||||
if (printkeys && carpr[i].carpr_key[0] != '\0')
|
||||
printf(" key \"%s\"\n", carpr[i].carpr_key);
|
||||
else
|
||||
state = carp_states[carpr.carpr_state];
|
||||
|
||||
printf("\tcarp: %s vhid %d advbase %d advskew %d\n",
|
||||
state, carpr.carpr_vhid, carpr.carpr_advbase,
|
||||
carpr.carpr_advskew);
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
return;
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
setcarp_passwd(const char *val, int d, int s, const struct afswtch *afp)
|
||||
{
|
||||
struct carpreq carpr;
|
||||
|
||||
memset((char *)&carpr, 0, sizeof(struct carpreq));
|
||||
ifr.ifr_data = (caddr_t)&carpr;
|
||||
|
||||
if (ioctl(s, SIOCGVH, (caddr_t)&ifr) == -1)
|
||||
err(1, "SIOCGVH");
|
||||
|
||||
memset(carpr.carpr_key, 0, sizeof(carpr.carpr_key));
|
||||
/* XXX Should hash the password into the key here, perhaps? */
|
||||
strlcpy(carpr.carpr_key, val, CARP_KEY_LEN);
|
||||
|
||||
if (ioctl(s, SIOCSVH, (caddr_t)&ifr) == -1)
|
||||
err(1, "SIOCSVH");
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void
|
||||
static void
|
||||
setcarp_vhid(const char *val, int d, int s, const struct afswtch *afp)
|
||||
{
|
||||
int vhid;
|
||||
|
||||
carpr_vhid = atoi(val);
|
||||
|
||||
if (carpr_vhid <= 0 || carpr_vhid > CARP_MAXVHID)
|
||||
errx(1, "vhid must be greater than 0 and less than %u",
|
||||
CARP_MAXVHID);
|
||||
|
||||
switch (afp->af_af) {
|
||||
#ifdef INET
|
||||
case AF_INET:
|
||||
{
|
||||
struct in_aliasreq *ifra;
|
||||
|
||||
ifra = (struct in_aliasreq *)afp->af_addreq;
|
||||
ifra->ifra_vhid = carpr_vhid;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
#ifdef INET6
|
||||
case AF_INET6:
|
||||
{
|
||||
struct in6_aliasreq *ifra;
|
||||
|
||||
ifra = (struct in6_aliasreq *)afp->af_addreq;
|
||||
ifra->ifra_vhid = carpr_vhid;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
default:
|
||||
errx(1, "%s doesn't support carp(4)", afp->af_name);
|
||||
}
|
||||
|
||||
callback_register(setcarp_callback, NULL);
|
||||
}
|
||||
|
||||
static void
|
||||
setcarp_callback(int s, void *arg __unused)
|
||||
{
|
||||
struct carpreq carpr;
|
||||
|
||||
vhid = atoi(val);
|
||||
|
||||
if (vhid <= 0)
|
||||
errx(1, "vhid must be greater than 0");
|
||||
|
||||
memset((char *)&carpr, 0, sizeof(struct carpreq));
|
||||
bzero(&carpr, sizeof(struct carpreq));
|
||||
carpr.carpr_vhid = carpr_vhid;
|
||||
carpr.carpr_count = 1;
|
||||
ifr.ifr_data = (caddr_t)&carpr;
|
||||
|
||||
if (ioctl(s, SIOCGVH, (caddr_t)&ifr) == -1)
|
||||
if (ioctl(s, SIOCGVH, (caddr_t)&ifr) == -1 && errno != ENOENT)
|
||||
err(1, "SIOCGVH");
|
||||
|
||||
carpr.carpr_vhid = vhid;
|
||||
if (carpr_key != NULL)
|
||||
/* XXX Should hash the password into the key here? */
|
||||
strlcpy(carpr.carpr_key, carpr_key, CARP_KEY_LEN);
|
||||
if (carpr_advskew > -1)
|
||||
carpr.carpr_advskew = carpr_advskew;
|
||||
if (carpr_advbase > -1)
|
||||
carpr.carpr_advbase = carpr_advbase;
|
||||
if (carpr_state > -1)
|
||||
carpr.carpr_state = carpr_state;
|
||||
|
||||
if (ioctl(s, SIOCSVH, (caddr_t)&ifr) == -1)
|
||||
err(1, "SIOCSVH");
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void
|
||||
static void
|
||||
setcarp_passwd(const char *val, int d, int s, const struct afswtch *afp)
|
||||
{
|
||||
|
||||
if (carpr_vhid == -1)
|
||||
errx(1, "passwd requires vhid");
|
||||
|
||||
carpr_key = val;
|
||||
}
|
||||
|
||||
static void
|
||||
setcarp_advskew(const char *val, int d, int s, const struct afswtch *afp)
|
||||
{
|
||||
int advskew;
|
||||
struct carpreq carpr;
|
||||
|
||||
advskew = atoi(val);
|
||||
if (carpr_vhid == -1)
|
||||
errx(1, "advskew requires vhid");
|
||||
|
||||
memset((char *)&carpr, 0, sizeof(struct carpreq));
|
||||
ifr.ifr_data = (caddr_t)&carpr;
|
||||
|
||||
if (ioctl(s, SIOCGVH, (caddr_t)&ifr) == -1)
|
||||
err(1, "SIOCGVH");
|
||||
|
||||
carpr.carpr_advskew = advskew;
|
||||
|
||||
if (ioctl(s, SIOCSVH, (caddr_t)&ifr) == -1)
|
||||
err(1, "SIOCSVH");
|
||||
|
||||
return;
|
||||
carpr_advskew = atoi(val);
|
||||
}
|
||||
|
||||
void
|
||||
static void
|
||||
setcarp_advbase(const char *val, int d, int s, const struct afswtch *afp)
|
||||
{
|
||||
int advbase;
|
||||
struct carpreq carpr;
|
||||
|
||||
advbase = atoi(val);
|
||||
if (carpr_vhid == -1)
|
||||
errx(1, "advbase requires vhid");
|
||||
|
||||
memset((char *)&carpr, 0, sizeof(struct carpreq));
|
||||
ifr.ifr_data = (caddr_t)&carpr;
|
||||
carpr_advbase = atoi(val);
|
||||
}
|
||||
|
||||
if (ioctl(s, SIOCGVH, (caddr_t)&ifr) == -1)
|
||||
err(1, "SIOCGVH");
|
||||
static void
|
||||
setcarp_state(const char *val, int d, int s, const struct afswtch *afp)
|
||||
{
|
||||
int i;
|
||||
|
||||
carpr.carpr_advbase = advbase;
|
||||
if (carpr_vhid == -1)
|
||||
errx(1, "state requires vhid");
|
||||
|
||||
if (ioctl(s, SIOCSVH, (caddr_t)&ifr) == -1)
|
||||
err(1, "SIOCSVH");
|
||||
for (i = 0; i <= CARP_MAXSTATE; i++)
|
||||
if (strcasecmp(carp_states[i], val) == 0) {
|
||||
carpr_state = i;
|
||||
return;
|
||||
}
|
||||
|
||||
return;
|
||||
errx(1, "unknown state");
|
||||
}
|
||||
|
||||
static struct cmd carp_cmds[] = {
|
||||
@ -180,6 +207,7 @@ static struct cmd carp_cmds[] = {
|
||||
DEF_CMD_ARG("advskew", setcarp_advskew),
|
||||
DEF_CMD_ARG("pass", setcarp_passwd),
|
||||
DEF_CMD_ARG("vhid", setcarp_vhid),
|
||||
DEF_CMD_ARG("state", setcarp_state),
|
||||
};
|
||||
static struct afswtch af_carp = {
|
||||
.af_name = "af_carp",
|
||||
|
@ -28,7 +28,7 @@
|
||||
.\" From: @(#)ifconfig.8 8.3 (Berkeley) 1/5/94
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd November 12, 2011
|
||||
.Dd December 16, 2011
|
||||
.Dt IFCONFIG 8
|
||||
.Os
|
||||
.Sh NAME
|
||||
@ -2445,16 +2445,36 @@ The
|
||||
argument is useless and hence deprecated.
|
||||
.El
|
||||
.Pp
|
||||
The following parameters are specific to
|
||||
The following parameters are used to configure
|
||||
.Xr carp 4
|
||||
interfaces:
|
||||
protocol on an interface:
|
||||
.Bl -tag -width indent
|
||||
.It Cm vhid Ar n
|
||||
Set the virtual host ID.
|
||||
This is a required setting to initiate
|
||||
.Xr carp 4 .
|
||||
If the virtual host ID doesn't exist yet, it is created and attached to the
|
||||
interface, otherwise configuration of an existing vhid is adjusted.
|
||||
If the
|
||||
.Cm vhid
|
||||
keyword is supplied along with an
|
||||
.Dq inet6
|
||||
or
|
||||
.Dq inet
|
||||
address, then this address is configured to be run under control of the
|
||||
specified vhid.
|
||||
Whenever the last address that refers to a particular vhid is removed from an
|
||||
interface, the vhid is automatically removed from the interface and destroyed.
|
||||
Any other configuration parameters for the
|
||||
.Xr carp 4
|
||||
protocol should be supplied along with the
|
||||
.Cm vhid
|
||||
keyword.
|
||||
Acceptable values for vhid are 1 to 255.
|
||||
.It Cm advbase Ar seconds
|
||||
Specifies the base of the advertisement interval in seconds.
|
||||
The acceptable values are 1 to 255.
|
||||
The default value is 1.
|
||||
.\" The default value is
|
||||
.\" .Dv CARP_DFLTINTV .
|
||||
.It Cm advskew Ar interval
|
||||
Specifies the skew to add to the base advertisement interval to
|
||||
make one host advertise slower than another host.
|
||||
@ -2464,10 +2484,8 @@ The default value is 0.
|
||||
.It Cm pass Ar phrase
|
||||
Set the authentication key to
|
||||
.Ar phrase .
|
||||
.It Cm vhid Ar n
|
||||
Set the virtual host ID.
|
||||
This is a required setting.
|
||||
Acceptable values are 1 to 255.
|
||||
.It Cm state Ar MASTER|BACKUP
|
||||
Forcibly change state of a given vhid.
|
||||
.El
|
||||
.Pp
|
||||
The
|
||||
@ -2530,8 +2548,9 @@ The
|
||||
.Fl k
|
||||
flag causes keying information for the interface, if available, to be
|
||||
printed.
|
||||
For example, the values of 802.11 WEP keys will be printed, if accessible to
|
||||
the current user.
|
||||
For example, the values of 802.11 WEP keys and
|
||||
.Xr carp 4
|
||||
passphrases will be printed, if accessible to the current user.
|
||||
This information is not printed by default, as it may be considered
|
||||
sensitive.
|
||||
.Pp
|
||||
@ -2593,6 +2612,11 @@ as a synonym for the canonical form of the option
|
||||
.Fl alias :
|
||||
.Dl # ifconfig em0 inet6 2001:db8:bdbd::123/48 delete
|
||||
.Pp
|
||||
Configure a single CARP redundant address on igb0, and then switch it
|
||||
to be master:
|
||||
.Dl # ifconfig igb0 vhid 1 10.0.0.1/24 pass foobar
|
||||
.Dl # ifconfig igb0 vhid 1 state master
|
||||
.Pp
|
||||
Configure the interface
|
||||
.Li xl0 ,
|
||||
to use 100baseTX, full duplex Ethernet media options:
|
||||
|
@ -1077,6 +1077,21 @@ printb(const char *s, unsigned v, const char *bits)
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
print_vhid(const struct ifaddrs *ifa, const char *s)
|
||||
{
|
||||
struct if_data *ifd;
|
||||
|
||||
if (ifa->ifa_data == NULL)
|
||||
return;
|
||||
|
||||
ifd = ifa->ifa_data;
|
||||
if (ifd->ifi_vhid == 0)
|
||||
return;
|
||||
|
||||
printf("vhid %d ", ifd->ifi_vhid);
|
||||
}
|
||||
|
||||
void
|
||||
ifmaybeload(const char *name)
|
||||
{
|
||||
|
@ -148,3 +148,6 @@ void clone_setdefcallback(const char *, clone_callback_func *);
|
||||
* operations on ifmedia can avoid cmd line ordering confusion.
|
||||
*/
|
||||
struct ifmediareq *ifmedia_getstate(int s);
|
||||
|
||||
void print_vhid(const struct ifaddrs *, const char *);
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
.\" $OpenBSD: carp.4,v 1.16 2004/12/07 23:41:35 jmc Exp $
|
||||
.\"
|
||||
.\" Copyright (c) 2003, Ryan McBride. All rights reserved.
|
||||
.\" Copyright (c) 2011, Gleb Smirnoff <glebius@FreeBSD.org>
|
||||
.\"
|
||||
.\" Redistribution and use in source and binary forms, with or without
|
||||
.\" modification, are permitted provided that the following conditions
|
||||
@ -25,7 +26,7 @@
|
||||
.\"
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd August 15, 2011
|
||||
.Dd December 16, 2011
|
||||
.Dt CARP 4
|
||||
.Os
|
||||
.Sh NAME
|
||||
@ -34,33 +35,17 @@
|
||||
.Sh SYNOPSIS
|
||||
.Cd "device carp"
|
||||
.Sh DESCRIPTION
|
||||
The
|
||||
.Nm
|
||||
interface is a pseudo-device that implements and controls the
|
||||
CARP protocol.
|
||||
CARP allows multiple hosts on the same local network to share a set of IP addresses.
|
||||
CARP allows multiple hosts on the same local network to share a set of
|
||||
IPv4 and/or IPv6 addresses.
|
||||
Its primary purpose is to ensure that these
|
||||
addresses are always available, but in some configurations
|
||||
.Nm
|
||||
can also provide load balancing functionality.
|
||||
.Pp
|
||||
A
|
||||
.Nm
|
||||
interface can be created at runtime using the
|
||||
.Nm ifconfig Li carp Ns Ar N Cm create
|
||||
command or by configuring
|
||||
it via
|
||||
.Va cloned_interfaces
|
||||
in the
|
||||
.Pa /etc/rc.conf
|
||||
file.
|
||||
addresses are always available.
|
||||
.Pp
|
||||
To use
|
||||
.Nm ,
|
||||
the administrator needs to configure at minimum a common virtual host ID (VHID)
|
||||
and virtual host IP address on each machine which is to take part in the virtual
|
||||
group.
|
||||
Additional parameters can also be set on a per-interface basis:
|
||||
the administrator needs to configure at minimum a common virtual host ID
|
||||
(vhid) and attach at least one IP address to this vhid on each machine which
|
||||
is to take part in the virtual group.
|
||||
Additional parameters can also be set on a per-vhid basis:
|
||||
.Cm advbase
|
||||
and
|
||||
.Cm advskew ,
|
||||
@ -93,9 +78,20 @@ or through the
|
||||
.Dv SIOCSVH
|
||||
.Xr ioctl 2 .
|
||||
.Pp
|
||||
CARP virtual hosts can be configured on multicast capable interfaces: Ethernet,
|
||||
layer 2 VLAN, FDDI and Token Ring.
|
||||
An arbitrary number of virtual host IDs can be configured on an interface.
|
||||
An arbitrary number of IPv4 or IPv6 addresses can be attached to a particular
|
||||
vhid.
|
||||
It is important that all hosts participating in a vhid have the same list
|
||||
of prefixes configured on the vhid, since all prefixes are included in the
|
||||
cryptographic checksum supplied in each advertisement.
|
||||
Multiple vhids running on one interface participate in master/backup
|
||||
elections independently.
|
||||
.Pp
|
||||
Additionally, there are a number of global parameters which can be set using
|
||||
.Xr sysctl 8 :
|
||||
.Bl -tag -width ".Va net.inet.carp.arpbalance"
|
||||
.Bl -tag -width ".Va net.inet.carp.preempt"
|
||||
.It Va net.inet.carp.allow
|
||||
Accept incoming
|
||||
.Nm
|
||||
@ -125,9 +121,6 @@ Values above 1 enable logging of bad
|
||||
.Nm
|
||||
packets.
|
||||
Default value is 1.
|
||||
.It Va net.inet.carp.arpbalance
|
||||
Balance local traffic using ARP (see below).
|
||||
Disabled by default.
|
||||
.It Va net.inet.carp.suppress_preempt
|
||||
A read only value showing the status of preemption suppression.
|
||||
Preemption can be suppressed if link on an interface is down
|
||||
@ -138,36 +131,36 @@ Value of 0 means that preemption is not suppressed, since no
|
||||
problems are detected.
|
||||
Every problem increments suppression counter.
|
||||
.El
|
||||
.Sh ARP level load balancing
|
||||
The
|
||||
.Nm
|
||||
has limited abilities for load balancing the incoming connections
|
||||
between hosts in Ethernet network.
|
||||
For load balancing operation, one needs several CARP interfaces that
|
||||
are configured to the same IP address, but to a different VHIDs.
|
||||
Once an ARP request is received, the CARP protocol will use a hashing
|
||||
function against the source IP address in the ARP request to determine
|
||||
which VHID should this request belong to.
|
||||
If the corresponding CARP interface is in master state, the ARP request
|
||||
will be replied, otherwise it will be ignored.
|
||||
See the
|
||||
.Sx EXAMPLES
|
||||
section for a practical example of load balancing.
|
||||
.Pp
|
||||
The ARP load balancing has some limitations.
|
||||
First, ARP balancing only works on the local network segment.
|
||||
It cannot balance traffic that crosses a router, because the
|
||||
router itself will always be balanced to the same virtual host.
|
||||
Second, ARP load balancing can lead to asymmetric routing
|
||||
of incoming and outgoing traffic, and thus combining it with
|
||||
.Xr pfsync 4
|
||||
is dangerous, because this creates a race condition between
|
||||
balanced routers and a host they are serving.
|
||||
Imagine an incoming packet creating state on the first router, being
|
||||
forwarded to its destination, and destination replying faster
|
||||
than the state information is packed and synced with the second router.
|
||||
If the reply would be load balanced to second router, it will be
|
||||
dropped due to no state.
|
||||
.\".Sh ARP level load balancing
|
||||
.\"The
|
||||
.\".Nm
|
||||
.\"has limited abilities for load balancing the incoming connections
|
||||
.\"between hosts in Ethernet network.
|
||||
.\"For load balancing operation, one needs several CARP interfaces that
|
||||
.\"are configured to the same IP address, but to a different vhids.
|
||||
.\"Once an ARP request is received, the CARP protocol will use a hashing
|
||||
.\"function against the source IP address in the ARP request to determine
|
||||
.\"which vhid should this request belong to.
|
||||
.\"If the corresponding CARP interface is in master state, the ARP request
|
||||
.\"will be replied, otherwise it will be ignored.
|
||||
.\"See the
|
||||
.\".Sx EXAMPLES
|
||||
.\"section for a practical example of load balancing.
|
||||
.\".Pp
|
||||
.\"The ARP load balancing has some limitations.
|
||||
.\"First, ARP balancing only works on the local network segment.
|
||||
.\"It cannot balance traffic that crosses a router, because the
|
||||
.\"router itself will always be balanced to the same virtual host.
|
||||
.\"Second, ARP load balancing can lead to asymmetric routing
|
||||
.\"of incoming and outgoing traffic, and thus combining it with
|
||||
.\".Xr pfsync 4
|
||||
.\"is dangerous, because this creates a race condition between
|
||||
.\"balanced routers and a host they are serving.
|
||||
.\"Imagine an incoming packet creating state on the first router, being
|
||||
.\"forwarded to its destination, and destination replying faster
|
||||
.\"than the state information is packed and synced with the second router.
|
||||
.\"If the reply would be load balanced to second router, it will be
|
||||
.\"dropped due to no state.
|
||||
.Sh STATE CHANGE NOTIFICATIONS
|
||||
Sometimes it is useful to get notified about
|
||||
.Nm
|
||||
@ -175,13 +168,10 @@ status change events.
|
||||
This can be accomplished by using
|
||||
.Xr devd 8
|
||||
hooks.
|
||||
Master/slave events are signalled as
|
||||
.Nm
|
||||
interface
|
||||
.Dv LINK_UP
|
||||
or
|
||||
.Dv LINK_DOWN
|
||||
event.
|
||||
Master/slave events are signalled under system
|
||||
.Dv CARP .
|
||||
Subsystem specifies the vhid and name of the interface where the event occurred.
|
||||
Type of the message displays new state of vhid.
|
||||
Please see
|
||||
.Xr devd.conf 5
|
||||
and
|
||||
@ -197,23 +187,19 @@ Enable it on both host A and B:
|
||||
.Pp
|
||||
.Dl sysctl net.inet.carp.preempt=1
|
||||
.Pp
|
||||
Assume that host A is the preferred master and 192.168.1.x/24 is
|
||||
configured on one physical interface and 192.168.2.y/24 on another.
|
||||
Assume that host A is the preferred master and we are running the
|
||||
192.168.1.0/24 prefix on em0 and 192.168.2.0/24 on em1.
|
||||
This is the setup for host A:
|
||||
.Bd -literal -offset indent
|
||||
ifconfig carp0 create
|
||||
ifconfig carp0 vhid 1 pass mekmitasdigoat 192.168.1.1/24
|
||||
ifconfig carp1 create
|
||||
ifconfig carp1 vhid 2 pass mekmitasdigoat 192.168.2.1/24
|
||||
ifconfig em0 vhid 1 pass mekmitasdigoat 192.168.1.1/24
|
||||
ifconfig em1 vhid 2 pass mekmitasdigoat 192.168.2.1/24
|
||||
.Ed
|
||||
.Pp
|
||||
The setup for host B is identical, but it has a higher
|
||||
.Cm advskew :
|
||||
.Bd -literal -offset indent
|
||||
ifconfig carp0 create
|
||||
ifconfig carp0 vhid 1 advskew 100 pass mekmitasdigoat 192.168.1.1/24
|
||||
ifconfig carp1 create
|
||||
ifconfig carp1 vhid 2 advskew 100 pass mekmitasdigoat 192.168.2.1/24
|
||||
ifconfig em0 vhid 1 advskew 100 pass mekmitasdigoat 192.168.1.1/24
|
||||
ifconfig em1 vhid 2 advskew 100 pass mekmitasdigoat 192.168.2.1/24
|
||||
.Ed
|
||||
.Pp
|
||||
Because of the preempt option, when one of the physical interfaces of
|
||||
@ -224,67 +210,60 @@ is adjusted to 240 on all its
|
||||
interfaces.
|
||||
This will cause host B to preempt on both interfaces instead of
|
||||
just the failed one.
|
||||
.Pp
|
||||
In order to set up an ARP balanced virtual host, it is necessary to configure
|
||||
one virtual host for each physical host which would respond to ARP requests
|
||||
and thus handle the traffic.
|
||||
In the following example, two virtual hosts are configured on two hosts to
|
||||
provide balancing and failover for the IP address 192.168.1.10.
|
||||
.Pp
|
||||
First the
|
||||
.Nm
|
||||
interfaces on host A are configured.
|
||||
The
|
||||
.Cm advskew
|
||||
of 100 on the second virtual host means that its advertisements will be sent
|
||||
out slightly less frequently.
|
||||
.Bd -literal -offset indent
|
||||
ifconfig carp0 create
|
||||
ifconfig carp0 vhid 1 pass mekmitasdigoat 192.168.1.10/24
|
||||
ifconfig carp1 create
|
||||
ifconfig carp1 vhid 2 advskew 100 pass mekmitasdigoat 192.168.1.10/24
|
||||
.Ed
|
||||
.Pp
|
||||
The configuration for host B is identical, except the
|
||||
.Cm advskew
|
||||
is on virtual host 1 rather than virtual host 2.
|
||||
.Bd -literal -offset indent
|
||||
ifconfig carp0 create
|
||||
ifconfig carp0 vhid 1 advskew 100 pass mekmitasdigoat 192.168.1.10/24
|
||||
ifconfig carp1 create
|
||||
ifconfig carp1 vhid 2 pass mekmitasdigoat 192.168.1.10/24
|
||||
.Ed
|
||||
.Pp
|
||||
Finally, the ARP balancing feature must be enabled on both hosts:
|
||||
.Pp
|
||||
.Dl sysctl net.inet.carp.arpbalance=1
|
||||
.Pp
|
||||
When the hosts receive an ARP request for 192.168.1.10, the source IP address
|
||||
of the request is used to compute which virtual host should answer the request.
|
||||
The host which is master of the selected virtual host will reply to the
|
||||
request, the other(s) will ignore it.
|
||||
.Pp
|
||||
This way, locally connected systems will receive different ARP replies and
|
||||
subsequent IP traffic will be balanced among the hosts.
|
||||
If one of the hosts fails, the other will take over the virtual MAC address,
|
||||
and begin answering ARP requests on its behalf.
|
||||
.\".Pp
|
||||
.\"In order to set up an ARP balanced virtual host, it is necessary to configure
|
||||
.\"one virtual host for each physical host which would respond to ARP requests
|
||||
.\"and thus handle the traffic.
|
||||
.\"In the following example, two virtual hosts are configured on two hosts to
|
||||
.\"provide balancing and failover for the IP address 192.168.1.10.
|
||||
.\".Pp
|
||||
.\"First the
|
||||
.\".Nm
|
||||
.\"interfaces on host A are configured.
|
||||
.\"The
|
||||
.\".Cm advskew
|
||||
.\"of 100 on the second virtual host means that its advertisements will be sent
|
||||
.\"out slightly less frequently.
|
||||
.\".Bd -literal -offset indent
|
||||
.\"ifconfig carp0 create
|
||||
.\"ifconfig carp0 vhid 1 pass mekmitasdigoat 192.168.1.10/24
|
||||
.\"ifconfig carp1 create
|
||||
.\"ifconfig carp1 vhid 2 advskew 100 pass mekmitasdigoat 192.168.1.10/24
|
||||
.\".Ed
|
||||
.\".Pp
|
||||
.\"The configuration for host B is identical, except the
|
||||
.\".Cm advskew
|
||||
.\"is on virtual host 1 rather than virtual host 2.
|
||||
.\".Bd -literal -offset indent
|
||||
.\"ifconfig carp0 create
|
||||
.\"ifconfig carp0 vhid 1 advskew 100 pass mekmitasdigoat 192.168.1.10/24
|
||||
.\"ifconfig carp1 create
|
||||
.\"ifconfig carp1 vhid 2 pass mekmitasdigoat 192.168.1.10/24
|
||||
.\".Ed
|
||||
.\".Pp
|
||||
.\"Finally, the ARP balancing feature must be enabled on both hosts:
|
||||
.\".Pp
|
||||
.\".Dl sysctl net.inet.carp.arpbalance=1
|
||||
.\".Pp
|
||||
.\"When the hosts receive an ARP request for 192.168.1.10, the source IP address
|
||||
.\"of the request is used to compute which virtual host should answer the request.
|
||||
.\"The host which is master of the selected virtual host will reply to the
|
||||
.\"request, the other(s) will ignore it.
|
||||
.\".Pp
|
||||
.\"This way, locally connected systems will receive different ARP replies and
|
||||
.\"subsequent IP traffic will be balanced among the hosts.
|
||||
.\"If one of the hosts fails, the other will take over the virtual MAC address,
|
||||
.\"and begin answering ARP requests on its behalf.
|
||||
.Pp
|
||||
Processing of
|
||||
.Nm
|
||||
status change events can be set up by using the following devd.conf rules:
|
||||
status change events can be set up by using the following devd.conf rule:
|
||||
.Bd -literal -offset indent
|
||||
notify 0 {
|
||||
match "system" "IFNET";
|
||||
match "type" "LINK_UP";
|
||||
match "subsystem" "carp*";
|
||||
action "/root/carpcontrol.sh $type $subsystem";
|
||||
};
|
||||
|
||||
notify 0 {
|
||||
match "system" "IFNET";
|
||||
match "type" "LINK_DOWN";
|
||||
match "subsystem" "carp*";
|
||||
action "/root/carpcontrol.sh $type $subsystem";
|
||||
match "system" "CARP";
|
||||
match "subsystem" "[0-9]+@";
|
||||
match "type" "(MASTER|BACKUP)";
|
||||
action "/root/carpcontrol.sh $subsystem $type";
|
||||
};
|
||||
.Ed
|
||||
.Sh SEE ALSO
|
||||
@ -303,3 +282,8 @@ The
|
||||
.Nm
|
||||
device was imported into
|
||||
.Fx 5.4 .
|
||||
In
|
||||
.Fx 10
|
||||
the
|
||||
.Nm
|
||||
was significantly rewritten, and is no longer a pseudo-interface.
|
||||
|
22
sys/net/if.c
22
sys/net/if.c
@ -130,17 +130,19 @@ void (*lagg_linkstate_p)(struct ifnet *ifp, int state);
|
||||
/* These are external hooks for CARP. */
|
||||
void (*carp_linkstate_p)(struct ifnet *ifp);
|
||||
#if defined(INET) || defined(INET6)
|
||||
struct ifnet *(*carp_forus_p)(struct ifnet *ifp, u_char *dhost);
|
||||
int (*carp_forus_p)(struct ifnet *ifp, u_char *dhost);
|
||||
int (*carp_output_p)(struct ifnet *ifp, struct mbuf *m,
|
||||
struct sockaddr *sa, struct rtentry *rt);
|
||||
struct sockaddr *sa);
|
||||
int (*carp_ioctl_p)(struct ifreq *, u_long, struct thread *);
|
||||
int (*carp_attach_p)(struct ifaddr *, int);
|
||||
void (*carp_detach_p)(struct ifaddr *);
|
||||
#endif
|
||||
#ifdef INET
|
||||
int (*carp_iamatch_p)(struct ifnet *, struct in_ifaddr *, struct in_addr *,
|
||||
u_int8_t **);
|
||||
int (*carp_iamatch_p)(struct ifaddr *, uint8_t **);
|
||||
#endif
|
||||
#ifdef INET6
|
||||
struct ifaddr *(*carp_iamatch6_p)(struct ifnet *ifp, struct in6_addr *taddr6);
|
||||
caddr_t (*carp_macmatch6_p)(struct ifnet *ifp, struct mbuf *m,
|
||||
caddr_t (*carp_macmatch6_p)(struct ifnet *ifp, struct mbuf *m,
|
||||
const struct in6_addr *taddr);
|
||||
#endif
|
||||
|
||||
@ -2506,6 +2508,16 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
|
||||
error = if_getgroupmembers((struct ifgroupreq *)data);
|
||||
CURVNET_RESTORE();
|
||||
return (error);
|
||||
#if defined(INET) || defined(INET6)
|
||||
case SIOCSVH:
|
||||
case SIOCGVH:
|
||||
if (carp_ioctl_p == NULL)
|
||||
error = EPROTONOSUPPORT;
|
||||
else
|
||||
error = (*carp_ioctl_p)(ifr, cmd, td);
|
||||
CURVNET_RESTORE();
|
||||
return (error);
|
||||
#endif
|
||||
}
|
||||
|
||||
ifp = ifunit_ref(ifr->ifr_name);
|
||||
|
@ -85,7 +85,7 @@ struct if_data {
|
||||
u_char ifi_addrlen; /* media address length */
|
||||
u_char ifi_hdrlen; /* media header length */
|
||||
u_char ifi_link_state; /* current link state */
|
||||
u_char ifi_spare_char1; /* spare byte */
|
||||
u_char ifi_vhid; /* carp vhid */
|
||||
u_char ifi_spare_char2; /* spare byte */
|
||||
u_char ifi_datalen; /* length of this data struct */
|
||||
u_long ifi_mtu; /* maximum transmission unit */
|
||||
@ -267,6 +267,8 @@ struct ifa_msghdr {
|
||||
int ifam_flags; /* value of ifa_flags */
|
||||
u_short ifam_index; /* index for associated ifp */
|
||||
int ifam_metric; /* value of ifa_metric */
|
||||
struct if_data ifam_data;/* statistics and other data about if or
|
||||
* address */
|
||||
};
|
||||
|
||||
/*
|
||||
@ -357,6 +359,7 @@ struct ifaliasreq {
|
||||
struct sockaddr ifra_addr;
|
||||
struct sockaddr ifra_broadaddr;
|
||||
struct sockaddr ifra_mask;
|
||||
int ifra_vhid;
|
||||
};
|
||||
|
||||
struct ifmediareq {
|
||||
|
@ -397,7 +397,7 @@ ether_output(struct ifnet *ifp, struct mbuf *m,
|
||||
|
||||
#if defined(INET) || defined(INET6)
|
||||
if (ifp->if_carp &&
|
||||
(error = (*carp_output_p)(ifp, m, dst, NULL)))
|
||||
(error = (*carp_output_p)(ifp, m, dst)))
|
||||
goto bad;
|
||||
#endif
|
||||
|
||||
|
@ -250,6 +250,5 @@
|
||||
#define IFT_ENC 0xf4
|
||||
#define IFT_PFLOG 0xf6
|
||||
#define IFT_PFSYNC 0xf7
|
||||
#define IFT_CARP 0xf8 /* Common Address Redundancy Protocol */
|
||||
#define IFT_IPXIP 0xf9 /* IPX over IP tunneling; no longer used. */
|
||||
#endif /* !_NET_IF_TYPES_H_ */
|
||||
|
@ -69,6 +69,7 @@ struct rt_addrinfo;
|
||||
struct socket;
|
||||
struct ether_header;
|
||||
struct carp_if;
|
||||
struct carp_softc;
|
||||
struct ifvlantrunk;
|
||||
struct route;
|
||||
struct vnet;
|
||||
@ -729,6 +730,7 @@ struct ifaddr {
|
||||
struct sockaddr *ifa_netmask; /* used to determine subnet */
|
||||
struct if_data if_data; /* not all members are meaningful */
|
||||
struct ifnet *ifa_ifp; /* back-pointer to interface */
|
||||
struct carp_softc *ifa_carp; /* pointer to CARP data */
|
||||
TAILQ_ENTRY(ifaddr) ifa_link; /* queue macro glue */
|
||||
void (*ifa_rtrequest) /* check or clean routes (+ or -)'d */
|
||||
(int, struct rtentry *, struct rt_addrinfo *);
|
||||
|
@ -63,6 +63,7 @@
|
||||
|
||||
#include <netinet/in.h>
|
||||
#include <netinet/if_ether.h>
|
||||
#include <netinet/ip_carp.h>
|
||||
#ifdef INET6
|
||||
#include <netinet6/scope6_var.h>
|
||||
#endif
|
||||
@ -83,7 +84,7 @@ struct if_data32 {
|
||||
uint8_t ifi_addrlen;
|
||||
uint8_t ifi_hdrlen;
|
||||
uint8_t ifi_link_state;
|
||||
uint8_t ifi_spare_char1;
|
||||
uint8_t ifi_vhid;
|
||||
uint8_t ifi_spare_char2;
|
||||
uint8_t ifi_datalen;
|
||||
uint32_t ifi_mtu;
|
||||
@ -122,6 +123,9 @@ MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables");
|
||||
static struct sockaddr route_src = { 2, PF_ROUTE, };
|
||||
static struct sockaddr sa_zero = { sizeof(sa_zero), AF_INET, };
|
||||
|
||||
/* These are external hooks for CARP. */
|
||||
int (*carp_get_vhid_p)(struct ifaddr *);
|
||||
|
||||
/*
|
||||
* Used by rtsock/raw_input callback code to decide whether to filter the update
|
||||
* notification to a socket bound to a particular FIB.
|
||||
@ -1508,6 +1512,7 @@ copy_ifdata32(struct if_data *src, struct if_data32 *dst)
|
||||
CP(*src, *dst, ifi_addrlen);
|
||||
CP(*src, *dst, ifi_hdrlen);
|
||||
CP(*src, *dst, ifi_link_state);
|
||||
CP(*src, *dst, ifi_vhid);
|
||||
dst->ifi_datalen = sizeof(struct if_data32);
|
||||
CP(*src, *dst, ifi_mtu);
|
||||
CP(*src, *dst, ifi_metric);
|
||||
@ -1559,6 +1564,9 @@ sysctl_iflist(int af, struct walkarg *w)
|
||||
ifm32->ifm_flags = ifp->if_flags |
|
||||
ifp->if_drv_flags;
|
||||
copy_ifdata32(&ifp->if_data, &ifm32->ifm_data);
|
||||
if (carp_get_vhid_p != NULL)
|
||||
ifm32->ifm_data.ifi_vhid =
|
||||
(*carp_get_vhid_p)(ifa);
|
||||
ifm32->ifm_addrs = info.rti_addrs;
|
||||
error = SYSCTL_OUT(w->w_req, (caddr_t)ifm32,
|
||||
len);
|
||||
@ -1569,6 +1577,9 @@ sysctl_iflist(int af, struct walkarg *w)
|
||||
ifm->ifm_index = ifp->if_index;
|
||||
ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
|
||||
ifm->ifm_data = ifp->if_data;
|
||||
if (carp_get_vhid_p != NULL)
|
||||
ifm->ifm_data.ifi_vhid =
|
||||
(*carp_get_vhid_p)(ifa);
|
||||
ifm->ifm_addrs = info.rti_addrs;
|
||||
error = SYSCTL_OUT(w->w_req, (caddr_t)ifm, len);
|
||||
#ifdef COMPAT_FREEBSD32
|
||||
@ -1595,6 +1606,9 @@ sysctl_iflist(int af, struct walkarg *w)
|
||||
ifam->ifam_flags = ifa->ifa_flags;
|
||||
ifam->ifam_metric = ifa->ifa_metric;
|
||||
ifam->ifam_addrs = info.rti_addrs;
|
||||
if (carp_get_vhid_p != NULL)
|
||||
ifam->ifam_data.ifi_vhid =
|
||||
(*carp_get_vhid_p)(ifa);
|
||||
error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
|
||||
if (error)
|
||||
goto done;
|
||||
|
@ -139,8 +139,6 @@ static const struct netisr_handler arp_nh = {
|
||||
};
|
||||
|
||||
#ifdef AF_INET
|
||||
void arp_ifscrub(struct ifnet *ifp, uint32_t addr);
|
||||
|
||||
/*
|
||||
* called by in_ifscrub to remove entry from the table when
|
||||
* the interface goes away
|
||||
@ -516,7 +514,7 @@ in_arpinput(struct mbuf *m)
|
||||
int op, flags;
|
||||
int req_len;
|
||||
int bridged = 0, is_bridge = 0;
|
||||
int carp_match = 0;
|
||||
int carped;
|
||||
struct sockaddr_in sin;
|
||||
sin.sin_len = sizeof(struct sockaddr_in);
|
||||
sin.sin_family = AF_INET;
|
||||
@ -561,24 +559,14 @@ in_arpinput(struct mbuf *m)
|
||||
* For a bridge, we want to check the address irrespective
|
||||
* of the receive interface. (This will change slightly
|
||||
* when we have clusters of interfaces).
|
||||
* If the interface does not match, but the receiving interface
|
||||
* is part of carp, we call carp_iamatch to see if this is a
|
||||
* request for the virtual host ip.
|
||||
* XXX: This is really ugly!
|
||||
*/
|
||||
IN_IFADDR_RLOCK();
|
||||
LIST_FOREACH(ia, INADDR_HASH(itaddr.s_addr), ia_hash) {
|
||||
if (((bridged && ia->ia_ifp->if_bridge == ifp->if_bridge) ||
|
||||
ia->ia_ifp == ifp) &&
|
||||
itaddr.s_addr == ia->ia_addr.sin_addr.s_addr) {
|
||||
ifa_ref(&ia->ia_ifa);
|
||||
IN_IFADDR_RUNLOCK();
|
||||
goto match;
|
||||
}
|
||||
if (ifp->if_carp != NULL &&
|
||||
(*carp_iamatch_p)(ifp, ia, &isaddr, &enaddr) &&
|
||||
itaddr.s_addr == ia->ia_addr.sin_addr.s_addr) {
|
||||
carp_match = 1;
|
||||
itaddr.s_addr == ia->ia_addr.sin_addr.s_addr &&
|
||||
(ia->ia_ifa.ifa_carp == NULL ||
|
||||
(*carp_iamatch_p)(&ia->ia_ifa, &enaddr))) {
|
||||
ifa_ref(&ia->ia_ifa);
|
||||
IN_IFADDR_RUNLOCK();
|
||||
goto match;
|
||||
@ -643,6 +631,7 @@ in_arpinput(struct mbuf *m)
|
||||
match:
|
||||
if (!enaddr)
|
||||
enaddr = (u_int8_t *)IF_LLADDR(ifp);
|
||||
carped = (ia->ia_ifa.ifa_carp != NULL);
|
||||
myaddr = ia->ia_addr.sin_addr;
|
||||
ifa_free(&ia->ia_ifa);
|
||||
if (!bcmp(ar_sha(ah), enaddr, ifp->if_addrlen))
|
||||
@ -659,9 +648,9 @@ in_arpinput(struct mbuf *m)
|
||||
* case we suppress the warning to avoid false positive complaints of
|
||||
* potential misconfiguration.
|
||||
*/
|
||||
if (!bridged && isaddr.s_addr == myaddr.s_addr && myaddr.s_addr != 0) {
|
||||
log(LOG_ERR,
|
||||
"arp: %*D is using my IP address %s on %s!\n",
|
||||
if (!bridged && !carped && isaddr.s_addr == myaddr.s_addr &&
|
||||
myaddr.s_addr != 0) {
|
||||
log(LOG_ERR, "arp: %*D is using my IP address %s on %s!\n",
|
||||
ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
|
||||
inet_ntoa(isaddr), ifp->if_xname);
|
||||
itaddr = myaddr;
|
||||
@ -682,7 +671,7 @@ in_arpinput(struct mbuf *m)
|
||||
IF_AFDATA_UNLOCK(ifp);
|
||||
if (la != NULL) {
|
||||
/* the following is not an error when doing bridging */
|
||||
if (!bridged && la->lle_tbl->llt_ifp != ifp && !carp_match) {
|
||||
if (!bridged && la->lle_tbl->llt_ifp != ifp) {
|
||||
if (log_arp_wrong_iface)
|
||||
log(LOG_WARNING, "arp: %s is on %s "
|
||||
"but got reply from %*D on %s\n",
|
||||
@ -879,6 +868,9 @@ arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa)
|
||||
{
|
||||
struct llentry *lle;
|
||||
|
||||
if (ifa->ifa_carp != NULL)
|
||||
return;
|
||||
|
||||
if (ntohl(IA_SIN(ifa)->sin_addr.s_addr) != INADDR_ANY) {
|
||||
arprequest(ifp, &IA_SIN(ifa)->sin_addr,
|
||||
&IA_SIN(ifa)->sin_addr, IF_LLADDR(ifp));
|
||||
|
@ -117,6 +117,7 @@ int arpresolve(struct ifnet *ifp, struct rtentry *rt,
|
||||
struct llentry **lle);
|
||||
void arp_ifinit(struct ifnet *, struct ifaddr *);
|
||||
void arp_ifinit2(struct ifnet *, struct ifaddr *, u_char *);
|
||||
void arp_ifscrub(struct ifnet *, uint32_t);
|
||||
|
||||
#include <sys/eventhandler.h>
|
||||
typedef void (*llevent_arp_update_fn)(void *, struct llentry *);
|
||||
|
@ -56,10 +56,12 @@ __FBSDID("$FreeBSD$");
|
||||
#include <net/route.h>
|
||||
#include <net/vnet.h>
|
||||
|
||||
#include <netinet/if_ether.h>
|
||||
#include <netinet/in.h>
|
||||
#include <netinet/in_var.h>
|
||||
#include <netinet/in_pcb.h>
|
||||
#include <netinet/ip_var.h>
|
||||
#include <netinet/ip_carp.h>
|
||||
#include <netinet/igmp_var.h>
|
||||
#include <netinet/udp.h>
|
||||
#include <netinet/udp_var.h>
|
||||
@ -69,17 +71,15 @@ static void in_len2mask(struct in_addr *, int);
|
||||
static int in_lifaddr_ioctl(struct socket *, u_long, caddr_t,
|
||||
struct ifnet *, struct thread *);
|
||||
|
||||
static int in_addprefix(struct in_ifaddr *, int);
|
||||
static int in_scrubprefix(struct in_ifaddr *, u_int);
|
||||
static void in_socktrim(struct sockaddr_in *);
|
||||
static int in_ifinit(struct ifnet *,
|
||||
struct in_ifaddr *, struct sockaddr_in *, int, int);
|
||||
static int in_ifinit(struct ifnet *, struct in_ifaddr *,
|
||||
struct sockaddr_in *, int, int, int);
|
||||
static void in_purgemaddrs(struct ifnet *);
|
||||
|
||||
static VNET_DEFINE(int, sameprefixcarponly);
|
||||
#define V_sameprefixcarponly VNET(sameprefixcarponly)
|
||||
SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, same_prefix_carp_only, CTLFLAG_RW,
|
||||
&VNET_NAME(sameprefixcarponly), 0,
|
||||
static VNET_DEFINE(int, nosameprefix);
|
||||
#define V_nosameprefix VNET(nosameprefix)
|
||||
SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, no_same_prefix, CTLFLAG_RW,
|
||||
&VNET_NAME(nosameprefix), 0,
|
||||
"Refuse to create same prefixes on different interfaces");
|
||||
|
||||
VNET_DECLARE(struct inpcbinfo, ripcbinfo);
|
||||
@ -517,7 +517,7 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
|
||||
|
||||
case SIOCSIFADDR:
|
||||
error = in_ifinit(ifp, ia,
|
||||
(struct sockaddr_in *) &ifr->ifr_addr, 1, 0);
|
||||
(struct sockaddr_in *) &ifr->ifr_addr, 1, 0, 0);
|
||||
if (error != 0 && iaIsNew)
|
||||
break;
|
||||
if (error == 0) {
|
||||
@ -570,7 +570,7 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
|
||||
}
|
||||
if (hostIsNew || maskIsNew)
|
||||
error = in_ifinit(ifp, ia, &ifra->ifra_addr, 0,
|
||||
maskIsNew);
|
||||
maskIsNew, ifra->ifra_vhid);
|
||||
if (error != 0 && iaIsNew)
|
||||
break;
|
||||
|
||||
@ -609,6 +609,9 @@ in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
|
||||
panic("in_control: unsupported ioctl");
|
||||
}
|
||||
|
||||
if (ia->ia_ifa.ifa_carp)
|
||||
(*carp_detach_p)(&ia->ia_ifa);
|
||||
|
||||
IF_ADDR_LOCK(ifp);
|
||||
/* Re-check that ia is still part of the list. */
|
||||
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
|
||||
@ -843,7 +846,7 @@ in_ifscrub(struct ifnet *ifp, struct in_ifaddr *ia, u_int flags)
|
||||
*/
|
||||
static int
|
||||
in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin,
|
||||
int scrub, int masksupplied)
|
||||
int scrub, int masksupplied, int vhid)
|
||||
{
|
||||
register u_long i = ntohl(sin->sin_addr.s_addr);
|
||||
int flags = RTF_UP, error = 0;
|
||||
@ -859,6 +862,15 @@ in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin,
|
||||
ia, ia_hash);
|
||||
IN_IFADDR_WUNLOCK();
|
||||
|
||||
if (vhid > 0) {
|
||||
if (carp_attach_p != NULL)
|
||||
error = (*carp_attach_p)(&ia->ia_ifa, vhid);
|
||||
else
|
||||
error = EPROTONOSUPPORT;
|
||||
}
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
/*
|
||||
* Give the interface a chance to initialize
|
||||
* if this is its first address,
|
||||
@ -884,11 +896,6 @@ in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin,
|
||||
}
|
||||
ia->ia_subnet = i & ia->ia_subnetmask;
|
||||
in_socktrim(&ia->ia_sockmask);
|
||||
/*
|
||||
* XXX: carp(4) does not have interface route
|
||||
*/
|
||||
if (ifp->if_type == IFT_CARP)
|
||||
return (0);
|
||||
/*
|
||||
* Add route for the network.
|
||||
*/
|
||||
@ -907,7 +914,7 @@ in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin,
|
||||
return (0);
|
||||
flags |= RTF_HOST;
|
||||
}
|
||||
if ((error = in_addprefix(ia, flags)) != 0)
|
||||
if (!vhid && (error = in_addprefix(ia, flags)) != 0)
|
||||
return (error);
|
||||
|
||||
if (ia->ia_addr.sin_addr.s_addr == INADDR_ANY)
|
||||
@ -920,7 +927,7 @@ in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin,
|
||||
/*
|
||||
* add a loopback route to self
|
||||
*/
|
||||
if (V_useloopback && !(ifp->if_flags & IFF_LOOPBACK)) {
|
||||
if (V_useloopback && !vhid && !(ifp->if_flags & IFF_LOOPBACK)) {
|
||||
struct route ia_ro;
|
||||
|
||||
bzero(&ia_ro, sizeof(ia_ro));
|
||||
@ -992,7 +999,7 @@ static void in_addralias_rtmsg(int cmd, struct in_addr *prefix,
|
||||
/*
|
||||
* Check if we have a route for the given prefix already or add one accordingly.
|
||||
*/
|
||||
static int
|
||||
int
|
||||
in_addprefix(struct in_ifaddr *target, int flags)
|
||||
{
|
||||
struct in_ifaddr *ia;
|
||||
@ -1038,9 +1045,7 @@ in_addprefix(struct in_ifaddr *target, int flags)
|
||||
} else
|
||||
break;
|
||||
#endif
|
||||
if (V_sameprefixcarponly &&
|
||||
target->ia_ifp->if_type != IFT_CARP &&
|
||||
ia->ia_ifp->if_type != IFT_CARP) {
|
||||
if (V_nosameprefix) {
|
||||
IN_IFADDR_RUNLOCK();
|
||||
return (EEXIST);
|
||||
} else {
|
||||
@ -1061,14 +1066,12 @@ in_addprefix(struct in_ifaddr *target, int flags)
|
||||
return (error);
|
||||
}
|
||||
|
||||
extern void arp_ifscrub(struct ifnet *ifp, uint32_t addr);
|
||||
|
||||
/*
|
||||
* If there is no other address in the system that can serve a route to the
|
||||
* same prefix, remove the route. Hand over the route to the new address
|
||||
* otherwise.
|
||||
*/
|
||||
static int
|
||||
int
|
||||
in_scrubprefix(struct in_ifaddr *target, u_int flags)
|
||||
{
|
||||
struct in_ifaddr *ia;
|
||||
@ -1156,13 +1159,8 @@ in_scrubprefix(struct in_ifaddr *target, u_int flags)
|
||||
* If we got a matching prefix address, move IFA_ROUTE and
|
||||
* the route itself to it. Make sure that routing daemons
|
||||
* get a heads-up.
|
||||
*
|
||||
* XXX: a special case for carp(4) interface - this should
|
||||
* be more generally specified as an interface that
|
||||
* doesn't support such action.
|
||||
*/
|
||||
if ((ia->ia_flags & IFA_ROUTE) == 0
|
||||
&& (ia->ia_ifp->if_type != IFT_CARP)) {
|
||||
if ((ia->ia_flags & IFA_ROUTE) == 0) {
|
||||
ifa_ref(&ia->ia_ifa);
|
||||
IN_IFADDR_RUNLOCK();
|
||||
error = rtinit(&(target->ia_ifa), (int)RTM_DELETE,
|
||||
@ -1310,9 +1308,6 @@ in_purgemaddrs(struct ifnet *ifp)
|
||||
IN_MULTI_UNLOCK();
|
||||
}
|
||||
|
||||
#include <net/if_dl.h>
|
||||
#include <netinet/if_ether.h>
|
||||
|
||||
struct in_llentry {
|
||||
struct llentry base;
|
||||
struct sockaddr_in l3_addr4;
|
||||
|
@ -77,6 +77,7 @@ struct in_aliasreq {
|
||||
struct sockaddr_in ifra_broadaddr;
|
||||
#define ifra_dstaddr ifra_broadaddr
|
||||
struct sockaddr_in ifra_mask;
|
||||
int ifra_vhid;
|
||||
};
|
||||
/*
|
||||
* Given a pointer to an in_ifaddr (ifaddr),
|
||||
@ -442,6 +443,8 @@ int in_leavegroup_locked(struct in_multi *,
|
||||
int in_control(struct socket *, u_long, caddr_t, struct ifnet *,
|
||||
struct thread *);
|
||||
void in_rtqdrain(void);
|
||||
int in_addprefix(struct in_ifaddr *, int);
|
||||
int in_scrubprefix(struct in_ifaddr *, u_int);
|
||||
void ip_input(struct mbuf *);
|
||||
int in_ifadown(struct ifaddr *ifa, int);
|
||||
void in_ifscrub(struct ifnet *, struct in_ifaddr *, u_int);
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -126,10 +126,12 @@ struct carpstats {
|
||||
* Configuration structure for SIOCSVH SIOCGVH
|
||||
*/
|
||||
struct carpreq {
|
||||
int carpr_count;
|
||||
int carpr_vhid;
|
||||
#define CARP_MAXVHID 255
|
||||
int carpr_state;
|
||||
#define CARP_STATES "INIT", "BACKUP", "MASTER"
|
||||
#define CARP_MAXSTATE 2
|
||||
int carpr_vhid;
|
||||
int carpr_advskew;
|
||||
int carpr_advbase;
|
||||
unsigned char carpr_key[CARP_KEY_LEN];
|
||||
@ -144,8 +146,7 @@ struct carpreq {
|
||||
#define CARPCTL_PREEMPT 2 /* high-pri backup preemption mode */
|
||||
#define CARPCTL_LOG 3 /* log bad packets */
|
||||
#define CARPCTL_STATS 4 /* statistics (read-only) */
|
||||
#define CARPCTL_ARPBALANCE 5 /* balance arp responses */
|
||||
#define CARPCTL_MAXID 6
|
||||
#define CARPCTL_MAXID 5
|
||||
|
||||
#define CARPCTL_NAMES { \
|
||||
{ 0, 0 }, \
|
||||
@ -153,33 +154,37 @@ struct carpreq {
|
||||
{ "preempt", CTLTYPE_INT }, \
|
||||
{ "log", CTLTYPE_INT }, \
|
||||
{ "stats", CTLTYPE_STRUCT }, \
|
||||
{ "arpbalance", CTLTYPE_INT }, \
|
||||
}
|
||||
|
||||
#ifdef _KERNEL
|
||||
void carp_carpdev_state(struct ifnet *);
|
||||
void carp_input (struct mbuf *, int);
|
||||
int carp6_input (struct mbuf **, int *, int);
|
||||
int carp_output (struct ifnet *, struct mbuf *, struct sockaddr *,
|
||||
struct rtentry *);
|
||||
int carp_iamatch (struct ifnet *, struct in_ifaddr *, struct in_addr *,
|
||||
u_int8_t **);
|
||||
int carp_ioctl(struct ifreq *, u_long, struct thread *);
|
||||
int carp_attach(struct ifaddr *, int);
|
||||
void carp_detach(struct ifaddr *);
|
||||
void carp_carpdev_state(struct ifnet *);
|
||||
void carp_input (struct mbuf *, int);
|
||||
int carp6_input (struct mbuf **, int *, int);
|
||||
int carp_output (struct ifnet *, struct mbuf *, struct sockaddr *);
|
||||
int carp_iamatch(struct ifaddr *, uint8_t **);
|
||||
struct ifaddr *carp_iamatch6(struct ifnet *, struct in6_addr *);
|
||||
caddr_t carp_macmatch6(struct ifnet *, struct mbuf *, const struct in6_addr *);
|
||||
struct ifnet *carp_forus (struct ifnet *, u_char *);
|
||||
int carp_forus(struct ifnet *, u_char *);
|
||||
|
||||
/* These are external networking stack hooks for CARP */
|
||||
/* net/if.c */
|
||||
extern int (*carp_ioctl_p)(struct ifreq *, u_long, struct thread *);
|
||||
extern int (*carp_attach_p)(struct ifaddr *, int);
|
||||
extern void (*carp_detach_p)(struct ifaddr *);
|
||||
extern void (*carp_linkstate_p)(struct ifnet *);
|
||||
/* net/if_bridge.c net/if_ethersubr.c */
|
||||
extern struct ifnet *(*carp_forus_p)(struct ifnet *, u_char *);
|
||||
extern int (*carp_forus_p)(struct ifnet *, u_char *);
|
||||
/* net/if_ethersubr.c */
|
||||
extern int (*carp_output_p)(struct ifnet *, struct mbuf *,
|
||||
struct sockaddr *, struct rtentry *);
|
||||
struct sockaddr *);
|
||||
/* net/rtsock.c */
|
||||
extern int (*carp_get_vhid_p)(struct ifaddr *);
|
||||
#ifdef INET
|
||||
/* netinet/if_ether.c */
|
||||
extern int (*carp_iamatch_p)(struct ifnet *, struct in_ifaddr *,
|
||||
struct in_addr *, u_int8_t **);
|
||||
extern int (*carp_iamatch_p)(struct ifaddr *, uint8_t **);
|
||||
#endif
|
||||
#ifdef INET6
|
||||
/* netinet6/nd6_nbr.c */
|
||||
|
@ -95,6 +95,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include <netinet/in_systm.h>
|
||||
#include <netinet/ip.h>
|
||||
#include <netinet/in_pcb.h>
|
||||
#include <netinet/ip_carp.h>
|
||||
|
||||
#include <netinet/ip6.h>
|
||||
#include <netinet6/ip6_var.h>
|
||||
@ -272,6 +273,7 @@ in6_control(struct socket *so, u_long cmd, caddr_t data,
|
||||
struct in6_ifaddr *ia = NULL;
|
||||
struct in6_aliasreq *ifra = (struct in6_aliasreq *)data;
|
||||
struct sockaddr_in6 *sa6;
|
||||
int carp_attached = 0;
|
||||
int error;
|
||||
|
||||
switch (cmd) {
|
||||
@ -652,6 +654,18 @@ in6_control(struct socket *so, u_long cmd, caddr_t data,
|
||||
break;
|
||||
}
|
||||
|
||||
if (ifra->ifra_vhid > 0) {
|
||||
if (carp_attach_p != NULL)
|
||||
error = (*carp_attach_p)(&ia->ia_ifa,
|
||||
ifra->ifra_vhid);
|
||||
else
|
||||
error = EPROTONOSUPPORT;
|
||||
if (error)
|
||||
goto out;
|
||||
else
|
||||
carp_attached = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* then, make the prefix on-link on the interface.
|
||||
* XXX: we'd rather create the prefix before the address, but
|
||||
@ -695,9 +709,14 @@ in6_control(struct socket *so, u_long cmd, caddr_t data,
|
||||
* nd6_prelist_add will install the corresponding
|
||||
* interface route.
|
||||
*/
|
||||
if ((error = nd6_prelist_add(&pr0, NULL, &pr)) != 0)
|
||||
if ((error = nd6_prelist_add(&pr0, NULL, &pr)) != 0) {
|
||||
if (carp_attached)
|
||||
(*carp_detach_p)(&ia->ia_ifa);
|
||||
goto out;
|
||||
}
|
||||
if (pr == NULL) {
|
||||
if (carp_attached)
|
||||
(*carp_detach_p)(&ia->ia_ifa);
|
||||
log(LOG_ERR, "nd6_prelist_add succeeded but "
|
||||
"no prefix\n");
|
||||
error = EINVAL;
|
||||
@ -1301,6 +1320,9 @@ in6_purgeaddr(struct ifaddr *ifa)
|
||||
struct rtentry *rt;
|
||||
struct ifaddr *ifa0, *nifa;
|
||||
|
||||
if (ifa->ifa_carp)
|
||||
(*carp_detach_p)(ifa);
|
||||
|
||||
/*
|
||||
* find another IPv6 address as the gateway for the
|
||||
* link-local and node-local all-nodes multicast
|
||||
|
@ -705,7 +705,6 @@ in6_ifattach(struct ifnet *ifp, struct ifnet *altifp)
|
||||
switch (ifp->if_type) {
|
||||
case IFT_PFLOG:
|
||||
case IFT_PFSYNC:
|
||||
case IFT_CARP:
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -287,6 +287,7 @@ struct in6_aliasreq {
|
||||
struct sockaddr_in6 ifra_prefixmask;
|
||||
int ifra_flags;
|
||||
struct in6_addrlifetime ifra_lifetime;
|
||||
int ifra_vhid;
|
||||
};
|
||||
|
||||
/* prefix type macro */
|
||||
|
@ -2172,9 +2172,6 @@ nd6_need_cache(struct ifnet *ifp)
|
||||
#endif
|
||||
#ifdef IFT_IEEE80211
|
||||
case IFT_IEEE80211:
|
||||
#endif
|
||||
#ifdef IFT_CARP
|
||||
case IFT_CARP:
|
||||
#endif
|
||||
case IFT_INFINIBAND:
|
||||
case IFT_GIF: /* XXX need more cases? */
|
||||
|
@ -225,7 +225,7 @@ nd6_ns_input(struct mbuf *m, int off, int icmp6len)
|
||||
/* (1) and (3) check. */
|
||||
if (ifp->if_carp)
|
||||
ifa = (*carp_iamatch6_p)(ifp, &taddr6);
|
||||
if (ifa == NULL)
|
||||
else
|
||||
ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6);
|
||||
|
||||
/* (2) check. */
|
||||
@ -688,7 +688,14 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
|
||||
lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
|
||||
}
|
||||
|
||||
ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6);
|
||||
/*
|
||||
* This effectively disables the DAD check on a non-master CARP
|
||||
* address.
|
||||
*/
|
||||
if (ifp->if_carp)
|
||||
ifa = (*carp_iamatch6_p)(ifp, &taddr6);
|
||||
else
|
||||
ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6);
|
||||
|
||||
/*
|
||||
* Target address matches one of my interface address.
|
||||
@ -1132,9 +1139,6 @@ nd6_ifptomac(struct ifnet *ifp)
|
||||
#endif
|
||||
#ifdef IFT_IEEE80211
|
||||
case IFT_IEEE80211:
|
||||
#endif
|
||||
#ifdef IFT_CARP
|
||||
case IFT_CARP:
|
||||
#endif
|
||||
case IFT_INFINIBAND:
|
||||
case IFT_BRIDGE:
|
||||
|
@ -58,7 +58,7 @@
|
||||
* in the range 5 to 9.
|
||||
*/
|
||||
#undef __FreeBSD_version
|
||||
#define __FreeBSD_version 1000002 /* Master, propagated to newvers */
|
||||
#define __FreeBSD_version 1000003 /* Master, propagated to newvers */
|
||||
|
||||
/*
|
||||
* __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
|
||||
|
Loading…
Reference in New Issue
Block a user