Restore a feature that was present in 5.x and 6.x, and was cleared in
7.x, 8.x and 9.x with pf(4) imports: pfsync(4) should suppress CARP
preemption while it is running its bulk update.

However, reimplement the feature in a more elegant manner, partially
inspired by newer OpenBSD:

- Rename the term "suppression" to "demotion" to match OpenBSD.
- Keep a global demotion factor that can be raised by several
  conditions; for now these are:
  - interface goes down
  - carp(4) has problems with ip_output() or ip6_output()
  - pfsync performs bulk update
- Unlike in OpenBSD, the demotion factor isn't a counter, but
  is the actual value added to advskew. The adjustment values for
  particular error conditions are also configurable, and their
  defaults are the maximum advskew value, so a single failure bumps
  demotion to the maximum. This is for POLA compatibility, and should
  satisfy most users.
- The demotion factor is a writable sysctl, so the user can
  shoot themselves in the foot if they so desire.
This commit is contained in:
Gleb Smirnoff 2011-12-20 13:53:31 +00:00
parent 73889c808a
commit f08535f872
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=228736
6 changed files with 154 additions and 126 deletions

View File

@ -114,6 +114,23 @@ Either run the pfsync protocol on a trusted network \- ideally a network
dedicated to pfsync messages such as a crossover cable between two firewalls,
or specify a peer address and protect the traffic with
.Xr ipsec 4 .
.Pp
.Nm
has the following
.Xr sysctl 8
tunables:
.Bl -tag -width ".Va net.pfsync"
.It Va net.pfsync.carp_demotion_factor
Value added to
.Va net.inet.carp.demotion
while
.Nm
tries to perform its bulk update.
See
.Xr carp 4
for more information.
Default value is 240.
.El
.Sh EXAMPLES
.Nm
and

View File

@ -26,7 +26,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd December 16, 2011
.Dd December 20, 2011
.Dt CARP 4
.Os
.Sh NAME
@ -121,15 +121,38 @@ Values above 1 enable logging of bad
.Nm
packets.
Default value is 1.
.It Va net.inet.carp.suppress_preempt
A read only value showing the status of preemption suppression.
Preemption can be suppressed if link on an interface is down
or when
.It Va net.inet.carp.demotion
This value shows the current level of CARP demotion.
The value is added to the actual advskew sent in announcements for
all vhids.
During normal system operation the demotion factor is zero.
However, problematic conditions raise its level: when
.Nm
experiences problems sending announcements, when an interface
running a vhid goes down, or while the
.Xr pfsync 4
interface is not synchronized.
Value of 0 means that preemption is not suppressed, since no
problems are detected.
Every problem increments suppression counter.
The demotion value is writable, so that a user may alter it
depending on some external conditions, for example on the status of some
daemon utility.
However, altering the value should be performed with care, so as
not to conflict with subsystems that adjust the demotion factor
automatically:
.Nm
and
.Xr pfsync 4 .
.It Va net.inet.carp.ifdown_demotion_factor
Value added to
.Va net.inet.carp.demotion
when an interface running a vhid goes down.
Default value is 240 (maximum advskew value).
.It Va net.inet.carp.senderr_demotion_factor
Value added to
.Va net.inet.carp.demotion
when
.Nm
experiences errors sending its announcements.
Default value is 240 (maximum advskew value).
.El
.\".Sh ARP level load balancing
.\"The

View File

@ -62,12 +62,6 @@ __FBSDID("$FreeBSD$");
#else
#define NPFSYNC 0
#endif
#ifdef DEV_CARP
#define NCARP DEV_CARP
#else
#define NCARP 0
#endif
#endif /* __FreeBSD__ */
#include <sys/param.h>
@ -127,12 +121,14 @@ __FBSDID("$FreeBSD$");
#include <netinet6/nd6.h>
#endif /* INET6 */
#ifndef __FreeBSD__
#ifdef __FreeBSD__
#include <netinet/ip_carp.h>
#else
#include "carp.h"
#endif
#if NCARP > 0
#include <netinet/ip_carp.h>
#endif
#endif
#include <net/pfvar.h>
#include <net/if_pfsync.h>
@ -308,11 +304,15 @@ static VNET_DEFINE(struct pfsync_softc *, pfsyncif) = NULL;
static VNET_DEFINE(struct pfsyncstats, pfsyncstats);
#define V_pfsyncstats VNET(pfsyncstats)
static VNET_DEFINE(int, pfsync_carp_adj) = CARP_MAXSKEW;
#define V_pfsync_carp_adj VNET(pfsync_carp_adj)
SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW, 0, "PFSYNC");
SYSCTL_VNET_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_RW,
&VNET_NAME(pfsyncstats), pfsyncstats,
"PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)");
SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_RW,
&VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment");
#else
struct pfsync_softc *pfsyncif = NULL;
struct pfsyncstats pfsyncstats;
@ -505,11 +505,11 @@ pfsync_clone_create(struct if_clone *ifc, int unit)
if_attach(ifp);
#ifndef __FreeBSD__
if_alloc_sadl(ifp);
#endif
#if NCARP > 0
if_addgroup(ifp, "carp");
#endif
#endif
#if NBPFILTER > 0
#ifdef __FreeBSD__
@ -545,14 +545,11 @@ pfsync_clone_destroy(struct ifnet *ifp)
timeout_del(&sc->sc_tmo);
#ifdef __FreeBSD__
PF_UNLOCK();
#endif
#if NCARP > 0
#ifdef notyet
#ifdef __FreeBSD__
if (!sc->pfsync_sync_ok)
if (!sc->pfsync_sync_ok && carp_demote_adj_p)
(*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy");
#else
#if NCARP > 0
if (!pfsync_sync_ok)
#endif
carp_group_demote_adj(&sc->sc_if, -1);
#endif
#endif
@ -1636,19 +1633,16 @@ pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count)
sc->sc_ureq_sent = 0;
sc->sc_bulk_tries = 0;
timeout_del(&sc->sc_bulkfail_tmo);
#if NCARP > 0
#ifdef notyet
#ifdef __FreeBSD__
if (!sc->pfsync_sync_ok)
#else
if (!pfsync_sync_ok)
#endif
carp_group_demote_adj(&sc->sc_if, -1);
#endif
#endif
#ifdef __FreeBSD__
if (!sc->pfsync_sync_ok && carp_demote_adj_p)
(*carp_demote_adj_p)(-V_pfsync_carp_adj,
"pfsync bulk done");
sc->pfsync_sync_ok = 1;
#else
#if NCARP > 0
if (!pfsync_sync_ok)
carp_group_demote_adj(&sc->sc_if, -1);
#endif
pfsync_sync_ok = 1;
#endif
#ifdef __FreeBSD__
@ -1988,19 +1982,16 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
if (sc->sc_sync_if) {
/* Request a full state table update. */
sc->sc_ureq_sent = time_uptime;
#if NCARP > 0
#ifdef notyet
#ifdef __FreeBSD__
if (sc->pfsync_sync_ok)
#else
if (pfsync_sync_ok)
#endif
carp_group_demote_adj(&sc->sc_if, 1);
#endif
#endif
#ifdef __FreeBSD__
if (sc->pfsync_sync_ok && carp_demote_adj_p)
(*carp_demote_adj_p)(V_pfsync_carp_adj,
"pfsync bulk start");
sc->pfsync_sync_ok = 0;
#else
#if NCARP > 0
if (pfsync_sync_ok)
carp_group_demote_adj(&sc->sc_if, 1);
#endif
pfsync_sync_ok = 0;
#endif
#ifdef __FreeBSD__
@ -3159,19 +3150,16 @@ pfsync_bulk_fail(void *arg)
/* Pretend like the transfer was ok */
sc->sc_ureq_sent = 0;
sc->sc_bulk_tries = 0;
#if NCARP > 0
#ifdef notyet
#ifdef __FreeBSD__
if (!sc->pfsync_sync_ok)
#else
if (!pfsync_sync_ok)
#endif
carp_group_demote_adj(&sc->sc_if, -1);
#endif
#endif
#ifdef __FreeBSD__
if (!sc->pfsync_sync_ok && carp_demote_adj_p)
(*carp_demote_adj_p)(-V_pfsync_carp_adj,
"pfsync bulk fail");
sc->pfsync_sync_ok = 1;
#else
#if NCARP > 0
if (!pfsync_sync_ok)
carp_group_demote_adj(&sc->sc_if, -1);
#endif
pfsync_sync_ok = 1;
#endif
#ifdef __FreeBSD__

View File

@ -129,6 +129,7 @@ void (*ng_ether_link_state_p)(struct ifnet *ifp, int state);
void (*lagg_linkstate_p)(struct ifnet *ifp, int state);
/* These are external hooks for CARP. */
void (*carp_linkstate_p)(struct ifnet *ifp);
void (*carp_demote_adj_p)(int, char *);
#if defined(INET) || defined(INET6)
int (*carp_forus_p)(struct ifnet *ifp, u_char *dhost);
int (*carp_output_p)(struct ifnet *ifp, struct mbuf *m,

View File

@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/taskqueue.h>
#include <net/ethernet.h>
#include <net/fddi.h>
@ -185,22 +186,30 @@ static int proto_reg[] = {-1, -1};
* dereferencing our function pointers.
*/
int carp_suppress_preempt = 0;
int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, };
SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW, 0, "CARP");
SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW,
&carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets");
SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW,
&carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode");
SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW,
&carp_opts[CARPCTL_LOG], 0, "log bad carp packets");
SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD,
&carp_suppress_preempt, 0, "Preemption is suppressed");
static int carp_allow = 1; /* Accept incoming CARP packets. */
static int carp_preempt = 0; /* Preempt slower nodes. */
static int carp_log = 1; /* Log level. */
static int carp_demotion = 0; /* Global advskew demotion. */
static int carp_senderr_adj = CARP_MAXSKEW; /* Send error demotion factor */
static int carp_ifdown_adj = CARP_MAXSKEW; /* Iface down demotion factor */
struct carpstats carpstats;
SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
&carpstats, carpstats,
"CARP statistics (struct carpstats, netinet/ip_carp.h)");
SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW, 0, "CARP");
SYSCTL_INT(_net_inet_carp, OID_AUTO, allow, CTLFLAG_RW, &carp_allow, 0,
"Accept incoming CARP packets");
SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_RW, &carp_preempt, 0,
"High-priority backup preemption mode");
SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_RW, &carp_log, 0,
"CARP log level");
SYSCTL_INT(_net_inet_carp, OID_AUTO, demotion, CTLFLAG_RW, &carp_demotion, 0,
"Demotion factor (skew of advskew)");
SYSCTL_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor, CTLFLAG_RW,
&carp_senderr_adj, 0, "Send error demotion factor adjustment");
SYSCTL_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor, CTLFLAG_RW,
&carp_ifdown_adj, 0, "Interface down demotion factor adjustment");
static struct carpstats carpstats;
SYSCTL_STRUCT(_net_inet_carp, OID_AUTO, stats, CTLFLAG_RW, &carpstats,
carpstats, "CARP statistics (struct carpstats, netinet/ip_carp.h)");
#define CARP_LOCK_INIT(sc) mtx_init(&(sc)->sc_mtx, "carp_softc", \
NULL, MTX_DEF)
@ -216,12 +225,12 @@ SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
#define CIF_UNLOCK(cif) mtx_unlock(&(cif)->cif_mtx)
#define CARP_LOG(...) do { \
if (carp_opts[CARPCTL_LOG] > 0) \
if (carp_log > 0) \
log(LOG_INFO, "carp: " __VA_ARGS__); \
} while (0)
#define CARP_DEBUG(...) do { \
if (carp_opts[CARPCTL_LOG] > 1) \
if (carp_log > 1) \
log(LOG_DEBUG, __VA_ARGS__); \
} while (0)
@ -241,6 +250,10 @@ SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
CIF_LOCK_ASSERT(ifp->if_carp); \
TAILQ_FOREACH((sc), &(ifp)->if_carp->cif_vrs, sc_list)
/*
 * Effective advskew for a vhid: the softc's own sc_advskew plus the global
 * carp_demotion value, capped at CARP_MAXSKEW.
 * NOTE(review): only the upper bound is clamped here; a negative
 * carp_demotion would yield a value below sc_advskew -- confirm whether
 * that is intended.
 */
#define DEMOTE_ADVSKEW(sc) \
(((sc)->sc_advskew + carp_demotion > CARP_MAXSKEW) ? \
CARP_MAXSKEW : ((sc)->sc_advskew + carp_demotion))
static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t);
static struct carp_softc
*carp_alloc(struct ifnet *);
@ -257,9 +270,13 @@ static void carp_send_ad(void *);
static void carp_send_ad_locked(struct carp_softc *);
static void carp_addroute(struct carp_softc *);
static void carp_delroute(struct carp_softc *);
static void carp_send_ad_all(void *, int);
static void carp_demote_adj(int, char *);
static LIST_HEAD(, carp_softc) carp_list;
static struct mtx carp_mtx;
static struct task carp_sendall_task =
TASK_INITIALIZER(0, carp_send_ad_all, NULL);
static __inline uint16_t
carp_cksum(struct mbuf *m, int len)
@ -390,7 +407,7 @@ carp_input(struct mbuf *m, int hlen)
CARPSTATS_INC(carps_ipackets);
if (!carp_opts[CARPCTL_ALLOW]) {
if (!carp_allow) {
m_freem(m);
return;
}
@ -473,7 +490,7 @@ carp6_input(struct mbuf **mp, int *offp, int proto)
CARPSTATS_INC(carps_ipackets6);
if (!carp_opts[CARPCTL_ALLOW]) {
if (!carp_allow) {
m_freem(m);
return (IPPROTO_DONE);
}
@ -578,10 +595,7 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
sc->sc_counter = tmp_counter;
sc_tv.tv_sec = sc->sc_advbase;
if (carp_suppress_preempt && sc->sc_advskew < 240)
sc_tv.tv_usec = 240 * 1000000 / 256;
else
sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
sc_tv.tv_usec = DEMOTE_ADVSKEW(sc) * 1000000 / 256;
ch_tv.tv_sec = ch->carp_advbase;
ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
@ -610,8 +624,7 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
* If we're pre-empting masters who advertise slower than us,
* and this one claims to be slower, treat him as down.
*/
if (carp_opts[CARPCTL_PREEMPT] &&
timevalcmp(&sc_tv, &ch_tv, <)) {
if (carp_preempt && timevalcmp(&sc_tv, &ch_tv, <)) {
CARP_LOG("VHID %u@%s: BACKUP -> MASTER "
"(preempting a slower master)\n",
sc->sc_vhid,
@ -679,26 +692,23 @@ carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
return (0);
}
/*
* To avoid LORs and possible recursions this function shouldn't
* be called directly, but scheduled via taskqueue.
*/
static void
carp_send_ad_all(struct carp_softc *badsc)
carp_send_ad_all(void *ctx __unused, int pending __unused)
{
struct carp_softc *sc;
/*
* Avoid LOR and recursive call to carp_send_ad_locked().
*/
CARP_UNLOCK(badsc);
mtx_lock(&carp_mtx);
LIST_FOREACH(sc, &carp_list, sc_next)
if (sc != badsc && sc->sc_state == MASTER) {
if (sc->sc_state == MASTER) {
CARP_LOCK(sc);
carp_send_ad_locked(sc);
CARP_UNLOCK(sc);
}
mtx_unlock(&carp_mtx);
CARP_LOCK(badsc);
}
static void
@ -724,10 +734,7 @@ carp_send_ad_locked(struct carp_softc *sc)
CARP_LOCK_ASSERT(sc);
if (!carp_suppress_preempt || sc->sc_advskew > 240)
advskew = sc->sc_advskew;
else
advskew = 240;
advskew = DEMOTE_ADVSKEW(sc);
tv.tv_sec = sc->sc_advbase;
tv.tv_usec = advskew * 1000000 / 256;
@ -797,17 +804,15 @@ carp_send_ad_locked(struct carp_softc *sc)
&sc->sc_carpdev->if_carp->cif_imo, NULL)) {
if (sc->sc_sendad_errors < INT_MAX)
sc->sc_sendad_errors++;
if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
carp_suppress_preempt++;
if (carp_suppress_preempt == 1)
carp_send_ad_all(sc);
}
if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS)
carp_demote_adj(carp_senderr_adj, "send error");
sc->sc_sendad_success = 0;
} else {
if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
if (++sc->sc_sendad_success >=
CARP_SENDAD_MIN_SUCCESS) {
carp_suppress_preempt--;
carp_demote_adj(-carp_senderr_adj,
"send ok");
sc->sc_sendad_errors = 0;
}
} else
@ -875,17 +880,16 @@ carp_send_ad_locked(struct carp_softc *sc)
&sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL)) {
if (sc->sc_sendad_errors < INT_MAX)
sc->sc_sendad_errors++;
if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
carp_suppress_preempt++;
if (carp_suppress_preempt == 1)
carp_send_ad_all(sc);
}
if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS)
carp_demote_adj(carp_senderr_adj,
"send6 error");
sc->sc_sendad_success = 0;
} else {
if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
if (++sc->sc_sendad_success >=
CARP_SENDAD_MIN_SUCCESS) {
carp_suppress_preempt--;
carp_demote_adj(-carp_senderr_adj,
"send6 ok");
sc->sc_sendad_errors = 0;
}
} else
@ -1479,6 +1483,8 @@ carp_destroy(struct carp_softc *sc)
mtx_unlock(&carp_mtx);
CARP_LOCK(sc);
if (sc->sc_suppress)
carp_demote_adj(-carp_ifdown_adj, "vhid removed");
callout_drain(&sc->sc_ad_tmo);
#ifdef INET
callout_drain(&sc->sc_md_tmo);
@ -1914,21 +1920,25 @@ carp_sc_state(struct carp_softc *sc)
#endif
carp_set_state(sc, INIT);
carp_setrun(sc, 0);
if (!sc->sc_suppress) {
carp_suppress_preempt++;
if (carp_suppress_preempt == 1)
carp_send_ad_all(sc);
}
if (!sc->sc_suppress)
carp_demote_adj(carp_ifdown_adj, "interface down");
sc->sc_suppress = 1;
} else {
carp_set_state(sc, INIT);
carp_setrun(sc, 0);
if (sc->sc_suppress)
carp_suppress_preempt--;
carp_demote_adj(-carp_ifdown_adj, "interface up");
sc->sc_suppress = 0;
}
}
/*
 * Adjust the global CARP demotion factor.
 *
 * adj is added to carp_demotion (pass a negative value to undo an earlier
 * demotion); reason is a short caller-supplied string that is included in
 * the log message only.
 */
static void
carp_demote_adj(int adj, char *reason)
{
carp_demotion += adj;
CARP_LOG("demoted by %d to %d (%s)\n", adj, carp_demotion, reason);
/*
 * Queue carp_sendall_task (which runs carp_send_ad_all()) on
 * taskqueue_swi instead of sending advertisements from this context.
 */
taskqueue_enqueue(taskqueue_swi, &carp_sendall_task);
}
#ifdef INET
extern struct domain inetdomain;
@ -1986,6 +1996,9 @@ carp_mod_cleanup(void)
carp_linkstate_p = NULL;
carp_forus_p = NULL;
carp_output_p = NULL;
carp_demote_adj_p = NULL;
mtx_unlock(&carp_mtx);
taskqueue_drain(taskqueue_swi, &carp_sendall_task);
mtx_destroy(&carp_mtx);
}
@ -2003,6 +2016,7 @@ carp_mod_load(void)
carp_ioctl_p = carp_ioctl;
carp_attach_p = carp_attach;
carp_detach_p = carp_detach;
carp_demote_adj_p = carp_demote_adj;
#ifdef INET6
carp_iamatch6_p = carp_iamatch6;
carp_macmatch6_p = carp_macmatch6;

View File

@ -133,29 +133,13 @@ struct carpreq {
#define CARP_STATES "INIT", "BACKUP", "MASTER"
#define CARP_MAXSTATE 2
int carpr_advskew;
#define CARP_MAXSKEW 240
int carpr_advbase;
unsigned char carpr_key[CARP_KEY_LEN];
};
#define SIOCSVH _IOWR('i', 245, struct ifreq)
#define SIOCGVH _IOWR('i', 246, struct ifreq)
/*
* Names for CARP sysctl objects
*/
#define CARPCTL_ALLOW 1 /* accept incoming CARP packets */
#define CARPCTL_PREEMPT 2 /* high-pri backup preemption mode */
#define CARPCTL_LOG 3 /* log bad packets */
#define CARPCTL_STATS 4 /* statistics (read-only) */
#define CARPCTL_MAXID 5
#define CARPCTL_NAMES { \
{ 0, 0 }, \
{ "allow", CTLTYPE_INT }, \
{ "preempt", CTLTYPE_INT }, \
{ "log", CTLTYPE_INT }, \
{ "stats", CTLTYPE_STRUCT }, \
}
#ifdef _KERNEL
int carp_ioctl(struct ifreq *, u_long, struct thread *);
int carp_attach(struct ifaddr *, int);
@ -175,6 +159,7 @@ extern int (*carp_ioctl_p)(struct ifreq *, u_long, struct thread *);
extern int (*carp_attach_p)(struct ifaddr *, int);
extern void (*carp_detach_p)(struct ifaddr *);
extern void (*carp_linkstate_p)(struct ifnet *);
extern void (*carp_demote_adj_p)(int, char *);
/* net/if_bridge.c net/if_ethersubr.c */
extern int (*carp_forus_p)(struct ifnet *, u_char *);
/* net/if_ethersubr.c */