diff --git a/contrib/pf/man/pfsync.4 b/contrib/pf/man/pfsync.4 index 451925e2b162..b00bf9d0fc78 100644 --- a/contrib/pf/man/pfsync.4 +++ b/contrib/pf/man/pfsync.4 @@ -114,6 +114,23 @@ Either run the pfsync protocol on a trusted network \- ideally a network dedicated to pfsync messages such as a crossover cable between two firewalls, or specify a peer address and protect the traffic with .Xr ipsec 4 . +.Pp +.Nm +has the following +.Xr sysctl 8 +tunables: +.Bl -tag -width ".Va net.pfsync" +.It Va net.pfsync.carp_demotion_factor +Value added to +.Va net.inet.carp.demotion +while +.Nm +tries to perform its bulk update. +See +.Xr carp 4 +for more information. +Default value is 240. +.El .Sh EXAMPLES .Nm and diff --git a/share/man/man4/carp.4 b/share/man/man4/carp.4 index 7c214ff9b7e4..1b59e72f8ad3 100644 --- a/share/man/man4/carp.4 +++ b/share/man/man4/carp.4 @@ -26,7 +26,7 @@ .\" .\" $FreeBSD$ .\" -.Dd December 16, 2011 +.Dd December 20, 2011 .Dt CARP 4 .Os .Sh NAME @@ -121,15 +121,38 @@ Values above 1 enable logging of bad .Nm packets. Default value is 1. -.It Va net.inet.carp.suppress_preempt -A read only value showing the status of preemption suppression. -Preemption can be suppressed if link on an interface is down -or when +.It Va net.inet.carp.demotion +This value shows the current level of CARP demotion. +The value is added to the actual advskew sent in announcements for +all vhids. +During normal system operation the demotion factor is zero. +However, problematic conditions raise its level: when +.Nm +experiences problems sending announcements, when an interface +running a vhid goes down, or while the .Xr pfsync 4 interface is not synchronized. -Value of 0 means that preemption is not suppressed, since no -problems are detected. -Every problem increments suppression counter. +The demotion value is writable, so that a user may alter it +depending on some external conditions, for example on the status of some +daemon utility.
+However, altering the value should be performed with care, so as +not to conflict with subsystems that adjust the demotion factor +automatically: +.Nm +and +.Xr pfsync 4 . +.It Va net.inet.carp.ifdown_demotion_factor +Value added to +.Va net.inet.carp.demotion +when an interface running a vhid goes down. +Default value is 240 (maximum advskew value). +.It Va net.inet.carp.senderr_demotion_factor +Value added to +.Va net.inet.carp.demotion +when +.Nm +experiences errors sending its announcements. +Default value is 240 (maximum advskew value). .El .\".Sh ARP level load balancing .\"The diff --git a/sys/contrib/pf/net/if_pfsync.c b/sys/contrib/pf/net/if_pfsync.c index a8e5fa98ee64..8614c0048f6c 100644 --- a/sys/contrib/pf/net/if_pfsync.c +++ b/sys/contrib/pf/net/if_pfsync.c @@ -62,12 +62,6 @@ __FBSDID("$FreeBSD$"); #else #define NPFSYNC 0 #endif - -#ifdef DEV_CARP -#define NCARP DEV_CARP -#else -#define NCARP 0 -#endif #endif /* __FreeBSD__ */ #include @@ -127,12 +121,14 @@ __FBSDID("$FreeBSD$"); #include #endif /* INET6 */ -#ifndef __FreeBSD__ +#ifdef __FreeBSD__ +#include +#else #include "carp.h" -#endif #if NCARP > 0 #include #endif +#endif #include #include @@ -308,11 +304,15 @@ static VNET_DEFINE(struct pfsync_softc *, pfsyncif) = NULL; static VNET_DEFINE(struct pfsyncstats, pfsyncstats); #define V_pfsyncstats VNET(pfsyncstats) +static VNET_DEFINE(int, pfsync_carp_adj) = CARP_MAXSKEW; +#define V_pfsync_carp_adj VNET(pfsync_carp_adj) SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW, 0, "PFSYNC"); SYSCTL_VNET_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_RW, &VNET_NAME(pfsyncstats), pfsyncstats, "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)"); +SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_RW, + &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment"); #else struct pfsync_softc *pfsyncif = NULL; struct pfsyncstats pfsyncstats; @@ -505,11 +505,11 @@ pfsync_clone_create(struct if_clone *ifc, int unit) if_attach(ifp); #ifndef
__FreeBSD__ if_alloc_sadl(ifp); -#endif #if NCARP > 0 if_addgroup(ifp, "carp"); #endif +#endif #if NBPFILTER > 0 #ifdef __FreeBSD__ @@ -545,14 +545,11 @@ pfsync_clone_destroy(struct ifnet *ifp) timeout_del(&sc->sc_tmo); #ifdef __FreeBSD__ PF_UNLOCK(); -#endif -#if NCARP > 0 -#ifdef notyet -#ifdef __FreeBSD__ - if (!sc->pfsync_sync_ok) + if (!sc->pfsync_sync_ok && carp_demote_adj_p) + (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy"); #else +#if NCARP > 0 if (!pfsync_sync_ok) -#endif carp_group_demote_adj(&sc->sc_if, -1); #endif #endif @@ -1636,19 +1633,16 @@ pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) sc->sc_ureq_sent = 0; sc->sc_bulk_tries = 0; timeout_del(&sc->sc_bulkfail_tmo); -#if NCARP > 0 -#ifdef notyet -#ifdef __FreeBSD__ - if (!sc->pfsync_sync_ok) -#else - if (!pfsync_sync_ok) -#endif - carp_group_demote_adj(&sc->sc_if, -1); -#endif -#endif #ifdef __FreeBSD__ + if (!sc->pfsync_sync_ok && carp_demote_adj_p) + (*carp_demote_adj_p)(-V_pfsync_carp_adj, + "pfsync bulk done"); sc->pfsync_sync_ok = 1; #else +#if NCARP > 0 + if (!pfsync_sync_ok) + carp_group_demote_adj(&sc->sc_if, -1); +#endif pfsync_sync_ok = 1; #endif #ifdef __FreeBSD__ @@ -1988,19 +1982,16 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) if (sc->sc_sync_if) { /* Request a full state table update. 
*/ sc->sc_ureq_sent = time_uptime; -#if NCARP > 0 -#ifdef notyet -#ifdef __FreeBSD__ - if (sc->pfsync_sync_ok) -#else - if (pfsync_sync_ok) -#endif - carp_group_demote_adj(&sc->sc_if, 1); -#endif -#endif #ifdef __FreeBSD__ + if (sc->pfsync_sync_ok && carp_demote_adj_p) + (*carp_demote_adj_p)(V_pfsync_carp_adj, + "pfsync bulk start"); sc->pfsync_sync_ok = 0; #else +#if NCARP > 0 + if (pfsync_sync_ok) + carp_group_demote_adj(&sc->sc_if, 1); +#endif pfsync_sync_ok = 0; #endif #ifdef __FreeBSD__ @@ -3159,19 +3150,16 @@ pfsync_bulk_fail(void *arg) /* Pretend like the transfer was ok */ sc->sc_ureq_sent = 0; sc->sc_bulk_tries = 0; -#if NCARP > 0 -#ifdef notyet -#ifdef __FreeBSD__ - if (!sc->pfsync_sync_ok) -#else - if (!pfsync_sync_ok) -#endif - carp_group_demote_adj(&sc->sc_if, -1); -#endif -#endif #ifdef __FreeBSD__ + if (!sc->pfsync_sync_ok && carp_demote_adj_p) + (*carp_demote_adj_p)(-V_pfsync_carp_adj, + "pfsync bulk fail"); sc->pfsync_sync_ok = 1; #else +#if NCARP > 0 + if (!pfsync_sync_ok) + carp_group_demote_adj(&sc->sc_if, -1); +#endif pfsync_sync_ok = 1; #endif #ifdef __FreeBSD__ diff --git a/sys/net/if.c b/sys/net/if.c index 437734354af3..50aa11ca1291 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -129,6 +129,7 @@ void (*ng_ether_link_state_p)(struct ifnet *ifp, int state); void (*lagg_linkstate_p)(struct ifnet *ifp, int state); /* These are external hooks for CARP. */ void (*carp_linkstate_p)(struct ifnet *ifp); +void (*carp_demote_adj_p)(int, char *); #if defined(INET) || defined(INET6) int (*carp_forus_p)(struct ifnet *ifp, u_char *dhost); int (*carp_output_p)(struct ifnet *ifp, struct mbuf *m, diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c index 2875537228d5..a5e0cb7882d4 100644 --- a/sys/netinet/ip_carp.c +++ b/sys/netinet/ip_carp.c @@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -185,22 +186,30 @@ static int proto_reg[] = {-1, -1}; * dereferencing our function pointers. 
*/ -int carp_suppress_preempt = 0; -int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, }; -SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW, 0, "CARP"); -SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW, - &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets"); -SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW, - &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode"); -SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW, - &carp_opts[CARPCTL_LOG], 0, "log bad carp packets"); -SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD, - &carp_suppress_preempt, 0, "Preemption is suppressed"); +static int carp_allow = 1; /* Accept incoming CARP packets. */ +static int carp_preempt = 0; /* Preempt slower nodes. */ +static int carp_log = 1; /* Log level. */ +static int carp_demotion = 0; /* Global advskew demotion. */ +static int carp_senderr_adj = CARP_MAXSKEW; /* Send error demotion factor */ +static int carp_ifdown_adj = CARP_MAXSKEW; /* Iface down demotion factor */ -struct carpstats carpstats; -SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW, - &carpstats, carpstats, - "CARP statistics (struct carpstats, netinet/ip_carp.h)"); +SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW, 0, "CARP"); +SYSCTL_INT(_net_inet_carp, OID_AUTO, allow, CTLFLAG_RW, &carp_allow, 0, + "Accept incoming CARP packets"); +SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_RW, &carp_preempt, 0, + "High-priority backup preemption mode"); +SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_RW, &carp_log, 0, + "CARP log level"); +SYSCTL_INT(_net_inet_carp, OID_AUTO, demotion, CTLFLAG_RW, &carp_demotion, 0, + "Demotion factor (skew of advskew)"); +SYSCTL_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor, CTLFLAG_RW, + &carp_senderr_adj, 0, "Send error demotion factor adjustment"); +SYSCTL_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor, CTLFLAG_RW, + &carp_ifdown_adj, 0, "Interface down demotion factor 
adjustment"); + +static struct carpstats carpstats; +SYSCTL_STRUCT(_net_inet_carp, OID_AUTO, stats, CTLFLAG_RW, &carpstats, + carpstats, "CARP statistics (struct carpstats, netinet/ip_carp.h)"); #define CARP_LOCK_INIT(sc) mtx_init(&(sc)->sc_mtx, "carp_softc", \ NULL, MTX_DEF) @@ -216,12 +225,12 @@ SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW, #define CIF_UNLOCK(cif) mtx_unlock(&(cif)->cif_mtx) #define CARP_LOG(...) do { \ - if (carp_opts[CARPCTL_LOG] > 0) \ + if (carp_log > 0) \ log(LOG_INFO, "carp: " __VA_ARGS__); \ } while (0) #define CARP_DEBUG(...) do { \ - if (carp_opts[CARPCTL_LOG] > 1) \ + if (carp_log > 1) \ log(LOG_DEBUG, __VA_ARGS__); \ } while (0) @@ -241,6 +250,10 @@ SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW, CIF_LOCK_ASSERT(ifp->if_carp); \ TAILQ_FOREACH((sc), &(ifp)->if_carp->cif_vrs, sc_list) +#define DEMOTE_ADVSKEW(sc) \ + (((sc)->sc_advskew + carp_demotion > CARP_MAXSKEW) ? \ + CARP_MAXSKEW : ((sc)->sc_advskew + carp_demotion)) + static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t); static struct carp_softc *carp_alloc(struct ifnet *); @@ -257,9 +270,13 @@ static void carp_send_ad(void *); static void carp_send_ad_locked(struct carp_softc *); static void carp_addroute(struct carp_softc *); static void carp_delroute(struct carp_softc *); +static void carp_send_ad_all(void *, int); +static void carp_demote_adj(int, char *); static LIST_HEAD(, carp_softc) carp_list; static struct mtx carp_mtx; +static struct task carp_sendall_task = + TASK_INITIALIZER(0, carp_send_ad_all, NULL); static __inline uint16_t carp_cksum(struct mbuf *m, int len) @@ -390,7 +407,7 @@ carp_input(struct mbuf *m, int hlen) CARPSTATS_INC(carps_ipackets); - if (!carp_opts[CARPCTL_ALLOW]) { + if (!carp_allow) { m_freem(m); return; } @@ -473,7 +490,7 @@ carp6_input(struct mbuf **mp, int *offp, int proto) CARPSTATS_INC(carps_ipackets6); - if (!carp_opts[CARPCTL_ALLOW]) { + if (!carp_allow) { m_freem(m); return 
(IPPROTO_DONE); } @@ -578,10 +595,7 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) sc->sc_counter = tmp_counter; sc_tv.tv_sec = sc->sc_advbase; - if (carp_suppress_preempt && sc->sc_advskew < 240) - sc_tv.tv_usec = 240 * 1000000 / 256; - else - sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256; + sc_tv.tv_usec = DEMOTE_ADVSKEW(sc) * 1000000 / 256; ch_tv.tv_sec = ch->carp_advbase; ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; @@ -610,8 +624,7 @@ carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) * If we're pre-empting masters who advertise slower than us, * and this one claims to be slower, treat him as down. */ - if (carp_opts[CARPCTL_PREEMPT] && - timevalcmp(&sc_tv, &ch_tv, <)) { + if (carp_preempt && timevalcmp(&sc_tv, &ch_tv, <)) { CARP_LOG("VHID %u@%s: BACKUP -> MASTER " "(preempting a slower master)\n", sc->sc_vhid, @@ -679,26 +692,23 @@ carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) return (0); } +/* + * To avoid LORs and possible recursions this function shouldn't + * be called directly, but scheduled via taskqueue. + */ static void -carp_send_ad_all(struct carp_softc *badsc) +carp_send_ad_all(void *ctx __unused, int pending __unused) { struct carp_softc *sc; - /* - * Avoid LOR and recursive call to carp_send_ad_locked(). 
- */ - CARP_UNLOCK(badsc); - mtx_lock(&carp_mtx); LIST_FOREACH(sc, &carp_list, sc_next) - if (sc != badsc && sc->sc_state == MASTER) { + if (sc->sc_state == MASTER) { CARP_LOCK(sc); carp_send_ad_locked(sc); CARP_UNLOCK(sc); } mtx_unlock(&carp_mtx); - - CARP_LOCK(badsc); } static void @@ -724,10 +734,7 @@ carp_send_ad_locked(struct carp_softc *sc) CARP_LOCK_ASSERT(sc); - if (!carp_suppress_preempt || sc->sc_advskew > 240) - advskew = sc->sc_advskew; - else - advskew = 240; + advskew = DEMOTE_ADVSKEW(sc); tv.tv_sec = sc->sc_advbase; tv.tv_usec = advskew * 1000000 / 256; @@ -797,17 +804,15 @@ carp_send_ad_locked(struct carp_softc *sc) &sc->sc_carpdev->if_carp->cif_imo, NULL)) { if (sc->sc_sendad_errors < INT_MAX) sc->sc_sendad_errors++; - if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { - carp_suppress_preempt++; - if (carp_suppress_preempt == 1) - carp_send_ad_all(sc); - } + if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) + carp_demote_adj(carp_senderr_adj, "send error"); sc->sc_sendad_success = 0; } else { if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { if (++sc->sc_sendad_success >= CARP_SENDAD_MIN_SUCCESS) { - carp_suppress_preempt--; + carp_demote_adj(-carp_senderr_adj, + "send ok"); sc->sc_sendad_errors = 0; } } else @@ -875,17 +880,16 @@ carp_send_ad_locked(struct carp_softc *sc) &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL)) { if (sc->sc_sendad_errors < INT_MAX) sc->sc_sendad_errors++; - if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { - carp_suppress_preempt++; - if (carp_suppress_preempt == 1) - carp_send_ad_all(sc); - } + if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) + carp_demote_adj(carp_senderr_adj, + "send6 error"); sc->sc_sendad_success = 0; } else { if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) { if (++sc->sc_sendad_success >= CARP_SENDAD_MIN_SUCCESS) { - carp_suppress_preempt--; + carp_demote_adj(-carp_senderr_adj, + "send6 ok"); sc->sc_sendad_errors = 0; } } else @@ -1479,6 +1483,8 @@ carp_destroy(struct carp_softc 
*sc) mtx_unlock(&carp_mtx); CARP_LOCK(sc); + if (sc->sc_suppress) + carp_demote_adj(-carp_ifdown_adj, "vhid removed"); callout_drain(&sc->sc_ad_tmo); #ifdef INET callout_drain(&sc->sc_md_tmo); @@ -1914,21 +1920,25 @@ carp_sc_state(struct carp_softc *sc) #endif carp_set_state(sc, INIT); carp_setrun(sc, 0); - if (!sc->sc_suppress) { - carp_suppress_preempt++; - if (carp_suppress_preempt == 1) - carp_send_ad_all(sc); - } + if (!sc->sc_suppress) + carp_demote_adj(carp_ifdown_adj, "interface down"); sc->sc_suppress = 1; } else { carp_set_state(sc, INIT); carp_setrun(sc, 0); if (sc->sc_suppress) - carp_suppress_preempt--; + carp_demote_adj(-carp_ifdown_adj, "interface up"); sc->sc_suppress = 0; } } +static void +carp_demote_adj(int adj, char *reason) +{ + carp_demotion += adj; + CARP_LOG("demoted by %d to %d (%s)\n", adj, carp_demotion, reason); + taskqueue_enqueue(taskqueue_swi, &carp_sendall_task); +} #ifdef INET extern struct domain inetdomain; @@ -1986,6 +1996,9 @@ carp_mod_cleanup(void) carp_linkstate_p = NULL; carp_forus_p = NULL; carp_output_p = NULL; + carp_demote_adj_p = NULL; + mtx_unlock(&carp_mtx); + taskqueue_drain(taskqueue_swi, &carp_sendall_task); mtx_destroy(&carp_mtx); } @@ -2003,6 +2016,7 @@ carp_mod_load(void) carp_ioctl_p = carp_ioctl; carp_attach_p = carp_attach; carp_detach_p = carp_detach; + carp_demote_adj_p = carp_demote_adj; #ifdef INET6 carp_iamatch6_p = carp_iamatch6; carp_macmatch6_p = carp_macmatch6; diff --git a/sys/netinet/ip_carp.h b/sys/netinet/ip_carp.h index d8b82a8d00fc..7be91c015e6e 100644 --- a/sys/netinet/ip_carp.h +++ b/sys/netinet/ip_carp.h @@ -133,29 +133,13 @@ struct carpreq { #define CARP_STATES "INIT", "BACKUP", "MASTER" #define CARP_MAXSTATE 2 int carpr_advskew; +#define CARP_MAXSKEW 240 int carpr_advbase; unsigned char carpr_key[CARP_KEY_LEN]; }; #define SIOCSVH _IOWR('i', 245, struct ifreq) #define SIOCGVH _IOWR('i', 246, struct ifreq) -/* - * Names for CARP sysctl objects - */ -#define CARPCTL_ALLOW 1 /* accept incoming 
CARP packets */ -#define CARPCTL_PREEMPT 2 /* high-pri backup preemption mode */ -#define CARPCTL_LOG 3 /* log bad packets */ -#define CARPCTL_STATS 4 /* statistics (read-only) */ -#define CARPCTL_MAXID 5 - -#define CARPCTL_NAMES { \ - { 0, 0 }, \ - { "allow", CTLTYPE_INT }, \ - { "preempt", CTLTYPE_INT }, \ - { "log", CTLTYPE_INT }, \ - { "stats", CTLTYPE_STRUCT }, \ -} - #ifdef _KERNEL int carp_ioctl(struct ifreq *, u_long, struct thread *); int carp_attach(struct ifaddr *, int); @@ -175,6 +159,7 @@ extern int (*carp_ioctl_p)(struct ifreq *, u_long, struct thread *); extern int (*carp_attach_p)(struct ifaddr *, int); extern void (*carp_detach_p)(struct ifaddr *); extern void (*carp_linkstate_p)(struct ifnet *); +extern void (*carp_demote_adj_p)(int, char *); /* net/if_bridge.c net/if_ethersubr.c */ extern int (*carp_forus_p)(struct ifnet *, u_char *); /* net/if_ethersubr.c */