Convert the if_lagg rwlock to an rmlock.
We've been seeing lots of cache line contention (but not lock contention!) in our workloads between the various TX and RX threads. The write lock is only grabbed when configuration changes are made, which is infrequent. With this patch, the contention and the cycles spent waiting for updates disappear.

Sponsored by: Netflix, Inc.
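For reference, the locking pattern the diff below applies everywhere: rmlock(9) readers pass a stack-allocated `struct rm_priotracker` to `rm_rlock()`/`rm_runlock()`, so the hot read path avoids the shared cache-line write that `rw_rlock()` performs on the lock word. A minimal sketch of the pattern, with hypothetical `foo_*` names (the real change is confined to `if_lagg.c`/`if_lagg.h`):

```c
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/rmlock.h>

struct foo_softc {
	struct rmlock	sc_lock;	/* read-mostly configuration lock */
	int		sc_cfg;		/* configuration read on every packet */
};

static void
foo_init(struct foo_softc *sc)
{
	rm_init(&sc->sc_lock, "foo rmlock");
}

/* Hot path: TX/RX threads take only a read lock, tracked on the stack. */
static int
foo_fast_path(struct foo_softc *sc)
{
	struct rm_priotracker tracker;
	int cfg;

	rm_rlock(&sc->sc_lock, &tracker);
	cfg = sc->sc_cfg;
	rm_runlock(&sc->sc_lock, &tracker);
	return (cfg);
}

/* Cold path: infrequent configuration changes take the write lock. */
static void
foo_reconfig(struct foo_softc *sc, int cfg)
{
	rm_wlock(&sc->sc_lock);
	sc->sc_cfg = cfg;
	rm_wunlock(&sc->sc_lock);
}
```

The per-reader tracker is why every read-locked function in the diff gains a `struct rm_priotracker tracker;` local, and why `LAGG_RLOCK()`/`LAGG_RUNLOCK()` grow a second argument.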
commit 310915a45a
parent 0289d87d9c
--- a/sys/net/if_lagg.c
+++ b/sys/net/if_lagg.c
@@ -37,7 +37,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/proc.h>
 #include <sys/hash.h>
 #include <sys/lock.h>
-#include <sys/rwlock.h>
+#include <sys/rmlock.h>
 #include <sys/taskqueue.h>
 #include <sys/eventhandler.h>
 
@@ -233,16 +233,17 @@ lagg_register_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
 {
 	struct lagg_softc *sc = ifp->if_softc;
 	struct lagg_port *lp;
+	struct rm_priotracker tracker;
 
 	if (ifp->if_softc != arg) /* Not our event */
 		return;
 
-	LAGG_RLOCK(sc);
+	LAGG_RLOCK(sc, &tracker);
 	if (!SLIST_EMPTY(&sc->sc_ports)) {
 		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 			EVENTHANDLER_INVOKE(vlan_config, lp->lp_ifp, vtag);
 	}
-	LAGG_RUNLOCK(sc);
+	LAGG_RUNLOCK(sc, &tracker);
 }
 
 /*
@@ -254,16 +255,17 @@ lagg_unregister_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
 {
 	struct lagg_softc *sc = ifp->if_softc;
 	struct lagg_port *lp;
+	struct rm_priotracker tracker;
 
 	if (ifp->if_softc != arg) /* Not our event */
 		return;
 
-	LAGG_RLOCK(sc);
+	LAGG_RLOCK(sc, &tracker);
 	if (!SLIST_EMPTY(&sc->sc_ports)) {
 		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 			EVENTHANDLER_INVOKE(vlan_unconfig, lp->lp_ifp, vtag);
 	}
-	LAGG_RUNLOCK(sc);
+	LAGG_RUNLOCK(sc, &tracker);
 }
 
 static int
@@ -322,9 +324,15 @@ lagg_clone_create(struct if_clone *ifc, int unit, caddr_t params)
 		}
 	}
 	LAGG_LOCK_INIT(sc);
+	LAGG_CALLOUT_LOCK_INIT(sc);
 	SLIST_INIT(&sc->sc_ports);
 	TASK_INIT(&sc->sc_lladdr_task, 0, lagg_port_setlladdr, sc);
-	callout_init_rw(&sc->sc_callout, &sc->sc_mtx, CALLOUT_SHAREDLOCK);
+
+	/*
+	 * This uses the callout lock rather than the rmlock; one can't
+	 * hold said rmlock during SWI.
+	 */
+	callout_init_mtx(&sc->sc_callout, &sc->sc_call_mtx, 0);
 
 	/* Initialise pseudo media types */
 	ifmedia_init(&sc->sc_media, 0, lagg_media_change,
@@ -389,7 +397,10 @@ lagg_clone_destroy(struct ifnet *ifp)
 	ether_ifdetach(ifp);
 	if_free(ifp);
 
+	/* This grabs sc_callout_mtx, serialising it correctly */
 	callout_drain(&sc->sc_callout);
+
+	/* At this point it's drained; we can free this */
 	counter_u64_free(sc->sc_ipackets);
 	counter_u64_free(sc->sc_opackets);
 	counter_u64_free(sc->sc_ibytes);
@@ -401,6 +412,7 @@ lagg_clone_destroy(struct ifnet *ifp)
 
 	taskqueue_drain(taskqueue_swi, &sc->sc_lladdr_task);
 	LAGG_LOCK_DESTROY(sc);
+	LAGG_CALLOUT_LOCK_DESTROY(sc);
 	free(sc, M_DEVBUF);
 }
 
@@ -764,6 +776,7 @@ lagg_port_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 	struct lagg_softc *sc;
 	struct lagg_port *lp = NULL;
 	int error = 0;
+	struct rm_priotracker tracker;
 
 	/* Should be checked by the caller */
 	if (ifp->if_type != IFT_IEEE8023ADLAG ||
@@ -778,15 +791,15 @@ lagg_port_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 			break;
 		}
 
-		LAGG_RLOCK(sc);
+		LAGG_RLOCK(sc, &tracker);
 		if ((lp = ifp->if_lagg) == NULL || lp->lp_softc != sc) {
 			error = ENOENT;
-			LAGG_RUNLOCK(sc);
+			LAGG_RUNLOCK(sc, &tracker);
 			break;
 		}
 
 		lagg_port2req(lp, rp);
-		LAGG_RUNLOCK(sc);
+		LAGG_RUNLOCK(sc, &tracker);
 		break;
 
 	case SIOCSIFCAP:
@@ -955,21 +968,22 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 	struct thread *td = curthread;
 	char *buf, *outbuf;
 	int count, buflen, len, error = 0;
+	struct rm_priotracker tracker;
 
 	bzero(&rpbuf, sizeof(rpbuf));
 
 	switch (cmd) {
 	case SIOCGLAGG:
-		LAGG_RLOCK(sc);
+		LAGG_RLOCK(sc, &tracker);
 		count = 0;
 		SLIST_FOREACH(lp, &sc->sc_ports, lp_entries)
 			count++;
 		buflen = count * sizeof(struct lagg_reqport);
-		LAGG_RUNLOCK(sc);
+		LAGG_RUNLOCK(sc, &tracker);
 
 		outbuf = malloc(buflen, M_TEMP, M_WAITOK | M_ZERO);
 
-		LAGG_RLOCK(sc);
+		LAGG_RLOCK(sc, &tracker);
 		ra->ra_proto = sc->sc_proto;
 		if (sc->sc_req != NULL)
 			(*sc->sc_req)(sc, (caddr_t)&ra->ra_psc);
@@ -987,7 +1001,7 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 			buf += sizeof(rpbuf);
 			len -= sizeof(rpbuf);
 		}
-		LAGG_RUNLOCK(sc);
+		LAGG_RUNLOCK(sc, &tracker);
 		ra->ra_ports = count;
 		ra->ra_size = count * sizeof(rpbuf);
 		error = copyout(outbuf, ra->ra_port, ra->ra_size);
@@ -1065,16 +1079,16 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 			break;
 		}
 
-		LAGG_RLOCK(sc);
+		LAGG_RLOCK(sc, &tracker);
 		if ((lp = (struct lagg_port *)tpif->if_lagg) == NULL ||
 		    lp->lp_softc != sc) {
 			error = ENOENT;
-			LAGG_RUNLOCK(sc);
+			LAGG_RUNLOCK(sc, &tracker);
 			break;
 		}
 
 		lagg_port2req(lp, rp);
-		LAGG_RUNLOCK(sc);
+		LAGG_RUNLOCK(sc, &tracker);
 		break;
 	case SIOCSLAGGPORT:
 		error = priv_check(td, PRIV_NET_LAGG);
@@ -1280,14 +1294,15 @@ lagg_transmit(struct ifnet *ifp, struct mbuf *m)
 {
 	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
 	int error, len, mcast;
+	struct rm_priotracker tracker;
 
 	len = m->m_pkthdr.len;
 	mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
 
-	LAGG_RLOCK(sc);
+	LAGG_RLOCK(sc, &tracker);
 	/* We need a Tx algorithm and at least one port */
 	if (sc->sc_proto == LAGG_PROTO_NONE || sc->sc_count == 0) {
-		LAGG_RUNLOCK(sc);
+		LAGG_RUNLOCK(sc, &tracker);
 		m_freem(m);
 		ifp->if_oerrors++;
 		return (ENXIO);
@@ -1296,7 +1311,7 @@ lagg_transmit(struct ifnet *ifp, struct mbuf *m)
 	ETHER_BPF_MTAP(ifp, m);
 
 	error = (*sc->sc_start)(sc, m);
-	LAGG_RUNLOCK(sc);
+	LAGG_RUNLOCK(sc, &tracker);
 
 	if (error == 0) {
 		counter_u64_add(sc->sc_opackets, 1);
@@ -1322,12 +1337,13 @@ lagg_input(struct ifnet *ifp, struct mbuf *m)
 	struct lagg_port *lp = ifp->if_lagg;
 	struct lagg_softc *sc = lp->lp_softc;
 	struct ifnet *scifp = sc->sc_ifp;
+	struct rm_priotracker tracker;
 
-	LAGG_RLOCK(sc);
+	LAGG_RLOCK(sc, &tracker);
 	if ((scifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
 	    (lp->lp_flags & LAGG_PORT_DISABLED) ||
 	    sc->sc_proto == LAGG_PROTO_NONE) {
-		LAGG_RUNLOCK(sc);
+		LAGG_RUNLOCK(sc, &tracker);
 		m_freem(m);
 		return (NULL);
 	}
@@ -1346,7 +1362,7 @@ lagg_input(struct ifnet *ifp, struct mbuf *m)
 		}
 	}
 
-	LAGG_RUNLOCK(sc);
+	LAGG_RUNLOCK(sc, &tracker);
 	return (m);
 }
 
@@ -1367,16 +1383,17 @@ lagg_media_status(struct ifnet *ifp, struct ifmediareq *imr)
 {
 	struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc;
 	struct lagg_port *lp;
+	struct rm_priotracker tracker;
 
 	imr->ifm_status = IFM_AVALID;
 	imr->ifm_active = IFM_ETHER | IFM_AUTO;
 
-	LAGG_RLOCK(sc);
+	LAGG_RLOCK(sc, &tracker);
 	SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) {
 		if (LAGG_PORTACTIVE(lp))
 			imr->ifm_status |= IFM_ACTIVE;
 	}
-	LAGG_RUNLOCK(sc);
+	LAGG_RUNLOCK(sc, &tracker);
 }
 
 static void
--- a/sys/net/if_lagg.h
+++ b/sys/net/if_lagg.h
@@ -187,7 +187,8 @@ struct lagg_llq {
 
 struct lagg_softc {
 	struct ifnet		*sc_ifp;	/* virtual interface */
-	struct rwlock		sc_mtx;
+	struct rmlock		sc_mtx;
+	struct mtx		sc_call_mtx;
 	int			sc_proto;	/* lagg protocol */
 	u_int			sc_count;	/* number of ports */
 	u_int			sc_active;	/* active port count */
@@ -255,14 +256,19 @@ struct lagg_port {
 	SLIST_ENTRY(lagg_port)	lp_entries;
 };
 
-#define	LAGG_LOCK_INIT(_sc)	rw_init(&(_sc)->sc_mtx, "if_lagg rwlock")
-#define	LAGG_LOCK_DESTROY(_sc)	rw_destroy(&(_sc)->sc_mtx)
-#define	LAGG_RLOCK(_sc)		rw_rlock(&(_sc)->sc_mtx)
-#define	LAGG_WLOCK(_sc)		rw_wlock(&(_sc)->sc_mtx)
-#define	LAGG_RUNLOCK(_sc)	rw_runlock(&(_sc)->sc_mtx)
-#define	LAGG_WUNLOCK(_sc)	rw_wunlock(&(_sc)->sc_mtx)
-#define	LAGG_RLOCK_ASSERT(_sc)	rw_assert(&(_sc)->sc_mtx, RA_RLOCKED)
-#define	LAGG_WLOCK_ASSERT(_sc)	rw_assert(&(_sc)->sc_mtx, RA_WLOCKED)
+#define	LAGG_LOCK_INIT(_sc)	rm_init(&(_sc)->sc_mtx, "if_lagg rmlock")
+#define	LAGG_LOCK_DESTROY(_sc)	rm_destroy(&(_sc)->sc_mtx)
+#define	LAGG_RLOCK(_sc, _p)	rm_rlock(&(_sc)->sc_mtx, (_p))
+#define	LAGG_WLOCK(_sc)		rm_wlock(&(_sc)->sc_mtx)
+#define	LAGG_RUNLOCK(_sc, _p)	rm_runlock(&(_sc)->sc_mtx, (_p))
+#define	LAGG_WUNLOCK(_sc)	rm_wunlock(&(_sc)->sc_mtx)
+#define	LAGG_RLOCK_ASSERT(_sc)	rm_assert(&(_sc)->sc_mtx, RA_RLOCKED)
+#define	LAGG_WLOCK_ASSERT(_sc)	rm_assert(&(_sc)->sc_mtx, RA_WLOCKED)
+
+#define	LAGG_CALLOUT_LOCK_INIT(_sc)					\
+	mtx_init(&(_sc)->sc_call_mtx, "if_lagg callout mutex", NULL,	\
+	    MTX_DEF)
+#define	LAGG_CALLOUT_LOCK_DESTROY(_sc)	mtx_destroy(&(_sc)->sc_call_mtx)
 
 extern struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *);
 extern void (*lagg_linkstate_p)(struct ifnet *, int );
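One design note the diff encodes: as the comment added in lagg_clone_create() says, the rmlock can't be held during SWI, where callout handlers run, so `sc_callout` is switched from `callout_init_rw()` on the main lock to `callout_init_mtx()` on a dedicated `MTX_DEF` mutex (`sc_call_mtx`). A minimal sketch of that pairing and its teardown order, with hypothetical `bar_*` names:

```c
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/callout.h>

struct bar_softc {
	struct mtx	sc_call_mtx;	/* drives the callout, not the rmlock */
	struct callout	sc_callout;
};

static void
bar_attach(struct bar_softc *sc)
{
	mtx_init(&sc->sc_call_mtx, "bar callout mutex", NULL, MTX_DEF);
	/* The callout handler will run with sc_call_mtx held. */
	callout_init_mtx(&sc->sc_callout, &sc->sc_call_mtx, 0);
}

static void
bar_detach(struct bar_softc *sc)
{
	/* callout_drain() sleeps until any in-flight handler finishes... */
	callout_drain(&sc->sc_callout);
	/* ...so the mutex is destroyed only once nothing can still take it. */
	mtx_destroy(&sc->sc_call_mtx);
}
```

This mirrors the ordering in lagg_clone_destroy(): drain the callout first, then `LAGG_CALLOUT_LOCK_DESTROY()`, then `free()`.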