From abf245020a075c487a1ac4e60c7069e2d8c9c7c3 Mon Sep 17 00:00:00 2001 From: glebius Date: Thu, 2 Aug 2012 13:57:49 +0000 Subject: [PATCH] Fix races between in_lltable_prefix_free(), lla_lookup(), llentry_free() and arptimer(): o Use callout_init_rw() for lle timeout, this allows us safely disestablish them. - This allows us to simplify the arptimer() and make it race safe. o Consistently use ifp->if_afdata_lock to lock access to linked lists in the lle hashes. o Introduce new lle flag LLE_LINKED, which marks an entry that is attached to the hash. - Use LLE_LINKED to avoid double unlinking via consequent calls to llentry_free(). - Mark lle with LLE_DELETED via |= operation istead of =, so that other flags won't be lost. o Make LLE_ADDREF(), LLE_REMREF() and LLE_FREE_LOCKED() more consistent and provide more informative KASSERTs. The patch is a collaborative work of all submitters and myself. PR: kern/165863 Submitted by: Andrey Zonov Submitted by: Ryan Stone Submitted by: Eric van Gyzen --- sys/net/if_llatbl.c | 21 ++++++++++----- sys/net/if_llatbl.h | 17 +++++++----- sys/net/if_var.h | 2 ++ sys/netinet/if_ether.c | 61 ++++++++++++++++++------------------------ sys/netinet/in.c | 16 ++++++----- sys/netinet6/in6.c | 18 ++++++------- 6 files changed, 70 insertions(+), 65 deletions(-) diff --git a/sys/net/if_llatbl.c b/sys/net/if_llatbl.c index 2fc75ef0e1b6..bb49fdddb4af 100644 --- a/sys/net/if_llatbl.c +++ b/sys/net/if_llatbl.c @@ -106,10 +106,19 @@ llentry_free(struct llentry *lle) size_t pkts_dropped; struct mbuf *next; - pkts_dropped = 0; + IF_AFDATA_WLOCK_ASSERT(lle->lle_tbl->llt_ifp); LLE_WLOCK_ASSERT(lle); - LIST_REMOVE(lle, lle_next); + /* XXX: guard against race with other llentry_free(). */ + if (!(lle->la_flags & LLE_LINKED)) { + LLE_FREE_LOCKED(lle); + return (0); + } + + LIST_REMOVE(lle, lle_next); + lle->la_flags &= ~(LLE_VALID | LLE_LINKED); + + pkts_dropped = 0; while ((lle->la_numheld > 0) && (lle->la_hold != NULL)) { next = lle->la_hold->m_nextpkt; m_freem(lle->la_hold); @@ -122,7 +131,6 @@ llentry_free(struct llentry *lle) ("%s: la_numheld %d > 0, pkts_droped %zd", __func__, lle->la_numheld, pkts_dropped)); - lle->la_flags &= ~LLE_VALID; LLE_FREE_LOCKED(lle); return (pkts_dropped); @@ -173,17 +181,16 @@ lltable_free(struct lltable *llt) SLIST_REMOVE(&V_lltables, llt, lltable, llt_link); LLTABLE_WUNLOCK(); + IF_AFDATA_WLOCK(llt->llt_ifp); for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) { LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) { - int canceled; - - canceled = callout_drain(&lle->la_timer); LLE_WLOCK(lle); - if (canceled) + if (callout_stop(&lle->la_timer)) LLE_REMREF(lle); llentry_free(lle); } } + IF_AFDATA_WUNLOCK(llt->llt_ifp); free(llt, M_LLTABLE); } diff --git a/sys/net/if_llatbl.h b/sys/net/if_llatbl.h index 63bed2203d26..8da08ba6c4f6 100644 --- a/sys/net/if_llatbl.h +++ b/sys/net/if_llatbl.h @@ -103,26 +103,28 @@ struct llentry { #define LLE_ADDREF(lle) do { \ LLE_WLOCK_ASSERT(lle); \ KASSERT((lle)->lle_refcnt >= 0, \ - ("negative refcnt %d", (lle)->lle_refcnt)); \ + ("negative refcnt %d on lle %p", \ + (lle)->lle_refcnt, (lle))); \ (lle)->lle_refcnt++; \ } while (0) #define LLE_REMREF(lle) do { \ LLE_WLOCK_ASSERT(lle); \ - KASSERT((lle)->lle_refcnt > 1, \ - ("bogus refcnt %d", (lle)->lle_refcnt)); \ + KASSERT((lle)->lle_refcnt > 0, \ + ("bogus refcnt %d on lle %p", \ + (lle)->lle_refcnt, (lle))); \ (lle)->lle_refcnt--; \ } while (0) #define LLE_FREE_LOCKED(lle) do { \ - if ((lle)->lle_refcnt <= 1) \ - (lle)->lle_free((lle)->lle_tbl, (lle));\ + if ((lle)->lle_refcnt == 1) \ + (lle)->lle_free((lle)->lle_tbl, (lle)); \ else { \ - (lle)->lle_refcnt--; \ + LLE_REMREF(lle); \ LLE_WUNLOCK(lle); \ } \ /* guard against invalid refs */ \ - lle = NULL; \ + (lle) = NULL; \ } while (0) #define LLE_FREE(lle) do { \ @@ -172,6 +174,7 @@ MALLOC_DECLARE(M_LLTABLE); #define LLE_VALID 0x0008 /* ll_addr is valid */ #define LLE_PROXY 0x0010 /* proxy entry ??? */ #define LLE_PUB 0x0020 /* publish entry ??? */ +#define LLE_LINKED 0x0040 /* linked to lookup structure */ #define LLE_EXCLUSIVE 0x2000 /* return lle xlocked */ #define LLE_DELETE 0x4000 /* delete on a lookup - match LLE_IFADDR */ #define LLE_CREATE 0x8000 /* create on a lookup miss */ diff --git a/sys/net/if_var.h b/sys/net/if_var.h index 3d57953c47c7..a244359e8f56 100644 --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -415,6 +415,8 @@ EVENTHANDLER_DECLARE(group_change_event, group_change_event_handler_t); #define IF_AFDATA_DESTROY(ifp) rw_destroy(&(ifp)->if_afdata_lock) #define IF_AFDATA_LOCK_ASSERT(ifp) rw_assert(&(ifp)->if_afdata_lock, RA_LOCKED) +#define IF_AFDATA_RLOCK_ASSERT(ifp) rw_assert(&(ifp)->if_afdata_lock, RA_RLOCKED) +#define IF_AFDATA_WLOCK_ASSERT(ifp) rw_assert(&(ifp)->if_afdata_lock, RA_WLOCKED) #define IF_AFDATA_UNLOCK_ASSERT(ifp) rw_assert(&(ifp)->if_afdata_lock, RA_UNLOCKED) int if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, diff --git a/sys/netinet/if_ether.c b/sys/netinet/if_ether.c index df1f98097f41..8e2daebbe049 100644 --- a/sys/netinet/if_ether.c +++ b/sys/netinet/if_ether.c @@ -163,49 +163,40 @@ arp_ifscrub(struct ifnet *ifp, uint32_t addr) static void arptimer(void *arg) { + struct llentry *lle = (struct llentry *)arg; struct ifnet *ifp; - struct llentry *lle; - int pkts_dropped; + size_t pkts_dropped; + + if (lle->la_flags & LLE_STATIC) { + LLE_WUNLOCK(lle); + return; + } - KASSERT(arg != NULL, ("%s: arg NULL", __func__)); - lle = (struct llentry *)arg; ifp = lle->lle_tbl->llt_ifp; CURVNET_SET(ifp->if_vnet); + + if (lle->la_flags != LLE_DELETED) { + int evt; + + if (lle->la_flags & LLE_VALID) + evt = LLENTRY_EXPIRED; + else + evt = LLENTRY_TIMEDOUT; + EVENTHANDLER_INVOKE(lle_event, lle, evt); + } + + callout_stop(&lle->la_timer); + + /* XXX: LOR avoidance. We still have ref on lle. */ + LLE_WUNLOCK(lle); IF_AFDATA_LOCK(ifp); LLE_WLOCK(lle); - if (lle->la_flags & LLE_STATIC) - LLE_WUNLOCK(lle); - else { - if (!callout_pending(&lle->la_timer) && - callout_active(&lle->la_timer)) { - callout_stop(&lle->la_timer); - LLE_REMREF(lle); - if (lle->la_flags != LLE_DELETED) { - int evt; - - if (lle->la_flags & LLE_VALID) - evt = LLENTRY_EXPIRED; - else - evt = LLENTRY_TIMEDOUT; - EVENTHANDLER_INVOKE(lle_event, lle, evt); - } - - pkts_dropped = llentry_free(lle); - ARPSTAT_ADD(dropped, pkts_dropped); - ARPSTAT_INC(timeouts); - } else { -#ifdef DIAGNOSTIC - struct sockaddr *l3addr = L3_ADDR(lle); - log(LOG_INFO, - "arptimer issue: %p, IPv4 address: \"%s\"\n", lle, - inet_ntoa( - ((const struct sockaddr_in *)l3addr)->sin_addr)); -#endif - LLE_WUNLOCK(lle); - } - } + LLE_REMREF(lle); + pkts_dropped = llentry_free(lle); IF_AFDATA_UNLOCK(ifp); + ARPSTAT_ADD(dropped, pkts_dropped); + ARPSTAT_INC(timeouts); CURVNET_RESTORE(); } diff --git a/sys/netinet/in.c b/sys/netinet/in.c index 5292d1067e19..69f40b95bf19 100644 --- a/sys/netinet/in.c +++ b/sys/netinet/in.c @@ -1280,7 +1280,6 @@ in_lltable_new(const struct sockaddr *l3addr, u_int flags) if (lle == NULL) /* NB: caller generates msg */ return NULL; - callout_init(&lle->base.la_timer, CALLOUT_MPSAFE); /* * For IPv4 this will trigger "arpresolve" to generate * an ARP request. @@ -1290,7 +1289,10 @@ in_lltable_new(const struct sockaddr *l3addr, u_int flags) lle->base.lle_refcnt = 1; lle->base.lle_free = in_lltable_free; LLE_LOCK_INIT(&lle->base); - return &lle->base; + callout_init_rw(&lle->base.la_timer, &lle->base.lle_lock, + CALLOUT_RETURNUNLOCKED); + + return (&lle->base); } #define IN_ARE_MASKED_ADDR_EQUAL(d, a, m) ( \ @@ -1306,6 +1308,7 @@ in_lltable_prefix_free(struct lltable *llt, const struct sockaddr *prefix, int i; size_t pkts_dropped; + IF_AFDATA_WLOCK(llt->llt_ifp); for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) { LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) { /* @@ -1315,17 +1318,15 @@ in_lltable_prefix_free(struct lltable *llt, const struct sockaddr *prefix, if (IN_ARE_MASKED_ADDR_EQUAL(satosin(L3_ADDR(lle)), pfx, msk) && ((flags & LLE_STATIC) || !(lle->la_flags & LLE_STATIC))) { - int canceled; - - canceled = callout_drain(&lle->la_timer); LLE_WLOCK(lle); - if (canceled) + if (callout_stop(&lle->la_timer)) LLE_REMREF(lle); pkts_dropped = llentry_free(lle); ARPSTAT_ADD(dropped, pkts_dropped); } } } + IF_AFDATA_WUNLOCK(llt->llt_ifp); } @@ -1457,11 +1458,12 @@ in_lltable_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3add lle->lle_tbl = llt; lle->lle_head = lleh; + lle->la_flags |= LLE_LINKED; LIST_INSERT_HEAD(lleh, lle, lle_next); } else if (flags & LLE_DELETE) { if (!(lle->la_flags & LLE_IFADDR) || (flags & LLE_IFADDR)) { LLE_WLOCK(lle); - lle->la_flags = LLE_DELETED; + lle->la_flags |= LLE_DELETED; EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_DELETED); LLE_WUNLOCK(lle); #ifdef DIAGNOSTIC diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c index 320bd7e89ab1..a7df9f8a0b17 100644 --- a/sys/netinet6/in6.c +++ b/sys/netinet6/in6.c @@ -2497,23 +2497,22 @@ in6_lltable_prefix_free(struct lltable *llt, const struct sockaddr *prefix, * (flags & LLE_STATIC) means deleting all entries * including static ND6 entries. */ + IF_AFDATA_WLOCK(llt->llt_ifp); for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) { LIST_FOREACH_SAFE(lle, &llt->lle_head[i], lle_next, next) { if (IN6_ARE_MASKED_ADDR_EQUAL( - &((struct sockaddr_in6 *)L3_ADDR(lle))->sin6_addr, - &pfx->sin6_addr, - &msk->sin6_addr) && - ((flags & LLE_STATIC) || !(lle->la_flags & LLE_STATIC))) { - int canceled; - - canceled = callout_drain(&lle->la_timer); + &satosin6(L3_ADDR(lle))->sin6_addr, + &pfx->sin6_addr, &msk->sin6_addr) && + ((flags & LLE_STATIC) || + !(lle->la_flags & LLE_STATIC))) { LLE_WLOCK(lle); - if (canceled) + if (callout_stop(&lle->la_timer)) LLE_REMREF(lle); llentry_free(lle); } } } + IF_AFDATA_WUNLOCK(llt->llt_ifp); } static int @@ -2605,11 +2604,12 @@ in6_lltable_lookup(struct lltable *llt, u_int flags, lle->lle_tbl = llt; lle->lle_head = lleh; + lle->la_flags |= LLE_LINKED; LIST_INSERT_HEAD(lleh, lle, lle_next); } else if (flags & LLE_DELETE) { if (!(lle->la_flags & LLE_IFADDR) || (flags & LLE_IFADDR)) { LLE_WLOCK(lle); - lle->la_flags = LLE_DELETED; + lle->la_flags |= LLE_DELETED; LLE_WUNLOCK(lle); #ifdef DIAGNOSTIC log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle);