This fixes a bug in the way that the LLE timers for nd6

and arp were being used. They basically would pass in the
mutex to the callout_init. Because they used this method
to the callout system, it was possible to "stop" the callout.
When flushing the table and you stopped the running callout, the
callout_stop code would return 1 indicating that it was going
to stop the callout (that was about to run on the callout_wheel blocked
by the function calling the stop). Now when 1 was returned, it would
lower the reference count one extra time for the stopped timer, then
a few lines later delete the memory. Of course the callout_wheel was
stuck in the lock code and would then crash since it was accessing
freed memory. By using callout_init(c, 1) we always get a 0 back
and the reference counting bug does not rear its head. We do have
to make a few adjustments to the callouts themselves though to make
sure it does the proper thing if rescheduled as well as gets the lock.

Commented upon by hiren and sbruno
See Phabricator D1777 for more details.

Commented upon by hiren and sbruno
Reviewed by:	adrian, jhb and bz
Sponsored by:	Netflix Inc.
This commit is contained in:
Randall Stewart 2015-02-09 19:28:11 +00:00
parent 0174acd439
commit 2575fbb827
4 changed files with 43 additions and 8 deletions

View File

@ -166,10 +166,28 @@ arptimer(void *arg)
struct ifnet *ifp;
if (lle->la_flags & LLE_STATIC) {
LLE_WUNLOCK(lle);
return;
}
LLE_WLOCK(lle);
if (callout_pending(&lle->la_timer)) {
/*
* Here we are a bit odd here in the treatment of
* active/pending. If the pending bit is set, it got
* rescheduled before I ran. The active
* bit we ignore, since if it was stopped
* in ll_tablefree() and was currently running
* it would have return 0 so the code would
* not have deleted it since the callout could
* not be stopped so we want to go through
* with the delete here now. If the callout
* was restarted, the pending bit will be back on and
* we just want to bail since the callout_reset would
* return 1 and our reference would have been removed
* by arpresolve() below.
*/
LLE_WUNLOCK(lle);
return;
}
ifp = lle->lle_tbl->llt_ifp;
CURVNET_SET(ifp->if_vnet);

View File

@ -962,8 +962,7 @@ in_lltable_new(const struct sockaddr *l3addr, u_int flags)
lle->base.lle_refcnt = 1;
lle->base.lle_free = in_lltable_free;
LLE_LOCK_INIT(&lle->base);
callout_init_rw(&lle->base.la_timer, &lle->base.lle_lock,
CALLOUT_RETURNUNLOCKED);
callout_init(&lle->base.la_timer, 1);
return (&lle->base);
}

View File

@ -2047,8 +2047,7 @@ in6_lltable_new(const struct sockaddr *l3addr, u_int flags)
lle->base.lle_refcnt = 1;
lle->base.lle_free = in6_lltable_free;
LLE_LOCK_INIT(&lle->base);
callout_init_rw(&lle->base.ln_timer_ch, &lle->base.lle_lock,
CALLOUT_RETURNUNLOCKED);
callout_init(&lle->base.ln_timer_ch, 1);
return (&lle->base);
}

View File

@ -473,9 +473,28 @@ nd6_llinfo_timer(void *arg)
KASSERT(arg != NULL, ("%s: arg NULL", __func__));
ln = (struct llentry *)arg;
LLE_WLOCK_ASSERT(ln);
LLE_WLOCK(ln);
if (callout_pending(&ln->la_timer)) {
/*
* Here we are a bit odd here in the treatment of
* active/pending. If the pending bit is set, it got
* rescheduled before I ran. The active
* bit we ignore, since if it was stopped
* in ll_tablefree() and was currently running
* it would have return 0 so the code would
* not have deleted it since the callout could
* not be stopped so we want to go through
* with the delete here now. If the callout
* was restarted, the pending bit will be back on and
* we just want to bail since the callout_reset would
* return 1 and our reference would have been removed
* by nd6_llinfo_settimer_locked above since canceled
* would have been 1.
*/
LLE_WUNLOCK(ln);
return;
}
ifp = ln->lle_tbl->llt_ifp;
CURVNET_SET(ifp->if_vnet);
if (ln->ln_ntick > 0) {