Plug well observed races on la_hold entries with the callout handler.

Call the handler function with the lock held, return unlocked as we
might free the entry.  Rework functions later in the call graph to be
either called with the lock held or, only if needed, unlocked.

Place asserts to document and tighten assumptions on various lle locking,
which were not always true before.

We call nd6_ns_output() unlocked and the assignment of ip6->ip6_src was
decentralized to minimize possible complexity introduced with the formerly
missing locking there.  This also resulted in a push down of local
variable scopes into smaller blocks.

Reported by:	many
PR:		kern/148857
Submitted by:	Dmitrij Tejblum (tejblum yandex-team.ru) (original version)
MFC After:	4 days
This commit is contained in:
Bjoern A. Zeeb 2010-11-29 00:04:08 +00:00
parent 7ea6a15d9f
commit e6950476b9
3 changed files with 90 additions and 58 deletions

View File

@ -2349,10 +2349,12 @@ in6_lltable_new(const struct sockaddr *l3addr, u_int flags)
if (lle == NULL) /* NB: caller generates msg */
return NULL;
callout_init(&lle->base.ln_timer_ch, CALLOUT_MPSAFE);
lle->l3_addr6 = *(const struct sockaddr_in6 *)l3addr;
lle->base.lle_refcnt = 1;
LLE_LOCK_INIT(&lle->base);
callout_init_rw(&lle->base.ln_timer_ch, &lle->base.lle_lock,
CALLOUT_RETURNUNLOCKED);
return &lle->base;
}

View File

@ -411,6 +411,8 @@ nd6_llinfo_settimer_locked(struct llentry *ln, long tick)
{
int canceled;
LLE_WLOCK_ASSERT(ln);
if (tick < 0) {
ln->la_expire = 0;
ln->ln_ntick = 0;
@ -451,6 +453,7 @@ nd6_llinfo_timer(void *arg)
KASSERT(arg != NULL, ("%s: arg NULL", __func__));
ln = (struct llentry *)arg;
LLE_WLOCK_ASSERT(ln);
ifp = ln->lle_tbl->llt_ifp;
CURVNET_SET(ifp->if_vnet);
@ -458,10 +461,10 @@ nd6_llinfo_timer(void *arg)
if (ln->ln_ntick > 0) {
if (ln->ln_ntick > INT_MAX) {
ln->ln_ntick -= INT_MAX;
nd6_llinfo_settimer(ln, INT_MAX);
nd6_llinfo_settimer_locked(ln, INT_MAX);
} else {
ln->ln_ntick = 0;
nd6_llinfo_settimer(ln, ln->ln_ntick);
nd6_llinfo_settimer_locked(ln, ln->ln_ntick);
}
goto done;
}
@ -482,8 +485,10 @@ nd6_llinfo_timer(void *arg)
case ND6_LLINFO_INCOMPLETE:
if (ln->la_asked < V_nd6_mmaxtries) {
ln->la_asked++;
nd6_llinfo_settimer(ln, (long)ndi->retrans * hz / 1000);
nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000);
LLE_WUNLOCK(ln);
nd6_ns_output(ifp, NULL, dst, ln, 0);
LLE_WLOCK(ln);
} else {
struct mbuf *m = ln->la_hold;
if (m) {
@ -491,24 +496,24 @@ nd6_llinfo_timer(void *arg)
/*
* assuming every packet in la_hold has the
* same IP header
* same IP header. Send error after unlock.
*/
m0 = m->m_nextpkt;
m->m_nextpkt = NULL;
icmp6_error2(m, ICMP6_DST_UNREACH,
ICMP6_DST_UNREACH_ADDR, 0, ifp);
ln->la_hold = m0;
clear_llinfo_pqueue(ln);
}
(void)nd6_free(ln, 0);
ln = NULL;
if (m != NULL)
icmp6_error2(m, ICMP6_DST_UNREACH,
ICMP6_DST_UNREACH_ADDR, 0, ifp);
}
break;
case ND6_LLINFO_REACHABLE:
if (!ND6_LLINFO_PERMANENT(ln)) {
ln->ln_state = ND6_LLINFO_STALE;
nd6_llinfo_settimer(ln, (long)V_nd6_gctimer * hz);
nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz);
}
break;
@ -525,27 +530,34 @@ nd6_llinfo_timer(void *arg)
/* We need NUD */
ln->la_asked = 1;
ln->ln_state = ND6_LLINFO_PROBE;
nd6_llinfo_settimer(ln, (long)ndi->retrans * hz / 1000);
nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000);
LLE_WUNLOCK(ln);
nd6_ns_output(ifp, dst, dst, ln, 0);
LLE_WLOCK(ln);
} else {
ln->ln_state = ND6_LLINFO_STALE; /* XXX */
nd6_llinfo_settimer(ln, (long)V_nd6_gctimer * hz);
nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz);
}
break;
case ND6_LLINFO_PROBE:
if (ln->la_asked < V_nd6_umaxtries) {
ln->la_asked++;
nd6_llinfo_settimer(ln, (long)ndi->retrans * hz / 1000);
nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000);
LLE_WUNLOCK(ln);
nd6_ns_output(ifp, dst, dst, ln, 0);
LLE_WLOCK(ln);
} else {
(void)nd6_free(ln, 0);
ln = NULL;
}
break;
default:
panic("%s: paths in a dark night can be confusing: %d",
__func__, ln->ln_state);
}
done:
if (ln != NULL)
LLE_FREE(ln);
LLE_FREE_LOCKED(ln);
CURVNET_RESTORE();
}
@ -996,7 +1008,9 @@ nd6_free(struct llentry *ln, int gc)
{
struct llentry *next;
struct nd_defrouter *dr;
struct ifnet *ifp=NULL;
struct ifnet *ifp;
LLE_WLOCK_ASSERT(ln);
/*
* we used to have pfctlinput(PRC_HOSTDEAD) here.
@ -1004,12 +1018,13 @@ nd6_free(struct llentry *ln, int gc)
*/
/* cancel timer */
nd6_llinfo_settimer(ln, -1);
nd6_llinfo_settimer_locked(ln, -1);
ifp = ln->lle_tbl->llt_ifp;
if (!V_ip6_forwarding) {
int s;
s = splnet();
dr = defrouter_lookup(&L3_ADDR_SIN6(ln)->sin6_addr, ln->lle_tbl->llt_ifp);
dr = defrouter_lookup(&L3_ADDR_SIN6(ln)->sin6_addr, ifp);
if (dr != NULL && dr->expire &&
ln->ln_state == ND6_LLINFO_STALE && gc) {
@ -1026,15 +1041,16 @@ nd6_free(struct llentry *ln, int gc)
* but we intentionally keep it just in case.
*/
if (dr->expire > time_second)
nd6_llinfo_settimer(ln,
nd6_llinfo_settimer_locked(ln,
(dr->expire - time_second) * hz);
else
nd6_llinfo_settimer(ln, (long)V_nd6_gctimer * hz);
splx(s);
LLE_WLOCK(ln);
nd6_llinfo_settimer_locked(ln,
(long)V_nd6_gctimer * hz);
next = LIST_NEXT(ln, lle_next);
LLE_REMREF(ln);
LLE_WUNLOCK(ln);
return (LIST_NEXT(ln, lle_next));
return (next);
}
if (ln->ln_router || dr) {
@ -1043,7 +1059,7 @@ nd6_free(struct llentry *ln, int gc)
* is in the Default Router List.
* See a corresponding comment in nd6_na_input().
*/
rt6_flush(&L3_ADDR_SIN6(ln)->sin6_addr, ln->lle_tbl->llt_ifp);
rt6_flush(&L3_ADDR_SIN6(ln)->sin6_addr, ifp);
}
if (dr) {
@ -1071,11 +1087,13 @@ nd6_free(struct llentry *ln, int gc)
pfxlist_onlink_check();
/*
* refresh default router list
* Refresh default router list. Have to unlock as
* it calls into nd6_lookup(), still holding a ref.
*/
LLE_WUNLOCK(ln);
defrouter_select();
LLE_WLOCK(ln);
}
splx(s);
}
/*
@ -1086,7 +1104,11 @@ nd6_free(struct llentry *ln, int gc)
*/
next = LIST_NEXT(ln, lle_next);
ifp = ln->lle_tbl->llt_ifp;
/*
* Save to unlock. We still hold an extra reference and will not
* free(9) in llentry_free() if someone else holds one as well.
*/
LLE_WUNLOCK(ln);
IF_AFDATA_LOCK(ifp);
LLE_WLOCK(ln);
LLE_REMREF(ln);
@ -1875,6 +1897,9 @@ nd6_output_lle(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
LLE_RUNLOCK(ln);
goto retry;
}
LLE_WLOCK_ASSERT(ln);
if (ln->la_hold) {
struct mbuf *m_hold;
int i;
@ -1896,17 +1921,7 @@ nd6_output_lle(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
} else {
ln->la_hold = m;
}
/*
* We did the lookup (no lle arg) so we
* need to do the unlock here
*/
if (lle == NULL) {
if (flags & LLE_EXCLUSIVE)
LLE_WUNLOCK(ln);
else
LLE_RUNLOCK(ln);
}
/*
* If there has been no NS for the neighbor after entering the
* INCOMPLETE state, send the first solicitation.
@ -1914,10 +1929,21 @@ nd6_output_lle(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
if (!ND6_LLINFO_PERMANENT(ln) && ln->la_asked == 0) {
ln->la_asked++;
nd6_llinfo_settimer(ln,
nd6_llinfo_settimer_locked(ln,
(long)ND_IFINFO(ifp)->retrans * hz / 1000);
LLE_WUNLOCK(ln);
nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0);
if (lle != NULL && ln == lle)
LLE_WLOCK(lle);
} else if (lle == NULL || ln != lle) {
/*
* We did the lookup (no lle arg) so we
* need to do the unlock here.
*/
LLE_WUNLOCK(ln);
}
return (0);
sendpkt:

View File

@ -383,7 +383,6 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6,
struct m_tag *mtag;
struct ip6_hdr *ip6;
struct nd_neighbor_solicit *nd_ns;
struct in6_addr *src, src_in;
struct ip6_moptions im6o;
int icmp6len;
int maxlen;
@ -467,28 +466,35 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6,
* - saddr6 belongs to the outgoing interface.
* Otherwise, we perform the source address selection as usual.
*/
struct ip6_hdr *hip6; /* hold ip6 */
struct in6_addr *hsrc = NULL;
struct in6_addr *hsrc;
if ((ln != NULL) && ln->la_hold) {
/*
* assuming every packet in la_hold has the same IP
* header
*/
hip6 = mtod(ln->la_hold, struct ip6_hdr *);
/* XXX pullup? */
if (sizeof(*hip6) < ln->la_hold->m_len)
hsrc = &hip6->ip6_src;
else
hsrc = NULL;
hsrc = NULL;
if (ln != NULL) {
LLE_RLOCK(ln);
if (ln->la_hold != NULL) {
struct ip6_hdr *hip6; /* hold ip6 */
/*
* assuming every packet in la_hold has the same IP
* header
*/
hip6 = mtod(ln->la_hold, struct ip6_hdr *);
/* XXX pullup? */
if (sizeof(*hip6) < ln->la_hold->m_len) {
ip6->ip6_src = hip6->ip6_src;
hsrc = &hip6->ip6_src;
}
}
LLE_RUNLOCK(ln);
}
if (hsrc && (ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp,
hsrc)) != NULL) {
src = hsrc;
/* ip6_src set already. */
ifa_free(ifa);
} else {
int error;
struct sockaddr_in6 dst_sa;
struct in6_addr src_in;
bzero(&dst_sa, sizeof(dst_sa));
dst_sa.sin6_family = AF_INET6;
@ -506,7 +512,7 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6,
error));
goto bad;
}
src = &src_in;
ip6->ip6_src = src_in;
}
} else {
/*
@ -516,10 +522,8 @@ nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6,
* above), but we do so here explicitly to make the intention
* clearer.
*/
bzero(&src_in, sizeof(src_in));
src = &src_in;
bzero(&ip6->ip6_src, sizeof(ip6->ip6_src));
}
ip6->ip6_src = *src;
nd_ns = (struct nd_neighbor_solicit *)(ip6 + 1);
nd_ns->nd_ns_type = ND_NEIGHBOR_SOLICIT;
nd_ns->nd_ns_code = 0;