Remove LLE read lock from IPv6 fast path.

LLE structure is mostly unchanged during its lifecycle: there are only 2
things relevant for fast path lookup code:
1) link-level address change. Since r286722, these updates are performed
  under AFDATA WLOCK.
2) Some sort of feedback indicating that this particular entry is used so
  we send NS to perform reachability verification instead of expiring entry.
  The only signal that is needed from fast path is something like binary
  yes/no.
The latter is solved by the following changes:

Special r_skip_req (introduced in D3688) value is used for fast path feedback.
  It is read locklessly by the fast path, but updated under the req_mutex mutex.
  If this field is non-zero, the fast path acquires the lock and sets it back to 0.

After transitioning to STALE state, callout timer is armed to run each
  V_nd6_delay seconds to make sure that if packet was transmitted at the start
  of given interval, we would be able to switch to PROBE state in V_nd6_delay
  seconds as user expects.
(in STALE state) timer is rescheduled until original V_nd6_gctimer expires
  keeping lle in STALE state (remaining timer value stored in lle_remtime).
(in STALE state) timer is rescheduled if packet was transmitted less than
  V_nd6_delay seconds ago to make sure we transition to PROBE state exactly
  after V_nd6_delay seconds.

As a result, all packets towards lle in REACHABLE/STALE/PROBE states are handled
  by fast path without acquiring lle read lock.

Differential Revision:		https://reviews.freebsd.org/D3780
This commit is contained in:
Alexander V. Chernikov 2015-12-13 07:39:49 +00:00
parent 1385475525
commit 12cb7521c2
5 changed files with 236 additions and 37 deletions

View File

@ -287,6 +287,47 @@ lltable_set_entry_addr(struct ifnet *ifp, struct llentry *lle,
lle->r_flags |= RLLE_VALID; lle->r_flags |= RLLE_VALID;
} }
/*
 * Attempts to replace the link-level address stored in @lle.
 *
 * The address may only be changed while holding the AFDATA write lock,
 * and that lock has to be taken before the @lle lock.  The caller enters
 * with @lle write-locked, so the function pins @lle with an extra
 * reference, releases the @lle lock, takes the AFDATA write lock and
 * then re-takes the @lle write lock.
 *
 * Returns 1 if the address was updated.  No unlock of @lle is issued on
 * that path, so @lle is expected to still be write-locked on return.
 * Returns 0 if @lle was flagged LLE_DELETED while it was unlocked; in
 * that case @lle is passed to LLE_FREE_LOCKED() (presumably releasing
 * both the lock and the reference -- confirm against the macro), so the
 * caller must not use @lle afterwards.
 */
int
lltable_try_set_entry_addr(struct ifnet *ifp, struct llentry *lle,
    const char *lladdr)
{

	LLE_WLOCK_ASSERT(lle);

	/* Pin the entry so it survives the window where it is unlocked. */
	LLE_ADDREF(lle);

	/* Re-acquire in the required order: AFDATA first, then the entry. */
	LLE_WUNLOCK(lle);
	IF_AFDATA_WLOCK(ifp);
	LLE_WLOCK(lle);

	if ((lle->la_flags & LLE_DELETED) == 0) {
		/* Entry is still alive: store the new address. */
		lltable_set_entry_addr(ifp, lle, lladdr);
		IF_AFDATA_WUNLOCK(ifp);
		/* Drop the temporary reference taken above. */
		LLE_REMREF(lle);
		return (1);
	}

	/*
	 * Another thread deleted the entry while we had dropped its lock.
	 * Give up without touching the address.
	 */
	IF_AFDATA_WUNLOCK(ifp);
	LLE_FREE_LOCKED(lle);
	return (0);
}
/* /*
* *
* Performes generic cleanup routines and frees lle. * Performes generic cleanup routines and frees lle.

View File

@ -79,6 +79,8 @@ struct llentry {
int16_t ln_state; /* IPv6 has ND6_LLINFO_NOSTATE == -2 */ int16_t ln_state; /* IPv6 has ND6_LLINFO_NOSTATE == -2 */
uint16_t ln_router; uint16_t ln_router;
time_t ln_ntick; time_t ln_ntick;
time_t lle_remtime; /* Real time remaining */
time_t lle_hittime; /* Time when r_skip_req was unset */
int lle_refcnt; int lle_refcnt;
LIST_ENTRY(llentry) lle_chain; /* chain of deleted items */ LIST_ENTRY(llentry) lle_chain; /* chain of deleted items */
@ -222,6 +224,8 @@ struct llentry *llentry_alloc(struct ifnet *, struct lltable *,
size_t lltable_drop_entry_queue(struct llentry *); size_t lltable_drop_entry_queue(struct llentry *);
void lltable_set_entry_addr(struct ifnet *ifp, struct llentry *lle, void lltable_set_entry_addr(struct ifnet *ifp, struct llentry *lle,
const char *lladdr); const char *lladdr);
int lltable_try_set_entry_addr(struct ifnet *ifp, struct llentry *lle,
const char *lladdr);
struct llentry *lltable_alloc_entry(struct lltable *llt, u_int flags, struct llentry *lltable_alloc_entry(struct lltable *llt, u_int flags,
const struct sockaddr *l4addr); const struct sockaddr *l4addr);

View File

@ -2064,6 +2064,7 @@ in6_lltable_destroy_lle(struct llentry *lle)
LLE_WUNLOCK(lle); LLE_WUNLOCK(lle);
LLE_LOCK_DESTROY(lle); LLE_LOCK_DESTROY(lle);
LLE_REQ_DESTROY(lle);
free(lle, M_LLTABLE); free(lle, M_LLTABLE);
} }
@ -2080,6 +2081,7 @@ in6_lltable_new(const struct in6_addr *addr6, u_int flags)
lle->base.lle_refcnt = 1; lle->base.lle_refcnt = 1;
lle->base.lle_free = in6_lltable_destroy_lle; lle->base.lle_free = in6_lltable_destroy_lle;
LLE_LOCK_INIT(&lle->base); LLE_LOCK_INIT(&lle->base);
LLE_REQ_INIT(&lle->base);
callout_init(&lle->base.lle_timer, 1); callout_init(&lle->base.lle_timer, 1);
return (&lle->base); return (&lle->base);
@ -2288,6 +2290,13 @@ in6_lltable_lookup(struct lltable *llt, u_int flags,
if (lle == NULL) if (lle == NULL)
return (NULL); return (NULL);
KASSERT((flags & (LLE_UNLOCKED|LLE_EXCLUSIVE)) !=
(LLE_UNLOCKED|LLE_EXCLUSIVE),("wrong lle request flags: 0x%X",
flags));
if (flags & LLE_UNLOCKED)
return (lle);
if (flags & LLE_EXCLUSIVE) if (flags & LLE_EXCLUSIVE)
LLE_WLOCK(lle); LLE_WLOCK(lle);
else else
@ -2350,8 +2359,8 @@ in6_lltable_dump_entry(struct lltable *llt, struct llentry *lle,
sdl->sdl_index = ifp->if_index; sdl->sdl_index = ifp->if_index;
sdl->sdl_type = ifp->if_type; sdl->sdl_type = ifp->if_type;
bcopy(&lle->ll_addr, LLADDR(sdl), ifp->if_addrlen); bcopy(&lle->ll_addr, LLADDR(sdl), ifp->if_addrlen);
ndpc.rtm.rtm_rmx.rmx_expire = ndpc.rtm.rtm_rmx.rmx_expire = lle->la_expire +
lle->la_flags & LLE_STATIC ? 0 : lle->la_expire; lle->lle_remtime / hz;
ndpc.rtm.rtm_flags |= (RTF_HOST | RTF_LLDATA); ndpc.rtm.rtm_flags |= (RTF_HOST | RTF_LLDATA);
if (lle->la_flags & LLE_STATIC) if (lle->la_flags & LLE_STATIC)
ndpc.rtm.rtm_flags |= RTF_STATIC; ndpc.rtm.rtm_flags |= RTF_STATIC;

View File

@ -541,6 +541,107 @@ nd6_llinfo_get_holdsrc(struct llentry *ln, struct in6_addr *src)
return (src); return (src);
} }
/*
* Checks if we need to switch from STALE state.
*
* RFC 4861 requires switching from STALE to DELAY state
* on first packet matching entry, waiting V_nd6_delay and
* transition to PROBE state (if upper layer confirmation was
* not received).
*
* This code performs a bit differently:
* On packet hit we don't change state (but desired state
* can be guessed by control plane). However, after V_nd6_delay
* seconds code will transition to PROBE state (so DELAY state
* is kinda skipped in most situations).
*
* Typically, V_nd6_gctimer is bigger than V_nd6_delay, so
* we perform the following upon entering STALE state:
*
* 1) Arm timer to run each V_nd6_delay seconds to make sure that
* if packet was transmitted at the start of given interval, we
* would be able to switch to PROBE state in V_nd6_delay seconds
* as user expects.
*
* 2) Reschedule timer until original V_nd6_gctimer expires keeping
* lle in STALE state (remaining timer value stored in lle_remtime).
*
* 3) Reschedule timer if packet was transmitted less that V_nd6_delay
* seconds ago.
*
* Returns non-zero value if the entry is still STALE (storing
* the next timer interval in @pdelay).
*
* Returns zero value if original timer expired or we need to switch to
* PROBE (store that in @do_switch variable).
*/
static int
nd6_is_stale(struct llentry *lle, long *pdelay, int *do_switch)
{
int nd_delay, nd_gctimer, r_skip_req;
time_t lle_hittime;
long delay;
*do_switch = 0;
nd_gctimer = V_nd6_gctimer;
nd_delay = V_nd6_delay;
LLE_REQ_LOCK(lle);
r_skip_req = lle->r_skip_req;
lle_hittime = lle->lle_hittime;
LLE_REQ_UNLOCK(lle);
if (r_skip_req > 0) {
/*
* Nonzero r_skip_req value was set upon entering
* STALE state. Since value was not changed, no
* packets were passed using this lle. Ask for
* timer reschedule and keep STALE state.
*/
delay = (long)(MIN(nd_gctimer, nd_delay));
delay *= hz;
if (lle->lle_remtime > delay)
lle->lle_remtime -= delay;
else {
delay = lle->lle_remtime;
lle->lle_remtime = 0;
}
if (delay == 0) {
/*
* The original ng6_gctime timeout ended,
* no more rescheduling.
*/
return (0);
}
*pdelay = delay;
return (1);
}
/*
* Packet received. Verify timestamp
*/
delay = (long)(time_uptime - lle_hittime);
if (delay < nd_delay) {
/*
* V_nd6_delay still not passed since the first
* hit in STALE state.
* Reshedule timer and return.
*/
*pdelay = (long)(nd_delay - delay) * hz;
return (1);
}
/* Request switching to probe */
*do_switch = 1;
return (0);
}
/* /*
* Switch @lle state to new state optionally arming timers. * Switch @lle state to new state optionally arming timers.
* *
@ -550,9 +651,11 @@ __noinline void
nd6_llinfo_setstate(struct llentry *lle, int newstate) nd6_llinfo_setstate(struct llentry *lle, int newstate)
{ {
struct ifnet *ifp; struct ifnet *ifp;
long delay; int nd_gctimer, nd_delay;
long delay, remtime;
delay = 0; delay = 0;
remtime = 0;
switch (newstate) { switch (newstate) {
case ND6_LLINFO_INCOMPLETE: case ND6_LLINFO_INCOMPLETE:
@ -566,7 +669,19 @@ nd6_llinfo_setstate(struct llentry *lle, int newstate)
} }
break; break;
case ND6_LLINFO_STALE: case ND6_LLINFO_STALE:
delay = (long)V_nd6_gctimer * hz;
/*
* Notify fast path that we want to know if any packet
* is transmitted by setting r_skip_req.
*/
LLE_REQ_LOCK(lle);
lle->r_skip_req = 1;
LLE_REQ_UNLOCK(lle);
nd_delay = V_nd6_delay;
nd_gctimer = V_nd6_gctimer;
delay = (long)(MIN(nd_gctimer, nd_delay)) * hz;
remtime = (long)nd_gctimer * hz - delay;
break; break;
case ND6_LLINFO_DELAY: case ND6_LLINFO_DELAY:
lle->la_asked = 0; lle->la_asked = 0;
@ -577,6 +692,7 @@ nd6_llinfo_setstate(struct llentry *lle, int newstate)
if (delay > 0) if (delay > 0)
nd6_llinfo_settimer_locked(lle, delay); nd6_llinfo_settimer_locked(lle, delay);
lle->lle_remtime = remtime;
lle->ln_state = newstate; lle->ln_state = newstate;
} }
@ -592,7 +708,8 @@ nd6_llinfo_timer(void *arg)
struct in6_addr *dst, *pdst, *psrc, src; struct in6_addr *dst, *pdst, *psrc, src;
struct ifnet *ifp; struct ifnet *ifp;
struct nd_ifinfo *ndi = NULL; struct nd_ifinfo *ndi = NULL;
int send_ns; int do_switch, send_ns;
long delay;
KASSERT(arg != NULL, ("%s: arg NULL", __func__)); KASSERT(arg != NULL, ("%s: arg NULL", __func__));
ln = (struct llentry *)arg; ln = (struct llentry *)arg;
@ -680,13 +797,35 @@ nd6_llinfo_timer(void *arg)
break; break;
case ND6_LLINFO_STALE: case ND6_LLINFO_STALE:
/* Garbage Collection(RFC 2461 5.3) */ if (nd6_is_stale(ln, &delay, &do_switch) != 0) {
if (!ND6_LLINFO_PERMANENT(ln)) {
EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_EXPIRED); /*
nd6_free(ln, 1); * No packet has used this entry and GC timeout
ln = NULL; * has not been passed. Reshedule timer and
* return.
*/
nd6_llinfo_settimer_locked(ln, delay);
break;
} }
break;
if (do_switch == 0) {
/*
* GC timer has ended and entry hasn't been used.
* Run Garbage collector (RFC 4861, 5.3)
*/
if (!ND6_LLINFO_PERMANENT(ln)) {
EVENTHANDLER_INVOKE(lle_event, ln,
LLENTRY_EXPIRED);
nd6_free(ln, 1);
ln = NULL;
}
break;
}
/* Entry has been used AND delay timer has ended. */
/* FALLTHROUGH */
case ND6_LLINFO_DELAY: case ND6_LLINFO_DELAY:
if (ndi && (ndi->flags & ND6_IFF_PERFORMNUD) != 0) { if (ndi && (ndi->flags & ND6_IFF_PERFORMNUD) != 0) {
@ -1796,7 +1935,11 @@ nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr,
* Record source link-layer address * Record source link-layer address
* XXX is it dependent to ifp->if_type? * XXX is it dependent to ifp->if_type?
*/ */
lltable_set_entry_addr(ifp, ln, lladdr); if (lltable_try_set_entry_addr(ifp, ln, lladdr) == 0) {
/* Entry was deleted */
return;
}
nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);
EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED); EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED);
@ -1996,31 +2139,25 @@ nd6_resolve(struct ifnet *ifp, int is_gw, struct mbuf *m,
} }
IF_AFDATA_RLOCK(ifp); IF_AFDATA_RLOCK(ifp);
ln = nd6_lookup(&dst6->sin6_addr, 0, ifp); ln = nd6_lookup(&dst6->sin6_addr, LLE_UNLOCKED, ifp);
if (ln != NULL && (ln->r_flags & RLLE_VALID) != 0) {
/* Entry found, let's copy lle info */
bcopy(&ln->ll_addr, desten, ifp->if_addrlen);
if (pflags != NULL)
*pflags = LLE_VALID | (ln->r_flags & RLLE_IFADDR);
/* Check if we have feedback request from nd6 timer */
if (ln->r_skip_req != 0) {
LLE_REQ_LOCK(ln);
ln->r_skip_req = 0; /* Notify that entry was used */
ln->lle_hittime = time_uptime;
LLE_REQ_UNLOCK(ln);
}
IF_AFDATA_RUNLOCK(ifp);
return (0);
}
IF_AFDATA_RUNLOCK(ifp); IF_AFDATA_RUNLOCK(ifp);
/* return (nd6_resolve_slow(ifp, m, dst6, desten, pflags));
* Perform fast path for the following cases:
* 1) lle state is REACHABLE
* 2) lle state is DELAY (NS message sent)
*
* Every other case involves lle modification, so we handle
* them separately.
*/
if (ln == NULL || (ln->ln_state != ND6_LLINFO_REACHABLE &&
ln->ln_state != ND6_LLINFO_DELAY)) {
/* Fall back to slow processing path */
if (ln != NULL)
LLE_RUNLOCK(ln);
return (nd6_resolve_slow(ifp, m, dst6, desten, pflags));
}
bcopy(&ln->ll_addr, desten, ifp->if_addrlen);
if (pflags != NULL)
*pflags = ln->la_flags;
LLE_RUNLOCK(ln);
return (0);
} }

View File

@ -765,7 +765,10 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
/* /*
* Record link-layer address, and update the state. * Record link-layer address, and update the state.
*/ */
lltable_set_entry_addr(ifp, ln, lladdr); if (lltable_try_set_entry_addr(ifp, ln, lladdr) == 0) {
ln = NULL;
goto freeit;
}
EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED); EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED);
if (is_solicited) if (is_solicited)
nd6_llinfo_setstate(ln, ND6_LLINFO_REACHABLE); nd6_llinfo_setstate(ln, ND6_LLINFO_REACHABLE);
@ -831,7 +834,12 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
* Update link-local address, if any. * Update link-local address, if any.
*/ */
if (lladdr != NULL) { if (lladdr != NULL) {
lltable_set_entry_addr(ifp, ln, lladdr); int ret;
ret = lltable_try_set_entry_addr(ifp, ln,lladdr);
if (ret == 0) {
ln = NULL;
goto freeit;
}
EVENTHANDLER_INVOKE(lle_event, ln, EVENTHANDLER_INVOKE(lle_event, ln,
LLENTRY_RESOLVED); LLENTRY_RESOLVED);
} }