Simplify the way of attaching IPv6 link-layer header.

Problem description:
How do we currently perform layer 2 resolution and header imposition:

For IPv4 we have the following chain:
  ip_output() -> (ether|atm|whatever)_output() -> arpresolve()

The lookup is done in the proper place (the link-layer output routine) and it
  is possible to provide cached lle data.

For IPv6 the situation is more complex:
  ip6_output() -> nd6_output() -> nd6_output_ifp() -> (whatever)_output() ->
    nd6_storelladdr()

We have ip6_output() which calls nd6_output() instead of the link output routine.
nd6_output() does the following:
  * checks if lle exists, creates it if needed (similar to arpresolve())
  * performs lle state transitions (similar to arpresolve())
  * calls nd6_output_ifp() which pushes packets to the link output routine
    along with running SeND/MAC hooks regardless of lle state
    (i.e. it works as a placeholder for running the hooks).

After that, iface output routine like ether_output() calls nd6_storelladdr()
  which performs lle lookup once again.

As a result, we perform lookup twice for each outgoing packet for most types
  of interfaces. We also need to maintain runtime-checked table of 'nd6-free'
  interfaces (see nd6_need_cache()).

Fix this behavior by eliminating the first ND lookup. To be more specific:
  * make all nd6_output() consumers use nd6_output_ifp() instead
  * rename nd6_output[_slow]() to nd6_resolve_[slow]()
  * convert nd6_resolve() and nd6_resolve_slow() to arpresolve() semantics,
    i.e. copy the L2 address to a buffer instead of pushing the packet towards
    lower layers
  * Make all nd6_storelladdr() users use nd6_resolve()
  * eliminate nd6_storelladdr()

The resulting callchain is the following:
  ip6_output() -> nd6_output_ifp() -> (whatever)_output() -> nd6_resolve()

Error handling:
Currently sending a packet to a non-existing la results in ip6_<output|forward>
  -> nd6_output() -> nd6_output_lle() which returns 0.
In the new scenario the packet is propagated to <ether|whatever>_output() ->
  nd6_resolve() which will return EWOULDBLOCK, and that result
  will be converted to 0.

(And EWOULDBLOCK is actually used by IB/TOE code).

Sponsored by:		Yandex LLC
Differential Revision:	https://reviews.freebsd.org/D1469
This commit is contained in:
Alexander V. Chernikov 2015-09-16 14:26:28 +00:00
parent 5c5fb901eb
commit 1fe201c322
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=287861
11 changed files with 114 additions and 160 deletions

View File

@ -103,8 +103,8 @@ arc_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
u_int8_t atype, adst;
int loop_copy = 0;
int isphds;
#ifdef INET
int is_gw;
#if defined(INET) || defined(INET6)
int is_gw = 0;
#endif
if (!((ifp->if_flags & IFF_UP) &&
@ -112,6 +112,11 @@ arc_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
return(ENETDOWN); /* m, m1 aren't initialized yet */
error = 0;
#if defined(INET) || defined(INET6)
if (ro != NULL && ro->ro_rt != NULL &&
(ro->ro_rt->rt_flags & RTF_GATEWAY) != 0)
is_gw = 1;
#endif
switch (dst->sa_family) {
#ifdef INET
@ -125,10 +130,6 @@ arc_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
else if (ifp->if_flags & IFF_NOARP)
adst = ntohl(SIN(dst)->sin_addr.s_addr) & 0xFF;
else {
is_gw = 0;
if (ro != NULL && ro->ro_rt != NULL &&
(ro->ro_rt->rt_flags & RTF_GATEWAY) != 0)
is_gw = 1;
error = arpresolve(ifp, is_gw, m, dst, &adst, NULL);
if (error)
return (error == EWOULDBLOCK ? 0 : error);
@ -169,10 +170,11 @@ arc_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
case AF_INET6:
if ((m->m_flags & M_MCAST) != 0)
adst = arcbroadcastaddr; /* ARCnet broadcast address */
else
error = nd6_storelladdr(ifp, m, dst, (u_char *)&adst, NULL);
if (error)
return (error);
else {
error = nd6_resolve(ifp, is_gw, m, dst, &adst, NULL);
if (error != 0)
return (error == EWOULDBLOCK ? 0 : error);
}
atype = ARCTYPE_INET6;
break;
#endif

View File

@ -225,10 +225,10 @@ ether_output(struct ifnet *ifp, struct mbuf *m,
if (lle != NULL && (pflags & LLE_VALID))
memcpy(edst, &lle->ll_addr.mac16, sizeof(edst));
else
error = nd6_storelladdr(ifp, m, dst, (u_char *)edst,
error = nd6_resolve(ifp, is_gw, m, dst, (u_char *)edst,
&pflags);
if (error)
return error;
return (error == EWOULDBLOCK ? 0 : error);
type = htons(ETHERTYPE_IPV6);
break;
#endif

View File

@ -101,8 +101,8 @@ fddi_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
int loop_copy = 0, error = 0, hdrcmplt = 0;
u_char esrc[FDDI_ADDR_LEN], edst[FDDI_ADDR_LEN];
struct fddi_header *fh;
#ifdef INET
int is_gw;
#if defined(INET) || defined(INET6)
int is_gw = 0;
#endif
#ifdef MAC
@ -118,13 +118,15 @@ fddi_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
senderr(ENETDOWN);
getmicrotime(&ifp->if_lastchange);
#if defined(INET) || defined(INET6)
if (ro != NULL && ro->ro_rt != NULL &&
(ro->ro_rt->rt_flags & RTF_GATEWAY) != 0)
is_gw = 1;
#endif
switch (dst->sa_family) {
#ifdef INET
case AF_INET: {
is_gw = 0;
if (ro != NULL && ro->ro_rt != NULL &&
(ro->ro_rt->rt_flags & RTF_GATEWAY) != 0)
is_gw = 1;
error = arpresolve(ifp, is_gw, m, dst, edst, NULL);
if (error)
return (error == EWOULDBLOCK ? 0 : error);
@ -161,9 +163,9 @@ fddi_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
#endif /* INET */
#ifdef INET6
case AF_INET6:
error = nd6_storelladdr(ifp, m, dst, (u_char *)edst, NULL);
error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL);
if (error)
return (error); /* Something bad happened */
return (error == EWOULDBLOCK ? 0 : error);
type = htons(ETHERTYPE_IPV6);
break;
#endif /* INET6 */

View File

@ -89,8 +89,8 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
struct mbuf *mtail;
int unicast, dgl, foff;
static int next_dgl;
#ifdef INET
int is_gw;
#if defined(INET) || defined(INET6)
int is_gw = 0;
#endif
#ifdef MAC
@ -105,6 +105,11 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
goto bad;
}
#if defined(INET) || defined(INET6)
if (ro != NULL && ro->ro_rt != NULL &&
(ro->ro_rt->rt_flags & RTF_GATEWAY) != 0)
is_gw = 1;
#endif
/*
* For unicast, we make a tag to store the lladdr of the
* destination. This might not be the first time we have seen
@ -173,10 +178,10 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
#ifdef INET6
case AF_INET6:
if (unicast) {
error = nd6_storelladdr(fc->fc_ifp, m, dst,
error = nd6_resolve(fc->fc_ifp, is_gw, m, dst,
(u_char *) destfw, NULL);
if (error)
return (error);
return (error == EWOULDBLOCK ? 0 : error);
}
type = ETHERTYPE_IPV6;
break;

View File

@ -293,9 +293,9 @@ iso88025_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
#endif /* INET */
#ifdef INET6
case AF_INET6:
error = nd6_storelladdr(ifp, m, dst, (u_char *)edst, NULL);
error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL);
if (error)
return (error);
return (error == EWOULDBLOCK ? 0 : error);
snap_type = ETHERTYPE_IPV6;
break;
#endif /* INET6 */

View File

@ -571,7 +571,7 @@ ip6_forward(struct mbuf *m, int srcrt)
goto bad;
}
error = nd6_output(rt->rt_ifp, origifp, m, dst, rt);
error = nd6_output_ifp(rt->rt_ifp, origifp, m, dst);
if (error) {
in6_ifstat_inc(rt->rt_ifp, ifs6_out_discard);
IP6STAT_INC(ip6s_cantforward);

View File

@ -935,7 +935,7 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
m->m_pkthdr.len);
ifa_free(&ia6->ia_ifa);
}
error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
error = nd6_output_ifp(ifp, origifp, m, dst);
goto done;
}
@ -1034,7 +1034,7 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
counter_u64_add(ia->ia_ifa.ifa_obytes,
m->m_pkthdr.len);
}
error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
error = nd6_output_ifp(ifp, origifp, m, dst);
} else
m_freem(m);
}

View File

@ -136,10 +136,10 @@ static void nd6_free_redirect(const struct llentry *);
static void nd6_llinfo_timer(void *);
static void clear_llinfo_pqueue(struct llentry *);
static void nd6_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
static int nd6_output_lle(struct ifnet *, struct ifnet *, struct mbuf *,
struct sockaddr_in6 *);
static int nd6_output_ifp(struct ifnet *, struct ifnet *, struct mbuf *,
struct sockaddr_in6 *);
static int nd6_resolve_slow(struct ifnet *, struct mbuf *,
const struct sockaddr_in6 *, u_char *, uint32_t *);
static int nd6_need_cache(struct ifnet *);
static VNET_DEFINE(struct callout, nd6_slowtimo_ch);
#define V_nd6_slowtimo_ch VNET(nd6_slowtimo_ch)
@ -1904,7 +1904,7 @@ nd6_grab_holdchain(struct llentry *ln, struct mbuf **chain,
}
}
static int
int
nd6_output_ifp(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m,
struct sockaddr_in6 *dst)
{
@ -1950,16 +1950,29 @@ nd6_output_ifp(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m,
}
/*
* IPv6 packet output - light version.
* Checks if destination LLE exists and is in proper state
* (e.g no modification required). If not true, fall back to
* "heavy" version.
* Do L2 address resolution for @sa_dst address. Stores found
* address in @desten buffer. Copy of lle la_flags can be also
* saved in @pflags if @pflags is non-NULL.
*
* If destination LLE does not exist or lle state modification
* is required, call "slow" version.
*
* Return values:
* - 0 on success (address copied to buffer).
* - EWOULDBLOCK (no local error, but address is still unresolved)
* - other errors (alloc failure, etc)
*/
int
nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m,
struct sockaddr_in6 *dst, struct rtentry *rt0)
nd6_resolve(struct ifnet *ifp, int is_gw, struct mbuf *m,
const struct sockaddr *sa_dst, u_char *desten, uint32_t *pflags)
{
struct llentry *ln = NULL;
const struct sockaddr_in6 *dst6;
if (pflags != NULL)
*pflags = 0;
dst6 = (const struct sockaddr_in6 *)sa_dst;
/* discard the packet if IPv6 operation is disabled on the interface */
if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)) {
@ -1967,14 +1980,25 @@ nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m,
return (ENETDOWN); /* better error? */
}
if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr))
goto sendpkt;
if (nd6_need_cache(ifp) == 0)
goto sendpkt;
if (m != NULL && m->m_flags & M_MCAST) {
switch (ifp->if_type) {
case IFT_ETHER:
case IFT_FDDI:
case IFT_L2VLAN:
case IFT_IEEE80211:
case IFT_BRIDGE:
case IFT_ISO88025:
ETHER_MAP_IPV6_MULTICAST(&dst6->sin6_addr,
desten);
return (0);
default:
m_freem(m);
return (EAFNOSUPPORT);
}
}
IF_AFDATA_RLOCK(ifp);
ln = nd6_lookup(&dst->sin6_addr, 0, ifp);
ln = nd6_lookup(&dst6->sin6_addr, 0, ifp);
IF_AFDATA_RUNLOCK(ifp);
/*
@ -1990,45 +2014,33 @@ nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m,
/* Fall back to slow processing path */
if (ln != NULL)
LLE_RUNLOCK(ln);
return (nd6_output_lle(ifp, origifp, m, dst));
return (nd6_resolve_slow(ifp, m, dst6, desten, pflags));
}
sendpkt:
if (ln != NULL)
LLE_RUNLOCK(ln);
return (nd6_output_ifp(ifp, origifp, m, dst));
bcopy(&ln->ll_addr, desten, ifp->if_addrlen);
if (pflags != NULL)
*pflags = ln->la_flags;
LLE_RUNLOCK(ln);
return (0);
}
/*
* Output IPv6 packet - heavy version.
* Function assume that either
* 1) destination LLE does not exist, is invalid or stale, so
* ND6_EXCLUSIVE lock needs to be acquired
* 2) destination lle is provided (with ND6_EXCLUSIVE lock),
* in that case packets are queued in &chain.
* Do L2 address resolution for @sa_dst address. Stores found
* address in @desten buffer. Copy of lle la_flags can be also
* saved in @pflags if @pflags is non-NULL.
*
* Heavy version.
* Function assumes that destination LLE does not exist,
* is invalid or stale, so ND6_EXCLUSIVE lock needs to be acquired.
*/
static int
nd6_output_lle(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m,
struct sockaddr_in6 *dst)
nd6_resolve_slow(struct ifnet *ifp, struct mbuf *m,
const struct sockaddr_in6 *dst, u_char *desten, uint32_t *pflags)
{
struct llentry *lle = NULL, *lle_tmp;
KASSERT(m != NULL, ("NULL mbuf, nothing to send"));
/* discard the packet if IPv6 operation is disabled on the interface */
if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)) {
m_freem(m);
return (ENETDOWN); /* better error? */
}
if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr))
goto sendpkt;
if (nd6_need_cache(ifp) == 0)
goto sendpkt;
/*
* Address resolution or Neighbor Unreachability Detection
* for the next hop.
@ -2072,23 +2084,18 @@ nd6_output_lle(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m,
}
}
if (lle == NULL) {
if ((ifp->if_flags & IFF_POINTOPOINT) == 0 &&
!(ND_IFINFO(ifp)->flags & ND6_IFF_PERFORMNUD)) {
if (!(ND_IFINFO(ifp)->flags & ND6_IFF_PERFORMNUD)) {
m_freem(m);
return (ENOBUFS);
}
goto sendpkt; /* send anyway */
if (m != NULL)
m_freem(m);
return (ENOBUFS);
}
LLE_WLOCK_ASSERT(lle);
/* We don't have to do link-layer address resolution on a p2p link. */
if ((ifp->if_flags & IFF_POINTOPOINT) != 0 &&
lle->ln_state < ND6_LLINFO_REACHABLE) {
lle->ln_state = ND6_LLINFO_STALE;
nd6_llinfo_settimer_locked(lle, (long)V_nd6_gctimer * hz);
}
/*
* The first time we send a packet to a neighbor whose entry is
* STALE, we have to change the state to DELAY and a sets a timer to
@ -2107,8 +2114,13 @@ nd6_output_lle(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m,
* (i.e. its link-layer address is already resolved), just
* send the packet.
*/
if (lle->ln_state > ND6_LLINFO_INCOMPLETE)
goto sendpkt;
if (lle->ln_state > ND6_LLINFO_INCOMPLETE) {
bcopy(&lle->ll_addr, desten, ifp->if_addrlen);
if (pflags != NULL)
*pflags = lle->la_flags;
LLE_WUNLOCK(lle);
return (0);
}
/*
* There is a neighbor cache entry, but no ethernet address
@ -2160,13 +2172,7 @@ nd6_output_lle(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m,
LLE_WUNLOCK(lle);
}
return (0);
sendpkt:
if (lle != NULL)
LLE_WUNLOCK(lle);
return (nd6_output_ifp(ifp, origifp, m, dst));
return (EWOULDBLOCK);
}
@ -2192,15 +2198,12 @@ nd6_flush_holdchain(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *chain
/*
* XXX
* note that intermediate errors are blindly ignored - but this is
* the same convention as used with nd6_output when called by
* nd6_cache_lladdr
* note that intermediate errors are blindly ignored
*/
return (error);
}
int
static int
nd6_need_cache(struct ifnet *ifp)
{
/*
@ -2297,61 +2300,6 @@ nd6_rem_ifa_lle(struct in6_ifaddr *ia, int all)
lltable_delete_addr(LLTABLE6(ifp), LLE_IFADDR, saddr);
}
/*
* the callers of this function need to be re-worked to drop
* the lle lock, drop here for now
*/
int
nd6_storelladdr(struct ifnet *ifp, struct mbuf *m,
const struct sockaddr *dst, u_char *desten, uint32_t *pflags)
{
struct llentry *ln;
if (pflags != NULL)
*pflags = 0;
IF_AFDATA_UNLOCK_ASSERT(ifp);
if (m != NULL && m->m_flags & M_MCAST) {
switch (ifp->if_type) {
case IFT_ETHER:
case IFT_FDDI:
case IFT_L2VLAN:
case IFT_IEEE80211:
case IFT_BRIDGE:
case IFT_ISO88025:
ETHER_MAP_IPV6_MULTICAST(&SIN6(dst)->sin6_addr,
desten);
return (0);
default:
m_freem(m);
return (EAFNOSUPPORT);
}
}
/*
* the entry should have been created in nd6_store_lladdr
*/
IF_AFDATA_RLOCK(ifp);
ln = lla_lookup(LLTABLE6(ifp), 0, dst);
IF_AFDATA_RUNLOCK(ifp);
if ((ln == NULL) || !(ln->la_flags & LLE_VALID)) {
if (ln != NULL)
LLE_RUNLOCK(ln);
/* this could happen, if we could not allocate memory */
m_freem(m);
return (1);
}
bcopy(&ln->ll_addr, desten, ifp->if_addrlen);
if (pflags != NULL)
*pflags = ln->la_flags;
LLE_RUNLOCK(ln);
/*
* A *small* use after free race exists here
*/
return (0);
}
static void
clear_llinfo_pqueue(struct llentry *ln)
{

View File

@ -414,22 +414,19 @@ void nd6_llinfo_settimer_locked(struct llentry *, long);
void nd6_timer(void *);
void nd6_purge(struct ifnet *);
void nd6_nud_hint(struct rtentry *, struct in6_addr *, int);
int nd6_resolve(struct ifnet *, struct rtentry *, struct mbuf *,
struct sockaddr *, u_char *);
int nd6_resolve(struct ifnet *, int, struct mbuf *,
const struct sockaddr *, u_char *, uint32_t *);
int nd6_ioctl(u_long, caddr_t, struct ifnet *);
void nd6_cache_lladdr(struct ifnet *, struct in6_addr *,
char *, int, int, int);
int nd6_output(struct ifnet *, struct ifnet *, struct mbuf *,
struct sockaddr_in6 *, struct rtentry *);
void nd6_grab_holdchain(struct llentry *, struct mbuf **,
struct sockaddr_in6 *);
int nd6_flush_holdchain(struct ifnet *, struct ifnet *, struct mbuf *,
struct sockaddr_in6 *);
int nd6_need_cache(struct ifnet *);
int nd6_add_ifa_lle(struct in6_ifaddr *);
void nd6_rem_ifa_lle(struct in6_ifaddr *, int);
int nd6_storelladdr(struct ifnet *, struct mbuf *,
const struct sockaddr *, u_char *, uint32_t *);
int nd6_output_ifp(struct ifnet *, struct ifnet *, struct mbuf *,
struct sockaddr_in6 *);
/* nd6_nbr.c */
void nd6_na_input(struct mbuf *, int, int);

View File

@ -5534,7 +5534,7 @@ pf_route6(struct mbuf **m, struct pf_rule *r, int dir, struct ifnet *oifp,
if (IN6_IS_SCOPE_EMBED(&dst.sin6_addr))
dst.sin6_addr.s6_addr16[1] = htons(ifp->if_index);
if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu)
nd6_output(ifp, ifp, m0, &dst, NULL);
nd6_output_ifp(ifp, ifp, m0, &dst);
else {
in6_ifstat_inc(ifp, ifs6_in_toobig);
if (r->rt != PF_DUPTO)

View File

@ -1333,7 +1333,7 @@ ipoib_output(struct ifnet *ifp, struct mbuf *m,
else if (m->m_flags & M_MCAST)
ipv6_ib_mc_map(&((struct sockaddr_in6 *)dst)->sin6_addr, ifp->if_broadcastaddr, edst);
else
error = nd6_storelladdr(ifp, m, dst, (u_char *)edst, NULL);
error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL);
if (error)
return error;
type = htons(ETHERTYPE_IPV6);