Implement interface link header precomputation API.

Add if_requestencap() interface method which is capable of calculating
  various link headers for given interface. Right now there is support
  for INET/INET6/ARP llheader calculation (IFENCAP_LL type request).
  Other types are planned to support more complex calculation
  (L2 multipath lagg nexthops, tunnel encap nexthops, etc..).

Reshape 'struct route' to be able to pass additional data (with its length)
  to prepend to mbuf.

These two changes permit routing code to pass pre-calculated nexthop data
  (like L2 header for route w/gateway) down to the stack eliminating the
  need for other lookups. It also brings us closer to more complex scenarios
  like transparently handling MPLS nexthops and tunnel interfaces.
  Last, but not least, it removes layering violation introduced by flowtable
  code (ro_lle) and simplifies handling of existing if_output consumers.

ARP/ND changes:
Make arp/ndp stack pre-calculate link header upon installing/updating lle
  record. Interface link address changes are handled by re-calculating
  headers for all lles based on if_lladdr event. After these changes,
  arpresolve()/nd6_resolve() return full pre-calculated header for
  supported interfaces thus simplifying if_output().
Move these lookups to separate ether_resolve_addr() function which either
  returns an error or a fully-prepared link header. Add <arp|nd6_>resolve_addr()
  compat versions to return link addresses instead of pre-calculated data.

BPF changes:
Raw bpf writes occupied _two_ cases: AF_UNSPEC and pseudo_AF_HDRCMPLT.
Despite the naming, both of them have their header "complete". The only
  difference is that interface source mac has to be filled by OS for
  AF_UNSPEC (controlled via BIOCGHDRCMPLT). This logic has to stay inside
  BPF and not pollute if_output() routines. Convert BPF to pass prepend data
  via new 'struct route' mechanism. Note that it does not change
  non-optimized if_output(): ro_prepend handling is purely optional.
Side note: hackish pseudo_AF_HDRCMPLT is supported for ethernet and FDDI.
  It is not needed for ethernet anymore. The only remaining FDDI user is
  dev/pdq mostly untouched since 2007. FDDI support was eliminated from
  OpenBSD in 2013 (sys/net/if_fddisubr.c rev 1.65).

Flowtable changes:
  Flowtable violates layering by saving (and not correctly managing)
  rtes/lles. Instead of passing lle pointer, pass pointer to pre-calculated
  header data from that lle.

Differential Revision:	https://reviews.freebsd.org/D4102
This commit is contained in:
Alexander V. Chernikov 2015-12-31 05:03:27 +00:00
parent 2bfd3dfb9f
commit 4fb3a8208c
22 changed files with 672 additions and 213 deletions

View File

@ -215,7 +215,7 @@ resolve_entry(struct adapter *sc, struct l2t_entry *e)
struct tom_data *td = sc->tom_softc;
struct toedev *tod = &td->tod;
struct sockaddr_in sin = {0};
uint8_t dmac[ETHER_ADDR_LEN];
uint8_t dmac[ETHER_HDR_LEN];
uint16_t vtag = EVL_VLID_MASK;
int rc;

View File

@ -233,7 +233,7 @@ resolve_entry(struct adapter *sc, struct l2t_entry *e)
struct sockaddr_in sin = {0};
struct sockaddr_in6 sin6 = {0};
struct sockaddr *sa;
uint8_t dmac[ETHER_ADDR_LEN];
uint8_t dmac[ETHER_HDR_LEN];
uint16_t vtag = VLAN_NONE;
int rc;

View File

@ -69,6 +69,7 @@ __FBSDID("$FreeBSD$");
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/bpf.h>
#include <net/bpf_buffer.h>
#ifdef BPF_JITTER
@ -76,6 +77,7 @@ __FBSDID("$FreeBSD$");
#endif
#include <net/bpf_zerocopy.h>
#include <net/bpfdesc.h>
#include <net/route.h>
#include <net/vnet.h>
#include <netinet/in.h>
@ -164,7 +166,7 @@ static void bpf_detachd(struct bpf_d *);
static void bpf_detachd_locked(struct bpf_d *);
static void bpf_freed(struct bpf_d *);
static int bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
struct sockaddr *, int *, struct bpf_insn *);
struct sockaddr *, int *, struct bpf_d *);
static int bpf_setif(struct bpf_d *, struct ifreq *);
static void bpf_timed_out(void *);
static __inline void
@ -454,7 +456,7 @@ bpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
*/
static int
bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
struct sockaddr *sockp, int *hdrlen, struct bpf_insn *wfilter)
struct sockaddr *sockp, int *hdrlen, struct bpf_d *d)
{
const struct ieee80211_bpf_params *p;
struct ether_header *eh;
@ -549,7 +551,7 @@ bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
if (error)
goto bad;
slen = bpf_filter(wfilter, mtod(m, u_char *), len, len);
slen = bpf_filter(d->bd_wfilter, mtod(m, u_char *), len, len);
if (slen == 0) {
error = EPERM;
goto bad;
@ -566,6 +568,10 @@ bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
else
m->m_flags |= M_MCAST;
}
if (d->bd_hdrcmplt == 0) {
memcpy(eh->ether_shost, IF_LLADDR(ifp),
sizeof(eh->ether_shost));
}
break;
}
@ -1088,6 +1094,7 @@ bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
struct ifnet *ifp;
struct mbuf *m, *mc;
struct sockaddr dst;
struct route ro;
int error, hlen;
error = devfs_get_cdevpriv((void **)&d);
@ -1119,7 +1126,7 @@ bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
hlen = 0;
/* XXX: bpf_movein() can sleep */
error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
&m, &dst, &hlen, d->bd_wfilter);
&m, &dst, &hlen, d);
if (error) {
d->bd_wdcount++;
return (error);
@ -1151,7 +1158,14 @@ bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
BPFD_UNLOCK(d);
#endif
error = (*ifp->if_output)(ifp, m, &dst, NULL);
bzero(&ro, sizeof(ro));
if (hlen != 0) {
ro.ro_prepend = (u_char *)&dst.sa_data;
ro.ro_plen = hlen;
ro.ro_flags = RT_HAS_HEADER;
}
error = (*ifp->if_output)(ifp, m, &dst, &ro);
if (error)
d->bd_wdcount++;

View File

@ -665,6 +665,7 @@ int
flowtable_lookup(sa_family_t sa, struct mbuf *m, struct route *ro)
{
struct flentry *fle;
struct llentry *lle;
if (V_flowtable_enable == 0)
return (ENXIO);
@ -693,8 +694,15 @@ flowtable_lookup(sa_family_t sa, struct mbuf *m, struct route *ro)
}
ro->ro_rt = fle->f_rt;
ro->ro_lle = fle->f_lle;
ro->ro_flags |= RT_NORTREF;
lle = fle->f_lle;
if (lle != NULL && (lle->la_flags & LLE_VALID)) {
ro->ro_prepend = lle->r_linkdata;
ro->ro_plen = lle->r_hdrlen;
ro->ro_flags |= RT_MAY_LOOP;
if (lle->la_flags & LLE_IFADDR)
ro->ro_flags |= RT_L2_ME;
}
return (0);
}

View File

@ -161,6 +161,7 @@ static int ifconf(u_long, caddr_t);
static void if_freemulti(struct ifmultiaddr *);
static void if_grow(void);
static void if_input_default(struct ifnet *, struct mbuf *);
static int if_requestencap_default(struct ifnet *, struct if_encap_req *);
static void if_route(struct ifnet *, int flag, int fam);
static int if_setflag(struct ifnet *, int, int, int *, int);
static int if_transmit(struct ifnet *ifp, struct mbuf *m);
@ -673,6 +674,9 @@ if_attach_internal(struct ifnet *ifp, int vmove, struct if_clone *ifc)
if (ifp->if_input == NULL)
ifp->if_input = if_input_default;
if (ifp->if_requestencap == NULL)
ifp->if_requestencap = if_requestencap_default;
if (!vmove) {
#ifdef MAC
mac_ifnet_create(ifp);
@ -3397,6 +3401,43 @@ if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
return (0);
}
/*
 * Fallback handler for basic link-layer encapsulation requests.
 *
 * Stacks which have not been converted (FDDI, IB, ..) keep using the
 * traditional output model: ARP (and other similar L2 protocols) is
 * handled inside the output routine, and arpresolve()/nd6_resolve()
 * return a MAC address instead of a full prepend.
 *
 * For IPv4/IPv6 the "calculated header" produced here is therefore just
 * the link-layer address itself, stored at offset 0 of req->buf, with
 * req->bufsize updated to the address length.
 *
 * Returns:
 *   0             on success
 *   EOPNOTSUPP    request type is not IFENCAP_LL
 *   ENOMEM        req->bufsize is smaller than req->lladdr_len
 *   EAFNOSUPPORT  any other address family (handled in ARP code)
 */
static int
if_requestencap_default(struct ifnet *ifp, struct if_encap_req *req)
{

	/* Only plain link-layer requests are understood here. */
	if (req->rtype != IFENCAP_LL)
		return (EOPNOTSUPP);

	/* The supplied buffer has to be able to hold the address. */
	if (req->bufsize < req->lladdr_len)
		return (ENOMEM);

	if (req->family != AF_INET && req->family != AF_INET6)
		return (EAFNOSUPPORT);

	/* The address is the header: copy it to storage as is. */
	memmove(req->buf, req->lladdr, req->lladdr_len);
	req->bufsize = req->lladdr_len;
	req->lladdr_off = 0;

	return (0);
}
/*
* The name argument must be a pointer to storage which will last as
* long as the interface does. For physical devices, the result of

View File

@ -113,6 +113,7 @@ static int ether_resolvemulti(struct ifnet *, struct sockaddr **,
#ifdef VIMAGE
static void ether_reassign(struct ifnet *, struct vnet *, char *);
#endif
static int ether_requestencap(struct ifnet *, struct if_encap_req *);
#define ETHER_IS_BROADCAST(addr) \
(bcmp(etherbroadcastaddr, (addr), ETHER_ADDR_LEN) == 0)
@ -135,6 +136,138 @@ update_mbuf_csumflags(struct mbuf *src, struct mbuf *dst)
dst->m_pkthdr.csum_data = 0xffff;
}
/*
 * Build an Ethernet header in response to a link-layer encap request.
 *
 * Only IFENCAP_LL requests are served.  On success req->buf holds an
 * ether_header whose destination is req->lladdr (or the interface
 * broadcast address for AF_ARP requests flagged
 * IFENCAP_FLAG_BROADCAST), whose source is the interface lladdr and
 * whose ethertype is derived from req->family.  req->bufsize is set to
 * sizeof(struct ether_header) and req->lladdr_off to 0.
 *
 * For AF_ARP the ARP header referenced by req->hdata is consulted to
 * pick ETHERTYPE_REVARP or ETHERTYPE_ARP, and its ar_hrd field is
 * rewritten to ARPHRD_ETHER.
 *
 * Returns EOPNOTSUPP for other request types, ENOMEM when the supplied
 * buffer is shorter than ETHER_HDR_LEN and EAFNOSUPPORT for families
 * other than AF_INET, AF_INET6 and AF_ARP.
 */
static int
ether_requestencap(struct ifnet *ifp, struct if_encap_req *req)
{
	struct ether_header *hdr;
	struct arphdr *arp;
	const u_char *dhost;
	uint16_t ethertype;
	uint16_t op;

	if (req->rtype != IFENCAP_LL)
		return (EOPNOTSUPP);
	if (req->bufsize < ETHER_HDR_LEN)
		return (ENOMEM);

	hdr = (struct ether_header *)req->buf;
	dhost = req->lladdr;
	req->lladdr_off = 0;

	if (req->family == AF_INET) {
		ethertype = htons(ETHERTYPE_IP);
	} else if (req->family == AF_INET6) {
		ethertype = htons(ETHERTYPE_IPV6);
	} else if (req->family == AF_ARP) {
		arp = (struct arphdr *)req->hdata;
		arp->ar_hrd = htons(ARPHRD_ETHER);

		/* Reverse ARP has its own ethertype; everything else is ARP. */
		op = ntohs(arp->ar_op);
		if (op == ARPOP_REVREQUEST || op == ARPOP_REVREPLY)
			ethertype = htons(ETHERTYPE_REVARP);
		else
			ethertype = htons(ETHERTYPE_ARP);

		if (req->flags & IFENCAP_FLAG_BROADCAST)
			dhost = ifp->if_broadcastaddr;
	} else {
		return (EAFNOSUPPORT);
	}

	memcpy(&hdr->ether_type, &ethertype, sizeof(hdr->ether_type));
	memcpy(hdr->ether_dhost, dhost, ETHER_ADDR_LEN);
	memcpy(hdr->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
	req->bufsize = sizeof(struct ether_header);

	return (0);
}
/*
 * Compute the Ethernet header needed to send @m to @dst over @ifp and
 * store it in @phdr.
 *
 * Unicast AF_INET/AF_INET6 destinations are passed to
 * arpresolve()/nd6_resolve() together with @phdr.  For IPv4
 * broadcast/multicast and IPv6 multicast the header is assembled here:
 * destination from the interface broadcast address or the multicast
 * mapping macros, source from the interface lladdr.
 *
 * On success *@pflags is set to RT_MAY_LOOP, plus RT_L2_ME when the
 * resolver reported LLE_IFADDR, and 0 is returned.  An EHOSTDOWN result
 * is turned into EHOSTUNREACH when @ro references a route with
 * RTF_GATEWAY set; any other error is returned unchanged.  For an
 * unsupported address family a message is printed, @m is freed and
 * EAFNOSUPPORT is returned.
 */
static int
ether_resolve_addr(struct ifnet *ifp, struct mbuf *m,
    const struct sockaddr *dst, struct route *ro, u_char *phdr,
    uint32_t *pflags)
{
	struct ether_header *hdr;
	uint32_t llflags = 0;
	uint32_t outflags;
	int rc = 0;
#if defined(INET) || defined(INET6)
	uint16_t ethertype;
#endif
#ifdef INET
	const struct in_addr *ip4;
#endif
#ifdef INET6
	const struct in6_addr *ip6;
#endif

	hdr = (struct ether_header *)phdr;

	switch (dst->sa_family) {
#ifdef INET
	case AF_INET:
		if ((m->m_flags & (M_BCAST | M_MCAST)) == 0) {
			/* Unicast: let ARP supply the header. */
			rc = arpresolve(ifp, 0, m, dst, phdr, &llflags);
			break;
		}
		if ((m->m_flags & M_BCAST) != 0) {
			memcpy(hdr->ether_dhost, ifp->if_broadcastaddr,
			    ETHER_ADDR_LEN);
		} else {
			ip4 = &(((const struct sockaddr_in *)dst)->sin_addr);
			ETHER_MAP_IP_MULTICAST(ip4, hdr->ether_dhost);
		}
		ethertype = htons(ETHERTYPE_IP);
		memcpy(&hdr->ether_type, &ethertype, sizeof(ethertype));
		memcpy(hdr->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
		break;
#endif
#ifdef INET6
	case AF_INET6:
		if ((m->m_flags & M_MCAST) == 0) {
			/* Unicast: let ND supply the header. */
			rc = nd6_resolve(ifp, 0, m, dst, phdr, &llflags);
			break;
		}
		{
			ip6 = &(((const struct sockaddr_in6 *)dst)->sin6_addr);
			ETHER_MAP_IPV6_MULTICAST(ip6, hdr->ether_dhost);
		}
		ethertype = htons(ETHERTYPE_IPV6);
		memcpy(&hdr->ether_type, &ethertype, sizeof(ethertype));
		memcpy(hdr->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
		break;
#endif
	default:
		if_printf(ifp, "can't handle af%d\n", dst->sa_family);
		if (m != NULL)
			m_freem(m);
		return (EAFNOSUPPORT);
	}

	/* A down host behind a gateway is reported as unreachable. */
	if (rc == EHOSTDOWN && ro != NULL && ro->ro_rt != NULL &&
	    (ro->ro_rt->rt_flags & RTF_GATEWAY) != 0)
		rc = EHOSTUNREACH;

	if (rc != 0)
		return (rc);

	outflags = RT_MAY_LOOP;
	if ((llflags & LLE_IFADDR) != 0)
		outflags |= RT_L2_ME;
	*pflags = outflags;

	return (0);
}
/*
* Ethernet output routine.
* Encapsulate a packet of type family for the local net.
@ -145,27 +278,20 @@ int
ether_output(struct ifnet *ifp, struct mbuf *m,
const struct sockaddr *dst, struct route *ro)
{
short type;
int error = 0, hdrcmplt = 0;
u_char edst[ETHER_ADDR_LEN];
struct llentry *lle = NULL;
struct rtentry *rt0 = NULL;
int error = 0;
char linkhdr[ETHER_HDR_LEN], *phdr;
struct ether_header *eh;
struct pf_mtag *t;
int loop_copy = 1;
int hlen; /* link layer header length */
int is_gw = 0;
uint32_t pflags = 0;
uint32_t pflags;
phdr = NULL;
pflags = 0;
if (ro != NULL) {
if (!(m->m_flags & (M_BCAST | M_MCAST))) {
lle = ro->ro_lle;
if (lle != NULL)
pflags = lle->la_flags;
}
rt0 = ro->ro_rt;
if (rt0 != NULL && (rt0->rt_flags & RTF_GATEWAY) != 0)
is_gw = 1;
phdr = ro->ro_prepend;
hlen = ro->ro_plen;
pflags = ro->ro_flags;
}
#ifdef MAC
error = mac_ifnet_check_transmit(ifp, m);
@ -180,94 +306,31 @@ ether_output(struct ifnet *ifp, struct mbuf *m,
(ifp->if_drv_flags & IFF_DRV_RUNNING)))
senderr(ENETDOWN);
hlen = ETHER_HDR_LEN;
switch (dst->sa_family) {
#ifdef INET
case AF_INET:
if (lle != NULL && (pflags & LLE_VALID) != 0)
memcpy(edst, &lle->ll_addr.mac16, sizeof(edst));
else
error = arpresolve(ifp, is_gw, m, dst, edst, &pflags);
if (error)
if (phdr == NULL) {
/* No prepend data supplied. Try to calculate ourselves. */
phdr = linkhdr;
hlen = ETHER_HDR_LEN;
error = ether_resolve_addr(ifp, m, dst, ro, phdr, &pflags);
if (error != 0)
return (error == EWOULDBLOCK ? 0 : error);
type = htons(ETHERTYPE_IP);
break;
case AF_ARP:
{
struct arphdr *ah;
ah = mtod(m, struct arphdr *);
ah->ar_hrd = htons(ARPHRD_ETHER);
loop_copy = 0; /* if this is for us, don't do it */
switch(ntohs(ah->ar_op)) {
case ARPOP_REVREQUEST:
case ARPOP_REVREPLY:
type = htons(ETHERTYPE_REVARP);
break;
case ARPOP_REQUEST:
case ARPOP_REPLY:
default:
type = htons(ETHERTYPE_ARP);
break;
}
if (m->m_flags & M_BCAST)
bcopy(ifp->if_broadcastaddr, edst, ETHER_ADDR_LEN);
else
bcopy(ar_tha(ah), edst, ETHER_ADDR_LEN);
}
break;
#endif
#ifdef INET6
case AF_INET6:
if (lle != NULL && (pflags & LLE_VALID))
memcpy(edst, &lle->ll_addr.mac16, sizeof(edst));
else
error = nd6_resolve(ifp, is_gw, m, dst, (u_char *)edst,
&pflags);
if (error)
return (error == EWOULDBLOCK ? 0 : error);
type = htons(ETHERTYPE_IPV6);
break;
#endif
case pseudo_AF_HDRCMPLT:
{
const struct ether_header *eh;
hdrcmplt = 1;
/* FALLTHROUGH */
case AF_UNSPEC:
loop_copy = 0; /* if this is for us, don't do it */
eh = (const struct ether_header *)dst->sa_data;
(void)memcpy(edst, eh->ether_dhost, sizeof (edst));
type = eh->ether_type;
break;
}
default:
if_printf(ifp, "can't handle af%d\n", dst->sa_family);
senderr(EAFNOSUPPORT);
}
if ((pflags & LLE_IFADDR) != 0) {
if ((pflags & RT_L2_ME) != 0) {
update_mbuf_csumflags(m, m);
return (if_simloop(ifp, m, dst->sa_family, 0));
}
loop_copy = pflags & RT_MAY_LOOP;
/*
* Add local net header. If no space in first mbuf,
* allocate another.
*/
M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT);
M_PREPEND(m, hlen, M_NOWAIT);
if (m == NULL)
senderr(ENOBUFS);
eh = mtod(m, struct ether_header *);
if (hdrcmplt == 0) {
memcpy(&eh->ether_type, &type, sizeof(eh->ether_type));
memcpy(eh->ether_dhost, edst, sizeof (edst));
memcpy(eh->ether_shost, IF_LLADDR(ifp),sizeof(eh->ether_shost));
if ((pflags & RT_HAS_HEADER) == 0) {
eh = mtod(m, struct ether_header *);
memcpy(eh, phdr, hlen);
}
/*
@ -279,34 +342,27 @@ ether_output(struct ifnet *ifp, struct mbuf *m,
* on the wire). However, we don't do that here for security
* reasons and compatibility with the original behavior.
*/
if ((ifp->if_flags & IFF_SIMPLEX) && loop_copy &&
if ((m->m_flags & M_BCAST) && loop_copy && (ifp->if_flags & IFF_SIMPLEX) &&
((t = pf_find_mtag(m)) == NULL || !t->routed)) {
if (m->m_flags & M_BCAST) {
struct mbuf *n;
struct mbuf *n;
/*
* Because if_simloop() modifies the packet, we need a
* writable copy through m_dup() instead of a readonly
* one as m_copy[m] would give us. The alternative would
* be to modify if_simloop() to handle the readonly mbuf,
* but performancewise it is mostly equivalent (trading
* extra data copying vs. extra locking).
*
* XXX This is a local workaround. A number of less
* often used kernel parts suffer from the same bug.
* See PR kern/105943 for a proposed general solution.
*/
if ((n = m_dup(m, M_NOWAIT)) != NULL) {
update_mbuf_csumflags(m, n);
(void)if_simloop(ifp, n, dst->sa_family, hlen);
} else
if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
} else if (bcmp(eh->ether_dhost, eh->ether_shost,
ETHER_ADDR_LEN) == 0) {
update_mbuf_csumflags(m, m);
(void) if_simloop(ifp, m, dst->sa_family, hlen);
return (0); /* XXX */
}
/*
* Because if_simloop() modifies the packet, we need a
* writable copy through m_dup() instead of a readonly
* one as m_copy[m] would give us. The alternative would
* be to modify if_simloop() to handle the readonly mbuf,
* but performancewise it is mostly equivalent (trading
* extra data copying vs. extra locking).
*
* XXX This is a local workaround. A number of less
* often used kernel parts suffer from the same bug.
* See PR kern/105943 for a proposed general solution.
*/
if ((n = m_dup(m, M_NOWAIT)) != NULL) {
update_mbuf_csumflags(m, n);
(void)if_simloop(ifp, n, dst->sa_family, hlen);
} else
if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
}
/*
@ -798,6 +854,7 @@ ether_ifattach(struct ifnet *ifp, const u_int8_t *lla)
ifp->if_output = ether_output;
ifp->if_input = ether_input;
ifp->if_resolvemulti = ether_resolvemulti;
ifp->if_requestencap = ether_requestencap;
#ifdef VIMAGE
ifp->if_reassign = ether_reassign;
#endif

View File

@ -278,10 +278,12 @@ lltable_drop_entry_queue(struct llentry *lle)
void
lltable_set_entry_addr(struct ifnet *ifp, struct llentry *lle,
const char *lladdr)
const char *linkhdr, size_t linkhdrsize, int lladdr_off)
{
bcopy(lladdr, &lle->ll_addr, ifp->if_addrlen);
memcpy(lle->r_linkdata, linkhdr, linkhdrsize);
lle->r_hdrlen = linkhdrsize;
lle->ll_addr = &lle->r_linkdata[lladdr_off];
lle->la_flags |= LLE_VALID;
lle->r_flags |= RLLE_VALID;
}
@ -296,7 +298,7 @@ lltable_set_entry_addr(struct ifnet *ifp, struct llentry *lle,
*/
int
lltable_try_set_entry_addr(struct ifnet *ifp, struct llentry *lle,
const char *lladdr)
const char *linkhdr, size_t linkhdrsize, int lladdr_off)
{
/* Perform real LLE update */
@ -318,7 +320,7 @@ lltable_try_set_entry_addr(struct ifnet *ifp, struct llentry *lle,
}
/* Update data */
lltable_set_entry_addr(ifp, lle, lladdr);
lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize, lladdr_off);
IF_AFDATA_WUNLOCK(ifp);
@ -327,6 +329,84 @@ lltable_try_set_entry_addr(struct ifnet *ifp, struct llentry *lle,
return (1);
}
/*
* Helper function used to pre-compute full/partial link-layer
* header data suitable for feeding into if_output().
*/
int
lltable_calc_llheader(struct ifnet *ifp, int family, char *lladdr,
char *buf, size_t *bufsize, int *lladdr_off)
{
struct if_encap_req ereq;
int error;
bzero(buf, *bufsize);
bzero(&ereq, sizeof(ereq));
ereq.buf = buf;
ereq.bufsize = *bufsize;
ereq.rtype = IFENCAP_LL;
ereq.family = family;
ereq.lladdr = lladdr;
ereq.lladdr_len = ifp->if_addrlen;
error = ifp->if_requestencap(ifp, &ereq);
if (error == 0) {
*bufsize = ereq.bufsize;
*lladdr_off = ereq.lladdr_off;
}
return (error);
}
/*
 * Update link-layer header for given @lle after
 * interface lladdr was changed.
 *
 * Used as the per-entry callback of lltable_foreach_lle(); @farg is the
 * interface whose address changed.  Entries without LLE_VALID are left
 * alone.  For LLE_IFADDR entries the header is rebuilt around the
 * interface's current lladdr, otherwise around the address already
 * stored in the entry.
 *
 * If the header cannot be recalculated the cached one is kept:
 * lltable_calc_llheader() zeroes the scratch buffer up front and leaves
 * the size at sizeof(linkhdr) on failure, so copying it regardless would
 * overwrite the existing header with zeros.
 *
 * Returns 0 in all cases.
 */
static int
llentry_update_ifaddr(struct lltable *llt, struct llentry *lle, void *farg)
{
	struct ifnet *ifp;
	u_char linkhdr[LLE_MAX_LINKHDR];
	size_t linkhdrsize;
	u_char *lladdr;
	int lladdr_off;

	ifp = (struct ifnet *)farg;

	lladdr = lle->ll_addr;

	LLE_WLOCK(lle);
	if ((lle->la_flags & LLE_VALID) == 0) {
		LLE_WUNLOCK(lle);
		return (0);
	}

	/* Entries describing our own address follow the interface lladdr. */
	if ((lle->la_flags & LLE_IFADDR) != 0)
		lladdr = IF_LLADDR(ifp);

	linkhdrsize = sizeof(linkhdr);
	/* Only commit the new header when it was actually produced. */
	if (lltable_calc_llheader(ifp, llt->llt_af, lladdr, linkhdr,
	    &linkhdrsize, &lladdr_off) == 0)
		memcpy(lle->r_linkdata, linkhdr, linkhdrsize);
	LLE_WUNLOCK(lle);

	return (0);
}
/*
* Update all calculated headers for given @llt
*/
void
lltable_update_ifaddr(struct lltable *llt)
{
if (llt->llt_ifp->if_flags & IFF_LOOPBACK)
return;
IF_AFDATA_WLOCK(llt->llt_ifp);
lltable_foreach_lle(llt, llentry_update_ifaddr, llt->llt_ifp);
IF_AFDATA_WUNLOCK(llt->llt_ifp);
}
/*
*
* Performes generic cleanup routines and frees lle.
@ -642,6 +722,9 @@ lla_rt_output(struct rt_msghdr *rtm, struct rt_addrinfo *info)
struct ifnet *ifp;
struct lltable *llt;
struct llentry *lle, *lle_tmp;
uint8_t linkhdr[LLE_MAX_LINKHDR];
size_t linkhdrsize;
int lladdr_off;
u_int laflags = 0;
int error;
@ -677,11 +760,14 @@ lla_rt_output(struct rt_msghdr *rtm, struct rt_addrinfo *info)
if (lle == NULL)
return (ENOMEM);
bcopy(LLADDR(dl), &lle->ll_addr, ifp->if_addrlen);
linkhdrsize = sizeof(linkhdr);
if (lltable_calc_llheader(ifp, dst->sa_family, LLADDR(dl),
linkhdr, &linkhdrsize, &lladdr_off) != 0)
return (EINVAL);
lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize,
lladdr_off);
if ((rtm->rtm_flags & RTF_ANNOUNCE))
lle->la_flags |= LLE_PUB;
lle->la_flags |= LLE_VALID;
lle->r_flags |= RLLE_VALID;
lle->la_expire = rtm->rtm_rmx.rmx_expire;
laflags = lle->la_flags;
@ -767,7 +853,7 @@ llatbl_lle_show(struct llentry_sa *la)
db_printf(" ln_router=%u\n", lle->ln_router);
db_printf(" ln_ntick=%ju\n", (uintmax_t)lle->ln_ntick);
db_printf(" lle_refcnt=%d\n", lle->lle_refcnt);
bcopy(&lle->ll_addr.mac16, octet, sizeof(octet));
bcopy(lle->ll_addr, octet, sizeof(octet));
db_printf(" ll_addr=%02x:%02x:%02x:%02x:%02x:%02x\n",
octet[0], octet[1], octet[2], octet[3], octet[4], octet[5]);
db_printf(" lle_timer=%p\n", &lle->lle_timer);

View File

@ -48,6 +48,7 @@ extern struct rwlock lltable_rwlock;
#define LLTABLE_WUNLOCK() rw_wunlock(&lltable_rwlock)
#define LLTABLE_LOCK_ASSERT() rw_assert(&lltable_rwlock, RA_LOCKED)
#define LLE_MAX_LINKHDR 24 /* Full IB header */
/*
* Code referencing llentry must at least hold
* a shared lock
@ -58,14 +59,11 @@ struct llentry {
struct in_addr addr4;
struct in6_addr addr6;
} r_l3addr;
union {
uint64_t mac_aligned;
uint16_t mac16[3];
uint8_t mac8[20]; /* IB needs 20 bytes. */
} ll_addr;
char r_linkdata[LLE_MAX_LINKHDR]; /* L2 data */
uint8_t r_hdrlen; /* length for LL header */
uint8_t spare0[3];
uint16_t r_flags; /* LLE runtime flags */
uint16_t r_skip_req; /* feedback from fast path */
uint64_t spare1;
struct lltable *lle_tbl;
struct llentries *lle_head;
@ -82,6 +80,7 @@ struct llentry {
time_t lle_remtime; /* Real time remaining */
time_t lle_hittime; /* Time when r_skip_req was unset */
int lle_refcnt;
char *ll_addr; /* link-layer address */
LIST_ENTRY(llentry) lle_chain; /* chain of deleted items */
struct callout lle_timer;
@ -198,6 +197,8 @@ MALLOC_DECLARE(M_LLTABLE);
/* LLE request flags */
#define LLE_EXCLUSIVE 0x2000 /* return lle xlocked */
#define LLE_UNLOCKED 0x4000 /* return lle unlocked */
#define LLE_ADDRONLY 0x4000 /* return lladdr instead of full header */
#define LLE_CREATE 0x8000 /* hint to avoid lle lookup */
/* LLE flags used by fastpath code */
#define RLLE_VALID 0x0001 /* entry is valid */
@ -223,10 +224,13 @@ struct llentry *llentry_alloc(struct ifnet *, struct lltable *,
/* helper functions */
size_t lltable_drop_entry_queue(struct llentry *);
void lltable_set_entry_addr(struct ifnet *ifp, struct llentry *lle,
const char *lladdr);
const char *linkhdr, size_t linkhdrsize, int lladdr_off);
int lltable_try_set_entry_addr(struct ifnet *ifp, struct llentry *lle,
const char *lladdr);
const char *linkhdr, size_t linkhdrsize, int lladdr_off);
int lltable_calc_llheader(struct ifnet *ifp, int family, char *lladdr,
char *buf, size_t *bufsize, int *lladdr_off);
void lltable_update_ifaddr(struct lltable *llt);
struct llentry *lltable_alloc_entry(struct lltable *llt, u_int flags,
const struct sockaddr *l4addr);
void lltable_free_entry(struct lltable *llt, struct llentry *lle);

View File

@ -134,6 +134,48 @@ struct ifnet_hw_tsomax {
u_int tsomaxsegsize; /* TSO maximum segment size in bytes */
};
/*
* Interface encap request types.
* Selects what an if_requestencap() call is asked to compute; the value
* is carried in the rtype field of struct if_encap_req.
*/
typedef enum {
IFENCAP_LL = 1 /* pre-calculate link-layer header */
} ife_type;
/*
* The structure below allows requesting various pre-calculated L2/L3 headers
* for different media. Requests vary by type (rtype field).
*
* IFENCAP_LL type: pre-calculates link header based on address family
* and destination lladdr.
*
* Input data fields:
* buf: pointer to destination buffer
* bufsize: buffer size
* flags: IFENCAP_FLAG_BROADCAST if destination is broadcast
* family: address family defined by AF_ constant.
* lladdr: pointer to link-layer address
* lladdr_len: length of link-layer address
* hdata: pointer to L3 header (optional, used for ARP requests).
* Output data fields:
* buf: encap data is stored here
* bufsize: resulting encap length is stored here
* lladdr_off: offset of link-layer address from encap hdr start
* hdata: L3 header may be altered if necessary
*
* The (r)/(w)/(rw) markers on the fields below give the direction from the
* point of view of the if_requestencap() handler. Note that bufsize is
* listed above both as an input (capacity) and as an output (resulting
* length), i.e. handlers overwrite it on success.
*
* NOTE(review): hdata is described as optional, but the Ethernet handler
* reads it for AF_ARP requests without a NULL check; callers issuing
* AF_ARP requests should always supply it.
*/
struct if_encap_req {
u_char *buf; /* Destination buffer (w) */
size_t bufsize; /* size of provided buffer (r); resulting length on return */
ife_type rtype; /* request type (r) */
uint32_t flags; /* Request flags (r), IFENCAP_FLAG_* */
int family; /* Address family AF_* (r) */
int lladdr_off; /* offset from header start (w) */
int lladdr_len; /* lladdr length (r) */
char *lladdr; /* link-level address pointer (r) */
char *hdata; /* Upper layer header data (rw) */
};
/* Values for the flags field of struct if_encap_req. */
#define IFENCAP_FLAG_BROADCAST 0x02 /* Destination is broadcast */
/*
* Structure defining a network interface.
*
@ -235,6 +277,8 @@ struct ifnet {
void (*if_reassign) /* reassign to vnet routine */
(struct ifnet *, struct vnet *, char *);
if_get_counter_t if_get_counter; /* get counter values */
int (*if_requestencap) /* make link header from request */
(struct ifnet *, struct if_encap_req *);
/* Statistics. */
counter_u64_t if_counters[IFCOUNTERS];

View File

@ -51,14 +51,21 @@
*/
struct route {
struct rtentry *ro_rt;
struct llentry *ro_lle;
struct in_ifaddr *ro_ia;
int ro_flags;
char *ro_prepend;
uint16_t ro_plen;
uint16_t ro_flags;
struct sockaddr ro_dst;
};
#define RT_L2_ME_BIT 2 /* dst L2 addr is our address */
#define RT_MAY_LOOP_BIT 3 /* dst may require loop copy */
#define RT_HAS_HEADER_BIT 4 /* mbuf already have its header prepended */
#define RT_CACHING_CONTEXT 0x1 /* XXX: not used anywhere */
#define RT_NORTREF 0x2 /* doesn't hold reference on ro_rt */
#define RT_L2_ME (1 << RT_L2_ME_BIT)
#define RT_MAY_LOOP (1 << RT_MAY_LOOP_BIT)
#define RT_HAS_HEADER (1 << RT_HAS_HEADER_BIT)
struct rt_metrics {
u_long rmx_locks; /* Kernel must leave these values alone */

View File

@ -281,6 +281,37 @@ arptimer(void *arg)
CURVNET_RESTORE();
}
/*
 * Produce the link-layer header used to transmit ARP packet @ah on
 * @ifp, in the format expected by if_output().
 *
 * @buf is zeroed and then filled through the interface's
 * if_requestencap() method (IFENCAP_LL request for AF_ARP).  The target
 * hardware address of @ah is passed as the destination; when @bcast is
 * non-zero the request is flagged IFENCAP_FLAG_BROADCAST.  On entry
 * *@bufsize is the capacity of @buf; on success it is replaced by the
 * resulting header length.
 *
 * Returns 0 on success, otherwise the error from if_requestencap()
 * (with *@bufsize left unchanged).
 */
static int
arp_fillheader(struct ifnet *ifp, struct arphdr *ah, int bcast, u_char *buf,
    size_t *bufsize)
{
	struct if_encap_req req;
	int rc;

	bzero(buf, *bufsize);

	bzero(&req, sizeof(req));
	req.rtype = IFENCAP_LL;
	req.family = AF_ARP;
	req.flags = bcast ? IFENCAP_FLAG_BROADCAST : 0;
	req.lladdr = ar_tha(ah);
	req.hdata = (u_char *)ah;
	req.buf = buf;
	req.bufsize = *bufsize;

	rc = ifp->if_requestencap(ifp, &req);
	if (rc == 0)
		*bufsize = req.bufsize;

	return (rc);
}
/*
* Broadcast an ARP request. Caller specifies:
* - arp header source ip address
@ -295,6 +326,10 @@ arprequest(struct ifnet *ifp, const struct in_addr *sip,
struct arphdr *ah;
struct sockaddr sa;
u_char *carpaddr = NULL;
uint8_t linkhdr[LLE_MAX_LINKHDR];
size_t linkhdrsize;
struct route ro;
int error;
if (sip == NULL) {
/*
@ -350,12 +385,28 @@ arprequest(struct ifnet *ifp, const struct in_addr *sip,
bcopy(tip, ar_tpa(ah), ah->ar_pln);
sa.sa_family = AF_ARP;
sa.sa_len = 2;
/* Calculate link header for sending frame */
bzero(&ro, sizeof(ro));
linkhdrsize = sizeof(linkhdr);
error = arp_fillheader(ifp, ah, 1, linkhdr, &linkhdrsize);
if (error != 0 && error != EAFNOSUPPORT) {
ARP_LOG(LOG_ERR, "Failed to calculate ARP header on %s: %d\n",
if_name(ifp), error);
return;
}
ro.ro_prepend = linkhdr;
ro.ro_plen = linkhdrsize;
ro.ro_flags = 0;
m->m_flags |= M_BCAST;
m_clrprotoflags(m); /* Avoid confusing lower layers. */
(*ifp->if_output)(ifp, m, &sa, NULL);
(*ifp->if_output)(ifp, m, &sa, &ro);
ARPSTAT_INC(txrequests);
}
/*
* Resolve an IP address into an ethernet address - heavy version.
* Used internally by arpresolve().
@ -368,18 +419,20 @@ arprequest(struct ifnet *ifp, const struct in_addr *sip,
* Note that m_freem() handles NULL.
*/
static int
arpresolve_full(struct ifnet *ifp, int is_gw, int create, struct mbuf *m,
arpresolve_full(struct ifnet *ifp, int is_gw, int flags, struct mbuf *m,
const struct sockaddr *dst, u_char *desten, uint32_t *pflags)
{
struct llentry *la = NULL, *la_tmp;
struct mbuf *curr = NULL;
struct mbuf *next = NULL;
int error, renew;
char *lladdr;
int ll_len;
if (pflags != NULL)
*pflags = 0;
if (create == 0) {
if ((flags & LLE_CREATE) == 0) {
IF_AFDATA_RLOCK(ifp);
la = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, dst);
IF_AFDATA_RUNLOCK(ifp);
@ -413,7 +466,14 @@ arpresolve_full(struct ifnet *ifp, int is_gw, int create, struct mbuf *m,
if ((la->la_flags & LLE_VALID) &&
((la->la_flags & LLE_STATIC) || la->la_expire > time_uptime)) {
bcopy(&la->ll_addr, desten, ifp->if_addrlen);
if (flags & LLE_ADDRONLY) {
lladdr = la->ll_addr;
ll_len = ifp->if_addrlen;
} else {
lladdr = la->r_linkdata;
ll_len = la->r_hdrlen;
}
bcopy(lladdr, desten, ll_len);
/* Check if we have feedback request from arptimer() */
if (la->r_skip_req != 0) {
@ -485,15 +545,31 @@ arpresolve_full(struct ifnet *ifp, int is_gw, int create, struct mbuf *m,
/*
 * Resolve an IP address into an ethernet address.
 *
 * Compat entry point for callers that want the bare link-layer address
 * rather than a pre-calculated header: LLE_ADDRONLY is added to @flags
 * and the request is passed to arpresolve_full() with no mbuf and
 * is_gw set to 0.  The result is stored in @desten and the return value
 * of arpresolve_full() is passed through.
 */
int
arpresolve_addr(struct ifnet *ifp, int flags, const struct sockaddr *dst,
    char *desten, uint32_t *pflags)
{

	return (arpresolve_full(ifp, 0, flags | LLE_ADDRONLY, NULL, dst,
	    desten, pflags));
}
/*
* Lookups link header based on an IP address.
* On input:
* ifp is the interface we use
* is_gw != 0 if @dst represents gateway to some destination
* m is the mbuf. May be NULL if we don't have a packet.
* dst is the next hop,
* desten is the storage to put LL address.
* desten is the storage to put LL header.
* flags returns subset of lle flags: LLE_VALID | LLE_IFADDR
*
* On success, desten and flags are filled in and the function returns 0;
* On success, full/partial link header and flags are filled in and
* the function returns 0.
* If the packet must be held pending resolution, we return EWOULDBLOCK
* On other errors, we return the corresponding error code.
* Note that m_freem() handles NULL.
@ -525,7 +601,7 @@ arpresolve(struct ifnet *ifp, int is_gw, struct mbuf *m,
la = lla_lookup(LLTABLE(ifp), LLE_UNLOCKED, dst);
if (la != NULL && (la->r_flags & RLLE_VALID) != 0) {
/* Entry found, let's copy lle info */
bcopy(&la->ll_addr, desten, ifp->if_addrlen);
bcopy(la->r_linkdata, desten, la->r_hdrlen);
if (pflags != NULL)
*pflags = LLE_VALID | (la->r_flags & RLLE_IFADDR);
/* Check if we have feedback request from arptimer() */
@ -539,7 +615,8 @@ arpresolve(struct ifnet *ifp, int is_gw, struct mbuf *m,
}
IF_AFDATA_RUNLOCK(ifp);
return (arpresolve_full(ifp, is_gw, 1, m, dst, desten, pflags));
return (arpresolve_full(ifp, is_gw, la == NULL ? LLE_CREATE : 0, m, dst,
desten, pflags));
}
/*
@ -683,6 +760,11 @@ in_arpinput(struct mbuf *m)
struct sockaddr_in sin;
struct sockaddr *dst;
struct nhop4_basic nh4;
uint8_t linkhdr[LLE_MAX_LINKHDR];
struct route ro;
size_t linkhdrsize;
int lladdr_off;
int error;
sin.sin_len = sizeof(struct sockaddr_in);
sin.sin_family = AF_INET;
@ -850,8 +932,14 @@ in_arpinput(struct mbuf *m)
else if (itaddr.s_addr == myaddr.s_addr) {
/*
* Request/reply to our address, but no lle exists yet.
* Try to create new llentry.
* Calculate full link prepend to use in lle.
*/
linkhdrsize = sizeof(linkhdr);
if (lltable_calc_llheader(ifp, AF_INET, ar_sha(ah), linkhdr,
&linkhdrsize, &lladdr_off) != 0)
goto reply;
/* Allocate new entry */
la = lltable_alloc_entry(LLTABLE(ifp), 0, dst);
if (la == NULL) {
@ -863,7 +951,8 @@ in_arpinput(struct mbuf *m)
*/
goto reply;
}
lltable_set_entry_addr(ifp, la, ar_sha(ah));
lltable_set_entry_addr(ifp, la, linkhdr, linkhdrsize,
lladdr_off);
IF_AFDATA_WLOCK(ifp);
LLE_WLOCK(la);
@ -921,7 +1010,7 @@ in_arpinput(struct mbuf *m)
if ((lle != NULL) && (lle->la_flags & LLE_PUB)) {
(void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
(void)memcpy(ar_sha(ah), &lle->ll_addr, ah->ar_hln);
(void)memcpy(ar_sha(ah), lle->ll_addr, ah->ar_hln);
LLE_RUNLOCK(lle);
} else {
@ -991,8 +1080,29 @@ in_arpinput(struct mbuf *m)
m->m_pkthdr.rcvif = NULL;
sa.sa_family = AF_ARP;
sa.sa_len = 2;
/* Calculate link header for sending frame */
bzero(&ro, sizeof(ro));
linkhdrsize = sizeof(linkhdr);
error = arp_fillheader(ifp, ah, 0, linkhdr, &linkhdrsize);
/*
* arp_fillheader() may fail due to lack of support inside encap request
* routing. This is not necessarily an error, AF_ARP can/should be handled
* by if_output().
*/
if (error != 0 && error != EAFNOSUPPORT) {
ARP_LOG(LOG_ERR, "Failed to calculate ARP header on %s: %d\n",
if_name(ifp), error);
return;
}
ro.ro_prepend = linkhdr;
ro.ro_plen = linkhdrsize;
ro.ro_flags = 0;
m_clrprotoflags(m); /* Avoid confusing lower layers. */
(*ifp->if_output)(ifp, m, &sa, NULL);
(*ifp->if_output)(ifp, m, &sa, &ro);
ARPSTAT_INC(txreplies);
return;
@ -1011,6 +1121,9 @@ arp_check_update_lle(struct arphdr *ah, struct in_addr isaddr, struct ifnet *ifp
{
struct sockaddr sa;
struct mbuf *m_hold, *m_hold_next;
uint8_t linkhdr[LLE_MAX_LINKHDR];
size_t linkhdrsize;
int lladdr_off;
LLE_WLOCK_ASSERT(la);
@ -1027,7 +1140,7 @@ arp_check_update_lle(struct arphdr *ah, struct in_addr isaddr, struct ifnet *ifp
return;
}
if ((la->la_flags & LLE_VALID) &&
bcmp(ar_sha(ah), &la->ll_addr, ifp->if_addrlen)) {
bcmp(ar_sha(ah), la->ll_addr, ifp->if_addrlen)) {
if (la->la_flags & LLE_STATIC) {
LLE_WUNLOCK(la);
if (log_arp_permanent_modify)
@ -1050,31 +1163,19 @@ arp_check_update_lle(struct arphdr *ah, struct in_addr isaddr, struct ifnet *ifp
}
}
/* Calculate full link prepend to use in lle */
linkhdrsize = sizeof(linkhdr);
if (lltable_calc_llheader(ifp, AF_INET, ar_sha(ah), linkhdr,
&linkhdrsize, &lladdr_off) != 0)
return;
/* Check if something has changed */
if (memcmp(&la->ll_addr, ar_sha(ah), ifp->if_addrlen) != 0 ||
if (memcmp(la->r_linkdata, linkhdr, linkhdrsize) != 0 ||
(la->la_flags & LLE_VALID) == 0) {
/* Perform real LLE update */
/* use afdata WLOCK to update fields */
LLE_ADDREF(la);
LLE_WUNLOCK(la);
IF_AFDATA_WLOCK(ifp);
LLE_WLOCK(la);
/*
* Since we droppped LLE lock, other thread might have deleted
* this lle. Check and return
*/
if ((la->la_flags & LLE_DELETED) != 0) {
IF_AFDATA_WUNLOCK(ifp);
LLE_FREE_LOCKED(la);
/* Try to perform LLE update */
if (lltable_try_set_entry_addr(ifp, la, linkhdr, linkhdrsize,
lladdr_off) == 0)
return;
}
/* Update data */
lltable_set_entry_addr(ifp, la, ar_sha(ah));
IF_AFDATA_WUNLOCK(ifp);
LLE_REMREF(la);
/* Clear fast path feedback request if set */
la->r_skip_req = 0;
@ -1215,10 +1316,12 @@ arp_handle_ifllchange(struct ifnet *ifp)
/*
* A handler for interface link layer address change event.
*/
static __noinline void
static void
arp_iflladdr(void *arg __unused, struct ifnet *ifp)
{
lltable_update_ifaddr(LLTABLE(ifp));
if ((ifp->if_flags & IFF_UP) != 0)
arp_handle_ifllchange(ifp);
}
@ -1231,5 +1334,8 @@ arp_init(void)
if (IS_DEFAULT_VNET(curvnet))
iflladdr_tag = EVENTHANDLER_REGISTER(iflladdr_event,
arp_iflladdr, NULL, EVENTHANDLER_PRI_ANY);
if (IS_DEFAULT_VNET(curvnet))
iflladdr_tag = EVENTHANDLER_REGISTER(iflladdr_event,
arp_iflladdr, NULL, EVENTHANDLER_PRI_ANY);
}
SYSINIT(arp, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, arp_init, 0);

View File

@ -114,6 +114,8 @@ extern u_char ether_ipmulticast_max[ETHER_ADDR_LEN];
struct ifaddr;
int arpresolve_addr(struct ifnet *ifp, int flags,
const struct sockaddr *dst, char *desten, uint32_t *pflags);
int arpresolve(struct ifnet *ifp, int is_gw, struct mbuf *m,
const struct sockaddr *dst, u_char *desten, uint32_t *pflags);
void arprequest(struct ifnet *, const struct in_addr *,

View File

@ -1240,6 +1240,9 @@ in_lltable_alloc(struct lltable *llt, u_int flags, const struct sockaddr *l3addr
const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr;
struct ifnet *ifp = llt->llt_ifp;
struct llentry *lle;
char linkhdr[LLE_MAX_LINKHDR];
size_t linkhdrsize;
int lladdr_off;
KASSERT(l3addr->sa_family == AF_INET,
("sin_family %d", l3addr->sa_family));
@ -1262,7 +1265,12 @@ in_lltable_alloc(struct lltable *llt, u_int flags, const struct sockaddr *l3addr
if (flags & LLE_STATIC)
lle->r_flags |= RLLE_VALID;
if ((flags & LLE_IFADDR) == LLE_IFADDR) {
lltable_set_entry_addr(ifp, lle, IF_LLADDR(ifp));
linkhdrsize = LLE_MAX_LINKHDR;
if (lltable_calc_llheader(ifp, AF_INET, IF_LLADDR(ifp),
linkhdr, &linkhdrsize, &lladdr_off) != 0)
return (NULL);
lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize,
lladdr_off);
lle->la_flags |= LLE_STATIC;
lle->r_flags |= (RLLE_VALID | RLLE_IFADDR);
}
@ -1349,7 +1357,7 @@ in_lltable_dump_entry(struct lltable *llt, struct llentry *lle,
sdl->sdl_type = ifp->if_type;
if ((lle->la_flags & LLE_VALID) == LLE_VALID) {
sdl->sdl_alen = ifp->if_addrlen;
bcopy(&lle->ll_addr, LLADDR(sdl), ifp->if_addrlen);
bcopy(lle->ll_addr, LLADDR(sdl), ifp->if_addrlen);
} else {
sdl->sdl_alen = 0;
bzero(LLADDR(sdl), ifp->if_addrlen);

View File

@ -567,7 +567,7 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
RO_RTFREE(ro);
if (have_ia_ref)
ifa_free(&ia->ia_ifa);
ro->ro_lle = NULL;
ro->ro_prepend = NULL;
rte = NULL;
gw = dst;
ip = mtod(m, struct ip *);

View File

@ -428,7 +428,7 @@ toe_lle_event(void *arg __unused, struct llentry *lle, int evt)
KASSERT(lle->la_flags & LLE_VALID,
("%s: %p resolved but not valid?", __func__, lle));
lladdr = (uint8_t *)&lle->ll_addr;
lladdr = (uint8_t *)lle->ll_addr;
#ifdef VLAN_TAG
VLAN_TAG(ifp, &vtag);
#endif

View File

@ -2632,7 +2632,7 @@ icmp6_redirect_output(struct mbuf *m0, struct rtentry *rt)
nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
nd_opt->nd_opt_len = len >> 3;
lladdr = (char *)(nd_opt + 1);
bcopy(&ln->ll_addr, lladdr, ifp->if_addrlen);
bcopy(ln->ll_addr, lladdr, ifp->if_addrlen);
p += len;
}
}

View File

@ -2245,6 +2245,9 @@ in6_lltable_alloc(struct lltable *llt, u_int flags,
const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)l3addr;
struct ifnet *ifp = llt->llt_ifp;
struct llentry *lle;
char linkhdr[LLE_MAX_LINKHDR];
size_t linkhdrsize;
int lladdr_off;
KASSERT(l3addr->sa_family == AF_INET6,
("sin_family %d", l3addr->sa_family));
@ -2265,7 +2268,12 @@ in6_lltable_alloc(struct lltable *llt, u_int flags,
}
lle->la_flags = flags;
if ((flags & LLE_IFADDR) == LLE_IFADDR) {
lltable_set_entry_addr(ifp, lle, IF_LLADDR(ifp));
linkhdrsize = LLE_MAX_LINKHDR;
if (lltable_calc_llheader(ifp, AF_INET6, IF_LLADDR(ifp),
linkhdr, &linkhdrsize, &lladdr_off) != 0)
return (NULL);
lltable_set_entry_addr(ifp, lle, linkhdr, linkhdrsize,
lladdr_off);
lle->la_flags |= LLE_STATIC;
}

View File

@ -375,9 +375,9 @@ extern const struct in6_addr in6addr_linklocal_allv2routers;
#if __BSD_VISIBLE
struct route_in6 {
struct rtentry *ro_rt;
struct llentry *ro_lle;
struct in6_addr *ro_ia6;
int ro_flags;
char *ro_prepend;
uint16_t ro_plen;
uint16_t ro_flags;
struct sockaddr_in6 ro_dst;
};
#endif

View File

@ -111,7 +111,7 @@ VNET_DEFINE(int, nd6_debug) = 1;
VNET_DEFINE(int, nd6_debug) = 0;
#endif
static eventhandler_tag lle_event_eh;
static eventhandler_tag lle_event_eh, iflladdr_event_eh;
/* for debugging? */
#if 0
@ -137,7 +137,7 @@ static void nd6_llinfo_timer(void *);
static void nd6_llinfo_settimer_locked(struct llentry *, long);
static void clear_llinfo_pqueue(struct llentry *);
static void nd6_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
static int nd6_resolve_slow(struct ifnet *, struct mbuf *,
static int nd6_resolve_slow(struct ifnet *, int, struct mbuf *,
const struct sockaddr_in6 *, u_char *, uint32_t *);
static int nd6_need_cache(struct ifnet *);
@ -188,7 +188,7 @@ nd6_lle_event(void *arg __unused, struct llentry *lle, int evt)
gw.sdl_index = ifp->if_index;
gw.sdl_type = ifp->if_type;
if (evt == LLENTRY_RESOLVED)
bcopy(&lle->ll_addr, gw.sdl_data, ifp->if_addrlen);
bcopy(lle->ll_addr, gw.sdl_data, ifp->if_addrlen);
rtinfo.rti_info[RTAX_DST] = (struct sockaddr *)&dst;
rtinfo.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&gw;
rtinfo.rti_addrs = RTA_DST | RTA_GATEWAY;
@ -196,6 +196,16 @@ nd6_lle_event(void *arg __unused, struct llentry *lle, int evt)
type == RTM_ADD ? RTF_UP: 0), 0, RT_DEFAULT_FIB);
}
/*
* A handler for interface link layer address change event.
*
* Registered for iflladdr_event by nd6_init(). Passes the interface's
* IPv6 link-layer table, LLTABLE6(ifp), to lltable_update_ifaddr().
* The event handler argument is unused.
*/
static void
nd6_iflladdr(void *arg __unused, struct ifnet *ifp)
{
lltable_update_ifaddr(LLTABLE6(ifp));
}
void
nd6_init(void)
{
@ -211,9 +221,12 @@ nd6_init(void)
nd6_slowtimo, curvnet);
nd6_dad_init();
if (IS_DEFAULT_VNET(curvnet))
if (IS_DEFAULT_VNET(curvnet)) {
lle_event_eh = EVENTHANDLER_REGISTER(lle_event, nd6_lle_event,
NULL, EVENTHANDLER_PRI_ANY);
iflladdr_event_eh = EVENTHANDLER_REGISTER(iflladdr_event,
nd6_iflladdr, NULL, EVENTHANDLER_PRI_ANY);
}
}
#ifdef VIMAGE
@ -223,8 +236,10 @@ nd6_destroy()
callout_drain(&V_nd6_slowtimo_ch);
callout_drain(&V_nd6_timer_ch);
if (IS_DEFAULT_VNET(curvnet))
if (IS_DEFAULT_VNET(curvnet)) {
EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh);
EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_event_eh);
}
}
#endif
@ -1844,6 +1859,9 @@ nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr,
uint16_t router = 0;
struct sockaddr_in6 sin6;
struct mbuf *chain = NULL;
u_char linkhdr[LLE_MAX_LINKHDR];
size_t linkhdrsize;
int lladdr_off;
IF_AFDATA_UNLOCK_ASSERT(ifp);
@ -1878,8 +1896,15 @@ nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr,
* Since we already know all the data for the new entry,
* fill it before insertion.
*/
if (lladdr != NULL)
lltable_set_entry_addr(ifp, ln, lladdr);
if (lladdr != NULL) {
linkhdrsize = sizeof(linkhdr);
if (lltable_calc_llheader(ifp, AF_INET6, lladdr,
linkhdr, &linkhdrsize, &lladdr_off) != 0)
return;
lltable_set_entry_addr(ifp, ln, linkhdr, linkhdrsize,
lladdr_off);
}
IF_AFDATA_WLOCK(ifp);
LLE_WLOCK(ln);
/* Prefer any existing lle over newly-created one */
@ -1911,7 +1936,7 @@ nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr,
olladdr = (ln->la_flags & LLE_VALID) ? 1 : 0;
if (olladdr && lladdr) {
llchange = bcmp(lladdr, &ln->ll_addr,
llchange = bcmp(lladdr, ln->ll_addr,
ifp->if_addrlen);
} else if (!olladdr && lladdr)
llchange = 1;
@ -1937,7 +1962,13 @@ nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr,
* Record source link-layer address
* XXX is it dependent to ifp->if_type?
*/
if (lltable_try_set_entry_addr(ifp, ln, lladdr) == 0) {
linkhdrsize = sizeof(linkhdr);
if (lltable_calc_llheader(ifp, AF_INET6, lladdr,
linkhdr, &linkhdrsize, &lladdr_off) != 0)
return;
if (lltable_try_set_entry_addr(ifp, ln, linkhdr, linkhdrsize,
lladdr_off) == 0) {
/* Entry was deleted */
return;
}
@ -2093,8 +2124,8 @@ nd6_output_ifp(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m,
}
/*
* Do L2 address resolution for @sa_dst address. Stores found
* address in @desten buffer. Copy of lle ln_flags can be also
* Lookup link header for @sa_dst address. Stores found
* data in @desten buffer. Copy of lle ln_flags can be also
* saved in @pflags if @pflags is non-NULL.
*
* If destination LLE does not exists or lle state modification
@ -2144,7 +2175,7 @@ nd6_resolve(struct ifnet *ifp, int is_gw, struct mbuf *m,
ln = nd6_lookup(&dst6->sin6_addr, LLE_UNLOCKED, ifp);
if (ln != NULL && (ln->r_flags & RLLE_VALID) != 0) {
/* Entry found, let's copy lle info */
bcopy(&ln->ll_addr, desten, ifp->if_addrlen);
bcopy(ln->r_linkdata, desten, ln->r_hdrlen);
if (pflags != NULL)
*pflags = LLE_VALID | (ln->r_flags & RLLE_IFADDR);
/* Check if we have feedback request from nd6 timer */
@ -2159,7 +2190,7 @@ nd6_resolve(struct ifnet *ifp, int is_gw, struct mbuf *m,
}
IF_AFDATA_RUNLOCK(ifp);
return (nd6_resolve_slow(ifp, m, dst6, desten, pflags));
return (nd6_resolve_slow(ifp, 0, m, dst6, desten, pflags));
}
@ -2175,12 +2206,13 @@ nd6_resolve(struct ifnet *ifp, int is_gw, struct mbuf *m,
* Set noinline to be dtrace-friendly
*/
static __noinline int
nd6_resolve_slow(struct ifnet *ifp, struct mbuf *m,
nd6_resolve_slow(struct ifnet *ifp, int flags, struct mbuf *m,
const struct sockaddr_in6 *dst, u_char *desten, uint32_t *pflags)
{
struct llentry *lle = NULL, *lle_tmp;
struct in6_addr *psrc, src;
int send_ns;
int send_ns, ll_len;
char *lladdr;
/*
* Address resolution or Neighbor Unreachability Detection
@ -2252,7 +2284,14 @@ nd6_resolve_slow(struct ifnet *ifp, struct mbuf *m,
* send the packet.
*/
if (lle->ln_state > ND6_LLINFO_INCOMPLETE) {
bcopy(&lle->ll_addr, desten, ifp->if_addrlen);
if (flags & LLE_ADDRONLY) {
lladdr = lle->ll_addr;
ll_len = ifp->if_addrlen;
} else {
lladdr = lle->r_linkdata;
ll_len = lle->r_hdrlen;
}
bcopy(lladdr, desten, ll_len);
if (pflags != NULL)
*pflags = lle->la_flags;
LLE_WUNLOCK(lle);
@ -2312,6 +2351,27 @@ nd6_resolve_slow(struct ifnet *ifp, struct mbuf *m,
return (EWOULDBLOCK);
}
/*
* Do L2 address resolution for @dst address. Stores found
* address in @desten buffer. Copy of lle flags can be also
* saved in @pflags if @pflags is non-NULL.
*
* Compat wrapper around nd6_resolve_slow(): LLE_ADDRONLY is OR-ed into
* @flags and no mbuf is supplied (NULL is passed in its place).
*
* Return values (passed through unchanged from nd6_resolve_slow()):
* - 0 on success (address copied to buffer).
* - EWOULDBLOCK (no local error, but address is still unresolved)
* - other errors (alloc failure, etc)
*/
int
nd6_resolve_addr(struct ifnet *ifp, int flags, const struct sockaddr *dst,
char *desten, uint32_t *pflags)
{
int error;
/* Request the link-layer address itself, not the full link header. */
flags |= LLE_ADDRONLY;
/* @dst is handed on as a sockaddr_in6; the cast is not checked here. */
error = nd6_resolve_slow(ifp, flags, NULL,
(const struct sockaddr_in6 *)dst, desten, pflags);
return (error);
}
int
nd6_flush_holdchain(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *chain,

View File

@ -410,6 +410,8 @@ void nd6_setmtu(struct ifnet *);
void nd6_llinfo_setstate(struct llentry *lle, int newstate);
void nd6_timer(void *);
void nd6_purge(struct ifnet *);
int nd6_resolve_addr(struct ifnet *ifp, int flags, const struct sockaddr *dst,
char *desten, uint32_t *pflags);
int nd6_resolve(struct ifnet *, int, struct mbuf *,
const struct sockaddr *, u_char *, uint32_t *);
int nd6_ioctl(u_long, caddr_t, struct ifnet *);

View File

@ -643,6 +643,9 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
union nd_opts ndopts;
struct mbuf *chain = NULL;
struct sockaddr_in6 sin6;
u_char linkhdr[LLE_MAX_LINKHDR];
size_t linkhdrsize;
int lladdr_off;
char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
if (ip6->ip6_hlim != 255) {
@ -765,7 +768,13 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
/*
* Record link-layer address, and update the state.
*/
if (lltable_try_set_entry_addr(ifp, ln, lladdr) == 0) {
linkhdrsize = sizeof(linkhdr);
if (lltable_calc_llheader(ifp, AF_INET6, lladdr,
linkhdr, &linkhdrsize, &lladdr_off) != 0)
return;
if (lltable_try_set_entry_addr(ifp, ln, linkhdr, linkhdrsize,
lladdr_off) == 0) {
ln = NULL;
goto freeit;
}
@ -792,7 +801,7 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
llchange = 0;
else {
if (ln->la_flags & LLE_VALID) {
if (bcmp(lladdr, &ln->ll_addr, ifp->if_addrlen))
if (bcmp(lladdr, ln->ll_addr, ifp->if_addrlen))
llchange = 1;
else
llchange = 0;
@ -834,9 +843,12 @@ nd6_na_input(struct mbuf *m, int off, int icmp6len)
* Update link-local address, if any.
*/
if (lladdr != NULL) {
int ret;
ret = lltable_try_set_entry_addr(ifp, ln,lladdr);
if (ret == 0) {
linkhdrsize = sizeof(linkhdr);
if (lltable_calc_llheader(ifp, AF_INET6, lladdr,
linkhdr, &linkhdrsize, &lladdr_off) != 0)
goto freeit;
if (lltable_try_set_entry_addr(ifp, ln, linkhdr,
linkhdrsize, lladdr_off) == 0) {
ln = NULL;
goto freeit;
}

View File

@ -1257,15 +1257,15 @@ ipoib_output(struct ifnet *ifp, struct mbuf *m,
const struct sockaddr *dst, struct route *ro)
{
u_char edst[INFINIBAND_ALEN];
#if defined(INET) || defined(INET6)
struct llentry *lle = NULL;
#endif
struct rtentry *rt0 = NULL;
struct ipoib_header *eh;
int error = 0, is_gw = 0;
short type;
if (ro != NULL) {
if (!(m->m_flags & (M_BCAST | M_MCAST)))
lle = ro->ro_lle;
rt0 = ro->ro_rt;
if (rt0 != NULL && (rt0->rt_flags & RTF_GATEWAY) != 0)
is_gw = 1;
@ -1291,7 +1291,7 @@ ipoib_output(struct ifnet *ifp, struct mbuf *m,
#ifdef INET
case AF_INET:
if (lle != NULL && (lle->la_flags & LLE_VALID))
memcpy(edst, &lle->ll_addr.mac8, sizeof(edst));
memcpy(edst, lle->ll_addr, sizeof(edst));
else if (m->m_flags & M_MCAST)
ip_ib_mc_map(((struct sockaddr_in *)dst)->sin_addr.s_addr, ifp->if_broadcastaddr, edst);
else
@ -1329,7 +1329,7 @@ ipoib_output(struct ifnet *ifp, struct mbuf *m,
#ifdef INET6
case AF_INET6:
if (lle != NULL && (lle->la_flags & LLE_VALID))
memcpy(edst, &lle->ll_addr.mac8, sizeof(edst));
memcpy(edst, lle->ll_addr, sizeof(edst));
else if (m->m_flags & M_MCAST)
ipv6_ib_mc_map(&((struct sockaddr_in6 *)dst)->sin6_addr, ifp->if_broadcastaddr, edst);
else