Rework if_gif(4) to use new encap_lookup_t method to speed up lookup

of needed interface when many gif interfaces are present.

Remove rmlock from gif_softc, use epoch(9) and CK_LIST instead.
Move more AF-related code into AF-related locations.
Use a hash table to speed up lookup of the needed softc. Interfaces
with the GIF_IGNORE_SOURCE flag are stored in a plain CK_LIST.
Sysctl net.link.gif.parallel_tunnels is removed. The removal was planned
16 years ago, and actually it could work only for the outbound direction.
Each protocol that can be handled by an if_gif(4) interface is registered
with a separate encap handler; this helps avoid invoking the handler
for unrelated protocols (GRE, PIM, etc.).

This change dramatically improves performance when many gif(4)
interfaces are used.

Sponsored by:	Yandex LLC
This commit is contained in:
ae 2018-06-05 21:24:59 +00:00
parent 8066b881af
commit d1ee857bcf
5 changed files with 613 additions and 567 deletions

View File

@ -29,7 +29,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd September 10, 2015
.Dd June 5, 2018
.Dt GIF 4
.Os
.Sh NAME
@ -169,14 +169,6 @@ This behavior may be modified at runtime by setting the
variable
.Va net.link.gif.max_nesting
to the desired level of nesting.
Additionally,
.Nm
tunnels are restricted to one per pair of end points.
Parallel tunnels may be enabled by setting the
.Xr sysctl 8
variable
.Va net.link.gif.parallel_tunnels
to 1.
.Sh SEE ALSO
.Xr gre 4 ,
.Xr inet 4 ,

View File

@ -2,6 +2,7 @@
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
* Copyright (c) 2018 Andrey V. Elsukov <ae@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -39,7 +40,6 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
@ -55,7 +55,6 @@ __FBSDID("$FreeBSD$");
#include <sys/syslog.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/conf.h>
#include <machine/cpu.h>
@ -85,8 +84,6 @@ __FBSDID("$FreeBSD$");
#include <netinet/ip6.h>
#include <netinet6/ip6_ecn.h>
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet6/ip6protosw.h>
#endif /* INET6 */
#include <netinet/ip_encap.h>
@ -98,32 +95,17 @@ __FBSDID("$FreeBSD$");
static const char gifname[] = "gif";
/*
* gif_mtx protects a per-vnet gif_softc_list.
*/
static VNET_DEFINE(struct mtx, gif_mtx);
#define V_gif_mtx VNET(gif_mtx)
static MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface");
static VNET_DEFINE(LIST_HEAD(, gif_softc), gif_softc_list);
#define V_gif_softc_list VNET(gif_softc_list)
MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface");
static struct sx gif_ioctl_sx;
SX_SYSINIT(gif_ioctl_sx, &gif_ioctl_sx, "gif_ioctl");
#define GIF_LIST_LOCK_INIT(x) mtx_init(&V_gif_mtx, "gif_mtx", \
NULL, MTX_DEF)
#define GIF_LIST_LOCK_DESTROY(x) mtx_destroy(&V_gif_mtx)
#define GIF_LIST_LOCK(x) mtx_lock(&V_gif_mtx)
#define GIF_LIST_UNLOCK(x) mtx_unlock(&V_gif_mtx)
void (*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp, int af);
void (*ng_gif_input_orphan_p)(struct ifnet *ifp, struct mbuf *m, int af);
void (*ng_gif_attach_p)(struct ifnet *ifp);
void (*ng_gif_detach_p)(struct ifnet *ifp);
static int gif_check_nesting(struct ifnet *, struct mbuf *);
static int gif_set_tunnel(struct ifnet *, struct sockaddr *,
struct sockaddr *);
static void gif_delete_tunnel(struct ifnet *);
static void gif_delete_tunnel(struct gif_softc *);
static int gif_ioctl(struct ifnet *, u_long, caddr_t);
static int gif_transmit(struct ifnet *, struct mbuf *);
static void gif_qflush(struct ifnet *);
@ -132,8 +114,6 @@ static void gif_clone_destroy(struct ifnet *);
static VNET_DEFINE(struct if_clone *, gif_cloner);
#define V_gif_cloner VNET(gif_cloner)
static int gifmodevent(module_t, int, void *);
SYSCTL_DECL(_net_link);
static SYSCTL_NODE(_net_link, IFT_GIF, gif, CTLFLAG_RW, 0,
"Generic Tunnel Interface");
@ -153,21 +133,6 @@ static VNET_DEFINE(int, max_gif_nesting) = MAX_GIF_NEST;
SYSCTL_INT(_net_link_gif, OID_AUTO, max_nesting, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(max_gif_nesting), 0, "Max nested tunnels");
/*
* By default, we disallow creation of multiple tunnels between the same
* pair of addresses. Some applications require this functionality so
* we allow control over this check here.
*/
#ifdef XBONEHACK
static VNET_DEFINE(int, parallel_tunnels) = 1;
#else
static VNET_DEFINE(int, parallel_tunnels) = 0;
#endif
#define V_parallel_tunnels VNET(parallel_tunnels)
SYSCTL_INT(_net_link_gif, OID_AUTO, parallel_tunnels,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(parallel_tunnels), 0,
"Allow parallel tunnels?");
static int
gif_clone_create(struct if_clone *ifc, int unit, caddr_t params)
{
@ -176,20 +141,15 @@ gif_clone_create(struct if_clone *ifc, int unit, caddr_t params)
sc = malloc(sizeof(struct gif_softc), M_GIF, M_WAITOK | M_ZERO);
sc->gif_fibnum = curthread->td_proc->p_fibnum;
GIF2IFP(sc) = if_alloc(IFT_GIF);
GIF_LOCK_INIT(sc);
GIF2IFP(sc)->if_softc = sc;
if_initname(GIF2IFP(sc), gifname, unit);
GIF2IFP(sc)->if_addrlen = 0;
GIF2IFP(sc)->if_mtu = GIF_MTU;
GIF2IFP(sc)->if_flags = IFF_POINTOPOINT | IFF_MULTICAST;
#if 0
/* turn off ingress filter */
GIF2IFP(sc)->if_flags |= IFF_LINK2;
#endif
GIF2IFP(sc)->if_ioctl = gif_ioctl;
GIF2IFP(sc)->if_transmit = gif_transmit;
GIF2IFP(sc)->if_qflush = gif_qflush;
GIF2IFP(sc)->if_transmit = gif_transmit;
GIF2IFP(sc)->if_qflush = gif_qflush;
GIF2IFP(sc)->if_output = gif_output;
GIF2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE;
GIF2IFP(sc)->if_capenable |= IFCAP_LINKSTATE;
@ -198,9 +158,6 @@ gif_clone_create(struct if_clone *ifc, int unit, caddr_t params)
if (ng_gif_attach_p != NULL)
(*ng_gif_attach_p)(GIF2IFP(sc));
GIF_LIST_LOCK();
LIST_INSERT_HEAD(&V_gif_softc_list, sc, gif_list);
GIF_LIST_UNLOCK();
return (0);
}
@ -211,10 +168,7 @@ gif_clone_destroy(struct ifnet *ifp)
sx_xlock(&gif_ioctl_sx);
sc = ifp->if_softc;
gif_delete_tunnel(ifp);
GIF_LIST_LOCK();
LIST_REMOVE(sc, gif_list);
GIF_LIST_UNLOCK();
gif_delete_tunnel(sc);
if (ng_gif_detach_p != NULL)
(*ng_gif_detach_p)(ifp);
bpfdetach(ifp);
@ -222,8 +176,8 @@ gif_clone_destroy(struct ifnet *ifp)
ifp->if_softc = NULL;
sx_xunlock(&gif_ioctl_sx);
GIF_WAIT();
if_free(ifp);
GIF_LOCK_DESTROY(sc);
free(sc, M_GIF);
}
@ -231,10 +185,14 @@ static void
vnet_gif_init(const void *unused __unused)
{
LIST_INIT(&V_gif_softc_list);
GIF_LIST_LOCK_INIT();
V_gif_cloner = if_clone_simple(gifname, gif_clone_create,
gif_clone_destroy, 0);
#ifdef INET
in_gif_init();
#endif
#ifdef INET6
in6_gif_init();
#endif
}
VNET_SYSINIT(vnet_gif_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
vnet_gif_init, NULL);
@ -244,7 +202,12 @@ vnet_gif_uninit(const void *unused __unused)
{
if_clone_detach(V_gif_cloner);
GIF_LIST_LOCK_DESTROY();
#ifdef INET
in_gif_uninit();
#endif
#ifdef INET6
in6_gif_uninit();
#endif
}
VNET_SYSUNINIT(vnet_gif_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
vnet_gif_uninit, NULL);
@ -272,65 +235,25 @@ static moduledata_t gif_mod = {
DECLARE_MODULE(if_gif, gif_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(if_gif, 1);
int
gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
struct gif_list *
gif_hashinit(void)
{
GIF_RLOCK_TRACKER;
const struct ip *ip;
struct gif_softc *sc;
int ret;
struct gif_list *hash;
int i;
sc = (struct gif_softc *)arg;
if (sc == NULL || (GIF2IFP(sc)->if_flags & IFF_UP) == 0)
return (0);
hash = malloc(sizeof(struct gif_list) * GIF_HASH_SIZE,
M_GIF, M_WAITOK);
for (i = 0; i < GIF_HASH_SIZE; i++)
CK_LIST_INIT(&hash[i]);
ret = 0;
GIF_RLOCK(sc);
return (hash);
}
/* no physical address */
if (sc->gif_family == 0)
goto done;
void
gif_hashdestroy(struct gif_list *hash)
{
switch (proto) {
#ifdef INET
case IPPROTO_IPV4:
#endif
#ifdef INET6
case IPPROTO_IPV6:
#endif
case IPPROTO_ETHERIP:
break;
default:
goto done;
}
/* Bail on short packets */
M_ASSERTPKTHDR(m);
if (m->m_pkthdr.len < sizeof(struct ip))
goto done;
ip = mtod(m, const struct ip *);
switch (ip->ip_v) {
#ifdef INET
case 4:
if (sc->gif_family != AF_INET)
goto done;
ret = in_gif_encapcheck(m, off, proto, arg);
break;
#endif
#ifdef INET6
case 6:
if (m->m_pkthdr.len < sizeof(struct ip6_hdr))
goto done;
if (sc->gif_family != AF_INET6)
goto done;
ret = in6_gif_encapcheck(m, off, proto, arg);
break;
#endif
}
done:
GIF_RUNLOCK(sc);
return (ret);
free(hash, M_GIF);
}
static int
@ -357,6 +280,7 @@ gif_transmit(struct ifnet *ifp, struct mbuf *m)
}
#endif
error = ENETDOWN;
GIF_RLOCK();
sc = ifp->if_softc;
if ((ifp->if_flags & IFF_MONITOR) != 0 ||
(ifp->if_flags & IFF_UP) == 0 ||
@ -444,6 +368,7 @@ gif_transmit(struct ifnet *ifp, struct mbuf *m)
err:
if (error)
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
GIF_RUNLOCK();
return (error);
}
@ -616,7 +541,8 @@ gif_input(struct mbuf *m, struct ifnet *ifp, int proto, uint8_t ecn)
break;
#endif
case AF_LINK:
n = sizeof(struct etherip_header) + sizeof(struct ether_header);
n = sizeof(struct etherip_header) +
sizeof(struct ether_header);
if (n > m->m_len)
m = m_pullup(m, n);
if (m == NULL)
@ -674,20 +600,11 @@ drop:
if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
}
/* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
int
static int
gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
GIF_RLOCK_TRACKER;
struct ifreq *ifr = (struct ifreq*)data;
struct sockaddr *dst, *src;
struct gif_softc *sc;
#ifdef INET
struct sockaddr_in *sin = NULL;
#endif
#ifdef INET6
struct sockaddr_in6 *sin6 = NULL;
#endif
u_int options;
int error;
@ -715,176 +632,25 @@ gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
}
error = 0;
switch (cmd) {
case SIOCSIFPHYADDR:
#ifdef INET6
case SIOCSIFPHYADDR_IN6:
#endif
error = EINVAL;
switch (cmd) {
#ifdef INET
case SIOCSIFPHYADDR:
src = (struct sockaddr *)
&(((struct in_aliasreq *)data)->ifra_addr);
dst = (struct sockaddr *)
&(((struct in_aliasreq *)data)->ifra_dstaddr);
break;
#endif
#ifdef INET6
case SIOCSIFPHYADDR_IN6:
src = (struct sockaddr *)
&(((struct in6_aliasreq *)data)->ifra_addr);
dst = (struct sockaddr *)
&(((struct in6_aliasreq *)data)->ifra_dstaddr);
break;
#endif
default:
goto bad;
}
/* sa_family must be equal */
if (src->sa_family != dst->sa_family ||
src->sa_len != dst->sa_len)
goto bad;
/* validate sa_len */
/* check sa_family looks sane for the cmd */
switch (src->sa_family) {
#ifdef INET
case AF_INET:
if (src->sa_len != sizeof(struct sockaddr_in))
goto bad;
if (cmd != SIOCSIFPHYADDR) {
error = EAFNOSUPPORT;
goto bad;
}
if (satosin(src)->sin_addr.s_addr == INADDR_ANY ||
satosin(dst)->sin_addr.s_addr == INADDR_ANY) {
error = EADDRNOTAVAIL;
goto bad;
}
break;
#endif
#ifdef INET6
case AF_INET6:
if (src->sa_len != sizeof(struct sockaddr_in6))
goto bad;
if (cmd != SIOCSIFPHYADDR_IN6) {
error = EAFNOSUPPORT;
goto bad;
}
error = EADDRNOTAVAIL;
if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)
||
IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr))
goto bad;
/*
* Check validity of the scope zone ID of the
* addresses, and convert it into the kernel
* internal form if necessary.
*/
error = sa6_embedscope(satosin6(src), 0);
if (error != 0)
goto bad;
error = sa6_embedscope(satosin6(dst), 0);
if (error != 0)
goto bad;
break;
#endif
default:
error = EAFNOSUPPORT;
goto bad;
}
error = gif_set_tunnel(ifp, src, dst);
break;
case SIOCDIFPHYADDR:
gif_delete_tunnel(ifp);
if (sc->gif_family == 0)
break;
gif_delete_tunnel(sc);
break;
#ifdef INET
case SIOCSIFPHYADDR:
case SIOCGIFPSRCADDR:
case SIOCGIFPDSTADDR:
error = in_gif_ioctl(sc, cmd, data);
break;
#endif
#ifdef INET6
case SIOCSIFPHYADDR_IN6:
case SIOCGIFPSRCADDR_IN6:
case SIOCGIFPDSTADDR_IN6:
#endif
if (sc->gif_family == 0) {
error = EADDRNOTAVAIL;
break;
}
GIF_RLOCK(sc);
switch (cmd) {
#ifdef INET
case SIOCGIFPSRCADDR:
case SIOCGIFPDSTADDR:
if (sc->gif_family != AF_INET) {
error = EADDRNOTAVAIL;
break;
}
sin = (struct sockaddr_in *)&ifr->ifr_addr;
memset(sin, 0, sizeof(*sin));
sin->sin_family = AF_INET;
sin->sin_len = sizeof(*sin);
break;
#endif
#ifdef INET6
case SIOCGIFPSRCADDR_IN6:
case SIOCGIFPDSTADDR_IN6:
if (sc->gif_family != AF_INET6) {
error = EADDRNOTAVAIL;
break;
}
sin6 = (struct sockaddr_in6 *)
&(((struct in6_ifreq *)data)->ifr_addr);
memset(sin6, 0, sizeof(*sin6));
sin6->sin6_family = AF_INET6;
sin6->sin6_len = sizeof(*sin6);
break;
#endif
default:
error = EAFNOSUPPORT;
}
if (error == 0) {
switch (cmd) {
#ifdef INET
case SIOCGIFPSRCADDR:
sin->sin_addr = sc->gif_iphdr->ip_src;
break;
case SIOCGIFPDSTADDR:
sin->sin_addr = sc->gif_iphdr->ip_dst;
break;
#endif
#ifdef INET6
case SIOCGIFPSRCADDR_IN6:
sin6->sin6_addr = sc->gif_ip6hdr->ip6_src;
break;
case SIOCGIFPDSTADDR_IN6:
sin6->sin6_addr = sc->gif_ip6hdr->ip6_dst;
break;
#endif
}
}
GIF_RUNLOCK(sc);
if (error != 0)
break;
switch (cmd) {
#ifdef INET
case SIOCGIFPSRCADDR:
case SIOCGIFPDSTADDR:
error = prison_if(curthread->td_ucred,
(struct sockaddr *)sin);
if (error != 0)
memset(sin, 0, sizeof(*sin));
break;
#endif
#ifdef INET6
case SIOCGIFPSRCADDR_IN6:
case SIOCGIFPDSTADDR_IN6:
error = prison_if(curthread->td_ucred,
(struct sockaddr *)sin6);
if (error == 0)
error = sa6_recoverscope(sin6);
if (error != 0)
memset(sin6, 0, sizeof(*sin6));
#endif
}
error = in6_gif_ioctl(sc, cmd, data);
break;
#endif
case SIOCGTUNFIB:
ifr->ifr_fib = sc->gif_fibnum;
break;
@ -908,171 +674,63 @@ gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
sizeof(options));
if (error)
break;
if (options & ~GIF_OPTMASK)
if (options & ~GIF_OPTMASK) {
error = EINVAL;
else
sc->gif_options = options;
break;
}
if (sc->gif_options != options) {
switch (sc->gif_family) {
#ifdef INET
case AF_INET:
error = in_gif_setopts(sc, options);
break;
#endif
#ifdef INET6
case AF_INET6:
error = in6_gif_setopts(sc, options);
break;
#endif
default:
/* No need to invoke AF-handler */
sc->gif_options = options;
}
}
break;
default:
error = EINVAL;
break;
}
if (error == 0 && sc->gif_family != 0) {
if (
#ifdef INET
cmd == SIOCSIFPHYADDR ||
#endif
#ifdef INET6
cmd == SIOCSIFPHYADDR_IN6 ||
#endif
0) {
ifp->if_drv_flags |= IFF_DRV_RUNNING;
if_link_state_change(ifp, LINK_STATE_UP);
}
}
bad:
sx_xunlock(&gif_ioctl_sx);
return (error);
}
static void
gif_detach(struct gif_softc *sc, int family)
gif_delete_tunnel(struct gif_softc *sc)
{
sx_assert(&gif_ioctl_sx, SA_XLOCKED);
if (sc->gif_ecookie != NULL) {
switch (family) {
#ifdef INET
case AF_INET:
ip_encap_detach(sc->gif_ecookie);
break;
#endif
#ifdef INET6
case AF_INET6:
ip6_encap_detach(sc->gif_ecookie);
break;
#endif
}
}
sc->gif_ecookie = NULL;
}
static int
gif_attach(struct gif_softc *sc, int af)
{
sx_assert(&gif_ioctl_sx, SA_XLOCKED);
switch (af) {
#ifdef INET
case AF_INET:
return (in_gif_attach(sc));
#endif
#ifdef INET6
case AF_INET6:
return (in6_gif_attach(sc));
#endif
}
return (EAFNOSUPPORT);
}
static int
gif_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst)
{
struct gif_softc *sc = ifp->if_softc;
struct gif_softc *tsc;
#ifdef INET
struct ip *ip;
#endif
#ifdef INET6
struct ip6_hdr *ip6;
#endif
void *hdr;
int error = 0;
if (sc == NULL)
return (ENXIO);
/* Disallow parallel tunnels unless instructed otherwise. */
if (V_parallel_tunnels == 0) {
GIF_LIST_LOCK();
LIST_FOREACH(tsc, &V_gif_softc_list, gif_list) {
if (tsc == sc || tsc->gif_family != src->sa_family)
continue;
#ifdef INET
if (tsc->gif_family == AF_INET &&
tsc->gif_iphdr->ip_src.s_addr ==
satosin(src)->sin_addr.s_addr &&
tsc->gif_iphdr->ip_dst.s_addr ==
satosin(dst)->sin_addr.s_addr) {
error = EADDRNOTAVAIL;
GIF_LIST_UNLOCK();
goto bad;
}
#endif
#ifdef INET6
if (tsc->gif_family == AF_INET6 &&
IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_src,
&satosin6(src)->sin6_addr) &&
IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_dst,
&satosin6(dst)->sin6_addr)) {
error = EADDRNOTAVAIL;
GIF_LIST_UNLOCK();
goto bad;
}
#endif
}
GIF_LIST_UNLOCK();
}
switch (src->sa_family) {
#ifdef INET
case AF_INET:
hdr = ip = malloc(sizeof(struct ip), M_GIF,
M_WAITOK | M_ZERO);
ip->ip_src.s_addr = satosin(src)->sin_addr.s_addr;
ip->ip_dst.s_addr = satosin(dst)->sin_addr.s_addr;
break;
#endif
#ifdef INET6
case AF_INET6:
hdr = ip6 = malloc(sizeof(struct ip6_hdr), M_GIF,
M_WAITOK | M_ZERO);
ip6->ip6_src = satosin6(src)->sin6_addr;
ip6->ip6_dst = satosin6(dst)->sin6_addr;
ip6->ip6_vfc = IPV6_VERSION;
break;
#endif
default:
return (EAFNOSUPPORT);
}
if (sc->gif_family != src->sa_family)
gif_detach(sc, sc->gif_family);
if (sc->gif_family == 0 ||
sc->gif_family != src->sa_family)
error = gif_attach(sc, src->sa_family);
GIF_WLOCK(sc);
if (sc->gif_family != 0)
if (sc->gif_family != 0) {
CK_LIST_REMOVE(sc, chain);
/* Wait until it become safe to free gif_hdr */
GIF_WAIT();
free(sc->gif_hdr, M_GIF);
sc->gif_family = src->sa_family;
sc->gif_hdr = hdr;
GIF_WUNLOCK(sc);
#if defined(INET) || defined(INET6)
bad:
#endif
if (error == 0 && sc->gif_family != 0) {
ifp->if_drv_flags |= IFF_DRV_RUNNING;
if_link_state_change(ifp, LINK_STATE_UP);
} else {
ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
if_link_state_change(ifp, LINK_STATE_DOWN);
}
return (error);
}
static void
gif_delete_tunnel(struct ifnet *ifp)
{
struct gif_softc *sc = ifp->if_softc;
int family;
if (sc == NULL)
return;
GIF_WLOCK(sc);
family = sc->gif_family;
sc->gif_family = 0;
GIF_WUNLOCK(sc);
if (family != 0) {
gif_detach(sc, family);
free(sc->gif_hdr, M_GIF);
}
ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
if_link_state_change(ifp, LINK_STATE_DOWN);
GIF2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
if_link_state_change(GIF2IFP(sc), LINK_STATE_DOWN);
}

View File

@ -5,6 +5,7 @@
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
* Copyright (c) 2018 Andrey V. Elsukov <ae@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -36,14 +37,9 @@
#define _NET_IF_GIF_H_
#ifdef _KERNEL
#include "opt_inet.h"
#include "opt_inet6.h"
#include <netinet/in.h>
struct ip;
struct ip6_hdr;
struct encaptab;
extern void (*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp,
int af);
@ -55,8 +51,6 @@ extern void (*ng_gif_detach_p)(struct ifnet *ifp);
struct gif_softc {
struct ifnet *gif_ifp;
struct rmlock gif_lock;
const struct encaptab *gif_ecookie;
int gif_family;
int gif_flags;
u_int gif_fibnum;
@ -65,28 +59,22 @@ struct gif_softc {
union {
void *hdr;
struct ip *iphdr;
#ifdef INET6
struct ip6_hdr *ip6hdr;
#endif
} gif_uhdr;
LIST_ENTRY(gif_softc) gif_list; /* all gif's are linked */
};
#define GIF2IFP(sc) ((sc)->gif_ifp)
#define GIF_LOCK_INIT(sc) rm_init(&(sc)->gif_lock, "gif softc")
#define GIF_LOCK_DESTROY(sc) rm_destroy(&(sc)->gif_lock)
#define GIF_RLOCK_TRACKER struct rm_priotracker gif_tracker
#define GIF_RLOCK(sc) rm_rlock(&(sc)->gif_lock, &gif_tracker)
#define GIF_RUNLOCK(sc) rm_runlock(&(sc)->gif_lock, &gif_tracker)
#define GIF_RLOCK_ASSERT(sc) rm_assert(&(sc)->gif_lock, RA_RLOCKED)
#define GIF_WLOCK(sc) rm_wlock(&(sc)->gif_lock)
#define GIF_WUNLOCK(sc) rm_wunlock(&(sc)->gif_lock)
#define GIF_WLOCK_ASSERT(sc) rm_assert(&(sc)->gif_lock, RA_WLOCKED)
CK_LIST_ENTRY(gif_softc) chain;
};
CK_LIST_HEAD(gif_list, gif_softc);
MALLOC_DECLARE(M_GIF);
#ifndef GIF_HASH_SIZE
#define GIF_HASH_SIZE (1 << 4)
#endif
#define GIF2IFP(sc) ((sc)->gif_ifp)
#define gif_iphdr gif_uhdr.iphdr
#define gif_hdr gif_uhdr.hdr
#ifdef INET6
#define gif_ip6hdr gif_uhdr.ip6hdr
#endif
#define GIF_MTU (1280) /* Default MTU */
#define GIF_MTU_MIN (1280) /* Minimum MTU */
@ -108,21 +96,29 @@ struct etherip_header {
/* mbuf adjust factor to force 32-bit alignment of IP header */
#define ETHERIP_ALIGN 2
#define GIF_RLOCK() epoch_enter_preempt(net_epoch_preempt)
#define GIF_RUNLOCK() epoch_exit_preempt(net_epoch_preempt)
#define GIF_WAIT() epoch_wait_preempt(net_epoch_preempt)
/* Prototypes */
struct gif_list *gif_hashinit(void);
void gif_hashdestroy(struct gif_list *);
void gif_input(struct mbuf *, struct ifnet *, int, uint8_t);
int gif_output(struct ifnet *, struct mbuf *, const struct sockaddr *,
struct route *);
int gif_encapcheck(const struct mbuf *, int, int, void *);
#ifdef INET
void in_gif_init(void);
void in_gif_uninit(void);
int in_gif_output(struct ifnet *, struct mbuf *, int, uint8_t);
int in_gif_encapcheck(const struct mbuf *, int, int, void *);
int in_gif_attach(struct gif_softc *);
#endif
#ifdef INET6
int in_gif_ioctl(struct gif_softc *, u_long, caddr_t);
int in_gif_setopts(struct gif_softc *, u_int);
void in6_gif_init(void);
void in6_gif_uninit(void);
int in6_gif_output(struct ifnet *, struct mbuf *, int, uint8_t);
int in6_gif_encapcheck(const struct mbuf *, int, int, void *);
int in6_gif_attach(struct gif_softc *);
#endif
int in6_gif_ioctl(struct gif_softc *, u_long, caddr_t);
int in6_gif_setopts(struct gif_softc *, u_int);
#endif /* _KERNEL */
#define GIFGOPTS _IOWR('i', 150, struct ifreq)

View File

@ -2,6 +2,7 @@
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
* Copyright (c) 2018 Andrey V. Elsukov <ae@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -38,9 +39,8 @@ __FBSDID("$FreeBSD$");
#include "opt_inet6.h"
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/rmlock.h>
#include <sys/systm.h>
#include <sys/jail.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/route.h>
@ -75,15 +76,155 @@ static VNET_DEFINE(int, ip_gif_ttl) = GIF_TTL;
SYSCTL_INT(_net_inet_ip, IPCTL_GIF_TTL, gifttl, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(ip_gif_ttl), 0, "Default TTL value for encapsulated packets");
/*
* We keep interfaces in a hash table using src+dst as key.
* Interfaces with GIF_IGNORE_SOURCE flag are linked into plain list.
*/
static VNET_DEFINE(struct gif_list *, ipv4_hashtbl) = NULL;
static VNET_DEFINE(struct gif_list, ipv4_list) = CK_LIST_HEAD_INITIALIZER();
#define V_ipv4_hashtbl VNET(ipv4_hashtbl)
#define V_ipv4_list VNET(ipv4_list)
#define GIF_HASH(src, dst) (V_ipv4_hashtbl[\
in_gif_hashval((src), (dst)) & (GIF_HASH_SIZE - 1)])
#define GIF_HASH_SC(sc) GIF_HASH((sc)->gif_iphdr->ip_src.s_addr,\
(sc)->gif_iphdr->ip_dst.s_addr)
/*
 * Compute the 32-bit FNV hash of an outer IPv4 address pair.
 *
 * The source address is hashed first, seeded with FNV1_32_INIT; the
 * destination address is then folded into that intermediate value.
 * Callers mask the result down to a bucket index.
 */
static uint32_t
in_gif_hashval(in_addr_t src, in_addr_t dst)
{
	uint32_t seed;

	seed = fnv_32_buf(&src, sizeof(src), FNV1_32_INIT);
	seed = fnv_32_buf(&dst, sizeof(dst), seed);
	return (seed);
}
/*
 * Check whether the outer address pair (src, dst) is already in use.
 *
 * Returns:
 *   EEXIST        - sc itself is already an AF_INET tunnel with exactly
 *                   this pair;
 *   EADDRNOTAVAIL - a different softc in the matching hash bucket uses
 *                   this pair;
 *   0             - no conflict was found.
 */
static int
in_gif_checkdup(const struct gif_softc *sc, in_addr_t src, in_addr_t dst)
{
	struct gif_softc *cur;

	if (sc->gif_family == AF_INET) {
		if (sc->gif_iphdr->ip_src.s_addr == src &&
		    sc->gif_iphdr->ip_dst.s_addr == dst)
			return (EEXIST);
	}

	CK_LIST_FOREACH(cur, &GIF_HASH(src, dst), chain) {
		/* Skip ourselves; only other tunnels can conflict. */
		if (cur != sc &&
		    cur->gif_iphdr->ip_src.s_addr == src &&
		    cur->gif_iphdr->ip_dst.s_addr == dst)
			return (EADDRNOTAVAIL);
	}
	return (0);
}
/*
 * Link the softc into the IPv4 lookup structures.
 *
 * Tunnels with GIF_IGNORE_SOURCE set go onto the plain V_ipv4_list;
 * all others are inserted into the hash bucket selected by their
 * configured outer src/dst pair.
 */
static void
in_gif_attach(struct gif_softc *sc)
{

	if ((sc->gif_options & GIF_IGNORE_SOURCE) == 0) {
		CK_LIST_INSERT_HEAD(&GIF_HASH_SC(sc), sc, chain);
		return;
	}
	CK_LIST_INSERT_HEAD(&V_ipv4_list, sc, chain);
}
/*
 * Apply a new option mask to a gif(4) softc configured for IPv4.
 *
 * The caller must hold gif_ioctl_sx and must only call this when the
 * tunnel family is AF_INET and the options really change; both
 * conditions are asserted.  Always returns 0.
 */
int
in_gif_setopts(struct gif_softc *sc, u_int options)
{

	/* NOTE: we are protected with gif_ioctl_sx lock */
	MPASS(sc->gif_family == AF_INET);
	MPASS(sc->gif_options != options);

	if ((options & GIF_IGNORE_SOURCE) !=
	    (sc->gif_options & GIF_IGNORE_SOURCE)) {
		/*
		 * GIF_IGNORE_SOURCE decides which list the softc is
		 * linked on (see in_gif_attach()), so relink it: unlink,
		 * store the new options, then reinsert using the new flag.
		 */
		CK_LIST_REMOVE(sc, chain);
		sc->gif_options = options;
		in_gif_attach(sc);
	} else {
		/*
		 * Only other option bits changed.  No relinking is needed,
		 * but the new value must still be recorded instead of
		 * being silently dropped while reporting success.
		 */
		sc->gif_options = options;
	}
	return (0);
}
/*
 * Handle the IPv4-specific tunnel ioctls for a gif(4) softc.
 *
 * SIOCSIFPHYADDR validates the requested outer src/dst pair, refuses a
 * pair that another softc in the same hash bucket already uses, and
 * otherwise (re)configures sc as an AF_INET tunnel.  SIOCGIFPSRCADDR and
 * SIOCGIFPDSTADDR copy the configured outer address into the ifreq.
 *
 * Returns 0 on success or an errno value.  Commands not listed in the
 * switch leave the initial EINVAL in place.
 */
int
in_gif_ioctl(struct gif_softc *sc, u_long cmd, caddr_t data)
{
struct ifreq *ifr = (struct ifreq *)data;
struct sockaddr_in *dst, *src;
struct ip *ip;
int error;
/* NOTE: we are protected with gif_ioctl_sx lock */
error = EINVAL;
switch (cmd) {
case SIOCSIFPHYADDR:
src = &((struct in_aliasreq *)data)->ifra_addr;
dst = &((struct in_aliasreq *)data)->ifra_dstaddr;
/* sanity checks */
/* Both sockaddrs must be AF_INET and exactly sizeof(sockaddr_in). */
if (src->sin_family != dst->sin_family ||
src->sin_family != AF_INET ||
src->sin_len != dst->sin_len ||
src->sin_len != sizeof(*src))
break;
/* A wildcard endpoint cannot identify a tunnel. */
if (src->sin_addr.s_addr == INADDR_ANY ||
dst->sin_addr.s_addr == INADDR_ANY) {
error = EADDRNOTAVAIL;
break;
}
/* The hash table is allocated lazily, on first configuration. */
if (V_ipv4_hashtbl == NULL)
V_ipv4_hashtbl = gif_hashinit();
error = in_gif_checkdup(sc, src->sin_addr.s_addr,
dst->sin_addr.s_addr);
if (error == EADDRNOTAVAIL)
break;
if (error == EEXIST) {
/* Addresses are the same. Just return. */
error = 0;
break;
}
/* Build the new outer header template before touching the old one. */
ip = malloc(sizeof(*ip), M_GIF, M_WAITOK | M_ZERO);
ip->ip_src.s_addr = src->sin_addr.s_addr;
ip->ip_dst.s_addr = dst->sin_addr.s_addr;
if (sc->gif_family != 0) {
/* Detach existing tunnel first */
CK_LIST_REMOVE(sc, chain);
/*
 * Unlink first, then wait (GIF_WAIT) before freeing the old
 * header, so it is not freed while still reachable via the list.
 * NOTE(review): sc->gif_family stays non-zero while the old
 * header is freed; confirm that transmit-side readers cannot
 * observe the stale gif_hdr pointer in this window.
 */
GIF_WAIT();
free(sc->gif_hdr, M_GIF);
/* XXX: should we notify about link state change? */
}
sc->gif_family = AF_INET;
sc->gif_iphdr = ip;
in_gif_attach(sc);
break;
case SIOCGIFPSRCADDR:
case SIOCGIFPDSTADDR:
if (sc->gif_family != AF_INET) {
error = EADDRNOTAVAIL;
break;
}
/* Reuse 'src' as the output sockaddr inside the caller's ifreq. */
src = (struct sockaddr_in *)&ifr->ifr_addr;
memset(src, 0, sizeof(*src));
src->sin_family = AF_INET;
src->sin_len = sizeof(*src);
src->sin_addr = (cmd == SIOCGIFPSRCADDR) ?
sc->gif_iphdr->ip_src: sc->gif_iphdr->ip_dst;
/* If prison_if() rejects the address, do not leak it to the caller. */
error = prison_if(curthread->td_ucred, (struct sockaddr *)src);
if (error != 0)
memset(src, 0, sizeof(*src));
break;
}
return (error);
}
int
in_gif_output(struct ifnet *ifp, struct mbuf *m, int proto, uint8_t ecn)
{
GIF_RLOCK_TRACKER;
struct gif_softc *sc = ifp->if_softc;
struct ip *ip;
int len;
/* prepend new IP header */
MPASS(in_epoch());
len = sizeof(struct ip);
#ifndef __NO_STRICT_ALIGNMENT
if (proto == IPPROTO_ETHERIP)
@ -102,15 +243,9 @@ in_gif_output(struct ifnet *ifp, struct mbuf *m, int proto, uint8_t ecn)
}
#endif
ip = mtod(m, struct ip *);
GIF_RLOCK(sc);
if (sc->gif_family != AF_INET) {
m_freem(m);
GIF_RUNLOCK(sc);
return (ENETDOWN);
}
bcopy(sc->gif_iphdr, ip, sizeof(struct ip));
GIF_RUNLOCK(sc);
MPASS(sc->gif_family == AF_INET);
bcopy(sc->gif_iphdr, ip, sizeof(struct ip));
ip->ip_p = proto;
/* version will be set in ip_output() */
ip->ip_ttl = V_ip_gif_ttl;
@ -128,6 +263,7 @@ in_gif_input(struct mbuf *m, int off, int proto, void *arg)
struct ip *ip;
uint8_t ecn;
MPASS(in_epoch());
if (sc == NULL) {
m_freem(m);
KMOD_IPSTAT_INC(ips_nogif);
@ -146,61 +282,122 @@ in_gif_input(struct mbuf *m, int off, int proto, void *arg)
return (IPPROTO_DONE);
}
/*
* we know that we are in IFF_UP, outer address available, and outer family
* matched the physical addr family. see gif_encapcheck().
*/
int
in_gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
static int
in_gif_lookup(const struct mbuf *m, int off, int proto, void **arg)
{
const struct ip *ip;
struct gif_softc *sc;
int ret;
/* sanity check done in caller */
sc = (struct gif_softc *)arg;
GIF_RLOCK_ASSERT(sc);
/* check for address match */
MPASS(in_epoch());
ip = mtod(m, const struct ip *);
if (sc->gif_iphdr->ip_src.s_addr != ip->ip_dst.s_addr)
/*
* NOTE: it is safe to iterate without any locking here, because softc
* can be reclaimed only when we are not within net_epoch_preempt
* section, but ip_encap lookup+input are executed in epoch section.
*/
ret = 0;
CK_LIST_FOREACH(sc, &GIF_HASH(ip->ip_dst.s_addr,
ip->ip_src.s_addr), chain) {
/*
* This is an inbound packet, its ip_dst is source address
* in softc.
*/
if (sc->gif_iphdr->ip_src.s_addr == ip->ip_dst.s_addr &&
sc->gif_iphdr->ip_dst.s_addr == ip->ip_src.s_addr) {
ret = ENCAP_DRV_LOOKUP;
goto done;
}
}
/*
* No exact match.
* Check the list of interfaces with GIF_IGNORE_SOURCE flag.
*/
CK_LIST_FOREACH(sc, &V_ipv4_list, chain) {
if (sc->gif_iphdr->ip_src.s_addr == ip->ip_dst.s_addr) {
ret = 32 + 8; /* src + proto */
goto done;
}
}
return (0);
done:
if ((GIF2IFP(sc)->if_flags & IFF_UP) == 0)
return (0);
ret = 32 + 8; /* src + proto */
if (sc->gif_iphdr->ip_dst.s_addr != ip->ip_src.s_addr) {
if ((sc->gif_options & GIF_IGNORE_SOURCE) == 0)
return (0);
} else
ret += 32;
/* ingress filters on outer source */
if ((GIF2IFP(sc)->if_flags & IFF_LINK2) == 0) {
struct nhop4_basic nh4;
struct in_addr dst;
dst = ip->ip_src;
if (fib4_lookup_nh_basic(sc->gif_fibnum, dst, 0, 0, &nh4) != 0)
return (0);
if (nh4.nh_ifp != m->m_pkthdr.rcvif)
return (0);
}
*arg = sc;
return (ret);
}
static const struct encap_config ipv4_encap_cfg = {
.proto = -1,
.min_length = sizeof(struct ip),
.exact_match = (sizeof(in_addr_t) << 4) + 8,
.check = gif_encapcheck,
.input = in_gif_input
static struct {
const struct encap_config encap;
const struct encaptab *cookie;
} ipv4_encap_cfg[] = {
{
.encap = {
.proto = IPPROTO_IPV4,
.min_length = 2 * sizeof(struct ip),
.exact_match = ENCAP_DRV_LOOKUP,
.lookup = in_gif_lookup,
.input = in_gif_input
},
},
#ifdef INET6
{
.encap = {
.proto = IPPROTO_IPV6,
.min_length = sizeof(struct ip) +
sizeof(struct ip6_hdr),
.exact_match = ENCAP_DRV_LOOKUP,
.lookup = in_gif_lookup,
.input = in_gif_input
},
},
#endif
{
.encap = {
.proto = IPPROTO_ETHERIP,
.min_length = sizeof(struct ip) +
sizeof(struct etherip_header) +
sizeof(struct ether_header),
.exact_match = ENCAP_DRV_LOOKUP,
.lookup = in_gif_lookup,
.input = in_gif_input
},
}
};
int
in_gif_attach(struct gif_softc *sc)
void
in_gif_init(void)
{
int i;
KASSERT(sc->gif_ecookie == NULL, ("gif_ecookie isn't NULL"));
sc->gif_ecookie = ip_encap_attach(&ipv4_encap_cfg, sc, M_WAITOK);
return (0);
if (!IS_DEFAULT_VNET(curvnet))
return;
for (i = 0; i < nitems(ipv4_encap_cfg); i++)
ipv4_encap_cfg[i].cookie = ip_encap_attach(
&ipv4_encap_cfg[i].encap, NULL, M_WAITOK);
}
/*
 * Tear down the IPv4 side of gif(4) for the current vnet.
 *
 * When running in the default vnet, every encap handler recorded in
 * ipv4_encap_cfg[] is detached.  In any vnet, the IPv4 hash table is
 * released if it was ever allocated.
 */
void
in_gif_uninit(void)
{
	int idx;

	if (IS_DEFAULT_VNET(curvnet)) {
		idx = 0;
		while (idx < nitems(ipv4_encap_cfg)) {
			ip_encap_detach(ipv4_encap_cfg[idx].cookie);
			idx++;
		}
	}

	/* The table is created lazily, so it may never have existed. */
	if (V_ipv4_hashtbl == NULL)
		return;
	gif_hashdestroy(V_ipv4_hashtbl);
}

View File

@ -2,6 +2,7 @@
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
* Copyright (c) 2018 Andrey V. Elsukov <ae@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -38,19 +39,18 @@ __FBSDID("$FreeBSD$");
#include "opt_inet6.h"
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/rmlock.h>
#include <sys/systm.h>
#include <sys/jail.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/queue.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/route.h>
@ -60,18 +60,15 @@ __FBSDID("$FreeBSD$");
#include <netinet/in_systm.h>
#ifdef INET
#include <netinet/ip.h>
#include <netinet/ip_ecn.h>
#endif
#include <netinet/ip_encap.h>
#ifdef INET6
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/in6_var.h>
#endif
#include <netinet/ip_ecn.h>
#ifdef INET6
#include <netinet6/scope6_var.h>
#include <netinet6/ip6_ecn.h>
#include <netinet6/in6_fib.h>
#endif
#include <net/if_gif.h>
@ -84,15 +81,167 @@ SYSCTL_INT(_net_inet6_ip6, IPV6CTL_GIF_HLIM, gifhlim,
CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_gif_hlim), 0,
"Default hop limit for encapsulated packets");
/*
* We keep interfaces in a hash table using src+dst as key.
* Interfaces with GIF_IGNORE_SOURCE flag are linked into plain list.
*/
static VNET_DEFINE(struct gif_list *, ipv6_hashtbl) = NULL;
static VNET_DEFINE(struct gif_list, ipv6_list) = CK_LIST_HEAD_INITIALIZER();
#define V_ipv6_hashtbl VNET(ipv6_hashtbl)
#define V_ipv6_list VNET(ipv6_list)
#define GIF_HASH(src, dst) (V_ipv6_hashtbl[\
in6_gif_hashval((src), (dst)) & (GIF_HASH_SIZE - 1)])
#define GIF_HASH_SC(sc) GIF_HASH(&(sc)->gif_ip6hdr->ip6_src,\
&(sc)->gif_ip6hdr->ip6_dst)
/*
 * Compute the 32-bit FNV hash of an outer IPv6 address pair.
 *
 * The source address is hashed first, seeded with FNV1_32_INIT; the
 * destination address is then folded into that intermediate value.
 * Callers mask the result down to a bucket index.
 */
static uint32_t
in6_gif_hashval(const struct in6_addr *src, const struct in6_addr *dst)
{
	uint32_t seed;

	seed = fnv_32_buf(src, sizeof(*src), FNV1_32_INIT);
	seed = fnv_32_buf(dst, sizeof(*dst), seed);
	return (seed);
}
/*
 * Check whether the outer address pair (src, dst) is already in use.
 *
 * Returns:
 *   EEXIST        - sc itself is already an AF_INET6 tunnel with exactly
 *                   this pair;
 *   EADDRNOTAVAIL - a different softc in the matching hash bucket uses
 *                   this pair;
 *   0             - no conflict was found.
 */
static int
in6_gif_checkdup(const struct gif_softc *sc, const struct in6_addr *src,
    const struct in6_addr *dst)
{
	struct gif_softc *cur;

	if (sc->gif_family == AF_INET6) {
		if (IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_src, src) &&
		    IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_dst, dst))
			return (EEXIST);
	}

	CK_LIST_FOREACH(cur, &GIF_HASH(src, dst), chain) {
		/* Skip ourselves; only other tunnels can conflict. */
		if (cur != sc &&
		    IN6_ARE_ADDR_EQUAL(&cur->gif_ip6hdr->ip6_src, src) &&
		    IN6_ARE_ADDR_EQUAL(&cur->gif_ip6hdr->ip6_dst, dst))
			return (EADDRNOTAVAIL);
	}
	return (0);
}
/*
 * Link sc into the lookup structure that corresponds to its options:
 * the src+dst hash bucket normally, or the plain list when
 * GIF_IGNORE_SOURCE is set.
 */
static void
in6_gif_attach(struct gif_softc *sc)
{

	if ((sc->gif_options & GIF_IGNORE_SOURCE) == 0) {
		CK_LIST_INSERT_HEAD(&GIF_HASH_SC(sc), sc, chain);
		return;
	}
	CK_LIST_INSERT_HEAD(&V_ipv6_list, sc, chain);
}
/*
 * Apply a new option set to an AF_INET6 tunnel.  Only a change of the
 * GIF_IGNORE_SOURCE bit needs work here: the softc is unlinked, the new
 * options are stored and the softc is re-linked into the structure that
 * matches them.  Always returns 0.
 */
int
in6_gif_setopts(struct gif_softc *sc, u_int options)
{
	u_int changed;

	/* NOTE: we are protected with gif_ioctl_sx lock */
	MPASS(sc->gif_family == AF_INET6);
	MPASS(sc->gif_options != options);

	changed = (sc->gif_options ^ options) & GIF_IGNORE_SOURCE;
	if (changed == 0)
		return (0);

	CK_LIST_REMOVE(sc, chain);
	sc->gif_options = options;
	in6_gif_attach(sc);
	return (0);
}
/*
 * Handle the IPv6-specific tunnel ioctls of a gif interface.
 *
 * SIOCSIFPHYADDR_IN6 validates the requested outer src/dst pair, builds a
 * fresh ip6_hdr template for it and (re)links the softc into the lookup
 * structures.  SIOCGIFPSRCADDR_IN6 and SIOCGIFPDSTADDR_IN6 copy the
 * configured outer source or destination address back into the request.
 *
 * Returns 0 on success or an errno value.  Unknown commands and sockaddrs
 * that fail the family/length sanity checks yield EINVAL.
 */
int
in6_gif_ioctl(struct gif_softc *sc, u_long cmd, caddr_t data)
{
	struct in6_ifreq *ifr = (struct in6_ifreq *)data;
	struct sockaddr_in6 *dst, *src;
	struct ip6_hdr *ip6;
	int error;

	/* NOTE: we are protected with gif_ioctl_sx lock */
	error = EINVAL;
	switch (cmd) {
	case SIOCSIFPHYADDR_IN6:
		src = &((struct in6_aliasreq *)data)->ifra_addr;
		dst = &((struct in6_aliasreq *)data)->ifra_dstaddr;

		/* sanity checks */
		if (src->sin6_family != dst->sin6_family ||
		    src->sin6_family != AF_INET6 ||
		    src->sin6_len != dst->sin6_len ||
		    src->sin6_len != sizeof(*src))
			break;
		if (IN6_IS_ADDR_UNSPECIFIED(&src->sin6_addr) ||
		    IN6_IS_ADDR_UNSPECIFIED(&dst->sin6_addr)) {
			error = EADDRNOTAVAIL;
			break;
		}
		/*
		 * Check validity of the scope zone ID of the
		 * addresses, and convert it into the kernel
		 * internal form if necessary.
		 */
		if ((error = sa6_embedscope(src, 0)) != 0 ||
		    (error = sa6_embedscope(dst, 0)) != 0)
			break;

		/* The hash table is allocated lazily on first configuration. */
		if (V_ipv6_hashtbl == NULL)
			V_ipv6_hashtbl = gif_hashinit();
		error = in6_gif_checkdup(sc, &src->sin6_addr,
		    &dst->sin6_addr);
		if (error == EADDRNOTAVAIL)
			break;
		if (error == EEXIST) {
			/* Addresses are the same. Just return. */
			error = 0;
			break;
		}
		ip6 = malloc(sizeof(*ip6), M_GIF, M_WAITOK | M_ZERO);
		ip6->ip6_src = src->sin6_addr;
		ip6->ip6_dst = dst->sin6_addr;
		/*
		 * in6_gif_output() copies this template verbatim and only
		 * ORs the ECN bits into ip6_flow, so the IP version must be
		 * set here or packets would be sent with version 0.
		 */
		ip6->ip6_vfc = IPV6_VERSION;
		if (sc->gif_family != 0) {
			/* Detach existing tunnel first */
			CK_LIST_REMOVE(sc, chain);
			/* Wait before freeing the header that was just unlinked. */
			GIF_WAIT();
			free(sc->gif_hdr, M_GIF);
			/* XXX: should we notify about link state change? */
		}
		sc->gif_family = AF_INET6;
		sc->gif_ip6hdr = ip6;
		in6_gif_attach(sc);
		break;
	case SIOCGIFPSRCADDR_IN6:
	case SIOCGIFPDSTADDR_IN6:
		if (sc->gif_family != AF_INET6) {
			error = EADDRNOTAVAIL;
			break;
		}
		src = (struct sockaddr_in6 *)&ifr->ifr_addr;
		memset(src, 0, sizeof(*src));
		src->sin6_family = AF_INET6;
		src->sin6_len = sizeof(*src);
		src->sin6_addr = (cmd == SIOCGIFPSRCADDR_IN6) ?
		    sc->gif_ip6hdr->ip6_src: sc->gif_ip6hdr->ip6_dst;
		error = prison_if(curthread->td_ucred, (struct sockaddr *)src);
		if (error == 0)
			error = sa6_recoverscope(src);
		/* Do not hand a partially filled address back on failure. */
		if (error != 0)
			memset(src, 0, sizeof(*src));
		break;
	}
	return (error);
}
int
in6_gif_output(struct ifnet *ifp, struct mbuf *m, int proto, uint8_t ecn)
{
GIF_RLOCK_TRACKER;
struct gif_softc *sc = ifp->if_softc;
struct ip6_hdr *ip6;
int len;
/* prepend new IP header */
MPASS(in_epoch());
len = sizeof(struct ip6_hdr);
#ifndef __NO_STRICT_ALIGNMENT
if (proto == IPPROTO_ETHERIP)
@ -112,14 +261,8 @@ in6_gif_output(struct ifnet *ifp, struct mbuf *m, int proto, uint8_t ecn)
#endif
ip6 = mtod(m, struct ip6_hdr *);
GIF_RLOCK(sc);
if (sc->gif_family != AF_INET6) {
m_freem(m);
GIF_RUNLOCK(sc);
return (ENETDOWN);
}
MPASS(sc->gif_family == AF_INET6);
bcopy(sc->gif_ip6hdr, ip6, sizeof(struct ip6_hdr));
GIF_RUNLOCK(sc);
ip6->ip6_flow |= htonl((uint32_t)ecn << 20);
ip6->ip6_nxt = proto;
@ -140,6 +283,7 @@ in6_gif_input(struct mbuf *m, int off, int proto, void *arg)
struct ip6_hdr *ip6;
uint8_t ecn;
MPASS(in_epoch());
if (sc == NULL) {
m_freem(m);
IP6STAT_INC(ip6s_nogif);
@ -158,64 +302,123 @@ in6_gif_input(struct mbuf *m, int off, int proto, void *arg)
return (IPPROTO_DONE);
}
/*
* we know that we are in IFF_UP, outer address available, and outer family
* matched the physical addr family. see gif_encapcheck().
*/
int
in6_gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
/*
 * Encap lookup callback (referenced as .lookup in ipv6_encap_cfg): match
 * the outer IPv6 header of inbound packet m against the configured tunnels.
 * The src+dst hash bucket is searched first for an exact match, which
 * yields ENCAP_DRV_LOOKUP.  Failing that, the GIF_IGNORE_SOURCE list is
 * searched by local address only, which yields 128 + 8.
 * A match is rejected (0 is returned) when the interface is not IFF_UP or,
 * unless IFF_LINK2 is set, when the next hop looked up for the outer source
 * is not on the interface the packet was received on.
 * On success the matched softc is stored in *arg.
 * NOTE(review): this span appears to interleave removed and added lines of
 * a diff; the description above follows the in6_gif_lookup() lines only.
 */
static int
in6_gif_lookup(const struct mbuf *m, int off, int proto, void **arg)
{
const struct ip6_hdr *ip6;
struct gif_softc *sc;
int ret;
/* sanity check done in caller */
sc = (struct gif_softc *)arg;
GIF_RLOCK_ASSERT(sc);
MPASS(in_epoch());
/*
* Check for address match. Note that the check is for an incoming
* packet. We should compare the *source* address in our configuration
* and the *destination* address of the packet, and vice versa.
* NOTE: it is safe to iterate without any locking here, because softc
* can be reclaimed only when we are not within net_epoch_preempt
* section, but ip_encap lookup+input are executed in epoch section.
*/
ip6 = mtod(m, const struct ip6_hdr *);
if (!IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_src, &ip6->ip6_dst))
ret = 0;
CK_LIST_FOREACH(sc, &GIF_HASH(&ip6->ip6_dst, &ip6->ip6_src), chain) {
/*
* This is an inbound packet, its ip6_dst is source address
* in softc.
*/
if (IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_src,
&ip6->ip6_dst) &&
IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_dst,
&ip6->ip6_src)) {
ret = ENCAP_DRV_LOOKUP;
goto done;
}
}
/*
* No exact match.
* Check the list of interfaces with GIF_IGNORE_SOURCE flag.
*/
CK_LIST_FOREACH(sc, &V_ipv6_list, chain) {
if (IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_src,
&ip6->ip6_dst)) {
ret = 128 + 8; /* src + proto */
goto done;
}
}
return (0);
done:
if ((GIF2IFP(sc)->if_flags & IFF_UP) == 0)
return (0);
ret = 128;
if (!IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_dst, &ip6->ip6_src)) {
if ((sc->gif_options & GIF_IGNORE_SOURCE) == 0)
return (0);
} else
ret += 128;
/* ingress filters on outer source */
if ((GIF2IFP(sc)->if_flags & IFF_LINK2) == 0) {
struct nhop6_basic nh6;
/* XXX empty scope id */
if (fib6_lookup_nh_basic(sc->gif_fibnum, &ip6->ip6_src, 0, 0, 0,
&nh6) != 0)
if (fib6_lookup_nh_basic(sc->gif_fibnum, &ip6->ip6_src,
ntohs(in6_getscope(&ip6->ip6_src)), 0, 0, &nh6) != 0)
return (0);
if (nh6.nh_ifp != m->m_pkthdr.rcvif)
return (0);
}
/* Hand the matched softc back through the output argument. */
*arg = sc;
return (ret);
}
static const struct encap_config ipv6_encap_cfg = {
.proto = -1,
.min_length = sizeof(struct ip6_hdr),
.exact_match = (sizeof(struct in6_addr) << 4) + 8,
.check = gif_encapcheck,
.input = in6_gif_input
/*
 * One encap handler per payload protocol carried over IPv6.
 * Each entry pairs the configuration passed to ip6_encap_attach() with the
 * cookie it returns; in6_gif_uninit() later passes that cookie to
 * ip6_encap_detach().  min_length is the outer IPv6 header plus the
 * headers expected for the given payload protocol.
 */
static struct {
const struct encap_config encap;
const struct encaptab *cookie;
} ipv6_encap_cfg[] = {
#ifdef INET
/* IPv4 payload: outer ip6_hdr + inner struct ip. */
{
.encap = {
.proto = IPPROTO_IPV4,
.min_length = sizeof(struct ip6_hdr) +
sizeof(struct ip),
.exact_match = ENCAP_DRV_LOOKUP,
.lookup = in6_gif_lookup,
.input = in6_gif_input
},
},
#endif
/* IPv6 payload: outer ip6_hdr + inner ip6_hdr. */
{
.encap = {
.proto = IPPROTO_IPV6,
.min_length = 2 * sizeof(struct ip6_hdr),
.exact_match = ENCAP_DRV_LOOKUP,
.lookup = in6_gif_lookup,
.input = in6_gif_input
},
},
/* EtherIP payload: outer ip6_hdr + etherip_header + ether_header. */
{
.encap = {
.proto = IPPROTO_ETHERIP,
.min_length = sizeof(struct ip6_hdr) +
sizeof(struct etherip_header) +
sizeof(struct ether_header),
.exact_match = ENCAP_DRV_LOOKUP,
.lookup = in6_gif_lookup,
.input = in6_gif_input
},
}
};
int
in6_gif_attach(struct gif_softc *sc)
/*
 * Attach every handler listed in ipv6_encap_cfg via ip6_encap_attach() and
 * store the returned cookie in the same table entry.  Returns early when
 * not running in the default VNET, presumably so the handlers are
 * registered only once.
 * NOTE(review): this span appears to interleave removed and added lines of
 * a diff; the description above follows the in6_gif_init() lines only.
 */
void
in6_gif_init(void)
{
int i;
KASSERT(sc->gif_ecookie == NULL, ("gif_ecookie isn't NULL"));
sc->gif_ecookie = ip6_encap_attach(&ipv6_encap_cfg, sc, M_WAITOK);
return (0);
if (!IS_DEFAULT_VNET(curvnet))
return;
for (i = 0; i < nitems(ipv6_encap_cfg); i++)
ipv6_encap_cfg[i].cookie = ip6_encap_attach(
&ipv6_encap_cfg[i].encap, NULL, M_WAITOK);
}
/*
 * Tear down the IPv6 side of gif: in the default VNET detach every encap
 * handler recorded in ipv6_encap_cfg, then destroy the current VNET's hash
 * table if one was ever allocated.
 */
void
in6_gif_uninit(void)
{
	int idx;

	if (IS_DEFAULT_VNET(curvnet)) {
		idx = 0;
		while (idx < nitems(ipv6_encap_cfg)) {
			ip6_encap_detach(ipv6_encap_cfg[idx].cookie);
			idx++;
		}
	}

	if (V_ipv6_hashtbl == NULL)
		return;
	gif_hashdestroy(V_ipv6_hashtbl);
}