2014-11-07 19:13:19 +00:00
|
|
|
/*-
|
2018-06-14 14:53:24 +00:00
|
|
|
* Copyright (c) 2014, 2018 Andrey V. Elsukov <ae@FreeBSD.org>
|
2014-11-07 19:13:19 +00:00
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
*
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
|
|
|
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
|
|
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
|
|
|
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
|
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
|
|
|
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
|
|
|
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <sys/cdefs.h>
|
|
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
|
|
|
|
#include <sys/param.h>
|
2018-06-14 14:53:24 +00:00
|
|
|
#include <sys/systm.h>
|
2014-11-07 19:13:19 +00:00
|
|
|
#include <sys/jail.h>
|
|
|
|
#include <sys/kernel.h>
|
|
|
|
#include <sys/lock.h>
|
|
|
|
#include <sys/malloc.h>
|
|
|
|
#include <sys/module.h>
|
|
|
|
#include <sys/mbuf.h>
|
|
|
|
#include <sys/priv.h>
|
|
|
|
#include <sys/proc.h>
|
|
|
|
#include <sys/socket.h>
|
|
|
|
#include <sys/sockio.h>
|
|
|
|
#include <sys/sx.h>
|
|
|
|
#include <sys/sysctl.h>
|
|
|
|
#include <sys/syslog.h>
|
|
|
|
|
|
|
|
#include <net/bpf.h>
|
|
|
|
#include <net/ethernet.h>
|
|
|
|
#include <net/if.h>
|
|
|
|
#include <net/if_var.h>
|
|
|
|
#include <net/if_clone.h>
|
|
|
|
#include <net/if_types.h>
|
|
|
|
#include <net/netisr.h>
|
|
|
|
#include <net/vnet.h>
|
2015-05-12 07:37:27 +00:00
|
|
|
#include <net/route.h>
|
2014-11-07 19:13:19 +00:00
|
|
|
|
|
|
|
#include <netinet/in.h>
|
|
|
|
#include <netinet/in_systm.h>
|
|
|
|
#include <netinet/in_var.h>
|
|
|
|
#include <netinet/ip.h>
|
|
|
|
#include <netinet/ip_var.h>
|
|
|
|
#include <netinet/ip_encap.h>
|
|
|
|
|
|
|
|
#include <machine/in_cksum.h>
|
|
|
|
#include <security/mac/mac_framework.h>
|
|
|
|
|
2017-04-11 08:56:18 +00:00
|
|
|
#define MEMTU (1500 - sizeof(struct mobhdr))
|
2014-11-07 19:13:19 +00:00
|
|
|
static const char mename[] = "me";
|
|
|
|
static MALLOC_DEFINE(M_IFME, mename, "Minimal Encapsulation for IP");
|
|
|
|
/* Minimal forwarding header RFC 2004 */
|
|
|
|
struct mobhdr {
|
|
|
|
uint8_t mob_proto; /* protocol */
|
|
|
|
uint8_t mob_flags; /* flags */
|
|
|
|
#define MOB_FLAGS_SP 0x80 /* source present */
|
|
|
|
uint16_t mob_csum; /* header checksum */
|
|
|
|
struct in_addr mob_dst; /* original destination address */
|
|
|
|
struct in_addr mob_src; /* original source addr (optional) */
|
|
|
|
} __packed;
|
|
|
|
|
|
|
|
struct me_softc {
|
|
|
|
struct ifnet *me_ifp;
|
|
|
|
u_int me_fibnum;
|
|
|
|
struct in_addr me_src;
|
|
|
|
struct in_addr me_dst;
|
2018-06-14 14:53:24 +00:00
|
|
|
|
|
|
|
CK_LIST_ENTRY(me_softc) chain;
|
2018-10-21 18:18:37 +00:00
|
|
|
CK_LIST_ENTRY(me_softc) srchash;
|
2014-11-07 19:13:19 +00:00
|
|
|
};
|
2018-06-14 14:53:24 +00:00
|
|
|
CK_LIST_HEAD(me_list, me_softc);
|
2014-11-07 19:13:19 +00:00
|
|
|
/* softc -> ifnet accessor. */
#define	ME2IFP(sc)		((sc)->me_ifp)
/* A tunnel counts as configured once its source address is non-zero. */
#define	ME_READY(sc)		((sc)->me_src.s_addr != 0)

/* Read-side section and grace-period wait on net_epoch_preempt. */
#define	ME_RLOCK_TRACKER	struct epoch_tracker me_et
#define	ME_RLOCK()		epoch_enter_preempt(net_epoch_preempt, &me_et)
#define	ME_RUNLOCK()		epoch_exit_preempt(net_epoch_preempt, &me_et)
#define	ME_WAIT()		epoch_wait_preempt(net_epoch_preempt)
|
|
|
|
|
|
|
|
#ifndef ME_HASH_SIZE
|
|
|
|
#define ME_HASH_SIZE (1 << 4)
|
|
|
|
#endif
|
2018-07-24 16:35:52 +00:00
|
|
|
VNET_DEFINE_STATIC(struct me_list *, me_hashtbl) = NULL;
|
2018-10-21 18:18:37 +00:00
|
|
|
VNET_DEFINE_STATIC(struct me_list *, me_srchashtbl) = NULL;
|
2018-06-14 14:53:24 +00:00
|
|
|
#define V_me_hashtbl VNET(me_hashtbl)
|
2018-10-21 18:18:37 +00:00
|
|
|
#define V_me_srchashtbl VNET(me_srchashtbl)
|
2018-06-14 14:53:24 +00:00
|
|
|
#define ME_HASH(src, dst) (V_me_hashtbl[\
|
|
|
|
me_hashval((src), (dst)) & (ME_HASH_SIZE - 1)])
|
2018-10-21 18:18:37 +00:00
|
|
|
#define ME_SRCHASH(src) (V_me_srchashtbl[\
|
|
|
|
fnv_32_buf(&(src), sizeof(src), FNV1_32_INIT) & (ME_HASH_SIZE - 1)])
|
2018-06-14 14:53:24 +00:00
|
|
|
|
2014-11-07 19:13:19 +00:00
|
|
|
static struct sx me_ioctl_sx;
|
|
|
|
SX_SYSINIT(me_ioctl_sx, &me_ioctl_sx, "me_ioctl");
|
|
|
|
|
|
|
|
static int me_clone_create(struct if_clone *, int, caddr_t);
|
|
|
|
static void me_clone_destroy(struct ifnet *);
|
2018-07-24 16:35:52 +00:00
|
|
|
VNET_DEFINE_STATIC(struct if_clone *, me_cloner);
|
2014-11-07 19:13:19 +00:00
|
|
|
#define V_me_cloner VNET(me_cloner)
|
|
|
|
|
|
|
|
static void me_qflush(struct ifnet *);
|
|
|
|
static int me_transmit(struct ifnet *, struct mbuf *);
|
|
|
|
static int me_ioctl(struct ifnet *, u_long, caddr_t);
|
|
|
|
static int me_output(struct ifnet *, struct mbuf *,
|
|
|
|
const struct sockaddr *, struct route *);
|
Rework IP encapsulation handling code.
Currently it has several disadvantages:
- it uses single mutex to protect internal structures. It is used by
data- and control- path, thus there are no parallelism at all.
- it uses single list to keep encap handlers for both INET and INET6
families.
- struct encaptab keeps unneeded information (src, dst, masks, protosw),
that isn't used by code in the source tree.
- matches are prioritized and when many tunneling interfaces are
registered, encapcheck handler of each interface is invoked for each
packet. The search takes O(n) for n interfaces. All this work is done
with exclusive lock held.
What this patch includes:
- the datapath is converted to be lockless using epoch(9) KPI.
- struct encaptab now linked using CK_LIST.
- all unused fields removed from struct encaptab. Several new fields
added: min_length is the minimum packet length, that encapsulation
handler expects to see; exact_match is maximum number of bits, that
can return an encapsulation handler, when it wants to consume a packet.
- IPv6 and IPv4 handlers are stored in separate lists;
- added new "encap_lookup_t" method, that will be used later. It is
targeted to speedup lookup of needed interface, when gif(4)/gre(4) have
many interfaces.
- the need to use protosw structure is eliminated. The only pr_input
method was used from this structure, so I don't see the need to keep
using it.
- encap_input_t method changed to avoid using mbuf tags to store softc
pointer. Now it is passed directly through encap_input_t method.
encap_getarg() function is removed.
- all sockaddr structures and code that uses them removed. We don't have
any code in the tree that uses them. All consumers use encap_attach_func()
method, that relies on invoking of encapcheck() to determine the needed
handler.
- introduced struct encap_config, it contains parameters of encap handler
that is going to be registered by encap_attach() function.
- encap handlers are stored in lists ordered by exact_match value, thus
handlers that need more bits to match will be checked first, and if
encapcheck method returns exact_match value, the search will be stopped.
- all current consumers changed to use new KPI.
Reviewed by: mmacy
Sponsored by: Yandex LLC
Differential Revision: https://reviews.freebsd.org/D15617
2018-06-05 20:51:01 +00:00
|
|
|
static int me_input(struct mbuf *, int, int, void *);
|
2014-11-07 19:13:19 +00:00
|
|
|
|
2018-06-14 14:53:24 +00:00
|
|
|
static int me_set_tunnel(struct me_softc *, in_addr_t, in_addr_t);
|
|
|
|
static void me_delete_tunnel(struct me_softc *);
|
2014-11-07 19:13:19 +00:00
|
|
|
|
|
|
|
SYSCTL_DECL(_net_link);
|
2020-02-26 14:26:36 +00:00
|
|
|
static SYSCTL_NODE(_net_link, IFT_TUNNEL, me, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
|
2014-11-07 19:13:19 +00:00
|
|
|
"Minimal Encapsulation for IP (RFC 2004)");
|
|
|
|
#ifndef MAX_ME_NEST
|
|
|
|
#define MAX_ME_NEST 1
|
|
|
|
#endif
|
|
|
|
|
2018-07-24 16:35:52 +00:00
|
|
|
VNET_DEFINE_STATIC(int, max_me_nesting) = MAX_ME_NEST;
|
2014-11-07 19:13:19 +00:00
|
|
|
#define V_max_me_nesting VNET(max_me_nesting)
|
|
|
|
SYSCTL_INT(_net_link_me, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET,
|
|
|
|
&VNET_NAME(max_me_nesting), 0, "Max nested tunnels");
|
|
|
|
|
2018-06-14 14:53:24 +00:00
|
|
|
static uint32_t
|
|
|
|
me_hashval(in_addr_t src, in_addr_t dst)
|
|
|
|
{
|
|
|
|
uint32_t ret;
|
|
|
|
|
|
|
|
ret = fnv_32_buf(&src, sizeof(src), FNV1_32_INIT);
|
|
|
|
return (fnv_32_buf(&dst, sizeof(dst), ret));
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct me_list *
|
|
|
|
me_hashinit(void)
|
|
|
|
{
|
|
|
|
struct me_list *hash;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
hash = malloc(sizeof(struct me_list) * ME_HASH_SIZE,
|
|
|
|
M_IFME, M_WAITOK);
|
|
|
|
for (i = 0; i < ME_HASH_SIZE; i++)
|
|
|
|
CK_LIST_INIT(&hash[i]);
|
|
|
|
|
|
|
|
return (hash);
|
|
|
|
}
|
|
|
|
|
2014-11-07 19:13:19 +00:00
|
|
|
static void
|
|
|
|
vnet_me_init(const void *unused __unused)
|
|
|
|
{
|
2018-10-23 13:11:45 +00:00
|
|
|
|
2014-11-07 19:13:19 +00:00
|
|
|
V_me_cloner = if_clone_simple(mename, me_clone_create,
|
|
|
|
me_clone_destroy, 0);
|
|
|
|
}
|
|
|
|
VNET_SYSINIT(vnet_me_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
|
|
|
|
vnet_me_init, NULL);
|
|
|
|
|
|
|
|
static void
|
|
|
|
vnet_me_uninit(const void *unused __unused)
|
|
|
|
{
|
|
|
|
|
2018-10-21 18:18:37 +00:00
|
|
|
if (V_me_hashtbl != NULL) {
|
2018-06-14 14:53:24 +00:00
|
|
|
free(V_me_hashtbl, M_IFME);
|
2018-10-23 13:11:45 +00:00
|
|
|
V_me_hashtbl = NULL;
|
|
|
|
ME_WAIT();
|
2018-10-21 18:18:37 +00:00
|
|
|
free(V_me_srchashtbl, M_IFME);
|
|
|
|
}
|
2014-11-07 19:13:19 +00:00
|
|
|
if_clone_detach(V_me_cloner);
|
|
|
|
}
|
|
|
|
VNET_SYSUNINIT(vnet_me_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
|
|
|
|
vnet_me_uninit, NULL);
|
|
|
|
|
|
|
|
static int
|
|
|
|
me_clone_create(struct if_clone *ifc, int unit, caddr_t params)
|
|
|
|
{
|
|
|
|
struct me_softc *sc;
|
|
|
|
|
|
|
|
sc = malloc(sizeof(struct me_softc), M_IFME, M_WAITOK | M_ZERO);
|
|
|
|
sc->me_fibnum = curthread->td_proc->p_fibnum;
|
|
|
|
ME2IFP(sc) = if_alloc(IFT_TUNNEL);
|
|
|
|
ME2IFP(sc)->if_softc = sc;
|
|
|
|
if_initname(ME2IFP(sc), mename, unit);
|
|
|
|
|
2020-03-30 16:04:25 +00:00
|
|
|
ME2IFP(sc)->if_mtu = MEMTU;
|
2014-11-07 19:13:19 +00:00
|
|
|
ME2IFP(sc)->if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
|
|
|
|
ME2IFP(sc)->if_output = me_output;
|
|
|
|
ME2IFP(sc)->if_ioctl = me_ioctl;
|
|
|
|
ME2IFP(sc)->if_transmit = me_transmit;
|
|
|
|
ME2IFP(sc)->if_qflush = me_qflush;
|
2015-10-03 09:15:23 +00:00
|
|
|
ME2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE;
|
|
|
|
ME2IFP(sc)->if_capenable |= IFCAP_LINKSTATE;
|
2014-11-07 19:13:19 +00:00
|
|
|
if_attach(ME2IFP(sc));
|
|
|
|
bpfattach(ME2IFP(sc), DLT_NULL, sizeof(u_int32_t));
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
me_clone_destroy(struct ifnet *ifp)
|
|
|
|
{
|
|
|
|
struct me_softc *sc;
|
|
|
|
|
|
|
|
sx_xlock(&me_ioctl_sx);
|
|
|
|
sc = ifp->if_softc;
|
2018-06-14 14:53:24 +00:00
|
|
|
me_delete_tunnel(sc);
|
2014-11-07 19:13:19 +00:00
|
|
|
bpfdetach(ifp);
|
|
|
|
if_detach(ifp);
|
|
|
|
ifp->if_softc = NULL;
|
|
|
|
sx_xunlock(&me_ioctl_sx);
|
|
|
|
|
2018-06-14 14:53:24 +00:00
|
|
|
ME_WAIT();
|
2014-11-07 19:13:19 +00:00
|
|
|
if_free(ifp);
|
|
|
|
free(sc, M_IFME);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
me_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
|
|
|
|
{
|
|
|
|
struct ifreq *ifr = (struct ifreq *)data;
|
|
|
|
struct sockaddr_in *src, *dst;
|
|
|
|
struct me_softc *sc;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
switch (cmd) {
|
|
|
|
case SIOCSIFMTU:
|
|
|
|
if (ifr->ifr_mtu < 576)
|
|
|
|
return (EINVAL);
|
2017-04-11 08:56:18 +00:00
|
|
|
ifp->if_mtu = ifr->ifr_mtu;
|
2014-11-07 19:13:19 +00:00
|
|
|
return (0);
|
|
|
|
case SIOCSIFADDR:
|
|
|
|
ifp->if_flags |= IFF_UP;
|
|
|
|
case SIOCSIFFLAGS:
|
|
|
|
case SIOCADDMULTI:
|
|
|
|
case SIOCDELMULTI:
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
sx_xlock(&me_ioctl_sx);
|
|
|
|
sc = ifp->if_softc;
|
|
|
|
if (sc == NULL) {
|
|
|
|
error = ENXIO;
|
|
|
|
goto end;
|
|
|
|
}
|
|
|
|
error = 0;
|
|
|
|
switch (cmd) {
|
|
|
|
case SIOCSIFPHYADDR:
|
2018-06-14 14:53:24 +00:00
|
|
|
src = &((struct in_aliasreq *)data)->ifra_addr;
|
|
|
|
dst = &((struct in_aliasreq *)data)->ifra_dstaddr;
|
2014-11-07 19:13:19 +00:00
|
|
|
if (src->sin_family != dst->sin_family ||
|
|
|
|
src->sin_family != AF_INET ||
|
|
|
|
src->sin_len != dst->sin_len ||
|
|
|
|
src->sin_len != sizeof(struct sockaddr_in)) {
|
|
|
|
error = EINVAL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (src->sin_addr.s_addr == INADDR_ANY ||
|
|
|
|
dst->sin_addr.s_addr == INADDR_ANY) {
|
|
|
|
error = EADDRNOTAVAIL;
|
|
|
|
break;
|
|
|
|
}
|
2018-06-14 14:53:24 +00:00
|
|
|
error = me_set_tunnel(sc, src->sin_addr.s_addr,
|
|
|
|
dst->sin_addr.s_addr);
|
2014-11-07 19:13:19 +00:00
|
|
|
break;
|
|
|
|
case SIOCDIFPHYADDR:
|
2018-06-14 14:53:24 +00:00
|
|
|
me_delete_tunnel(sc);
|
2014-11-07 19:13:19 +00:00
|
|
|
break;
|
|
|
|
case SIOCGIFPSRCADDR:
|
|
|
|
case SIOCGIFPDSTADDR:
|
|
|
|
if (!ME_READY(sc)) {
|
|
|
|
error = EADDRNOTAVAIL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
src = (struct sockaddr_in *)&ifr->ifr_addr;
|
|
|
|
memset(src, 0, sizeof(*src));
|
|
|
|
src->sin_family = AF_INET;
|
|
|
|
src->sin_len = sizeof(*src);
|
|
|
|
switch (cmd) {
|
|
|
|
case SIOCGIFPSRCADDR:
|
|
|
|
src->sin_addr = sc->me_src;
|
|
|
|
break;
|
|
|
|
case SIOCGIFPDSTADDR:
|
|
|
|
src->sin_addr = sc->me_dst;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
error = prison_if(curthread->td_ucred, sintosa(src));
|
|
|
|
if (error != 0)
|
|
|
|
memset(src, 0, sizeof(*src));
|
|
|
|
break;
|
2015-05-12 07:37:27 +00:00
|
|
|
case SIOCGTUNFIB:
|
|
|
|
ifr->ifr_fib = sc->me_fibnum;
|
|
|
|
break;
|
|
|
|
case SIOCSTUNFIB:
|
|
|
|
if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0)
|
|
|
|
break;
|
|
|
|
if (ifr->ifr_fib >= rt_numfibs)
|
|
|
|
error = EINVAL;
|
|
|
|
else
|
|
|
|
sc->me_fibnum = ifr->ifr_fib;
|
|
|
|
break;
|
2014-11-07 19:13:19 +00:00
|
|
|
default:
|
|
|
|
error = EINVAL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
end:
|
|
|
|
sx_xunlock(&me_ioctl_sx);
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2018-06-14 14:53:24 +00:00
|
|
|
me_lookup(const struct mbuf *m, int off, int proto, void **arg)
|
2014-11-07 19:13:19 +00:00
|
|
|
{
|
2018-06-14 14:53:24 +00:00
|
|
|
const struct ip *ip;
|
2014-11-07 19:13:19 +00:00
|
|
|
struct me_softc *sc;
|
2018-06-28 11:39:27 +00:00
|
|
|
|
|
|
|
if (V_me_hashtbl == NULL)
|
|
|
|
return (0);
|
2014-11-07 19:13:19 +00:00
|
|
|
|
2020-01-15 05:45:27 +00:00
|
|
|
NET_EPOCH_ASSERT();
|
2018-06-14 14:53:24 +00:00
|
|
|
ip = mtod(m, const struct ip *);
|
|
|
|
CK_LIST_FOREACH(sc, &ME_HASH(ip->ip_dst.s_addr,
|
|
|
|
ip->ip_src.s_addr), chain) {
|
2014-11-07 19:13:19 +00:00
|
|
|
if (sc->me_src.s_addr == ip->ip_dst.s_addr &&
|
2018-06-14 14:53:24 +00:00
|
|
|
sc->me_dst.s_addr == ip->ip_src.s_addr) {
|
|
|
|
if ((ME2IFP(sc)->if_flags & IFF_UP) == 0)
|
|
|
|
return (0);
|
|
|
|
*arg = sc;
|
|
|
|
return (ENCAP_DRV_LOOKUP);
|
|
|
|
}
|
2014-11-07 19:13:19 +00:00
|
|
|
}
|
2018-06-14 14:53:24 +00:00
|
|
|
return (0);
|
2014-11-07 19:13:19 +00:00
|
|
|
}
|
|
|
|
|
2018-10-21 18:18:37 +00:00
|
|
|
/*
|
|
|
|
* Check that ingress address belongs to local host.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
me_set_running(struct me_softc *sc)
|
|
|
|
{
|
|
|
|
|
|
|
|
if (in_localip(sc->me_src))
|
|
|
|
ME2IFP(sc)->if_drv_flags |= IFF_DRV_RUNNING;
|
|
|
|
else
|
|
|
|
ME2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* ifaddr_event handler.
|
|
|
|
* Clear IFF_DRV_RUNNING flag when ingress address disappears to prevent
|
|
|
|
* source address spoofing.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
me_srcaddr(void *arg __unused, const struct sockaddr *sa,
|
|
|
|
int event __unused)
|
|
|
|
{
|
|
|
|
const struct sockaddr_in *sin;
|
|
|
|
struct me_softc *sc;
|
|
|
|
|
2018-10-23 13:11:45 +00:00
|
|
|
/* Check that VNET is ready */
|
|
|
|
if (V_me_hashtbl == NULL)
|
2018-10-21 18:18:37 +00:00
|
|
|
return;
|
|
|
|
|
2020-01-15 05:45:27 +00:00
|
|
|
NET_EPOCH_ASSERT();
|
2018-10-21 18:18:37 +00:00
|
|
|
sin = (const struct sockaddr_in *)sa;
|
|
|
|
CK_LIST_FOREACH(sc, &ME_SRCHASH(sin->sin_addr.s_addr), srchash) {
|
|
|
|
if (sc->me_src.s_addr != sin->sin_addr.s_addr)
|
|
|
|
continue;
|
|
|
|
me_set_running(sc);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-11-07 19:13:19 +00:00
|
|
|
static int
|
2018-06-14 14:53:24 +00:00
|
|
|
me_set_tunnel(struct me_softc *sc, in_addr_t src, in_addr_t dst)
|
2014-11-07 19:13:19 +00:00
|
|
|
{
|
2018-06-14 14:53:24 +00:00
|
|
|
struct me_softc *tmp;
|
2014-11-07 19:13:19 +00:00
|
|
|
|
|
|
|
sx_assert(&me_ioctl_sx, SA_XLOCKED);
|
2018-06-14 14:53:24 +00:00
|
|
|
|
2018-10-21 18:18:37 +00:00
|
|
|
if (V_me_hashtbl == NULL) {
|
2018-06-14 14:53:24 +00:00
|
|
|
V_me_hashtbl = me_hashinit();
|
2018-10-21 18:18:37 +00:00
|
|
|
V_me_srchashtbl = me_hashinit();
|
|
|
|
}
|
2018-06-14 14:53:24 +00:00
|
|
|
|
|
|
|
if (sc->me_src.s_addr == src && sc->me_dst.s_addr == dst)
|
|
|
|
return (0);
|
|
|
|
|
|
|
|
CK_LIST_FOREACH(tmp, &ME_HASH(src, dst), chain) {
|
|
|
|
if (tmp == sc)
|
2014-11-07 19:13:19 +00:00
|
|
|
continue;
|
2018-06-14 14:53:24 +00:00
|
|
|
if (tmp->me_src.s_addr == src &&
|
|
|
|
tmp->me_dst.s_addr == dst)
|
2014-11-07 19:13:19 +00:00
|
|
|
return (EADDRNOTAVAIL);
|
2015-10-03 09:15:23 +00:00
|
|
|
}
|
2018-06-14 14:53:24 +00:00
|
|
|
|
|
|
|
me_delete_tunnel(sc);
|
|
|
|
sc->me_dst.s_addr = dst;
|
|
|
|
sc->me_src.s_addr = src;
|
|
|
|
CK_LIST_INSERT_HEAD(&ME_HASH(src, dst), sc, chain);
|
2018-10-21 18:18:37 +00:00
|
|
|
CK_LIST_INSERT_HEAD(&ME_SRCHASH(src), sc, srchash);
|
2018-06-14 14:53:24 +00:00
|
|
|
|
2018-10-21 18:18:37 +00:00
|
|
|
me_set_running(sc);
|
2018-06-14 14:53:24 +00:00
|
|
|
if_link_state_change(ME2IFP(sc), LINK_STATE_UP);
|
2014-11-07 19:13:19 +00:00
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2018-06-14 14:53:24 +00:00
|
|
|
me_delete_tunnel(struct me_softc *sc)
|
2014-11-07 19:13:19 +00:00
|
|
|
{
|
|
|
|
|
|
|
|
sx_assert(&me_ioctl_sx, SA_XLOCKED);
|
2018-06-14 14:53:24 +00:00
|
|
|
if (ME_READY(sc)) {
|
|
|
|
CK_LIST_REMOVE(sc, chain);
|
2018-10-21 18:18:37 +00:00
|
|
|
CK_LIST_REMOVE(sc, srchash);
|
2018-06-14 14:53:24 +00:00
|
|
|
ME_WAIT();
|
|
|
|
|
|
|
|
sc->me_src.s_addr = 0;
|
|
|
|
sc->me_dst.s_addr = 0;
|
|
|
|
ME2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
|
|
|
|
if_link_state_change(ME2IFP(sc), LINK_STATE_DOWN);
|
|
|
|
}
|
2014-11-07 19:13:19 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Checksum over nwords 16-bit words starting at p: the words are summed
 * in a 32-bit accumulator, the carries are folded back into the low 16
 * bits, and the complement of the result is returned.  A non-positive
 * nwords sums nothing and yields 0xffff.
 */
static uint16_t
me_in_cksum(uint16_t *p, int nwords)
{
	uint32_t acc;
	int i;

	acc = 0;
	for (i = 0; i < nwords; i++)
		acc += p[i];
	/* Fold the high half into the low half, then fold the carry. */
	acc = (acc & 0xffff) + (acc >> 16);
	acc += acc >> 16;
	return ((uint16_t)~acc);
}
|
|
|
|
|
Rework IP encapsulation handling code.
Currently it has several disadvantages:
- it uses single mutex to protect internal structures. It is used by
data- and control- path, thus there are no parallelism at all.
- it uses single list to keep encap handlers for both INET and INET6
families.
- struct encaptab keeps unneeded information (src, dst, masks, protosw),
that isn't used by code in the source tree.
- matches are prioritized and when many tunneling interfaces are
registered, encapcheck handler of each interface is invoked for each
packet. The search takes O(n) for n interfaces. All this work is done
with exclusive lock held.
What this patch includes:
- the datapath is converted to be lockless using epoch(9) KPI.
- struct encaptab now linked using CK_LIST.
- all unused fields removed from struct encaptab. Several new fields
added: min_length is the minimum packet length, that encapsulation
handler expects to see; exact_match is maximum number of bits, that
can return an encapsulation handler, when it wants to consume a packet.
- IPv6 and IPv4 handlers are stored in separate lists;
- added new "encap_lookup_t" method, that will be used later. It is
targeted to speedup lookup of needed interface, when gif(4)/gre(4) have
many interfaces.
- the need to use protosw structure is eliminated. The only pr_input
method was used from this structure, so I don't see the need to keep
using it.
- encap_input_t method changed to avoid using mbuf tags to store softc
pointer. Now it is passed directly through encap_input_t method.
encap_getarg() function is removed.
- all sockaddr structures and code that uses them removed. We don't have
any code in the tree that uses them. All consumers use encap_attach_func()
method, that relies on invoking of encapcheck() to determine the needed
handler.
- introduced struct encap_config, it contains parameters of encap handler
that is going to be registered by encap_attach() function.
- encap handlers are stored in lists ordered by exact_match value, thus
handlers that need more bits to match will be checked first, and if
encapcheck method returns exact_match value, the search will be stopped.
- all current consumers changed to use new KPI.
Reviewed by: mmacy
Sponsored by: Yandex LLC
Differential Revision: https://reviews.freebsd.org/D15617
2018-06-05 20:51:01 +00:00
|
|
|
static int
|
|
|
|
me_input(struct mbuf *m, int off, int proto, void *arg)
|
2014-11-07 19:13:19 +00:00
|
|
|
{
|
Rework IP encapsulation handling code.
Currently it has several disadvantages:
- it uses single mutex to protect internal structures. It is used by
data- and control- path, thus there are no parallelism at all.
- it uses single list to keep encap handlers for both INET and INET6
families.
- struct encaptab keeps unneeded information (src, dst, masks, protosw),
that isn't used by code in the source tree.
- matches are prioritized and when many tunneling interfaces are
registered, encapcheck handler of each interface is invoked for each
packet. The search takes O(n) for n interfaces. All this work is done
with exclusive lock held.
What this patch includes:
- the datapath is converted to be lockless using epoch(9) KPI.
- struct encaptab now linked using CK_LIST.
- all unused fields removed from struct encaptab. Several new fields
addedr: min_length is the minimum packet length, that encapsulation
handler expects to see; exact_match is maximum number of bits, that
can return an encapsulation handler, when it wants to consume a packet.
- IPv6 and IPv4 handlers are stored in separate lists;
- added new "encap_lookup_t" method, that will be used later. It is
targeted to speedup lookup of needed interface, when gif(4)/gre(4) have
many interfaces.
- the need to use protosw structure is eliminated. The only pr_input
method was used from this structure, so I don't see the need to keep
using it.
- encap_input_t method changed to avoid using mbuf tags to store softc
pointer. Now it is passed directly trough encap_input_t method.
encap_getarg() funtions is removed.
- all sockaddr structures and code that uses them removed. We don't have
any code in the tree that uses them. All consumers use encap_attach_func()
method, that relies on invoking of encapcheck() to determine the needed
handler.
- introduced struct encap_config, it contains parameters of encap handler
that is going to be registered by encap_attach() function.
- encap handlers are stored in lists ordered by exact_match value, thus
handlers that need more bits to match will be checked first, and if
encapcheck method returns exact_match value, the search will be stopped.
- all current consumers changed to use new KPI.
Reviewed by: mmacy
Sponsored by: Yandex LLC
Differential Revision: https://reviews.freebsd.org/D15617
2018-06-05 20:51:01 +00:00
|
|
|
struct me_softc *sc = arg;
|
2014-11-07 19:13:19 +00:00
|
|
|
struct mobhdr *mh;
|
|
|
|
struct ifnet *ifp;
|
|
|
|
struct ip *ip;
|
|
|
|
int hlen;
|
|
|
|
|
Widen NET_EPOCH coverage.
When epoch(9) was introduced to network stack, it was basically
dropped in place of existing locking, which was mutexes and
rwlocks. For the sake of performance mutex covered areas were
as small as possible, so became epoch covered areas.
However, epoch doesn't introduce any contention, it just delays
memory reclaim. So, there is no point to minimise epoch covered
areas in sense of performance. Meanwhile entering/exiting epoch
also has non-zero CPU usage, so doing this less often is a win.
Not the least is also code maintainability. In the new paradigm
we can assume that at any stage of processing a packet, we are
inside network epoch. This makes coding both input and output
path way easier.
On output path we already enter epoch quite early - in the
ip_output(), in the ip6_output().
This patch does the same for the input path. All ISR processing,
network related callouts, other ways of packet injection to the
network stack shall be performed in net_epoch. Any leaf function
that walks network configuration now asserts epoch.
Tricky part is configuration code paths - ioctls, sysctls. They
also call into leaf functions, so some need to be changed.
This patch would introduce more epoch recursions (see EPOCH_TRACE)
than we had before. They will be cleaned up separately, as several
of them aren't trivial. Note, that unlike a lock recursion the
epoch recursion is safe and just wastes a bit of resources.
Reviewed by: gallatin, hselasky, cy, adrian, kristof
Differential Revision: https://reviews.freebsd.org/D19111
2019-10-07 22:40:05 +00:00
|
|
|
NET_EPOCH_ASSERT();
|
|
|
|
|
2014-11-07 19:13:19 +00:00
|
|
|
ifp = ME2IFP(sc);
|
|
|
|
/* checks for short packets */
|
|
|
|
hlen = sizeof(struct mobhdr);
|
|
|
|
if (m->m_pkthdr.len < sizeof(struct ip) + hlen)
|
|
|
|
hlen -= sizeof(struct in_addr);
|
|
|
|
if (m->m_len < sizeof(struct ip) + hlen)
|
|
|
|
m = m_pullup(m, sizeof(struct ip) + hlen);
|
|
|
|
if (m == NULL)
|
|
|
|
goto drop;
|
|
|
|
mh = (struct mobhdr *)mtodo(m, sizeof(struct ip));
|
|
|
|
/* check for wrong flags */
|
|
|
|
if (mh->mob_flags & (~MOB_FLAGS_SP)) {
|
|
|
|
m_freem(m);
|
|
|
|
goto drop;
|
|
|
|
}
|
|
|
|
if (mh->mob_flags) {
|
|
|
|
if (hlen != sizeof(struct mobhdr)) {
|
|
|
|
m_freem(m);
|
|
|
|
goto drop;
|
|
|
|
}
|
|
|
|
} else
|
|
|
|
hlen = sizeof(struct mobhdr) - sizeof(struct in_addr);
|
|
|
|
/* check mobile header checksum */
|
|
|
|
if (me_in_cksum((uint16_t *)mh, hlen / sizeof(uint16_t)) != 0) {
|
|
|
|
m_freem(m);
|
|
|
|
goto drop;
|
|
|
|
}
|
|
|
|
#ifdef MAC
|
|
|
|
mac_ifnet_create_mbuf(ifp, m);
|
|
|
|
#endif
|
|
|
|
ip = mtod(m, struct ip *);
|
|
|
|
ip->ip_dst = mh->mob_dst;
|
|
|
|
ip->ip_p = mh->mob_proto;
|
|
|
|
ip->ip_sum = 0;
|
|
|
|
ip->ip_len = htons(m->m_pkthdr.len - hlen);
|
|
|
|
if (mh->mob_flags)
|
|
|
|
ip->ip_src = mh->mob_src;
|
|
|
|
memmove(mtodo(m, hlen), ip, sizeof(struct ip));
|
|
|
|
m_adj(m, hlen);
|
|
|
|
m_clrprotoflags(m);
|
|
|
|
m->m_pkthdr.rcvif = ifp;
|
|
|
|
m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID);
|
2015-05-12 07:37:27 +00:00
|
|
|
M_SETFIB(m, ifp->if_fib);
|
2014-11-07 19:13:19 +00:00
|
|
|
hlen = AF_INET;
|
|
|
|
BPF_MTAP2(ifp, &hlen, sizeof(hlen), m);
|
|
|
|
if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
|
|
|
|
if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
|
|
|
|
if ((ifp->if_flags & IFF_MONITOR) != 0)
|
|
|
|
m_freem(m);
|
|
|
|
else
|
|
|
|
netisr_dispatch(NETISR_IP, m);
|
|
|
|
return (IPPROTO_DONE);
|
|
|
|
drop:
|
|
|
|
if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
|
|
|
|
return (IPPROTO_DONE);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
me_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
|
2018-06-14 14:53:24 +00:00
|
|
|
struct route *ro __unused)
|
2014-11-07 19:13:19 +00:00
|
|
|
{
|
|
|
|
uint32_t af;
|
|
|
|
|
|
|
|
if (dst->sa_family == AF_UNSPEC)
|
|
|
|
bcopy(dst->sa_data, &af, sizeof(af));
|
|
|
|
else
|
|
|
|
af = dst->sa_family;
|
2018-06-14 14:53:24 +00:00
|
|
|
m->m_pkthdr.csum_data = af;
|
2014-11-07 19:13:19 +00:00
|
|
|
return (ifp->if_transmit(ifp, m));
|
|
|
|
}
|
|
|
|
|
2018-07-09 11:03:28 +00:00
|
|
|
#define MTAG_ME 1414491977
|
2014-11-07 19:13:19 +00:00
|
|
|
static int
|
|
|
|
me_transmit(struct ifnet *ifp, struct mbuf *m)
|
|
|
|
{
|
2018-10-21 18:18:37 +00:00
|
|
|
ME_RLOCK_TRACKER;
|
2014-11-07 19:13:19 +00:00
|
|
|
struct mobhdr mh;
|
|
|
|
struct me_softc *sc;
|
|
|
|
struct ip *ip;
|
2018-06-14 14:53:24 +00:00
|
|
|
uint32_t af;
|
2014-11-07 19:13:19 +00:00
|
|
|
int error, hlen, plen;
|
|
|
|
|
2018-10-21 12:39:00 +00:00
|
|
|
ME_RLOCK();
|
2018-06-14 14:53:24 +00:00
|
|
|
#ifdef MAC
|
|
|
|
error = mac_ifnet_check_transmit(ifp, m);
|
|
|
|
if (error != 0)
|
|
|
|
goto drop;
|
|
|
|
#endif
|
|
|
|
error = ENETDOWN;
|
2014-11-07 19:13:19 +00:00
|
|
|
sc = ifp->if_softc;
|
2018-06-14 14:53:24 +00:00
|
|
|
if (sc == NULL || !ME_READY(sc) ||
|
|
|
|
(ifp->if_flags & IFF_MONITOR) != 0 ||
|
|
|
|
(ifp->if_flags & IFF_UP) == 0 ||
|
2018-10-21 18:18:37 +00:00
|
|
|
(ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
|
2018-07-09 11:03:28 +00:00
|
|
|
(error = if_tunnel_check_nesting(ifp, m, MTAG_ME,
|
|
|
|
V_max_me_nesting)) != 0) {
|
2018-06-14 14:53:24 +00:00
|
|
|
m_freem(m);
|
|
|
|
goto drop;
|
|
|
|
}
|
|
|
|
af = m->m_pkthdr.csum_data;
|
|
|
|
if (af != AF_INET) {
|
|
|
|
error = EAFNOSUPPORT;
|
2014-11-07 19:13:19 +00:00
|
|
|
m_freem(m);
|
|
|
|
goto drop;
|
|
|
|
}
|
|
|
|
if (m->m_len < sizeof(struct ip))
|
|
|
|
m = m_pullup(m, sizeof(struct ip));
|
|
|
|
if (m == NULL) {
|
|
|
|
error = ENOBUFS;
|
|
|
|
goto drop;
|
|
|
|
}
|
|
|
|
ip = mtod(m, struct ip *);
|
|
|
|
/* Fragmented datagramms shouldn't be encapsulated */
|
|
|
|
if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) {
|
|
|
|
error = EINVAL;
|
|
|
|
m_freem(m);
|
|
|
|
goto drop;
|
|
|
|
}
|
|
|
|
mh.mob_proto = ip->ip_p;
|
|
|
|
mh.mob_src = ip->ip_src;
|
|
|
|
mh.mob_dst = ip->ip_dst;
|
|
|
|
if (in_hosteq(sc->me_src, ip->ip_src)) {
|
|
|
|
hlen = sizeof(struct mobhdr) - sizeof(struct in_addr);
|
|
|
|
mh.mob_flags = 0;
|
|
|
|
} else {
|
|
|
|
hlen = sizeof(struct mobhdr);
|
|
|
|
mh.mob_flags = MOB_FLAGS_SP;
|
|
|
|
}
|
2018-06-14 15:04:30 +00:00
|
|
|
BPF_MTAP2(ifp, &af, sizeof(af), m);
|
2014-11-07 19:13:19 +00:00
|
|
|
plen = m->m_pkthdr.len;
|
|
|
|
ip->ip_src = sc->me_src;
|
|
|
|
ip->ip_dst = sc->me_dst;
|
2018-06-14 14:53:24 +00:00
|
|
|
m->m_flags &= ~(M_BCAST|M_MCAST);
|
2014-11-07 19:13:19 +00:00
|
|
|
M_SETFIB(m, sc->me_fibnum);
|
|
|
|
M_PREPEND(m, hlen, M_NOWAIT);
|
|
|
|
if (m == NULL) {
|
|
|
|
error = ENOBUFS;
|
|
|
|
goto drop;
|
|
|
|
}
|
|
|
|
if (m->m_len < sizeof(struct ip) + hlen)
|
|
|
|
m = m_pullup(m, sizeof(struct ip) + hlen);
|
|
|
|
if (m == NULL) {
|
|
|
|
error = ENOBUFS;
|
|
|
|
goto drop;
|
|
|
|
}
|
|
|
|
memmove(mtod(m, void *), mtodo(m, hlen), sizeof(struct ip));
|
|
|
|
ip = mtod(m, struct ip *);
|
|
|
|
ip->ip_len = htons(m->m_pkthdr.len);
|
|
|
|
ip->ip_p = IPPROTO_MOBILE;
|
|
|
|
ip->ip_sum = 0;
|
|
|
|
mh.mob_csum = 0;
|
|
|
|
mh.mob_csum = me_in_cksum((uint16_t *)&mh, hlen / sizeof(uint16_t));
|
|
|
|
bcopy(&mh, mtodo(m, sizeof(struct ip)), hlen);
|
|
|
|
error = ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL);
|
|
|
|
drop:
|
|
|
|
if (error)
|
|
|
|
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
|
|
|
|
else {
|
|
|
|
if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
|
|
|
|
if_inc_counter(ifp, IFCOUNTER_OBYTES, plen);
|
|
|
|
}
|
2018-06-14 14:53:24 +00:00
|
|
|
ME_RUNLOCK();
|
2014-11-07 19:13:19 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
me_qflush(struct ifnet *ifp __unused)
|
|
|
|
{
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2018-10-21 18:18:37 +00:00
|
|
|
static const struct srcaddrtab *me_srcaddrtab = NULL;
|
2018-06-14 14:53:24 +00:00
|
|
|
static const struct encaptab *ecookie = NULL;
|
|
|
|
static const struct encap_config me_encap_cfg = {
|
|
|
|
.proto = IPPROTO_MOBILE,
|
|
|
|
.min_length = sizeof(struct ip) + sizeof(struct mobhdr) -
|
|
|
|
sizeof(in_addr_t),
|
|
|
|
.exact_match = ENCAP_DRV_LOOKUP,
|
|
|
|
.lookup = me_lookup,
|
|
|
|
.input = me_input
|
|
|
|
};
|
|
|
|
|
2014-11-07 19:13:19 +00:00
|
|
|
static int
|
|
|
|
memodevent(module_t mod, int type, void *data)
|
|
|
|
{
|
|
|
|
|
|
|
|
switch (type) {
|
|
|
|
case MOD_LOAD:
|
2018-10-21 18:18:37 +00:00
|
|
|
me_srcaddrtab = ip_encap_register_srcaddr(me_srcaddr,
|
|
|
|
NULL, M_WAITOK);
|
2018-06-14 14:53:24 +00:00
|
|
|
ecookie = ip_encap_attach(&me_encap_cfg, NULL, M_WAITOK);
|
|
|
|
break;
|
2014-11-07 19:13:19 +00:00
|
|
|
case MOD_UNLOAD:
|
2018-06-14 14:53:24 +00:00
|
|
|
ip_encap_detach(ecookie);
|
2018-10-21 18:18:37 +00:00
|
|
|
ip_encap_unregister_srcaddr(me_srcaddrtab);
|
2014-11-07 19:13:19 +00:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
return (EOPNOTSUPP);
|
|
|
|
}
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static moduledata_t me_mod = {
|
|
|
|
"if_me",
|
|
|
|
memodevent,
|
|
|
|
0
|
|
|
|
};
|
|
|
|
|
|
|
|
DECLARE_MODULE(if_me, me_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
|
|
|
|
MODULE_VERSION(if_me, 1);
|