Convert if_me(4) driver to use encap_lookup_t method and be lockless on the
data path.
This commit is contained in:
Andrey V. Elsukov 2018-06-14 14:53:24 +00:00
parent 5597d2a341
commit 2addcba7d5
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=335141

View File

@ -1,5 +1,5 @@
/*-
* Copyright (c) 2014 Andrey V. Elsukov <ae@FreeBSD.org>
* Copyright (c) 2014, 2018 Andrey V. Elsukov <ae@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -28,22 +28,20 @@
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/libkern.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mbuf.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/rmlock.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <net/bpf.h>
#include <net/ethernet.h>
@ -68,8 +66,6 @@ __FBSDID("$FreeBSD$");
#define MEMTU (1500 - sizeof(struct mobhdr))
static const char mename[] = "me";
static MALLOC_DEFINE(M_IFME, mename, "Minimal Encapsulation for IP");
static VNET_DEFINE(struct mtx, me_mtx);
#define V_me_mtx VNET(me_mtx)
/* Minimal forwarding header RFC 2004 */
struct mobhdr {
uint8_t mob_proto; /* protocol */
@ -82,32 +78,27 @@ struct mobhdr {
struct me_softc {
struct ifnet *me_ifp;
LIST_ENTRY(me_softc) me_list;
struct rmlock me_lock;
u_int me_fibnum;
const struct encaptab *me_ecookie;
struct in_addr me_src;
struct in_addr me_dst;
CK_LIST_ENTRY(me_softc) chain;
};
CK_LIST_HEAD(me_list, me_softc);
#define ME2IFP(sc) ((sc)->me_ifp)
#define ME_READY(sc) ((sc)->me_src.s_addr != 0)
#define ME_LOCK_INIT(sc) rm_init(&(sc)->me_lock, "me softc")
#define ME_LOCK_DESTROY(sc) rm_destroy(&(sc)->me_lock)
#define ME_RLOCK_TRACKER struct rm_priotracker me_tracker
#define ME_RLOCK(sc) rm_rlock(&(sc)->me_lock, &me_tracker)
#define ME_RUNLOCK(sc) rm_runlock(&(sc)->me_lock, &me_tracker)
#define ME_RLOCK_ASSERT(sc) rm_assert(&(sc)->me_lock, RA_RLOCKED)
#define ME_WLOCK(sc) rm_wlock(&(sc)->me_lock)
#define ME_WUNLOCK(sc) rm_wunlock(&(sc)->me_lock)
#define ME_WLOCK_ASSERT(sc) rm_assert(&(sc)->me_lock, RA_WLOCKED)
#define ME_RLOCK() epoch_enter_preempt(net_epoch_preempt)
#define ME_RUNLOCK() epoch_exit_preempt(net_epoch_preempt)
#define ME_WAIT() epoch_wait_preempt(net_epoch_preempt)
#define ME_LIST_LOCK_INIT(x) mtx_init(&V_me_mtx, "me_mtx", NULL, MTX_DEF)
#define ME_LIST_LOCK_DESTROY(x) mtx_destroy(&V_me_mtx)
#define ME_LIST_LOCK(x) mtx_lock(&V_me_mtx)
#define ME_LIST_UNLOCK(x) mtx_unlock(&V_me_mtx)
#ifndef ME_HASH_SIZE
#define ME_HASH_SIZE (1 << 4)
#endif
static VNET_DEFINE(struct me_list *, me_hashtbl) = NULL;
#define V_me_hashtbl VNET(me_hashtbl)
#define ME_HASH(src, dst) (V_me_hashtbl[\
me_hashval((src), (dst)) & (ME_HASH_SIZE - 1)])
static VNET_DEFINE(LIST_HEAD(, me_softc), me_softc_list);
#define V_me_softc_list VNET(me_softc_list)
static struct sx me_ioctl_sx;
SX_SYSINIT(me_ioctl_sx, &me_ioctl_sx, "me_ioctl");
@ -123,20 +114,8 @@ static int me_output(struct ifnet *, struct mbuf *,
const struct sockaddr *, struct route *);
static int me_input(struct mbuf *, int, int, void *);
static int me_set_tunnel(struct ifnet *, struct sockaddr_in *,
struct sockaddr_in *);
static void me_delete_tunnel(struct ifnet *);
static int me_encapcheck(const struct mbuf *, int, int, void *);
#define ME_MINLEN (sizeof(struct ip) + sizeof(struct mobhdr) -\
sizeof(in_addr_t))
static const struct encap_config ipv4_encap_cfg = {
.proto = IPPROTO_MOBILE,
.min_length = ME_MINLEN,
.exact_match = (sizeof(in_addr_t) << 4) + 8,
.check = me_encapcheck,
.input = me_input
};
static int me_set_tunnel(struct me_softc *, in_addr_t, in_addr_t);
static void me_delete_tunnel(struct me_softc *);
SYSCTL_DECL(_net_link);
static SYSCTL_NODE(_net_link, IFT_TUNNEL, me, CTLFLAG_RW, 0,
@ -150,11 +129,32 @@ static VNET_DEFINE(int, max_me_nesting) = MAX_ME_NEST;
SYSCTL_INT(_net_link_me, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET,
&VNET_NAME(max_me_nesting), 0, "Max nested tunnels");
/*
 * Hash a tunnel's (src, dst) IPv4 address pair into a 32-bit value.
 *
 * src is hashed first with fnv_32_buf() starting from FNV1_32_INIT, and
 * that result seeds the hash of dst, so the value depends on argument
 * order.  ME_HASH() masks the result down to a bucket index.
 */
static uint32_t
me_hashval(in_addr_t src, in_addr_t dst)
{

	return (fnv_32_buf(&dst, sizeof(dst),
	    fnv_32_buf(&src, sizeof(src), FNV1_32_INIT)));
}
static struct me_list *
me_hashinit(void)
{
struct me_list *hash;
int i;
hash = malloc(sizeof(struct me_list) * ME_HASH_SIZE,
M_IFME, M_WAITOK);
for (i = 0; i < ME_HASH_SIZE; i++)
CK_LIST_INIT(&hash[i]);
return (hash);
}
static void
vnet_me_init(const void *unused __unused)
{
LIST_INIT(&V_me_softc_list);
ME_LIST_LOCK_INIT();
V_me_cloner = if_clone_simple(mename, me_clone_create,
me_clone_destroy, 0);
}
@ -165,8 +165,9 @@ static void
vnet_me_uninit(const void *unused __unused)
{
if (V_me_hashtbl != NULL)
free(V_me_hashtbl, M_IFME);
if_clone_detach(V_me_cloner);
ME_LIST_LOCK_DESTROY();
}
VNET_SYSUNINIT(vnet_me_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
vnet_me_uninit, NULL);
@ -179,7 +180,6 @@ me_clone_create(struct if_clone *ifc, int unit, caddr_t params)
sc = malloc(sizeof(struct me_softc), M_IFME, M_WAITOK | M_ZERO);
sc->me_fibnum = curthread->td_proc->p_fibnum;
ME2IFP(sc) = if_alloc(IFT_TUNNEL);
ME_LOCK_INIT(sc);
ME2IFP(sc)->if_softc = sc;
if_initname(ME2IFP(sc), mename, unit);
@ -193,9 +193,6 @@ me_clone_create(struct if_clone *ifc, int unit, caddr_t params)
ME2IFP(sc)->if_capenable |= IFCAP_LINKSTATE;
if_attach(ME2IFP(sc));
bpfattach(ME2IFP(sc), DLT_NULL, sizeof(u_int32_t));
ME_LIST_LOCK();
LIST_INSERT_HEAD(&V_me_softc_list, sc, me_list);
ME_LIST_UNLOCK();
return (0);
}
@ -206,24 +203,20 @@ me_clone_destroy(struct ifnet *ifp)
sx_xlock(&me_ioctl_sx);
sc = ifp->if_softc;
me_delete_tunnel(ifp);
ME_LIST_LOCK();
LIST_REMOVE(sc, me_list);
ME_LIST_UNLOCK();
me_delete_tunnel(sc);
bpfdetach(ifp);
if_detach(ifp);
ifp->if_softc = NULL;
sx_xunlock(&me_ioctl_sx);
ME_WAIT();
if_free(ifp);
ME_LOCK_DESTROY(sc);
free(sc, M_IFME);
}
static int
me_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
ME_RLOCK_TRACKER;
struct ifreq *ifr = (struct ifreq *)data;
struct sockaddr_in *src, *dst;
struct me_softc *sc;
@ -251,10 +244,8 @@ me_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
error = 0;
switch (cmd) {
case SIOCSIFPHYADDR:
src = (struct sockaddr_in *)
&(((struct in_aliasreq *)data)->ifra_addr);
dst = (struct sockaddr_in *)
&(((struct in_aliasreq *)data)->ifra_dstaddr);
src = &((struct in_aliasreq *)data)->ifra_addr;
dst = &((struct in_aliasreq *)data)->ifra_dstaddr;
if (src->sin_family != dst->sin_family ||
src->sin_family != AF_INET ||
src->sin_len != dst->sin_len ||
@ -267,17 +258,16 @@ me_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
error = EADDRNOTAVAIL;
break;
}
error = me_set_tunnel(ifp, src, dst);
error = me_set_tunnel(sc, src->sin_addr.s_addr,
dst->sin_addr.s_addr);
break;
case SIOCDIFPHYADDR:
me_delete_tunnel(ifp);
me_delete_tunnel(sc);
break;
case SIOCGIFPSRCADDR:
case SIOCGIFPDSTADDR:
ME_RLOCK(sc);
if (!ME_READY(sc)) {
error = EADDRNOTAVAIL;
ME_RUNLOCK(sc);
break;
}
src = (struct sockaddr_in *)&ifr->ifr_addr;
@ -292,7 +282,6 @@ me_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
src->sin_addr = sc->me_dst;
break;
}
ME_RUNLOCK(sc);
error = prison_if(curthread->td_ucred, sintosa(src));
if (error != 0)
memset(src, 0, sizeof(*src));
@ -318,81 +307,71 @@ me_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
}
static int
me_encapcheck(const struct mbuf *m, int off, int proto, void *arg)
me_lookup(const struct mbuf *m, int off, int proto, void **arg)
{
ME_RLOCK_TRACKER;
const struct ip *ip;
struct me_softc *sc;
struct ip *ip;
int ret;
sc = (struct me_softc *)arg;
if ((ME2IFP(sc)->if_flags & IFF_UP) == 0)
return (0);
M_ASSERTPKTHDR(m);
ret = 0;
ME_RLOCK(sc);
if (ME_READY(sc)) {
ip = mtod(m, struct ip *);
MPASS(in_epoch());
ip = mtod(m, const struct ip *);
CK_LIST_FOREACH(sc, &ME_HASH(ip->ip_dst.s_addr,
ip->ip_src.s_addr), chain) {
if (sc->me_src.s_addr == ip->ip_dst.s_addr &&
sc->me_dst.s_addr == ip->ip_src.s_addr)
ret = 32 * 2 + 8;
}
ME_RUNLOCK(sc);
return (ret);
}
static int
me_set_tunnel(struct ifnet *ifp, struct sockaddr_in *src,
struct sockaddr_in *dst)
{
struct me_softc *sc, *tsc;
sx_assert(&me_ioctl_sx, SA_XLOCKED);
ME_LIST_LOCK();
sc = ifp->if_softc;
LIST_FOREACH(tsc, &V_me_softc_list, me_list) {
if (tsc == sc || !ME_READY(tsc))
continue;
if (tsc->me_src.s_addr == src->sin_addr.s_addr &&
tsc->me_dst.s_addr == dst->sin_addr.s_addr) {
ME_LIST_UNLOCK();
return (EADDRNOTAVAIL);
sc->me_dst.s_addr == ip->ip_src.s_addr) {
if ((ME2IFP(sc)->if_flags & IFF_UP) == 0)
return (0);
*arg = sc;
return (ENCAP_DRV_LOOKUP);
}
}
ME_LIST_UNLOCK();
ME_WLOCK(sc);
sc->me_dst = dst->sin_addr;
sc->me_src = src->sin_addr;
ME_WUNLOCK(sc);
if (sc->me_ecookie == NULL)
sc->me_ecookie = ip_encap_attach(&ipv4_encap_cfg,
sc, M_WAITOK);
if (sc->me_ecookie != NULL) {
ifp->if_drv_flags |= IFF_DRV_RUNNING;
if_link_state_change(ifp, LINK_STATE_UP);
}
return (0);
}
static void
me_delete_tunnel(struct ifnet *ifp)
static int
me_set_tunnel(struct me_softc *sc, in_addr_t src, in_addr_t dst)
{
struct me_softc *sc = ifp->if_softc;
struct me_softc *tmp;
sx_assert(&me_ioctl_sx, SA_XLOCKED);
if (sc->me_ecookie != NULL)
ip_encap_detach(sc->me_ecookie);
sc->me_ecookie = NULL;
ME_WLOCK(sc);
sc->me_src.s_addr = 0;
sc->me_dst.s_addr = 0;
ME_WUNLOCK(sc);
ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
if_link_state_change(ifp, LINK_STATE_DOWN);
if (V_me_hashtbl == NULL)
V_me_hashtbl = me_hashinit();
if (sc->me_src.s_addr == src && sc->me_dst.s_addr == dst)
return (0);
CK_LIST_FOREACH(tmp, &ME_HASH(src, dst), chain) {
if (tmp == sc)
continue;
if (tmp->me_src.s_addr == src &&
tmp->me_dst.s_addr == dst)
return (EADDRNOTAVAIL);
}
me_delete_tunnel(sc);
sc->me_dst.s_addr = dst;
sc->me_src.s_addr = src;
CK_LIST_INSERT_HEAD(&ME_HASH(src, dst), sc, chain);
ME2IFP(sc)->if_drv_flags |= IFF_DRV_RUNNING;
if_link_state_change(ME2IFP(sc), LINK_STATE_UP);
return (0);
}
static void
me_delete_tunnel(struct me_softc *sc)
{
sx_assert(&me_ioctl_sx, SA_XLOCKED);
if (ME_READY(sc)) {
CK_LIST_REMOVE(sc, chain);
ME_WAIT();
sc->me_src.s_addr = 0;
sc->me_dst.s_addr = 0;
ME2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING;
if_link_state_change(ME2IFP(sc), LINK_STATE_DOWN);
}
}
static uint16_t
@ -505,55 +484,45 @@ me_check_nesting(struct ifnet *ifp, struct mbuf *m)
static int
me_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
struct route *ro)
struct route *ro __unused)
{
uint32_t af;
int error;
if (dst->sa_family == AF_UNSPEC)
bcopy(dst->sa_data, &af, sizeof(af));
else
af = dst->sa_family;
m->m_pkthdr.csum_data = af;
return (ifp->if_transmit(ifp, m));
}
static int
me_transmit(struct ifnet *ifp, struct mbuf *m)
{
struct mobhdr mh;
struct me_softc *sc;
struct ip *ip;
uint32_t af;
int error, hlen, plen;
#ifdef MAC
error = mac_ifnet_check_transmit(ifp, m);
if (error != 0)
goto drop;
#endif
if ((ifp->if_flags & IFF_MONITOR) != 0 ||
(ifp->if_flags & IFF_UP) == 0) {
error = ENETDOWN;
error = ENETDOWN;
ME_RLOCK();
sc = ifp->if_softc;
if (sc == NULL || !ME_READY(sc) ||
(ifp->if_flags & IFF_MONITOR) != 0 ||
(ifp->if_flags & IFF_UP) == 0 ||
(error = me_check_nesting(ifp, m) != 0)) {
m_freem(m);
goto drop;
}
error = me_check_nesting(ifp, m);
if (error != 0)
goto drop;
m->m_flags &= ~(M_BCAST|M_MCAST);
if (dst->sa_family == AF_UNSPEC)
bcopy(dst->sa_data, &af, sizeof(af));
else
af = dst->sa_family;
af = m->m_pkthdr.csum_data;
if (af != AF_INET) {
error = EAFNOSUPPORT;
goto drop;
}
BPF_MTAP2(ifp, &af, sizeof(af), m);
return (ifp->if_transmit(ifp, m));
drop:
m_freem(m);
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
return (error);
}
static int
me_transmit(struct ifnet *ifp, struct mbuf *m)
{
ME_RLOCK_TRACKER;
struct mobhdr mh;
struct me_softc *sc;
struct ip *ip;
int error, hlen, plen;
sc = ifp->if_softc;
if (sc == NULL) {
error = ENETDOWN;
m_freem(m);
goto drop;
}
@ -573,13 +542,6 @@ me_transmit(struct ifnet *ifp, struct mbuf *m)
mh.mob_proto = ip->ip_p;
mh.mob_src = ip->ip_src;
mh.mob_dst = ip->ip_dst;
ME_RLOCK(sc);
if (!ME_READY(sc)) {
ME_RUNLOCK(sc);
error = ENETDOWN;
m_freem(m);
goto drop;
}
if (in_hosteq(sc->me_src, ip->ip_src)) {
hlen = sizeof(struct mobhdr) - sizeof(struct in_addr);
mh.mob_flags = 0;
@ -590,8 +552,8 @@ me_transmit(struct ifnet *ifp, struct mbuf *m)
plen = m->m_pkthdr.len;
ip->ip_src = sc->me_src;
ip->ip_dst = sc->me_dst;
m->m_flags &= ~(M_BCAST|M_MCAST);
M_SETFIB(m, sc->me_fibnum);
ME_RUNLOCK(sc);
M_PREPEND(m, hlen, M_NOWAIT);
if (m == NULL) {
error = ENOBUFS;
@ -619,6 +581,7 @@ me_transmit(struct ifnet *ifp, struct mbuf *m)
if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
if_inc_counter(ifp, IFCOUNTER_OBYTES, plen);
}
ME_RUNLOCK();
return (error);
}
@ -628,13 +591,26 @@ me_qflush(struct ifnet *ifp __unused)
}
static const struct encaptab *ecookie = NULL;
/*
 * Encapsulation handler configuration shared by all me(4) interfaces.
 * It is attached once, with a NULL argument, on MOD_LOAD in memodevent().
 *
 * - proto:       only IPPROTO_MOBILE packets are handed to this driver.
 * - min_length:  an IP header plus a mobhdr without one in_addr_t; this
 *                matches the short header me_transmit() builds when
 *                mob_flags is 0 (NOTE(review): confirm against the
 *                struct mobhdr layout).
 * - exact_match: ENCAP_DRV_LOOKUP, the same value me_lookup() returns
 *                when it finds a matching softc.
 * - lookup:      me_lookup() walks the (src, dst) hash chain and stores
 *                the matching softc through its arg pointer.
 * - input:       me_input() receives the matched packets.
 */
static const struct encap_config me_encap_cfg = {
.proto = IPPROTO_MOBILE,
.min_length = sizeof(struct ip) + sizeof(struct mobhdr) -
sizeof(in_addr_t),
.exact_match = ENCAP_DRV_LOOKUP,
.lookup = me_lookup,
.input = me_input
};
static int
memodevent(module_t mod, int type, void *data)
{
switch (type) {
case MOD_LOAD:
ecookie = ip_encap_attach(&me_encap_cfg, NULL, M_WAITOK);
break;
case MOD_UNLOAD:
ip_encap_detach(ecookie);
break;
default:
return (EOPNOTSUPP);