Add vxlan interface

vxlan creates a virtual LAN by encapsulating the inner Ethernet frame in
a UDP packet. This implementation is based on RFC7348.

Currently, the IPv6 support is not fully compliant with the specification:
we should be able to receive UDPv6 packets with a zero checksum, but we
need to support RFC6935 first. Patches for this should come soon.

Encapsulation protocols such as vxlan emphasize the need for the FreeBSD
network stack to support batching, GRO, and GSO. Each frame has to make
two trips through the network stack, and each frame will be at most MTU
sized. Performance suffers accordingly.

Some latest generation NICs have begun to support vxlan HW offloads that
we should also take advantage of. VIMAGE support should also be added soon.

Differential Revision:	https://reviews.freebsd.org/D384
Reviewed by:	gnn
Relnotes:	yes
This commit is contained in:
Bryan Venteicher 2014-10-20 14:42:42 +00:00
parent 2113857ae0
commit 007054f070
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=273331
12 changed files with 4211 additions and 1 deletions

View File

@ -30,6 +30,7 @@ SRCS+= ifmac.c # MAC support
SRCS+= ifmedia.c # SIOC[GS]IFMEDIA support
SRCS+= iffib.c # non-default FIB support
SRCS+= ifvlan.c # SIOC[GS]ETVLAN support
SRCS+= ifvxlan.c # VXLAN support
SRCS+= ifgre.c # GRE keys etc
SRCS+= ifgif.c # GIF reversed header workaround

View File

@ -28,7 +28,7 @@
.\" From: @(#)ifconfig.8 8.3 (Berkeley) 1/5/94
.\" $FreeBSD$
.\"
.Dd October 1, 2014
.Dd October 20, 2014
.Dt IFCONFIG 8
.Os
.Sh NAME
@ -2541,6 +2541,76 @@ argument is useless and hence deprecated.
.El
.Pp
The following parameters are used to configure
.Xr vxlan 4
interfaces.
.Bl -tag -width indent
.It Cm vni Ar identifier
This value is a 24-bit VXLAN Network Identifier (VNI) that identifies the
virtual network segment membership of the interface.
.It Cm local Ar address
The source address used in the encapsulating IPv4/IPv6 header.
The address should already be assigned to an existing interface.
When the interface is configured in unicast mode, the listening socket
is bound to this address.
.It Cm remote Ar address
The interface can be configured in a unicast, or point-to-point, mode
to create a tunnel between two hosts.
This is the IP address of the remote end of the tunnel.
.It Cm group Ar address
The interface can be configured in a multicast mode
to create a virtual network of hosts.
This is the IP multicast group address the interface will join.
.It Cm localport Ar port
The port number the interface will listen on.
The default port number is 4789.
.It Cm remoteport Ar port
The destination port number used in the encapsulating IPv4/IPv6 header.
The remote host should be listening on this port.
The default port number is 4789.
Note some other implementations, such as Linux,
do not default to the IANA assigned port,
but instead listen on port 8472.
.It Cm portrange Ar low high
The range of source ports used in the encapsulating IPv4/IPv6 header.
The port selected within the range is based on a hash of the inner frame.
A range is useful to provide entropy within the outer IP header
for more effective load balancing.
The default range is between the
.Xr sysctl 8
variables
.Va net.inet.ip.portrange.first
and
.Va net.inet.ip.portrange.last .
.It Cm timeout Ar timeout
The maximum time, in seconds, before an entry in the forwarding table
is pruned.
The default is 1200 seconds (20 minutes).
.It Cm maxaddr Ar max
The maximum number of entries in the forwarding table.
The default is 2000.
.It Cm vxlandev Ar dev
When the interface is configured in multicast mode, the
.Cm dev
interface is used to transmit IP multicast packets.
.It Cm ttl Ar ttl
The TTL used in the encapsulating IPv4/IPv6 header.
The default is 64.
.It Cm learn
The source IP address and inner source Ethernet MAC address of
received packets are used to dynamically populate the forwarding table.
When in multicast mode, an entry in the forwarding table allows the
interface to send the frame directly to the remote host instead of
broadcasting the frame to the multicast group.
This is the default.
.It Fl learn
The forwarding table is not populated by received packets.
.It Cm flush
Delete all dynamically-learned addresses from the forwarding table.
.It Cm flushall
Delete all addresses, including static addresses, from the forwarding table.
.El
.Pp
The following parameters are used to configure
.Xr carp 4
protocol on an interface:
.Bl -tag -width indent
@ -2745,6 +2815,7 @@ tried to alter an interface's configuration.
.Xr pfsync 4 ,
.Xr polling 4 ,
.Xr vlan 4 ,
.Xr vxlan 4 ,
.Xr devd.conf 5 ,
.\" .Xr eon 5 ,
.Xr devd 8 ,

648
sbin/ifconfig/ifvxlan.c Normal file
View File

@ -0,0 +1,648 @@
/*-
* Copyright (c) 2014, Bryan Venteicher <bryanv@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <stdlib.h>
#include <stdint.h>
#include <unistd.h>
#include <netdb.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_vxlan.h>
#include <net/route.h>
#include <netinet/in.h>
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <err.h>
#include <errno.h>
#include "ifconfig.h"
/*
 * Creation-time parameters accumulated while the command line is parsed.
 * The setvxlan_* handlers store their value here (and set the matching
 * VXLAN_PARAM_WITH_* bit in vxlp_with) when the interface does not exist
 * yet; vxlan_create() then passes the structure to SIOCIFCREATE2.
 *
 * The VNI starts out as VXLAN_VNI_MAX, a value setvxlan_vni() itself
 * rejects as out of range.
 */
static struct ifvxlanparam params = {
.vxlp_vni = VXLAN_VNI_MAX,
};
/*
 * Parse the whole of `cp' as an unsigned integer (base auto-detected by
 * strtoul: decimal, 0x hex, or leading-0 octal).
 *
 * Returns 0 and stores the value in *valp on success.  Returns -1, leaving
 * *valp untouched, when the string is empty, has trailing characters, or
 * the value does not fit in an u_long.
 */
static int
get_val(const char *cp, u_long *valp)
{
	char *stop;
	u_long parsed;

	/* strtoul() only reports overflow through errno, so clear it first. */
	errno = 0;
	parsed = strtoul(cp, &stop, 0);

	if (errno != ERANGE && *cp != '\0' && *stop == '\0') {
		*valp = parsed;
		return (0);
	}

	return (-1);
}
/*
 * Issue a driver-specific request against the interface named in the
 * global `ifr'.
 *
 *   sock     socket used for the ioctl
 *   op       driver command number (one of the VXLAN_CMD_* values)
 *   arg      command argument buffer, `argsize' bytes long
 *   set      non-zero selects SIOCSDRVSPEC, zero selects SIOCGDRVSPEC
 *
 * Returns whatever ioctl(2) returns.
 */
static int
do_cmd(int sock, u_long op, void *arg, size_t argsize, int set)
{
	struct ifdrv req;
	u_long request;

	if (set)
		request = SIOCSDRVSPEC;
	else
		request = SIOCGDRVSPEC;

	bzero(&req, sizeof(req));
	req.ifd_cmd = op;
	req.ifd_len = argsize;
	req.ifd_data = arg;
	strlcpy(req.ifd_name, ifr.ifr_name, sizeof(req.ifd_name));

	return (ioctl(sock, request, &req));
}
static int
vxlan_exists(int sock)
{
struct ifvxlancfg cfg;
bzero(&cfg, sizeof(cfg));
return (do_cmd(sock, VXLAN_CMD_GET_CONFIG, &cfg, sizeof(cfg), 0) != -1);
}
/*
 * Status printer: emit the "vxlan vni ..." line for the current interface.
 *
 * Prints nothing when the GET_CONFIG request fails or when the VNI has not
 * been set yet (vni >= VXLAN_VNI_MAX).  Addresses are printed numerically;
 * IPv6 addresses are wrapped in brackets so the ":port" suffix stays
 * unambiguous.  With the global `verbose' set, the learning/port-range/TTL
 * settings and forwarding-table counters follow on extra lines.
 */
static void
vxlan_status(int s)
{
	struct ifvxlancfg cfg;
	char laddr[NI_MAXHOST], raddr[NI_MAXHOST];
	char lport[NI_MAXSERV], rport[NI_MAXSERV];
	struct sockaddr *local, *remote;
	const char *lbr, *rbr;
	int vni, is_mcast, is_v6;

	bzero(&cfg, sizeof(cfg));
	if (do_cmd(s, VXLAN_CMD_GET_CONFIG, &cfg, sizeof(cfg), 0) < 0)
		return;

	/* Just report nothing if the network identity isn't set yet. */
	vni = cfg.vxlc_vni;
	if (vni >= VXLAN_VNI_MAX)
		return;

	local = &cfg.vxlc_local_sa.sa;
	remote = &cfg.vxlc_remote_sa.sa;
	is_v6 = (remote->sa_family == AF_INET6);

	/* A failed lookup degrades to empty strings rather than aborting. */
	if (getnameinfo(local, local->sa_len, laddr, sizeof(laddr),
	    lport, sizeof(lport), NI_NUMERICHOST | NI_NUMERICSERV) != 0)
		laddr[0] = lport[0] = '\0';
	if (getnameinfo(remote, remote->sa_len, raddr, sizeof(raddr),
	    rport, sizeof(rport), NI_NUMERICHOST | NI_NUMERICSERV) != 0)
		raddr[0] = rport[0] = '\0';

	/* A multicast remote address means the interface is in group mode. */
	if (is_v6) {
		struct sockaddr_in6 *r6 = (struct sockaddr_in6 *)remote;

		is_mcast = IN6_IS_ADDR_MULTICAST(&r6->sin6_addr);
	} else {
		struct sockaddr_in *r4 = (struct sockaddr_in *)remote;

		is_mcast = IN_MULTICAST(ntohl(r4->sin_addr.s_addr));
	}

	lbr = is_v6 ? "[" : "";
	rbr = is_v6 ? "]" : "";

	printf("\tvxlan vni %d", vni);
	printf(" local %s%s%s:%s", lbr, laddr, rbr, lport);
	printf(" %s %s%s%s:%s", is_mcast ? "group" : "remote", lbr,
	    raddr, rbr, rport);

	if (verbose) {
		printf("\n\t\tconfig: ");
		printf("%slearning portrange %d-%d ttl %d",
		    cfg.vxlc_learn ? "" : "no", cfg.vxlc_port_min,
		    cfg.vxlc_port_max, cfg.vxlc_ttl);
		printf("\n\t\tftable: ");
		printf("cnt %d max %d timeout %d",
		    cfg.vxlc_ftable_cnt, cfg.vxlc_ftable_max,
		    cfg.vxlc_ftable_timeout);
	}

	putchar('\n');
}
/*
 * Sanity-check the address flags collected in `params' before the
 * interface is created.  Exits via errx(3) when both an IPv4 and an IPv6
 * address were given for the same endpoint, or when the local and remote
 * endpoints use different address families.
 */
static void
vxlan_check_params(void)
{
	const int local4 =
	    (params.vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR4) != 0;
	const int local6 =
	    (params.vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR6) != 0;
	const int remote4 =
	    (params.vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR4) != 0;
	const int remote6 =
	    (params.vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR6) != 0;

	if (local4 && local6)
		errx(1, "cannot specify both local IPv4 and IPv6 addresses");

	if (remote4 && remote6)
		errx(1, "cannot specify both remote IPv4 and IPv6 addresses");

	if ((local4 && remote6) || (local6 && remote4))
		errx(1, "cannot mix IPv4 and IPv6 addresses");
}
/*
 * Callback handed to callback_register() by vxlan_ctor().
 * It currently does nothing; both arguments are ignored.
 */
static void
vxlan_cb(int s, void *arg)
{
}
/*
 * Clone-create callback for "vxlan" interfaces (installed by vxlan_ctor()).
 * Validates the collected parameters, attaches them to the request as
 * ifr_data and creates the interface with SIOCIFCREATE2.  Exits via err(3)
 * if the ioctl fails.
 */
static void
vxlan_create(int s, struct ifreq *ifr)
{
	vxlan_check_params();

	ifr->ifr_data = (caddr_t)&params;
	if (ioctl(s, SIOCIFCREATE2, ifr) >= 0)
		return;

	err(1, "SIOCIFCREATE2");
}
/*
 * "vni <identifier>": set the VXLAN Network Identifier.
 *
 * The identifier must parse as a number below VXLAN_VNI_MAX.  If the
 * interface already exists the value is pushed to the driver; otherwise
 * it is recorded in `params' for use at creation time.
 */
static
DECL_CMD_FUNC(setvxlan_vni, arg, d)
{
	struct ifvxlancmd cmd;
	u_long vni;

	if (get_val(arg, &vni) < 0 || vni >= VXLAN_VNI_MAX)
		errx(1, "invalid network identifier: %s", arg);

	if (vxlan_exists(s)) {
		bzero(&cmd, sizeof(cmd));
		cmd.vxlcmd_vni = vni;

		if (do_cmd(s, VXLAN_CMD_SET_VNI, &cmd, sizeof(cmd), 1) < 0)
			err(1, "VXLAN_CMD_SET_VNI");
		return;
	}

	/* Not created yet: remember the value for SIOCIFCREATE2. */
	params.vxlp_with |= VXLAN_PARAM_WITH_VNI;
	params.vxlp_vni = vni;
}
/*
 * "local <address>": set the source address of the encapsulating header.
 *
 * The argument is resolved with getaddrinfo(3) and only the first result
 * is used.  Multicast addresses and unsupported address families are
 * rejected.  For an existing interface the address is sent to the driver;
 * otherwise it is stored in `params' together with the matching
 * VXLAN_PARAM_WITH_LOCAL_ADDR{4,6} flag.
 */
static
DECL_CMD_FUNC(setvxlan_local, addr, d)
{
	struct ifvxlancmd cmd;
	struct addrinfo *res;
	struct sockaddr *sa;
	int gaierr;

	bzero(&cmd, sizeof(cmd));

	gaierr = getaddrinfo(addr, NULL, NULL, &res);
	if (gaierr != 0)
		errx(1, "error in parsing local address string: %s",
		    gai_strerror(gaierr));

	sa = res->ai_addr;

	switch (res->ai_family) {
#ifdef INET
	case AF_INET: {
		struct sockaddr_in *sin = (struct sockaddr_in *)sa;

		if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
			errx(1, "local address cannot be multicast");

		cmd.vxlcmd_sa.in4.sin_family = AF_INET;
		cmd.vxlcmd_sa.in4.sin_addr = sin->sin_addr;
		break;
	}
#endif
#ifdef INET6
	case AF_INET6: {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;

		if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
			errx(1, "local address cannot be multicast");

		cmd.vxlcmd_sa.in6.sin6_family = AF_INET6;
		cmd.vxlcmd_sa.in6.sin6_addr = sin6->sin6_addr;
		break;
	}
#endif
	default:
		errx(1, "local address %s not supported", addr);
	}

	freeaddrinfo(res);

	if (vxlan_exists(s)) {
		if (do_cmd(s, VXLAN_CMD_SET_LOCAL_ADDR, &cmd,
		    sizeof(cmd), 1) < 0)
			err(1, "VXLAN_CMD_SET_LOCAL_ADDR");
		return;
	}

	/* Not created yet: remember the address for SIOCIFCREATE2. */
	if (cmd.vxlcmd_sa.sa.sa_family == AF_INET) {
		params.vxlp_with |= VXLAN_PARAM_WITH_LOCAL_ADDR4;
		params.vxlp_local_in4 = cmd.vxlcmd_sa.in4.sin_addr;
	} else {
		params.vxlp_with |= VXLAN_PARAM_WITH_LOCAL_ADDR6;
		params.vxlp_local_in6 = cmd.vxlcmd_sa.in6.sin6_addr;
	}
}
/*
 * "remote <address>": set the unicast tunnel peer address.
 *
 * The argument is resolved with getaddrinfo(3) and only the first result
 * is used.  Multicast addresses (use "group" for those) and unsupported
 * address families are rejected.  For an existing interface the address is
 * sent to the driver; otherwise it is stored in `params' together with the
 * matching VXLAN_PARAM_WITH_REMOTE_ADDR{4,6} flag.
 */
static
DECL_CMD_FUNC(setvxlan_remote, addr, d)
{
	struct ifvxlancmd cmd;
	struct addrinfo *res;
	struct sockaddr *sa;
	int gaierr;

	bzero(&cmd, sizeof(cmd));

	gaierr = getaddrinfo(addr, NULL, NULL, &res);
	if (gaierr != 0)
		errx(1, "error in parsing remote address string: %s",
		    gai_strerror(gaierr));

	sa = res->ai_addr;

	switch (res->ai_family) {
#ifdef INET
	case AF_INET: {
		struct sockaddr_in *sin = (struct sockaddr_in *)sa;

		if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
			errx(1, "remote address cannot be multicast");

		cmd.vxlcmd_sa.in4.sin_family = AF_INET;
		cmd.vxlcmd_sa.in4.sin_addr = sin->sin_addr;
		break;
	}
#endif
#ifdef INET6
	case AF_INET6: {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;

		if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
			errx(1, "remote address cannot be multicast");

		cmd.vxlcmd_sa.in6.sin6_family = AF_INET6;
		cmd.vxlcmd_sa.in6.sin6_addr = sin6->sin6_addr;
		break;
	}
#endif
	default:
		errx(1, "remote address %s not supported", addr);
	}

	freeaddrinfo(res);

	if (vxlan_exists(s)) {
		if (do_cmd(s, VXLAN_CMD_SET_REMOTE_ADDR, &cmd,
		    sizeof(cmd), 1) < 0)
			err(1, "VXLAN_CMD_SET_REMOTE_ADDR");
		return;
	}

	/* Not created yet: remember the address for SIOCIFCREATE2. */
	if (cmd.vxlcmd_sa.sa.sa_family == AF_INET) {
		params.vxlp_with |= VXLAN_PARAM_WITH_REMOTE_ADDR4;
		params.vxlp_remote_in4 = cmd.vxlcmd_sa.in4.sin_addr;
	} else {
		params.vxlp_with |= VXLAN_PARAM_WITH_REMOTE_ADDR6;
		params.vxlp_remote_in6 = cmd.vxlcmd_sa.in6.sin6_addr;
	}
}
/*
 * "group <address>": set the multicast group the interface joins.
 *
 * Mirror image of the "remote" handler: the address MUST be multicast.
 * The group shares the remote-address slot, so it is delivered with
 * VXLAN_CMD_SET_REMOTE_ADDR for an existing interface, or stored in the
 * vxlp_remote_in{4,6} fields of `params' before creation.
 */
static
DECL_CMD_FUNC(setvxlan_group, addr, d)
{
	struct ifvxlancmd cmd;
	struct addrinfo *res;
	struct sockaddr *sa;
	int gaierr;

	bzero(&cmd, sizeof(cmd));

	gaierr = getaddrinfo(addr, NULL, NULL, &res);
	if (gaierr != 0)
		errx(1, "error in parsing group address string: %s",
		    gai_strerror(gaierr));

	sa = res->ai_addr;

	switch (res->ai_family) {
#ifdef INET
	case AF_INET: {
		struct sockaddr_in *sin = (struct sockaddr_in *)sa;

		if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
			errx(1, "group address must be multicast");

		cmd.vxlcmd_sa.in4.sin_family = AF_INET;
		cmd.vxlcmd_sa.in4.sin_addr = sin->sin_addr;
		break;
	}
#endif
#ifdef INET6
	case AF_INET6: {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;

		if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
			errx(1, "group address must be multicast");

		cmd.vxlcmd_sa.in6.sin6_family = AF_INET6;
		cmd.vxlcmd_sa.in6.sin6_addr = sin6->sin6_addr;
		break;
	}
#endif
	default:
		errx(1, "group address %s not supported", addr);
	}

	freeaddrinfo(res);

	if (vxlan_exists(s)) {
		if (do_cmd(s, VXLAN_CMD_SET_REMOTE_ADDR, &cmd,
		    sizeof(cmd), 1) < 0)
			err(1, "VXLAN_CMD_SET_REMOTE_ADDR");
		return;
	}

	/* Not created yet: remember the address for SIOCIFCREATE2. */
	if (cmd.vxlcmd_sa.sa.sa_family == AF_INET) {
		params.vxlp_with |= VXLAN_PARAM_WITH_REMOTE_ADDR4;
		params.vxlp_remote_in4 = cmd.vxlcmd_sa.in4.sin_addr;
	} else {
		params.vxlp_with |= VXLAN_PARAM_WITH_REMOTE_ADDR6;
		params.vxlp_remote_in6 = cmd.vxlcmd_sa.in6.sin6_addr;
	}
}
/*
 * "localport <port>": set the UDP port the interface listens on.
 *
 * Any value that fits in the 16-bit port field is accepted, including
 * 65535 (the previous ">= UINT16_MAX" test wrongly rejected that port).
 * For an existing interface the port is sent to the driver; otherwise it
 * is recorded in `params' for use at creation time.
 */
static
DECL_CMD_FUNC(setvxlan_local_port, arg, d)
{
	struct ifvxlancmd cmd;
	u_long val;

	if (get_val(arg, &val) < 0 || val > UINT16_MAX)
		errx(1, "invalid local port: %s", arg);

	if (!vxlan_exists(s)) {
		params.vxlp_with |= VXLAN_PARAM_WITH_LOCAL_PORT;
		params.vxlp_local_port = val;
		return;
	}

	bzero(&cmd, sizeof(cmd));
	cmd.vxlcmd_port = val;

	if (do_cmd(s, VXLAN_CMD_SET_LOCAL_PORT, &cmd, sizeof(cmd), 1) < 0)
		err(1, "VXLAN_CMD_SET_LOCAL_PORT");
}
/*
 * "remoteport <port>": set the destination UDP port of the encapsulating
 * header.
 *
 * Any value that fits in the 16-bit port field is accepted, including
 * 65535 (the previous ">= UINT16_MAX" test wrongly rejected that port).
 * For an existing interface the port is sent to the driver; otherwise it
 * is recorded in `params' for use at creation time.
 */
static
DECL_CMD_FUNC(setvxlan_remote_port, arg, d)
{
	struct ifvxlancmd cmd;
	u_long val;

	if (get_val(arg, &val) < 0 || val > UINT16_MAX)
		errx(1, "invalid remote port: %s", arg);

	if (!vxlan_exists(s)) {
		params.vxlp_with |= VXLAN_PARAM_WITH_REMOTE_PORT;
		params.vxlp_remote_port = val;
		return;
	}

	bzero(&cmd, sizeof(cmd));
	cmd.vxlcmd_port = val;

	if (do_cmd(s, VXLAN_CMD_SET_REMOTE_PORT, &cmd, sizeof(cmd), 1) < 0)
		err(1, "VXLAN_CMD_SET_REMOTE_PORT");
}
/*
 * "portrange <low> <high>": set the range of source ports used in the
 * encapsulating header.
 *
 * Both bounds must fit in 16 bits and low must not exceed high.  65535 is
 * a legal bound; the previous ">= UINT16_MAX" tests rejected it, which
 * made it impossible to spell out a range ending at the top of the port
 * space.  For an existing interface the range is sent to the driver;
 * otherwise it is recorded in `params' for use at creation time.
 */
static
DECL_CMD_FUNC2(setvxlan_port_range, arg1, arg2)
{
	struct ifvxlancmd cmd;
	u_long min, max;

	if (get_val(arg1, &min) < 0 || min > UINT16_MAX)
		errx(1, "invalid port range minimum: %s", arg1);
	if (get_val(arg2, &max) < 0 || max > UINT16_MAX)
		errx(1, "invalid port range maximum: %s", arg2);
	if (max < min)
		errx(1, "invalid port range");

	if (!vxlan_exists(s)) {
		params.vxlp_with |= VXLAN_PARAM_WITH_PORT_RANGE;
		params.vxlp_min_port = min;
		params.vxlp_max_port = max;
		return;
	}

	bzero(&cmd, sizeof(cmd));
	cmd.vxlcmd_port_min = min;
	cmd.vxlcmd_port_max = max;

	if (do_cmd(s, VXLAN_CMD_SET_PORT_RANGE, &cmd, sizeof(cmd), 1) < 0)
		err(1, "VXLAN_CMD_SET_PORT_RANGE");
}
static
DECL_CMD_FUNC(setvxlan_timeout, arg, d)
{
struct ifvxlancmd cmd;
u_long val;
if (get_val(arg, &val) < 0 || (val & ~0xFFFFFFFF) != 0)
errx(1, "invalid timeout value: %s", arg);
if (!vxlan_exists(s)) {
params.vxlp_with |= VXLAN_PARAM_WITH_FTABLE_TIMEOUT;
params.vxlp_ftable_timeout = val & 0xFFFFFFFF;
return;
}
bzero(&cmd, sizeof(cmd));
cmd.vxlcmd_ftable_timeout = val & 0xFFFFFFFF;
if (do_cmd(s, VXLAN_CMD_SET_FTABLE_TIMEOUT, &cmd, sizeof(cmd), 1) < 0)
err(1, "VXLAN_CMD_SET_FTABLE_TIMEOUT");
}
static
DECL_CMD_FUNC(setvxlan_maxaddr, arg, d)
{
struct ifvxlancmd cmd;
u_long val;
if (get_val(arg, &val) < 0 || (val & ~0xFFFFFFFF) != 0)
errx(1, "invalid maxaddr value: %s", arg);
if (!vxlan_exists(s)) {
params.vxlp_with |= VXLAN_PARAM_WITH_FTABLE_MAX;
params.vxlp_ftable_max = val & 0xFFFFFFFF;
return;
}
bzero(&cmd, sizeof(cmd));
cmd.vxlcmd_ftable_max = val & 0xFFFFFFFF;
if (do_cmd(s, VXLAN_CMD_SET_FTABLE_MAX, &cmd, sizeof(cmd), 1) < 0)
err(1, "VXLAN_CMD_SET_FTABLE_MAX");
}
/*
 * "vxlandev <dev>": name the interface used to transmit multicast packets.
 *
 * The name is copied with strlcpy(3) into a fixed-size buffer.  For an
 * existing interface it is sent to the driver; otherwise it is recorded in
 * `params' for use at creation time.
 */
static
DECL_CMD_FUNC(setvxlan_dev, arg, d)
{
	struct ifvxlancmd cmd;

	if (vxlan_exists(s)) {
		bzero(&cmd, sizeof(cmd));
		strlcpy(cmd.vxlcmd_ifname, arg, sizeof(cmd.vxlcmd_ifname));

		if (do_cmd(s, VXLAN_CMD_SET_MULTICAST_IF, &cmd,
		    sizeof(cmd), 1) < 0)
			err(1, "VXLAN_CMD_SET_MULTICAST_IF");
		return;
	}

	/* Not created yet: remember the name for SIOCIFCREATE2. */
	params.vxlp_with |= VXLAN_PARAM_WITH_MULTICAST_IF;
	strlcpy(params.vxlp_mc_ifname, arg, sizeof(params.vxlp_mc_ifname));
}
/*
 * "ttl <ttl>": set the TTL used in the encapsulating IP header.
 *
 * The TTL is stored in a uint8_t (vxlp_ttl / vxlcmd_ttl), so the largest
 * acceptable value is 255.  The previous "> 256" test let 256 through,
 * which then truncated to 0 on assignment.
 * For an existing interface the TTL is sent to the driver; otherwise it
 * is recorded in `params' for use at creation time.
 */
static
DECL_CMD_FUNC(setvxlan_ttl, arg, d)
{
	struct ifvxlancmd cmd;
	u_long val;

	if (get_val(arg, &val) < 0 || val > UINT8_MAX)
		errx(1, "invalid TTL value: %s", arg);

	if (!vxlan_exists(s)) {
		params.vxlp_with |= VXLAN_PARAM_WITH_TTL;
		params.vxlp_ttl = val;
		return;
	}

	bzero(&cmd, sizeof(cmd));
	cmd.vxlcmd_ttl = val;

	if (do_cmd(s, VXLAN_CMD_SET_TTL, &cmd, sizeof(cmd), 1) < 0)
		err(1, "VXLAN_CMD_SET_TTL");
}
/*
 * "learn" / "-learn": enable (d != 0) or disable (d == 0) dynamic
 * population of the forwarding table.
 *
 * For an existing interface the choice is sent to the driver through the
 * VXLAN_CMD_FLAG_LEARN bit; otherwise `d' is recorded in `params' for use
 * at creation time.
 */
static
DECL_CMD_FUNC(setvxlan_learn, arg, d)
{
	struct ifvxlancmd cmd;

	if (vxlan_exists(s)) {
		bzero(&cmd, sizeof(cmd));
		if (d != 0)
			cmd.vxlcmd_flags |= VXLAN_CMD_FLAG_LEARN;

		if (do_cmd(s, VXLAN_CMD_SET_LEARN, &cmd, sizeof(cmd), 1) < 0)
			err(1, "VXLAN_CMD_SET_LEARN");
		return;
	}

	/* Not created yet: remember the choice for SIOCIFCREATE2. */
	params.vxlp_with |= VXLAN_PARAM_WITH_LEARN;
	params.vxlp_learn = d;
}
/*
 * "flush" (d == 0) / "flushall" (d != 0): ask the driver to empty the
 * forwarding table.  A non-zero `d' sets VXLAN_CMD_FLAG_FLUSH_ALL in the
 * request.  `val' and `afp' are unused.  Exits via err(3) if the request
 * fails.
 */
static void
setvxlan_flush(const char *val, int d, int s, const struct afswtch *afp)
{
	struct ifvxlancmd cmd;

	bzero(&cmd, sizeof(cmd));
	cmd.vxlcmd_flags |= (d != 0) ? VXLAN_CMD_FLAG_FLUSH_ALL : 0;

	if (do_cmd(s, VXLAN_CMD_FLUSH, &cmd, sizeof(cmd), 1) >= 0)
		return;

	err(1, "VXLAN_CMD_FLUSH");
}
/*
 * Command table registered with ifconfig by vxlan_ctor().
 *
 * Every configuration keyword appears twice, once through a DEF_CLONE_*
 * macro and once through the plain DEF_* macro, both bound to the same
 * handler.
 * NOTE(review): presumably the DEF_CLONE_* entries are the ones honoured
 * while an interface is being created and the DEF_* entries apply to an
 * existing interface -- confirm against the macro definitions in
 * ifconfig.h.
 *
 * "flush" and "flushall" have no clone variant; they share one handler and
 * are told apart by the integer argument (0 vs 1).
 */
static struct cmd vxlan_cmds[] = {
DEF_CLONE_CMD_ARG("vni", setvxlan_vni),
DEF_CLONE_CMD_ARG("local", setvxlan_local),
DEF_CLONE_CMD_ARG("remote", setvxlan_remote),
DEF_CLONE_CMD_ARG("group", setvxlan_group),
DEF_CLONE_CMD_ARG("localport", setvxlan_local_port),
DEF_CLONE_CMD_ARG("remoteport", setvxlan_remote_port),
DEF_CLONE_CMD_ARG2("portrange", setvxlan_port_range),
DEF_CLONE_CMD_ARG("timeout", setvxlan_timeout),
DEF_CLONE_CMD_ARG("maxaddr", setvxlan_maxaddr),
DEF_CLONE_CMD_ARG("vxlandev", setvxlan_dev),
DEF_CLONE_CMD_ARG("ttl", setvxlan_ttl),
/* The integer is passed to the handler as `d': 1 = learn, 0 = -learn. */
DEF_CLONE_CMD("learn", 1, setvxlan_learn),
DEF_CLONE_CMD("-learn", 0, setvxlan_learn),
DEF_CMD_ARG("vni", setvxlan_vni),
DEF_CMD_ARG("local", setvxlan_local),
DEF_CMD_ARG("remote", setvxlan_remote),
DEF_CMD_ARG("group", setvxlan_group),
DEF_CMD_ARG("localport", setvxlan_local_port),
DEF_CMD_ARG("remoteport", setvxlan_remote_port),
DEF_CMD_ARG2("portrange", setvxlan_port_range),
DEF_CMD_ARG("timeout", setvxlan_timeout),
DEF_CMD_ARG("maxaddr", setvxlan_maxaddr),
DEF_CMD_ARG("vxlandev", setvxlan_dev),
DEF_CMD_ARG("ttl", setvxlan_ttl),
DEF_CMD("learn", 1, setvxlan_learn),
DEF_CMD("-learn", 0, setvxlan_learn),
/* The integer is passed to the handler as `d': 1 selects FLUSH_ALL. */
DEF_CMD("flush", 0, setvxlan_flush),
DEF_CMD("flushall", 1, setvxlan_flush),
};
/*
 * Address-family switch entry with family AF_UNSPEC whose only hook is
 * vxlan_status() as af_other_status.  It is handed to af_register() by
 * vxlan_ctor().
 */
static struct afswtch af_vxlan = {
.af_name = "af_vxlan",
.af_af = AF_UNSPEC,
.af_other_status = vxlan_status,
};
/*
 * Constructor: plug vxlan support into ifconfig.  Registers every entry of
 * vxlan_cmds, the af_vxlan status hook, the vxlan_cb callback, and
 * vxlan_create() as the default clone callback for "vxlan" interfaces.
 */
static __constructor void
vxlan_ctor(void)
{
	const size_t ncmds = sizeof(vxlan_cmds) / sizeof(vxlan_cmds[0]);
	size_t idx;

	for (idx = 0; idx < ncmds; idx++)
		cmd_register(&vxlan_cmds[idx]);

	af_register(&af_vxlan);
	callback_register(vxlan_cb, NULL);
	clone_setdefcallback("vxlan", vxlan_create);
}

View File

@ -567,6 +567,7 @@ MAN= aac.4 \
${_virtio_scsi.4} \
vkbd.4 \
vlan.4 \
vxlan.4 \
${_vmx.4} \
vpo.4 \
vr.4 \
@ -743,6 +744,7 @@ MLINKS+=urndis.4 if_urndis.4
MLINKS+=${_urtw.4} ${_if_urtw.4}
MLINKS+=vge.4 if_vge.4
MLINKS+=vlan.4 if_vlan.4
MLINKS+=vxlan.4 if_vxlan.4
MLINKS+=${_vmx.4} ${_if_vmx.4}
MLINKS+=vpo.4 imm.4
MLINKS+=vr.4 if_vr.4

235
share/man/man4/vxlan.4 Normal file
View File

@ -0,0 +1,235 @@
.\" Copyright (c) 2014 Bryan Venteicher
.\" All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" $FreeBSD$
.\"
.Dd October 20, 2014
.Dt VXLAN 4
.Os
.Sh NAME
.Nm vxlan
.Nd "Virtual eXtensible LAN interface"
.Sh SYNOPSIS
To compile this driver into the kernel,
place the following line in your
kernel configuration file:
.Bd -ragged -offset indent
.Cd "device vxlan"
.Ed
.Pp
Alternatively, to load the driver as a
module at boot time, place the following line in
.Xr loader.conf 5 :
.Bd -literal -offset indent
if_vxlan_load="YES"
.Ed
.Sh DESCRIPTION
The
.Nm
driver creates a virtual tunnel endpoint in a
.Nm
segment.
A
.Nm
segment is a virtual Layer 2 (Ethernet) network that is overlaid
in a Layer 3 (IP/UDP) network.
.Nm
is analogous to
.Xr vlan 4
but is designed to be better suited for large, multiple tenant
data center environments.
.Pp
Each
.Nm
interface is created at runtime using interface cloning.
This is most easily done with the
.Xr ifconfig 8
.Cm create
command or using the
.Va cloned_interfaces
variable in
.Xr rc.conf 5 .
The interface may be removed with the
.Xr ifconfig 8
.Cm destroy
command.
.Pp
The
.Nm
driver creates a pseudo Ethernet network interface
that supports the usual network
.Xr ioctl 2 Ns s
and thus can be used with
.Xr ifconfig 8
like any other Ethernet interface.
The
.Nm
interface encapsulates the Ethernet frame
by prepending IP/UDP and
.Nm
headers.
Thus, the encapsulated (inner) frame is able to be transmitted
over a routed, Layer 3 network to the remote host.
.Pp
The
.Nm
interface may be configured in either unicast or multicast mode.
When in unicast mode,
the interface creates a tunnel to a single remote host,
and all traffic is transmitted to that host.
When in multicast mode,
the interface joins an IP multicast group,
and receives packets sent to the group address,
and transmits packets to either the multicast group address,
or directly to the remote host if there is an appropriate
forwarding table entry.
.Pp
When the
.Nm
interface is brought up, a
.Xr udp 4
.Xr socket 9
is created based on the configuration,
such as the local address for unicast mode or
the group address for multicast mode,
and the listening (local) port number.
Since multiple
.Nm
interfaces may be created that either
use the same local address
or join the same group address,
and use the same port,
the driver may share a socket among multiple interfaces.
However, each interface within a socket must belong to
a unique
.Nm
segment.
The analogous
.Xr vlan 4
configuration would be a physical interface configured as
the parent device for multiple VLAN interfaces, each with
a unique VLAN tag.
Each
.Nm
segment is identified by a 24-bit value in the
.Nm
header called the
.Dq VXLAN Network Identifier ,
or VNI.
.Pp
When configured with the
.Xr ifconfig 8
.Cm learn
parameter, the interface dynamically creates forwarding table entries
from received packets.
An entry in the forwarding table maps the inner source MAC address
to the outer remote IP address.
During transmit, the interface attempts to lookup an entry for
the encapsulated destination MAC address.
If an entry is found, the IP address in the entry is used to directly
transmit the encapsulated frame to the destination.
Otherwise, when configured in multicast mode,
the interface must flood the frame to all hosts in the group.
The maximum number of entries in the table is configurable with the
.Xr ifconfig 8
.Cm maxaddr
command.
Stale entries in the table are periodically pruned.
The timeout is configurable with the
.Xr ifconfig 8
.Cm timeout
command.
The table may be viewed with the
.Xr sysctl 8
.Cm net.link.vxlan.N.ftable.dump
command.
.Sh MTU
Since the
.Nm
interface encapsulates the Ethernet frame with an IP, UDP, and
.Nm
header, the resulting frame may be larger than the MTU of the
physical network.
The
.Nm
specification recommends the physical network MTU be configured
to use jumbo frames to accommodate the encapsulated frame size.
Alternatively, the
.Xr ifconfig 8
.Cm mtu
command may be used to reduce the MTU size on the
.Nm
interface to allow the encapsulated frame to fit in the
current MTU of the physical network.
.Sh EXAMPLES
Create a
.Nm
interface in unicast mode
with the
.Cm local
tunnel address of 192.168.100.1,
and the
.Cm remote
tunnel address of 192.168.100.2.
.Bd -literal -offset indent
ifconfig vxlan create vni 108 local 192.168.100.1 remote 192.168.100.2
.Ed
.Pp
Create a
.Nm
interface in multicast mode,
with the
.Cm local
address of 192.168.10.95,
and the
.Cm group
address of 224.0.2.6.
The em0 interface will be used to transmit multicast packets.
.Bd -literal -offset indent
ifconfig vxlan create vni 42 local 192.168.10.95 group 224.0.2.6 vxlandev em0
.Ed
.Pp
Once created, the
.Nm
interface can be configured with
.Xr ifconfig 8 .
.Sh SEE ALSO
.Xr ifconfig 8 ,
.Xr inet 4 ,
.Xr inet6 4 ,
.Xr sysctl 8 ,
.Xr vlan 4
.Rs
.%A "M. Mahalingam"
.%A "et al"
.%T "Virtual eXtensible Local Area Network (VXLAN): A Framework for Overlaying Virtualized Layer 2 Networks over Layer 3 Networks"
.%D August 2014
.%O "RFC 7348"
.Re
.Sh AUTHORS
.An -nosplit
The
.Nm
driver was written by
.An Bryan Venteicher Aq bryanv@freebsd.org .

View File

@ -795,6 +795,10 @@ device ether
# according to IEEE 802.1Q.
device vlan
# The `vxlan' device implements the VXLAN encapsulation of Ethernet
# frames in UDP packets according to RFC7348.
device vxlan
# The `wlan' device provides generic code to support 802.11
# drivers, including host AP mode; it is MANDATORY for the wi,
# and ath drivers and will eventually be required by all 802.11 drivers.

View File

@ -3246,6 +3246,7 @@ net/if_stf.c optional stf inet inet6
net/if_tun.c optional tun
net/if_tap.c optional tap
net/if_vlan.c optional vlan
net/if_vxlan.c optional vxlan inet | vxlan inet6
net/mppcc.c optional netgraph_mppc_compression
net/mppcd.c optional netgraph_mppc_compression
net/netisr.c standard

View File

@ -154,6 +154,7 @@ SUBDIR= \
if_tap \
if_tun \
if_vlan \
if_vxlan \
${_igb} \
${_iir} \
imgact_binmisc \

View File

@ -0,0 +1,9 @@
# $FreeBSD$
.PATH: ${.CURDIR}/../../net
KMOD= if_vxlan
SRCS= if_vxlan.c
SRCS+= opt_inet.h opt_inet6.h
.include <bsd.kmod.mk>

3089
sys/net/if_vxlan.c Normal file

File diff suppressed because it is too large Load Diff

148
sys/net/if_vxlan.h Normal file
View File

@ -0,0 +1,148 @@
/*-
* Copyright (c) 2014, Bryan Venteicher <bryanv@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _NET_IF_VXLAN_H_
#define _NET_IF_VXLAN_H_
#include <sys/types.h>
#include <sys/socket.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <netinet/in.h>
/*
 * VXLAN encapsulation header: one 32-bit flags word followed by one
 * 32-bit VNI word.
 * NOTE(review): the byte order of these fields and the exact use of
 * VXLAN_HDR_VNI_SHIFT are determined by if_vxlan.c, which is not visible
 * here -- presumably network byte order with the VNI in the upper 24 bits
 * of vxlh_vni, per RFC 7348; confirm.
 */
struct vxlan_header {
uint32_t vxlh_flags;
uint32_t vxlh_vni;
};
#define VXLAN_HDR_FLAGS_VALID_VNI 0x08000000
#define VXLAN_HDR_VNI_SHIFT 8
/* A VNI is 24 bits wide, so VXLAN_VNI_MAX is one past the largest VNI. */
#define VXLAN_VNI_MAX (1 << 24)
#define VXLAN_VNI_MASK (VXLAN_VNI_MAX - 1)
/*
* The port assigned by IANA is 4789, but some early implementations
* (like Linux) use 8472 instead. If not specified, we default to
* the IANA port.
*/
#define VXLAN_PORT 4789
#define VXLAN_LEGACY_PORT 8472
/*
 * Creation parameters passed from userland with SIOCIFCREATE2 (ifconfig
 * attaches a pointer to this structure as ifr_data).
 *
 * vxlp_with is a bitmask of VXLAN_PARAM_WITH_* flags; ifconfig sets a flag
 * for each field it has filled in.
 * NOTE(review): presumably the kernel ignores fields whose flag is clear
 * -- confirm in if_vxlan.c.
 */
struct ifvxlanparam {
uint64_t vxlp_with;
#define VXLAN_PARAM_WITH_VNI 0x0001
#define VXLAN_PARAM_WITH_LOCAL_ADDR4 0x0002
#define VXLAN_PARAM_WITH_LOCAL_ADDR6 0x0004
#define VXLAN_PARAM_WITH_REMOTE_ADDR4 0x0008
#define VXLAN_PARAM_WITH_REMOTE_ADDR6 0x0010
#define VXLAN_PARAM_WITH_LOCAL_PORT 0x0020
#define VXLAN_PARAM_WITH_REMOTE_PORT 0x0040
#define VXLAN_PARAM_WITH_PORT_RANGE 0x0080
#define VXLAN_PARAM_WITH_FTABLE_TIMEOUT 0x0100
#define VXLAN_PARAM_WITH_FTABLE_MAX 0x0200
#define VXLAN_PARAM_WITH_MULTICAST_IF 0x0400
#define VXLAN_PARAM_WITH_TTL 0x0800
#define VXLAN_PARAM_WITH_LEARN 0x1000
uint32_t vxlp_vni;
/* Separate IPv4 and IPv6 slots; the *_ADDR4/*_ADDR6 flags say which is set. */
struct in_addr vxlp_local_in4;
struct in6_addr vxlp_local_in6;
struct in_addr vxlp_remote_in4;
struct in6_addr vxlp_remote_in6;
uint16_t vxlp_local_port;
uint16_t vxlp_remote_port;
/* Both bounds belong to VXLAN_PARAM_WITH_PORT_RANGE. */
uint16_t vxlp_min_port;
uint16_t vxlp_max_port;
char vxlp_mc_ifname[IFNAMSIZ];
uint32_t vxlp_ftable_timeout;
uint32_t vxlp_ftable_max;
uint8_t vxlp_ttl;
uint8_t vxlp_learn;
};
/*
 * Socket address that can hold either an IPv4 or an IPv6 endpoint.  The
 * generic `sa' view gives access to sa_family to tell which member is
 * valid; the VXLAN_SOCKADDR_IS_* macros below test exactly that.
 */
union vxlan_sockaddr {
struct sockaddr sa;
struct sockaddr_in in4;
struct sockaddr_in6 in6;
};
/* Each macro takes a pointer to a union vxlan_sockaddr. */
#define VXLAN_SOCKADDR_IS_IPV4(_vxsin) ((_vxsin)->sa.sa_family == AF_INET)
#define VXLAN_SOCKADDR_IS_IPV6(_vxsin) ((_vxsin)->sa.sa_family == AF_INET6)
#define VXLAN_SOCKADDR_IS_IPV46(_vxsin) \
(VXLAN_SOCKADDR_IS_IPV4(_vxsin) || VXLAN_SOCKADDR_IS_IPV6(_vxsin))
/*
 * Driver-specific command numbers, placed in ifd_cmd of the struct ifdrv
 * passed to SIOCGDRVSPEC / SIOCSDRVSPEC.  VXLAN_CMD_GET_CONFIG is used
 * with a struct ifvxlancfg; ifconfig passes a struct ifvxlancmd with the
 * set commands.
 * NOTE(review): the value 3 is not assigned; these numbers are part of the
 * userland/kernel interface, so do not renumber.
 */
#define VXLAN_CMD_GET_CONFIG 0
#define VXLAN_CMD_SET_VNI 1
#define VXLAN_CMD_SET_LOCAL_ADDR 2
#define VXLAN_CMD_SET_REMOTE_ADDR 4
#define VXLAN_CMD_SET_LOCAL_PORT 5
#define VXLAN_CMD_SET_REMOTE_PORT 6
#define VXLAN_CMD_SET_PORT_RANGE 7
#define VXLAN_CMD_SET_FTABLE_TIMEOUT 8
#define VXLAN_CMD_SET_FTABLE_MAX 9
#define VXLAN_CMD_SET_MULTICAST_IF 10
#define VXLAN_CMD_SET_TTL 11
#define VXLAN_CMD_SET_LEARN 12
#define VXLAN_CMD_FTABLE_ENTRY_ADD 13
#define VXLAN_CMD_FTABLE_ENTRY_REM 14
#define VXLAN_CMD_FLUSH 15
/*
 * Configuration snapshot filled in for VXLAN_CMD_GET_CONFIG.
 *
 * ifconfig treats vxlc_vni >= VXLAN_VNI_MAX as "VNI not set yet", and
 * treats a multicast address in vxlc_remote_sa as the group address.  The
 * local and remote socket addresses carry the ports as well as the
 * addresses.
 */
struct ifvxlancfg {
uint32_t vxlc_vni;
union vxlan_sockaddr vxlc_local_sa;
union vxlan_sockaddr vxlc_remote_sa;
uint32_t vxlc_mc_ifindex;
uint32_t vxlc_ftable_cnt;
uint32_t vxlc_ftable_max;
uint32_t vxlc_ftable_timeout;
uint16_t vxlc_port_min;
uint16_t vxlc_port_max;
uint8_t vxlc_learn;
uint8_t vxlc_ttl;
};
/*
 * Argument structure shared by the VXLAN_CMD_SET_* and VXLAN_CMD_FLUSH
 * commands.  ifconfig zeroes the whole structure and fills in only the
 * field(s) relevant to the command being issued.
 */
struct ifvxlancmd {
/* FLUSH_ALL accompanies VXLAN_CMD_FLUSH; LEARN accompanies SET_LEARN. */
uint32_t vxlcmd_flags;
#define VXLAN_CMD_FLAG_FLUSH_ALL 0x0001
#define VXLAN_CMD_FLAG_LEARN 0x0002
uint32_t vxlcmd_vni;
uint32_t vxlcmd_ftable_timeout;
uint32_t vxlcmd_ftable_max;
/* Single port for SET_LOCAL_PORT / SET_REMOTE_PORT. */
uint16_t vxlcmd_port;
uint16_t vxlcmd_port_min;
uint16_t vxlcmd_port_max;
/* NOTE(review): presumably used by the FTABLE_ENTRY_ADD/REM commands -- confirm. */
uint8_t vxlcmd_mac[ETHER_ADDR_LEN];
uint8_t vxlcmd_ttl;
/* Address for SET_LOCAL_ADDR / SET_REMOTE_ADDR. */
union vxlan_sockaddr vxlcmd_sa;
/* Interface name for SET_MULTICAST_IF. */
char vxlcmd_ifname[IFNAMSIZ];
};
#endif /* _NET_IF_VXLAN_H_ */

View File

@ -339,6 +339,7 @@
#define PRIV_NET_SETIFVNET 417 /* Move interface to vnet. */
#define PRIV_NET_SETIFDESCR 418 /* Set interface description. */
#define PRIV_NET_SETIFFIB 419 /* Set interface fib. */
#define PRIV_NET_VXLAN 420 /* Administer vxlan. */
/*
* 802.11-related privileges.