Bring in a number of new features, mostly implemented by Michio Honda:
- the VALE switch now supports up to 254 destinations per switch, unicast or broadcast (multicast goes to all ports). - we can attach hw interfaces and the host stack to a VALE switch, which means we will be able to use it more or less as a native bridge (minor tweaks still necessary). A 'vale-ctl' program is supplied in tools/tools/netmap to attach/detach ports to/from the switch, and list the current configuration. - the lookup function in the VALE switch can be reassigned to something else, similar to the pf hooks. This will enable attaching the firewall, or other processing functions (e.g. in-kernel openvswitch) directly on the netmap port. The internal API used by device drivers does not change. Userspace applications should be recompiled because we bump NETMAP_API as we now use some fields in the struct nmreq that were previously ignored -- otherwise, data structures are the same. Manpages will be committed separately.
This commit is contained in:
parent
27892e02fb
commit
f18be5766f
File diff suppressed because it is too large
Load Diff
@ -39,6 +39,7 @@
|
||||
#define unlikely(x) __builtin_expect(!!(x), 0)
|
||||
|
||||
#define NM_LOCK_T struct mtx
|
||||
#define NM_RWLOCK_T struct rwlock
|
||||
#define NM_SELINFO_T struct selinfo
|
||||
#define MBUF_LEN(m) ((m)->m_pkthdr.len)
|
||||
#define NM_SEND_UP(ifp, m) ((ifp)->if_input)(ifp, m)
|
||||
@ -46,6 +47,7 @@
|
||||
#elif defined (linux)
|
||||
|
||||
#define NM_LOCK_T safe_spinlock_t // see bsd_glue.h
|
||||
#define NM_RWLOCK_T safe_spinlock_t // see bsd_glue.h
|
||||
#define NM_SELINFO_T wait_queue_head_t
|
||||
#define MBUF_LEN(m) ((m)->len)
|
||||
#define NM_SEND_UP(ifp, m) netif_rx(m)
|
||||
@ -63,7 +65,7 @@
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
|
||||
#define IFCAP_NETMAP 0x8000
|
||||
#else
|
||||
#define IFCAP_NETMAP 0x100000
|
||||
#define IFCAP_NETMAP 0x200000
|
||||
#endif
|
||||
|
||||
#elif defined (__APPLE__)
|
||||
@ -105,6 +107,9 @@
|
||||
} while (0)
|
||||
|
||||
struct netmap_adapter;
|
||||
struct nm_bdg_fwd;
|
||||
struct nm_bridge;
|
||||
struct netmap_priv_d;
|
||||
|
||||
/*
|
||||
* private, kernel view of a ring. Keeps track of the status of
|
||||
@ -138,6 +143,7 @@ struct netmap_kring {
|
||||
uint16_t nkr_slot_flags; /* initial value for flags */
|
||||
int nkr_hwofs; /* offset between NIC and netmap ring */
|
||||
struct netmap_adapter *na;
|
||||
struct nm_bdg_fwd *nkr_ft;
|
||||
NM_SELINFO_T si; /* poll/select wait queue */
|
||||
NM_LOCK_T q_lock; /* used if no device lock available */
|
||||
} __attribute__((__aligned__(64)));
|
||||
@ -160,6 +166,7 @@ struct netmap_adapter {
|
||||
#define NAF_SKIP_INTR 1 /* use the regular interrupt handler.
|
||||
* useful during initialization
|
||||
*/
|
||||
#define NAF_SW_ONLY 2 /* forward packets only to sw adapter */
|
||||
int refcount; /* number of user-space descriptors using this
|
||||
interface, which is equal to the number of
|
||||
struct netmap_if objs in the mapped region. */
|
||||
@ -218,10 +225,17 @@ struct netmap_adapter {
|
||||
* when it goes to 0 we can detach+free this port
|
||||
* (a bridge port is always attached if it exists;
|
||||
* it is not always registered)
|
||||
* na_bdg points to the bridge this NA is attached to.
|
||||
*/
|
||||
int bdg_port;
|
||||
int na_bdg_refcount;
|
||||
|
||||
struct nm_bridge *na_bdg;
|
||||
/* When we attach a physical interface to the bridge, we
|
||||
* allow the controlling process to terminate, so we need
|
||||
* a place to store the netmap_priv_d data structure.
|
||||
* This is only done when physical interfaces are attached to a bridge.
|
||||
*/
|
||||
struct netmap_priv_d *na_kpriv;
|
||||
#ifdef linux
|
||||
struct net_device_ops nm_ndo;
|
||||
#endif /* linux */
|
||||
@ -288,6 +302,22 @@ struct netmap_slot *netmap_reset(struct netmap_adapter *na,
|
||||
enum txrx tx, int n, u_int new_cur);
|
||||
int netmap_ring_reinit(struct netmap_kring *);
|
||||
|
||||
/*
|
||||
* The following bridge-related interfaces are used by other kernel modules
|
||||
* In the version that only supports unicast or broadcast, the lookup
|
||||
* function can return 0 .. NM_BDG_MAXPORTS-1 for regular ports,
|
||||
* NM_BDG_MAXPORTS for broadcast, NM_BDG_MAXPORTS+1 for unknown.
|
||||
* XXX in practice "unknown" might be handled same as broadcast.
|
||||
*/
|
||||
typedef u_int (*bdg_lookup_fn_t)(char *buf, u_int len, uint8_t *ring_nr,
|
||||
struct netmap_adapter *);
|
||||
int netmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func);
|
||||
u_int netmap_bdg_learning(char *, u_int, uint8_t *, struct netmap_adapter *);
|
||||
#define NM_NAME "vale" /* prefix for the bridge port name */
|
||||
#define NM_BDG_MAXPORTS 254 /* up to 32 for bitmap, 254 ok otherwise */
|
||||
#define NM_BDG_BROADCAST NM_BDG_MAXPORTS
|
||||
#define NM_BDG_NOPORT (NM_BDG_MAXPORTS+1)
|
||||
|
||||
extern u_int netmap_buf_size;
|
||||
#define NETMAP_BUF_SIZE netmap_buf_size // XXX remove
|
||||
extern int netmap_mitigate;
|
||||
@ -309,11 +339,15 @@ enum { /* verbose flags */
|
||||
/*
|
||||
* NA returns a pointer to the struct netmap adapter from the ifp,
|
||||
* WNA is used to write it.
|
||||
* SWNA() is used for the "host stack" endpoint associated
|
||||
* to an interface. It is allocated together with the main NA(),
|
||||
* as an array of two objects.
|
||||
*/
|
||||
#ifndef WNA
|
||||
#define WNA(_ifp) (_ifp)->if_pspare[0]
|
||||
#endif
|
||||
#define NA(_ifp) ((struct netmap_adapter *)WNA(_ifp))
|
||||
#define SWNA(_ifp) (NA(_ifp) + 1)
|
||||
|
||||
/*
|
||||
* Macros to determine if an interface is netmap capable or netmap enabled.
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
|
||||
* Copyright (C) 2011-2013 Matteo Landi, Luigi Rizzo. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
@ -127,8 +127,15 @@
|
||||
* transparent mode, buffers released with the flag set
|
||||
* will be forwarded to the 'other' side (host stack
|
||||
* or NIC, respectively) on the next select() or ioctl()
|
||||
*
|
||||
* The following will be supported from NETMAP_API = 5
|
||||
* NS_NO_LEARN on a VALE switch, do not 'learn' the source port for
|
||||
* this packet.
|
||||
* NS_INDIRECT the netmap buffer contains a 64-bit pointer to
|
||||
* the actual userspace buffer. This may be useful
|
||||
* to reduce copies in a VM environment.
|
||||
* NS_MOREFRAG Part of a multi-segment frame. The last (or only)
|
||||
* segment must not have this flag.
|
||||
* NS_PORT_MASK the high 8 bits of the flag, if not zero, indicate the
|
||||
* destination port for the VALE switch, overriding
|
||||
* the lookup table.
|
||||
@ -146,6 +153,8 @@ struct netmap_slot {
|
||||
* (host stack or device)
|
||||
*/
|
||||
#define NS_NO_LEARN 0x0008
|
||||
#define NS_INDIRECT 0x0010
|
||||
#define NS_MOREFRAG 0x0020
|
||||
#define NS_PORT_SHIFT 8
|
||||
#define NS_PORT_MASK (0xff << NS_PORT_SHIFT)
|
||||
};
|
||||
@ -277,10 +286,24 @@ struct netmap_if {
|
||||
* NIOCREGIF takes an interface name within a struct ifreq,
|
||||
* and activates netmap mode on the interface (if possible).
|
||||
*
|
||||
* For vale ports, starting with NETMAP_API = 5,
|
||||
* nr_tx_rings and nr_rx_rings specify how many software rings
|
||||
* are created (0 means 1).
|
||||
*
|
||||
* NIOCREGIF is also used to attach a NIC to a VALE switch.
|
||||
* In this case the name is vale*:ifname, and "nr_cmd"
|
||||
* is set to 'NETMAP_BDG_ATTACH' or 'NETMAP_BDG_DETACH'.
|
||||
* nr_ringid specifies which rings should be attached, 0 means all,
|
||||
* NETMAP_HW_RING + n means only the n-th ring.
|
||||
* The process can terminate after the interface has been attached.
|
||||
*
|
||||
* NIOCUNREGIF unregisters the interface associated to the fd.
|
||||
* this is deprecated and will go away.
|
||||
*
|
||||
* NIOCTXSYNC, NIOCRXSYNC synchronize tx or rx queues,
|
||||
* whose identity is set in NIOCREGIF through nr_ringid
|
||||
*
|
||||
* NETMAP_API is the API version.
|
||||
*/
|
||||
|
||||
/*
|
||||
@ -289,7 +312,7 @@ struct netmap_if {
|
||||
struct nmreq {
|
||||
char nr_name[IFNAMSIZ];
|
||||
uint32_t nr_version; /* API version */
|
||||
#define NETMAP_API 3 /* current version */
|
||||
#define NETMAP_API 4 /* current version */
|
||||
uint32_t nr_offset; /* nifp offset in the shared region */
|
||||
uint32_t nr_memsize; /* size of the shared region */
|
||||
uint32_t nr_tx_slots; /* slots in tx rings */
|
||||
@ -301,8 +324,15 @@ struct nmreq {
|
||||
#define NETMAP_SW_RING 0x2000 /* process the sw ring */
|
||||
#define NETMAP_NO_TX_POLL 0x1000 /* no automatic txsync on poll */
|
||||
#define NETMAP_RING_MASK 0xfff /* the ring number */
|
||||
uint16_t spare1;
|
||||
uint32_t spare2[4];
|
||||
uint16_t nr_cmd;
|
||||
#define NETMAP_BDG_ATTACH 1 /* attach the NIC */
|
||||
#define NETMAP_BDG_DETACH 2 /* detach the NIC */
|
||||
#define NETMAP_BDG_LOOKUP_REG 3 /* register lookup function */
|
||||
#define NETMAP_BDG_LIST 4 /* get bridge's info */
|
||||
uint16_t nr_arg1;
|
||||
#define NETMAP_BDG_HOST 1 /* attach the host stack on ATTACH */
|
||||
uint16_t nr_arg2;
|
||||
uint32_t spare2[3];
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -3,7 +3,7 @@
|
||||
#
|
||||
# For multiple programs using a single source file each,
|
||||
# we can just define 'progs' and create custom targets.
|
||||
PROGS = pkt-gen bridge testpcap libnetmap.so
|
||||
PROGS = pkt-gen bridge vale-ctl testpcap libnetmap.so
|
||||
|
||||
CLEANFILES = $(PROGS) pcap.o nm_util.o
|
||||
NO_MAN=
|
||||
|
163
tools/tools/netmap/vale-ctl.c
Normal file
163
tools/tools/netmap/vale-ctl.c
Normal file
@ -0,0 +1,163 @@
|
||||
/*
|
||||
* Copyright (C) 2013 Michio Honda. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* $FreeBSD$ */
|
||||
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <inttypes.h> /* PRI* macros */
|
||||
#include <string.h> /* strcmp */
|
||||
#include <fcntl.h> /* open */
|
||||
#include <unistd.h> /* close */
|
||||
#include <sys/ioctl.h> /* ioctl */
|
||||
#include <sys/param.h>
|
||||
#include <net/if.h> /* ifreq */
|
||||
#include <net/netmap.h>
|
||||
#include <net/netmap_user.h>
|
||||
#include <libgen.h> /* basename */
|
||||
|
||||
/*
 * Debug support.
 *
 * ND(format, ...) expands to an empty statement; its arguments are
 *	discarded, so a D() call can be silenced by renaming it to ND().
 * D(format, ...) prints to stderr, prefixing the message with the
 *	calling function name and source line and appending a newline.
 *	"format" is pasted between string literals, so it must itself
 *	be a string literal.
 */
|
||||
#define ND(format, ...) do {} while(0)
|
||||
#define D(format, ...) \
|
||||
fprintf(stderr, "%s [%d] " format "\n", \
|
||||
__FUNCTION__, __LINE__, ##__VA_ARGS__)
|
||||
|
||||
static int
|
||||
bdg_ctl(const char *name, int nr_cmd, int nr_arg)
|
||||
{
|
||||
struct nmreq nmr;
|
||||
int error = 0;
|
||||
int fd = open("/dev/netmap", O_RDWR);
|
||||
|
||||
if (fd == -1) {
|
||||
D("Unable to open /dev/netmap");
|
||||
return -1;
|
||||
}
|
||||
|
||||
bzero(&nmr, sizeof(nmr));
|
||||
nmr.nr_version = NETMAP_API;
|
||||
if (name != NULL) /* might be NULL */
|
||||
strncpy(nmr.nr_name, name, sizeof(nmr.nr_name));
|
||||
nmr.nr_cmd = nr_cmd;
|
||||
|
||||
switch (nr_cmd) {
|
||||
case NETMAP_BDG_ATTACH:
|
||||
case NETMAP_BDG_DETACH:
|
||||
if (nr_arg && nr_arg != NETMAP_BDG_HOST)
|
||||
nr_arg = 0;
|
||||
nmr.nr_arg1 = nr_arg;
|
||||
error = ioctl(fd, NIOCREGIF, &nmr);
|
||||
if (error == -1)
|
||||
D("Unable to %s %s to the bridge", nr_cmd ==
|
||||
NETMAP_BDG_DETACH?"detach":"attach", name);
|
||||
else
|
||||
D("Success to %s %s to the bridge\n", nr_cmd ==
|
||||
NETMAP_BDG_DETACH?"detach":"attach", name);
|
||||
break;
|
||||
|
||||
case NETMAP_BDG_LIST:
|
||||
if (strlen(nmr.nr_name)) { /* name to bridge/port info */
|
||||
error = ioctl(fd, NIOCGINFO, &nmr);
|
||||
if (error)
|
||||
D("Unable to obtain info for %s", name);
|
||||
else
|
||||
D("%s at bridge:%d port:%d", name, nmr.nr_arg1,
|
||||
nmr.nr_arg2);
|
||||
break;
|
||||
}
|
||||
|
||||
/* scan all the bridges and ports */
|
||||
nmr.nr_arg1 = nmr.nr_arg2 = 0;
|
||||
for (; !ioctl(fd, NIOCGINFO, &nmr); nmr.nr_arg2++) {
|
||||
D("bridge:%d port:%d %s", nmr.nr_arg1, nmr.nr_arg2,
|
||||
nmr.nr_name);
|
||||
nmr.nr_name[0] = '\0';
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
default: /* GINFO */
|
||||
nmr.nr_cmd = nmr.nr_arg1 = nmr.nr_arg2 = 0;
|
||||
error = ioctl(fd, NIOCGINFO, &nmr);
|
||||
if (error)
|
||||
D("Unable to get if info for %s", name);
|
||||
else
|
||||
D("%s: %d queues.", name, nmr.nr_rx_rings);
|
||||
break;
|
||||
}
|
||||
close(fd);
|
||||
return error;
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
int ch, nr_cmd = 0, nr_arg = 0;
|
||||
const char *command = basename(argv[0]);
|
||||
char *name = NULL;
|
||||
|
||||
if (argc != 3 && argc != 1 /* list all */ ) {
|
||||
usage:
|
||||
fprintf(stderr,
|
||||
"Usage:\n"
|
||||
"%s arguments\n"
|
||||
"\t-g interface interface name to get info\n"
|
||||
"\t-d interface interface name to be detached\n"
|
||||
"\t-a interface interface name to be attached\n"
|
||||
"\t-h interface interface name to be attached with the host stack\n"
|
||||
"\t-l list all or specified bridge's interfaces\n"
|
||||
"", command);
|
||||
return 0;
|
||||
}
|
||||
|
||||
while ((ch = getopt(argc, argv, "d:a:h:g:l:")) != -1) {
|
||||
switch (ch) {
|
||||
default:
|
||||
fprintf(stderr, "bad option %c %s", ch, optarg);
|
||||
goto usage;
|
||||
case 'd':
|
||||
nr_cmd = NETMAP_BDG_DETACH;
|
||||
break;
|
||||
case 'a':
|
||||
nr_cmd = NETMAP_BDG_ATTACH;
|
||||
break;
|
||||
case 'h':
|
||||
nr_cmd = NETMAP_BDG_ATTACH;
|
||||
nr_arg = NETMAP_BDG_HOST;
|
||||
break;
|
||||
case 'g':
|
||||
nr_cmd = 0;
|
||||
break;
|
||||
case 'l':
|
||||
nr_cmd = NETMAP_BDG_LIST;
|
||||
break;
|
||||
}
|
||||
name = optarg;
|
||||
}
|
||||
if (argc == 1)
|
||||
nr_cmd = NETMAP_BDG_LIST;
|
||||
bdg_ctl(name, nr_cmd, nr_arg);
|
||||
return 0;
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user