Bring in a number of new features, mostly implemented by Michio Honda:

- the VALE switch now supports up to 254 destinations per switch,
  unicast or broadcast (multicast goes to all ports).

- we can attach hw interfaces and the host stack to a VALE switch,
  which means we will be able to use it more or less as a native bridge
  (minor tweaks still necessary).
  A 'vale-ctl' program is supplied in tools/tools/netmap
  to attach/detach ports to/from the switch, and list the current configuration.

- the lookup function in the VALE switch can be reassigned to
  something else, similar to the pf hooks. This will enable
  attaching the firewall, or other processing functions (e.g. in-kernel
  openvswitch) directly on the netmap port.

The internal API used by device drivers does not change.

Userspace applications should be recompiled because we
bump NETMAP_API as we now use some fields in the struct nmreq
that were previously ignored -- otherwise, data structures
are the same.

Manpages will be committed separately.
This commit is contained in:
Luigi Rizzo 2013-05-30 14:07:14 +00:00
parent 27892e02fb
commit f18be5766f
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=251139
5 changed files with 1289 additions and 317 deletions

File diff suppressed because it is too large Load Diff

View File

@ -39,6 +39,7 @@
#define unlikely(x) __builtin_expect(!!(x), 0)
#define NM_LOCK_T struct mtx
#define NM_RWLOCK_T struct rwlock
#define NM_SELINFO_T struct selinfo
#define MBUF_LEN(m) ((m)->m_pkthdr.len)
#define NM_SEND_UP(ifp, m) ((ifp)->if_input)(ifp, m)
@ -46,6 +47,7 @@
#elif defined (linux)
#define NM_LOCK_T safe_spinlock_t // see bsd_glue.h
#define NM_RWLOCK_T safe_spinlock_t // see bsd_glue.h
#define NM_SELINFO_T wait_queue_head_t
#define MBUF_LEN(m) ((m)->len)
#define NM_SEND_UP(ifp, m) netif_rx(m)
@ -63,7 +65,7 @@
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
#define IFCAP_NETMAP 0x8000
#else
#define IFCAP_NETMAP 0x100000
#define IFCAP_NETMAP 0x200000
#endif
#elif defined (__APPLE__)
@ -105,6 +107,9 @@
} while (0)
struct netmap_adapter;
struct nm_bdg_fwd;
struct nm_bridge;
struct netmap_priv_d;
/*
* private, kernel view of a ring. Keeps track of the status of
@ -138,6 +143,7 @@ struct netmap_kring {
uint16_t nkr_slot_flags; /* initial value for flags */
int nkr_hwofs; /* offset between NIC and netmap ring */
struct netmap_adapter *na;
struct nm_bdg_fwd *nkr_ft;
NM_SELINFO_T si; /* poll/select wait queue */
NM_LOCK_T q_lock; /* used if no device lock available */
} __attribute__((__aligned__(64)));
@ -160,6 +166,7 @@ struct netmap_adapter {
#define NAF_SKIP_INTR 1 /* use the regular interrupt handler.
* useful during initialization
*/
#define NAF_SW_ONLY 2 /* forward packets only to sw adapter */
int refcount; /* number of user-space descriptors using this
interface, which is equal to the number of
struct netmap_if objs in the mapped region. */
@ -218,10 +225,17 @@ struct netmap_adapter {
* when it goes to 0 we can detach+free this port
* (a bridge port is always attached if it exists;
* it is not always registered)
* na_bdg points to the bridge this NA is attached to.
*/
int bdg_port;
int na_bdg_refcount;
struct nm_bridge *na_bdg;
/* When we attach a physical interface to the bridge, we
* allow the controlling process to terminate, so we need
* a place to store the netmap_priv_d data structure.
* This is only done when physical interfaces are attached to a bridge.
*/
struct netmap_priv_d *na_kpriv;
#ifdef linux
struct net_device_ops nm_ndo;
#endif /* linux */
@ -288,6 +302,22 @@ struct netmap_slot *netmap_reset(struct netmap_adapter *na,
enum txrx tx, int n, u_int new_cur);
int netmap_ring_reinit(struct netmap_kring *);
/*
* The following bridge-related interfaces are used by other kernel modules
* In the version that only supports unicast or broadcast, the lookup
* function can return 0 .. NM_BDG_MAXPORTS-1 for regular ports,
* NM_BDG_MAXPORTS for broadcast, NM_BDG_MAXPORTS+1 for unknown.
* XXX in practice "unknown" might be handled same as broadcast.
*/
typedef u_int (*bdg_lookup_fn_t)(char *buf, u_int len, uint8_t *ring_nr,
struct netmap_adapter *);
int netmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func);
u_int netmap_bdg_learning(char *, u_int, uint8_t *, struct netmap_adapter *);
#define NM_NAME "vale" /* prefix for the bridge port name */
#define NM_BDG_MAXPORTS 254 /* up to 32 for bitmap, 254 ok otherwise */
#define NM_BDG_BROADCAST NM_BDG_MAXPORTS
#define NM_BDG_NOPORT (NM_BDG_MAXPORTS+1)
extern u_int netmap_buf_size;
#define NETMAP_BUF_SIZE netmap_buf_size // XXX remove
extern int netmap_mitigate;
@ -309,11 +339,15 @@ enum { /* verbose flags */
/*
* NA returns a pointer to the struct netmap adapter from the ifp,
* WNA is used to write it.
* SWNA() is used for the "host stack" endpoint associated
* to an interface. It is allocated together with the main NA(),
* as an array of two objects.
*/
#ifndef WNA
#define WNA(_ifp) (_ifp)->if_pspare[0]
#endif
#define NA(_ifp) ((struct netmap_adapter *)WNA(_ifp))
#define SWNA(_ifp) (NA(_ifp) + 1)
/*
* Macros to determine if an interface is netmap capable or netmap enabled.

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
* Copyright (C) 2011-2013 Matteo Landi, Luigi Rizzo. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
@ -127,8 +127,15 @@
* transparent mode, buffers released with the flag set
* will be forwarded to the 'other' side (host stack
* or NIC, respectively) on the next select() or ioctl()
*
* The following will be supported from NETMAP_API = 5
* NS_NO_LEARN on a VALE switch, do not 'learn' the source port for
* this packet.
* NS_INDIRECT the netmap buffer contains a 64-bit pointer to
* the actual userspace buffer. This may be useful
* to reduce copies in a VM environment.
* NS_MOREFRAG Part of a multi-segment frame. The last (or only)
* segment must not have this flag.
* NS_PORT_MASK the high 8 bits of the flag, if not zero, indicate the
* destination port for the VALE switch, overriding
* the lookup table.
@ -146,6 +153,8 @@ struct netmap_slot {
* (host stack or device)
*/
#define NS_NO_LEARN 0x0008
#define NS_INDIRECT 0x0010
#define NS_MOREFRAG 0x0020
#define NS_PORT_SHIFT 8
#define NS_PORT_MASK (0xff << NS_PORT_SHIFT)
};
@ -277,10 +286,24 @@ struct netmap_if {
* NIOCREGIF takes an interface name within a struct ifreq,
* and activates netmap mode on the interface (if possible).
*
* For vale ports, starting with NETMAP_API = 5,
* nr_tx_rings and nr_rx_rings specify how many software rings
* are created (0 means 1).
*
* NIOCREGIF is also used to attach a NIC to a VALE switch.
* In this case the name is vale*:ifname, and "nr_cmd"
* is set to 'NETMAP_BDG_ATTACH' or 'NETMAP_BDG_DETACH'.
* nr_ringid specifies which rings should be attached, 0 means all,
* NETMAP_HW_RING + n means only the n-th ring.
* The process can terminate after the interface has been attached.
*
* NIOCUNREGIF unregisters the interface associated to the fd.
* this is deprecated and will go away.
*
* NIOCTXSYNC, NIOCRXSYNC synchronize tx or rx queues,
* whose identity is set in NIOCREGIF through nr_ringid
*
* NETMAP_API is the API version.
*/
/*
@ -289,7 +312,7 @@ struct netmap_if {
struct nmreq {
char nr_name[IFNAMSIZ];
uint32_t nr_version; /* API version */
#define NETMAP_API 3 /* current version */
#define NETMAP_API 4 /* current version */
uint32_t nr_offset; /* nifp offset in the shared region */
uint32_t nr_memsize; /* size of the shared region */
uint32_t nr_tx_slots; /* slots in tx rings */
@ -301,8 +324,15 @@ struct nmreq {
#define NETMAP_SW_RING 0x2000 /* process the sw ring */
#define NETMAP_NO_TX_POLL 0x1000 /* no automatic txsync on poll */
#define NETMAP_RING_MASK 0xfff /* the ring number */
uint16_t spare1;
uint32_t spare2[4];
uint16_t nr_cmd;
#define NETMAP_BDG_ATTACH 1 /* attach the NIC */
#define NETMAP_BDG_DETACH 2 /* detach the NIC */
#define NETMAP_BDG_LOOKUP_REG 3 /* register lookup function */
#define NETMAP_BDG_LIST 4 /* get bridge's info */
uint16_t nr_arg1;
#define NETMAP_BDG_HOST 1 /* attach the host stack on ATTACH */
uint16_t nr_arg2;
uint32_t spare2[3];
};
/*

View File

@ -3,7 +3,7 @@
#
# For multiple programs using a single source file each,
# we can just define 'progs' and create custom targets.
PROGS = pkt-gen bridge testpcap libnetmap.so
PROGS = pkt-gen bridge vale-ctl testpcap libnetmap.so
CLEANFILES = $(PROGS) pcap.o nm_util.o
NO_MAN=

View File

@ -0,0 +1,163 @@
/*
* Copyright (C) 2013 Michio Honda. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/* $FreeBSD$ */
#include <errno.h>
#include <stdio.h>
#include <inttypes.h> /* PRI* macros */
#include <string.h> /* strcmp */
#include <fcntl.h> /* open */
#include <unistd.h> /* close */
#include <sys/ioctl.h> /* ioctl */
#include <sys/param.h>
#include <net/if.h> /* ifreq */
#include <net/netmap.h>
#include <net/netmap_user.h>
#include <libgen.h> /* basename */
/* debug support */
#define ND(format, ...) do {} while(0)
#define D(format, ...) \
fprintf(stderr, "%s [%d] " format "\n", \
__FUNCTION__, __LINE__, ##__VA_ARGS__)
/*
 * Issue a single control request to /dev/netmap.
 *
 * name    interface or port name copied into nmr.nr_name; may be NULL,
 *         in which case nr_name is left empty.
 * nr_cmd  NETMAP_BDG_ATTACH, NETMAP_BDG_DETACH or NETMAP_BDG_LIST;
 *         any other value performs a plain NIOCGINFO query.
 * nr_arg  only used for attach/detach; any value other than
 *         NETMAP_BDG_HOST is treated as 0.
 *
 * Returns -1 if the device cannot be opened or the name does not fit
 * in nr_name, otherwise the result of the ioctl() that was issued
 * (0 when listing all ports, since that scan stops on the first failure).
 * Progress and errors are reported on stderr through D().
 */
static int
bdg_ctl(const char *name, int nr_cmd, int nr_arg)
{
	struct nmreq nmr;
	int error = 0;
	/* printable form of name: passing NULL to a %s conversion is undefined */
	const char *pname = (name != NULL) ? name : "";
	int fd = open("/dev/netmap", O_RDWR);

	if (fd == -1) {
		D("Unable to open /dev/netmap");
		return -1;
	}

	memset(&nmr, 0, sizeof(nmr));
	nmr.nr_version = NETMAP_API;
	if (name != NULL) {	/* might be NULL */
		/*
		 * Refuse names that do not fit: strncpy() would otherwise
		 * leave nr_name without a terminating NUL.
		 */
		if (strlen(name) >= sizeof(nmr.nr_name)) {
			D("Name %s is too long", name);
			close(fd);
			return -1;
		}
		strncpy(nmr.nr_name, name, sizeof(nmr.nr_name) - 1);
	}
	nmr.nr_cmd = nr_cmd;

	switch (nr_cmd) {
	case NETMAP_BDG_ATTACH:
	case NETMAP_BDG_DETACH:
		/* the only meaningful argument is NETMAP_BDG_HOST */
		if (nr_arg && nr_arg != NETMAP_BDG_HOST)
			nr_arg = 0;
		nmr.nr_arg1 = nr_arg;
		error = ioctl(fd, NIOCREGIF, &nmr);
		if (error == -1)
			D("Unable to %s %s to the bridge", nr_cmd ==
			    NETMAP_BDG_DETACH?"detach":"attach", pname);
		else	/* D() already appends the newline */
			D("Success to %s %s to the bridge", nr_cmd ==
			    NETMAP_BDG_DETACH?"detach":"attach", pname);
		break;

	case NETMAP_BDG_LIST:
		if (nmr.nr_name[0] != '\0') { /* name to bridge/port info */
			error = ioctl(fd, NIOCGINFO, &nmr);
			if (error)
				D("Unable to obtain info for %s", pname);
			else
				D("%s at bridge:%d port:%d", pname, nmr.nr_arg1,
				    nmr.nr_arg2);
			break;
		}

		/* scan all the bridges and ports */
		nmr.nr_arg1 = nmr.nr_arg2 = 0;
		for (; !ioctl(fd, NIOCGINFO, &nmr); nmr.nr_arg2++) {
			D("bridge:%d port:%d %s", nmr.nr_arg1, nmr.nr_arg2,
			    nmr.nr_name);
			/* clear the name so the next query is again by index */
			nmr.nr_name[0] = '\0';
		}
		break;

	default: /* GINFO */
		nmr.nr_cmd = nmr.nr_arg1 = nmr.nr_arg2 = 0;
		error = ioctl(fd, NIOCGINFO, &nmr);
		if (error)
			D("Unable to get if info for %s", pname);
		else
			D("%s: %d queues.", pname, nmr.nr_rx_rings);
		break;
	}

	close(fd);
	return error;
}
int
main(int argc, char *argv[])
{
int ch, nr_cmd = 0, nr_arg = 0;
const char *command = basename(argv[0]);
char *name = NULL;
if (argc != 3 && argc != 1 /* list all */ ) {
usage:
fprintf(stderr,
"Usage:\n"
"%s arguments\n"
"\t-g interface interface name to get info\n"
"\t-d interface interface name to be detached\n"
"\t-a interface interface name to be attached\n"
"\t-h interface interface name to be attached with the host stack\n"
"\t-l list all or specified bridge's interfaces\n"
"", command);
return 0;
}
while ((ch = getopt(argc, argv, "d:a:h:g:l:")) != -1) {
switch (ch) {
default:
fprintf(stderr, "bad option %c %s", ch, optarg);
goto usage;
case 'd':
nr_cmd = NETMAP_BDG_DETACH;
break;
case 'a':
nr_cmd = NETMAP_BDG_ATTACH;
break;
case 'h':
nr_cmd = NETMAP_BDG_ATTACH;
nr_arg = NETMAP_BDG_HOST;
break;
case 'g':
nr_cmd = 0;
break;
case 'l':
nr_cmd = NETMAP_BDG_LIST;
break;
}
name = optarg;
}
if (argc == 1)
nr_cmd = NETMAP_BDG_LIST;
bdg_ctl(name, nr_cmd, nr_arg);
return 0;
}