freebsd-dev/sys/dev/netmap/netmap_vale.c

1617 lines
43 KiB
C
Raw Normal View History

/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (C) 2013-2016 Universita` di Pisa
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if defined(__FreeBSD__)
#include <sys/cdefs.h> /* prerequisite */
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h> /* defines used in kernel.h */
#include <sys/kernel.h> /* types used in module initialization */
#include <sys/conf.h> /* cdevsw struct, UID, GID */
#include <sys/sockio.h>
#include <sys/socketvar.h> /* struct socket */
#include <sys/malloc.h>
#include <sys/poll.h>
#include <sys/rwlock.h>
#include <sys/socket.h> /* sockaddrs */
#include <sys/selinfo.h>
#include <sys/sysctl.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/bpf.h> /* BIOCIMMEDIATE */
#include <machine/bus.h> /* bus_dmamap_* */
#include <sys/endian.h>
#include <sys/refcount.h>
#include <sys/smp.h>
#elif defined(linux)
#include "bsd_glue.h"
#elif defined(__APPLE__)
#warning OSX support is only partial
#include "osx_glue.h"
#elif defined(_WIN32)
#include "win_glue.h"
#else
#error Unsupported platform
#endif /* unsupported */
/*
* common headers
*/
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>
#include <dev/netmap/netmap_bdg.h>
#ifdef WITH_VALE
/*
* system parameters (most of them in netmap_kern.h)
* NM_BDG_NAME prefix for switch port names, default "vale"
* NM_BDG_MAXPORTS number of ports
* NM_BRIDGES max number of switches in the system.
* XXX should become a sysctl or tunable
*
* Switch ports are named valeX:Y where X is the switch name and Y
* is the port. If Y matches a physical interface name, the port is
* connected to a physical device.
*
* Unlike physical interfaces, switch ports use their own memory region
* for rings and buffers.
* The virtual interfaces use per-queue lock instead of core lock.
* In the tx loop, we aggregate traffic in batches to make all operations
* faster. The batch size is bridge_batch.
*/
#define NM_BDG_MAXRINGS 16 /* XXX unclear how many. */
#define NM_BDG_MAXSLOTS 4096 /* XXX same as above */
#define NM_BRIDGE_RINGSIZE 1024 /* in the device */
#define NM_BDG_BATCH 1024 /* entries in the forwarding buffer */
/* actual size of the tables */
#define NM_BDG_BATCH_MAX (NM_BDG_BATCH + NETMAP_MAX_FRAGS)
/* NM_FT_NULL terminates a list of slots in the ft */
#define NM_FT_NULL NM_BDG_BATCH_MAX
/*
* bridge_batch is set via sysctl to the max batch size to be
* used in the bridge. The actual value may be larger as the
* last packet in the block may overflow the size.
*/
static int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
SYSBEGIN(vars_vale);
SYSCTL_DECL(_dev_netmap);
SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0,
"Max batch size to be used in the bridge");
SYSEND;
static int netmap_vale_vp_create(struct nmreq_header *hdr, struct ifnet *,
struct netmap_mem_d *nmd, struct netmap_vp_adapter **);
static int netmap_vale_vp_bdg_attach(const char *, struct netmap_adapter *,
struct nm_bridge *);
static int netmap_vale_bwrap_attach(const char *, struct netmap_adapter *);
/*
* For each output interface, nm_vale_q is used to construct a list.
* bq_len is the number of output buffers (we can have coalescing
* during the copy).
*/
struct nm_vale_q {
uint16_t bq_head;
uint16_t bq_tail;
uint32_t bq_len; /* number of buffers */
};
2018-04-12 07:20:50 +00:00
/* Holds the default callbacks */
struct netmap_bdg_ops vale_bdg_ops = {
.lookup = netmap_vale_learning,
.config = NULL,
.dtor = NULL,
.vp_create = netmap_vale_vp_create,
.bwrap_attach = netmap_vale_bwrap_attach,
.name = NM_BDG_NAME,
};
/*
* this is a slightly optimized copy routine which rounds
* to multiple of 64 bytes and is often faster than dealing
* with other odd sizes. We assume there is enough room
* in the source and destination buffers.
*
* XXX only for multiples of 64 bytes, non overlapped.
*/
static inline void
pkt_copy(void *_src, void *_dst, int l)
{
uint64_t *src = _src;
uint64_t *dst = _dst;
if (unlikely(l >= 1024)) {
memcpy(dst, src, l);
return;
}
for (; likely(l > 0); l-=64) {
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
}
}
/*
* Free the forwarding tables for rings attached to switch ports.
*/
static void
nm_free_bdgfwd(struct netmap_adapter *na)
{
int nrings, i;
2018-04-12 07:20:50 +00:00
struct netmap_kring **kring;
NMG_LOCK_ASSERT();
nrings = na->num_tx_rings;
kring = na->tx_rings;
for (i = 0; i < nrings; i++) {
2018-04-12 07:20:50 +00:00
if (kring[i]->nkr_ft) {
nm_os_free(kring[i]->nkr_ft);
kring[i]->nkr_ft = NULL; /* protect from freeing twice */
}
}
}
/*
* Allocate the forwarding tables for the rings attached to the bridge ports.
*/
static int
nm_alloc_bdgfwd(struct netmap_adapter *na)
{
int nrings, l, i, num_dstq;
2018-04-12 07:20:50 +00:00
struct netmap_kring **kring;
NMG_LOCK_ASSERT();
/* all port:rings + broadcast */
num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
l += sizeof(struct nm_vale_q) * num_dstq;
l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;
2015-07-10 05:51:36 +00:00
nrings = netmap_real_rings(na, NR_TX);
kring = na->tx_rings;
for (i = 0; i < nrings; i++) {
struct nm_bdg_fwd *ft;
struct nm_vale_q *dstq;
int j;
ft = nm_os_malloc(l);
if (!ft) {
nm_free_bdgfwd(na);
return ENOMEM;
}
dstq = (struct nm_vale_q *)(ft + NM_BDG_BATCH_MAX);
for (j = 0; j < num_dstq; j++) {
dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
dstq[j].bq_len = 0;
}
2018-04-12 07:20:50 +00:00
kring[i]->nkr_ft = ft;
}
return 0;
}
/* Allows external modules to create bridges in exclusive mode,
* returns an authentication token that the external module will need
* to provide during nm_bdg_ctl_{attach, detach}(), netmap_bdg_regops(),
* and nm_bdg_update_private_data() operations.
* Successfully executed if ret != NULL and *return_status == 0.
*/
void *
netmap_vale_create(const char *bdg_name, int *return_status)
2018-04-12 07:20:50 +00:00
{
struct nm_bridge *b = NULL;
void *ret = NULL;
NMG_LOCK();
b = nm_find_bridge(bdg_name, 0 /* don't create */, NULL);
2018-04-12 07:20:50 +00:00
if (b) {
*return_status = EEXIST;
goto unlock_bdg_create;
}
b = nm_find_bridge(bdg_name, 1 /* create */, &vale_bdg_ops);
2018-04-12 07:20:50 +00:00
if (!b) {
*return_status = ENOMEM;
goto unlock_bdg_create;
}
b->bdg_flags |= NM_BDG_ACTIVE | NM_BDG_EXCLUSIVE;
ret = nm_bdg_get_auth_token(b);
*return_status = 0;
unlock_bdg_create:
NMG_UNLOCK();
return ret;
}
/* Allows external modules to destroy a bridge created through
* netmap_bdg_create(), the bridge must be empty.
*/
int
netmap_vale_destroy(const char *bdg_name, void *auth_token)
2018-04-12 07:20:50 +00:00
{
struct nm_bridge *b = NULL;
int ret = 0;
NMG_LOCK();
b = nm_find_bridge(bdg_name, 0 /* don't create */, NULL);
2018-04-12 07:20:50 +00:00
if (!b) {
ret = ENXIO;
goto unlock_bdg_free;
}
if (!nm_bdg_valid_auth_token(b, auth_token)) {
ret = EACCES;
goto unlock_bdg_free;
}
if (!(b->bdg_flags & NM_BDG_EXCLUSIVE)) {
ret = EINVAL;
goto unlock_bdg_free;
}
2018-04-12 07:20:50 +00:00
b->bdg_flags &= ~(NM_BDG_EXCLUSIVE | NM_BDG_ACTIVE);
ret = netmap_bdg_free(b);
if (ret) {
b->bdg_flags |= NM_BDG_EXCLUSIVE | NM_BDG_ACTIVE;
}
unlock_bdg_free:
NMG_UNLOCK();
return ret;
}
/* Process NETMAP_REQ_VALE_LIST. */
int
netmap_vale_list(struct nmreq_header *hdr)
{
struct nmreq_vale_list *req =
(struct nmreq_vale_list *)(uintptr_t)hdr->nr_body;
int namelen = strlen(hdr->nr_name);
struct nm_bridge *b, *bridges;
struct netmap_vp_adapter *vpna;
int error = 0, i, j;
u_int num_bridges;
netmap_bns_getbridges(&bridges, &num_bridges);
/* this is used to enumerate bridges and ports */
if (namelen) { /* look up indexes of bridge and port */
if (strncmp(hdr->nr_name, NM_BDG_NAME,
strlen(NM_BDG_NAME))) {
return EINVAL;
}
NMG_LOCK();
b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
if (!b) {
NMG_UNLOCK();
return ENOENT;
}
req->nr_bridge_idx = b - bridges; /* bridge index */
req->nr_port_idx = NM_BDG_NOPORT;
for (j = 0; j < b->bdg_active_ports; j++) {
i = b->bdg_port_index[j];
vpna = b->bdg_ports[i];
if (vpna == NULL) {
nm_prerr("This should not happen");
continue;
}
/* the former and the latter identify a
* virtual port and a NIC, respectively
*/
if (!strcmp(vpna->up.name, hdr->nr_name)) {
req->nr_port_idx = i; /* port index */
break;
}
}
NMG_UNLOCK();
} else {
/* return the first non-empty entry starting from
* bridge nr_arg1 and port nr_arg2.
*
* Users can detect the end of the same bridge by
* seeing the new and old value of nr_arg1, and can
* detect the end of all the bridge by error != 0
*/
i = req->nr_bridge_idx;
j = req->nr_port_idx;
NMG_LOCK();
for (error = ENOENT; i < NM_BRIDGES; i++) {
b = bridges + i;
for ( ; j < NM_BDG_MAXPORTS; j++) {
if (b->bdg_ports[j] == NULL)
continue;
vpna = b->bdg_ports[j];
/* write back the VALE switch name */
strlcpy(hdr->nr_name, vpna->up.name,
sizeof(hdr->nr_name));
error = 0;
goto out;
}
j = 0; /* following bridges scan from 0 */
}
out:
req->nr_bridge_idx = i;
req->nr_port_idx = j;
NMG_UNLOCK();
}
return error;
}
/* Process NETMAP_REQ_VALE_ATTACH.
*/
int
netmap_vale_attach(struct nmreq_header *hdr, void *auth_token)
{
struct nmreq_vale_attach *req =
(struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
struct netmap_vp_adapter * vpna;
struct netmap_adapter *na = NULL;
struct netmap_mem_d *nmd = NULL;
struct nm_bridge *b = NULL;
int error;
NMG_LOCK();
/* permission check for modified bridges */
b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
error = EACCES;
goto unlock_exit;
}
if (req->reg.nr_mem_id) {
nmd = netmap_mem_find(req->reg.nr_mem_id);
if (nmd == NULL) {
error = EINVAL;
goto unlock_exit;
}
}
/* check for existing one */
error = netmap_get_vale_na(hdr, &na, nmd, 0);
if (na) {
error = EBUSY;
goto unref_exit;
}
error = netmap_get_vale_na(hdr, &na,
nmd, 1 /* create if not exists */);
if (error) { /* no device */
goto unlock_exit;
}
if (na == NULL) { /* VALE prefix missing */
error = EINVAL;
goto unlock_exit;
}
if (NETMAP_OWNED_BY_ANY(na)) {
error = EBUSY;
goto unref_exit;
}
if (na->nm_bdg_ctl) {
/* nop for VALE ports. The bwrap needs to put the hwna
* in netmap mode (see netmap_bwrap_bdg_ctl)
*/
error = na->nm_bdg_ctl(hdr, na);
if (error)
goto unref_exit;
nm_prdis("registered %s to netmap-mode", na->name);
}
vpna = (struct netmap_vp_adapter *)na;
req->port_index = vpna->bdg_port;
if (nmd)
netmap_mem_put(nmd);
NMG_UNLOCK();
return 0;
unref_exit:
netmap_adapter_put(na);
unlock_exit:
if (nmd)
netmap_mem_put(nmd);
NMG_UNLOCK();
return error;
}
/* Process NETMAP_REQ_VALE_DETACH.
*/
int
netmap_vale_detach(struct nmreq_header *hdr, void *auth_token)
{
struct nmreq_vale_detach *nmreq_det = (void *)(uintptr_t)hdr->nr_body;
struct netmap_vp_adapter *vpna;
struct netmap_adapter *na;
struct nm_bridge *b = NULL;
int error;
NMG_LOCK();
/* permission check for modified bridges */
b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
error = EACCES;
goto unlock_exit;
}
error = netmap_get_vale_na(hdr, &na, NULL, 0 /* don't create */);
if (error) { /* no device, or another bridge or user owns the device */
goto unlock_exit;
}
if (na == NULL) { /* VALE prefix missing */
error = EINVAL;
goto unlock_exit;
} else if (nm_is_bwrap(na) &&
((struct netmap_bwrap_adapter *)na)->na_polling_state) {
/* Don't detach a NIC with polling */
error = EBUSY;
goto unref_exit;
}
vpna = (struct netmap_vp_adapter *)na;
if (na->na_vp != vpna) {
/* trying to detach first attach of VALE persistent port attached
* to 2 bridges
*/
error = EBUSY;
goto unref_exit;
}
nmreq_det->port_index = vpna->bdg_port;
if (na->nm_bdg_ctl) {
/* remove the port from bridge. The bwrap
* also needs to put the hwna in normal mode
*/
error = na->nm_bdg_ctl(hdr, na);
}
unref_exit:
netmap_adapter_put(na);
unlock_exit:
NMG_UNLOCK();
return error;
}
2018-04-12 07:20:50 +00:00
Update to the current version of netmap. Mostly bugfixes or features developed in the past 6 months, so this is a 10.1 candidate. Basically no user API changes (some bugfixes in sys/net/netmap_user.h). In detail: 1. netmap support for virtio-net, including in netmap mode. Under bhyve and with a netmap backend [2] we reach over 1Mpps with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode. 2. (kernel) add support for multiple memory allocators, so we can better partition physical and virtual interfaces giving access to separate users. The most visible effect is one additional argument to the various kernel functions to compute buffer addresses. All netmap-supported drivers are affected, but changes are mechanical and trivial 3. (kernel) simplify the prototype for *txsync() and *rxsync() driver methods. All netmap drivers affected, changes mostly mechanical. 4. add support for netmap-monitor ports. Think of it as a mirroring port on a physical switch: a netmap monitor port replicates traffic present on the main port. Restrictions apply. Drive carefully. 5. if_lem.c: support for various paravirtualization features, experimental and disabled by default. Most of these are described in our ANCS'13 paper [1]. Paravirtualized support in netmap mode is new, and beats the numbers in the paper by a large factor (under qemu-kvm, we measured gues-host throughput up to 10-12 Mpps). A lot of refactoring and additional documentation in the files in sys/dev/netmap, but apart from #2 and #3 above, almost nothing of this stuff is visible to other kernel parts. Example programs in tools/tools/netmap have been updated with bugfixes and to support more of the existing features. This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline. A lot of this code has been contributed by my colleagues at UNIPI, including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella. MFC after: 3 days.
2014-08-16 15:00:01 +00:00
/* nm_dtor callback for ephemeral VALE ports */
static void
netmap_vale_vp_dtor(struct netmap_adapter *na)
{
struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
struct nm_bridge *b = vpna->na_bdg;
nm_prdis("%s has %d references", na->name, na->na_refcount);
if (b) {
netmap_bdg_detach_common(b, vpna->bdg_port, -1);
}
if (na->ifp != NULL && !nm_iszombie(na)) {
NM_DETACH_NA(na->ifp);
if (vpna->autodelete) {
nm_prdis("releasing %s", na->ifp->if_xname);
NMG_UNLOCK();
nm_os_vi_detach(na->ifp);
NMG_LOCK();
}
}
Update to the current version of netmap. Mostly bugfixes or features developed in the past 6 months, so this is a 10.1 candidate. Basically no user API changes (some bugfixes in sys/net/netmap_user.h). In detail: 1. netmap support for virtio-net, including in netmap mode. Under bhyve and with a netmap backend [2] we reach over 1Mpps with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode. 2. (kernel) add support for multiple memory allocators, so we can better partition physical and virtual interfaces giving access to separate users. The most visible effect is one additional argument to the various kernel functions to compute buffer addresses. All netmap-supported drivers are affected, but changes are mechanical and trivial 3. (kernel) simplify the prototype for *txsync() and *rxsync() driver methods. All netmap drivers affected, changes mostly mechanical. 4. add support for netmap-monitor ports. Think of it as a mirroring port on a physical switch: a netmap monitor port replicates traffic present on the main port. Restrictions apply. Drive carefully. 5. if_lem.c: support for various paravirtualization features, experimental and disabled by default. Most of these are described in our ANCS'13 paper [1]. Paravirtualized support in netmap mode is new, and beats the numbers in the paper by a large factor (under qemu-kvm, we measured gues-host throughput up to 10-12 Mpps). A lot of refactoring and additional documentation in the files in sys/dev/netmap, but apart from #2 and #3 above, almost nothing of this stuff is visible to other kernel parts. Example programs in tools/tools/netmap have been updated with bugfixes and to support more of the existing features. This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline. A lot of this code has been contributed by my colleagues at UNIPI, including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella. MFC after: 3 days.
2014-08-16 15:00:01 +00:00
}
/* nm_krings_create callback for VALE ports.
* Calls the standard netmap_krings_create, then adds leases on rx
* rings and bdgfwd on tx rings.
*/
static int
netmap_vale_vp_krings_create(struct netmap_adapter *na)
Update to the current version of netmap. Mostly bugfixes or features developed in the past 6 months, so this is a 10.1 candidate. Basically no user API changes (some bugfixes in sys/net/netmap_user.h). In detail: 1. netmap support for virtio-net, including in netmap mode. Under bhyve and with a netmap backend [2] we reach over 1Mpps with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode. 2. (kernel) add support for multiple memory allocators, so we can better partition physical and virtual interfaces giving access to separate users. The most visible effect is one additional argument to the various kernel functions to compute buffer addresses. All netmap-supported drivers are affected, but changes are mechanical and trivial 3. (kernel) simplify the prototype for *txsync() and *rxsync() driver methods. All netmap drivers affected, changes mostly mechanical. 4. add support for netmap-monitor ports. Think of it as a mirroring port on a physical switch: a netmap monitor port replicates traffic present on the main port. Restrictions apply. Drive carefully. 5. if_lem.c: support for various paravirtualization features, experimental and disabled by default. Most of these are described in our ANCS'13 paper [1]. Paravirtualized support in netmap mode is new, and beats the numbers in the paper by a large factor (under qemu-kvm, we measured gues-host throughput up to 10-12 Mpps). A lot of refactoring and additional documentation in the files in sys/dev/netmap, but apart from #2 and #3 above, almost nothing of this stuff is visible to other kernel parts. Example programs in tools/tools/netmap have been updated with bugfixes and to support more of the existing features. This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline. A lot of this code has been contributed by my colleagues at UNIPI, including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella. MFC after: 3 days.
2014-08-16 15:00:01 +00:00
{
u_int tailroom;
int error, i;
uint32_t *leases;
u_int nrx = netmap_real_rings(na, NR_RX);
Update to the current version of netmap. Mostly bugfixes or features developed in the past 6 months, so this is a 10.1 candidate. Basically no user API changes (some bugfixes in sys/net/netmap_user.h). In detail: 1. netmap support for virtio-net, including in netmap mode. Under bhyve and with a netmap backend [2] we reach over 1Mpps with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode. 2. (kernel) add support for multiple memory allocators, so we can better partition physical and virtual interfaces giving access to separate users. The most visible effect is one additional argument to the various kernel functions to compute buffer addresses. All netmap-supported drivers are affected, but changes are mechanical and trivial 3. (kernel) simplify the prototype for *txsync() and *rxsync() driver methods. All netmap drivers affected, changes mostly mechanical. 4. add support for netmap-monitor ports. Think of it as a mirroring port on a physical switch: a netmap monitor port replicates traffic present on the main port. Restrictions apply. Drive carefully. 5. if_lem.c: support for various paravirtualization features, experimental and disabled by default. Most of these are described in our ANCS'13 paper [1]. Paravirtualized support in netmap mode is new, and beats the numbers in the paper by a large factor (under qemu-kvm, we measured gues-host throughput up to 10-12 Mpps). A lot of refactoring and additional documentation in the files in sys/dev/netmap, but apart from #2 and #3 above, almost nothing of this stuff is visible to other kernel parts. Example programs in tools/tools/netmap have been updated with bugfixes and to support more of the existing features. This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline. A lot of this code has been contributed by my colleagues at UNIPI, including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella. MFC after: 3 days.
2014-08-16 15:00:01 +00:00
/*
* Leases are attached to RX rings on vale ports
*/
tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx;
Update to the current version of netmap. Mostly bugfixes or features developed in the past 6 months, so this is a 10.1 candidate. Basically no user API changes (some bugfixes in sys/net/netmap_user.h). In detail: 1. netmap support for virtio-net, including in netmap mode. Under bhyve and with a netmap backend [2] we reach over 1Mpps with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode. 2. (kernel) add support for multiple memory allocators, so we can better partition physical and virtual interfaces giving access to separate users. The most visible effect is one additional argument to the various kernel functions to compute buffer addresses. All netmap-supported drivers are affected, but changes are mechanical and trivial 3. (kernel) simplify the prototype for *txsync() and *rxsync() driver methods. All netmap drivers affected, changes mostly mechanical. 4. add support for netmap-monitor ports. Think of it as a mirroring port on a physical switch: a netmap monitor port replicates traffic present on the main port. Restrictions apply. Drive carefully. 5. if_lem.c: support for various paravirtualization features, experimental and disabled by default. Most of these are described in our ANCS'13 paper [1]. Paravirtualized support in netmap mode is new, and beats the numbers in the paper by a large factor (under qemu-kvm, we measured gues-host throughput up to 10-12 Mpps). A lot of refactoring and additional documentation in the files in sys/dev/netmap, but apart from #2 and #3 above, almost nothing of this stuff is visible to other kernel parts. Example programs in tools/tools/netmap have been updated with bugfixes and to support more of the existing features. This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline. A lot of this code has been contributed by my colleagues at UNIPI, including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella. MFC after: 3 days.
2014-08-16 15:00:01 +00:00
error = netmap_krings_create(na, tailroom);
if (error)
return error;
leases = na->tailroom;
for (i = 0; i < nrx; i++) { /* Receive rings */
na->rx_rings[i]->nkr_leases = leases;
leases += na->num_rx_desc;
Update to the current version of netmap. Mostly bugfixes or features developed in the past 6 months, so this is a 10.1 candidate. Basically no user API changes (some bugfixes in sys/net/netmap_user.h). In detail: 1. netmap support for virtio-net, including in netmap mode. Under bhyve and with a netmap backend [2] we reach over 1Mpps with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode. 2. (kernel) add support for multiple memory allocators, so we can better partition physical and virtual interfaces giving access to separate users. The most visible effect is one additional argument to the various kernel functions to compute buffer addresses. All netmap-supported drivers are affected, but changes are mechanical and trivial 3. (kernel) simplify the prototype for *txsync() and *rxsync() driver methods. All netmap drivers affected, changes mostly mechanical. 4. add support for netmap-monitor ports. Think of it as a mirroring port on a physical switch: a netmap monitor port replicates traffic present on the main port. Restrictions apply. Drive carefully. 5. if_lem.c: support for various paravirtualization features, experimental and disabled by default. Most of these are described in our ANCS'13 paper [1]. Paravirtualized support in netmap mode is new, and beats the numbers in the paper by a large factor (under qemu-kvm, we measured gues-host throughput up to 10-12 Mpps). A lot of refactoring and additional documentation in the files in sys/dev/netmap, but apart from #2 and #3 above, almost nothing of this stuff is visible to other kernel parts. Example programs in tools/tools/netmap have been updated with bugfixes and to support more of the existing features. This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline. A lot of this code has been contributed by my colleagues at UNIPI, including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella. MFC after: 3 days.
2014-08-16 15:00:01 +00:00
}
error = nm_alloc_bdgfwd(na);
if (error) {
netmap_krings_delete(na);
return error;
}
Update to the current version of netmap. Mostly bugfixes or features developed in the past 6 months, so this is a 10.1 candidate. Basically no user API changes (some bugfixes in sys/net/netmap_user.h). In detail: 1. netmap support for virtio-net, including in netmap mode. Under bhyve and with a netmap backend [2] we reach over 1Mpps with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode. 2. (kernel) add support for multiple memory allocators, so we can better partition physical and virtual interfaces giving access to separate users. The most visible effect is one additional argument to the various kernel functions to compute buffer addresses. All netmap-supported drivers are affected, but changes are mechanical and trivial 3. (kernel) simplify the prototype for *txsync() and *rxsync() driver methods. All netmap drivers affected, changes mostly mechanical. 4. add support for netmap-monitor ports. Think of it as a mirroring port on a physical switch: a netmap monitor port replicates traffic present on the main port. Restrictions apply. Drive carefully. 5. if_lem.c: support for various paravirtualization features, experimental and disabled by default. Most of these are described in our ANCS'13 paper [1]. Paravirtualized support in netmap mode is new, and beats the numbers in the paper by a large factor (under qemu-kvm, we measured gues-host throughput up to 10-12 Mpps). A lot of refactoring and additional documentation in the files in sys/dev/netmap, but apart from #2 and #3 above, almost nothing of this stuff is visible to other kernel parts. Example programs in tools/tools/netmap have been updated with bugfixes and to support more of the existing features. This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline. A lot of this code has been contributed by my colleagues at UNIPI, including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella. MFC after: 3 days.
2014-08-16 15:00:01 +00:00
return 0;
}
/* nm_krings_delete callback for VALE ports. */
static void
netmap_vale_vp_krings_delete(struct netmap_adapter *na)
{
nm_free_bdgfwd(na);
netmap_krings_delete(na);
}
static int
nm_vale_flush(struct nm_bdg_fwd *ft, u_int n,
struct netmap_vp_adapter *na, u_int ring_nr);
/*
Update to the current version of netmap. Mostly bugfixes or features developed in the past 6 months, so this is a 10.1 candidate. Basically no user API changes (some bugfixes in sys/net/netmap_user.h). In detail: 1. netmap support for virtio-net, including in netmap mode. Under bhyve and with a netmap backend [2] we reach over 1Mpps with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode. 2. (kernel) add support for multiple memory allocators, so we can better partition physical and virtual interfaces giving access to separate users. The most visible effect is one additional argument to the various kernel functions to compute buffer addresses. All netmap-supported drivers are affected, but changes are mechanical and trivial 3. (kernel) simplify the prototype for *txsync() and *rxsync() driver methods. All netmap drivers affected, changes mostly mechanical. 4. add support for netmap-monitor ports. Think of it as a mirroring port on a physical switch: a netmap monitor port replicates traffic present on the main port. Restrictions apply. Drive carefully. 5. if_lem.c: support for various paravirtualization features, experimental and disabled by default. Most of these are described in our ANCS'13 paper [1]. Paravirtualized support in netmap mode is new, and beats the numbers in the paper by a large factor (under qemu-kvm, we measured gues-host throughput up to 10-12 Mpps). A lot of refactoring and additional documentation in the files in sys/dev/netmap, but apart from #2 and #3 above, almost nothing of this stuff is visible to other kernel parts. Example programs in tools/tools/netmap have been updated with bugfixes and to support more of the existing features. This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline. A lot of this code has been contributed by my colleagues at UNIPI, including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella. MFC after: 3 days.
2014-08-16 15:00:01 +00:00
* main dispatch routine for the bridge.
* Grab packets from a kring, move them into the ft structure
* associated to the tx (input) port. Max one instance per port,
* filtered on input (ioctl, poll or XXX).
* Returns the next position in the ring.
*/
static int
nm_vale_preflush(struct netmap_kring *kring, u_int end)
{
Update to the current version of netmap. Mostly bugfixes or features developed in the past 6 months, so this is a 10.1 candidate. Basically no user API changes (some bugfixes in sys/net/netmap_user.h). In detail: 1. netmap support for virtio-net, including in netmap mode. Under bhyve and with a netmap backend [2] we reach over 1Mpps with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode. 2. (kernel) add support for multiple memory allocators, so we can better partition physical and virtual interfaces giving access to separate users. The most visible effect is one additional argument to the various kernel functions to compute buffer addresses. All netmap-supported drivers are affected, but changes are mechanical and trivial 3. (kernel) simplify the prototype for *txsync() and *rxsync() driver methods. All netmap drivers affected, changes mostly mechanical. 4. add support for netmap-monitor ports. Think of it as a mirroring port on a physical switch: a netmap monitor port replicates traffic present on the main port. Restrictions apply. Drive carefully. 5. if_lem.c: support for various paravirtualization features, experimental and disabled by default. Most of these are described in our ANCS'13 paper [1]. Paravirtualized support in netmap mode is new, and beats the numbers in the paper by a large factor (under qemu-kvm, we measured gues-host throughput up to 10-12 Mpps). A lot of refactoring and additional documentation in the files in sys/dev/netmap, but apart from #2 and #3 above, almost nothing of this stuff is visible to other kernel parts. Example programs in tools/tools/netmap have been updated with bugfixes and to support more of the existing features. This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline. A lot of this code has been contributed by my colleagues at UNIPI, including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella. MFC after: 3 days.
2014-08-16 15:00:01 +00:00
struct netmap_vp_adapter *na =
(struct netmap_vp_adapter*)kring->na;
struct netmap_ring *ring = kring->ring;
struct nm_bdg_fwd *ft;
Update to the current version of netmap. Mostly bugfixes or features developed in the past 6 months, so this is a 10.1 candidate. Basically no user API changes (some bugfixes in sys/net/netmap_user.h). In detail: 1. netmap support for virtio-net, including in netmap mode. Under bhyve and with a netmap backend [2] we reach over 1Mpps with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode. 2. (kernel) add support for multiple memory allocators, so we can better partition physical and virtual interfaces giving access to separate users. The most visible effect is one additional argument to the various kernel functions to compute buffer addresses. All netmap-supported drivers are affected, but changes are mechanical and trivial 3. (kernel) simplify the prototype for *txsync() and *rxsync() driver methods. All netmap drivers affected, changes mostly mechanical. 4. add support for netmap-monitor ports. Think of it as a mirroring port on a physical switch: a netmap monitor port replicates traffic present on the main port. Restrictions apply. Drive carefully. 5. if_lem.c: support for various paravirtualization features, experimental and disabled by default. Most of these are described in our ANCS'13 paper [1]. Paravirtualized support in netmap mode is new, and beats the numbers in the paper by a large factor (under qemu-kvm, we measured gues-host throughput up to 10-12 Mpps). A lot of refactoring and additional documentation in the files in sys/dev/netmap, but apart from #2 and #3 above, almost nothing of this stuff is visible to other kernel parts. Example programs in tools/tools/netmap have been updated with bugfixes and to support more of the existing features. This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline. A lot of this code has been contributed by my colleagues at UNIPI, including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella. MFC after: 3 days.
2014-08-16 15:00:01 +00:00
u_int ring_nr = kring->ring_id;
u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1;
u_int ft_i = 0; /* start from 0 */
u_int frags = 1; /* how many frags ? */
struct nm_bridge *b = na->na_bdg;
/* To protect against modifications to the bridge we acquire a
* shared lock, waiting if we can sleep (if the source port is
* attached to a user process) or with a trylock otherwise (NICs).
*/
nm_prdis("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j);
if (na->up.na_flags & NAF_BDG_MAYSLEEP)
BDG_RLOCK(b);
else if (!BDG_RTRYLOCK(b))
return j;
nm_prdis(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
ft = kring->nkr_ft;
for (; likely(j != end); j = nm_next(j, lim)) {
struct netmap_slot *slot = &ring->slot[j];
char *buf;
ft[ft_i].ft_len = slot->len;
ft[ft_i].ft_flags = slot->flags;
2018-04-12 07:20:50 +00:00
ft[ft_i].ft_offset = 0;
nm_prdis("flags is 0x%x", slot->flags);
2015-07-10 05:51:36 +00:00
/* we do not use the buf changed flag, but we still need to reset it */
slot->flags &= ~NS_BUF_CHANGED;
/* this slot goes into a list so initialize the link field */
ft[ft_i].ft_next = NM_FT_NULL;
buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
Update to the current version of netmap. Mostly bugfixes or features developed in the past 6 months, so this is a 10.1 candidate. Basically no user API changes (some bugfixes in sys/net/netmap_user.h). In detail: 1. netmap support for virtio-net, including in netmap mode. Under bhyve and with a netmap backend [2] we reach over 1Mpps with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode. 2. (kernel) add support for multiple memory allocators, so we can better partition physical and virtual interfaces giving access to separate users. The most visible effect is one additional argument to the various kernel functions to compute buffer addresses. All netmap-supported drivers are affected, but changes are mechanical and trivial 3. (kernel) simplify the prototype for *txsync() and *rxsync() driver methods. All netmap drivers affected, changes mostly mechanical. 4. add support for netmap-monitor ports. Think of it as a mirroring port on a physical switch: a netmap monitor port replicates traffic present on the main port. Restrictions apply. Drive carefully. 5. if_lem.c: support for various paravirtualization features, experimental and disabled by default. Most of these are described in our ANCS'13 paper [1]. Paravirtualized support in netmap mode is new, and beats the numbers in the paper by a large factor (under qemu-kvm, we measured gues-host throughput up to 10-12 Mpps). A lot of refactoring and additional documentation in the files in sys/dev/netmap, but apart from #2 and #3 above, almost nothing of this stuff is visible to other kernel parts. Example programs in tools/tools/netmap have been updated with bugfixes and to support more of the existing features. This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline. A lot of this code has been contributed by my colleagues at UNIPI, including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella. MFC after: 3 days.
2014-08-16 15:00:01 +00:00
(void *)(uintptr_t)slot->ptr : NMB(&na->up, slot);
if (unlikely(buf == NULL)) {
nm_prlim(5, "NULL %s buffer pointer from %s slot %d len %d",
(slot->flags & NS_INDIRECT) ? "INDIRECT" : "DIRECT",
kring->name, j, ft[ft_i].ft_len);
Update to the current version of netmap. Mostly bugfixes or features developed in the past 6 months, so this is a 10.1 candidate. Basically no user API changes (some bugfixes in sys/net/netmap_user.h). In detail: 1. netmap support for virtio-net, including in netmap mode. Under bhyve and with a netmap backend [2] we reach over 1Mpps with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode. 2. (kernel) add support for multiple memory allocators, so we can better partition physical and virtual interfaces giving access to separate users. The most visible effect is one additional argument to the various kernel functions to compute buffer addresses. All netmap-supported drivers are affected, but changes are mechanical and trivial 3. (kernel) simplify the prototype for *txsync() and *rxsync() driver methods. All netmap drivers affected, changes mostly mechanical. 4. add support for netmap-monitor ports. Think of it as a mirroring port on a physical switch: a netmap monitor port replicates traffic present on the main port. Restrictions apply. Drive carefully. 5. if_lem.c: support for various paravirtualization features, experimental and disabled by default. Most of these are described in our ANCS'13 paper [1]. Paravirtualized support in netmap mode is new, and beats the numbers in the paper by a large factor (under qemu-kvm, we measured gues-host throughput up to 10-12 Mpps). A lot of refactoring and additional documentation in the files in sys/dev/netmap, but apart from #2 and #3 above, almost nothing of this stuff is visible to other kernel parts. Example programs in tools/tools/netmap have been updated with bugfixes and to support more of the existing features. This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline. A lot of this code has been contributed by my colleagues at UNIPI, including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella. MFC after: 3 days.
2014-08-16 15:00:01 +00:00
buf = ft[ft_i].ft_buf = NETMAP_BUF_BASE(&na->up);
ft[ft_i].ft_len = 0;
ft[ft_i].ft_flags = 0;
}
__builtin_prefetch(buf);
++ft_i;
if (slot->flags & NS_MOREFRAG) {
frags++;
continue;
}
if (unlikely(netmap_verbose && frags > 1))
nm_prlim(5, "%d frags at %d", frags, ft_i - frags);
ft[ft_i - frags].ft_frags = frags;
frags = 1;
if (unlikely((int)ft_i >= bridge_batch))
ft_i = nm_vale_flush(ft, ft_i, na, ring_nr);
}
if (frags > 1) {
/* Here ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG, and we
* have to fix frags count. */
frags--;
ft[ft_i - 1].ft_flags &= ~NS_MOREFRAG;
ft[ft_i - frags].ft_frags = frags;
nm_prlim(5, "Truncate incomplete fragment at %d (%d frags)", ft_i, frags);
}
if (ft_i)
ft_i = nm_vale_flush(ft, ft_i, na, ring_nr);
BDG_RUNLOCK(b);
return j;
}
/* ----- FreeBSD if_bridge hash function ------- */
/*
* The following hash function is adapted from "Hash Functions" by Bob Jenkins
* ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
*
* http://www.burtleburtle.net/bob/hash/spooky.html
*/
#define mix(a, b, c) \
do { \
a -= b; a -= c; a ^= (c >> 13); \
b -= c; b -= a; b ^= (a << 8); \
c -= a; c -= b; c ^= (b >> 13); \
a -= b; a -= c; a ^= (c >> 12); \
b -= c; b -= a; b ^= (a << 16); \
c -= a; c -= b; c ^= (b >> 5); \
a -= b; a -= c; a ^= (c >> 3); \
b -= c; b -= a; b ^= (a << 10); \
c -= a; c -= b; c ^= (b >> 15); \
} while (/*CONSTCOND*/0)
static __inline uint32_t
nm_vale_rthash(const uint8_t *addr)
{
uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hask key
b += addr[5] << 8;
b += addr[4];
a += addr[3] << 24;
a += addr[2] << 16;
a += addr[1] << 8;
a += addr[0];
mix(a, b, c);
#define BRIDGE_RTHASH_MASK (NM_BDG_HASH-1)
return (c & BRIDGE_RTHASH_MASK);
}
#undef mix
/*
* Lookup function for a learning bridge.
* Update the hash table with the source address,
* and then returns the destination port index, and the
* ring in *dst_ring (at the moment, always use ring 0)
*/
2018-04-12 07:20:50 +00:00
uint32_t
netmap_vale_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
2018-04-12 07:20:50 +00:00
struct netmap_vp_adapter *na, void *private_data)
{
2018-04-12 07:20:50 +00:00
uint8_t *buf = ((uint8_t *)ft->ft_buf) + ft->ft_offset;
u_int buf_len = ft->ft_len - ft->ft_offset;
struct nm_hash_ent *ht = private_data;
uint32_t sh, dh;
u_int dst, mysrc = na->bdg_port;
uint64_t smac, dmac;
uint8_t indbuf[12];
2018-04-12 07:20:50 +00:00
if (buf_len < 14) {
return NM_BDG_NOPORT;
}
if (ft->ft_flags & NS_INDIRECT) {
if (copyin(buf, indbuf, sizeof(indbuf))) {
return NM_BDG_NOPORT;
}
buf = indbuf;
}
dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
smac = le64toh(*(uint64_t *)(buf + 4));
smac >>= 16;
/*
* The hash is somewhat expensive, there might be some
* worthwhile optimizations here.
*/
2015-07-10 05:51:36 +00:00
if (((buf[6] & 1) == 0) && (na->last_smac != smac)) { /* valid src */
uint8_t *s = buf+6;
sh = nm_vale_rthash(s); /* hash of source */
/* update source port forwarding entry */
2015-07-10 05:51:36 +00:00
na->last_smac = ht[sh].mac = smac; /* XXX expire ? */
ht[sh].ports = mysrc;
if (netmap_debug & NM_DEBUG_VALE)
nm_prinf("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
}
dst = NM_BDG_BROADCAST;
if ((buf[0] & 1) == 0) { /* unicast */
dh = nm_vale_rthash(buf); /* hash of dst */
if (ht[dh].mac == dmac) { /* found dst */
dst = ht[dh].ports;
}
}
return dst;
}
/*
* Available space in the ring. Only used in VALE code
* and only with is_rx = 1
*/
static inline uint32_t
nm_kr_space(struct netmap_kring *k, int is_rx)
{
int space;
if (is_rx) {
int busy = k->nkr_hwlease - k->nr_hwcur;
if (busy < 0)
busy += k->nkr_num_slots;
space = k->nkr_num_slots - 1 - busy;
} else {
/* XXX never used in this branch */
space = k->nr_hwtail - k->nkr_hwlease;
if (space < 0)
space += k->nkr_num_slots;
}
#if 0
// sanity check
if (k->nkr_hwlease >= k->nkr_num_slots ||
k->nr_hwcur >= k->nkr_num_slots ||
k->nr_tail >= k->nkr_num_slots ||
busy < 0 ||
busy >= k->nkr_num_slots) {
nm_prerr("invalid kring, cur %d tail %d lease %d lease_idx %d lim %d",
k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
k->nkr_lease_idx, k->nkr_num_slots);
}
#endif
return space;
}
/* make a lease on the kring for N positions. return the
* lease index
* XXX only used in VALE code and with is_rx = 1
*/
static inline uint32_t
nm_kr_lease(struct netmap_kring *k, u_int n, int is_rx)
{
uint32_t lim = k->nkr_num_slots - 1;
uint32_t lease_idx = k->nkr_lease_idx;
k->nkr_leases[lease_idx] = NR_NOSLOT;
k->nkr_lease_idx = nm_next(lease_idx, lim);
#ifdef CONFIG_NETMAP_DEBUG
if (n > nm_kr_space(k, is_rx)) {
nm_prerr("invalid request for %d slots", n);
panic("x");
}
#endif /* CONFIG NETMAP_DEBUG */
/* XXX verify that there are n slots */
k->nkr_hwlease += n;
if (k->nkr_hwlease > lim)
k->nkr_hwlease -= lim + 1;
#ifdef CONFIG_NETMAP_DEBUG
if (k->nkr_hwlease >= k->nkr_num_slots ||
k->nr_hwcur >= k->nkr_num_slots ||
k->nr_hwtail >= k->nkr_num_slots ||
k->nkr_lease_idx >= k->nkr_num_slots) {
nm_prerr("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d",
Update to the current version of netmap. Mostly bugfixes or features developed in the past 6 months, so this is a 10.1 candidate. Basically no user API changes (some bugfixes in sys/net/netmap_user.h). In detail: 1. netmap support for virtio-net, including in netmap mode. Under bhyve and with a netmap backend [2] we reach over 1Mpps with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode. 2. (kernel) add support for multiple memory allocators, so we can better partition physical and virtual interfaces giving access to separate users. The most visible effect is one additional argument to the various kernel functions to compute buffer addresses. All netmap-supported drivers are affected, but changes are mechanical and trivial 3. (kernel) simplify the prototype for *txsync() and *rxsync() driver methods. All netmap drivers affected, changes mostly mechanical. 4. add support for netmap-monitor ports. Think of it as a mirroring port on a physical switch: a netmap monitor port replicates traffic present on the main port. Restrictions apply. Drive carefully. 5. if_lem.c: support for various paravirtualization features, experimental and disabled by default. Most of these are described in our ANCS'13 paper [1]. Paravirtualized support in netmap mode is new, and beats the numbers in the paper by a large factor (under qemu-kvm, we measured gues-host throughput up to 10-12 Mpps). A lot of refactoring and additional documentation in the files in sys/dev/netmap, but apart from #2 and #3 above, almost nothing of this stuff is visible to other kernel parts. Example programs in tools/tools/netmap have been updated with bugfixes and to support more of the existing features. This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline. A lot of this code has been contributed by my colleagues at UNIPI, including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella. MFC after: 3 days.
2014-08-16 15:00:01 +00:00
k->na->name,
k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
k->nkr_lease_idx, k->nkr_num_slots);
}
#endif /* CONFIG_NETMAP_DEBUG */
return lease_idx;
}
/*
Update to the current version of netmap. Mostly bugfixes or features developed in the past 6 months, so this is a 10.1 candidate. Basically no user API changes (some bugfixes in sys/net/netmap_user.h). In detail: 1. netmap support for virtio-net, including in netmap mode. Under bhyve and with a netmap backend [2] we reach over 1Mpps with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode. 2. (kernel) add support for multiple memory allocators, so we can better partition physical and virtual interfaces giving access to separate users. The most visible effect is one additional argument to the various kernel functions to compute buffer addresses. All netmap-supported drivers are affected, but changes are mechanical and trivial 3. (kernel) simplify the prototype for *txsync() and *rxsync() driver methods. All netmap drivers affected, changes mostly mechanical. 4. add support for netmap-monitor ports. Think of it as a mirroring port on a physical switch: a netmap monitor port replicates traffic present on the main port. Restrictions apply. Drive carefully. 5. if_lem.c: support for various paravirtualization features, experimental and disabled by default. Most of these are described in our ANCS'13 paper [1]. Paravirtualized support in netmap mode is new, and beats the numbers in the paper by a large factor (under qemu-kvm, we measured gues-host throughput up to 10-12 Mpps). A lot of refactoring and additional documentation in the files in sys/dev/netmap, but apart from #2 and #3 above, almost nothing of this stuff is visible to other kernel parts. Example programs in tools/tools/netmap have been updated with bugfixes and to support more of the existing features. This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline. A lot of this code has been contributed by my colleagues at UNIPI, including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella. MFC after: 3 days.
2014-08-16 15:00:01 +00:00
*
* This flush routine supports only unicast and broadcast but a large
* number of ports, and lets us replace the learn and dispatch functions.
*/
int
nm_vale_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
u_int ring_nr)
{
struct nm_vale_q *dst_ents, *brddst;
uint16_t num_dsts = 0, *dsts;
struct nm_bridge *b = na->na_bdg;
u_int i, me = na->bdg_port;
/*
* The work area (pointed by ft) is followed by an array of
* pointers to queues , dst_ents; there are NM_BDG_MAXRINGS
* queues per port plus one for the broadcast traffic.
* Then we have an array of destination indexes.
*/
dst_ents = (struct nm_vale_q *)(ft + NM_BDG_BATCH_MAX);
dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);
/* first pass: find a destination for each packet in the batch */
for (i = 0; likely(i < n); i += ft[i].ft_frags) {
uint8_t dst_ring = ring_nr; /* default, same ring as origin */
uint16_t dst_port, d_i;
struct nm_vale_q *d;
2018-04-12 07:20:50 +00:00
struct nm_bdg_fwd *start_ft = NULL;
nm_prdis("slot %d frags %d", i, ft[i].ft_frags);
2018-04-12 07:20:50 +00:00
if (na->up.virt_hdr_len < ft[i].ft_len) {
ft[i].ft_offset = na->up.virt_hdr_len;
start_ft = &ft[i];
} else if (na->up.virt_hdr_len == ft[i].ft_len && ft[i].ft_flags & NS_MOREFRAG) {
ft[i].ft_offset = ft[i].ft_len;
start_ft = &ft[i+1];
} else {
/* Drop the packet if the virtio-net header is not into the first
* fragment nor at the very beginning of the second.
*/
continue;
2018-04-12 07:20:50 +00:00
}
dst_port = b->bdg_ops.lookup(start_ft, &dst_ring, na, b->private_data);
if (netmap_verbose > 255)
nm_prlim(5, "slot %d port %d -> %d", i, me, dst_port);
if (dst_port >= NM_BDG_NOPORT)
continue; /* this packet is identified to be dropped */
else if (dst_port == NM_BDG_BROADCAST)
dst_ring = 0; /* broadcasts always go to ring 0 */
else if (unlikely(dst_port == me ||
!b->bdg_ports[dst_port]))
continue;
/* get a position in the scratch pad */
d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
d = dst_ents + d_i;
/* append the first fragment to the list */
if (d->bq_head == NM_FT_NULL) { /* new destination */
d->bq_head = d->bq_tail = i;
/* remember this position to be scanned later */
if (dst_port != NM_BDG_BROADCAST)
dsts[num_dsts++] = d_i;
} else {
ft[d->bq_tail].ft_next = i;
d->bq_tail = i;
}
d->bq_len += ft[i].ft_frags;
}
/*
* Broadcast traffic goes to ring 0 on all destinations.
* So we need to add these rings to the list of ports to scan.
* XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is
* expensive. We should keep a compact list of active destinations
* so we could shorten this loop.
*/
brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
if (brddst->bq_head != NM_FT_NULL) {
u_int j;
for (j = 0; likely(j < b->bdg_active_ports); j++) {
uint16_t d_i;
i = b->bdg_port_index[j];
if (unlikely(i == me))
continue;
d_i = i * NM_BDG_MAXRINGS;
if (dst_ents[d_i].bq_head == NM_FT_NULL)
dsts[num_dsts++] = d_i;
}
}
nm_prdis(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
Update to the current version of netmap. Mostly bugfixes or features developed in the past 6 months, so this is a 10.1 candidate. Basically no user API changes (some bugfixes in sys/net/netmap_user.h). In detail: 1. netmap support for virtio-net, including in netmap mode. Under bhyve and with a netmap backend [2] we reach over 1Mpps with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode. 2. (kernel) add support for multiple memory allocators, so we can better partition physical and virtual interfaces giving access to separate users. The most visible effect is one additional argument to the various kernel functions to compute buffer addresses. All netmap-supported drivers are affected, but changes are mechanical and trivial 3. (kernel) simplify the prototype for *txsync() and *rxsync() driver methods. All netmap drivers affected, changes mostly mechanical. 4. add support for netmap-monitor ports. Think of it as a mirroring port on a physical switch: a netmap monitor port replicates traffic present on the main port. Restrictions apply. Drive carefully. 5. if_lem.c: support for various paravirtualization features, experimental and disabled by default. Most of these are described in our ANCS'13 paper [1]. Paravirtualized support in netmap mode is new, and beats the numbers in the paper by a large factor (under qemu-kvm, we measured gues-host throughput up to 10-12 Mpps). A lot of refactoring and additional documentation in the files in sys/dev/netmap, but apart from #2 and #3 above, almost nothing of this stuff is visible to other kernel parts. Example programs in tools/tools/netmap have been updated with bugfixes and to support more of the existing features. This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline. A lot of this code has been contributed by my colleagues at UNIPI, including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella. MFC after: 3 days.
2014-08-16 15:00:01 +00:00
/* second pass: scan destinations */
for (i = 0; i < num_dsts; i++) {
struct netmap_vp_adapter *dst_na;
struct netmap_kring *kring;
struct netmap_ring *ring;
This new version of netmap brings you the following: - netmap pipes, providing bidirectional blocking I/O while moving 100+ Mpps between processes using shared memory channels (no mistake: over one hundred million. But mind you, i said *moving* not *processing*); - kqueue support (BHyVe needs it); - improved user library. Just the interface name lets you select a NIC, host port, VALE switch port, netmap pipe, and individual queues. The upcoming netmap-enabled libpcap will use this feature. - optional extra buffers associated to netmap ports, for applications that need to buffer data yet don't want to make copies. - segmentation offloading for the VALE switch, useful between VMs. and a number of bug fixes and performance improvements. My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial amount of work on these features so we owe them a big thanks. There are some external repositories that can be of interest: https://code.google.com/p/netmap our public repository for netmap/VALE code, including linux versions and other stuff that does not belong here, such as python bindings. https://code.google.com/p/netmap-libpcap a clone of the libpcap repository with netmap support. With this any libpcap client has access to most netmap feature with no recompilation. E.g. tcpdump can filter packets at 10-15 Mpps. https://code.google.com/p/netmap-ipfw a userspace version of ipfw+dummynet which uses netmap to send/receive packets. Speed is up in the 7-10 Mpps range per core for simple rulesets. Both netmap-libpcap and netmap-ipfw will be merged upstream at some point, but while this happens it is useful to have access to them. And yes, this code will be merged soon. It is infinitely better than the version currently in 10 and 9. MFC after: 3 days
2014-02-15 04:53:04 +00:00
u_int dst_nr, lim, j, d_i, next, brd_next;
u_int needed, howmany;
int retry = netmap_txsync_retry;
struct nm_vale_q *d;
uint32_t my_start = 0, lease_idx = 0;
int nrings;
This new version of netmap brings you the following: - netmap pipes, providing bidirectional blocking I/O while moving 100+ Mpps between processes using shared memory channels (no mistake: over one hundred million. But mind you, i said *moving* not *processing*); - kqueue support (BHyVe needs it); - improved user library. Just the interface name lets you select a NIC, host port, VALE switch port, netmap pipe, and individual queues. The upcoming netmap-enabled libpcap will use this feature. - optional extra buffers associated to netmap ports, for applications that need to buffer data yet don't want to make copies. - segmentation offloading for the VALE switch, useful between VMs. and a number of bug fixes and performance improvements. My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial amount of work on these features so we owe them a big thanks. There are some external repositories that can be of interest: https://code.google.com/p/netmap our public repository for netmap/VALE code, including linux versions and other stuff that does not belong here, such as python bindings. https://code.google.com/p/netmap-libpcap a clone of the libpcap repository with netmap support. With this any libpcap client has access to most netmap feature with no recompilation. E.g. tcpdump can filter packets at 10-15 Mpps. https://code.google.com/p/netmap-ipfw a userspace version of ipfw+dummynet which uses netmap to send/receive packets. Speed is up in the 7-10 Mpps range per core for simple rulesets. Both netmap-libpcap and netmap-ipfw will be merged upstream at some point, but while this happens it is useful to have access to them. And yes, this code will be merged soon. It is infinitely better than the version currently in 10 and 9. MFC after: 3 days
2014-02-15 04:53:04 +00:00
int virt_hdr_mismatch = 0;
d_i = dsts[i];
nm_prdis("second pass %d port %d", i, d_i);
d = dst_ents + d_i;
// XXX fix the division
dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
/* protect from the lookup function returning an inactive
* destination port
*/
if (unlikely(dst_na == NULL))
goto cleanup;
if (dst_na->up.na_flags & NAF_SW_ONLY)
goto cleanup;
/*
* The interface may be in !netmap mode in two cases:
* - when na is attached but not activated yet;
* - when na is being deactivated but is still attached.
*/
Update to the current version of netmap. Mostly bugfixes or features developed in the past 6 months, so this is a 10.1 candidate. Basically no user API changes (some bugfixes in sys/net/netmap_user.h). In detail: 1. netmap support for virtio-net, including in netmap mode. Under bhyve and with a netmap backend [2] we reach over 1Mpps with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode. 2. (kernel) add support for multiple memory allocators, so we can better partition physical and virtual interfaces giving access to separate users. The most visible effect is one additional argument to the various kernel functions to compute buffer addresses. All netmap-supported drivers are affected, but changes are mechanical and trivial 3. (kernel) simplify the prototype for *txsync() and *rxsync() driver methods. All netmap drivers affected, changes mostly mechanical. 4. add support for netmap-monitor ports. Think of it as a mirroring port on a physical switch: a netmap monitor port replicates traffic present on the main port. Restrictions apply. Drive carefully. 5. if_lem.c: support for various paravirtualization features, experimental and disabled by default. Most of these are described in our ANCS'13 paper [1]. Paravirtualized support in netmap mode is new, and beats the numbers in the paper by a large factor (under qemu-kvm, we measured gues-host throughput up to 10-12 Mpps). A lot of refactoring and additional documentation in the files in sys/dev/netmap, but apart from #2 and #3 above, almost nothing of this stuff is visible to other kernel parts. Example programs in tools/tools/netmap have been updated with bugfixes and to support more of the existing features. This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline. A lot of this code has been contributed by my colleagues at UNIPI, including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella. MFC after: 3 days.
2014-08-16 15:00:01 +00:00
if (unlikely(!nm_netmap_on(&dst_na->up))) {
nm_prdis("not in netmap mode!");
goto cleanup;
}
/* there is at least one either unicast or broadcast packet */
brd_next = brddst->bq_head;
next = d->bq_head;
/* we need to reserve this many slots. If fewer are
* available, some packets will be dropped.
* Packets may have multiple fragments, so we may not use
* there is a chance that we may not use all of the slots
* we have claimed, so we will need to handle the leftover
* ones when we regain the lock.
*/
needed = d->bq_len + brddst->bq_len;
if (unlikely(dst_na->up.virt_hdr_len != na->up.virt_hdr_len)) {
if (netmap_verbose) {
nm_prlim(3, "virt_hdr_mismatch, src %d dst %d", na->up.virt_hdr_len,
dst_na->up.virt_hdr_len);
}
This new version of netmap brings you the following: - netmap pipes, providing bidirectional blocking I/O while moving 100+ Mpps between processes using shared memory channels (no mistake: over one hundred million. But mind you, i said *moving* not *processing*); - kqueue support (BHyVe needs it); - improved user library. Just the interface name lets you select a NIC, host port, VALE switch port, netmap pipe, and individual queues. The upcoming netmap-enabled libpcap will use this feature. - optional extra buffers associated to netmap ports, for applications that need to buffer data yet don't want to make copies. - segmentation offloading for the VALE switch, useful between VMs. and a number of bug fixes and performance improvements. My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial amount of work on these features so we owe them a big thanks. There are some external repositories that can be of interest: https://code.google.com/p/netmap our public repository for netmap/VALE code, including linux versions and other stuff that does not belong here, such as python bindings. https://code.google.com/p/netmap-libpcap a clone of the libpcap repository with netmap support. With this any libpcap client has access to most netmap feature with no recompilation. E.g. tcpdump can filter packets at 10-15 Mpps. https://code.google.com/p/netmap-ipfw a userspace version of ipfw+dummynet which uses netmap to send/receive packets. Speed is up in the 7-10 Mpps range per core for simple rulesets. Both netmap-libpcap and netmap-ipfw will be merged upstream at some point, but while this happens it is useful to have access to them. And yes, this code will be merged soon. It is infinitely better than the version currently in 10 and 9. MFC after: 3 days
2014-02-15 04:53:04 +00:00
/* There is a virtio-net header/offloadings mismatch between
* source and destination. The slower mismatch datapath will
* be used to cope with all the mismatches.
*/
virt_hdr_mismatch = 1;
if (dst_na->mfs < na->mfs) {
/* We may need to do segmentation offloadings, and so
* we may need a number of destination slots greater
* than the number of input slots ('needed').
* We look for the smallest integer 'x' which satisfies:
* needed * na->mfs + x * H <= x * na->mfs
* where 'H' is the length of the longest header that may
* be replicated in the segmentation process (e.g. for
* TCPv4 we must account for ethernet header, IP header
* and TCPv4 header).
*/
KASSERT(dst_na->mfs > 0, ("vpna->mfs is 0"));
This new version of netmap brings you the following: - netmap pipes, providing bidirectional blocking I/O while moving 100+ Mpps between processes using shared memory channels (no mistake: over one hundred million. But mind you, i said *moving* not *processing*); - kqueue support (BHyVe needs it); - improved user library. Just the interface name lets you select a NIC, host port, VALE switch port, netmap pipe, and individual queues. The upcoming netmap-enabled libpcap will use this feature. - optional extra buffers associated to netmap ports, for applications that need to buffer data yet don't want to make copies. - segmentation offloading for the VALE switch, useful between VMs. and a number of bug fixes and performance improvements. My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial amount of work on these features so we owe them a big thanks. There are some external repositories that can be of interest: https://code.google.com/p/netmap our public repository for netmap/VALE code, including linux versions and other stuff that does not belong here, such as python bindings. https://code.google.com/p/netmap-libpcap a clone of the libpcap repository with netmap support. With this any libpcap client has access to most netmap feature with no recompilation. E.g. tcpdump can filter packets at 10-15 Mpps. https://code.google.com/p/netmap-ipfw a userspace version of ipfw+dummynet which uses netmap to send/receive packets. Speed is up in the 7-10 Mpps range per core for simple rulesets. Both netmap-libpcap and netmap-ipfw will be merged upstream at some point, but while this happens it is useful to have access to them. And yes, this code will be merged soon. It is infinitely better than the version currently in 10 and 9. MFC after: 3 days
2014-02-15 04:53:04 +00:00
needed = (needed * na->mfs) /
(dst_na->mfs - WORST_CASE_GSO_HEADER) + 1;
nm_prdis(3, "srcmtu=%u, dstmtu=%u, x=%u", na->mfs, dst_na->mfs, needed);
This new version of netmap brings you the following: - netmap pipes, providing bidirectional blocking I/O while moving 100+ Mpps between processes using shared memory channels (no mistake: over one hundred million. But mind you, i said *moving* not *processing*); - kqueue support (BHyVe needs it); - improved user library. Just the interface name lets you select a NIC, host port, VALE switch port, netmap pipe, and individual queues. The upcoming netmap-enabled libpcap will use this feature. - optional extra buffers associated to netmap ports, for applications that need to buffer data yet don't want to make copies. - segmentation offloading for the VALE switch, useful between VMs. and a number of bug fixes and performance improvements. My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial amount of work on these features so we owe them a big thanks. There are some external repositories that can be of interest: https://code.google.com/p/netmap our public repository for netmap/VALE code, including linux versions and other stuff that does not belong here, such as python bindings. https://code.google.com/p/netmap-libpcap a clone of the libpcap repository with netmap support. With this any libpcap client has access to most netmap feature with no recompilation. E.g. tcpdump can filter packets at 10-15 Mpps. https://code.google.com/p/netmap-ipfw a userspace version of ipfw+dummynet which uses netmap to send/receive packets. Speed is up in the 7-10 Mpps range per core for simple rulesets. Both netmap-libpcap and netmap-ipfw will be merged upstream at some point, but while this happens it is useful to have access to them. And yes, this code will be merged soon. It is infinitely better than the version currently in 10 and 9. MFC after: 3 days
2014-02-15 04:53:04 +00:00
}
}
nm_prdis(5, "pass 2 dst %d is %x %s",
i, d_i, nm_is_bwrap(&dst_na->up) ? "nic/host" : "virtual");
dst_nr = d_i & (NM_BDG_MAXRINGS-1);
nrings = dst_na->up.num_rx_rings;
if (dst_nr >= nrings)
dst_nr = dst_nr % nrings;
2018-04-12 07:20:50 +00:00
kring = dst_na->up.rx_rings[dst_nr];
ring = kring->ring;
/* the destination ring may have not been opened for RX */
if (unlikely(ring == NULL || kring->nr_mode != NKR_NETMAP_ON))
goto cleanup;
lim = kring->nkr_num_slots - 1;
retry:
This new version of netmap brings you the following: - netmap pipes, providing bidirectional blocking I/O while moving 100+ Mpps between processes using shared memory channels (no mistake: over one hundred million. But mind you, i said *moving* not *processing*); - kqueue support (BHyVe needs it); - improved user library. Just the interface name lets you select a NIC, host port, VALE switch port, netmap pipe, and individual queues. The upcoming netmap-enabled libpcap will use this feature. - optional extra buffers associated to netmap ports, for applications that need to buffer data yet don't want to make copies. - segmentation offloading for the VALE switch, useful between VMs. and a number of bug fixes and performance improvements. My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial amount of work on these features so we owe them a big thanks. There are some external repositories that can be of interest: https://code.google.com/p/netmap our public repository for netmap/VALE code, including linux versions and other stuff that does not belong here, such as python bindings. https://code.google.com/p/netmap-libpcap a clone of the libpcap repository with netmap support. With this any libpcap client has access to most netmap feature with no recompilation. E.g. tcpdump can filter packets at 10-15 Mpps. https://code.google.com/p/netmap-ipfw a userspace version of ipfw+dummynet which uses netmap to send/receive packets. Speed is up in the 7-10 Mpps range per core for simple rulesets. Both netmap-libpcap and netmap-ipfw will be merged upstream at some point, but while this happens it is useful to have access to them. And yes, this code will be merged soon. It is infinitely better than the version currently in 10 and 9. MFC after: 3 days
2014-02-15 04:53:04 +00:00
if (dst_na->retry && retry) {
/* try to get some free slot from the previous run */
kring->nm_notify(kring, NAF_FORCE_RECLAIM);
Update to the current version of netmap. Mostly bugfixes or features developed in the past 6 months, so this is a 10.1 candidate. Basically no user API changes (some bugfixes in sys/net/netmap_user.h). In detail: 1. netmap support for virtio-net, including in netmap mode. Under bhyve and with a netmap backend [2] we reach over 1Mpps with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode. 2. (kernel) add support for multiple memory allocators, so we can better partition physical and virtual interfaces giving access to separate users. The most visible effect is one additional argument to the various kernel functions to compute buffer addresses. All netmap-supported drivers are affected, but changes are mechanical and trivial 3. (kernel) simplify the prototype for *txsync() and *rxsync() driver methods. All netmap drivers affected, changes mostly mechanical. 4. add support for netmap-monitor ports. Think of it as a mirroring port on a physical switch: a netmap monitor port replicates traffic present on the main port. Restrictions apply. Drive carefully. 5. if_lem.c: support for various paravirtualization features, experimental and disabled by default. Most of these are described in our ANCS'13 paper [1]. Paravirtualized support in netmap mode is new, and beats the numbers in the paper by a large factor (under qemu-kvm, we measured gues-host throughput up to 10-12 Mpps). A lot of refactoring and additional documentation in the files in sys/dev/netmap, but apart from #2 and #3 above, almost nothing of this stuff is visible to other kernel parts. Example programs in tools/tools/netmap have been updated with bugfixes and to support more of the existing features. This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline. A lot of this code has been contributed by my colleagues at UNIPI, including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella. MFC after: 3 days.
2014-08-16 15:00:01 +00:00
/* actually useful only for bwraps, since there
* the notify will trigger a txsync on the hwna. VALE ports
* have dst_na->retry == 0
*/
This new version of netmap brings you the following: - netmap pipes, providing bidirectional blocking I/O while moving 100+ Mpps between processes using shared memory channels (no mistake: over one hundred million. But mind you, i said *moving* not *processing*); - kqueue support (BHyVe needs it); - improved user library. Just the interface name lets you select a NIC, host port, VALE switch port, netmap pipe, and individual queues. The upcoming netmap-enabled libpcap will use this feature. - optional extra buffers associated to netmap ports, for applications that need to buffer data yet don't want to make copies. - segmentation offloading for the VALE switch, useful between VMs. and a number of bug fixes and performance improvements. My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial amount of work on these features so we owe them a big thanks. There are some external repositories that can be of interest: https://code.google.com/p/netmap our public repository for netmap/VALE code, including linux versions and other stuff that does not belong here, such as python bindings. https://code.google.com/p/netmap-libpcap a clone of the libpcap repository with netmap support. With this any libpcap client has access to most netmap feature with no recompilation. E.g. tcpdump can filter packets at 10-15 Mpps. https://code.google.com/p/netmap-ipfw a userspace version of ipfw+dummynet which uses netmap to send/receive packets. Speed is up in the 7-10 Mpps range per core for simple rulesets. Both netmap-libpcap and netmap-ipfw will be merged upstream at some point, but while this happens it is useful to have access to them. And yes, this code will be merged soon. It is infinitely better than the version currently in 10 and 9. MFC after: 3 days
2014-02-15 04:53:04 +00:00
}
/* reserve the buffers in the queue and an entry
* to report completion, and drop lock.
* XXX this might become a helper function.
*/
mtx_lock(&kring->q_lock);
if (kring->nkr_stopped) {
mtx_unlock(&kring->q_lock);
goto cleanup;
}
my_start = j = kring->nkr_hwlease;
howmany = nm_kr_space(kring, 1);
if (needed < howmany)
howmany = needed;
lease_idx = nm_kr_lease(kring, howmany, 1);
mtx_unlock(&kring->q_lock);
/* only retry if we need more than available slots */
if (retry && needed <= howmany)
retry = 0;
/* copy to the destination queue */
while (howmany > 0) {
struct netmap_slot *slot;
struct nm_bdg_fwd *ft_p, *ft_end;
u_int cnt;
/* find the queue from which we pick next packet.
* NM_FT_NULL is always higher than valid indexes
* so we never dereference it if the other list
* has packets (and if both are empty we never
* get here).
*/
if (next < brd_next) {
ft_p = ft + next;
next = ft_p->ft_next;
} else { /* insert broadcast */
ft_p = ft + brd_next;
brd_next = ft_p->ft_next;
}
cnt = ft_p->ft_frags; // cnt > 0
if (unlikely(cnt > howmany))
break; /* no more space */
if (netmap_verbose && cnt > 1)
nm_prlim(5, "rx %d frags to %d", cnt, j);
ft_end = ft_p + cnt;
This new version of netmap brings you the following: - netmap pipes, providing bidirectional blocking I/O while moving 100+ Mpps between processes using shared memory channels (no mistake: over one hundred million. But mind you, i said *moving* not *processing*); - kqueue support (BHyVe needs it); - improved user library. Just the interface name lets you select a NIC, host port, VALE switch port, netmap pipe, and individual queues. The upcoming netmap-enabled libpcap will use this feature. - optional extra buffers associated to netmap ports, for applications that need to buffer data yet don't want to make copies. - segmentation offloading for the VALE switch, useful between VMs. and a number of bug fixes and performance improvements. My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial amount of work on these features so we owe them a big thanks. There are some external repositories that can be of interest: https://code.google.com/p/netmap our public repository for netmap/VALE code, including linux versions and other stuff that does not belong here, such as python bindings. https://code.google.com/p/netmap-libpcap a clone of the libpcap repository with netmap support. With this any libpcap client has access to most netmap feature with no recompilation. E.g. tcpdump can filter packets at 10-15 Mpps. https://code.google.com/p/netmap-ipfw a userspace version of ipfw+dummynet which uses netmap to send/receive packets. Speed is up in the 7-10 Mpps range per core for simple rulesets. Both netmap-libpcap and netmap-ipfw will be merged upstream at some point, but while this happens it is useful to have access to them. And yes, this code will be merged soon. It is infinitely better than the version currently in 10 and 9. MFC after: 3 days
2014-02-15 04:53:04 +00:00
if (unlikely(virt_hdr_mismatch)) {
bdg_mismatch_datapath(na, dst_na, ft_p, ring, &j, lim, &howmany);
} else {
howmany -= cnt;
do {
char *dst, *src = ft_p->ft_buf;
size_t copy_len = ft_p->ft_len, dst_len = copy_len;
slot = &ring->slot[j];
Update to the current version of netmap. Mostly bugfixes or features developed in the past 6 months, so this is a 10.1 candidate. Basically no user API changes (some bugfixes in sys/net/netmap_user.h). In detail: 1. netmap support for virtio-net, including in netmap mode. Under bhyve and with a netmap backend [2] we reach over 1Mpps with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode. 2. (kernel) add support for multiple memory allocators, so we can better partition physical and virtual interfaces giving access to separate users. The most visible effect is one additional argument to the various kernel functions to compute buffer addresses. All netmap-supported drivers are affected, but changes are mechanical and trivial 3. (kernel) simplify the prototype for *txsync() and *rxsync() driver methods. All netmap drivers affected, changes mostly mechanical. 4. add support for netmap-monitor ports. Think of it as a mirroring port on a physical switch: a netmap monitor port replicates traffic present on the main port. Restrictions apply. Drive carefully. 5. if_lem.c: support for various paravirtualization features, experimental and disabled by default. Most of these are described in our ANCS'13 paper [1]. Paravirtualized support in netmap mode is new, and beats the numbers in the paper by a large factor (under qemu-kvm, we measured gues-host throughput up to 10-12 Mpps). A lot of refactoring and additional documentation in the files in sys/dev/netmap, but apart from #2 and #3 above, almost nothing of this stuff is visible to other kernel parts. Example programs in tools/tools/netmap have been updated with bugfixes and to support more of the existing features. This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline. A lot of this code has been contributed by my colleagues at UNIPI, including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella. MFC after: 3 days.
2014-08-16 15:00:01 +00:00
dst = NMB(&dst_na->up, slot);
This new version of netmap brings you the following: - netmap pipes, providing bidirectional blocking I/O while moving 100+ Mpps between processes using shared memory channels (no mistake: over one hundred million. But mind you, i said *moving* not *processing*); - kqueue support (BHyVe needs it); - improved user library. Just the interface name lets you select a NIC, host port, VALE switch port, netmap pipe, and individual queues. The upcoming netmap-enabled libpcap will use this feature. - optional extra buffers associated to netmap ports, for applications that need to buffer data yet don't want to make copies. - segmentation offloading for the VALE switch, useful between VMs. and a number of bug fixes and performance improvements. My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial amount of work on these features so we owe them a big thanks. There are some external repositories that can be of interest: https://code.google.com/p/netmap our public repository for netmap/VALE code, including linux versions and other stuff that does not belong here, such as python bindings. https://code.google.com/p/netmap-libpcap a clone of the libpcap repository with netmap support. With this any libpcap client has access to most netmap feature with no recompilation. E.g. tcpdump can filter packets at 10-15 Mpps. https://code.google.com/p/netmap-ipfw a userspace version of ipfw+dummynet which uses netmap to send/receive packets. Speed is up in the 7-10 Mpps range per core for simple rulesets. Both netmap-libpcap and netmap-ipfw will be merged upstream at some point, but while this happens it is useful to have access to them. And yes, this code will be merged soon. It is infinitely better than the version currently in 10 and 9. MFC after: 3 days
2014-02-15 04:53:04 +00:00
nm_prdis("send [%d] %d(%d) bytes at %s:%d",
This new version of netmap brings you the following: - netmap pipes, providing bidirectional blocking I/O while moving 100+ Mpps between processes using shared memory channels (no mistake: over one hundred million. But mind you, i said *moving* not *processing*); - kqueue support (BHyVe needs it); - improved user library. Just the interface name lets you select a NIC, host port, VALE switch port, netmap pipe, and individual queues. The upcoming netmap-enabled libpcap will use this feature. - optional extra buffers associated to netmap ports, for applications that need to buffer data yet don't want to make copies. - segmentation offloading for the VALE switch, useful between VMs. and a number of bug fixes and performance improvements. My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial amount of work on these features so we owe them a big thanks. There are some external repositories that can be of interest: https://code.google.com/p/netmap our public repository for netmap/VALE code, including linux versions and other stuff that does not belong here, such as python bindings. https://code.google.com/p/netmap-libpcap a clone of the libpcap repository with netmap support. With this any libpcap client has access to most netmap feature with no recompilation. E.g. tcpdump can filter packets at 10-15 Mpps. https://code.google.com/p/netmap-ipfw a userspace version of ipfw+dummynet which uses netmap to send/receive packets. Speed is up in the 7-10 Mpps range per core for simple rulesets. Both netmap-libpcap and netmap-ipfw will be merged upstream at some point, but while this happens it is useful to have access to them. And yes, this code will be merged soon. It is infinitely better than the version currently in 10 and 9. MFC after: 3 days
2014-02-15 04:53:04 +00:00
i, (int)copy_len, (int)dst_len,
dst_na->up.name, j);
This new version of netmap brings you the following: - netmap pipes, providing bidirectional blocking I/O while moving 100+ Mpps between processes using shared memory channels (no mistake: over one hundred million. But mind you, i said *moving* not *processing*); - kqueue support (BHyVe needs it); - improved user library. Just the interface name lets you select a NIC, host port, VALE switch port, netmap pipe, and individual queues. The upcoming netmap-enabled libpcap will use this feature. - optional extra buffers associated to netmap ports, for applications that need to buffer data yet don't want to make copies. - segmentation offloading for the VALE switch, useful between VMs. and a number of bug fixes and performance improvements. My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial amount of work on these features so we owe them a big thanks. There are some external repositories that can be of interest: https://code.google.com/p/netmap our public repository for netmap/VALE code, including linux versions and other stuff that does not belong here, such as python bindings. https://code.google.com/p/netmap-libpcap a clone of the libpcap repository with netmap support. With this any libpcap client has access to most netmap feature with no recompilation. E.g. tcpdump can filter packets at 10-15 Mpps. https://code.google.com/p/netmap-ipfw a userspace version of ipfw+dummynet which uses netmap to send/receive packets. Speed is up in the 7-10 Mpps range per core for simple rulesets. Both netmap-libpcap and netmap-ipfw will be merged upstream at some point, but while this happens it is useful to have access to them. And yes, this code will be merged soon. It is infinitely better than the version currently in 10 and 9. MFC after: 3 days
2014-02-15 04:53:04 +00:00
/* round to a multiple of 64 */
copy_len = (copy_len + 63) & ~63;
Update to the current version of netmap. Mostly bugfixes or features developed in the past 6 months, so this is a 10.1 candidate. Basically no user API changes (some bugfixes in sys/net/netmap_user.h). In detail: 1. netmap support for virtio-net, including in netmap mode. Under bhyve and with a netmap backend [2] we reach over 1Mpps with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode. 2. (kernel) add support for multiple memory allocators, so we can better partition physical and virtual interfaces giving access to separate users. The most visible effect is one additional argument to the various kernel functions to compute buffer addresses. All netmap-supported drivers are affected, but changes are mechanical and trivial 3. (kernel) simplify the prototype for *txsync() and *rxsync() driver methods. All netmap drivers affected, changes mostly mechanical. 4. add support for netmap-monitor ports. Think of it as a mirroring port on a physical switch: a netmap monitor port replicates traffic present on the main port. Restrictions apply. Drive carefully. 5. if_lem.c: support for various paravirtualization features, experimental and disabled by default. Most of these are described in our ANCS'13 paper [1]. Paravirtualized support in netmap mode is new, and beats the numbers in the paper by a large factor (under qemu-kvm, we measured gues-host throughput up to 10-12 Mpps). A lot of refactoring and additional documentation in the files in sys/dev/netmap, but apart from #2 and #3 above, almost nothing of this stuff is visible to other kernel parts. Example programs in tools/tools/netmap have been updated with bugfixes and to support more of the existing features. This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline. A lot of this code has been contributed by my colleagues at UNIPI, including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella. MFC after: 3 days.
2014-08-16 15:00:01 +00:00
if (unlikely(copy_len > NETMAP_BUF_SIZE(&dst_na->up) ||
copy_len > NETMAP_BUF_SIZE(&na->up))) {
nm_prlim(5, "invalid len %d, down to 64", (int)copy_len);
copy_len = dst_len = 64; // XXX
}
This new version of netmap brings you the following: - netmap pipes, providing bidirectional blocking I/O while moving 100+ Mpps between processes using shared memory channels (no mistake: over one hundred million. But mind you, i said *moving* not *processing*); - kqueue support (BHyVe needs it); - improved user library. Just the interface name lets you select a NIC, host port, VALE switch port, netmap pipe, and individual queues. The upcoming netmap-enabled libpcap will use this feature. - optional extra buffers associated to netmap ports, for applications that need to buffer data yet don't want to make copies. - segmentation offloading for the VALE switch, useful between VMs. and a number of bug fixes and performance improvements. My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial amount of work on these features so we owe them a big thanks. There are some external repositories that can be of interest: https://code.google.com/p/netmap our public repository for netmap/VALE code, including linux versions and other stuff that does not belong here, such as python bindings. https://code.google.com/p/netmap-libpcap a clone of the libpcap repository with netmap support. With this any libpcap client has access to most netmap feature with no recompilation. E.g. tcpdump can filter packets at 10-15 Mpps. https://code.google.com/p/netmap-ipfw a userspace version of ipfw+dummynet which uses netmap to send/receive packets. Speed is up in the 7-10 Mpps range per core for simple rulesets. Both netmap-libpcap and netmap-ipfw will be merged upstream at some point, but while this happens it is useful to have access to them. And yes, this code will be merged soon. It is infinitely better than the version currently in 10 and 9. MFC after: 3 days
2014-02-15 04:53:04 +00:00
if (ft_p->ft_flags & NS_INDIRECT) {
if (copyin(src, dst, copy_len)) {
// invalid user pointer, pretend len is 0
dst_len = 0;
}
} else {
//memcpy(dst, src, copy_len);
pkt_copy(src, dst, (int)copy_len);
}
slot->len = dst_len;
slot->flags = (cnt << 8)| NS_MOREFRAG;
j = nm_next(j, lim);
needed--;
ft_p++;
} while (ft_p != ft_end);
slot->flags = (cnt << 8); /* clear flag on last entry */
}
/* are we done ? */
if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
break;
}
{
/* current position */
uint32_t *p = kring->nkr_leases; /* shorthand */
uint32_t update_pos;
int still_locked = 1;
mtx_lock(&kring->q_lock);
if (unlikely(howmany > 0)) {
/* not used all bufs. If i am the last one
* i can recover the slots, otherwise must
* fill them with 0 to mark empty packets.
*/
nm_prdis("leftover %d bufs", howmany);
if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
/* yes i am the last one */
nm_prdis("roll back nkr_hwlease to %d", j);
kring->nkr_hwlease = j;
} else {
while (howmany-- > 0) {
ring->slot[j].len = 0;
ring->slot[j].flags = 0;
j = nm_next(j, lim);
}
}
}
p[lease_idx] = j; /* report I am done */
update_pos = kring->nr_hwtail;
if (my_start == update_pos) {
/* all slots before my_start have been reported,
* so scan subsequent leases to see if other ranges
* have been completed, and to a selwakeup or txsync.
*/
while (lease_idx != kring->nkr_lease_idx &&
p[lease_idx] != NR_NOSLOT) {
j = p[lease_idx];
p[lease_idx] = NR_NOSLOT;
lease_idx = nm_next(lease_idx, lim);
}
/* j is the new 'write' position. j != my_start
* means there are new buffers to report
*/
if (likely(j != my_start)) {
kring->nr_hwtail = j;
still_locked = 0;
mtx_unlock(&kring->q_lock);
2015-07-10 05:51:36 +00:00
kring->nm_notify(kring, 0);
Update to the current version of netmap. Mostly bugfixes or features developed in the past 6 months, so this is a 10.1 candidate. Basically no user API changes (some bugfixes in sys/net/netmap_user.h). In detail: 1. netmap support for virtio-net, including in netmap mode. Under bhyve and with a netmap backend [2] we reach over 1Mpps with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode. 2. (kernel) add support for multiple memory allocators, so we can better partition physical and virtual interfaces giving access to separate users. The most visible effect is one additional argument to the various kernel functions to compute buffer addresses. All netmap-supported drivers are affected, but changes are mechanical and trivial 3. (kernel) simplify the prototype for *txsync() and *rxsync() driver methods. All netmap drivers affected, changes mostly mechanical. 4. add support for netmap-monitor ports. Think of it as a mirroring port on a physical switch: a netmap monitor port replicates traffic present on the main port. Restrictions apply. Drive carefully. 5. if_lem.c: support for various paravirtualization features, experimental and disabled by default. Most of these are described in our ANCS'13 paper [1]. Paravirtualized support in netmap mode is new, and beats the numbers in the paper by a large factor (under qemu-kvm, we measured gues-host throughput up to 10-12 Mpps). A lot of refactoring and additional documentation in the files in sys/dev/netmap, but apart from #2 and #3 above, almost nothing of this stuff is visible to other kernel parts. Example programs in tools/tools/netmap have been updated with bugfixes and to support more of the existing features. This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline. A lot of this code has been contributed by my colleagues at UNIPI, including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella. MFC after: 3 days.
2014-08-16 15:00:01 +00:00
/* this is netmap_notify for VALE ports and
* netmap_bwrap_notify for bwrap. The latter will
* trigger a txsync on the underlying hwna
*/
if (dst_na->retry && retry--) {
/* XXX this is going to call nm_notify again.
* Only useful for bwrap in virtual machines
*/
goto retry;
Update to the current version of netmap. Mostly bugfixes or features developed in the past 6 months, so this is a 10.1 candidate. Basically no user API changes (some bugfixes in sys/net/netmap_user.h). In detail: 1. netmap support for virtio-net, including in netmap mode. Under bhyve and with a netmap backend [2] we reach over 1Mpps with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode. 2. (kernel) add support for multiple memory allocators, so we can better partition physical and virtual interfaces giving access to separate users. The most visible effect is one additional argument to the various kernel functions to compute buffer addresses. All netmap-supported drivers are affected, but changes are mechanical and trivial 3. (kernel) simplify the prototype for *txsync() and *rxsync() driver methods. All netmap drivers affected, changes mostly mechanical. 4. add support for netmap-monitor ports. Think of it as a mirroring port on a physical switch: a netmap monitor port replicates traffic present on the main port. Restrictions apply. Drive carefully. 5. if_lem.c: support for various paravirtualization features, experimental and disabled by default. Most of these are described in our ANCS'13 paper [1]. Paravirtualized support in netmap mode is new, and beats the numbers in the paper by a large factor (under qemu-kvm, we measured gues-host throughput up to 10-12 Mpps). A lot of refactoring and additional documentation in the files in sys/dev/netmap, but apart from #2 and #3 above, almost nothing of this stuff is visible to other kernel parts. Example programs in tools/tools/netmap have been updated with bugfixes and to support more of the existing features. This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline. A lot of this code has been contributed by my colleagues at UNIPI, including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella. MFC after: 3 days.
2014-08-16 15:00:01 +00:00
}
}
}
if (still_locked)
mtx_unlock(&kring->q_lock);
}
cleanup:
d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
d->bq_len = 0;
}
brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
brddst->bq_len = 0;
return 0;
}
Update to the current version of netmap. Mostly bugfixes or features developed in the past 6 months, so this is a 10.1 candidate. Basically no user API changes (some bugfixes in sys/net/netmap_user.h). In detail: 1. netmap support for virtio-net, including in netmap mode. Under bhyve and with a netmap backend [2] we reach over 1Mpps with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode. 2. (kernel) add support for multiple memory allocators, so we can better partition physical and virtual interfaces giving access to separate users. The most visible effect is one additional argument to the various kernel functions to compute buffer addresses. All netmap-supported drivers are affected, but changes are mechanical and trivial 3. (kernel) simplify the prototype for *txsync() and *rxsync() driver methods. All netmap drivers affected, changes mostly mechanical. 4. add support for netmap-monitor ports. Think of it as a mirroring port on a physical switch: a netmap monitor port replicates traffic present on the main port. Restrictions apply. Drive carefully. 5. if_lem.c: support for various paravirtualization features, experimental and disabled by default. Most of these are described in our ANCS'13 paper [1]. Paravirtualized support in netmap mode is new, and beats the numbers in the paper by a large factor (under qemu-kvm, we measured gues-host throughput up to 10-12 Mpps). A lot of refactoring and additional documentation in the files in sys/dev/netmap, but apart from #2 and #3 above, almost nothing of this stuff is visible to other kernel parts. Example programs in tools/tools/netmap have been updated with bugfixes and to support more of the existing features. This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline. A lot of this code has been contributed by my colleagues at UNIPI, including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella. MFC after: 3 days.
2014-08-16 15:00:01 +00:00
/* nm_txsync callback for VALE ports */
static int
netmap_vale_vp_txsync(struct netmap_kring *kring, int flags)
{
Update to the current version of netmap. Mostly bugfixes or features developed in the past 6 months, so this is a 10.1 candidate. Basically no user API changes (some bugfixes in sys/net/netmap_user.h). In detail: 1. netmap support for virtio-net, including in netmap mode. Under bhyve and with a netmap backend [2] we reach over 1Mpps with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode. 2. (kernel) add support for multiple memory allocators, so we can better partition physical and virtual interfaces giving access to separate users. The most visible effect is one additional argument to the various kernel functions to compute buffer addresses. All netmap-supported drivers are affected, but changes are mechanical and trivial 3. (kernel) simplify the prototype for *txsync() and *rxsync() driver methods. All netmap drivers affected, changes mostly mechanical. 4. add support for netmap-monitor ports. Think of it as a mirroring port on a physical switch: a netmap monitor port replicates traffic present on the main port. Restrictions apply. Drive carefully. 5. if_lem.c: support for various paravirtualization features, experimental and disabled by default. Most of these are described in our ANCS'13 paper [1]. Paravirtualized support in netmap mode is new, and beats the numbers in the paper by a large factor (under qemu-kvm, we measured gues-host throughput up to 10-12 Mpps). A lot of refactoring and additional documentation in the files in sys/dev/netmap, but apart from #2 and #3 above, almost nothing of this stuff is visible to other kernel parts. Example programs in tools/tools/netmap have been updated with bugfixes and to support more of the existing features. This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline. A lot of this code has been contributed by my colleagues at UNIPI, including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella. MFC after: 3 days.
2014-08-16 15:00:01 +00:00
struct netmap_vp_adapter *na =
(struct netmap_vp_adapter *)kring->na;
u_int done;
u_int const lim = kring->nkr_num_slots - 1;
2015-07-10 05:51:36 +00:00
u_int const head = kring->rhead;
if (bridge_batch <= 0) { /* testing only */
2015-07-10 05:51:36 +00:00
done = head; // used all
goto done;
}
Update to the current version of netmap. Mostly bugfixes or features developed in the past 6 months, so this is a 10.1 candidate. Basically no user API changes (some bugfixes in sys/net/netmap_user.h). In detail: 1. netmap support for virtio-net, including in netmap mode. Under bhyve and with a netmap backend [2] we reach over 1Mpps with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode. 2. (kernel) add support for multiple memory allocators, so we can better partition physical and virtual interfaces giving access to separate users. The most visible effect is one additional argument to the various kernel functions to compute buffer addresses. All netmap-supported drivers are affected, but changes are mechanical and trivial 3. (kernel) simplify the prototype for *txsync() and *rxsync() driver methods. All netmap drivers affected, changes mostly mechanical. 4. add support for netmap-monitor ports. Think of it as a mirroring port on a physical switch: a netmap monitor port replicates traffic present on the main port. Restrictions apply. Drive carefully. 5. if_lem.c: support for various paravirtualization features, experimental and disabled by default. Most of these are described in our ANCS'13 paper [1]. Paravirtualized support in netmap mode is new, and beats the numbers in the paper by a large factor (under qemu-kvm, we measured gues-host throughput up to 10-12 Mpps). A lot of refactoring and additional documentation in the files in sys/dev/netmap, but apart from #2 and #3 above, almost nothing of this stuff is visible to other kernel parts. Example programs in tools/tools/netmap have been updated with bugfixes and to support more of the existing features. This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline. A lot of this code has been contributed by my colleagues at UNIPI, including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella. MFC after: 3 days.
2014-08-16 15:00:01 +00:00
if (!na->na_bdg) {
2015-07-10 05:51:36 +00:00
done = head;
Update to the current version of netmap. Mostly bugfixes or features developed in the past 6 months, so this is a 10.1 candidate. Basically no user API changes (some bugfixes in sys/net/netmap_user.h). In detail: 1. netmap support for virtio-net, including in netmap mode. Under bhyve and with a netmap backend [2] we reach over 1Mpps with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode. 2. (kernel) add support for multiple memory allocators, so we can better partition physical and virtual interfaces giving access to separate users. The most visible effect is one additional argument to the various kernel functions to compute buffer addresses. All netmap-supported drivers are affected, but changes are mechanical and trivial 3. (kernel) simplify the prototype for *txsync() and *rxsync() driver methods. All netmap drivers affected, changes mostly mechanical. 4. add support for netmap-monitor ports. Think of it as a mirroring port on a physical switch: a netmap monitor port replicates traffic present on the main port. Restrictions apply. Drive carefully. 5. if_lem.c: support for various paravirtualization features, experimental and disabled by default. Most of these are described in our ANCS'13 paper [1]. Paravirtualized support in netmap mode is new, and beats the numbers in the paper by a large factor (under qemu-kvm, we measured gues-host throughput up to 10-12 Mpps). A lot of refactoring and additional documentation in the files in sys/dev/netmap, but apart from #2 and #3 above, almost nothing of this stuff is visible to other kernel parts. Example programs in tools/tools/netmap have been updated with bugfixes and to support more of the existing features. This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline. A lot of this code has been contributed by my colleagues at UNIPI, including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella. MFC after: 3 days.
2014-08-16 15:00:01 +00:00
goto done;
}
if (bridge_batch > NM_BDG_BATCH)
bridge_batch = NM_BDG_BATCH;
done = nm_vale_preflush(kring, head);
done:
2015-07-10 05:51:36 +00:00
if (done != head)
nm_prerr("early break at %d/ %d, tail %d", done, head, kring->nr_hwtail);
/*
* packets between 'done' and 'cur' are left unsent.
*/
kring->nr_hwcur = done;
kring->nr_hwtail = nm_prev(done, lim);
if (netmap_debug & NM_DEBUG_TXSYNC)
nm_prinf("%s ring %d flags %d", na->up.name, kring->ring_id, flags);
return 0;
}
Update to the current version of netmap. Mostly bugfixes or features developed in the past 6 months, so this is a 10.1 candidate. Basically no user API changes (some bugfixes in sys/net/netmap_user.h). In detail: 1. netmap support for virtio-net, including in netmap mode. Under bhyve and with a netmap backend [2] we reach over 1Mpps with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode. 2. (kernel) add support for multiple memory allocators, so we can better partition physical and virtual interfaces giving access to separate users. The most visible effect is one additional argument to the various kernel functions to compute buffer addresses. All netmap-supported drivers are affected, but changes are mechanical and trivial 3. (kernel) simplify the prototype for *txsync() and *rxsync() driver methods. All netmap drivers affected, changes mostly mechanical. 4. add support for netmap-monitor ports. Think of it as a mirroring port on a physical switch: a netmap monitor port replicates traffic present on the main port. Restrictions apply. Drive carefully. 5. if_lem.c: support for various paravirtualization features, experimental and disabled by default. Most of these are described in our ANCS'13 paper [1]. Paravirtualized support in netmap mode is new, and beats the numbers in the paper by a large factor (under qemu-kvm, we measured gues-host throughput up to 10-12 Mpps). A lot of refactoring and additional documentation in the files in sys/dev/netmap, but apart from #2 and #3 above, almost nothing of this stuff is visible to other kernel parts. Example programs in tools/tools/netmap have been updated with bugfixes and to support more of the existing features. This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline. A lot of this code has been contributed by my colleagues at UNIPI, including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella. MFC after: 3 days.
2014-08-16 15:00:01 +00:00
/* create a netmap_vp_adapter that describes a VALE port.
* Only persistent VALE ports have a non-null ifp.
*/
static int
netmap_vale_vp_create(struct nmreq_header *hdr, struct ifnet *ifp,
2018-04-12 07:20:50 +00:00
struct netmap_mem_d *nmd, struct netmap_vp_adapter **ret)
{
struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body;
struct netmap_vp_adapter *vpna;
struct netmap_adapter *na;
int error = 0;
This new version of netmap brings you the following: - netmap pipes, providing bidirectional blocking I/O while moving 100+ Mpps between processes using shared memory channels (no mistake: over one hundred million. But mind you, i said *moving* not *processing*); - kqueue support (BHyVe needs it); - improved user library. Just the interface name lets you select a NIC, host port, VALE switch port, netmap pipe, and individual queues. The upcoming netmap-enabled libpcap will use this feature. - optional extra buffers associated to netmap ports, for applications that need to buffer data yet don't want to make copies. - segmentation offloading for the VALE switch, useful between VMs. and a number of bug fixes and performance improvements. My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial amount of work on these features so we owe them a big thanks. There are some external repositories that can be of interest: https://code.google.com/p/netmap our public repository for netmap/VALE code, including linux versions and other stuff that does not belong here, such as python bindings. https://code.google.com/p/netmap-libpcap a clone of the libpcap repository with netmap support. With this any libpcap client has access to most netmap feature with no recompilation. E.g. tcpdump can filter packets at 10-15 Mpps. https://code.google.com/p/netmap-ipfw a userspace version of ipfw+dummynet which uses netmap to send/receive packets. Speed is up in the 7-10 Mpps range per core for simple rulesets. Both netmap-libpcap and netmap-ipfw will be merged upstream at some point, but while this happens it is useful to have access to them. And yes, this code will be merged soon. It is infinitely better than the version currently in 10 and 9. MFC after: 3 days
2014-02-15 04:53:04 +00:00
u_int npipes = 0;
2018-04-12 07:20:50 +00:00
u_int extrabufs = 0;
if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
return EINVAL;
}
vpna = nm_os_malloc(sizeof(*vpna));
if (vpna == NULL)
return ENOMEM;
na = &vpna->up;
na->ifp = ifp;
strlcpy(na->name, hdr->nr_name, sizeof(na->name));
/* bound checking */
2018-04-12 07:20:50 +00:00
na->num_tx_rings = req->nr_tx_rings;
nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
2018-04-12 07:20:50 +00:00
req->nr_tx_rings = na->num_tx_rings; /* write back */
na->num_rx_rings = req->nr_rx_rings;
nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
2018-04-12 07:20:50 +00:00
req->nr_rx_rings = na->num_rx_rings; /* write back */
nm_bound_var(&req->nr_tx_slots, NM_BRIDGE_RINGSIZE,
1, NM_BDG_MAXSLOTS, NULL);
2018-04-12 07:20:50 +00:00
na->num_tx_desc = req->nr_tx_slots;
nm_bound_var(&req->nr_rx_slots, NM_BRIDGE_RINGSIZE,
1, NM_BDG_MAXSLOTS, NULL);
This new version of netmap brings you the following: - netmap pipes, providing bidirectional blocking I/O while moving 100+ Mpps between processes using shared memory channels (no mistake: over one hundred million. But mind you, i said *moving* not *processing*); - kqueue support (BHyVe needs it); - improved user library. Just the interface name lets you select a NIC, host port, VALE switch port, netmap pipe, and individual queues. The upcoming netmap-enabled libpcap will use this feature. - optional extra buffers associated to netmap ports, for applications that need to buffer data yet don't want to make copies. - segmentation offloading for the VALE switch, useful between VMs. and a number of bug fixes and performance improvements. My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial amount of work on these features so we owe them a big thanks. There are some external repositories that can be of interest: https://code.google.com/p/netmap our public repository for netmap/VALE code, including linux versions and other stuff that does not belong here, such as python bindings. https://code.google.com/p/netmap-libpcap a clone of the libpcap repository with netmap support. With this any libpcap client has access to most netmap feature with no recompilation. E.g. tcpdump can filter packets at 10-15 Mpps. https://code.google.com/p/netmap-ipfw a userspace version of ipfw+dummynet which uses netmap to send/receive packets. Speed is up in the 7-10 Mpps range per core for simple rulesets. Both netmap-libpcap and netmap-ipfw will be merged upstream at some point, but while this happens it is useful to have access to them. And yes, this code will be merged soon. It is infinitely better than the version currently in 10 and 9. MFC after: 3 days
2014-02-15 04:53:04 +00:00
/* validate number of pipes. We want at least 1,
* but probably can do with some more.
* So let's use 2 as default (when 0 is supplied)
*/
nm_bound_var(&npipes, 2, 1, NM_MAXPIPES, NULL);
/* validate extra bufs */
extrabufs = req->nr_extra_bufs;
2018-04-12 07:20:50 +00:00
nm_bound_var(&extrabufs, 0, 0,
This new version of netmap brings you the following: - netmap pipes, providing bidirectional blocking I/O while moving 100+ Mpps between processes using shared memory channels (no mistake: over one hundred million. But mind you, i said *moving* not *processing*); - kqueue support (BHyVe needs it); - improved user library. Just the interface name lets you select a NIC, host port, VALE switch port, netmap pipe, and individual queues. The upcoming netmap-enabled libpcap will use this feature. - optional extra buffers associated to netmap ports, for applications that need to buffer data yet don't want to make copies. - segmentation offloading for the VALE switch, useful between VMs. and a number of bug fixes and performance improvements. My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial amount of work on these features so we owe them a big thanks. There are some external repositories that can be of interest: https://code.google.com/p/netmap our public repository for netmap/VALE code, including linux versions and other stuff that does not belong here, such as python bindings. https://code.google.com/p/netmap-libpcap a clone of the libpcap repository with netmap support. With this any libpcap client has access to most netmap feature with no recompilation. E.g. tcpdump can filter packets at 10-15 Mpps. https://code.google.com/p/netmap-ipfw a userspace version of ipfw+dummynet which uses netmap to send/receive packets. Speed is up in the 7-10 Mpps range per core for simple rulesets. Both netmap-libpcap and netmap-ipfw will be merged upstream at some point, but while this happens it is useful to have access to them. And yes, this code will be merged soon. It is infinitely better than the version currently in 10 and 9. MFC after: 3 days
2014-02-15 04:53:04 +00:00
128*NM_BDG_MAXSLOTS, NULL);
2018-04-12 07:20:50 +00:00
req->nr_extra_bufs = extrabufs; /* write back */
na->num_rx_desc = req->nr_rx_slots;
/* Set the mfs to a default value, as it is needed on the VALE
* mismatch datapath. XXX We should set it according to the MTU
* known to the kernel. */
vpna->mfs = NM_BDG_MFS_DEFAULT;
2015-07-10 05:51:36 +00:00
vpna->last_smac = ~0llu;
This new version of netmap brings you the following: - netmap pipes, providing bidirectional blocking I/O while moving 100+ Mpps between processes using shared memory channels (no mistake: over one hundred million. But mind you, i said *moving* not *processing*); - kqueue support (BHyVe needs it); - improved user library. Just the interface name lets you select a NIC, host port, VALE switch port, netmap pipe, and individual queues. The upcoming netmap-enabled libpcap will use this feature. - optional extra buffers associated to netmap ports, for applications that need to buffer data yet don't want to make copies. - segmentation offloading for the VALE switch, useful between VMs. and a number of bug fixes and performance improvements. My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial amount of work on these features so we owe them a big thanks. There are some external repositories that can be of interest: https://code.google.com/p/netmap our public repository for netmap/VALE code, including linux versions and other stuff that does not belong here, such as python bindings. https://code.google.com/p/netmap-libpcap a clone of the libpcap repository with netmap support. With this any libpcap client has access to most netmap feature with no recompilation. E.g. tcpdump can filter packets at 10-15 Mpps. https://code.google.com/p/netmap-ipfw a userspace version of ipfw+dummynet which uses netmap to send/receive packets. Speed is up in the 7-10 Mpps range per core for simple rulesets. Both netmap-libpcap and netmap-ipfw will be merged upstream at some point, but while this happens it is useful to have access to them. And yes, this code will be merged soon. It is infinitely better than the version currently in 10 and 9. MFC after: 3 days
2014-02-15 04:53:04 +00:00
/*if (vpna->mfs > netmap_buf_size) TODO netmap_buf_size is zero??
vpna->mfs = netmap_buf_size; */
if (netmap_verbose)
nm_prinf("max frame size %u", vpna->mfs);
2015-07-10 05:51:36 +00:00
na->na_flags |= NAF_BDG_MAYSLEEP;
2015-07-19 18:06:30 +00:00
/* persistent VALE ports look like hw devices
* with a native netmap adapter
*/
if (ifp)
na->na_flags |= NAF_NATIVE;
na->nm_txsync = netmap_vale_vp_txsync;
na->nm_rxsync = netmap_vp_rxsync; /* use the one provided by bdg */
na->nm_register = netmap_vp_reg; /* use the one provided by bdg */
na->nm_krings_create = netmap_vale_vp_krings_create;
na->nm_krings_delete = netmap_vale_vp_krings_delete;
na->nm_dtor = netmap_vale_vp_dtor;
nm_prdis("nr_mem_id %d", req->nr_mem_id);
na->nm_mem = nmd ?
netmap_mem_get(nmd):
netmap_mem_private_new(
na->num_tx_rings, na->num_tx_desc,
This new version of netmap brings you the following: - netmap pipes, providing bidirectional blocking I/O while moving 100+ Mpps between processes using shared memory channels (no mistake: over one hundred million. But mind you, i said *moving* not *processing*); - kqueue support (BHyVe needs it); - improved user library. Just the interface name lets you select a NIC, host port, VALE switch port, netmap pipe, and individual queues. The upcoming netmap-enabled libpcap will use this feature. - optional extra buffers associated to netmap ports, for applications that need to buffer data yet don't want to make copies. - segmentation offloading for the VALE switch, useful between VMs. and a number of bug fixes and performance improvements. My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial amount of work on these features so we owe them a big thanks. There are some external repositories that can be of interest: https://code.google.com/p/netmap our public repository for netmap/VALE code, including linux versions and other stuff that does not belong here, such as python bindings. https://code.google.com/p/netmap-libpcap a clone of the libpcap repository with netmap support. With this any libpcap client has access to most netmap feature with no recompilation. E.g. tcpdump can filter packets at 10-15 Mpps. https://code.google.com/p/netmap-ipfw a userspace version of ipfw+dummynet which uses netmap to send/receive packets. Speed is up in the 7-10 Mpps range per core for simple rulesets. Both netmap-libpcap and netmap-ipfw will be merged upstream at some point, but while this happens it is useful to have access to them. And yes, this code will be merged soon. It is infinitely better than the version currently in 10 and 9. MFC after: 3 days
2014-02-15 04:53:04 +00:00
na->num_rx_rings, na->num_rx_desc,
2018-04-12 07:20:50 +00:00
req->nr_extra_bufs, npipes, &error);
This new version of netmap brings you the following: - netmap pipes, providing bidirectional blocking I/O while moving 100+ Mpps between processes using shared memory channels (no mistake: over one hundred million. But mind you, i said *moving* not *processing*); - kqueue support (BHyVe needs it); - improved user library. Just the interface name lets you select a NIC, host port, VALE switch port, netmap pipe, and individual queues. The upcoming netmap-enabled libpcap will use this feature. - optional extra buffers associated to netmap ports, for applications that need to buffer data yet don't want to make copies. - segmentation offloading for the VALE switch, useful between VMs. and a number of bug fixes and performance improvements. My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial amount of work on these features so we owe them a big thanks. There are some external repositories that can be of interest: https://code.google.com/p/netmap our public repository for netmap/VALE code, including linux versions and other stuff that does not belong here, such as python bindings. https://code.google.com/p/netmap-libpcap a clone of the libpcap repository with netmap support. With this any libpcap client has access to most netmap feature with no recompilation. E.g. tcpdump can filter packets at 10-15 Mpps. https://code.google.com/p/netmap-ipfw a userspace version of ipfw+dummynet which uses netmap to send/receive packets. Speed is up in the 7-10 Mpps range per core for simple rulesets. Both netmap-libpcap and netmap-ipfw will be merged upstream at some point, but while this happens it is useful to have access to them. And yes, this code will be merged soon. It is infinitely better than the version currently in 10 and 9. MFC after: 3 days
2014-02-15 04:53:04 +00:00
if (na->nm_mem == NULL)
goto err;
na->nm_bdg_attach = netmap_vale_vp_bdg_attach;
/* other nmd fields are set in the common routine */
error = netmap_attach_common(na);
This new version of netmap brings you the following: - netmap pipes, providing bidirectional blocking I/O while moving 100+ Mpps between processes using shared memory channels (no mistake: over one hundred million. But mind you, i said *moving* not *processing*); - kqueue support (BHyVe needs it); - improved user library. Just the interface name lets you select a NIC, host port, VALE switch port, netmap pipe, and individual queues. The upcoming netmap-enabled libpcap will use this feature. - optional extra buffers associated to netmap ports, for applications that need to buffer data yet don't want to make copies. - segmentation offloading for the VALE switch, useful between VMs. and a number of bug fixes and performance improvements. My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial amount of work on these features so we owe them a big thanks. There are some external repositories that can be of interest: https://code.google.com/p/netmap our public repository for netmap/VALE code, including linux versions and other stuff that does not belong here, such as python bindings. https://code.google.com/p/netmap-libpcap a clone of the libpcap repository with netmap support. With this any libpcap client has access to most netmap feature with no recompilation. E.g. tcpdump can filter packets at 10-15 Mpps. https://code.google.com/p/netmap-ipfw a userspace version of ipfw+dummynet which uses netmap to send/receive packets. Speed is up in the 7-10 Mpps range per core for simple rulesets. Both netmap-libpcap and netmap-ipfw will be merged upstream at some point, but while this happens it is useful to have access to them. And yes, this code will be merged soon. It is infinitely better than the version currently in 10 and 9. MFC after: 3 days
2014-02-15 04:53:04 +00:00
if (error)
goto err;
Update to the current version of netmap. Mostly bugfixes or features developed in the past 6 months, so this is a 10.1 candidate. Basically no user API changes (some bugfixes in sys/net/netmap_user.h). In detail: 1. netmap support for virtio-net, including in netmap mode. Under bhyve and with a netmap backend [2] we reach over 1Mpps with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode. 2. (kernel) add support for multiple memory allocators, so we can better partition physical and virtual interfaces giving access to separate users. The most visible effect is one additional argument to the various kernel functions to compute buffer addresses. All netmap-supported drivers are affected, but changes are mechanical and trivial 3. (kernel) simplify the prototype for *txsync() and *rxsync() driver methods. All netmap drivers affected, changes mostly mechanical. 4. add support for netmap-monitor ports. Think of it as a mirroring port on a physical switch: a netmap monitor port replicates traffic present on the main port. Restrictions apply. Drive carefully. 5. if_lem.c: support for various paravirtualization features, experimental and disabled by default. Most of these are described in our ANCS'13 paper [1]. Paravirtualized support in netmap mode is new, and beats the numbers in the paper by a large factor (under qemu-kvm, we measured gues-host throughput up to 10-12 Mpps). A lot of refactoring and additional documentation in the files in sys/dev/netmap, but apart from #2 and #3 above, almost nothing of this stuff is visible to other kernel parts. Example programs in tools/tools/netmap have been updated with bugfixes and to support more of the existing features. This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline. A lot of this code has been contributed by my colleagues at UNIPI, including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella. MFC after: 3 days.
2014-08-16 15:00:01 +00:00
*ret = vpna;
return 0;
This new version of netmap brings you the following: - netmap pipes, providing bidirectional blocking I/O while moving 100+ Mpps between processes using shared memory channels (no mistake: over one hundred million. But mind you, i said *moving* not *processing*); - kqueue support (BHyVe needs it); - improved user library. Just the interface name lets you select a NIC, host port, VALE switch port, netmap pipe, and individual queues. The upcoming netmap-enabled libpcap will use this feature. - optional extra buffers associated to netmap ports, for applications that need to buffer data yet don't want to make copies. - segmentation offloading for the VALE switch, useful between VMs. and a number of bug fixes and performance improvements. My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial amount of work on these features so we owe them a big thanks. There are some external repositories that can be of interest: https://code.google.com/p/netmap our public repository for netmap/VALE code, including linux versions and other stuff that does not belong here, such as python bindings. https://code.google.com/p/netmap-libpcap a clone of the libpcap repository with netmap support. With this any libpcap client has access to most netmap feature with no recompilation. E.g. tcpdump can filter packets at 10-15 Mpps. https://code.google.com/p/netmap-ipfw a userspace version of ipfw+dummynet which uses netmap to send/receive packets. Speed is up in the 7-10 Mpps range per core for simple rulesets. Both netmap-libpcap and netmap-ipfw will be merged upstream at some point, but while this happens it is useful to have access to them. And yes, this code will be merged soon. It is infinitely better than the version currently in 10 and 9. MFC after: 3 days
2014-02-15 04:53:04 +00:00
err:
if (na->nm_mem != NULL)
netmap_mem_put(na->nm_mem);
nm_os_free(vpna);
This new version of netmap brings you the following: - netmap pipes, providing bidirectional blocking I/O while moving 100+ Mpps between processes using shared memory channels (no mistake: over one hundred million. But mind you, i said *moving* not *processing*); - kqueue support (BHyVe needs it); - improved user library. Just the interface name lets you select a NIC, host port, VALE switch port, netmap pipe, and individual queues. The upcoming netmap-enabled libpcap will use this feature. - optional extra buffers associated to netmap ports, for applications that need to buffer data yet don't want to make copies. - segmentation offloading for the VALE switch, useful between VMs. and a number of bug fixes and performance improvements. My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial amount of work on these features so we owe them a big thanks. There are some external repositories that can be of interest: https://code.google.com/p/netmap our public repository for netmap/VALE code, including linux versions and other stuff that does not belong here, such as python bindings. https://code.google.com/p/netmap-libpcap a clone of the libpcap repository with netmap support. With this any libpcap client has access to most netmap feature with no recompilation. E.g. tcpdump can filter packets at 10-15 Mpps. https://code.google.com/p/netmap-ipfw a userspace version of ipfw+dummynet which uses netmap to send/receive packets. Speed is up in the 7-10 Mpps range per core for simple rulesets. Both netmap-libpcap and netmap-ipfw will be merged upstream at some point, but while this happens it is useful to have access to them. And yes, this code will be merged soon. It is infinitely better than the version currently in 10 and 9. MFC after: 3 days
2014-02-15 04:53:04 +00:00
return error;
}
/* nm_bdg_attach callback for VALE ports
* The na_vp port is this same netmap_adapter. There is no host port.
*/
static int
netmap_vale_vp_bdg_attach(const char *name, struct netmap_adapter *na,
struct nm_bridge *b)
{
struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
if ((b->bdg_flags & NM_BDG_NEED_BWRAP) || vpna->na_bdg) {
return NM_NEED_BWRAP;
}
na->na_vp = vpna;
strlcpy(na->name, name, sizeof(na->name));
na->na_hostvp = NULL;
return 0;
}
static int
netmap_vale_bwrap_krings_create(struct netmap_adapter *na)
{
int error;
Update to the current version of netmap. Mostly bugfixes or features developed in the past 6 months, so this is a 10.1 candidate. Basically no user API changes (some bugfixes in sys/net/netmap_user.h). In detail: 1. netmap support for virtio-net, including in netmap mode. Under bhyve and with a netmap backend [2] we reach over 1Mpps with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode. 2. (kernel) add support for multiple memory allocators, so we can better partition physical and virtual interfaces giving access to separate users. The most visible effect is one additional argument to the various kernel functions to compute buffer addresses. All netmap-supported drivers are affected, but changes are mechanical and trivial 3. (kernel) simplify the prototype for *txsync() and *rxsync() driver methods. All netmap drivers affected, changes mostly mechanical. 4. add support for netmap-monitor ports. Think of it as a mirroring port on a physical switch: a netmap monitor port replicates traffic present on the main port. Restrictions apply. Drive carefully. 5. if_lem.c: support for various paravirtualization features, experimental and disabled by default. Most of these are described in our ANCS'13 paper [1]. Paravirtualized support in netmap mode is new, and beats the numbers in the paper by a large factor (under qemu-kvm, we measured gues-host throughput up to 10-12 Mpps). A lot of refactoring and additional documentation in the files in sys/dev/netmap, but apart from #2 and #3 above, almost nothing of this stuff is visible to other kernel parts. Example programs in tools/tools/netmap have been updated with bugfixes and to support more of the existing features. This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline. A lot of this code has been contributed by my colleagues at UNIPI, including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella. MFC after: 3 days.
2014-08-16 15:00:01 +00:00
/* impersonate a netmap_vp_adapter */
error = netmap_vale_vp_krings_create(na);
if (error)
return error;
error = netmap_bwrap_krings_create_common(na);
if (error) {
netmap_vale_vp_krings_delete(na);
}
return error;
}
static void
netmap_vale_bwrap_krings_delete(struct netmap_adapter *na)
{
netmap_bwrap_krings_delete_common(na);
netmap_vale_vp_krings_delete(na);
}
static int
netmap_vale_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
{
struct netmap_bwrap_adapter *bna;
Update to the current version of netmap. Mostly bugfixes or features developed in the past 6 months, so this is a 10.1 candidate. Basically no user API changes (some bugfixes in sys/net/netmap_user.h). In detail: 1. netmap support for virtio-net, including in netmap mode. Under bhyve and with a netmap backend [2] we reach over 1Mpps with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode. 2. (kernel) add support for multiple memory allocators, so we can better partition physical and virtual interfaces giving access to separate users. The most visible effect is one additional argument to the various kernel functions to compute buffer addresses. All netmap-supported drivers are affected, but changes are mechanical and trivial 3. (kernel) simplify the prototype for *txsync() and *rxsync() driver methods. All netmap drivers affected, changes mostly mechanical. 4. add support for netmap-monitor ports. Think of it as a mirroring port on a physical switch: a netmap monitor port replicates traffic present on the main port. Restrictions apply. Drive carefully. 5. if_lem.c: support for various paravirtualization features, experimental and disabled by default. Most of these are described in our ANCS'13 paper [1]. Paravirtualized support in netmap mode is new, and beats the numbers in the paper by a large factor (under qemu-kvm, we measured gues-host throughput up to 10-12 Mpps). A lot of refactoring and additional documentation in the files in sys/dev/netmap, but apart from #2 and #3 above, almost nothing of this stuff is visible to other kernel parts. Example programs in tools/tools/netmap have been updated with bugfixes and to support more of the existing features. This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline. A lot of this code has been contributed by my colleagues at UNIPI, including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella. MFC after: 3 days.
2014-08-16 15:00:01 +00:00
struct netmap_adapter *na = NULL;
struct netmap_adapter *hostna = NULL;
int error;
bna = nm_os_malloc(sizeof(*bna));
Update to the current version of netmap. Mostly bugfixes or features developed in the past 6 months, so this is a 10.1 candidate. Basically no user API changes (some bugfixes in sys/net/netmap_user.h). In detail: 1. netmap support for virtio-net, including in netmap mode. Under bhyve and with a netmap backend [2] we reach over 1Mpps with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode. 2. (kernel) add support for multiple memory allocators, so we can better partition physical and virtual interfaces giving access to separate users. The most visible effect is one additional argument to the various kernel functions to compute buffer addresses. All netmap-supported drivers are affected, but changes are mechanical and trivial 3. (kernel) simplify the prototype for *txsync() and *rxsync() driver methods. All netmap drivers affected, changes mostly mechanical. 4. add support for netmap-monitor ports. Think of it as a mirroring port on a physical switch: a netmap monitor port replicates traffic present on the main port. Restrictions apply. Drive carefully. 5. if_lem.c: support for various paravirtualization features, experimental and disabled by default. Most of these are described in our ANCS'13 paper [1]. Paravirtualized support in netmap mode is new, and beats the numbers in the paper by a large factor (under qemu-kvm, we measured gues-host throughput up to 10-12 Mpps). A lot of refactoring and additional documentation in the files in sys/dev/netmap, but apart from #2 and #3 above, almost nothing of this stuff is visible to other kernel parts. Example programs in tools/tools/netmap have been updated with bugfixes and to support more of the existing features. This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline. A lot of this code has been contributed by my colleagues at UNIPI, including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella. MFC after: 3 days.
2014-08-16 15:00:01 +00:00
if (bna == NULL) {
return ENOMEM;
Update to the current version of netmap. Mostly bugfixes or features developed in the past 6 months, so this is a 10.1 candidate. Basically no user API changes (some bugfixes in sys/net/netmap_user.h). In detail: 1. netmap support for virtio-net, including in netmap mode. Under bhyve and with a netmap backend [2] we reach over 1Mpps with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode. 2. (kernel) add support for multiple memory allocators, so we can better partition physical and virtual interfaces giving access to separate users. The most visible effect is one additional argument to the various kernel functions to compute buffer addresses. All netmap-supported drivers are affected, but changes are mechanical and trivial 3. (kernel) simplify the prototype for *txsync() and *rxsync() driver methods. All netmap drivers affected, changes mostly mechanical. 4. add support for netmap-monitor ports. Think of it as a mirroring port on a physical switch: a netmap monitor port replicates traffic present on the main port. Restrictions apply. Drive carefully. 5. if_lem.c: support for various paravirtualization features, experimental and disabled by default. Most of these are described in our ANCS'13 paper [1]. Paravirtualized support in netmap mode is new, and beats the numbers in the paper by a large factor (under qemu-kvm, we measured gues-host throughput up to 10-12 Mpps). A lot of refactoring and additional documentation in the files in sys/dev/netmap, but apart from #2 and #3 above, almost nothing of this stuff is visible to other kernel parts. Example programs in tools/tools/netmap have been updated with bugfixes and to support more of the existing features. This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline. A lot of this code has been contributed by my colleagues at UNIPI, including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella. MFC after: 3 days.
2014-08-16 15:00:01 +00:00
}
na = &bna->up.up;
strlcpy(na->name, nr_name, sizeof(na->name));
na->nm_register = netmap_bwrap_reg;
na->nm_txsync = netmap_vale_vp_txsync;
// na->nm_rxsync = netmap_bwrap_rxsync;
na->nm_krings_create = netmap_vale_bwrap_krings_create;
na->nm_krings_delete = netmap_vale_bwrap_krings_delete;
na->nm_notify = netmap_bwrap_notify;
bna->up.retry = 1; /* XXX maybe this should depend on the hwna */
/* Set the mfs, needed on the VALE mismatch datapath. */
bna->up.mfs = NM_BDG_MFS_DEFAULT;
This new version of netmap brings you the following: - netmap pipes, providing bidirectional blocking I/O while moving 100+ Mpps between processes using shared memory channels (no mistake: over one hundred million. But mind you, i said *moving* not *processing*); - kqueue support (BHyVe needs it); - improved user library. Just the interface name lets you select a NIC, host port, VALE switch port, netmap pipe, and individual queues. The upcoming netmap-enabled libpcap will use this feature. - optional extra buffers associated to netmap ports, for applications that need to buffer data yet don't want to make copies. - segmentation offloading for the VALE switch, useful between VMs. and a number of bug fixes and performance improvements. My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial amount of work on these features so we owe them a big thanks. There are some external repositories that can be of interest: https://code.google.com/p/netmap our public repository for netmap/VALE code, including linux versions and other stuff that does not belong here, such as python bindings. https://code.google.com/p/netmap-libpcap a clone of the libpcap repository with netmap support. With this any libpcap client has access to most netmap feature with no recompilation. E.g. tcpdump can filter packets at 10-15 Mpps. https://code.google.com/p/netmap-ipfw a userspace version of ipfw+dummynet which uses netmap to send/receive packets. Speed is up in the 7-10 Mpps range per core for simple rulesets. Both netmap-libpcap and netmap-ipfw will be merged upstream at some point, but while this happens it is useful to have access to them. And yes, this code will be merged soon. It is infinitely better than the version currently in 10 and 9. MFC after: 3 days
2014-02-15 04:53:04 +00:00
if (hwna->na_flags & NAF_HOST_RINGS) {
hostna = &bna->host.up;
2015-07-10 05:51:36 +00:00
hostna->nm_notify = netmap_bwrap_notify;
bna->host.mfs = NM_BDG_MFS_DEFAULT;
This new version of netmap brings you the following: - netmap pipes, providing bidirectional blocking I/O while moving 100+ Mpps between processes using shared memory channels (no mistake: over one hundred million. But mind you, i said *moving* not *processing*); - kqueue support (BHyVe needs it); - improved user library. Just the interface name lets you select a NIC, host port, VALE switch port, netmap pipe, and individual queues. The upcoming netmap-enabled libpcap will use this feature. - optional extra buffers associated to netmap ports, for applications that need to buffer data yet don't want to make copies. - segmentation offloading for the VALE switch, useful between VMs. and a number of bug fixes and performance improvements. My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial amount of work on these features so we owe them a big thanks. There are some external repositories that can be of interest: https://code.google.com/p/netmap our public repository for netmap/VALE code, including linux versions and other stuff that does not belong here, such as python bindings. https://code.google.com/p/netmap-libpcap a clone of the libpcap repository with netmap support. With this any libpcap client has access to most netmap feature with no recompilation. E.g. tcpdump can filter packets at 10-15 Mpps. https://code.google.com/p/netmap-ipfw a userspace version of ipfw+dummynet which uses netmap to send/receive packets. Speed is up in the 7-10 Mpps range per core for simple rulesets. Both netmap-libpcap and netmap-ipfw will be merged upstream at some point, but while this happens it is useful to have access to them. And yes, this code will be merged soon. It is infinitely better than the version currently in 10 and 9. MFC after: 3 days
2014-02-15 04:53:04 +00:00
}
error = netmap_bwrap_attach_common(na, hwna);
if (error) {
nm_os_free(bna);
}
Update to the current version of netmap. Mostly bugfixes or features developed in the past 6 months, so this is a 10.1 candidate. Basically no user API changes (some bugfixes in sys/net/netmap_user.h). In detail: 1. netmap support for virtio-net, including in netmap mode. Under bhyve and with a netmap backend [2] we reach over 1Mpps with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode. 2. (kernel) add support for multiple memory allocators, so we can better partition physical and virtual interfaces giving access to separate users. The most visible effect is one additional argument to the various kernel functions to compute buffer addresses. All netmap-supported drivers are affected, but changes are mechanical and trivial 3. (kernel) simplify the prototype for *txsync() and *rxsync() driver methods. All netmap drivers affected, changes mostly mechanical. 4. add support for netmap-monitor ports. Think of it as a mirroring port on a physical switch: a netmap monitor port replicates traffic present on the main port. Restrictions apply. Drive carefully. 5. if_lem.c: support for various paravirtualization features, experimental and disabled by default. Most of these are described in our ANCS'13 paper [1]. Paravirtualized support in netmap mode is new, and beats the numbers in the paper by a large factor (under qemu-kvm, we measured gues-host throughput up to 10-12 Mpps). A lot of refactoring and additional documentation in the files in sys/dev/netmap, but apart from #2 and #3 above, almost nothing of this stuff is visible to other kernel parts. Example programs in tools/tools/netmap have been updated with bugfixes and to support more of the existing features. This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline. A lot of this code has been contributed by my colleagues at UNIPI, including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella. MFC after: 3 days.
2014-08-16 15:00:01 +00:00
return error;
}
int
netmap_get_vale_na(struct nmreq_header *hdr, struct netmap_adapter **na,
struct netmap_mem_d *nmd, int create)
2015-07-10 05:51:36 +00:00
{
return netmap_get_bdg_na(hdr, na, nmd, create, &vale_bdg_ops);
}
2015-07-10 05:51:36 +00:00
/* creates a persistent VALE port */
int
nm_vi_create(struct nmreq_header *hdr)
{
struct nmreq_vale_newif *req =
(struct nmreq_vale_newif *)(uintptr_t)hdr->nr_body;
int error = 0;
/* Build a nmreq_register out of the nmreq_vale_newif,
* so that we can call netmap_get_bdg_na(). */
struct nmreq_register regreq;
bzero(&regreq, sizeof(regreq));
regreq.nr_tx_slots = req->nr_tx_slots;
regreq.nr_rx_slots = req->nr_rx_slots;
regreq.nr_tx_rings = req->nr_tx_rings;
regreq.nr_rx_rings = req->nr_rx_rings;
regreq.nr_mem_id = req->nr_mem_id;
hdr->nr_reqtype = NETMAP_REQ_REGISTER;
hdr->nr_body = (uintptr_t)&regreq;
error = netmap_vi_create(hdr, 0 /* no autodelete */);
hdr->nr_reqtype = NETMAP_REQ_VALE_NEWIF;
hdr->nr_body = (uintptr_t)req;
/* Write back to the original struct. */
req->nr_tx_slots = regreq.nr_tx_slots;
req->nr_rx_slots = regreq.nr_rx_slots;
req->nr_tx_rings = regreq.nr_tx_rings;
req->nr_rx_rings = regreq.nr_rx_rings;
req->nr_mem_id = regreq.nr_mem_id;
return error;
2015-07-10 05:51:36 +00:00
}
/* remove a persistent VALE port from the system */
int
nm_vi_destroy(const char *name)
{
struct ifnet *ifp;
struct netmap_vp_adapter *vpna;
int error;
2015-07-10 05:51:36 +00:00
ifp = ifunit_ref(name);
if (!ifp)
return ENXIO;
NMG_LOCK();
/* make sure this is actually a VALE port */
if (!NM_NA_VALID(ifp) || NA(ifp)->nm_register != netmap_vp_reg) {
error = EINVAL;
goto err;
}
vpna = (struct netmap_vp_adapter *)NA(ifp);
/* we can only destroy ports that were created via NETMAP_BDG_NEWIF */
if (vpna->autodelete) {
error = EINVAL;
goto err;
}
/* also make sure that nobody is using the inferface */
if (NETMAP_OWNED_BY_ANY(&vpna->up) ||
vpna->up.na_refcount > 1 /* any ref besides the one in nm_vi_create()? */) {
error = EBUSY;
goto err;
}
NMG_UNLOCK();
2015-07-10 05:51:36 +00:00
if (netmap_verbose)
nm_prinf("destroying a persistent vale interface %s", ifp->if_xname);
/* Linux requires all the references are released
* before unregister
*/
netmap_detach(ifp);
if_rele(ifp);
nm_os_vi_detach(ifp);
return 0;
err:
NMG_UNLOCK();
if_rele(ifp);
return error;
2015-07-10 05:51:36 +00:00
}
static int
nm_update_info(struct nmreq_register *req, struct netmap_adapter *na)
2015-07-10 05:51:36 +00:00
{
req->nr_rx_rings = na->num_rx_rings;
req->nr_tx_rings = na->num_tx_rings;
req->nr_rx_slots = na->num_rx_desc;
req->nr_tx_slots = na->num_tx_desc;
return netmap_mem_get_info(na->nm_mem, &req->nr_memsize, NULL,
&req->nr_mem_id);
2015-07-10 05:51:36 +00:00
}
/*
* Create a virtual interface registered to the system.
* The interface will be attached to a bridge later.
*/
int
netmap_vi_create(struct nmreq_header *hdr, int autodelete)
2015-07-10 05:51:36 +00:00
{
struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body;
struct ifnet *ifp;
struct netmap_vp_adapter *vpna;
struct netmap_mem_d *nmd = NULL;
int error;
if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
return EINVAL;
}
/* don't include VALE prefix */
if (!strncmp(hdr->nr_name, NM_BDG_NAME, strlen(NM_BDG_NAME)))
return EINVAL;
if (strlen(hdr->nr_name) >= IFNAMSIZ) {
return EINVAL;
}
ifp = ifunit_ref(hdr->nr_name);
if (ifp) { /* already exist, cannot create new one */
error = EEXIST;
NMG_LOCK();
if (NM_NA_VALID(ifp)) {
int update_err = nm_update_info(req, NA(ifp));
if (update_err)
error = update_err;
}
NMG_UNLOCK();
if_rele(ifp);
return error;
}
error = nm_os_vi_persist(hdr->nr_name, &ifp);
if (error)
return error;
NMG_LOCK();
if (req->nr_mem_id) {
nmd = netmap_mem_find(req->nr_mem_id);
if (nmd == NULL) {
error = EINVAL;
goto err_1;
}
}
/* netmap_vp_create creates a struct netmap_vp_adapter */
error = netmap_vale_vp_create(hdr, ifp, nmd, &vpna);
if (error) {
if (netmap_debug & NM_DEBUG_VALE)
nm_prerr("error %d", error);
goto err_1;
}
/* persist-specific routines */
vpna->up.nm_bdg_ctl = netmap_vp_bdg_ctl;
if (!autodelete) {
netmap_adapter_get(&vpna->up);
} else {
vpna->autodelete = 1;
}
NM_ATTACH_NA(ifp, &vpna->up);
/* return the updated info */
error = nm_update_info(req, &vpna->up);
if (error) {
goto err_2;
}
nm_prdis("returning nr_mem_id %d", req->nr_mem_id);
if (nmd)
netmap_mem_put(nmd);
NMG_UNLOCK();
nm_prdis("created %s", ifp->if_xname);
return 0;
err_2:
netmap_detach(ifp);
err_1:
if (nmd)
netmap_mem_put(nmd);
NMG_UNLOCK();
nm_os_vi_detach(ifp);
return error;
}
#endif /* WITH_VALE */