2017-11-27 14:52:40 +00:00
|
|
|
/*-
|
|
|
|
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
|
|
|
|
*
|
Import the current version of netmap, aligned with the one on github.
This commit, long overdue, contains contributions in the last 2 years
from Stefano Garzarella, Giuseppe Lettieri, Vincenzo Maffione, including:
+ fixes on monitor ports
+ the 'ptnet' virtual device driver, and ptnetmap backend, for
high speed virtual passthrough on VMs (bhyve fixes in an upcoming commit)
+ improved emulated netmap mode
+ more robust error handling
+ removal of stale code
+ various fixes to code and documentation (some mixup between RX and TX
parameters, and private and public variables)
We also include an additional tool, nmreplay, which is functionally
equivalent to tcpreplay but operating on netmap ports.
2016-10-16 14:13:32 +00:00
|
|
|
* Copyright (C) 2013-2016 Universita` di Pisa
|
|
|
|
* All rights reserved.
|
2013-12-15 08:37:24 +00:00
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
#if defined(__FreeBSD__)
|
|
|
|
#include <sys/cdefs.h> /* prerequisite */
|
|
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/errno.h>
|
|
|
|
#include <sys/param.h> /* defines used in kernel.h */
|
|
|
|
#include <sys/kernel.h> /* types used in module initialization */
|
|
|
|
#include <sys/conf.h> /* cdevsw struct, UID, GID */
|
|
|
|
#include <sys/sockio.h>
|
|
|
|
#include <sys/socketvar.h> /* struct socket */
|
|
|
|
#include <sys/malloc.h>
|
|
|
|
#include <sys/poll.h>
|
|
|
|
#include <sys/rwlock.h>
|
|
|
|
#include <sys/socket.h> /* sockaddrs */
|
|
|
|
#include <sys/selinfo.h>
|
|
|
|
#include <sys/sysctl.h>
|
|
|
|
#include <net/if.h>
|
|
|
|
#include <net/if_var.h>
|
|
|
|
#include <net/bpf.h> /* BIOCIMMEDIATE */
|
|
|
|
#include <machine/bus.h> /* bus_dmamap_* */
|
|
|
|
#include <sys/endian.h>
|
|
|
|
#include <sys/refcount.h>
|
2018-10-23 08:55:16 +00:00
|
|
|
#include <sys/smp.h>
|
2013-12-15 08:37:24 +00:00
|
|
|
|
|
|
|
|
|
|
|
#elif defined(linux)
|
|
|
|
|
|
|
|
#include "bsd_glue.h"
|
|
|
|
|
|
|
|
#elif defined(__APPLE__)
|
|
|
|
|
|
|
|
#warning OSX support is only partial
|
|
|
|
#include "osx_glue.h"
|
|
|
|
|
Import the current version of netmap, aligned with the one on github.
This commit, long overdue, contains contributions in the last 2 years
from Stefano Garzarella, Giuseppe Lettieri, Vincenzo Maffione, including:
+ fixes on monitor ports
+ the 'ptnet' virtual device driver, and ptnetmap backend, for
high speed virtual passthrough on VMs (bhyve fixes in an upcoming commit)
+ improved emulated netmap mode
+ more robust error handling
+ removal of stale code
+ various fixes to code and documentation (some mixup between RX and TX
parameters, and private and public variables)
We also include an additional tool, nmreplay, which is functionally
equivalent to tcpreplay but operating on netmap ports.
2016-10-16 14:13:32 +00:00
|
|
|
#elif defined(_WIN32)
|
|
|
|
#include "win_glue.h"
|
|
|
|
|
2013-12-15 08:37:24 +00:00
|
|
|
#else
|
|
|
|
|
|
|
|
#error Unsupported platform
|
|
|
|
|
|
|
|
#endif /* unsupported */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* common headers
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <net/netmap.h>
|
|
|
|
#include <dev/netmap/netmap_kern.h>
|
|
|
|
#include <dev/netmap/netmap_mem2.h>
|
2018-10-23 08:55:16 +00:00
|
|
|
#include <dev/netmap/netmap_bdg.h>
|
2013-12-15 08:37:24 +00:00
|
|
|
|
|
|
|
#ifdef WITH_VALE
|
|
|
|
|
|
|
|
/*
|
|
|
|
* system parameters (most of them in netmap_kern.h)
|
Import the current version of netmap, aligned with the one on github.
This commit, long overdue, contains contributions in the last 2 years
from Stefano Garzarella, Giuseppe Lettieri, Vincenzo Maffione, including:
+ fixes on monitor ports
+ the 'ptnet' virtual device driver, and ptnetmap backend, for
high speed virtual passthrough on VMs (bhyve fixes in an upcoming commit)
+ improved emulated netmap mode
+ more robust error handling
+ removal of stale code
+ various fixes to code and documentation (some mixup between RX and TX
parameters, and private and public variables)
We also include an additional tool, nmreplay, which is functionally
equivalent to tcpreplay but operating on netmap ports.
2016-10-16 14:13:32 +00:00
|
|
|
* NM_BDG_NAME prefix for switch port names, default "vale"
|
2013-12-15 08:37:24 +00:00
|
|
|
* NM_BDG_MAXPORTS number of ports
|
|
|
|
* NM_BRIDGES max number of switches in the system.
|
|
|
|
* XXX should become a sysctl or tunable
|
|
|
|
*
|
|
|
|
* Switch ports are named valeX:Y where X is the switch name and Y
|
|
|
|
* is the port. If Y matches a physical interface name, the port is
|
|
|
|
* connected to a physical device.
|
|
|
|
*
|
|
|
|
* Unlike physical interfaces, switch ports use their own memory region
|
|
|
|
* for rings and buffers.
|
|
|
|
* The virtual interfaces use per-queue lock instead of core lock.
|
|
|
|
* In the tx loop, we aggregate traffic in batches to make all operations
|
|
|
|
* faster. The batch size is bridge_batch.
|
|
|
|
*/
|
|
|
|
#define NM_BDG_MAXRINGS 16 /* XXX unclear how many. */
|
|
|
|
#define NM_BDG_MAXSLOTS 4096 /* XXX same as above */
|
|
|
|
#define NM_BRIDGE_RINGSIZE 1024 /* in the device */
|
|
|
|
#define NM_BDG_BATCH 1024 /* entries in the forwarding buffer */
|
|
|
|
/* actual size of the tables */
|
2018-10-23 08:55:16 +00:00
|
|
|
#define NM_BDG_BATCH_MAX (NM_BDG_BATCH + NETMAP_MAX_FRAGS)
|
2013-12-15 08:37:24 +00:00
|
|
|
/* NM_FT_NULL terminates a list of slots in the ft */
|
|
|
|
#define NM_FT_NULL NM_BDG_BATCH_MAX
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* bridge_batch is set via sysctl to the max batch size to be
|
|
|
|
* used in the bridge. The actual value may be larger as the
|
|
|
|
* last packet in the block may overflow the size.
|
|
|
|
*/
|
Import the current version of netmap, aligned with the one on github.
This commit, long overdue, contains contributions in the last 2 years
from Stefano Garzarella, Giuseppe Lettieri, Vincenzo Maffione, including:
+ fixes on monitor ports
+ the 'ptnet' virtual device driver, and ptnetmap backend, for
high speed virtual passthrough on VMs (bhyve fixes in an upcoming commit)
+ improved emulated netmap mode
+ more robust error handling
+ removal of stale code
+ various fixes to code and documentation (some mixup between RX and TX
parameters, and private and public variables)
We also include an additional tool, nmreplay, which is functionally
equivalent to tcpreplay but operating on netmap ports.
2016-10-16 14:13:32 +00:00
|
|
|
static int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
|
|
|
|
SYSBEGIN(vars_vale);
|
2013-12-15 08:37:24 +00:00
|
|
|
SYSCTL_DECL(_dev_netmap);
|
2018-04-09 09:24:26 +00:00
|
|
|
SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0,
|
2018-05-18 03:38:17 +00:00
|
|
|
"Max batch size to be used in the bridge");
|
Import the current version of netmap, aligned with the one on github.
This commit, long overdue, contains contributions in the last 2 years
from Stefano Garzarella, Giuseppe Lettieri, Vincenzo Maffione, including:
+ fixes on monitor ports
+ the 'ptnet' virtual device driver, and ptnetmap backend, for
high speed virtual passthrough on VMs (bhyve fixes in an upcoming commit)
+ improved emulated netmap mode
+ more robust error handling
+ removal of stale code
+ various fixes to code and documentation (some mixup between RX and TX
parameters, and private and public variables)
We also include an additional tool, nmreplay, which is functionally
equivalent to tcpreplay but operating on netmap ports.
2016-10-16 14:13:32 +00:00
|
|
|
SYSEND;
|
2013-12-15 08:37:24 +00:00
|
|
|
|
netmap: align codebase to the current upstream (commit id 3fb001303718146)
Changelist:
- Turn tx_rings and rx_rings arrays into arrays of pointers to kring
structs. This patch includes fixes for ixv, ixl, ix, re, cxgbe, iflib,
vtnet and ptnet drivers to cope with the change.
- Generalize the nm_config() callback to accept a struct containing many
parameters.
- Introduce NKR_FAKERING to support buffers sharing (used for netmap
pipes)
- Improved API for external VALE modules.
- Various bug fixes and improvements to the netmap memory allocator,
including support for externally (userspace) allocated memory.
- Refactoring of netmap pipes: now linked rings share the same netmap
buffers, with a separate set of kring pointers (rhead, rcur, rtail).
Buffer swapping does not need to happen anymore.
- Large refactoring of the control API towards an extensible solution;
the goal is to allow the addition of more commands and extension of
existing ones (with new options) without the need of hacks or the
risk of running out of configuration space.
A new NIOCCTRL ioctl has been added to handle all the requests of the
new control API, which cover all the functionalities so far supported.
The netmap API bumps from 11 to 12 with this patch. Full backward
compatibility is provided for the old control command (NIOCREGIF), by
means of a new netmap_legacy module. Many parts of the old netmap.h
header has now been moved to netmap_legacy.h (included by netmap.h).
Approved by: hrs (mentor)
2018-04-12 07:20:50 +00:00
|
|
|
static int netmap_vp_create(struct nmreq_header *hdr, struct ifnet *,
|
2017-06-12 22:53:18 +00:00
|
|
|
struct netmap_mem_d *nmd, struct netmap_vp_adapter **);
|
2018-10-23 08:55:16 +00:00
|
|
|
static int netmap_vp_bdg_attach(const char *, struct netmap_adapter *,
|
|
|
|
struct nm_bridge *);
|
|
|
|
static int netmap_vale_bwrap_attach(const char *, struct netmap_adapter *);
|
2013-12-15 08:37:24 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* For each output interface, nm_bdg_q is used to construct a list.
|
|
|
|
* bq_len is the number of output buffers (we can have coalescing
|
|
|
|
* during the copy).
|
|
|
|
*/
|
|
|
|
struct nm_bdg_q {
|
|
|
|
uint16_t bq_head;
|
|
|
|
uint16_t bq_tail;
|
|
|
|
uint32_t bq_len; /* number of buffers */
|
|
|
|
};
|
|
|
|
|
netmap: align codebase to the current upstream (commit id 3fb001303718146)
Changelist:
- Turn tx_rings and rx_rings arrays into arrays of pointers to kring
structs. This patch includes fixes for ixv, ixl, ix, re, cxgbe, iflib,
vtnet and ptnet drivers to cope with the change.
- Generalize the nm_config() callback to accept a struct containing many
parameters.
- Introduce NKR_FAKERING to support buffers sharing (used for netmap
pipes)
- Improved API for external VALE modules.
- Various bug fixes and improvements to the netmap memory allocator,
including support for externally (userspace) allocated memory.
- Refactoring of netmap pipes: now linked rings share the same netmap
buffers, with a separate set of kring pointers (rhead, rcur, rtail).
Buffer swapping does not need to happen anymore.
- Large refactoring of the control API towards an extensible solution;
the goal is to allow the addition of more commands and extension of
existing ones (with new options) without the need of hacks or the
risk of running out of configuration space.
A new NIOCCTRL ioctl has been added to handle all the requests of the
new control API, which cover all the functionalities so far supported.
The netmap API bumps from 11 to 12 with this patch. Full backward
compatibility is provided for the old control command (NIOCREGIF), by
means of a new netmap_legacy module. Many parts of the old netmap.h
header has now been moved to netmap_legacy.h (included by netmap.h).
Approved by: hrs (mentor)
2018-04-12 07:20:50 +00:00
|
|
|
/* Holds the default callbacks */
|
2018-10-23 08:55:16 +00:00
|
|
|
struct netmap_bdg_ops vale_bdg_ops = {
|
|
|
|
.lookup = netmap_bdg_learning,
|
|
|
|
.config = NULL,
|
|
|
|
.dtor = NULL,
|
|
|
|
.vp_create = netmap_vp_create,
|
|
|
|
.bwrap_attach = netmap_vale_bwrap_attach,
|
|
|
|
.name = NM_BDG_NAME,
|
2013-12-15 08:37:24 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
|
|
|
|
* this is a slightly optimized copy routine which rounds
|
|
|
|
* to multiple of 64 bytes and is often faster than dealing
|
|
|
|
* with other odd sizes. We assume there is enough room
|
|
|
|
* in the source and destination buffers.
|
|
|
|
*
|
|
|
|
* XXX only for multiples of 64 bytes, non overlapped.
|
|
|
|
*/
|
|
|
|
static inline void
|
|
|
|
pkt_copy(void *_src, void *_dst, int l)
|
|
|
|
{
|
2018-05-18 03:38:17 +00:00
|
|
|
uint64_t *src = _src;
|
|
|
|
uint64_t *dst = _dst;
|
|
|
|
if (unlikely(l >= 1024)) {
|
|
|
|
memcpy(dst, src, l);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
for (; likely(l > 0); l-=64) {
|
|
|
|
*dst++ = *src++;
|
|
|
|
*dst++ = *src++;
|
|
|
|
*dst++ = *src++;
|
|
|
|
*dst++ = *src++;
|
|
|
|
*dst++ = *src++;
|
|
|
|
*dst++ = *src++;
|
|
|
|
*dst++ = *src++;
|
|
|
|
*dst++ = *src++;
|
|
|
|
}
|
2013-12-15 08:37:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Free the forwarding tables for rings attached to switch ports.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
nm_free_bdgfwd(struct netmap_adapter *na)
|
|
|
|
{
|
|
|
|
int nrings, i;
|
netmap: align codebase to the current upstream (commit id 3fb001303718146)
Changelist:
- Turn tx_rings and rx_rings arrays into arrays of pointers to kring
structs. This patch includes fixes for ixv, ixl, ix, re, cxgbe, iflib,
vtnet and ptnet drivers to cope with the change.
- Generalize the nm_config() callback to accept a struct containing many
parameters.
- Introduce NKR_FAKERING to support buffers sharing (used for netmap
pipes)
- Improved API for external VALE modules.
- Various bug fixes and improvements to the netmap memory allocator,
including support for externally (userspace) allocated memory.
- Refactoring of netmap pipes: now linked rings share the same netmap
buffers, with a separate set of kring pointers (rhead, rcur, rtail).
Buffer swapping does not need to happen anymore.
- Large refactoring of the control API towards an extensible solution;
the goal is to allow the addition of more commands and extension of
existing ones (with new options) without the need of hacks or the
risk of running out of configuration space.
A new NIOCCTRL ioctl has been added to handle all the requests of the
new control API, which cover all the functionalities so far supported.
The netmap API bumps from 11 to 12 with this patch. Full backward
compatibility is provided for the old control command (NIOCREGIF), by
means of a new netmap_legacy module. Many parts of the old netmap.h
header has now been moved to netmap_legacy.h (included by netmap.h).
Approved by: hrs (mentor)
2018-04-12 07:20:50 +00:00
|
|
|
struct netmap_kring **kring;
|
2013-12-15 08:37:24 +00:00
|
|
|
|
|
|
|
NMG_LOCK_ASSERT();
|
It is 2014 and we have a new version of netmap.
Most relevant features:
- netmap emulation on any NIC, even those without native netmap support.
On the ixgbe we have measured about 4Mpps/core/queue in this mode,
which is still a lot more than with sockets/bpf.
- seamless interconnection of VALE switch, NICs and host stack.
If you disable accelerations on your NIC (say em0)
ifconfig em0 -txcsum -txcsum
you can use the VALE switch to connect the NIC and the host stack:
vale-ctl -h valeXX:em0
allowing sharing the NIC with other netmap clients.
- THE USER API HAS SLIGHTLY CHANGED (head/cur/tail pointers
instead of pointers/count as before). This was unavoidable to support,
in the future, multiple threads operating on the same rings.
Netmap clients require very small source code changes to compile again.
On the plus side, the new API should be easier to understand
and the internals are a lot simpler.
The manual page has been updated extensively to reflect the current
features and give some examples.
This is the result of work of several people including Giuseppe Lettieri,
Vincenzo Maffione, Michio Honda and myself, and has been financially
supported by EU projects CHANGE and OPENLAB, from NetApp University
Research Fund, NEC, and of course the Universita` di Pisa.
2014-01-06 12:53:15 +00:00
|
|
|
nrings = na->num_tx_rings;
|
|
|
|
kring = na->tx_rings;
|
2013-12-15 08:37:24 +00:00
|
|
|
for (i = 0; i < nrings; i++) {
|
netmap: align codebase to the current upstream (commit id 3fb001303718146)
Changelist:
- Turn tx_rings and rx_rings arrays into arrays of pointers to kring
structs. This patch includes fixes for ixv, ixl, ix, re, cxgbe, iflib,
vtnet and ptnet drivers to cope with the change.
- Generalize the nm_config() callback to accept a struct containing many
parameters.
- Introduce NKR_FAKERING to support buffers sharing (used for netmap
pipes)
- Improved API for external VALE modules.
- Various bug fixes and improvements to the netmap memory allocator,
including support for externally (userspace) allocated memory.
- Refactoring of netmap pipes: now linked rings share the same netmap
buffers, with a separate set of kring pointers (rhead, rcur, rtail).
Buffer swapping does not need to happen anymore.
- Large refactoring of the control API towards an extensible solution;
the goal is to allow the addition of more commands and extension of
existing ones (with new options) without the need of hacks or the
risk of running out of configuration space.
A new NIOCCTRL ioctl has been added to handle all the requests of the
new control API, which cover all the functionalities so far supported.
The netmap API bumps from 11 to 12 with this patch. Full backward
compatibility is provided for the old control command (NIOCREGIF), by
means of a new netmap_legacy module. Many parts of the old netmap.h
header has now been moved to netmap_legacy.h (included by netmap.h).
Approved by: hrs (mentor)
2018-04-12 07:20:50 +00:00
|
|
|
if (kring[i]->nkr_ft) {
|
|
|
|
nm_os_free(kring[i]->nkr_ft);
|
|
|
|
kring[i]->nkr_ft = NULL; /* protect from freeing twice */
|
2013-12-15 08:37:24 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Allocate the forwarding tables for the rings attached to the bridge ports.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
nm_alloc_bdgfwd(struct netmap_adapter *na)
|
|
|
|
{
|
|
|
|
int nrings, l, i, num_dstq;
|
netmap: align codebase to the current upstream (commit id 3fb001303718146)
Changelist:
- Turn tx_rings and rx_rings arrays into arrays of pointers to kring
structs. This patch includes fixes for ixv, ixl, ix, re, cxgbe, iflib,
vtnet and ptnet drivers to cope with the change.
- Generalize the nm_config() callback to accept a struct containing many
parameters.
- Introduce NKR_FAKERING to support buffers sharing (used for netmap
pipes)
- Improved API for external VALE modules.
- Various bug fixes and improvements to the netmap memory allocator,
including support for externally (userspace) allocated memory.
- Refactoring of netmap pipes: now linked rings share the same netmap
buffers, with a separate set of kring pointers (rhead, rcur, rtail).
Buffer swapping does not need to happen anymore.
- Large refactoring of the control API towards an extensible solution;
the goal is to allow the addition of more commands and extension of
existing ones (with new options) without the need of hacks or the
risk of running out of configuration space.
A new NIOCCTRL ioctl has been added to handle all the requests of the
new control API, which cover all the functionalities so far supported.
The netmap API bumps from 11 to 12 with this patch. Full backward
compatibility is provided for the old control command (NIOCREGIF), by
means of a new netmap_legacy module. Many parts of the old netmap.h
header has now been moved to netmap_legacy.h (included by netmap.h).
Approved by: hrs (mentor)
2018-04-12 07:20:50 +00:00
|
|
|
struct netmap_kring **kring;
|
2013-12-15 08:37:24 +00:00
|
|
|
|
|
|
|
NMG_LOCK_ASSERT();
|
|
|
|
/* all port:rings + broadcast */
|
|
|
|
num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
|
|
|
|
l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
|
|
|
|
l += sizeof(struct nm_bdg_q) * num_dstq;
|
|
|
|
l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;
|
|
|
|
|
Sync netmap sources with the version in our private tree.
This commit contains large contributions from Giuseppe Lettieri and
Stefano Garzarella, is partly supported by grants from Verisign and Cisco,
and brings in the following:
- fix zerocopy monitor ports and introduce copying monitor ports
(the latter are lower performance but give access to all traffic
in parallel with the application)
- exclusive open mode, useful to implement solutions that recover
from crashes of the main netmap client (suggested by Patrick Kelsey)
- revised memory allocator in preparation for the 'passthrough mode'
(ptnetmap) recently presented at bsdcan. ptnetmap is described in
S. Garzarella, G. Lettieri, L. Rizzo;
Virtual device passthrough for high speed VM networking,
ACM/IEEE ANCS 2015, Oakland (CA) May 2015
http://info.iet.unipi.it/~luigi/research.html
- fix rx CRC handing on ixl
- add module dependencies for netmap when building drivers as modules
- minor simplifications to device-specific routines (*txsync, *rxsync)
- general code cleanup (remove unused variables, introduce macros
to access rings and remove duplicate code,
Applications do not need to be recompiled, unless of course
they want to use the new features (monitors and exclusive open).
Those willing to try this code on stable/10 can just update the
sys/dev/netmap/*, sys/net/netmap* with the version in HEAD
and apply the small patches to individual device drivers.
MFC after: 1 month
Sponsored by: (partly) Verisign, Cisco
2015-07-10 05:51:36 +00:00
|
|
|
nrings = netmap_real_rings(na, NR_TX);
|
2013-12-15 08:37:24 +00:00
|
|
|
kring = na->tx_rings;
|
|
|
|
for (i = 0; i < nrings; i++) {
|
|
|
|
struct nm_bdg_fwd *ft;
|
|
|
|
struct nm_bdg_q *dstq;
|
|
|
|
int j;
|
|
|
|
|
2017-06-12 22:53:18 +00:00
|
|
|
ft = nm_os_malloc(l);
|
2013-12-15 08:37:24 +00:00
|
|
|
if (!ft) {
|
|
|
|
nm_free_bdgfwd(na);
|
|
|
|
return ENOMEM;
|
|
|
|
}
|
|
|
|
dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
|
|
|
|
for (j = 0; j < num_dstq; j++) {
|
|
|
|
dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
|
|
|
|
dstq[j].bq_len = 0;
|
|
|
|
}
|
netmap: align codebase to the current upstream (commit id 3fb001303718146)
Changelist:
- Turn tx_rings and rx_rings arrays into arrays of pointers to kring
structs. This patch includes fixes for ixv, ixl, ix, re, cxgbe, iflib,
vtnet and ptnet drivers to cope with the change.
- Generalize the nm_config() callback to accept a struct containing many
parameters.
- Introduce NKR_FAKERING to support buffers sharing (used for netmap
pipes)
- Improved API for external VALE modules.
- Various bug fixes and improvements to the netmap memory allocator,
including support for externally (userspace) allocated memory.
- Refactoring of netmap pipes: now linked rings share the same netmap
buffers, with a separate set of kring pointers (rhead, rcur, rtail).
Buffer swapping does not need to happen anymore.
- Large refactoring of the control API towards an extensible solution;
the goal is to allow the addition of more commands and extension of
existing ones (with new options) without the need of hacks or the
risk of running out of configuration space.
A new NIOCCTRL ioctl has been added to handle all the requests of the
new control API, which cover all the functionalities so far supported.
The netmap API bumps from 11 to 12 with this patch. Full backward
compatibility is provided for the old control command (NIOCREGIF), by
means of a new netmap_legacy module. Many parts of the old netmap.h
header has now been moved to netmap_legacy.h (included by netmap.h).
Approved by: hrs (mentor)
2018-04-12 07:20:50 +00:00
|
|
|
kring[i]->nkr_ft = ft;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Allows external modules to create bridges in exclusive mode,
|
|
|
|
* returns an authentication token that the external module will need
|
|
|
|
* to provide during nm_bdg_ctl_{attach, detach}(), netmap_bdg_regops(),
|
|
|
|
* and nm_bdg_update_private_data() operations.
|
|
|
|
* Successfully executed if ret != NULL and *return_status == 0.
|
|
|
|
*/
|
|
|
|
void *
|
2018-10-23 08:55:16 +00:00
|
|
|
netmap_vale_create(const char *bdg_name, int *return_status)
|
netmap: align codebase to the current upstream (commit id 3fb001303718146)
Changelist:
- Turn tx_rings and rx_rings arrays into arrays of pointers to kring
structs. This patch includes fixes for ixv, ixl, ix, re, cxgbe, iflib,
vtnet and ptnet drivers to cope with the change.
- Generalize the nm_config() callback to accept a struct containing many
parameters.
- Introduce NKR_FAKERING to support buffers sharing (used for netmap
pipes)
- Improved API for external VALE modules.
- Various bug fixes and improvements to the netmap memory allocator,
including support for externally (userspace) allocated memory.
- Refactoring of netmap pipes: now linked rings share the same netmap
buffers, with a separate set of kring pointers (rhead, rcur, rtail).
Buffer swapping does not need to happen anymore.
- Large refactoring of the control API towards an extensible solution;
the goal is to allow the addition of more commands and extension of
existing ones (with new options) without the need of hacks or the
risk of running out of configuration space.
A new NIOCCTRL ioctl has been added to handle all the requests of the
new control API, which cover all the functionalities so far supported.
The netmap API bumps from 11 to 12 with this patch. Full backward
compatibility is provided for the old control command (NIOCREGIF), by
means of a new netmap_legacy module. Many parts of the old netmap.h
header has now been moved to netmap_legacy.h (included by netmap.h).
Approved by: hrs (mentor)
2018-04-12 07:20:50 +00:00
|
|
|
{
|
|
|
|
struct nm_bridge *b = NULL;
|
|
|
|
void *ret = NULL;
|
|
|
|
|
|
|
|
NMG_LOCK();
|
2018-10-23 08:55:16 +00:00
|
|
|
b = nm_find_bridge(bdg_name, 0 /* don't create */, NULL);
|
netmap: align codebase to the current upstream (commit id 3fb001303718146)
Changelist:
- Turn tx_rings and rx_rings arrays into arrays of pointers to kring
structs. This patch includes fixes for ixv, ixl, ix, re, cxgbe, iflib,
vtnet and ptnet drivers to cope with the change.
- Generalize the nm_config() callback to accept a struct containing many
parameters.
- Introduce NKR_FAKERING to support buffers sharing (used for netmap
pipes)
- Improved API for external VALE modules.
- Various bug fixes and improvements to the netmap memory allocator,
including support for externally (userspace) allocated memory.
- Refactoring of netmap pipes: now linked rings share the same netmap
buffers, with a separate set of kring pointers (rhead, rcur, rtail).
Buffer swapping does not need to happen anymore.
- Large refactoring of the control API towards an extensible solution;
the goal is to allow the addition of more commands and extension of
existing ones (with new options) without the need of hacks or the
risk of running out of configuration space.
A new NIOCCTRL ioctl has been added to handle all the requests of the
new control API, which cover all the functionalities so far supported.
The netmap API bumps from 11 to 12 with this patch. Full backward
compatibility is provided for the old control command (NIOCREGIF), by
means of a new netmap_legacy module. Many parts of the old netmap.h
header has now been moved to netmap_legacy.h (included by netmap.h).
Approved by: hrs (mentor)
2018-04-12 07:20:50 +00:00
|
|
|
if (b) {
|
|
|
|
*return_status = EEXIST;
|
|
|
|
goto unlock_bdg_create;
|
|
|
|
}
|
|
|
|
|
2018-10-23 08:55:16 +00:00
|
|
|
b = nm_find_bridge(bdg_name, 1 /* create */, &vale_bdg_ops);
|
netmap: align codebase to the current upstream (commit id 3fb001303718146)
Changelist:
- Turn tx_rings and rx_rings arrays into arrays of pointers to kring
structs. This patch includes fixes for ixv, ixl, ix, re, cxgbe, iflib,
vtnet and ptnet drivers to cope with the change.
- Generalize the nm_config() callback to accept a struct containing many
parameters.
- Introduce NKR_FAKERING to support buffers sharing (used for netmap
pipes)
- Improved API for external VALE modules.
- Various bug fixes and improvements to the netmap memory allocator,
including support for externally (userspace) allocated memory.
- Refactoring of netmap pipes: now linked rings share the same netmap
buffers, with a separate set of kring pointers (rhead, rcur, rtail).
Buffer swapping does not need to happen anymore.
- Large refactoring of the control API towards an extensible solution;
the goal is to allow the addition of more commands and extension of
existing ones (with new options) without the need of hacks or the
risk of running out of configuration space.
A new NIOCCTRL ioctl has been added to handle all the requests of the
new control API, which cover all the functionalities so far supported.
The netmap API bumps from 11 to 12 with this patch. Full backward
compatibility is provided for the old control command (NIOCREGIF), by
means of a new netmap_legacy module. Many parts of the old netmap.h
header has now been moved to netmap_legacy.h (included by netmap.h).
Approved by: hrs (mentor)
2018-04-12 07:20:50 +00:00
|
|
|
if (!b) {
|
|
|
|
*return_status = ENOMEM;
|
|
|
|
goto unlock_bdg_create;
|
|
|
|
}
|
|
|
|
|
|
|
|
b->bdg_flags |= NM_BDG_ACTIVE | NM_BDG_EXCLUSIVE;
|
|
|
|
ret = nm_bdg_get_auth_token(b);
|
|
|
|
*return_status = 0;
|
|
|
|
|
|
|
|
unlock_bdg_create:
|
|
|
|
NMG_UNLOCK();
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Allows external modules to destroy a bridge created through
|
|
|
|
* netmap_bdg_create(), the bridge must be empty.
|
|
|
|
*/
|
|
|
|
int
|
2018-10-23 08:55:16 +00:00
|
|
|
netmap_vale_destroy(const char *bdg_name, void *auth_token)
|
netmap: align codebase to the current upstream (commit id 3fb001303718146)
Changelist:
- Turn tx_rings and rx_rings arrays into arrays of pointers to kring
structs. This patch includes fixes for ixv, ixl, ix, re, cxgbe, iflib,
vtnet and ptnet drivers to cope with the change.
- Generalize the nm_config() callback to accept a struct containing many
parameters.
- Introduce NKR_FAKERING to support buffers sharing (used for netmap
pipes)
- Improved API for external VALE modules.
- Various bug fixes and improvements to the netmap memory allocator,
including support for externally (userspace) allocated memory.
- Refactoring of netmap pipes: now linked rings share the same netmap
buffers, with a separate set of kring pointers (rhead, rcur, rtail).
Buffer swapping does not need to happen anymore.
- Large refactoring of the control API towards an extensible solution;
the goal is to allow the addition of more commands and extension of
existing ones (with new options) without the need of hacks or the
risk of running out of configuration space.
A new NIOCCTRL ioctl has been added to handle all the requests of the
new control API, which cover all the functionalities so far supported.
The netmap API bumps from 11 to 12 with this patch. Full backward
compatibility is provided for the old control command (NIOCREGIF), by
means of a new netmap_legacy module. Many parts of the old netmap.h
header has now been moved to netmap_legacy.h (included by netmap.h).
Approved by: hrs (mentor)
2018-04-12 07:20:50 +00:00
|
|
|
{
|
|
|
|
struct nm_bridge *b = NULL;
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
NMG_LOCK();
|
2018-10-23 08:55:16 +00:00
|
|
|
b = nm_find_bridge(bdg_name, 0 /* don't create */, NULL);
|
netmap: align codebase to the current upstream (commit id 3fb001303718146)
Changelist:
- Turn tx_rings and rx_rings arrays into arrays of pointers to kring
structs. This patch includes fixes for ixv, ixl, ix, re, cxgbe, iflib,
vtnet and ptnet drivers to cope with the change.
- Generalize the nm_config() callback to accept a struct containing many
parameters.
- Introduce NKR_FAKERING to support buffers sharing (used for netmap
pipes)
- Improved API for external VALE modules.
- Various bug fixes and improvements to the netmap memory allocator,
including support for externally (userspace) allocated memory.
- Refactoring of netmap pipes: now linked rings share the same netmap
buffers, with a separate set of kring pointers (rhead, rcur, rtail).
Buffer swapping does not need to happen anymore.
- Large refactoring of the control API towards an extensible solution;
the goal is to allow the addition of more commands and extension of
existing ones (with new options) without the need of hacks or the
risk of running out of configuration space.
A new NIOCCTRL ioctl has been added to handle all the requests of the
new control API, which cover all the functionalities so far supported.
The netmap API bumps from 11 to 12 with this patch. Full backward
compatibility is provided for the old control command (NIOCREGIF), by
means of a new netmap_legacy module. Many parts of the old netmap.h
header has now been moved to netmap_legacy.h (included by netmap.h).
Approved by: hrs (mentor)
2018-04-12 07:20:50 +00:00
|
|
|
if (!b) {
|
|
|
|
ret = ENXIO;
|
|
|
|
goto unlock_bdg_free;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!nm_bdg_valid_auth_token(b, auth_token)) {
|
|
|
|
ret = EACCES;
|
|
|
|
goto unlock_bdg_free;
|
|
|
|
}
|
|
|
|
if (!(b->bdg_flags & NM_BDG_EXCLUSIVE)) {
|
|
|
|
ret = EINVAL;
|
|
|
|
goto unlock_bdg_free;
|
2013-12-15 08:37:24 +00:00
|
|
|
}
|
netmap: align codebase to the current upstream (commit id 3fb001303718146)
Changelist:
- Turn tx_rings and rx_rings arrays into arrays of pointers to kring
structs. This patch includes fixes for ixv, ixl, ix, re, cxgbe, iflib,
vtnet and ptnet drivers to cope with the change.
- Generalize the nm_config() callback to accept a struct containing many
parameters.
- Introduce NKR_FAKERING to support buffers sharing (used for netmap
pipes)
- Improved API for external VALE modules.
- Various bug fixes and improvements to the netmap memory allocator,
including support for externally (userspace) allocated memory.
- Refactoring of netmap pipes: now linked rings share the same netmap
buffers, with a separate set of kring pointers (rhead, rcur, rtail).
Buffer swapping does not need to happen anymore.
- Large refactoring of the control API towards an extensible solution;
the goal is to allow the addition of more commands and extension of
existing ones (with new options) without the need of hacks or the
risk of running out of configuration space.
A new NIOCCTRL ioctl has been added to handle all the requests of the
new control API, which cover all the functionalities so far supported.
The netmap API bumps from 11 to 12 with this patch. Full backward
compatibility is provided for the old control command (NIOCREGIF), by
means of a new netmap_legacy module. Many parts of the old netmap.h
header has now been moved to netmap_legacy.h (included by netmap.h).
Approved by: hrs (mentor)
2018-04-12 07:20:50 +00:00
|
|
|
|
|
|
|
b->bdg_flags &= ~(NM_BDG_EXCLUSIVE | NM_BDG_ACTIVE);
|
|
|
|
ret = netmap_bdg_free(b);
|
|
|
|
if (ret) {
|
|
|
|
b->bdg_flags |= NM_BDG_EXCLUSIVE | NM_BDG_ACTIVE;
|
|
|
|
}
|
|
|
|
|
|
|
|
unlock_bdg_free:
|
|
|
|
NMG_UNLOCK();
|
|
|
|
return ret;
|
2013-12-15 08:37:24 +00:00
|
|
|
}
|
|
|
|
|
netmap: align codebase to the current upstream (commit id 3fb001303718146)
Changelist:
- Turn tx_rings and rx_rings arrays into arrays of pointers to kring
structs. This patch includes fixes for ixv, ixl, ix, re, cxgbe, iflib,
vtnet and ptnet drivers to cope with the change.
- Generalize the nm_config() callback to accept a struct containing many
parameters.
- Introduce NKR_FAKERING to support buffers sharing (used for netmap
pipes)
- Improved API for external VALE modules.
- Various bug fixes and improvements to the netmap memory allocator,
including support for externally (userspace) allocated memory.
- Refactoring of netmap pipes: now linked rings share the same netmap
buffers, with a separate set of kring pointers (rhead, rcur, rtail).
Buffer swapping does not need to happen anymore.
- Large refactoring of the control API towards an extensible solution;
the goal is to allow the addition of more commands and extension of
existing ones (with new options) without the need of hacks or the
risk of running out of configuration space.
A new NIOCCTRL ioctl has been added to handle all the requests of the
new control API, which cover all the functionalities so far supported.
The netmap API bumps from 11 to 12 with this patch. Full backward
compatibility is provided for the old control command (NIOCREGIF), by
means of a new netmap_legacy module. Many parts of the old netmap.h
header has now been moved to netmap_legacy.h (included by netmap.h).
Approved by: hrs (mentor)
2018-04-12 07:20:50 +00:00
|
|
|
|
|
|
|
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
/* nm_dtor callback for ephemeral VALE ports */
|
2013-12-15 08:37:24 +00:00
|
|
|
static void
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
netmap_vp_dtor(struct netmap_adapter *na)
|
2013-12-15 08:37:24 +00:00
|
|
|
{
|
|
|
|
struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
|
|
|
|
struct nm_bridge *b = vpna->na_bdg;
|
|
|
|
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
ND("%s has %d references", na->name, na->na_refcount);
|
2013-12-15 08:37:24 +00:00
|
|
|
|
|
|
|
if (b) {
|
|
|
|
netmap_bdg_detach_common(b, vpna->bdg_port, -1);
|
|
|
|
}
|
2017-06-12 22:53:18 +00:00
|
|
|
|
2018-04-09 09:24:26 +00:00
|
|
|
if (na->ifp != NULL && !nm_iszombie(na)) {
|
2018-10-23 08:55:16 +00:00
|
|
|
NM_DETACH_NA(na->ifp);
|
2018-04-09 09:24:26 +00:00
|
|
|
if (vpna->autodelete) {
|
|
|
|
ND("releasing %s", na->ifp->if_xname);
|
|
|
|
NMG_UNLOCK();
|
|
|
|
nm_os_vi_detach(na->ifp);
|
|
|
|
NMG_LOCK();
|
|
|
|
}
|
2017-06-12 22:53:18 +00:00
|
|
|
}
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
}
|
2013-12-15 08:37:24 +00:00
|
|
|
|
2018-10-23 08:55:16 +00:00
|
|
|
|
|
|
|
/* Called by external kernel modules (e.g., Openvswitch).
|
|
|
|
* to modify the private data previously given to regops().
|
|
|
|
* 'name' may be just bridge's name (including ':' if it
|
|
|
|
* is not just NM_BDG_NAME).
|
|
|
|
* Called without NMG_LOCK.
|
|
|
|
*/
|
netmap: align codebase to the current upstream (commit id 3fb001303718146)
Changelist:
- Turn tx_rings and rx_rings arrays into arrays of pointers to kring
structs. This patch includes fixes for ixv, ixl, ix, re, cxgbe, iflib,
vtnet and ptnet drivers to cope with the change.
- Generalize the nm_config() callback to accept a struct containing many
parameters.
- Introduce NKR_FAKERING to support buffers sharing (used for netmap
pipes)
- Improved API for external VALE modules.
- Various bug fixes and improvements to the netmap memory allocator,
including support for externally (userspace) allocated memory.
- Refactoring of netmap pipes: now linked rings share the same netmap
buffers, with a separate set of kring pointers (rhead, rcur, rtail).
Buffer swapping does not need to happen anymore.
- Large refactoring of the control API towards an extensible solution;
the goal is to allow the addition of more commands and extension of
existing ones (with new options) without the need of hacks or the
risk of running out of configuration space.
A new NIOCCTRL ioctl has been added to handle all the requests of the
new control API, which cover all the functionalities so far supported.
The netmap API bumps from 11 to 12 with this patch. Full backward
compatibility is provided for the old control command (NIOCREGIF), by
means of a new netmap_legacy module. Many parts of the old netmap.h
header has now been moved to netmap_legacy.h (included by netmap.h).
Approved by: hrs (mentor)
2018-04-12 07:20:50 +00:00
|
|
|
int
|
2018-10-23 08:55:16 +00:00
|
|
|
nm_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback,
|
|
|
|
void *callback_data, void *auth_token)
|
netmap: align codebase to the current upstream (commit id 3fb001303718146)
Changelist:
- Turn tx_rings and rx_rings arrays into arrays of pointers to kring
structs. This patch includes fixes for ixv, ixl, ix, re, cxgbe, iflib,
vtnet and ptnet drivers to cope with the change.
- Generalize the nm_config() callback to accept a struct containing many
parameters.
- Introduce NKR_FAKERING to support buffers sharing (used for netmap
pipes)
- Improved API for external VALE modules.
- Various bug fixes and improvements to the netmap memory allocator,
including support for externally (userspace) allocated memory.
- Refactoring of netmap pipes: now linked rings share the same netmap
buffers, with a separate set of kring pointers (rhead, rcur, rtail).
Buffer swapping does not need to happen anymore.
- Large refactoring of the control API towards an extensible solution;
the goal is to allow the addition of more commands and extension of
existing ones (with new options) without the need of hacks or the
risk of running out of configuration space.
A new NIOCCTRL ioctl has been added to handle all the requests of the
new control API, which cover all the functionalities so far supported.
The netmap API bumps from 11 to 12 with this patch. Full backward
compatibility is provided for the old control command (NIOCREGIF), by
means of a new netmap_legacy module. Many parts of the old netmap.h
header has now been moved to netmap_legacy.h (included by netmap.h).
Approved by: hrs (mentor)
2018-04-12 07:20:50 +00:00
|
|
|
{
|
2018-10-23 08:55:16 +00:00
|
|
|
void *private_data = NULL;
|
|
|
|
struct nm_bridge *b;
|
netmap: align codebase to the current upstream (commit id 3fb001303718146)
Changelist:
- Turn tx_rings and rx_rings arrays into arrays of pointers to kring
structs. This patch includes fixes for ixv, ixl, ix, re, cxgbe, iflib,
vtnet and ptnet drivers to cope with the change.
- Generalize the nm_config() callback to accept a struct containing many
parameters.
- Introduce NKR_FAKERING to support buffers sharing (used for netmap
pipes)
- Improved API for external VALE modules.
- Various bug fixes and improvements to the netmap memory allocator,
including support for externally (userspace) allocated memory.
- Refactoring of netmap pipes: now linked rings share the same netmap
buffers, with a separate set of kring pointers (rhead, rcur, rtail).
Buffer swapping does not need to happen anymore.
- Large refactoring of the control API towards an extensible solution;
the goal is to allow the addition of more commands and extension of
existing ones (with new options) without the need of hacks or the
risk of running out of configuration space.
A new NIOCCTRL ioctl has been added to handle all the requests of the
new control API, which cover all the functionalities so far supported.
The netmap API bumps from 11 to 12 with this patch. Full backward
compatibility is provided for the old control command (NIOCREGIF), by
means of a new netmap_legacy module. Many parts of the old netmap.h
header has now been moved to netmap_legacy.h (included by netmap.h).
Approved by: hrs (mentor)
2018-04-12 07:20:50 +00:00
|
|
|
int error = 0;
|
2018-10-23 08:55:16 +00:00
|
|
|
|
|
|
|
NMG_LOCK();
|
|
|
|
b = nm_find_bridge(name, 0 /* don't create */, NULL);
|
|
|
|
if (!b) {
|
|
|
|
error = EINVAL;
|
|
|
|
goto unlock_update_priv;
|
|
|
|
}
|
|
|
|
if (!nm_bdg_valid_auth_token(b, auth_token)) {
|
|
|
|
error = EACCES;
|
|
|
|
goto unlock_update_priv;
|
|
|
|
}
|
|
|
|
BDG_WLOCK(b);
|
|
|
|
private_data = callback(b->private_data, callback_data, &error);
|
|
|
|
b->private_data = private_data;
|
|
|
|
BDG_WUNLOCK(b);
|
|
|
|
|
|
|
|
unlock_update_priv:
|
|
|
|
NMG_UNLOCK();
|
netmap: align codebase to the current upstream (commit id 3fb001303718146)
Changelist:
- Turn tx_rings and rx_rings arrays into arrays of pointers to kring
structs. This patch includes fixes for ixv, ixl, ix, re, cxgbe, iflib,
vtnet and ptnet drivers to cope with the change.
- Generalize the nm_config() callback to accept a struct containing many
parameters.
- Introduce NKR_FAKERING to support buffers sharing (used for netmap
pipes)
- Improved API for external VALE modules.
- Various bug fixes and improvements to the netmap memory allocator,
including support for externally (userspace) allocated memory.
- Refactoring of netmap pipes: now linked rings share the same netmap
buffers, with a separate set of kring pointers (rhead, rcur, rtail).
Buffer swapping does not need to happen anymore.
- Large refactoring of the control API towards an extensible solution;
the goal is to allow the addition of more commands and extension of
existing ones (with new options) without the need of hacks or the
risk of running out of configuration space.
A new NIOCCTRL ioctl has been added to handle all the requests of the
new control API, which cover all the functionalities so far supported.
The netmap API bumps from 11 to 12 with this patch. Full backward
compatibility is provided for the old control command (NIOCREGIF), by
means of a new netmap_legacy module. Many parts of the old netmap.h
header has now been moved to netmap_legacy.h (included by netmap.h).
Approved by: hrs (mentor)
2018-04-12 07:20:50 +00:00
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
2018-10-23 08:55:16 +00:00
|
|
|
|
|
|
|
/* nm_krings_create callback for VALE ports.
|
|
|
|
* Calls the standard netmap_krings_create, then adds leases on rx
|
|
|
|
* rings and bdgfwd on tx rings.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
netmap_vp_krings_create(struct netmap_adapter *na)
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
{
|
2018-10-23 08:55:16 +00:00
|
|
|
u_int tailroom;
|
|
|
|
int error, i;
|
|
|
|
uint32_t *leases;
|
|
|
|
u_int nrx = netmap_real_rings(na, NR_RX);
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
|
2018-10-23 08:55:16 +00:00
|
|
|
/*
|
|
|
|
* Leases are attached to RX rings on vale ports
|
|
|
|
*/
|
|
|
|
tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx;
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
|
2018-10-23 08:55:16 +00:00
|
|
|
error = netmap_krings_create(na, tailroom);
|
|
|
|
if (error)
|
|
|
|
return error;
|
2017-06-12 22:53:18 +00:00
|
|
|
|
2018-10-23 08:55:16 +00:00
|
|
|
leases = na->tailroom;
|
2017-06-12 22:53:18 +00:00
|
|
|
|
2018-10-23 08:55:16 +00:00
|
|
|
for (i = 0; i < nrx; i++) { /* Receive rings */
|
|
|
|
na->rx_rings[i]->nkr_leases = leases;
|
|
|
|
leases += na->num_rx_desc;
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
}
|
2017-06-12 22:53:18 +00:00
|
|
|
|
2018-10-23 08:55:16 +00:00
|
|
|
error = nm_alloc_bdgfwd(na);
|
|
|
|
if (error) {
|
|
|
|
netmap_krings_delete(na);
|
|
|
|
return error;
|
|
|
|
}
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
|
|
|
|
return 0;
|
2013-12-15 08:37:24 +00:00
|
|
|
}
|
|
|
|
|
2018-10-23 08:55:16 +00:00
|
|
|
|
|
|
|
/* nm_krings_delete callback for VALE ports. */
|
|
|
|
static void
|
|
|
|
netmap_vp_krings_delete(struct netmap_adapter *na)
|
2017-06-12 22:53:18 +00:00
|
|
|
{
|
2018-10-23 08:55:16 +00:00
|
|
|
nm_free_bdgfwd(na);
|
|
|
|
netmap_krings_delete(na);
|
2013-12-15 08:37:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static int
|
|
|
|
nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n,
|
|
|
|
struct netmap_vp_adapter *na, u_int ring_nr);
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
* main dispatch routine for the bridge.
|
2013-12-15 08:37:24 +00:00
|
|
|
* Grab packets from a kring, move them into the ft structure
|
|
|
|
* associated to the tx (input) port. Max one instance per port,
|
|
|
|
* filtered on input (ioctl, poll or XXX).
|
|
|
|
* Returns the next position in the ring.
|
|
|
|
*/
|
|
|
|
static int
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
nm_bdg_preflush(struct netmap_kring *kring, u_int end)
|
2013-12-15 08:37:24 +00:00
|
|
|
{
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
struct netmap_vp_adapter *na =
|
|
|
|
(struct netmap_vp_adapter*)kring->na;
|
2013-12-15 08:37:24 +00:00
|
|
|
struct netmap_ring *ring = kring->ring;
|
|
|
|
struct nm_bdg_fwd *ft;
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
u_int ring_nr = kring->ring_id;
|
2013-12-15 08:37:24 +00:00
|
|
|
u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1;
|
|
|
|
u_int ft_i = 0; /* start from 0 */
|
|
|
|
u_int frags = 1; /* how many frags ? */
|
|
|
|
struct nm_bridge *b = na->na_bdg;
|
|
|
|
|
|
|
|
/* To protect against modifications to the bridge we acquire a
|
|
|
|
* shared lock, waiting if we can sleep (if the source port is
|
|
|
|
* attached to a user process) or with a trylock otherwise (NICs).
|
|
|
|
*/
|
|
|
|
ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j);
|
|
|
|
if (na->up.na_flags & NAF_BDG_MAYSLEEP)
|
|
|
|
BDG_RLOCK(b);
|
|
|
|
else if (!BDG_RTRYLOCK(b))
|
2017-06-12 22:53:18 +00:00
|
|
|
return j;
|
2013-12-15 08:37:24 +00:00
|
|
|
ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
|
|
|
|
ft = kring->nkr_ft;
|
|
|
|
|
|
|
|
for (; likely(j != end); j = nm_next(j, lim)) {
|
|
|
|
struct netmap_slot *slot = &ring->slot[j];
|
|
|
|
char *buf;
|
|
|
|
|
|
|
|
ft[ft_i].ft_len = slot->len;
|
|
|
|
ft[ft_i].ft_flags = slot->flags;
|
netmap: align codebase to the current upstream (commit id 3fb001303718146)
Changelist:
- Turn tx_rings and rx_rings arrays into arrays of pointers to kring
structs. This patch includes fixes for ixv, ixl, ix, re, cxgbe, iflib,
vtnet and ptnet drivers to cope with the change.
- Generalize the nm_config() callback to accept a struct containing many
parameters.
- Introduce NKR_FAKERING to support buffers sharing (used for netmap
pipes)
- Improved API for external VALE modules.
- Various bug fixes and improvements to the netmap memory allocator,
including support for externally (userspace) allocated memory.
- Refactoring of netmap pipes: now linked rings share the same netmap
buffers, with a separate set of kring pointers (rhead, rcur, rtail).
Buffer swapping does not need to happen anymore.
- Large refactoring of the control API towards an extensible solution;
the goal is to allow the addition of more commands and extension of
existing ones (with new options) without the need of hacks or the
risk of running out of configuration space.
A new NIOCCTRL ioctl has been added to handle all the requests of the
new control API, which cover all the functionalities so far supported.
The netmap API bumps from 11 to 12 with this patch. Full backward
compatibility is provided for the old control command (NIOCREGIF), by
means of a new netmap_legacy module. Many parts of the old netmap.h
header has now been moved to netmap_legacy.h (included by netmap.h).
Approved by: hrs (mentor)
2018-04-12 07:20:50 +00:00
|
|
|
ft[ft_i].ft_offset = 0;
|
2013-12-15 08:37:24 +00:00
|
|
|
|
|
|
|
ND("flags is 0x%x", slot->flags);
|
Sync netmap sources with the version in our private tree.
This commit contains large contributions from Giuseppe Lettieri and
Stefano Garzarella, is partly supported by grants from Verisign and Cisco,
and brings in the following:
- fix zerocopy monitor ports and introduce copying monitor ports
(the latter are lower performance but give access to all traffic
in parallel with the application)
- exclusive open mode, useful to implement solutions that recover
from crashes of the main netmap client (suggested by Patrick Kelsey)
- revised memory allocator in preparation for the 'passthrough mode'
(ptnetmap) recently presented at bsdcan. ptnetmap is described in
S. Garzarella, G. Lettieri, L. Rizzo;
Virtual device passthrough for high speed VM networking,
ACM/IEEE ANCS 2015, Oakland (CA) May 2015
http://info.iet.unipi.it/~luigi/research.html
- fix rx CRC handing on ixl
- add module dependencies for netmap when building drivers as modules
- minor simplifications to device-specific routines (*txsync, *rxsync)
- general code cleanup (remove unused variables, introduce macros
to access rings and remove duplicate code,
Applications do not need to be recompiled, unless of course
they want to use the new features (monitors and exclusive open).
Those willing to try this code on stable/10 can just update the
sys/dev/netmap/*, sys/net/netmap* with the version in HEAD
and apply the small patches to individual device drivers.
MFC after: 1 month
Sponsored by: (partly) Verisign, Cisco
2015-07-10 05:51:36 +00:00
|
|
|
/* we do not use the buf changed flag, but we still need to reset it */
|
|
|
|
slot->flags &= ~NS_BUF_CHANGED;
|
|
|
|
|
2013-12-15 08:37:24 +00:00
|
|
|
/* this slot goes into a list so initialize the link field */
|
|
|
|
ft[ft_i].ft_next = NM_FT_NULL;
|
|
|
|
buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
(void *)(uintptr_t)slot->ptr : NMB(&na->up, slot);
|
2014-06-06 10:50:14 +00:00
|
|
|
if (unlikely(buf == NULL)) {
|
|
|
|
RD(5, "NULL %s buffer pointer from %s slot %d len %d",
|
|
|
|
(slot->flags & NS_INDIRECT) ? "INDIRECT" : "DIRECT",
|
|
|
|
kring->name, j, ft[ft_i].ft_len);
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
buf = ft[ft_i].ft_buf = NETMAP_BUF_BASE(&na->up);
|
2014-06-06 10:50:14 +00:00
|
|
|
ft[ft_i].ft_len = 0;
|
|
|
|
ft[ft_i].ft_flags = 0;
|
|
|
|
}
|
2013-12-16 23:57:43 +00:00
|
|
|
__builtin_prefetch(buf);
|
2013-12-15 08:37:24 +00:00
|
|
|
++ft_i;
|
|
|
|
if (slot->flags & NS_MOREFRAG) {
|
|
|
|
frags++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (unlikely(netmap_verbose && frags > 1))
|
|
|
|
RD(5, "%d frags at %d", frags, ft_i - frags);
|
|
|
|
ft[ft_i - frags].ft_frags = frags;
|
|
|
|
frags = 1;
|
|
|
|
if (unlikely((int)ft_i >= bridge_batch))
|
|
|
|
ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
|
|
|
|
}
|
|
|
|
if (frags > 1) {
|
Import the current version of netmap, aligned with the one on github.
This commit, long overdue, contains contributions in the last 2 years
from Stefano Garzarella, Giuseppe Lettieri, Vincenzo Maffione, including:
+ fixes on monitor ports
+ the 'ptnet' virtual device driver, and ptnetmap backend, for
high speed virtual passthrough on VMs (bhyve fixes in an upcoming commit)
+ improved emulated netmap mode
+ more robust error handling
+ removal of stale code
+ various fixes to code and documentation (some mixup between RX and TX
parameters, and private and public variables)
We also include an additional tool, nmreplay, which is functionally
equivalent to tcpreplay but operating on netmap ports.
2016-10-16 14:13:32 +00:00
|
|
|
/* Here ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG, and we
|
|
|
|
* have to fix frags count. */
|
|
|
|
frags--;
|
|
|
|
ft[ft_i - 1].ft_flags &= ~NS_MOREFRAG;
|
|
|
|
ft[ft_i - frags].ft_frags = frags;
|
|
|
|
D("Truncate incomplete fragment at %d (%d frags)", ft_i, frags);
|
2013-12-15 08:37:24 +00:00
|
|
|
}
|
|
|
|
if (ft_i)
|
|
|
|
ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
|
|
|
|
BDG_RUNLOCK(b);
|
|
|
|
return j;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* ----- FreeBSD if_bridge hash function ------- */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The following hash function is adapted from "Hash Functions" by Bob Jenkins
|
|
|
|
* ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
|
|
|
|
*
|
|
|
|
* http://www.burtleburtle.net/bob/hash/spooky.html
|
|
|
|
*/
|
|
|
|
#define mix(a, b, c) \
|
|
|
|
do { \
|
2018-05-18 03:38:17 +00:00
|
|
|
a -= b; a -= c; a ^= (c >> 13); \
|
|
|
|
b -= c; b -= a; b ^= (a << 8); \
|
|
|
|
c -= a; c -= b; c ^= (b >> 13); \
|
|
|
|
a -= b; a -= c; a ^= (c >> 12); \
|
|
|
|
b -= c; b -= a; b ^= (a << 16); \
|
|
|
|
c -= a; c -= b; c ^= (b >> 5); \
|
|
|
|
a -= b; a -= c; a ^= (c >> 3); \
|
|
|
|
b -= c; b -= a; b ^= (a << 10); \
|
|
|
|
c -= a; c -= b; c ^= (b >> 15); \
|
2013-12-15 08:37:24 +00:00
|
|
|
} while (/*CONSTCOND*/0)
|
|
|
|
|
It is 2014 and we have a new version of netmap.
Most relevant features:
- netmap emulation on any NIC, even those without native netmap support.
On the ixgbe we have measured about 4Mpps/core/queue in this mode,
which is still a lot more than with sockets/bpf.
- seamless interconnection of VALE switch, NICs and host stack.
If you disable accelerations on your NIC (say em0)
ifconfig em0 -txcsum -txcsum
you can use the VALE switch to connect the NIC and the host stack:
vale-ctl -h valeXX:em0
allowing sharing the NIC with other netmap clients.
- THE USER API HAS SLIGHTLY CHANGED (head/cur/tail pointers
instead of pointers/count as before). This was unavoidable to support,
in the future, multiple threads operating on the same rings.
Netmap clients require very small source code changes to compile again.
On the plus side, the new API should be easier to understand
and the internals are a lot simpler.
The manual page has been updated extensively to reflect the current
features and give some examples.
This is the result of work of several people including Giuseppe Lettieri,
Vincenzo Maffione, Michio Honda and myself, and has been financially
supported by EU projects CHANGE and OPENLAB, from NetApp University
Research Fund, NEC, and of course the Universita` di Pisa.
2014-01-06 12:53:15 +00:00
|
|
|
|
2013-12-15 08:37:24 +00:00
|
|
|
static __inline uint32_t
|
|
|
|
nm_bridge_rthash(const uint8_t *addr)
|
|
|
|
{
|
2018-05-18 03:38:17 +00:00
|
|
|
uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hask key
|
2013-12-15 08:37:24 +00:00
|
|
|
|
2018-05-18 03:38:17 +00:00
|
|
|
b += addr[5] << 8;
|
|
|
|
b += addr[4];
|
|
|
|
a += addr[3] << 24;
|
|
|
|
a += addr[2] << 16;
|
|
|
|
a += addr[1] << 8;
|
|
|
|
a += addr[0];
|
2013-12-15 08:37:24 +00:00
|
|
|
|
2018-05-18 03:38:17 +00:00
|
|
|
mix(a, b, c);
|
2013-12-15 08:37:24 +00:00
|
|
|
#define BRIDGE_RTHASH_MASK (NM_BDG_HASH-1)
|
2018-05-18 03:38:17 +00:00
|
|
|
return (c & BRIDGE_RTHASH_MASK);
|
2013-12-15 08:37:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
#undef mix
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Lookup function for a learning bridge.
|
|
|
|
* Update the hash table with the source address,
|
|
|
|
* and then returns the destination port index, and the
|
|
|
|
* ring in *dst_ring (at the moment, always use ring 0)
|
|
|
|
*/
|
netmap: align codebase to the current upstream (commit id 3fb001303718146)
Changelist:
- Turn tx_rings and rx_rings arrays into arrays of pointers to kring
structs. This patch includes fixes for ixv, ixl, ix, re, cxgbe, iflib,
vtnet and ptnet drivers to cope with the change.
- Generalize the nm_config() callback to accept a struct containing many
parameters.
- Introduce NKR_FAKERING to support buffers sharing (used for netmap
pipes)
- Improved API for external VALE modules.
- Various bug fixes and improvements to the netmap memory allocator,
including support for externally (userspace) allocated memory.
- Refactoring of netmap pipes: now linked rings share the same netmap
buffers, with a separate set of kring pointers (rhead, rcur, rtail).
Buffer swapping does not need to happen anymore.
- Large refactoring of the control API towards an extensible solution;
the goal is to allow the addition of more commands and extension of
existing ones (with new options) without the need of hacks or the
risk of running out of configuration space.
A new NIOCCTRL ioctl has been added to handle all the requests of the
new control API, which cover all the functionalities so far supported.
The netmap API bumps from 11 to 12 with this patch. Full backward
compatibility is provided for the old control command (NIOCREGIF), by
means of a new netmap_legacy module. Many parts of the old netmap.h
header has now been moved to netmap_legacy.h (included by netmap.h).
Approved by: hrs (mentor)
2018-04-12 07:20:50 +00:00
|
|
|
uint32_t
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
|
netmap: align codebase to the current upstream (commit id 3fb001303718146)
Changelist:
- Turn tx_rings and rx_rings arrays into arrays of pointers to kring
structs. This patch includes fixes for ixv, ixl, ix, re, cxgbe, iflib,
vtnet and ptnet drivers to cope with the change.
- Generalize the nm_config() callback to accept a struct containing many
parameters.
- Introduce NKR_FAKERING to support buffers sharing (used for netmap
pipes)
- Improved API for external VALE modules.
- Various bug fixes and improvements to the netmap memory allocator,
including support for externally (userspace) allocated memory.
- Refactoring of netmap pipes: now linked rings share the same netmap
buffers, with a separate set of kring pointers (rhead, rcur, rtail).
Buffer swapping does not need to happen anymore.
- Large refactoring of the control API towards an extensible solution;
the goal is to allow the addition of more commands and extension of
existing ones (with new options) without the need of hacks or the
risk of running out of configuration space.
A new NIOCCTRL ioctl has been added to handle all the requests of the
new control API, which cover all the functionalities so far supported.
The netmap API bumps from 11 to 12 with this patch. Full backward
compatibility is provided for the old control command (NIOCREGIF), by
means of a new netmap_legacy module. Many parts of the old netmap.h
header has now been moved to netmap_legacy.h (included by netmap.h).
Approved by: hrs (mentor)
2018-04-12 07:20:50 +00:00
|
|
|
struct netmap_vp_adapter *na, void *private_data)
|
2013-12-15 08:37:24 +00:00
|
|
|
{
|
netmap: align codebase to the current upstream (commit id 3fb001303718146)
Changelist:
- Turn tx_rings and rx_rings arrays into arrays of pointers to kring
structs. This patch includes fixes for ixv, ixl, ix, re, cxgbe, iflib,
vtnet and ptnet drivers to cope with the change.
- Generalize the nm_config() callback to accept a struct containing many
parameters.
- Introduce NKR_FAKERING to support buffers sharing (used for netmap
pipes)
- Improved API for external VALE modules.
- Various bug fixes and improvements to the netmap memory allocator,
including support for externally (userspace) allocated memory.
- Refactoring of netmap pipes: now linked rings share the same netmap
buffers, with a separate set of kring pointers (rhead, rcur, rtail).
Buffer swapping does not need to happen anymore.
- Large refactoring of the control API towards an extensible solution;
the goal is to allow the addition of more commands and extension of
existing ones (with new options) without the need of hacks or the
risk of running out of configuration space.
A new NIOCCTRL ioctl has been added to handle all the requests of the
new control API, which cover all the functionalities so far supported.
The netmap API bumps from 11 to 12 with this patch. Full backward
compatibility is provided for the old control command (NIOCREGIF), by
means of a new netmap_legacy module. Many parts of the old netmap.h
header has now been moved to netmap_legacy.h (included by netmap.h).
Approved by: hrs (mentor)
2018-04-12 07:20:50 +00:00
|
|
|
uint8_t *buf = ((uint8_t *)ft->ft_buf) + ft->ft_offset;
|
|
|
|
u_int buf_len = ft->ft_len - ft->ft_offset;
|
|
|
|
struct nm_hash_ent *ht = private_data;
|
2013-12-15 08:37:24 +00:00
|
|
|
uint32_t sh, dh;
|
|
|
|
u_int dst, mysrc = na->bdg_port;
|
|
|
|
uint64_t smac, dmac;
|
Import the current version of netmap, aligned with the one on github.
This commit, long overdue, contains contributions in the last 2 years
from Stefano Garzarella, Giuseppe Lettieri, Vincenzo Maffione, including:
+ fixes on monitor ports
+ the 'ptnet' virtual device driver, and ptnetmap backend, for
high speed virtual passthrough on VMs (bhyve fixes in an upcoming commit)
+ improved emulated netmap mode
+ more robust error handling
+ removal of stale code
+ various fixes to code and documentation (some mixup between RX and TX
parameters, and private and public variables)
We also include an additional tool, nmreplay, which is functionally
equivalent to tcpreplay but operating on netmap ports.
2016-10-16 14:13:32 +00:00
|
|
|
uint8_t indbuf[12];
|
2013-12-15 08:37:24 +00:00
|
|
|
|
netmap: align codebase to the current upstream (commit id 3fb001303718146)
Changelist:
- Turn tx_rings and rx_rings arrays into arrays of pointers to kring
structs. This patch includes fixes for ixv, ixl, ix, re, cxgbe, iflib,
vtnet and ptnet drivers to cope with the change.
- Generalize the nm_config() callback to accept a struct containing many
parameters.
- Introduce NKR_FAKERING to support buffers sharing (used for netmap
pipes)
- Improved API for external VALE modules.
- Various bug fixes and improvements to the netmap memory allocator,
including support for externally (userspace) allocated memory.
- Refactoring of netmap pipes: now linked rings share the same netmap
buffers, with a separate set of kring pointers (rhead, rcur, rtail).
Buffer swapping does not need to happen anymore.
- Large refactoring of the control API towards an extensible solution;
the goal is to allow the addition of more commands and extension of
existing ones (with new options) without the need of hacks or the
risk of running out of configuration space.
A new NIOCCTRL ioctl has been added to handle all the requests of the
new control API, which cover all the functionalities so far supported.
The netmap API bumps from 11 to 12 with this patch. Full backward
compatibility is provided for the old control command (NIOCREGIF), by
means of a new netmap_legacy module. Many parts of the old netmap.h
header has now been moved to netmap_legacy.h (included by netmap.h).
Approved by: hrs (mentor)
2018-04-12 07:20:50 +00:00
|
|
|
if (buf_len < 14) {
|
2013-12-15 08:37:24 +00:00
|
|
|
return NM_BDG_NOPORT;
|
|
|
|
}
|
Import the current version of netmap, aligned with the one on github.
This commit, long overdue, contains contributions in the last 2 years
from Stefano Garzarella, Giuseppe Lettieri, Vincenzo Maffione, including:
+ fixes on monitor ports
+ the 'ptnet' virtual device driver, and ptnetmap backend, for
high speed virtual passthrough on VMs (bhyve fixes in an upcoming commit)
+ improved emulated netmap mode
+ more robust error handling
+ removal of stale code
+ various fixes to code and documentation (some mixup between RX and TX
parameters, and private and public variables)
We also include an additional tool, nmreplay, which is functionally
equivalent to tcpreplay but operating on netmap ports.
2016-10-16 14:13:32 +00:00
|
|
|
|
|
|
|
if (ft->ft_flags & NS_INDIRECT) {
|
|
|
|
if (copyin(buf, indbuf, sizeof(indbuf))) {
|
|
|
|
return NM_BDG_NOPORT;
|
|
|
|
}
|
|
|
|
buf = indbuf;
|
|
|
|
}
|
|
|
|
|
2013-12-15 08:37:24 +00:00
|
|
|
dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
|
|
|
|
smac = le64toh(*(uint64_t *)(buf + 4));
|
|
|
|
smac >>= 16;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The hash is somewhat expensive, there might be some
|
|
|
|
* worthwhile optimizations here.
|
|
|
|
*/
|
Sync netmap sources with the version in our private tree.
This commit contains large contributions from Giuseppe Lettieri and
Stefano Garzarella, is partly supported by grants from Verisign and Cisco,
and brings in the following:
- fix zerocopy monitor ports and introduce copying monitor ports
(the latter are lower performance but give access to all traffic
in parallel with the application)
- exclusive open mode, useful to implement solutions that recover
from crashes of the main netmap client (suggested by Patrick Kelsey)
- revised memory allocator in preparation for the 'passthrough mode'
(ptnetmap) recently presented at bsdcan. ptnetmap is described in
S. Garzarella, G. Lettieri, L. Rizzo;
Virtual device passthrough for high speed VM networking,
ACM/IEEE ANCS 2015, Oakland (CA) May 2015
http://info.iet.unipi.it/~luigi/research.html
- fix rx CRC handing on ixl
- add module dependencies for netmap when building drivers as modules
- minor simplifications to device-specific routines (*txsync, *rxsync)
- general code cleanup (remove unused variables, introduce macros
to access rings and remove duplicate code,
Applications do not need to be recompiled, unless of course
they want to use the new features (monitors and exclusive open).
Those willing to try this code on stable/10 can just update the
sys/dev/netmap/*, sys/net/netmap* with the version in HEAD
and apply the small patches to individual device drivers.
MFC after: 1 month
Sponsored by: (partly) Verisign, Cisco
2015-07-10 05:51:36 +00:00
|
|
|
if (((buf[6] & 1) == 0) && (na->last_smac != smac)) { /* valid src */
|
2013-12-15 08:37:24 +00:00
|
|
|
uint8_t *s = buf+6;
|
2018-04-09 09:24:26 +00:00
|
|
|
sh = nm_bridge_rthash(s); /* hash of source */
|
2013-12-15 08:37:24 +00:00
|
|
|
/* update source port forwarding entry */
|
Sync netmap sources with the version in our private tree.
This commit contains large contributions from Giuseppe Lettieri and
Stefano Garzarella, is partly supported by grants from Verisign and Cisco,
and brings in the following:
- fix zerocopy monitor ports and introduce copying monitor ports
(the latter are lower performance but give access to all traffic
in parallel with the application)
- exclusive open mode, useful to implement solutions that recover
from crashes of the main netmap client (suggested by Patrick Kelsey)
- revised memory allocator in preparation for the 'passthrough mode'
(ptnetmap) recently presented at bsdcan. ptnetmap is described in
S. Garzarella, G. Lettieri, L. Rizzo;
Virtual device passthrough for high speed VM networking,
ACM/IEEE ANCS 2015, Oakland (CA) May 2015
http://info.iet.unipi.it/~luigi/research.html
- fix rx CRC handing on ixl
- add module dependencies for netmap when building drivers as modules
- minor simplifications to device-specific routines (*txsync, *rxsync)
- general code cleanup (remove unused variables, introduce macros
to access rings and remove duplicate code,
Applications do not need to be recompiled, unless of course
they want to use the new features (monitors and exclusive open).
Those willing to try this code on stable/10 can just update the
sys/dev/netmap/*, sys/net/netmap* with the version in HEAD
and apply the small patches to individual device drivers.
MFC after: 1 month
Sponsored by: (partly) Verisign, Cisco
2015-07-10 05:51:36 +00:00
|
|
|
na->last_smac = ht[sh].mac = smac; /* XXX expire ? */
|
2013-12-15 08:37:24 +00:00
|
|
|
ht[sh].ports = mysrc;
|
|
|
|
if (netmap_verbose)
|
|
|
|
D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
|
|
|
|
s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
|
|
|
|
}
|
|
|
|
dst = NM_BDG_BROADCAST;
|
|
|
|
if ((buf[0] & 1) == 0) { /* unicast */
|
2018-04-09 09:24:26 +00:00
|
|
|
dh = nm_bridge_rthash(buf); /* hash of dst */
|
2013-12-15 08:37:24 +00:00
|
|
|
if (ht[dh].mac == dmac) { /* found dst */
|
|
|
|
dst = ht[dh].ports;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return dst;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
It is 2014 and we have a new version of netmap.
Most relevant features:
- netmap emulation on any NIC, even those without native netmap support.
On the ixgbe we have measured about 4Mpps/core/queue in this mode,
which is still a lot more than with sockets/bpf.
- seamless interconnection of VALE switch, NICs and host stack.
If you disable accelerations on your NIC (say em0)
ifconfig em0 -txcsum -txcsum
you can use the VALE switch to connect the NIC and the host stack:
vale-ctl -h valeXX:em0
allowing sharing the NIC with other netmap clients.
- THE USER API HAS SLIGHTLY CHANGED (head/cur/tail pointers
instead of pointers/count as before). This was unavoidable to support,
in the future, multiple threads operating on the same rings.
Netmap clients require very small source code changes to compile again.
On the plus side, the new API should be easier to understand
and the internals are a lot simpler.
The manual page has been updated extensively to reflect the current
features and give some examples.
This is the result of work of several people including Giuseppe Lettieri,
Vincenzo Maffione, Michio Honda and myself, and has been financially
supported by EU projects CHANGE and OPENLAB, from NetApp University
Research Fund, NEC, and of course the Universita` di Pisa.
2014-01-06 12:53:15 +00:00
|
|
|
/*
|
|
|
|
* Available space in the ring. Only used in VALE code
|
|
|
|
* and only with is_rx = 1
|
|
|
|
*/
|
|
|
|
static inline uint32_t
|
|
|
|
nm_kr_space(struct netmap_kring *k, int is_rx)
|
|
|
|
{
|
|
|
|
int space;
|
|
|
|
|
|
|
|
if (is_rx) {
|
|
|
|
int busy = k->nkr_hwlease - k->nr_hwcur;
|
|
|
|
if (busy < 0)
|
|
|
|
busy += k->nkr_num_slots;
|
|
|
|
space = k->nkr_num_slots - 1 - busy;
|
|
|
|
} else {
|
|
|
|
/* XXX never used in this branch */
|
|
|
|
space = k->nr_hwtail - k->nkr_hwlease;
|
|
|
|
if (space < 0)
|
|
|
|
space += k->nkr_num_slots;
|
|
|
|
}
|
|
|
|
#if 0
|
|
|
|
// sanity check
|
|
|
|
if (k->nkr_hwlease >= k->nkr_num_slots ||
|
|
|
|
k->nr_hwcur >= k->nkr_num_slots ||
|
|
|
|
k->nr_tail >= k->nkr_num_slots ||
|
|
|
|
busy < 0 ||
|
|
|
|
busy >= k->nkr_num_slots) {
|
|
|
|
D("invalid kring, cur %d tail %d lease %d lease_idx %d lim %d", k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
|
|
|
|
k->nkr_lease_idx, k->nkr_num_slots);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
return space;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* make a lease on the kring for N positions. return the
|
|
|
|
* lease index
|
|
|
|
* XXX only used in VALE code and with is_rx = 1
|
|
|
|
*/
|
|
|
|
static inline uint32_t
|
|
|
|
nm_kr_lease(struct netmap_kring *k, u_int n, int is_rx)
|
|
|
|
{
|
|
|
|
uint32_t lim = k->nkr_num_slots - 1;
|
|
|
|
uint32_t lease_idx = k->nkr_lease_idx;
|
|
|
|
|
|
|
|
k->nkr_leases[lease_idx] = NR_NOSLOT;
|
|
|
|
k->nkr_lease_idx = nm_next(lease_idx, lim);
|
|
|
|
|
|
|
|
if (n > nm_kr_space(k, is_rx)) {
|
|
|
|
D("invalid request for %d slots", n);
|
|
|
|
panic("x");
|
|
|
|
}
|
|
|
|
/* XXX verify that there are n slots */
|
|
|
|
k->nkr_hwlease += n;
|
|
|
|
if (k->nkr_hwlease > lim)
|
|
|
|
k->nkr_hwlease -= lim + 1;
|
|
|
|
|
|
|
|
if (k->nkr_hwlease >= k->nkr_num_slots ||
|
|
|
|
k->nr_hwcur >= k->nkr_num_slots ||
|
|
|
|
k->nr_hwtail >= k->nkr_num_slots ||
|
|
|
|
k->nkr_lease_idx >= k->nkr_num_slots) {
|
|
|
|
D("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d",
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
k->na->name,
|
It is 2014 and we have a new version of netmap.
Most relevant features:
- netmap emulation on any NIC, even those without native netmap support.
On the ixgbe we have measured about 4Mpps/core/queue in this mode,
which is still a lot more than with sockets/bpf.
- seamless interconnection of VALE switch, NICs and host stack.
If you disable accelerations on your NIC (say em0)
ifconfig em0 -txcsum -txcsum
you can use the VALE switch to connect the NIC and the host stack:
vale-ctl -h valeXX:em0
allowing sharing the NIC with other netmap clients.
- THE USER API HAS SLIGHTLY CHANGED (head/cur/tail pointers
instead of pointers/count as before). This was unavoidable to support,
in the future, multiple threads operating on the same rings.
Netmap clients require very small source code changes to compile again.
On the plus side, the new API should be easier to understand
and the internals are a lot simpler.
The manual page has been updated extensively to reflect the current
features and give some examples.
This is the result of work of several people including Giuseppe Lettieri,
Vincenzo Maffione, Michio Honda and myself, and has been financially
supported by EU projects CHANGE and OPENLAB, from NetApp University
Research Fund, NEC, and of course the Universita` di Pisa.
2014-01-06 12:53:15 +00:00
|
|
|
k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
|
|
|
|
k->nkr_lease_idx, k->nkr_num_slots);
|
|
|
|
}
|
|
|
|
return lease_idx;
|
|
|
|
}
|
|
|
|
|
2013-12-15 08:37:24 +00:00
|
|
|
/*
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
*
|
2013-12-15 08:37:24 +00:00
|
|
|
* This flush routine supports only unicast and broadcast but a large
|
|
|
|
* number of ports, and lets us replace the learn and dispatch functions.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
|
|
|
|
u_int ring_nr)
|
|
|
|
{
|
|
|
|
struct nm_bdg_q *dst_ents, *brddst;
|
|
|
|
uint16_t num_dsts = 0, *dsts;
|
|
|
|
struct nm_bridge *b = na->na_bdg;
|
Import the current version of netmap, aligned with the one on github.
This commit, long overdue, contains contributions in the last 2 years
from Stefano Garzarella, Giuseppe Lettieri, Vincenzo Maffione, including:
+ fixes on monitor ports
+ the 'ptnet' virtual device driver, and ptnetmap backend, for
high speed virtual passthrough on VMs (bhyve fixes in an upcoming commit)
+ improved emulated netmap mode
+ more robust error handling
+ removal of stale code
+ various fixes to code and documentation (some mixup between RX and TX
parameters, and private and public variables)
We also include an additional tool, nmreplay, which is functionally
equivalent to tcpreplay but operating on netmap ports.
2016-10-16 14:13:32 +00:00
|
|
|
u_int i, me = na->bdg_port;
|
2013-12-15 08:37:24 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* The work area (pointed by ft) is followed by an array of
|
|
|
|
* pointers to queues , dst_ents; there are NM_BDG_MAXRINGS
|
|
|
|
* queues per port plus one for the broadcast traffic.
|
|
|
|
* Then we have an array of destination indexes.
|
|
|
|
*/
|
|
|
|
dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
|
|
|
|
dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);
|
|
|
|
|
|
|
|
/* first pass: find a destination for each packet in the batch */
|
|
|
|
for (i = 0; likely(i < n); i += ft[i].ft_frags) {
|
|
|
|
uint8_t dst_ring = ring_nr; /* default, same ring as origin */
|
|
|
|
uint16_t dst_port, d_i;
|
|
|
|
struct nm_bdg_q *d;
|
netmap: align codebase to the current upstream (commit id 3fb001303718146)
Changelist:
- Turn tx_rings and rx_rings arrays into arrays of pointers to kring
structs. This patch includes fixes for ixv, ixl, ix, re, cxgbe, iflib,
vtnet and ptnet drivers to cope with the change.
- Generalize the nm_config() callback to accept a struct containing many
parameters.
- Introduce NKR_FAKERING to support buffers sharing (used for netmap
pipes)
- Improved API for external VALE modules.
- Various bug fixes and improvements to the netmap memory allocator,
including support for externally (userspace) allocated memory.
- Refactoring of netmap pipes: now linked rings share the same netmap
buffers, with a separate set of kring pointers (rhead, rcur, rtail).
Buffer swapping does not need to happen anymore.
- Large refactoring of the control API towards an extensible solution;
the goal is to allow the addition of more commands and extension of
existing ones (with new options) without the need of hacks or the
risk of running out of configuration space.
A new NIOCCTRL ioctl has been added to handle all the requests of the
new control API, which cover all the functionalities so far supported.
The netmap API bumps from 11 to 12 with this patch. Full backward
compatibility is provided for the old control command (NIOCREGIF), by
means of a new netmap_legacy module. Many parts of the old netmap.h
header has now been moved to netmap_legacy.h (included by netmap.h).
Approved by: hrs (mentor)
2018-04-12 07:20:50 +00:00
|
|
|
struct nm_bdg_fwd *start_ft = NULL;
|
2013-12-15 08:37:24 +00:00
|
|
|
|
|
|
|
ND("slot %d frags %d", i, ft[i].ft_frags);
|
netmap: align codebase to the current upstream (commit id 3fb001303718146)
Changelist:
- Turn tx_rings and rx_rings arrays into arrays of pointers to kring
structs. This patch includes fixes for ixv, ixl, ix, re, cxgbe, iflib,
vtnet and ptnet drivers to cope with the change.
- Generalize the nm_config() callback to accept a struct containing many
parameters.
- Introduce NKR_FAKERING to support buffers sharing (used for netmap
pipes)
- Improved API for external VALE modules.
- Various bug fixes and improvements to the netmap memory allocator,
including support for externally (userspace) allocated memory.
- Refactoring of netmap pipes: now linked rings share the same netmap
buffers, with a separate set of kring pointers (rhead, rcur, rtail).
Buffer swapping does not need to happen anymore.
- Large refactoring of the control API towards an extensible solution;
the goal is to allow the addition of more commands and extension of
existing ones (with new options) without the need of hacks or the
risk of running out of configuration space.
A new NIOCCTRL ioctl has been added to handle all the requests of the
new control API, which cover all the functionalities so far supported.
The netmap API bumps from 11 to 12 with this patch. Full backward
compatibility is provided for the old control command (NIOCREGIF), by
means of a new netmap_legacy module. Many parts of the old netmap.h
header has now been moved to netmap_legacy.h (included by netmap.h).
Approved by: hrs (mentor)
2018-04-12 07:20:50 +00:00
|
|
|
|
|
|
|
if (na->up.virt_hdr_len < ft[i].ft_len) {
|
|
|
|
ft[i].ft_offset = na->up.virt_hdr_len;
|
|
|
|
start_ft = &ft[i];
|
|
|
|
} else if (na->up.virt_hdr_len == ft[i].ft_len && ft[i].ft_flags & NS_MOREFRAG) {
|
|
|
|
ft[i].ft_offset = ft[i].ft_len;
|
|
|
|
start_ft = &ft[i+1];
|
|
|
|
} else {
|
|
|
|
/* Drop the packet if the virtio-net header is not into the first
|
|
|
|
* fragment nor at the very beginning of the second.
|
|
|
|
*/
|
2013-12-15 08:37:24 +00:00
|
|
|
continue;
|
netmap: align codebase to the current upstream (commit id 3fb001303718146)
Changelist:
- Turn tx_rings and rx_rings arrays into arrays of pointers to kring
structs. This patch includes fixes for ixv, ixl, ix, re, cxgbe, iflib,
vtnet and ptnet drivers to cope with the change.
- Generalize the nm_config() callback to accept a struct containing many
parameters.
- Introduce NKR_FAKERING to support buffers sharing (used for netmap
pipes)
- Improved API for external VALE modules.
- Various bug fixes and improvements to the netmap memory allocator,
including support for externally (userspace) allocated memory.
- Refactoring of netmap pipes: now linked rings share the same netmap
buffers, with a separate set of kring pointers (rhead, rcur, rtail).
Buffer swapping does not need to happen anymore.
- Large refactoring of the control API towards an extensible solution;
the goal is to allow the addition of more commands and extension of
existing ones (with new options) without the need of hacks or the
risk of running out of configuration space.
A new NIOCCTRL ioctl has been added to handle all the requests of the
new control API, which cover all the functionalities so far supported.
The netmap API bumps from 11 to 12 with this patch. Full backward
compatibility is provided for the old control command (NIOCREGIF), by
means of a new netmap_legacy module. Many parts of the old netmap.h
header has now been moved to netmap_legacy.h (included by netmap.h).
Approved by: hrs (mentor)
2018-04-12 07:20:50 +00:00
|
|
|
}
|
|
|
|
dst_port = b->bdg_ops->lookup(start_ft, &dst_ring, na, b->private_data);
|
2013-12-15 08:37:24 +00:00
|
|
|
if (netmap_verbose > 255)
|
|
|
|
RD(5, "slot %d port %d -> %d", i, me, dst_port);
|
2018-04-09 09:24:26 +00:00
|
|
|
if (dst_port >= NM_BDG_NOPORT)
|
2013-12-15 08:37:24 +00:00
|
|
|
continue; /* this packet is identified to be dropped */
|
|
|
|
else if (dst_port == NM_BDG_BROADCAST)
|
|
|
|
dst_ring = 0; /* broadcasts always go to ring 0 */
|
|
|
|
else if (unlikely(dst_port == me ||
|
|
|
|
!b->bdg_ports[dst_port]))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
/* get a position in the scratch pad */
|
|
|
|
d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
|
|
|
|
d = dst_ents + d_i;
|
|
|
|
|
|
|
|
/* append the first fragment to the list */
|
|
|
|
if (d->bq_head == NM_FT_NULL) { /* new destination */
|
|
|
|
d->bq_head = d->bq_tail = i;
|
|
|
|
/* remember this position to be scanned later */
|
|
|
|
if (dst_port != NM_BDG_BROADCAST)
|
|
|
|
dsts[num_dsts++] = d_i;
|
|
|
|
} else {
|
|
|
|
ft[d->bq_tail].ft_next = i;
|
|
|
|
d->bq_tail = i;
|
|
|
|
}
|
|
|
|
d->bq_len += ft[i].ft_frags;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Broadcast traffic goes to ring 0 on all destinations.
|
|
|
|
* So we need to add these rings to the list of ports to scan.
|
|
|
|
* XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is
|
|
|
|
* expensive. We should keep a compact list of active destinations
|
|
|
|
* so we could shorten this loop.
|
|
|
|
*/
|
|
|
|
brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
|
|
|
|
if (brddst->bq_head != NM_FT_NULL) {
|
Import the current version of netmap, aligned with the one on github.
This commit, long overdue, contains contributions in the last 2 years
from Stefano Garzarella, Giuseppe Lettieri, Vincenzo Maffione, including:
+ fixes on monitor ports
+ the 'ptnet' virtual device driver, and ptnetmap backend, for
high speed virtual passthrough on VMs (bhyve fixes in an upcoming commit)
+ improved emulated netmap mode
+ more robust error handling
+ removal of stale code
+ various fixes to code and documentation (some mixup between RX and TX
parameters, and private and public variables)
We also include an additional tool, nmreplay, which is functionally
equivalent to tcpreplay but operating on netmap ports.
2016-10-16 14:13:32 +00:00
|
|
|
u_int j;
|
2013-12-15 08:37:24 +00:00
|
|
|
for (j = 0; likely(j < b->bdg_active_ports); j++) {
|
|
|
|
uint16_t d_i;
|
|
|
|
i = b->bdg_port_index[j];
|
|
|
|
if (unlikely(i == me))
|
|
|
|
continue;
|
|
|
|
d_i = i * NM_BDG_MAXRINGS;
|
|
|
|
if (dst_ents[d_i].bq_head == NM_FT_NULL)
|
|
|
|
dsts[num_dsts++] = d_i;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
/* second pass: scan destinations */
|
2013-12-15 08:37:24 +00:00
|
|
|
for (i = 0; i < num_dsts; i++) {
|
|
|
|
struct netmap_vp_adapter *dst_na;
|
|
|
|
struct netmap_kring *kring;
|
|
|
|
struct netmap_ring *ring;
|
This new version of netmap brings you the following:
- netmap pipes, providing bidirectional blocking I/O while moving
100+ Mpps between processes using shared memory channels
(no mistake: over one hundred million. But mind you, i said
*moving* not *processing*);
- kqueue support (BHyVe needs it);
- improved user library. Just the interface name lets you select a NIC,
host port, VALE switch port, netmap pipe, and individual queues.
The upcoming netmap-enabled libpcap will use this feature.
- optional extra buffers associated to netmap ports, for applications
that need to buffer data yet don't want to make copies.
- segmentation offloading for the VALE switch, useful between VMs.
and a number of bug fixes and performance improvements.
My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial
amount of work on these features so we owe them a big thanks.
There are some external repositories that can be of interest:
https://code.google.com/p/netmap
our public repository for netmap/VALE code, including
linux versions and other stuff that does not belong here,
such as python bindings.
https://code.google.com/p/netmap-libpcap
a clone of the libpcap repository with netmap support.
With this any libpcap client has access to most netmap
feature with no recompilation. E.g. tcpdump can filter
packets at 10-15 Mpps.
https://code.google.com/p/netmap-ipfw
a userspace version of ipfw+dummynet which uses netmap
to send/receive packets. Speed is up in the 7-10 Mpps
range per core for simple rulesets.
Both netmap-libpcap and netmap-ipfw will be merged upstream at some
point, but while this happens it is useful to have access to them.
And yes, this code will be merged soon. It is infinitely better
than the version currently in 10 and 9.
MFC after: 3 days
2014-02-15 04:53:04 +00:00
|
|
|
u_int dst_nr, lim, j, d_i, next, brd_next;
|
2013-12-15 08:37:24 +00:00
|
|
|
u_int needed, howmany;
|
|
|
|
int retry = netmap_txsync_retry;
|
|
|
|
struct nm_bdg_q *d;
|
|
|
|
uint32_t my_start = 0, lease_idx = 0;
|
|
|
|
int nrings;
|
This new version of netmap brings you the following:
- netmap pipes, providing bidirectional blocking I/O while moving
100+ Mpps between processes using shared memory channels
(no mistake: over one hundred million. But mind you, i said
*moving* not *processing*);
- kqueue support (BHyVe needs it);
- improved user library. Just the interface name lets you select a NIC,
host port, VALE switch port, netmap pipe, and individual queues.
The upcoming netmap-enabled libpcap will use this feature.
- optional extra buffers associated to netmap ports, for applications
that need to buffer data yet don't want to make copies.
- segmentation offloading for the VALE switch, useful between VMs.
and a number of bug fixes and performance improvements.
My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial
amount of work on these features so we owe them a big thanks.
There are some external repositories that can be of interest:
https://code.google.com/p/netmap
our public repository for netmap/VALE code, including
linux versions and other stuff that does not belong here,
such as python bindings.
https://code.google.com/p/netmap-libpcap
a clone of the libpcap repository with netmap support.
With this any libpcap client has access to most netmap
feature with no recompilation. E.g. tcpdump can filter
packets at 10-15 Mpps.
https://code.google.com/p/netmap-ipfw
a userspace version of ipfw+dummynet which uses netmap
to send/receive packets. Speed is up in the 7-10 Mpps
range per core for simple rulesets.
Both netmap-libpcap and netmap-ipfw will be merged upstream at some
point, but while this happens it is useful to have access to them.
And yes, this code will be merged soon. It is infinitely better
than the version currently in 10 and 9.
MFC after: 3 days
2014-02-15 04:53:04 +00:00
|
|
|
int virt_hdr_mismatch = 0;
|
2013-12-15 08:37:24 +00:00
|
|
|
|
|
|
|
d_i = dsts[i];
|
|
|
|
ND("second pass %d port %d", i, d_i);
|
|
|
|
d = dst_ents + d_i;
|
|
|
|
// XXX fix the division
|
|
|
|
dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
|
|
|
|
/* protect from the lookup function returning an inactive
|
|
|
|
* destination port
|
|
|
|
*/
|
|
|
|
if (unlikely(dst_na == NULL))
|
|
|
|
goto cleanup;
|
|
|
|
if (dst_na->up.na_flags & NAF_SW_ONLY)
|
|
|
|
goto cleanup;
|
|
|
|
/*
|
|
|
|
* The interface may be in !netmap mode in two cases:
|
|
|
|
* - when na is attached but not activated yet;
|
|
|
|
* - when na is being deactivated but is still attached.
|
|
|
|
*/
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
if (unlikely(!nm_netmap_on(&dst_na->up))) {
|
2013-12-15 08:37:24 +00:00
|
|
|
ND("not in netmap mode!");
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* there is at least one either unicast or broadcast packet */
|
|
|
|
brd_next = brddst->bq_head;
|
|
|
|
next = d->bq_head;
|
|
|
|
/* we need to reserve this many slots. If fewer are
|
|
|
|
* available, some packets will be dropped.
|
|
|
|
* Packets may have multiple fragments, so we may not use
|
|
|
|
* there is a chance that we may not use all of the slots
|
|
|
|
* we have claimed, so we will need to handle the leftover
|
|
|
|
* ones when we regain the lock.
|
|
|
|
*/
|
|
|
|
needed = d->bq_len + brddst->bq_len;
|
|
|
|
|
Import the current version of netmap, aligned with the one on github.
This commit, long overdue, contains contributions in the last 2 years
from Stefano Garzarella, Giuseppe Lettieri, Vincenzo Maffione, including:
+ fixes on monitor ports
+ the 'ptnet' virtual device driver, and ptnetmap backend, for
high speed virtual passthrough on VMs (bhyve fixes in an upcoming commit)
+ improved emulated netmap mode
+ more robust error handling
+ removal of stale code
+ various fixes to code and documentation (some mixup between RX and TX
parameters, and private and public variables)
We also include an additional tool, nmreplay, which is functionally
equivalent to tcpreplay but operating on netmap ports.
2016-10-16 14:13:32 +00:00
|
|
|
if (unlikely(dst_na->up.virt_hdr_len != na->up.virt_hdr_len)) {
|
2018-05-18 03:38:17 +00:00
|
|
|
if (netmap_verbose) {
|
|
|
|
RD(3, "virt_hdr_mismatch, src %d dst %d", na->up.virt_hdr_len,
|
|
|
|
dst_na->up.virt_hdr_len);
|
|
|
|
}
|
This new version of netmap brings you the following:
- netmap pipes, providing bidirectional blocking I/O while moving
100+ Mpps between processes using shared memory channels
(no mistake: over one hundred million. But mind you, i said
*moving* not *processing*);
- kqueue support (BHyVe needs it);
- improved user library. Just the interface name lets you select a NIC,
host port, VALE switch port, netmap pipe, and individual queues.
The upcoming netmap-enabled libpcap will use this feature.
- optional extra buffers associated to netmap ports, for applications
that need to buffer data yet don't want to make copies.
- segmentation offloading for the VALE switch, useful between VMs.
and a number of bug fixes and performance improvements.
My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial
amount of work on these features so we owe them a big thanks.
There are some external repositories that can be of interest:
https://code.google.com/p/netmap
our public repository for netmap/VALE code, including
linux versions and other stuff that does not belong here,
such as python bindings.
https://code.google.com/p/netmap-libpcap
a clone of the libpcap repository with netmap support.
With this any libpcap client has access to most netmap
feature with no recompilation. E.g. tcpdump can filter
packets at 10-15 Mpps.
https://code.google.com/p/netmap-ipfw
a userspace version of ipfw+dummynet which uses netmap
to send/receive packets. Speed is up in the 7-10 Mpps
range per core for simple rulesets.
Both netmap-libpcap and netmap-ipfw will be merged upstream at some
point, but while this happens it is useful to have access to them.
And yes, this code will be merged soon. It is infinitely better
than the version currently in 10 and 9.
MFC after: 3 days
2014-02-15 04:53:04 +00:00
|
|
|
/* There is a virtio-net header/offloadings mismatch between
|
|
|
|
* source and destination. The slower mismatch datapath will
|
|
|
|
* be used to cope with all the mismatches.
|
|
|
|
*/
|
|
|
|
virt_hdr_mismatch = 1;
|
|
|
|
if (dst_na->mfs < na->mfs) {
|
|
|
|
/* We may need to do segmentation offloadings, and so
|
|
|
|
* we may need a number of destination slots greater
|
|
|
|
* than the number of input slots ('needed').
|
|
|
|
* We look for the smallest integer 'x' which satisfies:
|
|
|
|
* needed * na->mfs + x * H <= x * na->mfs
|
|
|
|
* where 'H' is the length of the longest header that may
|
|
|
|
* be replicated in the segmentation process (e.g. for
|
|
|
|
* TCPv4 we must account for ethernet header, IP header
|
|
|
|
* and TCPv4 header).
|
|
|
|
*/
|
2018-04-09 09:24:26 +00:00
|
|
|
KASSERT(dst_na->mfs > 0, ("vpna->mfs is 0"));
|
This new version of netmap brings you the following:
- netmap pipes, providing bidirectional blocking I/O while moving
100+ Mpps between processes using shared memory channels
(no mistake: over one hundred million. But mind you, i said
*moving* not *processing*);
- kqueue support (BHyVe needs it);
- improved user library. Just the interface name lets you select a NIC,
host port, VALE switch port, netmap pipe, and individual queues.
The upcoming netmap-enabled libpcap will use this feature.
- optional extra buffers associated to netmap ports, for applications
that need to buffer data yet don't want to make copies.
- segmentation offloading for the VALE switch, useful between VMs.
and a number of bug fixes and performance improvements.
My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial
amount of work on these features so we owe them a big thanks.
There are some external repositories that can be of interest:
https://code.google.com/p/netmap
our public repository for netmap/VALE code, including
linux versions and other stuff that does not belong here,
such as python bindings.
https://code.google.com/p/netmap-libpcap
a clone of the libpcap repository with netmap support.
With this any libpcap client has access to most netmap
feature with no recompilation. E.g. tcpdump can filter
packets at 10-15 Mpps.
https://code.google.com/p/netmap-ipfw
a userspace version of ipfw+dummynet which uses netmap
to send/receive packets. Speed is up in the 7-10 Mpps
range per core for simple rulesets.
Both netmap-libpcap and netmap-ipfw will be merged upstream at some
point, but while this happens it is useful to have access to them.
And yes, this code will be merged soon. It is infinitely better
than the version currently in 10 and 9.
MFC after: 3 days
2014-02-15 04:53:04 +00:00
|
|
|
needed = (needed * na->mfs) /
|
|
|
|
(dst_na->mfs - WORST_CASE_GSO_HEADER) + 1;
|
|
|
|
ND(3, "srcmtu=%u, dstmtu=%u, x=%u", na->mfs, dst_na->mfs, needed);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-12-15 08:37:24 +00:00
|
|
|
ND(5, "pass 2 dst %d is %x %s",
|
|
|
|
i, d_i, is_vp ? "virtual" : "nic/host");
|
|
|
|
dst_nr = d_i & (NM_BDG_MAXRINGS-1);
|
|
|
|
nrings = dst_na->up.num_rx_rings;
|
|
|
|
if (dst_nr >= nrings)
|
|
|
|
dst_nr = dst_nr % nrings;
|
netmap: align codebase to the current upstream (commit id 3fb001303718146)
Changelist:
- Turn tx_rings and rx_rings arrays into arrays of pointers to kring
structs. This patch includes fixes for ixv, ixl, ix, re, cxgbe, iflib,
vtnet and ptnet drivers to cope with the change.
- Generalize the nm_config() callback to accept a struct containing many
parameters.
- Introduce NKR_FAKERING to support buffers sharing (used for netmap
pipes)
- Improved API for external VALE modules.
- Various bug fixes and improvements to the netmap memory allocator,
including support for externally (userspace) allocated memory.
- Refactoring of netmap pipes: now linked rings share the same netmap
buffers, with a separate set of kring pointers (rhead, rcur, rtail).
Buffer swapping does not need to happen anymore.
- Large refactoring of the control API towards an extensible solution;
the goal is to allow the addition of more commands and extension of
existing ones (with new options) without the need of hacks or the
risk of running out of configuration space.
A new NIOCCTRL ioctl has been added to handle all the requests of the
new control API, which cover all the functionalities so far supported.
The netmap API bumps from 11 to 12 with this patch. Full backward
compatibility is provided for the old control command (NIOCREGIF), by
means of a new netmap_legacy module. Many parts of the old netmap.h
header has now been moved to netmap_legacy.h (included by netmap.h).
Approved by: hrs (mentor)
2018-04-12 07:20:50 +00:00
|
|
|
kring = dst_na->up.rx_rings[dst_nr];
|
2013-12-15 08:37:24 +00:00
|
|
|
ring = kring->ring;
|
2018-04-09 09:24:26 +00:00
|
|
|
/* the destination ring may have not been opened for RX */
|
|
|
|
if (unlikely(ring == NULL || kring->nr_mode != NKR_NETMAP_ON))
|
|
|
|
goto cleanup;
|
2013-12-15 08:37:24 +00:00
|
|
|
lim = kring->nkr_num_slots - 1;
|
|
|
|
|
|
|
|
retry:
|
|
|
|
|
This new version of netmap brings you the following:
- netmap pipes, providing bidirectional blocking I/O while moving
100+ Mpps between processes using shared memory channels
(no mistake: over one hundred million. But mind you, i said
*moving* not *processing*);
- kqueue support (BHyVe needs it);
- improved user library. Just the interface name lets you select a NIC,
host port, VALE switch port, netmap pipe, and individual queues.
The upcoming netmap-enabled libpcap will use this feature.
- optional extra buffers associated to netmap ports, for applications
that need to buffer data yet don't want to make copies.
- segmentation offloading for the VALE switch, useful between VMs.
and a number of bug fixes and performance improvements.
My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial
amount of work on these features so we owe them a big thanks.
There are some external repositories that can be of interest:
https://code.google.com/p/netmap
our public repository for netmap/VALE code, including
linux versions and other stuff that does not belong here,
such as python bindings.
https://code.google.com/p/netmap-libpcap
a clone of the libpcap repository with netmap support.
With this any libpcap client has access to most netmap
feature with no recompilation. E.g. tcpdump can filter
packets at 10-15 Mpps.
https://code.google.com/p/netmap-ipfw
a userspace version of ipfw+dummynet which uses netmap
to send/receive packets. Speed is up in the 7-10 Mpps
range per core for simple rulesets.
Both netmap-libpcap and netmap-ipfw will be merged upstream at some
point, but while this happens it is useful to have access to them.
And yes, this code will be merged soon. It is infinitely better
than the version currently in 10 and 9.
MFC after: 3 days
2014-02-15 04:53:04 +00:00
|
|
|
if (dst_na->retry && retry) {
|
|
|
|
/* try to get some free slot from the previous run */
|
Sync netmap sources with the version in our private tree.
This commit contains large contributions from Giuseppe Lettieri and
Stefano Garzarella, is partly supported by grants from Verisign and Cisco,
and brings in the following:
- fix zerocopy monitor ports and introduce copying monitor ports
(the latter are lower performance but give access to all traffic
in parallel with the application)
- exclusive open mode, useful to implement solutions that recover
from crashes of the main netmap client (suggested by Patrick Kelsey)
- revised memory allocator in preparation for the 'passthrough mode'
(ptnetmap) recently presented at bsdcan. ptnetmap is described in
S. Garzarella, G. Lettieri, L. Rizzo;
Virtual device passthrough for high speed VM networking,
ACM/IEEE ANCS 2015, Oakland (CA) May 2015
http://info.iet.unipi.it/~luigi/research.html
- fix rx CRC handing on ixl
- add module dependencies for netmap when building drivers as modules
- minor simplifications to device-specific routines (*txsync, *rxsync)
- general code cleanup (remove unused variables, introduce macros
to access rings and remove duplicate code,
Applications do not need to be recompiled, unless of course
they want to use the new features (monitors and exclusive open).
Those willing to try this code on stable/10 can just update the
sys/dev/netmap/*, sys/net/netmap* with the version in HEAD
and apply the small patches to individual device drivers.
MFC after: 1 month
Sponsored by: (partly) Verisign, Cisco
2015-07-10 05:51:36 +00:00
|
|
|
kring->nm_notify(kring, 0);
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
/* actually useful only for bwraps, since there
|
|
|
|
* the notify will trigger a txsync on the hwna. VALE ports
|
|
|
|
* have dst_na->retry == 0
|
|
|
|
*/
|
This new version of netmap brings you the following:
- netmap pipes, providing bidirectional blocking I/O while moving
100+ Mpps between processes using shared memory channels
(no mistake: over one hundred million. But mind you, i said
*moving* not *processing*);
- kqueue support (BHyVe needs it);
- improved user library. Just the interface name lets you select a NIC,
host port, VALE switch port, netmap pipe, and individual queues.
The upcoming netmap-enabled libpcap will use this feature.
- optional extra buffers associated to netmap ports, for applications
that need to buffer data yet don't want to make copies.
- segmentation offloading for the VALE switch, useful between VMs.
and a number of bug fixes and performance improvements.
My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial
amount of work on these features so we owe them a big thanks.
There are some external repositories that can be of interest:
https://code.google.com/p/netmap
our public repository for netmap/VALE code, including
linux versions and other stuff that does not belong here,
such as python bindings.
https://code.google.com/p/netmap-libpcap
a clone of the libpcap repository with netmap support.
With this any libpcap client has access to most netmap
feature with no recompilation. E.g. tcpdump can filter
packets at 10-15 Mpps.
https://code.google.com/p/netmap-ipfw
a userspace version of ipfw+dummynet which uses netmap
to send/receive packets. Speed is up in the 7-10 Mpps
range per core for simple rulesets.
Both netmap-libpcap and netmap-ipfw will be merged upstream at some
point, but while this happens it is useful to have access to them.
And yes, this code will be merged soon. It is infinitely better
than the version currently in 10 and 9.
MFC after: 3 days
2014-02-15 04:53:04 +00:00
|
|
|
}
|
2013-12-15 08:37:24 +00:00
|
|
|
/* reserve the buffers in the queue and an entry
|
|
|
|
* to report completion, and drop lock.
|
|
|
|
* XXX this might become a helper function.
|
|
|
|
*/
|
|
|
|
mtx_lock(&kring->q_lock);
|
|
|
|
if (kring->nkr_stopped) {
|
|
|
|
mtx_unlock(&kring->q_lock);
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
my_start = j = kring->nkr_hwlease;
|
|
|
|
howmany = nm_kr_space(kring, 1);
|
|
|
|
if (needed < howmany)
|
|
|
|
howmany = needed;
|
|
|
|
lease_idx = nm_kr_lease(kring, howmany, 1);
|
|
|
|
mtx_unlock(&kring->q_lock);
|
|
|
|
|
|
|
|
/* only retry if we need more than available slots */
|
|
|
|
if (retry && needed <= howmany)
|
|
|
|
retry = 0;
|
|
|
|
|
|
|
|
/* copy to the destination queue */
|
|
|
|
while (howmany > 0) {
|
|
|
|
struct netmap_slot *slot;
|
|
|
|
struct nm_bdg_fwd *ft_p, *ft_end;
|
|
|
|
u_int cnt;
|
|
|
|
|
|
|
|
/* find the queue from which we pick next packet.
|
|
|
|
* NM_FT_NULL is always higher than valid indexes
|
|
|
|
* so we never dereference it if the other list
|
|
|
|
* has packets (and if both are empty we never
|
|
|
|
* get here).
|
|
|
|
*/
|
|
|
|
if (next < brd_next) {
|
|
|
|
ft_p = ft + next;
|
|
|
|
next = ft_p->ft_next;
|
|
|
|
} else { /* insert broadcast */
|
|
|
|
ft_p = ft + brd_next;
|
|
|
|
brd_next = ft_p->ft_next;
|
|
|
|
}
|
|
|
|
cnt = ft_p->ft_frags; // cnt > 0
|
|
|
|
if (unlikely(cnt > howmany))
|
|
|
|
break; /* no more space */
|
|
|
|
if (netmap_verbose && cnt > 1)
|
|
|
|
RD(5, "rx %d frags to %d", cnt, j);
|
|
|
|
ft_end = ft_p + cnt;
|
This new version of netmap brings you the following:
- netmap pipes, providing bidirectional blocking I/O while moving
100+ Mpps between processes using shared memory channels
(no mistake: over one hundred million. But mind you, i said
*moving* not *processing*);
- kqueue support (BHyVe needs it);
- improved user library. Just the interface name lets you select a NIC,
host port, VALE switch port, netmap pipe, and individual queues.
The upcoming netmap-enabled libpcap will use this feature.
- optional extra buffers associated to netmap ports, for applications
that need to buffer data yet don't want to make copies.
- segmentation offloading for the VALE switch, useful between VMs.
and a number of bug fixes and performance improvements.
My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial
amount of work on these features so we owe them a big thanks.
There are some external repositories that can be of interest:
https://code.google.com/p/netmap
our public repository for netmap/VALE code, including
linux versions and other stuff that does not belong here,
such as python bindings.
https://code.google.com/p/netmap-libpcap
a clone of the libpcap repository with netmap support.
With this any libpcap client has access to most netmap
feature with no recompilation. E.g. tcpdump can filter
packets at 10-15 Mpps.
https://code.google.com/p/netmap-ipfw
a userspace version of ipfw+dummynet which uses netmap
to send/receive packets. Speed is up in the 7-10 Mpps
range per core for simple rulesets.
Both netmap-libpcap and netmap-ipfw will be merged upstream at some
point, but while this happens it is useful to have access to them.
And yes, this code will be merged soon. It is infinitely better
than the version currently in 10 and 9.
MFC after: 3 days
2014-02-15 04:53:04 +00:00
|
|
|
if (unlikely(virt_hdr_mismatch)) {
|
|
|
|
bdg_mismatch_datapath(na, dst_na, ft_p, ring, &j, lim, &howmany);
|
|
|
|
} else {
|
|
|
|
howmany -= cnt;
|
|
|
|
do {
|
|
|
|
char *dst, *src = ft_p->ft_buf;
|
|
|
|
size_t copy_len = ft_p->ft_len, dst_len = copy_len;
|
|
|
|
|
|
|
|
slot = &ring->slot[j];
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
dst = NMB(&dst_na->up, slot);
|
This new version of netmap brings you the following:
- netmap pipes, providing bidirectional blocking I/O while moving
100+ Mpps between processes using shared memory channels
(no mistake: over one hundred million. But mind you, i said
*moving* not *processing*);
- kqueue support (BHyVe needs it);
- improved user library. Just the interface name lets you select a NIC,
host port, VALE switch port, netmap pipe, and individual queues.
The upcoming netmap-enabled libpcap will use this feature.
- optional extra buffers associated to netmap ports, for applications
that need to buffer data yet don't want to make copies.
- segmentation offloading for the VALE switch, useful between VMs.
and a number of bug fixes and performance improvements.
My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial
amount of work on these features so we owe them a big thanks.
There are some external repositories that can be of interest:
https://code.google.com/p/netmap
our public repository for netmap/VALE code, including
linux versions and other stuff that does not belong here,
such as python bindings.
https://code.google.com/p/netmap-libpcap
a clone of the libpcap repository with netmap support.
With this any libpcap client has access to most netmap
feature with no recompilation. E.g. tcpdump can filter
packets at 10-15 Mpps.
https://code.google.com/p/netmap-ipfw
a userspace version of ipfw+dummynet which uses netmap
to send/receive packets. Speed is up in the 7-10 Mpps
range per core for simple rulesets.
Both netmap-libpcap and netmap-ipfw will be merged upstream at some
point, but while this happens it is useful to have access to them.
And yes, this code will be merged soon. It is infinitely better
than the version currently in 10 and 9.
MFC after: 3 days
2014-02-15 04:53:04 +00:00
|
|
|
|
|
|
|
ND("send [%d] %d(%d) bytes at %s:%d",
|
|
|
|
i, (int)copy_len, (int)dst_len,
|
|
|
|
NM_IFPNAME(dst_ifp), j);
|
|
|
|
/* round to a multiple of 64 */
|
|
|
|
copy_len = (copy_len + 63) & ~63;
|
|
|
|
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
if (unlikely(copy_len > NETMAP_BUF_SIZE(&dst_na->up) ||
|
|
|
|
copy_len > NETMAP_BUF_SIZE(&na->up))) {
|
2014-06-06 10:50:14 +00:00
|
|
|
RD(5, "invalid len %d, down to 64", (int)copy_len);
|
|
|
|
copy_len = dst_len = 64; // XXX
|
|
|
|
}
|
This new version of netmap brings you the following:
- netmap pipes, providing bidirectional blocking I/O while moving
100+ Mpps between processes using shared memory channels
(no mistake: over one hundred million. But mind you, i said
*moving* not *processing*);
- kqueue support (BHyVe needs it);
- improved user library. Just the interface name lets you select a NIC,
host port, VALE switch port, netmap pipe, and individual queues.
The upcoming netmap-enabled libpcap will use this feature.
- optional extra buffers associated to netmap ports, for applications
that need to buffer data yet don't want to make copies.
- segmentation offloading for the VALE switch, useful between VMs.
and a number of bug fixes and performance improvements.
My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial
amount of work on these features so we owe them a big thanks.
There are some external repositories that can be of interest:
https://code.google.com/p/netmap
our public repository for netmap/VALE code, including
linux versions and other stuff that does not belong here,
such as python bindings.
https://code.google.com/p/netmap-libpcap
a clone of the libpcap repository with netmap support.
With this any libpcap client has access to most netmap
feature with no recompilation. E.g. tcpdump can filter
packets at 10-15 Mpps.
https://code.google.com/p/netmap-ipfw
a userspace version of ipfw+dummynet which uses netmap
to send/receive packets. Speed is up in the 7-10 Mpps
range per core for simple rulesets.
Both netmap-libpcap and netmap-ipfw will be merged upstream at some
point, but while this happens it is useful to have access to them.
And yes, this code will be merged soon. It is infinitely better
than the version currently in 10 and 9.
MFC after: 3 days
2014-02-15 04:53:04 +00:00
|
|
|
if (ft_p->ft_flags & NS_INDIRECT) {
|
|
|
|
if (copyin(src, dst, copy_len)) {
|
|
|
|
// invalid user pointer, pretend len is 0
|
|
|
|
dst_len = 0;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
//memcpy(dst, src, copy_len);
|
|
|
|
pkt_copy(src, dst, (int)copy_len);
|
|
|
|
}
|
|
|
|
slot->len = dst_len;
|
|
|
|
slot->flags = (cnt << 8)| NS_MOREFRAG;
|
|
|
|
j = nm_next(j, lim);
|
|
|
|
needed--;
|
|
|
|
ft_p++;
|
|
|
|
} while (ft_p != ft_end);
|
|
|
|
slot->flags = (cnt << 8); /* clear flag on last entry */
|
|
|
|
}
|
2013-12-15 08:37:24 +00:00
|
|
|
/* are we done ? */
|
|
|
|
if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
{
|
|
|
|
/* current position */
|
|
|
|
uint32_t *p = kring->nkr_leases; /* shorthand */
|
|
|
|
uint32_t update_pos;
|
|
|
|
int still_locked = 1;
|
|
|
|
|
|
|
|
mtx_lock(&kring->q_lock);
|
|
|
|
if (unlikely(howmany > 0)) {
|
|
|
|
/* not used all bufs. If i am the last one
|
|
|
|
* i can recover the slots, otherwise must
|
|
|
|
* fill them with 0 to mark empty packets.
|
|
|
|
*/
|
|
|
|
ND("leftover %d bufs", howmany);
|
|
|
|
if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
|
|
|
|
/* yes i am the last one */
|
|
|
|
ND("roll back nkr_hwlease to %d", j);
|
|
|
|
kring->nkr_hwlease = j;
|
|
|
|
} else {
|
|
|
|
while (howmany-- > 0) {
|
|
|
|
ring->slot[j].len = 0;
|
|
|
|
ring->slot[j].flags = 0;
|
|
|
|
j = nm_next(j, lim);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
p[lease_idx] = j; /* report I am done */
|
|
|
|
|
It is 2014 and we have a new version of netmap.
Most relevant features:
- netmap emulation on any NIC, even those without native netmap support.
On the ixgbe we have measured about 4Mpps/core/queue in this mode,
which is still a lot more than with sockets/bpf.
- seamless interconnection of VALE switch, NICs and host stack.
If you disable accelerations on your NIC (say em0)
ifconfig em0 -txcsum -txcsum
you can use the VALE switch to connect the NIC and the host stack:
vale-ctl -h valeXX:em0
allowing sharing the NIC with other netmap clients.
- THE USER API HAS SLIGHTLY CHANGED (head/cur/tail pointers
instead of pointers/count as before). This was unavoidable to support,
in the future, multiple threads operating on the same rings.
Netmap clients require very small source code changes to compile again.
On the plus side, the new API should be easier to understand
and the internals are a lot simpler.
The manual page has been updated extensively to reflect the current
features and give some examples.
This is the result of work of several people including Giuseppe Lettieri,
Vincenzo Maffione, Michio Honda and myself, and has been financially
supported by EU projects CHANGE and OPENLAB, from NetApp University
Research Fund, NEC, and of course the Universita` di Pisa.
2014-01-06 12:53:15 +00:00
|
|
|
update_pos = kring->nr_hwtail;
|
2013-12-15 08:37:24 +00:00
|
|
|
|
|
|
|
if (my_start == update_pos) {
|
|
|
|
/* all slots before my_start have been reported,
|
|
|
|
* so scan subsequent leases to see if other ranges
|
|
|
|
* have been completed, and to a selwakeup or txsync.
|
|
|
|
*/
|
|
|
|
while (lease_idx != kring->nkr_lease_idx &&
|
|
|
|
p[lease_idx] != NR_NOSLOT) {
|
|
|
|
j = p[lease_idx];
|
|
|
|
p[lease_idx] = NR_NOSLOT;
|
|
|
|
lease_idx = nm_next(lease_idx, lim);
|
|
|
|
}
|
|
|
|
/* j is the new 'write' position. j != my_start
|
|
|
|
* means there are new buffers to report
|
|
|
|
*/
|
|
|
|
if (likely(j != my_start)) {
|
It is 2014 and we have a new version of netmap.
Most relevant features:
- netmap emulation on any NIC, even those without native netmap support.
On the ixgbe we have measured about 4Mpps/core/queue in this mode,
which is still a lot more than with sockets/bpf.
- seamless interconnection of VALE switch, NICs and host stack.
If you disable accelerations on your NIC (say em0)
ifconfig em0 -txcsum -txcsum
you can use the VALE switch to connect the NIC and the host stack:
vale-ctl -h valeXX:em0
allowing sharing the NIC with other netmap clients.
- THE USER API HAS SLIGHTLY CHANGED (head/cur/tail pointers
instead of pointers/count as before). This was unavoidable to support,
in the future, multiple threads operating on the same rings.
Netmap clients require very small source code changes to compile again.
On the plus side, the new API should be easier to understand
and the internals are a lot simpler.
The manual page has been updated extensively to reflect the current
features and give some examples.
This is the result of work of several people including Giuseppe Lettieri,
Vincenzo Maffione, Michio Honda and myself, and has been financially
supported by EU projects CHANGE and OPENLAB, from NetApp University
Research Fund, NEC, and of course the Universita` di Pisa.
2014-01-06 12:53:15 +00:00
|
|
|
kring->nr_hwtail = j;
|
2013-12-15 08:37:24 +00:00
|
|
|
still_locked = 0;
|
|
|
|
mtx_unlock(&kring->q_lock);
|
Sync netmap sources with the version in our private tree.
This commit contains large contributions from Giuseppe Lettieri and
Stefano Garzarella, is partly supported by grants from Verisign and Cisco,
and brings in the following:
- fix zerocopy monitor ports and introduce copying monitor ports
(the latter are lower performance but give access to all traffic
in parallel with the application)
- exclusive open mode, useful to implement solutions that recover
from crashes of the main netmap client (suggested by Patrick Kelsey)
- revised memory allocator in preparation for the 'passthrough mode'
(ptnetmap) recently presented at bsdcan. ptnetmap is described in
S. Garzarella, G. Lettieri, L. Rizzo;
Virtual device passthrough for high speed VM networking,
ACM/IEEE ANCS 2015, Oakland (CA) May 2015
http://info.iet.unipi.it/~luigi/research.html
- fix rx CRC handing on ixl
- add module dependencies for netmap when building drivers as modules
- minor simplifications to device-specific routines (*txsync, *rxsync)
- general code cleanup (remove unused variables, introduce macros
to access rings and remove duplicate code,
Applications do not need to be recompiled, unless of course
they want to use the new features (monitors and exclusive open).
Those willing to try this code on stable/10 can just update the
sys/dev/netmap/*, sys/net/netmap* with the version in HEAD
and apply the small patches to individual device drivers.
MFC after: 1 month
Sponsored by: (partly) Verisign, Cisco
2015-07-10 05:51:36 +00:00
|
|
|
kring->nm_notify(kring, 0);
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
/* this is netmap_notify for VALE ports and
|
|
|
|
* netmap_bwrap_notify for bwrap. The latter will
|
|
|
|
* trigger a txsync on the underlying hwna
|
|
|
|
*/
|
|
|
|
if (dst_na->retry && retry--) {
|
|
|
|
/* XXX this is going to call nm_notify again.
|
|
|
|
* Only useful for bwrap in virtual machines
|
|
|
|
*/
|
2013-12-15 08:37:24 +00:00
|
|
|
goto retry;
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
}
|
2013-12-15 08:37:24 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
if (still_locked)
|
|
|
|
mtx_unlock(&kring->q_lock);
|
|
|
|
}
|
|
|
|
cleanup:
|
|
|
|
d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
|
|
|
|
d->bq_len = 0;
|
|
|
|
}
|
|
|
|
brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
|
|
|
|
brddst->bq_len = 0;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
/* nm_txsync callback for VALE ports */
|
2013-12-15 08:37:24 +00:00
|
|
|
static int
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
netmap_vp_txsync(struct netmap_kring *kring, int flags)
|
2013-12-15 08:37:24 +00:00
|
|
|
{
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
struct netmap_vp_adapter *na =
|
|
|
|
(struct netmap_vp_adapter *)kring->na;
|
It is 2014 and we have a new version of netmap.
Most relevant features:
- netmap emulation on any NIC, even those without native netmap support.
On the ixgbe we have measured about 4Mpps/core/queue in this mode,
which is still a lot more than with sockets/bpf.
- seamless interconnection of VALE switch, NICs and host stack.
If you disable accelerations on your NIC (say em0)
ifconfig em0 -txcsum -txcsum
you can use the VALE switch to connect the NIC and the host stack:
vale-ctl -h valeXX:em0
allowing sharing the NIC with other netmap clients.
- THE USER API HAS SLIGHTLY CHANGED (head/cur/tail pointers
instead of pointers/count as before). This was unavoidable to support,
in the future, multiple threads operating on the same rings.
Netmap clients require very small source code changes to compile again.
On the plus side, the new API should be easier to understand
and the internals are a lot simpler.
The manual page has been updated extensively to reflect the current
features and give some examples.
This is the result of work of several people including Giuseppe Lettieri,
Vincenzo Maffione, Michio Honda and myself, and has been financially
supported by EU projects CHANGE and OPENLAB, from NetApp University
Research Fund, NEC, and of course the Universita` di Pisa.
2014-01-06 12:53:15 +00:00
|
|
|
u_int done;
|
|
|
|
u_int const lim = kring->nkr_num_slots - 1;
|
Sync netmap sources with the version in our private tree.
This commit contains large contributions from Giuseppe Lettieri and
Stefano Garzarella, is partly supported by grants from Verisign and Cisco,
and brings in the following:
- fix zerocopy monitor ports and introduce copying monitor ports
(the latter are lower performance but give access to all traffic
in parallel with the application)
- exclusive open mode, useful to implement solutions that recover
from crashes of the main netmap client (suggested by Patrick Kelsey)
- revised memory allocator in preparation for the 'passthrough mode'
(ptnetmap) recently presented at bsdcan. ptnetmap is described in
S. Garzarella, G. Lettieri, L. Rizzo;
Virtual device passthrough for high speed VM networking,
ACM/IEEE ANCS 2015, Oakland (CA) May 2015
http://info.iet.unipi.it/~luigi/research.html
- fix rx CRC handing on ixl
- add module dependencies for netmap when building drivers as modules
- minor simplifications to device-specific routines (*txsync, *rxsync)
- general code cleanup (remove unused variables, introduce macros
to access rings and remove duplicate code,
Applications do not need to be recompiled, unless of course
they want to use the new features (monitors and exclusive open).
Those willing to try this code on stable/10 can just update the
sys/dev/netmap/*, sys/net/netmap* with the version in HEAD
and apply the small patches to individual device drivers.
MFC after: 1 month
Sponsored by: (partly) Verisign, Cisco
2015-07-10 05:51:36 +00:00
|
|
|
u_int const head = kring->rhead;
|
2013-12-15 08:37:24 +00:00
|
|
|
|
|
|
|
if (bridge_batch <= 0) { /* testing only */
|
Sync netmap sources with the version in our private tree.
This commit contains large contributions from Giuseppe Lettieri and
Stefano Garzarella, is partly supported by grants from Verisign and Cisco,
and brings in the following:
- fix zerocopy monitor ports and introduce copying monitor ports
(the latter are lower performance but give access to all traffic
in parallel with the application)
- exclusive open mode, useful to implement solutions that recover
from crashes of the main netmap client (suggested by Patrick Kelsey)
- revised memory allocator in preparation for the 'passthrough mode'
(ptnetmap) recently presented at bsdcan. ptnetmap is described in
S. Garzarella, G. Lettieri, L. Rizzo;
Virtual device passthrough for high speed VM networking,
ACM/IEEE ANCS 2015, Oakland (CA) May 2015
http://info.iet.unipi.it/~luigi/research.html
- fix rx CRC handing on ixl
- add module dependencies for netmap when building drivers as modules
- minor simplifications to device-specific routines (*txsync, *rxsync)
- general code cleanup (remove unused variables, introduce macros
to access rings and remove duplicate code,
Applications do not need to be recompiled, unless of course
they want to use the new features (monitors and exclusive open).
Those willing to try this code on stable/10 can just update the
sys/dev/netmap/*, sys/net/netmap* with the version in HEAD
and apply the small patches to individual device drivers.
MFC after: 1 month
Sponsored by: (partly) Verisign, Cisco
2015-07-10 05:51:36 +00:00
|
|
|
done = head; // used all
|
2013-12-15 08:37:24 +00:00
|
|
|
goto done;
|
|
|
|
}
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
if (!na->na_bdg) {
|
Sync netmap sources with the version in our private tree.
This commit contains large contributions from Giuseppe Lettieri and
Stefano Garzarella, is partly supported by grants from Verisign and Cisco,
and brings in the following:
- fix zerocopy monitor ports and introduce copying monitor ports
(the latter are lower performance but give access to all traffic
in parallel with the application)
- exclusive open mode, useful to implement solutions that recover
from crashes of the main netmap client (suggested by Patrick Kelsey)
- revised memory allocator in preparation for the 'passthrough mode'
(ptnetmap) recently presented at bsdcan. ptnetmap is described in
S. Garzarella, G. Lettieri, L. Rizzo;
Virtual device passthrough for high speed VM networking,
ACM/IEEE ANCS 2015, Oakland (CA) May 2015
http://info.iet.unipi.it/~luigi/research.html
- fix rx CRC handing on ixl
- add module dependencies for netmap when building drivers as modules
- minor simplifications to device-specific routines (*txsync, *rxsync)
- general code cleanup (remove unused variables, introduce macros
to access rings and remove duplicate code,
Applications do not need to be recompiled, unless of course
they want to use the new features (monitors and exclusive open).
Those willing to try this code on stable/10 can just update the
sys/dev/netmap/*, sys/net/netmap* with the version in HEAD
and apply the small patches to individual device drivers.
MFC after: 1 month
Sponsored by: (partly) Verisign, Cisco
2015-07-10 05:51:36 +00:00
|
|
|
done = head;
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
goto done;
|
|
|
|
}
|
2013-12-15 08:37:24 +00:00
|
|
|
if (bridge_batch > NM_BDG_BATCH)
|
|
|
|
bridge_batch = NM_BDG_BATCH;
|
|
|
|
|
Sync netmap sources with the version in our private tree.
This commit contains large contributions from Giuseppe Lettieri and
Stefano Garzarella, is partly supported by grants from Verisign and Cisco,
and brings in the following:
- fix zerocopy monitor ports and introduce copying monitor ports
(the latter are lower performance but give access to all traffic
in parallel with the application)
- exclusive open mode, useful to implement solutions that recover
from crashes of the main netmap client (suggested by Patrick Kelsey)
- revised memory allocator in preparation for the 'passthrough mode'
(ptnetmap) recently presented at bsdcan. ptnetmap is described in
S. Garzarella, G. Lettieri, L. Rizzo;
Virtual device passthrough for high speed VM networking,
ACM/IEEE ANCS 2015, Oakland (CA) May 2015
http://info.iet.unipi.it/~luigi/research.html
- fix rx CRC handing on ixl
- add module dependencies for netmap when building drivers as modules
- minor simplifications to device-specific routines (*txsync, *rxsync)
- general code cleanup (remove unused variables, introduce macros
to access rings and remove duplicate code,
Applications do not need to be recompiled, unless of course
they want to use the new features (monitors and exclusive open).
Those willing to try this code on stable/10 can just update the
sys/dev/netmap/*, sys/net/netmap* with the version in HEAD
and apply the small patches to individual device drivers.
MFC after: 1 month
Sponsored by: (partly) Verisign, Cisco
2015-07-10 05:51:36 +00:00
|
|
|
done = nm_bdg_preflush(kring, head);
|
2013-12-15 08:37:24 +00:00
|
|
|
done:
|
Sync netmap sources with the version in our private tree.
This commit contains large contributions from Giuseppe Lettieri and
Stefano Garzarella, is partly supported by grants from Verisign and Cisco,
and brings in the following:
- fix zerocopy monitor ports and introduce copying monitor ports
(the latter are lower performance but give access to all traffic
in parallel with the application)
- exclusive open mode, useful to implement solutions that recover
from crashes of the main netmap client (suggested by Patrick Kelsey)
- revised memory allocator in preparation for the 'passthrough mode'
(ptnetmap) recently presented at bsdcan. ptnetmap is described in
S. Garzarella, G. Lettieri, L. Rizzo;
Virtual device passthrough for high speed VM networking,
ACM/IEEE ANCS 2015, Oakland (CA) May 2015
http://info.iet.unipi.it/~luigi/research.html
- fix rx CRC handing on ixl
- add module dependencies for netmap when building drivers as modules
- minor simplifications to device-specific routines (*txsync, *rxsync)
- general code cleanup (remove unused variables, introduce macros
to access rings and remove duplicate code,
Applications do not need to be recompiled, unless of course
they want to use the new features (monitors and exclusive open).
Those willing to try this code on stable/10 can just update the
sys/dev/netmap/*, sys/net/netmap* with the version in HEAD
and apply the small patches to individual device drivers.
MFC after: 1 month
Sponsored by: (partly) Verisign, Cisco
2015-07-10 05:51:36 +00:00
|
|
|
if (done != head)
|
|
|
|
D("early break at %d/ %d, tail %d", done, head, kring->nr_hwtail);
|
It is 2014 and we have a new version of netmap.
Most relevant features:
- netmap emulation on any NIC, even those without native netmap support.
On the ixgbe we have measured about 4Mpps/core/queue in this mode,
which is still a lot more than with sockets/bpf.
- seamless interconnection of VALE switch, NICs and host stack.
If you disable accelerations on your NIC (say em0)
ifconfig em0 -txcsum -txcsum
you can use the VALE switch to connect the NIC and the host stack:
vale-ctl -h valeXX:em0
allowing sharing the NIC with other netmap clients.
- THE USER API HAS SLIGHTLY CHANGED (head/cur/tail pointers
instead of pointers/count as before). This was unavoidable to support,
in the future, multiple threads operating on the same rings.
Netmap clients require very small source code changes to compile again.
On the plus side, the new API should be easier to understand
and the internals are a lot simpler.
The manual page has been updated extensively to reflect the current
features and give some examples.
This is the result of work of several people including Giuseppe Lettieri,
Vincenzo Maffione, Michio Honda and myself, and has been financially
supported by EU projects CHANGE and OPENLAB, from NetApp University
Research Fund, NEC, and of course the Universita` di Pisa.
2014-01-06 12:53:15 +00:00
|
|
|
/*
|
|
|
|
* packets between 'done' and 'cur' are left unsent.
|
|
|
|
*/
|
|
|
|
kring->nr_hwcur = done;
|
|
|
|
kring->nr_hwtail = nm_prev(done, lim);
|
2013-12-15 08:37:24 +00:00
|
|
|
if (netmap_verbose)
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
D("%s ring %d flags %d", na->up.name, kring->ring_id, flags);
|
2013-12-15 08:37:24 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
/* create a netmap_vp_adapter that describes a VALE port.
|
|
|
|
* Only persistent VALE ports have a non-null ifp.
|
|
|
|
*/
|
2013-12-15 08:37:24 +00:00
|
|
|
static int
|
netmap: align codebase to the current upstream (commit id 3fb001303718146)
Changelist:
- Turn tx_rings and rx_rings arrays into arrays of pointers to kring
structs. This patch includes fixes for ixv, ixl, ix, re, cxgbe, iflib,
vtnet and ptnet drivers to cope with the change.
- Generalize the nm_config() callback to accept a struct containing many
parameters.
- Introduce NKR_FAKERING to support buffers sharing (used for netmap
pipes)
- Improved API for external VALE modules.
- Various bug fixes and improvements to the netmap memory allocator,
including support for externally (userspace) allocated memory.
- Refactoring of netmap pipes: now linked rings share the same netmap
buffers, with a separate set of kring pointers (rhead, rcur, rtail).
Buffer swapping does not need to happen anymore.
- Large refactoring of the control API towards an extensible solution;
the goal is to allow the addition of more commands and extension of
existing ones (with new options) without the need of hacks or the
risk of running out of configuration space.
A new NIOCCTRL ioctl has been added to handle all the requests of the
new control API, which cover all the functionalities so far supported.
The netmap API bumps from 11 to 12 with this patch. Full backward
compatibility is provided for the old control command (NIOCREGIF), by
means of a new netmap_legacy module. Many parts of the old netmap.h
header has now been moved to netmap_legacy.h (included by netmap.h).
Approved by: hrs (mentor)
2018-04-12 07:20:50 +00:00
|
|
|
netmap_vp_create(struct nmreq_header *hdr, struct ifnet *ifp,
|
|
|
|
struct netmap_mem_d *nmd, struct netmap_vp_adapter **ret)
|
2013-12-15 08:37:24 +00:00
|
|
|
{
|
2018-05-18 03:38:17 +00:00
|
|
|
struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body;
|
2013-12-15 08:37:24 +00:00
|
|
|
struct netmap_vp_adapter *vpna;
|
|
|
|
struct netmap_adapter *na;
|
2017-06-12 22:53:18 +00:00
|
|
|
int error = 0;
|
This new version of netmap brings you the following:
- netmap pipes, providing bidirectional blocking I/O while moving
100+ Mpps between processes using shared memory channels
(no mistake: over one hundred million. But mind you, i said
*moving* not *processing*);
- kqueue support (BHyVe needs it);
- improved user library. Just the interface name lets you select a NIC,
host port, VALE switch port, netmap pipe, and individual queues.
The upcoming netmap-enabled libpcap will use this feature.
- optional extra buffers associated to netmap ports, for applications
that need to buffer data yet don't want to make copies.
- segmentation offloading for the VALE switch, useful between VMs.
and a number of bug fixes and performance improvements.
My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial
amount of work on these features so we owe them a big thanks.
There are some external repositories that can be of interest:
https://code.google.com/p/netmap
our public repository for netmap/VALE code, including
linux versions and other stuff that does not belong here,
such as python bindings.
https://code.google.com/p/netmap-libpcap
a clone of the libpcap repository with netmap support.
With this any libpcap client has access to most netmap
feature with no recompilation. E.g. tcpdump can filter
packets at 10-15 Mpps.
https://code.google.com/p/netmap-ipfw
a userspace version of ipfw+dummynet which uses netmap
to send/receive packets. Speed is up in the 7-10 Mpps
range per core for simple rulesets.
Both netmap-libpcap and netmap-ipfw will be merged upstream at some
point, but while this happens it is useful to have access to them.
And yes, this code will be merged soon. It is infinitely better
than the version currently in 10 and 9.
MFC after: 3 days
2014-02-15 04:53:04 +00:00
|
|
|
u_int npipes = 0;
|
netmap: align codebase to the current upstream (commit id 3fb001303718146)
Changelist:
- Turn tx_rings and rx_rings arrays into arrays of pointers to kring
structs. This patch includes fixes for ixv, ixl, ix, re, cxgbe, iflib,
vtnet and ptnet drivers to cope with the change.
- Generalize the nm_config() callback to accept a struct containing many
parameters.
- Introduce NKR_FAKERING to support buffers sharing (used for netmap
pipes)
- Improved API for external VALE modules.
- Various bug fixes and improvements to the netmap memory allocator,
including support for externally (userspace) allocated memory.
- Refactoring of netmap pipes: now linked rings share the same netmap
buffers, with a separate set of kring pointers (rhead, rcur, rtail).
Buffer swapping does not need to happen anymore.
- Large refactoring of the control API towards an extensible solution;
the goal is to allow the addition of more commands and extension of
existing ones (with new options) without the need of hacks or the
risk of running out of configuration space.
A new NIOCCTRL ioctl has been added to handle all the requests of the
new control API, which cover all the functionalities so far supported.
The netmap API bumps from 11 to 12 with this patch. Full backward
compatibility is provided for the old control command (NIOCREGIF), by
means of a new netmap_legacy module. Many parts of the old netmap.h
header has now been moved to netmap_legacy.h (included by netmap.h).
Approved by: hrs (mentor)
2018-04-12 07:20:50 +00:00
|
|
|
u_int extrabufs = 0;
|
|
|
|
|
|
|
|
if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
|
|
|
|
return EINVAL;
|
|
|
|
}
|
2013-12-15 08:37:24 +00:00
|
|
|
|
2017-06-12 22:53:18 +00:00
|
|
|
vpna = nm_os_malloc(sizeof(*vpna));
|
2013-12-15 08:37:24 +00:00
|
|
|
if (vpna == NULL)
|
|
|
|
return ENOMEM;
|
|
|
|
|
|
|
|
na = &vpna->up;
|
|
|
|
|
|
|
|
na->ifp = ifp;
|
netmap: align codebase to the current upstream (commit id 3fb001303718146)
Changelist:
- Turn tx_rings and rx_rings arrays into arrays of pointers to kring
structs. This patch includes fixes for ixv, ixl, ix, re, cxgbe, iflib,
vtnet and ptnet drivers to cope with the change.
- Generalize the nm_config() callback to accept a struct containing many
parameters.
- Introduce NKR_FAKERING to support buffers sharing (used for netmap
pipes)
- Improved API for external VALE modules.
- Various bug fixes and improvements to the netmap memory allocator,
including support for externally (userspace) allocated memory.
- Refactoring of netmap pipes: now linked rings share the same netmap
buffers, with a separate set of kring pointers (rhead, rcur, rtail).
Buffer swapping does not need to happen anymore.
- Large refactoring of the control API towards an extensible solution;
the goal is to allow the addition of more commands and extension of
existing ones (with new options) without the need of hacks or the
risk of running out of configuration space.
A new NIOCCTRL ioctl has been added to handle all the requests of the
new control API, which cover all the functionalities so far supported.
The netmap API bumps from 11 to 12 with this patch. Full backward
compatibility is provided for the old control command (NIOCREGIF), by
means of a new netmap_legacy module. Many parts of the old netmap.h
header has now been moved to netmap_legacy.h (included by netmap.h).
Approved by: hrs (mentor)
2018-04-12 07:20:50 +00:00
|
|
|
strncpy(na->name, hdr->nr_name, sizeof(na->name));
|
2013-12-15 08:37:24 +00:00
|
|
|
|
|
|
|
/* bound checking */
|
netmap: align codebase to the current upstream (commit id 3fb001303718146)
Changelist:
- Turn tx_rings and rx_rings arrays into arrays of pointers to kring
structs. This patch includes fixes for ixv, ixl, ix, re, cxgbe, iflib,
vtnet and ptnet drivers to cope with the change.
- Generalize the nm_config() callback to accept a struct containing many
parameters.
- Introduce NKR_FAKERING to support buffers sharing (used for netmap
pipes)
- Improved API for external VALE modules.
- Various bug fixes and improvements to the netmap memory allocator,
including support for externally (userspace) allocated memory.
- Refactoring of netmap pipes: now linked rings share the same netmap
buffers, with a separate set of kring pointers (rhead, rcur, rtail).
Buffer swapping does not need to happen anymore.
- Large refactoring of the control API towards an extensible solution;
the goal is to allow the addition of more commands and extension of
existing ones (with new options) without the need of hacks or the
risk of running out of configuration space.
A new NIOCCTRL ioctl has been added to handle all the requests of the
new control API, which cover all the functionalities so far supported.
The netmap API bumps from 11 to 12 with this patch. Full backward
compatibility is provided for the old control command (NIOCREGIF), by
means of a new netmap_legacy module. Many parts of the old netmap.h
header has now been moved to netmap_legacy.h (included by netmap.h).
Approved by: hrs (mentor)
2018-04-12 07:20:50 +00:00
|
|
|
na->num_tx_rings = req->nr_tx_rings;
|
2013-12-15 08:37:24 +00:00
|
|
|
nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
|
netmap: align codebase to the current upstream (commit id 3fb001303718146)
Changelist:
- Turn tx_rings and rx_rings arrays into arrays of pointers to kring
structs. This patch includes fixes for ixv, ixl, ix, re, cxgbe, iflib,
vtnet and ptnet drivers to cope with the change.
- Generalize the nm_config() callback to accept a struct containing many
parameters.
- Introduce NKR_FAKERING to support buffers sharing (used for netmap
pipes)
- Improved API for external VALE modules.
- Various bug fixes and improvements to the netmap memory allocator,
including support for externally (userspace) allocated memory.
- Refactoring of netmap pipes: now linked rings share the same netmap
buffers, with a separate set of kring pointers (rhead, rcur, rtail).
Buffer swapping does not need to happen anymore.
- Large refactoring of the control API towards an extensible solution;
the goal is to allow the addition of more commands and extension of
existing ones (with new options) without the need of hacks or the
risk of running out of configuration space.
A new NIOCCTRL ioctl has been added to handle all the requests of the
new control API, which cover all the functionalities so far supported.
The netmap API bumps from 11 to 12 with this patch. Full backward
compatibility is provided for the old control command (NIOCREGIF), by
means of a new netmap_legacy module. Many parts of the old netmap.h
header has now been moved to netmap_legacy.h (included by netmap.h).
Approved by: hrs (mentor)
2018-04-12 07:20:50 +00:00
|
|
|
req->nr_tx_rings = na->num_tx_rings; /* write back */
|
|
|
|
na->num_rx_rings = req->nr_rx_rings;
|
2013-12-15 08:37:24 +00:00
|
|
|
nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
|
netmap: align codebase to the current upstream (commit id 3fb001303718146)
Changelist:
- Turn tx_rings and rx_rings arrays into arrays of pointers to kring
structs. This patch includes fixes for ixv, ixl, ix, re, cxgbe, iflib,
vtnet and ptnet drivers to cope with the change.
- Generalize the nm_config() callback to accept a struct containing many
parameters.
- Introduce NKR_FAKERING to support buffers sharing (used for netmap
pipes)
- Improved API for external VALE modules.
- Various bug fixes and improvements to the netmap memory allocator,
including support for externally (userspace) allocated memory.
- Refactoring of netmap pipes: now linked rings share the same netmap
buffers, with a separate set of kring pointers (rhead, rcur, rtail).
Buffer swapping does not need to happen anymore.
- Large refactoring of the control API towards an extensible solution;
the goal is to allow the addition of more commands and extension of
existing ones (with new options) without the need of hacks or the
risk of running out of configuration space.
A new NIOCCTRL ioctl has been added to handle all the requests of the
new control API, which cover all the functionalities so far supported.
The netmap API bumps from 11 to 12 with this patch. Full backward
compatibility is provided for the old control command (NIOCREGIF), by
means of a new netmap_legacy module. Many parts of the old netmap.h
header has now been moved to netmap_legacy.h (included by netmap.h).
Approved by: hrs (mentor)
2018-04-12 07:20:50 +00:00
|
|
|
req->nr_rx_rings = na->num_rx_rings; /* write back */
|
|
|
|
nm_bound_var(&req->nr_tx_slots, NM_BRIDGE_RINGSIZE,
|
2013-12-15 08:37:24 +00:00
|
|
|
1, NM_BDG_MAXSLOTS, NULL);
|
netmap: align codebase to the current upstream (commit id 3fb001303718146)
Changelist:
- Turn tx_rings and rx_rings arrays into arrays of pointers to kring
structs. This patch includes fixes for ixv, ixl, ix, re, cxgbe, iflib,
vtnet and ptnet drivers to cope with the change.
- Generalize the nm_config() callback to accept a struct containing many
parameters.
- Introduce NKR_FAKERING to support buffers sharing (used for netmap
pipes)
- Improved API for external VALE modules.
- Various bug fixes and improvements to the netmap memory allocator,
including support for externally (userspace) allocated memory.
- Refactoring of netmap pipes: now linked rings share the same netmap
buffers, with a separate set of kring pointers (rhead, rcur, rtail).
Buffer swapping does not need to happen anymore.
- Large refactoring of the control API towards an extensible solution;
the goal is to allow the addition of more commands and extension of
existing ones (with new options) without the need of hacks or the
risk of running out of configuration space.
A new NIOCCTRL ioctl has been added to handle all the requests of the
new control API, which cover all the functionalities so far supported.
The netmap API bumps from 11 to 12 with this patch. Full backward
compatibility is provided for the old control command (NIOCREGIF), by
means of a new netmap_legacy module. Many parts of the old netmap.h
header has now been moved to netmap_legacy.h (included by netmap.h).
Approved by: hrs (mentor)
2018-04-12 07:20:50 +00:00
|
|
|
na->num_tx_desc = req->nr_tx_slots;
|
|
|
|
nm_bound_var(&req->nr_rx_slots, NM_BRIDGE_RINGSIZE,
|
2013-12-15 08:37:24 +00:00
|
|
|
1, NM_BDG_MAXSLOTS, NULL);
|
This new version of netmap brings you the following:
- netmap pipes, providing bidirectional blocking I/O while moving
100+ Mpps between processes using shared memory channels
(no mistake: over one hundred million. But mind you, i said
*moving* not *processing*);
- kqueue support (BHyVe needs it);
- improved user library. Just the interface name lets you select a NIC,
host port, VALE switch port, netmap pipe, and individual queues.
The upcoming netmap-enabled libpcap will use this feature.
- optional extra buffers associated to netmap ports, for applications
that need to buffer data yet don't want to make copies.
- segmentation offloading for the VALE switch, useful between VMs.
and a number of bug fixes and performance improvements.
My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial
amount of work on these features so we owe them a big thanks.
There are some external repositories that can be of interest:
https://code.google.com/p/netmap
our public repository for netmap/VALE code, including
linux versions and other stuff that does not belong here,
such as python bindings.
https://code.google.com/p/netmap-libpcap
a clone of the libpcap repository with netmap support.
With this any libpcap client has access to most netmap
feature with no recompilation. E.g. tcpdump can filter
packets at 10-15 Mpps.
https://code.google.com/p/netmap-ipfw
a userspace version of ipfw+dummynet which uses netmap
to send/receive packets. Speed is up in the 7-10 Mpps
range per core for simple rulesets.
Both netmap-libpcap and netmap-ipfw will be merged upstream at some
point, but while this happens it is useful to have access to them.
And yes, this code will be merged soon. It is infinitely better
than the version currently in 10 and 9.
MFC after: 3 days
2014-02-15 04:53:04 +00:00
|
|
|
/* validate number of pipes. We want at least 1,
|
|
|
|
* but probably can do with some more.
|
|
|
|
* So let's use 2 as default (when 0 is supplied)
|
|
|
|
*/
|
|
|
|
nm_bound_var(&npipes, 2, 1, NM_MAXPIPES, NULL);
|
|
|
|
/* validate extra bufs */
|
netmap: align codebase to the current upstream (commit id 3fb001303718146)
Changelist:
- Turn tx_rings and rx_rings arrays into arrays of pointers to kring
structs. This patch includes fixes for ixv, ixl, ix, re, cxgbe, iflib,
vtnet and ptnet drivers to cope with the change.
- Generalize the nm_config() callback to accept a struct containing many
parameters.
- Introduce NKR_FAKERING to support buffers sharing (used for netmap
pipes)
- Improved API for external VALE modules.
- Various bug fixes and improvements to the netmap memory allocator,
including support for externally (userspace) allocated memory.
- Refactoring of netmap pipes: now linked rings share the same netmap
buffers, with a separate set of kring pointers (rhead, rcur, rtail).
Buffer swapping does not need to happen anymore.
- Large refactoring of the control API towards an extensible solution;
the goal is to allow the addition of more commands and extension of
existing ones (with new options) without the need of hacks or the
risk of running out of configuration space.
A new NIOCCTRL ioctl has been added to handle all the requests of the
new control API, which cover all the functionalities so far supported.
The netmap API bumps from 11 to 12 with this patch. Full backward
compatibility is provided for the old control command (NIOCREGIF), by
means of a new netmap_legacy module. Many parts of the old netmap.h
header has now been moved to netmap_legacy.h (included by netmap.h).
Approved by: hrs (mentor)
2018-04-12 07:20:50 +00:00
|
|
|
nm_bound_var(&extrabufs, 0, 0,
|
This new version of netmap brings you the following:
- netmap pipes, providing bidirectional blocking I/O while moving
100+ Mpps between processes using shared memory channels
(no mistake: over one hundred million. But mind you, i said
*moving* not *processing*);
- kqueue support (BHyVe needs it);
- improved user library. Just the interface name lets you select a NIC,
host port, VALE switch port, netmap pipe, and individual queues.
The upcoming netmap-enabled libpcap will use this feature.
- optional extra buffers associated to netmap ports, for applications
that need to buffer data yet don't want to make copies.
- segmentation offloading for the VALE switch, useful between VMs.
and a number of bug fixes and performance improvements.
My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial
amount of work on these features so we owe them a big thanks.
There are some external repositories that can be of interest:
https://code.google.com/p/netmap
our public repository for netmap/VALE code, including
linux versions and other stuff that does not belong here,
such as python bindings.
https://code.google.com/p/netmap-libpcap
a clone of the libpcap repository with netmap support.
With this any libpcap client has access to most netmap
feature with no recompilation. E.g. tcpdump can filter
packets at 10-15 Mpps.
https://code.google.com/p/netmap-ipfw
a userspace version of ipfw+dummynet which uses netmap
to send/receive packets. Speed is up in the 7-10 Mpps
range per core for simple rulesets.
Both netmap-libpcap and netmap-ipfw will be merged upstream at some
point, but while this happens it is useful to have access to them.
And yes, this code will be merged soon. It is infinitely better
than the version currently in 10 and 9.
MFC after: 3 days
2014-02-15 04:53:04 +00:00
|
|
|
128*NM_BDG_MAXSLOTS, NULL);
|
netmap: align codebase to the current upstream (commit id 3fb001303718146)
Changelist:
- Turn tx_rings and rx_rings arrays into arrays of pointers to kring
structs. This patch includes fixes for ixv, ixl, ix, re, cxgbe, iflib,
vtnet and ptnet drivers to cope with the change.
- Generalize the nm_config() callback to accept a struct containing many
parameters.
- Introduce NKR_FAKERING to support buffers sharing (used for netmap
pipes)
- Improved API for external VALE modules.
- Various bug fixes and improvements to the netmap memory allocator,
including support for externally (userspace) allocated memory.
- Refactoring of netmap pipes: now linked rings share the same netmap
buffers, with a separate set of kring pointers (rhead, rcur, rtail).
Buffer swapping does not need to happen anymore.
- Large refactoring of the control API towards an extensible solution;
the goal is to allow the addition of more commands and extension of
existing ones (with new options) without the need of hacks or the
risk of running out of configuration space.
A new NIOCCTRL ioctl has been added to handle all the requests of the
new control API, which cover all the functionalities so far supported.
The netmap API bumps from 11 to 12 with this patch. Full backward
compatibility is provided for the old control command (NIOCREGIF), by
means of a new netmap_legacy module. Many parts of the old netmap.h
header has now been moved to netmap_legacy.h (included by netmap.h).
Approved by: hrs (mentor)
2018-04-12 07:20:50 +00:00
|
|
|
req->nr_extra_bufs = extrabufs; /* write back */
|
|
|
|
na->num_rx_desc = req->nr_rx_slots;
|
2018-04-09 09:24:26 +00:00
|
|
|
/* Set the mfs to a default value, as it is needed on the VALE
|
|
|
|
* mismatch datapath. XXX We should set it according to the MTU
|
|
|
|
* known to the kernel. */
|
|
|
|
vpna->mfs = NM_BDG_MFS_DEFAULT;
|
Sync netmap sources with the version in our private tree.
This commit contains large contributions from Giuseppe Lettieri and
Stefano Garzarella, is partly supported by grants from Verisign and Cisco,
and brings in the following:
- fix zerocopy monitor ports and introduce copying monitor ports
(the latter are lower performance but give access to all traffic
in parallel with the application)
- exclusive open mode, useful to implement solutions that recover
from crashes of the main netmap client (suggested by Patrick Kelsey)
- revised memory allocator in preparation for the 'passthrough mode'
(ptnetmap) recently presented at bsdcan. ptnetmap is described in
S. Garzarella, G. Lettieri, L. Rizzo;
Virtual device passthrough for high speed VM networking,
ACM/IEEE ANCS 2015, Oakland (CA) May 2015
http://info.iet.unipi.it/~luigi/research.html
- fix rx CRC handing on ixl
- add module dependencies for netmap when building drivers as modules
- minor simplifications to device-specific routines (*txsync, *rxsync)
- general code cleanup (remove unused variables, introduce macros
to access rings and remove duplicate code,
Applications do not need to be recompiled, unless of course
they want to use the new features (monitors and exclusive open).
Those willing to try this code on stable/10 can just update the
sys/dev/netmap/*, sys/net/netmap* with the version in HEAD
and apply the small patches to individual device drivers.
MFC after: 1 month
Sponsored by: (partly) Verisign, Cisco
2015-07-10 05:51:36 +00:00
|
|
|
vpna->last_smac = ~0llu;
|
This new version of netmap brings you the following:
- netmap pipes, providing bidirectional blocking I/O while moving
100+ Mpps between processes using shared memory channels
(no mistake: over one hundred million. But mind you, i said
*moving* not *processing*);
- kqueue support (BHyVe needs it);
- improved user library. Just the interface name lets you select a NIC,
host port, VALE switch port, netmap pipe, and individual queues.
The upcoming netmap-enabled libpcap will use this feature.
- optional extra buffers associated to netmap ports, for applications
that need to buffer data yet don't want to make copies.
- segmentation offloading for the VALE switch, useful between VMs.
and a number of bug fixes and performance improvements.
My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial
amount of work on these features so we owe them a big thanks.
There are some external repositories that can be of interest:
https://code.google.com/p/netmap
our public repository for netmap/VALE code, including
linux versions and other stuff that does not belong here,
such as python bindings.
https://code.google.com/p/netmap-libpcap
a clone of the libpcap repository with netmap support.
With this any libpcap client has access to most netmap
feature with no recompilation. E.g. tcpdump can filter
packets at 10-15 Mpps.
https://code.google.com/p/netmap-ipfw
a userspace version of ipfw+dummynet which uses netmap
to send/receive packets. Speed is up in the 7-10 Mpps
range per core for simple rulesets.
Both netmap-libpcap and netmap-ipfw will be merged upstream at some
point, but while this happens it is useful to have access to them.
And yes, this code will be merged soon. It is infinitely better
than the version currently in 10 and 9.
MFC after: 3 days
2014-02-15 04:53:04 +00:00
|
|
|
/*if (vpna->mfs > netmap_buf_size) TODO netmap_buf_size is zero??
|
|
|
|
vpna->mfs = netmap_buf_size; */
|
2018-05-18 03:38:17 +00:00
|
|
|
if (netmap_verbose)
|
This new version of netmap brings you the following:
- netmap pipes, providing bidirectional blocking I/O while moving
100+ Mpps between processes using shared memory channels
(no mistake: over one hundred million. But mind you, i said
*moving* not *processing*);
- kqueue support (BHyVe needs it);
- improved user library. Just the interface name lets you select a NIC,
host port, VALE switch port, netmap pipe, and individual queues.
The upcoming netmap-enabled libpcap will use this feature.
- optional extra buffers associated to netmap ports, for applications
that need to buffer data yet don't want to make copies.
- segmentation offloading for the VALE switch, useful between VMs.
and a number of bug fixes and performance improvements.
My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial
amount of work on these features so we owe them a big thanks.
There are some external repositories that can be of interest:
https://code.google.com/p/netmap
our public repository for netmap/VALE code, including
linux versions and other stuff that does not belong here,
such as python bindings.
https://code.google.com/p/netmap-libpcap
a clone of the libpcap repository with netmap support.
With this any libpcap client has access to most netmap
feature with no recompilation. E.g. tcpdump can filter
packets at 10-15 Mpps.
https://code.google.com/p/netmap-ipfw
a userspace version of ipfw+dummynet which uses netmap
to send/receive packets. Speed is up in the 7-10 Mpps
range per core for simple rulesets.
Both netmap-libpcap and netmap-ipfw will be merged upstream at some
point, but while this happens it is useful to have access to them.
And yes, this code will be merged soon. It is infinitely better
than the version currently in 10 and 9.
MFC after: 3 days
2014-02-15 04:53:04 +00:00
|
|
|
D("max frame size %u", vpna->mfs);
|
2013-12-15 08:37:24 +00:00
|
|
|
|
Sync netmap sources with the version in our private tree.
This commit contains large contributions from Giuseppe Lettieri and
Stefano Garzarella, is partly supported by grants from Verisign and Cisco,
and brings in the following:
- fix zerocopy monitor ports and introduce copying monitor ports
(the latter are lower performance but give access to all traffic
in parallel with the application)
- exclusive open mode, useful to implement solutions that recover
from crashes of the main netmap client (suggested by Patrick Kelsey)
- revised memory allocator in preparation for the 'passthrough mode'
(ptnetmap) recently presented at bsdcan. ptnetmap is described in
S. Garzarella, G. Lettieri, L. Rizzo;
Virtual device passthrough for high speed VM networking,
ACM/IEEE ANCS 2015, Oakland (CA) May 2015
http://info.iet.unipi.it/~luigi/research.html
- fix rx CRC handing on ixl
- add module dependencies for netmap when building drivers as modules
- minor simplifications to device-specific routines (*txsync, *rxsync)
- general code cleanup (remove unused variables, introduce macros
to access rings and remove duplicate code,
Applications do not need to be recompiled, unless of course
they want to use the new features (monitors and exclusive open).
Those willing to try this code on stable/10 can just update the
sys/dev/netmap/*, sys/net/netmap* with the version in HEAD
and apply the small patches to individual device drivers.
MFC after: 1 month
Sponsored by: (partly) Verisign, Cisco
2015-07-10 05:51:36 +00:00
|
|
|
na->na_flags |= NAF_BDG_MAYSLEEP;
|
2015-07-19 18:06:30 +00:00
|
|
|
/* persistent VALE ports look like hw devices
|
|
|
|
* with a native netmap adapter
|
|
|
|
*/
|
|
|
|
if (ifp)
|
|
|
|
na->na_flags |= NAF_NATIVE;
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
na->nm_txsync = netmap_vp_txsync;
|
|
|
|
na->nm_rxsync = netmap_vp_rxsync;
|
|
|
|
na->nm_register = netmap_vp_reg;
|
2013-12-15 08:37:24 +00:00
|
|
|
na->nm_krings_create = netmap_vp_krings_create;
|
|
|
|
na->nm_krings_delete = netmap_vp_krings_delete;
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
na->nm_dtor = netmap_vp_dtor;
|
netmap: align codebase to the current upstream (commit id 3fb001303718146)
Changelist:
- Turn tx_rings and rx_rings arrays into arrays of pointers to kring
structs. This patch includes fixes for ixv, ixl, ix, re, cxgbe, iflib,
vtnet and ptnet drivers to cope with the change.
- Generalize the nm_config() callback to accept a struct containing many
parameters.
- Introduce NKR_FAKERING to support buffers sharing (used for netmap
pipes)
- Improved API for external VALE modules.
- Various bug fixes and improvements to the netmap memory allocator,
including support for externally (userspace) allocated memory.
- Refactoring of netmap pipes: now linked rings share the same netmap
buffers, with a separate set of kring pointers (rhead, rcur, rtail).
Buffer swapping does not need to happen anymore.
- Large refactoring of the control API towards an extensible solution;
the goal is to allow the addition of more commands and extension of
existing ones (with new options) without the need of hacks or the
risk of running out of configuration space.
A new NIOCCTRL ioctl has been added to handle all the requests of the
new control API, which cover all the functionalities so far supported.
The netmap API bumps from 11 to 12 with this patch. Full backward
compatibility is provided for the old control command (NIOCREGIF), by
means of a new netmap_legacy module. Many parts of the old netmap.h
header has now been moved to netmap_legacy.h (included by netmap.h).
Approved by: hrs (mentor)
2018-04-12 07:20:50 +00:00
|
|
|
ND("nr_mem_id %d", req->nr_mem_id);
|
2017-06-12 22:53:18 +00:00
|
|
|
na->nm_mem = nmd ?
|
|
|
|
netmap_mem_get(nmd):
|
|
|
|
netmap_mem_private_new(
|
2013-12-15 08:37:24 +00:00
|
|
|
na->num_tx_rings, na->num_tx_desc,
|
This new version of netmap brings you the following:
- netmap pipes, providing bidirectional blocking I/O while moving
100+ Mpps between processes using shared memory channels
(no mistake: over one hundred million. But mind you, i said
*moving* not *processing*);
- kqueue support (BHyVe needs it);
- improved user library. Just the interface name lets you select a NIC,
host port, VALE switch port, netmap pipe, and individual queues.
The upcoming netmap-enabled libpcap will use this feature.
- optional extra buffers associated to netmap ports, for applications
that need to buffer data yet don't want to make copies.
- segmentation offloading for the VALE switch, useful between VMs.
and a number of bug fixes and performance improvements.
My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial
amount of work on these features so we owe them a big thanks.
There are some external repositories that can be of interest:
https://code.google.com/p/netmap
our public repository for netmap/VALE code, including
linux versions and other stuff that does not belong here,
such as python bindings.
https://code.google.com/p/netmap-libpcap
a clone of the libpcap repository with netmap support.
With this any libpcap client has access to most netmap
feature with no recompilation. E.g. tcpdump can filter
packets at 10-15 Mpps.
https://code.google.com/p/netmap-ipfw
a userspace version of ipfw+dummynet which uses netmap
to send/receive packets. Speed is up in the 7-10 Mpps
range per core for simple rulesets.
Both netmap-libpcap and netmap-ipfw will be merged upstream at some
point, but while this happens it is useful to have access to them.
And yes, this code will be merged soon. It is infinitely better
than the version currently in 10 and 9.
MFC after: 3 days
2014-02-15 04:53:04 +00:00
|
|
|
na->num_rx_rings, na->num_rx_desc,
|
netmap: align codebase to the current upstream (commit id 3fb001303718146)
Changelist:
- Turn tx_rings and rx_rings arrays into arrays of pointers to kring
structs. This patch includes fixes for ixv, ixl, ix, re, cxgbe, iflib,
vtnet and ptnet drivers to cope with the change.
- Generalize the nm_config() callback to accept a struct containing many
parameters.
- Introduce NKR_FAKERING to support buffers sharing (used for netmap
pipes)
- Improved API for external VALE modules.
- Various bug fixes and improvements to the netmap memory allocator,
including support for externally (userspace) allocated memory.
- Refactoring of netmap pipes: now linked rings share the same netmap
buffers, with a separate set of kring pointers (rhead, rcur, rtail).
Buffer swapping does not need to happen anymore.
- Large refactoring of the control API towards an extensible solution;
the goal is to allow the addition of more commands and extension of
existing ones (with new options) without the need of hacks or the
risk of running out of configuration space.
A new NIOCCTRL ioctl has been added to handle all the requests of the
new control API, which cover all the functionalities so far supported.
The netmap API bumps from 11 to 12 with this patch. Full backward
compatibility is provided for the old control command (NIOCREGIF), by
means of a new netmap_legacy module. Many parts of the old netmap.h
header has now been moved to netmap_legacy.h (included by netmap.h).
Approved by: hrs (mentor)
2018-04-12 07:20:50 +00:00
|
|
|
req->nr_extra_bufs, npipes, &error);
|
This new version of netmap brings you the following:
- netmap pipes, providing bidirectional blocking I/O while moving
100+ Mpps between processes using shared memory channels
(no mistake: over one hundred million. But mind you, i said
*moving* not *processing*);
- kqueue support (BHyVe needs it);
- improved user library. Just the interface name lets you select a NIC,
host port, VALE switch port, netmap pipe, and individual queues.
The upcoming netmap-enabled libpcap will use this feature.
- optional extra buffers associated to netmap ports, for applications
that need to buffer data yet don't want to make copies.
- segmentation offloading for the VALE switch, useful between VMs.
and a number of bug fixes and performance improvements.
My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial
amount of work on these features so we owe them a big thanks.
There are some external repositories that can be of interest:
https://code.google.com/p/netmap
our public repository for netmap/VALE code, including
linux versions and other stuff that does not belong here,
such as python bindings.
https://code.google.com/p/netmap-libpcap
a clone of the libpcap repository with netmap support.
With this any libpcap client has access to most netmap
feature with no recompilation. E.g. tcpdump can filter
packets at 10-15 Mpps.
https://code.google.com/p/netmap-ipfw
a userspace version of ipfw+dummynet which uses netmap
to send/receive packets. Speed is up in the 7-10 Mpps
range per core for simple rulesets.
Both netmap-libpcap and netmap-ipfw will be merged upstream at some
point, but while this happens it is useful to have access to them.
And yes, this code will be merged soon. It is infinitely better
than the version currently in 10 and 9.
MFC after: 3 days
2014-02-15 04:53:04 +00:00
|
|
|
if (na->nm_mem == NULL)
|
|
|
|
goto err;
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
na->nm_bdg_attach = netmap_vp_bdg_attach;
|
2013-12-15 08:37:24 +00:00
|
|
|
/* other nmd fields are set in the common routine */
|
|
|
|
error = netmap_attach_common(na);
|
This new version of netmap brings you the following:
- netmap pipes, providing bidirectional blocking I/O while moving
100+ Mpps between processes using shared memory channels
(no mistake: over one hundred million. But mind you, i said
*moving* not *processing*);
- kqueue support (BHyVe needs it);
- improved user library. Just the interface name lets you select a NIC,
host port, VALE switch port, netmap pipe, and individual queues.
The upcoming netmap-enabled libpcap will use this feature.
- optional extra buffers associated to netmap ports, for applications
that need to buffer data yet don't want to make copies.
- segmentation offloading for the VALE switch, useful between VMs.
and a number of bug fixes and performance improvements.
My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial
amount of work on these features so we owe them a big thanks.
There are some external repositories that can be of interest:
https://code.google.com/p/netmap
our public repository for netmap/VALE code, including
linux versions and other stuff that does not belong here,
such as python bindings.
https://code.google.com/p/netmap-libpcap
a clone of the libpcap repository with netmap support.
With this any libpcap client has access to most netmap
feature with no recompilation. E.g. tcpdump can filter
packets at 10-15 Mpps.
https://code.google.com/p/netmap-ipfw
a userspace version of ipfw+dummynet which uses netmap
to send/receive packets. Speed is up in the 7-10 Mpps
range per core for simple rulesets.
Both netmap-libpcap and netmap-ipfw will be merged upstream at some
point, but while this happens it is useful to have access to them.
And yes, this code will be merged soon. It is infinitely better
than the version currently in 10 and 9.
MFC after: 3 days
2014-02-15 04:53:04 +00:00
|
|
|
if (error)
|
|
|
|
goto err;
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
*ret = vpna;
|
2013-12-15 08:37:24 +00:00
|
|
|
return 0;
|
This new version of netmap brings you the following:
- netmap pipes, providing bidirectional blocking I/O while moving
100+ Mpps between processes using shared memory channels
(no mistake: over one hundred million. But mind you, i said
*moving* not *processing*);
- kqueue support (BHyVe needs it);
- improved user library. Just the interface name lets you select a NIC,
host port, VALE switch port, netmap pipe, and individual queues.
The upcoming netmap-enabled libpcap will use this feature.
- optional extra buffers associated to netmap ports, for applications
that need to buffer data yet don't want to make copies.
- segmentation offloading for the VALE switch, useful between VMs.
and a number of bug fixes and performance improvements.
My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial
amount of work on these features so we owe them a big thanks.
There are some external repositories that can be of interest:
https://code.google.com/p/netmap
our public repository for netmap/VALE code, including
linux versions and other stuff that does not belong here,
such as python bindings.
https://code.google.com/p/netmap-libpcap
a clone of the libpcap repository with netmap support.
With this any libpcap client has access to most netmap
feature with no recompilation. E.g. tcpdump can filter
packets at 10-15 Mpps.
https://code.google.com/p/netmap-ipfw
a userspace version of ipfw+dummynet which uses netmap
to send/receive packets. Speed is up in the 7-10 Mpps
range per core for simple rulesets.
Both netmap-libpcap and netmap-ipfw will be merged upstream at some
point, but while this happens it is useful to have access to them.
And yes, this code will be merged soon. It is infinitely better
than the version currently in 10 and 9.
MFC after: 3 days
2014-02-15 04:53:04 +00:00
|
|
|
|
|
|
|
err:
|
|
|
|
if (na->nm_mem != NULL)
|
2017-06-12 22:53:18 +00:00
|
|
|
netmap_mem_put(na->nm_mem);
|
|
|
|
nm_os_free(vpna);
|
This new version of netmap brings you the following:
- netmap pipes, providing bidirectional blocking I/O while moving
100+ Mpps between processes using shared memory channels
(no mistake: over one hundred million. But mind you, i said
*moving* not *processing*);
- kqueue support (BHyVe needs it);
- improved user library. Just the interface name lets you select a NIC,
host port, VALE switch port, netmap pipe, and individual queues.
The upcoming netmap-enabled libpcap will use this feature.
- optional extra buffers associated to netmap ports, for applications
that need to buffer data yet don't want to make copies.
- segmentation offloading for the VALE switch, useful between VMs.
and a number of bug fixes and performance improvements.
My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial
amount of work on these features so we owe them a big thanks.
There are some external repositories that can be of interest:
https://code.google.com/p/netmap
our public repository for netmap/VALE code, including
linux versions and other stuff that does not belong here,
such as python bindings.
https://code.google.com/p/netmap-libpcap
a clone of the libpcap repository with netmap support.
With this any libpcap client has access to most netmap
feature with no recompilation. E.g. tcpdump can filter
packets at 10-15 Mpps.
https://code.google.com/p/netmap-ipfw
a userspace version of ipfw+dummynet which uses netmap
to send/receive packets. Speed is up in the 7-10 Mpps
range per core for simple rulesets.
Both netmap-libpcap and netmap-ipfw will be merged upstream at some
point, but while this happens it is useful to have access to them.
And yes, this code will be merged soon. It is infinitely better
than the version currently in 10 and 9.
MFC after: 3 days
2014-02-15 04:53:04 +00:00
|
|
|
return error;
|
2013-12-15 08:37:24 +00:00
|
|
|
}
|
|
|
|
|
2018-10-23 08:55:16 +00:00
|
|
|
/* nm_bdg_attach callback for VALE ports
|
|
|
|
* The na_vp port is this same netmap_adapter. There is no host port.
|
2013-12-15 08:37:24 +00:00
|
|
|
*/
|
|
|
|
static int
|
2018-10-23 08:55:16 +00:00
|
|
|
netmap_vp_bdg_attach(const char *name, struct netmap_adapter *na,
|
|
|
|
struct nm_bridge *b)
|
2013-12-15 08:37:24 +00:00
|
|
|
{
|
2018-10-23 08:55:16 +00:00
|
|
|
struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
|
2013-12-15 08:37:24 +00:00
|
|
|
|
2018-10-23 08:55:16 +00:00
|
|
|
if (b->bdg_ops != &vale_bdg_ops) {
|
|
|
|
return NM_NEED_BWRAP;
|
|
|
|
}
|
|
|
|
if (vpna->na_bdg) {
|
|
|
|
return NM_NEED_BWRAP;
|
|
|
|
}
|
|
|
|
na->na_vp = vpna;
|
|
|
|
strncpy(na->name, name, sizeof(na->name));
|
|
|
|
na->na_hostvp = NULL;
|
2013-12-15 08:37:24 +00:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2018-10-23 08:55:16 +00:00
|
|
|
netmap_vale_bwrap_krings_create(struct netmap_adapter *na)
|
2013-12-15 08:37:24 +00:00
|
|
|
{
|
2018-10-23 08:55:16 +00:00
|
|
|
int error;
|
2013-12-15 08:37:24 +00:00
|
|
|
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
/* impersonate a netmap_vp_adapter */
|
2013-12-15 08:37:24 +00:00
|
|
|
error = netmap_vp_krings_create(na);
|
|
|
|
if (error)
|
|
|
|
return error;
|
2018-10-23 08:55:16 +00:00
|
|
|
error = netmap_bwrap_krings_create_common(na);
|
2018-04-09 09:24:26 +00:00
|
|
|
if (error) {
|
2018-10-23 08:55:16 +00:00
|
|
|
netmap_vp_krings_delete(na);
|
2018-04-09 09:24:26 +00:00
|
|
|
}
|
Import the current version of netmap, aligned with the one on github.
This commit, long overdue, contains contributions in the last 2 years
from Stefano Garzarella, Giuseppe Lettieri, Vincenzo Maffione, including:
+ fixes on monitor ports
+ the 'ptnet' virtual device driver, and ptnetmap backend, for
high speed virtual passthrough on VMs (bhyve fixes in an upcoming commit)
+ improved emulated netmap mode
+ more robust error handling
+ removal of stale code
+ various fixes to code and documentation (some mixup between RX and TX
parameters, and private and public variables)
We also include an additional tool, nmreplay, which is functionally
equivalent to tcpreplay but operating on netmap ports.
2016-10-16 14:13:32 +00:00
|
|
|
return error;
|
2013-12-15 08:37:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2018-10-23 08:55:16 +00:00
|
|
|
netmap_vale_bwrap_krings_delete(struct netmap_adapter *na)
|
2013-12-15 08:37:24 +00:00
|
|
|
{
|
2018-10-23 08:55:16 +00:00
|
|
|
netmap_bwrap_krings_delete_common(na);
|
2013-12-15 08:37:24 +00:00
|
|
|
netmap_vp_krings_delete(na);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2018-10-23 08:55:16 +00:00
|
|
|
netmap_vale_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
|
2013-12-15 08:37:24 +00:00
|
|
|
{
|
|
|
|
struct netmap_bwrap_adapter *bna;
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
struct netmap_adapter *na = NULL;
|
|
|
|
struct netmap_adapter *hostna = NULL;
|
2018-10-23 08:55:16 +00:00
|
|
|
int error;
|
2013-12-15 08:37:24 +00:00
|
|
|
|
2017-06-12 22:53:18 +00:00
|
|
|
bna = nm_os_malloc(sizeof(*bna));
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
if (bna == NULL) {
|
2013-12-15 08:37:24 +00:00
|
|
|
return ENOMEM;
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
}
|
2013-12-15 08:37:24 +00:00
|
|
|
na = &bna->up.up;
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
strncpy(na->name, nr_name, sizeof(na->name));
|
Import the current version of netmap, aligned with the one on github.
This commit, long overdue, contains contributions in the last 2 years
from Stefano Garzarella, Giuseppe Lettieri, Vincenzo Maffione, including:
+ fixes on monitor ports
+ the 'ptnet' virtual device driver, and ptnetmap backend, for
high speed virtual passthrough on VMs (bhyve fixes in an upcoming commit)
+ improved emulated netmap mode
+ more robust error handling
+ removal of stale code
+ various fixes to code and documentation (some mixup between RX and TX
parameters, and private and public variables)
We also include an additional tool, nmreplay, which is functionally
equivalent to tcpreplay but operating on netmap ports.
2016-10-16 14:13:32 +00:00
|
|
|
na->nm_register = netmap_bwrap_reg;
|
2018-10-23 08:55:16 +00:00
|
|
|
na->nm_txsync = netmap_vp_txsync;
|
2013-12-15 08:37:24 +00:00
|
|
|
// na->nm_rxsync = netmap_bwrap_rxsync;
|
2018-10-23 08:55:16 +00:00
|
|
|
na->nm_krings_create = netmap_vale_bwrap_krings_create;
|
|
|
|
na->nm_krings_delete = netmap_vale_bwrap_krings_delete;
|
2013-12-15 08:37:24 +00:00
|
|
|
na->nm_notify = netmap_bwrap_notify;
|
|
|
|
bna->up.retry = 1; /* XXX maybe this should depend on the hwna */
|
2018-04-09 09:24:26 +00:00
|
|
|
/* Set the mfs, needed on the VALE mismatch datapath. */
|
|
|
|
bna->up.mfs = NM_BDG_MFS_DEFAULT;
|
2013-12-15 08:37:24 +00:00
|
|
|
|
This new version of netmap brings you the following:
- netmap pipes, providing bidirectional blocking I/O while moving
100+ Mpps between processes using shared memory channels
(no mistake: over one hundred million. But mind you, i said
*moving* not *processing*);
- kqueue support (BHyVe needs it);
- improved user library. Just the interface name lets you select a NIC,
host port, VALE switch port, netmap pipe, and individual queues.
The upcoming netmap-enabled libpcap will use this feature.
- optional extra buffers associated to netmap ports, for applications
that need to buffer data yet don't want to make copies.
- segmentation offloading for the VALE switch, useful between VMs.
and a number of bug fixes and performance improvements.
My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial
amount of work on these features so we owe them a big thanks.
There are some external repositories that can be of interest:
https://code.google.com/p/netmap
our public repository for netmap/VALE code, including
linux versions and other stuff that does not belong here,
such as python bindings.
https://code.google.com/p/netmap-libpcap
a clone of the libpcap repository with netmap support.
With this any libpcap client has access to most netmap
feature with no recompilation. E.g. tcpdump can filter
packets at 10-15 Mpps.
https://code.google.com/p/netmap-ipfw
a userspace version of ipfw+dummynet which uses netmap
to send/receive packets. Speed is up in the 7-10 Mpps
range per core for simple rulesets.
Both netmap-libpcap and netmap-ipfw will be merged upstream at some
point, but while this happens it is useful to have access to them.
And yes, this code will be merged soon. It is infinitely better
than the version currently in 10 and 9.
MFC after: 3 days
2014-02-15 04:53:04 +00:00
|
|
|
if (hwna->na_flags & NAF_HOST_RINGS) {
|
|
|
|
hostna = &bna->host.up;
|
Sync netmap sources with the version in our private tree.
This commit contains large contributions from Giuseppe Lettieri and
Stefano Garzarella, is partly supported by grants from Verisign and Cisco,
and brings in the following:
- fix zerocopy monitor ports and introduce copying monitor ports
(the latter are lower performance but give access to all traffic
in parallel with the application)
- exclusive open mode, useful to implement solutions that recover
from crashes of the main netmap client (suggested by Patrick Kelsey)
- revised memory allocator in preparation for the 'passthrough mode'
(ptnetmap) recently presented at bsdcan. ptnetmap is described in
S. Garzarella, G. Lettieri, L. Rizzo;
Virtual device passthrough for high speed VM networking,
ACM/IEEE ANCS 2015, Oakland (CA) May 2015
http://info.iet.unipi.it/~luigi/research.html
- fix rx CRC handing on ixl
- add module dependencies for netmap when building drivers as modules
- minor simplifications to device-specific routines (*txsync, *rxsync)
- general code cleanup (remove unused variables, introduce macros
to access rings and remove duplicate code,
Applications do not need to be recompiled, unless of course
they want to use the new features (monitors and exclusive open).
Those willing to try this code on stable/10 can just update the
sys/dev/netmap/*, sys/net/netmap* with the version in HEAD
and apply the small patches to individual device drivers.
MFC after: 1 month
Sponsored by: (partly) Verisign, Cisco
2015-07-10 05:51:36 +00:00
|
|
|
hostna->nm_notify = netmap_bwrap_notify;
|
2018-04-09 09:24:26 +00:00
|
|
|
bna->host.mfs = NM_BDG_MFS_DEFAULT;
|
This new version of netmap brings you the following:
- netmap pipes, providing bidirectional blocking I/O while moving
100+ Mpps between processes using shared memory channels
(no mistake: over one hundred million. But mind you, i said
*moving* not *processing*);
- kqueue support (BHyVe needs it);
- improved user library. Just the interface name lets you select a NIC,
host port, VALE switch port, netmap pipe, and individual queues.
The upcoming netmap-enabled libpcap will use this feature.
- optional extra buffers associated to netmap ports, for applications
that need to buffer data yet don't want to make copies.
- segmentation offloading for the VALE switch, useful between VMs.
and a number of bug fixes and performance improvements.
My colleagues Giuseppe Lettieri and Vincenzo Maffione did a substantial
amount of work on these features so we owe them a big thanks.
There are some external repositories that can be of interest:
https://code.google.com/p/netmap
our public repository for netmap/VALE code, including
linux versions and other stuff that does not belong here,
such as python bindings.
https://code.google.com/p/netmap-libpcap
a clone of the libpcap repository with netmap support.
With this any libpcap client has access to most netmap
feature with no recompilation. E.g. tcpdump can filter
packets at 10-15 Mpps.
https://code.google.com/p/netmap-ipfw
a userspace version of ipfw+dummynet which uses netmap
to send/receive packets. Speed is up in the 7-10 Mpps
range per core for simple rulesets.
Both netmap-libpcap and netmap-ipfw will be merged upstream at some
point, but while this happens it is useful to have access to them.
And yes, this code will be merged soon. It is infinitely better
than the version currently in 10 and 9.
MFC after: 3 days
2014-02-15 04:53:04 +00:00
|
|
|
}
|
2013-12-15 08:37:24 +00:00
|
|
|
|
2018-10-23 08:55:16 +00:00
|
|
|
error = netmap_bwrap_attach_common(na, hwna);
|
2013-12-15 08:37:24 +00:00
|
|
|
if (error) {
|
2018-10-23 08:55:16 +00:00
|
|
|
nm_os_free(bna);
|
2013-12-15 08:37:24 +00:00
|
|
|
}
|
Update to the current version of netmap.
Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.
Basically no user API changes (some bugfixes in sys/net/netmap_user.h).
In detail:
1. netmap support for virtio-net, including in netmap mode.
Under bhyve and with a netmap backend [2] we reach over 1Mpps
with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
2. (kernel) add support for multiple memory allocators, so we can
better partition physical and virtual interfaces giving access
to separate users. The most visible effect is one additional
argument to the various kernel functions to compute buffer
addresses. All netmap-supported drivers are affected, but changes
are mechanical and trivial
3. (kernel) simplify the prototype for *txsync() and *rxsync()
driver methods. All netmap drivers affected, changes mostly mechanical.
4. add support for netmap-monitor ports. Think of it as a mirroring
port on a physical switch: a netmap monitor port replicates traffic
present on the main port. Restrictions apply. Drive carefully.
5. if_lem.c: support for various paravirtualization features,
experimental and disabled by default.
Most of these are described in our ANCS'13 paper [1].
Paravirtualized support in netmap mode is new, and beats the
numbers in the paper by a large factor (under qemu-kvm,
we measured gues-host throughput up to 10-12 Mpps).
A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.
Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.
This is meant to go into 10.1 so we plan an MFC before the Aug.22 deadline.
A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, Stefano Garzarella.
MFC after: 3 days.
2014-08-16 15:00:01 +00:00
|
|
|
return error;
|
2013-12-15 08:37:24 +00:00
|
|
|
}
|
|
|
|
|
2018-10-23 08:55:16 +00:00
|
|
|
int
|
|
|
|
netmap_get_vale_na(struct nmreq_header *hdr, struct netmap_adapter **na,
|
|
|
|
struct netmap_mem_d *nmd, int create)
|
Sync netmap sources with the version in our private tree.
This commit contains large contributions from Giuseppe Lettieri and
Stefano Garzarella, is partly supported by grants from Verisign and Cisco,
and brings in the following:
- fix zerocopy monitor ports and introduce copying monitor ports
(the latter are lower performance but give access to all traffic
in parallel with the application)
- exclusive open mode, useful to implement solutions that recover
from crashes of the main netmap client (suggested by Patrick Kelsey)
- revised memory allocator in preparation for the 'passthrough mode'
(ptnetmap) recently presented at bsdcan. ptnetmap is described in
S. Garzarella, G. Lettieri, L. Rizzo;
Virtual device passthrough for high speed VM networking,
ACM/IEEE ANCS 2015, Oakland (CA) May 2015
http://info.iet.unipi.it/~luigi/research.html
- fix rx CRC handing on ixl
- add module dependencies for netmap when building drivers as modules
- minor simplifications to device-specific routines (*txsync, *rxsync)
- general code cleanup (remove unused variables, introduce macros
to access rings and remove duplicate code,
Applications do not need to be recompiled, unless of course
they want to use the new features (monitors and exclusive open).
Those willing to try this code on stable/10 can just update the
sys/dev/netmap/*, sys/net/netmap* with the version in HEAD
and apply the small patches to individual device drivers.
MFC after: 1 month
Sponsored by: (partly) Verisign, Cisco
2015-07-10 05:51:36 +00:00
|
|
|
{
|
2018-10-23 08:55:16 +00:00
|
|
|
return netmap_get_bdg_na(hdr, na, nmd, create, &vale_bdg_ops);
|
|
|
|
}
|
Sync netmap sources with the version in our private tree.
This commit contains large contributions from Giuseppe Lettieri and
Stefano Garzarella, is partly supported by grants from Verisign and Cisco,
and brings in the following:
- fix zerocopy monitor ports and introduce copying monitor ports
(the latter are lower performance but give access to all traffic
in parallel with the application)
- exclusive open mode, useful to implement solutions that recover
from crashes of the main netmap client (suggested by Patrick Kelsey)
- revised memory allocator in preparation for the 'passthrough mode'
(ptnetmap) recently presented at bsdcan. ptnetmap is described in
S. Garzarella, G. Lettieri, L. Rizzo;
Virtual device passthrough for high speed VM networking,
ACM/IEEE ANCS 2015, Oakland (CA) May 2015
http://info.iet.unipi.it/~luigi/research.html
- fix rx CRC handing on ixl
- add module dependencies for netmap when building drivers as modules
- minor simplifications to device-specific routines (*txsync, *rxsync)
- general code cleanup (remove unused variables, introduce macros
to access rings and remove duplicate code,
Applications do not need to be recompiled, unless of course
they want to use the new features (monitors and exclusive open).
Those willing to try this code on stable/10 can just update the
sys/dev/netmap/*, sys/net/netmap* with the version in HEAD
and apply the small patches to individual device drivers.
MFC after: 1 month
Sponsored by: (partly) Verisign, Cisco
2015-07-10 05:51:36 +00:00
|
|
|
|
2018-10-23 08:55:16 +00:00
|
|
|
|
|
|
|
/* creates a persistent VALE port */
|
|
|
|
int
|
|
|
|
nm_vi_create(struct nmreq_header *hdr)
|
|
|
|
{
|
|
|
|
struct nmreq_vale_newif *req =
|
|
|
|
(struct nmreq_vale_newif *)(uintptr_t)hdr->nr_body;
|
|
|
|
int error = 0;
|
|
|
|
/* Build a nmreq_register out of the nmreq_vale_newif,
|
|
|
|
* so that we can call netmap_get_bdg_na(). */
|
|
|
|
struct nmreq_register regreq;
|
|
|
|
bzero(®req, sizeof(regreq));
|
|
|
|
regreq.nr_tx_slots = req->nr_tx_slots;
|
|
|
|
regreq.nr_rx_slots = req->nr_rx_slots;
|
|
|
|
regreq.nr_tx_rings = req->nr_tx_rings;
|
|
|
|
regreq.nr_rx_rings = req->nr_rx_rings;
|
|
|
|
regreq.nr_mem_id = req->nr_mem_id;
|
|
|
|
hdr->nr_reqtype = NETMAP_REQ_REGISTER;
|
|
|
|
hdr->nr_body = (uintptr_t)®req;
|
|
|
|
error = netmap_vi_create(hdr, 0 /* no autodelete */);
|
|
|
|
hdr->nr_reqtype = NETMAP_REQ_VALE_NEWIF;
|
|
|
|
hdr->nr_body = (uintptr_t)req;
|
|
|
|
/* Write back to the original struct. */
|
|
|
|
req->nr_tx_slots = regreq.nr_tx_slots;
|
|
|
|
req->nr_rx_slots = regreq.nr_rx_slots;
|
|
|
|
req->nr_tx_rings = regreq.nr_tx_rings;
|
|
|
|
req->nr_rx_rings = regreq.nr_rx_rings;
|
|
|
|
req->nr_mem_id = regreq.nr_mem_id;
|
|
|
|
return error;
|
Sync netmap sources with the version in our private tree.
This commit contains large contributions from Giuseppe Lettieri and
Stefano Garzarella, is partly supported by grants from Verisign and Cisco,
and brings in the following:
- fix zerocopy monitor ports and introduce copying monitor ports
(the latter are lower performance but give access to all traffic
in parallel with the application)
- exclusive open mode, useful to implement solutions that recover
from crashes of the main netmap client (suggested by Patrick Kelsey)
- revised memory allocator in preparation for the 'passthrough mode'
(ptnetmap) recently presented at bsdcan. ptnetmap is described in
S. Garzarella, G. Lettieri, L. Rizzo;
Virtual device passthrough for high speed VM networking,
ACM/IEEE ANCS 2015, Oakland (CA) May 2015
http://info.iet.unipi.it/~luigi/research.html
- fix rx CRC handing on ixl
- add module dependencies for netmap when building drivers as modules
- minor simplifications to device-specific routines (*txsync, *rxsync)
- general code cleanup (remove unused variables, introduce macros
to access rings and remove duplicate code,
Applications do not need to be recompiled, unless of course
they want to use the new features (monitors and exclusive open).
Those willing to try this code on stable/10 can just update the
sys/dev/netmap/*, sys/net/netmap* with the version in HEAD
and apply the small patches to individual device drivers.
MFC after: 1 month
Sponsored by: (partly) Verisign, Cisco
2015-07-10 05:51:36 +00:00
|
|
|
}
|
It is 2014 and we have a new version of netmap.
Most relevant features:
- netmap emulation on any NIC, even those without native netmap support.
On the ixgbe we have measured about 4Mpps/core/queue in this mode,
which is still a lot more than with sockets/bpf.
- seamless interconnection of VALE switch, NICs and host stack.
If you disable accelerations on your NIC (say em0)
ifconfig em0 -txcsum -txcsum
you can use the VALE switch to connect the NIC and the host stack:
vale-ctl -h valeXX:em0
allowing sharing the NIC with other netmap clients.
- THE USER API HAS SLIGHTLY CHANGED (head/cur/tail pointers
instead of pointers/count as before). This was unavoidable to support,
in the future, multiple threads operating on the same rings.
Netmap clients require very small source code changes to compile again.
On the plus side, the new API should be easier to understand
and the internals are a lot simpler.
The manual page has been updated extensively to reflect the current
features and give some examples.
This is the result of work of several people including Giuseppe Lettieri,
Vincenzo Maffione, Michio Honda and myself, and has been financially
supported by EU projects CHANGE and OPENLAB, from NetApp University
Research Fund, NEC, and of course the Universita` di Pisa.
2014-01-06 12:53:15 +00:00
|
|
|
|
2018-10-23 08:55:16 +00:00
|
|
|
/* remove a persistent VALE port from the system */
|
|
|
|
int
|
|
|
|
nm_vi_destroy(const char *name)
|
2013-12-15 08:37:24 +00:00
|
|
|
{
|
2018-10-23 08:55:16 +00:00
|
|
|
struct ifnet *ifp;
|
|
|
|
struct netmap_vp_adapter *vpna;
|
|
|
|
int error;
|
Sync netmap sources with the version in our private tree.
This commit contains large contributions from Giuseppe Lettieri and
Stefano Garzarella, is partly supported by grants from Verisign and Cisco,
and brings in the following:
- fix zerocopy monitor ports and introduce copying monitor ports
(the latter are lower performance but give access to all traffic
in parallel with the application)
- exclusive open mode, useful to implement solutions that recover
from crashes of the main netmap client (suggested by Patrick Kelsey)
- revised memory allocator in preparation for the 'passthrough mode'
(ptnetmap) recently presented at bsdcan. ptnetmap is described in
S. Garzarella, G. Lettieri, L. Rizzo;
Virtual device passthrough for high speed VM networking,
ACM/IEEE ANCS 2015, Oakland (CA) May 2015
http://info.iet.unipi.it/~luigi/research.html
- fix rx CRC handing on ixl
- add module dependencies for netmap when building drivers as modules
- minor simplifications to device-specific routines (*txsync, *rxsync)
- general code cleanup (remove unused variables, introduce macros
to access rings and remove duplicate code,
Applications do not need to be recompiled, unless of course
they want to use the new features (monitors and exclusive open).
Those willing to try this code on stable/10 can just update the
sys/dev/netmap/*, sys/net/netmap* with the version in HEAD
and apply the small patches to individual device drivers.
MFC after: 1 month
Sponsored by: (partly) Verisign, Cisco
2015-07-10 05:51:36 +00:00
|
|
|
|
2018-10-23 08:55:16 +00:00
|
|
|
ifp = ifunit_ref(name);
|
|
|
|
if (!ifp)
|
|
|
|
return ENXIO;
|
|
|
|
NMG_LOCK();
|
|
|
|
/* make sure this is actually a VALE port */
|
|
|
|
if (!NM_NA_VALID(ifp) || NA(ifp)->nm_register != netmap_vp_reg) {
|
|
|
|
error = EINVAL;
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
vpna = (struct netmap_vp_adapter *)NA(ifp);
|
|
|
|
|
|
|
|
/* we can only destroy ports that were created via NETMAP_BDG_NEWIF */
|
|
|
|
if (vpna->autodelete) {
|
|
|
|
error = EINVAL;
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* also make sure that nobody is using the inferface */
|
|
|
|
if (NETMAP_OWNED_BY_ANY(&vpna->up) ||
|
|
|
|
vpna->up.na_refcount > 1 /* any ref besides the one in nm_vi_create()? */) {
|
|
|
|
error = EBUSY;
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
NMG_UNLOCK();
|
Sync netmap sources with the version in our private tree.
This commit contains large contributions from Giuseppe Lettieri and
Stefano Garzarella, is partly supported by grants from Verisign and Cisco,
and brings in the following:
- fix zerocopy monitor ports and introduce copying monitor ports
(the latter are lower performance but give access to all traffic
in parallel with the application)
- exclusive open mode, useful to implement solutions that recover
from crashes of the main netmap client (suggested by Patrick Kelsey)
- revised memory allocator in preparation for the 'passthrough mode'
(ptnetmap) recently presented at bsdcan. ptnetmap is described in
S. Garzarella, G. Lettieri, L. Rizzo;
Virtual device passthrough for high speed VM networking,
ACM/IEEE ANCS 2015, Oakland (CA) May 2015
http://info.iet.unipi.it/~luigi/research.html
- fix rx CRC handing on ixl
- add module dependencies for netmap when building drivers as modules
- minor simplifications to device-specific routines (*txsync, *rxsync)
- general code cleanup (remove unused variables, introduce macros
to access rings and remove duplicate code,
Applications do not need to be recompiled, unless of course
they want to use the new features (monitors and exclusive open).
Those willing to try this code on stable/10 can just update the
sys/dev/netmap/*, sys/net/netmap* with the version in HEAD
and apply the small patches to individual device drivers.
MFC after: 1 month
Sponsored by: (partly) Verisign, Cisco
2015-07-10 05:51:36 +00:00
|
|
|
|
2018-10-23 08:55:16 +00:00
|
|
|
D("destroying a persistent vale interface %s", ifp->if_xname);
|
|
|
|
/* Linux requires all the references are released
|
|
|
|
* before unregister
|
|
|
|
*/
|
|
|
|
netmap_detach(ifp);
|
|
|
|
if_rele(ifp);
|
|
|
|
nm_os_vi_detach(ifp);
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
err:
|
|
|
|
NMG_UNLOCK();
|
|
|
|
if_rele(ifp);
|
|
|
|
return error;
|
Sync netmap sources with the version in our private tree.
This commit contains large contributions from Giuseppe Lettieri and
Stefano Garzarella, is partly supported by grants from Verisign and Cisco,
and brings in the following:
- fix zerocopy monitor ports and introduce copying monitor ports
(the latter are lower performance but give access to all traffic
in parallel with the application)
- exclusive open mode, useful to implement solutions that recover
from crashes of the main netmap client (suggested by Patrick Kelsey)
- revised memory allocator in preparation for the 'passthrough mode'
(ptnetmap) recently presented at bsdcan. ptnetmap is described in
S. Garzarella, G. Lettieri, L. Rizzo;
Virtual device passthrough for high speed VM networking,
ACM/IEEE ANCS 2015, Oakland (CA) May 2015
http://info.iet.unipi.it/~luigi/research.html
- fix rx CRC handing on ixl
- add module dependencies for netmap when building drivers as modules
- minor simplifications to device-specific routines (*txsync, *rxsync)
- general code cleanup (remove unused variables, introduce macros
to access rings and remove duplicate code,
Applications do not need to be recompiled, unless of course
they want to use the new features (monitors and exclusive open).
Those willing to try this code on stable/10 can just update the
sys/dev/netmap/*, sys/net/netmap* with the version in HEAD
and apply the small patches to individual device drivers.
MFC after: 1 month
Sponsored by: (partly) Verisign, Cisco
2015-07-10 05:51:36 +00:00
|
|
|
}
|
|
|
|
|
2018-10-23 08:55:16 +00:00
|
|
|
static int
|
|
|
|
nm_update_info(struct nmreq_register *req, struct netmap_adapter *na)
|
Sync netmap sources with the version in our private tree.
This commit contains large contributions from Giuseppe Lettieri and
Stefano Garzarella, is partly supported by grants from Verisign and Cisco,
and brings in the following:
- fix zerocopy monitor ports and introduce copying monitor ports
(the latter are lower performance but give access to all traffic
in parallel with the application)
- exclusive open mode, useful to implement solutions that recover
from crashes of the main netmap client (suggested by Patrick Kelsey)
- revised memory allocator in preparation for the 'passthrough mode'
(ptnetmap) recently presented at bsdcan. ptnetmap is described in
S. Garzarella, G. Lettieri, L. Rizzo;
Virtual device passthrough for high speed VM networking,
ACM/IEEE ANCS 2015, Oakland (CA) May 2015
http://info.iet.unipi.it/~luigi/research.html
- fix rx CRC handing on ixl
- add module dependencies for netmap when building drivers as modules
- minor simplifications to device-specific routines (*txsync, *rxsync)
- general code cleanup (remove unused variables, introduce macros
to access rings and remove duplicate code,
Applications do not need to be recompiled, unless of course
they want to use the new features (monitors and exclusive open).
Those willing to try this code on stable/10 can just update the
sys/dev/netmap/*, sys/net/netmap* with the version in HEAD
and apply the small patches to individual device drivers.
MFC after: 1 month
Sponsored by: (partly) Verisign, Cisco
2015-07-10 05:51:36 +00:00
|
|
|
{
|
2018-10-23 08:55:16 +00:00
|
|
|
req->nr_rx_rings = na->num_rx_rings;
|
|
|
|
req->nr_tx_rings = na->num_tx_rings;
|
|
|
|
req->nr_rx_slots = na->num_rx_desc;
|
|
|
|
req->nr_tx_slots = na->num_tx_desc;
|
|
|
|
return netmap_mem_get_info(na->nm_mem, &req->nr_memsize, NULL,
|
|
|
|
&req->nr_mem_id);
|
Sync netmap sources with the version in our private tree.
This commit contains large contributions from Giuseppe Lettieri and
Stefano Garzarella, is partly supported by grants from Verisign and Cisco,
and brings in the following:
- fix zerocopy monitor ports and introduce copying monitor ports
(the latter are lower performance but give access to all traffic
in parallel with the application)
- exclusive open mode, useful to implement solutions that recover
from crashes of the main netmap client (suggested by Patrick Kelsey)
- revised memory allocator in preparation for the 'passthrough mode'
(ptnetmap) recently presented at bsdcan. ptnetmap is described in
S. Garzarella, G. Lettieri, L. Rizzo;
Virtual device passthrough for high speed VM networking,
ACM/IEEE ANCS 2015, Oakland (CA) May 2015
http://info.iet.unipi.it/~luigi/research.html
- fix rx CRC handing on ixl
- add module dependencies for netmap when building drivers as modules
- minor simplifications to device-specific routines (*txsync, *rxsync)
- general code cleanup (remove unused variables, introduce macros
to access rings and remove duplicate code,
Applications do not need to be recompiled, unless of course
they want to use the new features (monitors and exclusive open).
Those willing to try this code on stable/10 can just update the
sys/dev/netmap/*, sys/net/netmap* with the version in HEAD
and apply the small patches to individual device drivers.
MFC after: 1 month
Sponsored by: (partly) Verisign, Cisco
2015-07-10 05:51:36 +00:00
|
|
|
}
|
|
|
|
|
2018-10-23 08:55:16 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Create a virtual interface registered to the system.
|
|
|
|
* The interface will be attached to a bridge later.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
netmap_vi_create(struct nmreq_header *hdr, int autodelete)
|
Sync netmap sources with the version in our private tree.
This commit contains large contributions from Giuseppe Lettieri and
Stefano Garzarella, is partly supported by grants from Verisign and Cisco,
and brings in the following:
- fix zerocopy monitor ports and introduce copying monitor ports
(the latter are lower performance but give access to all traffic
in parallel with the application)
- exclusive open mode, useful to implement solutions that recover
from crashes of the main netmap client (suggested by Patrick Kelsey)
- revised memory allocator in preparation for the 'passthrough mode'
(ptnetmap) recently presented at bsdcan. ptnetmap is described in
S. Garzarella, G. Lettieri, L. Rizzo;
Virtual device passthrough for high speed VM networking,
ACM/IEEE ANCS 2015, Oakland (CA) May 2015
http://info.iet.unipi.it/~luigi/research.html
- fix rx CRC handing on ixl
- add module dependencies for netmap when building drivers as modules
- minor simplifications to device-specific routines (*txsync, *rxsync)
- general code cleanup (remove unused variables, introduce macros
to access rings and remove duplicate code,
Applications do not need to be recompiled, unless of course
they want to use the new features (monitors and exclusive open).
Those willing to try this code on stable/10 can just update the
sys/dev/netmap/*, sys/net/netmap* with the version in HEAD
and apply the small patches to individual device drivers.
MFC after: 1 month
Sponsored by: (partly) Verisign, Cisco
2015-07-10 05:51:36 +00:00
|
|
|
{
|
2018-10-23 08:55:16 +00:00
|
|
|
struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body;
|
|
|
|
struct ifnet *ifp;
|
|
|
|
struct netmap_vp_adapter *vpna;
|
|
|
|
struct netmap_mem_d *nmd = NULL;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
|
|
|
|
return EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* don't include VALE prefix */
|
|
|
|
if (!strncmp(hdr->nr_name, NM_BDG_NAME, strlen(NM_BDG_NAME)))
|
|
|
|
return EINVAL;
|
|
|
|
if (strlen(hdr->nr_name) >= IFNAMSIZ) {
|
|
|
|
return EINVAL;
|
|
|
|
}
|
|
|
|
ifp = ifunit_ref(hdr->nr_name);
|
|
|
|
if (ifp) { /* already exist, cannot create new one */
|
|
|
|
error = EEXIST;
|
|
|
|
NMG_LOCK();
|
|
|
|
if (NM_NA_VALID(ifp)) {
|
|
|
|
int update_err = nm_update_info(req, NA(ifp));
|
|
|
|
if (update_err)
|
|
|
|
error = update_err;
|
|
|
|
}
|
|
|
|
NMG_UNLOCK();
|
|
|
|
if_rele(ifp);
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
error = nm_os_vi_persist(hdr->nr_name, &ifp);
|
|
|
|
if (error)
|
|
|
|
return error;
|
|
|
|
|
|
|
|
NMG_LOCK();
|
|
|
|
if (req->nr_mem_id) {
|
|
|
|
nmd = netmap_mem_find(req->nr_mem_id);
|
|
|
|
if (nmd == NULL) {
|
|
|
|
error = EINVAL;
|
|
|
|
goto err_1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/* netmap_vp_create creates a struct netmap_vp_adapter */
|
|
|
|
error = netmap_vp_create(hdr, ifp, nmd, &vpna);
|
|
|
|
if (error) {
|
|
|
|
D("error %d", error);
|
|
|
|
goto err_1;
|
|
|
|
}
|
|
|
|
/* persist-specific routines */
|
|
|
|
vpna->up.nm_bdg_ctl = netmap_vp_bdg_ctl;
|
|
|
|
if (!autodelete) {
|
|
|
|
netmap_adapter_get(&vpna->up);
|
|
|
|
} else {
|
|
|
|
vpna->autodelete = 1;
|
|
|
|
}
|
|
|
|
NM_ATTACH_NA(ifp, &vpna->up);
|
|
|
|
/* return the updated info */
|
|
|
|
error = nm_update_info(req, &vpna->up);
|
|
|
|
if (error) {
|
|
|
|
goto err_2;
|
|
|
|
}
|
|
|
|
ND("returning nr_mem_id %d", req->nr_mem_id);
|
|
|
|
if (nmd)
|
|
|
|
netmap_mem_put(nmd);
|
|
|
|
NMG_UNLOCK();
|
|
|
|
ND("created %s", ifp->if_xname);
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
err_2:
|
|
|
|
netmap_detach(ifp);
|
|
|
|
err_1:
|
|
|
|
if (nmd)
|
|
|
|
netmap_mem_put(nmd);
|
|
|
|
NMG_UNLOCK();
|
|
|
|
nm_os_vi_detach(ifp);
|
|
|
|
|
|
|
|
return error;
|
2013-12-15 08:37:24 +00:00
|
|
|
}
|
2018-10-23 08:55:16 +00:00
|
|
|
|
2013-12-15 08:37:24 +00:00
|
|
|
#endif /* WITH_VALE */
|