Sync netmap sources with the version in our private tree.

This commit contains large contributions from Giuseppe Lettieri and
Stefano Garzarella; the work was partly supported by grants from
Verisign and Cisco. It brings in the following:

- fix zerocopy monitor ports and introduce copying monitor ports
  (the latter are lower performance but give access to all traffic
  in parallel with the application)

- exclusive open mode, useful to implement solutions that recover
  from crashes of the main netmap client (suggested by Patrick Kelsey)

- revised memory allocator in preparation for the 'passthrough mode'
  (ptnetmap) recently presented at BSDCan. ptnetmap is described in
        S. Garzarella, G. Lettieri, L. Rizzo;
        Virtual device passthrough for high speed VM networking,
        ACM/IEEE ANCS 2015, Oakland (CA) May 2015
        http://info.iet.unipi.it/~luigi/research.html

- fix rx CRC handling on ixl

- add module dependencies for netmap when building drivers as modules

- minor simplifications to device-specific routines (*txsync, *rxsync)

- general code cleanup (remove unused variables, introduce macros
  to access rings, and remove duplicate code)

Applications do not need to be recompiled, unless of course
they want to use the new features (monitors and exclusive open).
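
For illustration, a minimal userspace sketch of requesting the new
modes (error handling mostly omitted). NR_MONITOR_RX/NR_MONITOR_TX
appear in the header changes below; the name NR_EXCLUSIVE for the
exclusive-open bit is assumed here:

    #include <fcntl.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <net/netmap.h>

    /* attach a copy monitor to all rx rings of an interface */
    static int
    open_rx_monitor(const char *ifname)
    {
            struct nmreq req;
            int fd = open("/dev/netmap", O_RDWR);

            if (fd < 0)
                    return (-1);
            memset(&req, 0, sizeof(req));
            strlcpy(req.nr_name, ifname, sizeof(req.nr_name));
            req.nr_version = NETMAP_API;
            /* NR_MONITOR_TX monitors tx instead; NR_EXCLUSIVE
             * (assumed name) would claim the rings exclusively,
             * so a watchdog can take over if the main client
             * crashes */
            req.nr_flags = NR_REG_ALL_NIC | NR_MONITOR_RX;
            if (ioctl(fd, NIOCREGIF, &req) < 0) {
                    close(fd);
                    return (-1);
            }
            return (fd);
    }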

Those willing to try this code on stable/10 can just update
sys/dev/netmap/* and sys/net/netmap* with the versions in HEAD
and apply the small patches to the individual device drivers.

MFC after:	1 month
Sponsored by:	(partly) Verisign, Cisco
Luigi Rizzo 2015-07-10 05:51:36 +00:00
parent 9d73ee0f82
commit 847bf38369
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=285349
25 changed files with 2053 additions and 1474 deletions


@ -8533,10 +8533,17 @@ static devclass_t cxgbe_devclass, cxl_devclass;
DRIVER_MODULE(t4nex, pci, t4_driver, t4_devclass, mod_event, 0);
MODULE_VERSION(t4nex, 1);
MODULE_DEPEND(t4nex, firmware, 1, 1, 1);
#ifdef DEV_NETMAP
MODULE_DEPEND(t4nex, netmap, 1, 1, 1);
#endif /* DEV_NETMAP */
DRIVER_MODULE(t5nex, pci, t5_driver, t5_devclass, mod_event, 0);
MODULE_VERSION(t5nex, 1);
MODULE_DEPEND(t5nex, firmware, 1, 1, 1);
#ifdef DEV_NETMAP
MODULE_DEPEND(t5nex, netmap, 1, 1, 1);
#endif /* DEV_NETMAP */
DRIVER_MODULE(cxgbe, t4nex, cxgbe_driver, cxgbe_devclass, 0, 0);
MODULE_VERSION(cxgbe, 1);


@ -917,8 +917,6 @@ cxgbe_netmap_txsync(struct netmap_kring *kring, int flags)
kring->nr_hwtail -= kring->nkr_num_slots;
}
nm_txsync_finalize(kring);
return (0);
}
@ -931,7 +929,7 @@ cxgbe_netmap_rxsync(struct netmap_kring *kring, int flags)
struct port_info *pi = ifp->if_softc;
struct adapter *sc = pi->adapter;
struct sge_nm_rxq *nm_rxq = &sc->sge.nm_rxq[pi->first_nm_rxq + kring->ring_id];
u_int const head = nm_rxsync_prologue(kring);
u_int const head = kring->rhead;
u_int n;
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
@ -993,8 +991,6 @@ cxgbe_netmap_rxsync(struct netmap_kring *kring, int flags)
}
}
nm_rxsync_finalize(kring);
return (0);
}


@ -344,6 +344,9 @@ devclass_t em_devclass;
DRIVER_MODULE(em, pci, em_driver, em_devclass, 0, 0);
MODULE_DEPEND(em, pci, 1, 1, 1);
MODULE_DEPEND(em, ether, 1, 1, 1);
#ifdef DEV_NETMAP
MODULE_DEPEND(em, netmap, 1, 1, 1);
#endif /* DEV_NETMAP */
/*********************************************************************
* Tunable default values.


@ -322,6 +322,9 @@ static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);
#ifdef DEV_NETMAP
MODULE_DEPEND(igb, netmap, 1, 1, 1);
#endif /* DEV_NETMAP */
/*********************************************************************
* Tunable default values.


@ -286,6 +286,9 @@ extern devclass_t em_devclass;
DRIVER_MODULE(lem, pci, lem_driver, em_devclass, 0, 0);
MODULE_DEPEND(lem, pci, 1, 1, 1);
MODULE_DEPEND(lem, ether, 1, 1, 1);
#ifdef DEV_NETMAP
MODULE_DEPEND(lem, netmap, 1, 1, 1);
#endif /* DEV_NETMAP */
/*********************************************************************
* Tunable default values.


@ -246,6 +246,9 @@ DRIVER_MODULE(ix, pci, ix_driver, ix_devclass, 0, 0);
MODULE_DEPEND(ix, pci, 1, 1, 1);
MODULE_DEPEND(ix, ether, 1, 1, 1);
#ifdef DEV_NETMAP
MODULE_DEPEND(ix, netmap, 1, 1, 1);
#endif /* DEV_NETMAP */
/*
** TUNEABLE PARAMETERS:


@ -198,8 +198,6 @@ em_netmap_txsync(struct netmap_kring *kring, int flags)
}
}
nm_txsync_finalize(kring);
return 0;
}
@ -217,7 +215,7 @@ em_netmap_rxsync(struct netmap_kring *kring, int flags)
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = nm_rxsync_prologue(kring);
u_int const head = kring->rhead;
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
/* device-specific */
@ -303,9 +301,6 @@ em_netmap_rxsync(struct netmap_kring *kring, int flags)
E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), nic_i);
}
/* tell userspace that there might be new packets */
nm_rxsync_finalize(kring);
return 0;
ring_reset:


@ -180,8 +180,6 @@ igb_netmap_txsync(struct netmap_kring *kring, int flags)
kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
}
nm_txsync_finalize(kring);
return 0;
}
@ -199,7 +197,7 @@ igb_netmap_rxsync(struct netmap_kring *kring, int flags)
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = nm_rxsync_prologue(kring);
u_int const head = kring->rhead;
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
/* device-specific */
@ -283,9 +281,6 @@ igb_netmap_rxsync(struct netmap_kring *kring, int flags)
E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), nic_i);
}
/* tell userspace that there might be new packets */
nm_rxsync_finalize(kring);
return 0;
ring_reset:


@ -68,9 +68,14 @@ extern int ixl_rx_miss, ixl_rx_miss_bufs, ixl_crcstrip;
* count packets that might be missed due to lost interrupts.
*/
SYSCTL_DECL(_dev_netmap);
int ixl_rx_miss, ixl_rx_miss_bufs, ixl_crcstrip;
/*
* The ixl driver by default strips CRCs and we do not override it.
*/
int ixl_rx_miss, ixl_rx_miss_bufs, ixl_crcstrip = 1;
#if 0
SYSCTL_INT(_dev_netmap, OID_AUTO, ixl_crcstrip,
CTLFLAG_RW, &ixl_crcstrip, 0, "strip CRC on rx frames");
CTLFLAG_RW, &ixl_crcstrip, 1, "strip CRC on rx frames");
#endif
SYSCTL_INT(_dev_netmap, OID_AUTO, ixl_rx_miss,
CTLFLAG_RW, &ixl_rx_miss, 0, "potentially missed rx intr");
SYSCTL_INT(_dev_netmap, OID_AUTO, ixl_rx_miss_bufs,
@ -268,8 +273,6 @@ ixl_netmap_txsync(struct netmap_kring *kring, int flags)
kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
}
nm_txsync_finalize(kring);
return 0;
}
@ -297,7 +300,7 @@ ixl_netmap_rxsync(struct netmap_kring *kring, int flags)
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = nm_rxsync_prologue(kring);
u_int const head = kring->rhead;
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
/* device-specific */
@ -408,9 +411,6 @@ ixl_netmap_rxsync(struct netmap_kring *kring, int flags)
wr32(vsi->hw, rxr->tail, nic_i);
}
/* tell userspace that there might be new packets */
nm_rxsync_finalize(kring);
return 0;
ring_reset:


@ -302,8 +302,6 @@ lem_netmap_txsync(struct netmap_kring *kring, int flags)
kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
}
nm_txsync_finalize(kring);
return 0;
}
@ -321,7 +319,7 @@ lem_netmap_rxsync(struct netmap_kring *kring, int flags)
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = nm_rxsync_prologue(kring);
u_int const head = kring->rhead;
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
/* device-specific */
@ -466,9 +464,6 @@ lem_netmap_rxsync(struct netmap_kring *kring, int flags)
E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), nic_i);
}
/* tell userspace that there might be new packets */
nm_rxsync_finalize(kring);
return 0;
ring_reset:


@ -159,8 +159,6 @@ re_netmap_txsync(struct netmap_kring *kring, int flags)
}
}
nm_txsync_finalize(kring);
return 0;
}
@ -178,7 +176,7 @@ re_netmap_rxsync(struct netmap_kring *kring, int flags)
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = nm_rxsync_prologue(kring);
u_int const head = kring->rhead;
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
/* device-specific */
@ -273,9 +271,6 @@ re_netmap_rxsync(struct netmap_kring *kring, int flags)
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
}
/* tell userspace that there might be new packets */
nm_rxsync_finalize(kring);
return 0;
ring_reset:


@ -214,9 +214,6 @@ vtnet_netmap_txsync(struct netmap_kring *kring, int flags)
virtqueue_postpone_intr(vq, VQ_POSTPONE_SHORT);
}
//out:
nm_txsync_finalize(kring);
return 0;
}
@ -278,7 +275,7 @@ vtnet_netmap_rxsync(struct netmap_kring *kring, int flags)
// u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = nm_rxsync_prologue(kring);
u_int const head = kring->rhead;
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
/* device-specific */
@ -340,9 +337,6 @@ vtnet_netmap_rxsync(struct netmap_kring *kring, int flags)
vtnet_rxq_enable_intr(rxq);
}
/* tell userspace that there might be new packets. */
nm_rxsync_finalize(kring);
ND("[C] h %d c %d t %d hwcur %d hwtail %d",
ring->head, ring->cur, ring->tail,
kring->nr_hwcur, kring->nr_hwtail);


@ -322,8 +322,6 @@ ixgbe_netmap_txsync(struct netmap_kring *kring, int flags)
}
}
nm_txsync_finalize(kring);
return 0;
}
@ -351,7 +349,7 @@ ixgbe_netmap_rxsync(struct netmap_kring *kring, int flags)
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = nm_rxsync_prologue(kring);
u_int const head = kring->rhead;
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
/* device-specific */
@ -458,9 +456,6 @@ ixgbe_netmap_rxsync(struct netmap_kring *kring, int flags)
IXGBE_WRITE_REG(&adapter->hw, IXGBE_RDT(rxr->me), nic_i);
}
/* tell userspace that there might be new packets */
nm_rxsync_finalize(kring);
return 0;
ring_reset:

File diff suppressed because it is too large.


@ -24,6 +24,8 @@
*/
/* $FreeBSD$ */
#include "opt_inet.h"
#include "opt_inet6.h"
#include <sys/types.h>
#include <sys/module.h>
@ -148,9 +150,9 @@ nm_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data,
* Second argument is non-zero to intercept, 0 to restore
*/
int
netmap_catch_rx(struct netmap_adapter *na, int intercept)
netmap_catch_rx(struct netmap_generic_adapter *gna, int intercept)
{
struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
struct netmap_adapter *na = &gna->up.up;
struct ifnet *ifp = na->ifp;
if (intercept) {
@ -183,7 +185,7 @@ void
netmap_catch_tx(struct netmap_generic_adapter *gna, int enable)
{
struct netmap_adapter *na = &gna->up.up;
struct ifnet *ifp = na->ifp;
struct ifnet *ifp = netmap_generic_getifp(gna);
if (enable) {
na->if_transmit = ifp->if_transmit;
@ -494,6 +496,7 @@ netmap_dev_pager_fault(vm_object_t object, vm_ooffset_t offset,
{
struct netmap_vm_handle_t *vmh = object->handle;
struct netmap_priv_d *priv = vmh->priv;
struct netmap_adapter *na = priv->np_na;
vm_paddr_t paddr;
vm_page_t page;
vm_memattr_t memattr;
@ -503,7 +506,7 @@ netmap_dev_pager_fault(vm_object_t object, vm_ooffset_t offset,
object, (intmax_t)offset, prot, mres);
memattr = object->memattr;
pidx = OFF_TO_IDX(offset);
paddr = netmap_mem_ofstophys(priv->np_mref, offset);
paddr = netmap_mem_ofstophys(na->nm_mem, offset);
if (paddr == 0)
return VM_PAGER_FAIL;
@ -568,14 +571,14 @@ netmap_mmap_single(struct cdev *cdev, vm_ooffset_t *foff,
error = devfs_get_cdevpriv((void**)&priv);
if (error)
goto err_unlock;
if (priv->np_nifp == NULL) {
error = EINVAL;
goto err_unlock;
}
vmh->priv = priv;
priv->np_refcount++;
NMG_UNLOCK();
error = netmap_get_memory(priv);
if (error)
goto err_deref;
obj = cdev_pager_allocate(vmh, OBJT_DEVICE,
&netmap_cdev_pager_ops, objsize, prot,
*foff, NULL);
@ -598,8 +601,18 @@ netmap_mmap_single(struct cdev *cdev, vm_ooffset_t *foff,
return error;
}
// XXX can we remove this ?
/*
* netmap_close() is called on every close(), but we do not need to do
* anything at that moment, since the process may have other open file
* descriptors for /dev/netmap. Instead, we pass netmap_dtor() to
* devfs_set_cdevpriv() on open(). The FreeBSD kernel will call the destructor
* when the last fd pointing to the device is closed.
*
* Unfortunately, FreeBSD does not automatically track active mmap()s on an fd,
* so we have to track them by ourselves (see above). The result is that
* netmap_dtor() is called when the process has no open fds and no active
* memory maps on /dev/netmap, as in Linux.
*/
static int
netmap_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
{
@ -673,7 +686,7 @@ static void
netmap_knrdetach(struct knote *kn)
{
struct netmap_priv_d *priv = (struct netmap_priv_d *)kn->kn_hook;
struct selinfo *si = &priv->np_rxsi->si;
struct selinfo *si = &priv->np_si[NR_RX]->si;
D("remove selinfo %p", si);
knlist_remove(&si->si_note, kn, 0);
@ -683,7 +696,7 @@ static void
netmap_knwdetach(struct knote *kn)
{
struct netmap_priv_d *priv = (struct netmap_priv_d *)kn->kn_hook;
struct selinfo *si = &priv->np_txsi->si;
struct selinfo *si = &priv->np_si[NR_TX]->si;
D("remove selinfo %p", si);
knlist_remove(&si->si_note, kn, 0);
@ -773,7 +786,7 @@ netmap_kqfilter(struct cdev *dev, struct knote *kn)
return 1;
}
/* the si is indicated in the priv */
si = (ev == EVFILT_WRITE) ? priv->np_txsi : priv->np_rxsi;
si = priv->np_si[(ev == EVFILT_WRITE) ? NR_TX : NR_RX];
// XXX lock(priv) ?
kn->kn_fop = (ev == EVFILT_WRITE) ?
&netmap_wfiltops : &netmap_rfiltops;


@ -305,7 +305,7 @@ generic_netmap_register(struct netmap_adapter *na, int enable)
}
rtnl_lock();
/* Prepare to intercept incoming traffic. */
error = netmap_catch_rx(na, 1);
error = netmap_catch_rx(gna, 1);
if (error) {
D("netdev_rx_handler_register() failed (%d)", error);
goto register_handler;
@ -342,7 +342,7 @@ generic_netmap_register(struct netmap_adapter *na, int enable)
netmap_catch_tx(gna, 0);
/* Do not intercept packets on the rx path. */
netmap_catch_rx(na, 0);
netmap_catch_rx(gna, 0);
rtnl_unlock();
@ -645,8 +645,6 @@ generic_netmap_txsync(struct netmap_kring *kring, int flags)
generic_netmap_tx_clean(kring);
nm_txsync_finalize(kring);
return 0;
}
@ -711,7 +709,7 @@ generic_netmap_rxsync(struct netmap_kring *kring, int flags)
u_int nm_i; /* index into the netmap ring */ //j,
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = nm_rxsync_prologue(kring);
u_int const head = kring->rhead;
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
if (head > lim)
@ -774,8 +772,6 @@ generic_netmap_rxsync(struct netmap_kring *kring, int flags)
}
kring->nr_hwcur = head;
}
/* tell userspace that there might be new packets. */
nm_rxsync_finalize(kring);
IFRATE(rate_ctx.new.rxsync++);
return 0;
@ -784,20 +780,25 @@ generic_netmap_rxsync(struct netmap_kring *kring, int flags)
static void
generic_netmap_dtor(struct netmap_adapter *na)
{
struct ifnet *ifp = na->ifp;
struct netmap_generic_adapter *gna = (struct netmap_generic_adapter*)na;
struct ifnet *ifp = netmap_generic_getifp(gna);
struct netmap_adapter *prev_na = gna->prev;
if (prev_na != NULL) {
D("Released generic NA %p", gna);
if_rele(na->ifp);
if_rele(ifp);
netmap_adapter_put(prev_na);
if (na->ifp == NULL) {
/*
* The driver has been removed without releasing
* the reference so we need to do it here.
*/
netmap_adapter_put(prev_na);
}
}
if (ifp != NULL) {
WNA(ifp) = prev_na;
D("Restored native NA %p", prev_na);
na->ifp = NULL;
}
WNA(ifp) = prev_na;
D("Restored native NA %p", prev_na);
na->ifp = NULL;
}
/*
@ -834,6 +835,7 @@ generic_netmap_attach(struct ifnet *ifp)
return ENOMEM;
}
na = (struct netmap_adapter *)gna;
strncpy(na->name, ifp->if_xname, sizeof(na->name));
na->ifp = ifp;
na->num_tx_desc = num_tx_desc;
na->num_rx_desc = num_rx_desc;


@ -34,26 +34,46 @@
#ifndef _NET_NETMAP_KERN_H_
#define _NET_NETMAP_KERN_H_
#if defined(linux)
#if defined(CONFIG_NETMAP_VALE)
#define WITH_VALE
#endif
#if defined(CONFIG_NETMAP_PIPE)
#define WITH_PIPES
#endif
#if defined(CONFIG_NETMAP_MONITOR)
#define WITH_MONITOR
#endif
#if defined(CONFIG_NETMAP_GENERIC)
#define WITH_GENERIC
#endif
#if defined(CONFIG_NETMAP_V1000)
#define WITH_V1000
#endif
#else /* not linux */
#define WITH_VALE // comment out to disable VALE support
#define WITH_PIPES
#define WITH_MONITOR
#define WITH_GENERIC
#endif
#if defined(__FreeBSD__)
#define likely(x) __builtin_expect((long)!!(x), 1L)
#define unlikely(x) __builtin_expect((long)!!(x), 0L)
#define NM_LOCK_T struct mtx
#define NM_LOCK_T struct mtx /* low level spinlock, used to protect queues */
/* netmap global lock */
#define NMG_LOCK_T struct sx
#define NMG_LOCK_INIT() sx_init(&netmap_global_lock, \
"netmap global lock")
#define NMG_LOCK_DESTROY() sx_destroy(&netmap_global_lock)
#define NMG_LOCK() sx_xlock(&netmap_global_lock)
#define NMG_UNLOCK() sx_xunlock(&netmap_global_lock)
#define NMG_LOCK_ASSERT() sx_assert(&netmap_global_lock, SA_XLOCKED)
#define NM_MTX_T struct sx /* OS-specific mutex (sleepable) */
#define NM_MTX_INIT(m) sx_init(&(m), #m)
#define NM_MTX_DESTROY(m) sx_destroy(&(m))
#define NM_MTX_LOCK(m) sx_xlock(&(m))
#define NM_MTX_UNLOCK(m) sx_xunlock(&(m))
#define NM_MTX_ASSERT(m) sx_assert(&(m), SA_XLOCKED)
#define NM_SELINFO_T struct nm_selinfo
#define MBUF_LEN(m) ((m)->m_pkthdr.len)
@ -102,6 +122,8 @@ struct ethtool_ops {
};
struct hrtimer {
};
#define NM_BNS_GET(b)
#define NM_BNS_PUT(b)
#elif defined (linux)
@ -117,20 +139,12 @@ struct hrtimer {
#define NM_ATOMIC_T volatile long unsigned int
#define NM_MTX_T struct mutex
#define NM_MTX_INIT(m, s) do { (void)s; mutex_init(&(m)); } while (0)
#define NM_MTX_DESTROY(m) do { (void)m; } while (0)
#define NM_MTX_T struct mutex /* OS-specific sleepable lock */
#define NM_MTX_INIT(m) mutex_init(&(m))
#define NM_MTX_DESTROY(m) do { (void)(m); } while (0)
#define NM_MTX_LOCK(m) mutex_lock(&(m))
#define NM_MTX_UNLOCK(m) mutex_unlock(&(m))
#define NM_MTX_LOCK_ASSERT(m) mutex_is_locked(&(m))
#define NMG_LOCK_T NM_MTX_T
#define NMG_LOCK_INIT() NM_MTX_INIT(netmap_global_lock, \
"netmap_global_lock")
#define NMG_LOCK_DESTROY() NM_MTX_DESTROY(netmap_global_lock)
#define NMG_LOCK() NM_MTX_LOCK(netmap_global_lock)
#define NMG_UNLOCK() NM_MTX_UNLOCK(netmap_global_lock)
#define NMG_LOCK_ASSERT() NM_MTX_LOCK_ASSERT(netmap_global_lock)
#define NM_MTX_ASSERT(m) mutex_is_locked(&(m))
#ifndef DEV_NETMAP
#define DEV_NETMAP
@ -152,6 +166,13 @@ struct hrtimer {
#endif /* end - platform-specific code */
#define NMG_LOCK_T NM_MTX_T
#define NMG_LOCK_INIT() NM_MTX_INIT(netmap_global_lock)
#define NMG_LOCK_DESTROY() NM_MTX_DESTROY(netmap_global_lock)
#define NMG_LOCK() NM_MTX_LOCK(netmap_global_lock)
#define NMG_UNLOCK() NM_MTX_UNLOCK(netmap_global_lock)
#define NMG_LOCK_ASSERT() NM_MTX_ASSERT(netmap_global_lock)
#define ND(format, ...)
#define D(format, ...) \
do { \
@ -185,6 +206,23 @@ const char *nm_dump_buf(char *p, int len, int lim, char *dst);
extern NMG_LOCK_T netmap_global_lock;
enum txrx { NR_RX = 0, NR_TX = 1, NR_TXRX };
static __inline const char*
nm_txrx2str(enum txrx t)
{
return (t == NR_RX ? "RX" : "TX");
}
static __inline enum txrx
nm_txrx_swap(enum txrx t)
{
return (t == NR_RX ? NR_TX : NR_RX);
}
#define for_rx_tx(t) for ((t) = 0; (t) < NR_TXRX; (t)++)
/*
* private, kernel view of a ring. Keeps track of the status of
* a ring across system calls.
@ -259,6 +297,7 @@ struct netmap_kring {
uint32_t nr_kflags; /* private driver flags */
#define NKR_PENDINTR 0x1 // Pending interrupt.
#define NKR_EXCLUSIVE 0x2 /* exclusive binding */
uint32_t nkr_num_slots;
/*
@ -308,7 +347,10 @@ struct netmap_kring {
// u_int nr_ntc; /* Emulation of a next-to-clean RX ring pointer. */
struct mbq rx_queue; /* intercepted rx mbufs. */
uint32_t users; /* existing bindings for this ring */
uint32_t ring_id; /* debugging */
enum txrx tx; /* kind of ring (tx or rx) */
char name[64]; /* diagnostic */
/* [tx]sync callback for this kring.
@ -323,6 +365,7 @@ struct netmap_kring {
* any of the nm_krings_create callbacks.
*/
int (*nm_sync)(struct netmap_kring *kring, int flags);
int (*nm_notify)(struct netmap_kring *kring, int flags);
#ifdef WITH_PIPES
struct netmap_kring *pipe; /* if this is a pipe ring,
@ -333,17 +376,25 @@ struct netmap_kring {
*/
#endif /* WITH_PIPES */
#ifdef WITH_VALE
int (*save_notify)(struct netmap_kring *kring, int flags);
#endif
#ifdef WITH_MONITOR
/* pointer to the adapter that is monitoring this kring (if any)
*/
struct netmap_monitor_adapter *monitor;
/* array of krings that are monitoring this kring */
struct netmap_kring **monitors;
uint32_t max_monitors; /* current size of the monitors array */
uint32_t n_monitors; /* next unused entry in the monitor array */
/*
* Monitors work by intercepting the txsync and/or rxsync of the
* monitored krings. This is implemented by replacing
* the nm_sync pointer above and saving the previous
* one in save_sync below.
* Monitors work by intercepting the sync and notify callbacks of the
* monitored krings. This is implemented by replacing the pointers
* above and saving the previous ones in mon_* pointers below
*/
int (*save_sync)(struct netmap_kring *kring, int flags);
int (*mon_sync)(struct netmap_kring *kring, int flags);
int (*mon_notify)(struct netmap_kring *kring, int flags);
uint32_t mon_tail; /* last seen slot on rx */
uint32_t mon_pos; /* index of this ring in the monitored ring array */
#endif
} __attribute__((__aligned__(64)));
@ -414,8 +465,11 @@ tail->| |<-hwtail | |<-hwlease
*/
enum txrx { NR_RX = 0, NR_TX = 1 };
struct netmap_lut {
struct lut_entry *lut;
uint32_t objtotal; /* max buffer index */
uint32_t objsize; /* buffer size */
};
struct netmap_vp_adapter; // forward
@ -445,11 +499,10 @@ struct netmap_adapter {
* forwarding packets coming from this
* interface
*/
#define NAF_MEM_OWNER 8 /* the adapter is responsible for the
* deallocation of the memory allocator
#define NAF_MEM_OWNER 8 /* the adapter uses its own memory area
* that cannot be changed
*/
#define NAF_NATIVE_ON 16 /* the adapter is native and the attached
* interface is in netmap mode.
#define NAF_NATIVE 16 /* the adapter is native.
* Virtual ports (vale, pipe, monitor...)
* should never use this flag.
*/
@ -469,7 +522,7 @@ struct netmap_adapter {
u_int num_rx_rings; /* number of adapter receive rings */
u_int num_tx_rings; /* number of adapter transmit rings */
u_int num_tx_desc; /* number of descriptor in each queue */
u_int num_tx_desc; /* number of descriptor in each queue */
u_int num_rx_desc;
/* tx_rings and rx_rings are private but allocated
@ -483,10 +536,10 @@ struct netmap_adapter {
/* (used for leases) */
NM_SELINFO_T tx_si, rx_si; /* global wait queues */
NM_SELINFO_T si[NR_TXRX]; /* global wait queues */
/* count users of the global wait queues */
int tx_si_users, rx_si_users;
int si_users[NR_TXRX];
void *pdev; /* used to store pci device */
@ -544,6 +597,7 @@ struct netmap_adapter {
int (*nm_txsync)(struct netmap_kring *kring, int flags);
int (*nm_rxsync)(struct netmap_kring *kring, int flags);
int (*nm_notify)(struct netmap_kring *kring, int flags);
#define NAF_FORCE_READ 1
#define NAF_FORCE_RECLAIM 2
/* return configuration information */
@ -551,12 +605,6 @@ struct netmap_adapter {
u_int *txr, u_int *txd, u_int *rxr, u_int *rxd);
int (*nm_krings_create)(struct netmap_adapter *);
void (*nm_krings_delete)(struct netmap_adapter *);
int (*nm_notify)(struct netmap_adapter *,
u_int ring, enum txrx, int flags);
#define NAF_DISABLE_NOTIFY 8 /* notify that the stopped state of the
* ring has changed (kring->nkr_stopped)
*/
#ifdef WITH_VALE
/*
* nm_bdg_attach() initializes the na_vp field to point
@ -593,9 +641,7 @@ struct netmap_adapter {
* buffer addresses, and the total number of buffers.
*/
struct netmap_mem_d *nm_mem;
struct lut_entry *na_lut;
uint32_t na_lut_objtotal; /* max buffer index */
uint32_t na_lut_objsize; /* buffer size */
struct netmap_lut na_lut;
/* additional information attached to this adapter
* by other netmap subsystems. Currently used by
@ -603,16 +649,49 @@ struct netmap_adapter {
*/
void *na_private;
#ifdef WITH_PIPES
/* array of pipes that have this adapter as a parent */
struct netmap_pipe_adapter **na_pipes;
int na_next_pipe; /* next free slot in the array */
int na_max_pipes; /* size of the array */
#endif /* WITH_PIPES */
char name[64];
};
static __inline u_int
nma_get_ndesc(struct netmap_adapter *na, enum txrx t)
{
return (t == NR_TX ? na->num_tx_desc : na->num_rx_desc);
}
static __inline void
nma_set_ndesc(struct netmap_adapter *na, enum txrx t, u_int v)
{
if (t == NR_TX)
na->num_tx_desc = v;
else
na->num_rx_desc = v;
}
static __inline u_int
nma_get_nrings(struct netmap_adapter *na, enum txrx t)
{
return (t == NR_TX ? na->num_tx_rings : na->num_rx_rings);
}
static __inline void
nma_set_nrings(struct netmap_adapter *na, enum txrx t, u_int v)
{
if (t == NR_TX)
na->num_tx_rings = v;
else
na->num_rx_rings = v;
}
static __inline struct netmap_kring*
NMR(struct netmap_adapter *na, enum txrx t)
{
return (t == NR_TX ? na->tx_rings : na->rx_rings);
}
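
/*
 * Hypothetical usage sketch (added for illustration, not part of this
 * diff): the accessors above let common code walk both directions in a
 * single loop, e.g.
 *
 *	enum txrx t;
 *	u_int i;
 *
 *	for_rx_tx(t) {
 *		for (i = 0; i < nma_get_nrings(na, t); i++)
 *			D("%s ring %u: %u slots", nm_txrx2str(t), i,
 *			    NMR(na, t)[i].nkr_num_slots);
 *	}
 */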
/*
* If the NIC is owned by the kernel
@ -624,7 +703,6 @@ struct netmap_adapter {
#define NETMAP_OWNED_BY_ANY(na) \
(NETMAP_OWNED_BY_KERN(na) || ((na)->active_fds > 0))
/*
* derived netmap adapters for various types of ports
*/
@ -645,6 +723,8 @@ struct netmap_vp_adapter { /* VALE software port */
u_int virt_hdr_len;
/* Maximum Frame Size, used in bdg_mismatch_datapath() */
u_int mfs;
/* Last source MAC on this port */
uint64_t last_smac;
};
@ -689,15 +769,9 @@ struct netmap_generic_adapter { /* emulated device */
#endif /* WITH_GENERIC */
static __inline int
netmap_real_tx_rings(struct netmap_adapter *na)
netmap_real_rings(struct netmap_adapter *na, enum txrx t)
{
return na->num_tx_rings + !!(na->na_flags & NAF_HOST_RINGS);
}
static __inline int
netmap_real_rx_rings(struct netmap_adapter *na)
{
return na->num_rx_rings + !!(na->na_flags & NAF_HOST_RINGS);
return nma_get_nrings(na, t) + !!(na->na_flags & NAF_HOST_RINGS);
}
#ifdef WITH_VALE
@ -751,9 +825,6 @@ struct netmap_bwrap_adapter {
struct netmap_vp_adapter host; /* for host rings */
struct netmap_adapter *hwna; /* the underlying device */
/* backup of the hwna notify callback */
int (*save_notify)(struct netmap_adapter *,
u_int ring, enum txrx, int flags);
/* backup of the hwna memory allocator */
struct netmap_mem_d *save_nmd;
@ -847,6 +918,14 @@ static __inline int nm_kr_tryget(struct netmap_kring *kr)
return 0;
}
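
/* (descriptive note) unlike nm_kr_tryget() above, nm_kr_get() blocks,
 * sleeping in 4-tick naps until the busy flag can be acquired */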
static __inline void nm_kr_get(struct netmap_kring *kr)
{
while (NM_ATOMIC_TEST_AND_SET(&kr->nr_busy))
tsleep(kr, 0, "NM_KR_GET", 4);
}
/*
* The following functions are used by individual drivers to
@ -895,25 +974,25 @@ const char *netmap_bdg_name(struct netmap_vp_adapter *);
#define netmap_bdg_name(_vp) NULL
#endif /* WITH_VALE */
static inline int
nm_native_on(struct netmap_adapter *na)
{
return na && na->na_flags & NAF_NATIVE_ON;
}
static inline int
nm_netmap_on(struct netmap_adapter *na)
{
return na && na->na_flags & NAF_NETMAP_ON;
}
static inline int
nm_native_on(struct netmap_adapter *na)
{
return nm_netmap_on(na) && (na->na_flags & NAF_NATIVE);
}
/* set/clear native flags and if_transmit/netdev_ops */
static inline void
nm_set_native_flags(struct netmap_adapter *na)
{
struct ifnet *ifp = na->ifp;
na->na_flags |= (NAF_NATIVE_ON | NAF_NETMAP_ON);
na->na_flags |= NAF_NETMAP_ON;
#ifdef IFCAP_NETMAP /* or FreeBSD ? */
ifp->if_capenable |= IFCAP_NETMAP;
#endif
@ -940,63 +1019,13 @@ nm_clear_native_flags(struct netmap_adapter *na)
ifp->netdev_ops = (void *)na->if_transmit;
ifp->ethtool_ops = ((struct netmap_hw_adapter*)na)->save_ethtool;
#endif
na->na_flags &= ~(NAF_NATIVE_ON | NAF_NETMAP_ON);
na->na_flags &= ~NAF_NETMAP_ON;
#ifdef IFCAP_NETMAP /* or FreeBSD ? */
ifp->if_capenable &= ~IFCAP_NETMAP;
#endif
}
/*
* validates parameters in the ring/kring, returns a value for head
* If any error, returns ring_size to force a reinit.
*/
uint32_t nm_txsync_prologue(struct netmap_kring *);
/*
* validates parameters in the ring/kring, returns a value for head,
* and the 'reserved' value in the argument.
* If any error, returns ring_size lim to force a reinit.
*/
uint32_t nm_rxsync_prologue(struct netmap_kring *);
/*
* update kring and ring at the end of txsync.
*/
static inline void
nm_txsync_finalize(struct netmap_kring *kring)
{
/* update ring tail to what the kernel knows */
kring->ring->tail = kring->rtail = kring->nr_hwtail;
/* note, head/rhead/hwcur might be behind cur/rcur
* if no carrier
*/
ND(5, "%s now hwcur %d hwtail %d head %d cur %d tail %d",
kring->name, kring->nr_hwcur, kring->nr_hwtail,
kring->rhead, kring->rcur, kring->rtail);
}
/*
* update kring and ring at the end of rxsync
*/
static inline void
nm_rxsync_finalize(struct netmap_kring *kring)
{
/* tell userspace that there might be new packets */
//struct netmap_ring *ring = kring->ring;
ND("head %d cur %d tail %d -> %d", ring->head, ring->cur, ring->tail,
kring->nr_hwtail);
kring->ring->tail = kring->rtail = kring->nr_hwtail;
/* make a copy of the state for next round */
kring->rhead = kring->ring->head;
kring->rcur = kring->ring->cur;
}
/* check/fix address and len in tx rings */
#if 1 /* debug version */
#define NM_CHECK_ADDR_LEN(_na, _a, _l) do { \
@ -1050,14 +1079,15 @@ int netmap_krings_create(struct netmap_adapter *na, u_int tailroom);
* been created using netmap_krings_create
*/
void netmap_krings_delete(struct netmap_adapter *na);
int netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwait);
/* set the stopped/enabled status of ring
* When stopping, they also wait for all current activity on the ring to
* terminate. The status change is then notified using the na nm_notify
* callback.
*/
void netmap_set_txring(struct netmap_adapter *, u_int ring_id, int stopped);
void netmap_set_rxring(struct netmap_adapter *, u_int ring_id, int stopped);
void netmap_set_ring(struct netmap_adapter *, u_int ring_id, enum txrx, int stopped);
/* set the stopped/enabled status of all rings of the adapter. */
void netmap_set_all_rings(struct netmap_adapter *, int stopped);
/* convenience wrappers for netmap_set_all_rings, used in drivers */
@ -1066,9 +1096,9 @@ void netmap_enable_all_rings(struct ifnet *);
int netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwait);
struct netmap_if *
int
netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
uint16_t ringid, uint32_t flags, int *err);
uint16_t ringid, uint32_t flags);
@ -1088,7 +1118,7 @@ int netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na);
* XXX in practice "unknown" might be handled same as broadcast.
*/
typedef u_int (*bdg_lookup_fn_t)(struct nm_bdg_fwd *ft, uint8_t *ring_nr,
const struct netmap_vp_adapter *);
struct netmap_vp_adapter *);
typedef int (*bdg_config_fn_t)(struct nm_ifreq *);
typedef void (*bdg_dtor_fn_t)(const struct netmap_vp_adapter *);
struct netmap_bdg_ops {
@ -1098,7 +1128,7 @@ struct netmap_bdg_ops {
};
u_int netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
const struct netmap_vp_adapter *);
struct netmap_vp_adapter *);
#define NM_BDG_MAXPORTS 254 /* up to 254 */
#define NM_BDG_BROADCAST NM_BDG_MAXPORTS
@ -1108,34 +1138,52 @@ u_int netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
/* these are redefined in case of no VALE support */
int netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
void netmap_init_bridges(void);
struct nm_bridge *netmap_init_bridges2(u_int);
void netmap_uninit_bridges2(struct nm_bridge *, u_int);
int netmap_init_bridges(void);
void netmap_uninit_bridges(void);
int netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops);
int netmap_bdg_config(struct nmreq *nmr);
#else /* !WITH_VALE */
#define netmap_get_bdg_na(_1, _2, _3) 0
#define netmap_init_bridges(_1)
#define netmap_init_bridges(_1) 0
#define netmap_uninit_bridges()
#define netmap_bdg_ctl(_1, _2) EINVAL
#endif /* !WITH_VALE */
#ifdef WITH_PIPES
/* max number of pipes per device */
#define NM_MAXPIPES 64 /* XXX how many? */
/* in case of no error, returns the actual number of pipes in nmr->nr_arg1 */
int netmap_pipe_alloc(struct netmap_adapter *, struct nmreq *nmr);
void netmap_pipe_dealloc(struct netmap_adapter *);
int netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
#else /* !WITH_PIPES */
#define NM_MAXPIPES 0
#define netmap_pipe_alloc(_1, _2) EOPNOTSUPP
#define netmap_pipe_alloc(_1, _2) 0
#define netmap_pipe_dealloc(_1)
#define netmap_get_pipe_na(_1, _2, _3) 0
#define netmap_get_pipe_na(nmr, _2, _3) \
({ int role__ = (nmr)->nr_flags & NR_REG_MASK; \
(role__ == NR_REG_PIPE_MASTER || \
role__ == NR_REG_PIPE_SLAVE) ? EOPNOTSUPP : 0; })
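/* (descriptive note) the stub fails with EOPNOTSUPP only when a pipe
 * was explicitly requested; other registrations fall through with 0 */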
#endif
#ifdef WITH_MONITOR
int netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
void netmap_monitor_stop(struct netmap_adapter *na);
#else
#define netmap_get_monitor_na(_1, _2, _3) 0
#define netmap_get_monitor_na(nmr, _2, _3) \
((nmr)->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX) ? EOPNOTSUPP : 0)
#endif
#ifdef CONFIG_NET_NS
struct net *netmap_bns_get(void);
void netmap_bns_put(struct net *);
void netmap_bns_getbridges(struct nm_bridge **, u_int *);
#else
#define netmap_bns_get()
#define netmap_bns_put(_1)
#define netmap_bns_getbridges(b, n) \
do { *b = nm_bridges; *n = NM_BRIDGES; } while (0)
#endif
/* Various prototypes */
@ -1186,8 +1234,8 @@ int netmap_adapter_put(struct netmap_adapter *na);
/*
* module variables
*/
#define NETMAP_BUF_BASE(na) ((na)->na_lut[0].vaddr)
#define NETMAP_BUF_SIZE(na) ((na)->na_lut_objsize)
#define NETMAP_BUF_BASE(na) ((na)->na_lut.lut[0].vaddr)
#define NETMAP_BUF_SIZE(na) ((na)->na_lut.objsize)
extern int netmap_mitigate; // XXX not really used
extern int netmap_no_pendintr;
extern int netmap_verbose; // XXX debugging
@ -1291,15 +1339,14 @@ netmap_reload_map(struct netmap_adapter *na,
#else /* linux */
int nm_iommu_group_id(bus_dma_tag_t dev);
extern size_t netmap_mem_get_bufsize(struct netmap_mem_d *);
#include <linux/dma-mapping.h>
static inline void
netmap_load_map(struct netmap_adapter *na,
bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
{
if (map) {
*map = dma_map_single(na->pdev, buf, netmap_mem_get_bufsize(na->nm_mem),
if (0 && map) {
*map = dma_map_single(na->pdev, buf, na->na_lut.objsize,
DMA_BIDIRECTIONAL);
}
}
@ -1308,7 +1355,7 @@ static inline void
netmap_unload_map(struct netmap_adapter *na,
bus_dma_tag_t tag, bus_dmamap_t map)
{
u_int sz = netmap_mem_get_bufsize(na->nm_mem);
u_int sz = na->na_lut.objsize;
if (*map) {
dma_unmap_single(na->pdev, *map, sz,
@ -1320,7 +1367,7 @@ static inline void
netmap_reload_map(struct netmap_adapter *na,
bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
{
u_int sz = netmap_mem_get_bufsize(na->nm_mem);
u_int sz = na->na_lut.objsize;
if (*map) {
dma_unmap_single(na->pdev, *map, sz,
@ -1418,9 +1465,9 @@ struct netmap_obj_pool;
static inline void *
NMB(struct netmap_adapter *na, struct netmap_slot *slot)
{
struct lut_entry *lut = na->na_lut;
struct lut_entry *lut = na->na_lut.lut;
uint32_t i = slot->buf_idx;
return (unlikely(i >= na->na_lut_objtotal)) ?
return (unlikely(i >= na->na_lut.objtotal)) ?
lut[0].vaddr : lut[i].vaddr;
}
@ -1428,10 +1475,10 @@ static inline void *
PNMB(struct netmap_adapter *na, struct netmap_slot *slot, uint64_t *pp)
{
uint32_t i = slot->buf_idx;
struct lut_entry *lut = na->na_lut;
void *ret = (i >= na->na_lut_objtotal) ? lut[0].vaddr : lut[i].vaddr;
struct lut_entry *lut = na->na_lut.lut;
void *ret = (i >= na->na_lut.objtotal) ? lut[0].vaddr : lut[i].vaddr;
*pp = (i >= na->na_lut_objtotal) ? lut[0].paddr : lut[i].paddr;
*pp = (i >= na->na_lut.objtotal) ? lut[0].paddr : lut[i].paddr;
return ret;
}
@ -1459,7 +1506,7 @@ void netmap_txsync_to_host(struct netmap_adapter *na);
* If np_nifp is NULL initialization has not been performed,
* so they should return an error to userspace.
*
* The ref_done field is used to regulate access to the refcount in the
* The ref_done field (XXX ?) is used to regulate access to the refcount in the
* memory allocator. The refcount must be incremented at most once for
* each open("/dev/netmap"). The increment is performed by the first
* function that calls netmap_get_memory() (currently called by
@ -1472,11 +1519,10 @@ struct netmap_priv_d {
struct netmap_adapter *np_na;
uint32_t np_flags; /* from the ioctl */
u_int np_txqfirst, np_txqlast; /* range of tx rings to scan */
u_int np_rxqfirst, np_rxqlast; /* range of rx rings to scan */
u_int np_qfirst[NR_TXRX],
np_qlast[NR_TXRX]; /* range of tx/rx rings to scan */
uint16_t np_txpoll; /* XXX and also np_rxpoll ? */
struct netmap_mem_d *np_mref; /* use with NMG_LOCK held */
/* np_refcount is only used on FreeBSD */
int np_refcount; /* use with NMG_LOCK held */
@ -1484,7 +1530,7 @@ struct netmap_priv_d {
* Either the local or the global one depending on the
* number of rings.
*/
NM_SELINFO_T *np_rxsi, *np_txsi;
NM_SELINFO_T *np_si[NR_TXRX];
struct thread *np_td; /* kqueue, just debugging */
};
@ -1507,12 +1553,20 @@ struct netmap_monitor_adapter {
*/
int generic_netmap_attach(struct ifnet *ifp);
int netmap_catch_rx(struct netmap_adapter *na, int intercept);
int netmap_catch_rx(struct netmap_generic_adapter *na, int intercept);
void generic_rx_handler(struct ifnet *ifp, struct mbuf *m);
void netmap_catch_tx(struct netmap_generic_adapter *na, int enable);
int generic_xmit_frame(struct ifnet *ifp, struct mbuf *m, void *addr, u_int len, u_int ring_nr);
int generic_find_num_desc(struct ifnet *ifp, u_int *tx, u_int *rx);
void generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq);
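
/* (descriptive note) if the generic adapter took over a pre-existing
 * adapter, the ifp reference is held by that previous adapter */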
static inline struct ifnet*
netmap_generic_getifp(struct netmap_generic_adapter *gna)
{
if (gna->prev)
return gna->prev->ifp;
return gna->up.up.ifp;
}
//#define RATE_GENERIC /* Enables communication statistics for generic. */
#ifdef RATE_GENERIC


@ -71,6 +71,7 @@ struct netmap_obj_params {
u_int size;
u_int num;
};
struct netmap_obj_pool {
char name[NETMAP_POOL_MAX_NAMSZ]; /* name of the allocator */
@ -106,16 +107,26 @@ struct netmap_obj_pool {
u_int r_objsize;
};
#ifdef linux
// XXX a mtx would suffice here 20130415 lr
#define NMA_LOCK_T struct semaphore
#else /* !linux */
#define NMA_LOCK_T struct mtx
#endif /* linux */
#define NMA_LOCK_T NM_MTX_T
typedef int (*netmap_mem_config_t)(struct netmap_mem_d*);
typedef int (*netmap_mem_finalize_t)(struct netmap_mem_d*);
typedef void (*netmap_mem_deref_t)(struct netmap_mem_d*);
struct netmap_mem_ops {
void (*nmd_get_lut)(struct netmap_mem_d *, struct netmap_lut*);
int (*nmd_get_info)(struct netmap_mem_d *, u_int *size,
u_int *memflags, uint16_t *id);
vm_paddr_t (*nmd_ofstophys)(struct netmap_mem_d *, vm_ooffset_t);
int (*nmd_config)(struct netmap_mem_d *);
int (*nmd_finalize)(struct netmap_mem_d *);
void (*nmd_deref)(struct netmap_mem_d *);
ssize_t (*nmd_if_offset)(struct netmap_mem_d *, const void *vaddr);
void (*nmd_delete)(struct netmap_mem_d *);
struct netmap_if * (*nmd_if_new)(struct netmap_adapter *);
void (*nmd_if_delete)(struct netmap_adapter *, struct netmap_if *);
int (*nmd_rings_create)(struct netmap_adapter *);
void (*nmd_rings_delete)(struct netmap_adapter *);
};
typedef uint16_t nm_memid_t;
@ -126,53 +137,145 @@ struct netmap_mem_d {
u_int flags;
#define NETMAP_MEM_FINALIZED 0x1 /* preallocation done */
int lasterr; /* last error for curr config */
int refcount; /* existing priv structures */
int active; /* active users */
int refcount;
/* the three allocators */
struct netmap_obj_pool pools[NETMAP_POOLS_NR];
netmap_mem_config_t config; /* called with NMA_LOCK held */
netmap_mem_finalize_t finalize; /* called with NMA_LOCK held */
netmap_mem_deref_t deref; /* called with NMA_LOCK held */
nm_memid_t nm_id; /* allocator identifier */
int nm_grp; /* iommu groupd id */
/* list of all existing allocators, sorted by nm_id */
struct netmap_mem_d *prev, *next;
struct netmap_mem_ops *ops;
};
#define NMD_DEFCB(t0, name) \
t0 \
netmap_mem_##name(struct netmap_mem_d *nmd) \
{ \
return nmd->ops->nmd_##name(nmd); \
}
#define NMD_DEFCB1(t0, name, t1) \
t0 \
netmap_mem_##name(struct netmap_mem_d *nmd, t1 a1) \
{ \
return nmd->ops->nmd_##name(nmd, a1); \
}
#define NMD_DEFCB3(t0, name, t1, t2, t3) \
t0 \
netmap_mem_##name(struct netmap_mem_d *nmd, t1 a1, t2 a2, t3 a3) \
{ \
return nmd->ops->nmd_##name(nmd, a1, a2, a3); \
}
#define NMD_DEFNACB(t0, name) \
t0 \
netmap_mem_##name(struct netmap_adapter *na) \
{ \
return na->nm_mem->ops->nmd_##name(na); \
}
#define NMD_DEFNACB1(t0, name, t1) \
t0 \
netmap_mem_##name(struct netmap_adapter *na, t1 a1) \
{ \
return na->nm_mem->ops->nmd_##name(na, a1); \
}
NMD_DEFCB1(void, get_lut, struct netmap_lut *);
NMD_DEFCB3(int, get_info, u_int *, u_int *, uint16_t *);
NMD_DEFCB1(vm_paddr_t, ofstophys, vm_ooffset_t);
static int netmap_mem_config(struct netmap_mem_d *);
NMD_DEFCB(int, config);
NMD_DEFCB1(ssize_t, if_offset, const void *);
NMD_DEFCB(void, delete);
NMD_DEFNACB(struct netmap_if *, if_new);
NMD_DEFNACB1(void, if_delete, struct netmap_if *);
NMD_DEFNACB(int, rings_create);
NMD_DEFNACB(void, rings_delete);
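
/*
 * For reference, an expansion sketch (not in the original source):
 * NMD_DEFNACB(int, rings_create) above generates
 *
 *	int
 *	netmap_mem_rings_create(struct netmap_adapter *na)
 *	{
 *		return na->nm_mem->ops->nmd_rings_create(na);
 *	}
 */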
static int netmap_mem_map(struct netmap_obj_pool *, struct netmap_adapter *);
static int netmap_mem_unmap(struct netmap_obj_pool *, struct netmap_adapter *);
static int nm_mem_assign_group(struct netmap_mem_d *, struct device *);
#define NMA_LOCK_INIT(n) NM_MTX_INIT((n)->nm_mtx)
#define NMA_LOCK_DESTROY(n) NM_MTX_DESTROY((n)->nm_mtx)
#define NMA_LOCK(n) NM_MTX_LOCK((n)->nm_mtx)
#define NMA_UNLOCK(n) NM_MTX_UNLOCK((n)->nm_mtx)
#ifdef NM_DEBUG_MEM_PUTGET
#define NM_DBG_REFC(nmd, func, line) \
printf("%s:%d mem[%d] -> %d\n", func, line, (nmd)->nm_id, (nmd)->refcount);
#else
#define NM_DBG_REFC(nmd, func, line)
#endif
#ifdef NM_DEBUG_MEM_PUTGET
void __netmap_mem_get(struct netmap_mem_d *nmd, const char *func, int line)
#else
void netmap_mem_get(struct netmap_mem_d *nmd)
#endif
{
NMA_LOCK(nmd);
nmd->refcount++;
NM_DBG_REFC(nmd, func, line);
NMA_UNLOCK(nmd);
}
#ifdef NM_DEBUG_MEM_PUTGET
void __netmap_mem_put(struct netmap_mem_d *nmd, const char *func, int line)
#else
void netmap_mem_put(struct netmap_mem_d *nmd)
#endif
{
int last;
NMA_LOCK(nmd);
last = (--nmd->refcount == 0);
NM_DBG_REFC(nmd, func, line);
NMA_UNLOCK(nmd);
if (last)
netmap_mem_delete(nmd);
}
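
/* (descriptive note) the last netmap_mem_put() deletes the allocator,
 * so netmap_mem_fini() further below simply drops the reference taken
 * in netmap_mem_init() */
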
int
netmap_mem_finalize(struct netmap_mem_d *nmd, struct netmap_adapter *na)
{
if (nm_mem_assign_group(nmd, na->pdev) < 0) {
return ENOMEM;
} else {
nmd->ops->nmd_finalize(nmd);
}
if (!nmd->lasterr && na->pdev)
netmap_mem_map(&nmd->pools[NETMAP_BUF_POOL], na);
return nmd->lasterr;
}
void
netmap_mem_deref(struct netmap_mem_d *nmd, struct netmap_adapter *na)
{
NMA_LOCK(nmd);
netmap_mem_unmap(&nmd->pools[NETMAP_BUF_POOL], na);
NMA_UNLOCK(nmd);
return nmd->ops->nmd_deref(nmd);
}
/* accessor functions */
struct lut_entry*
netmap_mem_get_lut(struct netmap_mem_d *nmd)
static void
netmap_mem2_get_lut(struct netmap_mem_d *nmd, struct netmap_lut *lut)
{
return nmd->pools[NETMAP_BUF_POOL].lut;
lut->lut = nmd->pools[NETMAP_BUF_POOL].lut;
lut->objtotal = nmd->pools[NETMAP_BUF_POOL].objtotal;
lut->objsize = nmd->pools[NETMAP_BUF_POOL]._objsize;
}
u_int
netmap_mem_get_buftotal(struct netmap_mem_d *nmd)
{
return nmd->pools[NETMAP_BUF_POOL].objtotal;
}
size_t
netmap_mem_get_bufsize(struct netmap_mem_d *nmd)
{
return nmd->pools[NETMAP_BUF_POOL]._objsize;
}
#ifdef linux
#define NMA_LOCK_INIT(n) sema_init(&(n)->nm_mtx, 1)
#define NMA_LOCK_DESTROY(n)
#define NMA_LOCK(n) down(&(n)->nm_mtx)
#define NMA_UNLOCK(n) up(&(n)->nm_mtx)
#else /* !linux */
#define NMA_LOCK_INIT(n) mtx_init(&(n)->nm_mtx, "netmap memory allocator lock", NULL, MTX_DEF)
#define NMA_LOCK_DESTROY(n) mtx_destroy(&(n)->nm_mtx)
#define NMA_LOCK(n) mtx_lock(&(n)->nm_mtx)
#define NMA_UNLOCK(n) mtx_unlock(&(n)->nm_mtx)
#endif /* linux */
struct netmap_obj_params netmap_params[NETMAP_POOLS_NR] = {
[NETMAP_IF_POOL] = {
.size = 1024,
@ -209,9 +312,7 @@ struct netmap_obj_params netmap_min_priv_params[NETMAP_POOLS_NR] = {
* running in netmap mode.
* Virtual (VALE) ports will have each its own allocator.
*/
static int netmap_mem_global_config(struct netmap_mem_d *nmd);
static int netmap_mem_global_finalize(struct netmap_mem_d *nmd);
static void netmap_mem_global_deref(struct netmap_mem_d *nmd);
extern struct netmap_mem_ops netmap_mem_global_ops; /* forward */
struct netmap_mem_d nm_mem = { /* Our memory allocator. */
.pools = {
[NETMAP_IF_POOL] = {
@ -236,24 +337,21 @@ struct netmap_mem_d nm_mem = { /* Our memory allocator. */
.nummax = 1000000, /* one million! */
},
},
.config = netmap_mem_global_config,
.finalize = netmap_mem_global_finalize,
.deref = netmap_mem_global_deref,
.nm_id = 1,
.nm_grp = -1,
.prev = &nm_mem,
.next = &nm_mem,
.ops = &netmap_mem_global_ops
};
struct netmap_mem_d *netmap_last_mem_d = &nm_mem;
/* blueprint for the private memory allocators */
static int netmap_mem_private_config(struct netmap_mem_d *nmd);
static int netmap_mem_private_finalize(struct netmap_mem_d *nmd);
static void netmap_mem_private_deref(struct netmap_mem_d *nmd);
extern struct netmap_mem_ops netmap_mem_private_ops; /* forward */
const struct netmap_mem_d nm_blueprint = {
.pools = {
[NETMAP_IF_POOL] = {
@ -278,11 +376,10 @@ const struct netmap_mem_d nm_blueprint = {
.nummax = 1000000, /* one million! */
},
},
.config = netmap_mem_private_config,
.finalize = netmap_mem_private_finalize,
.deref = netmap_mem_private_deref,
.flags = NETMAP_MEM_PRIVATE,
.ops = &netmap_mem_private_ops
};
/* memory allocator related sysctls */
@ -382,8 +479,8 @@ nm_mem_assign_group(struct netmap_mem_d *nmd, struct device *dev)
* First, find the allocator that contains the requested offset,
* then locate the cluster through a lookup table.
*/
vm_paddr_t
netmap_mem_ofstophys(struct netmap_mem_d* nmd, vm_ooffset_t offset)
static vm_paddr_t
netmap_mem2_ofstophys(struct netmap_mem_d* nmd, vm_ooffset_t offset)
{
int i;
vm_ooffset_t o = offset;
@ -414,13 +511,13 @@ netmap_mem_ofstophys(struct netmap_mem_d* nmd, vm_ooffset_t offset)
return 0; // XXX bad address
}
int
netmap_mem_get_info(struct netmap_mem_d* nmd, u_int* size, u_int *memflags,
static int
netmap_mem2_get_info(struct netmap_mem_d* nmd, u_int* size, u_int *memflags,
nm_memid_t *id)
{
int error = 0;
NMA_LOCK(nmd);
error = nmd->config(nmd);
error = netmap_mem_config(nmd);
if (error)
goto out;
if (size) {
@ -487,8 +584,8 @@ netmap_obj_offset(struct netmap_obj_pool *p, const void *vaddr)
netmap_obj_offset(&(n)->pools[NETMAP_BUF_POOL], (v)))
ssize_t
netmap_mem_if_offset(struct netmap_mem_d *nmd, const void *addr)
static ssize_t
netmap_mem2_if_offset(struct netmap_mem_d *nmd, const void *addr)
{
ssize_t v;
NMA_LOCK(nmd);
@ -648,7 +745,7 @@ netmap_extra_alloc(struct netmap_adapter *na, uint32_t *head, uint32_t n)
static void
netmap_extra_free(struct netmap_adapter *na, uint32_t head)
{
struct lut_entry *lut = na->na_lut;
struct lut_entry *lut = na->na_lut.lut;
struct netmap_mem_d *nmd = na->nm_mem;
struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL];
uint32_t i, cur, *buf;
@ -1081,15 +1178,15 @@ netmap_mem_finalize_all(struct netmap_mem_d *nmd)
void
static void
netmap_mem_private_delete(struct netmap_mem_d *nmd)
{
if (nmd == NULL)
return;
if (netmap_verbose)
D("deleting %p", nmd);
if (nmd->refcount > 0)
D("bug: deleting mem allocator with refcount=%d!", nmd->refcount);
if (nmd->active > 0)
D("bug: deleting mem allocator with active=%d!", nmd->active);
nm_mem_release_id(nmd);
if (netmap_verbose)
D("done deleting %p", nmd);
@ -1110,8 +1207,10 @@ static int
netmap_mem_private_finalize(struct netmap_mem_d *nmd)
{
int err;
nmd->refcount++;
NMA_LOCK(nmd);
nmd->active++;
err = netmap_mem_finalize_all(nmd);
NMA_UNLOCK(nmd);
return err;
}
@ -1119,8 +1218,10 @@ netmap_mem_private_finalize(struct netmap_mem_d *nmd)
static void
netmap_mem_private_deref(struct netmap_mem_d *nmd)
{
if (--nmd->refcount <= 0)
NMA_LOCK(nmd);
if (--nmd->active <= 0)
netmap_mem_reset_all(nmd);
NMA_UNLOCK(nmd);
}
@ -1223,14 +1324,14 @@ netmap_mem_global_config(struct netmap_mem_d *nmd)
{
int i;
if (nmd->refcount)
if (nmd->active)
/* already in use, we cannot change the configuration */
goto out;
if (!netmap_memory_config_changed(nmd))
goto out;
D("reconfiguring");
ND("reconfiguring");
if (nmd->flags & NETMAP_MEM_FINALIZED) {
/* reset previous allocation */
@ -1261,7 +1362,7 @@ netmap_mem_global_finalize(struct netmap_mem_d *nmd)
if (netmap_mem_global_config(nmd))
goto out;
nmd->refcount++;
nmd->active++;
if (nmd->flags & NETMAP_MEM_FINALIZED) {
/* may happen if config is not changed */
@ -1276,53 +1377,56 @@ netmap_mem_global_finalize(struct netmap_mem_d *nmd)
out:
if (nmd->lasterr)
nmd->refcount--;
nmd->active--;
err = nmd->lasterr;
return err;
}
int
netmap_mem_init(void)
{
NMA_LOCK_INIT(&nm_mem);
return (0);
}
void
netmap_mem_fini(void)
static void
netmap_mem_global_delete(struct netmap_mem_d *nmd)
{
int i;
for (i = 0; i < NETMAP_POOLS_NR; i++) {
netmap_destroy_obj_allocator(&nm_mem.pools[i]);
}
NMA_LOCK_DESTROY(&nm_mem);
}
int
netmap_mem_init(void)
{
NMA_LOCK_INIT(&nm_mem);
netmap_mem_get(&nm_mem);
return (0);
}
void
netmap_mem_fini(void)
{
netmap_mem_put(&nm_mem);
}
static void
netmap_free_rings(struct netmap_adapter *na)
{
struct netmap_kring *kring;
struct netmap_ring *ring;
if (!na->tx_rings)
return;
for (kring = na->tx_rings; kring != na->rx_rings; kring++) {
ring = kring->ring;
if (ring == NULL)
continue;
netmap_free_bufs(na->nm_mem, ring->slot, kring->nkr_num_slots);
netmap_ring_free(na->nm_mem, ring);
kring->ring = NULL;
}
for (/* cont'd from above */; kring != na->tailroom; kring++) {
ring = kring->ring;
if (ring == NULL)
continue;
netmap_free_bufs(na->nm_mem, ring->slot, kring->nkr_num_slots);
netmap_ring_free(na->nm_mem, ring);
kring->ring = NULL;
enum txrx t;
for_rx_tx(t) {
u_int i;
for (i = 0; i < netmap_real_rings(na, t); i++) {
struct netmap_kring *kring = &NMR(na, t)[i];
struct netmap_ring *ring = kring->ring;
if (ring == NULL)
continue;
netmap_free_bufs(na->nm_mem, ring->slot, kring->nkr_num_slots);
netmap_ring_free(na->nm_mem, ring);
kring->ring = NULL;
}
}
}
@ -1333,99 +1437,63 @@ netmap_free_rings(struct netmap_adapter *na)
* The kring array must follow the layout described
* in netmap_krings_create().
*/
int
netmap_mem_rings_create(struct netmap_adapter *na)
static int
netmap_mem2_rings_create(struct netmap_adapter *na)
{
struct netmap_ring *ring;
u_int len, ndesc;
struct netmap_kring *kring;
u_int i;
enum txrx t;
NMA_LOCK(na->nm_mem);
/* transmit rings */
for (i =0, kring = na->tx_rings; kring != na->rx_rings; kring++, i++) {
if (kring->ring) {
ND("%s %ld already created", kring->name, kring - na->tx_rings);
continue; /* already created by somebody else */
}
ndesc = kring->nkr_num_slots;
len = sizeof(struct netmap_ring) +
ndesc * sizeof(struct netmap_slot);
ring = netmap_ring_malloc(na->nm_mem, len);
if (ring == NULL) {
D("Cannot allocate tx_ring");
goto cleanup;
}
ND("txring at %p", ring);
kring->ring = ring;
*(uint32_t *)(uintptr_t)&ring->num_slots = ndesc;
*(int64_t *)(uintptr_t)&ring->buf_ofs =
(na->nm_mem->pools[NETMAP_IF_POOL].memtotal +
na->nm_mem->pools[NETMAP_RING_POOL].memtotal) -
netmap_ring_offset(na->nm_mem, ring);
for_rx_tx(t) {
u_int i;
/* copy values from kring */
ring->head = kring->rhead;
ring->cur = kring->rcur;
ring->tail = kring->rtail;
*(uint16_t *)(uintptr_t)&ring->nr_buf_size =
netmap_mem_bufsize(na->nm_mem);
ND("%s h %d c %d t %d", kring->name,
ring->head, ring->cur, ring->tail);
ND("initializing slots for txring");
if (i != na->num_tx_rings || (na->na_flags & NAF_HOST_RINGS)) {
/* this is a real ring */
if (netmap_new_bufs(na->nm_mem, ring->slot, ndesc)) {
D("Cannot allocate buffers for tx_ring");
for (i = 0; i <= nma_get_nrings(na, t); i++) {
struct netmap_kring *kring = &NMR(na, t)[i];
struct netmap_ring *ring = kring->ring;
u_int len, ndesc;
if (ring) {
ND("%s already created", kring->name);
continue; /* already created by somebody else */
}
ndesc = kring->nkr_num_slots;
len = sizeof(struct netmap_ring) +
ndesc * sizeof(struct netmap_slot);
ring = netmap_ring_malloc(na->nm_mem, len);
if (ring == NULL) {
D("Cannot allocate %s_ring", nm_txrx2str(t));
goto cleanup;
}
} else {
/* this is a fake tx ring, set all indices to 0 */
netmap_mem_set_ring(na->nm_mem, ring->slot, ndesc, 0);
}
}
ND("txring at %p", ring);
kring->ring = ring;
*(uint32_t *)(uintptr_t)&ring->num_slots = ndesc;
*(int64_t *)(uintptr_t)&ring->buf_ofs =
(na->nm_mem->pools[NETMAP_IF_POOL].memtotal +
na->nm_mem->pools[NETMAP_RING_POOL].memtotal) -
netmap_ring_offset(na->nm_mem, ring);
/* receive rings */
for ( i = 0 /* kring cont'd from above */ ; kring != na->tailroom; kring++, i++) {
if (kring->ring) {
ND("%s %ld already created", kring->name, kring - na->rx_rings);
continue; /* already created by somebody else */
}
ndesc = kring->nkr_num_slots;
len = sizeof(struct netmap_ring) +
ndesc * sizeof(struct netmap_slot);
ring = netmap_ring_malloc(na->nm_mem, len);
if (ring == NULL) {
D("Cannot allocate rx_ring");
goto cleanup;
}
ND("rxring at %p", ring);
kring->ring = ring;
*(uint32_t *)(uintptr_t)&ring->num_slots = ndesc;
*(int64_t *)(uintptr_t)&ring->buf_ofs =
(na->nm_mem->pools[NETMAP_IF_POOL].memtotal +
na->nm_mem->pools[NETMAP_RING_POOL].memtotal) -
netmap_ring_offset(na->nm_mem, ring);
/* copy values from kring */
ring->head = kring->rhead;
ring->cur = kring->rcur;
ring->tail = kring->rtail;
*(int *)(uintptr_t)&ring->nr_buf_size =
netmap_mem_bufsize(na->nm_mem);
ND("%s h %d c %d t %d", kring->name,
ring->head, ring->cur, ring->tail);
ND("initializing slots for rxring %p", ring);
if (i != na->num_rx_rings || (na->na_flags & NAF_HOST_RINGS)) {
/* this is a real ring */
if (netmap_new_bufs(na->nm_mem, ring->slot, ndesc)) {
D("Cannot allocate buffers for rx_ring");
goto cleanup;
/* copy values from kring */
ring->head = kring->rhead;
ring->cur = kring->rcur;
ring->tail = kring->rtail;
*(uint16_t *)(uintptr_t)&ring->nr_buf_size =
netmap_mem_bufsize(na->nm_mem);
ND("%s h %d c %d t %d", kring->name,
ring->head, ring->cur, ring->tail);
ND("initializing slots for %s_ring", nm_txrx2str(txrx));
if (i != nma_get_nrings(na, t) || (na->na_flags & NAF_HOST_RINGS)) {
/* this is a real ring */
if (netmap_new_bufs(na->nm_mem, ring->slot, ndesc)) {
D("Cannot allocate buffers for %s_ring", nm_txrx2str(t));
goto cleanup;
}
} else {
/* this is a fake ring, set all indices to 0 */
netmap_mem_set_ring(na->nm_mem, ring->slot, ndesc, 0);
}
} else {
/* this is a fake rx ring, set all indices to 1 */
netmap_mem_set_ring(na->nm_mem, ring->slot, ndesc, 1);
/* ring info */
*(uint16_t *)(uintptr_t)&ring->ringid = kring->ring_id;
*(uint16_t *)(uintptr_t)&ring->dir = kring->tx;
}
}
@ -1441,8 +1509,8 @@ netmap_mem_rings_create(struct netmap_adapter *na)
return ENOMEM;
}
void
netmap_mem_rings_delete(struct netmap_adapter *na)
static void
netmap_mem2_rings_delete(struct netmap_adapter *na)
{
/* last instance, release bufs and rings */
NMA_LOCK(na->nm_mem);
@ -1461,16 +1529,20 @@ netmap_mem_rings_delete(struct netmap_adapter *na)
* (number of tx/rx rings and descs) does not change while
* the interface is in netmap mode.
*/
struct netmap_if *
netmap_mem_if_new(struct netmap_adapter *na)
static struct netmap_if *
netmap_mem2_if_new(struct netmap_adapter *na)
{
struct netmap_if *nifp;
ssize_t base; /* handy for relative offsets between rings and nifp */
u_int i, len, ntx, nrx;
u_int i, len, n[NR_TXRX], ntot;
enum txrx t;
/* account for the (eventually fake) host rings */
ntx = na->num_tx_rings + 1;
nrx = na->num_rx_rings + 1;
ntot = 0;
for_rx_tx(t) {
/* account for the (eventually fake) host rings */
n[t] = nma_get_nrings(na, t) + 1;
ntot += n[t];
}
/*
* the descriptor is followed inline by an array of offsets
* to the tx and rx rings in the shared memory region.
@ -1478,7 +1550,7 @@ netmap_mem_if_new(struct netmap_adapter *na)
NMA_LOCK(na->nm_mem);
len = sizeof(struct netmap_if) + (nrx + ntx) * sizeof(ssize_t);
len = sizeof(struct netmap_if) + (ntot * sizeof(ssize_t));
nifp = netmap_if_malloc(na->nm_mem, len);
if (nifp == NULL) {
NMA_UNLOCK(na->nm_mem);
@ -1496,12 +1568,12 @@ netmap_mem_if_new(struct netmap_adapter *na)
* userspace to reach the ring from the nifp.
*/
base = netmap_if_offset(na->nm_mem, nifp);
for (i = 0; i < ntx; i++) {
for (i = 0; i < n[NR_TX]; i++) {
*(ssize_t *)(uintptr_t)&nifp->ring_ofs[i] =
netmap_ring_offset(na->nm_mem, na->tx_rings[i].ring) - base;
}
for (i = 0; i < nrx; i++) {
*(ssize_t *)(uintptr_t)&nifp->ring_ofs[i+ntx] =
for (i = 0; i < n[NR_RX]; i++) {
*(ssize_t *)(uintptr_t)&nifp->ring_ofs[i+n[NR_TX]] =
netmap_ring_offset(na->nm_mem, na->rx_rings[i].ring) - base;
}
@ -1510,8 +1582,8 @@ netmap_mem_if_new(struct netmap_adapter *na)
return (nifp);
}
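For reference, a minimal userspace sketch (an illustrative example, assuming the standard <net/netmap_user.h> macros and a nifp obtained from a completed NIOCREGIF + mmap) of how the ring_ofs[] offsets written above are consumed — NETMAP_TXRING()/NETMAP_RXRING() simply add the stored relative offset to the nifp address:

#include <stdio.h>
#include <net/netmap_user.h>

/* walk the rings of an open port; ntx/nrx include the host rings,
 * matching the n[NR_TX]/n[NR_RX] counts computed above */
static void
walk_rings(struct netmap_if *nifp, unsigned ntx, unsigned nrx)
{
	unsigned i;

	for (i = 0; i < ntx; i++) {
		struct netmap_ring *txr = NETMAP_TXRING(nifp, i);
		printf("tx ring %u: %u slots\n", i, txr->num_slots);
	}
	for (i = 0; i < nrx; i++) {
		/* rx offsets follow the tx ones in ring_ofs[] */
		struct netmap_ring *rxr = NETMAP_RXRING(nifp, i);
		printf("rx ring %u: %u slots\n", i, rxr->num_slots);
	}
}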
void
netmap_mem_if_delete(struct netmap_adapter *na, struct netmap_if *nifp)
static void
netmap_mem2_if_delete(struct netmap_adapter *na, struct netmap_if *nifp)
{
if (nifp == NULL)
/* nothing to do */
@ -1528,78 +1600,39 @@ static void
netmap_mem_global_deref(struct netmap_mem_d *nmd)
{
nmd->refcount--;
if (!nmd->refcount)
nmd->active--;
if (!nmd->active)
nmd->nm_grp = -1;
if (netmap_verbose)
D("refcount = %d", nmd->refcount);
D("active = %d", nmd->active);
}
int
netmap_mem_finalize(struct netmap_mem_d *nmd, struct netmap_adapter *na)
{
if (nm_mem_assign_group(nmd, na->pdev) < 0) {
return ENOMEM;
} else {
NMA_LOCK(nmd);
nmd->finalize(nmd);
NMA_UNLOCK(nmd);
}
if (!nmd->lasterr && na->pdev)
netmap_mem_map(&nmd->pools[NETMAP_BUF_POOL], na);
return nmd->lasterr;
}
void
netmap_mem_deref(struct netmap_mem_d *nmd, struct netmap_adapter *na)
{
NMA_LOCK(nmd);
netmap_mem_unmap(&nmd->pools[NETMAP_BUF_POOL], na);
if (nmd->refcount == 1) {
u_int i;
/*
* Reset the allocator when it falls out of use so that any
* pool resources leaked by unclean application exits are
* reclaimed.
*/
for (i = 0; i < NETMAP_POOLS_NR; i++) {
struct netmap_obj_pool *p;
u_int j;
p = &nmd->pools[i];
p->objfree = p->objtotal;
/*
* Reproduce the net effect of the M_ZERO malloc()
* and marking of free entries in the bitmap that
* occur in finalize_obj_allocator()
*/
memset(p->bitmap,
'\0',
sizeof(uint32_t) * ((p->objtotal + 31) / 32));
/*
* Set all the bits in the bitmap that have
* corresponding buffers to 1 to indicate they are
* free.
*/
for (j = 0; j < p->objtotal; j++) {
if (p->lut[j].vaddr != NULL) {
p->bitmap[ (j>>5) ] |= ( 1 << (j & 31) );
}
}
}
/*
* Per netmap_mem_finalize_all(),
* buffers 0 and 1 are reserved
*/
nmd->pools[NETMAP_BUF_POOL].objfree -= 2;
nmd->pools[NETMAP_BUF_POOL].bitmap[0] = ~3;
}
nmd->deref(nmd);
NMA_UNLOCK(nmd);
}
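The bitmap above packs 32 objects per uint32_t word, hence the (objtotal + 31) / 32 sizing and the j>>5 / j&31 indexing. A self-contained sketch of the same arithmetic (mark_free()/obj_is_free() are illustrative helpers, not kernel API):

#include <stdint.h>

static inline void
mark_free(uint32_t *bitmap, unsigned j)
{
	bitmap[j >> 5] |= 1u << (j & 31);	/* word j/32, bit j%32 */
}

static inline int
obj_is_free(const uint32_t *bitmap, unsigned j)
{
	return (bitmap[j >> 5] >> (j & 31)) & 1;
}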
struct netmap_mem_ops netmap_mem_global_ops = {
.nmd_get_lut = netmap_mem2_get_lut,
.nmd_get_info = netmap_mem2_get_info,
.nmd_ofstophys = netmap_mem2_ofstophys,
.nmd_config = netmap_mem_global_config,
.nmd_finalize = netmap_mem_global_finalize,
.nmd_deref = netmap_mem_global_deref,
.nmd_delete = netmap_mem_global_delete,
.nmd_if_offset = netmap_mem2_if_offset,
.nmd_if_new = netmap_mem2_if_new,
.nmd_if_delete = netmap_mem2_if_delete,
.nmd_rings_create = netmap_mem2_rings_create,
.nmd_rings_delete = netmap_mem2_rings_delete
};
struct netmap_mem_ops netmap_mem_private_ops = {
.nmd_get_lut = netmap_mem2_get_lut,
.nmd_get_info = netmap_mem2_get_info,
.nmd_ofstophys = netmap_mem2_ofstophys,
.nmd_config = netmap_mem_private_config,
.nmd_finalize = netmap_mem_private_finalize,
.nmd_deref = netmap_mem_private_deref,
.nmd_if_offset = netmap_mem2_if_offset,
.nmd_delete = netmap_mem_private_delete,
.nmd_if_new = netmap_mem2_if_new,
.nmd_if_delete = netmap_mem2_if_delete,
.nmd_rings_create = netmap_mem2_rings_create,
.nmd_rings_delete = netmap_mem2_rings_delete
};

View File

@ -117,9 +117,7 @@
extern struct netmap_mem_d nm_mem;
struct lut_entry* netmap_mem_get_lut(struct netmap_mem_d *);
u_int netmap_mem_get_buftotal(struct netmap_mem_d *);
size_t netmap_mem_get_bufsize(struct netmap_mem_d *);
void netmap_mem_get_lut(struct netmap_mem_d *, struct netmap_lut *);
vm_paddr_t netmap_mem_ofstophys(struct netmap_mem_d *, vm_ooffset_t);
int netmap_mem_finalize(struct netmap_mem_d *, struct netmap_adapter *);
int netmap_mem_init(void);
@ -134,12 +132,34 @@ ssize_t netmap_mem_if_offset(struct netmap_mem_d *, const void *vaddr);
struct netmap_mem_d* netmap_mem_private_new(const char *name,
u_int txr, u_int txd, u_int rxr, u_int rxd, u_int extra_bufs, u_int npipes,
int* error);
void netmap_mem_private_delete(struct netmap_mem_d *);
void netmap_mem_delete(struct netmap_mem_d *);
//#define NM_DEBUG_MEM_PUTGET 1
#ifdef NM_DEBUG_MEM_PUTGET
#define netmap_mem_get(nmd) \
do { \
__netmap_mem_get(nmd, __FUNCTION__, __LINE__); \
} while (0)
#define netmap_mem_put(nmd) \
do { \
__netmap_mem_put(nmd, __FUNCTION__, __LINE__); \
} while (0)
void __netmap_mem_get(struct netmap_mem_d *, const char *, int);
void __netmap_mem_put(struct netmap_mem_d *, const char *, int);
#else /* !NM_DEBUG_MEM_PUTGET */
void netmap_mem_get(struct netmap_mem_d *);
void netmap_mem_put(struct netmap_mem_d *);
#endif /* !NM_DEBUG_PUTGET */
#define NETMAP_MEM_PRIVATE 0x2 /* allocator uses private address space */
#define NETMAP_MEM_IO 0x4 /* the underlying memory is mmapped I/O */
uint32_t netmap_extra_alloc(struct netmap_adapter *, uint32_t *, uint32_t n);
#endif

View File

@ -28,7 +28,7 @@
*
* Monitors
*
* netmap monitors can be used to do zero-copy monitoring of network traffic
* netmap monitors can be used to monitor network traffic
* on another adapter, when the latter adapter is working in netmap mode.
*
* Monitors offer to userspace the same interface as any other netmap port,
@ -38,8 +38,24 @@
* monitored adapter. During registration, the user can choose if she wants
* to intercept tx only, rx only, or both tx and rx traffic.
*
* The monitor only sees the frames after they have been consumed in the
* monitored adapter:
* If the monitor is not able to cope with the stream of frames, excess traffic
* will be dropped.
*
* If the monitored adapter leaves netmap mode, the monitor has to be restarted.
*
* Monitors can be either zero-copy or copy-based.
*
* Copy monitors see the frames before they are consumed:
*
* - For tx traffic, this is when the application sends them, before they are
* passed down to the adapter.
*
* - For rx traffic, this is when they are received by the adapter, before
* they are sent up to the application, if any (note that, if no
* application is reading from a monitored ring, the ring will eventually
* fill up and traffic will stop).
*
* Zero-copy monitors only see the frames after they have been consumed:
*
* - For tx traffic, this is after the slots containing the frames have been
* marked as free. Note that this may happen a considerable time after
@ -49,11 +65,9 @@
* has released them. In most cases, the consumer is a userspace
* application which may have modified the frame contents.
*
* If the monitor is not able to cope with the stream of frames, excess traffic
* will be dropped.
*
* Each ring can be monitored by at most one monitor. This may change in the
* future, if we implement monitor chaining.
* Several copy monitors may be active on any ring. Zero-copy monitors,
* instead, need exclusive access to each of the monitored rings. This may
* change in the future, if we implement zero-copy monitor chaining.
*
*/
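As a usage sketch (a hypothetical example with error handling kept minimal): a copy monitor is requested by setting NR_MONITOR_RX and/or NR_MONITOR_TX in nr_flags at NIOCREGIF time; adding NR_ZCOPY_MON selects a zero-copy monitor instead, subject to the exclusivity rules described above:

#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <net/netmap_user.h>

/* open a copy monitor on all rx rings of eth0 */
static int
open_rx_monitor(void)
{
	struct nmreq req;
	int fd = open("/dev/netmap", O_RDWR);

	if (fd < 0)
		return -1;
	memset(&req, 0, sizeof(req));
	req.nr_version = NETMAP_API;
	strncpy(req.nr_name, "eth0", sizeof(req.nr_name) - 1);
	req.nr_flags = NR_REG_ALL_NIC | NR_MONITOR_RX;
	/* add NR_ZCOPY_MON here for a zero-copy monitor */
	if (ioctl(fd, NIOCREGIF, &req) < 0) {
		close(fd);
		return -1;
	}
	return fd;
}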
@ -105,34 +119,319 @@
#define NM_MONITOR_MAXSLOTS 4096
/* monitor works by replacing the nm_sync callbacks in the monitored rings.
* The actions to be performed are the same on both tx and rx rings, so we
* have collected them here
/*
********************************************************************
* functions common to both kinds of monitors
********************************************************************
*/
/* nm_sync callback for the monitor's own tx rings.
* This makes no sense and always returns an error
*/
static int
netmap_monitor_parent_sync(struct netmap_kring *kring, int flags, u_int* ringptr)
netmap_monitor_txsync(struct netmap_kring *kring, int flags)
{
struct netmap_monitor_adapter *mna = kring->monitor;
struct netmap_kring *mkring = &mna->up.rx_rings[kring->ring_id];
struct netmap_ring *ring = kring->ring, *mring = mkring->ring;
int error;
int rel_slots, free_slots, busy;
RD(1, "%s %x", kring->name, flags);
return EIO;
}
/* nm_sync callback for the monitor's own rx rings.
* Note that the lock in netmap_zmon_parent_sync only protects
* writers among themselves. Synchronization between writers
* (i.e., netmap_zmon_parent_txsync and netmap_zmon_parent_rxsync)
* and readers (i.e., netmap_zmon_rxsync) relies on memory barriers.
*/
static int
netmap_monitor_rxsync(struct netmap_kring *kring, int flags)
{
ND("%s %x", kring->name, flags);
kring->nr_hwcur = kring->rcur;
mb();
return 0;
}
/* nm_krings_create callbacks for monitors.
* We could use the default netmap_hw_krings_zmon, but
* we don't need the mbq.
*/
static int
netmap_monitor_krings_create(struct netmap_adapter *na)
{
return netmap_krings_create(na, 0);
}
/* nm_krings_delete callback for monitors */
static void
netmap_monitor_krings_delete(struct netmap_adapter *na)
{
netmap_krings_delete(na);
}
static u_int
nm_txrx2flag(enum txrx t)
{
return (t == NR_RX ? NR_MONITOR_RX : NR_MONITOR_TX);
}
/* allocate the monitors array in the monitored kring */
static int
nm_monitor_alloc(struct netmap_kring *kring, u_int n)
{
size_t len;
struct netmap_kring **nm;
if (n <= kring->max_monitors)
/* we already have more entries than requested */
return 0;
len = sizeof(struct netmap_kring *) * n;
nm = realloc(kring->monitors, len, M_DEVBUF, M_NOWAIT | M_ZERO);
if (nm == NULL)
return ENOMEM;
kring->monitors = nm;
kring->max_monitors = n;
return 0;
}
/* deallocate the monitor array in the monitored kring */
static void
nm_monitor_dealloc(struct netmap_kring *kring)
{
if (kring->monitors) {
if (kring->n_monitors > 0) {
D("freeing not empty monitor array for %s (%d dangling monitors)!", kring->name,
kring->n_monitors);
}
free(kring->monitors, M_DEVBUF);
kring->monitors = NULL;
kring->max_monitors = 0;
kring->n_monitors = 0;
}
}
/*
* monitors work by replacing the nm_sync() and possibly the
* nm_notify() callbacks in the monitored rings.
*/
static int netmap_zmon_parent_txsync(struct netmap_kring *, int);
static int netmap_zmon_parent_rxsync(struct netmap_kring *, int);
static int netmap_monitor_parent_txsync(struct netmap_kring *, int);
static int netmap_monitor_parent_rxsync(struct netmap_kring *, int);
static int netmap_monitor_parent_notify(struct netmap_kring *, int);
/* add the monitor mkring to the list of monitors of kring.
* If this is the first monitor, intercept the callbacks
*/
static int
netmap_monitor_add(struct netmap_kring *mkring, struct netmap_kring *kring, int zcopy)
{
int error = 0;
/* synchronize with concurrently running nm_sync()s */
nm_kr_get(kring);
/* make sure the monitor array exists and is big enough */
error = nm_monitor_alloc(kring, kring->n_monitors + 1);
if (error)
goto out;
kring->monitors[kring->n_monitors] = mkring;
mkring->mon_pos = kring->n_monitors;
kring->n_monitors++;
if (kring->n_monitors == 1) {
/* this is the first monitor, intercept callbacks */
D("%s: intercept callbacks on %s", mkring->name, kring->name);
kring->mon_sync = kring->nm_sync;
/* zcopy monitors do not override nm_notify(), but
* we save the original one regardless, so that
* netmap_monitor_del() does not need to know the
* monitor type
*/
kring->mon_notify = kring->nm_notify;
if (kring->tx == NR_TX) {
kring->nm_sync = (zcopy ? netmap_zmon_parent_txsync :
netmap_monitor_parent_txsync);
} else {
kring->nm_sync = (zcopy ? netmap_zmon_parent_rxsync :
netmap_monitor_parent_rxsync);
if (!zcopy) {
/* also intercept notify */
kring->nm_notify = netmap_monitor_parent_notify;
kring->mon_tail = kring->nr_hwtail;
}
}
}
out:
nm_kr_put(kring);
return error;
}
/* remove the monitor mkring from the list of monitors of kring.
* If this is the last monitor, restore the original callbacks
*/
static void
netmap_monitor_del(struct netmap_kring *mkring, struct netmap_kring *kring)
{
/* synchronize with concurrently running nm_sync()s */
nm_kr_get(kring);
kring->n_monitors--;
if (mkring->mon_pos != kring->n_monitors) {
kring->monitors[mkring->mon_pos] = kring->monitors[kring->n_monitors];
kring->monitors[mkring->mon_pos]->mon_pos = mkring->mon_pos;
}
kring->monitors[kring->n_monitors] = NULL;
if (kring->n_monitors == 0) {
/* this was the last monitor, restore callbacks and delete monitor array */
D("%s: restoring sync on %s: %p", mkring->name, kring->name, kring->mon_sync);
kring->nm_sync = kring->mon_sync;
kring->mon_sync = NULL;
if (kring->tx == NR_RX) {
D("%s: restoring notify on %s: %p",
mkring->name, kring->name, kring->mon_notify);
kring->nm_notify = kring->mon_notify;
kring->mon_notify = NULL;
}
nm_monitor_dealloc(kring);
}
nm_kr_put(kring);
}
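netmap_monitor_del() uses the classic swap-with-last trick: O(1) removal from an unordered array, at the cost of keeping each entry's back-pointer (mon_pos here) in sync. A generic sketch of the pattern, with illustrative types:

struct item {
	unsigned pos;	/* index of this entry in the owning array */
};

/* remove entry i from an unordered array in O(1) */
static void
array_remove(struct item **arr, unsigned *count, unsigned i)
{
	(*count)--;
	if (i != *count) {
		arr[i] = arr[*count];	/* move last entry into the hole */
		arr[i]->pos = i;	/* fix its back-pointer */
	}
	arr[*count] = NULL;
}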
/* This is called when the monitored adapter leaves netmap mode
* (see netmap_do_unregif).
* We need to notify the monitors that the monitored rings are gone.
* We do this by setting their mna->priv.np_na to NULL.
* Note that the rings are already stopped when this happens, so
* no monitor ring callback can be active.
*/
void
netmap_monitor_stop(struct netmap_adapter *na)
{
enum txrx t;
for_rx_tx(t) {
u_int i;
for (i = 0; i < nma_get_nrings(na, t); i++) {
struct netmap_kring *kring = &NMR(na, t)[i];
u_int j;
for (j = 0; j < kring->n_monitors; j++) {
struct netmap_kring *mkring =
kring->monitors[j];
struct netmap_monitor_adapter *mna =
(struct netmap_monitor_adapter *)mkring->na;
/* forget about this adapter */
mna->priv.np_na = NULL;
}
}
}
}
/* common functions for the nm_register() callbacks of both kinds of
* monitors.
*/
static int
netmap_monitor_reg_common(struct netmap_adapter *na, int onoff, int zmon)
{
struct netmap_monitor_adapter *mna =
(struct netmap_monitor_adapter *)na;
struct netmap_priv_d *priv = &mna->priv;
struct netmap_adapter *pna = priv->np_na;
struct netmap_kring *kring, *mkring;
int i;
enum txrx t;
ND("%p: onoff %d", na, onoff);
if (onoff) {
if (pna == NULL) {
/* parent left netmap mode, fatal */
D("%s: internal error", na->name);
return ENXIO;
}
for_rx_tx(t) {
if (mna->flags & nm_txrx2flag(t)) {
for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
kring = &NMR(pna, t)[i];
mkring = &na->rx_rings[i];
netmap_monitor_add(mkring, kring, zmon);
}
}
}
na->na_flags |= NAF_NETMAP_ON;
} else {
if (pna == NULL) {
D("%s: parent left netmap mode, nothing to restore", na->name);
return 0;
}
na->na_flags &= ~NAF_NETMAP_ON;
for_rx_tx(t) {
if (mna->flags & nm_txrx2flag(t)) {
for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
kring = &NMR(pna, t)[i];
mkring = &na->rx_rings[i];
netmap_monitor_del(mkring, kring);
}
}
}
}
return 0;
}
/*
****************************************************************
* functions specific for zero-copy monitors
****************************************************************
*/
/*
* Common function for both zero-copy tx and rx nm_sync()
* callbacks
*/
static int
netmap_zmon_parent_sync(struct netmap_kring *kring, int flags, enum txrx tx)
{
struct netmap_kring *mkring = kring->monitors[0];
struct netmap_ring *ring = kring->ring, *mring;
int error = 0;
int rel_slots, free_slots, busy, sent = 0;
u_int beg, end, i;
u_int lim = kring->nkr_num_slots - 1,
mlim = mkring->nkr_num_slots - 1;
mlim; // = mkring->nkr_num_slots - 1;
if (mkring == NULL) {
RD(5, "NULL monitor on %s", kring->name);
return 0;
}
mring = mkring->ring;
mlim = mkring->nkr_num_slots - 1;
/* get the released slots (rel_slots) */
beg = *ringptr;
error = kring->save_sync(kring, flags);
if (error)
return error;
end = *ringptr;
if (tx == NR_TX) {
beg = kring->nr_hwtail;
error = kring->mon_sync(kring, flags);
if (error)
return error;
end = kring->nr_hwtail;
} else { /* NR_RX */
beg = kring->nr_hwcur;
end = kring->rhead;
}
rel_slots = end - beg;
if (rel_slots < 0)
rel_slots += kring->nkr_num_slots;
if (!rel_slots) {
return 0;
/* no released slots, but we still need
* to call rxsync if this is an rx ring
*/
goto out_rxsync;
}
/* we need to lock the monitor receive ring, since it
@ -147,19 +446,18 @@ netmap_monitor_parent_sync(struct netmap_kring *kring, int flags, u_int* ringptr
busy += mkring->nkr_num_slots;
free_slots = mlim - busy;
if (!free_slots) {
mtx_unlock(&mkring->q_lock);
return 0;
}
if (!free_slots)
goto out;
/* swap min(free_slots, rel_slots) slots */
if (free_slots < rel_slots) {
beg += (rel_slots - free_slots);
if (beg > lim)
beg = 0;
if (beg >= kring->nkr_num_slots)
beg -= kring->nkr_num_slots;
rel_slots = free_slots;
}
sent = rel_slots;
for ( ; rel_slots; rel_slots--) {
struct netmap_slot *s = &ring->slot[beg];
struct netmap_slot *ms = &mring->slot[i];
@ -168,6 +466,7 @@ netmap_monitor_parent_sync(struct netmap_kring *kring, int flags, u_int* ringptr
tmp = ms->buf_idx;
ms->buf_idx = s->buf_idx;
s->buf_idx = tmp;
ND(5, "beg %d buf_idx %d", beg, tmp);
tmp = ms->len;
ms->len = s->len;
@ -182,143 +481,196 @@ netmap_monitor_parent_sync(struct netmap_kring *kring, int flags, u_int* ringptr
mb();
mkring->nr_hwtail = i;
out:
mtx_unlock(&mkring->q_lock);
/* notify the monitor of the new frames */
mna->up.nm_notify(&mna->up, mkring->ring_id, NR_RX, 0);
return 0;
if (sent) {
/* notify the monitor of the new frames */
mkring->nm_notify(mkring, 0);
}
out_rxsync:
if (tx == NR_RX)
error = kring->mon_sync(kring, flags);
return error;
}
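All index arithmetic above is modulo the ring size, with the usual add-n-if-negative correction. For example, with nkr_num_slots = 1024, beg = 1000 and end = 8 give rel_slots = 8 - 1000 = -992, corrected to 32. A one-line sketch of the idiom:

/* slots from beg (inclusive) to end (exclusive) on a ring of n slots */
static inline int
ring_dist(unsigned beg, unsigned end, unsigned n)
{
	int d = (int)end - (int)beg;

	return d < 0 ? d + (int)n : d;	/* (8 - 1000) + 1024 = 32 */
}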
/* callback used to replace the nm_sync callback in the monitored tx rings */
static int
netmap_zmon_parent_txsync(struct netmap_kring *kring, int flags)
{
ND("%s %x", kring->name, flags);
return netmap_zmon_parent_sync(kring, flags, NR_TX);
}
/* callback used to replace the nm_sync callback in the monitored rx rings */
static int
netmap_zmon_parent_rxsync(struct netmap_kring *kring, int flags)
{
ND("%s %x", kring->name, flags);
return netmap_zmon_parent_sync(kring, flags, NR_RX);
}
static int
netmap_zmon_reg(struct netmap_adapter *na, int onoff)
{
return netmap_monitor_reg_common(na, onoff, 1 /* zcopy */);
}
/* nm_dtor callback for monitors */
static void
netmap_zmon_dtor(struct netmap_adapter *na)
{
struct netmap_monitor_adapter *mna =
(struct netmap_monitor_adapter *)na;
struct netmap_priv_d *priv = &mna->priv;
struct netmap_adapter *pna = priv->np_na;
netmap_adapter_put(pna);
}
/*
****************************************************************
* functions specific for copy monitors
****************************************************************
*/
static void
netmap_monitor_parent_sync(struct netmap_kring *kring, u_int first_new, int new_slots)
{
u_int j;
for (j = 0; j < kring->n_monitors; j++) {
struct netmap_kring *mkring = kring->monitors[j];
u_int i, mlim, beg;
int free_slots, busy, sent = 0, m;
u_int lim = kring->nkr_num_slots - 1;
struct netmap_ring *ring = kring->ring, *mring = mkring->ring;
u_int max_len = NETMAP_BUF_SIZE(mkring->na);
mlim = mkring->nkr_num_slots - 1;
/* we need to lock the monitor receive ring, since it
* is the target of both tx and rx traffic from the monitored
* adapter
*/
mtx_lock(&mkring->q_lock);
/* get the free slots available on the monitor ring */
i = mkring->nr_hwtail;
busy = i - mkring->nr_hwcur;
if (busy < 0)
busy += mkring->nkr_num_slots;
free_slots = mlim - busy;
if (!free_slots)
goto out;
/* copy min(free_slots, new_slots) slots */
m = new_slots;
beg = first_new;
if (free_slots < m) {
beg += (m - free_slots);
if (beg >= kring->nkr_num_slots)
beg -= kring->nkr_num_slots;
m = free_slots;
}
for ( ; m; m--) {
struct netmap_slot *s = &ring->slot[beg];
struct netmap_slot *ms = &mring->slot[i];
u_int copy_len = s->len;
char *src = NMB(kring->na, s),
*dst = NMB(mkring->na, ms);
if (unlikely(copy_len > max_len)) {
RD(5, "%s->%s: truncating %d to %d", kring->name,
mkring->name, copy_len, max_len);
copy_len = max_len;
}
memcpy(dst, src, copy_len);
ms->len = copy_len;
sent++;
beg = nm_next(beg, lim);
i = nm_next(i, mlim);
}
mb();
mkring->nr_hwtail = i;
out:
mtx_unlock(&mkring->q_lock);
if (sent) {
/* notify the monitor of the new frames */
mkring->nm_notify(mkring, 0);
}
}
}
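Unlike the zero-copy path, the copy path must respect the monitor's own buffer size, so each frame is clamped to NETMAP_BUF_SIZE(mkring->na) before the memcpy. A sketch of the per-slot copy performed above:

#include <string.h>

/* copy one slot, truncating to the destination buffer size;
 * returns the length to store in the monitor slot (ms->len) */
static inline unsigned
copy_clamped(char *dst, const char *src, unsigned len, unsigned max_len)
{
	if (len > max_len)
		len = max_len;	/* monitor buffers may be smaller */
	memcpy(dst, src, len);
	return len;
}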
/* callback used to replace the nm_sync callback in the monitored tx rings */
static int
netmap_monitor_parent_txsync(struct netmap_kring *kring, int flags)
{
ND("%s %x", kring->name, flags);
return netmap_monitor_parent_sync(kring, flags, &kring->nr_hwtail);
u_int first_new;
int new_slots;
/* get the new slots */
first_new = kring->nr_hwcur;
new_slots = kring->rhead - first_new;
if (new_slots < 0)
new_slots += kring->nkr_num_slots;
if (new_slots)
netmap_monitor_parent_sync(kring, first_new, new_slots);
return kring->mon_sync(kring, flags);
}
/* callback used to replace the nm_sync callback in the monitored rx rings */
static int
netmap_monitor_parent_rxsync(struct netmap_kring *kring, int flags)
{
ND("%s %x", kring->name, flags);
return netmap_monitor_parent_sync(kring, flags, &kring->rcur);
u_int first_new;
int new_slots, error;
/* get the new slots */
error = kring->mon_sync(kring, flags);
if (error)
return error;
first_new = kring->mon_tail;
new_slots = kring->nr_hwtail - first_new;
if (new_slots < 0)
new_slots += kring->nkr_num_slots;
if (new_slots)
netmap_monitor_parent_sync(kring, first_new, new_slots);
kring->mon_tail = kring->nr_hwtail;
return 0;
}
/* nm_sync callback for the monitor's own tx rings.
* This makes no sense and always returns an error
*/
/* callback used to replace the nm_notify() callback in the monitored rx rings */
static int
netmap_monitor_txsync(struct netmap_kring *kring, int flags)
netmap_monitor_parent_notify(struct netmap_kring *kring, int flags)
{
D("%s %x", kring->name, flags);
return EIO;
}
/* nm_sync callback for the monitor's own rx rings.
* Note that the lock in netmap_monitor_parent_sync only protects
* writers among themselves. Synchronization between writers
* (i.e., netmap_monitor_parent_txsync and netmap_monitor_parent_rxsync)
* and readers (i.e., netmap_monitor_rxsync) relies on memory barriers.
*/
static int
netmap_monitor_rxsync(struct netmap_kring *kring, int flags)
{
ND("%s %x", kring->name, flags);
kring->nr_hwcur = kring->rcur;
mb();
nm_rxsync_finalize(kring);
return 0;
}
/* nm_krings_create callbacks for monitors.
* We could use the default netmap_hw_krings_monitor, but
* we don't need the mbq.
*/
static int
netmap_monitor_krings_create(struct netmap_adapter *na)
{
return netmap_krings_create(na, 0);
ND(5, "%s %x", kring->name, flags);
/* ?xsync callbacks have tryget called by their callers
* (NIOCREGIF and poll()), but here we have to call it
* by ourselves
*/
if (nm_kr_tryget(kring))
goto out;
netmap_monitor_parent_rxsync(kring, NAF_FORCE_READ);
nm_kr_put(kring);
out:
return kring->mon_notify(kring, flags);
}
/* nm_register callback for monitors.
*
* On registration, replace the nm_sync callbacks in the monitored
* rings with our own, saving the previous ones in the monitored
* rings themselves, where they are used by netmap_monitor_parent_sync.
*
* On de-registration, restore the original callbacks. We need to
* stop traffic while we are doing this, since the monitored adapter may
* have already started executing a netmap_monitor_parent_sync
* and may not like the kring->save_sync pointer to become NULL.
*/
static int
netmap_monitor_reg(struct netmap_adapter *na, int onoff)
{
struct netmap_monitor_adapter *mna =
(struct netmap_monitor_adapter *)na;
struct netmap_priv_d *priv = &mna->priv;
struct netmap_adapter *pna = priv->np_na;
struct netmap_kring *kring;
int i;
ND("%p: onoff %d", na, onoff);
if (onoff) {
if (!nm_netmap_on(pna)) {
/* parent left netmap mode, fatal */
return ENXIO;
}
if (mna->flags & NR_MONITOR_TX) {
for (i = priv->np_txqfirst; i < priv->np_txqlast; i++) {
kring = &pna->tx_rings[i];
kring->save_sync = kring->nm_sync;
kring->nm_sync = netmap_monitor_parent_txsync;
}
}
if (mna->flags & NR_MONITOR_RX) {
for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) {
kring = &pna->rx_rings[i];
kring->save_sync = kring->nm_sync;
kring->nm_sync = netmap_monitor_parent_rxsync;
}
}
na->na_flags |= NAF_NETMAP_ON;
} else {
if (!nm_netmap_on(pna)) {
/* parent left netmap mode, nothing to restore */
return 0;
}
na->na_flags &= ~NAF_NETMAP_ON;
if (mna->flags & NR_MONITOR_TX) {
for (i = priv->np_txqfirst; i < priv->np_txqlast; i++) {
netmap_set_txring(pna, i, 1 /* stopped */);
kring = &pna->tx_rings[i];
kring->nm_sync = kring->save_sync;
kring->save_sync = NULL;
netmap_set_txring(pna, i, 0 /* enabled */);
}
}
if (mna->flags & NR_MONITOR_RX) {
for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) {
netmap_set_rxring(pna, i, 1 /* stopped */);
kring = &pna->rx_rings[i];
kring->nm_sync = kring->save_sync;
kring->save_sync = NULL;
netmap_set_rxring(pna, i, 0 /* enabled */);
}
}
}
return 0;
}
/* nm_krings_delete callback for monitors */
static void
netmap_monitor_krings_delete(struct netmap_adapter *na)
{
netmap_krings_delete(na);
return netmap_monitor_reg_common(na, onoff, 0 /* no zcopy */);
}
/* nm_dtor callback for monitors */
static void
netmap_monitor_dtor(struct netmap_adapter *na)
{
@ -326,22 +678,7 @@ netmap_monitor_dtor(struct netmap_adapter *na)
(struct netmap_monitor_adapter *)na;
struct netmap_priv_d *priv = &mna->priv;
struct netmap_adapter *pna = priv->np_na;
int i;
ND("%p", na);
if (nm_netmap_on(pna)) {
/* parent still in netmap mode, mark its krings as free */
if (mna->flags & NR_MONITOR_TX) {
for (i = priv->np_txqfirst; i < priv->np_txqlast; i++) {
pna->tx_rings[i].monitor = NULL;
}
}
if (mna->flags & NR_MONITOR_RX) {
for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) {
pna->rx_rings[i].monitor = NULL;
}
}
}
netmap_adapter_put(pna);
}
@ -354,6 +691,9 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
struct netmap_adapter *pna; /* parent adapter */
struct netmap_monitor_adapter *mna;
int i, error;
enum txrx t;
int zcopy = (nmr->nr_flags & NR_ZCOPY_MON);
char monsuff[10] = "";
if ((nmr->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX)) == 0) {
ND("not a monitor");
@ -400,44 +740,65 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
D("ringid error");
goto put_out;
}
if (nmr->nr_flags & NR_MONITOR_TX) {
for (i = mna->priv.np_txqfirst; i < mna->priv.np_txqlast; i++) {
struct netmap_kring *kring = &pna->tx_rings[i];
if (kring->monitor) {
error = EBUSY;
D("ring busy");
goto release_out;
}
kring->monitor = mna;
}
}
if (nmr->nr_flags & NR_MONITOR_RX) {
for (i = mna->priv.np_rxqfirst; i < mna->priv.np_rxqlast; i++) {
struct netmap_kring *kring = &pna->rx_rings[i];
if (kring->monitor) {
error = EBUSY;
D("ring busy");
goto release_out;
}
kring->monitor = mna;
}
if (mna->priv.np_qlast[NR_TX] - mna->priv.np_qfirst[NR_TX] == 1) {
snprintf(monsuff, 10, "-%d", mna->priv.np_qfirst[NR_TX]);
}
snprintf(mna->up.name, sizeof(mna->up.name), "%s%s/%s%s%s", pna->name,
monsuff,
zcopy ? "z" : "",
(nmr->nr_flags & NR_MONITOR_RX) ? "r" : "",
(nmr->nr_flags & NR_MONITOR_TX) ? "t" : "");
snprintf(mna->up.name, sizeof(mna->up.name), "mon:%s", pna->name);
if (zcopy) {
/* zero copy monitors need exclusive access to the monitored rings */
for_rx_tx(t) {
if (! (nmr->nr_flags & nm_txrx2flag(t)))
continue;
for (i = mna->priv.np_qfirst[t]; i < mna->priv.np_qlast[t]; i++) {
struct netmap_kring *kring = &NMR(pna, t)[i];
if (kring->n_monitors > 0) {
error = EBUSY;
D("ring %s already monitored by %s", kring->name,
kring->monitors[0]->name);
goto put_out;
}
}
}
mna->up.nm_register = netmap_zmon_reg;
mna->up.nm_dtor = netmap_zmon_dtor;
/* to have zero copy, we need to use the same memory allocator
* as the monitored port
*/
mna->up.nm_mem = pna->nm_mem;
mna->up.na_lut = pna->na_lut;
} else {
/* normal monitors are incompatible with zero copy ones */
for_rx_tx(t) {
if (! (nmr->nr_flags & nm_txrx2flag(t)))
continue;
for (i = mna->priv.np_qfirst[t]; i < mna->priv.np_qlast[t]; i++) {
struct netmap_kring *kring = &NMR(pna, t)[i];
if (kring->n_monitors > 0 &&
kring->monitors[0]->na->nm_register == netmap_zmon_reg)
{
error = EBUSY;
D("ring busy");
goto put_out;
}
}
}
mna->up.nm_rxsync = netmap_monitor_rxsync;
mna->up.nm_register = netmap_monitor_reg;
mna->up.nm_dtor = netmap_monitor_dtor;
}
/* the monitor supports the host rings iff the parent does */
mna->up.na_flags = (pna->na_flags & NAF_HOST_RINGS);
/* a do-nothing txsync: monitors cannot be used to inject packets */
mna->up.nm_txsync = netmap_monitor_txsync;
mna->up.nm_rxsync = netmap_monitor_rxsync;
mna->up.nm_register = netmap_monitor_reg;
mna->up.nm_dtor = netmap_monitor_dtor;
mna->up.nm_krings_create = netmap_monitor_krings_create;
mna->up.nm_krings_delete = netmap_monitor_krings_delete;
mna->up.nm_mem = pna->nm_mem;
mna->up.na_lut = pna->na_lut;
mna->up.na_lut_objtotal = pna->na_lut_objtotal;
mna->up.na_lut_objsize = pna->na_lut_objsize;
mna->up.num_tx_rings = 1; // XXX we don't need it, but field can't be zero
/* we set the number of our rx_rings to be max(num_tx_rings, num_rx_rings)
* in the parent
@ -458,7 +819,7 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
error = netmap_attach_common(&mna->up);
if (error) {
D("attach_common error");
goto release_out;
goto put_out;
}
/* remember the traffic directions we have to monitor */
@ -478,16 +839,6 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
return 0;
release_out:
D("monitor error");
for (i = mna->priv.np_txqfirst; i < mna->priv.np_txqlast; i++) {
if (pna->tx_rings[i].monitor == mna)
pna->tx_rings[i].monitor = NULL;
}
for (i = mna->priv.np_rxqfirst; i < mna->priv.np_rxqlast; i++) {
if (pna->rx_rings[i].monitor == mna)
pna->rx_rings[i].monitor = NULL;
}
put_out:
netmap_adapter_put(pna);
free(mna, M_DEVBUF);

View File

@ -72,51 +72,31 @@
#define NM_PIPE_MAXSLOTS 4096
int netmap_default_pipes = 0; /* default number of pipes for each nic */
int netmap_default_pipes = 0; /* ignored, kept for compatibility */
SYSCTL_DECL(_dev_netmap);
SYSCTL_INT(_dev_netmap, OID_AUTO, default_pipes, CTLFLAG_RW, &netmap_default_pipes, 0 , "");
/* allocate the pipe array in the parent adapter */
int
netmap_pipe_alloc(struct netmap_adapter *na, struct nmreq *nmr)
static int
nm_pipe_alloc(struct netmap_adapter *na, u_int npipes)
{
size_t len;
int mode = nmr->nr_flags & NR_REG_MASK;
u_int npipes;
struct netmap_pipe_adapter **npa;
if (mode == NR_REG_PIPE_MASTER || mode == NR_REG_PIPE_SLAVE) {
/* this is for our parent, not for us */
if (npipes <= na->na_max_pipes)
/* we already have more entries than requested */
return 0;
}
if (npipes < na->na_next_pipe || npipes > NM_MAXPIPES)
return EINVAL;
/* TODO: we can resize the array if the new
* request can accommodate the already existing pipes
*/
if (na->na_pipes) {
nmr->nr_arg1 = na->na_max_pipes;
return 0;
}
npipes = nmr->nr_arg1;
if (npipes == 0)
npipes = netmap_default_pipes;
nm_bound_var(&npipes, 0, 0, NM_MAXPIPES, NULL);
if (npipes == 0) {
/* really zero, nothing to alloc */
goto out;
}
len = sizeof(struct netmap_pipe_adapter *) * npipes;
na->na_pipes = malloc(len, M_DEVBUF, M_NOWAIT | M_ZERO);
if (na->na_pipes == NULL)
len = sizeof(struct netmap_pipe_adapter *) * npipes;
npa = realloc(na->na_pipes, len, M_DEVBUF, M_NOWAIT | M_ZERO);
if (npa == NULL)
return ENOMEM;
na->na_pipes = npa;
na->na_max_pipes = npipes;
na->na_next_pipe = 0;
out:
nmr->nr_arg1 = npipes;
return 0;
}
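netmap_pipe_add() below grows this array geometrically (2, 4, 8, ...), so creating P pipes on a port costs only about log2(P) calls into nm_pipe_alloc(). A standalone sketch of the policy and its cost:

#include <stdio.h>

int
main(void)
{
	unsigned cap = 0, reallocs = 0, p;

	for (p = 0; p < 1000; p++) {
		if (p >= cap) {
			cap = cap ? 2 * cap : 2;	/* same policy as netmap_pipe_add() */
			reallocs++;
		}
	}
	/* prints: 1000 pipes, 10 reallocations (cap 1024) */
	printf("%u pipes, %u reallocations (cap %u)\n", p, reallocs, cap);
	return 0;
}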
@ -126,7 +106,10 @@ void
netmap_pipe_dealloc(struct netmap_adapter *na)
{
if (na->na_pipes) {
ND("freeing pipes for %s", na->name);
if (na->na_next_pipe > 0) {
D("freeing not empty pipe array for %s (%d dangling pipes)!", na->name,
na->na_next_pipe);
}
free(na->na_pipes, M_DEVBUF);
na->na_pipes = NULL;
na->na_max_pipes = 0;
@ -155,8 +138,10 @@ static int
netmap_pipe_add(struct netmap_adapter *parent, struct netmap_pipe_adapter *na)
{
if (parent->na_next_pipe >= parent->na_max_pipes) {
D("%s: no space left for pipes", parent->name);
return ENOMEM;
u_int npipes = parent->na_max_pipes ? 2*parent->na_max_pipes : 2;
int error = nm_pipe_alloc(parent, npipes);
if (error)
return error;
}
parent->na_pipes[parent->na_next_pipe] = na;
@ -172,8 +157,10 @@ netmap_pipe_remove(struct netmap_adapter *parent, struct netmap_pipe_adapter *na
u_int n;
n = --parent->na_next_pipe;
if (n != na->parent_slot) {
parent->na_pipes[na->parent_slot] =
parent->na_pipes[n];
struct netmap_pipe_adapter **p =
&parent->na_pipes[na->parent_slot];
*p = parent->na_pipes[n];
(*p)->parent_slot = na->parent_slot;
}
parent->na_pipes[n] = NULL;
}
@ -208,7 +195,6 @@ netmap_pipe_txsync(struct netmap_kring *txkring, int flags)
if (limit == 0) {
/* either the rxring is full, or nothing to send */
nm_txsync_finalize(txkring); /* actually useless */
return 0;
}
@ -222,7 +208,9 @@ netmap_pipe_txsync(struct netmap_kring *txkring, int flags)
*rs = *ts;
*ts = tmp;
/* no need to report the buffer change */
/* report the buffer change */
ts->flags |= NS_BUF_CHANGED;
rs->flags |= NS_BUF_CHANGED;
j = nm_next(j, lim_rx);
k = nm_next(k, lim_tx);
@ -233,12 +221,11 @@ netmap_pipe_txsync(struct netmap_kring *txkring, int flags)
txkring->nr_hwcur = k;
txkring->nr_hwtail = nm_prev(k, lim_tx);
nm_txsync_finalize(txkring);
ND(2, "after: hwcur %d hwtail %d cur %d head %d tail %d j %d", txkring->nr_hwcur, txkring->nr_hwtail,
txkring->rcur, txkring->rhead, txkring->rtail, j);
mb(); /* make sure rxkring->nr_hwtail is updated before notifying */
rxkring->na->nm_notify(rxkring->na, rxkring->ring_id, NR_RX, 0);
rxkring->nm_notify(rxkring, 0);
return 0;
}
@ -254,12 +241,11 @@ netmap_pipe_rxsync(struct netmap_kring *rxkring, int flags)
ND(5, "hwcur %d hwtail %d cur %d head %d tail %d", rxkring->nr_hwcur, rxkring->nr_hwtail,
rxkring->rcur, rxkring->rhead, rxkring->rtail);
mb(); /* paired with the first mb() in txsync */
nm_rxsync_finalize(rxkring);
if (oldhwcur != rxkring->nr_hwcur) {
/* we have released some slots, notify the other end */
mb(); /* make sure nr_hwcur is updated before notifying */
txkring->na->nm_notify(txkring->na, txkring->ring_id, NR_TX, 0);
txkring->nm_notify(txkring, 0);
}
return 0;
}
@ -318,11 +304,13 @@ netmap_pipe_krings_create(struct netmap_adapter *na)
(struct netmap_pipe_adapter *)na;
struct netmap_adapter *ona = &pna->peer->up;
int error = 0;
enum txrx t;
if (pna->peer_ref) {
int i;
/* case 1) above */
D("%p: case 1, create everything", na);
ND("%p: case 1, create everything", na);
error = netmap_krings_create(na, 0);
if (error)
goto err;
@ -338,10 +326,10 @@ netmap_pipe_krings_create(struct netmap_adapter *na)
goto del_krings1;
/* update our hidden ring pointers */
for (i = 0; i < na->num_tx_rings + 1; i++)
na->tx_rings[i].save_ring = na->tx_rings[i].ring;
for (i = 0; i < na->num_rx_rings + 1; i++)
na->rx_rings[i].save_ring = na->rx_rings[i].ring;
for_rx_tx(t) {
for (i = 0; i < nma_get_nrings(na, t) + 1; i++)
NMR(na, t)[i].save_ring = NMR(na, t)[i].ring;
}
/* now, create krings and rings of the other end */
error = netmap_krings_create(ona, 0);
@ -352,27 +340,28 @@ netmap_pipe_krings_create(struct netmap_adapter *na)
if (error)
goto del_krings2;
for (i = 0; i < ona->num_tx_rings + 1; i++)
ona->tx_rings[i].save_ring = ona->tx_rings[i].ring;
for (i = 0; i < ona->num_rx_rings + 1; i++)
ona->rx_rings[i].save_ring = ona->rx_rings[i].ring;
for_rx_tx(t) {
for (i = 0; i < nma_get_nrings(ona, t) + 1; i++)
NMR(ona, t)[i].save_ring = NMR(ona, t)[i].ring;
}
/* cross link the krings */
for (i = 0; i < na->num_tx_rings; i++) {
na->tx_rings[i].pipe = pna->peer->up.rx_rings + i;
na->rx_rings[i].pipe = pna->peer->up.tx_rings + i;
pna->peer->up.tx_rings[i].pipe = na->rx_rings + i;
pna->peer->up.rx_rings[i].pipe = na->tx_rings + i;
for_rx_tx(t) {
enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
for (i = 0; i < nma_get_nrings(na, t); i++) {
NMR(na, t)[i].pipe = NMR(&pna->peer->up, r) + i;
NMR(&pna->peer->up, r)[i].pipe = NMR(na, t) + i;
}
}
} else {
int i;
/* case 2) above */
/* recover the hidden rings */
ND("%p: case 2, hidden rings", na);
for (i = 0; i < na->num_tx_rings + 1; i++)
na->tx_rings[i].ring = na->tx_rings[i].save_ring;
for (i = 0; i < na->num_rx_rings + 1; i++)
na->rx_rings[i].ring = na->rx_rings[i].save_ring;
for_rx_tx(t) {
for (i = 0; i < nma_get_nrings(na, t) + 1; i++)
NMR(na, t)[i].ring = NMR(na, t)[i].save_ring;
}
}
return 0;
@ -423,6 +412,8 @@ netmap_pipe_reg(struct netmap_adapter *na, int onoff)
{
struct netmap_pipe_adapter *pna =
(struct netmap_pipe_adapter *)na;
enum txrx t;
ND("%p: onoff %d", na, onoff);
if (onoff) {
na->na_flags |= NAF_NETMAP_ON;
@ -443,11 +434,10 @@ netmap_pipe_reg(struct netmap_adapter *na, int onoff)
netmap_adapter_get(na);
pna->peer->peer_ref = 1;
/* hide our rings from netmap_mem_rings_delete */
for (i = 0; i < na->num_tx_rings + 1; i++) {
na->tx_rings[i].ring = NULL;
}
for (i = 0; i < na->num_rx_rings + 1; i++) {
na->rx_rings[i].ring = NULL;
for_rx_tx(t) {
for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
NMR(na, t)[i].ring = NULL;
}
}
}
return 0;
@ -481,6 +471,7 @@ netmap_pipe_krings_delete(struct netmap_adapter *na)
(struct netmap_pipe_adapter *)na;
struct netmap_adapter *ona; /* na of the other end */
int i;
enum txrx t;
if (!pna->peer_ref) {
ND("%p: case 2, kept alive by peer", na);
@ -496,10 +487,10 @@ netmap_pipe_krings_delete(struct netmap_adapter *na)
* cleanup-after-error path */
return;
}
for (i = 0; i < ona->num_tx_rings + 1; i++)
ona->tx_rings[i].ring = ona->tx_rings[i].save_ring;
for (i = 0; i < ona->num_rx_rings + 1; i++)
ona->rx_rings[i].ring = ona->rx_rings[i].save_ring;
for_rx_tx(t) {
for (i = 0; i < nma_get_nrings(ona, t) + 1; i++)
NMR(ona, t)[i].ring = NMR(ona, t)[i].save_ring;
}
netmap_mem_rings_delete(ona);
netmap_krings_delete(ona);
}
@ -604,8 +595,6 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
mna->up.nm_krings_delete = netmap_pipe_krings_delete;
mna->up.nm_mem = pna->nm_mem;
mna->up.na_lut = pna->na_lut;
mna->up.na_lut_objtotal = pna->na_lut_objtotal;
mna->up.na_lut_objsize = pna->na_lut_objsize;
mna->up.num_tx_rings = 1;
mna->up.num_rx_rings = 1;

View File

@ -222,6 +222,10 @@ struct nm_bridge {
* the lookup function, and allocated on attach
*/
struct nm_hash_ent ht[NM_BDG_HASH];
#ifdef CONFIG_NET_NS
struct net *ns;
#endif /* CONFIG_NET_NS */
};
const char*
@ -234,12 +238,14 @@ netmap_bdg_name(struct netmap_vp_adapter *vp)
}
#ifndef CONFIG_NET_NS
/*
* XXX in principle nm_bridges could be created dynamically
* Right now we have a static array and deletions are protected
* by an exclusive lock.
*/
struct nm_bridge nm_bridges[NM_BRIDGES];
struct nm_bridge *nm_bridges;
#endif /* !CONFIG_NET_NS */
/*
@ -283,10 +289,13 @@ static struct nm_bridge *
nm_find_bridge(const char *name, int create)
{
int i, l, namelen;
struct nm_bridge *b = NULL;
struct nm_bridge *b = NULL, *bridges;
u_int num_bridges;
NMG_LOCK_ASSERT();
netmap_bns_getbridges(&bridges, &num_bridges);
namelen = strlen(NM_NAME); /* base length */
l = name ? strlen(name) : 0; /* actual length */
if (l < namelen) {
@ -304,8 +313,8 @@ nm_find_bridge(const char *name, int create)
ND("--- prefix is '%.*s' ---", namelen, name);
/* lookup the name, remember empty slot if there is one */
for (i = 0; i < NM_BRIDGES; i++) {
struct nm_bridge *x = nm_bridges + i;
for (i = 0; i < num_bridges; i++) {
struct nm_bridge *x = bridges + i;
if (x->bdg_active_ports == 0) {
if (create && b == NULL)
@ -318,7 +327,7 @@ nm_find_bridge(const char *name, int create)
break;
}
}
if (i == NM_BRIDGES && b) { /* name not found, can create entry */
if (i == num_bridges && b) { /* name not found, can create entry */
/* initialize the bridge */
strncpy(b->bdg_basename, name, namelen);
ND("create new bridge %s with ports %d", b->bdg_basename,
@ -331,6 +340,7 @@ nm_find_bridge(const char *name, int create)
b->bdg_ops.lookup = netmap_bdg_learning;
/* reset the MAC address table */
bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
NM_BNS_GET(b);
}
return b;
}
@ -373,7 +383,7 @@ nm_alloc_bdgfwd(struct netmap_adapter *na)
l += sizeof(struct nm_bdg_q) * num_dstq;
l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;
nrings = netmap_real_tx_rings(na);
nrings = netmap_real_rings(na, NR_TX);
kring = na->tx_rings;
for (i = 0; i < nrings; i++) {
struct nm_bdg_fwd *ft;
@ -458,6 +468,7 @@ netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
if (lim == 0) {
ND("marking bridge %s as free", b->bdg_basename);
bzero(&b->bdg_ops, sizeof(b->bdg_ops));
NM_BNS_PUT(b);
}
}
@ -632,7 +643,7 @@ netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
i = b->bdg_port_index[j];
vpna = b->bdg_ports[i];
// KASSERT(na != NULL);
D("checking %s", vpna->up.name);
ND("checking %s", vpna->up.name);
if (!strcmp(vpna->up.name, nr_name)) {
netmap_adapter_get(&vpna->up);
ND("found existing if %s refs %d", nr_name)
@ -813,12 +824,15 @@ nm_bdg_ctl_detach(struct nmreq *nmr)
int
netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops)
{
struct nm_bridge *b;
struct nm_bridge *b, *bridges;
struct netmap_adapter *na;
struct netmap_vp_adapter *vpna;
char *name = nmr->nr_name;
int cmd = nmr->nr_cmd, namelen = strlen(name);
int error = 0, i, j;
u_int num_bridges;
netmap_bns_getbridges(&bridges, &num_bridges);
switch (cmd) {
case NETMAP_BDG_NEWIF:
@ -852,7 +866,6 @@ netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops)
break;
}
name = name + b->bdg_namelen + 1;
error = ENOENT;
for (j = 0; j < b->bdg_active_ports; j++) {
i = b->bdg_port_index[j];
@ -866,7 +879,7 @@ netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops)
*/
if (!strcmp(vpna->up.name, name)) {
/* bridge index */
nmr->nr_arg1 = b - nm_bridges;
nmr->nr_arg1 = b - bridges;
nmr->nr_arg2 = i; /* port index */
error = 0;
break;
@ -886,7 +899,7 @@ netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops)
NMG_LOCK();
for (error = ENOENT; i < NM_BRIDGES; i++) {
b = nm_bridges + i;
b = bridges + i;
if (j >= b->bdg_active_ports) {
j = 0; /* following bridges scan from 0 */
continue;
@ -984,7 +997,7 @@ netmap_vp_krings_create(struct netmap_adapter *na)
u_int tailroom;
int error, i;
uint32_t *leases;
u_int nrx = netmap_real_rx_rings(na);
u_int nrx = netmap_real_rings(na, NR_RX);
/*
* Leases are attached to RX rings on vale ports
@ -1066,6 +1079,9 @@ nm_bdg_preflush(struct netmap_kring *kring, u_int end)
ft[ft_i].ft_flags = slot->flags;
ND("flags is 0x%x", slot->flags);
/* we do not use the buf changed flag, but we still need to reset it */
slot->flags &= ~NS_BUF_CHANGED;
/* this slot goes into a list so initialize the link field */
ft[ft_i].ft_next = NM_FT_NULL;
buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
@ -1180,7 +1196,7 @@ netmap_vp_reg(struct netmap_adapter *na, int onoff)
*/
u_int
netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
const struct netmap_vp_adapter *na)
struct netmap_vp_adapter *na)
{
uint8_t *buf = ft->ft_buf;
u_int buf_len = ft->ft_len;
@ -1211,11 +1227,11 @@ netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
* The hash is somewhat expensive, there might be some
* worthwhile optimizations here.
*/
if ((buf[6] & 1) == 0) { /* valid src */
if (((buf[6] & 1) == 0) && (na->last_smac != smac)) { /* valid src */
uint8_t *s = buf+6;
sh = nm_bridge_rthash(s); // XXX hash of source
/* update source port forwarding entry */
ht[sh].mac = smac; /* XXX expire ? */
na->last_smac = ht[sh].mac = smac; /* XXX expire ? */
ht[sh].ports = mysrc;
if (netmap_verbose)
D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
@ -1229,7 +1245,6 @@ netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
}
/* XXX otherwise return NM_BDG_UNKNOWN ? */
}
*dst_ring = 0;
return dst;
}
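The new last_smac field caches the most recently learned source MAC: consecutive frames of a bulk flow share a sender, so the hash-table update (and the rthash computation) can be skipped in the common case. A sketch of the check, with illustrative types rather than the kernel structs:

#include <stdint.h>

struct learn_cache {
	uint64_t last_smac;	/* initialized to ~0, as in netmap_vp_create() */
};

/* return 1 if the forwarding table needs an update for this source */
static int
need_learn(struct learn_cache *c, const uint8_t *src_mac, uint64_t smac)
{
	if ((src_mac[0] & 1) || c->last_smac == smac)
		return 0;	/* group/multicast source, or already learned */
	c->last_smac = smac;
	return 1;
}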
@ -1475,7 +1490,7 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
if (dst_na->retry && retry) {
/* try to get some free slot from the previous run */
dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
kring->nm_notify(kring, 0);
/* actually useful only for bwraps, since there
* the notify will trigger a txsync on the hwna. VALE ports
* have dst_na->retry == 0
@ -1616,7 +1631,7 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
kring->nr_hwtail = j;
still_locked = 0;
mtx_unlock(&kring->q_lock);
dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
kring->nm_notify(kring, 0);
/* this is netmap_notify for VALE ports and
* netmap_bwrap_notify for bwrap. The latter will
* trigger a txsync on the underlying hwna
@ -1649,29 +1664,28 @@ netmap_vp_txsync(struct netmap_kring *kring, int flags)
(struct netmap_vp_adapter *)kring->na;
u_int done;
u_int const lim = kring->nkr_num_slots - 1;
u_int const cur = kring->rcur;
u_int const head = kring->rhead;
if (bridge_batch <= 0) { /* testing only */
done = cur; // used all
done = head; // used all
goto done;
}
if (!na->na_bdg) {
done = cur;
done = head;
goto done;
}
if (bridge_batch > NM_BDG_BATCH)
bridge_batch = NM_BDG_BATCH;
done = nm_bdg_preflush(kring, cur);
done = nm_bdg_preflush(kring, head);
done:
if (done != cur)
D("early break at %d/ %d, tail %d", done, cur, kring->nr_hwtail);
if (done != head)
D("early break at %d/ %d, tail %d", done, head, kring->nr_hwtail);
/*
* packets between 'done' and 'cur' are left unsent.
*/
kring->nr_hwcur = done;
kring->nr_hwtail = nm_prev(done, lim);
nm_txsync_finalize(kring);
if (netmap_verbose)
D("%s ring %d flags %d", na->up.name, kring->ring_id, flags);
return 0;
@ -1687,7 +1701,7 @@ netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags)
struct netmap_adapter *na = kring->na;
struct netmap_ring *ring = kring->ring;
u_int nm_i, lim = kring->nkr_num_slots - 1;
u_int head = nm_rxsync_prologue(kring);
u_int head = kring->rhead;
int n;
if (head > lim) {
@ -1717,8 +1731,6 @@ netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags)
kring->nr_hwcur = head;
}
/* tell userspace that there are new packets */
nm_rxsync_finalize(kring);
n = 0;
done:
return n;
@ -1804,12 +1816,13 @@ netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp, struct netmap_vp_adapter
na->num_rx_desc = nmr->nr_rx_slots;
vpna->virt_hdr_len = 0;
vpna->mfs = 1514;
vpna->last_smac = ~0llu;
/*if (vpna->mfs > netmap_buf_size) TODO netmap_buf_size is zero??
vpna->mfs = netmap_buf_size; */
if (netmap_verbose)
D("max frame size %u", vpna->mfs);
na->na_flags |= NAF_BDG_MAYSLEEP | NAF_MEM_OWNER;
na->na_flags |= NAF_BDG_MAYSLEEP;
na->nm_txsync = netmap_vp_txsync;
na->nm_rxsync = netmap_vp_rxsync;
na->nm_register = netmap_vp_reg;
@ -1832,7 +1845,7 @@ netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp, struct netmap_vp_adapter
err:
if (na->nm_mem != NULL)
netmap_mem_private_delete(na->nm_mem);
netmap_mem_delete(na->nm_mem);
free(vpna, M_DEVBUF);
return error;
}
@ -1913,75 +1926,35 @@ netmap_bwrap_dtor(struct netmap_adapter *na)
* The bridge wrapper then sends the packets through the bridge.
*/
static int
netmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx, int flags)
netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct netmap_bwrap_adapter *bna = na->na_private;
struct netmap_vp_adapter *hostna = &bna->host;
struct netmap_kring *kring, *bkring;
struct netmap_kring *bkring;
struct netmap_ring *ring;
int is_host_ring = ring_nr == na->num_rx_rings;
struct netmap_vp_adapter *vpna = &bna->up;
u_int ring_nr = kring->ring_id;
int error = 0;
if (netmap_verbose)
D("%s %s%d 0x%x", na->name,
(tx == NR_TX ? "TX" : "RX"), ring_nr, flags);
if (flags & NAF_DISABLE_NOTIFY) {
/* the enabled/disabled state of the ring has changed,
* propagate the info to the wrapper (with tx/rx swapped)
*/
if (tx == NR_TX) {
netmap_set_rxring(&vpna->up, ring_nr,
na->tx_rings[ring_nr].nkr_stopped);
} else {
netmap_set_txring(&vpna->up, ring_nr,
na->rx_rings[ring_nr].nkr_stopped);
}
return 0;
}
D("%s %s 0x%x", na->name, kring->name, flags);
if (!nm_netmap_on(na))
return 0;
/* we only care about receive interrupts */
if (tx == NR_TX)
return 0;
kring = &na->rx_rings[ring_nr];
ring = kring->ring;
bkring = &vpna->up.tx_rings[ring_nr];
ring = kring->ring; /* == bkring->ring */
/* make sure the ring is not disabled */
if (nm_kr_tryget(kring))
return 0;
if (is_host_ring && hostna->na_bdg == NULL) {
error = bna->save_notify(na, ring_nr, tx, flags);
goto put_out;
}
/* Here we expect ring->head = ring->cur = ring->tail
* because everything has been released from the previous round.
* However the ring is shared and we might have info from
* the wrong side (the tx ring). Hence we overwrite with
* the info from the rx kring.
*/
if (netmap_verbose)
D("%s head %d cur %d tail %d (kring %d %d %d)", na->name,
ring->head, ring->cur, ring->tail,
D("%s head %d cur %d tail %d", na->name,
kring->rhead, kring->rcur, kring->rtail);
ring->head = kring->rhead;
ring->cur = kring->rcur;
ring->tail = kring->rtail;
if (is_host_ring) {
vpna = hostna;
ring_nr = 0;
}
/* simulate a user wakeup on the rx ring */
/* fetch packets that have arrived.
* XXX maybe do this in a loop ?
/* simulate a user wakeup on the rx ring
* fetch packets that have arrived.
*/
error = kring->nm_sync(kring, 0);
if (error)
@ -1992,33 +1965,18 @@ netmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx,
goto put_out;
}
/* new packets are ring->cur to ring->tail, and the bkring
* had hwcur == ring->cur. So advance ring->cur to ring->tail
/* new packets are kring->rcur to kring->nr_hwtail, and the bkring
* had hwcur == bkring->rhead. So advance bkring->rhead to kring->nr_hwtail
* to push all packets out.
*/
ring->head = ring->cur = ring->tail;
bkring->rhead = bkring->rcur = kring->nr_hwtail;
/* also set tail to what the bwrap expects */
bkring = &vpna->up.tx_rings[ring_nr];
ring->tail = bkring->nr_hwtail; // rtail too ?
/* pass packets to the switch */
nm_txsync_prologue(bkring); // XXX error checking ?
netmap_vp_txsync(bkring, flags);
/* mark all buffers as released on this ring */
ring->head = ring->cur = kring->nr_hwtail;
ring->tail = kring->rtail;
kring->rhead = kring->rcur = kring->rtail = kring->nr_hwtail;
/* another call to actually release the buffers */
if (!is_host_ring) {
error = kring->nm_sync(kring, 0);
} else {
/* mark all packets as released, as in the
* second part of netmap_rxsync_from_host()
*/
kring->nr_hwcur = kring->nr_hwtail;
nm_rxsync_finalize(kring);
}
error = kring->nm_sync(kring, 0);
put_out:
nm_kr_put(kring);
@ -2035,6 +1993,7 @@ netmap_bwrap_register(struct netmap_adapter *na, int onoff)
struct netmap_adapter *hwna = bna->hwna;
struct netmap_vp_adapter *hostna = &bna->host;
int error;
enum txrx t;
ND("%s %s", na->name, onoff ? "on" : "off");
@ -2047,8 +2006,6 @@ netmap_bwrap_register(struct netmap_adapter *na, int onoff)
* putting it in netmap mode
*/
hwna->na_lut = na->na_lut;
hwna->na_lut_objtotal = na->na_lut_objtotal;
hwna->na_lut_objsize = na->na_lut_objsize;
if (hostna->na_bdg) {
/* if the host rings have been attached to switch,
@ -2056,8 +2013,6 @@ netmap_bwrap_register(struct netmap_adapter *na, int onoff)
* in the hostna also
*/
hostna->up.na_lut = na->na_lut;
hostna->up.na_lut_objtotal = na->na_lut_objtotal;
hostna->up.na_lut_objsize = na->na_lut_objsize;
}
/* cross-link the netmap rings
@ -2066,13 +2021,12 @@ netmap_bwrap_register(struct netmap_adapter *na, int onoff)
* We need to do this now, after the initialization
* of the kring->ring pointers
*/
for (i = 0; i < na->num_rx_rings + 1; i++) {
hwna->tx_rings[i].nkr_num_slots = na->rx_rings[i].nkr_num_slots;
hwna->tx_rings[i].ring = na->rx_rings[i].ring;
}
for (i = 0; i < na->num_tx_rings + 1; i++) {
hwna->rx_rings[i].nkr_num_slots = na->tx_rings[i].nkr_num_slots;
hwna->rx_rings[i].ring = na->tx_rings[i].ring;
for_rx_tx(t) {
enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
for (i = 0; i < nma_get_nrings(na, r) + 1; i++) {
NMR(hwna, t)[i].nkr_num_slots = NMR(na, r)[i].nkr_num_slots;
NMR(hwna, t)[i].ring = NMR(na, r)[i].ring;
}
}
}
@ -2087,14 +2041,29 @@ netmap_bwrap_register(struct netmap_adapter *na, int onoff)
netmap_vp_reg(&hostna->up, onoff);
if (onoff) {
/* intercept the hwna nm_notify callback */
bna->save_notify = hwna->nm_notify;
hwna->nm_notify = netmap_bwrap_intr_notify;
u_int i;
/* intercept the hwna nm_notify callback on the hw rings */
for (i = 0; i < hwna->num_rx_rings; i++) {
hwna->rx_rings[i].save_notify = hwna->rx_rings[i].nm_notify;
hwna->rx_rings[i].nm_notify = netmap_bwrap_intr_notify;
}
i = hwna->num_rx_rings; /* for safety */
/* save the host ring notify unconditionally */
hwna->rx_rings[i].save_notify = hwna->rx_rings[i].nm_notify;
if (hostna->na_bdg) {
/* also intercept the host ring notify */
hwna->rx_rings[i].nm_notify = netmap_bwrap_intr_notify;
}
} else {
hwna->nm_notify = bna->save_notify;
hwna->na_lut = NULL;
hwna->na_lut_objtotal = 0;
hwna->na_lut_objsize = 0;
u_int i;
/* reset all notify callbacks (including host ring) */
for (i = 0; i <= hwna->num_rx_rings; i++) {
hwna->rx_rings[i].nm_notify = hwna->rx_rings[i].save_notify;
hwna->rx_rings[i].save_notify = NULL;
}
hwna->na_lut.lut = NULL;
hwna->na_lut.objtotal = 0;
hwna->na_lut.objsize = 0;
}
return 0;
@ -2154,9 +2123,9 @@ netmap_bwrap_krings_create(struct netmap_adapter *na)
* The corresponding krings must point back to the
* hostna
*/
hostna->tx_rings = na->tx_rings + na->num_tx_rings;
hostna->tx_rings = &na->tx_rings[na->num_tx_rings];
hostna->tx_rings[0].na = hostna;
hostna->rx_rings = na->rx_rings + na->num_rx_rings;
hostna->rx_rings = &na->rx_rings[na->num_rx_rings];
hostna->rx_rings[0].na = hostna;
}
@ -2180,74 +2149,59 @@ netmap_bwrap_krings_delete(struct netmap_adapter *na)
/* notify method for the bridge-->hwna direction */
static int
netmap_bwrap_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
netmap_bwrap_notify(struct netmap_kring *kring, int flags)
{
struct netmap_bwrap_adapter *bna =
(struct netmap_bwrap_adapter *)na;
struct netmap_adapter *na = kring->na;
struct netmap_bwrap_adapter *bna = na->na_private;
struct netmap_adapter *hwna = bna->hwna;
struct netmap_kring *kring, *hw_kring;
struct netmap_ring *ring;
u_int lim;
u_int ring_n = kring->ring_id;
u_int lim = kring->nkr_num_slots - 1;
struct netmap_kring *hw_kring;
int error = 0;
if (tx == NR_TX)
return EINVAL;
kring = &na->rx_rings[ring_n];
ND("%s: na %s hwna %s",
(kring ? kring->name : "NULL!"),
(na ? na->name : "NULL!"),
(hwna ? hwna->name : "NULL!"));
hw_kring = &hwna->tx_rings[ring_n];
ring = kring->ring;
lim = kring->nkr_num_slots - 1;
if (nm_kr_tryget(hw_kring))
return 0;
if (!nm_netmap_on(hwna))
return 0;
mtx_lock(&kring->q_lock);
/* first step: simulate a user wakeup on the rx ring */
netmap_vp_rxsync_locked(kring, flags);
netmap_vp_rxsync(kring, flags);
ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
na->name, ring_n,
kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
ring->head, ring->cur, ring->tail,
hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
/* second step: the simulated user consumes all new packets */
ring->head = ring->cur = ring->tail;
/* third step: the new packets are sent on the tx ring
/* second step: the new packets are sent on the tx ring
* (which is actually the same ring)
*/
/* set tail to what the hw expects */
ring->tail = hw_kring->rtail;
nm_txsync_prologue(&hwna->tx_rings[ring_n]); // XXX error checking ?
hw_kring->rhead = hw_kring->rcur = kring->nr_hwtail;
error = hw_kring->nm_sync(hw_kring, flags);
if (error)
goto out;
/* fourth step: now we are back the rx ring */
/* third step: now we are back the rx ring */
/* claim ownership on all hw owned bufs */
ring->head = nm_next(ring->tail, lim); /* skip past reserved slot */
ring->tail = kring->rtail; /* restore saved value of tail, for safety */
kring->rhead = kring->rcur = nm_next(hw_kring->nr_hwtail, lim); /* skip past reserved slot */
/* fifth step: the user goes to sleep again, causing another rxsync */
netmap_vp_rxsync_locked(kring, flags);
/* fourth step: the user goes to sleep again, causing another rxsync */
netmap_vp_rxsync(kring, flags);
ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
na->name, ring_n,
kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
ring->head, ring->cur, ring->tail,
hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
mtx_unlock(&kring->q_lock);
out:
nm_kr_put(hw_kring);
return error;
}
/* notify method for the bridge-->host-rings path */
static int
netmap_bwrap_host_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
{
struct netmap_bwrap_adapter *bna = na->na_private;
struct netmap_adapter *port_na = &bna->up.up;
if (tx == NR_TX || ring_n != 0)
return EINVAL;
return netmap_bwrap_notify(port_na, port_na->num_rx_rings, NR_RX, flags);
}
/* nm_bdg_ctl callback for the bwrap.
* Called on bridge-attach and detach, as an effect of vale-ctl -[ahd].
* On attach, it needs to provide a fake netmap_priv_d structure and
@ -2261,7 +2215,6 @@ netmap_bwrap_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach)
{
struct netmap_priv_d *npriv;
struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
struct netmap_if *nifp;
int error = 0;
if (attach) {
@ -2275,8 +2228,8 @@ netmap_bwrap_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach)
npriv = malloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO);
if (npriv == NULL)
return ENOMEM;
nifp = netmap_do_regif(npriv, na, nmr->nr_ringid, nmr->nr_flags, &error);
if (!nifp) {
error = netmap_do_regif(npriv, na, nmr->nr_ringid, nmr->nr_flags);
if (error) {
bzero(npriv, sizeof(*npriv));
free(npriv, M_DEVBUF);
return error;
@ -2323,6 +2276,7 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
struct netmap_adapter *na = NULL;
struct netmap_adapter *hostna = NULL;
int error = 0;
enum txrx t;
/* make sure the NIC is not already in use */
if (NETMAP_OWNED_BY_ANY(hwna)) {
@ -2336,15 +2290,17 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
}
na = &bna->up.up;
na->na_private = bna;
strncpy(na->name, nr_name, sizeof(na->name));
/* fill the ring data for the bwrap adapter with rx/tx meanings
* swapped. The real cross-linking will be done during register,
* when all the krings will have been created.
*/
na->num_rx_rings = hwna->num_tx_rings;
na->num_tx_rings = hwna->num_rx_rings;
na->num_tx_desc = hwna->num_rx_desc;
na->num_rx_desc = hwna->num_tx_desc;
for_rx_tx(t) {
enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
nma_set_nrings(na, t, nma_get_nrings(hwna, r));
nma_set_ndesc(na, t, nma_get_ndesc(hwna, r));
}
na->nm_dtor = netmap_bwrap_dtor;
na->nm_register = netmap_bwrap_register;
// na->nm_txsync = netmap_bwrap_txsync;
@ -2376,13 +2332,14 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
hostna = &bna->host.up;
snprintf(hostna->name, sizeof(hostna->name), "%s^", nr_name);
hostna->ifp = hwna->ifp;
hostna->num_tx_rings = 1;
hostna->num_tx_desc = hwna->num_rx_desc;
hostna->num_rx_rings = 1;
hostna->num_rx_desc = hwna->num_tx_desc;
for_rx_tx(t) {
enum txrx r = nm_txrx_swap(t);
nma_set_nrings(hostna, t, 1);
nma_set_ndesc(hostna, t, nma_get_ndesc(hwna, r));
}
// hostna->nm_txsync = netmap_bwrap_host_txsync;
// hostna->nm_rxsync = netmap_bwrap_host_rxsync;
hostna->nm_notify = netmap_bwrap_host_notify;
hostna->nm_notify = netmap_bwrap_notify;
hostna->nm_mem = na->nm_mem;
hostna->na_private = bna;
hostna->na_vp = &bna->up;
@ -2416,7 +2373,7 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
return 0;
err_free:
netmap_mem_private_delete(na->nm_mem);
netmap_mem_delete(na->nm_mem);
err_put:
hwna->na_vp = hwna->na_hostvp = NULL;
netmap_adapter_put(hwna);
@ -2425,13 +2382,54 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
}
void
netmap_init_bridges(void)
struct nm_bridge *
netmap_init_bridges2(u_int n)
{
int i;
bzero(nm_bridges, sizeof(struct nm_bridge) * NM_BRIDGES); /* safety */
for (i = 0; i < NM_BRIDGES; i++)
BDG_RWINIT(&nm_bridges[i]);
struct nm_bridge *b;
b = malloc(sizeof(struct nm_bridge) * n, M_DEVBUF,
M_NOWAIT | M_ZERO);
if (b == NULL)
return NULL;
for (i = 0; i < n; i++)
BDG_RWINIT(&b[i]);
return b;
}
void
netmap_uninit_bridges2(struct nm_bridge *b, u_int n)
{
int i;
if (b == NULL)
return;
for (i = 0; i < n; i++)
BDG_RWDESTROY(&b[i]);
free(b, M_DEVBUF);
}
int
netmap_init_bridges(void)
{
#ifdef CONFIG_NET_NS
return netmap_bns_register();
#else
nm_bridges = netmap_init_bridges2(NM_BRIDGES);
if (nm_bridges == NULL)
return ENOMEM;
return 0;
#endif
}
void
netmap_uninit_bridges(void)
{
#ifdef CONFIG_NET_NS
netmap_bns_unregister();
#else
netmap_uninit_bridges2(nm_bridges, NM_BRIDGES);
#endif
}
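/*
 * Editorial sketch (hypothetical caller, not in this commit): the two
 * hooks above are meant to be paired at module load/unload time.
 */
static int
example_modinit(void)
{
	int error = netmap_init_bridges();

	if (error)
		return error;	/* ENOMEM if the bridge array failed */
	/* ... rest of module initialization ... */
	return 0;
}

static void
example_modfini(void)
{
	netmap_uninit_bridges();
}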
#endif /* WITH_VALE */

View File

@ -304,6 +304,7 @@ static void re_set_linkspeed (struct rl_softc *);
#ifdef DEV_NETMAP /* see ixgbe.c for details */
#include <dev/netmap/if_re_netmap.h>
MODULE_DEPEND(re, netmap, 1, 1, 1);
#endif /* DEV_NETMAP */
#ifdef RE_DIAG

View File

@ -157,6 +157,11 @@ struct netmap_slot {
/*
* must be set whenever buf_idx is changed (as it might be
* necessary to recompute the physical address and mapping)
*
* It is also set by the kernel whenever the buf_idx is
* changed internally (e.g., by pipes). Applications may
* use this information to know when they can reuse the
* contents of previously prepared buffers.
*/
#define NS_REPORT 0x0002 /* ask the hardware to report results */
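/*
 * Editorial sketch (the classic netmap zero-copy idiom, not part of
 * this commit; swap_slots is a hypothetical name, types come from
 * <net/netmap.h>): forwarding swaps buffer indexes between an rx and
 * a tx slot, and NS_BUF_CHANGED must then be raised on both slots.
 */
static void
swap_slots(struct netmap_slot *rxs, struct netmap_slot *txs)
{
	uint32_t tmp = txs->buf_idx;

	txs->buf_idx = rxs->buf_idx;
	txs->len = rxs->len;
	txs->flags |= NS_BUF_CHANGED;	/* buf_idx changed */
	rxs->buf_idx = tmp;
	rxs->flags |= NS_BUF_CHANGED;
}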
@ -513,6 +518,9 @@ enum { NR_REG_DEFAULT = 0, /* backward compat, should not be used. */
/* monitor uses the NR_REG to select the rings to monitor */
#define NR_MONITOR_TX 0x100
#define NR_MONITOR_RX 0x200
#define NR_ZCOPY_MON 0x400
/* request exclusive access to the selected rings */
#define NR_EXCLUSIVE 0x800
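/*
 * Editorial sketch of the new flag through the raw ioctl interface
 * ("em0", ring pair 2 and fd are assumptions, not from this commit):
 */
struct nmreq req;

bzero(&req, sizeof(req));
strlcpy(req.nr_name, "em0", sizeof(req.nr_name));
req.nr_version = NETMAP_API;
req.nr_flags = NR_REG_ONE_NIC | NR_EXCLUSIVE;
req.nr_ringid = 2;	/* ring pair 2 */
if (ioctl(fd, NIOCREGIF, &req) < 0)
	perror("NIOCREGIF");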
/*

View File

@ -284,6 +284,12 @@ typedef void (*nm_cb_t)(u_char *, const struct nm_pkthdr *, const u_char *d);
* -NN bind individual NIC ring pair
* {NN bind master side of pipe NN
* }NN bind slave side of pipe NN
* a suffix starting with / and the following flags,
* in any order:
* x exclusive access
* z zero copy monitor
* t monitor tx side
* r monitor rx side
*
* req provides the initial values of nmreq before parsing ifname.
* Remember that the ifname parsing will override the ring
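/*
 * Editorial usage examples of the grammar above (the interface names
 * are assumptions):
 *	nm_open("netmap:em0/x", NULL, 0, NULL)     exclusive open
 *	nm_open("netmap:em0/rt", NULL, 0, NULL)    copy monitor, both directions
 *	nm_open("netmap:em0-1/zr", NULL, 0, NULL)  zero-copy monitor of rx ring 1
 */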
@ -351,9 +357,12 @@ nm_open(const char *ifname, const struct nmreq *req,
struct nm_desc *d = NULL;
const struct nm_desc *parent = arg;
u_int namelen;
uint32_t nr_ringid = 0, nr_flags;
uint32_t nr_ringid = 0, nr_flags, nr_reg;
const char *port = NULL;
const char *errmsg = NULL;
#define MAXERRMSG 80
char errmsg[MAXERRMSG] = "";
enum { P_START, P_RNGSFXOK, P_GETNUM, P_FLAGS, P_FLAGSOK } p_state;
long num;
if (strncmp(ifname, "netmap:", 7) && strncmp(ifname, "vale", 4)) {
errno = 0; /* name not recognised, not an error */
@ -362,60 +371,112 @@ nm_open(const char *ifname, const struct nmreq *req,
if (ifname[0] == 'n')
ifname += 7;
/* scan for a separator */
for (port = ifname; *port && !index("-*^{}", *port); port++)
for (port = ifname; *port && !index("-*^{}/", *port); port++)
;
namelen = port - ifname;
if (namelen >= sizeof(d->req.nr_name)) {
errmsg = "name too long";
snprintf(errmsg, MAXERRMSG, "name too long");
goto fail;
}
switch (*port) {
default: /* '\0', no suffix */
nr_flags = NR_REG_ALL_NIC;
break;
case '-': /* one NIC */
nr_flags = NR_REG_ONE_NIC;
nr_ringid = atoi(port + 1);
break;
case '*': /* NIC and SW, ignore port */
nr_flags = NR_REG_NIC_SW;
if (port[1]) {
errmsg = "invalid port for nic+sw";
goto fail;
p_state = P_START;
nr_flags = NR_REG_ALL_NIC; /* default for no suffix */
while (*port) {
switch (p_state) {
case P_START:
switch (*port) {
case '^': /* only SW ring */
nr_flags = NR_REG_SW;
p_state = P_RNGSFXOK;
break;
case '*': /* NIC and SW */
nr_flags = NR_REG_NIC_SW;
p_state = P_RNGSFXOK;
break;
case '-': /* one NIC ring pair */
nr_flags = NR_REG_ONE_NIC;
p_state = P_GETNUM;
break;
case '{': /* pipe (master endpoint) */
nr_flags = NR_REG_PIPE_MASTER;
p_state = P_GETNUM;
break;
case '}': /* pipe (slave endpoint) */
nr_flags = NR_REG_PIPE_SLAVE;
p_state = P_GETNUM;
break;
case '/': /* start of flags */
p_state = P_FLAGS;
break;
default:
snprintf(errmsg, MAXERRMSG, "unknown modifier: '%c'", *port);
goto fail;
}
port++;
break;
case P_RNGSFXOK:
switch (*port) {
case '/':
p_state = P_FLAGS;
break;
default:
snprintf(errmsg, MAXERRMSG, "unexpected character: '%c'", *port);
goto fail;
}
port++;
break;
case P_GETNUM:
num = strtol(port, (char **)&port, 10);
if (num < 0 || num >= NETMAP_RING_MASK) {
snprintf(errmsg, MAXERRMSG, "'%ld' out of range [0, %d)",
num, NETMAP_RING_MASK);
goto fail;
}
nr_ringid = num & NETMAP_RING_MASK;
p_state = P_RNGSFXOK;
break;
case P_FLAGS:
case P_FLAGSOK:
switch (*port) {
case 'x':
nr_flags |= NR_EXCLUSIVE;
break;
case 'z':
nr_flags |= NR_ZCOPY_MON;
break;
case 't':
nr_flags |= NR_MONITOR_TX;
break;
case 'r':
nr_flags |= NR_MONITOR_RX;
break;
default:
snprintf(errmsg, MAXERRMSG, "unrecognized flag: '%c'", *port);
goto fail;
}
port++;
p_state = P_FLAGSOK;
break;
}
break;
case '^': /* only sw ring */
nr_flags = NR_REG_SW;
if (port[1]) {
errmsg = "invalid port for sw ring";
goto fail;
}
break;
case '{':
nr_flags = NR_REG_PIPE_MASTER;
nr_ringid = atoi(port + 1);
break;
case '}':
nr_flags = NR_REG_PIPE_SLAVE;
nr_ringid = atoi(port + 1);
break;
}
if (nr_ringid >= NETMAP_RING_MASK) {
errmsg = "invalid ringid";
if (p_state != P_START && p_state != P_RNGSFXOK && p_state != P_FLAGSOK) {
snprintf(errmsg, MAXERRMSG, "unexpected end of port name");
goto fail;
}
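/*
 * Editorial summary of the state machine above:
 *	P_START    -- '^','*'     --> P_RNGSFXOK
 *	P_START    -- '-','{','}' --> P_GETNUM -- digits --> P_RNGSFXOK
 *	P_START / P_RNGSFXOK -- '/' --> P_FLAGS
 *	P_FLAGS / P_FLAGSOK  -- 'x','z','t','r' --> P_FLAGSOK
 * Accepting states: P_START, P_RNGSFXOK, P_FLAGSOK.
 */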
ND("flags: %s %s %s %s",
(nr_flags & NR_EXCLUSIVE) ? "EXCLUSIVE" : "",
(nr_flags & NR_ZCOPY_MON) ? "ZCOPY_MON" : "",
(nr_flags & NR_MONITOR_TX) ? "MONITOR_TX" : "",
(nr_flags & NR_MONITOR_RX) ? "MONITOR_RX" : "");
d = (struct nm_desc *)calloc(1, sizeof(*d));
if (d == NULL) {
errmsg = "nm_desc alloc failure";
snprintf(errmsg, MAXERRMSG, "nm_desc alloc failure");
errno = ENOMEM;
return NULL;
}
d->self = d; /* set this early so nm_close() works */
d->fd = open("/dev/netmap", O_RDWR);
if (d->fd < 0) {
errmsg = "cannot open /dev/netmap";
snprintf(errmsg, MAXERRMSG, "cannot open /dev/netmap: %s", strerror(errno));
goto fail;
}
@ -464,7 +525,7 @@ nm_open(const char *ifname, const struct nmreq *req,
d->req.nr_ringid |= new_flags & (NETMAP_NO_TX_POLL | NETMAP_DO_RX_POLL);
if (ioctl(d->fd, NIOCREGIF, &d->req)) {
errmsg = "NIOCREGIF failed";
snprintf(errmsg, MAXERRMSG, "NIOCREGIF failed: %s", strerror(errno));
goto fail;
}
@ -479,7 +540,7 @@ nm_open(const char *ifname, const struct nmreq *req,
d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED,
d->fd, 0);
if (d->mem == MAP_FAILED) {
errmsg = "mmap failed";
snprintf(errmsg, MAXERRMSG, "mmap failed: %s", strerror(errno));
goto fail;
}
d->done_mmap = 1;
@ -495,20 +556,22 @@ nm_open(const char *ifname, const struct nmreq *req,
(char *)d->mem + d->memsize;
}
if (d->req.nr_flags == NR_REG_SW) { /* host stack */
nr_reg = d->req.nr_flags & NR_REG_MASK;
if (nr_reg == NR_REG_SW) { /* host stack */
d->first_tx_ring = d->last_tx_ring = d->req.nr_tx_rings;
d->first_rx_ring = d->last_rx_ring = d->req.nr_rx_rings;
} else if (d->req.nr_flags == NR_REG_ALL_NIC) { /* only nic */
} else if (nr_reg == NR_REG_ALL_NIC) { /* only nic */
d->first_tx_ring = 0;
d->first_rx_ring = 0;
d->last_tx_ring = d->req.nr_tx_rings - 1;
d->last_rx_ring = d->req.nr_rx_rings - 1;
} else if (d->req.nr_flags == NR_REG_NIC_SW) {
} else if (nr_reg == NR_REG_NIC_SW) {
d->first_tx_ring = 0;
d->first_rx_ring = 0;
d->last_tx_ring = d->req.nr_tx_rings;
d->last_rx_ring = d->req.nr_rx_rings;
} else if (d->req.nr_flags == NR_REG_ONE_NIC) {
} else if (nr_reg == NR_REG_ONE_NIC) {
/* XXX check validity */
d->first_tx_ring = d->last_tx_ring =
d->first_rx_ring = d->last_rx_ring = d->req.nr_ringid & NETMAP_RING_MASK;
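/*
 * Editorial sketch (poll_all_rx is a hypothetical name; the helpers
 * are standard in this header): whatever binding mode was selected
 * above, callers can walk the bound rings uniformly.
 */
static void
poll_all_rx(struct nm_desc *d)
{
	u_int ri;

	for (ri = d->first_rx_ring; ri <= d->last_rx_ring; ri++) {
		struct netmap_ring *ring = NETMAP_RXRING(d->nifp, ri);

		if (!nm_ring_empty(ring)) {
			/* packets available on ring ri */
		}
	}
}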
@ -541,7 +604,7 @@ nm_open(const char *ifname, const struct nmreq *req,
fail:
nm_close(d);
if (errmsg)
if (errmsg[0])
D("%s %s", errmsg, ifname);
if (errno == 0)
errno = EINVAL;