Update to the current version of netmap.

Mostly bugfixes or features developed in the past 6 months,
so this is a 10.1 candidate.

Basically no user API changes (some bugfixes in sys/net/netmap_user.h).

In detail:

1. netmap support for virtio-net, including native netmap mode.
  Under bhyve and with a netmap backend [2] we reach over 1 Mpps
  with standard APIs (e.g. libpcap), and 5-8 Mpps in netmap mode.
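
  As a rough illustration (not part of this commit), a guest program can
  drive the new virtio-net support through the usual netmap_user.h
  helpers; "vtnet0" below is a placeholder interface name:

	#include <stdio.h>
	#include <sys/ioctl.h>
	#define NETMAP_WITH_LIBS
	#include <net/netmap_user.h>

	int
	main(void)
	{
		/* open the guest's virtio-net interface in netmap mode */
		struct nm_desc *d = nm_open("netmap:vtnet0", NULL, 0, NULL);
		unsigned char frame[60] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };

		if (d == NULL) {
			perror("nm_open");
			return (1);
		}
		if (nm_inject(d, frame, sizeof(frame)) == 0)
			fprintf(stderr, "tx ring full\n");
		ioctl(d->fd, NIOCTXSYNC, NULL);	/* push the frame out */
		nm_close(d);
		return (0);
	}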

2. (kernel) add support for multiple memory allocators, so we can
  better partition physical and virtual interfaces when giving access
  to separate users. The most visible effect is one additional
  argument to the various kernel functions that compute buffer
  addresses. All netmap-supported drivers are affected, but the
  changes are mechanical and trivial.
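
  To make the change concrete, here is a sketch of the new calling
  convention as it appears in the driver hunks below; everything except
  the netmap calls is a placeholder:

	/*
	 * Ring-init loop with the extra 'na' argument: the adapter now
	 * selects its own allocator, so the buffer helpers must receive it.
	 */
	static void
	example_ring_init(struct netmap_adapter *na, struct netmap_slot *slot,
	    bus_dma_tag_t tag, bus_dmamap_t *maps, uint64_t *desc_addr, u_int n)
	{
		u_int i;

		for (i = 0; i < n; i++) {
			uint64_t paddr;
			void *addr = PNMB(na, slot + i, &paddr); /* was PNMB(slot + i, &paddr) */

			desc_addr[i] = htole64(paddr);           /* program the NIC descriptor */
			netmap_load_map(na, tag, maps[i], addr); /* was netmap_load_map(tag, ...) */
		}
		/* similarly, NETMAP_BUF_SIZE becomes NETMAP_BUF_SIZE(na) */
	}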

3. (kernel) simplify the prototype for *txsync() and *rxsync()
  driver methods. All netmap drivers affected, changes mostly mechanical.
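
  The new prototype carries the per-ring state in the kring itself, e.g.
  (placeholder 'foo_' names, the body is just a stub):

	static int
	foo_netmap_txsync(struct netmap_kring *kring, int flags)
	{
		struct netmap_adapter *na = kring->na;	/* was the first argument */
		struct netmap_ring *ring = kring->ring;
		u_int ring_nr = kring->ring_id;		/* was the second argument */

		/* ... driver-specific reconciliation using kring->rhead etc. ... */
		(void)na; (void)ring; (void)ring_nr; (void)flags;
		return (0);
	}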

4. add support for netmap-monitor ports. Think of it as a mirroring
  port on a physical switch: a netmap monitor port replicates traffic
  present on the main port. Restrictions apply. Drive carefully.
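
  A hedged sketch of how a monitor might be requested from user space,
  assuming the NR_MONITOR_TX/NR_MONITOR_RX nr_flags that accompany
  netmap_monitor.c (check net/netmap.h for the actual names and usage):

	#include <fcntl.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <net/netmap.h>
	#include <net/netmap_user.h>

	static int
	open_rx_monitor(const char *ifname)
	{
		struct nmreq req;
		int fd = open("/dev/netmap", O_RDWR);

		if (fd < 0)
			return (-1);
		memset(&req, 0, sizeof(req));
		req.nr_version = NETMAP_API;
		strlcpy(req.nr_name, ifname, sizeof(req.nr_name));
		/* assumed flag names: monitor the RX side of all rings */
		req.nr_flags = NR_REG_ALL_NIC | NR_MONITOR_RX;
		if (ioctl(fd, NIOCREGIF, &req) < 0) {
			close(fd);
			return (-1);
		}
		return (fd);	/* mmap() and read slots as with any other port */
	}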

5. if_lem.c: support for various paravirtualization features,
  experimental and disabled by default.
  Most of these are described in our ANCS'13 paper [1].
  Paravirtualized support in netmap mode is new, and beats the
  numbers in the paper by a large factor (under qemu-kvm,
  we measured guest-host throughput up to 10-12 Mpps).
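
  Guest/host synchronization goes through a shared communication status
  block (CSB); a condensed sketch of the TX kick suppression implemented
  in the if_lem.c hunks below (field names from the diff, memory
  barriers elided):

	static void
	lem_paravirt_tx_kick(struct adapter *adapter, u_int tail)
	{
		struct paravirt_csb *csb = adapter->csb;

		if (csb != NULL && csb->guest_csb_on) {
			csb->guest_tdt = tail;	/* publish the new tail in shared memory */
			if ((csb->host_need_txkick & 1) == 0)
				return;		/* host is still polling: skip the MMIO kick */
		}
		/* no CSB, or the host asked for a kick: write TDT as usual */
		E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), tail);
	}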

A lot of refactoring and additional documentation in the files
in sys/dev/netmap, but apart from #2 and #3 above, almost nothing
of this stuff is visible to other kernel parts.

Example programs in tools/tools/netmap have been updated with bugfixes
and to support more of the existing features.

This is meant to go into 10.1, so we plan an MFC before the Aug. 22 deadline.

A lot of this code has been contributed by my colleagues at UNIPI,
including Giuseppe Lettieri, Vincenzo Maffione, and Stefano Garzarella.

MFC after:	3 days.
This commit is contained in:
Luigi Rizzo 2014-08-16 15:00:01 +00:00
parent 1b31334c64
commit 4bf50f18eb
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=270063
29 changed files with 3747 additions and 869 deletions

View File

@ -1948,6 +1948,7 @@ dev/netmap/netmap_freebsd.c optional netmap
dev/netmap/netmap_generic.c optional netmap
dev/netmap/netmap_mbq.c optional netmap
dev/netmap/netmap_mem2.c optional netmap
dev/netmap/netmap_monitor.c optional netmap
dev/netmap/netmap_offloadings.c optional netmap
dev/netmap/netmap_pipe.c optional netmap
dev/netmap/netmap_vale.c optional netmap

View File

@ -434,19 +434,18 @@ cxgbe_netmap_on(struct adapter *sc, struct port_info *pi, struct ifnet *ifp,
hwb = &sc->sge.hw_buf_info[0];
for (i = 0; i < SGE_FLBUF_SIZES; i++, hwb++) {
if (hwb->size == NETMAP_BUF_SIZE)
if (hwb->size == NETMAP_BUF_SIZE(na))
break;
}
if (i >= SGE_FLBUF_SIZES) {
if_printf(ifp, "no hwidx for netmap buffer size %d.\n",
NETMAP_BUF_SIZE);
NETMAP_BUF_SIZE(na));
return (ENXIO);
}
hwidx = i;
/* Must set caps before calling netmap_reset */
na->na_flags |= (NAF_NATIVE_ON | NAF_NETMAP_ON);
ifp->if_capenable |= IFCAP_NETMAP;
nm_set_native_flags(na);
for_each_nm_rxq(pi, i, nm_rxq) {
alloc_nm_rxq_hwq(pi, nm_rxq);
@ -460,7 +459,7 @@ cxgbe_netmap_on(struct adapter *sc, struct port_info *pi, struct ifnet *ifp,
for (j = 0; j < nm_rxq->fl_sidx - 8; j++) {
uint64_t ba;
PNMB(&slot[j], &ba);
PNMB(na, &slot[j], &ba);
nm_rxq->fl_desc[j] = htobe64(ba | hwidx);
}
nm_rxq->fl_pidx = j;
@ -512,8 +511,7 @@ cxgbe_netmap_off(struct adapter *sc, struct port_info *pi, struct ifnet *ifp,
rc = -t4_enable_vi(sc, sc->mbox, pi->nm_viid, false, false);
if (rc != 0)
if_printf(ifp, "netmap disable_vi failed: %d\n", rc);
na->na_flags &= ~(NAF_NATIVE_ON | NAF_NETMAP_ON);
ifp->if_capenable &= ~IFCAP_NETMAP;
nm_clear_native_flags(na);
/*
* XXXNM: We need to make sure that the tx queues are quiet and won't
@ -669,7 +667,7 @@ cxgbe_nm_tx(struct adapter *sc, struct sge_nm_txq *nm_txq,
for (i = 0; i < n; i++) {
slot = &ring->slot[kring->nr_hwcur];
PNMB(slot, &ba);
PNMB(kring->na, slot, &ba);
cpl->ctrl0 = nm_txq->cpl_ctrl0;
cpl->pack = 0;
@ -786,13 +784,13 @@ reclaim_nm_tx_desc(struct sge_nm_txq *nm_txq)
}
static int
cxgbe_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
cxgbe_netmap_txsync(struct netmap_kring *kring, int flags)
{
struct netmap_kring *kring = &na->tx_rings[ring_nr];
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct port_info *pi = ifp->if_softc;
struct adapter *sc = pi->adapter;
struct sge_nm_txq *nm_txq = &sc->sge.nm_txq[pi->first_nm_txq + ring_nr];
struct sge_nm_txq *nm_txq = &sc->sge.nm_txq[pi->first_nm_txq + kring->ring_id];
const u_int head = kring->rhead;
u_int reclaimed = 0;
int n, d, npkt_remaining, ndesc_remaining;
@ -851,14 +849,14 @@ cxgbe_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
}
static int
cxgbe_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
cxgbe_netmap_rxsync(struct netmap_kring *kring, int flags)
{
struct netmap_kring *kring = &na->rx_rings[ring_nr];
struct netmap_adapter *na = kring->na;
struct netmap_ring *ring = kring->ring;
struct ifnet *ifp = na->ifp;
struct port_info *pi = ifp->if_softc;
struct adapter *sc = pi->adapter;
struct sge_nm_rxq *nm_rxq = &sc->sge.nm_rxq[pi->first_nm_rxq + ring_nr];
struct sge_nm_rxq *nm_rxq = &sc->sge.nm_rxq[pi->first_nm_rxq + kring->ring_id];
u_int const head = nm_rxsync_prologue(kring);
u_int n;
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
@ -891,7 +889,7 @@ cxgbe_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
while (n > 0) {
for (i = 0; i < 8; i++, fl_pidx++, slot++) {
PNMB(slot, &ba);
PNMB(na, slot, &ba);
nm_rxq->fl_desc[fl_pidx] = htobe64(ba | hwidx);
slot->flags &= ~NS_BUF_CHANGED;
MPASS(fl_pidx <= nm_rxq->fl_sidx);

View File

@ -3340,10 +3340,10 @@ em_setup_transmit_ring(struct tx_ring *txr)
uint64_t paddr;
void *addr;
addr = PNMB(slot + si, &paddr);
addr = PNMB(na, slot + si, &paddr);
txr->tx_base[i].buffer_addr = htole64(paddr);
/* reload the map for netmap mode */
netmap_load_map(txr->txtag, txbuf->map, addr);
netmap_load_map(na, txr->txtag, txbuf->map, addr);
}
#endif /* DEV_NETMAP */
@ -4082,8 +4082,8 @@ em_setup_receive_ring(struct rx_ring *rxr)
uint64_t paddr;
void *addr;
addr = PNMB(slot + si, &paddr);
netmap_load_map(rxr->rxtag, rxbuf->map, addr);
addr = PNMB(na, slot + si, &paddr);
netmap_load_map(na, rxr->rxtag, rxbuf->map, addr);
/* Update descriptor */
rxr->rx_base[j].buffer_addr = htole64(paddr);
continue;

View File

@ -3629,7 +3629,7 @@ igb_setup_transmit_ring(struct tx_ring *txr)
if (slot) {
int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
/* no need to set the address */
netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
}
#endif /* DEV_NETMAP */
/* clear the watch index */
@ -4433,8 +4433,8 @@ igb_setup_receive_ring(struct rx_ring *rxr)
uint64_t paddr;
void *addr;
addr = PNMB(slot + sj, &paddr);
netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
addr = PNMB(na, slot + sj, &paddr);
netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
/* Update descriptor */
rxr->rx_base[j].read.pkt_addr = htole64(paddr);
continue;

View File

@ -32,6 +32,15 @@
******************************************************************************/
/*$FreeBSD$*/
/*
* Uncomment the following extensions for better performance in a VM,
* especially if you have support in the hypervisor.
* See http://info.iet.unipi.it/~luigi/netmap/
*/
// #define BATCH_DISPATCH
// #define NIC_SEND_COMBINING
// #define NIC_PARAVIRT /* enable virtio-like synchronization */
#include "opt_inet.h"
#include "opt_inet6.h"
@ -291,6 +300,10 @@ static int lem_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
static int lem_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
static int lem_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
static int lem_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
/*
* increase lem_rxd and lem_txd to at least 2048 in netmap mode
* for better performance.
*/
static int lem_rxd = EM_DEFAULT_RXD;
static int lem_txd = EM_DEFAULT_TXD;
static int lem_smart_pwr_down = FALSE;
@ -460,6 +473,20 @@ lem_attach(device_t dev)
"max number of rx packets to process", &adapter->rx_process_limit,
lem_rx_process_limit);
#ifdef NIC_SEND_COMBINING
/* Sysctls to control mitigation */
lem_add_rx_process_limit(adapter, "sc_enable",
"driver TDT mitigation", &adapter->sc_enable, 0);
#endif /* NIC_SEND_COMBINING */
#ifdef BATCH_DISPATCH
lem_add_rx_process_limit(adapter, "batch_enable",
"driver rx batch", &adapter->batch_enable, 0);
#endif /* BATCH_DISPATCH */
#ifdef NIC_PARAVIRT
lem_add_rx_process_limit(adapter, "rx_retries",
"driver rx retries", &adapter->rx_retries, 0);
#endif /* NIC_PARAVIRT */
/* Sysctl for setting the interface flow control */
lem_set_flow_cntrl(adapter, "flow_control",
"flow control setting",
@ -517,6 +544,49 @@ lem_attach(device_t dev)
*/
adapter->hw.mac.report_tx_early = 1;
#ifdef NIC_PARAVIRT
device_printf(dev, "driver supports paravirt, subdev 0x%x\n",
adapter->hw.subsystem_device_id);
if (adapter->hw.subsystem_device_id == E1000_PARA_SUBDEV) {
uint64_t bus_addr;
device_printf(dev, "paravirt support on dev %p\n", adapter);
tsize = 4096; // XXX one page for the csb
if (lem_dma_malloc(adapter, tsize, &adapter->csb_mem, BUS_DMA_NOWAIT)) {
device_printf(dev, "Unable to allocate csb memory\n");
error = ENOMEM;
goto err_csb;
}
/* Setup the Base of the CSB */
adapter->csb = (struct paravirt_csb *)adapter->csb_mem.dma_vaddr;
/* force the first kick */
adapter->csb->host_need_txkick = 1; /* txring empty */
adapter->csb->guest_need_rxkick = 1; /* no rx packets */
bus_addr = adapter->csb_mem.dma_paddr;
lem_add_rx_process_limit(adapter, "csb_on",
"enable paravirt.", &adapter->csb->guest_csb_on, 0);
lem_add_rx_process_limit(adapter, "txc_lim",
"txc_lim", &adapter->csb->host_txcycles_lim, 1);
/* some stats */
#define PA_SC(name, var, val) \
lem_add_rx_process_limit(adapter, name, name, var, val)
PA_SC("host_need_txkick",&adapter->csb->host_need_txkick, 1);
PA_SC("host_rxkick_at",&adapter->csb->host_rxkick_at, ~0);
PA_SC("guest_need_txkick",&adapter->csb->guest_need_txkick, 0);
PA_SC("guest_need_rxkick",&adapter->csb->guest_need_rxkick, 1);
PA_SC("tdt_reg_count",&adapter->tdt_reg_count, 0);
PA_SC("tdt_csb_count",&adapter->tdt_csb_count, 0);
PA_SC("tdt_int_count",&adapter->tdt_int_count, 0);
PA_SC("guest_need_kick_count",&adapter->guest_need_kick_count, 0);
/* tell the host where the block is */
E1000_WRITE_REG(&adapter->hw, E1000_CSBAH,
(u32)(bus_addr >> 32));
E1000_WRITE_REG(&adapter->hw, E1000_CSBAL,
(u32)bus_addr);
}
#endif /* NIC_PARAVIRT */
tsize = roundup2(adapter->num_tx_desc * sizeof(struct e1000_tx_desc),
EM_DBA_ALIGN);
@ -675,6 +745,11 @@ lem_attach(device_t dev)
err_rx_desc:
lem_dma_free(adapter, &adapter->txdma);
err_tx_desc:
#ifdef NIC_PARAVIRT
lem_dma_free(adapter, &adapter->csb_mem);
err_csb:
#endif /* NIC_PARAVIRT */
err_pci:
if (adapter->ifp != (void *)NULL)
if_free_drv(adapter->ifp);
@ -762,6 +837,12 @@ lem_detach(device_t dev)
adapter->rx_desc_base = NULL;
}
#ifdef NIC_PARAVIRT
if (adapter->csb) {
lem_dma_free(adapter, &adapter->csb_mem);
adapter->csb = NULL;
}
#endif /* NIC_PARAVIRT */
lem_release_hw_control(adapter);
free(adapter->mta, M_DEVBUF);
EM_TX_LOCK_DESTROY(adapter);
@ -871,6 +952,16 @@ lem_start_locked(if_t ifp)
}
if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD)
if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
#ifdef NIC_PARAVIRT
if (if_getdrvflags(ifp) & IFF_DRV_OACTIVE && adapter->csb &&
adapter->csb->guest_csb_on &&
!(adapter->csb->guest_need_txkick & 1)) {
adapter->csb->guest_need_txkick = 1;
adapter->guest_need_kick_count++;
// XXX memory barrier
lem_txeof(adapter); // XXX possibly clear IFF_DRV_OACTIVE
}
#endif /* NIC_PARAVIRT */
return;
}
@ -1716,6 +1807,37 @@ lem_xmit(struct adapter *adapter, struct mbuf **m_headp)
*/
bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
#ifdef NIC_PARAVIRT
if (adapter->csb) {
adapter->csb->guest_tdt = i;
/* XXX memory barrier ? */
if (adapter->csb->guest_csb_on &&
!(adapter->csb->host_need_txkick & 1)) {
/* XXX maybe useless
* clean the ring. maybe do it before ?
* maybe a little bit of hysteresis ?
*/
if (adapter->num_tx_desc_avail <= 64) {// XXX
lem_txeof(adapter);
}
return (0);
}
}
#endif /* NIC_PARAVIRT */
#ifdef NIC_SEND_COMBINING
if (adapter->sc_enable) {
if (adapter->shadow_tdt & MIT_PENDING_INT) {
/* signal intr and data pending */
adapter->shadow_tdt = MIT_PENDING_TDT | (i & 0xffff);
return (0);
} else {
adapter->shadow_tdt = MIT_PENDING_INT;
}
}
#endif /* NIC_SEND_COMBINING */
if (adapter->hw.mac.type == e1000_82547 &&
adapter->link_duplex == HALF_DUPLEX)
lem_82547_move_tail(adapter);
@ -1959,6 +2081,20 @@ lem_local_timer(void *arg)
lem_smartspeed(adapter);
#ifdef NIC_PARAVIRT
/* recover space if needed */
if (adapter->csb && adapter->csb->guest_csb_on &&
(adapter->watchdog_check == TRUE) &&
(ticks - adapter->watchdog_time > EM_WATCHDOG) &&
(adapter->num_tx_desc_avail != adapter->num_tx_desc) ) {
lem_txeof(adapter);
/*
* lem_txeof() normally (except when space in the queue
* runs low XXX) cleans watchdog_check so that
* we do not hung.
*/
}
#endif /* NIC_PARAVIRT */
/*
* We check the watchdog: the time since
* the last TX descriptor was cleaned.
@ -2643,10 +2779,10 @@ lem_setup_transmit_structures(struct adapter *adapter)
uint64_t paddr;
void *addr;
addr = PNMB(slot + si, &paddr);
addr = PNMB(na, slot + si, &paddr);
adapter->tx_desc_base[i].buffer_addr = htole64(paddr);
/* reload the map for netmap mode */
netmap_load_map(adapter->txtag, tx_buffer->map, addr);
netmap_load_map(na, adapter->txtag, tx_buffer->map, addr);
}
#endif /* DEV_NETMAP */
tx_buffer->next_eop = -1;
@ -3021,6 +3157,16 @@ lem_txeof(struct adapter *adapter)
adapter->next_tx_to_clean = first;
adapter->num_tx_desc_avail = num_avail;
#ifdef NIC_SEND_COMBINING
if ((adapter->shadow_tdt & MIT_PENDING_TDT) == MIT_PENDING_TDT) {
/* a tdt write is pending, do it */
E1000_WRITE_REG(&adapter->hw, E1000_TDT(0),
0xffff & adapter->shadow_tdt);
adapter->shadow_tdt = MIT_PENDING_INT;
} else {
adapter->shadow_tdt = 0; // disable
}
#endif /* NIC_SEND_COMBINING */
/*
* If we have enough room, clear IFF_DRV_OACTIVE to
* tell the stack that it is OK to send packets.
@ -3028,6 +3174,12 @@ lem_txeof(struct adapter *adapter)
*/
if (adapter->num_tx_desc_avail > EM_TX_CLEANUP_THRESHOLD) {
if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
#ifdef NIC_PARAVIRT
if (adapter->csb) { // XXX also csb_on ?
adapter->csb->guest_need_txkick = 2; /* acked */
// XXX memory barrier
}
#endif /* NIC_PARAVIRT */
if (adapter->num_tx_desc_avail == adapter->num_tx_desc) {
adapter->watchdog_check = FALSE;
return;
@ -3213,8 +3365,8 @@ lem_setup_receive_structures(struct adapter *adapter)
uint64_t paddr;
void *addr;
addr = PNMB(slot + si, &paddr);
netmap_load_map(adapter->rxtag, rx_buffer->map, addr);
addr = PNMB(na, slot + si, &paddr);
netmap_load_map(na, adapter->rxtag, rx_buffer->map, addr);
/* Update descriptor */
adapter->rx_desc_base[i].buffer_addr = htole64(paddr);
continue;
@ -3413,7 +3565,23 @@ lem_rxeof(struct adapter *adapter, int count, int *done)
int i, rx_sent = 0;
struct e1000_rx_desc *current_desc;
#ifdef BATCH_DISPATCH
struct mbuf *mh = NULL, *mt = NULL;
#endif /* BATCH_DISPATCH */
#ifdef NIC_PARAVIRT
int retries = 0;
struct paravirt_csb* csb = adapter->csb;
int csb_mode = csb && csb->guest_csb_on;
//ND("clear guest_rxkick at %d", adapter->next_rx_desc_to_check);
if (csb_mode && csb->guest_need_rxkick)
csb->guest_need_rxkick = 0;
#endif /* NIC_PARAVIRT */
EM_RX_LOCK(adapter);
#ifdef BATCH_DISPATCH
batch_again:
#endif /* BATCH_DISPATCH */
i = adapter->next_rx_desc_to_check;
current_desc = &adapter->rx_desc_base[i];
bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
@ -3426,19 +3594,45 @@ lem_rxeof(struct adapter *adapter, int count, int *done)
}
#endif /* DEV_NETMAP */
#if 1 // XXX optimization ?
if (!((current_desc->status) & E1000_RXD_STAT_DD)) {
if (done != NULL)
*done = rx_sent;
EM_RX_UNLOCK(adapter);
return (FALSE);
}
#endif /* 0 */
while (count != 0 && if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
struct mbuf *m = NULL;
status = current_desc->status;
if ((status & E1000_RXD_STAT_DD) == 0)
if ((status & E1000_RXD_STAT_DD) == 0) {
#ifdef NIC_PARAVIRT
if (csb_mode) {
/* buffer not ready yet. Retry a few times before giving up */
if (++retries <= adapter->rx_retries) {
continue;
}
if (csb->guest_need_rxkick == 0) {
// ND("set guest_rxkick at %d", adapter->next_rx_desc_to_check);
csb->guest_need_rxkick = 1;
// XXX memory barrier, status volatile ?
continue; /* double check */
}
}
/* no buffer ready, give up */
#endif /* NIC_PARAVIRT */
break;
}
#ifdef NIC_PARAVIRT
if (csb_mode) {
if (csb->guest_need_rxkick)
// ND("clear again guest_rxkick at %d", adapter->next_rx_desc_to_check);
csb->guest_need_rxkick = 0;
retries = 0;
}
#endif /* NIC_PARAVIRT */
mp = adapter->rx_buffer_area[i].m_head;
/*
@ -3563,11 +3757,36 @@ lem_rxeof(struct adapter *adapter, int count, int *done)
bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
#ifdef NIC_PARAVIRT
if (csb_mode) {
/* the buffer at i has been already replaced by lem_get_buf()
* so it is safe to set guest_rdt = i and possibly send a kick.
* XXX see if we can optimize it later.
*/
csb->guest_rdt = i;
// XXX memory barrier
if (i == csb->host_rxkick_at)
E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), i);
}
#endif /* NIC_PARAVIRT */
/* Advance our pointers to the next descriptor. */
if (++i == adapter->num_rx_desc)
i = 0;
/* Call into the stack */
if (m != NULL) {
#ifdef BATCH_DISPATCH
if (adapter->batch_enable) {
if (mh == NULL)
mh = mt = m;
else
mt->m_nextpkt = m;
mt = m;
m->m_nextpkt = NULL;
rx_sent++;
current_desc = &adapter->rx_desc_base[i];
continue;
}
#endif /* BATCH_DISPATCH */
adapter->next_rx_desc_to_check = i;
EM_RX_UNLOCK(adapter);
if_input(ifp, m);
@ -3578,10 +3797,27 @@ lem_rxeof(struct adapter *adapter, int count, int *done)
current_desc = &adapter->rx_desc_base[i];
}
adapter->next_rx_desc_to_check = i;
#ifdef BATCH_DISPATCH
if (mh) {
EM_RX_UNLOCK(adapter);
while ( (mt = mh) != NULL) {
mh = mh->m_nextpkt;
mt->m_nextpkt = NULL;
if_input(ifp, mt);
}
EM_RX_LOCK(adapter);
i = adapter->next_rx_desc_to_check; /* in case of interrupts */
if (count > 0)
goto batch_again;
}
#endif /* BATCH_DISPATCH */
/* Advance the E1000's Receive Queue #0 "Tail Pointer". */
if (--i < 0)
i = adapter->num_rx_desc - 1;
#ifdef NIC_PARAVIRT
if (!csb_mode) /* filter out writes */
#endif /* NIC_PARAVIRT */
E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), i);
if (done != NULL)
*done = rx_sent;

View File

@ -3155,7 +3155,7 @@ ixgbe_setup_transmit_ring(struct tx_ring *txr)
*/
if (slot) {
int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
}
#endif /* DEV_NETMAP */
/* Clear the EOP descriptor pointer */
@ -4098,8 +4098,8 @@ ixgbe_setup_receive_ring(struct rx_ring *rxr)
uint64_t paddr;
void *addr;
addr = PNMB(slot + sj, &paddr);
netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
addr = PNMB(na, slot + sj, &paddr);
netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
/* Update descriptor and the cached value */
rxr->rx_base[j].read.pkt_addr = htole64(paddr);
rxbuf->addr = htole64(paddr);

View File

@ -113,10 +113,10 @@ em_netmap_reg(struct netmap_adapter *na, int onoff)
* Reconcile kernel and user view of the transmit ring.
*/
static int
em_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
em_netmap_txsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_kring *kring = &na->tx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
@ -128,7 +128,7 @@ em_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
/* device-specific */
struct adapter *adapter = ifp->if_softc;
struct tx_ring *txr = &adapter->tx_rings[ring_nr];
struct tx_ring *txr = &adapter->tx_rings[kring->ring_id];
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
BUS_DMASYNC_POSTREAD);
@ -144,7 +144,7 @@ em_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
struct netmap_slot *slot = &ring->slot[nm_i];
u_int len = slot->len;
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
void *addr = PNMB(na, slot, &paddr);
/* device-specific */
struct e1000_tx_desc *curr = &txr->tx_base[nic_i];
@ -153,12 +153,12 @@ em_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
nic_i == 0 || nic_i == report_frequency) ?
E1000_TXD_CMD_RS : 0;
NM_CHECK_ADDR_LEN(addr, len);
NM_CHECK_ADDR_LEN(na, addr, len);
if (slot->flags & NS_BUF_CHANGED) {
curr->buffer_addr = htole64(paddr);
/* buffer has changed, reload map */
netmap_reload_map(txr->txtag, txbuf->map, addr);
netmap_reload_map(na, txr->txtag, txbuf->map, addr);
}
slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
@ -187,7 +187,7 @@ em_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
*/
if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
/* record completed transmissions using TDH */
nic_i = E1000_READ_REG(&adapter->hw, E1000_TDH(ring_nr));
nic_i = E1000_READ_REG(&adapter->hw, E1000_TDH(kring->ring_id));
if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
D("TDH wrap %d", nic_i);
nic_i -= kring->nkr_num_slots;
@ -208,10 +208,10 @@ em_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
* Reconcile kernel and user view of the receive ring.
*/
static int
em_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
em_netmap_rxsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_kring *kring = &na->rx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
@ -222,7 +222,7 @@ em_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
/* device-specific */
struct adapter *adapter = ifp->if_softc;
struct rx_ring *rxr = &adapter->rx_rings[ring_nr];
struct rx_ring *rxr = &adapter->rx_rings[kring->ring_id];
if (head > lim)
return netmap_ring_reinit(kring);
@ -271,18 +271,18 @@ em_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
void *addr = PNMB(na, slot, &paddr);
struct e1000_rx_desc *curr = &rxr->rx_base[nic_i];
struct em_buffer *rxbuf = &rxr->rx_buffers[nic_i];
if (addr == netmap_buffer_base) /* bad buf */
if (addr == NETMAP_BUF_BASE(na)) /* bad buf */
goto ring_reset;
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, reload map */
curr->buffer_addr = htole64(paddr);
netmap_reload_map(rxr->rxtag, rxbuf->map, addr);
netmap_reload_map(na, rxr->rxtag, rxbuf->map, addr);
slot->flags &= ~NS_BUF_CHANGED;
}
curr->status = 0;

View File

@ -81,10 +81,10 @@ igb_netmap_reg(struct netmap_adapter *na, int onoff)
* Reconcile kernel and user view of the transmit ring.
*/
static int
igb_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
igb_netmap_txsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_kring *kring = &na->tx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
@ -96,7 +96,7 @@ igb_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
/* device-specific */
struct adapter *adapter = ifp->if_softc;
struct tx_ring *txr = &adapter->tx_rings[ring_nr];
struct tx_ring *txr = &adapter->tx_rings[kring->ring_id];
/* 82575 needs the queue index added */
u32 olinfo_status =
(adapter->hw.mac.type == e1000_82575) ? (txr->me << 4) : 0;
@ -115,7 +115,7 @@ igb_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
struct netmap_slot *slot = &ring->slot[nm_i];
u_int len = slot->len;
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
void *addr = PNMB(na, slot, &paddr);
/* device-specific */
union e1000_adv_tx_desc *curr =
@ -125,11 +125,11 @@ igb_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
nic_i == 0 || nic_i == report_frequency) ?
E1000_ADVTXD_DCMD_RS : 0;
NM_CHECK_ADDR_LEN(addr, len);
NM_CHECK_ADDR_LEN(na, addr, len);
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, reload map */
netmap_reload_map(txr->txtag, txbuf->map, addr);
netmap_reload_map(na, txr->txtag, txbuf->map, addr);
}
slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
@ -171,7 +171,7 @@ igb_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
*/
if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
/* record completed transmissions using TDH */
nic_i = E1000_READ_REG(&adapter->hw, E1000_TDH(ring_nr));
nic_i = E1000_READ_REG(&adapter->hw, E1000_TDH(kring->ring_id));
if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
D("TDH wrap %d", nic_i);
nic_i -= kring->nkr_num_slots;
@ -190,10 +190,10 @@ igb_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
* Reconcile kernel and user view of the receive ring.
*/
static int
igb_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
igb_netmap_rxsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_kring *kring = &na->rx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
@ -204,7 +204,7 @@ igb_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
/* device-specific */
struct adapter *adapter = ifp->if_softc;
struct rx_ring *rxr = &adapter->rx_rings[ring_nr];
struct rx_ring *rxr = &adapter->rx_rings[kring->ring_id];
if (head > lim)
return netmap_ring_reinit(kring);
@ -251,17 +251,17 @@ igb_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
void *addr = PNMB(na, slot, &paddr);
union e1000_adv_rx_desc *curr = &rxr->rx_base[nic_i];
struct igb_rx_buf *rxbuf = &rxr->rx_buffers[nic_i];
if (addr == netmap_buffer_base) /* bad buf */
if (addr == NETMAP_BUF_BASE(na)) /* bad buf */
goto ring_reset;
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, reload map */
netmap_reload_map(rxr->ptag, rxbuf->pmap, addr);
netmap_reload_map(na, rxr->ptag, rxbuf->pmap, addr);
slot->flags &= ~NS_BUF_CHANGED;
}
curr->wb.upper.status_error = 0;

View File

@ -39,6 +39,7 @@
#include <vm/pmap.h> /* vtophys ? */
#include <dev/netmap/netmap_kern.h>
extern int netmap_adaptive_io;
/*
* Register/unregister. We are already under netmap lock.
@ -84,10 +85,10 @@ lem_netmap_reg(struct netmap_adapter *na, int onoff)
* Reconcile kernel and user view of the transmit ring.
*/
static int
lem_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
lem_netmap_txsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_kring *kring = &na->tx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
@ -98,6 +99,10 @@ lem_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
/* device-specific */
struct adapter *adapter = ifp->if_softc;
#ifdef NIC_PARAVIRT
struct paravirt_csb *csb = adapter->csb;
uint64_t *csbd = (uint64_t *)(csb + 1);
#endif /* NIC_PARAVIRT */
bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
BUS_DMASYNC_POSTREAD);
@ -108,12 +113,25 @@ lem_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
nm_i = kring->nr_hwcur;
if (nm_i != head) { /* we have new packets to send */
#ifdef NIC_PARAVIRT
int do_kick = 0;
uint64_t t = 0; // timestamp
int n = head - nm_i;
if (n < 0)
n += lim + 1;
if (csb) {
t = rdtsc(); /* last timestamp */
csbd[16] += t - csbd[0]; /* total Wg */
csbd[17] += n; /* Wg count */
csbd[0] = t;
}
#endif /* NIC_PARAVIRT */
nic_i = netmap_idx_k2n(kring, nm_i);
while (nm_i != head) {
struct netmap_slot *slot = &ring->slot[nm_i];
u_int len = slot->len;
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
void *addr = PNMB(na, slot, &paddr);
/* device-specific */
struct e1000_tx_desc *curr = &adapter->tx_desc_base[nic_i];
@ -122,12 +140,12 @@ lem_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
nic_i == 0 || nic_i == report_frequency) ?
E1000_TXD_CMD_RS : 0;
NM_CHECK_ADDR_LEN(addr, len);
NM_CHECK_ADDR_LEN(na, addr, len);
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, reload map */
curr->buffer_addr = htole64(paddr);
netmap_reload_map(adapter->txtag, txbuf->map, addr);
netmap_reload_map(na, adapter->txtag, txbuf->map, addr);
}
slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
@ -140,6 +158,7 @@ lem_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
// XXX might try an early kick
}
kring->nr_hwcur = head;
@ -147,8 +166,38 @@ lem_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
#ifdef NIC_PARAVIRT
/* set unconditionally, then also kick if needed */
if (csb) {
t = rdtsc();
if (csb->host_need_txkick == 2) {
/* can compute an update of delta */
int64_t delta = t - csbd[3];
if (delta < 0)
delta = -delta;
if (csbd[8] == 0 || delta < csbd[8]) {
csbd[8] = delta;
csbd[9]++;
}
csbd[10]++;
}
csb->guest_tdt = nic_i;
csbd[18] += t - csbd[0]; // total wp
csbd[19] += n;
}
if (!csb || !csb->guest_csb_on || (csb->host_need_txkick & 1))
do_kick = 1;
if (do_kick)
#endif /* NIC_PARAVIRT */
/* (re)start the tx unit up to slot nic_i (excluded) */
E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), nic_i);
#ifdef NIC_PARAVIRT
if (do_kick) {
uint64_t t1 = rdtsc();
csbd[20] += t1 - t; // total Np
csbd[21]++;
}
#endif /* NIC_PARAVIRT */
}
/*
@ -157,6 +206,93 @@ lem_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
if (ticks != kring->last_reclaim || flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
kring->last_reclaim = ticks;
/* record completed transmissions using TDH */
#ifdef NIC_PARAVIRT
/* host updates tdh unconditionally, and we have
* no side effects on reads, so we can read from there
* instead of exiting.
*/
if (csb) {
static int drain = 0, nodrain=0, good = 0, bad = 0, fail = 0;
u_int x = adapter->next_tx_to_clean;
csbd[19]++; // XXX count reclaims
nic_i = csb->host_tdh;
if (csb->guest_csb_on) {
if (nic_i == x) {
bad++;
csbd[24]++; // failed reclaims
/* no progress, request kick and retry */
csb->guest_need_txkick = 1;
mb(); // XXX barrier
nic_i = csb->host_tdh;
} else {
good++;
}
if (nic_i != x) {
csb->guest_need_txkick = 2;
if (nic_i == csb->guest_tdt)
drain++;
else
nodrain++;
#if 1
if (netmap_adaptive_io) {
/* new mechanism: last half ring (or so)
* released one slot at a time.
* This effectively makes the system spin.
*
* Take next_to_clean + 1 as a reference.
* tdh must be ahead or equal
* On entry, the logical order is
* x < tdh = nic_i
* We first push tdh up to avoid wraps.
* The limit is tdh-ll (half ring).
* if tdh-256 < x we report x;
* else we report tdh-256
*/
u_int tdh = nic_i;
u_int ll = csbd[15];
u_int delta = lim/8;
if (netmap_adaptive_io == 2 || ll > delta)
csbd[15] = ll = delta;
else if (netmap_adaptive_io == 1 && ll > 1) {
csbd[15]--;
}
if (nic_i >= kring->nkr_num_slots) {
RD(5, "bad nic_i %d on input", nic_i);
}
x = nm_next(x, lim);
if (tdh < x)
tdh += lim + 1;
if (tdh <= x + ll) {
nic_i = x;
csbd[25]++; //report n + 1;
} else {
tdh = nic_i;
if (tdh < ll)
tdh += lim + 1;
nic_i = tdh - ll;
csbd[26]++; // report tdh - ll
}
}
#endif
} else {
/* we stop, count whether we are idle or not */
int bh_active = csb->host_need_txkick & 2 ? 4 : 0;
csbd[27+ csb->host_need_txkick]++;
if (netmap_adaptive_io == 1) {
if (bh_active && csbd[15] > 1)
csbd[15]--;
else if (!bh_active && csbd[15] < lim/2)
csbd[15]++;
}
bad--;
fail++;
}
}
RD(1, "drain %d nodrain %d good %d retry %d fail %d",
drain, nodrain, good, bad, fail);
} else
#endif /* !NIC_PARAVIRT */
nic_i = E1000_READ_REG(&adapter->hw, E1000_TDH(0));
if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
D("TDH wrap %d", nic_i);
@ -176,10 +312,10 @@ lem_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
* Reconcile kernel and user view of the receive ring.
*/
static int
lem_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
lem_netmap_rxsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_kring *kring = &na->rx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
@ -190,10 +326,21 @@ lem_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
/* device-specific */
struct adapter *adapter = ifp->if_softc;
#ifdef NIC_PARAVIRT
struct paravirt_csb *csb = adapter->csb;
uint32_t csb_mode = csb && csb->guest_csb_on;
uint32_t do_host_rxkick = 0;
#endif /* NIC_PARAVIRT */
if (head > lim)
return netmap_ring_reinit(kring);
#ifdef NIC_PARAVIRT
if (csb_mode) {
force_update = 1;
csb->guest_need_rxkick = 0;
}
#endif /* NIC_PARAVIRT */
/* XXX check sync modes */
bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
@ -212,11 +359,28 @@ lem_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
uint32_t staterr = le32toh(curr->status);
int len;
#ifdef NIC_PARAVIRT
if (csb_mode) {
if ((staterr & E1000_RXD_STAT_DD) == 0) {
/* don't bother to retry if more than 1 pkt */
if (n > 1)
break;
csb->guest_need_rxkick = 1;
wmb();
staterr = le32toh(curr->status);
if ((staterr & E1000_RXD_STAT_DD) == 0) {
break;
} else { /* we are good */
csb->guest_need_rxkick = 0;
}
}
} else
#endif /* NIC_PARAVIRT */
if ((staterr & E1000_RXD_STAT_DD) == 0)
break;
len = le16toh(curr->length) - 4; // CRC
if (len < 0) {
D("bogus pkt size %d nic idx %d", len, nic_i);
RD(5, "bogus pkt (%d) size %d nic idx %d", n, len, nic_i);
len = 0;
}
ring->slot[nm_i].len = len;
@ -228,6 +392,18 @@ lem_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
nic_i = nm_next(nic_i, lim);
}
if (n) { /* update the state variables */
#ifdef NIC_PARAVIRT
if (csb_mode) {
if (n > 1) {
/* leave one spare buffer so we avoid rxkicks */
nm_i = nm_prev(nm_i, lim);
nic_i = nm_prev(nic_i, lim);
n--;
} else {
csb->guest_need_rxkick = 1;
}
}
#endif /* NIC_PARAVIRT */
ND("%d new packets at nic %d nm %d tail %d",
n,
adapter->next_rx_desc_to_check,
@ -249,23 +425,27 @@ lem_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
void *addr = PNMB(na, slot, &paddr);
struct e1000_rx_desc *curr = &adapter->rx_desc_base[nic_i];
struct em_buffer *rxbuf = &adapter->rx_buffer_area[nic_i];
if (addr == netmap_buffer_base) /* bad buf */
if (addr == NETMAP_BUF_BASE(na)) /* bad buf */
goto ring_reset;
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, reload map */
curr->buffer_addr = htole64(paddr);
netmap_reload_map(adapter->rxtag, rxbuf->map, addr);
netmap_reload_map(na, adapter->rxtag, rxbuf->map, addr);
slot->flags &= ~NS_BUF_CHANGED;
}
curr->status = 0;
bus_dmamap_sync(adapter->rxtag, rxbuf->map,
BUS_DMASYNC_PREREAD);
#ifdef NIC_PARAVIRT
if (csb_mode && csb->host_rxkick_at == nic_i)
do_host_rxkick = 1;
#endif /* NIC_PARAVIRT */
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
@ -277,6 +457,12 @@ lem_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
* so move nic_i back by one unit
*/
nic_i = nm_prev(nic_i, lim);
#ifdef NIC_PARAVIRT
/* set unconditionally, then also kick if needed */
if (csb)
csb->guest_rdt = nic_i;
if (!csb_mode || do_host_rxkick)
#endif /* NIC_PARAVIRT */
E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), nic_i);
}

View File

@ -65,10 +65,10 @@ re_netmap_reg(struct netmap_adapter *na, int onoff)
* Reconcile kernel and user view of the transmit ring.
*/
static int
re_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
re_netmap_txsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_kring *kring = &na->tx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
@ -96,14 +96,14 @@ re_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
struct netmap_slot *slot = &ring->slot[nm_i];
u_int len = slot->len;
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
void *addr = PNMB(na, slot, &paddr);
/* device-specific */
struct rl_desc *desc = &sc->rl_ldata.rl_tx_list[nic_i];
int cmd = slot->len | RL_TDESC_CMD_EOF |
RL_TDESC_CMD_OWN | RL_TDESC_CMD_SOF ;
NM_CHECK_ADDR_LEN(addr, len);
NM_CHECK_ADDR_LEN(na, addr, len);
if (nic_i == lim) /* mark end of ring */
cmd |= RL_TDESC_CMD_EOR;
@ -112,7 +112,7 @@ re_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
/* buffer has changed, reload map */
desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr));
desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr));
netmap_reload_map(sc->rl_ldata.rl_tx_mtag,
netmap_reload_map(na, sc->rl_ldata.rl_tx_mtag,
txd[nic_i].tx_dmamap, addr);
}
slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
@ -169,10 +169,10 @@ re_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
* Reconcile kernel and user view of the receive ring.
*/
static int
re_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
re_netmap_rxsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_kring *kring = &na->rx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
@ -240,12 +240,12 @@ re_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
void *addr = PNMB(na, slot, &paddr);
struct rl_desc *desc = &sc->rl_ldata.rl_rx_list[nic_i];
int cmd = NETMAP_BUF_SIZE | RL_RDESC_CMD_OWN;
int cmd = NETMAP_BUF_SIZE(na) | RL_RDESC_CMD_OWN;
if (addr == netmap_buffer_base) /* bad buf */
if (addr == NETMAP_BUF_BASE(na)) /* bad buf */
goto ring_reset;
if (nic_i == lim) /* mark end of ring */
@ -255,7 +255,7 @@ re_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
/* buffer has changed, reload map */
desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr));
desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr));
netmap_reload_map(sc->rl_ldata.rl_rx_mtag,
netmap_reload_map(na, sc->rl_ldata.rl_rx_mtag,
rxd[nic_i].rx_dmamap, addr);
slot->flags &= ~NS_BUF_CHANGED;
}
@ -296,14 +296,10 @@ re_netmap_tx_init(struct rl_softc *sc)
struct netmap_adapter *na = NA(sc->rl_ifp);
struct netmap_slot *slot;
if (!na || !(na->na_flags & NAF_NATIVE_ON)) {
return;
}
slot = netmap_reset(na, NR_TX, 0, 0);
/* slot is NULL if we are not in netmap mode */
/* slot is NULL if we are not in native netmap mode */
if (!slot)
return; // XXX cannot happen
return;
/* in netmap mode, overwrite addresses and maps */
txd = sc->rl_ldata.rl_tx_desc;
desc = sc->rl_ldata.rl_tx_list;
@ -313,11 +309,11 @@ re_netmap_tx_init(struct rl_softc *sc)
for (i = 0; i < n; i++) {
uint64_t paddr;
int l = netmap_idx_n2k(&na->tx_rings[0], i);
void *addr = PNMB(slot + l, &paddr);
void *addr = PNMB(na, slot + l, &paddr);
desc[i].rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr));
desc[i].rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr));
netmap_load_map(sc->rl_ldata.rl_tx_mtag,
netmap_load_map(na, sc->rl_ldata.rl_tx_mtag,
txd[i].tx_dmamap, addr);
}
}
@ -344,15 +340,15 @@ re_netmap_rx_init(struct rl_softc *sc)
uint64_t paddr;
uint32_t nm_i = netmap_idx_n2k(&na->rx_rings[0], nic_i);
addr = PNMB(slot + nm_i, &paddr);
addr = PNMB(na, slot + nm_i, &paddr);
netmap_reload_map(sc->rl_ldata.rl_rx_mtag,
netmap_reload_map(na, sc->rl_ldata.rl_rx_mtag,
sc->rl_ldata.rl_rx_desc[nic_i].rx_dmamap, addr);
bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
sc->rl_ldata.rl_rx_desc[nic_i].rx_dmamap, BUS_DMASYNC_PREREAD);
desc[nic_i].rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr));
desc[nic_i].rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr));
cmdstat = NETMAP_BUF_SIZE;
cmdstat = NETMAP_BUF_SIZE(na);
if (nic_i == n - 1) /* mark the end of ring */
cmdstat |= RL_RDESC_CMD_EOR;
if (nic_i < max_avail)

View File

@ -0,0 +1,434 @@
/*
* Copyright (C) 2014 Vincenzo Maffione, Luigi Rizzo. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* $FreeBSD$
*/
#include <net/netmap.h>
#include <sys/selinfo.h>
#include <vm/vm.h>
#include <vm/pmap.h> /* vtophys ? */
#include <dev/netmap/netmap_kern.h>
#define SOFTC_T vtnet_softc
/* Free all the unused buffers in all the RX and TX virtqueues.
* This function is called when entering and exiting netmap mode.
* - buffers queued by the virtio driver return skbuf/mbuf pointer
* and need to be freed;
* - buffers queued by netmap return the txq/rxq, and do not need work
*/
void
vtnet_netmap_free_bufs(struct SOFTC_T* sc)
{
int i, nmb = 0, n = 0, last;
for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
struct vtnet_rxq *rxq = &sc->vtnet_rxqs[i];
struct virtqueue *vq;
struct mbuf *m;
struct vtnet_txq *txq = &sc->vtnet_txqs[i];
struct vtnet_tx_header *txhdr;
last = 0;
vq = rxq->vtnrx_vq;
while ((m = virtqueue_drain(vq, &last)) != NULL) {
n++;
if (m != (void *)rxq)
m_freem(m);
else
nmb++;
}
last = 0;
vq = txq->vtntx_vq;
while ((txhdr = virtqueue_drain(vq, &last)) != NULL) {
n++;
if (txhdr != (void *)txq) {
m_freem(txhdr->vth_mbuf);
uma_zfree(vtnet_tx_header_zone, txhdr);
} else
nmb++;
}
}
D("freed %d mbufs, %d netmap bufs on %d queues",
n - nmb, nmb, i);
}
/* Register and unregister. */
int
vtnet_netmap_reg(struct netmap_adapter *na, int onoff)
{
struct ifnet *ifp = na->ifp;
struct SOFTC_T *sc = ifp->if_softc;
VTNET_CORE_LOCK(sc);
ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
/* enable or disable flags and callbacks in na and ifp */
if (onoff) {
nm_set_native_flags(na);
} else {
nm_clear_native_flags(na);
}
/* drain queues so netmap and native drivers
* do not interfere with each other
*/
vtnet_netmap_free_bufs(sc);
vtnet_init_locked(sc); /* also enable intr */
VTNET_CORE_UNLOCK(sc);
return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
}
/* Reconcile kernel and user view of the transmit ring. */
static int
vtnet_netmap_txsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_ring *ring = kring->ring;
u_int ring_nr = kring->ring_id;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = kring->rhead;
/* device-specific */
struct SOFTC_T *sc = ifp->if_softc;
struct vtnet_txq *txq = &sc->vtnet_txqs[ring_nr];
struct virtqueue *vq = txq->vtntx_vq;
/*
* First part: process new packets to send.
*/
rmb();
nm_i = kring->nr_hwcur;
if (nm_i != head) { /* we have new packets to send */
struct sglist *sg = txq->vtntx_sg;
nic_i = netmap_idx_k2n(kring, nm_i);
for (n = 0; nm_i != head; n++) {
/* we use an empty header here */
static struct virtio_net_hdr_mrg_rxbuf hdr;
struct netmap_slot *slot = &ring->slot[nm_i];
u_int len = slot->len;
uint64_t paddr;
void *addr = PNMB(na, slot, &paddr);
int err;
NM_CHECK_ADDR_LEN(na, addr, len);
slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
/* Initialize the scatterlist, expose it to the hypervisor,
* and kick the hypervisor (if necessary).
*/
sglist_reset(sg); // cheap
// if vtnet_hdr_size > 0 ...
err = sglist_append(sg, &hdr, sc->vtnet_hdr_size);
// XXX later, support multi segment
err = sglist_append_phys(sg, paddr, len);
/* use na as the cookie */
err = virtqueue_enqueue(vq, txq, sg, sg->sg_nseg, 0);
if (unlikely(err < 0)) {
D("virtqueue_enqueue failed");
break;
}
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
/* Update hwcur depending on where we stopped. */
kring->nr_hwcur = nm_i; /* note we might break early */
/* No more free TX slots? Ask the hypervisor for notifications,
* possibly only when a considerable amount of work has been
* done.
*/
ND(3,"sent %d packets, hwcur %d", n, nm_i);
virtqueue_disable_intr(vq);
virtqueue_notify(vq);
} else {
if (ring->head != ring->tail)
ND(5, "pure notify ? head %d tail %d nused %d %d",
ring->head, ring->tail, virtqueue_nused(vq),
(virtqueue_dump(vq), 1));
virtqueue_notify(vq);
virtqueue_enable_intr(vq); // like postpone with 0
}
/* Free used slots. We only consider our own used buffers, recognized
* by the token we passed to virtqueue_add_outbuf.
*/
n = 0;
for (;;) {
struct vtnet_tx_header *txhdr = virtqueue_dequeue(vq, NULL);
if (txhdr == NULL)
break;
if (likely(txhdr == (void *)txq)) {
n++;
if (virtqueue_nused(vq) < 32) { // XXX slow release
break;
}
} else { /* leftover from previous transmission */
m_freem(txhdr->vth_mbuf);
uma_zfree(vtnet_tx_header_zone, txhdr);
}
}
if (n) {
kring->nr_hwtail += n;
if (kring->nr_hwtail > lim)
kring->nr_hwtail -= lim + 1;
}
if (nm_i != kring->nr_hwtail /* && vtnet_txq_below_threshold(txq) == 0*/) {
ND(3, "disable intr, hwcur %d", nm_i);
virtqueue_disable_intr(vq);
} else {
ND(3, "enable intr, hwcur %d", nm_i);
virtqueue_postpone_intr(vq, VQ_POSTPONE_SHORT);
}
//out:
nm_txsync_finalize(kring);
return 0;
}
static int
vtnet_refill_rxq(struct netmap_kring *kring, u_int nm_i, u_int head)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_ring *ring = kring->ring;
u_int ring_nr = kring->ring_id;
u_int const lim = kring->nkr_num_slots - 1;
u_int n;
/* device-specific */
struct SOFTC_T *sc = ifp->if_softc;
struct vtnet_rxq *rxq = &sc->vtnet_rxqs[ring_nr];
struct virtqueue *vq = rxq->vtnrx_vq;
/* use a local sglist, default might be short */
struct sglist_seg ss[2];
struct sglist sg[1] = { ss, 0, 0, 2};
for (n = 0; nm_i != head; n++) {
static struct virtio_net_hdr_mrg_rxbuf hdr;
struct netmap_slot *slot = &ring->slot[nm_i];
uint64_t paddr;
void *addr = PNMB(na, slot, &paddr);
int err = 0;
if (addr == NETMAP_BUF_BASE(na)) { /* bad buf */
if (netmap_ring_reinit(kring))
return -1;
}
slot->flags &= ~NS_BUF_CHANGED;
sglist_reset(sg); // cheap
err = sglist_append(sg, &hdr, sc->vtnet_hdr_size);
err = sglist_append_phys(sg, paddr, NETMAP_BUF_SIZE(na));
/* writable for the host */
err = virtqueue_enqueue(vq, rxq, sg, 0, sg->sg_nseg);
if (err < 0) {
D("virtqueue_enqueue failed");
break;
}
nm_i = nm_next(nm_i, lim);
}
return nm_i;
}
/* Reconcile kernel and user view of the receive ring. */
static int
vtnet_netmap_rxsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_ring *ring = kring->ring;
u_int ring_nr = kring->ring_id;
u_int nm_i; /* index into the netmap ring */
// u_int nic_i; /* index into the NIC ring */
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = nm_rxsync_prologue(kring);
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
/* device-specific */
struct SOFTC_T *sc = ifp->if_softc;
struct vtnet_rxq *rxq = &sc->vtnet_rxqs[ring_nr];
struct virtqueue *vq = rxq->vtnrx_vq;
/* XXX netif_carrier_ok ? */
if (head > lim)
return netmap_ring_reinit(kring);
rmb();
/*
* First part: import newly received packets.
* Only accept our
* own buffers (matching the token). We should only get
* matching buffers, because of vtnet_netmap_free_rx_unused_bufs()
* and vtnet_netmap_init_buffers().
*/
if (netmap_no_pendintr || force_update) {
uint16_t slot_flags = kring->nkr_slot_flags;
struct netmap_adapter *token;
nm_i = kring->nr_hwtail;
n = 0;
for (;;) {
int len;
token = virtqueue_dequeue(vq, &len);
if (token == NULL)
break;
if (likely(token == (void *)rxq)) {
ring->slot[nm_i].len = len;
ring->slot[nm_i].flags = slot_flags;
nm_i = nm_next(nm_i, lim);
n++;
} else {
D("This should not happen");
}
}
kring->nr_hwtail = nm_i;
kring->nr_kflags &= ~NKR_PENDINTR;
}
ND("[B] h %d c %d hwcur %d hwtail %d",
ring->head, ring->cur, kring->nr_hwcur,
kring->nr_hwtail);
/*
* Second part: skip past packets that userspace has released.
*/
nm_i = kring->nr_hwcur; /* netmap ring index */
if (nm_i != head) {
int err = vtnet_refill_rxq(kring, nm_i, head);
if (err < 0)
return 1;
kring->nr_hwcur = err;
virtqueue_notify(vq);
/* After draining the queue may need an intr from the hypervisor */
vtnet_rxq_enable_intr(rxq);
}
/* tell userspace that there might be new packets. */
nm_rxsync_finalize(kring);
ND("[C] h %d c %d t %d hwcur %d hwtail %d",
ring->head, ring->cur, ring->tail,
kring->nr_hwcur, kring->nr_hwtail);
return 0;
}
/* Make RX virtqueues buffers pointing to netmap buffers. */
static int
vtnet_netmap_init_rx_buffers(struct SOFTC_T *sc)
{
struct ifnet *ifp = sc->vtnet_ifp;
struct netmap_adapter* na = NA(ifp);
unsigned int r;
if (!nm_native_on(na))
return 0;
for (r = 0; r < na->num_rx_rings; r++) {
struct netmap_kring *kring = &na->rx_rings[r];
struct vtnet_rxq *rxq = &sc->vtnet_rxqs[r];
struct virtqueue *vq = rxq->vtnrx_vq;
struct netmap_slot* slot;
int err = 0;
slot = netmap_reset(na, NR_RX, r, 0);
if (!slot) {
D("strange, null netmap ring %d", r);
return 0;
}
/* Add up to na->num_rx_desc-1 buffers to this RX virtqueue.
* It's important to leave one virtqueue slot free, otherwise
* we can run into ring->cur/ring->tail wraparounds.
*/
err = vtnet_refill_rxq(kring, 0, na->num_rx_desc-1);
if (err < 0)
return 0;
virtqueue_notify(vq);
}
return 1;
}
/* Update the virtio-net device configurations. Number of queues can
* change dynamically, by 'ethtool --set-channels $IFNAME combined $N'.
* This is actually the only way virtio-net can currently enable
* the multiqueue mode.
* XXX note that we seem to lose packets if the netmap ring has more
* slots than the queue
*/
static int
vtnet_netmap_config(struct netmap_adapter *na, u_int *txr, u_int *txd,
u_int *rxr, u_int *rxd)
{
struct ifnet *ifp = na->ifp;
struct SOFTC_T *sc = ifp->if_softc;
*txr = *rxr = sc->vtnet_max_vq_pairs;
*rxd = 512; // sc->vtnet_rx_nmbufs;
*txd = *rxd; // XXX
D("vtnet config txq=%d, txd=%d rxq=%d, rxd=%d",
*txr, *txd, *rxr, *rxd);
return 0;
}
static void
vtnet_netmap_attach(struct SOFTC_T *sc)
{
struct netmap_adapter na;
bzero(&na, sizeof(na));
na.ifp = sc->vtnet_ifp;
na.num_tx_desc = 1024;// sc->vtnet_rx_nmbufs;
na.num_rx_desc = 1024; // sc->vtnet_rx_nmbufs;
na.nm_register = vtnet_netmap_reg;
na.nm_txsync = vtnet_netmap_txsync;
na.nm_rxsync = vtnet_netmap_rxsync;
na.nm_config = vtnet_netmap_config;
na.num_tx_rings = na.num_rx_rings = sc->vtnet_max_vq_pairs;
D("max rings %d", sc->vtnet_max_vq_pairs);
netmap_attach(&na);
D("virtio attached txq=%d, txd=%d rxq=%d, rxd=%d",
na.num_tx_rings, na.num_tx_desc,
na.num_tx_rings, na.num_rx_desc);
}
/* end of file */

View File

@ -153,10 +153,10 @@ ixgbe_netmap_reg(struct netmap_adapter *na, int onoff)
* methods should be handled by the individual drivers.
*/
static int
ixgbe_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
ixgbe_netmap_txsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_kring *kring = &na->tx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
@ -171,7 +171,7 @@ ixgbe_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
/* device-specific */
struct adapter *adapter = ifp->if_softc;
struct tx_ring *txr = &adapter->tx_rings[ring_nr];
struct tx_ring *txr = &adapter->tx_rings[kring->ring_id];
int reclaim_tx;
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
@ -223,7 +223,7 @@ ixgbe_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
struct netmap_slot *slot = &ring->slot[nm_i];
u_int len = slot->len;
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
void *addr = PNMB(na, slot, &paddr);
/* device-specific */
union ixgbe_adv_tx_desc *curr = &txr->tx_base[nic_i];
@ -236,11 +236,11 @@ ixgbe_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
__builtin_prefetch(&ring->slot[nm_i + 1]);
__builtin_prefetch(&txr->tx_buffers[nic_i + 1]);
NM_CHECK_ADDR_LEN(addr, len);
NM_CHECK_ADDR_LEN(na, addr, len);
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, reload map */
netmap_reload_map(txr->txtag, txbuf->map, addr);
netmap_reload_map(na, txr->txtag, txbuf->map, addr);
}
slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
@ -309,7 +309,7 @@ ixgbe_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
* REPORT_STATUS in a few slots so TDH is the only
* good way.
*/
nic_i = IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(ring_nr));
nic_i = IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(kring->ring_id));
if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
D("TDH wrap %d", nic_i);
nic_i -= kring->nkr_num_slots;
@ -341,10 +341,10 @@ ixgbe_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
* of whether or not we received an interrupt.
*/
static int
ixgbe_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
ixgbe_netmap_rxsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_kring *kring = &na->rx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
@ -355,7 +355,7 @@ ixgbe_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
/* device-specific */
struct adapter *adapter = ifp->if_softc;
struct rx_ring *rxr = &adapter->rx_rings[ring_nr];
struct rx_ring *rxr = &adapter->rx_rings[kring->ring_id];
if (head > lim)
return netmap_ring_reinit(kring);
@ -425,17 +425,17 @@ ixgbe_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
for (n = 0; nm_i != head; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
void *addr = PNMB(na, slot, &paddr);
union ixgbe_adv_rx_desc *curr = &rxr->rx_base[nic_i];
struct ixgbe_rx_buf *rxbuf = &rxr->rx_buffers[nic_i];
if (addr == netmap_buffer_base) /* bad buf */
if (addr == NETMAP_BUF_BASE(na)) /* bad buf */
goto ring_reset;
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, reload map */
netmap_reload_map(rxr->ptag, rxbuf->pmap, addr);
netmap_reload_map(na, rxr->ptag, rxbuf->pmap, addr);
slot->flags &= ~NS_BUF_CHANGED;
}
curr->wb.upper.status_error = 0;

File diff suppressed because it is too large

View File

@ -50,6 +50,9 @@
#include <sys/selinfo.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_types.h> /* IFT_ETHER */
#include <net/ethernet.h> /* ether_ifdetach */
#include <net/if_dl.h> /* LLADDR */
#include <machine/bus.h> /* bus_dmamap_* */
#include <netinet/in.h> /* in6_cksum_pseudo() */
#include <machine/in_cksum.h> /* in_pseudo(), in_cksum_hdr() */
@ -91,8 +94,7 @@ nm_csum_fold(rawsum_t cur_sum)
return htobe16((~cur_sum) & 0xFFFF);
}
uint16_t
nm_csum_ipv4(struct nm_iphdr *iph)
uint16_t nm_csum_ipv4(struct nm_iphdr *iph)
{
#if 0
return in_cksum_hdr((void *)iph);
@ -148,8 +150,7 @@ nm_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data,
int
netmap_catch_rx(struct netmap_adapter *na, int intercept)
{
struct netmap_generic_adapter *gna =
(struct netmap_generic_adapter *)na;
struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
struct ifnet *ifp = na->ifp;
if (intercept) {
@ -221,9 +222,9 @@ generic_xmit_frame(struct ifnet *ifp, struct mbuf *m,
* (and eventually, just reference the netmap buffer)
*/
if (*m->m_ext.ext_cnt != 1) {
if (GET_MBUF_REFCNT(m) != 1) {
D("invalid refcnt %d for %p",
*m->m_ext.ext_cnt, m);
GET_MBUF_REFCNT(m), m);
panic("in generic_xmit_frame");
}
// XXX the ext_size check is unnecessary if we link the netmap buf
@ -238,7 +239,7 @@ generic_xmit_frame(struct ifnet *ifp, struct mbuf *m,
}
m->m_len = m->m_pkthdr.len = len;
// inc refcount. All ours, we could skip the atomic
atomic_fetchadd_int(m->m_ext.ext_cnt, 1);
atomic_fetchadd_int(PNT_MBUF_REFCNT(m), 1);
m->m_flags |= M_FLOWID;
m->m_pkthdr.flowid = ring_nr;
m->m_pkthdr.rcvif = ifp; /* used for tx notification */
@ -277,10 +278,11 @@ generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq)
void
netmap_mitigation_init(struct nm_generic_mit *mit, struct netmap_adapter *na)
netmap_mitigation_init(struct nm_generic_mit *mit, int idx, struct netmap_adapter *na)
{
ND("called");
mit->mit_pending = 0;
mit->mit_ring_idx = idx;
mit->mit_na = na;
}
@ -313,6 +315,135 @@ netmap_mitigation_cleanup(struct nm_generic_mit *mit)
ND("called");
}
static int
nm_vi_dummy(struct ifnet *ifp, u_long cmd, caddr_t addr)
{
return EINVAL;
}
static void
nm_vi_start(struct ifnet *ifp)
{
panic("nm_vi_start() must not be called");
}
/*
* Index manager of persistent virtual interfaces.
* It is used to decide the lowest byte of the MAC address.
* We use the same algorithm as for managing bridge port indices.
*/
#define NM_VI_MAX 255
static struct {
uint8_t index[NM_VI_MAX]; /* XXX just for a reasonable number */
uint8_t active;
struct mtx lock;
} nm_vi_indices;
void
nm_vi_init_index(void)
{
int i;
for (i = 0; i < NM_VI_MAX; i++)
nm_vi_indices.index[i] = i;
nm_vi_indices.active = 0;
mtx_init(&nm_vi_indices.lock, "nm_vi_indices_lock", NULL, MTX_DEF);
}
/* return -1 if no index available */
static int
nm_vi_get_index(void)
{
int ret;
mtx_lock(&nm_vi_indices.lock);
ret = nm_vi_indices.active == NM_VI_MAX ? -1 :
nm_vi_indices.index[nm_vi_indices.active++];
mtx_unlock(&nm_vi_indices.lock);
return ret;
}
static void
nm_vi_free_index(uint8_t val)
{
int i, lim;
mtx_lock(&nm_vi_indices.lock);
lim = nm_vi_indices.active;
for (i = 0; i < lim; i++) {
if (nm_vi_indices.index[i] == val) {
/* swap index[lim-1] and index[i] */
int tmp = nm_vi_indices.index[lim-1];
nm_vi_indices.index[lim-1] = val;
nm_vi_indices.index[i] = tmp;
nm_vi_indices.active--;
break;
}
}
if (lim == nm_vi_indices.active)
D("funny, index %u didn't found", val);
mtx_unlock(&nm_vi_indices.lock);
}
#undef NM_VI_MAX
/*
* Implementation of a netmap-capable virtual interface that
* is registered with the system.
* It is based on if_tap.c and ip_fw_log.c in FreeBSD 9.
*
* Note: Linux sets refcount to 0 on allocation of net_device,
* then increments it on registration to the system.
* FreeBSD sets refcount to 1 on if_alloc(), and does not
* increment this refcount on if_attach().
*/
int
nm_vi_persist(const char *name, struct ifnet **ret)
{
struct ifnet *ifp;
u_short macaddr_hi;
uint32_t macaddr_mid;
u_char eaddr[6];
int unit = nm_vi_get_index(); /* just to decide MAC address */
if (unit < 0)
return EBUSY;
/*
* We use the same MAC address generation method as tap,
* except that the two high octets are 00:be instead of 00:bd
*/
macaddr_hi = htons(0x00be); /* XXX tap + 1 */
macaddr_mid = (uint32_t) ticks;
bcopy(&macaddr_hi, eaddr, sizeof(short));
bcopy(&macaddr_mid, &eaddr[2], sizeof(uint32_t));
eaddr[5] = (uint8_t)unit;
ifp = if_alloc(IFT_ETHER);
if (ifp == NULL) {
D("if_alloc failed");
return ENOMEM;
}
if_initname(ifp, name, IF_DUNIT_NONE);
ifp->if_mtu = 65536;
ifp->if_flags = IFF_UP | IFF_SIMPLEX | IFF_MULTICAST;
ifp->if_init = (void *)nm_vi_dummy;
ifp->if_ioctl = nm_vi_dummy;
ifp->if_start = nm_vi_start;
ifp->if_mtu = ETHERMTU;
IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
ifp->if_capabilities |= IFCAP_LINKSTATE;
ifp->if_capenable |= IFCAP_LINKSTATE;
ether_ifattach(ifp, eaddr);
*ret = ifp;
return 0;
}
/* unregister from the system and drop the final refcount */
void
nm_vi_detach(struct ifnet *ifp)
{
nm_vi_free_index(((char *)IF_LLADDR(ifp))[5]);
ether_ifdetach(ifp);
if_free(ifp);
}
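For orientation, here is a minimal sketch (editorial, not part of the diff) of how a caller is expected to pair the two routines above; the interface name and the error handling are illustrative only:

/* illustrative sketch: create a persistent virtual interface and
 * later tear it down; only nm_vi_persist()/nm_vi_detach() are real */
struct ifnet *ifp;
int error;

error = nm_vi_persist("myport0", &ifp);	/* the name is just an example */
if (error)
	return error;	/* EBUSY: no MAC index left; ENOMEM: if_alloc failed */
/* ... attach a netmap adapter to ifp and use the port ... */
nm_vi_detach(ifp);	/* frees the MAC index, detaches and frees the ifp */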
/*
* In order to track whether pages are still mapped, we hook into

View File

@ -102,51 +102,42 @@ __FBSDID("$FreeBSD$");
* mbuf wrappers
*/
/*
* mbuf destructor, also need to change the type to EXT_EXTREF,
/* mbuf destructor, also need to change the type to EXT_EXTREF,
* add an M_NOFREE flag, and then clear the flag and
* chain into uma_zfree(zone_pack, mf)
* (or reinstall the buffer ?)
*
* On FreeBSD 9 the destructor is called as ext_free(ext_arg1, ext_arg2)
* whereas newer versions have ext_free(m, ext_arg1, ext_arg2)
* For compatibility we set ext_arg1 = m on allocation so we have
* the same code on both.
*/
#define SET_MBUF_DESTRUCTOR(m, fn) do { \
(m)->m_ext.ext_free = (void *)fn; \
(m)->m_ext.ext_type = EXT_EXTREF; \
} while (0)
(m)->m_ext.ext_free = (void *)fn; \
(m)->m_ext.ext_type = EXT_EXTREF; \
} while (0)
static void
static void
netmap_default_mbuf_destructor(struct mbuf *m)
{
/* restore original data pointer and type */
m->m_ext.ext_buf = m->m_data = m->m_ext.ext_arg2;
{
/* restore original mbuf */
m->m_ext.ext_buf = m->m_data = m->m_ext.ext_arg1;
m->m_ext.ext_arg1 = NULL;
m->m_ext.ext_type = EXT_PACKET;
m->m_ext.ext_free = NULL;
m->m_ext.ext_arg1 = m->m_ext.ext_arg2 = NULL;
if (*(m->m_ext.ext_cnt) == 0)
*(m->m_ext.ext_cnt) = 1;
if (GET_MBUF_REFCNT(m) == 0)
SET_MBUF_REFCNT(m, 1);
uma_zfree(zone_pack, m);
}
}
static inline struct mbuf *
netmap_get_mbuf(int len)
{
static inline struct mbuf *
netmap_get_mbuf(int len)
{
struct mbuf *m;
m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR | M_NOFREE);
if (m) {
m->m_ext.ext_arg1 = m; /* FreeBSD 9 compat */
m->m_ext.ext_arg2 = m->m_ext.ext_buf; /* save original */
m->m_ext.ext_arg1 = m->m_ext.ext_buf; // XXX save
m->m_ext.ext_free = (void *)netmap_default_mbuf_destructor;
m->m_ext.ext_type = EXT_EXTREF;
ND(5, "create m %p refcnt %d", m, *m->m_ext.ext_cnt);
ND(5, "create m %p refcnt %d", m, GET_MBUF_REFCNT(m));
}
return m;
}
#define GET_MBUF_REFCNT(m) ((m)->m_ext.ext_cnt ? *(m)->m_ext.ext_cnt : -1)
}
@ -158,8 +149,6 @@ netmap_get_mbuf(int len)
#include <linux/ethtool.h> /* struct ethtool_ops, get_ringparam */
#include <linux/hrtimer.h>
//#define RATE /* Enables communication statistics. */
//#define REG_RESET
#endif /* linux */
@ -174,7 +163,7 @@ netmap_get_mbuf(int len)
/* ======================== usage stats =========================== */
#ifdef RATE
#ifdef RATE_GENERIC
#define IFRATE(x) x
struct rate_stats {
unsigned long txpkt;
@ -218,23 +207,33 @@ static void rate_callback(unsigned long arg)
static struct rate_context rate_ctx;
void generic_rate(int txp, int txs, int txi, int rxp, int rxs, int rxi)
{
if (txp) rate_ctx.new.txpkt++;
if (txs) rate_ctx.new.txsync++;
if (txi) rate_ctx.new.txirq++;
if (rxp) rate_ctx.new.rxpkt++;
if (rxs) rate_ctx.new.rxsync++;
if (rxi) rate_ctx.new.rxirq++;
}
#else /* !RATE */
#define IFRATE(x)
#endif /* !RATE */
/* =============== GENERIC NETMAP ADAPTER SUPPORT ================= */
#define GENERIC_BUF_SIZE netmap_buf_size /* Size of the mbufs in the Tx pool. */
/*
* Wrapper used by the generic adapter layer to notify
* the poller threads. Differently from netmap_rx_irq(), we check
* only IFCAP_NETMAP instead of NAF_NATIVE_ON to enable the irq.
* only NAF_NETMAP_ON instead of NAF_NATIVE_ON to enable the irq.
*/
static void
netmap_generic_irq(struct ifnet *ifp, u_int q, u_int *work_done)
{
if (unlikely(!(ifp->if_capenable & IFCAP_NETMAP)))
struct netmap_adapter *na = NA(ifp);
if (unlikely(!nm_netmap_on(na)))
return;
netmap_common_irq(ifp, q, work_done);
@ -245,7 +244,6 @@ netmap_generic_irq(struct ifnet *ifp, u_int q, u_int *work_done)
static int
generic_netmap_register(struct netmap_adapter *na, int enable)
{
struct ifnet *ifp = na->ifp;
struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
struct mbuf *m;
int error;
@ -271,7 +269,7 @@ generic_netmap_register(struct netmap_adapter *na, int enable)
goto out;
}
for (r=0; r<na->num_rx_rings; r++)
netmap_mitigation_init(&gna->mit[r], na);
netmap_mitigation_init(&gna->mit[r], r, na);
/* Initialize the rx queue, as generic_rx_handler() can
* be called as soon as netmap_catch_rx() returns.
@ -296,7 +294,7 @@ generic_netmap_register(struct netmap_adapter *na, int enable)
for (i=0; i<na->num_tx_desc; i++)
na->tx_rings[r].tx_pool[i] = NULL;
for (i=0; i<na->num_tx_desc; i++) {
m = netmap_get_mbuf(GENERIC_BUF_SIZE);
m = netmap_get_mbuf(NETMAP_BUF_SIZE(na));
if (!m) {
D("tx_pool[%d] allocation failed", i);
error = ENOMEM;
@ -312,14 +310,14 @@ generic_netmap_register(struct netmap_adapter *na, int enable)
D("netdev_rx_handler_register() failed (%d)", error);
goto register_handler;
}
ifp->if_capenable |= IFCAP_NETMAP;
na->na_flags |= NAF_NETMAP_ON;
/* Make netmap control the packet steering. */
netmap_catch_tx(gna, 1);
rtnl_unlock();
#ifdef RATE
#ifdef RATE_GENERIC
if (rate_ctx.refcount == 0) {
D("setup_timer()");
memset(&rate_ctx, 0, sizeof(rate_ctx));
@ -338,7 +336,7 @@ generic_netmap_register(struct netmap_adapter *na, int enable)
error handling code below. */
rtnl_lock();
ifp->if_capenable &= ~IFCAP_NETMAP;
na->na_flags &= ~NAF_NETMAP_ON;
/* Release packet steering control. */
netmap_catch_tx(gna, 0);
@ -365,7 +363,7 @@ generic_netmap_register(struct netmap_adapter *na, int enable)
free(na->tx_rings[r].tx_pool, M_DEVBUF);
}
#ifdef RATE
#ifdef RATE_GENERIC
if (--rate_ctx.refcount == 0) {
D("del_timer()");
del_timer(&rate_ctx.timer);
@ -421,6 +419,8 @@ generic_mbuf_destructor(struct mbuf *m)
IFRATE(rate_ctx.new.txirq++);
}
extern int netmap_adaptive_io;
/* Record completed transmissions and update hwtail.
*
* The oldest tx buffer not yet completed is at nr_hwtail + 1,
@ -440,7 +440,7 @@ generic_netmap_tx_clean(struct netmap_kring *kring)
if (unlikely(m == NULL)) {
/* this is done, try to replenish the entry */
tx_pool[nm_i] = m = netmap_get_mbuf(GENERIC_BUF_SIZE);
tx_pool[nm_i] = m = netmap_get_mbuf(NETMAP_BUF_SIZE(kring->na));
if (unlikely(m == NULL)) {
D("mbuf allocation failed, XXX error");
// XXX how do we proceed ? break ?
@ -451,6 +451,23 @@ generic_netmap_tx_clean(struct netmap_kring *kring)
}
n++;
nm_i = nm_next(nm_i, lim);
#if 0 /* rate adaptation */
if (netmap_adaptive_io > 1) {
if (n >= netmap_adaptive_io)
break;
} else if (netmap_adaptive_io) {
/* if hwcur - nm_i < lim/8 do an early break
* so we prevent the sender from stalling. See CVT.
*/
if (hwcur >= nm_i) {
if (hwcur - nm_i < lim/2)
break;
} else {
if (hwcur + lim + 1 - nm_i < lim/2)
break;
}
}
#endif
}
kring->nr_hwtail = nm_prev(nm_i, lim);
ND("tx completed [%d] -> hwtail %d", n, kring->nr_hwtail);
@ -530,14 +547,15 @@ generic_set_tx_event(struct netmap_kring *kring, u_int hwcur)
* since it implements the TX flow control (and takes some locks).
*/
static int
generic_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
generic_netmap_txsync(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct ifnet *ifp = na->ifp;
struct netmap_kring *kring = &na->tx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */ // j
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = kring->rhead;
u_int ring_nr = kring->ring_id;
IFRATE(rate_ctx.new.txsync++);
@ -553,19 +571,19 @@ generic_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
while (nm_i != head) {
struct netmap_slot *slot = &ring->slot[nm_i];
u_int len = slot->len;
void *addr = NMB(slot);
void *addr = NMB(na, slot);
/* device-specific */
struct mbuf *m;
int tx_ret;
NM_CHECK_ADDR_LEN(addr, len);
NM_CHECK_ADDR_LEN(na, addr, len);
/* Take an mbuf from the tx pool and copy in the user packet. */
m = kring->tx_pool[nm_i];
if (unlikely(!m)) {
RD(5, "This should never happen");
kring->tx_pool[nm_i] = m = netmap_get_mbuf(GENERIC_BUF_SIZE);
kring->tx_pool[nm_i] = m = netmap_get_mbuf(NETMAP_BUF_SIZE(na));
if (unlikely(m == NULL)) {
D("mbuf allocation failed");
break;
@ -580,7 +598,7 @@ generic_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
*/
tx_ret = generic_xmit_frame(ifp, m, addr, len, ring_nr);
if (unlikely(tx_ret)) {
RD(5, "start_xmit failed: err %d [nm_i %u, head %u, hwtail %u]",
ND(5, "start_xmit failed: err %d [nm_i %u, head %u, hwtail %u]",
tx_ret, nm_i, head, kring->nr_hwtail);
/*
* No room for this mbuf in the device driver.
@ -686,10 +704,10 @@ generic_rx_handler(struct ifnet *ifp, struct mbuf *m)
* Access must be protected because the rx handler is asynchronous,
*/
static int
generic_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
generic_netmap_rxsync(struct netmap_kring *kring, int flags)
{
struct netmap_kring *kring = &na->rx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
struct netmap_adapter *na = kring->na;
u_int nm_i; /* index into the netmap ring */ //j,
u_int n;
u_int const lim = kring->nkr_num_slots - 1;
@ -712,11 +730,11 @@ generic_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
nm_i = kring->nr_hwtail; /* first empty slot in the receive ring */
for (n = 0; nm_i != stop_i; n++) {
int len;
void *addr = NMB(&ring->slot[nm_i]);
void *addr = NMB(na, &ring->slot[nm_i]);
struct mbuf *m;
/* we only check the address here on generic rx rings */
if (addr == netmap_buffer_base) { /* Bad buffer */
if (addr == NETMAP_BUF_BASE(na)) { /* Bad buffer */
return netmap_ring_reinit(kring);
}
/*
@ -823,7 +841,7 @@ generic_netmap_attach(struct ifnet *ifp)
na->nm_txsync = &generic_netmap_txsync;
na->nm_rxsync = &generic_netmap_rxsync;
na->nm_dtor = &generic_netmap_dtor;
/* when using generic, IFCAP_NETMAP is set so we force
/* when using generic, NAF_NETMAP_ON is set so we force
* NAF_SKIP_INTR to use the regular interrupt handler
*/
na->na_flags = NAF_SKIP_INTR | NAF_HOST_RINGS;

View File

@ -36,6 +36,7 @@
#define WITH_VALE // comment out to disable VALE support
#define WITH_PIPES
#define WITH_MONITOR
#if defined(__FreeBSD__)
@ -66,11 +67,23 @@
struct netmap_adapter *netmap_getna(if_t ifp);
#endif
#if __FreeBSD_version >= 1100027
#define GET_MBUF_REFCNT(m) ((m)->m_ext.ext_cnt ? *((m)->m_ext.ext_cnt) : -1)
#define SET_MBUF_REFCNT(m, x) *((m)->m_ext.ext_cnt) = x
#define PNT_MBUF_REFCNT(m) ((m)->m_ext.ext_cnt)
#else
#define GET_MBUF_REFCNT(m) ((m)->m_ext.ref_cnt ? *((m)->m_ext.ref_cnt) : -1)
#define SET_MBUF_REFCNT(m, x) *((m)->m_ext.ref_cnt) = x
#define PNT_MBUF_REFCNT(m) ((m)->m_ext.ref_cnt)
#endif
MALLOC_DECLARE(M_NETMAP);
// XXX linux struct, not used in FreeBSD
struct net_device_ops {
};
struct ethtool_ops {
};
struct hrtimer {
};
@ -82,7 +95,7 @@ struct hrtimer {
#define MBUF_IFP(m) ((m)->dev)
#define NM_SEND_UP(ifp, m) \
do { \
m->priority = NM_MAGIC_PRIORITY; \
m->priority = NM_MAGIC_PRIORITY_RX; \
netif_rx(m); \
} while (0)
@ -100,18 +113,6 @@ struct hrtimer {
#define DEV_NETMAP
#endif /* DEV_NETMAP */
/*
* IFCAP_NETMAP goes into net_device's priv_flags (if_capenable).
* This was 16 bits up to linux 2.6.36, so we need a 16 bit value on older
* platforms and tolerate the clash with IFF_DYNAMIC and IFF_BRIDGE_PORT.
* For the 32-bit value, 0x100000 has no clashes until at least 3.5.1
*/
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
#define IFCAP_NETMAP 0x8000
#else
#define IFCAP_NETMAP 0x200000
#endif
#elif defined (__APPLE__)
#warning apple support is incomplete.
@ -215,7 +216,7 @@ extern NMG_LOCK_T netmap_global_lock;
* rxsync_from_host() and netmap_transmit(). The mbq is protected
* by its internal lock.
*
* RX rings attached to the VALE switch are accessed by both sender
* RX rings attached to the VALE switch are accessed by both senders
* and receiver. They are protected through the q_lock on the RX ring.
*/
struct netmap_kring {
@ -266,7 +267,13 @@ struct netmap_kring {
uint32_t nkr_hwlease;
uint32_t nkr_lease_idx;
volatile int nkr_stopped; // XXX what for ?
/* while nkr_stopped is set, no new [tr]xsync operations can
* be started on this kring.
* This is used by netmap_disable_all_rings()
* to find a synchronization point where critical data
* structures pointed to by the kring can be added or removed
*/
volatile int nkr_stopped;
/* Support for adapters without native netmap support.
* On tx rings we preallocate an array of tx buffers
@ -281,13 +288,40 @@ struct netmap_kring {
uint32_t ring_id; /* debugging */
char name[64]; /* diagnostic */
/* [tx]sync callback for this kring.
* The default nm_kring_create callback (netmap_krings_create)
* sets the nm_sync callback of each hardware tx(rx) kring to
* the corresponding nm_txsync(nm_rxsync) taken from the
* netmap_adapter; moreover, it sets the sync callback
* of the host tx(rx) ring to netmap_txsync_to_host
* (netmap_rxsync_from_host).
*
* Overrides: the above configuration is not changed by
* any of the nm_krings_create callbacks.
*/
int (*nm_sync)(struct netmap_kring *kring, int flags);
#ifdef WITH_PIPES
struct netmap_kring *pipe;
struct netmap_ring *save_ring;
struct netmap_kring *pipe; /* if this is a pipe ring,
* pointer to the other end
*/
struct netmap_ring *save_ring; /* pointer to hidden rings
* (see netmap_pipe.c for details)
*/
#endif /* WITH_PIPES */
#ifdef WITH_MONITOR
/* pointer to the adapter that is monitoring this kring (if any)
*/
struct netmap_monitor_adapter *monitor;
/*
* Monitors work by intercepting the txsync and/or rxsync of the
* monitored krings. This is implemented by replacing
* the nm_sync pointer above and saving the previous
* one in save_sync below.
*/
int (*save_sync)(struct netmap_kring *kring, int flags);
#endif
} __attribute__((__aligned__(64)));
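To make the monitor/save_sync fields above concrete, here is a hedged sketch (editorial, not part of the diff) of how a monitor hooks and unhooks a monitored kring. The two helper names are placeholders; the fields and the netmap_monitor_parent_txsync callback are the ones introduced by this change:

/* illustrative: intercept and release a monitored kring */
static void
monitor_hook_kring(struct netmap_kring *kring, struct netmap_monitor_adapter *mna)
{
	kring->save_sync = kring->nm_sync;	/* remember the original callback */
	kring->nm_sync = netmap_monitor_parent_txsync;	/* or the rxsync variant */
	kring->monitor = mna;			/* backpointer used on each sync */
}

static void
monitor_unhook_kring(struct netmap_kring *kring)
{
	kring->nm_sync = kring->save_sync;	/* restore the original callback */
	kring->save_sync = NULL;
	kring->monitor = NULL;
}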
@ -360,6 +394,8 @@ tail->| |<-hwtail | |<-hwlease
enum txrx { NR_RX = 0, NR_TX = 1 };
struct netmap_vp_adapter; // forward
/*
* The "struct netmap_adapter" extends the "struct adapter"
* (or equivalent) device descriptor.
@ -390,13 +426,19 @@ struct netmap_adapter {
* deallocation of the memory allocator
*/
#define NAF_NATIVE_ON 16 /* the adapter is native and the attached
* interface is in netmap mode
* interface is in netmap mode.
* Virtual ports (vale, pipe, monitor...)
* should never use this flag.
*/
#define NAF_NETMAP_ON 32 /* netmap is active (either native or
* emulated. Where possible (e.g. FreeBSD)
* emulated). Where possible (e.g. FreeBSD)
* IFCAP_NETMAP also mirrors this flag.
*/
#define NAF_HOST_RINGS 64 /* the adapter supports the host rings */
#define NAF_FORCE_NATIVE 128 /* the adapter is always NATIVE */
#define NAF_BUSY (1U<<31) /* the adapter is used internally and
* cannot be registered from userspace
*/
int active_fds; /* number of user-space descriptors using this
interface, which is equal to the number of
struct netmap_if objs in the mapped region. */
@ -423,6 +465,8 @@ struct netmap_adapter {
/* count users of the global wait queues */
int tx_si_users, rx_si_users;
void *pdev; /* used to store pci device */
/* copy of if_qflush and if_transmit pointers, to intercept
* packets from the network stack when netmap is active.
*/
@ -444,7 +488,7 @@ struct netmap_adapter {
*
* nm_register() is called on NIOCREGIF and close() to enter
* or exit netmap mode on the NIC
* Called with NMG_LOCK held.
*
* nm_txsync() pushes packets to the underlying hw/switch
*
@ -453,14 +497,20 @@ struct netmap_adapter {
* nm_config() returns configuration information from the OS
* Called with NMG_LOCK held.
*
* nm_krings_create() create and init the krings array
* (the array layout must conform to the description
* found above the definition of netmap_krings_create)
* nm_krings_create() creates and initializes the tx_rings and
* rx_rings arrays of kring structures. In particular, it
* sets the nm_sync callbacks for each ring.
* There is no need to also allocate the corresponding
* netmap_rings, since netmap_mem_rings_create() will always
* be called to provide the missing ones.
* Called with NMG_LOCK held.
*
* nm_krings_delete() cleanup and delete the kring array
* nm_krings_delete() cleans up and deletes the tx_rings and rx_rings
* arrays
* Called with NMG_LOCK held.
*
* nm_notify() is used to act after data have become available
* (or the stopped state of the ring has changed)
* (or the stopped state of the ring has changed)
* For hw devices this is typically a selwakeup(),
* but for NIC/host ports attached to a switch (or vice-versa)
* we also need to invoke the 'txsync' code downstream.
@ -469,8 +519,8 @@ struct netmap_adapter {
int (*nm_register)(struct netmap_adapter *, int onoff);
int (*nm_txsync)(struct netmap_adapter *, u_int ring, int flags);
int (*nm_rxsync)(struct netmap_adapter *, u_int ring, int flags);
int (*nm_txsync)(struct netmap_kring *kring, int flags);
int (*nm_rxsync)(struct netmap_kring *kring, int flags);
#define NAF_FORCE_READ 1
#define NAF_FORCE_RECLAIM 2
/* return configuration information */
@ -480,7 +530,35 @@ struct netmap_adapter {
void (*nm_krings_delete)(struct netmap_adapter *);
int (*nm_notify)(struct netmap_adapter *,
u_int ring, enum txrx, int flags);
#define NAF_DISABLE_NOTIFY 8
#define NAF_DISABLE_NOTIFY 8 /* notify that the stopped state of the
* ring has changed (kring->nkr_stopped)
*/
#ifdef WITH_VALE
/*
* nm_bdg_attach() initializes the na_vp field to point
* to an adapter that can be attached to a VALE switch. If the
* current adapter is already a VALE port, na_vp is simply a cast;
* otherwise, na_vp points to a netmap_bwrap_adapter.
* If applicable, this callback also initializes na_hostvp,
* that can be used to connect the adapter host rings to the
* switch.
* Called with NMG_LOCK held.
*
* nm_bdg_ctl() is called on the actual attach/detach to/from
* the switch, to perform adapter-specific
* initializations
* Called with NMG_LOCK held.
*/
int (*nm_bdg_attach)(const char *bdg_name, struct netmap_adapter *);
int (*nm_bdg_ctl)(struct netmap_adapter *, struct nmreq *, int);
/* adapter used to attach this adapter to a VALE switch (if any) */
struct netmap_vp_adapter *na_vp;
/* adapter used to attach the host rings of this adapter
* to a VALE switch (if any) */
struct netmap_vp_adapter *na_hostvp;
#endif
/* standard refcount to control the lifetime of the adapter
* (it should be equal to the lifetime of the corresponding ifp)
@ -494,17 +572,22 @@ struct netmap_adapter {
struct netmap_mem_d *nm_mem;
struct lut_entry *na_lut;
uint32_t na_lut_objtotal; /* max buffer index */
uint32_t na_lut_objsize; /* buffer size */
/* used internally. If non-null, the interface cannot be bound
* from userspace
/* additional information attached to this adapter
* by other netmap subsystems. Currently used by
* bwrap and LINUX/v1000.
*/
void *na_private;
#ifdef WITH_PIPES
/* array of pipes that have this adapter as a parent */
struct netmap_pipe_adapter **na_pipes;
int na_next_pipe;
int na_max_pipes;
int na_next_pipe; /* next free slot in the array */
int na_max_pipes; /* size of the array */
#endif /* WITH_PIPES */
char name[64];
};
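As a reading aid for the callback table above, a hedged sketch (editorial) of how a native driver wires the new per-kring txsync/rxsync prototypes and registers the adapter, mirroring the assignments visible in generic_netmap_attach earlier in this diff; the foo_* names and the softc fields are placeholders:

/* illustrative sketch: foo_* and the softc layout are placeholders */
static int foo_netmap_txsync(struct netmap_kring *kring, int flags);
static int foo_netmap_rxsync(struct netmap_kring *kring, int flags);
static int foo_netmap_reg(struct netmap_adapter *na, int onoff);

static void
foo_netmap_attach(struct foo_softc *sc)
{
	struct netmap_adapter na;

	bzero(&na, sizeof(na));
	na.ifp = sc->ifp;
	na.num_tx_desc = sc->num_tx_desc;
	na.num_rx_desc = sc->num_rx_desc;
	na.num_tx_rings = na.num_rx_rings = sc->num_queues;
	na.nm_txsync = foo_netmap_txsync;	/* new per-kring prototype */
	na.nm_rxsync = foo_netmap_rxsync;
	na.nm_register = foo_netmap_reg;
	netmap_attach(&na);	/* na is copied into a netmap_hw_adapter */
}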
@ -514,9 +597,9 @@ struct netmap_adapter {
* if the NIC is owned by a user, only users can share it.
* Evaluation must be done under NMG_LOCK().
*/
#define NETMAP_OWNED_BY_KERN(na) (na->na_private)
#define NETMAP_OWNED_BY_KERN(na) ((na)->na_flags & NAF_BUSY)
#define NETMAP_OWNED_BY_ANY(na) \
(NETMAP_OWNED_BY_KERN(na) || (na->active_fds > 0))
(NETMAP_OWNED_BY_KERN(na) || ((na)->active_fds > 0))
/*
@ -546,12 +629,17 @@ struct netmap_hw_adapter { /* physical device */
struct netmap_adapter up;
struct net_device_ops nm_ndo; // XXX linux only
struct ethtool_ops nm_eto; // XXX linux only
const struct ethtool_ops* save_ethtool;
int (*nm_hw_register)(struct netmap_adapter *, int onoff);
};
/* Mitigation support. */
struct nm_generic_mit {
struct hrtimer mit_timer;
int mit_pending;
int mit_ring_idx; /* index of the ring being mitigated */
struct netmap_adapter *mit_na; /* backpointer */
};
@ -641,16 +729,19 @@ struct netmap_bwrap_adapter {
/* backup of the hwna notify callback */
int (*save_notify)(struct netmap_adapter *,
u_int ring, enum txrx, int flags);
/* backup of the hwna memory allocator */
struct netmap_mem_d *save_nmd;
/*
* When we attach a physical interface to the bridge, we
* allow the controlling process to terminate, so we need
* a place to store the netmap_priv_d data structure.
* This is only done when physical interfaces
* are attached to a bridge.
*/
struct netmap_priv_d *na_kpriv;
};
int netmap_bwrap_attach(const char *name, struct netmap_adapter *);
#endif /* WITH_VALE */
@ -747,12 +838,11 @@ static __inline int nm_kr_tryget(struct netmap_kring *kr)
* netmap_load_map/netmap_reload_map are helper routines to set/reset
* the dmamap for a packet buffer
*
* netmap_reset() is a helper routine to be called in the driver
* when reinitializing a ring.
* netmap_reset() is a helper routine to be called in the hw driver
* when reinitializing a ring. It should not be called by
* virtual ports (vale, pipes, monitor)
*/
int netmap_attach(struct netmap_adapter *);
int netmap_attach_common(struct netmap_adapter *);
void netmap_detach_common(struct netmap_adapter *na);
void netmap_detach(struct ifnet *);
int netmap_transmit(struct ifnet *, struct mbuf *);
struct netmap_slot *netmap_reset(struct netmap_adapter *na,
@ -764,10 +854,33 @@ int netmap_rx_irq(struct ifnet *, u_int, u_int *);
#define netmap_tx_irq(_n, _q) netmap_rx_irq(_n, _q, NULL)
void netmap_common_irq(struct ifnet *, u_int, u_int *work_done);
void netmap_disable_all_rings(struct ifnet *);
void netmap_enable_all_rings(struct ifnet *);
void netmap_disable_ring(struct netmap_kring *kr);
#ifdef WITH_VALE
/* functions used by external modules to interface with VALE */
#define netmap_vp_to_ifp(_vp) ((_vp)->up.ifp)
#define netmap_ifp_to_vp(_ifp) (NA(_ifp)->na_vp)
#define netmap_ifp_to_host_vp(_ifp) (NA(_ifp)->na_hostvp)
#define netmap_bdg_idx(_vp) ((_vp)->bdg_port)
const char *netmap_bdg_name(struct netmap_vp_adapter *);
#else /* !WITH_VALE */
#define netmap_vp_to_ifp(_vp) NULL
#define netmap_ifp_to_vp(_ifp) NULL
#define netmap_ifp_to_host_vp(_ifp) NULL
#define netmap_bdg_idx(_vp) -1
#define netmap_bdg_name(_vp) NULL
#endif /* WITH_VALE */
static inline int
nm_native_on(struct netmap_adapter *na)
{
return na && na->na_flags & NAF_NATIVE_ON;
}
static inline int
nm_netmap_on(struct netmap_adapter *na)
{
return na && na->na_flags & NAF_NETMAP_ON;
}
/* set/clear native flags and if_transmit/netdev_ops */
static inline void
@ -785,6 +898,8 @@ nm_set_native_flags(struct netmap_adapter *na)
#else
na->if_transmit = (void *)ifp->netdev_ops;
ifp->netdev_ops = &((struct netmap_hw_adapter *)na)->nm_ndo;
((struct netmap_hw_adapter *)na)->save_ethtool = ifp->ethtool_ops;
ifp->ethtool_ops = &((struct netmap_hw_adapter*)na)->nm_eto;
#endif
}
@ -798,6 +913,7 @@ nm_clear_native_flags(struct netmap_adapter *na)
ifp->if_transmit = na->if_transmit;
#else
ifp->netdev_ops = (void *)na->if_transmit;
ifp->ethtool_ops = ((struct netmap_hw_adapter*)na)->save_ethtool;
#endif
na->na_flags &= ~(NAF_NATIVE_ON | NAF_NETMAP_ON);
#ifdef IFCAP_NETMAP /* or FreeBSD ? */
@ -858,30 +974,72 @@ nm_rxsync_finalize(struct netmap_kring *kring)
/* check/fix address and len in tx rings */
#if 1 /* debug version */
#define NM_CHECK_ADDR_LEN(_a, _l) do { \
if (_a == netmap_buffer_base || _l > NETMAP_BUF_SIZE) { \
#define NM_CHECK_ADDR_LEN(_na, _a, _l) do { \
if (_a == NETMAP_BUF_BASE(_na) || _l > NETMAP_BUF_SIZE(_na)) { \
RD(5, "bad addr/len ring %d slot %d idx %d len %d", \
ring_nr, nm_i, slot->buf_idx, len); \
if (_l > NETMAP_BUF_SIZE) \
_l = NETMAP_BUF_SIZE; \
kring->ring_id, nm_i, slot->buf_idx, len); \
if (_l > NETMAP_BUF_SIZE(_na)) \
_l = NETMAP_BUF_SIZE(_na); \
} } while (0)
#else /* no debug version */
#define NM_CHECK_ADDR_LEN(_a, _l) do { \
if (_l > NETMAP_BUF_SIZE) \
_l = NETMAP_BUF_SIZE; \
#define NM_CHECK_ADDR_LEN(_na, _a, _l) do { \
if (_l > NETMAP_BUF_SIZE(_na)) \
_l = NETMAP_BUF_SIZE(_na); \
} while (0)
#endif
/*---------------------------------------------------------------*/
/*
* Support routines to be used with the VALE switch
* Support routines used by netmap subsystems
* (native drivers, VALE, generic, pipes, monitors, ...)
*/
/* common routine for all functions that create a netmap adapter. It performs
* two main tasks:
* - if the na points to an ifp, mark the ifp as netmap capable
* using na as its native adapter;
* - provide defaults for the setup callbacks and the memory allocator
*/
int netmap_attach_common(struct netmap_adapter *);
/* common actions to be performed on netmap adapter destruction */
void netmap_detach_common(struct netmap_adapter *);
/* fill priv->np_[tr]xq{first,last} using the ringid and flags information
* coming from a struct nmreq
*/
int netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags);
/* update the ring parameters (number and size of tx and rx rings).
* It calls the nm_config callback, if available.
*/
int netmap_update_config(struct netmap_adapter *na);
/* create and initialize the common fields of the krings array,
* using the information that must already be available in the na.
* tailroom can be used to request the allocation of additional
* tailroom bytes after the krings array. This is used by
* netmap_vp_adapters (i.e., VALE ports) to make room for
* leasing-related data structures
*/
int netmap_krings_create(struct netmap_adapter *na, u_int tailroom);
/* deletes the kring array of the adapter. The array must have
* been created using netmap_krings_create
*/
void netmap_krings_delete(struct netmap_adapter *na);
int netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwait);
/* set the stopped/enabled status of a ring.
* When stopping, they also wait for all current activity on the ring to
* terminate. The status change is then notified using the na nm_notify
* callback.
*/
void netmap_set_txring(struct netmap_adapter *, u_int ring_id, int stopped);
void netmap_set_rxring(struct netmap_adapter *, u_int ring_id, int stopped);
/* set the stopped/enabled status of all rings of the adapter. */
void netmap_set_all_rings(struct netmap_adapter *, int stopped);
/* convenience wrappers for netmap_set_all_rings, used in drivers */
void netmap_disable_all_rings(struct ifnet *);
void netmap_enable_all_rings(struct ifnet *);
int netmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwait);
struct netmap_if *
netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
@ -904,10 +1062,18 @@ int netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na);
* NM_BDG_MAXPORTS for broadcast, NM_BDG_MAXPORTS+1 for unknown.
* XXX in practice "unknown" might be handled same as broadcast.
*/
typedef u_int (*bdg_lookup_fn_t)(char *buf, u_int len,
uint8_t *ring_nr, struct netmap_vp_adapter *);
u_int netmap_bdg_learning(char *, u_int, uint8_t *,
struct netmap_vp_adapter *);
typedef u_int (*bdg_lookup_fn_t)(struct nm_bdg_fwd *ft, uint8_t *ring_nr,
const struct netmap_vp_adapter *);
typedef int (*bdg_config_fn_t)(struct nm_ifreq *);
typedef void (*bdg_dtor_fn_t)(const struct netmap_vp_adapter *);
struct netmap_bdg_ops {
bdg_lookup_fn_t lookup;
bdg_config_fn_t config;
bdg_dtor_fn_t dtor;
};
u_int netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
const struct netmap_vp_adapter *);
#define NM_BDG_MAXPORTS 254 /* up to 254 */
#define NM_BDG_BROADCAST NM_BDG_MAXPORTS
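To illustrate the netmap_bdg_ops interface introduced above, a hedged sketch (editorial) of how an external module could plug its own forwarding logic into a VALE switch; my_lookup/my_ops are placeholders, and the final netmap_bdg_ctl() call is only outlined since the nmreq setup depends on the caller:

/* illustrative: a lookup function that floods every frame */
static u_int
my_lookup(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
	const struct netmap_vp_adapter *vpna)
{
	(void)ft; (void)dst_ring; (void)vpna;
	return NM_BDG_BROADCAST;	/* deliver to all ports but the source */
}

static struct netmap_bdg_ops my_ops = {
	.lookup = my_lookup,
	.config = NULL,		/* no private configuration requests */
	.dtor = NULL,		/* nothing to release on detach */
};

/* with nmr naming the target vale switch:
 *	error = netmap_bdg_ctl(&nmr, &my_ops);
 */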
@ -915,11 +1081,11 @@ u_int netmap_bdg_learning(char *, u_int, uint8_t *,
#define NM_NAME "vale" /* prefix for bridge port name */
/* these are redefined in case of no VALE support */
int netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
void netmap_init_bridges(void);
int netmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func);
int netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops);
int netmap_bdg_config(struct nmreq *nmr);
#else /* !WITH_VALE */
#define netmap_get_bdg_na(_1, _2, _3) 0
@ -941,6 +1107,12 @@ int netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create
#define netmap_get_pipe_na(_1, _2, _3) 0
#endif
#ifdef WITH_MONITOR
int netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
#else
#define netmap_get_monitor_na(_1, _2, _3) 0
#endif
/* Various prototypes */
int netmap_poll(struct cdev *dev, int events, struct thread *td);
int netmap_init(void);
@ -952,7 +1124,6 @@ int netmap_dtor_locked(struct netmap_priv_d *priv);
int netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td);
/* netmap_adapter creation/destruction */
#define NM_IFPNAME(ifp) ((ifp) ? (ifp)->if_xname : "zombie")
// #define NM_DEBUG_PUTGET 1
@ -965,7 +1136,7 @@ void __netmap_adapter_get(struct netmap_adapter *na);
#define netmap_adapter_get(na) \
do { \
struct netmap_adapter *__na = na; \
D("getting %p:%s (%d)", __na, NM_IFPNAME(__na->ifp), __na->na_refcount); \
D("getting %p:%s (%d)", __na, (__na)->name, (__na)->na_refcount); \
__netmap_adapter_get(__na); \
} while (0)
@ -974,7 +1145,7 @@ int __netmap_adapter_put(struct netmap_adapter *na);
#define netmap_adapter_put(na) \
({ \
struct netmap_adapter *__na = na; \
D("putting %p:%s (%d)", __na, NM_IFPNAME(__na->ifp), __na->na_refcount); \
D("putting %p:%s (%d)", __na, (__na)->name, (__na)->na_refcount); \
__netmap_adapter_put(__na); \
})
@ -990,12 +1161,10 @@ int netmap_adapter_put(struct netmap_adapter *na);
/*
* module variables
*/
extern u_int netmap_buf_size;
#define NETMAP_BUF_SIZE netmap_buf_size // XXX remove
#define NETMAP_BUF_BASE(na) ((na)->na_lut[0].vaddr)
#define NETMAP_BUF_SIZE(na) ((na)->na_lut_objsize)
extern int netmap_mitigate; // XXX not really used
extern int netmap_no_pendintr;
extern u_int netmap_total_buffers; // global allocator
extern char *netmap_buffer_base; // global allocator
extern int netmap_verbose; // XXX debugging
enum { /* verbose flags */
NM_VERB_ON = 1, /* generic verbose */
@ -1055,6 +1224,10 @@ extern int netmap_generic_rings;
#ifdef __FreeBSD__
/* Assigns the device IOMMU domain to an allocator.
* Returns -ENOMEM in case the domain is different */
#define nm_iommu_group_id(dev) (0)
/* Callback invoked by the dma machinery after a successful dmamap_load */
static void netmap_dmamap_cb(__unused void *arg,
__unused bus_dma_segment_t * segs, __unused int nseg, __unused int error)
@ -1065,26 +1238,77 @@ static void netmap_dmamap_cb(__unused void *arg,
* XXX can we do it without a callback ?
*/
static inline void
netmap_load_map(bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
netmap_load_map(struct netmap_adapter *na,
bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
{
if (map)
bus_dmamap_load(tag, map, buf, NETMAP_BUF_SIZE,
bus_dmamap_load(tag, map, buf, NETMAP_BUF_SIZE(na),
netmap_dmamap_cb, NULL, BUS_DMA_NOWAIT);
}
static inline void
netmap_unload_map(struct netmap_adapter *na,
bus_dma_tag_t tag, bus_dmamap_t map)
{
if (map)
bus_dmamap_unload(tag, map);
}
/* update the map when a buffer changes. */
static inline void
netmap_reload_map(bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
netmap_reload_map(struct netmap_adapter *na,
bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
{
if (map) {
bus_dmamap_unload(tag, map);
bus_dmamap_load(tag, map, buf, NETMAP_BUF_SIZE,
bus_dmamap_load(tag, map, buf, NETMAP_BUF_SIZE(na),
netmap_dmamap_cb, NULL, BUS_DMA_NOWAIT);
}
}
#else /* linux */
int nm_iommu_group_id(bus_dma_tag_t dev);
extern size_t netmap_mem_get_bufsize(struct netmap_mem_d *);
#include <linux/dma-mapping.h>
static inline void
netmap_load_map(struct netmap_adapter *na,
bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
{
if (map) {
*map = dma_map_single(na->pdev, buf, netmap_mem_get_bufsize(na->nm_mem),
DMA_BIDIRECTIONAL);
}
}
static inline void
netmap_unload_map(struct netmap_adapter *na,
bus_dma_tag_t tag, bus_dmamap_t map)
{
u_int sz = netmap_mem_get_bufsize(na->nm_mem);
if (*map) {
dma_unmap_single(na->pdev, *map, sz,
DMA_BIDIRECTIONAL);
}
}
static inline void
netmap_reload_map(struct netmap_adapter *na,
bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
{
u_int sz = netmap_mem_get_bufsize(na->nm_mem);
if (*map) {
dma_unmap_single(na->pdev, *map, sz,
DMA_BIDIRECTIONAL);
}
*map = dma_map_single(na->pdev, buf, sz,
DMA_BIDIRECTIONAL);
}
/*
* XXX How do we redefine these functions:
*
@ -1095,8 +1319,7 @@ netmap_reload_map(bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
* unfortunately the direction is not, so we need to change
* something to have a cross API
*/
#define netmap_load_map(_t, _m, _b)
#define netmap_reload_map(_t, _m, _b)
#if 0
struct e1000_buffer *buffer_info = &tx_ring->buffer_info[l];
/* set time_stamp *before* dma to help avoid a possible race */
@ -1165,34 +1388,13 @@ struct lut_entry {
};
struct netmap_obj_pool;
extern struct lut_entry *netmap_buffer_lut;
#define NMB_VA(i) (netmap_buffer_lut[i].vaddr)
#define NMB_PA(i) (netmap_buffer_lut[i].paddr)
/*
* NMB returns the virtual address of a buffer (buffer 0 on bad index)
* PNMB also fills the physical address
*/
static inline void *
NMB(struct netmap_slot *slot)
{
uint32_t i = slot->buf_idx;
return (unlikely(i >= netmap_total_buffers)) ? NMB_VA(0) : NMB_VA(i);
}
static inline void *
PNMB(struct netmap_slot *slot, uint64_t *pp)
{
uint32_t i = slot->buf_idx;
void *ret = (i >= netmap_total_buffers) ? NMB_VA(0) : NMB_VA(i);
*pp = (i >= netmap_total_buffers) ? NMB_PA(0) : NMB_PA(i);
return ret;
}
/* Generic version of NMB, which uses device-specific memory. */
static inline void *
BDG_NMB(struct netmap_adapter *na, struct netmap_slot *slot)
NMB(struct netmap_adapter *na, struct netmap_slot *slot)
{
struct lut_entry *lut = na->na_lut;
uint32_t i = slot->buf_idx;
@ -1200,6 +1402,19 @@ BDG_NMB(struct netmap_adapter *na, struct netmap_slot *slot)
lut[0].vaddr : lut[i].vaddr;
}
static inline void *
PNMB(struct netmap_adapter *na, struct netmap_slot *slot, uint64_t *pp)
{
uint32_t i = slot->buf_idx;
struct lut_entry *lut = na->na_lut;
void *ret = (i >= na->na_lut_objtotal) ? lut[0].vaddr : lut[i].vaddr;
*pp = (i >= na->na_lut_objtotal) ? lut[0].paddr : lut[i].paddr;
return ret;
}
/* Generic version of NMB, which uses device-specific memory. */
void netmap_txsync_to_host(struct netmap_adapter *na);
@ -1251,6 +1466,17 @@ struct netmap_priv_d {
struct thread *np_td; /* kqueue, just debugging */
};
#ifdef WITH_MONITOR
struct netmap_monitor_adapter {
struct netmap_adapter up;
struct netmap_priv_d priv;
uint32_t flags;
};
#endif /* WITH_MONITOR */
/*
* generic netmap emulation for devices that do not have
@ -1265,12 +1491,20 @@ int generic_xmit_frame(struct ifnet *ifp, struct mbuf *m, void *addr, u_int len,
int generic_find_num_desc(struct ifnet *ifp, u_int *tx, u_int *rx);
void generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq);
//#define RATE_GENERIC /* Enables communication statistics for generic. */
#ifdef RATE_GENERIC
void generic_rate(int txp, int txs, int txi, int rxp, int rxs, int rxi);
#else
#define generic_rate(txp, txs, txi, rxp, rxs, rxi)
#endif
/*
* netmap_mitigation API. This is used by the generic adapter
* to reduce the number of interrupt requests/selwakeup
* to clients on incoming packets.
*/
void netmap_mitigation_init(struct nm_generic_mit *mit, struct netmap_adapter *na);
void netmap_mitigation_init(struct nm_generic_mit *mit, int idx,
struct netmap_adapter *na);
void netmap_mitigation_start(struct nm_generic_mit *mit);
void netmap_mitigation_restart(struct nm_generic_mit *mit);
int netmap_mitigation_active(struct nm_generic_mit *mit);
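For context, a hedged sketch (editorial) of how the generic rx path is expected to use this API: a notification is passed up only when no mitigation timer is pending, otherwise the work is recorded and flushed when the timer fires. gna and r are placeholders for the generic adapter and the ring index:

/* illustrative fragment of a generic rx notification path */
u_int work_done;

if (netmap_mitigation_active(&gna->mit[r])) {
	/* a mitigation timer is pending: just record the work */
	gna->mit[r].mit_pending = 1;
} else {
	/* no timer pending: notify userspace now and arm the timer */
	netmap_generic_irq(na->ifp, r, &work_done);
	netmap_mitigation_start(&gna->mit[r]);
}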
@ -1378,4 +1612,10 @@ void bdg_mismatch_datapath(struct netmap_vp_adapter *na,
struct netmap_vp_adapter *dst_na,
struct nm_bdg_fwd *ft_p, struct netmap_ring *ring,
u_int *j, u_int lim, u_int *howmany);
/* persistent virtual port routines */
int nm_vi_persist(const char *, struct ifnet **);
void nm_vi_detach(struct ifnet *);
void nm_vi_init_index(void);
#endif /* _NET_NETMAP_KERN_H_ */

View File

@ -74,6 +74,7 @@ mbq_unlock(struct mbq *q)
mtx_unlock_spin(&q->lock);
}
void mbq_safe_init(struct mbq *q);
void mbq_safe_destroy(struct mbq *q);
void mbq_safe_enqueue(struct mbq *q, struct mbuf *m);

View File

@ -54,6 +54,112 @@ __FBSDID("$FreeBSD$");
#include <dev/netmap/netmap_kern.h>
#include "netmap_mem2.h"
#define NETMAP_BUF_MAX_NUM 20*4096*2 /* large machine */
#define NETMAP_POOL_MAX_NAMSZ 32
enum {
NETMAP_IF_POOL = 0,
NETMAP_RING_POOL,
NETMAP_BUF_POOL,
NETMAP_POOLS_NR
};
struct netmap_obj_params {
u_int size;
u_int num;
};
struct netmap_obj_pool {
char name[NETMAP_POOL_MAX_NAMSZ]; /* name of the allocator */
/* ---------------------------------------------------*/
/* these are only meaningful if the pool is finalized */
/* (see 'finalized' field in netmap_mem_d) */
u_int objtotal; /* actual total number of objects. */
u_int memtotal; /* actual total memory space */
u_int numclusters; /* actual number of clusters */
u_int objfree; /* number of free objects. */
struct lut_entry *lut; /* virt,phys addresses, objtotal entries */
uint32_t *bitmap; /* one bit per buffer, 1 means free */
uint32_t bitmap_slots; /* number of uint32 entries in bitmap */
/* ---------------------------------------------------*/
/* limits */
u_int objminsize; /* minimum object size */
u_int objmaxsize; /* maximum object size */
u_int nummin; /* minimum number of objects */
u_int nummax; /* maximum number of objects */
/* these are changed only by config */
u_int _objtotal; /* total number of objects */
u_int _objsize; /* object size */
u_int _clustsize; /* cluster size */
u_int _clustentries; /* objects per cluster */
u_int _numclusters; /* number of clusters */
/* requested values */
u_int r_objtotal;
u_int r_objsize;
};
#ifdef linux
// XXX a mtx would suffice here 20130415 lr
#define NMA_LOCK_T struct semaphore
#else /* !linux */
#define NMA_LOCK_T struct mtx
#endif /* linux */
typedef int (*netmap_mem_config_t)(struct netmap_mem_d*);
typedef int (*netmap_mem_finalize_t)(struct netmap_mem_d*);
typedef void (*netmap_mem_deref_t)(struct netmap_mem_d*);
typedef uint16_t nm_memid_t;
struct netmap_mem_d {
NMA_LOCK_T nm_mtx; /* protect the allocator */
u_int nm_totalsize; /* shorthand */
u_int flags;
#define NETMAP_MEM_FINALIZED 0x1 /* preallocation done */
int lasterr; /* last error for curr config */
int refcount; /* existing priv structures */
/* the three allocators */
struct netmap_obj_pool pools[NETMAP_POOLS_NR];
netmap_mem_config_t config;
netmap_mem_finalize_t finalize;
netmap_mem_deref_t deref;
nm_memid_t nm_id; /* allocator identifier */
int nm_grp; /* iommu group id */
/* list of all existing allocators, sorted by nm_id */
struct netmap_mem_d *prev, *next;
};
/* accessor functions */
struct lut_entry*
netmap_mem_get_lut(struct netmap_mem_d *nmd)
{
return nmd->pools[NETMAP_BUF_POOL].lut;
}
u_int
netmap_mem_get_buftotal(struct netmap_mem_d *nmd)
{
return nmd->pools[NETMAP_BUF_POOL].objtotal;
}
size_t
netmap_mem_get_bufsize(struct netmap_mem_d *nmd)
{
return nmd->pools[NETMAP_BUF_POOL]._objsize;
}
#ifdef linux
#define NMA_LOCK_INIT(n) sema_init(&(n)->nm_mtx, 1)
#define NMA_LOCK_DESTROY(n)
@ -135,6 +241,7 @@ struct netmap_mem_d nm_mem = { /* Our memory allocator. */
.deref = netmap_mem_global_deref,
.nm_id = 1,
.nm_grp = -1,
.prev = &nm_mem,
.next = &nm_mem,
@ -143,9 +250,6 @@ struct netmap_mem_d nm_mem = { /* Our memory allocator. */
struct netmap_mem_d *netmap_last_mem_d = &nm_mem;
// XXX logically belongs to nm_mem
struct lut_entry *netmap_buffer_lut; /* exported */
/* blueprint for the private memory allocators */
static int netmap_mem_private_config(struct netmap_mem_d *nmd);
static int netmap_mem_private_finalize(struct netmap_mem_d *nmd);
@ -254,6 +358,25 @@ nm_mem_release_id(struct netmap_mem_d *nmd)
NMA_UNLOCK(&nm_mem);
}
static int
nm_mem_assign_group(struct netmap_mem_d *nmd, struct device *dev)
{
int err = 0, id;
id = nm_iommu_group_id(dev);
if (netmap_verbose)
D("iommu_group %d", id);
NMA_LOCK(nmd);
if (nmd->nm_grp < 0)
nmd->nm_grp = id;
if (nmd->nm_grp != id)
nmd->lasterr = err = ENOMEM;
NMA_UNLOCK(nmd);
return err;
}
/*
* First, find the allocator that contains the requested offset,
@ -274,7 +397,7 @@ netmap_mem_ofstophys(struct netmap_mem_d* nmd, vm_ooffset_t offset)
if (offset >= p[i].memtotal)
continue;
// now lookup the cluster's address
pa = p[i].lut[offset / p[i]._objsize].paddr +
pa = vtophys(p[i].lut[offset / p[i]._objsize].vaddr) +
offset % p[i]._objsize;
NMA_UNLOCK(nmd);
return pa;
@ -300,18 +423,22 @@ netmap_mem_get_info(struct netmap_mem_d* nmd, u_int* size, u_int *memflags,
error = nmd->config(nmd);
if (error)
goto out;
if (nmd->flags & NETMAP_MEM_FINALIZED) {
*size = nmd->nm_totalsize;
} else {
int i;
*size = 0;
for (i = 0; i < NETMAP_POOLS_NR; i++) {
struct netmap_obj_pool *p = nmd->pools + i;
*size += (p->_numclusters * p->_clustsize);
if (size) {
if (nmd->flags & NETMAP_MEM_FINALIZED) {
*size = nmd->nm_totalsize;
} else {
int i;
*size = 0;
for (i = 0; i < NETMAP_POOLS_NR; i++) {
struct netmap_obj_pool *p = nmd->pools + i;
*size += (p->_numclusters * p->_clustsize);
}
}
}
*memflags = nmd->flags;
*id = nmd->nm_id;
if (memflags)
*memflags = nmd->flags;
if (id)
*id = nmd->nm_id;
out:
NMA_UNLOCK(nmd);
return error;
@ -471,12 +598,15 @@ netmap_obj_free_va(struct netmap_obj_pool *p, void *vaddr)
vaddr, p->name);
}
#define netmap_mem_bufsize(n) \
((n)->pools[NETMAP_BUF_POOL]._objsize)
#define netmap_if_malloc(n, len) netmap_obj_malloc(&(n)->pools[NETMAP_IF_POOL], len, NULL, NULL)
#define netmap_if_free(n, v) netmap_obj_free_va(&(n)->pools[NETMAP_IF_POOL], (v))
#define netmap_ring_malloc(n, len) netmap_obj_malloc(&(n)->pools[NETMAP_RING_POOL], len, NULL, NULL)
#define netmap_ring_free(n, v) netmap_obj_free_va(&(n)->pools[NETMAP_RING_POOL], (v))
#define netmap_buf_malloc(n, _pos, _index) \
netmap_obj_malloc(&(n)->pools[NETMAP_BUF_POOL], NETMAP_BDG_BUF_SIZE(n), _pos, _index)
netmap_obj_malloc(&(n)->pools[NETMAP_BUF_POOL], netmap_mem_bufsize(n), _pos, _index)
#if 0 // XXX unused
@ -675,7 +805,7 @@ netmap_config_obj_allocator(struct netmap_obj_pool *p, u_int objtotal, u_int obj
p->r_objtotal = objtotal;
p->r_objsize = objsize;
#define MAX_CLUSTSIZE (1<<17)
#define MAX_CLUSTSIZE (1<<22) // 4 MB
#define LINE_ROUND NM_CACHE_ALIGN // 64
if (objsize >= MAX_CLUSTSIZE) {
/* we could do it but there is no point */
@ -713,15 +843,14 @@ netmap_config_obj_allocator(struct netmap_obj_pool *p, u_int objtotal, u_int obj
clustentries = i;
break;
}
if (delta > ( (clustentries*objsize) % PAGE_SIZE) )
clustentries = i;
}
// D("XXX --- ouch, delta %d (bad for buffers)", delta);
/* compute clustsize and round to the next page */
/* exact solution not found */
if (clustentries == 0) {
D("unsupported allocation for %d bytes", objsize);
return EINVAL;
}
/* compute clustsize */
clustsize = clustentries * objsize;
i = (clustsize & (PAGE_SIZE - 1));
if (i)
clustsize += PAGE_SIZE - i;
if (netmap_verbose)
D("objsize %d clustsize %d objects %d",
objsize, clustsize, clustentries);
@ -856,6 +985,47 @@ netmap_mem_reset_all(struct netmap_mem_d *nmd)
nmd->flags &= ~NETMAP_MEM_FINALIZED;
}
static int
netmap_mem_unmap(struct netmap_obj_pool *p, struct netmap_adapter *na)
{
int i, lim = p->_objtotal;
if (na->pdev == NULL)
return 0;
#ifdef __FreeBSD__
(void)i;
(void)lim;
D("unsupported on FreeBSD");
#else /* linux */
for (i = 2; i < lim; i++) {
netmap_unload_map(na, (bus_dma_tag_t) na->pdev, &p->lut[i].paddr);
}
#endif /* linux */
return 0;
}
static int
netmap_mem_map(struct netmap_obj_pool *p, struct netmap_adapter *na)
{
#ifdef __FreeBSD__
D("unsupported on FreeBSD");
#else /* linux */
int i, lim = p->_objtotal;
if (na->pdev == NULL)
return 0;
for (i = 2; i < lim; i++) {
netmap_load_map(na, (bus_dma_tag_t) na->pdev, &p->lut[i].paddr,
p->lut[i].vaddr);
}
#endif /* linux */
return 0;
}
static int
netmap_mem_finalize_all(struct netmap_mem_d *nmd)
{
@ -1091,13 +1261,6 @@ netmap_mem_global_finalize(struct netmap_mem_d *nmd)
if (netmap_mem_finalize_all(nmd))
goto out;
/* backward compatibility */
netmap_buf_size = nmd->pools[NETMAP_BUF_POOL]._objsize;
netmap_total_buffers = nmd->pools[NETMAP_BUF_POOL].objtotal;
netmap_buffer_lut = nmd->pools[NETMAP_BUF_POOL].lut;
netmap_buffer_base = nmd->pools[NETMAP_BUF_POOL].lut[0].vaddr;
nmd->lasterr = 0;
out:
@ -1198,7 +1361,7 @@ netmap_mem_rings_create(struct netmap_adapter *na)
ring->cur = kring->rcur;
ring->tail = kring->rtail;
*(uint16_t *)(uintptr_t)&ring->nr_buf_size =
NETMAP_BDG_BUF_SIZE(na->nm_mem);
netmap_mem_bufsize(na->nm_mem);
ND("%s h %d c %d t %d", kring->name,
ring->head, ring->cur, ring->tail);
ND("initializing slots for txring");
@ -1241,7 +1404,7 @@ netmap_mem_rings_create(struct netmap_adapter *na)
ring->cur = kring->rcur;
ring->tail = kring->rtail;
*(int *)(uintptr_t)&ring->nr_buf_size =
NETMAP_BDG_BUF_SIZE(na->nm_mem);
netmap_mem_bufsize(na->nm_mem);
ND("%s h %d c %d t %d", kring->name,
ring->head, ring->cur, ring->tail);
ND("initializing slots for rxring %p", ring);
@ -1290,7 +1453,7 @@ netmap_mem_rings_delete(struct netmap_adapter *na)
* the interface is in netmap mode.
*/
struct netmap_if *
netmap_mem_if_new(const char *ifname, struct netmap_adapter *na)
netmap_mem_if_new(struct netmap_adapter *na)
{
struct netmap_if *nifp;
ssize_t base; /* handy for relative offsets between rings and nifp */
@ -1316,7 +1479,7 @@ netmap_mem_if_new(const char *ifname, struct netmap_adapter *na)
/* initialize base fields -- override const */
*(u_int *)(uintptr_t)&nifp->ni_tx_rings = na->num_tx_rings;
*(u_int *)(uintptr_t)&nifp->ni_rx_rings = na->num_rx_rings;
strncpy(nifp->ni_name, ifname, (size_t)IFNAMSIZ);
strncpy(nifp->ni_name, na->name, (size_t)IFNAMSIZ);
/*
* fill the slots for the rx and tx rings. They contain the offset
@ -1358,6 +1521,8 @@ netmap_mem_global_deref(struct netmap_mem_d *nmd)
NMA_LOCK(nmd);
nmd->refcount--;
if (!nmd->refcount)
nmd->nm_grp = -1;
if (netmap_verbose)
D("refcount = %d", nmd->refcount);
@ -1365,13 +1530,25 @@ netmap_mem_global_deref(struct netmap_mem_d *nmd)
}
int
netmap_mem_finalize(struct netmap_mem_d *nmd)
netmap_mem_finalize(struct netmap_mem_d *nmd, struct netmap_adapter *na)
{
return nmd->finalize(nmd);
if (nm_mem_assign_group(nmd, na->pdev) < 0) {
return ENOMEM;
} else {
nmd->finalize(nmd);
}
if (!nmd->lasterr && na->pdev)
netmap_mem_map(&nmd->pools[NETMAP_BUF_POOL], na);
return nmd->lasterr;
}
void
netmap_mem_deref(struct netmap_mem_d *nmd)
netmap_mem_deref(struct netmap_mem_d *nmd, struct netmap_adapter *na)
{
NMA_LOCK(nmd);
netmap_mem_unmap(&nmd->pools[NETMAP_BUF_POOL], na);
NMA_UNLOCK(nmd);
return nmd->deref(nmd);
}

View File

@ -97,70 +97,6 @@
#define _NET_NETMAP_MEM2_H_
#define NETMAP_BUF_MAX_NUM 20*4096*2 /* large machine */
#define NETMAP_POOL_MAX_NAMSZ 32
enum {
NETMAP_IF_POOL = 0,
NETMAP_RING_POOL,
NETMAP_BUF_POOL,
NETMAP_POOLS_NR
};
struct netmap_obj_params {
u_int size;
u_int num;
};
struct netmap_obj_pool {
char name[NETMAP_POOL_MAX_NAMSZ]; /* name of the allocator */
/* ---------------------------------------------------*/
/* these are only meaningful if the pool is finalized */
/* (see 'finalized' field in netmap_mem_d) */
u_int objtotal; /* actual total number of objects. */
u_int memtotal; /* actual total memory space */
u_int numclusters; /* actual number of clusters */
u_int objfree; /* number of free objects. */
struct lut_entry *lut; /* virt,phys addresses, objtotal entries */
uint32_t *bitmap; /* one bit per buffer, 1 means free */
uint32_t bitmap_slots; /* number of uint32 entries in bitmap */
/* ---------------------------------------------------*/
/* limits */
u_int objminsize; /* minimum object size */
u_int objmaxsize; /* maximum object size */
u_int nummin; /* minimum number of objects */
u_int nummax; /* maximum number of objects */
/* these are changed only by config */
u_int _objtotal; /* total number of objects */
u_int _objsize; /* object size */
u_int _clustsize; /* cluster size */
u_int _clustentries; /* objects per cluster */
u_int _numclusters; /* number of clusters */
/* requested values */
u_int r_objtotal;
u_int r_objsize;
};
#ifdef linux
// XXX a mtx would suffice here 20130415 lr
#define NMA_LOCK_T struct semaphore
#else /* !linux */
#define NMA_LOCK_T struct mtx
#endif /* linux */
typedef int (*netmap_mem_config_t)(struct netmap_mem_d*);
typedef int (*netmap_mem_finalize_t)(struct netmap_mem_d*);
typedef void (*netmap_mem_deref_t)(struct netmap_mem_d*);
typedef uint16_t nm_memid_t;
/* We implement two kinds of netmap_mem_d structures:
*
@ -178,40 +114,21 @@ typedef uint16_t nm_memid_t;
* are no active users. By 'active user' we mean an existing netmap_priv
* structure holding a reference to the allocator.
*/
struct netmap_mem_d {
NMA_LOCK_T nm_mtx; /* protect the allocator */
u_int nm_totalsize; /* shorthand */
u_int flags;
#define NETMAP_MEM_FINALIZED 0x1 /* preallocation done */
#define NETMAP_MEM_PRIVATE 0x2 /* uses private address space */
int lasterr; /* last error for curr config */
int refcount; /* existing priv structures */
/* the three allocators */
struct netmap_obj_pool pools[NETMAP_POOLS_NR];
netmap_mem_config_t config;
netmap_mem_finalize_t finalize;
netmap_mem_deref_t deref;
nm_memid_t nm_id; /* allocator identifier */
/* list of all existing allocators, sorted by nm_id */
struct netmap_mem_d *prev, *next;
};
extern struct netmap_mem_d nm_mem;
struct lut_entry* netmap_mem_get_lut(struct netmap_mem_d *);
u_int netmap_mem_get_buftotal(struct netmap_mem_d *);
size_t netmap_mem_get_bufsize(struct netmap_mem_d *);
vm_paddr_t netmap_mem_ofstophys(struct netmap_mem_d *, vm_ooffset_t);
int netmap_mem_finalize(struct netmap_mem_d *);
int netmap_mem_finalize(struct netmap_mem_d *, struct netmap_adapter *);
int netmap_mem_init(void);
void netmap_mem_fini(void);
struct netmap_if *
netmap_mem_if_new(const char *, struct netmap_adapter *);
struct netmap_if * netmap_mem_if_new(struct netmap_adapter *);
void netmap_mem_if_delete(struct netmap_adapter *, struct netmap_if *);
int netmap_mem_rings_create(struct netmap_adapter *);
void netmap_mem_rings_delete(struct netmap_adapter *);
void netmap_mem_deref(struct netmap_mem_d *);
void netmap_mem_deref(struct netmap_mem_d *, struct netmap_adapter *);
int netmap_mem_get_info(struct netmap_mem_d *, u_int *size, u_int *memflags, uint16_t *id);
ssize_t netmap_mem_if_offset(struct netmap_mem_d *, const void *vaddr);
struct netmap_mem_d* netmap_mem_private_new(const char *name,
@ -219,7 +136,8 @@ struct netmap_mem_d* netmap_mem_private_new(const char *name,
int* error);
void netmap_mem_private_delete(struct netmap_mem_d *);
#define NETMAP_BDG_BUF_SIZE(n) ((n)->pools[NETMAP_BUF_POOL]._objsize)
#define NETMAP_MEM_PRIVATE 0x2 /* allocator uses private address space */
#define NETMAP_MEM_IO 0x4 /* the underlying memory is mmapped I/O */
uint32_t netmap_extra_alloc(struct netmap_adapter *, uint32_t *, uint32_t n);

View File

@ -0,0 +1,498 @@
/*
* Copyright (C) 2014 Giuseppe Lettieri. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* $FreeBSD$
*
* Monitors
*
* netmap monitors can be used to do zero-copy monitoring of network traffic
* on another adapter, when the latter adapter is working in netmap mode.
*
* Monitors offer to userspace the same interface as any other netmap port,
* with as many pairs of netmap rings as the monitored adapter.
* However, only the rx rings are actually used. Each monitor rx ring receives
* the traffic transiting on both the tx and rx corresponding rings in the
* monitored adapter. During registration, the user can choose if she wants
* to intercept tx only, rx only, or both tx and rx traffic.
*
* The monitor only sees the frames after they have been consumed in the
* monitored adapter:
*
* - For tx traffic, this is after the slots containing the frames have been
* marked as free. Note that this may happen a considerable time after
* frame transmission, since freeing of slots is often done lazily.
*
* - For rx traffic, this is after the consumer on the monitored adapter
* has released them. In most cases, the consumer is a userspace
* application which may have modified the frame contents.
*
* If the monitor is not able to cope with the stream of frames, excess traffic
* will be dropped.
*
* Each ring can be monitored by at most one monitor. This may change in the
* future, if we implement monitor chaining.
*
*/
#if defined(__FreeBSD__)
#include <sys/cdefs.h> /* prerequisite */
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h> /* defines used in kernel.h */
#include <sys/kernel.h> /* types used in module initialization */
#include <sys/malloc.h>
#include <sys/poll.h>
#include <sys/lock.h>
#include <sys/rwlock.h>
#include <sys/selinfo.h>
#include <sys/sysctl.h>
#include <sys/socket.h> /* sockaddrs */
#include <net/if.h>
#include <net/if_var.h>
#include <machine/bus.h> /* bus_dmamap_* */
#include <sys/refcount.h>
#elif defined(linux)
#include "bsd_glue.h"
#elif defined(__APPLE__)
#warning OSX support is only partial
#include "osx_glue.h"
#else
#error Unsupported platform
#endif /* unsupported */
/*
* common headers
*/
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>
#ifdef WITH_MONITOR
#define NM_MONITOR_MAXSLOTS 4096
/* The monitor works by replacing the nm_sync callbacks in the monitored rings.
* The actions to be performed are the same on both tx and rx rings, so we
* have collected them here.
*/
static int
netmap_monitor_parent_sync(struct netmap_kring *kring, int flags, u_int* ringptr)
{
struct netmap_monitor_adapter *mna = kring->monitor;
struct netmap_kring *mkring = &mna->up.rx_rings[kring->ring_id];
struct netmap_ring *ring = kring->ring, *mring = mkring->ring;
int error;
int rel_slots, free_slots, busy;
u_int beg, end, i;
u_int lim = kring->nkr_num_slots - 1,
mlim = mkring->nkr_num_slots - 1;
/* get the released slots (rel_slots) */
beg = *ringptr;
error = kring->save_sync(kring, flags);
if (error)
return error;
end = *ringptr;
rel_slots = end - beg;
if (rel_slots < 0)
rel_slots += kring->nkr_num_slots;
if (!rel_slots) {
return 0;
}
/* we need to lock the monitor receive ring, since it
* is the target of both tx and rx traffic from the monitored
* adapter
*/
mtx_lock(&mkring->q_lock);
/* get the free slots available on the monitor ring */
i = mkring->nr_hwtail;
busy = i - mkring->nr_hwcur;
if (busy < 0)
busy += mkring->nkr_num_slots;
free_slots = mlim - busy;
if (!free_slots) {
mtx_unlock(&mkring->q_lock);
return 0;
}
/* swap min(free_slots, rel_slots) slots */
if (free_slots < rel_slots) {
beg += (rel_slots - free_slots);
if (beg > lim)
beg = 0;
rel_slots = free_slots;
}
for ( ; rel_slots; rel_slots--) {
struct netmap_slot *s = &ring->slot[beg];
struct netmap_slot *ms = &mring->slot[i];
uint32_t tmp;
tmp = ms->buf_idx;
ms->buf_idx = s->buf_idx;
s->buf_idx = tmp;
tmp = ms->len;
ms->len = s->len;
s->len = tmp;
s->flags |= NS_BUF_CHANGED;
beg = nm_next(beg, lim);
i = nm_next(i, mlim);
}
wmb();
mkring->nr_hwtail = i;
mtx_unlock(&mkring->q_lock);
/* notify the new frames to the monitor */
mna->up.nm_notify(&mna->up, mkring->ring_id, NR_RX, 0);
return 0;
}
/* callback used to replace the nm_sync callback in the monitored tx rings */
static int
netmap_monitor_parent_txsync(struct netmap_kring *kring, int flags)
{
ND("%s %x", kring->name, flags);
return netmap_monitor_parent_sync(kring, flags, &kring->nr_hwtail);
}
/* callback used to replace the nm_sync callback in the monitored rx rings */
static int
netmap_monitor_parent_rxsync(struct netmap_kring *kring, int flags)
{
ND("%s %x", kring->name, flags);
return netmap_monitor_parent_sync(kring, flags, &kring->rcur);
}
/* nm_sync callback for the monitor's own tx rings.
* This makes no sense and always returns an error.
*/
static int
netmap_monitor_txsync(struct netmap_kring *kring, int flags)
{
D("%s %x", kring->name, flags);
return EIO;
}
/* nm_sync callback for the monitor's own rx rings.
* Note that the lock in netmap_monitor_parent_sync only protects
* writers among themselves. Synchronization between writers
* (i.e., netmap_monitor_parent_txsync and netmap_monitor_parent_rxsync)
* and readers (i.e., netmap_monitor_rxsync) relies on memory barriers.
*/
static int
netmap_monitor_rxsync(struct netmap_kring *kring, int flags)
{
ND("%s %x", kring->name, flags);
kring->nr_hwcur = kring->rcur;
rmb();
nm_rxsync_finalize(kring);
return 0;
}
/* nm_krings_create callbacks for monitors.
* We could use the default netmap_hw_krings_create, but
* we don't need the mbq.
*/
static int
netmap_monitor_krings_create(struct netmap_adapter *na)
{
return netmap_krings_create(na, 0);
}
/* nm_register callback for monitors.
*
* On registration, replace the nm_sync callbacks in the monitored
* rings with our own, saving the previous ones in the monitored
* rings themselves, where they are used by netmap_monitor_parent_sync.
*
* On de-registration, restore the original callbacks. We need to
* stop traffic while we are doing this, since the monitored adapter may
* have already started executing a netmap_monitor_parent_sync
* and may not like the kring->save_sync pointer to become NULL.
*/
static int
netmap_monitor_reg(struct netmap_adapter *na, int onoff)
{
struct netmap_monitor_adapter *mna =
(struct netmap_monitor_adapter *)na;
struct netmap_priv_d *priv = &mna->priv;
struct netmap_adapter *pna = priv->np_na;
struct netmap_kring *kring;
int i;
ND("%p: onoff %d", na, onoff);
if (onoff) {
if (!nm_netmap_on(pna)) {
/* parent left netmap mode, fatal */
return ENXIO;
}
if (mna->flags & NR_MONITOR_TX) {
for (i = priv->np_txqfirst; i < priv->np_txqlast; i++) {
kring = &pna->tx_rings[i];
kring->save_sync = kring->nm_sync;
kring->nm_sync = netmap_monitor_parent_txsync;
}
}
if (mna->flags & NR_MONITOR_RX) {
for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) {
kring = &pna->rx_rings[i];
kring->save_sync = kring->nm_sync;
kring->nm_sync = netmap_monitor_parent_rxsync;
}
}
na->na_flags |= NAF_NETMAP_ON;
} else {
if (!nm_netmap_on(pna)) {
/* parent left netmap mode, nothing to restore */
return 0;
}
na->na_flags &= ~NAF_NETMAP_ON;
if (mna->flags & NR_MONITOR_TX) {
for (i = priv->np_txqfirst; i < priv->np_txqlast; i++) {
netmap_set_txring(pna, i, 1 /* stopped */);
kring = &pna->tx_rings[i];
kring->nm_sync = kring->save_sync;
kring->save_sync = NULL;
netmap_set_txring(pna, i, 0 /* enabled */);
}
}
if (mna->flags & NR_MONITOR_RX) {
for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) {
netmap_set_rxring(pna, i, 1 /* stopped */);
kring = &pna->rx_rings[i];
kring->nm_sync = kring->save_sync;
kring->save_sync = NULL;
netmap_set_rxring(pna, i, 0 /* enabled */);
}
}
}
return 0;
}
/* nm_krings_delete callback for monitors */
static void
netmap_monitor_krings_delete(struct netmap_adapter *na)
{
netmap_krings_delete(na);
}
/* nm_dtor callback for monitors */
static void
netmap_monitor_dtor(struct netmap_adapter *na)
{
struct netmap_monitor_adapter *mna =
(struct netmap_monitor_adapter *)na;
struct netmap_priv_d *priv = &mna->priv;
struct netmap_adapter *pna = priv->np_na;
int i;
ND("%p", na);
if (nm_netmap_on(pna)) {
/* parent still in netmap mode, mark its krings as free */
if (mna->flags & NR_MONITOR_TX) {
for (i = priv->np_txqfirst; i < priv->np_txqlast; i++) {
pna->tx_rings[i].monitor = NULL;
}
}
if (mna->flags & NR_MONITOR_RX) {
for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) {
pna->rx_rings[i].monitor = NULL;
}
}
}
netmap_adapter_put(pna);
}
/* check if nmr is a request for a monitor adapter that we can satisfy */
int
netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
{
struct nmreq pnmr;
struct netmap_adapter *pna; /* parent adapter */
struct netmap_monitor_adapter *mna;
int i, error;
if ((nmr->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX)) == 0) {
ND("not a monitor");
return 0;
}
/* this is a request for a monitor adapter */
D("flags %x", nmr->nr_flags);
mna = malloc(sizeof(*mna), M_DEVBUF, M_NOWAIT | M_ZERO);
if (mna == NULL) {
D("memory error");
return ENOMEM;
}
/* first, try to find the adapter that we want to monitor.
* We use the same nmr, after we have turned off the monitor flags.
* In this way we can potentially monitor everything netmap understands,
* except other monitors.
*/
memcpy(&pnmr, nmr, sizeof(pnmr));
pnmr.nr_flags &= ~(NR_MONITOR_TX | NR_MONITOR_RX);
error = netmap_get_na(&pnmr, &pna, create);
if (error) {
D("parent lookup failed: %d", error);
return error;
}
D("found parent: %s", pna->name);
if (!nm_netmap_on(pna)) {
/* parent not in netmap mode */
/* XXX we can wait for the parent to enter netmap mode,
* by intercepting its nm_register callback (2014-03-16)
*/
D("%s not in netmap mode", pna->name);
error = EINVAL;
goto put_out;
}
/* grab all the rings we need in the parent */
mna->priv.np_na = pna;
error = netmap_interp_ringid(&mna->priv, nmr->nr_ringid, nmr->nr_flags);
if (error) {
D("ringid error");
goto put_out;
}
if (nmr->nr_flags & NR_MONITOR_TX) {
for (i = mna->priv.np_txqfirst; i < mna->priv.np_txqlast; i++) {
struct netmap_kring *kring = &pna->tx_rings[i];
if (kring->monitor) {
error = EBUSY;
D("ring busy");
goto release_out;
}
kring->monitor = mna;
}
}
if (nmr->nr_flags & NR_MONITOR_RX) {
for (i = mna->priv.np_rxqfirst; i < mna->priv.np_rxqlast; i++) {
struct netmap_kring *kring = &pna->rx_rings[i];
if (kring->monitor) {
error = EBUSY;
D("ring busy");
goto release_out;
}
kring->monitor = mna;
}
}
snprintf(mna->up.name, sizeof(mna->up.name), "mon:%s", pna->name);
/* the monitor supports the host rings iff the parent does */
mna->up.na_flags = (pna->na_flags & NAF_HOST_RINGS);
mna->up.nm_txsync = netmap_monitor_txsync;
mna->up.nm_rxsync = netmap_monitor_rxsync;
mna->up.nm_register = netmap_monitor_reg;
mna->up.nm_dtor = netmap_monitor_dtor;
mna->up.nm_krings_create = netmap_monitor_krings_create;
mna->up.nm_krings_delete = netmap_monitor_krings_delete;
mna->up.nm_mem = pna->nm_mem;
mna->up.na_lut = pna->na_lut;
mna->up.na_lut_objtotal = pna->na_lut_objtotal;
mna->up.na_lut_objsize = pna->na_lut_objsize;
mna->up.num_tx_rings = 1; // XXX we don't need it, but field can't be zero
/* we set the number of our rx_rings to be max(num_tx_rings, num_rx_rings)
* in the parent
*/
mna->up.num_rx_rings = pna->num_rx_rings;
if (pna->num_tx_rings > pna->num_rx_rings)
mna->up.num_rx_rings = pna->num_tx_rings;
/* by default, the number of slots is the same as in
* the parent rings, but the user may ask for a different
* number
*/
mna->up.num_tx_desc = nmr->nr_tx_slots;
nm_bound_var(&mna->up.num_tx_desc, pna->num_tx_desc,
1, NM_MONITOR_MAXSLOTS, NULL);
mna->up.num_rx_desc = nmr->nr_rx_slots;
nm_bound_var(&mna->up.num_rx_desc, pna->num_rx_desc,
1, NM_MONITOR_MAXSLOTS, NULL);
error = netmap_attach_common(&mna->up);
if (error) {
D("attach_common error");
goto release_out;
}
/* remember the traffic directions we have to monitor */
mna->flags = (nmr->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX));
*na = &mna->up;
netmap_adapter_get(*na);
/* write the configuration back */
nmr->nr_tx_rings = mna->up.num_tx_rings;
nmr->nr_rx_rings = mna->up.num_rx_rings;
nmr->nr_tx_slots = mna->up.num_tx_desc;
nmr->nr_rx_slots = mna->up.num_rx_desc;
/* keep the reference to the parent */
D("monitor ok");
return 0;
release_out:
D("monitor error");
for (i = mna->priv.np_txqfirst; i < mna->priv.np_txqlast; i++) {
if (pna->tx_rings[i].monitor == mna)
pna->tx_rings[i].monitor = NULL;
}
for (i = mna->priv.np_rxqfirst; i < mna->priv.np_rxqlast; i++) {
if (pna->rx_rings[i].monitor == mna)
pna->rx_rings[i].monitor = NULL;
}
put_out:
netmap_adapter_put(pna);
free(mna, M_DEVBUF);
return error;
}
#endif /* WITH_MONITOR */
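
For illustration only (not part of the diff): a minimal userspace sketch of how a
monitor can be requested through the NR_MONITOR_RX flag handled by
netmap_get_monitor_na() above. The interface name, the helper name and the error
handling are assumptions; the monitored port must already be open in netmap mode,
as the code above enforces.

#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <net/netmap.h>

/* open a monitor on all rx rings of "ifname"; returns the netmap fd or -1 */
static int
open_rx_monitor(const char *ifname)
{
    struct nmreq req;
    int fd = open("/dev/netmap", O_RDWR);

    if (fd < 0)
        return -1;
    memset(&req, 0, sizeof(req));
    strncpy(req.nr_name, ifname, sizeof(req.nr_name));
    req.nr_version = NETMAP_API;
    req.nr_flags = NR_REG_ALL_NIC | NR_MONITOR_RX;  /* monitor rx traffic */
    if (ioctl(fd, NIOCREGIF, &req) < 0) {
        close(fd);
        return -1;
    }
    /* the monitor rings can now be mmap()ed and read like any other port */
    return fd;
}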


@ -159,7 +159,7 @@ void bdg_mismatch_datapath(struct netmap_vp_adapter *na,
src = ft_p->ft_buf;
src_len = ft_p->ft_len;
slot = &ring->slot[*j];
dst = BDG_NMB(&dst_na->up, slot);
dst = NMB(&dst_na->up, slot);
dst_len = src_len;
/* We are processing the first input slot and there is a mismatch
@ -303,7 +303,7 @@ void bdg_mismatch_datapath(struct netmap_vp_adapter *na,
/* Next destination slot. */
*j = nm_next(*j, lim);
slot = &ring->slot[*j];
dst = BDG_NMB(&dst_na->up, slot);
dst = NMB(&dst_na->up, slot);
gso_bytes = 0;
gso_idx++;
@ -365,7 +365,7 @@ void bdg_mismatch_datapath(struct netmap_vp_adapter *na,
/* Next destination slot. */
*j = nm_next(*j, lim);
slot = &ring->slot[*j];
dst = BDG_NMB(&dst_na->up, slot);
dst = NMB(&dst_na->up, slot);
/* Next source slot. */
ft_p++;


@ -126,7 +126,7 @@ void
netmap_pipe_dealloc(struct netmap_adapter *na)
{
if (na->na_pipes) {
ND("freeing pipes for %s", NM_IFPNAME(na->ifp));
ND("freeing pipes for %s", na->name);
free(na->na_pipes, M_DEVBUF);
na->na_pipes = NULL;
na->na_max_pipes = 0;
@ -155,7 +155,7 @@ static int
netmap_pipe_add(struct netmap_adapter *parent, struct netmap_pipe_adapter *na)
{
if (parent->na_next_pipe >= parent->na_max_pipes) {
D("%s: no space left for pipes", NM_IFPNAME(parent->ifp));
D("%s: no space left for pipes", parent->name);
return ENOMEM;
}
@ -179,10 +179,9 @@ netmap_pipe_remove(struct netmap_adapter *parent, struct netmap_pipe_adapter *na
}
static int
netmap_pipe_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
netmap_pipe_txsync(struct netmap_kring *txkring, int flags)
{
struct netmap_kring *txkring = na->tx_rings + ring_nr,
*rxkring = txkring->pipe;
struct netmap_kring *rxkring = txkring->pipe;
u_int limit; /* slots to transfer */
u_int j, k, lim_tx = txkring->nkr_num_slots - 1,
lim_rx = rxkring->nkr_num_slots - 1;
@ -245,10 +244,9 @@ netmap_pipe_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
}
static int
netmap_pipe_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
netmap_pipe_rxsync(struct netmap_kring *rxkring, int flags)
{
struct netmap_kring *rxkring = na->rx_rings + ring_nr,
*txkring = rxkring->pipe;
struct netmap_kring *txkring = rxkring->pipe;
uint32_t oldhwcur = rxkring->nr_hwcur;
ND("%s %x <- %s", rxkring->name, flags, txkring->name);
@ -425,12 +423,11 @@ netmap_pipe_reg(struct netmap_adapter *na, int onoff)
{
struct netmap_pipe_adapter *pna =
(struct netmap_pipe_adapter *)na;
struct ifnet *ifp = na->ifp;
ND("%p: onoff %d", na, onoff);
if (onoff) {
ifp->if_capenable |= IFCAP_NETMAP;
na->na_flags |= NAF_NETMAP_ON;
} else {
ifp->if_capenable &= ~IFCAP_NETMAP;
na->na_flags &= ~NAF_NETMAP_ON;
}
if (pna->peer_ref) {
ND("%p: case 1.a or 2.a, nothing to do", na);
@ -522,8 +519,6 @@ netmap_pipe_dtor(struct netmap_adapter *na)
if (pna->role == NR_REG_PIPE_MASTER)
netmap_pipe_remove(pna->parent, pna);
netmap_adapter_put(pna->parent);
free(na->ifp, M_DEVBUF);
na->ifp = NULL;
pna->parent = NULL;
}
@ -533,7 +528,6 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
struct nmreq pnmr;
struct netmap_adapter *pna; /* parent adapter */
struct netmap_pipe_adapter *mna, *sna, *req;
struct ifnet *ifp, *ifp2;
u_int pipe_id;
int role = nmr->nr_flags & NR_REG_MASK;
int error;
@ -556,7 +550,7 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
ND("parent lookup failed: %d", error);
return error;
}
ND("found parent: %s", NM_IFPNAME(pna->ifp));
ND("found parent: %s", na->name);
if (NETMAP_OWNED_BY_KERN(pna)) {
ND("parent busy");
@ -591,19 +585,12 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
* The endpoint we were asked for holds a reference to
* the other one.
*/
ifp = malloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO);
if (!ifp) {
error = ENOMEM;
goto put_out;
}
strcpy(ifp->if_xname, NM_IFPNAME(pna->ifp));
mna = malloc(sizeof(*mna), M_DEVBUF, M_NOWAIT | M_ZERO);
if (mna == NULL) {
error = ENOMEM;
goto free_ifp;
goto put_out;
}
mna->up.ifp = ifp;
snprintf(mna->up.name, sizeof(mna->up.name), "%s{%d", pna->name, pipe_id);
mna->id = pipe_id;
mna->role = NR_REG_PIPE_MASTER;
@ -618,6 +605,7 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
mna->up.nm_mem = pna->nm_mem;
mna->up.na_lut = pna->na_lut;
mna->up.na_lut_objtotal = pna->na_lut_objtotal;
mna->up.na_lut_objsize = pna->na_lut_objsize;
mna->up.num_tx_rings = 1;
mna->up.num_rx_rings = 1;
@ -629,28 +617,21 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
1, NM_PIPE_MAXSLOTS, NULL);
error = netmap_attach_common(&mna->up);
if (error)
goto free_ifp;
goto free_mna;
/* register the master with the parent */
error = netmap_pipe_add(pna, mna);
if (error)
goto free_mna;
/* create the slave */
ifp2 = malloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO);
if (!ifp) {
error = ENOMEM;
goto free_mna;
}
strcpy(ifp2->if_xname, NM_IFPNAME(pna->ifp));
sna = malloc(sizeof(*mna), M_DEVBUF, M_NOWAIT | M_ZERO);
if (sna == NULL) {
error = ENOMEM;
goto free_ifp2;
goto free_mna;
}
/* most fields are the same, copy from master and then fix */
*sna = *mna;
sna->up.ifp = ifp2;
snprintf(sna->up.name, sizeof(sna->up.name), "%s}%d", pna->name, pipe_id);
sna->role = NR_REG_PIPE_SLAVE;
error = netmap_attach_common(&sna->up);
if (error)
@ -696,12 +677,8 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
free_sna:
free(sna, M_DEVBUF);
free_ifp2:
free(ifp2, M_DEVBUF);
free_mna:
free(mna, M_DEVBUF);
free_ifp:
free(ifp, M_DEVBUF);
put_out:
netmap_adapter_put(pna);
return error;

File diff suppressed because it is too large.


@ -289,6 +289,10 @@ static device_method_t vtnet_methods[] = {
DEVMETHOD_END
};
#ifdef DEV_NETMAP
#include <dev/netmap/if_vtnet_netmap.h>
#endif /* DEV_NETMAP */
static driver_t vtnet_driver = {
"vtnet",
vtnet_methods,
@ -395,6 +399,10 @@ vtnet_attach(device_t dev)
goto fail;
}
#ifdef DEV_NETMAP
vtnet_netmap_attach(sc);
#endif /* DEV_NETMAP */
vtnet_start_taskqueues(sc);
fail:
@ -424,6 +432,10 @@ vtnet_detach(device_t dev)
ether_ifdetach(ifp);
}
#ifdef DEV_NETMAP
netmap_detach(ifp);
#endif /* DEV_NETMAP */
vtnet_free_taskqueues(sc);
if (sc->vtnet_vlan_attach != NULL) {
@ -1735,6 +1747,12 @@ vtnet_rxq_eof(struct vtnet_rxq *rxq)
VTNET_RXQ_LOCK_ASSERT(rxq);
#ifdef DEV_NETMAP
if (netmap_rx_irq(ifp, 0, &deq)) {
return (FALSE);
}
#endif /* DEV_NETMAP */
while (count-- > 0) {
m = virtqueue_dequeue(vq, &len);
if (m == NULL)
@ -2421,6 +2439,13 @@ vtnet_txq_eof(struct vtnet_txq *txq)
deq = 0;
VTNET_TXQ_LOCK_ASSERT(txq);
#ifdef DEV_NETMAP
if (netmap_tx_irq(txq->vtntx_sc->vtnet_ifp, txq->vtntx_id)) {
virtqueue_disable_intr(vq); // XXX luigi
return 0; // XXX or 1 ?
}
#endif /* DEV_NETMAP */
while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) {
m = txhdr->vth_mbuf;
deq++;
@ -2895,6 +2920,11 @@ vtnet_init_rx_queues(struct vtnet_softc *sc)
("%s: too many rx mbufs %d for %d segments", __func__,
sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs));
#ifdef DEV_NETMAP
if (vtnet_netmap_init_rx_buffers(sc))
return 0;
#endif /* DEV_NETMAP */
for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
rxq = &sc->vtnet_rxqs[i];
@ -3047,6 +3077,13 @@ vtnet_init(void *xsc)
sc = xsc;
#ifdef DEV_NETMAP
if (!NA(sc->vtnet_ifp)) {
D("try to attach again");
vtnet_netmap_attach(sc);
}
#endif /* DEV_NETMAP */
VTNET_CORE_LOCK(sc);
vtnet_init_locked(sc);
VTNET_CORE_UNLOCK(sc);


@ -445,6 +445,13 @@ struct netmap_if {
* Set the virtio-net header length used by the client
* of a VALE switch port.
*
* NETMAP_BDG_NEWIF
* create a persistent VALE port with name nr_name.
* Used by vale-ctl -n ...
*
* NETMAP_BDG_DELIF
* delete a persistent VALE port. Used by vale-ctl -d ...
*
* nr_arg1, nr_arg2, nr_arg3 (in/out) command specific
*
*
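
As an illustration of the two new commands (a sketch, not part of this change; the
port name is arbitrary and error handling is minimal), a persistent VALE port can be
created or destroyed roughly as follows, which is what vale-ctl -n / -r do in the
updated tool further below:

#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <net/netmap.h>

static int
vale_port_ctl(int create)
{
    struct nmreq req;
    int fd = open("/dev/netmap", O_RDWR), error;

    if (fd < 0)
        return -1;
    memset(&req, 0, sizeof(req));
    strncpy(req.nr_name, "vale0:p0", sizeof(req.nr_name)); /* arbitrary name */
    req.nr_version = NETMAP_API;
    req.nr_cmd = create ? NETMAP_BDG_NEWIF : NETMAP_BDG_DELIF;
    error = ioctl(fd, NIOCREGIF, &req);
    close(fd);
    return error;
}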
@ -478,11 +485,12 @@ struct nmreq {
uint16_t nr_cmd;
#define NETMAP_BDG_ATTACH 1 /* attach the NIC */
#define NETMAP_BDG_DETACH 2 /* detach the NIC */
#define NETMAP_BDG_LOOKUP_REG 3 /* register lookup function */
#define NETMAP_BDG_REGOPS 3 /* register bridge callbacks */
#define NETMAP_BDG_LIST 4 /* get bridge's info */
#define NETMAP_BDG_VNET_HDR 5 /* set the port virtio-net-hdr length */
#define NETMAP_BDG_OFFSET NETMAP_BDG_VNET_HDR /* deprecated alias */
#define NETMAP_BDG_NEWIF 6 /* create a virtual port */
#define NETMAP_BDG_DELIF 7 /* destroy a virtual port */
uint16_t nr_arg1; /* reserve extra rings in NIOCREGIF */
#define NETMAP_BDG_HOST 1 /* attach the host stack on ATTACH */
@ -517,6 +525,7 @@ enum { NR_REG_DEFAULT = 0, /* backward compat, should not be used. */
#define NIOCREGIF _IOWR('i', 146, struct nmreq) /* interface register */
#define NIOCTXSYNC _IO('i', 148) /* sync tx queues */
#define NIOCRXSYNC _IO('i', 149) /* sync rx queues */
#define NIOCCONFIG _IOWR('i',150, struct nm_ifreq) /* for ext. modules */
#endif /* !NIOCREGIF */
@ -533,4 +542,15 @@ nm_ring_empty(struct netmap_ring *ring)
return (ring->cur == ring->tail);
}
/*
* Opaque structure that is passed to an external kernel
* module via ioctl(fd, NIOCCONFIG, req) for a user-owned
* bridge port (at this point ephemeral VALE interface).
*/
#define NM_IFRDATA_LEN 256
struct nm_ifreq {
char nifr_name[IFNAMSIZ];
char data[NM_IFRDATA_LEN];
};
#endif /* _NET_NETMAP_H_ */
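
A sketch of how the new ioctl might be used from userspace (the payload byte and the
helper name are illustrative assumptions; the meaning of the data area is entirely up
to the external module owning the port):

#include <string.h>
#include <sys/ioctl.h>
#include <net/netmap.h>

/* fd is a /dev/netmap descriptor already bound to the port via NIOCREGIF */
static int
send_module_config(int fd, const char *port)
{
    struct nm_ifreq ifr;

    memset(&ifr, 0, sizeof(ifr));
    strncpy(ifr.nifr_name, port, sizeof(ifr.nifr_name));
    ifr.data[0] = 1;            /* opaque payload, interpreted by the module */
    return ioctl(fd, NIOCCONFIG, &ifr);
}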


@ -149,21 +149,21 @@ nm_ring_space(struct netmap_ring *ring)
#define ND(_fmt, ...) do {} while(0)
#define D(_fmt, ...) \
do { \
struct timeval t0; \
gettimeofday(&t0, NULL); \
struct timeval _t0; \
gettimeofday(&_t0, NULL); \
fprintf(stderr, "%03d.%06d %s [%d] " _fmt "\n", \
(int)(t0.tv_sec % 1000), (int)t0.tv_usec, \
(int)(_t0.tv_sec % 1000), (int)_t0.tv_usec, \
__FUNCTION__, __LINE__, ##__VA_ARGS__); \
} while (0)
/* Rate limited version of "D", lps indicates how many per second */
#define RD(lps, format, ...) \
do { \
static int t0, __cnt; \
static int __t0, __cnt; \
struct timeval __xxts; \
gettimeofday(&__xxts, NULL); \
if (t0 != __xxts.tv_sec) { \
t0 = __xxts.tv_sec; \
if (__t0 != __xxts.tv_sec) { \
__t0 = __xxts.tv_sec; \
__cnt = 0; \
} \
if (__cnt++ < lps) { \
@ -495,23 +495,23 @@ nm_open(const char *ifname, const struct nmreq *req,
(char *)d->mem + d->memsize;
}
if (nr_flags == NR_REG_SW) { /* host stack */
if (d->req.nr_flags == NR_REG_SW) { /* host stack */
d->first_tx_ring = d->last_tx_ring = d->req.nr_tx_rings;
d->first_rx_ring = d->last_rx_ring = d->req.nr_rx_rings;
} else if (nr_flags == NR_REG_ALL_NIC) { /* only nic */
} else if (d->req.nr_flags == NR_REG_ALL_NIC) { /* only nic */
d->first_tx_ring = 0;
d->first_rx_ring = 0;
d->last_tx_ring = d->req.nr_tx_rings - 1;
d->last_rx_ring = d->req.nr_rx_rings - 1;
} else if (nr_flags == NR_REG_NIC_SW) {
} else if (d->req.nr_flags == NR_REG_NIC_SW) {
d->first_tx_ring = 0;
d->first_rx_ring = 0;
d->last_tx_ring = d->req.nr_tx_rings;
d->last_rx_ring = d->req.nr_rx_rings;
} else if (nr_flags == NR_REG_ONE_NIC) {
} else if (d->req.nr_flags == NR_REG_ONE_NIC) {
/* XXX check validity */
d->first_tx_ring = d->last_tx_ring =
d->first_rx_ring = d->last_rx_ring = nr_ringid;
d->first_rx_ring = d->last_rx_ring = d->req.nr_ringid & NETMAP_RING_MASK;
} else { /* pipes */
d->first_tx_ring = d->last_tx_ring = 0;
d->first_rx_ring = d->last_rx_ring = 0;
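
For reference, a small sketch of the behaviour fixed above (interface name and ring
number are illustrative): the bound ring range is now derived from the request stored
in the descriptor (d->req), so a single-ring binding reports first == last.

#define NETMAP_WITH_LIBS
#include <stdio.h>
#include <net/netmap_user.h>

int
main(void)
{
    /* "netmap:ix0-2" requests a single hardware ring pair (NR_REG_ONE_NIC) */
    struct nm_desc *d = nm_open("netmap:ix0-2", NULL, 0, NULL);

    if (d == NULL)
        return 1;
    printf("tx rings %u..%u, rx rings %u..%u\n",
        d->first_tx_ring, d->last_tx_ring,
        d->first_rx_ring, d->last_rx_ring); /* expect 2..2 for both */
    nm_close(d);
    return 0;
}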

sys/net/paravirt.h (new file, 157 lines)

@ -0,0 +1,157 @@
/*
* Copyright (C) 2013 Luigi Rizzo. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef NET_PARAVIRT_H
#define NET_PARAVIRT_H
/*
* $FreeBSD$
*
Support for virtio-like communication between host (H) and guest (G) NICs.
THIS IS EXPERIMENTAL CODE AND SUBJECT TO CHANGE.
The guest allocates the shared Communication Status Block (csb) and
writes its physical address at CSBAL and CSBAH (data is little endian).
csb->csb_on enables the mode. If disabled, the device acts as a regular one.
Notifications for tx and rx are exchanged without vm exits
if possible. In particular (only mentioning csb mode below),
the following actions are performed. In the description below,
"double check" means verifying again the condition that caused
the previous action, and reverting the action if the condition has
changed. The condition typically depends on a variable set by the
other party, and the double check is done to avoid races. E.g.
// start with A=0
again:
// do something
if ( cond(C) ) { // C is written by the other side
A = 1;
// barrier
if ( !cond(C) ) {
A = 0;
goto again;
}
}
TX: start from idle:
H starts with host_need_txkick=1 when the I/O thread bh is idle. Upon new
transmissions, G always updates guest_tdt. If host_need_txkick == 1,
G also writes to the TDT, which acts as a kick to H (so pending
writes are always dispatched to H as soon as possible.)
TX: active state:
On the kick (TDT write) H sets host_need_txkick == 0 (if not
done already by G), and starts an I/O thread trying to consume
packets from TDH to guest_tdt, periodically refreshing host_tdh
and TDH. When host_tdh == guest_tdt, H sets host_need_txkick=1,
and then does the "double check" for race avoidance.
TX: G runs out of buffers
XXX there are two mechanisms, one boolean (using guest_need_txkick)
and one with a threshold (using guest_txkick_at). They are mutually
exclusive.
BOOLEAN: when G has no space, it sets guest_need_txkick=1 and does
the double check. If H finds guest_need_txkick == 1 on a write
to TDH, it also generates an interrupt.
THRESHOLD: G sets guest_txkick_at to the TDH value for which it
wants to receive an interrupt. When H detects that TDH moves
across guest_txkick_at, it generates an interrupt.
This second mechanism reduces the number of interrupts and
TDT writes on the transmit side when the host is too slow.
RX: start from idle
G starts with guest_need_rxkick = 1 when the receive ring is empty.
As packets arrive, H updates host_rdh (and RDH) and also generates an
interrupt when guest_need_rxkick == 1 (so incoming packets are
always reported to G as soon as possible, apart from interrupt
moderation delays). It also tracks guest_rdt for new buffers.
RX: active state
As the interrupt arrives, G sets guest_need_rxkick = 0 and starts
draining packets from the receive ring, while updating guest_rdt
When G runs out of packets it sets guest_need_rxkick=1 and does the
double check.
RX: H runs out of buffers
XXX there are two mechanisms, one boolean (using host_need_rxkick)
and one with a threshold (using host_rxkick_at). They are mutually
exclusive.
BOOLEAN: when H has no space, it sets host_need_rxkick=1 and does the
double check. If G finds host_need_rxkick==1 on updating guest_rdt,
it also writes to RDT causing a kick to H.
THRESHOLD: H sets host_rxkick_at to the RDT value for which it wants
to receive a kick. When G detects that guest_rdt moves across
host_rxkick_at, it writes to RDT, thus generating a kick.
This second mechanism reduces the number of kicks and
RDT writes on the receive side when the guest is too slow and
would free only a few buffers at a time.
*/
struct paravirt_csb {
/* XXX revise the layout to minimize cache bounces.
* Usage is described as follows:
* [GH][RW][+-0] guest/host reads/writes frequently/rarely/almost never
*/
/* these are (mostly) written by the guest */
uint32_t guest_tdt; /* GW+ HR+ pkt to transmit */
uint32_t guest_need_txkick; /* GW- HR+ G ran out of tx bufs, request kick */
uint32_t guest_need_rxkick; /* GW- HR+ G ran out of rx pkts, request kick */
uint32_t guest_csb_on; /* GW- HR+ enable paravirtual mode */
uint32_t guest_rdt; /* GW+ HR+ rx buffers available */
uint32_t guest_txkick_at; /* GW- HR+ tx ring pos. where G expects an intr */
uint32_t guest_use_msix; /* GW0 HR0 guest uses MSI-X interrupts. */
uint32_t pad[9];
/* these are (mostly) written by the host */
uint32_t host_tdh; /* GR0 HW- shadow register, mostly unused */
uint32_t host_need_txkick; /* GR+ HW- start the iothread */
uint32_t host_txcycles_lim; /* GW- HR- how much to spin before sleep.
* set by the guest */
uint32_t host_txcycles; /* GR0 HW- counter, but no need to be exported */
uint32_t host_rdh; /* GR0 HW- shadow register, mostly unused */
uint32_t host_need_rxkick; /* GR+ HW- flush rx queued packets */
uint32_t host_isr; /* GR* HW* shadow copy of ISR */
uint32_t host_rxkick_at; /* GR+ HW- rx ring pos where H expects a kick */
uint32_t vnet_ring_high; /* Vnet ring physical address high. */
uint32_t vnet_ring_low; /* Vnet ring physical address low. */
};
#define NET_PARAVIRT_CSB_SIZE 4096
#define NET_PARAVIRT_NONE (~((uint32_t)0))
#ifdef QEMU_PCI_H
/*
* API functions only available within QEMU
*/
void paravirt_configure_csb(struct paravirt_csb** csb, uint32_t csbbal,
uint32_t csbbah, QEMUBH* tx_bh, AddressSpace *as);
#endif /* QEMU_PCI_H */
#endif /* NET_PARAVIRT_H */
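
To make the "double check" concrete for the BOOLEAN tx case described above, here is a
guest-side sketch. The helper, the barrier primitive and the ring-index arithmetic are
assumptions; only the csb fields come from this header.

#include <stdint.h>
#include <net/paravirt.h>

/* hypothetical helper: free tx slots between the guest producer (guest_tdt)
 * and the host consumer (host_tdh), assuming indices in [0, ring_size) as
 * for the e1000 TDT/TDH registers; one slot is kept empty.
 */
static uint32_t
tx_free_slots(const struct paravirt_csb *csb, uint32_t ring_size)
{
    uint32_t busy = csb->guest_tdt >= csb->host_tdh ?
        csb->guest_tdt - csb->host_tdh :
        csb->guest_tdt + ring_size - csb->host_tdh;

    return ring_size - 1 - busy;
}

/* BOOLEAN mechanism: G ran out of tx buffers, request an interrupt and
 * double check to avoid racing with H advancing host_tdh concurrently.
 */
static void
guest_tx_out_of_buffers(struct paravirt_csb *csb, uint32_t ring_size)
{
    csb->guest_need_txkick = 1;     /* ask H for an interrupt */
    __sync_synchronize();           /* barrier (wmb() in a real driver) */
    if (tx_free_slots(csb, ring_size) > 0) {
        /* the condition changed under us: revert and keep sending */
        csb->guest_need_txkick = 0;
    }
}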


@ -37,6 +37,8 @@
*
*/
// #define TRASH_VHOST_HDR
#define _GNU_SOURCE /* for CPU_SET() */
#include <stdio.h>
#define NETMAP_WITH_LIBS
@ -123,12 +125,14 @@ struct virt_header {
uint8_t fields[VIRT_HDR_MAX];
};
#define MAX_BODYSIZE 16384
struct pkt {
struct virt_header vh;
struct ether_header eh;
struct ip ip;
struct udphdr udp;
uint8_t body[2048]; // XXX hardwired
uint8_t body[MAX_BODYSIZE]; // XXX hardwired
} __attribute__((__packed__));
struct ip_range {
@ -144,6 +148,15 @@ struct mac_range {
/* ifname can be netmap:foo-xxxx */
#define MAX_IFNAMELEN 64 /* our buffer for ifname */
//#define MAX_PKTSIZE 1536
#define MAX_PKTSIZE MAX_BODYSIZE /* XXX: + IP_HDR + ETH_HDR */
/* compact timestamp to fit into 60 byte packet. (enough to obtain RTT) */
struct tstamp {
uint32_t sec;
uint32_t nsec;
};
/*
* global arguments for all threads
*/
@ -168,6 +181,8 @@ struct glob_arg {
#define OPT_TS 16 /* add a timestamp */
#define OPT_INDIRECT 32 /* use indirect buffers, tx only */
#define OPT_DUMP 64 /* dump rx/tx traffic */
#define OPT_MONITOR_TX 128
#define OPT_MONITOR_RX 256
int dev_type;
#ifndef NO_PCAP
pcap_t *p;
@ -179,7 +194,6 @@ struct glob_arg {
int affinity;
int main_fd;
struct nm_desc *nmd;
uint64_t nmd_flags;
int report_interval; /* milliseconds between prints */
void *(*td_body)(void *);
void *mmap_addr;
@ -309,6 +323,7 @@ sigint_h(int sig)
int i;
(void)sig; /* UNUSED */
D("received control-C on thread %p", pthread_self());
for (i = 0; i < global_nthreads; i++) {
targs[i].cancel = 1;
}
@ -642,9 +657,37 @@ initialize_packet(struct targ *targ)
eh->ether_type = htons(ETHERTYPE_IP);
bzero(&pkt->vh, sizeof(pkt->vh));
#ifdef TRASH_VHOST_HDR
/* set bogus content */
pkt->vh.fields[0] = 0xff;
pkt->vh.fields[1] = 0xff;
pkt->vh.fields[2] = 0xff;
pkt->vh.fields[3] = 0xff;
pkt->vh.fields[4] = 0xff;
pkt->vh.fields[5] = 0xff;
#endif /* TRASH_VHOST_HDR */
// dump_payload((void *)pkt, targ->g->pkt_size, NULL, 0);
}
static void
set_vnet_hdr_len(struct targ *t)
{
int err, l = t->g->virt_header;
struct nmreq req;
if (l == 0)
return;
memset(&req, 0, sizeof(req));
bcopy(t->nmd->req.nr_name, req.nr_name, sizeof(req.nr_name));
req.nr_version = NETMAP_API;
req.nr_cmd = NETMAP_BDG_VNET_HDR;
req.nr_arg1 = l;
err = ioctl(t->fd, NIOCREGIF, &req);
if (err) {
D("Unable to set vnet header length %d", l);
}
}
/*
@ -760,10 +803,13 @@ pinger_body(void *data)
if (nm_ring_empty(ring)) {
D("-- ouch, cannot send");
} else {
struct tstamp *tp;
nm_pkt_copy(frame, p, size);
clock_gettime(CLOCK_REALTIME_PRECISE, &ts);
bcopy(&sent, p+42, sizeof(sent));
bcopy(&ts, p+46, sizeof(ts));
tp = (struct tstamp *)(p+46);
tp->sec = (uint32_t)ts.tv_sec;
tp->nsec = (uint32_t)ts.tv_nsec;
sent++;
ring->head = ring->cur = nm_ring_next(ring, ring->cur);
}
@ -780,12 +826,15 @@ pinger_body(void *data)
ring = NETMAP_RXRING(nifp, i);
while (!nm_ring_empty(ring)) {
uint32_t seq;
struct tstamp *tp;
slot = &ring->slot[ring->cur];
p = NETMAP_BUF(ring, slot->buf_idx);
clock_gettime(CLOCK_REALTIME_PRECISE, &now);
bcopy(p+42, &seq, sizeof(seq));
bcopy(p+46, &ts, sizeof(ts));
tp = (struct tstamp *)(p+46);
ts.tv_sec = (time_t)tp->sec;
ts.tv_nsec = (long)tp->nsec;
ts.tv_sec = now.tv_sec - ts.tv_sec;
ts.tv_nsec = now.tv_nsec - ts.tv_nsec;
if (ts.tv_nsec < 0) {
@ -978,7 +1027,7 @@ sender_body(void *data)
{
struct targ *targ = (struct targ *) data;
struct pollfd pfd = { .fd = targ->fd, .events = POLLOUT };
struct netmap_if *nifp = targ->nmd->nifp;
struct netmap_if *nifp;
struct netmap_ring *txring;
int i, n = targ->g->npackets / targ->g->nthreads;
int64_t sent = 0;
@ -993,7 +1042,7 @@ sender_body(void *data)
frame += sizeof(pkt->vh) - targ->g->virt_header;
size = targ->g->pkt_size + targ->g->virt_header;
D("start");
D("start, fd %d main_fd %d", targ->fd, targ->g->main_fd);
if (setaffinity(targ->thread, targ->affinity))
goto quit;
@ -1035,6 +1084,7 @@ sender_body(void *data)
int tosend = 0;
int frags = targ->g->frags;
nifp = targ->nmd->nifp;
while (!targ->cancel && (n == 0 || sent < n)) {
if (rate_limit && tosend <= 0) {
@ -1088,12 +1138,17 @@ sender_body(void *data)
}
}
/* flush any remaining packets */
D("flush tail %d head %d on thread %p",
txring->tail, txring->head,
pthread_self());
ioctl(pfd.fd, NIOCTXSYNC, NULL);
/* final part: wait all the TX queues to be empty. */
for (i = targ->nmd->first_tx_ring; i <= targ->nmd->last_tx_ring; i++) {
txring = NETMAP_TXRING(nifp, i);
while (nm_tx_pending(txring)) {
RD(5, "pending tx tail %d head %d on ring %d",
txring->tail, txring->head, i);
ioctl(pfd.fd, NIOCTXSYNC, NULL);
usleep(1); /* wait 1 tick */
}
@ -1152,7 +1207,7 @@ receiver_body(void *data)
{
struct targ *targ = (struct targ *) data;
struct pollfd pfd = { .fd = targ->fd, .events = POLLIN };
struct netmap_if *nifp = targ->nmd->nifp;
struct netmap_if *nifp;
struct netmap_ring *rxring;
int i;
uint64_t received = 0;
@ -1160,21 +1215,21 @@ receiver_body(void *data)
if (setaffinity(targ->thread, targ->affinity))
goto quit;
D("reading from %s fd %d main_fd %d",
targ->g->ifname, targ->fd, targ->g->main_fd);
/* unbounded wait for the first packet. */
for (;;) {
for (;!targ->cancel;) {
i = poll(&pfd, 1, 1000);
if (i > 0 && !(pfd.revents & POLLERR))
break;
RD(1, "waiting for initial packets, poll returns %d %d",
i, pfd.revents);
}
/* main loop, exit after 1s silence */
clock_gettime(CLOCK_REALTIME_PRECISE, &targ->tic);
if (targ->g->dev_type == DEV_TAP) {
D("reading from %s fd %d", targ->g->ifname, targ->g->main_fd);
while (!targ->cancel) {
char buf[2048];
char buf[MAX_BODYSIZE];
/* XXX should we poll ? */
if (read(targ->g->main_fd, buf, sizeof(buf)) > 0)
targ->count++;
@ -1183,11 +1238,14 @@ receiver_body(void *data)
} else if (targ->g->dev_type == DEV_PCAP) {
while (!targ->cancel) {
/* XXX should we poll ? */
pcap_dispatch(targ->g->p, targ->g->burst, receive_pcap, NULL);
pcap_dispatch(targ->g->p, targ->g->burst, receive_pcap,
(u_char *)&targ->count);
}
#endif /* !NO_PCAP */
} else {
int dump = targ->g->options & OPT_DUMP;
nifp = targ->nmd->nifp;
while (!targ->cancel) {
/* Once we started to receive packets, wait at most 1 second
before quitting. */
@ -1333,6 +1391,8 @@ start_threads(struct glob_arg *g)
if (g->dev_type == DEV_NETMAP) {
struct nm_desc nmd = *g->nmd; /* copy, we overwrite ringid */
uint64_t nmd_flags = 0;
nmd.self = &nmd;
if (g->nthreads > 1) {
if (nmd.req.nr_flags != NR_REG_ALL_NIC) {
@ -1344,18 +1404,23 @@ start_threads(struct glob_arg *g)
}
/* Only touch one of the rings (rx is already ok) */
if (g->td_body == receiver_body)
nmd.req.nr_ringid |= NETMAP_NO_TX_POLL;
nmd_flags |= NETMAP_NO_TX_POLL;
/* register interface. Override ifname and ringid etc. */
if (g->options & OPT_MONITOR_TX)
nmd.req.nr_flags |= NR_MONITOR_TX;
if (g->options & OPT_MONITOR_RX)
nmd.req.nr_flags |= NR_MONITOR_RX;
t->nmd = nm_open(t->g->ifname, NULL, g->nmd_flags |
NM_OPEN_IFNAME | NM_OPEN_NO_MMAP, g->nmd);
t->nmd = nm_open(t->g->ifname, NULL, nmd_flags |
NM_OPEN_IFNAME | NM_OPEN_NO_MMAP, &nmd);
if (t->nmd == NULL) {
D("Unable to open %s: %s",
t->g->ifname, strerror(errno));
continue;
}
t->fd = t->nmd->fd;
set_vnet_hdr_len(t);
} else {
targs[i].fd = g->main_fd;
@ -1573,7 +1638,7 @@ main(int arc, char **argv)
g.virt_header = 0;
while ( (ch = getopt(arc, argv,
"a:f:F:n:i:Il:d:s:D:S:b:c:o:p:T:w:WvR:XC:H:e:")) != -1) {
"a:f:F:n:i:Il:d:s:D:S:b:c:o:p:T:w:WvR:XC:H:e:m:")) != -1) {
struct sf *fn;
switch(ch) {
@ -1707,6 +1772,15 @@ main(int arc, char **argv)
case 'e': /* extra bufs */
g.extra_bufs = atoi(optarg);
break;
case 'm':
if (strcmp(optarg, "tx") == 0) {
g.options |= OPT_MONITOR_TX;
} else if (strcmp(optarg, "rx") == 0) {
g.options |= OPT_MONITOR_RX;
} else {
D("unrecognized monitor mode %s", optarg);
}
break;
}
}
@ -1723,8 +1797,8 @@ main(int arc, char **argv)
if (g.cpus == 0)
g.cpus = i;
if (g.pkt_size < 16 || g.pkt_size > 1536) {
D("bad pktsize %d\n", g.pkt_size);
if (g.pkt_size < 16 || g.pkt_size > MAX_PKTSIZE) {
D("bad pktsize %d [16..%d]\n", g.pkt_size, MAX_PKTSIZE);
usage();
}
@ -1766,26 +1840,25 @@ main(int arc, char **argv)
} else if (g.dev_type == DEV_PCAP) {
char pcap_errbuf[PCAP_ERRBUF_SIZE];
D("using pcap on %s", g.ifname);
pcap_errbuf[0] = '\0'; // init the buffer
g.p = pcap_open_live(g.ifname, 0, 1, 100, pcap_errbuf);
g.p = pcap_open_live(g.ifname, 256 /* XXX */, 1, 100, pcap_errbuf);
if (g.p == NULL) {
D("cannot open pcap on %s", g.ifname);
usage();
}
g.main_fd = pcap_fileno(g.p);
D("using pcap on %s fileno %d", g.ifname, g.main_fd);
#endif /* !NO_PCAP */
} else if (g.dummy_send) { /* but DEV_NETMAP */
D("using a dummy send routine");
} else {
struct nm_desc base_nmd;
struct nmreq base_nmd;
bzero(&base_nmd, sizeof(base_nmd));
g.nmd_flags = 0;
g.nmd_flags |= parse_nmr_config(g.nmr_config, &base_nmd.req);
parse_nmr_config(g.nmr_config, &base_nmd);
if (g.extra_bufs) {
base_nmd.req.nr_arg3 = g.extra_bufs;
g.nmd_flags |= NM_OPEN_ARG3;
base_nmd.nr_arg3 = g.extra_bufs;
}
/*
@ -1795,7 +1868,7 @@ main(int arc, char **argv)
* which in turn may take some time for the PHY to
* reconfigure. We do the open here to have time to reset.
*/
g.nmd = nm_open(g.ifname, NULL, g.nmd_flags, &base_nmd);
g.nmd = nm_open(g.ifname, &base_nmd, 0, NULL);
if (g.nmd == NULL) {
D("Unable to open %s: %s", g.ifname, strerror(errno));
goto out;
@ -1803,7 +1876,11 @@ main(int arc, char **argv)
g.main_fd = g.nmd->fd;
D("mapped %dKB at %p", g.nmd->req.nr_memsize>>10, g.nmd->mem);
devqueues = g.nmd->req.nr_rx_rings;
/* get num of queues in tx or rx */
if (g.td_body == sender_body)
devqueues = g.nmd->req.nr_tx_rings;
else
devqueues = g.nmd->req.nr_rx_rings;
/* validate provided nthreads. */
if (g.nthreads < 1 || g.nthreads > devqueues) {
@ -1819,12 +1896,14 @@ main(int arc, char **argv)
req->nr_offset, req->nr_tx_rings, req->nr_rx_rings,
req->nr_arg2);
for (i = 0; i <= req->nr_tx_rings; i++) {
D(" TX%d at 0x%lx", i,
(char *)NETMAP_TXRING(nifp, i) - (char *)nifp);
struct netmap_ring *ring = NETMAP_TXRING(nifp, i);
D(" TX%d at 0x%lx slots %d", i,
(char *)ring - (char *)nifp, ring->num_slots);
}
for (i = 0; i <= req->nr_rx_rings; i++) {
D(" RX%d at 0x%lx", i,
(char *)NETMAP_RXRING(nifp, i) - (char *)nifp);
struct netmap_ring *ring = NETMAP_RXRING(nifp, i);
D(" RX%d at 0x%lx slots %d", i,
(char *)ring - (char *)nifp, ring->num_slots);
}
}


@ -38,6 +38,7 @@
#include <net/netmap.h>
#include <net/netmap_user.h>
#include <libgen.h> /* basename */
#include <stdlib.h> /* atoi, free */
/* debug support */
#define ND(format, ...) do {} while(0)
@ -45,8 +46,47 @@
fprintf(stderr, "%s [%d] " format "\n", \
__FUNCTION__, __LINE__, ##__VA_ARGS__)
/* XXX cut and paste from pkt-gen.c because I'm not sure whether this
* program may include nm_util.h
*/
void parse_nmr_config(const char* conf, struct nmreq *nmr)
{
char *w, *tok;
int i, v;
nmr->nr_tx_rings = nmr->nr_rx_rings = 0;
nmr->nr_tx_slots = nmr->nr_rx_slots = 0;
if (conf == NULL || ! *conf)
return;
w = strdup(conf);
for (i = 0, tok = strtok(w, ","); tok; i++, tok = strtok(NULL, ",")) {
v = atoi(tok);
switch (i) {
case 0:
nmr->nr_tx_slots = nmr->nr_rx_slots = v;
break;
case 1:
nmr->nr_rx_slots = v;
break;
case 2:
nmr->nr_tx_rings = nmr->nr_rx_rings = v;
break;
case 3:
nmr->nr_rx_rings = v;
break;
default:
D("ignored config: %s", tok);
break;
}
}
D("txr %d txd %d rxr %d rxd %d",
nmr->nr_tx_rings, nmr->nr_tx_slots,
nmr->nr_rx_rings, nmr->nr_rx_slots);
free(w);
}
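/* A brief usage sketch of the parser above (the wrapper function and the
 * sample string are illustrative only):
 */
static void
example_config(void)
{
    struct nmreq nmr;

    memset(&nmr, 0, sizeof(nmr));
    parse_nmr_config("2048,1024,2,1", &nmr);
    /* now nr_tx_slots == 2048, nr_rx_slots == 1024,
     * nr_tx_rings == 2 and nr_rx_rings == 1
     */
}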
static int
bdg_ctl(const char *name, int nr_cmd, int nr_arg)
bdg_ctl(const char *name, int nr_cmd, int nr_arg, char *nmr_config)
{
struct nmreq nmr;
int error = 0;
@ -62,8 +102,19 @@ bdg_ctl(const char *name, int nr_cmd, int nr_arg)
if (name != NULL) /* might be NULL */
strncpy(nmr.nr_name, name, sizeof(nmr.nr_name));
nmr.nr_cmd = nr_cmd;
parse_nmr_config(nmr_config, &nmr);
switch (nr_cmd) {
case NETMAP_BDG_DELIF:
case NETMAP_BDG_NEWIF:
error = ioctl(fd, NIOCREGIF, &nmr);
if (error == -1) {
ND("Unable to %s %s", nr_cmd == NETMAP_BDG_DELIF ? "delete":"create", name);
perror(name);
} else {
ND("Success to %s %s", nr_cmd == NETMAP_BDG_DELIF ? "delete":"create", name);
}
break;
case NETMAP_BDG_ATTACH:
case NETMAP_BDG_DETACH:
if (nr_arg && nr_arg != NETMAP_BDG_HOST)
@ -120,7 +171,7 @@ main(int argc, char *argv[])
{
int ch, nr_cmd = 0, nr_arg = 0;
const char *command = basename(argv[0]);
char *name = NULL;
char *name = NULL, *nmr_config = NULL;
if (argc > 3) {
usage:
@ -131,12 +182,15 @@ main(int argc, char *argv[])
"\t-d interface interface name to be detached\n"
"\t-a interface interface name to be attached\n"
"\t-h interface interface name to be attached with the host stack\n"
"\t-n interface interface name to be created\n"
"\t-r interface interface name to be deleted\n"
"\t-l list all or specified bridge's interfaces (default)\n"
"\t-C string ring/slot setting of an interface creating by -n\n"
"", command);
return 0;
}
while ((ch = getopt(argc, argv, "d:a:h:g:l")) != -1) {
while ((ch = getopt(argc, argv, "d:a:h:g:l:n:r:C:")) != -1) {
name = optarg; /* default */
switch (ch) {
default:
@ -152,6 +206,12 @@ main(int argc, char *argv[])
nr_cmd = NETMAP_BDG_ATTACH;
nr_arg = NETMAP_BDG_HOST;
break;
case 'n':
nr_cmd = NETMAP_BDG_NEWIF;
break;
case 'r':
nr_cmd = NETMAP_BDG_DELIF;
break;
case 'g':
nr_cmd = 0;
break;
@ -160,6 +220,9 @@ main(int argc, char *argv[])
if (optind < argc && argv[optind][0] == '-')
name = NULL;
break;
case 'C':
nmr_config = strdup(optarg);
break;
}
if (optind != argc) {
// fprintf(stderr, "optind %d argc %d\n", optind, argc);
@ -168,5 +231,5 @@ main(int argc, char *argv[])
}
if (argc == 1)
nr_cmd = NETMAP_BDG_LIST;
return bdg_ctl(name, nr_cmd, nr_arg) ? 1 : 0;
return bdg_ctl(name, nr_cmd, nr_arg, nmr_config) ? 1 : 0;
}