split netmap code according to functions:

- netmap.c		base code
- netmap_freebsd.c	FreeBSD-specific code
- netmap_generic.c	emulate netmap over standard drivers
- netmap_mbq.c		simple mbuf tailq
- netmap_mem2.c		memory management
- netmap_vale.c		VALE switch

simplify device-specific code
Luigi Rizzo 2013-12-15 08:37:24 +00:00
parent c3e51c9ce1
commit f9790aeb88
18 changed files with 5936 additions and 3573 deletions
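
For orientation, a condensed sketch of the driver-facing shape after this change, distilled from the per-driver diffs below. The register, txsync and rxsync callbacks now receive a struct netmap_adapter * instead of a struct ifnet *, and the register routine only toggles the generic flags with nm_set_native_flags()/nm_clear_native_flags(). The foo_* softc, lock and init/stop helpers here are hypothetical placeholders, not part of the commit:

static int
foo_netmap_reg(struct netmap_adapter *na, int onoff)
{
	struct ifnet *ifp = na->ifp;
	struct foo_softc *sc = ifp->if_softc;	/* hypothetical driver softc */

	FOO_LOCK(sc);
	foo_stop(sc);			/* quiesce the NIC, clears IFF_DRV_RUNNING */
	if (onoff)
		nm_set_native_flags(na);	/* enter native netmap mode */
	else
		nm_clear_native_flags(na);	/* return to regular operation */
	foo_init_locked(sc);		/* reinit rings, also re-enables interrupts */
	FOO_UNLOCK(sc);
	return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
}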

View File

@ -1880,7 +1880,11 @@ dev/nand/nfc_if.m optional nand
dev/ncv/ncr53c500.c optional ncv
dev/ncv/ncr53c500_pccard.c optional ncv pccard
dev/netmap/netmap.c optional netmap
dev/netmap/netmap_freebsd.c optional netmap
dev/netmap/netmap_generic.c optional netmap
dev/netmap/netmap_mbq.c optional netmap
dev/netmap/netmap_mem2.c optional netmap
dev/netmap/netmap_vale.c optional netmap
# compile-with "${NORMAL_C} -Wconversion -Wextra"
dev/nge/if_nge.c optional nge
dev/nxge/if_nxge.c optional nxge \

View File

@ -26,7 +26,7 @@
/*
* $FreeBSD$
*
* netmap support for em.
* netmap support for: em.
*
* For more details on netmap support please see ixgbe_netmap.h
*/
@ -39,10 +39,6 @@
#include <dev/netmap/netmap_kern.h>
static void em_netmap_block_tasks(struct adapter *);
static void em_netmap_unblock_tasks(struct adapter *);
// XXX do we need to block/unblock the tasks ?
static void
em_netmap_block_tasks(struct adapter *adapter)
@ -85,45 +81,31 @@ em_netmap_unblock_tasks(struct adapter *adapter)
/*
* Register/unregister routine
* Register/unregister. We are already under netmap lock.
*/
static int
em_netmap_reg(struct ifnet *ifp, int onoff)
em_netmap_reg(struct netmap_adapter *na, int onoff)
{
struct ifnet *ifp = na->ifp;
struct adapter *adapter = ifp->if_softc;
struct netmap_adapter *na = NA(ifp);
int error = 0;
if (na == NULL)
return EINVAL; /* no netmap support here */
EM_CORE_LOCK(adapter);
em_disable_intr(adapter);
/* Tell the stack that the interface is no longer active */
ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
em_netmap_block_tasks(adapter);
/* enable or disable flags and callbacks in na and ifp */
if (onoff) {
ifp->if_capenable |= IFCAP_NETMAP;
na->if_transmit = ifp->if_transmit;
ifp->if_transmit = netmap_transmit;
em_init_locked(adapter);
if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) == 0) {
error = ENOMEM;
goto fail;
}
nm_set_native_flags(na);
} else {
fail:
/* return to non-netmap mode */
ifp->if_transmit = na->if_transmit;
ifp->if_capenable &= ~IFCAP_NETMAP;
em_init_locked(adapter); /* also enable intr */
nm_clear_native_flags(na);
}
em_init_locked(adapter); /* also enable intr */
em_netmap_unblock_tasks(adapter);
return (error);
EM_CORE_UNLOCK(adapter);
return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
}
@ -131,93 +113,103 @@ em_netmap_reg(struct ifnet *ifp, int onoff)
* Reconcile kernel and user view of the transmit ring.
*/
static int
em_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int flags)
em_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
struct adapter *adapter = ifp->if_softc;
struct tx_ring *txr = &adapter->tx_rings[ring_nr];
struct netmap_adapter *na = NA(ifp);
struct ifnet *ifp = na->ifp;
struct netmap_kring *kring = &na->tx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
u_int j, k, l, n = 0, lim = kring->nkr_num_slots - 1;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int n, new_slots;
u_int const lim = kring->nkr_num_slots - 1;
u_int const cur = nm_txsync_prologue(kring, &new_slots);
/* generate an interrupt approximately every half ring */
u_int report_frequency = kring->nkr_num_slots >> 1;
k = ring->cur;
if (k > lim)
/* device-specific */
struct adapter *adapter = ifp->if_softc;
struct tx_ring *txr = &adapter->tx_rings[ring_nr];
if (cur > lim) /* error checking in nm_txsync_prologue() */
return netmap_ring_reinit(kring);
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
BUS_DMASYNC_POSTREAD);
/*
* Process new packets to send. j is the current index in the
* netmap ring, l is the corresponding index in the NIC ring.
* First part: process new packets to send.
*/
j = kring->nr_hwcur;
if (j != k) { /* we have new packets to send */
l = netmap_idx_k2n(kring, j);
for (n = 0; j != k; n++) {
/* slot is the current slot in the netmap ring */
struct netmap_slot *slot = &ring->slot[j];
/* curr is the current slot in the nic ring */
struct e1000_tx_desc *curr = &txr->tx_base[l];
struct em_buffer *txbuf = &txr->tx_buffers[l];
int flags = ((slot->flags & NS_REPORT) ||
j == 0 || j == report_frequency) ?
E1000_TXD_CMD_RS : 0;
nm_i = kring->nr_hwcur;
if (nm_i != cur) { /* we have new packets to send */
nic_i = netmap_idx_k2n(kring, nm_i);
for (n = 0; nm_i != cur; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
u_int len = slot->len;
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
u_int len = slot->len;
if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) {
return netmap_ring_reinit(kring);
}
/* device-specific */
struct e1000_tx_desc *curr = &txr->tx_base[nic_i];
struct em_buffer *txbuf = &txr->tx_buffers[nic_i];
int flags = (slot->flags & NS_REPORT ||
nic_i == 0 || nic_i == report_frequency) ?
E1000_TXD_CMD_RS : 0;
NM_CHECK_ADDR_LEN(addr, len);
slot->flags &= ~NS_REPORT;
if (slot->flags & NS_BUF_CHANGED) {
curr->buffer_addr = htole64(paddr);
/* buffer has changed, reload map */
netmap_reload_map(txr->txtag, txbuf->map, addr);
slot->flags &= ~NS_BUF_CHANGED;
}
slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
/* Fill the slot in the NIC ring. */
curr->upper.data = 0;
curr->lower.data = htole32(adapter->txd_cmd | len |
(E1000_TXD_CMD_EOP | flags) );
bus_dmamap_sync(txr->txtag, txbuf->map,
BUS_DMASYNC_PREWRITE);
j = (j == lim) ? 0 : j + 1;
l = (l == lim) ? 0 : l + 1;
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
kring->nr_hwcur = k; /* the saved ring->cur */
kring->nr_hwavail -= n;
kring->nr_hwcur = cur; /* the saved ring->cur */
/* decrease avail by # of packets sent minus previous ones */
kring->nr_hwavail -= new_slots;
/* synchronize the NIC ring */
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), l);
/* (re)start the tx unit up to slot nic_i (excluded) */
E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), nic_i);
}
if (n == 0 || kring->nr_hwavail < 1) {
/*
* Second part: reclaim buffers for completed transmissions.
*/
if (flags & NAF_FORCE_RECLAIM || kring->nr_hwavail < 1) {
int delta;
/* record completed transmissions using TDH */
l = E1000_READ_REG(&adapter->hw, E1000_TDH(ring_nr));
if (l >= kring->nkr_num_slots) { /* XXX can it happen ? */
D("TDH wrap %d", l);
l -= kring->nkr_num_slots;
nic_i = E1000_READ_REG(&adapter->hw, E1000_TDH(ring_nr));
if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
D("TDH wrap %d", nic_i);
nic_i -= kring->nkr_num_slots;
}
delta = l - txr->next_to_clean;
delta = nic_i - txr->next_to_clean;
if (delta) {
/* some completed, increment hwavail. */
if (delta < 0)
delta += kring->nkr_num_slots;
txr->next_to_clean = l;
txr->next_to_clean = nic_i;
kring->nr_hwavail += delta;
}
}
/* update avail to what the kernel knows */
ring->avail = kring->nr_hwavail;
nm_txsync_finalize(kring, cur);
return 0;
}
@ -227,19 +219,23 @@ em_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int flags)
* Reconcile kernel and user view of the receive ring.
*/
static int
em_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int flags)
em_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
struct adapter *adapter = ifp->if_softc;
struct rx_ring *rxr = &adapter->rx_rings[ring_nr];
struct netmap_adapter *na = NA(ifp);
struct ifnet *ifp = na->ifp;
struct netmap_kring *kring = &na->rx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
u_int j, l, n, lim = kring->nkr_num_slots - 1;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int n, resvd;
u_int const lim = kring->nkr_num_slots - 1;
u_int const cur = nm_rxsync_prologue(kring, &resvd); /* cur + res */
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
u_int k = ring->cur, resvd = ring->reserved;
k = ring->cur;
if (k > lim)
/* device-specific */
struct adapter *adapter = ifp->if_softc;
struct rx_ring *rxr = &adapter->rx_rings[ring_nr];
if (cur > lim)
return netmap_ring_reinit(kring);
/* XXX check sync modes */
@ -247,84 +243,85 @@ em_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int flags)
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
/*
* Import newly received packets into the netmap ring.
* j is an index in the netmap ring, l in the NIC ring.
* First part: import newly received packets.
*/
l = rxr->next_to_check;
j = netmap_idx_n2k(kring, l);
if (netmap_no_pendintr || force_update) {
uint16_t slot_flags = kring->nkr_slot_flags;
nic_i = rxr->next_to_check;
nm_i = netmap_idx_n2k(kring, nic_i);
for (n = 0; ; n++) {
struct e1000_rx_desc *curr = &rxr->rx_base[l];
struct e1000_rx_desc *curr = &rxr->rx_base[nic_i];
uint32_t staterr = le32toh(curr->status);
if ((staterr & E1000_RXD_STAT_DD) == 0)
break;
ring->slot[j].len = le16toh(curr->length);
ring->slot[j].flags = slot_flags;
bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[l].map,
ring->slot[nm_i].len = le16toh(curr->length);
ring->slot[nm_i].flags = slot_flags;
bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[nic_i].map,
BUS_DMASYNC_POSTREAD);
j = (j == lim) ? 0 : j + 1;
nm_i = nm_next(nm_i, lim);
/* make sure next_to_refresh follows next_to_check */
rxr->next_to_refresh = l; // XXX
l = (l == lim) ? 0 : l + 1;
rxr->next_to_refresh = nic_i; // XXX
nic_i = nm_next(nic_i, lim);
}
if (n) { /* update the state variables */
rxr->next_to_check = l;
rxr->next_to_check = nic_i;
kring->nr_hwavail += n;
}
kring->nr_kflags &= ~NKR_PENDINTR;
}
/* skip past packets that userspace has released */
j = kring->nr_hwcur; /* netmap ring index */
if (resvd > 0) {
if (resvd + ring->avail >= lim + 1) {
D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
ring->reserved = resvd = 0; // XXX panic...
}
k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
}
if (j != k) { /* userspace has released some packets. */
l = netmap_idx_k2n(kring, j); /* NIC ring index */
for (n = 0; j != k; n++) {
struct netmap_slot *slot = &ring->slot[j];
struct e1000_rx_desc *curr = &rxr->rx_base[l];
struct em_buffer *rxbuf = &rxr->rx_buffers[l];
/*
* Second part: skip past packets that userspace has released.
*/
nm_i = kring->nr_hwcur;
if (nm_i != cur) {
nic_i = netmap_idx_k2n(kring, nm_i);
for (n = 0; nm_i != cur; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
if (addr == netmap_buffer_base) { /* bad buf */
return netmap_ring_reinit(kring);
}
struct e1000_rx_desc *curr = &rxr->rx_base[nic_i];
struct em_buffer *rxbuf = &rxr->rx_buffers[nic_i];
if (addr == netmap_buffer_base) /* bad buf */
goto ring_reset;
if (slot->flags & NS_BUF_CHANGED) {
curr->buffer_addr = htole64(paddr);
/* buffer has changed, reload map */
curr->buffer_addr = htole64(paddr);
netmap_reload_map(rxr->rxtag, rxbuf->map, addr);
slot->flags &= ~NS_BUF_CHANGED;
}
curr->status = 0;
bus_dmamap_sync(rxr->rxtag, rxbuf->map,
BUS_DMASYNC_PREREAD);
j = (j == lim) ? 0 : j + 1;
l = (l == lim) ? 0 : l + 1;
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
kring->nr_hwavail -= n;
kring->nr_hwcur = k;
kring->nr_hwcur = cur;
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
/*
* IMPORTANT: we must leave one free slot in the ring,
* so move l back by one unit
* so move nic_i back by one unit
*/
l = (l == 0) ? lim : l - 1;
E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), l);
nic_i = (nic_i == 0) ? lim : nic_i - 1;
E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), nic_i);
}
/* tell userspace that there are new packets */
/* tell userspace that there might be new packets */
ring->avail = kring->nr_hwavail - resvd;
return 0;
ring_reset:
return netmap_ring_reinit(kring);
}
@ -342,7 +339,8 @@ em_netmap_attach(struct adapter *adapter)
na.nm_txsync = em_netmap_txsync;
na.nm_rxsync = em_netmap_rxsync;
na.nm_register = em_netmap_reg;
netmap_attach(&na, adapter->num_queues);
na.num_tx_rings = na.num_rx_rings = adapter->num_queues;
netmap_attach(&na);
}
/* end of file */
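
All the txsync conversions in this commit follow the same two-part structure shown above for em. A condensed sketch of that skeleton, assuming the netmap_kern.h helpers used here (nm_txsync_prologue, NM_CHECK_ADDR_LEN, nm_next, nm_txsync_finalize) and with the device-specific descriptor writes reduced to comments; foo_* is a hypothetical driver prefix:

static int
foo_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
	struct netmap_kring *kring = &na->tx_rings[ring_nr];
	struct netmap_ring *ring = kring->ring;
	u_int nm_i;	/* index into the netmap ring */
	u_int nic_i;	/* index into the NIC ring */
	u_int n, new_slots;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const cur = nm_txsync_prologue(kring, &new_slots);

	if (cur > lim)	/* error checking in nm_txsync_prologue() */
		return netmap_ring_reinit(kring);

	/*
	 * First part: process new packets to send.
	 */
	nm_i = kring->nr_hwcur;
	if (nm_i != cur) {	/* we have new packets to send */
		nic_i = netmap_idx_k2n(kring, nm_i);
		for (n = 0; nm_i != cur; n++) {
			struct netmap_slot *slot = &ring->slot[nm_i];
			u_int len = slot->len;
			uint64_t paddr;
			void *addr = PNMB(slot, &paddr);

			NM_CHECK_ADDR_LEN(addr, len);
			/* device-specific: fill descriptor nic_i, reload
			 * the dma map if NS_BUF_CHANGED was set */
			slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
			nm_i = nm_next(nm_i, lim);
			nic_i = nm_next(nic_i, lim);
		}
		kring->nr_hwcur = cur;		/* the saved ring->cur */
		kring->nr_hwavail -= new_slots;
		/* device-specific: sync the NIC ring and write the tail
		 * register to (re)start transmission up to nic_i */
	}

	/*
	 * Second part: reclaim buffers for completed transmissions.
	 */
	if (flags & NAF_FORCE_RECLAIM || kring->nr_hwavail < 1) {
		/* device-specific: read the NIC head/consumer index and
		 * add the number of completed slots to kring->nr_hwavail */
	}

	nm_txsync_finalize(kring, cur);	/* export the new state to userspace */
	return 0;
}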

View File

@ -37,88 +37,10 @@
#include <vm/pmap.h> /* vtophys ? */
#include <dev/netmap/netmap_kern.h>
/*
* register-unregister routine
* Adaptation to different versions of the driver.
*/
static int
igb_netmap_reg(struct ifnet *ifp, int onoff)
{
struct adapter *adapter = ifp->if_softc;
struct netmap_adapter *na = NA(ifp);
int error = 0;
if (na == NULL)
return EINVAL; /* no netmap support here */
igb_disable_intr(adapter);
/* Tell the stack that the interface is no longer active */
ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
if (onoff) {
ifp->if_capenable |= IFCAP_NETMAP;
na->if_transmit = ifp->if_transmit;
ifp->if_transmit = netmap_transmit;
igb_init_locked(adapter);
if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) == 0) {
error = ENOMEM;
goto fail;
}
} else {
fail:
/* restore if_transmit */
ifp->if_transmit = na->if_transmit;
ifp->if_capenable &= ~IFCAP_NETMAP;
igb_init_locked(adapter); /* also enable intr */
}
return (error);
}
/*
* Reconcile kernel and user view of the transmit ring.
*/
static int
igb_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int flags)
{
struct adapter *adapter = ifp->if_softc;
struct tx_ring *txr = &adapter->tx_rings[ring_nr];
struct netmap_adapter *na = NA(ifp);
struct netmap_kring *kring = &na->tx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
u_int j, k, l, n = 0, lim = kring->nkr_num_slots - 1;
/* generate an interrupt approximately every half ring */
u_int report_frequency = kring->nkr_num_slots >> 1;
k = ring->cur;
if (k > lim)
return netmap_ring_reinit(kring);
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
BUS_DMASYNC_POSTREAD);
/* check for new packets to send.
* j indexes the netmap ring, l indexes the nic ring, and
* j = kring->nr_hwcur, l = E1000_TDT (not tracked),
* j == (l + kring->nkr_hwofs) % ring_size
*/
j = kring->nr_hwcur;
if (j != k) { /* we have new packets to send */
/* 82575 needs the queue index added */
u32 olinfo_status =
(adapter->hw.mac.type == e1000_82575) ? (txr->me << 4) : 0;
l = netmap_idx_k2n(kring, j);
for (n = 0; j != k; n++) {
/* slot is the current slot in the netmap ring */
struct netmap_slot *slot = &ring->slot[j];
/* curr is the current slot in the nic ring */
union e1000_adv_tx_desc *curr =
(union e1000_adv_tx_desc *)&txr->tx_base[l];
#ifndef IGB_MEDIA_RESET
/* at the same time as IGB_MEDIA_RESET was defined, the
* tx buffer descriptor was renamed, so use this to revert
@ -126,24 +48,95 @@ igb_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int flags)
*/
#define igb_tx_buf igb_tx_buffer
#endif
struct igb_tx_buf *txbuf = &txr->tx_buffers[l];
int flags = ((slot->flags & NS_REPORT) ||
j == 0 || j == report_frequency) ?
E1000_ADVTXD_DCMD_RS : 0;
/*
* Register/unregister. We are already under netmap lock.
*/
static int
igb_netmap_reg(struct netmap_adapter *na, int onoff)
{
struct ifnet *ifp = na->ifp;
struct adapter *adapter = ifp->if_softc;
IGB_CORE_LOCK(adapter);
igb_disable_intr(adapter);
/* Tell the stack that the interface is no longer active */
ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
/* enable or disable flags and callbacks in na and ifp */
if (onoff) {
nm_set_native_flags(na);
} else {
nm_clear_native_flags(na);
}
igb_init_locked(adapter); /* also enable intr */
IGB_CORE_UNLOCK(adapter);
return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
}
/*
* Reconcile kernel and user view of the transmit ring.
*/
static int
igb_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
struct ifnet *ifp = na->ifp;
struct netmap_kring *kring = &na->tx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int n, new_slots;
u_int const lim = kring->nkr_num_slots - 1;
u_int const cur = nm_txsync_prologue(kring, &new_slots);
/* generate an interrupt approximately every half ring */
u_int report_frequency = kring->nkr_num_slots >> 1;
/* device-specific */
struct adapter *adapter = ifp->if_softc;
struct tx_ring *txr = &adapter->tx_rings[ring_nr];
/* 82575 needs the queue index added */
u32 olinfo_status =
(adapter->hw.mac.type == e1000_82575) ? (txr->me << 4) : 0;
if (cur > lim) /* error checking in nm_txsync_prologue() */
return netmap_ring_reinit(kring);
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
BUS_DMASYNC_POSTREAD);
/*
* First part: process new packets to send.
*/
nm_i = kring->nr_hwcur;
if (nm_i != cur) { /* we have new packets to send */
nic_i = netmap_idx_k2n(kring, nm_i);
for (n = 0; nm_i != cur; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
u_int len = slot->len;
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
u_int len = slot->len;
if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) {
return netmap_ring_reinit(kring);
}
/* device-specific */
union e1000_adv_tx_desc *curr =
(union e1000_adv_tx_desc *)&txr->tx_base[nic_i];
struct igb_tx_buf *txbuf = &txr->tx_buffers[nic_i];
int flags = (slot->flags & NS_REPORT ||
nic_i == 0 || nic_i == report_frequency) ?
E1000_ADVTXD_DCMD_RS : 0;
NM_CHECK_ADDR_LEN(addr, len);
slot->flags &= ~NS_REPORT;
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, reload map */
netmap_reload_map(txr->txtag, txbuf->map, addr);
slot->flags &= ~NS_BUF_CHANGED;
}
slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
/* Fill the slot in the NIC ring. */
curr->read.buffer_addr = htole64(paddr);
// XXX check olinfo and cmd_type_len
curr->read.olinfo_status =
@ -151,48 +144,56 @@ igb_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int flags)
(len<< E1000_ADVTXD_PAYLEN_SHIFT));
curr->read.cmd_type_len =
htole32(len | E1000_ADVTXD_DTYP_DATA |
E1000_ADVTXD_DCMD_IFCS |
E1000_ADVTXD_DCMD_DEXT |
E1000_ADVTXD_DCMD_EOP | flags);
E1000_ADVTXD_DCMD_IFCS |
E1000_ADVTXD_DCMD_DEXT |
E1000_ADVTXD_DCMD_EOP | flags);
/* make sure changes to the buffer are synced */
bus_dmamap_sync(txr->txtag, txbuf->map,
BUS_DMASYNC_PREWRITE);
j = (j == lim) ? 0 : j + 1;
l = (l == lim) ? 0 : l + 1;
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
kring->nr_hwcur = k; /* the saved ring->cur */
kring->nr_hwavail -= n;
kring->nr_hwcur = cur; /* the saved ring->cur */
/* decrease avail by # of packets sent minus previous ones */
kring->nr_hwavail -= new_slots;
/* Set the watchdog XXX ? */
txr->queue_status = IGB_QUEUE_WORKING;
txr->watchdog_time = ticks;
/* synchronize the NIC ring */
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), l);
/* (re)start the tx unit up to slot nic_i (excluded) */
E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), nic_i);
}
if (n == 0 || kring->nr_hwavail < 1) {
/*
* Second part: reclaim buffers for completed transmissions.
*/
if (flags & NAF_FORCE_RECLAIM || kring->nr_hwavail < 1) {
int delta;
/* record completed transmissions using TDH */
l = E1000_READ_REG(&adapter->hw, E1000_TDH(ring_nr));
if (l >= kring->nkr_num_slots) { /* XXX can it happen ? */
D("TDH wrap %d", l);
l -= kring->nkr_num_slots;
nic_i = E1000_READ_REG(&adapter->hw, E1000_TDH(ring_nr));
if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
D("TDH wrap %d", nic_i);
nic_i -= kring->nkr_num_slots;
}
delta = l - txr->next_to_clean;
delta = nic_i - txr->next_to_clean;
if (delta) {
/* some completed, increment hwavail. */
if (delta < 0)
delta += kring->nkr_num_slots;
txr->next_to_clean = l;
txr->next_to_clean = nic_i;
kring->nr_hwavail += delta;
}
}
/* update avail to what the kernel knows */
ring->avail = kring->nr_hwavail;
nm_txsync_finalize(kring, cur);
return 0;
}
@ -202,101 +203,107 @@ igb_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int flags)
* Reconcile kernel and user view of the receive ring.
*/
static int
igb_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int flags)
igb_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
struct adapter *adapter = ifp->if_softc;
struct rx_ring *rxr = &adapter->rx_rings[ring_nr];
struct netmap_adapter *na = NA(ifp);
struct ifnet *ifp = na->ifp;
struct netmap_kring *kring = &na->rx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
u_int j, l, n, lim = kring->nkr_num_slots - 1;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int n, resvd;
u_int const lim = kring->nkr_num_slots - 1;
u_int const cur = nm_rxsync_prologue(kring, &resvd); /* cur + res */
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
u_int k = ring->cur, resvd = ring->reserved;
k = ring->cur;
if (k > lim)
/* device-specific */
struct adapter *adapter = ifp->if_softc;
struct rx_ring *rxr = &adapter->rx_rings[ring_nr];
if (cur > lim)
return netmap_ring_reinit(kring);
/* XXX check sync modes */
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
/*
* import newly received packets into the netmap ring.
* j is an index in the netmap ring, l in the NIC ring.
* First part: import newly received packets.
*/
l = rxr->next_to_check;
j = netmap_idx_n2k(kring, l);
if (netmap_no_pendintr || force_update) {
uint16_t slot_flags = kring->nkr_slot_flags;
nic_i = rxr->next_to_check;
nm_i = netmap_idx_n2k(kring, nic_i);
for (n = 0; ; n++) {
union e1000_adv_rx_desc *curr = &rxr->rx_base[l];
union e1000_adv_rx_desc *curr = &rxr->rx_base[nic_i];
uint32_t staterr = le32toh(curr->wb.upper.status_error);
if ((staterr & E1000_RXD_STAT_DD) == 0)
break;
ring->slot[j].len = le16toh(curr->wb.upper.length);
ring->slot[j].flags = slot_flags;
ring->slot[nm_i].len = le16toh(curr->wb.upper.length);
ring->slot[nm_i].flags = slot_flags;
bus_dmamap_sync(rxr->ptag,
rxr->rx_buffers[l].pmap, BUS_DMASYNC_POSTREAD);
j = (j == lim) ? 0 : j + 1;
l = (l == lim) ? 0 : l + 1;
rxr->rx_buffers[nic_i].pmap, BUS_DMASYNC_POSTREAD);
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
if (n) { /* update the state variables */
rxr->next_to_check = l;
rxr->next_to_check = nic_i;
kring->nr_hwavail += n;
}
kring->nr_kflags &= ~NKR_PENDINTR;
}
/* skip past packets that userspace has released */
j = kring->nr_hwcur; /* netmap ring index */
if (resvd > 0) {
if (resvd + ring->avail >= lim + 1) {
D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
ring->reserved = resvd = 0; // XXX panic...
}
k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
}
if (j != k) { /* userspace has released some packets. */
l = netmap_idx_k2n(kring, j);
for (n = 0; j != k; n++) {
struct netmap_slot *slot = ring->slot + j;
union e1000_adv_rx_desc *curr = &rxr->rx_base[l];
struct igb_rx_buf *rxbuf = rxr->rx_buffers + l;
/*
* Second part: skip past packets that userspace has released.
*/
nm_i = kring->nr_hwcur;
if (nm_i != cur) {
nic_i = netmap_idx_k2n(kring, nm_i);
for (n = 0; nm_i != cur; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
if (addr == netmap_buffer_base) { /* bad buf */
return netmap_ring_reinit(kring);
}
union e1000_adv_rx_desc *curr = &rxr->rx_base[nic_i];
struct igb_rx_buf *rxbuf = &rxr->rx_buffers[nic_i];
if (addr == netmap_buffer_base) /* bad buf */
goto ring_reset;
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, reload map */
netmap_reload_map(rxr->ptag, rxbuf->pmap, addr);
slot->flags &= ~NS_BUF_CHANGED;
}
curr->read.pkt_addr = htole64(paddr);
curr->wb.upper.status_error = 0;
curr->read.pkt_addr = htole64(paddr);
bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
BUS_DMASYNC_PREREAD);
j = (j == lim) ? 0 : j + 1;
l = (l == lim) ? 0 : l + 1;
BUS_DMASYNC_PREREAD);
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
kring->nr_hwavail -= n;
kring->nr_hwcur = k;
kring->nr_hwcur = cur;
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
/*
* IMPORTANT: we must leave one free slot in the ring,
* so move l back by one unit
* so move nic_i back by one unit
*/
l = (l == 0) ? lim : l - 1;
E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), l);
nic_i = (nic_i == 0) ? lim : nic_i - 1;
E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), nic_i);
}
/* tell userspace that there are new packets */
/* tell userspace that there might be new packets */
ring->avail = kring->nr_hwavail - resvd;
return 0;
ring_reset:
return netmap_ring_reinit(kring);
}
@ -314,6 +321,8 @@ igb_netmap_attach(struct adapter *adapter)
na.nm_txsync = igb_netmap_txsync;
na.nm_rxsync = igb_netmap_rxsync;
na.nm_register = igb_netmap_reg;
netmap_attach(&na, adapter->num_queues);
}
na.num_tx_rings = na.num_rx_rings = adapter->num_queues;
netmap_attach(&na);
}
/* end of file */
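
The rxsync conversions follow a matching skeleton: nm_rxsync_prologue() validates the user request, the first part imports received packets, the second part hands released buffers back to the NIC. A sketch under the same assumptions as above; the foo_* helpers stand in for the device-specific descriptor accesses and are hypothetical:

static int
foo_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
	struct netmap_kring *kring = &na->rx_rings[ring_nr];
	struct netmap_ring *ring = kring->ring;
	u_int nm_i;	/* index into the netmap ring */
	u_int nic_i;	/* index into the NIC ring */
	u_int n, resvd;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const cur = nm_rxsync_prologue(kring, &resvd);	/* cur + reserved */
	int force_update = (flags & NAF_FORCE_READ) ||
	    (kring->nr_kflags & NKR_PENDINTR);

	if (cur > lim)
		return netmap_ring_reinit(kring);

	/*
	 * First part: import newly received packets.
	 */
	if (netmap_no_pendintr || force_update) {
		uint16_t slot_flags = kring->nkr_slot_flags;

		nic_i = foo_next_rx_to_check(na);	/* hypothetical helper */
		nm_i = netmap_idx_n2k(kring, nic_i);
		for (n = 0; foo_rxdesc_done(na, nic_i); n++) {	/* hypothetical */
			ring->slot[nm_i].len = foo_rxdesc_len(na, nic_i);
			ring->slot[nm_i].flags = slot_flags;
			nm_i = nm_next(nm_i, lim);
			nic_i = nm_next(nic_i, lim);
		}
		if (n)
			kring->nr_hwavail += n;
		kring->nr_kflags &= ~NKR_PENDINTR;
	}

	/*
	 * Second part: skip past packets that userspace has released.
	 */
	nm_i = kring->nr_hwcur;
	if (nm_i != cur) {
		nic_i = netmap_idx_k2n(kring, nm_i);
		for (n = 0; nm_i != cur; n++) {
			struct netmap_slot *slot = &ring->slot[nm_i];
			uint64_t paddr;
			void *addr = PNMB(slot, &paddr);

			if (addr == netmap_buffer_base)	/* bad buffer index */
				return netmap_ring_reinit(kring);
			foo_rearm_rxdesc(na, nic_i, paddr);	/* hypothetical */
			slot->flags &= ~NS_BUF_CHANGED;
			nm_i = nm_next(nm_i, lim);
			nic_i = nm_next(nic_i, lim);
		}
		kring->nr_hwavail -= n;
		kring->nr_hwcur = cur;
		/* device-specific: sync and write the tail register
		 * one slot before nic_i */
	}

	/* tell userspace that there might be new packets */
	ring->avail = kring->nr_hwavail - resvd;
	return 0;
}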

View File

@ -27,11 +27,12 @@
/*
* $FreeBSD$
*
* netmap support for "lem"
* netmap support for: lem
*
* For details on netmap support please see ixgbe_netmap.h
*/
#include <net/netmap.h>
#include <sys/selinfo.h>
#include <vm/vm.h>
@ -40,17 +41,13 @@
/*
* Register/unregister
* Register/unregister. We are already under netmap lock.
*/
static int
lem_netmap_reg(struct ifnet *ifp, int onoff)
lem_netmap_reg(struct netmap_adapter *na, int onoff)
{
struct ifnet *ifp = na->ifp;
struct adapter *adapter = ifp->if_softc;
struct netmap_adapter *na = NA(ifp);
int error = 0;
if (na == NULL)
return EINVAL;
EM_CORE_LOCK(adapter);
@ -64,24 +61,14 @@ lem_netmap_reg(struct ifnet *ifp, int onoff)
taskqueue_drain(adapter->tq, &adapter->rxtx_task);
taskqueue_drain(adapter->tq, &adapter->link_task);
#endif /* !EM_LEGACY_IRQ */
/* enable or disable flags and callbacks in na and ifp */
if (onoff) {
ifp->if_capenable |= IFCAP_NETMAP;
na->if_transmit = ifp->if_transmit;
ifp->if_transmit = netmap_transmit;
lem_init_locked(adapter);
if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) == 0) {
error = ENOMEM;
goto fail;
}
nm_set_native_flags(na);
} else {
fail:
/* return to non-netmap mode */
ifp->if_transmit = na->if_transmit;
ifp->if_capenable &= ~IFCAP_NETMAP;
lem_init_locked(adapter); /* also enable intr */
nm_clear_native_flags(na);
}
lem_init_locked(adapter); /* also enable intr */
#ifndef EM_LEGACY_IRQ
taskqueue_unblock(adapter->tq); // XXX do we need this ?
@ -89,7 +76,7 @@ lem_netmap_reg(struct ifnet *ifp, int onoff)
EM_CORE_UNLOCK(adapter);
return (error);
return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
}
@ -97,108 +84,102 @@ lem_netmap_reg(struct ifnet *ifp, int onoff)
* Reconcile kernel and user view of the transmit ring.
*/
static int
lem_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int flags)
lem_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
struct adapter *adapter = ifp->if_softc;
struct netmap_adapter *na = NA(ifp);
struct ifnet *ifp = na->ifp;
struct netmap_kring *kring = &na->tx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
u_int j, k, l, n = 0, lim = kring->nkr_num_slots - 1;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int n, new_slots;
u_int const lim = kring->nkr_num_slots - 1;
u_int const cur = nm_txsync_prologue(kring, &new_slots);
/* generate an interrupt approximately every half ring */
int report_frequency = kring->nkr_num_slots >> 1;
u_int report_frequency = kring->nkr_num_slots >> 1;
ND("%s: hwofs %d, hwcur %d hwavail %d lease %d cur %d avail %d",
ifp->if_xname,
kring->nkr_hwofs, kring->nr_hwcur, kring->nr_hwavail,
kring->nkr_hwlease,
ring->cur, ring->avail);
/* take a copy of ring->cur now, and never read it again */
k = ring->cur;
if (k > lim)
/* device-specific */
struct adapter *adapter = ifp->if_softc;
if (cur > lim) /* error checking in nm_txsync_prologue() */
return netmap_ring_reinit(kring);
bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
BUS_DMASYNC_POSTREAD);
/*
* Process new packets to send. j is the current index in the
* netmap ring, l is the corresponding index in the NIC ring.
* First part: process new packets to send.
*/
j = kring->nr_hwcur;
if (netmap_verbose > 255)
RD(5, "device %s send %d->%d", ifp->if_xname, j, k);
if (j != k) { /* we have new packets to send */
l = netmap_idx_k2n(kring, j);
for (n = 0; j != k; n++) {
/* slot is the current slot in the netmap ring */
struct netmap_slot *slot = &ring->slot[j];
/* curr is the current slot in the nic ring */
struct e1000_tx_desc *curr = &adapter->tx_desc_base[l];
struct em_buffer *txbuf = &adapter->tx_buffer_area[l];
int flags = ((slot->flags & NS_REPORT) ||
j == 0 || j == report_frequency) ?
E1000_TXD_CMD_RS : 0;
nm_i = kring->nr_hwcur;
if (nm_i != cur) { /* we have new packets to send */
nic_i = netmap_idx_k2n(kring, nm_i);
for (n = 0; nm_i != cur; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
u_int len = slot->len;
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
u_int len = slot->len;
if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) {
return netmap_ring_reinit(kring);
}
ND("slot %d NIC %d %s", j, l, nm_dump_buf(addr, len, 128, NULL));
/* device-specific */
struct e1000_tx_desc *curr = &adapter->tx_desc_base[nic_i];
struct em_buffer *txbuf = &adapter->tx_buffer_area[nic_i];
int flags = (slot->flags & NS_REPORT ||
nic_i == 0 || nic_i == report_frequency) ?
E1000_TXD_CMD_RS : 0;
slot->flags &= ~NS_REPORT;
if (1 || slot->flags & NS_BUF_CHANGED) {
NM_CHECK_ADDR_LEN(addr, len);
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, reload map */
netmap_reload_map(adapter->txtag, txbuf->map, addr);
curr->buffer_addr = htole64(paddr);
slot->flags &= ~NS_BUF_CHANGED;
netmap_reload_map(adapter->txtag, txbuf->map, addr);
}
slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
/* Fill the slot in the NIC ring. */
curr->upper.data = 0;
curr->lower.data =
htole32( adapter->txd_cmd | len |
curr->lower.data = htole32(adapter->txd_cmd | len |
(E1000_TXD_CMD_EOP | flags) );
ND("len %d kring %d nic %d", len, j, l);
bus_dmamap_sync(adapter->txtag, txbuf->map,
BUS_DMASYNC_PREWRITE);
j = (j == lim) ? 0 : j + 1;
l = (l == lim) ? 0 : l + 1;
BUS_DMASYNC_PREWRITE);
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
ND("sent %d packets from %d, TDT now %d", n, kring->nr_hwcur, l);
kring->nr_hwcur = k; /* the saved ring->cur */
kring->nr_hwavail -= n;
kring->nr_hwcur = cur; /* the saved ring->cur */
/* decrease avail by # of packets sent minus previous ones */
kring->nr_hwavail -= new_slots;
/* synchronize the NIC ring */
bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), l);
/* (re)start the tx unit up to slot nic_i (excluded) */
E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), nic_i);
}
if (n == 0 || kring->nr_hwavail < 1) {
/*
* Second part: reclaim buffers for completed transmissions.
*/
if (flags & NAF_FORCE_RECLAIM || kring->nr_hwavail < 1) {
int delta;
/* record completed transmissions using TDH */
l = E1000_READ_REG(&adapter->hw, E1000_TDH(0));
ND("tdh is now %d", l);
if (l >= kring->nkr_num_slots) { /* XXX can it happen ? */
D("bad TDH %d", l);
l -= kring->nkr_num_slots;
nic_i = E1000_READ_REG(&adapter->hw, E1000_TDH(0));
if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
D("TDH wrap %d", nic_i);
nic_i -= kring->nkr_num_slots;
}
delta = l - adapter->next_tx_to_clean;
delta = nic_i - adapter->next_tx_to_clean;
if (delta) {
/* some tx completed, increment hwavail. */
/* some completed, increment hwavail. */
if (delta < 0)
delta += kring->nkr_num_slots;
if (netmap_verbose > 255)
RD(5, "%s tx recover %d bufs",
ifp->if_xname, delta);
adapter->next_tx_to_clean = l;
adapter->next_tx_to_clean = nic_i;
kring->nr_hwavail += delta;
}
}
/* update avail to what the kernel knows */
ring->avail = kring->nr_hwavail;
nm_txsync_finalize(kring, cur);
return 0;
}
@ -208,39 +189,39 @@ lem_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int flags)
* Reconcile kernel and user view of the receive ring.
*/
static int
lem_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int flags)
lem_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
struct adapter *adapter = ifp->if_softc;
struct netmap_adapter *na = NA(ifp);
struct ifnet *ifp = na->ifp;
struct netmap_kring *kring = &na->rx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
int j, l, n, lim = kring->nkr_num_slots - 1;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int n, resvd;
u_int const lim = kring->nkr_num_slots - 1;
u_int const cur = nm_rxsync_prologue(kring, &resvd); /* cur + res */
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
u_int k = ring->cur, resvd = ring->reserved;
if (k > lim)
/* device-specific */
struct adapter *adapter = ifp->if_softc;
if (cur > lim)
return netmap_ring_reinit(kring);
/* XXX check sync modes */
bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
/*
* Import newly received packets into the netmap ring.
* j is an index in the netmap ring, l in the NIC ring.
* First part: import newly received packets.
*/
l = adapter->next_rx_desc_to_check;
j = netmap_idx_n2k(kring, l);
ND("%s: next NIC %d kring %d (ofs %d), hwcur %d hwavail %d cur %d avail %d",
ifp->if_xname,
l, j, kring->nkr_hwofs, kring->nr_hwcur, kring->nr_hwavail,
ring->cur, ring->avail);
if (netmap_no_pendintr || force_update) {
uint16_t slot_flags = kring->nkr_slot_flags;
nic_i = adapter->next_rx_desc_to_check;
nm_i = netmap_idx_n2k(kring, nic_i);
for (n = 0; ; n++) {
struct e1000_rx_desc *curr = &adapter->rx_desc_base[l];
struct e1000_rx_desc *curr = &adapter->rx_desc_base[nic_i];
uint32_t staterr = le32toh(curr->status);
int len;
@ -248,76 +229,73 @@ lem_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int flags)
break;
len = le16toh(curr->length) - 4; // CRC
if (len < 0) {
D("bogus pkt size at %d", j);
D("bogus pkt size %d nic idx %d", len, nic_i);
len = 0;
}
ND("\n%s", nm_dump_buf(NMB(&ring->slot[j]),
len, 128, NULL));
ring->slot[j].len = len;
ring->slot[j].flags = slot_flags;
ring->slot[nm_i].len = len;
ring->slot[nm_i].flags = slot_flags;
bus_dmamap_sync(adapter->rxtag,
adapter->rx_buffer_area[l].map,
BUS_DMASYNC_POSTREAD);
j = (j == lim) ? 0 : j + 1;
l = (l == lim) ? 0 : l + 1;
adapter->rx_buffer_area[nic_i].map,
BUS_DMASYNC_POSTREAD);
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
if (n) { /* update the state variables */
adapter->next_rx_desc_to_check = l;
adapter->next_rx_desc_to_check = nic_i;
// ifp->if_ipackets += n;
kring->nr_hwavail += n;
}
kring->nr_kflags &= ~NKR_PENDINTR;
}
/* skip past packets that userspace has released */
j = kring->nr_hwcur; /* netmap ring index */
if (resvd > 0) {
if (resvd + ring->avail >= lim + 1) {
D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
ring->reserved = resvd = 0; // XXX panic...
}
k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
}
if (j != k) { /* userspace has released some packets. */
l = netmap_idx_k2n(kring, j); /* NIC ring index */
for (n = 0; j != k; n++) {
struct netmap_slot *slot = &ring->slot[j];
struct e1000_rx_desc *curr = &adapter->rx_desc_base[l];
struct em_buffer *rxbuf = &adapter->rx_buffer_area[l];
/*
* Second part: skip past packets that userspace has released.
*/
nm_i = kring->nr_hwcur;
if (nm_i != cur) {
nic_i = netmap_idx_k2n(kring, nm_i);
for (n = 0; nm_i != cur; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
if (addr == netmap_buffer_base) { /* bad buf */
return netmap_ring_reinit(kring);
}
struct e1000_rx_desc *curr = &adapter->rx_desc_base[nic_i];
struct em_buffer *rxbuf = &adapter->rx_buffer_area[nic_i];
if (addr == netmap_buffer_base) /* bad buf */
goto ring_reset;
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, reload map */
netmap_reload_map(adapter->rxtag, rxbuf->map, addr);
curr->buffer_addr = htole64(paddr);
netmap_reload_map(adapter->rxtag, rxbuf->map, addr);
slot->flags &= ~NS_BUF_CHANGED;
}
curr->status = 0;
bus_dmamap_sync(adapter->rxtag, rxbuf->map,
BUS_DMASYNC_PREREAD);
j = (j == lim) ? 0 : j + 1;
l = (l == lim) ? 0 : l + 1;
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
kring->nr_hwavail -= n;
kring->nr_hwcur = k;
kring->nr_hwcur = cur;
bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
/*
* IMPORTANT: we must leave one free slot in the ring,
* so move l back by one unit
* so move nic_i back by one unit
*/
l = (l == 0) ? lim : l - 1;
E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), l);
nic_i = (nic_i == 0) ? lim : nic_i - 1;
E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), nic_i);
}
/* tell userspace that there are new packets */
/* tell userspace that there might be new packets */
ring->avail = kring->nr_hwavail - resvd;
return 0;
ring_reset:
return netmap_ring_reinit(kring);
}
@ -335,7 +313,8 @@ lem_netmap_attach(struct adapter *adapter)
na.nm_txsync = lem_netmap_txsync;
na.nm_rxsync = lem_netmap_rxsync;
na.nm_register = lem_netmap_reg;
netmap_attach(&na, 1);
na.num_tx_rings = na.num_rx_rings = 1;
netmap_attach(&na);
}
/* end of file */
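
The attach side changes the same way in every driver: the ring counts move into the netmap_adapter and netmap_attach() takes a single argument. A hypothetical sketch of the new pattern; only the callback and ring-count assignments come from this diff, the remaining field names (na.ifp, descriptor counts) are assumptions:

static void
foo_netmap_attach(struct foo_softc *sc)
{
	struct netmap_adapter na;

	bzero(&na, sizeof(na));
	na.ifp = sc->ifp;			/* assumed field name */
	na.num_tx_desc = sc->num_tx_desc;	/* assumed */
	na.num_rx_desc = sc->num_rx_desc;	/* assumed */
	na.nm_txsync = foo_netmap_txsync;
	na.nm_rxsync = foo_netmap_rxsync;
	na.nm_register = foo_netmap_reg;
	na.num_tx_rings = na.num_rx_rings = sc->num_queues;
	netmap_attach(&na);	/* was netmap_attach(&na, num_queues) */
}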

View File

@ -26,8 +26,9 @@
/*
* $FreeBSD$
*
* netmap support for "re"
* For details on netmap support please see ixgbe_netmap.h
* netmap support for: re
*
* For more details on netmap support please see ixgbe_netmap.h
*/
@ -39,44 +40,24 @@
/*
* support for netmap register/unregister. We are already under core lock.
* only called on the first register or the last unregister.
* Register/unregister. We are already under netmap lock.
*/
static int
re_netmap_reg(struct ifnet *ifp, int onoff)
re_netmap_reg(struct netmap_adapter *na, int onoff)
{
struct ifnet *ifp = na->ifp;
struct rl_softc *adapter = ifp->if_softc;
struct netmap_adapter *na = NA(ifp);
int error = 0;
if (na == NULL)
return EINVAL;
/* Tell the stack that the interface is no longer active */
ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
re_stop(adapter);
RL_LOCK(adapter);
re_stop(adapter); /* also clears IFF_DRV_RUNNING */
if (onoff) {
ifp->if_capenable |= IFCAP_NETMAP;
/* save if_transmit to restore it later */
na->if_transmit = ifp->if_transmit;
ifp->if_transmit = netmap_transmit;
re_init_locked(adapter);
if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) == 0) {
error = ENOMEM;
goto fail;
}
nm_set_native_flags(na);
} else {
fail:
/* restore if_transmit */
ifp->if_transmit = na->if_transmit;
ifp->if_capenable &= ~IFCAP_NETMAP;
re_init_locked(adapter); /* also enables intr */
nm_clear_native_flags(na);
}
return (error);
re_init_locked(adapter); /* also enables intr */
RL_UNLOCK(adapter);
return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
}
@ -84,90 +65,107 @@ re_netmap_reg(struct ifnet *ifp, int onoff)
* Reconcile kernel and user view of the transmit ring.
*/
static int
re_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int flags)
re_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
struct rl_softc *sc = ifp->if_softc;
struct rl_txdesc *txd = sc->rl_ldata.rl_tx_desc;
struct netmap_adapter *na = NA(sc->rl_ifp);
struct ifnet *ifp = na->ifp;
struct netmap_kring *kring = &na->tx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
int j, k, l, n, lim = kring->nkr_num_slots - 1;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int n, new_slots;
u_int const lim = kring->nkr_num_slots - 1;
u_int const cur = nm_txsync_prologue(kring, &new_slots);
k = ring->cur;
if (k > lim)
/* device-specific */
struct rl_softc *sc = ifp->if_softc;
struct rl_txdesc *txd = sc->rl_ldata.rl_tx_desc;
if (cur > lim) /* error checking in nm_txsync_prologue() */
return netmap_ring_reinit(kring);
/* Sync the TX descriptor list */
bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
sc->rl_ldata.rl_tx_list_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
sc->rl_ldata.rl_tx_list_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); // XXX extra postwrite ?
/* XXX move after the transmissions */
/* record completed transmissions */
for (n = 0, l = sc->rl_ldata.rl_tx_considx;
l != sc->rl_ldata.rl_tx_prodidx;
n++, l = RL_TX_DESC_NXT(sc, l)) {
uint32_t cmdstat =
le32toh(sc->rl_ldata.rl_tx_list[l].rl_cmdstat);
if (cmdstat & RL_TDESC_STAT_OWN)
break;
}
if (n > 0) {
sc->rl_ldata.rl_tx_considx = l;
sc->rl_ldata.rl_tx_free += n;
kring->nr_hwavail += n;
}
/*
* First part: process new packets to send.
*/
nm_i = kring->nr_hwcur;
if (nm_i != cur) { /* we have new packets to send */
nic_i = sc->rl_ldata.rl_tx_prodidx;
// XXX or netmap_idx_k2n(kring, nm_i);
/* update avail to what the kernel knows */
ring->avail = kring->nr_hwavail;
j = kring->nr_hwcur;
if (j != k) { /* we have new packets to send */
l = sc->rl_ldata.rl_tx_prodidx;
for (n = 0; j != k; n++) {
struct netmap_slot *slot = &ring->slot[j];
struct rl_desc *desc = &sc->rl_ldata.rl_tx_list[l];
int cmd = slot->len | RL_TDESC_CMD_EOF |
RL_TDESC_CMD_OWN | RL_TDESC_CMD_SOF ;
for (n = 0; nm_i != cur; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
u_int len = slot->len;
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
int len = slot->len;
if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) {
// XXX what about prodidx ?
return netmap_ring_reinit(kring);
}
/* device-specific */
struct rl_desc *desc = &sc->rl_ldata.rl_tx_list[nic_i];
int cmd = slot->len | RL_TDESC_CMD_EOF |
RL_TDESC_CMD_OWN | RL_TDESC_CMD_SOF ;
if (l == lim) /* mark end of ring */
NM_CHECK_ADDR_LEN(addr, len);
if (nic_i == lim) /* mark end of ring */
cmd |= RL_TDESC_CMD_EOR;
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, reload map */
desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr));
desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr));
/* buffer has changed, unload and reload map */
netmap_reload_map(sc->rl_ldata.rl_tx_mtag,
txd[l].tx_dmamap, addr);
slot->flags &= ~NS_BUF_CHANGED;
txd[nic_i].tx_dmamap, addr);
}
slot->flags &= ~NS_REPORT;
desc->rl_cmdstat = htole32(cmd);
bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag,
txd[l].tx_dmamap, BUS_DMASYNC_PREWRITE);
j = (j == lim) ? 0 : j + 1;
l = (l == lim) ? 0 : l + 1;
}
sc->rl_ldata.rl_tx_prodidx = l;
kring->nr_hwcur = k; /* the saved ring->cur */
ring->avail -= n; // XXX see others
kring->nr_hwavail = ring->avail;
slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
/* Fill the slot in the NIC ring. */
desc->rl_cmdstat = htole32(cmd);
/* make sure changes to the buffer are synced */
bus_dmamap_sync(sc->rl_ldata.rl_tx_mtag,
txd[nic_i].tx_dmamap,
BUS_DMASYNC_PREWRITE);
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
sc->rl_ldata.rl_tx_prodidx = nic_i;
/* decrease avail by # of packets sent minus previous ones */
kring->nr_hwcur = cur; /* the saved ring->cur */
kring->nr_hwavail -= new_slots;
/* synchronize the NIC ring */
bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
sc->rl_ldata.rl_tx_list_map,
BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
sc->rl_ldata.rl_tx_list_map,
BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
/* start ? */
CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
}
/*
* Second part: reclaim buffers for completed transmissions.
*/
if (flags & NAF_FORCE_RECLAIM || kring->nr_hwavail < 1) {
nic_i = sc->rl_ldata.rl_tx_considx;
for (n = 0; nic_i != sc->rl_ldata.rl_tx_prodidx;
n++, nic_i = RL_TX_DESC_NXT(sc, nic_i)) {
uint32_t cmdstat =
le32toh(sc->rl_ldata.rl_tx_list[nic_i].rl_cmdstat);
if (cmdstat & RL_TDESC_STAT_OWN)
break;
}
if (n > 0) {
sc->rl_ldata.rl_tx_considx = nic_i;
sc->rl_ldata.rl_tx_free += n;
kring->nr_hwavail += n;
}
}
nm_txsync_finalize(kring, cur);
return 0;
}
@ -176,42 +174,45 @@ re_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int flags)
* Reconcile kernel and user view of the receive ring.
*/
static int
re_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int flags)
re_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
struct rl_softc *sc = ifp->if_softc;
struct rl_rxdesc *rxd = sc->rl_ldata.rl_rx_desc;
struct netmap_adapter *na = NA(sc->rl_ifp);
struct ifnet *ifp = na->ifp;
struct netmap_kring *kring = &na->rx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
int j, l, n, lim = kring->nkr_num_slots - 1;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int n, resvd;
u_int const lim = kring->nkr_num_slots - 1;
u_int const cur = nm_rxsync_prologue(kring, &resvd); /* cur + res */
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
u_int k = ring->cur, resvd = ring->reserved;
k = ring->cur;
if (k > lim)
/* device-specific */
struct rl_softc *sc = ifp->if_softc;
struct rl_rxdesc *rxd = sc->rl_ldata.rl_rx_desc;
if (cur > lim)
return netmap_ring_reinit(kring);
/* XXX check sync modes */
bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
sc->rl_ldata.rl_rx_list_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
sc->rl_ldata.rl_rx_list_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
/*
* Import newly received packets into the netmap ring.
* j is an index in the netmap ring, l in the NIC ring.
* First part: import newly received packets.
*
* The device uses all the buffers in the ring, so we need
* This device uses all the buffers in the ring, so we need
* another termination condition in addition to RL_RDESC_STAT_OWN
* cleared (all buffers could have it cleared). The easiest one
* is to limit the amount of data reported up to 'lim'
*/
l = sc->rl_ldata.rl_rx_prodidx; /* next pkt to check */
j = netmap_idx_n2k(kring, l); /* the kring index */
if (netmap_no_pendintr || force_update) {
uint16_t slot_flags = kring->nkr_slot_flags;
nic_i = sc->rl_ldata.rl_rx_prodidx; /* next pkt to check */
nm_i = netmap_idx_n2k(kring, nic_i);
for (n = kring->nr_hwavail; n < lim ; n++) {
struct rl_desc *cur_rx = &sc->rl_ldata.rl_rx_list[l];
struct rl_desc *cur_rx = &sc->rl_ldata.rl_rx_list[nic_i];
uint32_t rxstat = le32toh(cur_rx->rl_cmdstat);
uint32_t total_len;
@ -220,74 +221,75 @@ re_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int flags)
total_len = rxstat & sc->rl_rxlenmask;
/* XXX subtract crc */
total_len = (total_len < 4) ? 0 : total_len - 4;
kring->ring->slot[j].len = total_len;
kring->ring->slot[j].flags = slot_flags;
ring->slot[nm_i].len = total_len;
ring->slot[nm_i].flags = slot_flags;
/* sync was in re_newbuf() */
bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
rxd[l].rx_dmamap, BUS_DMASYNC_POSTREAD);
j = (j == lim) ? 0 : j + 1;
l = (l == lim) ? 0 : l + 1;
rxd[nic_i].rx_dmamap, BUS_DMASYNC_POSTREAD);
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
if (n != kring->nr_hwavail) {
sc->rl_ldata.rl_rx_prodidx = l;
sc->rl_ldata.rl_rx_prodidx = nic_i;
sc->rl_ifp->if_ipackets += n - kring->nr_hwavail;
kring->nr_hwavail = n;
}
kring->nr_kflags &= ~NKR_PENDINTR;
}
/* skip past packets that userspace has released */
j = kring->nr_hwcur;
if (resvd > 0) {
if (resvd + ring->avail >= lim + 1) {
D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
ring->reserved = resvd = 0; // XXX panic...
}
k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
}
if (j != k) { /* userspace has released some packets. */
l = netmap_idx_k2n(kring, j); /* the NIC index */
for (n = 0; j != k; n++) {
struct netmap_slot *slot = ring->slot + j;
struct rl_desc *desc = &sc->rl_ldata.rl_rx_list[l];
int cmd = NETMAP_BUF_SIZE | RL_RDESC_CMD_OWN;
/*
* Second part: skip past packets that userspace has released.
*/
nm_i = kring->nr_hwcur;
if (nm_i != cur) {
nic_i = netmap_idx_k2n(kring, nm_i);
for (n = 0; nm_i != cur; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
if (addr == netmap_buffer_base) { /* bad buf */
return netmap_ring_reinit(kring);
}
struct rl_desc *desc = &sc->rl_ldata.rl_rx_list[nic_i];
int cmd = NETMAP_BUF_SIZE | RL_RDESC_CMD_OWN;
if (l == lim) /* mark end of ring */
if (addr == netmap_buffer_base) /* bad buf */
goto ring_reset;
if (nic_i == lim) /* mark end of ring */
cmd |= RL_RDESC_CMD_EOR;
slot->flags &= ~NS_REPORT;
if (slot->flags & NS_BUF_CHANGED) {
netmap_reload_map(sc->rl_ldata.rl_rx_mtag,
rxd[l].rx_dmamap, addr);
/* buffer has changed, reload map */
desc->rl_bufaddr_lo = htole32(RL_ADDR_LO(paddr));
desc->rl_bufaddr_hi = htole32(RL_ADDR_HI(paddr));
netmap_reload_map(sc->rl_ldata.rl_rx_mtag,
rxd[nic_i].rx_dmamap, addr);
slot->flags &= ~NS_BUF_CHANGED;
}
desc->rl_cmdstat = htole32(cmd);
bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
rxd[l].rx_dmamap, BUS_DMASYNC_PREREAD);
j = (j == lim) ? 0 : j + 1;
l = (l == lim) ? 0 : l + 1;
rxd[nic_i].rx_dmamap,
BUS_DMASYNC_PREREAD);
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
kring->nr_hwavail -= n;
kring->nr_hwcur = k;
/* Flush the RX DMA ring */
kring->nr_hwcur = cur;
bus_dmamap_sync(sc->rl_ldata.rl_rx_list_tag,
sc->rl_ldata.rl_rx_list_map,
BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
}
/* tell userspace that there are new packets */
/* tell userspace that there might be new packets */
ring->avail = kring->nr_hwavail - resvd;
return 0;
ring_reset:
return netmap_ring_reinit(kring);
}
/*
* Additional routines to init the tx and rx rings.
* In other drivers we do that inline in the main code.
@ -299,11 +301,16 @@ re_netmap_tx_init(struct rl_softc *sc)
struct rl_desc *desc;
int i, n;
struct netmap_adapter *na = NA(sc->rl_ifp);
struct netmap_slot *slot = netmap_reset(na, NR_TX, 0, 0);
struct netmap_slot *slot;
if (!na || !(na->na_flags & NAF_NATIVE_ON)) {
return;
}
slot = netmap_reset(na, NR_TX, 0, 0);
/* slot is NULL if we are not in netmap mode */
if (!slot)
return;
return; // XXX cannot happen
/* in netmap mode, overwrite addresses and maps */
txd = sc->rl_ldata.rl_tx_desc;
desc = sc->rl_ldata.rl_tx_list;
@ -377,6 +384,8 @@ re_netmap_attach(struct rl_softc *sc)
na.nm_txsync = re_netmap_txsync;
na.nm_rxsync = re_netmap_rxsync;
na.nm_register = re_netmap_reg;
netmap_attach(&na, 1);
na.num_tx_rings = na.num_rx_rings = 1;
netmap_attach(&na);
}
/* end of file */
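
The re_netmap_tx_init() hunk above also shows the new guard used by the ring-init helpers: they now bail out unless the interface is in native netmap mode. A minimal sketch of that pattern (foo_* names and the sc->ifp field are hypothetical):

static void
foo_netmap_ring_init(struct foo_softc *sc)
{
	struct netmap_adapter *na = NA(sc->ifp);	/* assumed ifp field name */
	struct netmap_slot *slot;

	/* do nothing unless the interface is in native netmap mode */
	if (!na || !(na->na_flags & NAF_NATIVE_ON))
		return;
	slot = netmap_reset(na, NR_TX, 0, 0);
	if (slot == NULL)	/* XXX cannot happen once NAF_NATIVE_ON is set */
		return;
	/* overwrite buffer addresses and dma maps with netmap-provided buffers */
}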

View File

@ -26,16 +26,16 @@
/*
* $FreeBSD$
*
* netmap modifications for ixgbe
* netmap support for: ixgbe
*
* This file is meant to be a reference on how to implement
* netmap support for a network driver.
* This file contains code but only static or inline functions
* that are used by a single driver. To avoid replication of
* code we just #include it near the beginning of the
* standard driver.
* This file contains code but only static or inline functions used
* by a single driver. To avoid replication of code we just #include
* it near the beginning of the standard driver.
*/
#include <net/netmap.h>
#include <sys/selinfo.h>
/*
@ -48,7 +48,10 @@
*/
#include <dev/netmap/netmap_kern.h>
/*
* device-specific sysctl variables:
*
* ix_crcstrip: 0: keep CRC in rx frames (default), 1: strip it.
* During regular operations the CRC is stripped, but on some
* hardware reception of frames not multiple of 64 is slower,
@ -56,17 +59,11 @@
*
* ix_rx_miss, ix_rx_miss_bufs:
* count packets that might be missed due to lost interrupts.
*
* ix_use_dd
* use the dd bit for completed tx transmissions.
* This is tricky, much better to use TDH for now.
*/
SYSCTL_DECL(_dev_netmap);
static int ix_rx_miss, ix_rx_miss_bufs, ix_use_dd, ix_crcstrip;
static int ix_rx_miss, ix_rx_miss_bufs, ix_crcstrip;
SYSCTL_INT(_dev_netmap, OID_AUTO, ix_crcstrip,
CTLFLAG_RW, &ix_crcstrip, 0, "strip CRC on rx frames");
SYSCTL_INT(_dev_netmap, OID_AUTO, ix_use_dd,
CTLFLAG_RW, &ix_use_dd, 0, "use dd instead of tdh to detect tx frames");
SYSCTL_INT(_dev_netmap, OID_AUTO, ix_rx_miss,
CTLFLAG_RW, &ix_rx_miss, 0, "potentially missed rx intr");
SYSCTL_INT(_dev_netmap, OID_AUTO, ix_rx_miss_bufs,
@ -110,283 +107,235 @@ set_crcstrip(struct ixgbe_hw *hw, int onoff)
IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rxc);
}
/*
* Register/unregister. We are already under core lock.
* Register/unregister. We are already under netmap lock.
* Only called on the first register or the last unregister.
*/
static int
ixgbe_netmap_reg(struct ifnet *ifp, int onoff)
ixgbe_netmap_reg(struct netmap_adapter *na, int onoff)
{
struct ifnet *ifp = na->ifp;
struct adapter *adapter = ifp->if_softc;
struct netmap_adapter *na = NA(ifp);
int error = 0;
if (na == NULL)
return EINVAL; /* no netmap support here */
IXGBE_CORE_LOCK(adapter);
ixgbe_disable_intr(adapter);
ixgbe_disable_intr(adapter); // XXX maybe ixgbe_stop ?
/* Tell the stack that the interface is no longer active */
ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
set_crcstrip(&adapter->hw, onoff);
if (onoff) { /* enable netmap mode */
ifp->if_capenable |= IFCAP_NETMAP;
/* save if_transmit and replace with our routine */
na->if_transmit = ifp->if_transmit;
ifp->if_transmit = netmap_transmit;
/*
* reinitialize the adapter, now with netmap flag set,
* so the rings will be set accordingly.
*/
ixgbe_init_locked(adapter);
if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) == 0) {
error = ENOMEM;
goto fail;
}
} else { /* reset normal mode (explicit request or netmap failed) */
fail:
/* restore if_transmit */
ifp->if_transmit = na->if_transmit;
ifp->if_capenable &= ~IFCAP_NETMAP;
/* initialize the card, this time in standard mode */
ixgbe_init_locked(adapter); /* also enables intr */
/* enable or disable flags and callbacks in na and ifp */
if (onoff) {
nm_set_native_flags(na);
} else {
nm_clear_native_flags(na);
}
set_crcstrip(&adapter->hw, onoff);
ixgbe_init_locked(adapter); /* also enables intr */
set_crcstrip(&adapter->hw, onoff); // XXX why twice ?
IXGBE_CORE_UNLOCK(adapter);
return (error);
return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
}
/*
* Reconcile kernel and user view of the transmit ring.
* This routine might be called frequently so it must be efficient.
*
* ring->cur holds the userspace view of the current ring index. Userspace
* has filled the tx slots from the previous call's ring->cur up to but not
* including ring->cur for this call. In this function the kernel updates
* kring->nr_hwcur to ring->cur, thus slots [kring->nr_hwcur, ring->cur) are
* now ready to transmit. At the last interrupt kring->nr_hwavail slots were
* available.
* Userspace wants to send packets up to the one before ring->cur,
* kernel knows kring->nr_hwcur is the first unsent packet.
*
* This function runs under lock (acquired from the caller or internally).
* It must first update ring->avail to what the kernel knows,
* subtract the newly used slots (ring->cur - kring->nr_hwcur)
* from both avail and nr_hwavail, and set ring->nr_hwcur = ring->cur
* issuing a dmamap_sync on all slots.
* Here we push packets out (as many as possible), and possibly
* reclaim buffers from previously completed transmission.
*
* Since ring comes from userspace, its content must be read only once,
* and validated before being used to update the kernel's structures.
* (this is also true for every use of ring in the kernel).
* ring->avail is not used on input, but it is updated on return.
*
* ring->avail is never used, only checked for bogus values.
*
* If flags & FORCE_RECLAIM, reclaim transmitted
* buffers irrespective of interrupt mitigation.
* The caller (netmap) guarantees that there is only one instance
* running at any time. Any interference with other driver
* methods should be handled by the individual drivers.
*/
static int
ixgbe_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int flags)
ixgbe_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
struct adapter *adapter = ifp->if_softc;
struct tx_ring *txr = &adapter->tx_rings[ring_nr];
struct netmap_adapter *na = NA(adapter->ifp);
struct ifnet *ifp = na->ifp;
struct netmap_kring *kring = &na->tx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
u_int j, l, n = 0;
u_int const k = ring->cur, lim = kring->nkr_num_slots - 1;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int n, new_slots;
u_int const lim = kring->nkr_num_slots - 1;
u_int const cur = nm_txsync_prologue(kring, &new_slots);
/*
* ixgbe can generate an interrupt on every tx packet, but it
* seems very expensive, so we interrupt once every half ring,
* or when requested with NS_REPORT
* interrupts on every tx packet are expensive so request
* them every half ring, or where NS_REPORT is set
*/
u_int report_frequency = kring->nkr_num_slots >> 1;
if (k > lim)
/* device-specific */
struct adapter *adapter = ifp->if_softc;
struct tx_ring *txr = &adapter->tx_rings[ring_nr];
int reclaim_tx;
if (cur > lim) /* error checking in nm_txsync_prologue() */
return netmap_ring_reinit(kring);
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
BUS_DMASYNC_POSTREAD);
/*
* Process new packets to send. j is the current index in the
* netmap ring, l is the corresponding index in the NIC ring.
* First part: process new packets to send.
* nm_i is the current index in the netmap ring,
* nic_i is the corresponding index in the NIC ring.
* The two numbers differ because upon a *_init() we reset
* the NIC ring but leave the netmap ring unchanged.
* For the transmit ring, we have
*
* j = kring->nr_hwcur
* l = IXGBE_TDT (not tracked in the driver)
* nm_i = kring->nr_hwcur
* nic_i = IXGBE_TDT (not tracked in the driver)
* and
* j == (l + kring->nkr_hwofs) % ring_size
* nm_i == (nic_i + kring->nkr_hwofs) % ring_size
*
* In this driver kring->nkr_hwofs >= 0, but for other
* drivers it might be negative as well.
*/
j = kring->nr_hwcur;
if (j != k) { /* we have new packets to send */
prefetch(&ring->slot[j]);
l = netmap_idx_k2n(kring, j); /* NIC index */
prefetch(&txr->tx_buffers[l]);
for (n = 0; j != k; n++) {
/*
* Collect per-slot info.
* Note that txbuf and curr are indexed by l.
*
* In this driver we collect the buffer address
* (using the PNMB() macro) because we always
* need to rewrite it into the NIC ring.
* Many other drivers preserve the address, so
* we only need to access it if NS_BUF_CHANGED
* is set.
* XXX note, on this device the dmamap* calls are
* not necessary because tag is 0, however just accessing
* the per-packet tag kills 1Mpps at 900 MHz.
*/
struct netmap_slot *slot = &ring->slot[j];
union ixgbe_adv_tx_desc *curr = &txr->tx_base[l];
struct ixgbe_tx_buf *txbuf = &txr->tx_buffers[l];
uint64_t paddr;
// XXX type for flags and len ?
int flags = ((slot->flags & NS_REPORT) ||
j == 0 || j == report_frequency) ?
IXGBE_TXD_CMD_RS : 0;
/*
* If we have packets to send (kring->nr_hwcur != ring->cur)
* iterate over the netmap ring, fetch length and update
* the corresponding slot in the NIC ring. Some drivers also
* need to update the buffer's physical address in the NIC slot
* even if NS_BUF_CHANGED is not set (PNMB computes the addresses).
*
* The netmap_reload_map() call is especially expensive,
* even when (as in this case) the tag is 0, so do it only
* when the buffer has actually changed.
*
* If possible do not set the report/intr bit on all slots,
* but only a few times per ring or when NS_REPORT is set.
*
* Finally, on 10G and faster drivers, it might be useful
* to prefetch the next slot and txr entry.
*/
nm_i = kring->nr_hwcur;
if (nm_i != cur) { /* we have new packets to send */
nic_i = netmap_idx_k2n(kring, nm_i);
prefetch(&ring->slot[nm_i]);
prefetch(&txr->tx_buffers[nic_i]);
for (n = 0; nm_i != cur; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
u_int len = slot->len;
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
j = (j == lim) ? 0 : j + 1;
l = (l == lim) ? 0 : l + 1;
prefetch(&ring->slot[j]);
prefetch(&txr->tx_buffers[l]);
/* device-specific */
union ixgbe_adv_tx_desc *curr = &txr->tx_base[nic_i];
struct ixgbe_tx_buf *txbuf = &txr->tx_buffers[nic_i];
int flags = (slot->flags & NS_REPORT ||
nic_i == 0 || nic_i == report_frequency) ?
IXGBE_TXD_CMD_RS : 0;
/*
* Quick check for valid addr and len.
* NMB() returns netmap_buffer_base for invalid
* buffer indexes (but the address is still a
* valid one to be used in a ring). slot->len is
* unsigned so no need to check for negative values.
*/
if (addr == netmap_buffer_base || len > NETMAP_BUF_SIZE) {
ring_reset:
return netmap_ring_reinit(kring);
}
/* prefetch for next round */
prefetch(&ring->slot[nm_i + 1]);
prefetch(&txr->tx_buffers[nic_i + 1]);
NM_CHECK_ADDR_LEN(addr, len);
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, unload and reload map */
/* buffer has changed, reload map */
netmap_reload_map(txr->txtag, txbuf->map, addr);
slot->flags &= ~NS_BUF_CHANGED;
}
slot->flags &= ~NS_REPORT;
/*
* Fill the slot in the NIC ring.
* In this driver we need to rewrite the buffer
* address in the NIC ring. Other drivers do not
* need this.
* Use legacy descriptor, it is faster.
*/
slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
/* Fill the slot in the NIC ring. */
/* Use legacy descriptors, they are faster? */
curr->read.buffer_addr = htole64(paddr);
curr->read.olinfo_status = 0;
curr->read.cmd_type_len = htole32(len | flags |
IXGBE_ADVTXD_DCMD_IFCS | IXGBE_TXD_CMD_EOP);
/* make sure changes to the buffer are synced */
bus_dmamap_sync(txr->txtag, txbuf->map, BUS_DMASYNC_PREWRITE);
bus_dmamap_sync(txr->txtag, txbuf->map,
BUS_DMASYNC_PREWRITE);
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
kring->nr_hwcur = k; /* the saved ring->cur */
/* decrease avail by number of packets sent */
kring->nr_hwavail -= n;
kring->nr_hwcur = cur; /* the saved ring->cur */
/* decrease avail by # of packets sent minus previous ones */
kring->nr_hwavail -= new_slots;
/* synchronize the NIC ring */
bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
/* (re)start the transmitter up to slot l (excluded) */
IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), l);
/* (re)start the tx unit up to slot nic_i (excluded) */
IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), nic_i);
}
/*
* Reclaim buffers for completed transmissions.
* Second part: reclaim buffers for completed transmissions.
* Because this is expensive (we read a NIC register etc.)
* we only do it in specific cases (see below).
* In all cases kring->nr_kflags indicates which slot will be
* checked upon a tx interrupt (nkr_num_slots means none).
*/
if (flags & NAF_FORCE_RECLAIM) {
j = 1; /* forced reclaim, ignore interrupts */
kring->nr_kflags = kring->nkr_num_slots;
reclaim_tx = 1; /* forced reclaim */
} else if (kring->nr_hwavail > 0) {
j = 0; /* buffers still available: no reclaim, ignore intr. */
kring->nr_kflags = kring->nkr_num_slots;
reclaim_tx = 0; /* have buffers, no reclaim */
} else {
/*
* no buffers available, locate a slot for which we request
* ReportStatus (approximately half ring after next_to_clean)
* and record it in kring->nr_kflags.
* If the slot has DD set, do the reclaim looking at TDH,
* otherwise we go to sleep (in netmap_poll()) and will be
* woken up when slot nr_kflags will be ready.
* No buffers available. Locate previous slot with
* REPORT_STATUS set.
* If the slot has DD set, we can reclaim space,
* otherwise wait for the next interrupt.
* This enables interrupt moderation on the tx
* side though it might reduce throughput.
*/
struct ixgbe_legacy_tx_desc *txd =
(struct ixgbe_legacy_tx_desc *)txr->tx_base;
j = txr->next_to_clean + kring->nkr_num_slots/2;
if (j >= kring->nkr_num_slots)
j -= kring->nkr_num_slots;
nic_i = txr->next_to_clean + report_frequency;
if (nic_i > lim)
nic_i -= lim + 1;
// round to the closest with dd set
j= (j < kring->nkr_num_slots / 4 || j >= kring->nkr_num_slots*3/4) ?
nic_i = (nic_i < kring->nkr_num_slots / 4 ||
nic_i >= kring->nkr_num_slots*3/4) ?
0 : report_frequency;
kring->nr_kflags = j; /* the slot to check */
j = txd[j].upper.fields.status & IXGBE_TXD_STAT_DD; // XXX cpu_to_le32 ?
reclaim_tx = txd[nic_i].upper.fields.status & IXGBE_TXD_STAT_DD; // XXX cpu_to_le32 ?
}
if (j) {
int delta;
if (reclaim_tx) {
/*
* Record completed transmissions.
* We (re)use the driver's txr->next_to_clean to keep
* track of the most recently completed transmission.
*
* The datasheet discourages the use of TDH to find out the
* number of sent packets. We should rather check the DD
* status bit in a packet descriptor. However, we only set
* the "report status" bit for some descriptors (a kind of
* interrupt mitigation), so we can only check on those.
* For the time being we use TDH, as we do it infrequently
* enough not to pose performance problems.
* The datasheet discourages the use of TDH to find
* out the number of sent packets, but we only set
* REPORT_STATUS in a few slots so TDH is the only
* good way.
*/
if (ix_use_dd) {
struct ixgbe_legacy_tx_desc *txd =
(struct ixgbe_legacy_tx_desc *)txr->tx_base;
u_int k1 = netmap_idx_k2n(kring, kring->nr_hwcur);
l = txr->next_to_clean;
delta = 0;
while (l != k1 &&
txd[l].upper.fields.status & IXGBE_TXD_STAT_DD) {
delta++;
l = (l == lim) ? 0 : l + 1;
nic_i = IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(ring_nr));
if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
D("TDH wrap %d", nic_i);
nic_i -= kring->nkr_num_slots;
}
} else {
l = IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(ring_nr));
if (l >= kring->nkr_num_slots) { /* XXX can happen */
D("TDH wrap %d", l);
l -= kring->nkr_num_slots;
}
delta = l - txr->next_to_clean;
}
if (delta) {
if (nic_i != txr->next_to_clean) {
n = (nic_i + lim + 1) - txr->next_to_clean;
if (n > lim)
n -= lim + 1;
/* some tx completed, increment avail */
if (delta < 0)
delta += kring->nkr_num_slots;
txr->next_to_clean = l;
kring->nr_hwavail += delta;
if (kring->nr_hwavail > lim)
goto ring_reset;
txr->next_to_clean = nic_i;
kring->nr_hwavail += n;
if (kring->nr_hwavail > lim) {
RD(5, "bad hwavail %d",
kring->nr_hwavail);
return netmap_ring_reinit(kring);
}
}
}
/* update avail to what the kernel knows */
ring->avail = kring->nr_hwavail;
nm_txsync_finalize(kring, cur);
return 0;
}
@ -394,11 +343,12 @@ ixgbe_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int flags)
/*
* Reconcile kernel and user view of the receive ring.
* Same as for the txsync, this routine must be efficient and
* avoid races in accessing the shared regions.
* Same as for the txsync, this routine must be efficient.
* The caller guarantees a single invocation, but races against
* the rest of the driver should be handled here.
*
* When called, userspace has read data from slots kring->nr_hwcur
* up to ring->cur (excluded).
* When called, userspace has released buffers up to
* ring->cur - ring->reserved (last one excluded).
*
* The last interrupt reported kring->nr_hwavail slots available
* after kring->nr_hwcur.
@ -410,18 +360,23 @@ ixgbe_netmap_txsync(struct ifnet *ifp, u_int ring_nr, int flags)
* of whether or not we received an interrupt.
*/
static int
ixgbe_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int flags)
ixgbe_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
struct adapter *adapter = ifp->if_softc;
struct rx_ring *rxr = &adapter->rx_rings[ring_nr];
struct netmap_adapter *na = NA(adapter->ifp);
struct ifnet *ifp = na->ifp;
struct netmap_kring *kring = &na->rx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
u_int j, l, n, lim = kring->nkr_num_slots - 1;
u_int nm_i; /* index into the netmap ring */
u_int nic_i; /* index into the NIC ring */
u_int n, resvd;
u_int const lim = kring->nkr_num_slots - 1;
u_int const cur = nm_rxsync_prologue(kring, &resvd); /* cur + res */
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
u_int k = ring->cur, resvd = ring->reserved;
if (k > lim)
/* device-specific */
struct adapter *adapter = ifp->if_softc;
struct rx_ring *rxr = &adapter->rx_rings[ring_nr];
if (cur > lim)
return netmap_ring_reinit(kring);
/* XXX check sync modes */
@ -429,17 +384,17 @@ ixgbe_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int flags)
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
/*
* First part, import newly received packets into the netmap ring.
* First part: import newly received packets.
*
* j is the index of the next free slot in the netmap ring,
* and l is the index of the next received packet in the NIC ring,
* nm_i is the index of the next free slot in the netmap ring,
* nic_i is the index of the next received packet in the NIC ring,
* and they may differ in case if_init() has been called while
* in netmap mode. For the receive ring we have
*
* j = (kring->nr_hwcur + kring->nr_hwavail) % ring_size
* l = rxr->next_to_check;
* nm_i = (kring->nr_hwcur + kring->nr_hwavail) % ring_size
* nic_i = rxr->next_to_check;
* and
* j == (l + kring->nkr_hwofs) % ring_size
* nm_i == (nic_i + kring->nkr_hwofs) % ring_size
*
* rxr->next_to_check is set to 0 on a ring reinit
*/
@ -447,21 +402,21 @@ ixgbe_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int flags)
int crclen = ix_crcstrip ? 0 : 4;
uint16_t slot_flags = kring->nkr_slot_flags;
l = rxr->next_to_check;
j = netmap_idx_n2k(kring, l);
nic_i = rxr->next_to_check;
nm_i = netmap_idx_n2k(kring, nic_i);
for (n = 0; ; n++) {
union ixgbe_adv_rx_desc *curr = &rxr->rx_base[l];
union ixgbe_adv_rx_desc *curr = &rxr->rx_base[nic_i];
uint32_t staterr = le32toh(curr->wb.upper.status_error);
if ((staterr & IXGBE_RXD_STAT_DD) == 0)
break;
ring->slot[j].len = le16toh(curr->wb.upper.length) - crclen;
ring->slot[j].flags = slot_flags;
ring->slot[nm_i].len = le16toh(curr->wb.upper.length) - crclen;
ring->slot[nm_i].flags = slot_flags;
bus_dmamap_sync(rxr->ptag,
rxr->rx_buffers[l].pmap, BUS_DMASYNC_POSTREAD);
j = (j == lim) ? 0 : j + 1;
l = (l == lim) ? 0 : l + 1;
rxr->rx_buffers[nic_i].pmap, BUS_DMASYNC_POSTREAD);
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
if (n) { /* update the state variables */
if (netmap_no_pendintr && !force_update) {
@ -469,48 +424,36 @@ ixgbe_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int flags)
ix_rx_miss ++;
ix_rx_miss_bufs += n;
}
rxr->next_to_check = l;
rxr->next_to_check = nic_i;
kring->nr_hwavail += n;
}
kring->nr_kflags &= ~NKR_PENDINTR;
}
/*
* Skip past packets that userspace has released
* (from kring->nr_hwcur to ring->cur - ring->reserved excluded),
* Second part: skip past packets that userspace has released.
* (kring->nr_hwcur to ring->cur - ring->reserved excluded),
* and make the buffers available for reception.
* As usual j is the index in the netmap ring, l is the index
* in the NIC ring, and j == (l + kring->nkr_hwofs) % ring_size
* As usual nm_i is the index in the netmap ring,
* nic_i is the index in the NIC ring, and
* nm_i == (nic_i + kring->nkr_hwofs) % ring_size
*/
j = kring->nr_hwcur;
if (resvd > 0) {
if (resvd + ring->avail >= lim + 1) {
D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
ring->reserved = resvd = 0; // XXX panic...
}
k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
}
if (j != k) { /* userspace has released some packets. */
l = netmap_idx_k2n(kring, j);
for (n = 0; j != k; n++) {
/* collect per-slot info, with similar validations
* and flag handling as in the txsync code.
*
* NOTE curr and rxbuf are indexed by l.
* Also, this driver needs to update the physical
* address in the NIC ring, but other drivers
* may not have this requirement.
*/
struct netmap_slot *slot = &ring->slot[j];
union ixgbe_adv_rx_desc *curr = &rxr->rx_base[l];
struct ixgbe_rx_buf *rxbuf = &rxr->rx_buffers[l];
nm_i = kring->nr_hwcur;
if (nm_i != cur) {
nic_i = netmap_idx_k2n(kring, nm_i);
for (n = 0; nm_i != cur; n++) {
struct netmap_slot *slot = &ring->slot[nm_i];
uint64_t paddr;
void *addr = PNMB(slot, &paddr);
union ixgbe_adv_rx_desc *curr = &rxr->rx_base[nic_i];
struct ixgbe_rx_buf *rxbuf = &rxr->rx_buffers[nic_i];
if (addr == netmap_buffer_base) /* bad buf */
goto ring_reset;
if (slot->flags & NS_BUF_CHANGED) {
/* buffer has changed, reload map */
netmap_reload_map(rxr->ptag, rxbuf->pmap, addr);
slot->flags &= ~NS_BUF_CHANGED;
}
@ -518,20 +461,23 @@ ixgbe_netmap_rxsync(struct ifnet *ifp, u_int ring_nr, int flags)
curr->read.pkt_addr = htole64(paddr);
bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
BUS_DMASYNC_PREREAD);
j = (j == lim) ? 0 : j + 1;
l = (l == lim) ? 0 : l + 1;
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
kring->nr_hwavail -= n;
kring->nr_hwcur = k;
kring->nr_hwcur = cur;
bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
/* IMPORTANT: we must leave one free slot in the ring,
* so move l back by one unit
/*
* IMPORTANT: we must leave one free slot in the ring,
* so move nic_i back by one unit
*/
l = (l == 0) ? lim : l - 1;
IXGBE_WRITE_REG(&adapter->hw, IXGBE_RDT(rxr->me), l);
nic_i = (nic_i == 0) ? lim : nic_i - 1;
IXGBE_WRITE_REG(&adapter->hw, IXGBE_RDT(rxr->me), nic_i);
}
/* tell userspace that there are new packets */
/* tell userspace that there might be new packets */
ring->avail = kring->nr_hwavail - resvd;
return 0;
@ -562,7 +508,8 @@ ixgbe_netmap_attach(struct adapter *adapter)
na.nm_txsync = ixgbe_netmap_txsync;
na.nm_rxsync = ixgbe_netmap_rxsync;
na.nm_register = ixgbe_netmap_reg;
netmap_attach(&na, adapter->num_queues);
}
na.num_tx_rings = na.num_rx_rings = adapter->num_queues;
netmap_attach(&na);
}
/* end of file */

File diff suppressed because it is too large


@ -0,0 +1,410 @@
/*
* Copyright (C) 2013 Universita` di Pisa. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/* $FreeBSD$ */
#include <sys/types.h>
#include <sys/module.h>
#include <sys/errno.h>
#include <sys/param.h> /* defines used in kernel.h */
#include <sys/kernel.h> /* types used in module initialization */
#include <sys/conf.h> /* DEV_MODULE */
#include <sys/rwlock.h>
#include <vm/vm.h> /* vtophys */
#include <vm/pmap.h> /* vtophys */
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/uma.h>
#include <sys/malloc.h>
#include <sys/socket.h> /* sockaddrs */
#include <sys/selinfo.h>
#include <net/if.h>
#include <net/if_var.h>
#include <machine/bus.h> /* bus_dmamap_* */
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>
/* ======================== FREEBSD-SPECIFIC ROUTINES ================== */
/*
* Intercept the rx routine in the standard device driver.
* Second argument is non-zero to intercept, 0 to restore
*/
int
netmap_catch_rx(struct netmap_adapter *na, int intercept)
{
struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
struct ifnet *ifp = na->ifp;
if (intercept) {
if (gna->save_if_input) {
D("cannot intercept again");
return EINVAL; /* already set */
}
gna->save_if_input = ifp->if_input;
ifp->if_input = generic_rx_handler;
} else {
if (!gna->save_if_input){
D("cannot restore");
return EINVAL; /* not saved */
}
ifp->if_input = gna->save_if_input;
gna->save_if_input = NULL;
}
return 0;
}
/*
* Intercept the packet steering routine in the tx path,
* so that we can decide which queue is used for an mbuf.
* Second argument is non-zero to intercept, 0 to restore.
*
* XXX see if FreeBSD has such a mechanism
*/
void
netmap_catch_packet_steering(struct netmap_generic_adapter *na, int enable)
{
if (enable) {
} else {
}
}
/* Transmit routine used by generic_netmap_txsync(). Returns 0 on success
* and non-zero on error (which may be packet drops or other errors).
* addr and len identify the netmap buffer, m is the (preallocated)
* mbuf to use for transmissions.
*
* We should add a reference to the mbuf so the m_freem() at the end
* of the transmission does not consume resources.
*
* On FreeBSD, and on multiqueue cards, we can force the queue using
* if ((m->m_flags & M_FLOWID) != 0)
* i = m->m_pkthdr.flowid % adapter->num_queues;
* else
* i = curcpu % adapter->num_queues;
*
*/
int
generic_xmit_frame(struct ifnet *ifp, struct mbuf *m,
void *addr, u_int len, u_int ring_nr)
{
int ret;
m->m_len = m->m_pkthdr.len = 0;
// copy data to the mbuf
m_copyback(m, 0, len, addr);
// inc refcount. We are alone, so we can skip the atomic
atomic_fetchadd_int(m->m_ext.ref_cnt, 1);
m->m_flags |= M_FLOWID;
m->m_pkthdr.flowid = ring_nr;
m->m_pkthdr.rcvif = ifp; /* used for tx notification */
ret = ifp->if_transmit(ifp, m);
return ret;
}
/*
* The following two functions are empty until we have a generic
* way to extract the info from the ifp
*/
int
generic_find_num_desc(struct ifnet *ifp, unsigned int *tx, unsigned int *rx)
{
D("called");
return 0;
}
void
generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq)
{
D("called");
*txq = 1;
*rxq = 1;
}
void netmap_mitigation_init(struct netmap_generic_adapter *na)
{
ND("called");
na->mit_pending = 0;
}
void netmap_mitigation_start(struct netmap_generic_adapter *na)
{
ND("called");
}
void netmap_mitigation_restart(struct netmap_generic_adapter *na)
{
ND("called");
}
int netmap_mitigation_active(struct netmap_generic_adapter *na)
{
ND("called");
return 0;
}
void netmap_mitigation_cleanup(struct netmap_generic_adapter *na)
{
ND("called");
}
/*
* In order to track whether pages are still mapped, we hook into
* the standard cdev_pager and intercept the constructor and
* destructor.
*/
struct netmap_vm_handle_t {
struct cdev *dev;
struct netmap_priv_d *priv;
};
static int
netmap_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
vm_ooffset_t foff, struct ucred *cred, u_short *color)
{
struct netmap_vm_handle_t *vmh = handle;
D("handle %p size %jd prot %d foff %jd",
handle, (intmax_t)size, prot, (intmax_t)foff);
dev_ref(vmh->dev);
return 0;
}
static void
netmap_dev_pager_dtor(void *handle)
{
struct netmap_vm_handle_t *vmh = handle;
struct cdev *dev = vmh->dev;
struct netmap_priv_d *priv = vmh->priv;
D("handle %p", handle);
netmap_dtor(priv);
free(vmh, M_DEVBUF);
dev_rel(dev);
}
static int
netmap_dev_pager_fault(vm_object_t object, vm_ooffset_t offset,
int prot, vm_page_t *mres)
{
struct netmap_vm_handle_t *vmh = object->handle;
struct netmap_priv_d *priv = vmh->priv;
vm_paddr_t paddr;
vm_page_t page;
vm_memattr_t memattr;
vm_pindex_t pidx;
ND("object %p offset %jd prot %d mres %p",
object, (intmax_t)offset, prot, mres);
memattr = object->memattr;
pidx = OFF_TO_IDX(offset);
paddr = netmap_mem_ofstophys(priv->np_mref, offset);
if (paddr == 0)
return VM_PAGER_FAIL;
if (((*mres)->flags & PG_FICTITIOUS) != 0) {
/*
* If the passed in result page is a fake page, update it with
* the new physical address.
*/
page = *mres;
vm_page_updatefake(page, paddr, memattr);
} else {
/*
* Replace the passed-in reqpage page with our own fake page and
* free up all of the original pages.
*/
#ifndef VM_OBJECT_WUNLOCK /* FreeBSD < 10.x */
#define VM_OBJECT_WUNLOCK VM_OBJECT_UNLOCK
#define VM_OBJECT_WLOCK VM_OBJECT_LOCK
#endif /* VM_OBJECT_WUNLOCK */
VM_OBJECT_WUNLOCK(object);
page = vm_page_getfake(paddr, memattr);
VM_OBJECT_WLOCK(object);
vm_page_lock(*mres);
vm_page_free(*mres);
vm_page_unlock(*mres);
*mres = page;
vm_page_insert(page, object, pidx);
}
page->valid = VM_PAGE_BITS_ALL;
return (VM_PAGER_OK);
}
static struct cdev_pager_ops netmap_cdev_pager_ops = {
.cdev_pg_ctor = netmap_dev_pager_ctor,
.cdev_pg_dtor = netmap_dev_pager_dtor,
.cdev_pg_fault = netmap_dev_pager_fault,
};
static int
netmap_mmap_single(struct cdev *cdev, vm_ooffset_t *foff,
vm_size_t objsize, vm_object_t *objp, int prot)
{
int error;
struct netmap_vm_handle_t *vmh;
struct netmap_priv_d *priv;
vm_object_t obj;
D("cdev %p foff %jd size %jd objp %p prot %d", cdev,
(intmax_t )*foff, (intmax_t )objsize, objp, prot);
vmh = malloc(sizeof(struct netmap_vm_handle_t), M_DEVBUF,
M_NOWAIT | M_ZERO);
if (vmh == NULL)
return ENOMEM;
vmh->dev = cdev;
NMG_LOCK();
error = devfs_get_cdevpriv((void**)&priv);
if (error)
goto err_unlock;
vmh->priv = priv;
priv->np_refcount++;
NMG_UNLOCK();
error = netmap_get_memory(priv);
if (error)
goto err_deref;
obj = cdev_pager_allocate(vmh, OBJT_DEVICE,
&netmap_cdev_pager_ops, objsize, prot,
*foff, NULL);
if (obj == NULL) {
D("cdev_pager_allocate failed");
error = EINVAL;
goto err_deref;
}
*objp = obj;
return 0;
err_deref:
NMG_LOCK();
priv->np_refcount--;
err_unlock:
NMG_UNLOCK();
// err:
free(vmh, M_DEVBUF);
return error;
}
// XXX can we remove this ?
static int
netmap_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
{
if (netmap_verbose)
D("dev %p fflag 0x%x devtype %d td %p",
dev, fflag, devtype, td);
return 0;
}
static int
netmap_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
{
struct netmap_priv_d *priv;
int error;
(void)dev;
(void)oflags;
(void)devtype;
(void)td;
// XXX wait or nowait ?
priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF,
M_NOWAIT | M_ZERO);
if (priv == NULL)
return ENOMEM;
error = devfs_set_cdevpriv(priv, netmap_dtor);
if (error)
return error;
priv->np_refcount = 1;
return 0;
}
struct cdevsw netmap_cdevsw = {
.d_version = D_VERSION,
.d_name = "netmap",
.d_open = netmap_open,
.d_mmap_single = netmap_mmap_single,
.d_ioctl = netmap_ioctl,
.d_poll = netmap_poll,
.d_close = netmap_close,
};
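/*
 * For reference, a minimal userspace sketch of how the entry points
 * above get exercised.  This is only an illustration under some
 * assumptions: it relies on the struct nmreq / NIOCREGIF definitions
 * from <net/netmap.h> of this era, error handling is reduced to the
 * bare minimum, and the interface name is a placeholder.
 */
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <fcntl.h>
#include <poll.h>
#include <string.h>
#include <unistd.h>
#include <net/if.h>
#include <net/netmap.h>

static int
example_user_open(const char *ifname)	/* e.g. "em0" (placeholder) */
{
	struct nmreq req;
	struct pollfd pfd;
	void *mem = MAP_FAILED;
	int fd;

	fd = open("/dev/netmap", O_RDWR);	/* -> netmap_open() */
	if (fd < 0)
		return -1;
	memset(&req, 0, sizeof(req));
	req.nr_version = NETMAP_API;
	strncpy(req.nr_name, ifname, sizeof(req.nr_name) - 1);
	if (ioctl(fd, NIOCREGIF, &req) == 0)	/* -> netmap_ioctl() */
		mem = mmap(NULL, req.nr_memsize, PROT_READ | PROT_WRITE,
		    MAP_SHARED, fd, 0);		/* -> netmap_mmap_single() */
	if (mem != MAP_FAILED) {
		pfd.fd = fd;
		pfd.events = POLLIN;
		(void)poll(&pfd, 1, 1000);	/* -> netmap_poll() */
		munmap(mem, req.nr_memsize);
	}
	close(fd);	/* the last close runs netmap_dtor() via cdevpriv */
	return (mem == MAP_FAILED) ? -1 : 0;
}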
/*
* Kernel entry point.
*
* Initialize/finalize the module and return.
*
* Return 0 on success, errno on failure.
*/
static int
netmap_loader(__unused struct module *module, int event, __unused void *arg)
{
int error = 0;
switch (event) {
case MOD_LOAD:
error = netmap_init();
break;
case MOD_UNLOAD:
netmap_fini();
break;
default:
error = EOPNOTSUPP;
break;
}
return (error);
}
DEV_MODULE(netmap, netmap_loader, NULL);


@ -0,0 +1,818 @@
/*
* Copyright (C) 2013 Universita` di Pisa. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* This module implements netmap support on top of standard,
* unmodified device drivers.
*
* A NIOCREGIF request is handled here if the device does not
* have native support. TX and RX rings are emulated as follows:
*
* NIOCREGIF
* We preallocate a block of TX mbufs (roughly as many as
* tx descriptors; the number is not critical) to speed up
* operation during transmissions. The refcount on most of
* these buffers is artificially bumped up so we can recycle
* them more easily. Also, the destructor is intercepted
* so we use it as an interrupt notification to wake up
* processes blocked on a poll().
*
* For each receive ring we allocate one "struct mbq"
* (an mbuf tailq plus a spinlock). We intercept packets
* (through if_input)
* on the receive path and put them in the mbq from which
* netmap receive routines can grab them.
*
* TX:
* in the generic_txsync() routine, netmap buffers are copied
* (or linked, in the future) to the preallocated mbufs
* and pushed to the transmit queue. Some of these mbufs
* (those with NS_REPORT, or otherwise every half ring)
* have the refcount=1, others have refcount=2.
* When the destructor is invoked, we take that as
* a notification that all mbufs up to that one in
* the specific ring have been completed, and generate
* the equivalent of a transmit interrupt.
*
* RX:
*
*/
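/*
 * The "struct mbq" used for the rx path lives in netmap_mbq.[ch]
 * (not shown in this diff).  As a rough standalone model of the idea,
 * a lock-protected FIFO with an element count, one can picture
 * something like the sketch below; the toy types and the pthread
 * mutex are stand-ins for the real mbuf chain and spinlock.
 */
#include <pthread.h>
#include <stddef.h>

struct toy_pkt {			/* stands in for a struct mbuf */
	struct toy_pkt *next;		/* like m_nextpkt */
};

struct toy_mbq {
	struct toy_pkt *head, *tail;
	unsigned int count;		/* what mbq_len() would report */
	pthread_mutex_t lock;		/* the real mbq uses a spinlock */
};

static void
toy_mbq_init(struct toy_mbq *q)
{
	q->head = q->tail = NULL;
	q->count = 0;
	pthread_mutex_init(&q->lock, NULL);
}

/* producer side: the intercepted rx path appends packets */
static void
toy_mbq_enqueue(struct toy_mbq *q, struct toy_pkt *p)
{
	p->next = NULL;
	pthread_mutex_lock(&q->lock);
	if (q->tail != NULL)
		q->tail->next = p;
	else
		q->head = p;
	q->tail = p;
	q->count++;
	pthread_mutex_unlock(&q->lock);
}

/* consumer side: rxsync drains the queue into the netmap ring */
static struct toy_pkt *
toy_mbq_dequeue(struct toy_mbq *q)
{
	struct toy_pkt *p;

	pthread_mutex_lock(&q->lock);
	p = q->head;
	if (p != NULL) {
		q->head = p->next;
		if (q->head == NULL)
			q->tail = NULL;
		q->count--;
	}
	pthread_mutex_unlock(&q->lock);
	return p;
}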
#ifdef __FreeBSD__
#include <sys/cdefs.h> /* prerequisite */
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/lock.h> /* PROT_EXEC */
#include <sys/rwlock.h>
#include <sys/socket.h> /* sockaddrs */
#include <sys/selinfo.h>
#include <net/if.h>
#include <net/if_var.h>
#include <machine/bus.h> /* bus_dmamap_* in netmap_kern.h */
// XXX temporary - D() defined here
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>
#define rtnl_lock() D("rtnl_lock called");
#define rtnl_unlock() D("rtnl_unlock called");
#define MBUF_TXQ(m) ((m)->m_pkthdr.flowid)
#define smp_mb()
/*
* mbuf wrappers
*/
/*
* we allocate an EXT_PACKET
*/
#define netmap_get_mbuf(len) m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR|M_NOFREE)
/* mbuf destructor, also need to change the type to EXT_EXTREF,
* add an M_NOFREE flag, and then clear the flag and
* chain into uma_zfree(zone_pack, mf)
* (or reinstall the buffer ?)
*/
#define SET_MBUF_DESTRUCTOR(m, fn) do { \
(m)->m_ext.ext_free = (void *)fn; \
(m)->m_ext.ext_type = EXT_EXTREF; \
} while (0)
#define GET_MBUF_REFCNT(m) ((m)->m_ext.ref_cnt ? *(m)->m_ext.ref_cnt : -1)
#else /* linux */
#include "bsd_glue.h"
#include <linux/rtnetlink.h> /* rtnl_[un]lock() */
#include <linux/ethtool.h> /* struct ethtool_ops, get_ringparam */
#include <linux/hrtimer.h>
//#define RATE /* Enables communication statistics. */
//#define REG_RESET
#endif /* linux */
/* Common headers. */
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>
/* ======================== usage stats =========================== */
#ifdef RATE
#define IFRATE(x) x
struct rate_stats {
unsigned long txpkt;
unsigned long txsync;
unsigned long txirq;
unsigned long rxpkt;
unsigned long rxirq;
unsigned long rxsync;
};
struct rate_context {
unsigned refcount;
struct timer_list timer;
struct rate_stats new;
struct rate_stats old;
};
#define RATE_PRINTK(_NAME_) \
printk( #_NAME_ " = %lu Hz\n", (cur._NAME_ - ctx->old._NAME_)/RATE_PERIOD);
#define RATE_PERIOD 2
static void rate_callback(unsigned long arg)
{
struct rate_context * ctx = (struct rate_context *)arg;
struct rate_stats cur = ctx->new;
int r;
RATE_PRINTK(txpkt);
RATE_PRINTK(txsync);
RATE_PRINTK(txirq);
RATE_PRINTK(rxpkt);
RATE_PRINTK(rxsync);
RATE_PRINTK(rxirq);
printk("\n");
ctx->old = cur;
r = mod_timer(&ctx->timer, jiffies +
msecs_to_jiffies(RATE_PERIOD * 1000));
if (unlikely(r))
D("[v1000] Error: mod_timer()");
}
static struct rate_context rate_ctx;
#else /* !RATE */
#define IFRATE(x)
#endif /* !RATE */
/* =============== GENERIC NETMAP ADAPTER SUPPORT ================= */
#define GENERIC_BUF_SIZE netmap_buf_size /* Size of the mbufs in the Tx pool. */
/*
* Wrapper used by the generic adapter layer to notify
* the poller threads. Differently from netmap_rx_irq(), we check
* only IFCAP_NETMAP instead of NAF_NATIVE_ON to enable the irq.
*/
static void
netmap_generic_irq(struct ifnet *ifp, u_int q, u_int *work_done)
{
if (unlikely(!(ifp->if_capenable & IFCAP_NETMAP)))
return;
netmap_common_irq(ifp, q, work_done);
}
/* Enable/disable netmap mode for a generic network interface. */
int generic_netmap_register(struct netmap_adapter *na, int enable)
{
struct ifnet *ifp = na->ifp;
struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
struct mbuf *m;
int error;
int i, r;
if (!na)
return EINVAL;
#ifdef REG_RESET
error = ifp->netdev_ops->ndo_stop(ifp);
if (error) {
return error;
}
#endif /* REG_RESET */
if (enable) { /* Enable netmap mode. */
/* Initialize the rx queue, as generic_rx_handler() can
* be called as soon as netmap_catch_rx() returns.
*/
for (r=0; r<na->num_rx_rings; r++) {
mbq_safe_init(&na->rx_rings[r].rx_queue);
na->rx_rings[r].nr_ntc = 0;
}
/* Init the mitigation timer. */
netmap_mitigation_init(gna);
/*
* Preallocate packet buffers for the tx rings.
*/
for (r=0; r<na->num_tx_rings; r++) {
na->tx_rings[r].nr_ntc = 0;
na->tx_rings[r].tx_pool = malloc(na->num_tx_desc * sizeof(struct mbuf *),
M_DEVBUF, M_NOWAIT | M_ZERO);
if (!na->tx_rings[r].tx_pool) {
D("tx_pool allocation failed");
error = ENOMEM;
goto free_tx_pool;
}
for (i=0; i<na->num_tx_desc; i++) {
m = netmap_get_mbuf(GENERIC_BUF_SIZE);
if (!m) {
D("tx_pool[%d] allocation failed", i);
error = ENOMEM;
goto free_mbufs;
}
na->tx_rings[r].tx_pool[i] = m;
}
}
rtnl_lock();
/* Prepare to intercept incoming traffic. */
error = netmap_catch_rx(na, 1);
if (error) {
D("netdev_rx_handler_register() failed");
goto register_handler;
}
ifp->if_capenable |= IFCAP_NETMAP;
/* Make netmap control the packet steering. */
netmap_catch_packet_steering(gna, 1);
rtnl_unlock();
#ifdef RATE
if (rate_ctx.refcount == 0) {
D("setup_timer()");
memset(&rate_ctx, 0, sizeof(rate_ctx));
setup_timer(&rate_ctx.timer, &rate_callback, (unsigned long)&rate_ctx);
if (mod_timer(&rate_ctx.timer, jiffies + msecs_to_jiffies(1500))) {
D("Error: mod_timer()");
}
}
rate_ctx.refcount++;
#endif /* RATE */
} else { /* Disable netmap mode. */
rtnl_lock();
ifp->if_capenable &= ~IFCAP_NETMAP;
/* Release packet steering control. */
netmap_catch_packet_steering(gna, 0);
/* Do not intercept packets on the rx path. */
netmap_catch_rx(na, 0);
rtnl_unlock();
/* Free the mbufs going to the netmap rings */
for (r=0; r<na->num_rx_rings; r++) {
mbq_safe_purge(&na->rx_rings[r].rx_queue);
mbq_safe_destroy(&na->rx_rings[r].rx_queue);
}
netmap_mitigation_cleanup(gna);
for (r=0; r<na->num_tx_rings; r++) {
for (i=0; i<na->num_tx_desc; i++) {
m_freem(na->tx_rings[r].tx_pool[i]);
}
free(na->tx_rings[r].tx_pool, M_DEVBUF);
}
#ifdef RATE
if (--rate_ctx.refcount == 0) {
D("del_timer()");
del_timer(&rate_ctx.timer);
}
#endif
}
#ifdef REG_RESET
error = ifp->netdev_ops->ndo_open(ifp);
if (error) {
goto alloc_tx_pool;
}
#endif
return 0;
register_handler:
rtnl_unlock();
free_tx_pool:
r--;
i = na->num_tx_desc; /* Useless, but just to stay safe. */
free_mbufs:
i--;
for (; r>=0; r--) {
for (; i>=0; i--) {
m_freem(na->tx_rings[r].tx_pool[i]);
}
free(na->tx_rings[r].tx_pool, M_DEVBUF);
i = na->num_tx_desc - 1;
}
return error;
}
/*
* Callback invoked when the device driver frees an mbuf used
* by netmap to transmit a packet. This usually happens when
* the NIC notifies the driver that transmission is completed.
*/
static void
generic_mbuf_destructor(struct mbuf *m)
{
if (netmap_verbose)
D("Tx irq (%p) queue %d", m, MBUF_TXQ(m));
netmap_generic_irq(MBUF_IFP(m), MBUF_TXQ(m), NULL);
#ifdef __FreeBSD__
m->m_ext.ext_type = EXT_PACKET;
m->m_ext.ext_free = NULL;
if (*(m->m_ext.ref_cnt) == 0)
*(m->m_ext.ref_cnt) = 1;
uma_zfree(zone_pack, m);
#endif /* __FreeBSD__ */
IFRATE(rate_ctx.new.txirq++);
}
/* Record completed transmissions and update hwavail.
*
* nr_ntc is the oldest tx buffer not yet completed
* (same as nr_hwavail + nr_hwcur + 1),
* nr_hwcur is the first unsent buffer.
* When cleaning, we try to recover buffers between nr_ntc and nr_hwcur.
*/
static int
generic_netmap_tx_clean(struct netmap_kring *kring)
{
u_int num_slots = kring->nkr_num_slots;
u_int ntc = kring->nr_ntc;
u_int hwcur = kring->nr_hwcur;
u_int n = 0;
struct mbuf **tx_pool = kring->tx_pool;
while (ntc != hwcur) { /* buffers not completed */
struct mbuf *m = tx_pool[ntc];
if (unlikely(m == NULL)) {
/* try to replenish the entry */
tx_pool[ntc] = m = netmap_get_mbuf(GENERIC_BUF_SIZE);
if (unlikely(m == NULL)) {
D("mbuf allocation failed, XXX error");
// XXX how do we proceed ? break ?
return -ENOMEM;
}
} else if (GET_MBUF_REFCNT(m) != 1) {
break; /* This mbuf is still busy: its refcnt is 2. */
}
if (unlikely(++ntc == num_slots)) {
ntc = 0;
}
n++;
}
kring->nr_ntc = ntc;
kring->nr_hwavail += n;
ND("tx completed [%d] -> hwavail %d", n, kring->nr_hwavail);
return n;
}
/*
* We have pending packets in the driver between nr_ntc and hwcur.
* Compute a position in the middle, to be used to generate
* a notification.
*/
static inline u_int
generic_tx_event_middle(struct netmap_kring *kring, u_int hwcur)
{
u_int n = kring->nkr_num_slots;
u_int ntc = kring->nr_ntc;
u_int e;
if (hwcur >= ntc) {
e = (hwcur + ntc) / 2;
} else { /* wrap around */
e = (hwcur + n + ntc) / 2;
if (e >= n) {
e -= n;
}
}
if (unlikely(e >= n)) {
D("This cannot happen");
e = 0;
}
return e;
}
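/*
 * Illustrative example (arbitrary numbers, not from the code above):
 * with nkr_num_slots = 8, nr_ntc = 6 and hwcur = 2 we take the
 * wrap-around branch, e = (2 + 8 + 6) / 2 = 8, which is folded back
 * to 0.  The pending slots are 6, 7, 0, 1, so the event lands roughly
 * in the middle of the pending batch, on slot 0.
 */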
/*
* We have pending packets in the driver between nr_ntc and hwcur.
* Schedule a notification approximately in the middle of the two.
* There is a race but this is only called within txsync which does
* a double check.
*/
static void
generic_set_tx_event(struct netmap_kring *kring, u_int hwcur)
{
struct mbuf *m;
u_int e;
if (kring->nr_ntc == hwcur) {
return;
}
e = generic_tx_event_middle(kring, hwcur);
m = kring->tx_pool[e];
if (m == NULL) {
/* This can happen if there is already an event on the netmap
slot 'e': There is nothing to do. */
return;
}
ND("Event at %d mbuf %p refcnt %d", e, m, GET_MBUF_REFCNT(m));
kring->tx_pool[e] = NULL;
SET_MBUF_DESTRUCTOR(m, generic_mbuf_destructor);
// XXX wmb() ?
/* Decrement the refcount and free it if we have the last one. */
m_freem(m);
smp_mb();
}
/*
* generic_netmap_txsync() transforms netmap buffers into mbufs
* and passes them to the standard device driver
* (ndo_start_xmit() or ifp->if_transmit() ).
* On linux this is not done directly, but using dev_queue_xmit(),
* since it implements the TX flow control (and takes some locks).
*/
static int
generic_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
struct ifnet *ifp = na->ifp;
struct netmap_kring *kring = &na->tx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
u_int j, k, num_slots = kring->nkr_num_slots;
int new_slots, ntx;
IFRATE(rate_ctx.new.txsync++);
// TODO: handle the case of mbuf allocation failure
/* first, reclaim completed buffers */
generic_netmap_tx_clean(kring);
/* Take a copy of ring->cur now, and never read it again. */
k = ring->cur;
if (unlikely(k >= num_slots)) {
return netmap_ring_reinit(kring);
}
rmb();
j = kring->nr_hwcur;
/*
* 'new_slots' counts how many new slots have been added:
* everything from hwcur to cur, excluding reserved ones, if any.
* nr_hwreserved starts from hwcur and counts how many slots were
* not sent to the NIC from the previous round.
*/
new_slots = k - j - kring->nr_hwreserved;
if (new_slots < 0) {
new_slots += num_slots;
}
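/*
 * Illustrative example (arbitrary numbers): with num_slots = 8,
 * nr_hwcur = 5, cur = 2 and nr_hwreserved = 1, new_slots is
 * 2 - 5 - 1 = -4, corrected to 4: five slots (5, 6, 7, 0, 1) lie
 * between hwcur and cur, one of which was already reserved in the
 * previous round.
 */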
ntx = 0;
if (j != k) {
/* Process new packets to send:
* j is the current index in the netmap ring.
*/
while (j != k) {
struct netmap_slot *slot = &ring->slot[j]; /* Current slot in the netmap ring */
void *addr = NMB(slot);
u_int len = slot->len;
struct mbuf *m;
int tx_ret;
if (unlikely(addr == netmap_buffer_base || len > NETMAP_BUF_SIZE)) {
return netmap_ring_reinit(kring);
}
/* Take an mbuf from the tx pool and copy in the user packet. */
m = kring->tx_pool[j];
if (unlikely(!m)) {
RD(5, "This should never happen");
kring->tx_pool[j] = m = netmap_get_mbuf(GENERIC_BUF_SIZE);
if (unlikely(m == NULL)) {
D("mbuf allocation failed");
break;
}
}
/* XXX we should ask notifications when NS_REPORT is set,
* or roughly every half frame. We can optimize this
* by lazily requesting notifications only when a
* transmission fails. Probably the best way is to
* break on failures and set notifications when
* ring->avail == 0 || j != k
*/
tx_ret = generic_xmit_frame(ifp, m, addr, len, ring_nr);
if (unlikely(tx_ret)) {
RD(5, "start_xmit failed: err %d [%u,%u,%u,%u]",
tx_ret, kring->nr_ntc, j, k, kring->nr_hwavail);
/*
* No room for this mbuf in the device driver.
* Request a notification FOR A PREVIOUS MBUF,
* then call generic_netmap_tx_clean(kring) to do the
* double check and see if we can free more buffers.
* If there is space continue, else break;
* NOTE: the double check is necessary if the problem
* occurs in the txsync call after selrecord().
* Also, we need some way to tell the caller that not
* all buffers were queued onto the device (this was
* not a problem with native netmap driver where space
* is preallocated). The bridge has a similar problem
* and we solve it there by dropping the excess packets.
*/
generic_set_tx_event(kring, j);
if (generic_netmap_tx_clean(kring)) { /* space now available */
continue;
} else {
break;
}
}
slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
if (unlikely(++j == num_slots))
j = 0;
ntx++;
}
/* Update hwcur to the next slot to transmit. */
kring->nr_hwcur = j;
/*
* Report all new slots as unavailable, even those not sent.
* We account for them with hwreserved, so that
* nr_hwreserved == cur - nr_hwcur
*/
kring->nr_hwavail -= new_slots;
kring->nr_hwreserved = k - j;
if (kring->nr_hwreserved < 0) {
kring->nr_hwreserved += num_slots;
}
IFRATE(rate_ctx.new.txpkt += ntx);
if (!kring->nr_hwavail) {
/* No more available slots? Set a notification event
* on a netmap slot that will be cleaned in the future.
* No doublecheck is performed, since txsync() will be
* called twice by netmap_poll().
*/
generic_set_tx_event(kring, j);
}
ND("tx #%d, hwavail = %d", ntx, kring->nr_hwavail);
}
/* Synchronize the user's view to the kernel view. */
ring->avail = kring->nr_hwavail;
ring->reserved = kring->nr_hwreserved;
return 0;
}
/*
* This handler is registered (through netmap_catch_rx())
* within the attached network interface
* in the RX subsystem, so that every mbuf passed up by
* the driver can be stolen before it reaches the network stack.
* Stolen packets are put in a queue where the
* generic_netmap_rxsync() callback can extract them.
*/
void generic_rx_handler(struct ifnet *ifp, struct mbuf *m)
{
struct netmap_adapter *na = NA(ifp);
struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
u_int work_done;
u_int rr = 0; // receive ring number
ND("called");
/* limit the size of the queue */
if (unlikely(mbq_len(&na->rx_rings[rr].rx_queue) > 1024)) {
m_freem(m);
} else {
mbq_safe_enqueue(&na->rx_rings[rr].rx_queue, m);
}
if (netmap_generic_mit < 32768) {
/* no rx mitigation, pass notification up */
netmap_generic_irq(na->ifp, rr, &work_done);
IFRATE(rate_ctx.new.rxirq++);
} else {
/* same as send combining, filter notification if there is a
* pending timer, otherwise pass it up and start a timer.
*/
if (likely(netmap_mitigation_active(gna))) {
/* Record that there is some pending work. */
gna->mit_pending = 1;
} else {
netmap_generic_irq(na->ifp, rr, &work_done);
IFRATE(rate_ctx.new.rxirq++);
netmap_mitigation_start(gna);
}
}
}
/*
* generic_netmap_rxsync() extracts mbufs from the queue filled by
* generic_rx_handler() and puts their content in the netmap
* receive ring.
* Access must be protected because the rx handler is asynchronous.
*/
static int
generic_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
{
struct netmap_kring *kring = &na->rx_rings[ring_nr];
struct netmap_ring *ring = kring->ring;
u_int j, n, lim = kring->nkr_num_slots - 1;
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
u_int k, resvd = ring->reserved;
if (ring->cur > lim)
return netmap_ring_reinit(kring);
/* Import newly received packets into the netmap ring. */
if (netmap_no_pendintr || force_update) {
uint16_t slot_flags = kring->nkr_slot_flags;
struct mbuf *m;
n = 0;
j = kring->nr_ntc; /* first empty slot in the receive ring */
/* extract buffers from the rx queue, stop at most one
* slot before nr_hwcur (index k)
*/
k = (kring->nr_hwcur) ? kring->nr_hwcur-1 : lim;
while (j != k) {
int len;
void *addr = NMB(&ring->slot[j]);
if (addr == netmap_buffer_base) { /* Bad buffer */
return netmap_ring_reinit(kring);
}
/*
* Call the locked version of the function.
* XXX Ideally we could grab a batch of mbufs at once,
* by changing rx_queue into a ring.
*/
m = mbq_safe_dequeue(&kring->rx_queue);
if (!m)
break;
len = MBUF_LEN(m);
m_copydata(m, 0, len, addr);
ring->slot[j].len = len;
ring->slot[j].flags = slot_flags;
m_freem(m);
if (unlikely(j++ == lim))
j = 0;
n++;
}
if (n) {
kring->nr_ntc = j;
kring->nr_hwavail += n;
IFRATE(rate_ctx.new.rxpkt += n);
}
kring->nr_kflags &= ~NKR_PENDINTR;
}
// XXX should we invert the order ?
/* Skip past packets that userspace has released */
j = kring->nr_hwcur;
k = ring->cur;
if (resvd > 0) {
if (resvd + ring->avail >= lim + 1) {
D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
ring->reserved = resvd = 0; // XXX panic...
}
k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
}
if (j != k) {
/* Userspace has released some packets. */
for (n = 0; j != k; n++) {
struct netmap_slot *slot = &ring->slot[j];
slot->flags &= ~NS_BUF_CHANGED;
if (unlikely(j++ == lim))
j = 0;
}
kring->nr_hwavail -= n;
kring->nr_hwcur = k;
}
/* Tell userspace that there are new packets. */
ring->avail = kring->nr_hwavail - resvd;
IFRATE(rate_ctx.new.rxsync++);
return 0;
}
static void
generic_netmap_dtor(struct netmap_adapter *na)
{
struct ifnet *ifp = na->ifp;
struct netmap_generic_adapter *gna = (struct netmap_generic_adapter*)na;
struct netmap_adapter *prev_na = gna->prev;
if (prev_na != NULL) {
D("Released generic NA %p", gna);
if_rele(na->ifp);
netmap_adapter_put(prev_na);
}
if (ifp != NULL) {
WNA(ifp) = prev_na;
D("Restored native NA %p", prev_na);
na->ifp = NULL;
}
}
/*
* generic_netmap_attach() makes it possible to use netmap on
* a device without native netmap support.
* This is less performant than native support but potentially
* faster than raw sockets or similar schemes.
*
* In this "emulated" mode, netmap rings do not necessarily
* have the same size as those in the NIC. We use a default
* value and possibly override it if the OS has ways to fetch the
* actual configuration.
*/
int
generic_netmap_attach(struct ifnet *ifp)
{
struct netmap_adapter *na;
struct netmap_generic_adapter *gna;
int retval;
u_int num_tx_desc, num_rx_desc;
num_tx_desc = num_rx_desc = netmap_generic_ringsize; /* starting point */
generic_find_num_desc(ifp, &num_tx_desc, &num_rx_desc);
ND("Netmap ring size: TX = %d, RX = %d", num_tx_desc, num_rx_desc);
gna = malloc(sizeof(*gna), M_DEVBUF, M_NOWAIT | M_ZERO);
if (gna == NULL) {
D("no memory on attach, give up");
return ENOMEM;
}
na = (struct netmap_adapter *)gna;
na->ifp = ifp;
na->num_tx_desc = num_tx_desc;
na->num_rx_desc = num_rx_desc;
na->nm_register = &generic_netmap_register;
na->nm_txsync = &generic_netmap_txsync;
na->nm_rxsync = &generic_netmap_rxsync;
na->nm_dtor = &generic_netmap_dtor;
/* when using generic, IFCAP_NETMAP is set so we force
* NAF_SKIP_INTR to use the regular interrupt handler
*/
na->na_flags = NAF_SKIP_INTR;
ND("[GNA] num_tx_queues(%d), real_num_tx_queues(%d), len(%lu)",
ifp->num_tx_queues, ifp->real_num_tx_queues,
ifp->tx_queue_len);
ND("[GNA] num_rx_queues(%d), real_num_rx_queues(%d)",
ifp->num_rx_queues, ifp->real_num_rx_queues);
generic_find_num_queues(ifp, &na->num_tx_rings, &na->num_rx_rings);
retval = netmap_attach_common(na);
if (retval) {
free(gna, M_DEVBUF);
}
return retval;
}


@ -1,5 +1,6 @@
/*
* Copyright (C) 2011-2013 Matteo Landi, Luigi Rizzo. All rights reserved.
* Copyright (C) 2013 Universita` di Pisa. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@ -33,27 +34,61 @@
#ifndef _NET_NETMAP_KERN_H_
#define _NET_NETMAP_KERN_H_
#define WITH_VALE // comment out to disable VALE support
#if defined(__FreeBSD__)
#define likely(x) __builtin_expect((long)!!(x), 1L)
#define unlikely(x) __builtin_expect((long)!!(x), 0L)
#define NM_LOCK_T struct mtx
#define NMG_LOCK_T struct mtx
#define NMG_LOCK_INIT() mtx_init(&netmap_global_lock, \
"netmap global lock", NULL, MTX_DEF)
#define NMG_LOCK_DESTROY() mtx_destroy(&netmap_global_lock)
#define NMG_LOCK() mtx_lock(&netmap_global_lock)
#define NMG_UNLOCK() mtx_unlock(&netmap_global_lock)
#define NMG_LOCK_ASSERT() mtx_assert(&netmap_global_lock, MA_OWNED)
#define NM_SELINFO_T struct selinfo
#define MBUF_LEN(m) ((m)->m_pkthdr.len)
#define MBUF_IFP(m) ((m)->m_pkthdr.rcvif)
#define NM_SEND_UP(ifp, m) ((ifp)->if_input)(ifp, m)
#define NM_ATOMIC_T volatile int
#define NM_ATOMIC_T volatile int // XXX ?
/* atomic operations */
#include <machine/atomic.h>
#define NM_ATOMIC_TEST_AND_SET(p) (!atomic_cmpset_acq_int((p), 0, 1))
#define NM_ATOMIC_CLEAR(p) atomic_store_rel_int((p), 0)
#define prefetch(x) __builtin_prefetch(x)
MALLOC_DECLARE(M_NETMAP);
// XXX linux struct, not used in FreeBSD
struct net_device_ops {
};
struct hrtimer {
};
#elif defined (linux)
#define NM_LOCK_T safe_spinlock_t // see bsd_glue.h
#define NM_SELINFO_T wait_queue_head_t
#define MBUF_LEN(m) ((m)->len)
#define MBUF_IFP(m) ((m)->dev)
#define NM_SEND_UP(ifp, m) netif_rx(m)
#define NM_ATOMIC_T volatile long unsigned int
// XXX a mtx would suffice here too 20130404 gl
#define NMG_LOCK_T struct semaphore
#define NMG_LOCK_INIT() sema_init(&netmap_global_lock, 1)
#define NMG_LOCK_DESTROY()
#define NMG_LOCK() down(&netmap_global_lock)
#define NMG_UNLOCK() up(&netmap_global_lock)
#define NMG_LOCK_ASSERT() // XXX to be completed
#ifndef DEV_NETMAP
#define DEV_NETMAP
#endif /* DEV_NETMAP */
@ -115,6 +150,10 @@ struct netmap_priv_d;
const char *nm_dump_buf(char *p, int len, int lim, char *dst);
#include "netmap_mbq.h"
extern NMG_LOCK_T netmap_global_lock;
/*
* private, kernel view of a ring. Keeps track of the status of
* a ring across system calls.
@ -152,7 +191,7 @@ const char *nm_dump_buf(char *p, int len, int lim, char *dst);
* nkr_leases array of nkr_num_slots where writers can report
* completion of their block. NR_NOSLOT (~0) indicates
* that the writer has not finished yet
* nkr_lease_idx index of next free slot in nr_leases, to be assigned
* nkr_lease_idx index of next free slot in nr_leases, to be assigned
*
* The kring is manipulated by txsync/rxsync and generic netmap function.
* q_lock is used to arbitrate access to the kring from within the netmap
@ -166,6 +205,7 @@ struct netmap_kring {
uint32_t nr_hwcur;
uint32_t nr_hwavail;
uint32_t nr_kflags; /* private driver flags */
int32_t nr_hwreserved;
#define NKR_PENDINTR 0x1 // Pending interrupt.
uint32_t nkr_num_slots;
int32_t nkr_hwofs; /* offset between NIC and netmap ring */
@ -183,6 +223,17 @@ struct netmap_kring {
NM_ATOMIC_T nr_busy; /* prevent concurrent syscalls */
volatile int nkr_stopped;
/* support for adapters without native netmap support.
* On tx rings we preallocate an array of tx buffers
* (same size as the netmap ring), on rx rings we
* store incoming packets in a queue.
* XXX who writes to the rx queue ?
*/
struct mbuf **tx_pool;
u_int nr_ntc; /* Emulation of a next-to-clean RX ring pointer. */
struct mbq rx_queue; /* A queue for intercepted rx mbufs. */
} __attribute__((__aligned__(64)));
@ -245,22 +296,26 @@ nm_next(uint32_t i, uint32_t lim)
enum txrx { NR_RX = 0, NR_TX = 1 };
/*
* This struct extends the 'struct adapter' (or
* equivalent) device descriptor. It contains all fields needed to
* support netmap operation.
* The "struct netmap_adapter" extends the "struct adapter"
* (or equivalent) device descriptor.
* It contains all base fields needed to support netmap operation.
* There are in fact different types of netmap adapters
* (native, generic, VALE switch...) so a netmap_adapter is
* just the first field in the derived type.
*/
struct netmap_adapter {
/*
* On linux we do not have a good way to tell if an interface
* is netmap-capable. So we use the following trick:
* is netmap-capable. So we always use the following trick:
* NA(ifp) points here, and the first entry (which hopefully
* always exists and is at least 32 bits) contains a magic
* value which we can use to detect that the interface is good.
*/
uint32_t magic;
uint32_t na_flags; /* future place for IFCAP_NETMAP */
uint32_t na_flags; /* enabled, and other flags */
#define NAF_SKIP_INTR 1 /* use the regular interrupt handler.
* useful during initialization
*/
@ -272,17 +327,16 @@ struct netmap_adapter {
#define NAF_MEM_OWNER 8 /* the adapter is responsible for the
* deallocation of the memory allocator
*/
int refcount; /* number of user-space descriptors using this
#define NAF_NATIVE_ON 16 /* the adapter is native and the attached
* interface is in netmap mode
*/
#define NAF_NETMAP_ON 32 /* netmap is active (either native or
* emulated. Where possible (e.g. FreeBSD)
* IFCAP_NETMAP also mirrors this flag.
*/
int active_fds; /* number of user-space descriptors using this
interface, which is equal to the number of
struct netmap_if objs in the mapped region. */
/*
* The selwakeup in the interrupt thread can use per-ring
* and/or global wait queues. We track how many clients
* of each type we have so we can optimize the drivers,
* and especially avoid huge contention on the locks.
*/
int na_single; /* threads attached to a single hw queue */
int na_multi; /* threads attached to multiple hw queues */
u_int num_rx_rings; /* number of adapter receive rings */
u_int num_tx_rings; /* number of adapter transmit rings */
@ -296,6 +350,9 @@ struct netmap_adapter {
*/
struct netmap_kring *tx_rings; /* array of TX rings. */
struct netmap_kring *rx_rings; /* array of RX rings. */
void *tailroom; /* space below the rings array */
/* (used for leases) */
NM_SELINFO_T tx_si, rx_si; /* global wait queues */
@ -309,47 +366,157 @@ struct netmap_adapter {
*/
struct ifnet *ifp; /* adapter is ifp->if_softc */
NM_LOCK_T core_lock; /* used if no device lock available */
/* private cleanup */
void (*nm_dtor)(struct netmap_adapter *);
int (*nm_register)(struct ifnet *, int onoff);
int (*nm_register)(struct netmap_adapter *, int onoff);
int (*nm_txsync)(struct ifnet *, u_int ring, int flags);
int (*nm_rxsync)(struct ifnet *, u_int ring, int flags);
int (*nm_txsync)(struct netmap_adapter *, u_int ring, int flags);
int (*nm_rxsync)(struct netmap_adapter *, u_int ring, int flags);
#define NAF_FORCE_READ 1
#define NAF_FORCE_RECLAIM 2
/* return configuration information */
int (*nm_config)(struct ifnet *, u_int *txr, u_int *txd,
u_int *rxr, u_int *rxd);
int (*nm_config)(struct netmap_adapter *,
u_int *txr, u_int *txd, u_int *rxr, u_int *rxd);
int (*nm_krings_create)(struct netmap_adapter *);
void (*nm_krings_delete)(struct netmap_adapter *);
int (*nm_notify)(struct netmap_adapter *,
u_int ring, enum txrx, int flags);
#define NAF_GLOBAL_NOTIFY 4
#define NAF_DISABLE_NOTIFY 8
/* standard refcount to control the lifetime of the adapter
* (it should be equal to the lifetime of the corresponding ifp)
*/
int na_refcount;
/* memory allocator (opaque)
* We also cache a pointer to the lut_entry for translating
* buffer addresses, and the total number of buffers.
*/
struct netmap_mem_d *nm_mem;
struct lut_entry *na_lut;
uint32_t na_lut_objtotal; /* max buffer index */
/* used internally. If non-null, the interface cannot be bound
* from userspace
*/
void *na_private;
};
/*
* If the NIC is owned by the kernel
* (i.e., bridge), neither another bridge nor user can use it;
* if the NIC is owned by a user, only users can share it.
* Evaluation must be done under NMG_LOCK().
*/
#define NETMAP_OWNED_BY_KERN(na) (na->na_private)
#define NETMAP_OWNED_BY_ANY(na) \
(NETMAP_OWNED_BY_KERN(na) || (na->active_fds > 0))
/*
* derived netmap adapters for various types of ports
*/
struct netmap_vp_adapter { /* VALE software port */
struct netmap_adapter up;
/*
* Bridge support:
*
* bdg_port is the port number used in the bridge;
* na_bdg_refcount is a refcount used for bridge ports,
* when it goes to 0 we can detach+free this port
* (a bridge port is always attached if it exists;
* it is not always registered)
* na_bdg points to the bridge this NA is attached to.
*/
int bdg_port;
int na_bdg_refcount;
struct nm_bridge *na_bdg;
int retry;
u_int offset; /* Offset of ethernet header for each packet. */
};
struct netmap_hw_adapter { /* physical device */
struct netmap_adapter up;
struct net_device_ops nm_ndo; // XXX linux only
};
struct netmap_generic_adapter { /* non-native device */
struct netmap_hw_adapter up;
/* Pointer to a previously used netmap adapter. */
struct netmap_adapter *prev;
/* generic netmap adapters support:
* a net_device_ops struct overrides ndo_select_queue(),
* save_if_input saves the if_input hook (FreeBSD),
* mit_timer and mit_pending implement rx interrupt mitigation,
*/
struct net_device_ops generic_ndo;
void (*save_if_input)(struct ifnet *, struct mbuf *);
struct hrtimer mit_timer;
int mit_pending;
};
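/*
 * The "netmap_adapter as first field" layout above is what makes the
 * casts used throughout this code (for instance from a netmap_adapter
 * pointer to a netmap_generic_adapter pointer) well defined.  The
 * following is a standalone sketch of the same pattern, with toy
 * types rather than the real structures:
 */
#include <assert.h>

struct toy_base {			/* plays the role of netmap_adapter */
	unsigned int magic;
};

struct toy_derived {			/* plays the role of a derived adapter */
	struct toy_base up;		/* must be the first member */
	int extra_state;
};

int
main(void)
{
	struct toy_derived d = { { 0x12345678 }, 42 };
	struct toy_base *base = &d.up;	/* "upcast": take the embedded base */
	struct toy_derived *back;

	/* "downcast": valid only because 'up' is the first member */
	back = (struct toy_derived *)base;
	assert(back == &d && back->extra_state == 42);
	return 0;
}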
#ifdef WITH_VALE
/* bridge wrapper for non VALE ports. It is used to connect real devices to the bridge.
*
* The real device must already have its own netmap adapter (hwna). The
* bridge wrapper and the hwna adapter share the same set of netmap rings and
* buffers, but they have two separate sets of krings descriptors, with tx/rx
* meanings swapped:
*
* netmap
* bwrap krings rings krings hwna
* +------+ +------+ +-----+ +------+ +------+
* |tx_rings->| |\ /| |----| |<-tx_rings|
* | | +------+ \ / +-----+ +------+ | |
* | | X | |
* | | / \ | |
* | | +------+/ \+-----+ +------+ | |
* |rx_rings->| | | |----| |<-rx_rings|
* | | +------+ +-----+ +------+ | |
* +------+ +------+
*
* - packets coming from the bridge go to the bwrap rx rings, which are also the
* hwna tx rings. The bwrap notify callback will then complete the hwna tx
* (see netmap_bwrap_notify).
* - packets coming from the outside go to the hwna rx rings, which are also the
* bwrap tx rings. The (overwritten) hwna notify method will then complete
* the bridge tx (see netmap_bwrap_intr_notify).
*
* The bridge wrapper may optionally connect the hwna 'host' rings to the
* bridge. This is done by using a second port in the bridge and connecting it
* to the 'host' netmap_vp_adapter contained in the netmap_bwrap_adapter.
* The bwrap host adapter cross-links the hwna host rings in the same way as shown above.
*
* - packets coming from the bridge and directed to the host stack are handled by the
* bwrap host notify callback (see netmap_bwrap_host_notify)
* - packets coming from the host stack are still handled by the overwritten
* hwna notify callback (netmap_bwrap_intr_notify), but are diverted to the
* host adapter depending on the ring number.
*
*/
struct netmap_bwrap_adapter {
struct netmap_vp_adapter up;
struct netmap_vp_adapter host; /* for host rings */
struct netmap_adapter *hwna; /* the underlying device */
/* backup of the hwna notify callback */
int (*save_notify)(struct netmap_adapter *,
u_int ring, enum txrx, int flags);
/* When we attach a physical interface to the bridge, we
* allow the controlling process to terminate, so we need
* a place to store the netmap_priv_d data structure.
* This is only done when physical interfaces are attached to a bridge.
*/
struct netmap_priv_d *na_kpriv;
/* memory allocator */
struct netmap_mem_d *nm_mem;
#ifdef linux
struct net_device_ops nm_ndo;
#endif /* linux */
};
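/*
 * Illustrative sketch (not the actual attach code): when wrapping a
 * hardware adapter, the bwrap saves the hwna notify callback and installs
 * its own, so that interrupts on the hwna rx rings complete the bridge tx
 * as described above. 'bna' is a placeholder for the bwrap being built.
 */
#if 0
	bna->hwna = hwna;
	bna->save_notify = hwna->nm_notify;
	hwna->nm_notify = netmap_bwrap_intr_notify;
#endif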
/*
* Available space in the ring.
* Available space in the ring. Only used in VALE code
*/
static inline uint32_t
nm_kr_space(struct netmap_kring *k, int is_rx)
@ -357,7 +524,7 @@ nm_kr_space(struct netmap_kring *k, int is_rx)
int space;
if (is_rx) {
int busy = k->nkr_hwlease - k->nr_hwcur;
int busy = k->nkr_hwlease - k->nr_hwcur + k->nr_hwreserved;
if (busy < 0)
busy += k->nkr_num_slots;
space = k->nkr_num_slots - 1 - busy;
@ -381,25 +548,6 @@ nm_kr_space(struct netmap_kring *k, int is_rx)
}
/* return update position */
static inline uint32_t
nm_kr_rxpos(struct netmap_kring *k)
{
uint32_t pos = k->nr_hwcur + k->nr_hwavail;
if (pos >= k->nkr_num_slots)
pos -= k->nkr_num_slots;
#if 0
if (pos >= k->nkr_num_slots ||
k->nkr_hwlease >= k->nkr_num_slots ||
k->nr_hwcur >= k->nkr_num_slots ||
k->nr_hwavail >= k->nkr_num_slots ||
k->nkr_lease_idx >= k->nkr_num_slots) {
D("invalid kring, cur %d avail %d lease %d lease_idx %d lim %d", k->nr_hwcur, k->nr_hwavail, k->nkr_hwlease,
k->nkr_lease_idx, k->nkr_num_slots);
}
#endif
return pos;
}
/* make a lease on the kring for N positions. return the
@ -435,23 +583,61 @@ nm_kr_lease(struct netmap_kring *k, u_int n, int is_rx)
return lease_idx;
}
#endif /* WITH_VALE */
/* return update position */
static inline uint32_t
nm_kr_rxpos(struct netmap_kring *k)
{
uint32_t pos = k->nr_hwcur + k->nr_hwavail;
if (pos >= k->nkr_num_slots)
pos -= k->nkr_num_slots;
#if 0
if (pos >= k->nkr_num_slots ||
k->nkr_hwlease >= k->nkr_num_slots ||
k->nr_hwcur >= k->nkr_num_slots ||
k->nr_hwavail >= k->nkr_num_slots ||
k->nkr_lease_idx >= k->nkr_num_slots) {
D("invalid kring, cur %d avail %d lease %d lease_idx %d lim %d", k->nr_hwcur, k->nr_hwavail, k->nkr_hwlease,
k->nkr_lease_idx, k->nkr_num_slots);
}
#endif
return pos;
}
/*
* XXX NETMAP_DELETING() is unused
*
* The combination of "enable" (ifp->if_capenable & IFCAP_NETMAP)
* and refcount gives the status of the interface, namely:
*
* enable refcount Status
*
* FALSE 0 normal operation
* FALSE != 0 -- (impossible)
* TRUE 1 netmap mode
* TRUE 0 being deleted.
* protect against multiple threads using the same ring.
* also check that the ring has not been stopped.
* We only care for 0 or !=0 as a return code.
*/
#define NM_KR_BUSY 1
#define NM_KR_STOPPED 2
#define NETMAP_DELETING(_na) ( ((_na)->refcount == 0) && \
( (_na)->ifp->if_capenable & IFCAP_NETMAP) )
static __inline void nm_kr_put(struct netmap_kring *kr)
{
NM_ATOMIC_CLEAR(&kr->nr_busy);
}
static __inline int nm_kr_tryget(struct netmap_kring *kr)
{
/* check a first time without taking the lock
* to avoid starvation for nm_kr_get()
*/
if (unlikely(kr->nkr_stopped)) {
ND("ring %p stopped (%d)", kr, kr->nkr_stopped);
return NM_KR_STOPPED;
}
if (unlikely(NM_ATOMIC_TEST_AND_SET(&kr->nr_busy)))
return NM_KR_BUSY;
/* check a second time with lock held */
if (unlikely(kr->nkr_stopped)) {
ND("ring %p stopped (%d)", kr, kr->nkr_stopped);
nm_kr_put(kr);
return NM_KR_STOPPED;
}
return 0;
}
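/*
 * Usage sketch (illustrative): the expected caller pattern around any
 * per-ring work; 'kring' is a placeholder for the ring being accessed.
 */
#if 0
	if (nm_kr_tryget(kring))
		return;		/* NM_KR_BUSY or NM_KR_STOPPED */
	/* ... safely operate on the ring ... */
	nm_kr_put(kring);
#endif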
/*
@ -472,16 +658,116 @@ nm_kr_lease(struct netmap_kring *k, u_int n, int is_rx)
* netmap_reset() is a helper routine to be called in the driver
* when reinitializing a ring.
*/
int netmap_attach(struct netmap_adapter *, u_int);
int netmap_attach(struct netmap_adapter *);
int netmap_attach_common(struct netmap_adapter *);
void netmap_detach_common(struct netmap_adapter *na);
void netmap_detach(struct ifnet *);
int netmap_transmit(struct ifnet *, struct mbuf *);
enum txrx { NR_RX = 0, NR_TX = 1 };
struct netmap_slot *netmap_reset(struct netmap_adapter *na,
enum txrx tx, u_int n, u_int new_cur);
int netmap_ring_reinit(struct netmap_kring *);
u_int nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg);
/* set/clear native flags. XXX maybe also if_transmit ? */
static inline void
nm_set_native_flags(struct netmap_adapter *na)
{
struct ifnet *ifp = na->ifp;
na->na_flags |= (NAF_NATIVE_ON | NAF_NETMAP_ON);
#ifdef IFCAP_NETMAP /* or FreeBSD ? */
ifp->if_capenable |= IFCAP_NETMAP;
#endif
#ifdef __FreeBSD__
na->if_transmit = ifp->if_transmit;
ifp->if_transmit = netmap_transmit;
#else
na->if_transmit = (void *)ifp->netdev_ops;
ifp->netdev_ops = &((struct netmap_hw_adapter *)na)->nm_ndo;
#endif
}
static inline void
nm_clear_native_flags(struct netmap_adapter *na)
{
struct ifnet *ifp = na->ifp;
#ifdef __FreeBSD__
ifp->if_transmit = na->if_transmit;
#else
ifp->netdev_ops = (void *)na->if_transmit;
#endif
na->na_flags &= ~(NAF_NATIVE_ON | NAF_NETMAP_ON);
#ifdef IFCAP_NETMAP /* or FreeBSD ? */
ifp->if_capenable &= ~IFCAP_NETMAP;
#endif
}
/*
* validates parameters in the ring/kring, returns a value for cur,
* and the 'new_slots' value in the argument.
* If any error, returns cur > lim to force a reinit.
*/
u_int nm_txsync_prologue(struct netmap_kring *, u_int *);
/*
* validates parameters in the ring/kring, returns a value for cur,
* and the 'reserved' value in the argument.
* If any error, returns cur > lim to force a reinit.
*/
u_int nm_rxsync_prologue(struct netmap_kring *, u_int *);
/*
* update kring and ring at the end of txsync
*/
static inline void
nm_txsync_finalize(struct netmap_kring *kring, u_int cur)
{
/* recompute hwreserved */
kring->nr_hwreserved = cur - kring->nr_hwcur;
if (kring->nr_hwreserved < 0)
kring->nr_hwreserved += kring->nkr_num_slots;
/* update avail and reserved to what the kernel knows */
kring->ring->avail = kring->nr_hwavail;
kring->ring->reserved = kring->nr_hwreserved;
}
/* check/fix address and len in tx rings */
#if 1 /* debug version */
#define NM_CHECK_ADDR_LEN(_a, _l) do { \
if (_a == netmap_buffer_base || _l > NETMAP_BUF_SIZE) { \
RD(5, "bad addr/len ring %d slot %d idx %d len %d", \
ring_nr, nm_i, slot->buf_idx, len); \
if (_l > NETMAP_BUF_SIZE) \
_l = NETMAP_BUF_SIZE; \
} } while (0)
#else /* no debug version */
#define NM_CHECK_ADDR_LEN(_a, _l) do { \
if (_l > NETMAP_BUF_SIZE) \
_l = NETMAP_BUF_SIZE; \
} while (0)
#endif
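/*
 * Illustrative skeleton of a driver txsync built on the helpers above
 * (nm_txsync_prologue(), NM_CHECK_ADDR_LEN(), nm_txsync_finalize()).
 * NIC descriptor programming and reclaiming are elided; 'na' and
 * 'ring_nr' are placeholders for the driver's own context.
 */
#if 0
	struct netmap_kring *kring = &na->tx_rings[ring_nr];
	struct netmap_ring *ring = kring->ring;
	u_int nm_i, new_slots;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const cur = nm_txsync_prologue(kring, &new_slots);

	if (cur > lim)		/* inconsistent ring, force a reinit */
		return netmap_ring_reinit(kring);
	for (nm_i = kring->nr_hwcur; nm_i != cur; ) {
		struct netmap_slot *slot = &ring->slot[nm_i];
		u_int len = slot->len;
		uint64_t paddr;
		void *addr = PNMB(slot, &paddr);

		NM_CHECK_ADDR_LEN(addr, len);	/* clamp len, warn on bad addr */
		/* ... fill the NIC descriptor with paddr/len ... */
		nm_i = (nm_i == lim) ? 0 : nm_i + 1;
	}
	kring->nr_hwcur = cur;
	/* ... reclaim completed slots, updating kring->nr_hwavail ... */
	nm_txsync_finalize(kring, cur);
#endif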
/*---------------------------------------------------------------*/
/*
* Support routines to be used with the VALE switch
*/
int netmap_update_config(struct netmap_adapter *na);
int netmap_krings_create(struct netmap_adapter *na, u_int ntx, u_int nrx, u_int tailroom);
void netmap_krings_delete(struct netmap_adapter *na);
struct netmap_if *
netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
uint16_t ringid, int *err);
u_int nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg);
int netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
int netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na);
#ifdef WITH_VALE
/*
* The following bridge-related interfaces are used by other kernel modules
* In the version that only supports unicast or broadcast, the lookup
@ -489,15 +775,76 @@ u_int nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg);
* NM_BDG_MAXPORTS for broadcast, NM_BDG_MAXPORTS+1 for unknown.
* XXX in practice "unknown" might be handled same as broadcast.
*/
typedef u_int (*bdg_lookup_fn_t)(char *buf, u_int len, uint8_t *ring_nr,
struct netmap_adapter *);
int netmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func);
u_int netmap_bdg_learning(char *, u_int, uint8_t *, struct netmap_adapter *);
#define NM_NAME "vale" /* prefix for the bridge port name */
#define NM_BDG_MAXPORTS 254 /* up to 32 for bitmap, 254 ok otherwise */
typedef u_int (*bdg_lookup_fn_t)(char *buf, u_int len,
uint8_t *ring_nr, struct netmap_vp_adapter *);
u_int netmap_bdg_learning(char *, u_int, uint8_t *,
struct netmap_vp_adapter *);
#define NM_BDG_MAXPORTS 254 /* up to 254 */
#define NM_BDG_BROADCAST NM_BDG_MAXPORTS
#define NM_BDG_NOPORT (NM_BDG_MAXPORTS+1)
#define NM_NAME "vale" /* prefix for bridge port name */
/* these are redefined in case of no VALE support */
int netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
void netmap_init_bridges(void);
int netmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func);
#else /* !WITH_VALE */
#define netmap_get_bdg_na(_1, _2, _3) 0
#define netmap_init_bridges(_1)
#define netmap_bdg_ctl(_1, _2) EINVAL
#endif /* !WITH_VALE */
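/*
 * Illustrative sketch: a minimal lookup function matching bdg_lookup_fn_t
 * that simply floods every frame; a real module (see netmap_bdg_learning()
 * above) parses the ethernet header in 'buf' to pick a destination port.
 * Such a function would be registered through netmap_bdg_ctl() with
 * nr_cmd = NETMAP_BDG_LOOKUP_REG.
 */
#if 0
static u_int
example_bdg_lookup(char *buf, u_int len, uint8_t *ring_nr,
	struct netmap_vp_adapter *vpna)
{
	(void)buf; (void)len; (void)ring_nr; (void)vpna;
	return NM_BDG_BROADCAST;	/* flood to all ports */
}
#endif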
/* Various prototypes */
int netmap_poll(struct cdev *dev, int events, struct thread *td);
int netmap_init(void);
void netmap_fini(void);
int netmap_get_memory(struct netmap_priv_d* p);
void netmap_dtor(void *data);
int netmap_dtor_locked(struct netmap_priv_d *priv);
int netmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td);
/* netmap_adapter creation/destruction */
#define NM_IFPNAME(ifp) ((ifp) ? (ifp)->if_xname : "zombie")
#define NM_DEBUG_PUTGET 1
#ifdef NM_DEBUG_PUTGET
#define NM_DBG(f) __##f
void __netmap_adapter_get(struct netmap_adapter *na);
#define netmap_adapter_get(na) \
do { \
struct netmap_adapter *__na = na; \
D("getting %p:%s (%d)", __na, NM_IFPNAME(__na->ifp), __na->na_refcount); \
__netmap_adapter_get(__na); \
} while (0)
int __netmap_adapter_put(struct netmap_adapter *na);
#define netmap_adapter_put(na) \
do { \
struct netmap_adapter *__na = na; \
D("putting %p:%s (%d)", __na, NM_IFPNAME(__na->ifp), __na->na_refcount); \
__netmap_adapter_put(__na); \
} while (0)
#else /* !NM_DEBUG_PUTGET */
#define NM_DBG(f) f
void netmap_adapter_get(struct netmap_adapter *na);
int netmap_adapter_put(struct netmap_adapter *na);
#endif /* !NM_DEBUG_PUTGET */
extern u_int netmap_buf_size;
#define NETMAP_BUF_SIZE netmap_buf_size // XXX remove
extern int netmap_mitigate;
@ -516,18 +863,18 @@ enum { /* verbose flags */
NM_VERB_NIC_TXSYNC = 0x2000,
};
extern int netmap_txsync_retry;
extern int netmap_generic_mit;
extern int netmap_generic_ringsize;
/*
* NA returns a pointer to the struct netmap adapter from the ifp,
* WNA is used to write it.
* SWNA() is used for the "host stack" endpoint associated
* to an interface. It is allocated together with the main NA(),
* as an array of two objects.
*/
#ifndef WNA
#define WNA(_ifp) (_ifp)->if_pspare[0]
#endif
#define NA(_ifp) ((struct netmap_adapter *)WNA(_ifp))
#define SWNA(_ifp) (NA(_ifp) + 1)
/*
* Macros to determine if an interface is netmap capable or netmap enabled.
@ -561,6 +908,7 @@ enum { /* verbose flags */
#endif /* linux */
#ifdef __FreeBSD__
/* Callback invoked by the dma machinery after a successful dmamap_load */
static void netmap_dmamap_cb(__unused void *arg,
__unused bus_dma_segment_t * segs, __unused int nseg, __unused int error)
@ -588,6 +936,7 @@ netmap_reload_map(bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
netmap_dmamap_cb, NULL, BUS_DMA_NOWAIT);
}
}
#else /* linux */
/*
@ -695,16 +1044,97 @@ PNMB(struct netmap_slot *slot, uint64_t *pp)
return ret;
}
/* Generic version of NMB, which uses device-specific memory. */
static inline void *
BDG_NMB(struct netmap_adapter *na, struct netmap_slot *slot)
{
struct lut_entry *lut = na->na_lut;
uint32_t i = slot->buf_idx;
return (unlikely(i >= na->na_lut_objtotal)) ?
lut[0].vaddr : lut[i].vaddr;
}
/* default functions to handle rx/tx interrupts */
int netmap_rx_irq(struct ifnet *, u_int, u_int *);
#define netmap_tx_irq(_n, _q) netmap_rx_irq(_n, _q, NULL)
#ifdef __FreeBSD__
MALLOC_DECLARE(M_NETMAP);
#endif /* __FreeBSD__ */
void netmap_common_irq(struct ifnet *, u_int, u_int *work_done);
void netmap_txsync_to_host(struct netmap_adapter *na);
void netmap_disable_all_rings(struct ifnet *);
void netmap_enable_all_rings(struct ifnet *);
void netmap_disable_ring(struct netmap_kring *kr);
/* Structure associated to each thread which registered an interface.
*
* The first 4 fields of this structure are written by NIOCREGIF and
* read by poll() and NIOC?XSYNC.
* There is low contention among writers (actually, a correct user program
* should have no contention among writers) and among writers and readers,
* so we use a single global lock to protect the structure initialization.
* Since initialization involves the allocation of memory, we reuse the memory
* allocator lock.
* Read access to the structure is lock free. Readers must check that
* np_nifp is not NULL before using the other fields.
* If np_nifp is NULL initialization has not been performed, so they should
* return an error to userlevel.
*
* The ref_done field is used to regulate access to the refcount in the
* memory allocator. The refcount must be incremented at most once for
* each open("/dev/netmap"). The increment is performed by the first
* function that calls netmap_get_memory() (currently called by
* mmap(), NIOCGINFO and NIOCREGIF).
* If the refcount is incremented, it is then decremented when the
* private structure is destroyed.
*/
struct netmap_priv_d {
struct netmap_if * volatile np_nifp; /* netmap if descriptor. */
struct netmap_adapter *np_na;
int np_ringid; /* from the ioctl */
u_int np_qfirst, np_qlast; /* range of rings to scan */
uint16_t np_txpoll;
struct netmap_mem_d *np_mref; /* use with NMG_LOCK held */
/* np_refcount is only used on FreeBSD */
int np_refcount; /* use with NMG_LOCK held */
};
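/*
 * Illustrative sketch of the lock-free reader check described above:
 * syscall paths bail out (e.g. with ENXIO) if NIOCREGIF has not completed
 * yet; a memory barrier would then ensure later reads see initialized fields.
 */
#if 0
	if (priv->np_nifp == NULL) {
		error = ENXIO;
		goto out;
	}
#endif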
/*
* generic netmap emulation for devices that do not have
* native netmap support.
* XXX generic_netmap_register() is only exported to implement
* nma_is_generic().
*/
int generic_netmap_register(struct netmap_adapter *na, int enable);
int generic_netmap_attach(struct ifnet *ifp);
int netmap_catch_rx(struct netmap_adapter *na, int intercept);
void generic_rx_handler(struct ifnet *ifp, struct mbuf *m);
void netmap_catch_packet_steering(struct netmap_generic_adapter *na, int enable);
int generic_xmit_frame(struct ifnet *ifp, struct mbuf *m, void *addr, u_int len, u_int ring_nr);
int generic_find_num_desc(struct ifnet *ifp, u_int *tx, u_int *rx);
void generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq);
static __inline int
nma_is_generic(struct netmap_adapter *na)
{
return na->nm_register == generic_netmap_register;
}
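/*
 * Illustrative sketch of the fallback described above: when an interface
 * has no native netmap adapter, the generic emulation can be attached on
 * the fly. The real policy lives in netmap_get_hw_na(); this is only a
 * simplified outline of it.
 */
#if 0
	if (NA(ifp) == NULL) {		/* no native support */
		error = generic_netmap_attach(ifp);
		if (error)
			return error;
	}
	*na = NA(ifp);
#endif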
/*
* netmap_mitigation API. This is used by the generic adapter
* to reduce the number of interrupt requests/selwakeup
* to clients on incoming packets.
*/
void netmap_mitigation_init(struct netmap_generic_adapter *na);
void netmap_mitigation_start(struct netmap_generic_adapter *na);
void netmap_mitigation_restart(struct netmap_generic_adapter *na);
int netmap_mitigation_active(struct netmap_generic_adapter *na);
void netmap_mitigation_cleanup(struct netmap_generic_adapter *na);
// int generic_timer_handler(struct hrtimer *t);
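/*
 * Illustrative sketch of how the generic rx path can use the mitigation
 * API above: notify the client right away for the first packet, then hold
 * further wakeups until the timer fires. 'gna' and 'na' are placeholders;
 * see netmap_generic.c for the actual logic.
 */
#if 0
	if (!netmap_mitigation_active(gna)) {
		na->nm_notify(na, ring_nr, NR_RX, 0);
		netmap_mitigation_start(gna);
	} else {
		gna->mit_pending = 1;
	}
#endif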
#endif /* _NET_NETMAP_KERN_H_ */

sys/dev/netmap/netmap_mbq.c (new file)
View File

@ -0,0 +1,152 @@
/*
* Copyright (C) 2013 Vincenzo Maffione. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* $FreeBSD$
*/
#ifdef linux
#include "bsd_glue.h"
#else /* __FreeBSD__ */
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#endif /* __FreeBSD__ */
#include "netmap_mbq.h"
static inline void __mbq_init(struct mbq *q)
{
q->head = q->tail = NULL;
q->count = 0;
}
void mbq_safe_init(struct mbq *q)
{
mtx_init(&q->lock, "mbq", NULL, MTX_SPIN);
__mbq_init(q);
}
void mbq_init(struct mbq *q)
{
__mbq_init(q);
}
static inline void __mbq_enqueue(struct mbq *q, struct mbuf *m)
{
m->m_nextpkt = NULL;
if (q->tail) {
q->tail->m_nextpkt = m;
q->tail = m;
} else {
q->head = q->tail = m;
}
q->count++;
}
void mbq_safe_enqueue(struct mbq *q, struct mbuf *m)
{
mtx_lock(&q->lock);
__mbq_enqueue(q, m);
mtx_unlock(&q->lock);
}
void mbq_enqueue(struct mbq *q, struct mbuf *m)
{
__mbq_enqueue(q, m);
}
static inline struct mbuf *__mbq_dequeue(struct mbq *q)
{
struct mbuf *ret = NULL;
if (q->head) {
ret = q->head;
q->head = ret->m_nextpkt;
if (q->head == NULL) {
q->tail = NULL;
}
q->count--;
ret->m_nextpkt = NULL;
}
return ret;
}
struct mbuf *mbq_safe_dequeue(struct mbq *q)
{
struct mbuf *ret;
mtx_lock(&q->lock);
ret = __mbq_dequeue(q);
mtx_unlock(&q->lock);
return ret;
}
struct mbuf *mbq_dequeue(struct mbq *q)
{
return __mbq_dequeue(q);
}
/* XXX seems pointless to have a generic purge */
static void __mbq_purge(struct mbq *q, int safe)
{
struct mbuf *m;
for (;;) {
m = safe ? mbq_safe_dequeue(q) : mbq_dequeue(q);
if (m) {
m_freem(m);
} else {
break;
}
}
}
void mbq_purge(struct mbq *q)
{
__mbq_purge(q, 0);
}
void mbq_safe_purge(struct mbq *q)
{
__mbq_purge(q, 1);
}
void mbq_safe_destroy(struct mbq *q)
{
mtx_destroy(&q->lock);
}
void mbq_destroy(struct mbq *q)
{
}

sys/dev/netmap/netmap_mbq.h (new file)
View File

@ -0,0 +1,78 @@
/*
* Copyright (C) 2013 Vincenzo Maffione. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* $FreeBSD$
*/
#ifndef __NETMAP_MBQ_H__
#define __NETMAP_MBQ_H__
/*
* These function implement an mbuf tailq with an optional lock.
* The base functions act ONLY ON THE QUEUE, whereas the "safe"
* variants (mbq_safe_*) also handle the lock.
*/
/* XXX probably rely on a previous definition of SPINLOCK_T */
#ifdef linux
#define SPINLOCK_T safe_spinlock_t
#else
#define SPINLOCK_T struct mtx
#endif
/* A FIFO queue of mbufs with an optional lock. */
struct mbq {
struct mbuf *head;
struct mbuf *tail;
int count;
SPINLOCK_T lock;
};
/* XXX "destroy" does not match "init" as a name.
* We should also clarify whether init can be used while
* holding a lock, and whether mbq_safe_destroy() is a NOP.
*/
void mbq_init(struct mbq *q);
void mbq_destroy(struct mbq *q);
void mbq_enqueue(struct mbq *q, struct mbuf *m);
struct mbuf *mbq_dequeue(struct mbq *q);
void mbq_purge(struct mbq *q);
/* XXX missing mbq_lock() and mbq_unlock */
void mbq_safe_init(struct mbq *q);
void mbq_safe_destroy(struct mbq *q);
void mbq_safe_enqueue(struct mbq *q, struct mbuf *m);
struct mbuf *mbq_safe_dequeue(struct mbq *q);
void mbq_safe_purge(struct mbq *q);
static inline unsigned int mbq_len(struct mbq *q)
{
return q->count;
}
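/*
 * Usage sketch (illustrative): hand mbufs from a producer (e.g. an rx
 * interrupt path) to a consumer using the "safe" variants.
 */
#if 0
	struct mbq q;
	struct mbuf *m;

	mbq_safe_init(&q);
	mbq_safe_enqueue(&q, m);			/* producer side */
	while ((m = mbq_safe_dequeue(&q)) != NULL)	/* consumer side */
		m_freem(m);
	mbq_safe_destroy(&q);
#endif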
#endif /* __NETMAP_MBQ_H__ */

sys/dev/netmap/netmap_mem2.c
View File

@ -8,7 +8,7 @@
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
@ -167,12 +167,12 @@ const struct netmap_mem_d nm_blueprint = {
#define DECLARE_SYSCTLS(id, name) \
SYSCTL_INT(_dev_netmap, OID_AUTO, name##_size, \
CTLFLAG_RW, &netmap_params[id].size, 0, "Requested size of netmap " STRINGIFY(name) "s"); \
SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_size, \
CTLFLAG_RD, &nm_mem.pools[id]._objsize, 0, "Current size of netmap " STRINGIFY(name) "s"); \
SYSCTL_INT(_dev_netmap, OID_AUTO, name##_num, \
CTLFLAG_RW, &netmap_params[id].num, 0, "Requested number of netmap " STRINGIFY(name) "s"); \
SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_num, \
CTLFLAG_RD, &nm_mem.pools[id].objtotal, 0, "Current number of netmap " STRINGIFY(name) "s")
SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_size, \
CTLFLAG_RD, &nm_mem.pools[id]._objsize, 0, "Current size of netmap " STRINGIFY(name) "s"); \
SYSCTL_INT(_dev_netmap, OID_AUTO, name##_num, \
CTLFLAG_RW, &netmap_params[id].num, 0, "Requested number of netmap " STRINGIFY(name) "s"); \
SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_num, \
CTLFLAG_RD, &nm_mem.pools[id].objtotal, 0, "Current number of netmap " STRINGIFY(name) "s")
SYSCTL_DECL(_dev_netmap);
DECLARE_SYSCTLS(NETMAP_IF_POOL, if);
@ -310,7 +310,7 @@ netmap_obj_malloc(struct netmap_obj_pool *p, u_int len, uint32_t *start, uint32_
}
if (p->objfree == 0) {
D("%s allocator: run out of memory", p->name);
D("no more %s objects", p->name);
return NULL;
}
if (start)
@ -395,28 +395,22 @@ netmap_obj_free_va(struct netmap_obj_pool *p, void *vaddr)
/* Return nonzero on error */
static int
netmap_new_bufs(struct netmap_mem_d *nmd, struct netmap_if *nifp,
struct netmap_slot *slot, u_int n)
netmap_new_bufs(struct netmap_mem_d *nmd, struct netmap_slot *slot, u_int n)
{
struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL];
u_int i = 0; /* slot counter */
uint32_t pos = 0; /* slot in p->bitmap */
uint32_t index = 0; /* buffer index */
(void)nifp; /* UNUSED */
for (i = 0; i < n; i++) {
void *vaddr = netmap_buf_malloc(nmd, &pos, &index);
if (vaddr == NULL) {
D("unable to locate empty packet buffer");
D("no more buffers after %d of %d", i, n);
goto cleanup;
}
slot[i].buf_idx = index;
slot[i].len = p->_objsize;
/* XXX setting flags=NS_BUF_CHANGED forces a pointer reload
* in the NIC ring. This is a hack that hides missing
* initializations in the drivers, and should go away.
*/
// slot[i].flags = NS_BUF_CHANGED;
slot[i].flags = 0;
}
ND("allocated %d buffers, %d available, first at %d", n, p->objfree, pos);
@ -433,11 +427,10 @@ netmap_new_bufs(struct netmap_mem_d *nmd, struct netmap_if *nifp,
static void
netmap_free_buf(struct netmap_mem_d *nmd, struct netmap_if *nifp, uint32_t i)
netmap_free_buf(struct netmap_mem_d *nmd, uint32_t i)
{
struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL];
(void)nifp;
if (i < 2 || i >= p->objtotal) {
D("Cannot free buf#%d: should be in [2, %d[", i, p->objtotal);
return;
@ -760,7 +753,8 @@ netmap_mem_private_finalize(struct netmap_mem_d *nmd)
}
static void netmap_mem_private_deref(struct netmap_mem_d *nmd)
static void
netmap_mem_private_deref(struct netmap_mem_d *nmd)
{
NMA_LOCK(nmd);
if (--nmd->refcount <= 0)
@ -845,7 +839,7 @@ netmap_mem_global_config(struct netmap_mem_d *nmd)
netmap_reset_obj_allocator(&nmd->pools[i]);
}
nmd->flags &= ~NETMAP_MEM_FINALIZED;
}
}
for (i = 0; i < NETMAP_POOLS_NR; i++) {
nmd->lasterr = netmap_config_obj_allocator(&nmd->pools[i],
@ -938,31 +932,129 @@ netmap_free_rings(struct netmap_adapter *na)
na->rx_rings[i].ring = NULL;
}
}
free(na->tx_rings, M_DEVBUF);
na->tx_rings = na->rx_rings = NULL;
}
/* call with NMA_LOCK held *
*
* Allocate netmap rings and buffers for this card
* The rings are contiguous, but have variable size.
*/
int
netmap_mem_rings_create(struct netmap_adapter *na)
{
struct netmap_ring *ring;
u_int len, ndesc;
struct netmap_kring *kring;
NMA_LOCK(na->nm_mem);
for (kring = na->tx_rings; kring != na->rx_rings; kring++) { /* Transmit rings */
ndesc = kring->nkr_num_slots;
len = sizeof(struct netmap_ring) +
ndesc * sizeof(struct netmap_slot);
ring = netmap_ring_malloc(na->nm_mem, len);
if (ring == NULL) {
D("Cannot allocate tx_ring");
goto cleanup;
}
ND("txring[%d] at %p ofs %d", i, ring);
kring->ring = ring;
*(uint32_t *)(uintptr_t)&ring->num_slots = ndesc;
*(ssize_t *)(uintptr_t)&ring->buf_ofs =
(na->nm_mem->pools[NETMAP_IF_POOL].memtotal +
na->nm_mem->pools[NETMAP_RING_POOL].memtotal) -
netmap_ring_offset(na->nm_mem, ring);
ring->avail = kring->nr_hwavail;
ring->cur = kring->nr_hwcur;
*(uint16_t *)(uintptr_t)&ring->nr_buf_size =
NETMAP_BDG_BUF_SIZE(na->nm_mem);
ND("initializing slots for txring");
if (netmap_new_bufs(na->nm_mem, ring->slot, ndesc)) {
D("Cannot allocate buffers for tx_ring");
goto cleanup;
}
}
for ( ; kring != na->tailroom; kring++) { /* Receive rings */
ndesc = kring->nkr_num_slots;
len = sizeof(struct netmap_ring) +
ndesc * sizeof(struct netmap_slot);
ring = netmap_ring_malloc(na->nm_mem, len);
if (ring == NULL) {
D("Cannot allocate rx_ring");
goto cleanup;
}
ND("rxring at %p ofs %d", ring);
kring->ring = ring;
*(uint32_t *)(uintptr_t)&ring->num_slots = ndesc;
*(ssize_t *)(uintptr_t)&ring->buf_ofs =
(na->nm_mem->pools[NETMAP_IF_POOL].memtotal +
na->nm_mem->pools[NETMAP_RING_POOL].memtotal) -
netmap_ring_offset(na->nm_mem, ring);
ring->cur = kring->nr_hwcur;
ring->avail = kring->nr_hwavail;
*(int *)(uintptr_t)&ring->nr_buf_size =
NETMAP_BDG_BUF_SIZE(na->nm_mem);
ND("initializing slots for rxring[%d]", i);
if (netmap_new_bufs(na->nm_mem, ring->slot, ndesc)) {
D("Cannot allocate buffers for rx_ring");
goto cleanup;
}
}
NMA_UNLOCK(na->nm_mem);
return 0;
cleanup:
netmap_free_rings(na);
NMA_UNLOCK(na->nm_mem);
return ENOMEM;
}
void
netmap_mem_rings_delete(struct netmap_adapter *na)
{
/* last instance, release bufs and rings */
u_int i, lim;
struct netmap_kring *kring;
struct netmap_ring *ring;
NMA_LOCK(na->nm_mem);
for (kring = na->tx_rings; kring != na->tailroom; kring++) {
ring = kring->ring;
if (ring == NULL)
continue;
lim = kring->nkr_num_slots;
for (i = 0; i < lim; i++)
netmap_free_buf(na->nm_mem, ring->slot[i].buf_idx);
}
netmap_free_rings(na);
NMA_UNLOCK(na->nm_mem);
}
/* call with NMA_LOCK held */
/*
* Allocate the per-fd structure netmap_if.
* If this is the first instance, also allocate the krings, rings etc.
*
* We assume that the configuration stored in na
* (number of tx/rx rings and descs) does not change while
* the interface is in netmap mode.
*/
extern int nma_is_vp(struct netmap_adapter *na);
struct netmap_if *
netmap_mem_if_new(const char *ifname, struct netmap_adapter *na)
{
struct netmap_if *nifp;
struct netmap_ring *ring;
ssize_t base; /* handy for relative offsets between rings and nifp */
u_int i, len, ndesc, ntx, nrx;
struct netmap_kring *kring;
uint32_t *tx_leases = NULL, *rx_leases = NULL;
u_int i, len, ntx, nrx;
/*
* verify whether virtual port need the stack ring
@ -990,124 +1082,6 @@ netmap_mem_if_new(const char *ifname, struct netmap_adapter *na)
*(u_int *)(uintptr_t)&nifp->ni_rx_rings = na->num_rx_rings;
strncpy(nifp->ni_name, ifname, (size_t)IFNAMSIZ);
if (na->refcount) { /* already setup, we are done */
goto final;
}
len = (ntx + nrx) * sizeof(struct netmap_kring);
/*
* Leases are attached to TX rings on NIC/host ports,
* and to RX rings on VALE ports.
*/
if (nma_is_vp(na)) {
len += sizeof(uint32_t) * na->num_rx_desc * na->num_rx_rings;
} else {
len += sizeof(uint32_t) * na->num_tx_desc * ntx;
}
na->tx_rings = malloc((size_t)len, M_DEVBUF, M_NOWAIT | M_ZERO);
if (na->tx_rings == NULL) {
D("Cannot allocate krings for %s", ifname);
goto cleanup;
}
na->rx_rings = na->tx_rings + ntx;
if (nma_is_vp(na)) {
rx_leases = (uint32_t *)(na->rx_rings + nrx);
} else {
tx_leases = (uint32_t *)(na->rx_rings + nrx);
}
/*
* First instance, allocate netmap rings and buffers for this card
* The rings are contiguous, but have variable size.
*/
for (i = 0; i < ntx; i++) { /* Transmit rings */
kring = &na->tx_rings[i];
ndesc = na->num_tx_desc;
bzero(kring, sizeof(*kring));
len = sizeof(struct netmap_ring) +
ndesc * sizeof(struct netmap_slot);
ring = netmap_ring_malloc(na->nm_mem, len);
if (ring == NULL) {
D("Cannot allocate tx_ring[%d] for %s", i, ifname);
goto cleanup;
}
ND("txring[%d] at %p ofs %d", i, ring);
kring->na = na;
kring->ring = ring;
if (tx_leases) {
kring->nkr_leases = tx_leases;
tx_leases += ndesc;
}
*(uint32_t *)(uintptr_t)&ring->num_slots = kring->nkr_num_slots = ndesc;
*(ssize_t *)(uintptr_t)&ring->buf_ofs =
(na->nm_mem->pools[NETMAP_IF_POOL].memtotal +
na->nm_mem->pools[NETMAP_RING_POOL].memtotal) -
netmap_ring_offset(na->nm_mem, ring);
/*
* IMPORTANT:
* Always keep one slot empty, so we can detect new
* transmissions comparing cur and nr_hwcur (they are
* the same only if there are no new transmissions).
*/
ring->avail = kring->nr_hwavail = ndesc - 1;
ring->cur = kring->nr_hwcur = 0;
*(uint16_t *)(uintptr_t)&ring->nr_buf_size =
NETMAP_BDG_BUF_SIZE(na->nm_mem);
ND("initializing slots for txring[%d]", i);
if (netmap_new_bufs(na->nm_mem, nifp, ring->slot, ndesc)) {
D("Cannot allocate buffers for tx_ring[%d] for %s", i, ifname);
goto cleanup;
}
}
for (i = 0; i < nrx; i++) { /* Receive rings */
kring = &na->rx_rings[i];
ndesc = na->num_rx_desc;
bzero(kring, sizeof(*kring));
len = sizeof(struct netmap_ring) +
ndesc * sizeof(struct netmap_slot);
ring = netmap_ring_malloc(na->nm_mem, len);
if (ring == NULL) {
D("Cannot allocate rx_ring[%d] for %s", i, ifname);
goto cleanup;
}
ND("rxring[%d] at %p ofs %d", i, ring);
kring->na = na;
kring->ring = ring;
if (rx_leases && i < na->num_rx_rings) {
kring->nkr_leases = rx_leases;
rx_leases += ndesc;
}
*(uint32_t *)(uintptr_t)&ring->num_slots = kring->nkr_num_slots = ndesc;
*(ssize_t *)(uintptr_t)&ring->buf_ofs =
(na->nm_mem->pools[NETMAP_IF_POOL].memtotal +
na->nm_mem->pools[NETMAP_RING_POOL].memtotal) -
netmap_ring_offset(na->nm_mem, ring);
ring->cur = kring->nr_hwcur = 0;
ring->avail = kring->nr_hwavail = 0; /* empty */
*(int *)(uintptr_t)&ring->nr_buf_size =
NETMAP_BDG_BUF_SIZE(na->nm_mem);
ND("initializing slots for rxring[%d]", i);
if (netmap_new_bufs(na->nm_mem, nifp, ring->slot, ndesc)) {
D("Cannot allocate buffers for rx_ring[%d] for %s", i, ifname);
goto cleanup;
}
}
#ifdef linux
// XXX initialize the selrecord structs.
for (i = 0; i < ntx; i++)
init_waitqueue_head(&na->tx_rings[i].si);
for (i = 0; i < nrx; i++)
init_waitqueue_head(&na->rx_rings[i].si);
init_waitqueue_head(&na->tx_si);
init_waitqueue_head(&na->rx_si);
#endif
final:
/*
* fill the slots for the rx and tx rings. They contain the offset
* between the ring and nifp, so the information is usable in
@ -1126,13 +1100,6 @@ netmap_mem_if_new(const char *ifname, struct netmap_adapter *na)
NMA_UNLOCK(na->nm_mem);
return (nifp);
cleanup:
netmap_free_rings(na);
netmap_if_free(na->nm_mem, nifp);
NMA_UNLOCK(na->nm_mem);
return NULL;
}
void
@ -1143,25 +1110,6 @@ netmap_mem_if_delete(struct netmap_adapter *na, struct netmap_if *nifp)
return;
NMA_LOCK(na->nm_mem);
if (na->refcount <= 0) {
/* last instance, release bufs and rings */
u_int i, j, lim;
struct netmap_ring *ring;
for (i = 0; i < na->num_tx_rings + 1; i++) {
ring = na->tx_rings[i].ring;
lim = na->tx_rings[i].nkr_num_slots;
for (j = 0; j < lim; j++)
netmap_free_buf(na->nm_mem, nifp, ring->slot[j].buf_idx);
}
for (i = 0; i < na->num_rx_rings + 1; i++) {
ring = na->rx_rings[i].ring;
lim = na->rx_rings[i].nkr_num_slots;
for (j = 0; j < lim; j++)
netmap_free_buf(na->nm_mem, nifp, ring->slot[j].buf_idx);
}
netmap_free_rings(na);
}
netmap_if_free(na->nm_mem, nifp);
NMA_UNLOCK(na->nm_mem);
@ -1179,12 +1127,14 @@ netmap_mem_global_deref(struct netmap_mem_d *nmd)
NMA_UNLOCK(nmd);
}
int netmap_mem_finalize(struct netmap_mem_d *nmd)
int
netmap_mem_finalize(struct netmap_mem_d *nmd)
{
return nmd->finalize(nmd);
}
void netmap_mem_deref(struct netmap_mem_d *nmd)
void
netmap_mem_deref(struct netmap_mem_d *nmd)
{
return nmd->deref(nmd);
}

sys/dev/netmap/netmap_mem2.h
View File

@ -189,7 +189,7 @@ struct netmap_mem_d {
/* the three allocators */
struct netmap_obj_pool pools[NETMAP_POOLS_NR];
netmap_mem_config_t config;
netmap_mem_config_t config;
netmap_mem_finalize_t finalize;
netmap_mem_deref_t deref;
};
@ -200,14 +200,17 @@ vm_paddr_t netmap_mem_ofstophys(struct netmap_mem_d *, vm_ooffset_t);
int netmap_mem_finalize(struct netmap_mem_d *);
int netmap_mem_init(void);
void netmap_mem_fini(void);
struct netmap_if * netmap_mem_if_new(const char *, struct netmap_adapter *);
void netmap_mem_if_delete(struct netmap_adapter *na, struct netmap_if *nifp);
struct netmap_if *
netmap_mem_if_new(const char *, struct netmap_adapter *);
void netmap_mem_if_delete(struct netmap_adapter *, struct netmap_if *);
int netmap_mem_rings_create(struct netmap_adapter *);
void netmap_mem_rings_delete(struct netmap_adapter *);
void netmap_mem_deref(struct netmap_mem_d *);
int netmap_mem_get_info(struct netmap_mem_d *nm_mem, u_int *size, u_int *memflags);
ssize_t netmap_mem_if_offset(struct netmap_mem_d *nm_mem, const void *vaddr);
int netmap_mem_get_info(struct netmap_mem_d *, u_int *size, u_int *memflags);
ssize_t netmap_mem_if_offset(struct netmap_mem_d *, const void *vaddr);
struct netmap_mem_d*
netmap_mem_private_new(const char *name, u_int txr, u_int txd, u_int rxr, u_int rxd);
void netmap_mem_private_delete(struct netmap_mem_d *nm_mem);
void netmap_mem_private_delete(struct netmap_mem_d *);
#define NETMAP_BDG_BUF_SIZE(n) ((n)->pools[NETMAP_BUF_POOL]._objsize)

sys/dev/netmap/netmap_vale.c (new file)

File diff suppressed because it is too large.

sys/modules/netmap/Makefile
View File

@ -5,10 +5,14 @@
.PATH: ${.CURDIR}/../../dev/netmap
.PATH.h: ${.CURDIR}/../../net
CFLAGS += -I${.CURDIR}/../../
KMOD = netmap
SRCS = device_if.h bus_if.h opt_netmap.h
SRCS += netmap.c netmap.h netmap_kern.h
netmap.o: netmap_mem2.c
SRCS += netmap_mem2.c netmap_mem2.h
SRCS += netmap_generic.c
SRCS += netmap_mbq.c netmap_mbq.h
SRCS += netmap_vale.c
SRCS += netmap_freebsd.c
.include <bsd.kmod.mk>

sys/net/netmap.h
View File

@ -1,33 +1,27 @@
/*
* Copyright (C) 2011-2013 Matteo Landi, Luigi Rizzo. All rights reserved.
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the
* distribution.
*
* 3. Neither the name of the authors nor the names of their contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY MATTEO LANDI AND CONTRIBUTORS "AS IS" AND
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL MATTEO LANDI OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
@ -36,7 +30,7 @@
* Definitions of constants and the structures used by the netmap
* framework, for the part visible to both kernel and userspace.
* Detailed info on netmap is available with "man netmap" or at
*
*
* http://info.iet.unipi.it/~luigi/netmap/
*
* This API is also used to communicate with the VALE software switch
@ -271,7 +265,7 @@ struct netmap_if {
const ssize_t ring_ofs[0];
};
#ifndef NIOCREGIF
#ifndef NIOCREGIF
/*
* ioctl names and related fields
*
@ -306,10 +300,10 @@ struct netmap_if {
* NETMAP_PRIV_MEM is a return value used to indicate that
* this ring is in a private memory region hence buffer
* swapping cannot be used
*
*
* nr_cmd is used to configure NICs attached to a VALE switch,
* or to dump the configuration of a VALE switch.
*
*
* nr_cmd = NETMAP_BDG_ATTACH and nr_name = vale*:ifname
* attaches the NIC to the switch, with nr_ringid specifying
* which rings to use
@ -350,8 +344,10 @@ struct nmreq {
#define NETMAP_BDG_DETACH 2 /* detach the NIC */
#define NETMAP_BDG_LOOKUP_REG 3 /* register lookup function */
#define NETMAP_BDG_LIST 4 /* get bridge's info */
#define NETMAP_BDG_OFFSET 5 /* set the port offset */
uint16_t nr_arg1;
#define NETMAP_BDG_HOST 1 /* attach the host stack on ATTACH */
#define NETMAP_BDG_MAX_OFFSET 12
uint16_t nr_arg2;
uint32_t spare2[3];
};
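/*
 * Illustrative sketch: attaching a NIC to a VALE switch with the nr_cmd
 * interface described above. The interface name is an example, error
 * handling is omitted, and the request is assumed to be submitted through
 * the usual NIOCREGIF ioctl on an open /dev/netmap descriptor 'fd'.
 */
#if 0
	struct nmreq req;

	memset(&req, 0, sizeof(req));
	req.nr_version = NETMAP_API;
	strncpy(req.nr_name, "vale0:em1", sizeof(req.nr_name));
	req.nr_cmd = NETMAP_BDG_ATTACH;
	req.nr_arg1 = NETMAP_BDG_HOST;	/* optionally attach the host stack too */
	ioctl(fd, NIOCREGIF, &req);
#endif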

sys/net/netmap_user.h
View File

@ -1,33 +1,28 @@
/*
* Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
*
* Copyright (C) 2013 Universita` di Pisa
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the
* distribution.
*
* 3. Neither the name of the authors nor the names of their contributors
* may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY MATTEO LANDI AND CONTRIBUTORS "AS IS" AND
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL MATTEO LANDI OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
@ -56,11 +51,22 @@
*
* Since rings are circular, we have macros to compute the next index
* i = NETMAP_RING_NEXT(ring, i);
*
* To ease porting apps from pcap to netmap we supply a few functions
* that can be called to open, close and read from netmap in a way
* similar to libpcap.
*
* In order to use these, include #define NETMAP_WITH_LIBS
* in the source file that invokes these functions.
*/
#ifndef _NET_NETMAP_USER_H_
#define _NET_NETMAP_USER_H_
#include <stdint.h>
#include <net/if.h> /* IFNAMSIZ */
#include <net/netmap.h>
#define _NETMAP_OFFSET(type, ptr, offset) \
((type)(void *)((char *)(ptr) + (offset)))
@ -77,7 +83,7 @@
#define NETMAP_BUF_IDX(ring, buf) \
( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \
(ring)->nr_buf_size )
(ring)->nr_buf_size )
#define NETMAP_RING_NEXT(r, i) \
((i)+1 == (r)->num_slots ? 0 : (i) + 1 )
@ -92,4 +98,232 @@
*/
#define NETMAP_TX_RING_EMPTY(r) ((r)->avail >= (r)->num_slots - 1)
#ifdef NETMAP_WITH_LIBS
/*
* Support for simple I/O libraries.
* Include other system headers required for compiling this.
*/
#ifndef HAVE_NETMAP_WITH_LIBS
#define HAVE_NETMAP_WITH_LIBS
#include <sys/time.h>
#include <sys/mman.h>
#include <string.h> /* memset */
#include <sys/ioctl.h>
#include <sys/errno.h> /* EINVAL */
#include <fcntl.h> /* O_RDWR */
#include <malloc.h>
struct nm_hdr_t { /* same as pcap_pkthdr */
struct timeval ts;
uint32_t caplen;
uint32_t len;
};
struct nm_desc_t {
struct nm_desc_t *self;
int fd;
void *mem;
int memsize;
struct netmap_if *nifp;
uint16_t first_ring, last_ring, cur_ring;
struct nmreq req;
struct nm_hdr_t hdr;
};
/*
* when the descriptor is open correctly, d->self == d
*/
#define P2NMD(p) ((struct nm_desc_t *)(p))
#define IS_NETMAP_DESC(d) (P2NMD(d)->self == P2NMD(d))
#define NETMAP_FD(d) (P2NMD(d)->fd)
/*
* The callback, invoked on each received packet. Same as libpcap
*/
typedef void (*nm_cb_t)(u_char *, const struct nm_hdr_t *, const u_char *d);
/*
* The open routine accepts an ifname (netmap:foo or vale:foo) and
* optionally a second (string) argument indicating the ring number
* to open. If successful, it opens the fd and maps the memory.
*/
static struct nm_desc_t *nm_open(const char *ifname,
const char *ring_no, int flags, int ring_flags);
/*
* nm_dispatch() is the same as pcap_dispatch()
* nm_next() is the same as pcap_next()
*/
static int nm_dispatch(struct nm_desc_t *, int, nm_cb_t, u_char *);
static u_char *nm_next(struct nm_desc_t *, struct nm_hdr_t *);
/*
* unmap memory, close file descriptor and free the descriptor.
*/
static int nm_close(struct nm_desc_t *);
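/*
 * Usage sketch (illustrative): a minimal pcap-style receiver built on the
 * helpers declared above. A real program would poll() on NETMAP_FD(d)
 * instead of spinning, and <stdio.h> is assumed for the example callback.
 */
#if 0
static void
example_cb(u_char *arg, const struct nm_hdr_t *h, const u_char *buf)
{
	(void)arg; (void)buf;
	printf("received %u bytes\n", h->len);
}

int
main(void)
{
	struct nm_desc_t *d = nm_open("netmap:em1", NULL, 0, 0);

	if (d == NULL)
		return 1;
	for (;;)
		nm_dispatch(d, 0 /* all currently available */, example_cb, NULL);
	nm_close(d);	/* not reached */
	return 0;
}
#endif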
/*
* Try to open, return descriptor if successful, NULL otherwise.
* An invalid netmap name will set errno = 0.
*/
static struct nm_desc_t *
nm_open(const char *ifname, const char *ring_name, int flags, int ring_flags)
{
struct nm_desc_t *d;
u_int n;
if (strncmp(ifname, "netmap:", 7) && strncmp(ifname, "vale", 4)) {
errno = 0; /* name not recognised */
return NULL;
}
if (ifname[0] == 'n')
ifname += 7;
d = (struct nm_desc_t *)calloc(1, sizeof(*d));
if (d == NULL) {
errno = ENOMEM;
return NULL;
}
d->self = d; /* set this early so nm_close() works */
d->fd = open("/dev/netmap", O_RDWR);
if (d->fd < 0)
goto fail;
if (flags & NETMAP_SW_RING) {
d->req.nr_ringid = NETMAP_SW_RING;
} else {
u_int r;
if (flags & NETMAP_HW_RING) /* interpret ring as int */
r = (uintptr_t)ring_name;
else /* interpret ring as numeric string */
r = ring_name ? atoi(ring_name) : ~0;
r = (r < NETMAP_RING_MASK) ? (r | NETMAP_HW_RING) : 0;
d->req.nr_ringid = r; /* set the ring */
}
d->req.nr_ringid |= (flags & ~NETMAP_RING_MASK);
d->req.nr_version = NETMAP_API;
strncpy(d->req.nr_name, ifname, sizeof(d->req.nr_name));
if (ioctl(d->fd, NIOCREGIF, &d->req))
goto fail;
d->memsize = d->req.nr_memsize;
d->mem = mmap(0, d->memsize, PROT_WRITE | PROT_READ, MAP_SHARED,
d->fd, 0);
if (d->mem == MAP_FAILED) {	/* mmap() failure, not NULL */
	d->mem = NULL;
	goto fail;
}
d->nifp = NETMAP_IF(d->mem, d->req.nr_offset);
if (d->req.nr_ringid & NETMAP_SW_RING) {
d->first_ring = d->last_ring = d->req.nr_rx_rings;
} else if (d->req.nr_ringid & NETMAP_HW_RING) {
d->first_ring = d->last_ring =
d->req.nr_ringid & NETMAP_RING_MASK;
} else {
d->first_ring = 0;
d->last_ring = d->req.nr_rx_rings - 1;
}
d->cur_ring = d->first_ring;
for (n = d->first_ring; n <= d->last_ring; n++) {
struct netmap_ring *ring = NETMAP_RXRING(d->nifp, n);
ring->flags |= ring_flags;
}
return d;
fail:
nm_close(d);
errno = EINVAL;
return NULL;
}
static int
nm_close(struct nm_desc_t *d)
{
if (d == NULL || d->self != d)
return EINVAL;
if (d->mem)
munmap(d->mem, d->memsize);
if (d->fd != -1)
close(d->fd);
bzero(d, sizeof(*d));
free(d);
return 0;
}
/*
* Same prototype as pcap_dispatch(), only need to cast.
*/
inline /* not really, but disable unused warnings */
static int
nm_dispatch(struct nm_desc_t *d, int cnt, nm_cb_t cb, u_char *arg)
{
int n = d->last_ring - d->first_ring + 1;
int c, got = 0, ri = d->cur_ring;
if (cnt == 0)
cnt = -1;
/* cnt == -1 means infinite, but rings have a finite amount
* of buffers and the int is large enough that we never wrap,
* so we can omit checking for -1
*/
for (c=0; c < n && cnt != got; c++) {
/* compute current ring to use */
struct netmap_ring *ring;
ri = d->cur_ring + c;
if (ri > d->last_ring)
ri = d->first_ring;
ring = NETMAP_RXRING(d->nifp, ri);
for ( ; ring->avail > 0 && cnt != got; got++) {
u_int i = ring->cur;
u_int idx = ring->slot[i].buf_idx;
u_char *buf = (u_char *)NETMAP_BUF(ring, idx);
// XXX should check valid buf
// prefetch(buf);
d->hdr.len = d->hdr.caplen = ring->slot[i].len;
d->hdr.ts = ring->ts;
cb(arg, &d->hdr, buf);
ring->cur = NETMAP_RING_NEXT(ring, i);
ring->avail--;
}
}
d->cur_ring = ri;
return got;
}
inline /* not really, but disable unused warnings */
static u_char *
nm_next(struct nm_desc_t *d, struct nm_hdr_t *hdr)
{
int ri = d->cur_ring;
do {
/* compute current ring to use */
struct netmap_ring *ring = NETMAP_RXRING(d->nifp, ri);
if (ring->avail > 0) {
u_int i = ring->cur;
u_int idx = ring->slot[i].buf_idx;
u_char *buf = (u_char *)NETMAP_BUF(ring, idx);
// XXX should check valid buf
// prefetch(buf);
hdr->ts = ring->ts;
hdr->len = hdr->caplen = ring->slot[i].len;
ring->cur = NETMAP_RING_NEXT(ring, i);
ring->avail--;
d->cur_ring = ri;
return buf;
}
ri++;
if (ri > d->last_ring)
ri = d->first_ring;
} while (ri != d->cur_ring);
return NULL; /* nothing found */
}
#endif /* !HAVE_NETMAP_WITH_LIBS */
#endif /* NETMAP_WITH_LIBS */
#endif /* _NET_NETMAP_USER_H_ */