netmap: align codebase to upstream version v11.4

Changelist:
  - remove unused nkr_slot_flags
  - new nm_intr adapter callback to enable/disable interrupts
  - remove unused sysctls and document the other sysctls
  - new infrastructure to support NS_MOREFRAG for NIC ports
  - support for external memory allocator (for now linux-only),
    including linux-specific changes in common headers
  - optimizations within netmap pipes datapath
  - improvements on VALE control API
  - new nm_parse() helper function in netmap_user.h
  - various bug fixes and code clean up

Approved by:	hrs (mentor)
This commit is contained in:
vmaffione 2018-04-09 09:24:26 +00:00
parent 5e1d126c43
commit 8b391e44ef
22 changed files with 1535 additions and 549 deletions

View File

@ -235,8 +235,6 @@ em_netmap_rxsync(struct netmap_kring *kring, int flags)
* First part: import newly received packets.
*/
if (netmap_no_pendintr || force_update) {
uint16_t slot_flags = kring->nkr_slot_flags;
nic_i = rxr->next_to_check;
nm_i = netmap_idx_n2k(kring, nic_i);
@ -247,7 +245,7 @@ em_netmap_rxsync(struct netmap_kring *kring, int flags)
if ((staterr & E1000_RXD_STAT_DD) == 0)
break;
ring->slot[nm_i].len = le16toh(curr->wb.upper.length);
ring->slot[nm_i].flags = slot_flags;
ring->slot[nm_i].flags = 0;
bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[nic_i].map,
BUS_DMASYNC_POSTREAD);
nm_i = nm_next(nm_i, lim);

View File

@ -217,8 +217,6 @@ igb_netmap_rxsync(struct netmap_kring *kring, int flags)
* First part: import newly received packets.
*/
if (netmap_no_pendintr || force_update) {
uint16_t slot_flags = kring->nkr_slot_flags;
nic_i = rxr->next_to_check;
nm_i = netmap_idx_n2k(kring, nic_i);
@ -229,7 +227,7 @@ igb_netmap_rxsync(struct netmap_kring *kring, int flags)
if ((staterr & E1000_RXD_STAT_DD) == 0)
break;
ring->slot[nm_i].len = le16toh(curr->wb.upper.length);
ring->slot[nm_i].flags = slot_flags;
ring->slot[nm_i].flags = 0;
bus_dmamap_sync(rxr->ptag,
rxr->rx_buffers[nic_i].pmap, BUS_DMASYNC_POSTREAD);
nm_i = nm_next(nm_i, lim);

View File

@ -331,7 +331,6 @@ ixl_netmap_rxsync(struct netmap_kring *kring, int flags)
*/
if (netmap_no_pendintr || force_update) {
int crclen = ixl_crcstrip ? 0 : 4;
uint16_t slot_flags = kring->nkr_slot_flags;
nic_i = rxr->next_check; // or also k2n(kring->nr_hwtail)
nm_i = netmap_idx_n2k(kring, nic_i);
@ -346,7 +345,7 @@ ixl_netmap_rxsync(struct netmap_kring *kring, int flags)
break;
ring->slot[nm_i].len = ((qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
>> I40E_RXD_QW1_LENGTH_PBUF_SHIFT) - crclen;
ring->slot[nm_i].flags = slot_flags;
ring->slot[nm_i].flags = 0;
bus_dmamap_sync(rxr->ptag,
rxr->buffers[nic_i].pmap, BUS_DMASYNC_POSTREAD);
nm_i = nm_next(nm_i, lim);

View File

@ -216,8 +216,6 @@ lem_netmap_rxsync(struct netmap_kring *kring, int flags)
* First part: import newly received packets.
*/
if (netmap_no_pendintr || force_update) {
uint16_t slot_flags = kring->nkr_slot_flags;
nic_i = adapter->next_rx_desc_to_check;
nm_i = netmap_idx_n2k(kring, nic_i);
@ -234,7 +232,7 @@ lem_netmap_rxsync(struct netmap_kring *kring, int flags)
len = 0;
}
ring->slot[nm_i].len = len;
ring->slot[nm_i].flags = slot_flags;
ring->slot[nm_i].flags = 0;
bus_dmamap_sync(adapter->rxtag,
adapter->rx_buffer_area[nic_i].map,
BUS_DMASYNC_POSTREAD);

View File

@ -216,6 +216,7 @@ static void ptnet_update_vnet_hdr(struct ptnet_softc *sc);
static int ptnet_nm_register(struct netmap_adapter *na, int onoff);
static int ptnet_nm_txsync(struct netmap_kring *kring, int flags);
static int ptnet_nm_rxsync(struct netmap_kring *kring, int flags);
static void ptnet_nm_intr(struct netmap_adapter *na, int onoff);
static void ptnet_tx_intr(void *opaque);
static void ptnet_rx_intr(void *opaque);
@ -477,6 +478,7 @@ ptnet_attach(device_t dev)
na_arg.nm_krings_create = ptnet_nm_krings_create;
na_arg.nm_krings_delete = ptnet_nm_krings_delete;
na_arg.nm_dtor = ptnet_nm_dtor;
na_arg.nm_intr = ptnet_nm_intr;
na_arg.nm_register = ptnet_nm_register;
na_arg.nm_txsync = ptnet_nm_txsync;
na_arg.nm_rxsync = ptnet_nm_rxsync;
@ -1298,6 +1300,18 @@ ptnet_nm_rxsync(struct netmap_kring *kring, int flags)
return 0;
}
/*
 * nm_intr callback for ptnet (installed as na_arg.nm_intr in ptnet_attach()).
 *
 * Writes 'onoff' into the guest_need_kick field reachable through each
 * queue's ptgh pointer, for all sc->num_rings queues of the device.
 * NOTE(review): guest_need_kick presumably tells the host whether the guest
 * wants to be notified -- confirm against the ptnetmap header.
 */
static void
ptnet_nm_intr(struct netmap_adapter *na, int onoff)
{
	struct ptnet_softc *sc = if_getsoftc(na->ifp);
	int i;

	for (i = 0; i < sc->num_rings; i++) {
		sc->queues[i].ptgh->guest_need_kick = onoff;
	}
}
static void
ptnet_tx_intr(void *opaque)
{

View File

@ -201,7 +201,6 @@ re_netmap_rxsync(struct netmap_kring *kring, int flags)
* is to stop right before nm_hwcur.
*/
if (netmap_no_pendintr || force_update) {
uint16_t slot_flags = kring->nkr_slot_flags;
uint32_t stop_i = nm_prev(kring->nr_hwcur, lim);
nic_i = sc->rl_ldata.rl_rx_prodidx; /* next pkt to check */
@ -218,7 +217,7 @@ re_netmap_rxsync(struct netmap_kring *kring, int flags)
/* XXX subtract crc */
total_len = (total_len < 4) ? 0 : total_len - 4;
ring->slot[nm_i].len = total_len;
ring->slot[nm_i].flags = slot_flags;
ring->slot[nm_i].flags = 0;
/* sync was in re_newbuf() */
bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
rxd[nic_i].rx_dmamap, BUS_DMASYNC_POSTREAD);

View File

@ -122,6 +122,7 @@ vtnet_netmap_txsync(struct netmap_kring *kring, int flags)
struct SOFTC_T *sc = ifp->if_softc;
struct vtnet_txq *txq = &sc->vtnet_txqs[ring_nr];
struct virtqueue *vq = txq->vtntx_vq;
int interrupts = !(kring->nr_kflags & NKR_NOINTR);
/*
* First part: process new packets to send.
@ -179,7 +180,9 @@ vtnet_netmap_txsync(struct netmap_kring *kring, int flags)
ring->head, ring->tail, virtqueue_nused(vq),
(virtqueue_dump(vq), 1));
virtqueue_notify(vq);
virtqueue_enable_intr(vq); // like postpone with 0
if (interrupts) {
virtqueue_enable_intr(vq); // like postpone with 0
}
}
@ -209,7 +212,7 @@ vtnet_netmap_txsync(struct netmap_kring *kring, int flags)
if (nm_i != kring->nr_hwtail /* && vtnet_txq_below_threshold(txq) == 0*/) {
ND(3, "disable intr, hwcur %d", nm_i);
virtqueue_disable_intr(vq);
} else {
} else if (interrupts) {
ND(3, "enable intr, hwcur %d", nm_i);
virtqueue_postpone_intr(vq, VQ_POSTPONE_SHORT);
}
@ -277,6 +280,7 @@ vtnet_netmap_rxsync(struct netmap_kring *kring, int flags)
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = kring->rhead;
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
int interrupts = !(kring->nr_kflags & NKR_NOINTR);
/* device-specific */
struct SOFTC_T *sc = ifp->if_softc;
@ -297,7 +301,6 @@ vtnet_netmap_rxsync(struct netmap_kring *kring, int flags)
* and vtnet_netmap_init_buffers().
*/
if (netmap_no_pendintr || force_update) {
uint16_t slot_flags = kring->nkr_slot_flags;
struct netmap_adapter *token;
nm_i = kring->nr_hwtail;
@ -309,7 +312,7 @@ vtnet_netmap_rxsync(struct netmap_kring *kring, int flags)
break;
if (likely(token == (void *)rxq)) {
ring->slot[nm_i].len = len;
ring->slot[nm_i].flags = slot_flags;
ring->slot[nm_i].flags = 0;
nm_i = nm_next(nm_i, lim);
n++;
} else {
@ -334,7 +337,9 @@ vtnet_netmap_rxsync(struct netmap_kring *kring, int flags)
kring->nr_hwcur = err;
virtqueue_notify(vq);
/* After draining the queue may need an intr from the hypervisor */
vtnet_rxq_enable_intr(rxq);
if (interrupts) {
vtnet_rxq_enable_intr(rxq);
}
}
ND("[C] h %d c %d t %d hwcur %d hwtail %d",
@ -345,6 +350,28 @@ vtnet_netmap_rxsync(struct netmap_kring *kring, int flags)
}
/*
 * nm_intr callback: switch interrupts on (onoff != 0) or off (onoff == 0)
 * for every RX queue and every TX virtqueue of the device, walking the
 * sc->vtnet_max_vq_pairs queue pairs in order.
 */
static void
vtnet_netmap_intr(struct netmap_adapter *na, int onoff)
{
	struct SOFTC_T *sc = na->ifp->if_softc;
	int i;

	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
		struct vtnet_rxq *rxq = &sc->vtnet_rxqs[i];
		struct virtqueue *txvq = sc->vtnet_txqs[i].vtntx_vq;

		if (!onoff) {
			/* RX first, then TX, same order as the enable path. */
			vtnet_rxq_disable_intr(rxq);
			virtqueue_disable_intr(txvq);
			continue;
		}
		vtnet_rxq_enable_intr(rxq);
		virtqueue_enable_intr(txvq);
	}
}
/* Make RX virtqueues buffers pointing to netmap buffers. */
static int
vtnet_netmap_init_rx_buffers(struct SOFTC_T *sc)
@ -417,6 +444,7 @@ vtnet_netmap_attach(struct SOFTC_T *sc)
na.nm_txsync = vtnet_netmap_txsync;
na.nm_rxsync = vtnet_netmap_rxsync;
na.nm_config = vtnet_netmap_config;
na.nm_intr = vtnet_netmap_intr;
na.num_tx_rings = na.num_rx_rings = sc->vtnet_max_vq_pairs;
D("max rings %d", sc->vtnet_max_vq_pairs);
netmap_attach(&na);

View File

@ -397,7 +397,6 @@ ixgbe_netmap_rxsync(struct netmap_kring *kring, int flags)
*/
if (netmap_no_pendintr || force_update) {
int crclen = (ix_crcstrip || IXGBE_IS_VF(adapter) ) ? 0 : 4;
uint16_t slot_flags = kring->nkr_slot_flags;
nic_i = rxr->next_to_check; // or also k2n(kring->nr_hwtail)
nm_i = netmap_idx_n2k(kring, nic_i);
@ -409,7 +408,7 @@ ixgbe_netmap_rxsync(struct netmap_kring *kring, int flags)
if ((staterr & IXGBE_RXD_STAT_DD) == 0)
break;
ring->slot[nm_i].len = le16toh(curr->wb.upper.length) - crclen;
ring->slot[nm_i].flags = slot_flags;
ring->slot[nm_i].flags = 0;
bus_dmamap_sync(rxr->ptag,
rxr->rx_buffers[nic_i].pmap, BUS_DMASYNC_POSTREAD);
nm_i = nm_next(nm_i, lim);

View File

@ -482,10 +482,8 @@ ports attached to the switch)
int netmap_verbose;
static int netmap_no_timestamp; /* don't timestamp on rxsync */
int netmap_mitigate = 1;
int netmap_no_pendintr = 1;
int netmap_txsync_retry = 2;
int netmap_flags = 0; /* debug flags */
static int netmap_fwd = 0; /* force transparent forwarding */
/*
@ -515,7 +513,9 @@ int netmap_generic_mit = 100*1000;
* Anyway users looking for the best performance should
* use native adapters.
*/
#ifdef linux
int netmap_generic_txqdisc = 1;
#endif
/* Default number of slots and queues for generic adapters. */
int netmap_generic_ringsize = 1024;
@ -539,21 +539,32 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, verbose,
CTLFLAG_RW, &netmap_verbose, 0, "Verbose mode");
SYSCTL_INT(_dev_netmap, OID_AUTO, no_timestamp,
CTLFLAG_RW, &netmap_no_timestamp, 0, "no_timestamp");
SYSCTL_INT(_dev_netmap, OID_AUTO, mitigate, CTLFLAG_RW, &netmap_mitigate, 0, "");
SYSCTL_INT(_dev_netmap, OID_AUTO, no_pendintr,
CTLFLAG_RW, &netmap_no_pendintr, 0, "Always look for new received packets.");
SYSCTL_INT(_dev_netmap, OID_AUTO, no_pendintr, CTLFLAG_RW, &netmap_no_pendintr,
0, "Always look for new received packets.");
SYSCTL_INT(_dev_netmap, OID_AUTO, txsync_retry, CTLFLAG_RW,
&netmap_txsync_retry, 0 , "Number of txsync loops in bridge's flush.");
&netmap_txsync_retry, 0, "Number of txsync loops in bridge's flush.");
SYSCTL_INT(_dev_netmap, OID_AUTO, flags, CTLFLAG_RW, &netmap_flags, 0 , "");
SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0 , "");
SYSCTL_INT(_dev_netmap, OID_AUTO, admode, CTLFLAG_RW, &netmap_admode, 0 , "");
SYSCTL_INT(_dev_netmap, OID_AUTO, generic_mit, CTLFLAG_RW, &netmap_generic_mit, 0 , "");
SYSCTL_INT(_dev_netmap, OID_AUTO, generic_ringsize, CTLFLAG_RW, &netmap_generic_ringsize, 0 , "");
SYSCTL_INT(_dev_netmap, OID_AUTO, generic_rings, CTLFLAG_RW, &netmap_generic_rings, 0 , "");
SYSCTL_INT(_dev_netmap, OID_AUTO, generic_txqdisc, CTLFLAG_RW, &netmap_generic_txqdisc, 0 , "");
SYSCTL_INT(_dev_netmap, OID_AUTO, ptnet_vnet_hdr, CTLFLAG_RW, &ptnet_vnet_hdr, 0 , "");
SYSCTL_INT(_dev_netmap, OID_AUTO, ptnetmap_tx_workers, CTLFLAG_RW, &ptnetmap_tx_workers, 0 , "");
SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0,
"Force NR_FORWARD mode");
/*
 * dev.netmap.admode: adapter selection policy (0 = best available,
 * 1 = native only, 2 = emulated only).  The description is built from two
 * adjacent string literals, so the first one must end with a space to keep
 * the concatenated text readable.
 */
SYSCTL_INT(_dev_netmap, OID_AUTO, admode, CTLFLAG_RW, &netmap_admode, 0,
    "Adapter mode. 0 selects the best option available, "
    "1 forces native adapter, 2 forces emulated adapter");
SYSCTL_INT(_dev_netmap, OID_AUTO, generic_mit, CTLFLAG_RW, &netmap_generic_mit,
0, "RX notification interval in nanoseconds");
SYSCTL_INT(_dev_netmap, OID_AUTO, generic_ringsize, CTLFLAG_RW,
&netmap_generic_ringsize, 0,
"Number of per-ring slots for emulated netmap mode");
SYSCTL_INT(_dev_netmap, OID_AUTO, generic_rings, CTLFLAG_RW,
&netmap_generic_rings, 0,
"Number of TX/RX queues for emulated netmap adapters");
#ifdef linux
SYSCTL_INT(_dev_netmap, OID_AUTO, generic_txqdisc, CTLFLAG_RW,
&netmap_generic_txqdisc, 0, "Use qdisc for generic adapters");
#endif
SYSCTL_INT(_dev_netmap, OID_AUTO, ptnet_vnet_hdr, CTLFLAG_RW, &ptnet_vnet_hdr,
0, "Allow ptnet devices to use virtio-net headers");
/* dev.netmap.ptnetmap_tx_workers: description spells the subsystem "ptnetmap". */
SYSCTL_INT(_dev_netmap, OID_AUTO, ptnetmap_tx_workers, CTLFLAG_RW,
    &ptnetmap_tx_workers, 0, "Use worker threads for ptnetmap TX processing");
SYSEND;
@ -912,7 +923,19 @@ netmap_hw_krings_delete(struct netmap_adapter *na)
netmap_krings_delete(na);
}
/*
 * Drop the adapter's use of its current memory allocator.
 *
 * netmap_mem_deref() is always called.  When it returns non-zero (treated
 * here as "this was the last user") and a previous allocator was saved in
 * na->nm_mem_prev, the native allocator had been overridden: release the
 * temporary allocator and put the saved one back in na->nm_mem.
 */
static void
netmap_mem_drop(struct netmap_adapter *na)
{
	if (!netmap_mem_deref(na->nm_mem, na))
		return;		/* not the last user: nothing to restore */

	if (!na->nm_mem_prev)
		return;		/* allocator was never overridden */

	/* drop the temporary allocator and reinstate the saved one */
	netmap_mem_put(na->nm_mem);
	na->nm_mem = na->nm_mem_prev;
	na->nm_mem_prev = NULL;
}
/*
* Undo everything that was done in netmap_do_regif(). In particular,
@ -980,7 +1003,7 @@ netmap_do_unregif(struct netmap_priv_d *priv)
/* delete the nifp */
netmap_mem_if_delete(na, priv->np_nifp);
/* drop the allocator */
netmap_mem_deref(na->nm_mem, na);
netmap_mem_drop(na);
/* mark the priv as unregistered */
priv->np_na = NULL;
priv->np_nifp = NULL;
@ -1289,7 +1312,7 @@ netmap_rxsync_from_host(struct netmap_kring *kring, int flags)
D("%s", nm_dump_buf(NMB(na, slot),len, 128, NULL));
slot->len = len;
slot->flags = kring->nkr_slot_flags;
slot->flags = 0;
nm_i = nm_next(nm_i, lim);
mbq_enqueue(&fq, m);
}
@ -1409,7 +1432,7 @@ netmap_get_hw_na(struct ifnet *ifp, struct netmap_mem_d *nmd, struct netmap_adap
assign_mem:
if (nmd != NULL && !((*na)->na_flags & NAF_MEM_OWNER) &&
(*na)->active_fds == 0 && ((*na)->nm_mem != nmd)) {
netmap_mem_put((*na)->nm_mem);
(*na)->nm_mem_prev = (*na)->nm_mem;
(*na)->nm_mem = netmap_mem_get(nmd);
}
@ -1896,7 +1919,8 @@ netmap_krings_get(struct netmap_priv_d *priv)
int excl = (priv->np_flags & NR_EXCLUSIVE);
enum txrx t;
ND("%s: grabbing tx [%d, %d) rx [%d, %d)",
if (netmap_verbose)
D("%s: grabbing tx [%d, %d) rx [%d, %d)",
na->name,
priv->np_qfirst[NR_TX],
priv->np_qlast[NR_TX],
@ -2059,9 +2083,57 @@ netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
if (na->active_fds == 0) {
/*
* If this is the first registration of the adapter,
* create the in-kernel view of the netmap rings,
* the netmap krings.
* perform sanity checks and create the in-kernel view
* of the netmap rings (the netmap krings).
*/
if (na->ifp) {
/* This netmap adapter is attached to an ifnet. */
unsigned nbs = netmap_mem_bufsize(na->nm_mem);
unsigned mtu = nm_os_ifnet_mtu(na->ifp);
/* The maximum amount of bytes that a single
* receive or transmit NIC descriptor can hold. */
unsigned hw_max_slot_len = 4096;
if (mtu <= hw_max_slot_len) {
/* The MTU fits a single NIC slot. We only
* need to check that netmap buffers are
* large enough to hold an MTU. NS_MOREFRAG
* cannot be used in this case. */
if (nbs < mtu) {
nm_prerr("error: netmap buf size (%u) "
"< device MTU (%u)", nbs, mtu);
error = EINVAL;
goto err_drop_mem;
}
} else {
/* More NIC slots may be needed to receive
* or transmit a single packet. Check that
* the adapter supports NS_MOREFRAG and that
* netmap buffers are large enough to hold
* the maximum per-slot size. */
if (!(na->na_flags & NAF_MOREFRAG)) {
nm_prerr("error: large MTU (%d) needed "
"but %s does not support "
"NS_MOREFRAG", mtu,
na->ifp->if_xname);
error = EINVAL;
goto err_drop_mem;
} else if (nbs < hw_max_slot_len) {
nm_prerr("error: using NS_MOREFRAG on "
"%s requires netmap buf size "
">= %u", na->ifp->if_xname,
hw_max_slot_len);
error = EINVAL;
goto err_drop_mem;
} else {
nm_prinf("info: netmap application on "
"%s needs to support "
"NS_MOREFRAG "
"(MTU=%u,netmap_buf_size=%u)",
na->ifp->if_xname, mtu, nbs);
}
}
}
/*
* Depending on the adapter, this may also create
@ -2128,15 +2200,15 @@ err_put_lut:
memset(&na->na_lut, 0, sizeof(na->na_lut));
err_del_if:
netmap_mem_if_delete(na, nifp);
err_rel_excl:
netmap_krings_put(priv);
err_del_rings:
netmap_mem_rings_delete(na);
err_rel_excl:
netmap_krings_put(priv);
err_del_krings:
if (na->active_fds == 0)
na->nm_krings_delete(na);
err_drop_mem:
netmap_mem_deref(na->nm_mem, na);
netmap_mem_drop(na);
err:
priv->np_na = NULL;
return error;
@ -2224,6 +2296,7 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread
do {
/* memsize is always valid */
u_int memflags;
uint64_t memsize;
if (nmr->nr_name[0] != '\0') {
@ -2243,10 +2316,11 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread
}
}
error = netmap_mem_get_info(nmd, &nmr->nr_memsize, &memflags,
error = netmap_mem_get_info(nmd, &memsize, &memflags,
&nmr->nr_arg2);
if (error)
break;
nmr->nr_memsize = (uint32_t)memsize;
if (na == NULL) /* only memory info */
break;
nmr->nr_offset = 0;
@ -2304,6 +2378,17 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread
}
NMG_UNLOCK();
break;
} else if (i == NETMAP_POOLS_CREATE) {
nmd = netmap_mem_ext_create(nmr, &error);
if (nmd == NULL)
break;
/* reset the fields used by POOLS_CREATE to
* avoid confusing the rest of the code
*/
nmr->nr_cmd = 0;
nmr->nr_arg1 = 0;
nmr->nr_arg2 = 0;
nmr->nr_arg3 = 0;
} else if (i != 0) {
D("nr_cmd must be 0 not %d", i);
error = EINVAL;
@ -2314,7 +2399,7 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread
NMG_LOCK();
do {
u_int memflags;
struct ifnet *ifp;
uint64_t memsize;
if (priv->np_nifp != NULL) { /* thread already registered */
error = EBUSY;
@ -2356,12 +2441,13 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread
nmr->nr_tx_rings = na->num_tx_rings;
nmr->nr_rx_slots = na->num_rx_desc;
nmr->nr_tx_slots = na->num_tx_desc;
error = netmap_mem_get_info(na->nm_mem, &nmr->nr_memsize, &memflags,
error = netmap_mem_get_info(na->nm_mem, &memsize, &memflags,
&nmr->nr_arg2);
if (error) {
netmap_do_unregif(priv);
break;
}
nmr->nr_memsize = (uint32_t)memsize;
if (memflags & NETMAP_MEM_PRIVATE) {
*(uint32_t *)(uintptr_t)&nifp->ni_flags |= NI_PRIV_MEM;
}
@ -2533,7 +2619,6 @@ netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr)
#define want_tx want[NR_TX]
#define want_rx want[NR_RX]
struct mbq q; /* packets from RX hw queues to host stack */
enum txrx t;
/*
* In order to avoid nested locks, we need to "double check"
@ -2585,14 +2670,15 @@ netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr)
check_all_tx = nm_si_user(priv, NR_TX);
check_all_rx = nm_si_user(priv, NR_RX);
#ifdef __FreeBSD__
/*
* We start with a lock free round which is cheap if we have
* slots available. If this fails, then lock and call the sync
* routines.
* routines. We can't do this on Linux, as the contract says
* that we must call nm_os_selrecord() unconditionally.
*/
#if 1 /* new code- call rx if any of the ring needs to release or read buffers */
if (want_tx) {
t = NR_TX;
enum txrx t = NR_TX;
for (i = priv->np_qfirst[t]; want[t] && i < priv->np_qlast[t]; i++) {
kring = &NMR(na, t)[i];
/* XXX compare ring->cur and kring->tail */
@ -2603,8 +2689,8 @@ netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr)
}
}
if (want_rx) {
enum txrx t = NR_RX;
want_rx = 0; /* look for a reason to run the handlers */
t = NR_RX;
for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
kring = &NMR(na, t)[i];
if (kring->ring->cur == kring->ring->tail /* try fetch new buffers */
@ -2615,24 +2701,20 @@ netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr)
if (!want_rx)
revents |= events & (POLLIN | POLLRDNORM); /* we have data */
}
#else /* old code */
for_rx_tx(t) {
for (i = priv->np_qfirst[t]; want[t] && i < priv->np_qlast[t]; i++) {
kring = &NMR(na, t)[i];
/* XXX compare ring->cur and kring->tail */
if (!nm_ring_empty(kring->ring)) {
revents |= want[t];
want[t] = 0; /* also breaks the loop */
}
}
}
#endif /* old code */
#endif
#ifdef linux
/* The selrecord must be unconditional on linux. */
nm_os_selrecord(sr, check_all_tx ?
&na->si[NR_TX] : &na->tx_rings[priv->np_qfirst[NR_TX]].si);
nm_os_selrecord(sr, check_all_rx ?
&na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]].si);
#endif /* linux */
/*
* If we want to push packets out (priv->np_txpoll) or
* want_tx is still set, we must issue txsync calls
* (on all rings, to avoid that the tx rings stall).
* XXX should also check cur != hwcur on the tx rings.
* Fortunately, normal tx mode has np_txpoll set.
*/
if (priv->np_txpoll || want_tx) {
@ -2649,6 +2731,12 @@ flush_tx:
kring = &na->tx_rings[i];
ring = kring->ring;
/*
* Don't try to txsync this TX ring if we already found some
* space in some of the TX rings (want_tx == 0) and there are no
* TX slots in this ring that need to be flushed to the NIC
* (cur == hwcur).
*/
if (!send_down && !want_tx && ring->cur == kring->nr_hwcur)
continue;
@ -2676,14 +2764,18 @@ flush_tx:
if (found) { /* notify other listeners */
revents |= want_tx;
want_tx = 0;
#ifndef linux
kring->nm_notify(kring, 0);
#endif /* linux */
}
}
/* if there were any packet to forward we must have handled them by now */
send_down = 0;
if (want_tx && retry_tx && sr) {
#ifndef linux
nm_os_selrecord(sr, check_all_tx ?
&na->si[NR_TX] : &na->tx_rings[priv->np_qfirst[NR_TX]].si);
#endif /* !linux */
retry_tx = 0;
goto flush_tx;
}
@ -2734,14 +2826,18 @@ do_retry_rx:
if (found) {
revents |= want_rx;
retry_rx = 0;
#ifndef linux
kring->nm_notify(kring, 0);
#endif /* linux */
}
}
#ifndef linux
if (retry_rx && sr) {
nm_os_selrecord(sr, check_all_rx ?
&na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]].si);
}
#endif /* !linux */
if (send_down || retry_rx) {
retry_rx = 0;
if (send_down)
@ -2766,6 +2862,44 @@ do_retry_rx:
#undef want_rx
}
int
nma_intr_enable(struct netmap_adapter *na, int onoff)
{
bool changed = false;
enum txrx t;
int i;
for_rx_tx(t) {
for (i = 0; i < nma_get_nrings(na, t); i++) {
struct netmap_kring *kring = &NMR(na, t)[i];
int on = !(kring->nr_kflags & NKR_NOINTR);
if (!!onoff != !!on) {
changed = true;
}
if (onoff) {
kring->nr_kflags &= ~NKR_NOINTR;
} else {
kring->nr_kflags |= NKR_NOINTR;
}
}
}
if (!changed) {
return 0; /* nothing to do */
}
if (!na->nm_intr) {
D("Cannot %s interrupts for %s", onoff ? "enable" : "disable",
na->name);
return -1;
}
na->nm_intr(na, onoff);
return 0;
}
/*-------------------- driver support routines -------------------*/
@ -2804,6 +2938,7 @@ netmap_attach_common(struct netmap_adapter *na)
if (na->na_flags & NAF_HOST_RINGS && na->ifp) {
na->if_input = na->ifp->if_input; /* for netmap_send_up */
}
na->pdev = na; /* make sure netmap_mem_map() is called */
#endif /* __FreeBSD__ */
if (na->nm_krings_create == NULL) {
/* we assume that we have been called by a driver,
@ -2832,22 +2967,6 @@ netmap_attach_common(struct netmap_adapter *na)
return 0;
}
/* standard cleanup, called by all destructors
 *
 * Tears down whatever is still attached to the adapter and then frees the
 * adapter itself; 'na' must not be used after this function returns.
 */
void
netmap_detach_common(struct netmap_adapter *na)
{
/* krings still present at this point are unexpected: log and delete them */
if (na->tx_rings) { /* XXX should not happen */
D("freeing leftover tx_rings");
na->nm_krings_delete(na);
}
netmap_pipe_dealloc(na);
/* NOTE(review): presumably releases this adapter's reference on its allocator */
if (na->nm_mem)
netmap_mem_put(na->nm_mem);
/* wipe the structure before handing it back to nm_os_free() */
bzero(na, sizeof(*na));
nm_os_free(na);
}
/* Wrapper for the register callback provided netmap-enabled
* hardware drivers.
* nm_iszombie(na) means that the driver module has been
@ -2900,7 +3019,7 @@ netmap_hw_dtor(struct netmap_adapter *na)
* Return 0 on success, ENOMEM otherwise.
*/
int
netmap_attach_ext(struct netmap_adapter *arg, size_t size)
netmap_attach_ext(struct netmap_adapter *arg, size_t size, int override_reg)
{
struct netmap_hw_adapter *hwna = NULL;
struct ifnet *ifp = NULL;
@ -2912,15 +3031,27 @@ netmap_attach_ext(struct netmap_adapter *arg, size_t size)
if (arg == NULL || arg->ifp == NULL)
goto fail;
ifp = arg->ifp;
if (NA(ifp) && !NM_NA_VALID(ifp)) {
/* If NA(ifp) is not null but there is no valid netmap
* adapter it means that someone else is using the same
* pointer (e.g. ax25_ptr on linux). This happens for
* instance when also PF_RING is in use. */
D("Error: netmap adapter hook is busy");
return EBUSY;
}
hwna = nm_os_malloc(size);
if (hwna == NULL)
goto fail;
hwna->up = *arg;
hwna->up.na_flags |= NAF_HOST_RINGS | NAF_NATIVE;
strncpy(hwna->up.name, ifp->if_xname, sizeof(hwna->up.name));
hwna->nm_hw_register = hwna->up.nm_register;
hwna->up.nm_register = netmap_hw_reg;
if (override_reg) {
hwna->nm_hw_register = hwna->up.nm_register;
hwna->up.nm_register = netmap_hw_reg;
}
if (netmap_attach_common(&hwna->up)) {
nm_os_free(hwna);
goto fail;
@ -2939,6 +3070,7 @@ netmap_attach_ext(struct netmap_adapter *arg, size_t size)
#endif /* NETMAP_LINUX_HAVE_NETDEV_OPS */
}
hwna->nm_ndo.ndo_start_xmit = linux_netmap_start_xmit;
hwna->nm_ndo.ndo_change_mtu = linux_netmap_change_mtu;
if (ifp->ethtool_ops) {
hwna->nm_eto = *ifp->ethtool_ops;
}
@ -2968,7 +3100,8 @@ fail:
int
netmap_attach(struct netmap_adapter *arg)
{
return netmap_attach_ext(arg, sizeof(struct netmap_hw_adapter));
return netmap_attach_ext(arg, sizeof(struct netmap_hw_adapter),
1 /* override nm_reg */);
}
@ -2996,7 +3129,15 @@ NM_DBG(netmap_adapter_put)(struct netmap_adapter *na)
if (na->nm_dtor)
na->nm_dtor(na);
netmap_detach_common(na);
if (na->tx_rings) { /* XXX should not happen */
D("freeing leftover tx_rings");
na->nm_krings_delete(na);
}
netmap_pipe_dealloc(na);
if (na->nm_mem)
netmap_mem_put(na->nm_mem);
bzero(na, sizeof(*na));
nm_os_free(na);
return 1;
}
@ -3029,15 +3170,14 @@ netmap_detach(struct ifnet *ifp)
NMG_LOCK();
netmap_set_all_rings(na, NM_KR_LOCKED);
na->na_flags |= NAF_ZOMBIE;
/*
* if the netmap adapter is not native, somebody
* changed it, so we can not release it here.
* The NAF_ZOMBIE flag will notify the new owner that
* the driver is gone.
*/
if (na->na_flags & NAF_NATIVE) {
netmap_adapter_put(na);
if (!(na->na_flags & NAF_NATIVE) || !netmap_adapter_put(na)) {
na->na_flags |= NAF_ZOMBIE;
}
/* give active users a chance to notice that NAF_ZOMBIE has been
* turned on, so that they can stop and return an error to userspace.
@ -3116,9 +3256,9 @@ netmap_transmit(struct ifnet *ifp, struct mbuf *m)
*/
mbq_lock(q);
busy = kring->nr_hwtail - kring->nr_hwcur;
if (busy < 0)
busy += kring->nkr_num_slots;
busy = kring->nr_hwtail - kring->nr_hwcur;
if (busy < 0)
busy += kring->nkr_num_slots;
if (busy + mbq_len(q) >= kring->nkr_num_slots - 1) {
RD(2, "%s full hwcur %d hwtail %d qlen %d", na->name,
kring->nr_hwcur, kring->nr_hwtail, mbq_len(q));
@ -3216,16 +3356,6 @@ netmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n,
kring->nr_hwtail -= lim + 1;
}
#if 0 // def linux
/* XXX check that the mappings are correct */
/* need ring_nr, adapter->pdev, direction */
buffer_info->dma = dma_map_single(&pdev->dev, addr, adapter->rx_buffer_len, DMA_FROM_DEVICE);
if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) {
D("error mapping rx netmap buffer %d", i);
// XXX fix error handling
}
#endif /* linux */
/*
* Wakeup on the individual and global selwait
* We do the wakeup here, but the ring is not yet reconfigured.

View File

@ -173,6 +173,16 @@ nm_os_ifnet_fini(void)
nm_ifnet_dh_tag);
}
/*
 * Return the MTU of the given interface.
 *
 * The field holding the MTU is selected at compile time: if_mtu when
 * __FreeBSD_version is at least 1100030, if_data.ifi_mtu otherwise.
 */
unsigned
nm_os_ifnet_mtu(struct ifnet *ifp)
{
#if __FreeBSD_version >= 1100030
	return ifp->if_mtu;
#else /* __FreeBSD_version < 1100030 */
	return ifp->if_data.ifi_mtu;
#endif
}
rawsum_t
nm_os_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum)
{
@ -294,24 +304,30 @@ nm_os_catch_rx(struct netmap_generic_adapter *gna, int intercept)
{
struct netmap_adapter *na = &gna->up.up;
struct ifnet *ifp = na->ifp;
int ret = 0;
nm_os_ifnet_lock();
if (intercept) {
if (gna->save_if_input) {
D("cannot intercept again");
return EINVAL; /* already set */
ret = EINVAL; /* already set */
goto out;
}
gna->save_if_input = ifp->if_input;
ifp->if_input = freebsd_generic_rx_handler;
} else {
if (!gna->save_if_input){
D("cannot restore");
return EINVAL; /* not saved */
ret = EINVAL; /* not saved */
goto out;
}
ifp->if_input = gna->save_if_input;
gna->save_if_input = NULL;
}
out:
nm_os_ifnet_unlock();
return 0;
return ret;
}
@ -327,12 +343,14 @@ nm_os_catch_tx(struct netmap_generic_adapter *gna, int intercept)
struct netmap_adapter *na = &gna->up.up;
struct ifnet *ifp = netmap_generic_getifp(gna);
nm_os_ifnet_lock();
if (intercept) {
na->if_transmit = ifp->if_transmit;
ifp->if_transmit = netmap_transmit;
} else {
ifp->if_transmit = na->if_transmit;
}
nm_os_ifnet_unlock();
return 0;
}

View File

@ -86,8 +86,6 @@ __FBSDID("$FreeBSD$");
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>
#define rtnl_lock() ND("rtnl_lock called")
#define rtnl_unlock() ND("rtnl_unlock called")
#define MBUF_RXQ(m) ((m)->m_pkthdr.flowid)
#define smp_mb()
@ -168,7 +166,13 @@ nm_os_get_mbuf(struct ifnet *ifp, int len)
* has a KASSERT(), checking that the mbuf dtor function is not NULL.
*/
#if __FreeBSD_version <= 1200050
static void void_mbuf_dtor(struct mbuf *m, void *arg1, void *arg2) { }
#else /* __FreeBSD_version >= 1200051 */
/* The arg1 and arg2 pointer arguments were removed by r324446, which
* is included since version 1200051. */
static void void_mbuf_dtor(struct mbuf *m) { }
#endif /* __FreeBSD_version >= 1200051 */
#define SET_MBUF_DESTRUCTOR(m, fn) do { \
(m)->m_ext.ext_free = (fn != NULL) ? \
@ -200,8 +204,6 @@ nm_os_get_mbuf(struct ifnet *ifp, int len)
#include "win_glue.h"
#define rtnl_lock() ND("rtnl_lock called")
#define rtnl_unlock() ND("rtnl_unlock called")
#define MBUF_TXQ(m) 0//((m)->m_pkthdr.flowid)
#define MBUF_RXQ(m) 0//((m)->m_pkthdr.flowid)
#define smp_mb() //XXX: to be correctly defined
@ -210,7 +212,6 @@ nm_os_get_mbuf(struct ifnet *ifp, int len)
#include "bsd_glue.h"
#include <linux/rtnetlink.h> /* rtnl_[un]lock() */
#include <linux/ethtool.h> /* struct ethtool_ops, get_ringparam */
#include <linux/hrtimer.h>
@ -339,17 +340,13 @@ generic_netmap_unregister(struct netmap_adapter *na)
int i, r;
if (na->active_fds == 0) {
rtnl_lock();
na->na_flags &= ~NAF_NETMAP_ON;
/* Release packet steering control. */
nm_os_catch_tx(gna, 0);
/* Stop intercepting packets on the RX path. */
nm_os_catch_rx(gna, 0);
rtnl_unlock();
/* Release packet steering control. */
nm_os_catch_tx(gna, 0);
}
for_each_rx_kring_h(r, kring, na) {
@ -510,24 +507,20 @@ generic_netmap_register(struct netmap_adapter *na, int enable)
}
if (na->active_fds == 0) {
rtnl_lock();
/* Prepare to intercept incoming traffic. */
error = nm_os_catch_rx(gna, 1);
if (error) {
D("nm_os_catch_rx(1) failed (%d)", error);
goto register_handler;
goto free_tx_pools;
}
/* Make netmap control the packet steering. */
/* Let netmap control the packet steering. */
error = nm_os_catch_tx(gna, 1);
if (error) {
D("nm_os_catch_tx(1) failed (%d)", error);
goto catch_rx;
}
rtnl_unlock();
na->na_flags |= NAF_NETMAP_ON;
#ifdef RATE_GENERIC
@ -548,8 +541,6 @@ generic_netmap_register(struct netmap_adapter *na, int enable)
/* Here (na->active_fds == 0) holds. */
catch_rx:
nm_os_catch_rx(gna, 0);
register_handler:
rtnl_unlock();
free_tx_pools:
for_each_tx_kring(r, kring, na) {
mtx_destroy(&kring->tx_event_lock);
@ -626,7 +617,11 @@ generic_mbuf_destructor(struct mbuf *m)
* txsync. */
netmap_generic_irq(na, r, NULL);
#ifdef __FreeBSD__
#if __FreeBSD_version <= 1200050
void_mbuf_dtor(m, NULL, NULL);
#else /* __FreeBSD_version >= 1200051 */
void_mbuf_dtor(m);
#endif /* __FreeBSD_version >= 1200051 */
#endif
}
@ -1017,7 +1012,6 @@ generic_netmap_rxsync(struct netmap_kring *kring, int flags)
int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
/* Adapter-specific variables. */
uint16_t slot_flags = kring->nkr_slot_flags;
u_int nm_buf_len = NETMAP_BUF_SIZE(na);
struct mbq tmpq;
struct mbuf *m;
@ -1096,7 +1090,7 @@ generic_netmap_rxsync(struct netmap_kring *kring, int flags)
avail -= nm_buf_len;
ring->slot[nm_i].len = copy;
ring->slot[nm_i].flags = slot_flags | (mlen ? NS_MOREFRAG : 0);
ring->slot[nm_i].flags = (mlen ? NS_MOREFRAG : 0);
nm_i = nm_next(nm_i, lim);
}
@ -1208,6 +1202,15 @@ generic_netmap_attach(struct ifnet *ifp)
}
#endif
if (NA(ifp) && !NM_NA_VALID(ifp)) {
/* If NA(ifp) is not null but there is no valid netmap
* adapter it means that someone else is using the same
* pointer (e.g. ax25_ptr on linux). This happens for
* instance when also PF_RING is in use. */
D("Error: netmap adapter hook is busy");
return EBUSY;
}
num_tx_desc = num_rx_desc = netmap_generic_ringsize; /* starting point */
nm_os_generic_find_num_desc(ifp, &num_tx_desc, &num_rx_desc); /* ignore errors */

View File

@ -39,6 +39,9 @@
#if defined(linux)
#if defined(CONFIG_NETMAP_EXTMEM)
#define WITH_EXTMEM
#endif
#if defined(CONFIG_NETMAP_VALE)
#define WITH_VALE
#endif
@ -90,6 +93,7 @@
#define NM_MTX_INIT(m) sx_init(&(m), #m)
#define NM_MTX_DESTROY(m) sx_destroy(&(m))
#define NM_MTX_LOCK(m) sx_xlock(&(m))
/*
 * Acquire the exclusive lock by retrying sx_try_xlock() in a tight loop
 * rather than calling sx_xlock(). Wrapped in do { } while (0) so that the
 * macro expands to exactly one statement and can safely be used as the
 * body of an unbraced if/else; callers terminate it with ';' as usual.
 */
#define NM_MTX_SPINLOCK(m) do { while (!sx_try_xlock(&(m))) ; } while (0)
#define NM_MTX_UNLOCK(m) sx_xunlock(&(m))
#define NM_MTX_ASSERT(m) sx_assert(&(m), SA_XLOCKED)
@ -100,7 +104,7 @@
#define MBUF_TRANSMIT(na, ifp, m) ((na)->if_transmit(ifp, m))
#define GEN_TX_MBUF_IFP(m) ((m)->m_pkthdr.rcvif)
#define NM_ATOMIC_T volatile int // XXX ?
#define NM_ATOMIC_T volatile int /* required by atomic/bitops.h */
/* atomic operations */
#include <machine/atomic.h>
#define NM_ATOMIC_TEST_AND_SET(p) (!atomic_cmpset_acq_int((p), 0, 1))
@ -132,13 +136,10 @@ struct nm_selinfo {
};
// XXX linux struct, not used in FreeBSD
struct net_device_ops {
};
struct ethtool_ops {
};
struct hrtimer {
/* Not used in FreeBSD. */
};
#define NM_BNS_GET(b)
#define NM_BNS_PUT(b)
@ -202,14 +203,6 @@ struct hrtimer {
#define NETMAP_KERNEL_XCHANGE_POINTERS _IO('i', 180)
#define NETMAP_KERNEL_SEND_SHUTDOWN_SIGNAL _IO_direct('i', 195)
//Empty data structures are not permitted by MSVC compiler
//XXX_ale, try to solve this problem
struct net_device_ops{
char data[1];
};
typedef struct ethtool_ops{
char data[1];
};
typedef struct hrtimer{
KTIMER timer;
BOOLEAN active;
@ -297,6 +290,8 @@ void nm_os_ifnet_fini(void);
void nm_os_ifnet_lock(void);
void nm_os_ifnet_unlock(void);
unsigned nm_os_ifnet_mtu(struct ifnet *ifp);
void nm_os_get_module(void);
void nm_os_put_module(void);
@ -305,8 +300,10 @@ void netmap_undo_zombie(struct ifnet *);
/* os independent alloc/realloc/free */
void *nm_os_malloc(size_t);
void *nm_os_vmalloc(size_t);
void *nm_os_realloc(void *, size_t new_size, size_t old_size);
void nm_os_free(void *);
void nm_os_vfree(void *);
/* passes a packet up to the host stack.
* If the packet is sent (or dropped) immediately it returns NULL,
@ -371,8 +368,7 @@ struct netmap_zmon_list {
* TX rings: hwcur + hwofs coincides with next_to_send
*
* For received packets, slot->flags is set to nkr_slot_flags
* so we can provide a proper initial value (e.g. set NS_FORWARD
* when operating in 'transparent' mode).
* so we can provide a proper initial value.
*
* The following fields are used to implement lock-free copy of packets
* from input to output ports in VALE switch:
@ -427,6 +423,7 @@ struct netmap_kring {
* (used internally by pipes and
* by ptnetmap host ports)
*/
#define NKR_NOINTR 0x10 /* don't use interrupts on this ring */
uint32_t nr_mode;
uint32_t nr_pending_mode;
@ -442,8 +439,6 @@ struct netmap_kring {
*/
int32_t nkr_hwofs;
uint16_t nkr_slot_flags; /* initial value for flags */
/* last_reclaim is opaque marker to help reduce the frequency
* of operations such as reclaiming tx buffers. A possible use
* is set it to ticks and do the reclaim only once per tick.
@ -580,7 +575,7 @@ nm_prev(uint32_t i, uint32_t lim)
+-----------------+ +-----------------+
| | | |
|XXX free slot XXX| |XXX free slot XXX|
| free | | free |
+-----------------+ +-----------------+
head->| owned by user |<-hwcur | not sent to nic |<-hwcur
| | | yet |
@ -621,9 +616,14 @@ tail->| |<-hwtail | |<-hwlease
* a circular array where completions should be reported.
*/
struct lut_entry;
#ifdef __FreeBSD__
#define plut_entry lut_entry
#endif
struct netmap_lut {
struct lut_entry *lut;
struct plut_entry *plut;
uint32_t objtotal; /* max buffer index */
uint32_t objsize; /* buffer size */
};
@ -671,6 +671,7 @@ struct netmap_adapter {
#define NAF_HOST_RINGS 64 /* the adapter supports the host rings */
#define NAF_FORCE_NATIVE 128 /* the adapter is always NATIVE */
#define NAF_PTNETMAP_HOST 256 /* the adapter supports ptnetmap in the host */
#define NAF_MOREFRAG 512 /* the adapter supports NS_MOREFRAG */
#define NAF_ZOMBIE (1U<<30) /* the nic driver has been unloaded */
#define NAF_BUSY (1U<<31) /* the adapter is used internally and
* cannot be registered from userspace
@ -711,9 +712,8 @@ struct netmap_adapter {
/* copy of if_input for netmap_send_up() */
void (*if_input)(struct ifnet *, struct mbuf *);
/* references to the ifnet and device routines, used by
* the generic netmap functions.
*/
/* Back reference to the parent ifnet struct. Used for
* hardware ports (emulated netmap included). */
struct ifnet *ifp; /* adapter is ifp->if_softc */
/*---- callbacks for this netmap adapter -----*/
@ -806,6 +806,7 @@ struct netmap_adapter {
* buffer addresses, the total number of buffers and the buffer size.
*/
struct netmap_mem_d *nm_mem;
struct netmap_mem_d *nm_mem_prev;
struct netmap_lut na_lut;
/* additional information attached to this adapter
@ -861,6 +862,8 @@ NMR(struct netmap_adapter *na, enum txrx t)
return (t == NR_TX ? na->tx_rings : na->rx_rings);
}
int nma_intr_enable(struct netmap_adapter *na, int onoff);
/*
* If the NIC is owned by the kernel
* (i.e., bridge), neither another bridge nor user can use it;
@ -898,8 +901,10 @@ struct netmap_vp_adapter { /* VALE software port */
struct netmap_hw_adapter { /* physical device */
struct netmap_adapter up;
struct net_device_ops nm_ndo; // XXX linux only
struct ethtool_ops nm_eto; // XXX linux only
#ifdef linux
struct net_device_ops nm_ndo;
struct ethtool_ops nm_eto;
#endif
const struct ethtool_ops* save_ethtool;
int (*nm_hw_register)(struct netmap_adapter *, int onoff);
@ -920,12 +925,10 @@ struct netmap_generic_adapter { /* emulated device */
/* Pointer to a previously used netmap adapter. */
struct netmap_adapter *prev;
/* generic netmap adapters support:
* a net_device_ops struct overrides ndo_select_queue(),
* save_if_input saves the if_input hook (FreeBSD),
* mit implements rx interrupt mitigation,
/* Emulated netmap adapters support:
* - save_if_input saves the if_input hook (FreeBSD);
* - mit implements rx interrupt mitigation;
*/
struct net_device_ops generic_ndo;
void (*save_if_input)(struct ifnet *, struct mbuf *);
struct nm_generic_mit *mit;
@ -1186,7 +1189,7 @@ static __inline void nm_kr_start(struct netmap_kring *kr)
* virtual ports (vale, pipes, monitor)
*/
int netmap_attach(struct netmap_adapter *);
int netmap_attach_ext(struct netmap_adapter *, size_t size);
int netmap_attach_ext(struct netmap_adapter *, size_t size, int override_reg);
void netmap_detach(struct ifnet *);
int netmap_transmit(struct ifnet *, struct mbuf *);
struct netmap_slot *netmap_reset(struct netmap_adapter *na,
@ -1279,15 +1282,12 @@ nm_set_native_flags(struct netmap_adapter *na)
ifp->if_transmit = netmap_transmit;
#elif defined (_WIN32)
(void)ifp; /* prevent a warning */
//XXX_ale can we just comment those?
//na->if_transmit = ifp->if_transmit;
//ifp->if_transmit = netmap_transmit;
#else
#elif defined (linux)
na->if_transmit = (void *)ifp->netdev_ops;
ifp->netdev_ops = &((struct netmap_hw_adapter *)na)->nm_ndo;
((struct netmap_hw_adapter *)na)->save_ethtool = ifp->ethtool_ops;
ifp->ethtool_ops = &((struct netmap_hw_adapter*)na)->nm_eto;
#endif
#endif /* linux */
nm_update_hostrings_mode(na);
}
@ -1308,8 +1308,6 @@ nm_clear_native_flags(struct netmap_adapter *na)
ifp->if_transmit = na->if_transmit;
#elif defined(_WIN32)
(void)ifp; /* prevent a warning */
//XXX_ale can we just comment those?
//ifp->if_transmit = na->if_transmit;
#else
ifp->netdev_ops = (void *)na->if_transmit;
ifp->ethtool_ops = ((struct netmap_hw_adapter*)na)->save_ethtool;
@ -1374,8 +1372,6 @@ uint32_t nm_rxsync_prologue(struct netmap_kring *, struct netmap_ring *);
* - provide defaults for the setup callbacks and the memory allocator
*/
int netmap_attach_common(struct netmap_adapter *);
/* common actions to be performed on netmap adapter destruction */
void netmap_detach_common(struct netmap_adapter *);
/* fill priv->np_[tr]xq{first,last} using the ringid and flags information
* coming from a struct nmreq
*/
@ -1431,8 +1427,8 @@ int netmap_get_hw_na(struct ifnet *ifp,
*
* VALE only supports unicast or broadcast. The lookup
* function can return 0 .. NM_BDG_MAXPORTS-1 for regular ports,
* NM_BDG_MAXPORTS for broadcast, NM_BDG_MAXPORTS+1 for unknown.
* XXX in practice "unknown" might be handled same as broadcast.
* NM_BDG_MAXPORTS for broadcast, NM_BDG_MAXPORTS+1 to indicate
* drop.
*/
typedef u_int (*bdg_lookup_fn_t)(struct nm_bdg_fwd *ft, uint8_t *ring_nr,
struct netmap_vp_adapter *);
@ -1471,7 +1467,7 @@ int netmap_bdg_config(struct nmreq *nmr);
#ifdef WITH_PIPES
/* max number of pipes per device */
#define NM_MAXPIPES 64 /* XXX how many? */
#define NM_MAXPIPES 64 /* XXX this should probably be a sysctl */
void netmap_pipe_dealloc(struct netmap_adapter *);
int netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na,
struct netmap_mem_d *nmd, int create);
@ -1573,7 +1569,9 @@ extern int netmap_flags;
extern int netmap_generic_mit;
extern int netmap_generic_ringsize;
extern int netmap_generic_rings;
#ifdef linux
extern int netmap_generic_txqdisc;
#endif
extern int ptnetmap_tx_workers;
/*
@ -1618,13 +1616,14 @@ static void netmap_dmamap_cb(__unused void *arg,
/* bus_dmamap_load wrapper: call aforementioned function if map != NULL.
* XXX can we do it without a callback ?
*/
static inline void
static inline int
netmap_load_map(struct netmap_adapter *na,
bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
{
if (map)
bus_dmamap_load(tag, map, buf, NETMAP_BUF_SIZE(na),
netmap_dmamap_cb, NULL, BUS_DMA_NOWAIT);
return 0;
}
static inline void
@ -1635,6 +1634,8 @@ netmap_unload_map(struct netmap_adapter *na,
bus_dmamap_unload(tag, map);
}
#define netmap_sync_map(na, tag, map, sz, t)
/* update the map when a buffer changes. */
static inline void
netmap_reload_map(struct netmap_adapter *na,
@ -1654,54 +1655,11 @@ netmap_reload_map(struct netmap_adapter *na,
int nm_iommu_group_id(bus_dma_tag_t dev);
#include <linux/dma-mapping.h>
static inline void
netmap_load_map(struct netmap_adapter *na,
bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
{
if (0 && map) {
*map = dma_map_single(na->pdev, buf, NETMAP_BUF_SIZE(na),
DMA_BIDIRECTIONAL);
}
}
static inline void
netmap_unload_map(struct netmap_adapter *na,
bus_dma_tag_t tag, bus_dmamap_t map)
{
u_int sz = NETMAP_BUF_SIZE(na);
if (*map) {
dma_unmap_single(na->pdev, *map, sz,
DMA_BIDIRECTIONAL);
}
}
static inline void
netmap_reload_map(struct netmap_adapter *na,
bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
{
u_int sz = NETMAP_BUF_SIZE(na);
if (*map) {
dma_unmap_single(na->pdev, *map, sz,
DMA_BIDIRECTIONAL);
}
*map = dma_map_single(na->pdev, buf, sz,
DMA_BIDIRECTIONAL);
}
/*
* XXX How do we redefine these functions:
*
* on linux we need
* dma_map_single(&pdev->dev, virt_addr, len, direction)
* dma_unmap_single(&adapter->pdev->dev, phys_addr, len, direction
* The len can be implicit (on netmap it is NETMAP_BUF_SIZE)
* unfortunately the direction is not, so we need to change
* something to have a cross API
* dma_unmap_single(&adapter->pdev->dev, phys_addr, len, direction)
*/
#if 0
struct e1000_buffer *buffer_info = &tx_ring->buffer_info[l];
/* set time_stamp *before* dma to help avoid a possible race */
@ -1724,10 +1682,59 @@ netmap_reload_map(struct netmap_adapter *na,
#endif
/*
* The bus_dmamap_sync() can be one of wmb() or rmb() depending on direction.
*/
#define bus_dmamap_sync(_a, _b, _c)
/*
 * Create a bidirectional streaming DMA mapping for 'buf' ('size' bytes)
 * on the adapter's device and store the resulting handle in '*map'.
 * A NULL 'map' means there is nothing to do. 'tag' is unused here.
 *
 * Returns 0 on success (or when 'map' is NULL); on mapping failure '*map'
 * is reset to 0 and ENOMEM is returned.
 */
static inline int
netmap_load_map(struct netmap_adapter *na,
	bus_dma_tag_t tag, bus_dmamap_t map, void *buf, u_int size)
{
	if (!map)
		return 0;

	*map = dma_map_single(na->pdev, buf, size, DMA_BIDIRECTIONAL);
	if (!dma_mapping_error(na->pdev, *map))
		return 0;

	/* Mapping failed: leave the slot marked as unmapped. */
	*map = 0;
	return ENOMEM;
}
/*
 * Tear down the bidirectional DMA mapping of 'sz' bytes whose handle is
 * stored in '*map'. A zero handle is treated as "not mapped" and skipped.
 * 'tag' is unused here.
 */
static inline void
netmap_unload_map(struct netmap_adapter *na,
	bus_dma_tag_t tag, bus_dmamap_t map, u_int sz)
{
	if (!*map)
		return;

	dma_unmap_single(na->pdev, *map, sz, DMA_BIDIRECTIONAL);
}
/*
 * Synchronize the DMA mapping stored in '*map' ('sz' bytes) according to
 * the ring direction 't': RX buffers are synced for the CPU, anything
 * else is synced for the device. A zero handle is skipped.
 * 'tag' is unused here.
 */
static inline void
netmap_sync_map(struct netmap_adapter *na,
	bus_dma_tag_t tag, bus_dmamap_t map, u_int sz, enum txrx t)
{
	if (!*map)
		return;

	switch (t) {
	case NR_RX:
		dma_sync_single_for_cpu(na->pdev, *map, sz,
				DMA_FROM_DEVICE);
		break;
	default:
		dma_sync_single_for_device(na->pdev, *map, sz,
				DMA_TO_DEVICE);
		break;
	}
}
/*
 * Update the DMA mapping when the buffer attached to a slot changes:
 * drop the existing mapping (if any) and map 'buf' in its place, using
 * the adapter's buffer size. 'tag' is unused here.
 *
 * The function cannot report errors, so a failed remap is recorded by
 * resetting '*map' to 0, the same "not mapped" marker used by
 * netmap_load_map(). This keeps an error cookie from later being passed
 * to dma_unmap_single() or dma_sync_single_*() by the '*map' guards.
 */
static inline void
netmap_reload_map(struct netmap_adapter *na,
	bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
{
	u_int sz = NETMAP_BUF_SIZE(na);

	if (*map) {
		dma_unmap_single(na->pdev, *map, sz,
				DMA_BIDIRECTIONAL);
	}

	*map = dma_map_single(na->pdev, buf, sz,
				DMA_BIDIRECTIONAL);
	if (dma_mapping_error(na->pdev, *map)) {
		/* Mapping failed: mark the slot as unmapped. */
		*map = 0;
	}
}
#endif /* linux */
@ -1764,10 +1771,26 @@ netmap_idx_k2n(struct netmap_kring *kr, int idx)
/* Entries of the look-up table.
 *
 * The table is indexed by buffer index (see PNMB()) and yields the
 * virtual and physical address of each netmap buffer. The layout
 * depends on the platform, see below.
 */
#ifdef __FreeBSD__
/* On FreeBSD a single entry carries both addresses; the name
 * plut_entry is #defined to lut_entry for this platform, so code
 * written against the split layout uses this same structure.
 */
struct lut_entry {
void *vaddr; /* virtual address. */
vm_paddr_t paddr; /* physical address. */
};
#else /* linux & _WIN32 */
/* dma-mapping in linux can assign a buffer a different address
* depending on the device, so we need to have a separate
* physical-address look-up table for each na.
* We can still share the vaddrs, though, therefore we split
* the lut_entry structure.
*/
/* Virtual-address half of the table. */
struct lut_entry {
void *vaddr; /* virtual address. */
};
/* Physical-address half of the table. */
struct plut_entry {
vm_paddr_t paddr; /* physical address. */
};
#endif /* linux & _WIN32 */
struct netmap_obj_pool;
@ -1789,12 +1812,13 @@ PNMB(struct netmap_adapter *na, struct netmap_slot *slot, uint64_t *pp)
{
uint32_t i = slot->buf_idx;
struct lut_entry *lut = na->na_lut.lut;
struct plut_entry *plut = na->na_lut.plut;
void *ret = (i >= na->na_lut.objtotal) ? lut[0].vaddr : lut[i].vaddr;
#ifndef _WIN32
*pp = (i >= na->na_lut.objtotal) ? lut[0].paddr : lut[i].paddr;
#ifdef _WIN32
*pp = (i >= na->na_lut.objtotal) ? (uint64_t)plut[0].paddr.QuadPart : (uint64_t)plut[i].paddr.QuadPart;
#else
*pp = (i >= na->na_lut.objtotal) ? (uint64_t)lut[0].paddr.QuadPart : (uint64_t)lut[i].paddr.QuadPart;
*pp = (i >= na->na_lut.objtotal) ? plut[0].paddr : plut[i].paddr;
#endif
return ret;
}
@ -1823,7 +1847,7 @@ struct netmap_priv_d {
uint32_t np_flags; /* from the ioctl */
u_int np_qfirst[NR_TXRX],
np_qlast[NR_TXRX]; /* range of tx/rx rings to scan */
uint16_t np_txpoll; /* XXX and also np_rxpoll ? */
uint16_t np_txpoll;
int np_sync_flags; /* to be passed to nm_sync */
int np_refs; /* use with NMG_LOCK held */

File diff suppressed because it is too large Load Diff

View File

@ -136,9 +136,9 @@ struct netmap_if * netmap_mem_if_new(struct netmap_adapter *, struct netmap_priv
void netmap_mem_if_delete(struct netmap_adapter *, struct netmap_if *);
int netmap_mem_rings_create(struct netmap_adapter *);
void netmap_mem_rings_delete(struct netmap_adapter *);
void netmap_mem_deref(struct netmap_mem_d *, struct netmap_adapter *);
int netmap_mem_deref(struct netmap_mem_d *, struct netmap_adapter *);
int netmap_mem2_get_pool_info(struct netmap_mem_d *, u_int, u_int *, u_int *);
int netmap_mem_get_info(struct netmap_mem_d *, u_int *size, u_int *memflags, uint16_t *id);
int netmap_mem_get_info(struct netmap_mem_d *, uint64_t *size, u_int *memflags, uint16_t *id);
ssize_t netmap_mem_if_offset(struct netmap_mem_d *, const void *vaddr);
struct netmap_mem_d* netmap_mem_private_new( u_int txr, u_int txd, u_int rxr, u_int rxd,
u_int extra_bufs, u_int npipes, int* error);
@ -149,6 +149,14 @@ void netmap_mem_delete(struct netmap_mem_d *);
struct netmap_mem_d* __netmap_mem_get(struct netmap_mem_d *, const char *, int);
void __netmap_mem_put(struct netmap_mem_d *, const char *, int);
struct netmap_mem_d* netmap_mem_find(nm_memid_t);
unsigned netmap_mem_bufsize(struct netmap_mem_d *nmd);
#ifdef WITH_EXTMEM
struct netmap_mem_d* netmap_mem_ext_create(struct nmreq *, int *);
#else /* !WITH_EXTMEM */
/*
 * Fallback used when external-memory support is compiled out: stores
 * EOPNOTSUPP through the error pointer (when it is not NULL) and
 * evaluates to NULL. The 'nmr' argument is not evaluated.
 *
 * The temporary has a name callers are unlikely to use, so passing a
 * variable called 'perr' does not turn into a self-initialisation, and
 * the argument is parenthesised so any expression can be passed.
 */
#define netmap_mem_ext_create(nmr, _perr) \
	({ int *nm_ext_perr_ = (_perr); \
	   if (nm_ext_perr_) *nm_ext_perr_ = EOPNOTSUPP; \
	   NULL; })
#endif /* WITH_EXTMEM */
#ifdef WITH_PTNETMAP_GUEST
struct netmap_mem_d* netmap_mem_pt_guest_new(struct ifnet *,
@ -163,6 +171,7 @@ int netmap_mem_pools_info_get(struct nmreq *, struct netmap_mem_d *);
#define NETMAP_MEM_PRIVATE 0x2 /* allocator uses private address space */
#define NETMAP_MEM_IO 0x4 /* the underlying memory is mmapped I/O */
#define NETMAP_MEM_EXT 0x10 /* external memory (not remappable) */
uint32_t netmap_extra_alloc(struct netmap_adapter *, uint32_t *, uint32_t n);

View File

@ -66,9 +66,7 @@
* has released them. In most cases, the consumer is a userspace
* application which may have modified the frame contents.
*
* Several copy monitors may be active on any ring. Zero-copy monitors,
* instead, need exclusive access to each of the monitored rings. This may
* change in the future, if we implement zero-copy monitor chaining.
* Several copy or zero-copy monitors may be active on any ring.
*
*/
@ -263,7 +261,7 @@ netmap_monitor_add(struct netmap_kring *mkring, struct netmap_kring *kring, int
if (zmon && z->prev != NULL)
kring = z->prev;
/* sinchronize with concurrently running nm_sync()s */
/* synchronize with concurrently running nm_sync()s */
nm_kr_stop(kring, NM_KR_LOCKED);
if (nm_monitor_none(kring)) {
@ -329,7 +327,7 @@ netmap_monitor_del(struct netmap_kring *mkring, struct netmap_kring *kring)
if (zmon && mz->prev != NULL)
kring = mz->prev;
/* sinchronize with concurrently running nm_sync()s */
/* synchronize with concurrently running nm_sync()s */
nm_kr_stop(kring, NM_KR_LOCKED);
if (zmon) {

View File

@ -132,7 +132,7 @@ gso_fix_segment(uint8_t *pkt, size_t len, u_int ipv4, u_int iphlen, u_int tcp,
ND("TCP/UDP csum %x", be16toh(*check));
}
static int
static inline int
vnet_hdr_is_bad(struct nm_vnet_hdr *vh)
{
uint8_t gso_type = vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
@ -170,7 +170,7 @@ bdg_mismatch_datapath(struct netmap_vp_adapter *na,
u_int dst_slots = 0;
if (unlikely(ft_p == ft_end)) {
RD(3, "No source slots to process");
RD(1, "No source slots to process");
return;
}
@ -189,11 +189,11 @@ bdg_mismatch_datapath(struct netmap_vp_adapter *na,
/* Initial sanity check on the source virtio-net header. If
* something seems wrong, just drop the packet. */
if (src_len < na->up.virt_hdr_len) {
RD(3, "Short src vnet header, dropping");
RD(1, "Short src vnet header, dropping");
return;
}
if (vnet_hdr_is_bad(vh)) {
RD(3, "Bad src vnet header, dropping");
if (unlikely(vnet_hdr_is_bad(vh))) {
RD(1, "Bad src vnet header, dropping");
return;
}
}
@ -266,7 +266,7 @@ bdg_mismatch_datapath(struct netmap_vp_adapter *na,
if (dst_slots >= *howmany) {
/* We still have work to do, but we've run out of
* dst slots, so we have to drop the packet. */
RD(3, "Not enough slots, dropping GSO packet");
ND(1, "Not enough slots, dropping GSO packet");
return;
}
@ -281,7 +281,7 @@ bdg_mismatch_datapath(struct netmap_vp_adapter *na,
* encapsulation. */
for (;;) {
if (src_len < ethhlen) {
RD(3, "Short GSO fragment [eth], dropping");
RD(1, "Short GSO fragment [eth], dropping");
return;
}
ethertype = be16toh(*((uint16_t *)
@ -297,7 +297,7 @@ bdg_mismatch_datapath(struct netmap_vp_adapter *na,
(gso_hdr + ethhlen);
if (src_len < ethhlen + 20) {
RD(3, "Short GSO fragment "
RD(1, "Short GSO fragment "
"[IPv4], dropping");
return;
}
@ -310,14 +310,14 @@ bdg_mismatch_datapath(struct netmap_vp_adapter *na,
iphlen = 40;
break;
default:
RD(3, "Unsupported ethertype, "
RD(1, "Unsupported ethertype, "
"dropping GSO packet");
return;
}
ND(3, "type=%04x", ethertype);
if (src_len < ethhlen + iphlen) {
RD(3, "Short GSO fragment [IP], dropping");
RD(1, "Short GSO fragment [IP], dropping");
return;
}
@ -329,7 +329,7 @@ bdg_mismatch_datapath(struct netmap_vp_adapter *na,
(gso_hdr + ethhlen + iphlen);
if (src_len < ethhlen + iphlen + 20) {
RD(3, "Short GSO fragment "
RD(1, "Short GSO fragment "
"[TCP], dropping");
return;
}
@ -340,7 +340,7 @@ bdg_mismatch_datapath(struct netmap_vp_adapter *na,
}
if (src_len < gso_hdr_len) {
RD(3, "Short GSO fragment [TCP/UDP], dropping");
RD(1, "Short GSO fragment [TCP/UDP], dropping");
return;
}

View File

@ -81,7 +81,8 @@
static int netmap_default_pipes = 0; /* ignored, kept for compatibility */
SYSBEGIN(vars_pipes);
SYSCTL_DECL(_dev_netmap);
SYSCTL_INT(_dev_netmap, OID_AUTO, default_pipes, CTLFLAG_RW, &netmap_default_pipes, 0 , "");
SYSCTL_INT(_dev_netmap, OID_AUTO, default_pipes, CTLFLAG_RW,
&netmap_default_pipes, 0, "For compatibility only");
SYSEND;
/* allocate the pipe array in the parent adapter */
@ -182,6 +183,7 @@ netmap_pipe_txsync(struct netmap_kring *txkring, int flags)
u_int j, k, lim_tx = txkring->nkr_num_slots - 1,
lim_rx = rxkring->nkr_num_slots - 1;
int m, busy;
struct netmap_ring *txring = txkring->ring, *rxring = rxkring->ring;
ND("%p: %s %x -> %s", txkring, txkring->name, flags, rxkring->name);
ND(2, "before: hwcur %d hwtail %d cur %d head %d tail %d", txkring->nr_hwcur, txkring->nr_hwtail,
@ -208,18 +210,18 @@ netmap_pipe_txsync(struct netmap_kring *txkring, int flags)
}
while (limit-- > 0) {
struct netmap_slot *rs = &rxkring->ring->slot[j];
struct netmap_slot *ts = &txkring->ring->slot[k];
struct netmap_slot *rs = &rxring->slot[j];
struct netmap_slot *ts = &txring->slot[k];
struct netmap_slot tmp;
/* swap the slots */
tmp = *rs;
*rs = *ts;
*ts = tmp;
__builtin_prefetch(ts + 1);
/* report the buffer change */
ts->flags |= NS_BUF_CHANGED;
/* swap the slots and report the buffer change */
tmp = *rs;
tmp.flags |= NS_BUF_CHANGED;
*rs = *ts;
rs->flags |= NS_BUF_CHANGED;
*ts = tmp;
j = nm_next(j, lim_rx);
k = nm_next(k, lim_tx);

View File

@ -169,19 +169,19 @@ rate_batch_stats_update(struct rate_batch_stats *bf, uint32_t pre_tail,
#endif /* RATE */
struct ptnetmap_state {
/* Kthreads. */
struct nm_kctx **kctxs;
/* Kthreads. */
struct nm_kctx **kctxs;
/* Shared memory with the guest (TX/RX) */
struct ptnet_csb_gh __user *csb_gh;
struct ptnet_csb_hg __user *csb_hg;
bool stopped;
bool stopped;
/* Netmap adapter wrapping the backend. */
struct netmap_pt_host_adapter *pth_na;
/* Netmap adapter wrapping the backend. */
struct netmap_pt_host_adapter *pth_na;
IFRATE(struct rate_context rate_ctx;)
IFRATE(struct rate_context rate_ctx;)
};
static inline void
@ -1268,13 +1268,11 @@ netmap_get_pt_host_na(struct nmreq *nmr, struct netmap_adapter **na,
}
*na = &pth_na->up;
netmap_adapter_get(*na);
/* set parent busy, because attached for ptnetmap */
parent->na_flags |= NAF_BUSY;
strncpy(pth_na->up.name, parent->name, sizeof(pth_na->up.name));
strcat(pth_na->up.name, "-PTN");
netmap_adapter_get(*na);
DBG(D("%s ptnetmap request DONE", pth_na->up.name));
@ -1350,7 +1348,7 @@ netmap_pt_guest_txsync(struct ptnet_csb_gh *ptgh, struct ptnet_csb_hg *pthg,
* go to sleep and we need to be notified by the host when more free
* space is available.
*/
if (nm_kr_txempty(kring)) {
if (nm_kr_txempty(kring) && !(kring->nr_kflags & NKR_NOINTR)) {
/* Reenable notifications. */
ptgh->guest_need_kick = 1;
/* Double check */
@ -1415,7 +1413,7 @@ netmap_pt_guest_rxsync(struct ptnet_csb_gh *ptgh, struct ptnet_csb_hg *pthg,
* we need to be notified by the host when more RX slots have been
* completed.
*/
if (nm_kr_rxempty(kring)) {
if (nm_kr_rxempty(kring) && !(kring->nr_kflags & NKR_NOINTR)) {
/* Reenable notifications. */
ptgh->guest_need_kick = 1;
/* Double check */
@ -1504,7 +1502,7 @@ netmap_pt_guest_attach(struct netmap_adapter *arg,
if (arg->nm_mem == NULL)
return ENOMEM;
arg->na_flags |= NAF_MEM_OWNER;
error = netmap_attach_ext(arg, sizeof(struct netmap_pt_guest_adapter));
error = netmap_attach_ext(arg, sizeof(struct netmap_pt_guest_adapter), 1);
if (error)
return error;
@ -1517,7 +1515,7 @@ netmap_pt_guest_attach(struct netmap_adapter *arg,
memset(&ptna->dr, 0, sizeof(ptna->dr));
ptna->dr.up.ifp = ifp;
ptna->dr.up.nm_mem = netmap_mem_get(ptna->hwup.up.nm_mem);
ptna->dr.up.nm_config = ptna->hwup.up.nm_config;
ptna->dr.up.nm_config = ptna->hwup.up.nm_config;
ptna->backend_regifs = 0;

View File

@ -150,6 +150,8 @@ __FBSDID("$FreeBSD$");
#define NM_BDG_BATCH_MAX (NM_BDG_BATCH + NM_MULTISEG)
/* NM_FT_NULL terminates a list of slots in the ft */
#define NM_FT_NULL NM_BDG_BATCH_MAX
/* Default size for the Maximum Frame Size. */
#define NM_BDG_MFS_DEFAULT 1514
/*
@ -160,7 +162,8 @@ __FBSDID("$FreeBSD$");
static int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
SYSBEGIN(vars_vale);
SYSCTL_DECL(_dev_netmap);
SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , "");
SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0,
"Max batch size to be used in the bridge");
SYSEND;
static int netmap_vp_create(struct nmreq *, struct ifnet *,
@ -226,9 +229,9 @@ struct nm_bridge {
/* the forwarding table, MAC+ports.
* XXX should be changed to an argument to be passed to
* the lookup function, and allocated on attach
* the lookup function
*/
struct nm_hash_ent ht[NM_BDG_HASH];
struct nm_hash_ent *ht; // allocated on attach
#ifdef CONFIG_NET_NS
struct net *ns;
@ -365,17 +368,20 @@ nm_find_bridge(const char *name, int create)
}
if (i == num_bridges && b) { /* name not found, can create entry */
/* initialize the bridge */
strncpy(b->bdg_basename, name, namelen);
ND("create new bridge %s with ports %d", b->bdg_basename,
b->bdg_active_ports);
b->ht = nm_os_malloc(sizeof(struct nm_hash_ent) * NM_BDG_HASH);
if (b->ht == NULL) {
D("failed to allocate hash table");
return NULL;
}
strncpy(b->bdg_basename, name, namelen);
b->bdg_namelen = namelen;
b->bdg_active_ports = 0;
for (i = 0; i < NM_BDG_MAXPORTS; i++)
b->bdg_port_index[i] = i;
/* set the default function */
b->bdg_ops.lookup = netmap_bdg_learning;
/* reset the MAC address table */
bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
NM_BNS_GET(b);
}
return b;
@ -503,6 +509,7 @@ netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
ND("now %d active ports", lim);
if (lim == 0) {
ND("marking bridge %s as free", b->bdg_basename);
nm_os_free(b->ht);
bzero(&b->bdg_ops, sizeof(b->bdg_ops));
NM_BNS_PUT(b);
}
@ -542,11 +549,14 @@ netmap_vp_dtor(struct netmap_adapter *na)
netmap_bdg_detach_common(b, vpna->bdg_port, -1);
}
if (vpna->autodelete && na->ifp != NULL) {
ND("releasing %s", na->ifp->if_xname);
NMG_UNLOCK();
nm_os_vi_detach(na->ifp);
NMG_LOCK();
if (na->ifp != NULL && !nm_iszombie(na)) {
WNA(na->ifp) = NULL;
if (vpna->autodelete) {
ND("releasing %s", na->ifp->if_xname);
NMG_UNLOCK();
nm_os_vi_detach(na->ifp);
NMG_LOCK();
}
}
}
@ -603,11 +613,15 @@ err:
/*
 * Copy the adapter geometry (number of rings and slots) and the memory
 * allocator information into the request structure.
 *
 * netmap_mem_get_info() reports the memory size as a 64-bit value, while
 * nmr->nr_memsize is narrower, so the size is fetched into a local and
 * then truncated to 32 bits. The local is zero-initialised so that
 * nr_memsize never receives an indeterminate value if the allocator
 * fails before storing a size.
 *
 * Returns whatever netmap_mem_get_info() returns.
 */
static int
nm_update_info(struct nmreq *nmr, struct netmap_adapter *na)
{
	uint64_t memsize = 0;
	int ret;

	nmr->nr_rx_rings = na->num_rx_rings;
	nmr->nr_tx_rings = na->num_tx_rings;
	nmr->nr_rx_slots = na->num_rx_desc;
	nmr->nr_tx_slots = na->num_tx_desc;

	ret = netmap_mem_get_info(na->nm_mem, &memsize, NULL, &nmr->nr_arg2);
	nmr->nr_memsize = (uint32_t)memsize;
	return ret;
}
/*
@ -736,7 +750,6 @@ netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na,
for (j = 0; j < b->bdg_active_ports; j++) {
i = b->bdg_port_index[j];
vpna = b->bdg_ports[i];
// KASSERT(na != NULL);
ND("checking %s", vpna->up.name);
if (!strcmp(vpna->up.name, nr_name)) {
netmap_adapter_get(&vpna->up);
@ -788,6 +801,18 @@ netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na,
} else {
struct netmap_adapter *hw;
/* the vale:nic syntax is only valid for some commands */
switch (nmr->nr_cmd) {
case NETMAP_BDG_ATTACH:
case NETMAP_BDG_DETACH:
case NETMAP_BDG_POLLING_ON:
case NETMAP_BDG_POLLING_OFF:
break; /* ok */
default:
error = EINVAL;
goto out;
}
error = netmap_get_hw_na(ifp, nmd, &hw);
if (error || hw == NULL)
goto out;
@ -848,6 +873,12 @@ nm_bdg_ctl_attach(struct nmreq *nmr)
}
}
/* XXX check existing one */
error = netmap_get_bdg_na(nmr, &na, nmd, 0);
if (!error) {
error = EBUSY;
goto unref_exit;
}
error = netmap_get_bdg_na(nmr, &na, nmd, 1 /* create if not exists */);
if (error) /* no device */
goto unlock_exit;
@ -1149,9 +1180,8 @@ nm_bdg_ctl_polling_start(struct nmreq *nmr, struct netmap_adapter *na)
bna->na_polling_state = bps;
bps->bna = bna;
/* disable interrupt if possible */
if (bna->hwna->nm_intr)
bna->hwna->nm_intr(bna->hwna, 0);
/* disable interrupts if possible */
nma_intr_enable(bna->hwna, 0);
/* start kthread now */
error = nm_bdg_polling_start_kthreads(bps);
if (error) {
@ -1159,8 +1189,7 @@ nm_bdg_ctl_polling_start(struct nmreq *nmr, struct netmap_adapter *na)
nm_os_free(bps->kthreads);
nm_os_free(bps);
bna->na_polling_state = NULL;
if (bna->hwna->nm_intr)
bna->hwna->nm_intr(bna->hwna, 1);
nma_intr_enable(bna->hwna, 1);
}
return error;
}
@ -1180,9 +1209,8 @@ nm_bdg_ctl_polling_stop(struct nmreq *nmr, struct netmap_adapter *na)
bps->configured = false;
nm_os_free(bps);
bna->na_polling_state = NULL;
/* reenable interrupt */
if (bna->hwna->nm_intr)
bna->hwna->nm_intr(bna->hwna, 1);
/* reenable interrupts */
nma_intr_enable(bna->hwna, 1);
return 0;
}
@ -1577,7 +1605,7 @@ netmap_vp_reg(struct netmap_adapter *na, int onoff)
BDG_WLOCK(vpna->na_bdg);
if (onoff) {
for_rx_tx(t) {
for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
for (i = 0; i < netmap_real_rings(na, t); i++) {
struct netmap_kring *kring = &NMR(na, t)[i];
if (nm_kring_pending_on(kring))
@ -1593,7 +1621,7 @@ netmap_vp_reg(struct netmap_adapter *na, int onoff)
if (na->active_fds == 0)
na->na_flags &= ~NAF_NETMAP_ON;
for_rx_tx(t) {
for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
for (i = 0; i < netmap_real_rings(na, t); i++) {
struct netmap_kring *kring = &NMR(na, t)[i];
if (nm_kring_pending_off(kring))
@ -1657,7 +1685,7 @@ netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
*/
if (((buf[6] & 1) == 0) && (na->last_smac != smac)) { /* valid src */
uint8_t *s = buf+6;
sh = nm_bridge_rthash(s); // XXX hash of source
sh = nm_bridge_rthash(s); /* hash of source */
/* update source port forwarding entry */
na->last_smac = ht[sh].mac = smac; /* XXX expire ? */
ht[sh].ports = mysrc;
@ -1667,11 +1695,10 @@ netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
}
dst = NM_BDG_BROADCAST;
if ((buf[0] & 1) == 0) { /* unicast */
dh = nm_bridge_rthash(buf); // XXX hash of dst
dh = nm_bridge_rthash(buf); /* hash of dst */
if (ht[dh].mac == dmac) { /* found dst */
dst = ht[dh].ports;
}
/* XXX otherwise return NM_BDG_UNKNOWN ? */
}
return dst;
}
@ -1785,10 +1812,8 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
dst_port = b->bdg_ops.lookup(&ft[i], &dst_ring, na);
if (netmap_verbose > 255)
RD(5, "slot %d port %d -> %d", i, me, dst_port);
if (dst_port == NM_BDG_NOPORT)
if (dst_port >= NM_BDG_NOPORT)
continue; /* this packet is identified to be dropped */
else if (unlikely(dst_port > NM_BDG_MAXPORTS))
continue;
else if (dst_port == NM_BDG_BROADCAST)
dst_ring = 0; /* broadcasts always go to ring 0 */
else if (unlikely(dst_port == me ||
@ -1882,10 +1907,10 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
needed = d->bq_len + brddst->bq_len;
if (unlikely(dst_na->up.virt_hdr_len != na->up.virt_hdr_len)) {
if (netmap_verbose) {
RD(3, "virt_hdr_mismatch, src %d dst %d", na->up.virt_hdr_len,
dst_na->up.virt_hdr_len);
}
if (netmap_verbose) {
RD(3, "virt_hdr_mismatch, src %d dst %d", na->up.virt_hdr_len,
dst_na->up.virt_hdr_len);
}
/* There is a virtio-net header/offloadings mismatch between
* source and destination. The slower mismatch datapath will
* be used to cope with all the mismatches.
@ -1902,6 +1927,7 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
* TCPv4 we must account for ethernet header, IP header
* and TCPv4 header).
*/
KASSERT(dst_na->mfs > 0, ("vpna->mfs is 0"));
needed = (needed * na->mfs) /
(dst_na->mfs - WORST_CASE_GSO_HEADER) + 1;
ND(3, "srcmtu=%u, dstmtu=%u, x=%u", na->mfs, dst_na->mfs, needed);
@ -1916,6 +1942,9 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
dst_nr = dst_nr % nrings;
kring = &dst_na->up.rx_rings[dst_nr];
ring = kring->ring;
/* the destination ring may not have been opened for RX */
if (unlikely(ring == NULL || kring->nr_mode != NKR_NETMAP_ON))
goto cleanup;
lim = kring->nkr_num_slots - 1;
retry:
@ -2196,7 +2225,7 @@ netmap_vp_bdg_attach(const char *name, struct netmap_adapter *na)
struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
if (vpna->na_bdg)
return EBUSY;
return netmap_bwrap_attach(name, na);
na->na_vp = vpna;
strncpy(na->name, name, sizeof(na->name));
na->na_hostvp = NULL;
@ -2248,7 +2277,10 @@ netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp,
nm_bound_var(&nmr->nr_arg3, 0, 0,
128*NM_BDG_MAXSLOTS, NULL);
na->num_rx_desc = nmr->nr_rx_slots;
vpna->mfs = 1514;
/* Set the mfs to a default value, as it is needed on the VALE
* mismatch datapath. XXX We should set it according to the MTU
* known to the kernel. */
vpna->mfs = NM_BDG_MFS_DEFAULT;
vpna->last_smac = ~0llu;
/*if (vpna->mfs > netmap_buf_size) TODO netmap_buf_size is zero??
vpna->mfs = netmap_buf_size; */
@ -2330,7 +2362,8 @@ netmap_bwrap_dtor(struct netmap_adapter *na)
struct nm_bridge *b = bna->up.na_bdg,
*bh = bna->host.na_bdg;
netmap_mem_put(bna->host.up.nm_mem);
if (bna->host.up.nm_mem)
netmap_mem_put(bna->host.up.nm_mem);
if (b) {
netmap_bdg_detach_common(b, bna->up.bdg_port,
@ -2459,28 +2492,6 @@ netmap_bwrap_reg(struct netmap_adapter *na, int onoff)
hostna->up.na_lut = na->na_lut;
}
/* cross-link the netmap rings
* The original number of rings comes from hwna,
* rx rings on one side equals tx rings on the other.
*/
for_rx_tx(t) {
enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
for (i = 0; i < nma_get_nrings(hwna, r) + 1; i++) {
NMR(hwna, r)[i].ring = NMR(na, t)[i].ring;
}
}
if (na->na_flags & NAF_HOST_RINGS) {
struct netmap_adapter *hna = &hostna->up;
/* the hostna rings are the host rings of the bwrap.
* The corresponding krings must point back to the
* hostna
*/
hna->tx_rings = &na->tx_rings[na->num_tx_rings];
hna->tx_rings[0].na = hna;
hna->rx_rings = &na->rx_rings[na->num_rx_rings];
hna->rx_rings[0].na = hna;
}
}
/* pass down the pending ring state information */
@ -2497,9 +2508,10 @@ netmap_bwrap_reg(struct netmap_adapter *na, int onoff)
/* copy up the current ring state information */
for_rx_tx(t) {
for (i = 0; i < nma_get_nrings(na, t) + 1; i++)
NMR(na, t)[i].nr_mode =
NMR(hwna, t)[i].nr_mode;
for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
struct netmap_kring *kring = &NMR(hwna, t)[i];
NMR(na, t)[i].nr_mode = kring->nr_mode;
}
}
/* impersonate a netmap_vp_adapter */
@ -2537,6 +2549,14 @@ netmap_bwrap_reg(struct netmap_adapter *na, int onoff)
hwna->na_lut.lut = NULL;
hwna->na_lut.objtotal = 0;
hwna->na_lut.objsize = 0;
/* pass ownership of the netmap rings to the hwna */
for_rx_tx(t) {
for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
NMR(na, t)[i].ring = NULL;
}
}
}
return 0;
@ -2570,6 +2590,7 @@ netmap_bwrap_krings_create(struct netmap_adapter *na)
struct netmap_bwrap_adapter *bna =
(struct netmap_bwrap_adapter *)na;
struct netmap_adapter *hwna = bna->hwna;
struct netmap_adapter *hostna = &bna->host.up;
int i, error = 0;
enum txrx t;
@ -2586,16 +2607,49 @@ netmap_bwrap_krings_create(struct netmap_adapter *na)
goto err_del_vp_rings;
}
/* get each ring slot number from the corresponding hwna ring */
for_rx_tx(t) {
enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
for (i = 0; i < nma_get_nrings(hwna, r) + 1; i++) {
NMR(na, t)[i].nkr_num_slots = NMR(hwna, r)[i].nkr_num_slots;
/* increment the usage counter for all the hwna krings */
for_rx_tx(t) {
for (i = 0; i < nma_get_nrings(hwna, t) + 1; i++) {
NMR(hwna, t)[i].users++;
}
}
/* now create the actual rings */
error = netmap_mem_rings_create(hwna);
if (error) {
goto err_dec_users;
}
/* cross-link the netmap rings
* The original number of rings comes from hwna,
* rx rings on one side equals tx rings on the other.
*/
for_rx_tx(t) {
enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
for (i = 0; i < nma_get_nrings(hwna, r) + 1; i++) {
NMR(na, t)[i].nkr_num_slots = NMR(hwna, r)[i].nkr_num_slots;
NMR(na, t)[i].ring = NMR(hwna, r)[i].ring;
}
}
if (na->na_flags & NAF_HOST_RINGS) {
/* the hostna rings are the host rings of the bwrap.
* The corresponding krings must point back to the
* hostna
*/
hostna->tx_rings = &na->tx_rings[na->num_tx_rings];
hostna->tx_rings[0].na = hostna;
hostna->rx_rings = &na->rx_rings[na->num_rx_rings];
hostna->rx_rings[0].na = hostna;
}
return 0;
err_dec_users:
for_rx_tx(t) {
NMR(hwna, t)[i].users--;
}
hwna->nm_krings_delete(hwna);
err_del_vp_rings:
netmap_vp_krings_delete(na);
@ -2609,9 +2663,20 @@ netmap_bwrap_krings_delete(struct netmap_adapter *na)
struct netmap_bwrap_adapter *bna =
(struct netmap_bwrap_adapter *)na;
struct netmap_adapter *hwna = bna->hwna;
enum txrx t;
int i;
ND("%s", na->name);
/* decrement the usage counter for all the hwna krings */
for_rx_tx(t) {
for (i = 0; i < nma_get_nrings(hwna, t) + 1; i++) {
NMR(hwna, t)[i].users--;
}
}
/* delete any netmap rings that are no longer needed */
netmap_mem_rings_delete(hwna);
hwna->nm_krings_delete(hwna);
netmap_vp_krings_delete(na);
}
@ -2699,7 +2764,7 @@ netmap_bwrap_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach)
if (npriv == NULL)
return ENOMEM;
npriv->np_ifp = na->ifp; /* let the priv destructor release the ref */
error = netmap_do_regif(npriv, na, 0, NR_REG_NIC_SW);
error = netmap_do_regif(npriv, na, nmr->nr_ringid, nmr->nr_flags);
if (error) {
netmap_priv_delete(npriv);
return error;
@ -2766,6 +2831,8 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
na->nm_mem = netmap_mem_get(hwna->nm_mem);
na->virt_hdr_len = hwna->virt_hdr_len;
bna->up.retry = 1; /* XXX maybe this should depend on the hwna */
/* Set the mfs, needed on the VALE mismatch datapath. */
bna->up.mfs = NM_BDG_MFS_DEFAULT;
bna->hwna = hwna;
netmap_adapter_get(hwna);
@ -2793,6 +2860,7 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
na->na_hostvp = hwna->na_hostvp =
hostna->na_hostvp = &bna->host;
hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */
bna->host.mfs = NM_BDG_MFS_DEFAULT;
}
ND("%s<->%s txr %d txd %d rxr %d rxd %d",

View File

@ -217,7 +217,8 @@ struct netmap_slot {
#define NS_MOREFRAG 0x0020 /* packet has more fragments */
/*
* (VALE ports only)
* (VALE ports, ptnetmap ports and some NIC ports, e.g.
* ixgbe and i40e on Linux)
* Set on all but the last slot of a multi-segment packet.
* The 'len' field refers to the individual fragment.
*/
@ -528,6 +529,7 @@ struct nmreq {
#define NETMAP_BDG_POLLING_OFF 11 /* delete polling kthread */
#define NETMAP_VNET_HDR_GET 12 /* get the port virtio-net-hdr length */
#define NETMAP_POOLS_INFO_GET 13 /* get memory allocator pools info */
#define NETMAP_POOLS_CREATE 14 /* create a new memory allocator */
uint16_t nr_arg1; /* reserve extra rings in NIOCREGIF */
#define NETMAP_BDG_HOST 1 /* attach the host stack on ATTACH */
@ -567,13 +569,13 @@ enum { NR_REG_DEFAULT = 0, /* backward compat, should not be used. */
#define NM_BDG_NAME "vale" /* prefix for bridge port name */
#ifdef _WIN32
/*
* Windows does not have _IOWR(). _IO(), _IOW() and _IOR() are defined
* in ws2def.h but not sure if they are in the form we need.
* XXX so we redefine them
* in a convenient way to use for DeviceIoControl signatures
* We therefore redefine them in a convenient way to use for DeviceIoControl
* signatures.
*/
#ifdef _WIN32
#undef _IO // ws2def.h
#define _WIN_NM_IOCTL_TYPE 40000
#define _IO(_c, _n) CTL_CODE(_WIN_NM_IOCTL_TYPE, ((_n) + 0x800) , \

View File

@ -100,6 +100,7 @@
#endif /* likely and unlikely */
#include <net/netmap.h>
#include <net/netmap_virt.h> /* nmreq_pointer_get() */
/* helper macro */
#define _NETMAP_OFFSET(type, ptr, offset) \
@ -114,7 +115,7 @@
nifp, (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + 1] )
#define NETMAP_BUF(ring, index) \
((char *)(ring) + (ring)->buf_ofs + ((index)*(ring)->nr_buf_size))
((char *)(ring) + (ring)->buf_ofs + ((long)(index)*(ring)->nr_buf_size))
#define NETMAP_BUF_IDX(ring, buf) \
( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \
@ -224,7 +225,7 @@ struct nm_desc {
struct nm_desc *self; /* point to self if netmap. */
int fd;
void *mem;
uint32_t memsize;
uint64_t memsize;
int done_mmap; /* set if mem is the result of mmap */
struct netmap_if * const nifp;
uint16_t first_tx_ring, last_tx_ring, cur_tx_ring;
@ -272,8 +273,6 @@ struct nm_desc {
* to multiple of 64 bytes and is often faster than dealing
* with other odd sizes. We assume there is enough room
* in the source and destination buffers.
*
* XXX only for multiples of 64 bytes, non overlapped.
*/
static inline void
nm_pkt_copy(const void *_src, void *_dst, int l)
@ -281,7 +280,7 @@ nm_pkt_copy(const void *_src, void *_dst, int l)
const uint64_t *src = (const uint64_t *)_src;
uint64_t *dst = (uint64_t *)_dst;
if (unlikely(l >= 1024)) {
if (unlikely(l >= 1024 || l % 64)) {
memcpy(dst, src, l);
return;
}
@ -352,6 +351,7 @@ enum {
NM_OPEN_ARG2 = 0x200000,
NM_OPEN_ARG3 = 0x400000,
NM_OPEN_RING_CFG = 0x800000, /* tx|rx rings|slots */
NM_OPEN_EXTMEM = 0x1000000,
};
@ -613,38 +613,46 @@ nm_is_identifier(const char *s, const char *e)
return 1;
}
/*
* Try to open, return descriptor if successful, NULL otherwise.
* An invalid netmap name will return errno = 0;
* You can pass a pointer to a pre-filled nm_desc to add special
* parameters. Flags is used as follows
* NM_OPEN_NO_MMAP use the memory from arg, only XXX avoid mmap
* if the nr_arg2 (memory block) matches.
* NM_OPEN_ARG1 use req.nr_arg1 from arg
* NM_OPEN_ARG2 use req.nr_arg2 from arg
* NM_OPEN_RING_CFG user ring config from arg
*/
static struct nm_desc *
nm_open(const char *ifname, const struct nmreq *req,
uint64_t new_flags, const struct nm_desc *arg)
/*
 * Fill in the derived pointers of a descriptor once d->mem, d->memsize and
 * d->req.nr_offset are known: the netmap_if, one representative ring, and
 * the start/end of the buffer area.
 *
 * The target fields are written through casts because they are declared
 * const inside struct nm_desc.
 */
static void
nm_init_offsets(struct nm_desc *d)
{
/* NOTE(review): the next four declarations look like removed-side lines of
 * the diff (locals of the previous nm_open() body) rather than part of this
 * function -- confirm against the real file. */
struct nm_desc *d = NULL;
const struct nm_desc *parent = arg;
u_int namelen;
uint32_t nr_ringid = 0, nr_flags, nr_reg;
/* locate the interface descriptor inside the mapped region */
struct netmap_if *nifp = NETMAP_IF(d->mem, d->req.nr_offset);
/* start from the first RX ring bound to this descriptor */
struct netmap_ring *r = NETMAP_RXRING(nifp, d->first_rx_ring);
/* an RX ring that resolves to nifp itself means there is no RX ring */
if ((void *)r == (void *)nifp) {
/* the descriptor is open for TX only */
r = NETMAP_TXRING(nifp, d->first_tx_ring);
}
*(struct netmap_if **)(uintptr_t)&(d->nifp) = nifp;
*(struct netmap_ring **)(uintptr_t)&d->some_ring = r;
/* buffer 0 of the chosen ring marks the start of the buffer area */
*(void **)(uintptr_t)&d->buf_start = NETMAP_BUF(r, 0);
/* the buffer area is taken to extend to the end of the mapped region */
*(void **)(uintptr_t)&d->buf_end =
(char *)d->mem + d->memsize;
}
#define MAXERRMSG 80
#define NM_PARSE_OK 0
#define NM_PARSE_MEMID 1
static int
nm_parse_one(const char *ifname, struct nmreq *d, char **out, int memid_allowed)
{
int is_vale;
const char *port = NULL;
const char *vpname = NULL;
#define MAXERRMSG 80
u_int namelen;
uint32_t nr_ringid = 0, nr_flags;
char errmsg[MAXERRMSG] = "";
enum { P_START, P_RNGSFXOK, P_GETNUM, P_FLAGS, P_FLAGSOK, P_MEMID } p_state;
int is_vale;
long num;
uint16_t nr_arg2 = 0;
enum { P_START, P_RNGSFXOK, P_GETNUM, P_FLAGS, P_FLAGSOK, P_MEMID } p_state;
errno = 0;
if (strncmp(ifname, "netmap:", 7) &&
strncmp(ifname, NM_BDG_NAME, strlen(NM_BDG_NAME))) {
errno = 0; /* name not recognised, not an error */
return NULL;
snprintf(errmsg, MAXERRMSG, "invalid port name: %s", ifname);
errno = EINVAL;
goto fail;
}
is_vale = (ifname[0] == 'v');
@ -677,10 +685,14 @@ nm_open(const char *ifname, const struct nmreq *req,
}
namelen = port - ifname;
if (namelen >= sizeof(d->req.nr_name)) {
if (namelen >= sizeof(d->nr_name)) {
snprintf(errmsg, MAXERRMSG, "name too long");
goto fail;
}
memcpy(d->nr_name, ifname, namelen);
d->nr_name[namelen] = '\0';
D("name %s", d->nr_name);
p_state = P_START;
nr_flags = NR_REG_ALL_NIC; /* default for no suffix */
while (*port) {
@ -777,21 +789,28 @@ nm_open(const char *ifname, const struct nmreq *req,
p_state = P_FLAGSOK;
break;
case P_MEMID:
if (nr_arg2 != 0) {
if (!memid_allowed) {
snprintf(errmsg, MAXERRMSG, "double setting of memid");
goto fail;
}
num = strtol(port, (char **)&port, 10);
if (num <= 0) {
snprintf(errmsg, MAXERRMSG, "invalid memid %ld, must be >0", num);
goto fail;
ND("non-numeric memid %s (out = %p)", port, out);
if (out == NULL)
goto fail;
*out = (char *)port;
while (*port)
port++;
} else {
nr_arg2 = num;
memid_allowed = 0;
p_state = P_RNGSFXOK;
}
nr_arg2 = num;
p_state = P_RNGSFXOK;
break;
}
}
if (p_state != P_START && p_state != P_RNGSFXOK && p_state != P_FLAGSOK) {
if (p_state != P_START && p_state != P_RNGSFXOK &&
p_state != P_FLAGSOK && p_state != P_MEMID) {
snprintf(errmsg, MAXERRMSG, "unexpected end of port name");
goto fail;
}
@ -800,6 +819,138 @@ nm_open(const char *ifname, const struct nmreq *req,
(nr_flags & NR_ZCOPY_MON) ? "ZCOPY_MON" : "",
(nr_flags & NR_MONITOR_TX) ? "MONITOR_TX" : "",
(nr_flags & NR_MONITOR_RX) ? "MONITOR_RX" : "");
d->nr_flags |= nr_flags;
d->nr_ringid |= nr_ringid;
d->nr_arg2 = nr_arg2;
return (p_state == P_MEMID) ? NM_PARSE_MEMID : NM_PARSE_OK;
fail:
if (!errno)
errno = EINVAL;
if (out)
*out = strdup(errmsg);
return -1;
}
/*
 * Interpret a non-numeric memory id taken from a port name.
 *
 * The string is first tried as the name of an existing netmap port: if
 * NIOCGINFO succeeds, that port's memory id is copied into req->nr_arg2.
 * If the name does not parse as a port, or the port does not exist
 * (ENOENT/ENXIO), the string is treated as the path of a file to be used
 * as external memory: the file is mapped, its size is stored in the
 * netmap_pools_info header at the start of the mapping, and req is set up
 * for NETMAP_POOLS_CREATE with the mapping address stored in it.
 *
 * memid  memory id text; may alias the incoming *err, which is therefore
 *        cleared before anything else is done
 * req    request to fill in
 * err    if not NULL, receives a malloc()ed error message on failure (to
 *        be freed by the caller) and is left NULL on success
 *
 * Returns 0 on success, or the errno value of the failing operation.
 */
static int
nm_interp_memid(const char *memid, struct nmreq *req, char **err)
{
	int fd = -1;
	int saved_errno = 0;
	char errmsg[MAXERRMSG] = "";
	struct nmreq greq;
	off_t mapsize;
	struct netmap_pools_info *pi;

	/* Callers hand us the memid through the same variable they use for
	 * the error message, so *err may point into the port name. Drop
	 * that alias now: from here on *err is either NULL or heap memory,
	 * which is what the callers' free() requires. */
	if (err)
		*err = NULL;

	/* first, try to look for a netmap port with this name */
	fd = open("/dev/netmap", O_RDONLY);
	if (fd < 0) {
		saved_errno = errno;
		snprintf(errmsg, MAXERRMSG, "cannot open /dev/netmap: %s", strerror(saved_errno));
		goto fail;
	}
	memset(&greq, 0, sizeof(greq));
	if (nm_parse_one(memid, &greq, err, 0) == NM_PARSE_OK) {
		greq.nr_version = NETMAP_API;
		if (ioctl(fd, NIOCGINFO, &greq) < 0) {
			if (errno == ENOENT || errno == ENXIO)
				goto try_external;
			saved_errno = errno;
			snprintf(errmsg, MAXERRMSG, "cannot getinfo for %s: %s", memid, strerror(saved_errno));
			goto fail;
		}
		req->nr_arg2 = greq.nr_arg2;
		close(fd);
		return 0;
	}
try_external:
	/* A failed parse leaves a strdup()ed diagnostic in *err; it is not
	 * relevant to the external-memory attempt, so release it instead of
	 * leaking it or reporting it in place of the real error. */
	if (err && *err) {
		free(*err);
		*err = NULL;
	}
	D("trying with external memory");
	close(fd);
	fd = open(memid, O_RDWR);
	if (fd < 0) {
		saved_errno = errno;
		snprintf(errmsg, MAXERRMSG, "cannot open %s: %s", memid, strerror(saved_errno));
		goto fail;
	}
	mapsize = lseek(fd, 0, SEEK_END);
	if (mapsize < 0) {
		saved_errno = errno;
		snprintf(errmsg, MAXERRMSG, "failed to obtain filesize of %s: %s", memid, strerror(saved_errno));
		goto fail;
	}
	pi = mmap(0, mapsize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (pi == MAP_FAILED) {
		saved_errno = errno;
		snprintf(errmsg, MAXERRMSG, "cannot map %s: %s", memid, strerror(saved_errno));
		goto fail;
	}
	/* the mapping stays valid after the descriptor is closed */
	close(fd);
	req->nr_cmd = NETMAP_POOLS_CREATE;
	pi->memsize = mapsize;
	nmreq_pointer_put(req, pi);
	D("mapped %zu bytes at %p from file %s", (size_t)mapsize, (void *)pi, memid);
	return 0;

fail:
	D("%s", errmsg);
	if (fd >= 0)
		close(fd);
	if (err && !*err)
		*err = strdup(errmsg);
	/* report the errno captured at the failure site, not whatever the
	 * logging and cleanup calls above may have left behind */
	return saved_errno ? saved_errno : EINVAL;
}
static int
nm_parse(const char *ifname, struct nm_desc *d, char *errmsg)
{
char *err;
switch (nm_parse_one(ifname, &d->req, &err, 1)) {
case NM_PARSE_OK:
D("parse OK");
break;
case NM_PARSE_MEMID:
D("memid: %s", err);
errno = nm_interp_memid(err, &d->req, &err);
D("errno = %d", errno);
if (!errno)
break;
/* fallthrough */
default:
D("error");
strncpy(errmsg, err, MAXERRMSG);
errmsg[MAXERRMSG-1] = '\0';
free(err);
return -1;
}
D("parsed name: %s", d->req.nr_name);
d->self = d;
return 0;
}
/*
* Try to open, return descriptor if successful, NULL otherwise.
* An invalid netmap name will return errno = 0;
* You can pass a pointer to a pre-filled nm_desc to add special
* parameters. Flags is used as follows
* NM_OPEN_NO_MMAP use the memory from arg, only
* if the nr_arg2 (memory block) matches.
* Special case: if arg is NULL, skip the
* mmap entirely (maybe because you are going
* to do it by yourself, or you plan to call
* nm_mmap() only later)
* NM_OPEN_ARG1 use req.nr_arg1 from arg
* NM_OPEN_ARG2 use req.nr_arg2 from arg
* NM_OPEN_RING_CFG user ring config from arg
*/
static struct nm_desc *
nm_open(const char *ifname, const struct nmreq *req,
uint64_t new_flags, const struct nm_desc *arg)
{
struct nm_desc *d = NULL;
const struct nm_desc *parent = arg;
char errmsg[MAXERRMSG] = "";
uint32_t nr_reg;
struct netmap_pools_info *pi = NULL;
if (strncmp(ifname, "netmap:", 7) &&
strncmp(ifname, NM_BDG_NAME, strlen(NM_BDG_NAME))) {
errno = 0; /* name not recognised, not an error */
return NULL;
}
d = (struct nm_desc *)calloc(1, sizeof(*d));
if (d == NULL) {
snprintf(errmsg, MAXERRMSG, "nm_desc alloc failure");
@ -813,32 +964,87 @@ nm_open(const char *ifname, const struct nmreq *req,
goto fail;
}
if (req)
if (req) {
d->req = *req;
d->req.nr_version = NETMAP_API;
d->req.nr_ringid &= ~NETMAP_RING_MASK;
} else {
d->req.nr_arg1 = 4;
d->req.nr_arg2 = 0;
d->req.nr_arg3 = 0;
}
if (!(new_flags & NM_OPEN_IFNAME)) {
char *err;
switch (nm_parse_one(ifname, &d->req, &err, 1)) {
case NM_PARSE_OK:
break;
case NM_PARSE_MEMID:
if ((new_flags & NM_OPEN_NO_MMAP) &&
IS_NETMAP_DESC(parent)) {
/* ignore the memid setting, since we are
* going to use the parent's one
*/
break;
}
errno = nm_interp_memid(err, &d->req, &err);
if (!errno)
break;
/* fallthrough */
default:
strncpy(errmsg, err, MAXERRMSG);
errmsg[MAXERRMSG-1] = '\0';
free(err);
goto fail;
}
d->self = d;
}
/* compatibility checks for POOLS_CREATE and NM_OPEN flags
* the first check may be dropped once we have a larger nmreq
*/
if (d->req.nr_cmd == NETMAP_POOLS_CREATE) {
if (IS_NETMAP_DESC(parent)) {
if (new_flags & (NM_OPEN_ARG1 | NM_OPEN_ARG2 | NM_OPEN_ARG3)) {
snprintf(errmsg, MAXERRMSG,
"POOLS_CREATE is incompatibile "
"with NM_OPEN_ARG? flags");
errno = EINVAL;
goto fail;
}
if (new_flags & NM_OPEN_NO_MMAP) {
snprintf(errmsg, MAXERRMSG,
"POOLS_CREATE is incompatible "
"with NM_OPEN_NO_MMAP flag");
errno = EINVAL;
goto fail;
}
}
}
d->req.nr_version = NETMAP_API;
d->req.nr_ringid &= NETMAP_RING_MASK;
/* these fields are overridden by ifname and flags processing */
d->req.nr_ringid |= nr_ringid;
d->req.nr_flags |= nr_flags;
if (nr_arg2)
d->req.nr_arg2 = nr_arg2;
memcpy(d->req.nr_name, ifname, namelen);
d->req.nr_name[namelen] = '\0';
/* optionally import info from parent */
if (IS_NETMAP_DESC(parent) && new_flags) {
if (new_flags & NM_OPEN_ARG1)
if (new_flags & NM_OPEN_EXTMEM) {
if (parent->req.nr_cmd == NETMAP_POOLS_CREATE) {
d->req.nr_cmd = NETMAP_POOLS_CREATE;
nmreq_pointer_put(&d->req, nmreq_pointer_get(&parent->req));
D("Warning: not overriding arg[1-3] since external memory is being used");
new_flags &= ~(NM_OPEN_ARG1 | NM_OPEN_ARG2 | NM_OPEN_ARG3);
}
}
if (new_flags & NM_OPEN_ARG1) {
D("overriding ARG1 %d", parent->req.nr_arg1);
d->req.nr_arg1 = new_flags & NM_OPEN_ARG1 ?
parent->req.nr_arg1 : 4;
if (new_flags & NM_OPEN_ARG2)
d->req.nr_arg1 = parent->req.nr_arg1;
}
if (new_flags & (NM_OPEN_ARG2 | NM_OPEN_NO_MMAP)) {
D("overriding ARG2 %d", parent->req.nr_arg2);
d->req.nr_arg2 = new_flags & NM_OPEN_ARG2 ?
parent->req.nr_arg2 : 0;
if (new_flags & NM_OPEN_ARG3)
d->req.nr_arg2 = parent->req.nr_arg2;
}
if (new_flags & NM_OPEN_ARG3) {
D("overriding ARG3 %d", parent->req.nr_arg3);
d->req.nr_arg3 = new_flags & NM_OPEN_ARG3 ?
parent->req.nr_arg3 : 0;
d->req.nr_arg3 = parent->req.nr_arg3;
}
if (new_flags & NM_OPEN_RING_CFG) {
D("overriding RING_CFG");
d->req.nr_tx_slots = parent->req.nr_tx_slots;
@ -859,15 +1065,26 @@ nm_open(const char *ifname, const struct nmreq *req,
/* add the *XPOLL flags */
d->req.nr_ringid |= new_flags & (NETMAP_NO_TX_POLL | NETMAP_DO_RX_POLL);
if (d->req.nr_cmd == NETMAP_POOLS_CREATE) {
pi = nmreq_pointer_get(&d->req);
}
if (ioctl(d->fd, NIOCREGIF, &d->req)) {
snprintf(errmsg, MAXERRMSG, "NIOCREGIF failed: %s", strerror(errno));
goto fail;
}
/* if parent is defined, do nm_mmap() even if NM_OPEN_NO_MMAP is set */
if ((!(new_flags & NM_OPEN_NO_MMAP) || parent) && nm_mmap(d, parent)) {
snprintf(errmsg, MAXERRMSG, "mmap failed: %s", strerror(errno));
goto fail;
if (pi != NULL) {
d->mem = pi;
d->memsize = pi->memsize;
nm_init_offsets(d);
} else if ((!(new_flags & NM_OPEN_NO_MMAP) || parent)) {
/* if parent is defined, do nm_mmap() even if NM_OPEN_NO_MMAP is set */
errno = nm_mmap(d, parent);
if (errno) {
snprintf(errmsg, MAXERRMSG, "mmap failed: %s", strerror(errno));
goto fail;
}
}
nr_reg = d->req.nr_flags & NR_REG_MASK;
@ -934,7 +1151,8 @@ nm_close(struct nm_desc *d)
*/
static void *__xxzt[] __attribute__ ((unused)) =
{ (void *)nm_open, (void *)nm_inject,
(void *)nm_dispatch, (void *)nm_nextpkt } ;
(void *)nm_dispatch, (void *)nm_nextpkt,
(void *)nm_parse } ;
if (d == NULL || d->self != d)
return EINVAL;
@ -971,17 +1189,8 @@ nm_mmap(struct nm_desc *d, const struct nm_desc *parent)
}
d->done_mmap = 1;
}
{
struct netmap_if *nifp = NETMAP_IF(d->mem, d->req.nr_offset);
struct netmap_ring *r = NETMAP_RXRING(nifp, );
*(struct netmap_if **)(uintptr_t)&(d->nifp) = nifp;
*(struct netmap_ring **)(uintptr_t)&d->some_ring = r;
*(void **)(uintptr_t)&d->buf_start = NETMAP_BUF(r, 0);
*(void **)(uintptr_t)&d->buf_end =
(char *)d->mem + d->memsize;
}
nm_init_offsets(d);
return 0;
fail:
@ -994,13 +1203,13 @@ fail:
static int
nm_inject(struct nm_desc *d, const void *buf, size_t size)
{
u_int c, n = d->last_tx_ring - d->first_tx_ring + 1;
u_int c, n = d->last_tx_ring - d->first_tx_ring + 1,
ri = d->cur_tx_ring;
for (c = 0; c < n ; c++) {
for (c = 0; c < n ; c++, ri++) {
/* compute current ring to use */
struct netmap_ring *ring;
uint32_t i, idx;
uint32_t ri = d->cur_tx_ring + c;
if (ri > d->last_tx_ring)
ri = d->first_tx_ring;
@ -1038,11 +1247,10 @@ nm_dispatch(struct nm_desc *d, int cnt, nm_cb_t cb, u_char *arg)
* of buffers and the int is large enough that we never wrap,
* so we can omit checking for -1
*/
for (c=0; c < n && cnt != got; c++) {
for (c=0; c < n && cnt != got; c++, ri++) {
/* compute current ring to use */
struct netmap_ring *ring;
ri = d->cur_rx_ring + c;
if (ri > d->last_rx_ring)
ri = d->first_rx_ring;
ring = NETMAP_RXRING(d->nifp, ri);
@ -1053,6 +1261,9 @@ nm_dispatch(struct nm_desc *d, int cnt, nm_cb_t cb, u_char *arg)
}
i = ring->cur;
idx = ring->slot[i].buf_idx;
/* d->cur_rx_ring doesn't change inside this loop, but
* set it here, so it reflects d->hdr.buf's ring */
d->cur_rx_ring = ri;
d->hdr.slot = &ring->slot[i];
d->hdr.buf = (u_char *)NETMAP_BUF(ring, idx);
// __builtin_prefetch(buf);
@ -1065,7 +1276,6 @@ nm_dispatch(struct nm_desc *d, int cnt, nm_cb_t cb, u_char *arg)
d->hdr.flags = 0;
cb(arg, &d->hdr, d->hdr.buf);
}
d->cur_rx_ring = ri;
return got;
}

View File

@ -139,6 +139,13 @@ nmreq_pointer_put(struct nmreq *nmr, void *userptr)
*pp = (uintptr_t)userptr;
}
/*
 * Read back the user pointer kept in a request: the pointer-sized value
 * that starts at the nr_arg1 field is returned as a void pointer.
 * The value is expected to have been stored with nmreq_pointer_put().
 */
static inline void *
nmreq_pointer_get(const struct nmreq *nmr)
{
	const uintptr_t *slot;

	slot = (const uintptr_t *)&nmr->nr_arg1;
	return (void *)slot[0];
}
/* ptnetmap features */
#define PTNETMAP_F_VNET_HDR 1