diff --git a/sys/dev/netmap/if_em_netmap.h b/sys/dev/netmap/if_em_netmap.h index 26f85fdf321d..299bc3837d5e 100644 --- a/sys/dev/netmap/if_em_netmap.h +++ b/sys/dev/netmap/if_em_netmap.h @@ -235,8 +235,6 @@ em_netmap_rxsync(struct netmap_kring *kring, int flags) * First part: import newly received packets. */ if (netmap_no_pendintr || force_update) { - uint16_t slot_flags = kring->nkr_slot_flags; - nic_i = rxr->next_to_check; nm_i = netmap_idx_n2k(kring, nic_i); @@ -247,7 +245,7 @@ em_netmap_rxsync(struct netmap_kring *kring, int flags) if ((staterr & E1000_RXD_STAT_DD) == 0) break; ring->slot[nm_i].len = le16toh(curr->wb.upper.length); - ring->slot[nm_i].flags = slot_flags; + ring->slot[nm_i].flags = 0; bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[nic_i].map, BUS_DMASYNC_POSTREAD); nm_i = nm_next(nm_i, lim); diff --git a/sys/dev/netmap/if_igb_netmap.h b/sys/dev/netmap/if_igb_netmap.h index cd80f2663bc7..df15ceee7d8c 100644 --- a/sys/dev/netmap/if_igb_netmap.h +++ b/sys/dev/netmap/if_igb_netmap.h @@ -217,8 +217,6 @@ igb_netmap_rxsync(struct netmap_kring *kring, int flags) * First part: import newly received packets. */ if (netmap_no_pendintr || force_update) { - uint16_t slot_flags = kring->nkr_slot_flags; - nic_i = rxr->next_to_check; nm_i = netmap_idx_n2k(kring, nic_i); @@ -229,7 +227,7 @@ igb_netmap_rxsync(struct netmap_kring *kring, int flags) if ((staterr & E1000_RXD_STAT_DD) == 0) break; ring->slot[nm_i].len = le16toh(curr->wb.upper.length); - ring->slot[nm_i].flags = slot_flags; + ring->slot[nm_i].flags = 0; bus_dmamap_sync(rxr->ptag, rxr->rx_buffers[nic_i].pmap, BUS_DMASYNC_POSTREAD); nm_i = nm_next(nm_i, lim); diff --git a/sys/dev/netmap/if_ixl_netmap.h b/sys/dev/netmap/if_ixl_netmap.h index ea0bf35dea67..e9b036d34e87 100644 --- a/sys/dev/netmap/if_ixl_netmap.h +++ b/sys/dev/netmap/if_ixl_netmap.h @@ -331,7 +331,6 @@ ixl_netmap_rxsync(struct netmap_kring *kring, int flags) */ if (netmap_no_pendintr || force_update) { int crclen = ixl_crcstrip ? 0 : 4; - uint16_t slot_flags = kring->nkr_slot_flags; nic_i = rxr->next_check; // or also k2n(kring->nr_hwtail) nm_i = netmap_idx_n2k(kring, nic_i); @@ -346,7 +345,7 @@ ixl_netmap_rxsync(struct netmap_kring *kring, int flags) break; ring->slot[nm_i].len = ((qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT) - crclen; - ring->slot[nm_i].flags = slot_flags; + ring->slot[nm_i].flags = 0; bus_dmamap_sync(rxr->ptag, rxr->buffers[nic_i].pmap, BUS_DMASYNC_POSTREAD); nm_i = nm_next(nm_i, lim); diff --git a/sys/dev/netmap/if_lem_netmap.h b/sys/dev/netmap/if_lem_netmap.h index 56d65733a4f5..f8ba2bb716c8 100644 --- a/sys/dev/netmap/if_lem_netmap.h +++ b/sys/dev/netmap/if_lem_netmap.h @@ -216,8 +216,6 @@ lem_netmap_rxsync(struct netmap_kring *kring, int flags) * First part: import newly received packets. 
*/ if (netmap_no_pendintr || force_update) { - uint16_t slot_flags = kring->nkr_slot_flags; - nic_i = adapter->next_rx_desc_to_check; nm_i = netmap_idx_n2k(kring, nic_i); @@ -234,7 +232,7 @@ lem_netmap_rxsync(struct netmap_kring *kring, int flags) len = 0; } ring->slot[nm_i].len = len; - ring->slot[nm_i].flags = slot_flags; + ring->slot[nm_i].flags = 0; bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[nic_i].map, BUS_DMASYNC_POSTREAD); diff --git a/sys/dev/netmap/if_ptnet.c b/sys/dev/netmap/if_ptnet.c index becf7e4e8f04..1805a7f31e48 100644 --- a/sys/dev/netmap/if_ptnet.c +++ b/sys/dev/netmap/if_ptnet.c @@ -216,6 +216,7 @@ static void ptnet_update_vnet_hdr(struct ptnet_softc *sc); static int ptnet_nm_register(struct netmap_adapter *na, int onoff); static int ptnet_nm_txsync(struct netmap_kring *kring, int flags); static int ptnet_nm_rxsync(struct netmap_kring *kring, int flags); +static void ptnet_nm_intr(struct netmap_adapter *na, int onoff); static void ptnet_tx_intr(void *opaque); static void ptnet_rx_intr(void *opaque); @@ -477,6 +478,7 @@ ptnet_attach(device_t dev) na_arg.nm_krings_create = ptnet_nm_krings_create; na_arg.nm_krings_delete = ptnet_nm_krings_delete; na_arg.nm_dtor = ptnet_nm_dtor; + na_arg.nm_intr = ptnet_nm_intr; na_arg.nm_register = ptnet_nm_register; na_arg.nm_txsync = ptnet_nm_txsync; na_arg.nm_rxsync = ptnet_nm_rxsync; @@ -1298,6 +1300,18 @@ ptnet_nm_rxsync(struct netmap_kring *kring, int flags) return 0; } +static void +ptnet_nm_intr(struct netmap_adapter *na, int onoff) +{ + struct ptnet_softc *sc = if_getsoftc(na->ifp); + int i; + + for (i = 0; i < sc->num_rings; i++) { + struct ptnet_queue *pq = sc->queues + i; + pq->ptgh->guest_need_kick = onoff; + } +} + static void ptnet_tx_intr(void *opaque) { diff --git a/sys/dev/netmap/if_re_netmap.h b/sys/dev/netmap/if_re_netmap.h index 2cb3454c5f31..e7dd087acc67 100644 --- a/sys/dev/netmap/if_re_netmap.h +++ b/sys/dev/netmap/if_re_netmap.h @@ -201,7 +201,6 @@ re_netmap_rxsync(struct netmap_kring *kring, int flags) * is to stop right before nm_hwcur. */ if (netmap_no_pendintr || force_update) { - uint16_t slot_flags = kring->nkr_slot_flags; uint32_t stop_i = nm_prev(kring->nr_hwcur, lim); nic_i = sc->rl_ldata.rl_rx_prodidx; /* next pkt to check */ @@ -218,7 +217,7 @@ re_netmap_rxsync(struct netmap_kring *kring, int flags) /* XXX subtract crc */ total_len = (total_len < 4) ? 0 : total_len - 4; ring->slot[nm_i].len = total_len; - ring->slot[nm_i].flags = slot_flags; + ring->slot[nm_i].flags = 0; /* sync was in re_newbuf() */ bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag, rxd[nic_i].rx_dmamap, BUS_DMASYNC_POSTREAD); diff --git a/sys/dev/netmap/if_vtnet_netmap.h b/sys/dev/netmap/if_vtnet_netmap.h index 4bed0e718dd4..10789c53d1f0 100644 --- a/sys/dev/netmap/if_vtnet_netmap.h +++ b/sys/dev/netmap/if_vtnet_netmap.h @@ -122,6 +122,7 @@ vtnet_netmap_txsync(struct netmap_kring *kring, int flags) struct SOFTC_T *sc = ifp->if_softc; struct vtnet_txq *txq = &sc->vtnet_txqs[ring_nr]; struct virtqueue *vq = txq->vtntx_vq; + int interrupts = !(kring->nr_kflags & NKR_NOINTR); /* * First part: process new packets to send. 
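The interrupts flag computed in the vtnet txsync preamble above implements the new NKR_NOINTR convention used throughout this patch: a sync routine may re-arm device notifications only if the application has not asked netmap to silence the ring. A minimal sketch of the pattern these vtnet hunks follow (my_sync_finalize() and my_dev_enable_intr() are hypothetical illustrative names, not part of netmap):

    /* Re-arm device interrupts at the end of a sync routine only when
     * the kring still allows them (NKR_NOINTR clear). */
    static void
    my_sync_finalize(struct netmap_kring *kring)
    {
            int interrupts = !(kring->nr_kflags & NKR_NOINTR);

            if (interrupts)
                    my_dev_enable_intr(kring); /* e.g. virtqueue_enable_intr(vq) */
    }
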
@@ -179,7 +180,9 @@ vtnet_netmap_txsync(struct netmap_kring *kring, int flags) ring->head, ring->tail, virtqueue_nused(vq), (virtqueue_dump(vq), 1)); virtqueue_notify(vq); - virtqueue_enable_intr(vq); // like postpone with 0 + if (interrupts) { + virtqueue_enable_intr(vq); // like postpone with 0 + } } @@ -209,7 +212,7 @@ vtnet_netmap_txsync(struct netmap_kring *kring, int flags) if (nm_i != kring->nr_hwtail /* && vtnet_txq_below_threshold(txq) == 0*/) { ND(3, "disable intr, hwcur %d", nm_i); virtqueue_disable_intr(vq); - } else { + } else if (interrupts) { ND(3, "enable intr, hwcur %d", nm_i); virtqueue_postpone_intr(vq, VQ_POSTPONE_SHORT); } @@ -277,6 +280,7 @@ vtnet_netmap_rxsync(struct netmap_kring *kring, int flags) u_int const lim = kring->nkr_num_slots - 1; u_int const head = kring->rhead; int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; + int interrupts = !(kring->nr_kflags & NKR_NOINTR); /* device-specific */ struct SOFTC_T *sc = ifp->if_softc; @@ -297,7 +301,6 @@ vtnet_netmap_rxsync(struct netmap_kring *kring, int flags) * and vtnet_netmap_init_buffers(). */ if (netmap_no_pendintr || force_update) { - uint16_t slot_flags = kring->nkr_slot_flags; struct netmap_adapter *token; nm_i = kring->nr_hwtail; @@ -309,7 +312,7 @@ vtnet_netmap_rxsync(struct netmap_kring *kring, int flags) break; if (likely(token == (void *)rxq)) { ring->slot[nm_i].len = len; - ring->slot[nm_i].flags = slot_flags; + ring->slot[nm_i].flags = 0; nm_i = nm_next(nm_i, lim); n++; } else { @@ -334,7 +337,9 @@ vtnet_netmap_rxsync(struct netmap_kring *kring, int flags) kring->nr_hwcur = err; virtqueue_notify(vq); /* After draining the queue may need an intr from the hypervisor */ - vtnet_rxq_enable_intr(rxq); + if (interrupts) { + vtnet_rxq_enable_intr(rxq); + } } ND("[C] h %d c %d t %d hwcur %d hwtail %d", @@ -345,6 +350,28 @@ vtnet_netmap_rxsync(struct netmap_kring *kring, int flags) } +/* Enable/disable interrupts on all virtqueues. */ +static void +vtnet_netmap_intr(struct netmap_adapter *na, int onoff) +{ + struct SOFTC_T *sc = na->ifp->if_softc; + int i; + + for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { + struct vtnet_rxq *rxq = &sc->vtnet_rxqs[i]; + struct vtnet_txq *txq = &sc->vtnet_txqs[i]; + struct virtqueue *txvq = txq->vtntx_vq; + + if (onoff) { + vtnet_rxq_enable_intr(rxq); + virtqueue_enable_intr(txvq); + } else { + vtnet_rxq_disable_intr(rxq); + virtqueue_disable_intr(txvq); + } + } +} + /* Make RX virtqueues buffers pointing to netmap buffers. */ static int vtnet_netmap_init_rx_buffers(struct SOFTC_T *sc) @@ -417,6 +444,7 @@ vtnet_netmap_attach(struct SOFTC_T *sc) na.nm_txsync = vtnet_netmap_txsync; na.nm_rxsync = vtnet_netmap_rxsync; na.nm_config = vtnet_netmap_config; + na.nm_intr = vtnet_netmap_intr; na.num_tx_rings = na.num_rx_rings = sc->vtnet_max_vq_pairs; D("max rings %d", sc->vtnet_max_vq_pairs); netmap_attach(&na); diff --git a/sys/dev/netmap/ixgbe_netmap.h b/sys/dev/netmap/ixgbe_netmap.h index 419e08f0f6de..30da63191775 100644 --- a/sys/dev/netmap/ixgbe_netmap.h +++ b/sys/dev/netmap/ixgbe_netmap.h @@ -397,7 +397,6 @@ ixgbe_netmap_rxsync(struct netmap_kring *kring, int flags) */ if (netmap_no_pendintr || force_update) { int crclen = (ix_crcstrip || IXGBE_IS_VF(adapter) ) ? 
0 : 4; - uint16_t slot_flags = kring->nkr_slot_flags; nic_i = rxr->next_to_check; // or also k2n(kring->nr_hwtail) nm_i = netmap_idx_n2k(kring, nic_i); @@ -409,7 +408,7 @@ ixgbe_netmap_rxsync(struct netmap_kring *kring, int flags) if ((staterr & IXGBE_RXD_STAT_DD) == 0) break; ring->slot[nm_i].len = le16toh(curr->wb.upper.length) - crclen; - ring->slot[nm_i].flags = slot_flags; + ring->slot[nm_i].flags = 0; bus_dmamap_sync(rxr->ptag, rxr->rx_buffers[nic_i].pmap, BUS_DMASYNC_POSTREAD); nm_i = nm_next(nm_i, lim); diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c index edcc308e8d87..3c5551bad156 100644 --- a/sys/dev/netmap/netmap.c +++ b/sys/dev/netmap/netmap.c @@ -482,10 +482,8 @@ ports attached to the switch) int netmap_verbose; static int netmap_no_timestamp; /* don't timestamp on rxsync */ -int netmap_mitigate = 1; int netmap_no_pendintr = 1; int netmap_txsync_retry = 2; -int netmap_flags = 0; /* debug flags */ static int netmap_fwd = 0; /* force transparent forwarding */ /* @@ -515,7 +513,9 @@ int netmap_generic_mit = 100*1000; * Anyway users looking for the best performance should * use native adapters. */ +#ifdef linux int netmap_generic_txqdisc = 1; +#endif /* Default number of slots and queues for generic adapters. */ int netmap_generic_ringsize = 1024; @@ -539,21 +539,32 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, verbose, CTLFLAG_RW, &netmap_verbose, 0, "Verbose mode"); SYSCTL_INT(_dev_netmap, OID_AUTO, no_timestamp, CTLFLAG_RW, &netmap_no_timestamp, 0, "no_timestamp"); -SYSCTL_INT(_dev_netmap, OID_AUTO, mitigate, CTLFLAG_RW, &netmap_mitigate, 0, ""); -SYSCTL_INT(_dev_netmap, OID_AUTO, no_pendintr, - CTLFLAG_RW, &netmap_no_pendintr, 0, "Always look for new received packets."); +SYSCTL_INT(_dev_netmap, OID_AUTO, no_pendintr, CTLFLAG_RW, &netmap_no_pendintr, + 0, "Always look for new received packets."); SYSCTL_INT(_dev_netmap, OID_AUTO, txsync_retry, CTLFLAG_RW, - &netmap_txsync_retry, 0 , "Number of txsync loops in bridge's flush."); + &netmap_txsync_retry, 0, "Number of txsync loops in bridge's flush."); -SYSCTL_INT(_dev_netmap, OID_AUTO, flags, CTLFLAG_RW, &netmap_flags, 0 , ""); -SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0 , ""); -SYSCTL_INT(_dev_netmap, OID_AUTO, admode, CTLFLAG_RW, &netmap_admode, 0 , ""); -SYSCTL_INT(_dev_netmap, OID_AUTO, generic_mit, CTLFLAG_RW, &netmap_generic_mit, 0 , ""); -SYSCTL_INT(_dev_netmap, OID_AUTO, generic_ringsize, CTLFLAG_RW, &netmap_generic_ringsize, 0 , ""); -SYSCTL_INT(_dev_netmap, OID_AUTO, generic_rings, CTLFLAG_RW, &netmap_generic_rings, 0 , ""); -SYSCTL_INT(_dev_netmap, OID_AUTO, generic_txqdisc, CTLFLAG_RW, &netmap_generic_txqdisc, 0 , ""); -SYSCTL_INT(_dev_netmap, OID_AUTO, ptnet_vnet_hdr, CTLFLAG_RW, &ptnet_vnet_hdr, 0 , ""); -SYSCTL_INT(_dev_netmap, OID_AUTO, ptnetmap_tx_workers, CTLFLAG_RW, &ptnetmap_tx_workers, 0 , ""); +SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0, + "Force NR_FORWARD mode"); +SYSCTL_INT(_dev_netmap, OID_AUTO, admode, CTLFLAG_RW, &netmap_admode, 0, + "Adapter mode. 
0 selects the best option available, " "1 forces native adapter, 2 forces emulated adapter"); +SYSCTL_INT(_dev_netmap, OID_AUTO, generic_mit, CTLFLAG_RW, &netmap_generic_mit, + 0, "RX notification interval in nanoseconds"); +SYSCTL_INT(_dev_netmap, OID_AUTO, generic_ringsize, CTLFLAG_RW, + &netmap_generic_ringsize, 0, + "Number of per-ring slots for emulated netmap mode"); +SYSCTL_INT(_dev_netmap, OID_AUTO, generic_rings, CTLFLAG_RW, + &netmap_generic_rings, 0, + "Number of TX/RX queues for emulated netmap adapters"); +#ifdef linux +SYSCTL_INT(_dev_netmap, OID_AUTO, generic_txqdisc, CTLFLAG_RW, + &netmap_generic_txqdisc, 0, "Use qdisc for generic adapters"); +#endif +SYSCTL_INT(_dev_netmap, OID_AUTO, ptnet_vnet_hdr, CTLFLAG_RW, &ptnet_vnet_hdr, + 0, "Allow ptnet devices to use virtio-net headers"); +SYSCTL_INT(_dev_netmap, OID_AUTO, ptnetmap_tx_workers, CTLFLAG_RW, + &ptnetmap_tx_workers, 0, "Use worker threads for ptnetmap TX processing"); SYSEND; @@ -912,7 +923,19 @@ netmap_hw_krings_delete(struct netmap_adapter *na) netmap_krings_delete(na); } - +static void +netmap_mem_drop(struct netmap_adapter *na) +{ + int last = netmap_mem_deref(na->nm_mem, na); + /* if the native allocator had been overridden on regif, + * restore it now and drop the temporary one + */ + if (last && na->nm_mem_prev) { + netmap_mem_put(na->nm_mem); + na->nm_mem = na->nm_mem_prev; + na->nm_mem_prev = NULL; + } +} /* * Undo everything that was done in netmap_do_regif(). In particular, @@ -980,7 +1003,7 @@ netmap_do_unregif(struct netmap_priv_d *priv) /* delete the nifp */ netmap_mem_if_delete(na, priv->np_nifp); /* drop the allocator */ - netmap_mem_deref(na->nm_mem, na); + netmap_mem_drop(na); /* mark the priv as unregistered */ priv->np_na = NULL; priv->np_nifp = NULL; @@ -1289,7 +1312,7 @@ netmap_rxsync_from_host(struct netmap_kring *kring, int flags) D("%s", nm_dump_buf(NMB(na, slot),len, 128, NULL)); slot->len = len; - slot->flags = kring->nkr_slot_flags; + slot->flags = 0; nm_i = nm_next(nm_i, lim); mbq_enqueue(&fq, m); } @@ -1409,7 +1432,7 @@ netmap_get_hw_na(struct ifnet *ifp, struct netmap_mem_d *nmd, struct netmap_adap assign_mem: if (nmd != NULL && !((*na)->na_flags & NAF_MEM_OWNER) && (*na)->active_fds == 0 && ((*na)->nm_mem != nmd)) { - netmap_mem_put((*na)->nm_mem); + (*na)->nm_mem_prev = (*na)->nm_mem; (*na)->nm_mem = netmap_mem_get(nmd); } @@ -1896,7 +1919,8 @@ netmap_krings_get(struct netmap_priv_d *priv) int excl = (priv->np_flags & NR_EXCLUSIVE); enum txrx t; - ND("%s: grabbing tx [%d, %d) rx [%d, %d)", + if (netmap_verbose) + D("%s: grabbing tx [%d, %d) rx [%d, %d)", na->name, priv->np_qfirst[NR_TX], priv->np_qlast[NR_TX], @@ -2059,9 +2083,57 @@ netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na, if (na->active_fds == 0) { /* * If this is the first registration of the adapter, - * create the in-kernel view of the netmap rings, - * the netmap krings. + * perform sanity checks and create the in-kernel view + * of the netmap rings (the netmap krings). */ + if (na->ifp) { + /* This netmap adapter is attached to an ifnet. */ + unsigned nbs = netmap_mem_bufsize(na->nm_mem); + unsigned mtu = nm_os_ifnet_mtu(na->ifp); + /* The maximum number of bytes that a single + * receive or transmit NIC descriptor can hold. */ + unsigned hw_max_slot_len = 4096; + + if (mtu <= hw_max_slot_len) { + /* The MTU fits a single NIC slot. We only + * need to check that netmap buffers are + * large enough to hold an MTU. NS_MOREFRAG + * cannot be used in this case. 
*/ + if (nbs < mtu) { + nm_prerr("error: netmap buf size (%u) " + "< device MTU (%u)", nbs, mtu); + error = EINVAL; + goto err_drop_mem; + } + } else { + /* More NIC slots may be needed to receive + * or transmit a single packet. Check that + * the adapter supports NS_MOREFRAG and that + * netmap buffers are large enough to hold + * the maximum per-slot size. */ + if (!(na->na_flags & NAF_MOREFRAG)) { + nm_prerr("error: large MTU (%d) needed " + "but %s does not support " + "NS_MOREFRAG", mtu, + na->ifp->if_xname); + error = EINVAL; + goto err_drop_mem; + } else if (nbs < hw_max_slot_len) { + nm_prerr("error: using NS_MOREFRAG on " + "%s requires netmap buf size " + ">= %u", na->ifp->if_xname, + hw_max_slot_len); + error = EINVAL; + goto err_drop_mem; + } else { + nm_prinf("info: netmap application on " + "%s needs to support " + "NS_MOREFRAG " + "(MTU=%u,netmap_buf_size=%u)", + na->ifp->if_xname, mtu, nbs); + } + } + } /* * Depending on the adapter, this may also create @@ -2128,15 +2200,15 @@ err_put_lut: memset(&na->na_lut, 0, sizeof(na->na_lut)); err_del_if: netmap_mem_if_delete(na, nifp); -err_rel_excl: - netmap_krings_put(priv); err_del_rings: netmap_mem_rings_delete(na); +err_rel_excl: + netmap_krings_put(priv); err_del_krings: if (na->active_fds == 0) na->nm_krings_delete(na); err_drop_mem: - netmap_mem_deref(na->nm_mem, na); + netmap_mem_drop(na); err: priv->np_na = NULL; return error; @@ -2224,6 +2296,7 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread do { /* memsize is always valid */ u_int memflags; + uint64_t memsize; if (nmr->nr_name[0] != '\0') { @@ -2243,10 +2316,11 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread } } - error = netmap_mem_get_info(nmd, &nmr->nr_memsize, &memflags, + error = netmap_mem_get_info(nmd, &memsize, &memflags, &nmr->nr_arg2); if (error) break; + nmr->nr_memsize = (uint32_t)memsize; if (na == NULL) /* only memory info */ break; nmr->nr_offset = 0; @@ -2304,6 +2378,17 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread } NMG_UNLOCK(); break; + } else if (i == NETMAP_POOLS_CREATE) { + nmd = netmap_mem_ext_create(nmr, &error); + if (nmd == NULL) + break; + /* reset the fields used by POOLS_CREATE to + * avoid confusing the rest of the code + */ + nmr->nr_cmd = 0; + nmr->nr_arg1 = 0; + nmr->nr_arg2 = 0; + nmr->nr_arg3 = 0; } else if (i != 0) { D("nr_cmd must be 0 not %d", i); error = EINVAL; @@ -2314,7 +2399,7 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread NMG_LOCK(); do { u_int memflags; - struct ifnet *ifp; + uint64_t memsize; if (priv->np_nifp != NULL) { /* thread already registered */ error = EBUSY; @@ -2356,12 +2441,13 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread nmr->nr_tx_rings = na->num_tx_rings; nmr->nr_rx_slots = na->num_rx_desc; nmr->nr_tx_slots = na->num_tx_desc; - error = netmap_mem_get_info(na->nm_mem, &nmr->nr_memsize, &memflags, + error = netmap_mem_get_info(na->nm_mem, &memsize, &memflags, &nmr->nr_arg2); if (error) { netmap_do_unregif(priv); break; } + nmr->nr_memsize = (uint32_t)memsize; if (memflags & NETMAP_MEM_PRIVATE) { *(uint32_t *)(uintptr_t)&nifp->ni_flags |= NI_PRIV_MEM; } @@ -2533,7 +2619,6 @@ netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr) #define want_tx want[NR_TX] #define want_rx want[NR_RX] struct mbq q; /* packets from RX hw queues to host stack */ - enum txrx t; /* * In order to avoid nested locks, we need to "double 
check" @@ -2585,14 +2670,15 @@ netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr) check_all_tx = nm_si_user(priv, NR_TX); check_all_rx = nm_si_user(priv, NR_RX); +#ifdef __FreeBSD__ /* * We start with a lock free round which is cheap if we have * slots available. If this fails, then lock and call the sync - * routines. + * routines. We can't do this on Linux, as the contract says + * that we must call nm_os_selrecord() unconditionally. */ -#if 1 /* new code- call rx if any of the ring needs to release or read buffers */ if (want_tx) { - t = NR_TX; + enum txrx t = NR_TX; for (i = priv->np_qfirst[t]; want[t] && i < priv->np_qlast[t]; i++) { kring = &NMR(na, t)[i]; /* XXX compare ring->cur and kring->tail */ @@ -2603,8 +2689,8 @@ netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr) } } if (want_rx) { + enum txrx t = NR_RX; want_rx = 0; /* look for a reason to run the handlers */ - t = NR_RX; for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) { kring = &NMR(na, t)[i]; if (kring->ring->cur == kring->ring->tail /* try fetch new buffers */ @@ -2615,24 +2701,20 @@ netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr) if (!want_rx) revents |= events & (POLLIN | POLLRDNORM); /* we have data */ } -#else /* old code */ - for_rx_tx(t) { - for (i = priv->np_qfirst[t]; want[t] && i < priv->np_qlast[t]; i++) { - kring = &NMR(na, t)[i]; - /* XXX compare ring->cur and kring->tail */ - if (!nm_ring_empty(kring->ring)) { - revents |= want[t]; - want[t] = 0; /* also breaks the loop */ - } - } - } -#endif /* old code */ +#endif + +#ifdef linux + /* The selrecord must be unconditional on linux. */ + nm_os_selrecord(sr, check_all_tx ? + &na->si[NR_TX] : &na->tx_rings[priv->np_qfirst[NR_TX]].si); + nm_os_selrecord(sr, check_all_rx ? + &na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]].si); +#endif /* linux */ /* * If we want to push packets out (priv->np_txpoll) or * want_tx is still set, we must issue txsync calls * (on all rings, to avoid that the tx rings stall). - * XXX should also check cur != hwcur on the tx rings. * Fortunately, normal tx mode has np_txpoll set. */ if (priv->np_txpoll || want_tx) { @@ -2649,6 +2731,12 @@ flush_tx: kring = &na->tx_rings[i]; ring = kring->ring; + /* + * Don't try to txsync this TX ring if we already found some + * space in some of the TX rings (want_tx == 0) and there are no + * TX slots in this ring that need to be flushed to the NIC + * (cur == hwcur). + */ if (!send_down && !want_tx && ring->cur == kring->nr_hwcur) continue; @@ -2676,14 +2764,18 @@ flush_tx: if (found) { /* notify other listeners */ revents |= want_tx; want_tx = 0; +#ifndef linux kring->nm_notify(kring, 0); +#endif /* linux */ } } /* if there were any packet to forward we must have handled them by now */ send_down = 0; if (want_tx && retry_tx && sr) { +#ifndef linux nm_os_selrecord(sr, check_all_tx ? &na->si[NR_TX] : &na->tx_rings[priv->np_qfirst[NR_TX]].si); +#endif /* !linux */ retry_tx = 0; goto flush_tx; } @@ -2734,14 +2826,18 @@ do_retry_rx: if (found) { revents |= want_rx; retry_rx = 0; +#ifndef linux kring->nm_notify(kring, 0); +#endif /* linux */ } } +#ifndef linux if (retry_rx && sr) { nm_os_selrecord(sr, check_all_rx ? 
&na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]].si); } +#endif /* !linux */ if (send_down || retry_rx) { retry_rx = 0; if (send_down) @@ -2766,6 +2862,44 @@ do_retry_rx: #undef want_rx } +int +nma_intr_enable(struct netmap_adapter *na, int onoff) +{ + bool changed = false; + enum txrx t; + int i; + + for_rx_tx(t) { + for (i = 0; i < nma_get_nrings(na, t); i++) { + struct netmap_kring *kring = &NMR(na, t)[i]; + int on = !(kring->nr_kflags & NKR_NOINTR); + + if (!!onoff != !!on) { + changed = true; + } + if (onoff) { + kring->nr_kflags &= ~NKR_NOINTR; + } else { + kring->nr_kflags |= NKR_NOINTR; + } + } + } + + if (!changed) { + return 0; /* nothing to do */ + } + + if (!na->nm_intr) { + D("Cannot %s interrupts for %s", onoff ? "enable" : "disable", + na->name); + return -1; + } + + na->nm_intr(na, onoff); + + return 0; +} + /*-------------------- driver support routines -------------------*/ @@ -2804,6 +2938,7 @@ netmap_attach_common(struct netmap_adapter *na) if (na->na_flags & NAF_HOST_RINGS && na->ifp) { na->if_input = na->ifp->if_input; /* for netmap_send_up */ } + na->pdev = na; /* make sure netmap_mem_map() is called */ #endif /* __FreeBSD__ */ if (na->nm_krings_create == NULL) { /* we assume that we have been called by a driver, @@ -2832,22 +2967,6 @@ netmap_attach_common(struct netmap_adapter *na) return 0; } - -/* standard cleanup, called by all destructors */ -void -netmap_detach_common(struct netmap_adapter *na) -{ - if (na->tx_rings) { /* XXX should not happen */ - D("freeing leftover tx_rings"); - na->nm_krings_delete(na); - } - netmap_pipe_dealloc(na); - if (na->nm_mem) - netmap_mem_put(na->nm_mem); - bzero(na, sizeof(*na)); - nm_os_free(na); -} - /* Wrapper for the register callback provided netmap-enabled * hardware drivers. * nm_iszombie(na) means that the driver module has been @@ -2900,7 +3019,7 @@ netmap_hw_dtor(struct netmap_adapter *na) * Return 0 on success, ENOMEM otherwise. */ int -netmap_attach_ext(struct netmap_adapter *arg, size_t size) +netmap_attach_ext(struct netmap_adapter *arg, size_t size, int override_reg) { struct netmap_hw_adapter *hwna = NULL; struct ifnet *ifp = NULL; @@ -2912,15 +3031,27 @@ netmap_attach_ext(struct netmap_adapter *arg, size_t size) if (arg == NULL || arg->ifp == NULL) goto fail; + ifp = arg->ifp; + if (NA(ifp) && !NM_NA_VALID(ifp)) { + /* If NA(ifp) is not null but there is no valid netmap + * adapter it means that someone else is using the same + * pointer (e.g. ax25_ptr on linux). This happens for + * instance when also PF_RING is in use. 
*/ + D("Error: netmap adapter hook is busy"); + return EBUSY; + } + hwna = nm_os_malloc(size); if (hwna == NULL) goto fail; hwna->up = *arg; hwna->up.na_flags |= NAF_HOST_RINGS | NAF_NATIVE; strncpy(hwna->up.name, ifp->if_xname, sizeof(hwna->up.name)); - hwna->nm_hw_register = hwna->up.nm_register; - hwna->up.nm_register = netmap_hw_reg; + if (override_reg) { + hwna->nm_hw_register = hwna->up.nm_register; + hwna->up.nm_register = netmap_hw_reg; + } if (netmap_attach_common(&hwna->up)) { nm_os_free(hwna); goto fail; @@ -2939,6 +3070,7 @@ netmap_attach_ext(struct netmap_adapter *arg, size_t size) #endif /* NETMAP_LINUX_HAVE_NETDEV_OPS */ } hwna->nm_ndo.ndo_start_xmit = linux_netmap_start_xmit; + hwna->nm_ndo.ndo_change_mtu = linux_netmap_change_mtu; if (ifp->ethtool_ops) { hwna->nm_eto = *ifp->ethtool_ops; } @@ -2968,7 +3100,8 @@ fail: int netmap_attach(struct netmap_adapter *arg) { - return netmap_attach_ext(arg, sizeof(struct netmap_hw_adapter)); + return netmap_attach_ext(arg, sizeof(struct netmap_hw_adapter), + 1 /* override nm_reg */); } @@ -2996,7 +3129,15 @@ NM_DBG(netmap_adapter_put)(struct netmap_adapter *na) if (na->nm_dtor) na->nm_dtor(na); - netmap_detach_common(na); + if (na->tx_rings) { /* XXX should not happen */ + D("freeing leftover tx_rings"); + na->nm_krings_delete(na); + } + netmap_pipe_dealloc(na); + if (na->nm_mem) + netmap_mem_put(na->nm_mem); + bzero(na, sizeof(*na)); + nm_os_free(na); return 1; } @@ -3029,15 +3170,14 @@ netmap_detach(struct ifnet *ifp) NMG_LOCK(); netmap_set_all_rings(na, NM_KR_LOCKED); - na->na_flags |= NAF_ZOMBIE; /* * if the netmap adapter is not native, somebody * changed it, so we can not release it here. * The NAF_ZOMBIE flag will notify the new owner that * the driver is gone. */ - if (na->na_flags & NAF_NATIVE) { - netmap_adapter_put(na); + if (!(na->na_flags & NAF_NATIVE) || !netmap_adapter_put(na)) { + na->na_flags |= NAF_ZOMBIE; } /* give active users a chance to notice that NAF_ZOMBIE has been * turned on, so that they can stop and return an error to userspace. @@ -3116,9 +3256,9 @@ netmap_transmit(struct ifnet *ifp, struct mbuf *m) */ mbq_lock(q); - busy = kring->nr_hwtail - kring->nr_hwcur; - if (busy < 0) - busy += kring->nkr_num_slots; + busy = kring->nr_hwtail - kring->nr_hwcur; + if (busy < 0) + busy += kring->nkr_num_slots; if (busy + mbq_len(q) >= kring->nkr_num_slots - 1) { RD(2, "%s full hwcur %d hwtail %d qlen %d", na->name, kring->nr_hwcur, kring->nr_hwtail, mbq_len(q)); @@ -3216,16 +3356,6 @@ netmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n, kring->nr_hwtail -= lim + 1; } -#if 0 // def linux - /* XXX check that the mappings are correct */ - /* need ring_nr, adapter->pdev, direction */ - buffer_info->dma = dma_map_single(&pdev->dev, addr, adapter->rx_buffer_len, DMA_FROM_DEVICE); - if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) { - D("error mapping rx netmap buffer %d", i); - // XXX fix error handling - } - -#endif /* linux */ /* * Wakeup on the individual and global selwait * We do the wakeup here, but the ring is not yet reconfigured. 
diff --git a/sys/dev/netmap/netmap_freebsd.c b/sys/dev/netmap/netmap_freebsd.c index b811a017822b..c122dc64bed2 100644 --- a/sys/dev/netmap/netmap_freebsd.c +++ b/sys/dev/netmap/netmap_freebsd.c @@ -173,6 +173,16 @@ nm_os_ifnet_fini(void) nm_ifnet_dh_tag); } +unsigned +nm_os_ifnet_mtu(struct ifnet *ifp) +{ +#if __FreeBSD_version < 1100030 + return ifp->if_data.ifi_mtu; +#else /* __FreeBSD_version >= 1100030 */ + return ifp->if_mtu; +#endif +} + rawsum_t nm_os_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum) { @@ -294,24 +304,30 @@ nm_os_catch_rx(struct netmap_generic_adapter *gna, int intercept) { struct netmap_adapter *na = &gna->up.up; struct ifnet *ifp = na->ifp; + int ret = 0; + nm_os_ifnet_lock(); if (intercept) { if (gna->save_if_input) { D("cannot intercept again"); - return EINVAL; /* already set */ + ret = EINVAL; /* already set */ + goto out; } gna->save_if_input = ifp->if_input; ifp->if_input = freebsd_generic_rx_handler; } else { if (!gna->save_if_input){ D("cannot restore"); - return EINVAL; /* not saved */ + ret = EINVAL; /* not saved */ + goto out; } ifp->if_input = gna->save_if_input; gna->save_if_input = NULL; } +out: + nm_os_ifnet_unlock(); - return 0; + return ret; } @@ -327,12 +343,14 @@ nm_os_catch_tx(struct netmap_generic_adapter *gna, int intercept) struct netmap_adapter *na = &gna->up.up; struct ifnet *ifp = netmap_generic_getifp(gna); + nm_os_ifnet_lock(); if (intercept) { na->if_transmit = ifp->if_transmit; ifp->if_transmit = netmap_transmit; } else { ifp->if_transmit = na->if_transmit; } + nm_os_ifnet_unlock(); return 0; } diff --git a/sys/dev/netmap/netmap_generic.c b/sys/dev/netmap/netmap_generic.c index 1276a3a0c46c..2ed251a55775 100644 --- a/sys/dev/netmap/netmap_generic.c +++ b/sys/dev/netmap/netmap_generic.c @@ -86,8 +86,6 @@ __FBSDID("$FreeBSD$"); #include #include -#define rtnl_lock() ND("rtnl_lock called") -#define rtnl_unlock() ND("rtnl_unlock called") #define MBUF_RXQ(m) ((m)->m_pkthdr.flowid) #define smp_mb() @@ -168,7 +166,13 @@ nm_os_get_mbuf(struct ifnet *ifp, int len) * has a KASSERT(), checking that the mbuf dtor function is not NULL. */ +#if __FreeBSD_version <= 1200050 +static void void_mbuf_dtor(struct mbuf *m, void *arg1, void *arg2) { } +#else /* __FreeBSD_version >= 1200051 */ +/* The arg1 and arg2 pointer arguments were removed by r324446, which + * is included since version 1200051. */ static void void_mbuf_dtor(struct mbuf *m) { } +#endif /* __FreeBSD_version >= 1200051 */ #define SET_MBUF_DESTRUCTOR(m, fn) do { \ (m)->m_ext.ext_free = (fn != NULL) ? \ @@ -200,8 +204,6 @@ nm_os_get_mbuf(struct ifnet *ifp, int len) #include "win_glue.h" -#define rtnl_lock() ND("rtnl_lock called") -#define rtnl_unlock() ND("rtnl_unlock called") #define MBUF_TXQ(m) 0//((m)->m_pkthdr.flowid) #define MBUF_RXQ(m) 0//((m)->m_pkthdr.flowid) #define smp_mb() //XXX: to be correctly defined @@ -210,7 +212,6 @@ nm_os_get_mbuf(struct ifnet *ifp, int len) #include "bsd_glue.h" -#include /* rtnl_[un]lock() */ #include /* struct ethtool_ops, get_ringparam */ #include @@ -339,17 +340,13 @@ generic_netmap_unregister(struct netmap_adapter *na) int i, r; if (na->active_fds == 0) { - rtnl_lock(); - na->na_flags &= ~NAF_NETMAP_ON; - /* Release packet steering control. */ nm_os_catch_tx(gna, 0); - /* Stop intercepting packets on the RX path. */ nm_os_catch_rx(gna, 0); - rtnl_unlock(); + /* Release packet steering control. 
*/ + nm_os_catch_tx(gna, 0); } for_each_rx_kring_h(r, kring, na) { @@ -510,24 +507,20 @@ generic_netmap_register(struct netmap_adapter *na, int enable) } if (na->active_fds == 0) { - rtnl_lock(); - /* Prepare to intercept incoming traffic. */ error = nm_os_catch_rx(gna, 1); if (error) { D("nm_os_catch_rx(1) failed (%d)", error); - goto register_handler; + goto free_tx_pools; } - /* Make netmap control the packet steering. */ + /* Let netmap control the packet steering. */ error = nm_os_catch_tx(gna, 1); if (error) { D("nm_os_catch_tx(1) failed (%d)", error); goto catch_rx; } - rtnl_unlock(); - na->na_flags |= NAF_NETMAP_ON; #ifdef RATE_GENERIC @@ -548,8 +541,6 @@ generic_netmap_register(struct netmap_adapter *na, int enable) /* Here (na->active_fds == 0) holds. */ catch_rx: nm_os_catch_rx(gna, 0); -register_handler: - rtnl_unlock(); free_tx_pools: for_each_tx_kring(r, kring, na) { mtx_destroy(&kring->tx_event_lock); @@ -626,7 +617,11 @@ generic_mbuf_destructor(struct mbuf *m) * txsync. */ netmap_generic_irq(na, r, NULL); #ifdef __FreeBSD__ +#if __FreeBSD_version <= 1200050 + void_mbuf_dtor(m, NULL, NULL); +#else /* __FreeBSD_version >= 1200051 */ void_mbuf_dtor(m); +#endif /* __FreeBSD_version >= 1200051 */ #endif } @@ -1017,7 +1012,6 @@ generic_netmap_rxsync(struct netmap_kring *kring, int flags) int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; /* Adapter-specific variables. */ - uint16_t slot_flags = kring->nkr_slot_flags; u_int nm_buf_len = NETMAP_BUF_SIZE(na); struct mbq tmpq; struct mbuf *m; @@ -1096,7 +1090,7 @@ generic_netmap_rxsync(struct netmap_kring *kring, int flags) avail -= nm_buf_len; ring->slot[nm_i].len = copy; - ring->slot[nm_i].flags = slot_flags | (mlen ? NS_MOREFRAG : 0); + ring->slot[nm_i].flags = (mlen ? NS_MOREFRAG : 0); nm_i = nm_next(nm_i, lim); } @@ -1208,6 +1202,15 @@ generic_netmap_attach(struct ifnet *ifp) } #endif + if (NA(ifp) && !NM_NA_VALID(ifp)) { + /* If NA(ifp) is not null but there is no valid netmap + * adapter it means that someone else is using the same + * pointer (e.g. ax25_ptr on linux). This happens for + * instance when also PF_RING is in use. */ + D("Error: netmap adapter hook is busy"); + return EBUSY; + } + num_tx_desc = num_rx_desc = netmap_generic_ringsize; /* starting point */ nm_os_generic_find_num_desc(ifp, &num_tx_desc, &num_rx_desc); /* ignore errors */ diff --git a/sys/dev/netmap/netmap_kern.h b/sys/dev/netmap/netmap_kern.h index 268c980ff174..3e6451091324 100644 --- a/sys/dev/netmap/netmap_kern.h +++ b/sys/dev/netmap/netmap_kern.h @@ -39,6 +39,9 @@ #if defined(linux) +#if defined(CONFIG_NETMAP_EXTMEM) +#define WITH_EXTMEM +#endif #if defined(CONFIG_NETMAP_VALE) #define WITH_VALE #endif @@ -90,6 +93,7 @@ #define NM_MTX_INIT(m) sx_init(&(m), #m) #define NM_MTX_DESTROY(m) sx_destroy(&(m)) #define NM_MTX_LOCK(m) sx_xlock(&(m)) +#define NM_MTX_SPINLOCK(m) while (!sx_try_xlock(&(m))) ; #define NM_MTX_UNLOCK(m) sx_xunlock(&(m)) #define NM_MTX_ASSERT(m) sx_assert(&(m), SA_XLOCKED) @@ -100,7 +104,7 @@ #define MBUF_TRANSMIT(na, ifp, m) ((na)->if_transmit(ifp, m)) #define GEN_TX_MBUF_IFP(m) ((m)->m_pkthdr.rcvif) -#define NM_ATOMIC_T volatile int // XXX ? +#define NM_ATOMIC_T volatile int /* required by atomic/bitops.h */ /* atomic operations */ #include #define NM_ATOMIC_TEST_AND_SET(p) (!atomic_cmpset_acq_int((p), 0, 1)) @@ -132,13 +136,10 @@ struct nm_selinfo { }; -// XXX linux struct, not used in FreeBSD -struct net_device_ops { -}; -struct ethtool_ops { -}; struct hrtimer { + /* Not used in FreeBSD. 
*/ }; + #define NM_BNS_GET(b) #define NM_BNS_PUT(b) @@ -202,14 +203,6 @@ struct hrtimer { #define NETMAP_KERNEL_XCHANGE_POINTERS _IO('i', 180) #define NETMAP_KERNEL_SEND_SHUTDOWN_SIGNAL _IO_direct('i', 195) -//Empty data structures are not permitted by MSVC compiler -//XXX_ale, try to solve this problem -struct net_device_ops{ - char data[1]; -}; -typedef struct ethtool_ops{ - char data[1]; -}; typedef struct hrtimer{ KTIMER timer; BOOLEAN active; @@ -297,6 +290,8 @@ void nm_os_ifnet_fini(void); void nm_os_ifnet_lock(void); void nm_os_ifnet_unlock(void); +unsigned nm_os_ifnet_mtu(struct ifnet *ifp); + void nm_os_get_module(void); void nm_os_put_module(void); @@ -305,8 +300,10 @@ void netmap_undo_zombie(struct ifnet *); /* os independent alloc/realloc/free */ void *nm_os_malloc(size_t); +void *nm_os_vmalloc(size_t); void *nm_os_realloc(void *, size_t new_size, size_t old_size); void nm_os_free(void *); +void nm_os_vfree(void *); /* passes a packet up to the host stack. * If the packet is sent (or dropped) immediately it returns NULL, @@ -371,8 +368,7 @@ struct netmap_zmon_list { * TX rings: hwcur + hwofs coincides with next_to_send * * For received packets, slot->flags is set to nkr_slot_flags - * so we can provide a proper initial value (e.g. set NS_FORWARD - * when operating in 'transparent' mode). + * so we can provide a proper initial value. * * The following fields are used to implement lock-free copy of packets * from input to output ports in VALE switch: @@ -427,6 +423,7 @@ struct netmap_kring { * (used internally by pipes and * by ptnetmap host ports) */ +#define NKR_NOINTR 0x10 /* don't use interrupts on this ring */ uint32_t nr_mode; uint32_t nr_pending_mode; @@ -442,8 +439,6 @@ struct netmap_kring { */ int32_t nkr_hwofs; - uint16_t nkr_slot_flags; /* initial value for flags */ - /* last_reclaim is opaque marker to help reduce the frequency * of operations such as reclaiming tx buffers. A possible use * is set it to ticks and do the reclaim only once per tick. @@ -580,7 +575,7 @@ nm_prev(uint32_t i, uint32_t lim) +-----------------+ +-----------------+ | | | | - |XXX free slot XXX| |XXX free slot XXX| + | free | | free | +-----------------+ +-----------------+ head->| owned by user |<-hwcur | not sent to nic |<-hwcur | | | yet | @@ -621,9 +616,14 @@ tail->| |<-hwtail | |<-hwlease * a circular array where completions should be reported. */ +struct lut_entry; +#ifdef __FreeBSD__ +#define plut_entry lut_entry +#endif struct netmap_lut { struct lut_entry *lut; + struct plut_entry *plut; uint32_t objtotal; /* max buffer index */ uint32_t objsize; /* buffer size */ }; @@ -671,6 +671,7 @@ struct netmap_adapter { #define NAF_HOST_RINGS 64 /* the adapter supports the host rings */ #define NAF_FORCE_NATIVE 128 /* the adapter is always NATIVE */ #define NAF_PTNETMAP_HOST 256 /* the adapter supports ptnetmap in the host */ +#define NAF_MOREFRAG 512 /* the adapter supports NS_MOREFRAG */ #define NAF_ZOMBIE (1U<<30) /* the nic driver has been unloaded */ #define NAF_BUSY (1U<<31) /* the adapter is used internally and * cannot be registered from userspace @@ -711,9 +712,8 @@ struct netmap_adapter { /* copy of if_input for netmap_send_up() */ void (*if_input)(struct ifnet *, struct mbuf *); - /* references to the ifnet and device routines, used by - * the generic netmap functions. - */ + /* Back reference to the parent ifnet struct. Used for + * hardware ports (emulated netmap included). 
*/ struct ifnet *ifp; /* adapter is ifp->if_softc */ /*---- callbacks for this netmap adapter -----*/ @@ -806,6 +806,7 @@ struct netmap_adapter { * buffer addresses, the total number of buffers and the buffer size. */ struct netmap_mem_d *nm_mem; + struct netmap_mem_d *nm_mem_prev; struct netmap_lut na_lut; /* additional information attached to this adapter @@ -861,6 +862,8 @@ NMR(struct netmap_adapter *na, enum txrx t) return (t == NR_TX ? na->tx_rings : na->rx_rings); } +int nma_intr_enable(struct netmap_adapter *na, int onoff); + /* * If the NIC is owned by the kernel * (i.e., bridge), neither another bridge nor user can use it; @@ -898,8 +901,10 @@ struct netmap_vp_adapter { /* VALE software port */ struct netmap_hw_adapter { /* physical device */ struct netmap_adapter up; - struct net_device_ops nm_ndo; // XXX linux only - struct ethtool_ops nm_eto; // XXX linux only +#ifdef linux + struct net_device_ops nm_ndo; + struct ethtool_ops nm_eto; +#endif const struct ethtool_ops* save_ethtool; int (*nm_hw_register)(struct netmap_adapter *, int onoff); @@ -920,12 +925,10 @@ struct netmap_generic_adapter { /* emulated device */ /* Pointer to a previously used netmap adapter. */ struct netmap_adapter *prev; - /* generic netmap adapters support: - * a net_device_ops struct overrides ndo_select_queue(), - * save_if_input saves the if_input hook (FreeBSD), - * mit implements rx interrupt mitigation, + /* Emulated netmap adapters support: + * - save_if_input saves the if_input hook (FreeBSD); + * - mit implements rx interrupt mitigation; */ - struct net_device_ops generic_ndo; void (*save_if_input)(struct ifnet *, struct mbuf *); struct nm_generic_mit *mit; @@ -1186,7 +1189,7 @@ static __inline void nm_kr_start(struct netmap_kring *kr) * virtual ports (vale, pipes, monitor) */ int netmap_attach(struct netmap_adapter *); -int netmap_attach_ext(struct netmap_adapter *, size_t size); +int netmap_attach_ext(struct netmap_adapter *, size_t size, int override_reg); void netmap_detach(struct ifnet *); int netmap_transmit(struct ifnet *, struct mbuf *); struct netmap_slot *netmap_reset(struct netmap_adapter *na, @@ -1279,15 +1282,12 @@ nm_set_native_flags(struct netmap_adapter *na) ifp->if_transmit = netmap_transmit; #elif defined (_WIN32) (void)ifp; /* prevent a warning */ - //XXX_ale can we just comment those? - //na->if_transmit = ifp->if_transmit; - //ifp->if_transmit = netmap_transmit; -#else +#elif defined (linux) na->if_transmit = (void *)ifp->netdev_ops; ifp->netdev_ops = &((struct netmap_hw_adapter *)na)->nm_ndo; ((struct netmap_hw_adapter *)na)->save_ethtool = ifp->ethtool_ops; ifp->ethtool_ops = &((struct netmap_hw_adapter*)na)->nm_eto; -#endif +#endif /* linux */ nm_update_hostrings_mode(na); } @@ -1308,8 +1308,6 @@ nm_clear_native_flags(struct netmap_adapter *na) ifp->if_transmit = na->if_transmit; #elif defined(_WIN32) (void)ifp; /* prevent a warning */ - //XXX_ale can we just comment those? 
- //ifp->if_transmit = na->if_transmit; #else ifp->netdev_ops = (void *)na->if_transmit; ifp->ethtool_ops = ((struct netmap_hw_adapter*)na)->save_ethtool; @@ -1374,8 +1372,6 @@ uint32_t nm_rxsync_prologue(struct netmap_kring *, struct netmap_ring *); * - provide defaults for the setup callbacks and the memory allocator */ int netmap_attach_common(struct netmap_adapter *); -/* common actions to be performed on netmap adapter destruction */ -void netmap_detach_common(struct netmap_adapter *); /* fill priv->np_[tr]xq{first,last} using the ringid and flags information * coming from a struct nmreq */ @@ -1431,8 +1427,8 @@ int netmap_get_hw_na(struct ifnet *ifp, * * VALE only supports unicast or broadcast. The lookup * function can return 0 .. NM_BDG_MAXPORTS-1 for regular ports, - * NM_BDG_MAXPORTS for broadcast, NM_BDG_MAXPORTS+1 for unknown. - * XXX in practice "unknown" might be handled same as broadcast. + * NM_BDG_MAXPORTS for broadcast, NM_BDG_MAXPORTS+1 to indicate + * drop. */ typedef u_int (*bdg_lookup_fn_t)(struct nm_bdg_fwd *ft, uint8_t *ring_nr, struct netmap_vp_adapter *); @@ -1471,7 +1467,7 @@ int netmap_bdg_config(struct nmreq *nmr); #ifdef WITH_PIPES /* max number of pipes per device */ -#define NM_MAXPIPES 64 /* XXX how many? */ +#define NM_MAXPIPES 64 /* XXX this should probably be a sysctl */ void netmap_pipe_dealloc(struct netmap_adapter *); int netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, struct netmap_mem_d *nmd, int create); @@ -1573,7 +1569,9 @@ extern int netmap_flags; extern int netmap_generic_mit; extern int netmap_generic_ringsize; extern int netmap_generic_rings; +#ifdef linux extern int netmap_generic_txqdisc; +#endif extern int ptnetmap_tx_workers; /* @@ -1618,13 +1616,14 @@ static void netmap_dmamap_cb(__unused void *arg, /* bus_dmamap_load wrapper: call aforementioned function if map != NULL. * XXX can we do it without a callback ? */ -static inline void +static inline int netmap_load_map(struct netmap_adapter *na, bus_dma_tag_t tag, bus_dmamap_t map, void *buf) { if (map) bus_dmamap_load(tag, map, buf, NETMAP_BUF_SIZE(na), netmap_dmamap_cb, NULL, BUS_DMA_NOWAIT); + return 0; } static inline void @@ -1635,6 +1634,8 @@ netmap_unload_map(struct netmap_adapter *na, bus_dmamap_unload(tag, map); } +#define netmap_sync_map(na, tag, map, sz, t) + /* update the map when a buffer changes. 
*/ static inline void netmap_reload_map(struct netmap_adapter *na, @@ -1654,54 +1655,11 @@ netmap_reload_map(struct netmap_adapter *na, int nm_iommu_group_id(bus_dma_tag_t dev); #include -static inline void -netmap_load_map(struct netmap_adapter *na, - bus_dma_tag_t tag, bus_dmamap_t map, void *buf) -{ - if (0 && map) { - *map = dma_map_single(na->pdev, buf, NETMAP_BUF_SIZE(na), - DMA_BIDIRECTIONAL); - } -} - -static inline void -netmap_unload_map(struct netmap_adapter *na, - bus_dma_tag_t tag, bus_dmamap_t map) -{ - u_int sz = NETMAP_BUF_SIZE(na); - - if (*map) { - dma_unmap_single(na->pdev, *map, sz, - DMA_BIDIRECTIONAL); - } -} - -static inline void -netmap_reload_map(struct netmap_adapter *na, - bus_dma_tag_t tag, bus_dmamap_t map, void *buf) -{ - u_int sz = NETMAP_BUF_SIZE(na); - - if (*map) { - dma_unmap_single(na->pdev, *map, sz, - DMA_BIDIRECTIONAL); - } - - *map = dma_map_single(na->pdev, buf, sz, - DMA_BIDIRECTIONAL); -} - /* - * XXX How do we redefine these functions: - * * on linux we need * dma_map_single(&pdev->dev, virt_addr, len, direction) - * dma_unmap_single(&adapter->pdev->dev, phys_addr, len, direction - * The len can be implicit (on netmap it is NETMAP_BUF_SIZE) - * unfortunately the direction is not, so we need to change - * something to have a cross API + * dma_unmap_single(&adapter->pdev->dev, phys_addr, len, direction) */ - #if 0 struct e1000_buffer *buffer_info = &tx_ring->buffer_info[l]; /* set time_stamp *before* dma to help avoid a possible race */ @@ -1724,10 +1682,59 @@ netmap_reload_map(struct netmap_adapter *na, #endif -/* - * The bus_dmamap_sync() can be one of wmb() or rmb() depending on direction. - */ -#define bus_dmamap_sync(_a, _b, _c) +static inline int +netmap_load_map(struct netmap_adapter *na, + bus_dma_tag_t tag, bus_dmamap_t map, void *buf, u_int size) +{ + if (map) { + *map = dma_map_single(na->pdev, buf, size, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(na->pdev, *map)) { + *map = 0; + return ENOMEM; + } + } + return 0; +} + +static inline void +netmap_unload_map(struct netmap_adapter *na, + bus_dma_tag_t tag, bus_dmamap_t map, u_int sz) +{ + if (*map) { + dma_unmap_single(na->pdev, *map, sz, + DMA_BIDIRECTIONAL); + } +} + +static inline void +netmap_sync_map(struct netmap_adapter *na, + bus_dma_tag_t tag, bus_dmamap_t map, u_int sz, enum txrx t) +{ + if (*map) { + if (t == NR_RX) + dma_sync_single_for_cpu(na->pdev, *map, sz, + DMA_FROM_DEVICE); + else + dma_sync_single_for_device(na->pdev, *map, sz, + DMA_TO_DEVICE); + } +} + +static inline void +netmap_reload_map(struct netmap_adapter *na, + bus_dma_tag_t tag, bus_dmamap_t map, void *buf) +{ + u_int sz = NETMAP_BUF_SIZE(na); + + if (*map) { + dma_unmap_single(na->pdev, *map, sz, + DMA_BIDIRECTIONAL); + } + + *map = dma_map_single(na->pdev, buf, sz, + DMA_BIDIRECTIONAL); +} #endif /* linux */ @@ -1764,10 +1771,26 @@ netmap_idx_k2n(struct netmap_kring *kr, int idx) /* Entries of the look-up table. */ +#ifdef __FreeBSD__ struct lut_entry { void *vaddr; /* virtual address. */ vm_paddr_t paddr; /* physical address. */ }; +#else /* linux & _WIN32 */ +/* dma-mapping in linux can assign a buffer a different address + * depending on the device, so we need to have a separate + * physical-address look-up table for each na. + * We can still share the vaddrs, though, therefore we split + * the lut_entry structure. + */ +struct lut_entry { + void *vaddr; /* virtual address. */ +}; + +struct plut_entry { + vm_paddr_t paddr; /* physical address. 
*/ +}; +#endif /* linux & _WIN32 */ struct netmap_obj_pool; @@ -1789,12 +1812,13 @@ PNMB(struct netmap_adapter *na, struct netmap_slot *slot, uint64_t *pp) { uint32_t i = slot->buf_idx; struct lut_entry *lut = na->na_lut.lut; + struct plut_entry *plut = na->na_lut.plut; void *ret = (i >= na->na_lut.objtotal) ? lut[0].vaddr : lut[i].vaddr; -#ifndef _WIN32 - *pp = (i >= na->na_lut.objtotal) ? lut[0].paddr : lut[i].paddr; +#ifdef _WIN32 + *pp = (i >= na->na_lut.objtotal) ? (uint64_t)plut[0].paddr.QuadPart : (uint64_t)plut[i].paddr.QuadPart; #else - *pp = (i >= na->na_lut.objtotal) ? (uint64_t)lut[0].paddr.QuadPart : (uint64_t)lut[i].paddr.QuadPart; + *pp = (i >= na->na_lut.objtotal) ? plut[0].paddr : plut[i].paddr; #endif return ret; } @@ -1823,7 +1847,7 @@ struct netmap_priv_d { uint32_t np_flags; /* from the ioctl */ u_int np_qfirst[NR_TXRX], np_qlast[NR_TXRX]; /* range of tx/rx rings to scan */ - uint16_t np_txpoll; /* XXX and also np_rxpoll ? */ + uint16_t np_txpoll; int np_sync_flags; /* to be passed to nm_sync */ int np_refs; /* use with NMG_LOCK held */ diff --git a/sys/dev/netmap/netmap_mem2.c b/sys/dev/netmap/netmap_mem2.c index a39aa1b3f042..1f206a1b0292 100644 --- a/sys/dev/netmap/netmap_mem2.c +++ b/sys/dev/netmap/netmap_mem2.c @@ -108,6 +108,7 @@ struct netmap_obj_pool { struct lut_entry *lut; /* virt,phys addresses, objtotal entries */ uint32_t *bitmap; /* one bit per buffer, 1 means free */ + uint32_t *invalid_bitmap;/* one bit per buffer, 1 means invalid */ uint32_t bitmap_slots; /* number of uint32 entries in bitmap */ /* ---------------------------------------------------*/ @@ -134,7 +135,7 @@ struct netmap_obj_pool { struct netmap_mem_ops { int (*nmd_get_lut)(struct netmap_mem_d *, struct netmap_lut*); - int (*nmd_get_info)(struct netmap_mem_d *, u_int *size, + int (*nmd_get_info)(struct netmap_mem_d *, uint64_t *size, u_int *memflags, uint16_t *id); vm_paddr_t (*nmd_ofstophys)(struct netmap_mem_d *, vm_ooffset_t); @@ -217,7 +218,7 @@ netmap_mem_##name(struct netmap_adapter *na, t1 a1) \ } NMD_DEFCB1(int, get_lut, struct netmap_lut *); -NMD_DEFCB3(int, get_info, u_int *, u_int *, uint16_t *); +NMD_DEFCB3(int, get_info, uint64_t *, u_int *, uint16_t *); NMD_DEFCB1(vm_paddr_t, ofstophys, vm_ooffset_t); static int netmap_mem_config(struct netmap_mem_d *); NMD_DEFCB(int, config); @@ -243,6 +244,7 @@ netmap_mem_get_id(struct netmap_mem_d *nmd) #define NMA_LOCK_INIT(n) NM_MTX_INIT((n)->nm_mtx) #define NMA_LOCK_DESTROY(n) NM_MTX_DESTROY((n)->nm_mtx) #define NMA_LOCK(n) NM_MTX_LOCK((n)->nm_mtx) +#define NMA_SPINLOCK(n) NM_MTX_SPINLOCK((n)->nm_mtx) #define NMA_UNLOCK(n) NM_MTX_UNLOCK((n)->nm_mtx) #ifdef NM_DEBUG_MEM_PUTGET @@ -291,68 +293,115 @@ netmap_mem_finalize(struct netmap_mem_d *nmd, struct netmap_adapter *na) NMA_UNLOCK(nmd); } - if (!nmd->lasterr && na->pdev) - netmap_mem_map(&nmd->pools[NETMAP_BUF_POOL], na); + if (!nmd->lasterr && na->pdev) { + nmd->lasterr = netmap_mem_map(&nmd->pools[NETMAP_BUF_POOL], na); + if (nmd->lasterr) { + netmap_mem_deref(nmd, na); + } + } return nmd->lasterr; } -void +static int +nm_isset(uint32_t *bitmap, u_int i) +{ + return bitmap[ (i>>5) ] & ( 1U << (i & 31U) ); +} + + +static int +netmap_init_obj_allocator_bitmap(struct netmap_obj_pool *p) +{ + u_int n, j; + + if (p->bitmap == NULL) { + /* Allocate the bitmap */ + n = (p->objtotal + 31) / 32; + p->bitmap = nm_os_malloc(sizeof(uint32_t) * n); + if (p->bitmap == NULL) { + D("Unable to create bitmap (%d entries) for allocator '%s'", (int)n, + p->name); + return ENOMEM; + } + p->bitmap_slots = 
n; + } else { + memset(p->bitmap, 0, p->bitmap_slots * sizeof(uint32_t)); + } + + p->objfree = 0; + /* + * Set all the bits in the bitmap that have + * corresponding buffers to 1 to indicate they are + * free. + */ + for (j = 0; j < p->objtotal; j++) { + if (p->invalid_bitmap && nm_isset(p->invalid_bitmap, j)) { + D("skipping %s %d", p->name, j); + continue; + } + p->bitmap[ (j>>5) ] |= ( 1U << (j & 31U) ); + p->objfree++; + } + + ND("%s free %u", p->name, p->objfree); + if (p->objfree == 0) + return ENOMEM; + + return 0; +} + +static int +netmap_mem_init_bitmaps(struct netmap_mem_d *nmd) +{ + int i, error = 0; + + for (i = 0; i < NETMAP_POOLS_NR; i++) { + struct netmap_obj_pool *p = &nmd->pools[i]; + + error = netmap_init_obj_allocator_bitmap(p); + if (error) + return error; + } + + /* + * buffers 0 and 1 are reserved + */ + if (nmd->pools[NETMAP_BUF_POOL].objfree < 2) { + return ENOMEM; + } + + nmd->pools[NETMAP_BUF_POOL].objfree -= 2; + if (nmd->pools[NETMAP_BUF_POOL].bitmap) { + /* XXX This check is a workaround that prevents a + * NULL pointer crash which currently happens only + * with ptnetmap guests. + * Removed shared-info --> is the bug still there? */ + nmd->pools[NETMAP_BUF_POOL].bitmap[0] = ~3U; + } + return 0; +} + +int netmap_mem_deref(struct netmap_mem_d *nmd, struct netmap_adapter *na) { + int last_user = 0; NMA_LOCK(nmd); - netmap_mem_unmap(&nmd->pools[NETMAP_BUF_POOL], na); + if (na->active_fds <= 0) + netmap_mem_unmap(&nmd->pools[NETMAP_BUF_POOL], na); if (nmd->active == 1) { - u_int i; - + last_user = 1; /* * Reset the allocator when it falls out of use so that any * pool resources leaked by unclean application exits are * reclaimed. */ - for (i = 0; i < NETMAP_POOLS_NR; i++) { - struct netmap_obj_pool *p; - u_int j; - - p = &nmd->pools[i]; - p->objfree = p->objtotal; - /* - * Reproduce the net effect of the M_ZERO malloc() - * and marking of free entries in the bitmap that - * occur in finalize_obj_allocator() - */ - memset(p->bitmap, - '\0', - sizeof(uint32_t) * ((p->objtotal + 31) / 32)); - - /* - * Set all the bits in the bitmap that have - * corresponding buffers to 1 to indicate they are - * free. - */ - for (j = 0; j < p->objtotal; j++) { - if (p->lut[j].vaddr != NULL) { - p->bitmap[ (j>>5) ] |= ( 1 << (j & 31) ); - } - } - } - - /* - * Per netmap_mem_finalize_all(), - * buffers 0 and 1 are reserved - */ - nmd->pools[NETMAP_BUF_POOL].objfree -= 2; - if (nmd->pools[NETMAP_BUF_POOL].bitmap) { - /* XXX This check is a workaround that prevents a - * NULL pointer crash which currently happens only - * with ptnetmap guests. - * Removed shared-info --> is the bug still there? */ - nmd->pools[NETMAP_BUF_POOL].bitmap[0] = ~3; - } + netmap_mem_init_bitmaps(nmd); } nmd->ops->nmd_deref(nmd); NMA_UNLOCK(nmd); + return last_user; } @@ -361,6 +410,9 @@ static int netmap_mem2_get_lut(struct netmap_mem_d *nmd, struct netmap_lut *lut) { lut->lut = nmd->pools[NETMAP_BUF_POOL].lut; +#ifdef __FreeBSD__ + lut->plut = lut->lut; +#endif lut->objtotal = nmd->pools[NETMAP_BUF_POOL].objtotal; lut->objsize = nmd->pools[NETMAP_BUF_POOL]._objsize; @@ -442,7 +494,6 @@ struct netmap_mem_d nm_mem = { /* Our memory allocator. 
*/ /* blueprint for the private memory allocators */ -extern struct netmap_mem_ops netmap_mem_private_ops; /* forward */ /* XXX clang is not happy about using name as a print format */ static const struct netmap_mem_d nm_blueprint = { .pools = { @@ -601,6 +652,48 @@ nm_mem_assign_group(struct netmap_mem_d *nmd, struct device *dev) return err; } +static struct lut_entry * +nm_alloc_lut(u_int nobj) +{ + size_t n = sizeof(struct lut_entry) * nobj; + struct lut_entry *lut; +#ifdef linux + lut = vmalloc(n); +#else + lut = nm_os_malloc(n); +#endif + return lut; +} + +static void +nm_free_lut(struct lut_entry *lut, u_int objtotal) +{ + bzero(lut, sizeof(struct lut_entry) * objtotal); +#ifdef linux + vfree(lut); +#else + nm_os_free(lut); +#endif +} + +#if defined(linux) || defined(_WIN32) +static struct plut_entry * +nm_alloc_plut(u_int nobj) +{ + size_t n = sizeof(struct plut_entry) * nobj; + struct plut_entry *lut; + lut = vmalloc(n); + return lut; +} + +static void +nm_free_plut(struct plut_entry * lut) +{ + vfree(lut); +} +#endif /* linux or _WIN32 */ + + /* * First, find the allocator that contains the requested offset, * then locate the cluster through a lookup table. @@ -613,7 +706,14 @@ netmap_mem2_ofstophys(struct netmap_mem_d* nmd, vm_ooffset_t offset) vm_paddr_t pa; struct netmap_obj_pool *p; +#if defined(__FreeBSD__) + /* This function is called by netmap_dev_pager_fault(), which holds a + * non-sleepable lock since FreeBSD 12. Since we cannot sleep, we + * spin on the trylock. */ + NMA_SPINLOCK(nmd); +#else NMA_LOCK(nmd); +#endif p = nmd->pools; for (i = 0; i < NETMAP_POOLS_NR; offset -= p[i].memtotal, i++) { @@ -640,7 +740,7 @@ netmap_mem2_ofstophys(struct netmap_mem_d* nmd, vm_ooffset_t offset) + p[NETMAP_BUF_POOL].memtotal); NMA_UNLOCK(nmd); #ifndef _WIN32 - return 0; // XXX bad address + return 0; /* bad address */ #else vm_paddr_t res; res.QuadPart = 0; @@ -676,7 +776,8 @@ PMDL win32_build_user_vm_map(struct netmap_mem_d* nmd) { int i, j; - u_int memsize, memflags, ofs = 0; + size_t memsize; + u_int memflags, ofs = 0; PMDL mainMdl, tempMdl; if (netmap_mem_get_info(nmd, &memsize, &memflags, NULL)) { @@ -746,7 +847,7 @@ netmap_mem2_get_pool_info(struct netmap_mem_d* nmd, u_int pool, u_int *clustsize } static int -netmap_mem2_get_info(struct netmap_mem_d* nmd, u_int* size, u_int *memflags, +netmap_mem2_get_info(struct netmap_mem_d* nmd, uint64_t* size, u_int *memflags, nm_memid_t *id) { int error = 0; @@ -835,7 +936,6 @@ netmap_obj_malloc(struct netmap_obj_pool *p, u_int len, uint32_t *start, uint32_ if (len > p->_objsize) { D("%s request size %d too large", p->name, len); - // XXX cannot reduce the size return NULL; } @@ -911,7 +1011,7 @@ netmap_obj_free_va(struct netmap_obj_pool *p, void *vaddr) ssize_t relofs = (ssize_t) vaddr - (ssize_t) base; /* Given address, is out of the scope of the current cluster.*/ - if (vaddr < base || relofs >= p->_clustsize) + if (base == NULL || vaddr < base || relofs >= p->_clustsize) continue; j = j + relofs / p->_objsize; @@ -923,8 +1023,11 @@ netmap_obj_free_va(struct netmap_obj_pool *p, void *vaddr) vaddr, p->name); } -#define netmap_mem_bufsize(n) \ - ((n)->pools[NETMAP_BUF_POOL]._objsize) +unsigned +netmap_mem_bufsize(struct netmap_mem_d *nmd) +{ + return nmd->pools[NETMAP_BUF_POOL]._objsize; +} #define netmap_if_malloc(n, len) netmap_obj_malloc(&(n)->pools[NETMAP_IF_POOL], len, NULL, NULL) #define netmap_if_free(n, v) netmap_obj_free_va(&(n)->pools[NETMAP_IF_POOL], (v)) @@ -934,7 +1037,7 @@ netmap_obj_free_va(struct netmap_obj_pool *p, void 
*vaddr) netmap_obj_malloc(&(n)->pools[NETMAP_BUF_POOL], netmap_mem_bufsize(n), _pos, _index) -#if 0 // XXX unused +#if 0 /* currently unused */ /* Return the index associated to the given packet buffer */ #define netmap_buf_index(n, v) \ (netmap_obj_offset(&(n)->pools[NETMAP_BUF_POOL], (v)) / NETMAP_BDG_BUF_SIZE(n)) @@ -1012,6 +1115,7 @@ netmap_new_bufs(struct netmap_mem_d *nmd, struct netmap_slot *slot, u_int n) slot[i].buf_idx = index; slot[i].len = p->_objsize; slot[i].flags = 0; + slot[i].ptr = 0; } ND("allocated %d buffers, %d available, first at %d", n, p->objfree, pos); @@ -1073,6 +1177,9 @@ netmap_reset_obj_allocator(struct netmap_obj_pool *p) if (p->bitmap) nm_os_free(p->bitmap); p->bitmap = NULL; + if (p->invalid_bitmap) + nm_os_free(p->invalid_bitmap); + p->invalid_bitmap = NULL; if (p->lut) { u_int i; @@ -1083,15 +1190,9 @@ netmap_reset_obj_allocator(struct netmap_obj_pool *p) * in the lut. */ for (i = 0; i < p->objtotal; i += p->_clustentries) { - if (p->lut[i].vaddr) - contigfree(p->lut[i].vaddr, p->_clustsize, M_NETMAP); + contigfree(p->lut[i].vaddr, p->_clustsize, M_NETMAP); } - bzero(p->lut, sizeof(struct lut_entry) * p->objtotal); -#ifdef linux - vfree(p->lut); -#else - nm_os_free(p->lut); -#endif + nm_free_lut(p->lut, p->objtotal); } p->lut = NULL; p->objtotal = 0; @@ -1201,19 +1302,6 @@ netmap_config_obj_allocator(struct netmap_obj_pool *p, u_int objtotal, u_int obj return 0; } -static struct lut_entry * -nm_alloc_lut(u_int nobj) -{ - size_t n = sizeof(struct lut_entry) * nobj; - struct lut_entry *lut; -#ifdef linux - lut = vmalloc(n); -#else - lut = nm_os_malloc(n); -#endif - return lut; -} - /* call with NMA_LOCK held */ static int netmap_finalize_obj_allocator(struct netmap_obj_pool *p) @@ -1221,6 +1309,11 @@ netmap_finalize_obj_allocator(struct netmap_obj_pool *p) int i; /* must be signed */ size_t n; + if (p->lut) { + /* already finalized, nothing to do */ + return 0; + } + /* optimistically assume we have enough memory */ p->numclusters = p->_numclusters; p->objtotal = p->_objtotal; @@ -1231,18 +1324,8 @@ netmap_finalize_obj_allocator(struct netmap_obj_pool *p) goto clean; } - /* Allocate the bitmap */ - n = (p->objtotal + 31) / 32; - p->bitmap = nm_os_malloc(sizeof(uint32_t) * n); - if (p->bitmap == NULL) { - D("Unable to create bitmap (%d entries) for allocator '%s'", (int)n, - p->name); - goto clean; - } - p->bitmap_slots = n; - /* - * Allocate clusters, init pointers and bitmap + * Allocate clusters, init pointers */ n = p->_clustsize; @@ -1270,7 +1353,6 @@ netmap_finalize_obj_allocator(struct netmap_obj_pool *p) goto out; lim = i / 2; for (i--; i >= lim; i--) { - p->bitmap[ (i>>5) ] &= ~( 1 << (i & 31) ); if (i % p->_clustentries == 0 && p->lut[i].vaddr) contigfree(p->lut[i].vaddr, n, M_NETMAP); @@ -1283,8 +1365,7 @@ netmap_finalize_obj_allocator(struct netmap_obj_pool *p) break; } /* - * Set bitmap and lut state for all buffers in the current - * cluster. + * Set lut state for all buffers in the current cluster. * * [i, lim) is the set of buffer indexes that cover the * current cluster. @@ -1294,15 +1375,13 @@ netmap_finalize_obj_allocator(struct netmap_obj_pool *p) * of p->_objsize. 
*/ for (; i < lim; i++, clust += p->_objsize) { - p->bitmap[ (i>>5) ] |= ( 1 << (i & 31) ); p->lut[i].vaddr = clust; +#if !defined(linux) && !defined(_WIN32) p->lut[i].paddr = vtophys(clust); +#endif } } - p->objfree = p->objtotal; p->memtotal = p->numclusters * p->_clustsize; - if (p->objfree == 0) - goto clean; if (netmap_verbose) D("Pre-allocated %d clusters (%d/%dKB) for '%s'", p->numclusters, p->_clustsize >> 10, @@ -1348,6 +1427,7 @@ static int netmap_mem_unmap(struct netmap_obj_pool *p, struct netmap_adapter *na) { int i, lim = p->_objtotal; + struct netmap_lut *lut = &na->na_lut; if (na == NULL || na->pdev == NULL) return 0; @@ -1355,16 +1435,23 @@ netmap_mem_unmap(struct netmap_obj_pool *p, struct netmap_adapter *na) #if defined(__FreeBSD__) (void)i; (void)lim; + (void)lut; D("unsupported on FreeBSD"); - #elif defined(_WIN32) (void)i; (void)lim; - D("unsupported on Windows"); //XXX_ale, really? + (void)lut; + D("unsupported on Windows"); #else /* linux */ - for (i = 2; i < lim; i++) { - netmap_unload_map(na, (bus_dma_tag_t) na->pdev, &p->lut[i].paddr); + ND("unmapping and freeing plut for %s", na->name); + if (lut->plut == NULL) + return 0; + for (i = 0; i < lim; i += p->_clustentries) { + if (lut->plut[i].paddr) + netmap_unload_map(na, (bus_dma_tag_t) na->pdev, &lut->plut[i].paddr, p->_clustsize); } + nm_free_plut(lut->plut); + lut->plut = NULL; #endif /* linux */ return 0; @@ -1373,23 +1460,65 @@ netmap_mem_unmap(struct netmap_obj_pool *p, struct netmap_adapter *na) static int netmap_mem_map(struct netmap_obj_pool *p, struct netmap_adapter *na) { -#if defined(__FreeBSD__) - D("unsupported on FreeBSD"); -#elif defined(_WIN32) - D("unsupported on Windows"); //XXX_ale, really? -#else /* linux */ - int i, lim = p->_objtotal; + int error = 0; + int i, lim = p->objtotal; + struct netmap_lut *lut = &na->na_lut; if (na->pdev == NULL) return 0; - for (i = 2; i < lim; i++) { - netmap_load_map(na, (bus_dma_tag_t) na->pdev, &p->lut[i].paddr, - p->lut[i].vaddr); +#if defined(__FreeBSD__) + (void)i; + (void)lim; + (void)lut; + D("unsupported on FreeBSD"); +#elif defined(_WIN32) + (void)i; + (void)lim; + (void)lut; + D("unsupported on Windows"); +#else /* linux */ + + if (lut->plut != NULL) { + ND("plut already allocated for %s", na->name); + return 0; } + + ND("allocating physical lut for %s", na->name); + lut->plut = nm_alloc_plut(lim); + if (lut->plut == NULL) { + D("Failed to allocate physical lut for %s", na->name); + return ENOMEM; + } + + for (i = 0; i < lim; i += p->_clustentries) { + lut->plut[i].paddr = 0; + } + + for (i = 0; i < lim; i += p->_clustentries) { + int j; + + if (p->lut[i].vaddr == NULL) + continue; + + error = netmap_load_map(na, (bus_dma_tag_t) na->pdev, &lut->plut[i].paddr, + p->lut[i].vaddr, p->_clustsize); + if (error) { + D("Failed to map cluster #%d from the %s pool", i, p->name); + break; + } + + for (j = 1; j < p->_clustentries; j++) { + lut->plut[i + j].paddr = lut->plut[i + j - 1].paddr + p->_objsize; + } + } + + if (error) + netmap_mem_unmap(p, na); + #endif /* linux */ - return 0; + return error; } static int @@ -1406,9 +1535,10 @@ netmap_mem_finalize_all(struct netmap_mem_d *nmd) goto error; nmd->nm_totalsize += nmd->pools[i].memtotal; } - /* buffers 0 and 1 are reserved */ - nmd->pools[NETMAP_BUF_POOL].objfree -= 2; - nmd->pools[NETMAP_BUF_POOL].bitmap[0] = ~3; + nmd->lasterr = netmap_mem_init_bitmaps(nmd); + if (nmd->lasterr) + goto error; + nmd->flags |= NETMAP_MEM_FINALIZED; if (netmap_verbose) @@ -1430,23 +1560,25 @@ error: /* * allocator for private 
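The Linux branch of netmap_mem_map() above DMA-maps only the first object of each cluster and derives the bus addresses of the remaining _clustentries-1 objects by adding _objsize, since each cluster is physically contiguous. A standalone sketch of that derivation, with fake_dma_map() standing in for netmap_load_map():

    #include <stdint.h>
    #include <stdio.h>

    #define CLUSTENTRIES 4      /* objects per contiguous cluster */
    #define OBJSIZE      2048
    #define NOBJ         8

    /* stand-in for netmap_load_map(): pretend the IOMMU hands back
     * a bus address for the whole cluster */
    static uint64_t fake_dma_map(int cluster) { return 0x100000ULL + cluster * 0x10000ULL; }

    int main(void)
    {
        uint64_t plut[NOBJ];
        int i, j;

        for (i = 0; i < NOBJ; i += CLUSTENTRIES) {
            /* map the first object of the cluster ... */
            plut[i] = fake_dma_map(i / CLUSTENTRIES);
            /* ... and derive the others, as netmap_mem_map() does */
            for (j = 1; j < CLUSTENTRIES; j++)
                plut[i + j] = plut[i + j - 1] + OBJSIZE;
        }
        for (i = 0; i < NOBJ; i++)
            printf("obj %d -> paddr 0x%llx\n", i, (unsigned long long)plut[i]);
        return 0;
    }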
memory */ -static struct netmap_mem_d * -_netmap_mem_private_new(struct netmap_obj_params *p, int *perr) +static void * +_netmap_mem_private_new(size_t size, struct netmap_obj_params *p, + struct netmap_mem_ops *ops, int *perr) { struct netmap_mem_d *d = NULL; int i, err = 0; - d = nm_os_malloc(sizeof(struct netmap_mem_d)); + d = nm_os_malloc(size); if (d == NULL) { err = ENOMEM; goto error; } *d = nm_blueprint; + d->ops = ops; err = nm_mem_assign_id(d); if (err) - goto error; + goto error_free; snprintf(d->name, NM_MEM_NAMESZ, "%d", d->nm_id); for (i = 0; i < NETMAP_POOLS_NR; i++) { @@ -1461,14 +1593,18 @@ _netmap_mem_private_new(struct netmap_obj_params *p, int *perr) err = netmap_mem_config(d); if (err) - goto error; + goto error_rel_id; d->flags &= ~NETMAP_MEM_FINALIZED; return d; +error_rel_id: + NMA_LOCK_DESTROY(d); + nm_mem_release_id(d); +error_free: + nm_os_free(d); error: - netmap_mem_delete(d); if (perr) *perr = err; return NULL; @@ -1480,7 +1616,7 @@ netmap_mem_private_new(u_int txr, u_int txd, u_int rxr, u_int rxd, { struct netmap_mem_d *d = NULL; struct netmap_obj_params p[NETMAP_POOLS_NR]; - int i, err = 0; + int i; u_int v, maxd; /* account for the fake host rings */ txr++; @@ -1527,16 +1663,9 @@ netmap_mem_private_new(u_int txr, u_int txd, u_int rxr, u_int rxd, p[NETMAP_BUF_POOL].num, p[NETMAP_BUF_POOL].size); - d = _netmap_mem_private_new(p, perr); - if (d == NULL) - goto error; + d = _netmap_mem_private_new(sizeof(*d), p, &netmap_mem_global_ops, perr); return d; -error: - netmap_mem_delete(d); - if (perr) - *perr = err; - return NULL; } @@ -1581,14 +1710,14 @@ netmap_mem2_finalize(struct netmap_mem_d *nmd) int err; /* update configuration if changed */ - if (netmap_mem2_config(nmd)) + if (netmap_mem_config(nmd)) goto out1; nmd->active++; if (nmd->flags & NETMAP_MEM_FINALIZED) { /* may happen if config is not changed */ - ND("nothing to do"); + D("nothing to do"); goto out; } @@ -1621,12 +1750,21 @@ netmap_mem2_delete(struct netmap_mem_d *nmd) nm_os_free(nmd); } +#ifdef WITH_EXTMEM +/* doubly linked list of all existing external allocators */ +static struct netmap_mem_ext *netmap_mem_ext_list = NULL; +NM_MTX_T nm_mem_ext_list_lock; +#endif /* WITH_EXTMEM */ + int netmap_mem_init(void) { NM_MTX_INIT(nm_mem_list_lock); NMA_LOCK_INIT(&nm_mem); netmap_mem_get(&nm_mem); +#ifdef WITH_EXTMEM + NM_MTX_INIT(nm_mem_ext_list_lock); +#endif /* WITH_EXTMEM */ return (0); } @@ -1648,10 +1786,13 @@ netmap_free_rings(struct netmap_adapter *na) struct netmap_ring *ring = kring->ring; if (ring == NULL || kring->users > 0 || (kring->nr_kflags & NKR_NEEDRING)) { - ND("skipping ring %s (ring %p, users %d)", - kring->name, ring, kring->users); + if (netmap_verbose) + D("NOT deleting ring %s (ring %p, users %d needring %d)", + kring->name, ring, kring->users, kring->nr_kflags & NKR_NEEDRING); continue; } + if (netmap_verbose) + D("deleting ring %s", kring->name); if (i != nma_get_nrings(na, t) || na->na_flags & NAF_HOST_RINGS) netmap_free_bufs(na->nm_mem, ring->slot, kring->nkr_num_slots); netmap_ring_free(na->nm_mem, ring); @@ -1684,9 +1825,13 @@ netmap_mem2_rings_create(struct netmap_adapter *na) if (ring || (!kring->users && !(kring->nr_kflags & NKR_NEEDRING))) { /* unneeded, or already created by somebody else */ - ND("skipping ring %s", kring->name); + if (netmap_verbose) + D("NOT creating ring %s (ring %p, users %d needring %d)", + kring->name, ring, kring->users, kring->nr_kflags & NKR_NEEDRING); continue; } + if (netmap_verbose) + D("creating %s", kring->name); ndesc =
kring->nkr_num_slots; len = sizeof(struct netmap_ring) + ndesc * sizeof(struct netmap_slot); @@ -1707,7 +1852,7 @@ netmap_mem2_rings_create(struct netmap_adapter *na) ring->head = kring->rhead; ring->cur = kring->rcur; ring->tail = kring->rtail; - *(uint16_t *)(uintptr_t)&ring->nr_buf_size = + *(uint32_t *)(uintptr_t)&ring->nr_buf_size = netmap_mem_bufsize(na->nm_mem); ND("%s h %d c %d t %d", kring->name, ring->head, ring->cur, ring->tail); @@ -1876,7 +2021,7 @@ netmap_mem_pools_info_get(struct nmreq *nmr, struct netmap_mem_d *nmd) uintptr_t *pp = (uintptr_t *)&nmr->nr_arg1; struct netmap_pools_info *upi = (struct netmap_pools_info *)(*pp); struct netmap_pools_info pi; - unsigned int memsize; + uint64_t memsize; uint16_t memid; int ret; @@ -1910,6 +2055,340 @@ netmap_mem_pools_info_get(struct nmreq *nmr, struct netmap_mem_d *nmd) return 0; } +#ifdef WITH_EXTMEM +struct netmap_mem_ext { + struct netmap_mem_d up; + + struct page **pages; + int nr_pages; + struct netmap_mem_ext *next, *prev; +}; + +/* call with nm_mem_list_lock held */ +static void +netmap_mem_ext_register(struct netmap_mem_ext *e) +{ + NM_MTX_LOCK(nm_mem_ext_list_lock); + if (netmap_mem_ext_list) + netmap_mem_ext_list->prev = e; + e->next = netmap_mem_ext_list; + netmap_mem_ext_list = e; + e->prev = NULL; + NM_MTX_UNLOCK(nm_mem_ext_list_lock); +} + +/* call with nm_mem_list_lock held */ +static void +netmap_mem_ext_unregister(struct netmap_mem_ext *e) +{ + if (e->prev) + e->prev->next = e->next; + else + netmap_mem_ext_list = e->next; + if (e->next) + e->next->prev = e->prev; + e->prev = e->next = NULL; +} + +static int +netmap_mem_ext_same_pages(struct netmap_mem_ext *e, struct page **pages, int nr_pages) +{ + int i; + + if (e->nr_pages != nr_pages) + return 0; + + for (i = 0; i < nr_pages; i++) + if (pages[i] != e->pages[i]) + return 0; + + return 1; +} + +static struct netmap_mem_ext * +netmap_mem_ext_search(struct page **pages, int nr_pages) +{ + struct netmap_mem_ext *e; + + NM_MTX_LOCK(nm_mem_ext_list_lock); + for (e = netmap_mem_ext_list; e; e = e->next) { + if (netmap_mem_ext_same_pages(e, pages, nr_pages)) { + netmap_mem_get(&e->up); + break; + } + } + NM_MTX_UNLOCK(nm_mem_ext_list_lock); + return e; +} + + +static void +netmap_mem_ext_free_pages(struct page **pages, int nr_pages) +{ + int i; + + for (i = 0; i < nr_pages; i++) { + kunmap(pages[i]); + put_page(pages[i]); + } + nm_os_vfree(pages); +} + +static void +netmap_mem_ext_delete(struct netmap_mem_d *d) +{ + int i; + struct netmap_mem_ext *e = + (struct netmap_mem_ext *)d; + + netmap_mem_ext_unregister(e); + + for (i = 0; i < NETMAP_POOLS_NR; i++) { + struct netmap_obj_pool *p = &d->pools[i]; + + if (p->lut) { + nm_free_lut(p->lut, p->objtotal); + p->lut = NULL; + } + } + if (e->pages) { + netmap_mem_ext_free_pages(e->pages, e->nr_pages); + e->pages = NULL; + e->nr_pages = 0; + } + netmap_mem2_delete(d); +} + +static int +netmap_mem_ext_config(struct netmap_mem_d *nmd) +{ + return 0; +} + +struct netmap_mem_ops netmap_mem_ext_ops = { + .nmd_get_lut = netmap_mem2_get_lut, + .nmd_get_info = netmap_mem2_get_info, + .nmd_ofstophys = netmap_mem2_ofstophys, + .nmd_config = netmap_mem_ext_config, + .nmd_finalize = netmap_mem2_finalize, + .nmd_deref = netmap_mem2_deref, + .nmd_delete = netmap_mem_ext_delete, + .nmd_if_offset = netmap_mem2_if_offset, + .nmd_if_new = netmap_mem2_if_new, + .nmd_if_delete = netmap_mem2_if_delete, + .nmd_rings_create = netmap_mem2_rings_create, + .nmd_rings_delete = netmap_mem2_rings_delete +}; + +struct netmap_mem_d * 
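netmap_mem_ext_register()/netmap_mem_ext_unregister() above keep all external allocators on a head-inserted doubly linked list so that netmap_mem_ext_search() can reuse an allocator already built over the same user pages. The same list manipulation in isolation (locking omitted, names illustrative):

    #include <stddef.h>
    #include <stdio.h>

    struct ext_mem {
        int id;
        struct ext_mem *next, *prev;
    };

    static struct ext_mem *ext_list;

    static void ext_register(struct ext_mem *e)
    {
        if (ext_list)
            ext_list->prev = e;
        e->next = ext_list;
        e->prev = NULL;
        ext_list = e;
    }

    static void ext_unregister(struct ext_mem *e)
    {
        if (e->prev)
            e->prev->next = e->next;
        else
            ext_list = e->next;   /* e was the head */
        if (e->next)
            e->next->prev = e->prev;
        e->prev = e->next = NULL;
    }

    int main(void)
    {
        struct ext_mem a = { 1, NULL, NULL }, b = { 2, NULL, NULL };
        struct ext_mem *e;

        ext_register(&a);
        ext_register(&b);
        ext_unregister(&a);
        for (e = ext_list; e; e = e->next)
            printf("ext allocator %d\n", e->id);
        return 0;
    }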
+netmap_mem_ext_create(struct nmreq *nmr, int *perror) +{ + uintptr_t p = *(uintptr_t *)&nmr->nr_arg1; + struct netmap_pools_info pi; + int error = 0; + unsigned long end, start; + int nr_pages, res, i, j; + struct page **pages = NULL; + struct netmap_mem_ext *nme; + char *clust; + size_t off; + + error = copyin((void *)p, &pi, sizeof(pi)); + if (error) + goto out; + + // XXX sanity checks + if (pi.if_pool_objtotal == 0) + pi.if_pool_objtotal = netmap_min_priv_params[NETMAP_IF_POOL].num; + if (pi.if_pool_objsize == 0) + pi.if_pool_objsize = netmap_min_priv_params[NETMAP_IF_POOL].size; + if (pi.ring_pool_objtotal == 0) + pi.ring_pool_objtotal = netmap_min_priv_params[NETMAP_RING_POOL].num; + if (pi.ring_pool_objsize == 0) + pi.ring_pool_objsize = netmap_min_priv_params[NETMAP_RING_POOL].size; + if (pi.buf_pool_objtotal == 0) + pi.buf_pool_objtotal = netmap_min_priv_params[NETMAP_BUF_POOL].num; + if (pi.buf_pool_objsize == 0) + pi.buf_pool_objsize = netmap_min_priv_params[NETMAP_BUF_POOL].size; + D("if %d %d ring %d %d buf %d %d", + pi.if_pool_objtotal, pi.if_pool_objsize, + pi.ring_pool_objtotal, pi.ring_pool_objsize, + pi.buf_pool_objtotal, pi.buf_pool_objsize); + + end = (p + pi.memsize + PAGE_SIZE - 1) >> PAGE_SHIFT; + start = p >> PAGE_SHIFT; + nr_pages = end - start; + + pages = nm_os_vmalloc(nr_pages * sizeof(*pages)); + if (pages == NULL) { + error = ENOMEM; + goto out; + } + +#ifdef NETMAP_LINUX_HAVE_GUP_4ARGS + res = get_user_pages_unlocked( + p, + nr_pages, + pages, + FOLL_WRITE | FOLL_GET | FOLL_SPLIT | FOLL_POPULATE); // XXX check other flags +#elif defined(NETMAP_LINUX_HAVE_GUP_5ARGS) + res = get_user_pages_unlocked( + p, + nr_pages, + 1, /* write */ + 0, /* don't force */ + pages); +#elif defined(NETMAP_LINUX_HAVE_GUP_7ARGS) + res = get_user_pages_unlocked( + current, + current->mm, + p, + nr_pages, + 1, /* write */ + 0, /* don't force */ + pages); +#else + down_read(¤t->mm->mmap_sem); + res = get_user_pages( + current, + current->mm, + p, + nr_pages, + 1, /* write */ + 0, /* don't force */ + pages, + NULL); + up_read(¤t->mm->mmap_sem); +#endif /* NETMAP_LINUX_GUP */ + + if (res < nr_pages) { + error = EFAULT; + goto out_unmap; + } + + nme = netmap_mem_ext_search(pages, nr_pages); + if (nme) { + netmap_mem_ext_free_pages(pages, nr_pages); + return &nme->up; + } + D("not found, creating new"); + + nme = _netmap_mem_private_new(sizeof(*nme), + (struct netmap_obj_params[]){ + { pi.if_pool_objsize, pi.if_pool_objtotal }, + { pi.ring_pool_objsize, pi.ring_pool_objtotal }, + { pi.buf_pool_objsize, pi.buf_pool_objtotal }}, + &netmap_mem_ext_ops, + &error); + if (nme == NULL) + goto out_unmap; + + /* from now on pages will be released by nme destructor; + * we let res = 0 to prevent release in out_unmap below + */ + res = 0; + nme->pages = pages; + nme->nr_pages = nr_pages; + nme->up.flags |= NETMAP_MEM_EXT; + + clust = kmap(*pages); + off = 0; + for (i = 0; i < NETMAP_POOLS_NR; i++) { + struct netmap_obj_pool *p = &nme->up.pools[i]; + struct netmap_obj_params *o = &nme->up.params[i]; + + p->_objsize = o->size; + p->_clustsize = o->size; + p->_clustentries = 1; + + p->lut = nm_alloc_lut(o->num); + if (p->lut == NULL) { + error = ENOMEM; + goto out_delete; + } + + p->bitmap_slots = (o->num + sizeof(uint32_t) - 1) / sizeof(uint32_t); + p->invalid_bitmap = nm_os_malloc(sizeof(uint32_t) * p->bitmap_slots); + if (p->invalid_bitmap == NULL) { + error = ENOMEM; + goto out_delete; + } + + if (nr_pages == 0) { + p->objtotal = 0; + p->memtotal = 0; + p->objfree = 0; + continue; + } + + for (j = 
0; j < o->num && nr_pages > 0; j++) { + size_t noff; + size_t skip; + + p->lut[j].vaddr = clust + off; + ND("%s %d at %p", p->name, j, p->lut[j].vaddr); + noff = off + p->_objsize; + if (noff < PAGE_SIZE) { + off = noff; + continue; + } + ND("too big, recomputing offset..."); + skip = PAGE_SIZE - (off & PAGE_MASK); + while (noff >= PAGE_SIZE) { + noff -= skip; + pages++; + nr_pages--; + ND("noff %zu page %p nr_pages %d", noff, + page_to_virt(*pages), nr_pages); + if (noff > 0 && !nm_isset(p->invalid_bitmap, j) && + (nr_pages == 0 || *pages != *(pages - 1) + 1)) + { + /* out of space or non contiguous, + * drop this object + * */ + p->invalid_bitmap[ (j>>5) ] |= 1U << (j & 31U); + ND("non contiguous at off %zu, drop", noff); + } + if (nr_pages == 0) + break; + skip = PAGE_SIZE; + } + off = noff; + if (nr_pages > 0) + clust = kmap(*pages); + } + p->objtotal = j; + p->numclusters = p->objtotal; + p->memtotal = j * p->_objsize; + ND("%d memtotal %u", j, p->memtotal); + } + + /* skip the first netmap_if, where the pools info reside */ + { + struct netmap_obj_pool *p = &nme->up.pools[NETMAP_IF_POOL]; + p->invalid_bitmap[0] |= 1U; + } + + netmap_mem_ext_register(nme); + + return &nme->up; + +out_delete: + netmap_mem_put(&nme->up); +out_unmap: + for (i = 0; i < res; i++) + put_page(pages[i]); + if (res) + nm_os_free(pages); +out: + if (perror) + *perror = error; + return NULL; + +} +#endif /* WITH_EXTMEM */ + + #ifdef WITH_PTNETMAP_GUEST struct mem_pt_if { struct mem_pt_if *next; @@ -2020,7 +2499,7 @@ netmap_mem_pt_guest_get_lut(struct netmap_mem_d *nmd, struct netmap_lut *lut) } static int -netmap_mem_pt_guest_get_info(struct netmap_mem_d *nmd, u_int *size, +netmap_mem_pt_guest_get_info(struct netmap_mem_d *nmd, uint64_t *size, u_int *memflags, uint16_t *id) { int error = 0; @@ -2118,7 +2597,6 @@ netmap_mem_pt_guest_finalize(struct netmap_mem_d *nmd) for (i = 0; i < nbuffers; i++) { ptnmd->buf_lut.lut[i].vaddr = vaddr; - ptnmd->buf_lut.lut[i].paddr = paddr; vaddr += bufsize; paddr += bufsize; } @@ -2256,11 +2734,17 @@ out: static void netmap_mem_pt_guest_rings_delete(struct netmap_adapter *na) { - /* TODO: remove?? 
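The placement loop above walks the pinned pages and, whenever an object crosses into a page that is not physically adjacent to the previous one, marks it in invalid_bitmap so the allocator never hands it out. A much simplified model of that contiguity rule, using made-up page frame numbers:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SIZE 4096
    #define OBJSIZE   3000
    #define NPAGES    4

    int main(void)
    {
        /* pretend page frame numbers returned by get_user_pages();
         * pages 1 and 2 are not adjacent */
        uint64_t pfn[NPAGES] = { 100, 101, 200, 201 };
        uint64_t off = 0;       /* linear offset in the user mapping */
        int j;

        for (j = 0; (off + OBJSIZE) <= (uint64_t)NPAGES * PAGE_SIZE; j++) {
            uint64_t first = off / PAGE_SIZE;
            uint64_t last = (off + OBJSIZE - 1) / PAGE_SIZE;
            uint64_t p;
            int ok = 1;

            for (p = first; p < last; p++)
                if (pfn[p] + 1 != pfn[p + 1])
                    ok = 0;     /* straddles non-contiguous pages */
            printf("obj %d at off %llu: %s\n", j,
                   (unsigned long long)off, ok ? "ok" : "invalid");
            off += OBJSIZE;
        }
        return 0;
    }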
*/ #if 0 - struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)na->nm_mem; - struct mem_pt_if *ptif = netmap_mem_pt_guest_ifp_lookup(na->nm_mem, - na->ifp); + enum txrx t; + + for_rx_tx(t) { + u_int i; + for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { + struct netmap_kring *kring = &NMR(na, t)[i]; + + kring->ring = NULL; + } + } #endif } diff --git a/sys/dev/netmap/netmap_mem2.h b/sys/dev/netmap/netmap_mem2.h index 81f601c4ca9f..f0bee7a33fd5 100644 --- a/sys/dev/netmap/netmap_mem2.h +++ b/sys/dev/netmap/netmap_mem2.h @@ -136,9 +136,9 @@ struct netmap_if * netmap_mem_if_new(struct netmap_adapter *, struct netmap_priv void netmap_mem_if_delete(struct netmap_adapter *, struct netmap_if *); int netmap_mem_rings_create(struct netmap_adapter *); void netmap_mem_rings_delete(struct netmap_adapter *); -void netmap_mem_deref(struct netmap_mem_d *, struct netmap_adapter *); +int netmap_mem_deref(struct netmap_mem_d *, struct netmap_adapter *); int netmap_mem2_get_pool_info(struct netmap_mem_d *, u_int, u_int *, u_int *); -int netmap_mem_get_info(struct netmap_mem_d *, u_int *size, u_int *memflags, uint16_t *id); +int netmap_mem_get_info(struct netmap_mem_d *, uint64_t *size, u_int *memflags, uint16_t *id); ssize_t netmap_mem_if_offset(struct netmap_mem_d *, const void *vaddr); struct netmap_mem_d* netmap_mem_private_new( u_int txr, u_int txd, u_int rxr, u_int rxd, u_int extra_bufs, u_int npipes, int* error); @@ -149,6 +149,14 @@ void netmap_mem_delete(struct netmap_mem_d *); struct netmap_mem_d* __netmap_mem_get(struct netmap_mem_d *, const char *, int); void __netmap_mem_put(struct netmap_mem_d *, const char *, int); struct netmap_mem_d* netmap_mem_find(nm_memid_t); +unsigned netmap_mem_bufsize(struct netmap_mem_d *nmd); + +#ifdef WITH_EXTMEM +struct netmap_mem_d* netmap_mem_ext_create(struct nmreq *, int *); +#else /* !WITH_EXTMEM */ +#define netmap_mem_ext_create(nmr, _perr) \ + ({ int *perr = _perr; if (perr) *(perr) = EOPNOTSUPP; NULL; }) +#endif /* WITH_EXTMEM */ #ifdef WITH_PTNETMAP_GUEST struct netmap_mem_d* netmap_mem_pt_guest_new(struct ifnet *, @@ -163,6 +171,7 @@ int netmap_mem_pools_info_get(struct nmreq *, struct netmap_mem_d *); #define NETMAP_MEM_PRIVATE 0x2 /* allocator uses private address space */ #define NETMAP_MEM_IO 0x4 /* the underlying memory is mmapped I/O */ +#define NETMAP_MEM_EXT 0x10 /* external memory (not remappable) */ uint32_t netmap_extra_alloc(struct netmap_adapter *, uint32_t *, uint32_t n); diff --git a/sys/dev/netmap/netmap_monitor.c b/sys/dev/netmap/netmap_monitor.c index 8b788920ff80..e7cc05f5ab0f 100644 --- a/sys/dev/netmap/netmap_monitor.c +++ b/sys/dev/netmap/netmap_monitor.c @@ -66,9 +66,7 @@ * has released them. In most cases, the consumer is a userspace * application which may have modified the frame contents. * - * Several copy monitors may be active on any ring. Zero-copy monitors, - * instead, need exclusive access to each of the monitored rings. This may - * change in the future, if we implement zero-copy monitor chaining. + * Several copy or zero-copy monitors may be active on any ring. 
* */ @@ -263,7 +261,7 @@ netmap_monitor_add(struct netmap_kring *mkring, struct netmap_kring *kring, int if (zmon && z->prev != NULL) kring = z->prev; - /* sinchronize with concurrently running nm_sync()s */ + /* synchronize with concurrently running nm_sync()s */ nm_kr_stop(kring, NM_KR_LOCKED); if (nm_monitor_none(kring)) { @@ -329,7 +327,7 @@ netmap_monitor_del(struct netmap_kring *mkring, struct netmap_kring *kring) if (zmon && mz->prev != NULL) kring = mz->prev; - /* sinchronize with concurrently running nm_sync()s */ + /* synchronize with concurrently running nm_sync()s */ nm_kr_stop(kring, NM_KR_LOCKED); if (zmon) { diff --git a/sys/dev/netmap/netmap_offloadings.c b/sys/dev/netmap/netmap_offloadings.c index 6dc32b13ff8e..e0b96a8e52a2 100644 --- a/sys/dev/netmap/netmap_offloadings.c +++ b/sys/dev/netmap/netmap_offloadings.c @@ -132,7 +132,7 @@ gso_fix_segment(uint8_t *pkt, size_t len, u_int ipv4, u_int iphlen, u_int tcp, ND("TCP/UDP csum %x", be16toh(*check)); } -static int +static inline int vnet_hdr_is_bad(struct nm_vnet_hdr *vh) { uint8_t gso_type = vh->gso_type & ~VIRTIO_NET_HDR_GSO_ECN; @@ -170,7 +170,7 @@ bdg_mismatch_datapath(struct netmap_vp_adapter *na, u_int dst_slots = 0; if (unlikely(ft_p == ft_end)) { - RD(3, "No source slots to process"); + RD(1, "No source slots to process"); return; } @@ -189,11 +189,11 @@ bdg_mismatch_datapath(struct netmap_vp_adapter *na, /* Initial sanity check on the source virtio-net header. If * something seems wrong, just drop the packet. */ if (src_len < na->up.virt_hdr_len) { - RD(3, "Short src vnet header, dropping"); + RD(1, "Short src vnet header, dropping"); return; } - if (vnet_hdr_is_bad(vh)) { - RD(3, "Bad src vnet header, dropping"); + if (unlikely(vnet_hdr_is_bad(vh))) { + RD(1, "Bad src vnet header, dropping"); return; } } @@ -266,7 +266,7 @@ bdg_mismatch_datapath(struct netmap_vp_adapter *na, if (dst_slots >= *howmany) { /* We still have work to do, but we've run out of * dst slots, so we have to drop the packet. */ - RD(3, "Not enough slots, dropping GSO packet"); + ND(1, "Not enough slots, dropping GSO packet"); return; } @@ -281,7 +281,7 @@ bdg_mismatch_datapath(struct netmap_vp_adapter *na, * encapsulation. 
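RD() is netmap's rate-limited variant of the D() debug macro (at most lps messages per second per call site), which is why the hunks above can afford level 1 on datapath drop messages. A self-contained approximation of the idea (GNU-style variadic macro), not the kernel macro itself:

    #include <stdio.h>
    #include <time.h>

    /* allow at most lps messages per second from this call site */
    #define RATE_LIMITED_LOG(lps, fmt, ...)                           \
        do {                                                          \
            static time_t _t0; static int _cnt;                       \
            time_t _now = time(NULL);                                 \
            if (_now != _t0) { _t0 = _now; _cnt = 0; }                \
            if (_cnt++ < (lps))                                       \
                fprintf(stderr, fmt "\n", ##__VA_ARGS__);             \
        } while (0)

    int main(void)
    {
        int i;

        for (i = 0; i < 1000; i++)
            RATE_LIMITED_LOG(1, "dropping packet %d", i);
        return 0;
    }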
*/ for (;;) { if (src_len < ethhlen) { - RD(3, "Short GSO fragment [eth], dropping"); + RD(1, "Short GSO fragment [eth], dropping"); return; } ethertype = be16toh(*((uint16_t *) @@ -297,7 +297,7 @@ bdg_mismatch_datapath(struct netmap_vp_adapter *na, (gso_hdr + ethhlen); if (src_len < ethhlen + 20) { - RD(3, "Short GSO fragment " + RD(1, "Short GSO fragment " "[IPv4], dropping"); return; } @@ -310,14 +310,14 @@ bdg_mismatch_datapath(struct netmap_vp_adapter *na, iphlen = 40; break; default: - RD(3, "Unsupported ethertype, " + RD(1, "Unsupported ethertype, " "dropping GSO packet"); return; } ND(3, "type=%04x", ethertype); if (src_len < ethhlen + iphlen) { - RD(3, "Short GSO fragment [IP], dropping"); + RD(1, "Short GSO fragment [IP], dropping"); return; } @@ -329,7 +329,7 @@ bdg_mismatch_datapath(struct netmap_vp_adapter *na, (gso_hdr + ethhlen + iphlen); if (src_len < ethhlen + iphlen + 20) { - RD(3, "Short GSO fragment " + RD(1, "Short GSO fragment " "[TCP], dropping"); return; } @@ -340,7 +340,7 @@ bdg_mismatch_datapath(struct netmap_vp_adapter *na, } if (src_len < gso_hdr_len) { - RD(3, "Short GSO fragment [TCP/UDP], dropping"); + RD(1, "Short GSO fragment [TCP/UDP], dropping"); return; } diff --git a/sys/dev/netmap/netmap_pipe.c b/sys/dev/netmap/netmap_pipe.c index 80843403b996..48dde5382f77 100644 --- a/sys/dev/netmap/netmap_pipe.c +++ b/sys/dev/netmap/netmap_pipe.c @@ -81,7 +81,8 @@ static int netmap_default_pipes = 0; /* ignored, kept for compatibility */ SYSBEGIN(vars_pipes); SYSCTL_DECL(_dev_netmap); -SYSCTL_INT(_dev_netmap, OID_AUTO, default_pipes, CTLFLAG_RW, &netmap_default_pipes, 0 , ""); +SYSCTL_INT(_dev_netmap, OID_AUTO, default_pipes, CTLFLAG_RW, + &netmap_default_pipes, 0, "For compatibility only"); SYSEND; /* allocate the pipe array in the parent adapter */ @@ -182,6 +183,7 @@ netmap_pipe_txsync(struct netmap_kring *txkring, int flags) u_int j, k, lim_tx = txkring->nkr_num_slots - 1, lim_rx = rxkring->nkr_num_slots - 1; int m, busy; + struct netmap_ring *txring = txkring->ring, *rxring = rxkring->ring; ND("%p: %s %x -> %s", txkring, txkring->name, flags, rxkring->name); ND(2, "before: hwcur %d hwtail %d cur %d head %d tail %d", txkring->nr_hwcur, txkring->nr_hwtail, @@ -208,18 +210,18 @@ netmap_pipe_txsync(struct netmap_kring *txkring, int flags) } while (limit-- > 0) { - struct netmap_slot *rs = &rxkring->ring->slot[j]; - struct netmap_slot *ts = &txkring->ring->slot[k]; + struct netmap_slot *rs = &rxring->slot[j]; + struct netmap_slot *ts = &txring->slot[k]; struct netmap_slot tmp; - /* swap the slots */ - tmp = *rs; - *rs = *ts; - *ts = tmp; + __builtin_prefetch(ts + 1); - /* report the buffer change */ - ts->flags |= NS_BUF_CHANGED; + /* swap the slots and report the buffer change */ + tmp = *rs; + tmp.flags |= NS_BUF_CHANGED; + *rs = *ts; rs->flags |= NS_BUF_CHANGED; + *ts = tmp; j = nm_next(j, lim_rx); k = nm_next(k, lim_tx); diff --git a/sys/dev/netmap/netmap_pt.c b/sys/dev/netmap/netmap_pt.c index d3544a5b1728..edb49dc504ac 100644 --- a/sys/dev/netmap/netmap_pt.c +++ b/sys/dev/netmap/netmap_pt.c @@ -169,19 +169,19 @@ rate_batch_stats_update(struct rate_batch_stats *bf, uint32_t pre_tail, #endif /* RATE */ struct ptnetmap_state { - /* Kthreads. */ - struct nm_kctx **kctxs; + /* Kthreads. */ + struct nm_kctx **kctxs; /* Shared memory with the guest (TX/RX) */ struct ptnet_csb_gh __user *csb_gh; struct ptnet_csb_hg __user *csb_hg; - bool stopped; + bool stopped; - /* Netmap adapter wrapping the backend. 
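The netmap_pipe_txsync() hunk above caches the ring pointers, prefetches the next slot, and swaps whole slot descriptors instead of copying payloads; setting NS_BUF_CHANGED on both sides is what keeps the swap zero-copy while still telling each owner that its buffer index changed. The swap in isolation:

    #include <stdint.h>
    #include <stdio.h>

    #define NS_BUF_CHANGED 0x0001

    struct slot { uint32_t buf_idx; uint16_t len; uint16_t flags; };

    /* exchange the buffers attached to a tx and an rx slot; both sides
     * must learn that their buffer index changed */
    static void slot_swap(struct slot *ts, struct slot *rs)
    {
        struct slot tmp = *rs;

        tmp.flags |= NS_BUF_CHANGED;
        *rs = *ts;
        rs->flags |= NS_BUF_CHANGED;
        *ts = tmp;
    }

    int main(void)
    {
        struct slot tx = { 7, 100, 0 }, rx = { 42, 0, 0 };

        slot_swap(&tx, &rx);
        printf("tx.buf_idx=%u rx.buf_idx=%u rx.len=%u flags=%x/%x\n",
               tx.buf_idx, rx.buf_idx, rx.len, tx.flags, rx.flags);
        return 0;
    }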
*/ - struct netmap_pt_host_adapter *pth_na; + /* Netmap adapter wrapping the backend. */ + struct netmap_pt_host_adapter *pth_na; - IFRATE(struct rate_context rate_ctx;) + IFRATE(struct rate_context rate_ctx;) }; static inline void @@ -1268,13 +1268,11 @@ netmap_get_pt_host_na(struct nmreq *nmr, struct netmap_adapter **na, } *na = &pth_na->up; - netmap_adapter_get(*na); - /* set parent busy, because attached for ptnetmap */ parent->na_flags |= NAF_BUSY; - strncpy(pth_na->up.name, parent->name, sizeof(pth_na->up.name)); strcat(pth_na->up.name, "-PTN"); + netmap_adapter_get(*na); DBG(D("%s ptnetmap request DONE", pth_na->up.name)); @@ -1350,7 +1348,7 @@ netmap_pt_guest_txsync(struct ptnet_csb_gh *ptgh, struct ptnet_csb_hg *pthg, * go to sleep and we need to be notified by the host when more free * space is available. */ - if (nm_kr_txempty(kring)) { + if (nm_kr_txempty(kring) && !(kring->nr_kflags & NKR_NOINTR)) { /* Reenable notifications. */ ptgh->guest_need_kick = 1; /* Double check */ @@ -1415,7 +1413,7 @@ netmap_pt_guest_rxsync(struct ptnet_csb_gh *ptgh, struct ptnet_csb_hg *pthg, * we need to be notified by the host when more RX slots have been * completed. */ - if (nm_kr_rxempty(kring)) { + if (nm_kr_rxempty(kring) && !(kring->nr_kflags & NKR_NOINTR)) { /* Reenable notifications. */ ptgh->guest_need_kick = 1; /* Double check */ @@ -1504,7 +1502,7 @@ netmap_pt_guest_attach(struct netmap_adapter *arg, if (arg->nm_mem == NULL) return ENOMEM; arg->na_flags |= NAF_MEM_OWNER; - error = netmap_attach_ext(arg, sizeof(struct netmap_pt_guest_adapter)); + error = netmap_attach_ext(arg, sizeof(struct netmap_pt_guest_adapter), 1); if (error) return error; @@ -1517,7 +1515,7 @@ netmap_pt_guest_attach(struct netmap_adapter *arg, memset(&ptna->dr, 0, sizeof(ptna->dr)); ptna->dr.up.ifp = ifp; ptna->dr.up.nm_mem = netmap_mem_get(ptna->hwup.up.nm_mem); - ptna->dr.up.nm_config = ptna->hwup.up.nm_config; + ptna->dr.up.nm_config = ptna->hwup.up.nm_config; ptna->backend_regifs = 0; diff --git a/sys/dev/netmap/netmap_vale.c b/sys/dev/netmap/netmap_vale.c index 0df3d08f2a69..d364699bce26 100644 --- a/sys/dev/netmap/netmap_vale.c +++ b/sys/dev/netmap/netmap_vale.c @@ -150,6 +150,8 @@ __FBSDID("$FreeBSD$"); #define NM_BDG_BATCH_MAX (NM_BDG_BATCH + NM_MULTISEG) /* NM_FT_NULL terminates a list of slots in the ft */ #define NM_FT_NULL NM_BDG_BATCH_MAX +/* Default size for the Maximum Frame Size. */ +#define NM_BDG_MFS_DEFAULT 1514 /* @@ -160,7 +162,8 @@ __FBSDID("$FreeBSD$"); static int bridge_batch = NM_BDG_BATCH; /* bridge batch size */ SYSBEGIN(vars_vale); SYSCTL_DECL(_dev_netmap); -SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , ""); +SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0, + "Max batch size to be used in the bridge"); SYSEND; static int netmap_vp_create(struct nmreq *, struct ifnet *, @@ -226,9 +229,9 @@ struct nm_bridge { /* the forwarding table, MAC+ports. 
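The NKR_NOINTR checks above follow the usual notification-suppression pattern: re-arm the kick only when the ring looks empty and the application still wants interrupts, then re-check emptiness to close the race with the other side. A sketch of that pattern with illustrative names (not the ptnetmap CSB layout):

    #include <stdatomic.h>
    #include <stdio.h>

    struct ring {
        atomic_uint head, tail;   /* producer/consumer indexes */
        atomic_uint need_kick;    /* ask the peer for a notification */
    };

    static int ring_empty(struct ring *r)
    {
        return atomic_load(&r->head) == atomic_load(&r->tail);
    }

    /* returns 1 if the caller should sleep and wait for a notification */
    static int maybe_rearm(struct ring *r, int want_intr)
    {
        if (!ring_empty(r) || !want_intr)
            return 0;
        atomic_store(&r->need_kick, 1);
        /* double check: the peer may have advanced tail before it
         * could see need_kick = 1 */
        if (!ring_empty(r)) {
            atomic_store(&r->need_kick, 0);
            return 0;
        }
        return 1;
    }

    int main(void)
    {
        struct ring r;

        atomic_init(&r.head, 3);
        atomic_init(&r.tail, 3);
        atomic_init(&r.need_kick, 0);
        printf("sleep? %d\n", maybe_rearm(&r, 1 /* !NKR_NOINTR */));
        return 0;
    }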
* XXX should be changed to an argument to be passed to - * the lookup function, and allocated on attach + * the lookup function */ - struct nm_hash_ent ht[NM_BDG_HASH]; + struct nm_hash_ent *ht; // allocated on attach #ifdef CONFIG_NET_NS struct net *ns; @@ -365,17 +368,20 @@ nm_find_bridge(const char *name, int create) } if (i == num_bridges && b) { /* name not found, can create entry */ /* initialize the bridge */ - strncpy(b->bdg_basename, name, namelen); ND("create new bridge %s with ports %d", b->bdg_basename, b->bdg_active_ports); + b->ht = nm_os_malloc(sizeof(struct nm_hash_ent) * NM_BDG_HASH); + if (b->ht == NULL) { + D("failed to allocate hash table"); + return NULL; + } + strncpy(b->bdg_basename, name, namelen); b->bdg_namelen = namelen; b->bdg_active_ports = 0; for (i = 0; i < NM_BDG_MAXPORTS; i++) b->bdg_port_index[i] = i; /* set the default function */ b->bdg_ops.lookup = netmap_bdg_learning; - /* reset the MAC address table */ - bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH); NM_BNS_GET(b); } return b; @@ -503,6 +509,7 @@ netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw) ND("now %d active ports", lim); if (lim == 0) { ND("marking bridge %s as free", b->bdg_basename); + nm_os_free(b->ht); bzero(&b->bdg_ops, sizeof(b->bdg_ops)); NM_BNS_PUT(b); } @@ -542,11 +549,14 @@ netmap_vp_dtor(struct netmap_adapter *na) netmap_bdg_detach_common(b, vpna->bdg_port, -1); } - if (vpna->autodelete && na->ifp != NULL) { - ND("releasing %s", na->ifp->if_xname); - NMG_UNLOCK(); - nm_os_vi_detach(na->ifp); - NMG_LOCK(); + if (na->ifp != NULL && !nm_iszombie(na)) { + WNA(na->ifp) = NULL; + if (vpna->autodelete) { + ND("releasing %s", na->ifp->if_xname); + NMG_UNLOCK(); + nm_os_vi_detach(na->ifp); + NMG_LOCK(); + } } } @@ -603,11 +613,15 @@ err: static int nm_update_info(struct nmreq *nmr, struct netmap_adapter *na) { + uint64_t memsize; + int ret; nmr->nr_rx_rings = na->num_rx_rings; nmr->nr_tx_rings = na->num_tx_rings; nmr->nr_rx_slots = na->num_rx_desc; nmr->nr_tx_slots = na->num_tx_desc; - return netmap_mem_get_info(na->nm_mem, &nmr->nr_memsize, NULL, &nmr->nr_arg2); + ret = netmap_mem_get_info(na->nm_mem, &memsize, NULL, &nmr->nr_arg2); + nmr->nr_memsize = (uint32_t)memsize; + return ret; } /* @@ -736,7 +750,6 @@ netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, for (j = 0; j < b->bdg_active_ports; j++) { i = b->bdg_port_index[j]; vpna = b->bdg_ports[i]; - // KASSERT(na != NULL); ND("checking %s", vpna->up.name); if (!strcmp(vpna->up.name, nr_name)) { netmap_adapter_get(&vpna->up); @@ -788,6 +801,18 @@ netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, } else { struct netmap_adapter *hw; + /* the vale:nic syntax is only valid for some commands */ + switch (nmr->nr_cmd) { + case NETMAP_BDG_ATTACH: + case NETMAP_BDG_DETACH: + case NETMAP_BDG_POLLING_ON: + case NETMAP_BDG_POLLING_OFF: + break; /* ok */ + default: + error = EINVAL; + goto out; + } + error = netmap_get_hw_na(ifp, nmd, &hw); if (error || hw == NULL) goto out; @@ -848,6 +873,12 @@ nm_bdg_ctl_attach(struct nmreq *nmr) } } + /* XXX check existing one */ + error = netmap_get_bdg_na(nmr, &na, nmd, 0); + if (!error) { + error = EBUSY; + goto unref_exit; + } error = netmap_get_bdg_na(nmr, &na, nmd, 1 /* create if not exists */); if (error) /* no device */ goto unlock_exit; @@ -1149,9 +1180,8 @@ nm_bdg_ctl_polling_start(struct nmreq *nmr, struct netmap_adapter *na) bna->na_polling_state = bps; bps->bna = bna; - /* disable interrupt if possible */ - if (bna->hwna->nm_intr) - 
bna->hwna->nm_intr(bna->hwna, 0); + /* disable interrupts if possible */ + nma_intr_enable(bna->hwna, 0); /* start kthread now */ error = nm_bdg_polling_start_kthreads(bps); if (error) { @@ -1159,8 +1189,7 @@ nm_bdg_ctl_polling_start(struct nmreq *nmr, struct netmap_adapter *na) nm_os_free(bps->kthreads); nm_os_free(bps); bna->na_polling_state = NULL; - if (bna->hwna->nm_intr) - bna->hwna->nm_intr(bna->hwna, 1); + nma_intr_enable(bna->hwna, 1); } return error; } @@ -1180,9 +1209,8 @@ nm_bdg_ctl_polling_stop(struct nmreq *nmr, struct netmap_adapter *na) bps->configured = false; nm_os_free(bps); bna->na_polling_state = NULL; - /* reenable interrupt */ - if (bna->hwna->nm_intr) - bna->hwna->nm_intr(bna->hwna, 1); + /* reenable interrupts */ + nma_intr_enable(bna->hwna, 1); return 0; } @@ -1577,7 +1605,7 @@ netmap_vp_reg(struct netmap_adapter *na, int onoff) BDG_WLOCK(vpna->na_bdg); if (onoff) { for_rx_tx(t) { - for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { + for (i = 0; i < netmap_real_rings(na, t); i++) { struct netmap_kring *kring = &NMR(na, t)[i]; if (nm_kring_pending_on(kring)) @@ -1593,7 +1621,7 @@ netmap_vp_reg(struct netmap_adapter *na, int onoff) if (na->active_fds == 0) na->na_flags &= ~NAF_NETMAP_ON; for_rx_tx(t) { - for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { + for (i = 0; i < netmap_real_rings(na, t); i++) { struct netmap_kring *kring = &NMR(na, t)[i]; if (nm_kring_pending_off(kring)) @@ -1657,7 +1685,7 @@ netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring, */ if (((buf[6] & 1) == 0) && (na->last_smac != smac)) { /* valid src */ uint8_t *s = buf+6; - sh = nm_bridge_rthash(s); // XXX hash of source + sh = nm_bridge_rthash(s); /* hash of source */ /* update source port forwarding entry */ na->last_smac = ht[sh].mac = smac; /* XXX expire ? */ ht[sh].ports = mysrc; @@ -1667,11 +1695,10 @@ netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring, } dst = NM_BDG_BROADCAST; if ((buf[0] & 1) == 0) { /* unicast */ - dh = nm_bridge_rthash(buf); // XXX hash of dst + dh = nm_bridge_rthash(buf); /* hash of dst */ if (ht[dh].mac == dmac) { /* found dst */ dst = ht[dh].ports; } - /* XXX otherwise return NM_BDG_UNKNOWN ? */ } return dst; } @@ -1785,10 +1812,8 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na, dst_port = b->bdg_ops.lookup(&ft[i], &dst_ring, na); if (netmap_verbose > 255) RD(5, "slot %d port %d -> %d", i, me, dst_port); - if (dst_port == NM_BDG_NOPORT) + if (dst_port >= NM_BDG_NOPORT) continue; /* this packet is identified to be dropped */ - else if (unlikely(dst_port > NM_BDG_MAXPORTS)) - continue; else if (dst_port == NM_BDG_BROADCAST) dst_ring = 0; /* broadcasts always go to ring 0 */ else if (unlikely(dst_port == me || @@ -1882,10 +1907,10 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na, needed = d->bq_len + brddst->bq_len; if (unlikely(dst_na->up.virt_hdr_len != na->up.virt_hdr_len)) { - if (netmap_verbose) { - RD(3, "virt_hdr_mismatch, src %d dst %d", na->up.virt_hdr_len, - dst_na->up.virt_hdr_len); - } + if (netmap_verbose) { + RD(3, "virt_hdr_mismatch, src %d dst %d", na->up.virt_hdr_len, + dst_na->up.virt_hdr_len); + } /* There is a virtio-net header/offloadings mismatch between * source and destination. The slower mismatch datapath will * be used to cope with all the mismatches. @@ -1902,6 +1927,7 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na, * TCPv4 we must account for ethernet header, IP header * and TCPv4 header). 
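netmap_bdg_learning() learns the source MAC into the per-bridge ht[] table and forwards known unicast destinations to a single port, flooding everything else. A toy version of that learn/lookup pair, with a trivial hash standing in for nm_bridge_rthash():

    #include <stdint.h>
    #include <stdio.h>

    #define HSIZE     256
    #define BROADCAST 0xff

    struct hent { uint64_t mac; uint8_t ports; };
    static struct hent ht[HSIZE];

    static uint64_t mac48(const uint8_t *b)
    {
        return ((uint64_t)b[0] << 40) | ((uint64_t)b[1] << 32) |
               ((uint64_t)b[2] << 24) | ((uint64_t)b[3] << 16) |
               ((uint64_t)b[4] << 8) | b[5];
    }

    /* toy stand-in for nm_bridge_rthash() */
    static unsigned mhash(uint64_t m) { return (unsigned)(m ^ (m >> 17)) % HSIZE; }

    /* learn the source, then look up the destination; frame layout is
     * dst[0..5] src[6..11] as in netmap_bdg_learning() */
    static uint8_t learning_fwd(const uint8_t *frame, uint8_t my_port)
    {
        uint64_t smac = mac48(frame + 6), dmac = mac48(frame);
        unsigned sh = mhash(smac), dh = mhash(dmac);

        if ((frame[6] & 1) == 0) {          /* valid (unicast) source */
            ht[sh].mac = smac;
            ht[sh].ports = my_port;
        }
        if ((frame[0] & 1) == 0 && ht[dh].mac == dmac)  /* known unicast dst */
            return ht[dh].ports;
        return BROADCAST;                   /* flood */
    }

    int main(void)
    {
        uint8_t f1[12] = { 2,0,0,0,0,2,  2,0,0,0,0,1 };  /* host 1 -> host 2 */
        uint8_t f2[12] = { 2,0,0,0,0,1,  2,0,0,0,0,2 };  /* reply */

        printf("first frame -> port %u (flooded)\n", learning_fwd(f1, 0));
        printf("reply       -> port %u (learned)\n", learning_fwd(f2, 1));
        return 0;
    }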
*/ + KASSERT(dst_na->mfs > 0, ("vpna->mfs is 0")); needed = (needed * na->mfs) / (dst_na->mfs - WORST_CASE_GSO_HEADER) + 1; ND(3, "srcmtu=%u, dstmtu=%u, x=%u", na->mfs, dst_na->mfs, needed); @@ -1916,6 +1942,9 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na, dst_nr = dst_nr % nrings; kring = &dst_na->up.rx_rings[dst_nr]; ring = kring->ring; + /* the destination ring may have not been opened for RX */ + if (unlikely(ring == NULL || kring->nr_mode != NKR_NETMAP_ON)) + goto cleanup; lim = kring->nkr_num_slots - 1; retry: @@ -2196,7 +2225,7 @@ netmap_vp_bdg_attach(const char *name, struct netmap_adapter *na) struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na; if (vpna->na_bdg) - return EBUSY; + return netmap_bwrap_attach(name, na); na->na_vp = vpna; strncpy(na->name, name, sizeof(na->name)); na->na_hostvp = NULL; @@ -2248,7 +2277,10 @@ netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp, nm_bound_var(&nmr->nr_arg3, 0, 0, 128*NM_BDG_MAXSLOTS, NULL); na->num_rx_desc = nmr->nr_rx_slots; - vpna->mfs = 1514; + /* Set the mfs to a default value, as it is needed on the VALE + * mismatch datapath. XXX We should set it according to the MTU + * known to the kernel. */ + vpna->mfs = NM_BDG_MFS_DEFAULT; vpna->last_smac = ~0llu; /*if (vpna->mfs > netmap_buf_size) TODO netmap_buf_size is zero?? vpna->mfs = netmap_buf_size; */ @@ -2330,7 +2362,8 @@ netmap_bwrap_dtor(struct netmap_adapter *na) struct nm_bridge *b = bna->up.na_bdg, *bh = bna->host.na_bdg; - netmap_mem_put(bna->host.up.nm_mem); + if (bna->host.up.nm_mem) + netmap_mem_put(bna->host.up.nm_mem); if (b) { netmap_bdg_detach_common(b, bna->up.bdg_port, @@ -2459,28 +2492,6 @@ netmap_bwrap_reg(struct netmap_adapter *na, int onoff) hostna->up.na_lut = na->na_lut; } - /* cross-link the netmap rings - * The original number of rings comes from hwna, - * rx rings on one side equals tx rings on the other. - */ - for_rx_tx(t) { - enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */ - for (i = 0; i < nma_get_nrings(hwna, r) + 1; i++) { - NMR(hwna, r)[i].ring = NMR(na, t)[i].ring; - } - } - - if (na->na_flags & NAF_HOST_RINGS) { - struct netmap_adapter *hna = &hostna->up; - /* the hostna rings are the host rings of the bwrap. 
- * The corresponding krings must point back to the - * hostna - */ - hna->tx_rings = &na->tx_rings[na->num_tx_rings]; - hna->tx_rings[0].na = hna; - hna->rx_rings = &na->rx_rings[na->num_rx_rings]; - hna->rx_rings[0].na = hna; - } } /* pass down the pending ring state information */ @@ -2497,9 +2508,10 @@ netmap_bwrap_reg(struct netmap_adapter *na, int onoff) /* copy up the current ring state information */ for_rx_tx(t) { - for (i = 0; i < nma_get_nrings(na, t) + 1; i++) - NMR(na, t)[i].nr_mode = - NMR(hwna, t)[i].nr_mode; + for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { + struct netmap_kring *kring = &NMR(hwna, t)[i]; + NMR(na, t)[i].nr_mode = kring->nr_mode; + } } /* impersonate a netmap_vp_adapter */ @@ -2537,6 +2549,14 @@ netmap_bwrap_reg(struct netmap_adapter *na, int onoff) hwna->na_lut.lut = NULL; hwna->na_lut.objtotal = 0; hwna->na_lut.objsize = 0; + + /* pass ownership of the netmap rings to the hwna */ + for_rx_tx(t) { + for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { + NMR(na, t)[i].ring = NULL; + } + } + } return 0; @@ -2570,6 +2590,7 @@ netmap_bwrap_krings_create(struct netmap_adapter *na) struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na; struct netmap_adapter *hwna = bna->hwna; + struct netmap_adapter *hostna = &bna->host.up; int i, error = 0; enum txrx t; @@ -2586,16 +2607,49 @@ netmap_bwrap_krings_create(struct netmap_adapter *na) goto err_del_vp_rings; } - /* get each ring slot number from the corresponding hwna ring */ - for_rx_tx(t) { - enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */ - for (i = 0; i < nma_get_nrings(hwna, r) + 1; i++) { - NMR(na, t)[i].nkr_num_slots = NMR(hwna, r)[i].nkr_num_slots; + /* increment the usage counter for all the hwna krings */ + for_rx_tx(t) { + for (i = 0; i < nma_get_nrings(hwna, t) + 1; i++) { + NMR(hwna, t)[i].users++; } + } + + /* now create the actual rings */ + error = netmap_mem_rings_create(hwna); + if (error) { + goto err_dec_users; + } + + /* cross-link the netmap rings + * The original number of rings comes from hwna, + * rx rings on one side equals tx rings on the other. + */ + for_rx_tx(t) { + enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */ + for (i = 0; i < nma_get_nrings(hwna, r) + 1; i++) { + NMR(na, t)[i].nkr_num_slots = NMR(hwna, r)[i].nkr_num_slots; + NMR(na, t)[i].ring = NMR(hwna, r)[i].ring; + } + } + + if (na->na_flags & NAF_HOST_RINGS) { + /* the hostna rings are the host rings of the bwrap. 
+ * The corresponding krings must point back to the + * hostna + */ + hostna->tx_rings = &na->tx_rings[na->num_tx_rings]; + hostna->tx_rings[0].na = hostna; + hostna->rx_rings = &na->rx_rings[na->num_rx_rings]; + hostna->rx_rings[0].na = hostna; } return 0; +err_dec_users: + for_rx_tx(t) { + NMR(hwna, t)[i].users--; + } + hwna->nm_krings_delete(hwna); err_del_vp_rings: netmap_vp_krings_delete(na); @@ -2609,9 +2663,20 @@ netmap_bwrap_krings_delete(struct netmap_adapter *na) struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na; struct netmap_adapter *hwna = bna->hwna; + enum txrx t; + int i; ND("%s", na->name); + /* decrement the usage counter for all the hwna krings */ + for_rx_tx(t) { + for (i = 0; i < nma_get_nrings(hwna, t) + 1; i++) { + NMR(hwna, t)[i].users--; + } + } + + /* delete any netmap rings that are no longer needed */ + netmap_mem_rings_delete(hwna); hwna->nm_krings_delete(hwna); netmap_vp_krings_delete(na); } @@ -2699,7 +2764,7 @@ netmap_bwrap_bdg_ctl(struct netmap_adapter *na, struct nmreq *nmr, int attach) if (npriv == NULL) return ENOMEM; npriv->np_ifp = na->ifp; /* let the priv destructor release the ref */ - error = netmap_do_regif(npriv, na, 0, NR_REG_NIC_SW); + error = netmap_do_regif(npriv, na, nmr->nr_ringid, nmr->nr_flags); if (error) { netmap_priv_delete(npriv); return error; @@ -2766,6 +2831,8 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna) na->nm_mem = netmap_mem_get(hwna->nm_mem); na->virt_hdr_len = hwna->virt_hdr_len; bna->up.retry = 1; /* XXX maybe this should depend on the hwna */ + /* Set the mfs, needed on the VALE mismatch datapath. */ + bna->up.mfs = NM_BDG_MFS_DEFAULT; bna->hwna = hwna; netmap_adapter_get(hwna); @@ -2793,6 +2860,7 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna) na->na_hostvp = hwna->na_hostvp = hostna->na_hostvp = &bna->host; hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */ + bna->host.mfs = NM_BDG_MFS_DEFAULT; } ND("%s<->%s txr %d txd %d rxr %d rxd %d", diff --git a/sys/net/netmap.h b/sys/net/netmap.h index 51d95f4e0142..53f2b41b0687 100644 --- a/sys/net/netmap.h +++ b/sys/net/netmap.h @@ -217,7 +217,8 @@ struct netmap_slot { #define NS_MOREFRAG 0x0020 /* packet has more fragments */ /* - * (VALE ports only) + * (VALE ports, ptnetmap ports and some NIC ports, e.g. + * ixgbe and i40e on Linux) * Set on all but the last slot of a multi-segment packet. * The 'len' field refers to the individual fragment. */ @@ -528,6 +529,7 @@ struct nmreq { #define NETMAP_BDG_POLLING_OFF 11 /* delete polling kthread */ #define NETMAP_VNET_HDR_GET 12 /* get the port virtio-net-hdr length */ #define NETMAP_POOLS_INFO_GET 13 /* get memory allocator pools info */ +#define NETMAP_POOLS_CREATE 14 /* create a new memory allocator */ uint16_t nr_arg1; /* reserve extra rings in NIOCREGIF */ #define NETMAP_BDG_HOST 1 /* attach the host stack on ATTACH */ @@ -567,13 +569,13 @@ enum { NR_REG_DEFAULT = 0, /* backward compat, should not be used. */ #define NM_BDG_NAME "vale" /* prefix for bridge port name */ +#ifdef _WIN32 /* * Windows does not have _IOWR(). _IO(), _IOW() and _IOR() are defined * in ws2def.h but not sure if they are in the form we need. - * XXX so we redefine them - * in a convenient way to use for DeviceIoControl signatures + * We therefore redefine them in a convenient way to use for DeviceIoControl + * signatures. 
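Per the updated NS_MOREFRAG comment above, a sender scatters one frame over consecutive slots by setting the flag on all but the last slot. An illustrative helper, assuming an already-opened tx ring and driver support for multi-segment slots:

    /* send one frame scattered over several tx slots: NS_MOREFRAG on
     * every slot but the last; the fragment list is illustrative */
    #include <net/netmap_user.h>
    #include <string.h>

    static int
    send_frags(struct netmap_ring *ring, const char **frag,
               const size_t *flen, int nfrags)
    {
        unsigned int cur = ring->cur;
        int i;

        if (nm_ring_space(ring) < (unsigned)nfrags)
            return -1;                        /* not enough free slots */
        for (i = 0; i < nfrags; i++) {
            struct netmap_slot *slot = &ring->slot[cur];
            char *dst = NETMAP_BUF(ring, slot->buf_idx);

            memcpy(dst, frag[i], flen[i]);
            slot->len = (uint16_t)flen[i];    /* per-fragment length */
            slot->flags = (i == nfrags - 1) ? 0 : NS_MOREFRAG;
            cur = nm_ring_next(ring, cur);
        }
        ring->head = ring->cur = cur;         /* expose the slots */
        return 0;
    }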
*/ -#ifdef _WIN32 #undef _IO // ws2def.h #define _WIN_NM_IOCTL_TYPE 40000 #define _IO(_c, _n) CTL_CODE(_WIN_NM_IOCTL_TYPE, ((_n) + 0x800) , \ diff --git a/sys/net/netmap_user.h b/sys/net/netmap_user.h index a03fa53ef27e..2c8d91a63b63 100644 --- a/sys/net/netmap_user.h +++ b/sys/net/netmap_user.h @@ -100,6 +100,7 @@ #endif /* likely and unlikely */ #include +#include /* nmreq_pointer_get() */ /* helper macro */ #define _NETMAP_OFFSET(type, ptr, offset) \ @@ -114,7 +115,7 @@ nifp, (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + 1] ) #define NETMAP_BUF(ring, index) \ - ((char *)(ring) + (ring)->buf_ofs + ((index)*(ring)->nr_buf_size)) + ((char *)(ring) + (ring)->buf_ofs + ((long)(index)*(ring)->nr_buf_size)) #define NETMAP_BUF_IDX(ring, buf) \ ( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \ @@ -224,7 +225,7 @@ struct nm_desc { struct nm_desc *self; /* point to self if netmap. */ int fd; void *mem; - uint32_t memsize; + uint64_t memsize; int done_mmap; /* set if mem is the result of mmap */ struct netmap_if * const nifp; uint16_t first_tx_ring, last_tx_ring, cur_tx_ring; @@ -272,8 +273,6 @@ struct nm_desc { * to multiple of 64 bytes and is often faster than dealing * with other odd sizes. We assume there is enough room * in the source and destination buffers. - * - * XXX only for multiples of 64 bytes, non overlapped. */ static inline void nm_pkt_copy(const void *_src, void *_dst, int l) @@ -281,7 +280,7 @@ nm_pkt_copy(const void *_src, void *_dst, int l) const uint64_t *src = (const uint64_t *)_src; uint64_t *dst = (uint64_t *)_dst; - if (unlikely(l >= 1024)) { + if (unlikely(l >= 1024 || l % 64)) { memcpy(dst, src, l); return; } @@ -352,6 +351,7 @@ enum { NM_OPEN_ARG2 = 0x200000, NM_OPEN_ARG3 = 0x400000, NM_OPEN_RING_CFG = 0x800000, /* tx|rx rings|slots */ + NM_OPEN_EXTMEM = 0x1000000, }; @@ -613,38 +613,46 @@ nm_is_identifier(const char *s, const char *e) return 1; } -/* - * Try to open, return descriptor if successful, NULL otherwise. - * An invalid netmap name will return errno = 0; - * You can pass a pointer to a pre-filled nm_desc to add special - * parameters. Flags is used as follows - * NM_OPEN_NO_MMAP use the memory from arg, only XXX avoid mmap - * if the nr_arg2 (memory block) matches. 
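With the l % 64 fallback above, nm_pkt_copy() is now safe for any length, so callers no longer need to pad to 64-byte multiples. Typical use when forwarding between rings:

    /* copy one received frame into a tx slot; nm_pkt_copy() now handles
     * any length, falling back to memcpy for large or odd sizes */
    #include <net/netmap_user.h>

    static void
    forward_slot(struct netmap_ring *rx, unsigned rxi,
                 struct netmap_ring *tx, unsigned txi)
    {
        struct netmap_slot *rs = &rx->slot[rxi], *ts = &tx->slot[txi];

        nm_pkt_copy(NETMAP_BUF(rx, rs->buf_idx),
                    NETMAP_BUF(tx, ts->buf_idx), rs->len);
        ts->len = rs->len;
    }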
- * NM_OPEN_ARG1 use req.nr_arg1 from arg - * NM_OPEN_ARG2 use req.nr_arg2 from arg - * NM_OPEN_RING_CFG user ring config from arg - */ -static struct nm_desc * -nm_open(const char *ifname, const struct nmreq *req, - uint64_t new_flags, const struct nm_desc *arg) +static void +nm_init_offsets(struct nm_desc *d) { - struct nm_desc *d = NULL; - const struct nm_desc *parent = arg; - u_int namelen; - uint32_t nr_ringid = 0, nr_flags, nr_reg; + struct netmap_if *nifp = NETMAP_IF(d->mem, d->req.nr_offset); + struct netmap_ring *r = NETMAP_RXRING(nifp, d->first_rx_ring); + if ((void *)r == (void *)nifp) { + /* the descriptor is open for TX only */ + r = NETMAP_TXRING(nifp, d->first_tx_ring); + } + + *(struct netmap_if **)(uintptr_t)&(d->nifp) = nifp; + *(struct netmap_ring **)(uintptr_t)&d->some_ring = r; + *(void **)(uintptr_t)&d->buf_start = NETMAP_BUF(r, 0); + *(void **)(uintptr_t)&d->buf_end = + (char *)d->mem + d->memsize; +} + +#define MAXERRMSG 80 +#define NM_PARSE_OK 0 +#define NM_PARSE_MEMID 1 +static int +nm_parse_one(const char *ifname, struct nmreq *d, char **out, int memid_allowed) +{ + int is_vale; const char *port = NULL; const char *vpname = NULL; -#define MAXERRMSG 80 + u_int namelen; + uint32_t nr_ringid = 0, nr_flags; char errmsg[MAXERRMSG] = ""; - enum { P_START, P_RNGSFXOK, P_GETNUM, P_FLAGS, P_FLAGSOK, P_MEMID } p_state; - int is_vale; long num; uint16_t nr_arg2 = 0; + enum { P_START, P_RNGSFXOK, P_GETNUM, P_FLAGS, P_FLAGSOK, P_MEMID } p_state; + + errno = 0; if (strncmp(ifname, "netmap:", 7) && strncmp(ifname, NM_BDG_NAME, strlen(NM_BDG_NAME))) { - errno = 0; /* name not recognised, not an error */ - return NULL; + snprintf(errmsg, MAXERRMSG, "invalid port name: %s", ifname); + errno = EINVAL; + goto fail; } is_vale = (ifname[0] == 'v'); @@ -677,10 +685,14 @@ nm_open(const char *ifname, const struct nmreq *req, } namelen = port - ifname; - if (namelen >= sizeof(d->req.nr_name)) { + if (namelen >= sizeof(d->nr_name)) { snprintf(errmsg, MAXERRMSG, "name too long"); goto fail; } + memcpy(d->nr_name, ifname, namelen); + d->nr_name[namelen] = '\0'; + D("name %s", d->nr_name); + p_state = P_START; nr_flags = NR_REG_ALL_NIC; /* default for no suffix */ while (*port) { @@ -777,21 +789,28 @@ nm_open(const char *ifname, const struct nmreq *req, p_state = P_FLAGSOK; break; case P_MEMID: - if (nr_arg2 != 0) { + if (!memid_allowed) { snprintf(errmsg, MAXERRMSG, "double setting of memid"); goto fail; } num = strtol(port, (char **)&port, 10); if (num <= 0) { - snprintf(errmsg, MAXERRMSG, "invalid memid %ld, must be >0", num); - goto fail; + ND("non-numeric memid %s (out = %p)", port, out); + if (out == NULL) + goto fail; + *out = (char *)port; + while (*port) + port++; + } else { + nr_arg2 = num; + memid_allowed = 0; + p_state = P_RNGSFXOK; } - nr_arg2 = num; - p_state = P_RNGSFXOK; break; } } - if (p_state != P_START && p_state != P_RNGSFXOK && p_state != P_FLAGSOK) { + if (p_state != P_START && p_state != P_RNGSFXOK && + p_state != P_FLAGSOK && p_state != P_MEMID) { snprintf(errmsg, MAXERRMSG, "unexpected end of port name"); goto fail; } @@ -800,6 +819,138 @@ nm_open(const char *ifname, const struct nmreq *req, (nr_flags & NR_ZCOPY_MON) ? "ZCOPY_MON" : "", (nr_flags & NR_MONITOR_TX) ? "MONITOR_TX" : "", (nr_flags & NR_MONITOR_RX) ? "MONITOR_RX" : ""); + + d->nr_flags |= nr_flags; + d->nr_ringid |= nr_ringid; + d->nr_arg2 = nr_arg2; + + return (p_state == P_MEMID) ? 
NM_PARSE_MEMID : NM_PARSE_OK; +fail: + if (!errno) + errno = EINVAL; + if (out) + *out = strdup(errmsg); + return -1; +} + +static int +nm_interp_memid(const char *memid, struct nmreq *req, char **err) +{ + int fd = -1; + char errmsg[MAXERRMSG] = ""; + struct nmreq greq; + off_t mapsize; + struct netmap_pools_info *pi; + + /* first, try to look for a netmap port with this name */ + fd = open("/dev/netmap", O_RDONLY); + if (fd < 0) { + snprintf(errmsg, MAXERRMSG, "cannot open /dev/netmap: %s", strerror(errno)); + goto fail; + } + memset(&greq, 0, sizeof(greq)); + if (nm_parse_one(memid, &greq, err, 0) == NM_PARSE_OK) { + greq.nr_version = NETMAP_API; + if (ioctl(fd, NIOCGINFO, &greq) < 0) { + if (errno == ENOENT || errno == ENXIO) + goto try_external; + snprintf(errmsg, MAXERRMSG, "cannot getinfo for %s: %s", memid, strerror(errno)); + goto fail; + } + req->nr_arg2 = greq.nr_arg2; + close(fd); + return 0; + } +try_external: + D("trying with external memory"); + close(fd); + fd = open(memid, O_RDWR); + if (fd < 0) { + snprintf(errmsg, MAXERRMSG, "cannot open %s: %s", memid, strerror(errno)); + goto fail; + } + mapsize = lseek(fd, 0, SEEK_END); + if (mapsize < 0) { + snprintf(errmsg, MAXERRMSG, "failed to obtain filesize of %s: %s", memid, strerror(errno)); + goto fail; + } + pi = mmap(0, mapsize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (pi == MAP_FAILED) { + snprintf(errmsg, MAXERRMSG, "cannot map %s: %s", memid, strerror(errno)); + goto fail; + } + req->nr_cmd = NETMAP_POOLS_CREATE; + pi->memsize = mapsize; + nmreq_pointer_put(req, pi); + D("mapped %zu bytes at %p from file %s", mapsize, pi, memid); + return 0; + +fail: + D("%s", errmsg); + close(fd); + if (err && !*err) + *err = strdup(errmsg); + return errno; +} + +static int +nm_parse(const char *ifname, struct nm_desc *d, char *errmsg) +{ + char *err; + switch (nm_parse_one(ifname, &d->req, &err, 1)) { + case NM_PARSE_OK: + D("parse OK"); + break; + case NM_PARSE_MEMID: + D("memid: %s", err); + errno = nm_interp_memid(err, &d->req, &err); + D("errno = %d", errno); + if (!errno) + break; + /* fallthrough */ + default: + D("error"); + strncpy(errmsg, err, MAXERRMSG); + errmsg[MAXERRMSG-1] = '\0'; + free(err); + return -1; + } + D("parsed name: %s", d->req.nr_name); + d->self = d; + return 0; +} + +/* + * Try to open, return descriptor if successful, NULL otherwise. + * An invalid netmap name will return errno = 0; + * You can pass a pointer to a pre-filled nm_desc to add special + * parameters. Flags is used as follows + * NM_OPEN_NO_MMAP use the memory from arg, only + * if the nr_arg2 (memory block) matches. 
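Taken together, nm_parse_one() and nm_interp_memid() extend the '@' suffix: a numeric memid keeps its old meaning, a port name borrows that port's allocator via NIOCGINFO, and anything else is treated as a file to be mapped as external memory through NETMAP_POOLS_CREATE. Illustrative calls (the port and file names are made up, and vale0:1 is assumed to already exist):

    #include <stdio.h>
    #define NETMAP_WITH_LIBS
    #include <net/netmap_user.h>

    int main(void)
    {
        /* numeric allocator id, same meaning as before this change */
        struct nm_desc *d1 = nm_open("netmap:em0@1", NULL, 0, NULL);
        /* share the allocator of another port, looked up via NIOCGINFO */
        struct nm_desc *d2 = nm_open("netmap:em1@vale0:1", NULL, 0, NULL);
        /* back the port with external memory from a mapped file */
        struct nm_desc *d3 = nm_open("netmap:em2@/tmp/netmap-pools", NULL, 0, NULL);

        if (!d1 || !d2 || !d3)
            fprintf(stderr, "open failed\n");
        return 0;
    }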
+ * Special case: if arg is NULL, skip the + * mmap entirely (maybe because you are going + * to do it by yourself, or you plan to call + * nm_mmap() only later) + * NM_OPEN_ARG1 use req.nr_arg1 from arg + * NM_OPEN_ARG2 use req.nr_arg2 from arg + * NM_OPEN_RING_CFG user ring config from arg + */ +static struct nm_desc * +nm_open(const char *ifname, const struct nmreq *req, + uint64_t new_flags, const struct nm_desc *arg) +{ + struct nm_desc *d = NULL; + const struct nm_desc *parent = arg; + char errmsg[MAXERRMSG] = ""; + uint32_t nr_reg; + struct netmap_pools_info *pi = NULL; + + if (strncmp(ifname, "netmap:", 7) && + strncmp(ifname, NM_BDG_NAME, strlen(NM_BDG_NAME))) { + errno = 0; /* name not recognised, not an error */ + return NULL; + } + d = (struct nm_desc *)calloc(1, sizeof(*d)); if (d == NULL) { snprintf(errmsg, MAXERRMSG, "nm_desc alloc failure"); @@ -813,32 +964,87 @@ nm_open(const char *ifname, const struct nmreq *req, goto fail; } - if (req) + if (req) { d->req = *req; - d->req.nr_version = NETMAP_API; - d->req.nr_ringid &= ~NETMAP_RING_MASK; + } else { + d->req.nr_arg1 = 4; + d->req.nr_arg2 = 0; + d->req.nr_arg3 = 0; + } + + if (!(new_flags & NM_OPEN_IFNAME)) { + char *err; + switch (nm_parse_one(ifname, &d->req, &err, 1)) { + case NM_PARSE_OK: + break; + case NM_PARSE_MEMID: + if ((new_flags & NM_OPEN_NO_MMAP) && + IS_NETMAP_DESC(parent)) { + /* ignore the memid setting, since we are + * going to use the parent's one + */ + break; + } + errno = nm_interp_memid(err, &d->req, &err); + if (!errno) + break; + /* fallthrough */ + default: + strncpy(errmsg, err, MAXERRMSG); + errmsg[MAXERRMSG-1] = '\0'; + free(err); + goto fail; + } + d->self = d; + } + + /* compatibility checks for POOLS_CREATE and NM_OPEN flags + * the first check may be dropped once we have a larger nreq + */ + if (d->req.nr_cmd == NETMAP_POOLS_CREATE) { + if (IS_NETMAP_DESC(parent)) { + if (new_flags & (NM_OPEN_ARG1 | NM_OPEN_ARG2 | NM_OPEN_ARG3)) { + snprintf(errmsg, MAXERRMSG, + "POOLS_CREATE is incompatible " + "with NM_OPEN_ARG? flags"); + errno = EINVAL; + goto fail; + } + if (new_flags & NM_OPEN_NO_MMAP) { + snprintf(errmsg, MAXERRMSG, + "POOLS_CREATE is incompatible " + "with NM_OPEN_NO_MMAP flag"); + errno = EINVAL; + goto fail; + } + } + } + + d->req.nr_version = NETMAP_API; + d->req.nr_ringid &= NETMAP_RING_MASK; - /* these fields are overridden by ifname and flags processing */ - d->req.nr_ringid |= nr_ringid; - d->req.nr_flags |= nr_flags; - if (nr_arg2) - d->req.nr_arg2 = nr_arg2; - memcpy(d->req.nr_name, ifname, namelen); - d->req.nr_name[namelen] = '\0'; /* optionally import info from parent */ if (IS_NETMAP_DESC(parent) && new_flags) { - if (new_flags & NM_OPEN_ARG1) + if (new_flags & NM_OPEN_EXTMEM) { + if (parent->req.nr_cmd == NETMAP_POOLS_CREATE) { + d->req.nr_cmd = NETMAP_POOLS_CREATE; + nmreq_pointer_put(&d->req, nmreq_pointer_get(&parent->req)); + D("Warning: not overriding arg[1-3] since external memory is being used"); + new_flags &= ~(NM_OPEN_ARG1 | NM_OPEN_ARG2 | NM_OPEN_ARG3); + } + } + if (new_flags & NM_OPEN_ARG1) { D("overriding ARG1 %d", parent->req.nr_arg1); - d->req.nr_arg1 = new_flags & NM_OPEN_ARG1 ? - parent->req.nr_arg1 : 4; - if (new_flags & NM_OPEN_ARG2) + d->req.nr_arg1 = parent->req.nr_arg1; + } + if (new_flags & (NM_OPEN_ARG2 | NM_OPEN_NO_MMAP)) { D("overriding ARG2 %d", parent->req.nr_arg2); - d->req.nr_arg2 = new_flags & NM_OPEN_ARG2 ?
-			parent->req.nr_arg2 : 0;
-		if (new_flags & NM_OPEN_ARG3)
+			d->req.nr_arg2 = parent->req.nr_arg2;
+		}
+		if (new_flags & NM_OPEN_ARG3) {
 			D("overriding ARG3 %d", parent->req.nr_arg3);
-		d->req.nr_arg3 = new_flags & NM_OPEN_ARG3 ?
-			parent->req.nr_arg3 : 0;
+			d->req.nr_arg3 = parent->req.nr_arg3;
+		}
 		if (new_flags & NM_OPEN_RING_CFG) {
 			D("overriding RING_CFG");
 			d->req.nr_tx_slots = parent->req.nr_tx_slots;
@@ -859,15 +1065,26 @@ nm_open(const char *ifname, const struct nmreq *req,
 	/* add the *XPOLL flags */
 	d->req.nr_ringid |= new_flags & (NETMAP_NO_TX_POLL | NETMAP_DO_RX_POLL);
 
+	if (d->req.nr_cmd == NETMAP_POOLS_CREATE) {
+		pi = nmreq_pointer_get(&d->req);
+	}
+
 	if (ioctl(d->fd, NIOCREGIF, &d->req)) {
 		snprintf(errmsg, MAXERRMSG, "NIOCREGIF failed: %s", strerror(errno));
 		goto fail;
 	}
 
-	/* if parent is defined, do nm_mmap() even if NM_OPEN_NO_MMAP is set */
-	if ((!(new_flags & NM_OPEN_NO_MMAP) || parent) && nm_mmap(d, parent)) {
-		snprintf(errmsg, MAXERRMSG, "mmap failed: %s", strerror(errno));
-		goto fail;
+	if (pi != NULL) {
+		d->mem = pi;
+		d->memsize = pi->memsize;
+		nm_init_offsets(d);
+	} else if ((!(new_flags & NM_OPEN_NO_MMAP) || parent)) {
+		/* if parent is defined, do nm_mmap() even if NM_OPEN_NO_MMAP is set */
+		errno = nm_mmap(d, parent);
+		if (errno) {
+			snprintf(errmsg, MAXERRMSG, "mmap failed: %s", strerror(errno));
+			goto fail;
+		}
 	}
 
 	nr_reg = d->req.nr_flags & NR_REG_MASK;
@@ -934,7 +1151,8 @@ nm_close(struct nm_desc *d)
 	 */
 	static void *__xxzt[] __attribute__ ((unused)) =
 		{ (void *)nm_open, (void *)nm_inject,
-		  (void *)nm_dispatch, (void *)nm_nextpkt } ;
+		  (void *)nm_dispatch, (void *)nm_nextpkt,
+		  (void *)nm_parse } ;
 
 	if (d == NULL || d->self != d)
 		return EINVAL;
@@ -971,17 +1189,8 @@ nm_mmap(struct nm_desc *d, const struct nm_desc *parent)
 		}
 		d->done_mmap = 1;
 	}
-	{
-		struct netmap_if *nifp = NETMAP_IF(d->mem, d->req.nr_offset);
-		struct netmap_ring *r = NETMAP_RXRING(nifp, d->first_rx_ring);
-
-		*(struct netmap_if **)(uintptr_t)&(d->nifp) = nifp;
-		*(struct netmap_ring **)(uintptr_t)&d->some_ring = r;
-		*(void **)(uintptr_t)&d->buf_start = NETMAP_BUF(r, 0);
-		*(void **)(uintptr_t)&d->buf_end =
-			(char *)d->mem + d->memsize;
-	}
+	nm_init_offsets(d);
 
 	return 0;
 
 fail:
@@ -994,13 +1203,13 @@ fail:
 static int
 nm_inject(struct nm_desc *d, const void *buf, size_t size)
 {
-	u_int c, n = d->last_tx_ring - d->first_tx_ring + 1;
+	u_int c, n = d->last_tx_ring - d->first_tx_ring + 1,
+	      ri = d->cur_tx_ring;
 
-	for (c = 0; c < n ; c++) {
+	for (c = 0; c < n ; c++, ri++) {
 		/* compute current ring to use */
 		struct netmap_ring *ring;
 		uint32_t i, idx;
-		uint32_t ri = d->cur_tx_ring + c;
 
 		if (ri > d->last_tx_ring)
 			ri = d->first_tx_ring;
@@ -1038,11 +1247,10 @@ nm_dispatch(struct nm_desc *d, int cnt, nm_cb_t cb, u_char *arg)
 	 * of buffers and the int is large enough that we never wrap,
 	 * so we can omit checking for -1
 	 */
-	for (c=0; c < n && cnt != got; c++) {
+	for (c=0; c < n && cnt != got; c++, ri++) {
 		/* compute current ring to use */
 		struct netmap_ring *ring;
 
-		ri = d->cur_rx_ring + c;
 		if (ri > d->last_rx_ring)
 			ri = d->first_rx_ring;
 		ring = NETMAP_RXRING(d->nifp, ri);
@@ -1053,6 +1261,9 @@ nm_dispatch(struct nm_desc *d, int cnt, nm_cb_t cb, u_char *arg)
 		}
 		i = ring->cur;
 		idx = ring->slot[i].buf_idx;
+		/* d->cur_rx_ring doesn't change inside this loop, but
+		 * set it here, so it reflects d->hdr.buf's ring */
+		d->cur_rx_ring = ri;
 		d->hdr.slot = &ring->slot[i];
 		d->hdr.buf = (u_char *)NETMAP_BUF(ring, idx);
 		// __builtin_prefetch(buf);
@@ -1065,7 +1276,6 @@ nm_dispatch(struct nm_desc *d, int cnt, nm_cb_t cb, u_char *arg)
 		d->hdr.flags = 0;
 		cb(arg, &d->hdr, d->hdr.buf);
 	}
-	d->cur_rx_ring = ri;
 
 	return got;
 }
diff --git a/sys/net/netmap_virt.h b/sys/net/netmap_virt.h
index a520a16dc9b1..751ae539d982 100644
--- a/sys/net/netmap_virt.h
+++ b/sys/net/netmap_virt.h
@@ -139,6 +139,13 @@ nmreq_pointer_put(struct nmreq *nmr, void *userptr)
 	*pp = (uintptr_t)userptr;
 }
 
+static inline void *
+nmreq_pointer_get(const struct nmreq *nmr)
+{
+	const uintptr_t *pp = (const uintptr_t *)&nmr->nr_arg1;
+	return (void *)*pp;
+}
+
 /* ptnetmap features */
 #define PTNETMAP_F_VNET_HDR	1
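
A minimal usage sketch of the new memid path in nm_parse_one()/nm_interp_memid(): the "@/path" suffix is assumed to be what nm_parse_one() reports as a memid, which nm_interp_memid() resolves by mmap()ing the file and arming NETMAP_POOLS_CREATE for the NIOCREGIF. The port name and file path are illustrative only, and the file must already exist with the desired size.

#include <errno.h>
#include <stdio.h>
#include <string.h>
#define NETMAP_WITH_LIBS
#include <net/netmap_user.h>

int
main(void)
{
	struct nm_desc *d;

	/* open a VALE port whose buffer pools live in a user file
	 * (name and path are hypothetical) */
	d = nm_open("vale0:p0@/tmp/nm_pools", NULL, 0, NULL);
	if (d == NULL) {
		fprintf(stderr, "nm_open: %s\n",
		    errno ? strerror(errno) : "unrecognized name");
		return 1;
	}
	/* d->mem/d->memsize now describe the file mapping, per the
	 * pi != NULL branch in nm_open() above */
	printf("mem %p, %zu bytes\n", d->mem, (size_t)d->memsize);
	nm_close(d);
	return 0;
}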
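The NM_OPEN_EXTMEM handling suggests a two-port pattern: the child inherits the parent's netmap_pools_info pointer instead of the nr_arg[1-3] overrides. The sketch below assumes the kernel treats a second NETMAP_POOLS_CREATE carrying the same pools-info as an attach to the already-created pools; port names are again illustrative.

#include <errno.h>
#include <stdio.h>
#define NETMAP_WITH_LIBS
#include <net/netmap_user.h>

int
main(void)
{
	struct nm_desc *d1, *d2;

	/* first open creates the pools inside the mapped file */
	d1 = nm_open("vale0:a@/tmp/nm_pools", NULL, 0, NULL);
	if (d1 == NULL)
		return 1;
	/* second open copies d1's POOLS_CREATE pointer via the
	 * NM_OPEN_EXTMEM branch of nm_open() */
	d2 = nm_open("vale0:b", NULL, NM_OPEN_EXTMEM, d1);
	if (d2 == NULL) {
		nm_close(d1);
		return 1;
	}
	/* both descriptors now share the same external buffer pools */
	nm_close(d2);
	nm_close(d1);
	return 0;
}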
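The nm_inject()/nm_dispatch() hunks carry the ring index ri across iterations in the loop header and, more importantly, publish d->cur_rx_ring before the callback runs instead of after the loop. A sketch of a receive loop that relies on this to learn which ring produced d->hdr.buf (the interface name and handler are illustrative):

#include <poll.h>
#include <stdio.h>
#define NETMAP_WITH_LIBS
#include <net/netmap_user.h>

static void
rx_handler(u_char *arg, const struct nm_pkthdr *hdr, const u_char *buf)
{
	struct nm_desc *d = (struct nm_desc *)arg;

	(void)buf;
	/* valid now that nm_dispatch() sets cur_rx_ring per packet */
	printf("%u-byte packet from rx ring %u\n",
	    (unsigned)hdr->len, (unsigned)d->cur_rx_ring);
}

int
main(void)
{
	struct nm_desc *d = nm_open("netmap:em0", NULL, 0, NULL);

	if (d == NULL)
		return 1;
	for (;;) {
		struct pollfd pfd = { .fd = d->fd, .events = POLLIN };

		if (poll(&pfd, 1, 1000) > 0)
			nm_dispatch(d, -1 /* all available */, rx_handler,
			    (u_char *)d);
	}
}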
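Finally, nmreq_pointer_get() mirrors nmreq_pointer_put() from the context above: both are assumed to overlay the user pointer on nr_arg1, so on 64-bit hosts the value spills across nr_arg1/nr_arg2/nr_arg3. That is consistent with nm_open() rejecting the NM_OPEN_ARG[1-3] overrides while POOLS_CREATE is pending, and with the "larger nmreq" comment. A round-trip check, assuming netmap_virt.h is reachable from userspace:

#include <assert.h>
#include <string.h>
#include <net/netmap_user.h>
#include <net/netmap_virt.h>

int
main(void)
{
	struct nmreq req;
	struct netmap_pools_info pi;

	memset(&req, 0, sizeof(req));
	/* stash the pointer over nr_arg1 and read it back */
	nmreq_pointer_put(&req, &pi);
	assert(nmreq_pointer_get(&req) == (void *)&pi);
	return 0;
}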