netmap: align codebase to the current upstream (760279cfb2730a585)

Changelist:
  - Replace netmap passthrough host support with a more general
    mechanism to call TXSYNC/RXSYNC from an in-kernel event-loop.
    No kernel threads are needed to use this feature: the application
    is required to spawn a thread (or a process) and issue a
    SYNC_KLOOP_START (NIOCCTRL) command in the thread body. The
    kernel loop is executed by the ioctl implementation, which returns
    to userspace only when a different thread calls SYNC_KLOOP_STOP
    or the netmap file descriptor is closed.
  - Update the if_ptnet driver to cope with the new data structures,
    and prune all the obsolete ptnetmap code.
  - Add support for "null" netmap ports, useful to allocate netmap_if,
    netmap_ring and netmap buffers to be used by specialized applications
    (e.g. hypervisors). TXSYNC/RXSYNC on these ports have no effect.
  - Various fixes and code refactoring.

Sponsored by:	Sunny Valley Networks
Differential Revision:	https://reviews.freebsd.org/D18015
This commit is contained in:
vmaffione 2018-12-05 11:57:16 +00:00
parent d43668a7b1
commit 9899d78b5d
21 changed files with 2553 additions and 1161 deletions

View File

@ -2517,17 +2517,19 @@ dev/nand/nandsim_swap.c optional nandsim nand
dev/nand/nfc_if.m optional nand
dev/netmap/if_ptnet.c optional netmap inet
dev/netmap/netmap.c optional netmap
dev/netmap/netmap_bdg.c optional netmap
dev/netmap/netmap_freebsd.c optional netmap
dev/netmap/netmap_generic.c optional netmap
dev/netmap/netmap_kloop.c optional netmap
dev/netmap/netmap_legacy.c optional netmap
dev/netmap/netmap_mbq.c optional netmap
dev/netmap/netmap_mem2.c optional netmap
dev/netmap/netmap_monitor.c optional netmap
dev/netmap/netmap_null.c optional netmap
dev/netmap/netmap_offloadings.c optional netmap
dev/netmap/netmap_pipe.c optional netmap
dev/netmap/netmap_pt.c optional netmap
dev/netmap/netmap_vale.c optional netmap
dev/netmap/netmap_legacy.c optional netmap
dev/netmap/netmap_bdg.c optional netmap
# compile-with "${NORMAL_C} -Wconversion -Wextra"
dev/nfsmb/nfsmb.c optional nfsmb pci
dev/nge/if_nge.c optional nge

View File

@ -129,7 +129,7 @@ ixl_netmap_attach(struct ixl_vsi *vsi)
na.ifp = vsi->ifp;
na.na_flags = NAF_BDG_MAYSLEEP;
// XXX check that queues is set.
nm_prinf("queues is %p\n", vsi->queues);
nm_prinf("queues is %p", vsi->queues);
if (vsi->queues) {
na.num_tx_desc = vsi->queues[0].num_desc;
na.num_rx_desc = vsi->queues[0].num_desc;

View File

@ -128,8 +128,8 @@ struct ptnet_queue {
struct resource *irq;
void *cookie;
int kring_id;
struct ptnet_csb_gh *ptgh;
struct ptnet_csb_hg *pthg;
struct nm_csb_atok *atok;
struct nm_csb_ktoa *ktoa;
unsigned int kick;
struct mtx lock;
struct buf_ring *bufring; /* for TX queues */
@ -166,8 +166,8 @@ struct ptnet_softc {
unsigned int num_tx_rings;
struct ptnet_queue *queues;
struct ptnet_queue *rxqueues;
struct ptnet_csb_gh *csb_gh;
struct ptnet_csb_hg *csb_hg;
struct nm_csb_atok *csb_gh;
struct nm_csb_ktoa *csb_hg;
unsigned int min_tx_space;
@ -209,7 +209,7 @@ static void ptnet_tick(void *opaque);
static int ptnet_irqs_init(struct ptnet_softc *sc);
static void ptnet_irqs_fini(struct ptnet_softc *sc);
static uint32_t ptnet_nm_ptctl(if_t ifp, uint32_t cmd);
static uint32_t ptnet_nm_ptctl(struct ptnet_softc *sc, uint32_t cmd);
static int ptnet_nm_config(struct netmap_adapter *na,
struct nm_config_info *info);
static void ptnet_update_vnet_hdr(struct ptnet_softc *sc);
@ -327,7 +327,7 @@ ptnet_attach(device_t dev)
sc->num_rings = num_tx_rings + num_rx_rings;
sc->num_tx_rings = num_tx_rings;
if (sc->num_rings * sizeof(struct ptnet_csb_gh) > PAGE_SIZE) {
if (sc->num_rings * sizeof(struct nm_csb_atok) > PAGE_SIZE) {
device_printf(dev, "CSB cannot handle that many rings (%u)\n",
sc->num_rings);
err = ENOMEM;
@ -342,7 +342,7 @@ ptnet_attach(device_t dev)
err = ENOMEM;
goto err_path;
}
sc->csb_hg = (struct ptnet_csb_hg *)(((char *)sc->csb_gh) + PAGE_SIZE);
sc->csb_hg = (struct nm_csb_ktoa *)(((char *)sc->csb_gh) + PAGE_SIZE);
{
/*
@ -379,8 +379,8 @@ ptnet_attach(device_t dev)
pq->sc = sc;
pq->kring_id = i;
pq->kick = PTNET_IO_KICK_BASE + 4 * i;
pq->ptgh = sc->csb_gh + i;
pq->pthg = sc->csb_hg + i;
pq->atok = sc->csb_gh + i;
pq->ktoa = sc->csb_hg + i;
snprintf(pq->lock_name, sizeof(pq->lock_name), "%s-%d",
device_get_nameunit(dev), i);
mtx_init(&pq->lock, pq->lock_name, NULL, MTX_DEF);
@ -505,12 +505,25 @@ err_path:
return err;
}
/*
 * Stop host sync-kloop if it was running.
 *
 * Sends PTNETMAP_PTCTL_DELETE through ptnet_nm_ptctl() (the returned
 * status is not checked here), then writes zero to the
 * PTNET_IO_CSB_{GH,HG}_{BAH,BAL} registers.
 */
static void
ptnet_device_shutdown(struct ptnet_softc *sc)
{
ptnet_nm_ptctl(sc, PTNETMAP_PTCTL_DELETE);
bus_write_4(sc->iomem, PTNET_IO_CSB_GH_BAH, 0);
bus_write_4(sc->iomem, PTNET_IO_CSB_GH_BAL, 0);
bus_write_4(sc->iomem, PTNET_IO_CSB_HG_BAH, 0);
bus_write_4(sc->iomem, PTNET_IO_CSB_HG_BAL, 0);
}
static int
ptnet_detach(device_t dev)
{
struct ptnet_softc *sc = device_get_softc(dev);
int i;
ptnet_device_shutdown(sc);
#ifdef DEVICE_POLLING
if (sc->ifp->if_capenable & IFCAP_POLLING) {
ether_poll_deregister(sc->ifp);
@ -543,10 +556,6 @@ ptnet_detach(device_t dev)
ptnet_irqs_fini(sc);
if (sc->csb_gh) {
bus_write_4(sc->iomem, PTNET_IO_CSB_GH_BAH, 0);
bus_write_4(sc->iomem, PTNET_IO_CSB_GH_BAL, 0);
bus_write_4(sc->iomem, PTNET_IO_CSB_HG_BAH, 0);
bus_write_4(sc->iomem, PTNET_IO_CSB_HG_BAL, 0);
contigfree(sc->csb_gh, 2*PAGE_SIZE, M_DEVBUF);
sc->csb_gh = NULL;
sc->csb_hg = NULL;
@ -583,9 +592,8 @@ ptnet_detach(device_t dev)
static int
ptnet_suspend(device_t dev)
{
struct ptnet_softc *sc;
struct ptnet_softc *sc = device_get_softc(dev);
sc = device_get_softc(dev);
(void)sc;
return (0);
@ -594,9 +602,8 @@ ptnet_suspend(device_t dev)
static int
ptnet_resume(device_t dev)
{
struct ptnet_softc *sc;
struct ptnet_softc *sc = device_get_softc(dev);
sc = device_get_softc(dev);
(void)sc;
return (0);
@ -605,11 +612,11 @@ ptnet_resume(device_t dev)
static int
ptnet_shutdown(device_t dev)
{
/*
* Suspend already does all of what we need to
* do here; we just never expect to be resumed.
*/
return (ptnet_suspend(dev));
struct ptnet_softc *sc = device_get_softc(dev);
ptnet_device_shutdown(sc);
return (0);
}
static int
@ -796,7 +803,7 @@ ptnet_ioctl(if_t ifp, u_long cmd, caddr_t data)
/* Make sure the worker sees the
* IFF_DRV_RUNNING down. */
PTNET_Q_LOCK(pq);
pq->ptgh->guest_need_kick = 0;
pq->atok->appl_need_kick = 0;
PTNET_Q_UNLOCK(pq);
/* Wait for rescheduling to finish. */
if (pq->taskq) {
@ -810,7 +817,7 @@ ptnet_ioctl(if_t ifp, u_long cmd, caddr_t data)
for (i = 0; i < sc->num_rings; i++) {
pq = sc-> queues + i;
PTNET_Q_LOCK(pq);
pq->ptgh->guest_need_kick = 1;
pq->atok->appl_need_kick = 1;
PTNET_Q_UNLOCK(pq);
}
}
@ -881,7 +888,7 @@ ptnet_init_locked(struct ptnet_softc *sc)
return ret;
}
if (sc->ptna->backend_regifs == 0) {
if (sc->ptna->backend_users == 0) {
ret = ptnet_nm_krings_create(na_nm);
if (ret) {
device_printf(sc->dev, "ptnet_nm_krings_create() "
@ -962,7 +969,7 @@ ptnet_stop(struct ptnet_softc *sc)
ptnet_nm_register(na_dr, 0 /* off */);
if (sc->ptna->backend_regifs == 0) {
if (sc->ptna->backend_users == 0) {
netmap_mem_rings_delete(na_dr);
ptnet_nm_krings_delete(na_nm);
}
@ -1092,9 +1099,8 @@ ptnet_media_status(if_t ifp, struct ifmediareq *ifmr)
}
static uint32_t
ptnet_nm_ptctl(if_t ifp, uint32_t cmd)
ptnet_nm_ptctl(struct ptnet_softc *sc, uint32_t cmd)
{
struct ptnet_softc *sc = if_getsoftc(ifp);
/*
* Write a command and read back error status,
* with zero meaning success.
@ -1130,8 +1136,8 @@ ptnet_sync_from_csb(struct ptnet_softc *sc, struct netmap_adapter *na)
/* Sync krings from the host, reading from
* CSB. */
for (i = 0; i < sc->num_rings; i++) {
struct ptnet_csb_gh *ptgh = sc->queues[i].ptgh;
struct ptnet_csb_hg *pthg = sc->queues[i].pthg;
struct nm_csb_atok *atok = sc->queues[i].atok;
struct nm_csb_ktoa *ktoa = sc->queues[i].ktoa;
struct netmap_kring *kring;
if (i < na->num_tx_rings) {
@ -1139,15 +1145,15 @@ ptnet_sync_from_csb(struct ptnet_softc *sc, struct netmap_adapter *na)
} else {
kring = na->rx_rings[i - na->num_tx_rings];
}
kring->rhead = kring->ring->head = ptgh->head;
kring->rcur = kring->ring->cur = ptgh->cur;
kring->nr_hwcur = pthg->hwcur;
kring->rhead = kring->ring->head = atok->head;
kring->rcur = kring->ring->cur = atok->cur;
kring->nr_hwcur = ktoa->hwcur;
kring->nr_hwtail = kring->rtail =
kring->ring->tail = pthg->hwtail;
kring->ring->tail = ktoa->hwtail;
ND("%d,%d: csb {hc %u h %u c %u ht %u}", t, i,
pthg->hwcur, ptgh->head, ptgh->cur,
pthg->hwtail);
ktoa->hwcur, atok->head, atok->cur,
ktoa->hwtail);
ND("%d,%d: kring {hc %u rh %u rc %u h %u c %u ht %u rt %u t %u}",
t, i, kring->nr_hwcur, kring->rhead, kring->rcur,
kring->ring->head, kring->ring->cur, kring->nr_hwtail,
@ -1178,7 +1184,7 @@ ptnet_nm_register(struct netmap_adapter *na, int onoff)
int i;
if (!onoff) {
sc->ptna->backend_regifs--;
sc->ptna->backend_users--;
}
/* If this is the last netmap client, guest interrupt enable flags may
@ -1191,17 +1197,17 @@ ptnet_nm_register(struct netmap_adapter *na, int onoff)
D("Exit netmap mode, re-enable interrupts");
for (i = 0; i < sc->num_rings; i++) {
pq = sc->queues + i;
pq->ptgh->guest_need_kick = 1;
pq->atok->appl_need_kick = 1;
}
}
if (onoff) {
if (sc->ptna->backend_regifs == 0) {
if (sc->ptna->backend_users == 0) {
/* Initialize notification enable fields in the CSB. */
for (i = 0; i < sc->num_rings; i++) {
pq = sc->queues + i;
pq->pthg->host_need_kick = 1;
pq->ptgh->guest_need_kick =
pq->ktoa->kern_need_kick = 1;
pq->atok->appl_need_kick =
(!(ifp->if_capenable & IFCAP_POLLING)
&& i >= sc->num_tx_rings);
}
@ -1211,17 +1217,13 @@ ptnet_nm_register(struct netmap_adapter *na, int onoff)
/* Make sure the host adapter passed through is ready
* for txsync/rxsync. */
ret = ptnet_nm_ptctl(ifp, PTNETMAP_PTCTL_CREATE);
ret = ptnet_nm_ptctl(sc, PTNETMAP_PTCTL_CREATE);
if (ret) {
return ret;
}
}
/* Sync from CSB must be done after REGIF PTCTL. Skip this
* step only if this is a netmap client and it is not the
* first one. */
if ((!native && sc->ptna->backend_regifs == 0) ||
(native && na->active_fds == 0)) {
/* Align the guest krings and rings to the state stored
* in the CSB. */
ptnet_sync_from_csb(sc, na);
}
@ -1254,19 +1256,13 @@ ptnet_nm_register(struct netmap_adapter *na, int onoff)
}
}
/* Sync from CSB must be done before UNREGIF PTCTL, on the last
* netmap client. */
if (native && na->active_fds == 0) {
ptnet_sync_from_csb(sc, na);
}
if (sc->ptna->backend_regifs == 0) {
ret = ptnet_nm_ptctl(ifp, PTNETMAP_PTCTL_DELETE);
if (sc->ptna->backend_users == 0) {
ret = ptnet_nm_ptctl(sc, PTNETMAP_PTCTL_DELETE);
}
}
if (onoff) {
sc->ptna->backend_regifs++;
sc->ptna->backend_users++;
}
return ret;
@ -1279,7 +1275,7 @@ ptnet_nm_txsync(struct netmap_kring *kring, int flags)
struct ptnet_queue *pq = sc->queues + kring->ring_id;
bool notify;
notify = netmap_pt_guest_txsync(pq->ptgh, pq->pthg, kring, flags);
notify = netmap_pt_guest_txsync(pq->atok, pq->ktoa, kring, flags);
if (notify) {
ptnet_kick(pq);
}
@ -1294,7 +1290,7 @@ ptnet_nm_rxsync(struct netmap_kring *kring, int flags)
struct ptnet_queue *pq = sc->rxqueues + kring->ring_id;
bool notify;
notify = netmap_pt_guest_rxsync(pq->ptgh, pq->pthg, kring, flags);
notify = netmap_pt_guest_rxsync(pq->atok, pq->ktoa, kring, flags);
if (notify) {
ptnet_kick(pq);
}
@ -1310,7 +1306,7 @@ ptnet_nm_intr(struct netmap_adapter *na, int onoff)
for (i = 0; i < sc->num_rings; i++) {
struct ptnet_queue *pq = sc->queues + i;
pq->ptgh->guest_need_kick = onoff;
pq->atok->appl_need_kick = onoff;
}
}
@ -1676,25 +1672,13 @@ ptnet_rx_csum(struct mbuf *m, struct virtio_net_hdr *hdr)
}
/* End of offloading-related functions to be shared with vtnet. */
/*
 * Refresh the kring state from the CSB and publish the new tail.
 *
 * Calls ptnetmap_guest_read_kring_csb() on the given CSB section, then
 * copies kring->nr_hwtail into both kring->rtail and kring->ring->tail.
 */
static inline void
ptnet_sync_tail(struct ptnet_csb_hg *pthg, struct netmap_kring *kring)
{
struct netmap_ring *ring = kring->ring;
/* Update hwcur and hwtail as known by the host. */
ptnetmap_guest_read_kring_csb(pthg, kring);
/* nm_sync_finalize */
ring->tail = kring->rtail = kring->nr_hwtail;
}
static void
ptnet_ring_update(struct ptnet_queue *pq, struct netmap_kring *kring,
unsigned int head, unsigned int sync_flags)
{
struct netmap_ring *ring = kring->ring;
struct ptnet_csb_gh *ptgh = pq->ptgh;
struct ptnet_csb_hg *pthg = pq->pthg;
struct nm_csb_atok *atok = pq->atok;
struct nm_csb_ktoa *ktoa = pq->ktoa;
/* Some packets have been pushed to the netmap ring. We have
* to tell the host to process the new packets, updating cur
@ -1704,11 +1688,11 @@ ptnet_ring_update(struct ptnet_queue *pq, struct netmap_kring *kring,
/* Mimic nm_txsync_prologue/nm_rxsync_prologue. */
kring->rcur = kring->rhead = head;
ptnetmap_guest_write_kring_csb(ptgh, kring->rcur, kring->rhead);
ptnetmap_guest_write_kring_csb(atok, kring->rcur, kring->rhead);
/* Kick the host if needed. */
if (NM_ACCESS_ONCE(pthg->host_need_kick)) {
ptgh->sync_flags = sync_flags;
if (NM_ACCESS_ONCE(ktoa->kern_need_kick)) {
atok->sync_flags = sync_flags;
ptnet_kick(pq);
}
}
@ -1728,8 +1712,8 @@ ptnet_drain_transmit_queue(struct ptnet_queue *pq, unsigned int budget,
struct netmap_adapter *na = &sc->ptna->dr.up;
if_t ifp = sc->ifp;
unsigned int batch_count = 0;
struct ptnet_csb_gh *ptgh;
struct ptnet_csb_hg *pthg;
struct nm_csb_atok *atok;
struct nm_csb_ktoa *ktoa;
struct netmap_kring *kring;
struct netmap_ring *ring;
struct netmap_slot *slot;
@ -1758,8 +1742,8 @@ ptnet_drain_transmit_queue(struct ptnet_queue *pq, unsigned int budget,
return ENETDOWN;
}
ptgh = pq->ptgh;
pthg = pq->pthg;
atok = pq->atok;
ktoa = pq->ktoa;
kring = na->tx_rings[pq->kring_id];
ring = kring->ring;
lim = kring->nkr_num_slots - 1;
@ -1771,17 +1755,17 @@ ptnet_drain_transmit_queue(struct ptnet_queue *pq, unsigned int budget,
/* We ran out of slot, let's see if the host has
* freed up some, by reading hwcur and hwtail from
* the CSB. */
ptnet_sync_tail(pthg, kring);
ptnet_sync_tail(ktoa, kring);
if (PTNET_TX_NOSPACE(head, kring, minspace)) {
/* Still no slots available. Reactivate the
* interrupts so that we can be notified
* when some free slots are made available by
* the host. */
ptgh->guest_need_kick = 1;
atok->appl_need_kick = 1;
/* Double-check. */
ptnet_sync_tail(pthg, kring);
ptnet_sync_tail(ktoa, kring);
if (likely(PTNET_TX_NOSPACE(head, kring,
minspace))) {
break;
@ -1790,7 +1774,7 @@ ptnet_drain_transmit_queue(struct ptnet_queue *pq, unsigned int budget,
RD(1, "Found more slots by doublecheck");
/* More slots were freed before reactivating
* the interrupts. */
ptgh->guest_need_kick = 0;
atok->appl_need_kick = 0;
}
}
@ -2020,8 +2004,8 @@ ptnet_rx_eof(struct ptnet_queue *pq, unsigned int budget, bool may_resched)
{
struct ptnet_softc *sc = pq->sc;
bool have_vnet_hdr = sc->vnet_hdr_len;
struct ptnet_csb_gh *ptgh = pq->ptgh;
struct ptnet_csb_hg *pthg = pq->pthg;
struct nm_csb_atok *atok = pq->atok;
struct nm_csb_ktoa *ktoa = pq->ktoa;
struct netmap_adapter *na = &sc->ptna->dr.up;
struct netmap_kring *kring = na->rx_rings[pq->kring_id];
struct netmap_ring *ring = kring->ring;
@ -2053,21 +2037,21 @@ host_sync:
/* We ran out of slot, let's see if the host has
* added some, by reading hwcur and hwtail from
* the CSB. */
ptnet_sync_tail(pthg, kring);
ptnet_sync_tail(ktoa, kring);
if (head == ring->tail) {
/* Still no slots available. Reactivate
* interrupts as they were disabled by the
* host thread right before issuing the
* last interrupt. */
ptgh->guest_need_kick = 1;
atok->appl_need_kick = 1;
/* Double-check. */
ptnet_sync_tail(pthg, kring);
ptnet_sync_tail(ktoa, kring);
if (likely(head == ring->tail)) {
break;
}
ptgh->guest_need_kick = 0;
atok->appl_need_kick = 0;
}
}

View File

@ -79,7 +79,7 @@ vtnet_free_used(struct virtqueue *vq, int netmap_bufs, enum txrx t, int idx)
}
if (deq)
nm_prinf("%d sgs dequeued from %s-%d (netmap=%d)\n",
nm_prinf("%d sgs dequeued from %s-%d (netmap=%d)",
deq, nm_txrx2str(t), idx, netmap_bufs);
}
@ -230,7 +230,7 @@ vtnet_netmap_txsync(struct netmap_kring *kring, int flags)
/*writeable=*/0);
if (unlikely(err)) {
if (err != ENOSPC)
nm_prerr("virtqueue_enqueue(%s) failed: %d\n",
nm_prerr("virtqueue_enqueue(%s) failed: %d",
kring->name, err);
break;
}
@ -251,7 +251,7 @@ vtnet_netmap_txsync(struct netmap_kring *kring, int flags)
if (token == NULL)
break;
if (unlikely(token != (void *)txq))
nm_prerr("BUG: TX token mismatch\n");
nm_prerr("BUG: TX token mismatch");
else
n++;
}
@ -307,7 +307,7 @@ vtnet_netmap_kring_refill(struct netmap_kring *kring, u_int nm_i, u_int head)
/*readable=*/0, /*writeable=*/sg.sg_nseg);
if (unlikely(err)) {
if (err != ENOSPC)
nm_prerr("virtqueue_enqueue(%s) failed: %d\n",
nm_prerr("virtqueue_enqueue(%s) failed: %d",
kring->name, err);
break;
}
@ -391,7 +391,7 @@ vtnet_netmap_rxsync(struct netmap_kring *kring, int flags)
break;
}
if (unlikely(token != (void *)rxq)) {
nm_prerr("BUG: RX token mismatch\n");
nm_prerr("BUG: RX token mismatch");
} else {
/* Skip the virtio-net header. */
len -= sc->vtnet_hdr_size;
@ -533,7 +533,7 @@ vtnet_netmap_attach(struct vtnet_softc *sc)
netmap_attach(&na);
nm_prinf("vtnet attached txq=%d, txd=%d rxq=%d, rxd=%d\n",
nm_prinf("vtnet attached txq=%d, txd=%d rxq=%d, rxd=%d",
na.num_tx_rings, na.num_tx_desc,
na.num_tx_rings, na.num_rx_desc);
}

File diff suppressed because it is too large Load Diff

View File

@ -126,7 +126,7 @@ netmap_bdg_name(struct netmap_vp_adapter *vp)
* Right now we have a static array and deletions are protected
* by an exclusive lock.
*/
static struct nm_bridge *nm_bridges;
struct nm_bridge *nm_bridges;
#endif /* !CONFIG_NET_NS */
@ -139,15 +139,15 @@ nm_is_id_char(const char c)
(c == '_');
}
/* Validate the name of a VALE bridge port and return the
/* Validate the name of a bdg port and return the
* position of the ":" character. */
static int
nm_vale_name_validate(const char *name)
nm_bdg_name_validate(const char *name, size_t prefixlen)
{
int colon_pos = -1;
int i;
if (!name || strlen(name) < strlen(NM_BDG_NAME)) {
if (!name || strlen(name) < prefixlen) {
return -1;
}
@ -186,9 +186,10 @@ nm_find_bridge(const char *name, int create, struct netmap_bdg_ops *ops)
netmap_bns_getbridges(&bridges, &num_bridges);
namelen = nm_vale_name_validate(name);
namelen = nm_bdg_name_validate(name,
(ops != NULL ? strlen(ops->name) : 0));
if (namelen < 0) {
D("invalid bridge name %s", name ? name : NULL);
nm_prerr("invalid bridge name %s", name ? name : NULL);
return NULL;
}
@ -213,7 +214,7 @@ nm_find_bridge(const char *name, int create, struct netmap_bdg_ops *ops)
b->bdg_active_ports);
b->ht = nm_os_malloc(sizeof(struct nm_hash_ent) * NM_BDG_HASH);
if (b->ht == NULL) {
D("failed to allocate hash table");
nm_prerr("failed to allocate hash table");
return NULL;
}
strncpy(b->bdg_basename, name, namelen);
@ -222,7 +223,7 @@ nm_find_bridge(const char *name, int create, struct netmap_bdg_ops *ops)
for (i = 0; i < NM_BDG_MAXPORTS; i++)
b->bdg_port_index[i] = i;
/* set the default function */
b->bdg_ops = ops;
b->bdg_ops = b->bdg_saved_ops = *ops;
b->private_data = b->ht;
b->bdg_flags = 0;
NM_BNS_GET(b);
@ -240,12 +241,48 @@ netmap_bdg_free(struct nm_bridge *b)
ND("marking bridge %s as free", b->bdg_basename);
nm_os_free(b->ht);
b->bdg_ops = NULL;
memset(&b->bdg_ops, 0, sizeof(b->bdg_ops));
memset(&b->bdg_saved_ops, 0, sizeof(b->bdg_saved_ops));
b->bdg_flags = 0;
NM_BNS_PUT(b);
return 0;
}
/* Called by external kernel modules (e.g., Openvswitch)
* to modify the private data previously given to regops().
* 'name' may be just bridge's name (including ':' if it
* is not just NM_BDG_NAME).
* Called without NMG_LOCK.
*
* The bridge is looked up by 'name' without creating it. 'callback' is
* invoked with the bridge's current private data, 'callback_data' and a
* pointer to the local error variable; whatever it returns becomes the
* bridge's new private data.
*
* Returns EINVAL if no bridge is found, EACCES if 'auth_token' is
* rejected by nm_bdg_valid_auth_token(), otherwise the value the
* callback left in the error variable (0 if it did not touch it).
*/
int
netmap_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback,
void *callback_data, void *auth_token)
{
void *private_data = NULL;
struct nm_bridge *b;
int error = 0;
NMG_LOCK();
b = nm_find_bridge(name, 0 /* don't create */, NULL);
if (!b) {
error = EINVAL;
goto unlock_update_priv;
}
if (!nm_bdg_valid_auth_token(b, auth_token)) {
error = EACCES;
goto unlock_update_priv;
}
/* The callback runs with both NMG_LOCK and the bridge write lock held. */
BDG_WLOCK(b);
private_data = callback(b->private_data, callback_data, &error);
/* The returned pointer is stored even when the callback reported an error. */
b->private_data = private_data;
BDG_WUNLOCK(b);
unlock_update_priv:
NMG_UNLOCK();
return error;
}
/* remove from bridge b the ports in slots hw and sw
* (sw can be -1 if not needed)
@ -267,8 +304,8 @@ netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
acquire BDG_WLOCK() and copy back the array.
*/
if (netmap_verbose)
D("detach %d and %d (lim %d)", hw, sw, lim);
if (netmap_debug & NM_DEBUG_BDG)
nm_prinf("detach %d and %d (lim %d)", hw, sw, lim);
/* make a copy of the list of active ports, update it,
* and then copy back within BDG_WLOCK().
*/
@ -291,12 +328,12 @@ netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
}
}
if (hw >= 0 || sw >= 0) {
D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
nm_prerr("delete failed hw %d sw %d, should panic...", hw, sw);
}
BDG_WLOCK(b);
if (b->bdg_ops->dtor)
b->bdg_ops->dtor(b->bdg_ports[s_hw]);
if (b->bdg_ops.dtor)
b->bdg_ops.dtor(b->bdg_ports[s_hw]);
b->bdg_ports[s_hw] = NULL;
if (s_sw >= 0) {
b->bdg_ports[s_sw] = NULL;
@ -402,7 +439,7 @@ netmap_get_bdg_na(struct nmreq_header *hdr, struct netmap_adapter **na,
/* yes we should, see if we have space to attach entries */
needed = 2; /* in some cases we only need 1 */
if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
D("bridge full %d, cannot create new port", b->bdg_active_ports);
nm_prerr("bridge full %d, cannot create new port", b->bdg_active_ports);
return ENOMEM;
}
/* record the next two ports available, but do not allocate yet */
@ -428,9 +465,10 @@ netmap_get_bdg_na(struct nmreq_header *hdr, struct netmap_adapter **na,
}
/* bdg_netmap_attach creates a struct netmap_adapter */
error = b->bdg_ops->vp_create(hdr, NULL, nmd, &vpna);
error = b->bdg_ops.vp_create(hdr, NULL, nmd, &vpna);
if (error) {
D("error %d", error);
if (netmap_debug & NM_DEBUG_BDG)
nm_prerr("error %d", error);
goto out;
}
/* shortcut - we can skip get_hw_na(),
@ -459,7 +497,7 @@ netmap_get_bdg_na(struct nmreq_header *hdr, struct netmap_adapter **na,
/* host adapter might not be created */
error = hw->nm_bdg_attach(nr_name, hw, b);
if (error == NM_NEED_BWRAP) {
error = b->bdg_ops->bwrap_attach(nr_name, hw);
error = b->bdg_ops.bwrap_attach(nr_name, hw);
}
if (error)
goto out;
@ -502,142 +540,13 @@ out:
return error;
}
/* Process NETMAP_REQ_VALE_ATTACH.
*
* hdr->nr_body is interpreted as a struct nmreq_vale_attach and
* hdr->nr_name is used to look up the bridge and the port.
* 'auth_token' is only checked when a bridge matching hdr->nr_name
* already exists.
*
* On success returns 0 and stores the port index in req->port_index;
* netmap_adapter_put() is not called on that path.
* On failure returns EACCES, EINVAL, EBUSY, or the error reported by
* netmap_get_vale_na() or na->nm_bdg_ctl().
* The whole body runs with NMG_LOCK held.
*
* NOTE(review): nmd obtained from netmap_mem_find() is not released on
* any path in this function -- confirm whether it holds a reference.
*/
int
nm_bdg_ctl_attach(struct nmreq_header *hdr, void *auth_token)
{
struct nmreq_vale_attach *req =
(struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
struct netmap_vp_adapter * vpna;
struct netmap_adapter *na = NULL;
struct netmap_mem_d *nmd = NULL;
struct nm_bridge *b = NULL;
int error;
NMG_LOCK();
/* permission check for modified bridges */
b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
error = EACCES;
goto unlock_exit;
}
/* A non-zero nr_mem_id must resolve to an existing allocator. */
if (req->reg.nr_mem_id) {
nmd = netmap_mem_find(req->reg.nr_mem_id);
if (nmd == NULL) {
error = EINVAL;
goto unlock_exit;
}
}
/* check for existing one */
/* The error from this first lookup is overwritten below; only 'na' matters. */
error = netmap_get_vale_na(hdr, &na, nmd, 0);
if (na) {
error = EBUSY;
goto unref_exit;
}
error = netmap_get_vale_na(hdr, &na,
nmd, 1 /* create if not exists */);
if (error) { /* no device */
goto unlock_exit;
}
if (na == NULL) { /* VALE prefix missing */
error = EINVAL;
goto unlock_exit;
}
if (NETMAP_OWNED_BY_ANY(na)) {
error = EBUSY;
goto unref_exit;
}
if (na->nm_bdg_ctl) {
/* nop for VALE ports. The bwrap needs to put the hwna
* in netmap mode (see netmap_bwrap_bdg_ctl)
*/
error = na->nm_bdg_ctl(hdr, na);
if (error)
goto unref_exit;
ND("registered %s to netmap-mode", na->name);
}
/* Report the bridge port index back to the caller. */
vpna = (struct netmap_vp_adapter *)na;
req->port_index = vpna->bdg_port;
NMG_UNLOCK();
return 0;
unref_exit:
netmap_adapter_put(na);
unlock_exit:
NMG_UNLOCK();
return error;
}
/* Return non-zero if the adapter's nm_register callback is
* netmap_bwrap_reg, zero otherwise.
*/
static inline int
nm_is_bwrap(struct netmap_adapter *na)
{
return na->nm_register == netmap_bwrap_reg;
}
/* Process NETMAP_REQ_VALE_DETACH.
*
* hdr->nr_body is interpreted as a struct nmreq_vale_detach and
* hdr->nr_name is used to look up the bridge and the port.
* 'auth_token' is only checked when a bridge matching hdr->nr_name
* exists.
*
* Stores the port index in nmreq_det->port_index before calling
* na->nm_bdg_ctl(). Returns 0 on success, otherwise EACCES, EINVAL,
* EBUSY, or the error reported by netmap_get_vale_na() or
* na->nm_bdg_ctl(). Once an adapter has been obtained,
* netmap_adapter_put() is called on every path, including success.
* The whole body runs with NMG_LOCK held.
*/
int
nm_bdg_ctl_detach(struct nmreq_header *hdr, void *auth_token)
{
struct nmreq_vale_detach *nmreq_det = (void *)(uintptr_t)hdr->nr_body;
struct netmap_vp_adapter *vpna;
struct netmap_adapter *na;
struct nm_bridge *b = NULL;
int error;
NMG_LOCK();
/* permission check for modified bridges */
b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
error = EACCES;
goto unlock_exit;
}
error = netmap_get_vale_na(hdr, &na, NULL, 0 /* don't create */);
if (error) { /* no device, or another bridge or user owns the device */
goto unlock_exit;
}
if (na == NULL) { /* VALE prefix missing */
error = EINVAL;
goto unlock_exit;
} else if (nm_is_bwrap(na) &&
((struct netmap_bwrap_adapter *)na)->na_polling_state) {
/* Don't detach a NIC with polling */
error = EBUSY;
goto unref_exit;
}
vpna = (struct netmap_vp_adapter *)na;
if (na->na_vp != vpna) {
/* trying to detach first attach of VALE persistent port attached
* to 2 bridges
*/
error = EBUSY;
goto unref_exit;
}
nmreq_det->port_index = vpna->bdg_port;
if (na->nm_bdg_ctl) {
/* remove the port from bridge. The bwrap
* also needs to put the hwna in normal mode
*/
error = na->nm_bdg_ctl(hdr, na);
}
/* Success falls through: the reference is dropped in every case. */
unref_exit:
netmap_adapter_put(na);
unlock_exit:
NMG_UNLOCK();
return error;
}
struct nm_bdg_polling_state;
struct
@ -661,7 +570,7 @@ struct nm_bdg_polling_state {
};
static void
netmap_bwrap_polling(void *data, int is_kthread)
netmap_bwrap_polling(void *data)
{
struct nm_bdg_kthread *nbk = data;
struct netmap_bwrap_adapter *bna;
@ -693,7 +602,6 @@ nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps)
bzero(&kcfg, sizeof(kcfg));
kcfg.worker_fn = netmap_bwrap_polling;
kcfg.use_kthread = 1;
for (i = 0; i < bps->ncpus; i++) {
struct nm_bdg_kthread *t = bps->kthreads + i;
int all = (bps->ncpus == 1 &&
@ -703,8 +611,9 @@ nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps)
t->bps = bps;
t->qfirst = all ? bps->qfirst /* must be 0 */: affinity;
t->qlast = all ? bps->qlast : t->qfirst + 1;
D("kthread %d a:%u qf:%u ql:%u", i, affinity, t->qfirst,
t->qlast);
if (netmap_verbose)
nm_prinf("kthread %d a:%u qf:%u ql:%u", i, affinity, t->qfirst,
t->qlast);
kcfg.type = i;
kcfg.worker_private = t;
@ -732,7 +641,7 @@ nm_bdg_polling_start_kthreads(struct nm_bdg_polling_state *bps)
int error, i, j;
if (!bps) {
D("polling is not configured");
nm_prerr("polling is not configured");
return EFAULT;
}
bps->stopped = false;
@ -741,7 +650,7 @@ nm_bdg_polling_start_kthreads(struct nm_bdg_polling_state *bps)
struct nm_bdg_kthread *t = bps->kthreads + i;
error = nm_os_kctx_worker_start(t->nmk);
if (error) {
D("error in nm_kthread_start()");
nm_prerr("error in nm_kthread_start(): %d", error);
goto cleanup;
}
}
@ -784,10 +693,10 @@ get_polling_cfg(struct nmreq_vale_polling *req, struct netmap_adapter *na,
avail_cpus = nm_os_ncpus();
if (req_cpus == 0) {
D("req_cpus must be > 0");
nm_prerr("req_cpus must be > 0");
return EINVAL;
} else if (req_cpus >= avail_cpus) {
D("Cannot use all the CPUs in the system");
nm_prerr("Cannot use all the CPUs in the system");
return EINVAL;
}
@ -797,7 +706,7 @@ get_polling_cfg(struct nmreq_vale_polling *req, struct netmap_adapter *na,
* For example, if nr_first_cpu_id=2 and nr_num_polling_cpus=2,
* ring 2 and 3 are polled by core 2 and 3, respectively. */
if (i + req_cpus > nma_get_nrings(na, NR_RX)) {
D("Rings %u-%u not in range (have %d rings)",
nm_prerr("Rings %u-%u not in range (have %d rings)",
i, i + req_cpus, nma_get_nrings(na, NR_RX));
return EINVAL;
}
@ -809,7 +718,7 @@ get_polling_cfg(struct nmreq_vale_polling *req, struct netmap_adapter *na,
/* Poll all the rings using a core specified by nr_first_cpu_id.
* the number of cores must be 1. */
if (req_cpus != 1) {
D("ncpus must be 1 for NETMAP_POLLING_MODE_SINGLE_CPU "
nm_prerr("ncpus must be 1 for NETMAP_POLLING_MODE_SINGLE_CPU "
"(was %d)", req_cpus);
return EINVAL;
}
@ -817,7 +726,7 @@ get_polling_cfg(struct nmreq_vale_polling *req, struct netmap_adapter *na,
qlast = nma_get_nrings(na, NR_RX);
core_from = i;
} else {
D("Invalid polling mode");
nm_prerr("Invalid polling mode");
return EINVAL;
}
@ -826,7 +735,7 @@ get_polling_cfg(struct nmreq_vale_polling *req, struct netmap_adapter *na,
bps->qlast = qlast;
bps->cpu_from = core_from;
bps->ncpus = req_cpus;
D("%s qfirst %u qlast %u cpu_from %u ncpus %u",
nm_prinf("%s qfirst %u qlast %u cpu_from %u ncpus %u",
req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU ?
"MULTI" : "SINGLE",
qfirst, qlast, core_from, req_cpus);
@ -842,7 +751,7 @@ nm_bdg_ctl_polling_start(struct nmreq_vale_polling *req, struct netmap_adapter *
bna = (struct netmap_bwrap_adapter *)na;
if (bna->na_polling_state) {
D("ERROR adapter already in polling mode");
nm_prerr("ERROR adapter already in polling mode");
return EFAULT;
}
@ -871,7 +780,7 @@ nm_bdg_ctl_polling_start(struct nmreq_vale_polling *req, struct netmap_adapter *
/* start kthread now */
error = nm_bdg_polling_start_kthreads(bps);
if (error) {
D("ERROR nm_bdg_polling_start_kthread()");
nm_prerr("ERROR nm_bdg_polling_start_kthread()");
nm_os_free(bps->kthreads);
nm_os_free(bps);
bna->na_polling_state = NULL;
@ -887,7 +796,7 @@ nm_bdg_ctl_polling_stop(struct netmap_adapter *na)
struct nm_bdg_polling_state *bps;
if (!bna->na_polling_state) {
D("ERROR adapter is not in polling mode");
nm_prerr("ERROR adapter is not in polling mode");
return EFAULT;
}
bps = bna->na_polling_state;
@ -932,86 +841,6 @@ nm_bdg_polling(struct nmreq_header *hdr)
return error;
}
/* Process NETMAP_REQ_VALE_LIST.
*
* hdr->nr_body is interpreted as a struct nmreq_vale_list.
*
* With a non-empty hdr->nr_name: the name must start with NM_BDG_NAME
* (else EINVAL) and must match an existing bridge (else ENOENT). On
* success nr_bridge_idx is set to the bridge index, and nr_port_idx to
* the index of the port whose name equals hdr->nr_name, or
* NM_BDG_NOPORT if no port matches.
*
* With an empty hdr->nr_name: scans for the first non-NULL port starting
* at bridge nr_bridge_idx and port nr_port_idx, copies its name into
* hdr->nr_name and writes the position found back into the request.
* Returns ENOENT if the scan finds nothing.
*/
int
netmap_bdg_list(struct nmreq_header *hdr)
{
struct nmreq_vale_list *req =
(struct nmreq_vale_list *)(uintptr_t)hdr->nr_body;
int namelen = strlen(hdr->nr_name);
struct nm_bridge *b, *bridges;
struct netmap_vp_adapter *vpna;
int error = 0, i, j;
u_int num_bridges;
/* The bridge array is fetched before NMG_LOCK is taken. */
netmap_bns_getbridges(&bridges, &num_bridges);
/* this is used to enumerate bridges and ports */
if (namelen) { /* look up indexes of bridge and port */
if (strncmp(hdr->nr_name, NM_BDG_NAME,
strlen(NM_BDG_NAME))) {
return EINVAL;
}
NMG_LOCK();
b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
if (!b) {
NMG_UNLOCK();
return ENOENT;
}
req->nr_bridge_idx = b - bridges; /* bridge index */
req->nr_port_idx = NM_BDG_NOPORT;
for (j = 0; j < b->bdg_active_ports; j++) {
i = b->bdg_port_index[j];
vpna = b->bdg_ports[i];
if (vpna == NULL) {
D("This should not happen");
continue;
}
/* the former and the latter identify a
* virtual port and a NIC, respectively
*/
if (!strcmp(vpna->up.name, hdr->nr_name)) {
req->nr_port_idx = i; /* port index */
break;
}
}
NMG_UNLOCK();
} else {
/* return the first non-empty entry starting from
* bridge nr_bridge_idx and port nr_port_idx.
*
* Users can detect the end of the same bridge by
* seeing the new and old value of nr_bridge_idx, and can
* detect the end of all the bridge by error != 0
*/
i = req->nr_bridge_idx;
j = req->nr_port_idx;
NMG_LOCK();
for (error = ENOENT; i < NM_BRIDGES; i++) {
b = bridges + i;
for ( ; j < NM_BDG_MAXPORTS; j++) {
if (b->bdg_ports[j] == NULL)
continue;
vpna = b->bdg_ports[j];
/* write back the VALE switch name */
/* NOTE(review): strncpy() leaves hdr->nr_name unterminated if
* the port name is IFNAMSIZ bytes or longer -- confirm the
* name length limits.
*/
strncpy(hdr->nr_name, vpna->up.name,
(size_t)IFNAMSIZ);
error = 0;
goto out;
}
j = 0; /* following bridges scan from 0 */
}
out:
/* Written back on both the found and the not-found path. */
req->nr_bridge_idx = i;
req->nr_port_idx = j;
NMG_UNLOCK();
}
return error;
}
/* Called by external kernel modules (e.g., Openvswitch)
* to set configure/lookup/dtor functions of a VALE instance.
* Register callbacks to the given bridge. 'name' may be just
@ -1041,12 +870,19 @@ netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *privat
if (!bdg_ops) {
/* resetting the bridge */
bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
b->bdg_ops = NULL;
b->bdg_ops = b->bdg_saved_ops;
b->private_data = b->ht;
} else {
/* modifying the bridge */
b->private_data = private_data;
b->bdg_ops = bdg_ops;
#define nm_bdg_override(m) if (bdg_ops->m) b->bdg_ops.m = bdg_ops->m
nm_bdg_override(lookup);
nm_bdg_override(config);
nm_bdg_override(dtor);
nm_bdg_override(vp_create);
nm_bdg_override(bwrap_attach);
#undef nm_bdg_override
}
BDG_WUNLOCK(b);
@ -1071,8 +907,8 @@ netmap_bdg_config(struct nm_ifreq *nr)
NMG_UNLOCK();
/* Don't call config() with NMG_LOCK() held */
BDG_RLOCK(b);
if (b->bdg_ops->config != NULL)
error = b->bdg_ops->config(nr);
if (b->bdg_ops.config != NULL)
error = b->bdg_ops.config(nr);
BDG_RUNLOCK(b);
return error;
}
@ -1137,7 +973,7 @@ netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags)
int n;
if (head > lim) {
D("ouch dangerous reset!!!");
nm_prerr("ouch dangerous reset!!!");
n = netmap_ring_reinit(kring);
goto done;
}
@ -1154,7 +990,7 @@ netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags)
void *addr = NMB(na, slot);
if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */
D("bad buffer index %d, ignore ?",
nm_prerr("bad buffer index %d, ignore ?",
slot->buf_idx);
}
slot->flags &= ~NS_BUF_CHANGED;
@ -1283,8 +1119,8 @@ netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags)
int ret = NM_IRQ_COMPLETED;
int error;
if (netmap_verbose)
D("%s %s 0x%x", na->name, kring->name, flags);
if (netmap_debug & NM_DEBUG_RXINTR)
nm_prinf("%s %s 0x%x", na->name, kring->name, flags);
bkring = vpna->up.tx_rings[ring_nr];
@ -1293,8 +1129,8 @@ netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags)
return EIO;
}
if (netmap_verbose)
D("%s head %d cur %d tail %d", na->name,
if (netmap_debug & NM_DEBUG_RXINTR)
nm_prinf("%s head %d cur %d tail %d", na->name,
kring->rhead, kring->rcur, kring->rtail);
/* simulate a user wakeup on the rx ring
@ -1305,7 +1141,7 @@ netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags)
goto put_out;
if (kring->nr_hwcur == kring->nr_hwtail) {
if (netmap_verbose)
D("how strange, interrupt with no packets on %s",
nm_prerr("how strange, interrupt with no packets on %s",
na->name);
goto put_out;
}
@ -1593,8 +1429,8 @@ netmap_bwrap_notify(struct netmap_kring *kring, int flags)
ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
na->name, ring_n,
kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
ring->head, ring->cur, ring->tail,
hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_ring->rtail);
kring->rhead, kring->rcur, kring->rtail,
hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
/* second step: the new packets are sent on the tx ring
* (which is actually the same ring)
*/
@ -1612,7 +1448,7 @@ netmap_bwrap_notify(struct netmap_kring *kring, int flags)
ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
na->name, ring_n,
kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
ring->head, ring->cur, ring->tail,
kring->rhead, kring->rcur, kring->rtail,
hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
put_out:
nm_kr_put(hw_kring);
@ -1688,7 +1524,7 @@ netmap_bwrap_attach_common(struct netmap_adapter *na,
/* make sure the NIC is not already in use */
if (NETMAP_OWNED_BY_ANY(hwna)) {
D("NIC %s busy, cannot attach to bridge", hwna->name);
nm_prerr("NIC %s busy, cannot attach to bridge", hwna->name);
return EBUSY;
}
@ -1756,6 +1592,8 @@ netmap_bwrap_attach_common(struct netmap_adapter *na,
hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */
hostna->rx_buf_maxsize = hwna->rx_buf_maxsize;
}
if (hwna->na_flags & NAF_MOREFRAG)
na->na_flags |= NAF_MOREFRAG;
ND("%s<->%s txr %d txd %d rxr %d rxd %d",
na->name, ifp->if_xname,

View File

@ -44,6 +44,40 @@
#endif /* __FreeBSD__ */
/*
* The following bridge-related functions are used by other
* kernel modules.
*
* VALE only supports unicast or broadcast. The lookup
* function can return 0 .. NM_BDG_MAXPORTS-1 for regular ports,
* NM_BDG_MAXPORTS for broadcast, NM_BDG_MAXPORTS+1 to indicate
* drop.
*/
typedef uint32_t (*bdg_lookup_fn_t)(struct nm_bdg_fwd *ft, uint8_t *ring_nr,
struct netmap_vp_adapter *, void *private_data);
typedef int (*bdg_config_fn_t)(struct nm_ifreq *);
typedef void (*bdg_dtor_fn_t)(const struct netmap_vp_adapter *);
typedef void *(*bdg_update_private_data_fn_t)(void *private_data, void *callback_data, int *error);
typedef int (*bdg_vp_create_fn_t)(struct nmreq_header *hdr,
struct ifnet *ifp, struct netmap_mem_d *nmd,
struct netmap_vp_adapter **ret);
typedef int (*bdg_bwrap_attach_fn_t)(const char *nr_name, struct netmap_adapter *hwna);
/*
* Set of callbacks that customize a bridge. netmap_bdg_regops()
* installs only the callbacks that are non-NULL in the structure
* it receives, leaving the other ones as they were.
*/
struct netmap_bdg_ops {
/* forwarding decision: returns a port index (0 .. NM_BDG_MAXPORTS-1),
* NM_BDG_BROADCAST, or NM_BDG_NOPORT to drop */
bdg_lookup_fn_t lookup;
/* invoked by netmap_bdg_config(), if set, with the bridge read-locked */
bdg_config_fn_t config;
/* receives a vp adapter; presumably run on port teardown - TODO confirm */
bdg_dtor_fn_t dtor;
/* builds a netmap_vp_adapter, handed back through the 'ret' argument */
bdg_vp_create_fn_t vp_create;
/* receives a name and the hardware adapter (hwna) to be wrapped */
bdg_bwrap_attach_fn_t bwrap_attach;
/* name buffer, IFNAMSIZ bytes */
char name[IFNAMSIZ];
};
int netmap_bwrap_attach(const char *name, struct netmap_adapter *, struct netmap_bdg_ops *);
int netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token);
#define NM_BRIDGES 8 /* number of bridges */
#define NM_BDG_MAXPORTS 254 /* up to 254 */
#define NM_BDG_BROADCAST NM_BDG_MAXPORTS
#define NM_BDG_NOPORT (NM_BDG_MAXPORTS+1)
/* XXX Should go away after fixing find_bridge() - Michio */
#define NM_BDG_HASH 1024 /* forwarding table entries */
@ -95,7 +129,8 @@ struct nm_bridge {
* different ring index.
* The function is set by netmap_bdg_regops().
*/
struct netmap_bdg_ops *bdg_ops;
struct netmap_bdg_ops bdg_ops;
struct netmap_bdg_ops bdg_saved_ops;
/*
* Contains the data structure used by the bdg_ops.lookup function.
@ -111,6 +146,7 @@ struct nm_bridge {
*/
#define NM_BDG_ACTIVE 1
#define NM_BDG_EXCLUSIVE 2
#define NM_BDG_NEED_BWRAP 4
uint8_t bdg_flags;
@ -149,6 +185,13 @@ int netmap_bwrap_attach_common(struct netmap_adapter *na,
struct netmap_adapter *hwna);
int netmap_bwrap_krings_create_common(struct netmap_adapter *na);
void netmap_bwrap_krings_delete_common(struct netmap_adapter *na);
struct nm_bridge *netmap_init_bridges2(u_int);
void netmap_uninit_bridges2(struct nm_bridge *, u_int);
int netmap_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback,
void *callback_data, void *auth_token);
int netmap_bdg_config(struct nm_ifreq *nifr);
int nm_is_bwrap(struct netmap_adapter *);
#define NM_NEED_BWRAP (-2)
#endif /* _NET_NETMAP_BDG_H_ */

View File

@ -735,9 +735,9 @@ out:
}
#endif /* WITH_EXTMEM */
/* ======================== PTNETMAP SUPPORT ========================== */
/* ================== PTNETMAP GUEST SUPPORT ==================== */
#ifdef WITH_PTNETMAP_GUEST
#ifdef WITH_PTNETMAP
#include <sys/bus.h>
#include <sys/rman.h>
#include <machine/bus.h> /* bus_dmamap_* */
@ -932,7 +932,7 @@ ptn_memdev_shutdown(device_t dev)
return bus_generic_shutdown(dev);
}
#endif /* WITH_PTNETMAP_GUEST */
#endif /* WITH_PTNETMAP */
/*
* In order to track whether pages are still mapped, we hook into
@ -1145,8 +1145,8 @@ nm_os_ncpus(void)
}
struct nm_kctx_ctx {
struct thread *user_td; /* thread user-space (kthread creator) to send ioctl */
struct ptnetmap_cfgentry_bhyve cfg;
/* Userspace thread (kthread creator). */
struct thread *user_td;
/* worker function and parameter */
nm_kctx_worker_fn_t worker_fn;
@ -1161,56 +1161,17 @@ struct nm_kctx_ctx {
struct nm_kctx {
struct thread *worker;
struct mtx worker_lock;
uint64_t scheduled; /* pending wake_up request */
struct nm_kctx_ctx worker_ctx;
int run; /* used to stop kthread */
int attach_user; /* kthread attached to user_process */
int affinity;
};
/*
* Record a pending wake-up request for the worker and, when a wait
* channel is configured, wake up whoever sleeps on it. Both steps are
* performed with worker_lock held.
*/
void inline
nm_os_kctx_worker_wakeup(struct nm_kctx *nmk)
{
/*
* There may be a race between FE and BE, which both call
* this function, and the worker kthread, which reads
* nmk->scheduled.
*
* The value of the counter is not important to us: what
* matters is only that it has changed since the last
* time the kthread saw it.
*/
mtx_lock(&nmk->worker_lock);
nmk->scheduled++;
if (nmk->worker_ctx.cfg.wchan) {
wakeup((void *)(uintptr_t)nmk->worker_ctx.cfg.wchan);
}
mtx_unlock(&nmk->worker_lock);
}
/*
 * Issue the ioctl stored in the worker context configuration on behalf
 * of the recorded user thread. Nothing happens when no user thread is
 * recorded or the configured file descriptor is not positive. A failing
 * kern_ioctl() is only reported on the log.
 */
void inline
nm_os_kctx_send_irq(struct nm_kctx *nmk)
{
	struct nm_kctx_ctx *wctx = &nmk->worker_ctx;
	int rc;

	/* Not configured for notifications: nothing to send. */
	if (!wctx->user_td || !(wctx->cfg.ioctl_fd > 0))
		return;

	rc = kern_ioctl(wctx->user_td, wctx->cfg.ioctl_fd,
	    wctx->cfg.ioctl_cmd, (caddr_t)&wctx->cfg.ioctl_data);
	if (!rc)
		return;

	D("kern_ioctl error: %d ioctl parameters: fd %d com %lu data %p",
	    rc, wctx->cfg.ioctl_fd, (unsigned long)wctx->cfg.ioctl_cmd,
	    &wctx->cfg.ioctl_data);
}
static void
nm_kctx_worker(void *data)
{
struct nm_kctx *nmk = data;
struct nm_kctx_ctx *ctx = &nmk->worker_ctx;
uint64_t old_scheduled = nmk->scheduled;
if (nmk->affinity >= 0) {
thread_lock(curthread);
@ -1231,30 +1192,8 @@ nm_kctx_worker(void *data)
kthread_suspend_check();
}
/*
* if wchan is not defined, we don't have notification
* mechanism and we continually execute worker_fn()
*/
if (!ctx->cfg.wchan) {
ctx->worker_fn(ctx->worker_private, 1); /* worker body */
} else {
/* checks if there is a pending notification */
mtx_lock(&nmk->worker_lock);
if (likely(nmk->scheduled != old_scheduled)) {
old_scheduled = nmk->scheduled;
mtx_unlock(&nmk->worker_lock);
ctx->worker_fn(ctx->worker_private, 1); /* worker body */
continue;
} else if (nmk->run) {
/* wait on event with one second timeout */
msleep((void *)(uintptr_t)ctx->cfg.wchan, &nmk->worker_lock,
0, "nmk_ev", hz);
nmk->scheduled++;
}
mtx_unlock(&nmk->worker_lock);
}
/* Continuously execute worker process. */
ctx->worker_fn(ctx->worker_private); /* worker body */
}
kthread_exit();
@ -1284,11 +1223,6 @@ nm_os_kctx_create(struct nm_kctx_cfg *cfg, void *opaque)
/* attach kthread to user process (ptnetmap) */
nmk->attach_user = cfg->attach_user;
/* store kick/interrupt configuration */
if (opaque) {
nmk->worker_ctx.cfg = *((struct ptnetmap_cfgentry_bhyve *)opaque);
}
return nmk;
}
@ -1298,9 +1232,13 @@ nm_os_kctx_worker_start(struct nm_kctx *nmk)
struct proc *p = NULL;
int error = 0;
if (nmk->worker) {
/* Temporarily disable this function as it is currently broken
* and causes kernel crashes. The failure can be triggered by
* the "vale_polling_enable_disable" test in ctrl-api-test.c. */
return EOPNOTSUPP;
if (nmk->worker)
return EBUSY;
}
/* check if we want to attach kthread to user process */
if (nmk->attach_user) {
@ -1329,15 +1267,14 @@ err:
void
nm_os_kctx_worker_stop(struct nm_kctx *nmk)
{
if (!nmk->worker) {
if (!nmk->worker)
return;
}
/* tell to kthread to exit from main loop */
nmk->run = 0;
/* wake up kthread if it sleeps */
kthread_resume(nmk->worker);
nm_os_kctx_worker_wakeup(nmk);
nmk->worker = NULL;
}
@ -1347,11 +1284,9 @@ nm_os_kctx_destroy(struct nm_kctx *nmk)
{
if (!nmk)
return;
if (nmk->worker) {
nm_os_kctx_worker_stop(nmk);
}
memset(&nmk->worker_ctx.cfg, 0, sizeof(nmk->worker_ctx.cfg));
if (nmk->worker)
nm_os_kctx_worker_stop(nmk);
free(nmk, M_DEVBUF);
}

View File

@ -81,7 +81,6 @@ __FBSDID("$FreeBSD$");
#include <net/if_var.h>
#include <machine/bus.h> /* bus_dmamap_* in netmap_kern.h */
// XXX temporary - D() defined here
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>
@ -179,7 +178,7 @@ static void rate_callback(unsigned long arg)
r = mod_timer(&ctx->timer, jiffies +
msecs_to_jiffies(RATE_PERIOD * 1000));
if (unlikely(r))
D("[v1000] Error: mod_timer()");
nm_prerr("mod_timer() failed");
}
static struct rate_context rate_ctx;
@ -240,14 +239,14 @@ generic_netmap_unregister(struct netmap_adapter *na)
for_each_rx_kring_h(r, kring, na) {
if (nm_kring_pending_off(kring)) {
D("Emulated adapter: ring '%s' deactivated", kring->name);
nm_prinf("Emulated adapter: ring '%s' deactivated", kring->name);
kring->nr_mode = NKR_NETMAP_OFF;
}
}
for_each_tx_kring_h(r, kring, na) {
if (nm_kring_pending_off(kring)) {
kring->nr_mode = NKR_NETMAP_OFF;
D("Emulated adapter: ring '%s' deactivated", kring->name);
nm_prinf("Emulated adapter: ring '%s' deactivated", kring->name);
}
}
@ -300,11 +299,11 @@ generic_netmap_unregister(struct netmap_adapter *na)
#ifdef RATE_GENERIC
if (--rate_ctx.refcount == 0) {
D("del_timer()");
nm_prinf("del_timer()");
del_timer(&rate_ctx.timer);
}
#endif
D("Emulated adapter for %s deactivated", na->name);
nm_prinf("Emulated adapter for %s deactivated", na->name);
}
return 0;
@ -329,14 +328,14 @@ generic_netmap_register(struct netmap_adapter *na, int enable)
}
if (na->active_fds == 0) {
D("Emulated adapter for %s activated", na->name);
nm_prinf("Emulated adapter for %s activated", na->name);
/* Do all memory allocations when (na->active_fds == 0), to
* simplify error management. */
/* Allocate memory for mitigation support on all the rx queues. */
gna->mit = nm_os_malloc(na->num_rx_rings * sizeof(struct nm_generic_mit));
if (!gna->mit) {
D("mitigation allocation failed");
nm_prerr("mitigation allocation failed");
error = ENOMEM;
goto out;
}
@ -363,7 +362,7 @@ generic_netmap_register(struct netmap_adapter *na, int enable)
kring->tx_pool =
nm_os_malloc(na->num_tx_desc * sizeof(struct mbuf *));
if (!kring->tx_pool) {
D("tx_pool allocation failed");
nm_prerr("tx_pool allocation failed");
error = ENOMEM;
goto free_tx_pools;
}
@ -374,14 +373,14 @@ generic_netmap_register(struct netmap_adapter *na, int enable)
for_each_rx_kring_h(r, kring, na) {
if (nm_kring_pending_on(kring)) {
D("Emulated adapter: ring '%s' activated", kring->name);
nm_prinf("Emulated adapter: ring '%s' activated", kring->name);
kring->nr_mode = NKR_NETMAP_ON;
}
}
for_each_tx_kring_h(r, kring, na) {
if (nm_kring_pending_on(kring)) {
D("Emulated adapter: ring '%s' activated", kring->name);
nm_prinf("Emulated adapter: ring '%s' activated", kring->name);
kring->nr_mode = NKR_NETMAP_ON;
}
}
@ -399,14 +398,14 @@ generic_netmap_register(struct netmap_adapter *na, int enable)
/* Prepare to intercept incoming traffic. */
error = nm_os_catch_rx(gna, 1);
if (error) {
D("nm_os_catch_rx(1) failed (%d)", error);
nm_prerr("nm_os_catch_rx(1) failed (%d)", error);
goto free_tx_pools;
}
/* Let netmap control the packet steering. */
error = nm_os_catch_tx(gna, 1);
if (error) {
D("nm_os_catch_tx(1) failed (%d)", error);
nm_prerr("nm_os_catch_tx(1) failed (%d)", error);
goto catch_rx;
}
@ -414,11 +413,11 @@ generic_netmap_register(struct netmap_adapter *na, int enable)
#ifdef RATE_GENERIC
if (rate_ctx.refcount == 0) {
D("setup_timer()");
nm_prinf("setup_timer()");
memset(&rate_ctx, 0, sizeof(rate_ctx));
setup_timer(&rate_ctx.timer, &rate_callback, (unsigned long)&rate_ctx);
if (mod_timer(&rate_ctx.timer, jiffies + msecs_to_jiffies(1500))) {
D("Error: mod_timer()");
nm_prerr("Error: mod_timer()");
}
}
rate_ctx.refcount++;
@ -462,7 +461,7 @@ generic_mbuf_destructor(struct mbuf *m)
unsigned int r_orig = r;
if (unlikely(!nm_netmap_on(na) || r >= na->num_tx_rings)) {
D("Error: no netmap adapter on device %p",
nm_prerr("Error: no netmap adapter on device %p",
GEN_TX_MBUF_IFP(m));
return;
}
@ -488,7 +487,7 @@ generic_mbuf_destructor(struct mbuf *m)
if (match) {
if (r != r_orig) {
RD(1, "event %p migrated: ring %u --> %u",
nm_prlim(1, "event %p migrated: ring %u --> %u",
m, r_orig, r);
}
break;
@ -497,7 +496,7 @@ generic_mbuf_destructor(struct mbuf *m)
if (++r == na->num_tx_rings) r = 0;
if (r == r_orig) {
RD(1, "Cannot match event %p", m);
nm_prlim(1, "Cannot match event %p", m);
return;
}
}
@ -528,7 +527,7 @@ generic_netmap_tx_clean(struct netmap_kring *kring, int txqdisc)
u_int n = 0;
struct mbuf **tx_pool = kring->tx_pool;
ND("hwcur = %d, hwtail = %d", kring->nr_hwcur, kring->nr_hwtail);
nm_prdis("hwcur = %d, hwtail = %d", kring->nr_hwcur, kring->nr_hwtail);
while (nm_i != hwcur) { /* buffers not completed */
struct mbuf *m = tx_pool[nm_i];
@ -537,7 +536,7 @@ generic_netmap_tx_clean(struct netmap_kring *kring, int txqdisc)
if (m == NULL) {
/* Nothing to do, this is going
* to be replenished. */
RD(3, "Is this happening?");
nm_prlim(3, "Is this happening?");
} else if (MBUF_QUEUED(m)) {
break; /* Not dequeued yet. */
@ -576,7 +575,7 @@ generic_netmap_tx_clean(struct netmap_kring *kring, int txqdisc)
nm_i = nm_next(nm_i, lim);
}
kring->nr_hwtail = nm_prev(nm_i, lim);
ND("tx completed [%d] -> hwtail %d", n, kring->nr_hwtail);
nm_prdis("tx completed [%d] -> hwtail %d", n, kring->nr_hwtail);
return n;
}
@ -598,7 +597,7 @@ ring_middle(u_int inf, u_int sup, u_int lim)
}
if (unlikely(e >= n)) {
D("This cannot happen");
nm_prerr("This cannot happen");
e = 0;
}
@ -654,7 +653,7 @@ generic_set_tx_event(struct netmap_kring *kring, u_int hwcur)
kring->tx_pool[e] = NULL;
ND(5, "Request Event at %d mbuf %p refcnt %d", e, m, m ? MBUF_REFCNT(m) : -2 );
nm_prdis("Request Event at %d mbuf %p refcnt %d", e, m, m ? MBUF_REFCNT(m) : -2 );
/* Decrement the refcount. This will free it if we lose the race
* with the driver. */
@ -699,7 +698,7 @@ generic_netmap_txsync(struct netmap_kring *kring, int flags)
* but only when cur == hwtail, which means that the
* client is going to block. */
event = ring_middle(nm_i, head, lim);
ND(3, "Place txqdisc event (hwcur=%u,event=%u,"
nm_prdis("Place txqdisc event (hwcur=%u,event=%u,"
"head=%u,hwtail=%u)", nm_i, event, head,
kring->nr_hwtail);
}
@ -725,7 +724,7 @@ generic_netmap_txsync(struct netmap_kring *kring, int flags)
kring->tx_pool[nm_i] = m =
nm_os_get_mbuf(ifp, NETMAP_BUF_SIZE(na));
if (m == NULL) {
RD(2, "Failed to replenish mbuf");
nm_prlim(2, "Failed to replenish mbuf");
/* Here we could schedule a timer which
* retries to replenish after a while,
* and notifies the client when it
@ -854,7 +853,7 @@ generic_rx_handler(struct ifnet *ifp, struct mbuf *m)
/* This may happen when GRO/LRO features are enabled for
* the NIC driver when the generic adapter does not
* support RX scatter-gather. */
RD(2, "Warning: driver pushed up big packet "
nm_prlim(2, "Warning: driver pushed up big packet "
"(size=%d)", (int)MBUF_LEN(m));
m_freem(m);
} else if (unlikely(mbq_len(&kring->rx_queue) > 1024)) {
@ -1048,7 +1047,7 @@ generic_netmap_dtor(struct netmap_adapter *na)
*/
netmap_adapter_put(prev_na);
}
D("Native netmap adapter %p restored", prev_na);
nm_prinf("Native netmap adapter %p restored", prev_na);
}
NM_RESTORE_NA(ifp, prev_na);
/*
@ -1056,7 +1055,7 @@ generic_netmap_dtor(struct netmap_adapter *na)
* overrides WNA(ifp) if na->ifp is not NULL.
*/
na->ifp = NULL;
D("Emulated netmap adapter for %s destroyed", na->name);
nm_prinf("Emulated netmap adapter for %s destroyed", na->name);
}
int
@ -1086,7 +1085,7 @@ generic_netmap_attach(struct ifnet *ifp)
#ifdef __FreeBSD__
if (ifp->if_type == IFT_LOOP) {
D("if_loop is not supported by %s", __func__);
nm_prerr("if_loop is not supported by %s", __func__);
return EINVAL;
}
#endif
@ -1096,26 +1095,25 @@ generic_netmap_attach(struct ifnet *ifp)
* adapter it means that someone else is using the same
* pointer (e.g. ax25_ptr on linux). This happens for
* instance when also PF_RING is in use. */
D("Error: netmap adapter hook is busy");
nm_prerr("Error: netmap adapter hook is busy");
return EBUSY;
}
num_tx_desc = num_rx_desc = netmap_generic_ringsize; /* starting point */
nm_os_generic_find_num_desc(ifp, &num_tx_desc, &num_rx_desc); /* ignore errors */
ND("Netmap ring size: TX = %d, RX = %d", num_tx_desc, num_rx_desc);
if (num_tx_desc == 0 || num_rx_desc == 0) {
D("Device has no hw slots (tx %u, rx %u)", num_tx_desc, num_rx_desc);
nm_prerr("Device has no hw slots (tx %u, rx %u)", num_tx_desc, num_rx_desc);
return EINVAL;
}
gna = nm_os_malloc(sizeof(*gna));
if (gna == NULL) {
D("no memory on attach, give up");
nm_prerr("no memory on attach, give up");
return ENOMEM;
}
na = (struct netmap_adapter *)gna;
strncpy(na->name, ifp->if_xname, sizeof(na->name));
strlcpy(na->name, ifp->if_xname, sizeof(na->name));
na->ifp = ifp;
na->num_tx_desc = num_tx_desc;
na->num_rx_desc = num_rx_desc;
@ -1129,10 +1127,10 @@ generic_netmap_attach(struct ifnet *ifp)
*/
na->na_flags = NAF_SKIP_INTR | NAF_HOST_RINGS;
ND("[GNA] num_tx_queues(%d), real_num_tx_queues(%d), len(%lu)",
nm_prdis("[GNA] num_tx_queues(%d), real_num_tx_queues(%d), len(%lu)",
ifp->num_tx_queues, ifp->real_num_tx_queues,
ifp->tx_queue_len);
ND("[GNA] num_rx_queues(%d), real_num_rx_queues(%d)",
nm_prdis("[GNA] num_rx_queues(%d), real_num_rx_queues(%d)",
ifp->num_rx_queues, ifp->real_num_rx_queues);
nm_os_generic_find_num_queues(ifp, &na->num_tx_rings, &na->num_rx_rings);
@ -1151,7 +1149,7 @@ generic_netmap_attach(struct ifnet *ifp)
nm_os_generic_set_features(gna);
D("Emulated adapter for %s created (prev was %p)", na->name, gna->prev);
nm_prinf("Emulated adapter for %s created (prev was %p)", na->name, gna->prev);
return retval;
}

View File

@ -54,30 +54,31 @@
#if defined(CONFIG_NETMAP_GENERIC)
#define WITH_GENERIC
#endif
#if defined(CONFIG_NETMAP_PTNETMAP_GUEST)
#define WITH_PTNETMAP_GUEST
#endif
#if defined(CONFIG_NETMAP_PTNETMAP_HOST)
#define WITH_PTNETMAP_HOST
#if defined(CONFIG_NETMAP_PTNETMAP)
#define WITH_PTNETMAP
#endif
#if defined(CONFIG_NETMAP_SINK)
#define WITH_SINK
#endif
#if defined(CONFIG_NETMAP_NULL)
#define WITH_NMNULL
#endif
#elif defined (_WIN32)
#define WITH_VALE // comment out to disable VALE support
#define WITH_PIPES
#define WITH_MONITOR
#define WITH_GENERIC
#define WITH_NMNULL
#else /* neither linux nor windows */
#define WITH_VALE // comment out to disable VALE support
#define WITH_PIPES
#define WITH_MONITOR
#define WITH_GENERIC
#define WITH_PTNETMAP_HOST /* ptnetmap host support */
#define WITH_PTNETMAP_GUEST /* ptnetmap guest support */
#define WITH_PTNETMAP /* ptnetmap guest support */
#define WITH_EXTMEM
#define WITH_NMNULL
#endif
#if defined(__FreeBSD__)
@ -239,28 +240,39 @@ typedef struct hrtimer{
#define NMG_LOCK_ASSERT() NM_MTX_ASSERT(netmap_global_lock)
#if defined(__FreeBSD__)
#define nm_prerr printf
#define nm_prinf printf
#define nm_prerr_int printf
#define nm_prinf_int printf
#elif defined (_WIN32)
#define nm_prerr DbgPrint
#define nm_prinf DbgPrint
#define nm_prerr_int DbgPrint
#define nm_prinf_int DbgPrint
#elif defined(linux)
#define nm_prerr(fmt, arg...) printk(KERN_ERR fmt, ##arg)
#define nm_prinf(fmt, arg...) printk(KERN_INFO fmt, ##arg)
#define nm_prerr_int(fmt, arg...) printk(KERN_ERR fmt, ##arg)
#define nm_prinf_int(fmt, arg...) printk(KERN_INFO fmt, ##arg)
#endif
#define ND(format, ...)
#define D(format, ...) \
#define nm_prinf(format, ...) \
do { \
struct timeval __xxts; \
microtime(&__xxts); \
nm_prerr("%03d.%06d [%4d] %-25s " format "\n", \
nm_prinf_int("%03d.%06d [%4d] %-25s " format "\n",\
(int)__xxts.tv_sec % 1000, (int)__xxts.tv_usec, \
__LINE__, __FUNCTION__, ##__VA_ARGS__); \
} while (0)
/* rate limited, lps indicates how many per second */
#define RD(lps, format, ...) \
#define nm_prerr(format, ...) \
do { \
struct timeval __xxts; \
microtime(&__xxts); \
nm_prerr_int("%03d.%06d [%4d] %-25s " format "\n",\
(int)__xxts.tv_sec % 1000, (int)__xxts.tv_usec, \
__LINE__, __FUNCTION__, ##__VA_ARGS__); \
} while (0)
/* Disabled printf (used to be ND). */
#define nm_prdis(format, ...)
/* Rate limited, lps indicates how many per second. */
#define nm_prlim(lps, format, ...) \
do { \
static int t0, __cnt; \
if (t0 != time_second) { \
@ -268,9 +280,14 @@ typedef struct hrtimer{
__cnt = 0; \
} \
if (__cnt++ < lps) \
D(format, ##__VA_ARGS__); \
nm_prinf(format, ##__VA_ARGS__); \
} while (0)
/* Old macros. */
#define ND nm_prdis
#define D nm_prerr
#define RD nm_prlim
struct netmap_adapter;
struct nm_bdg_fwd;
struct nm_bridge;
@ -700,7 +717,7 @@ struct netmap_adapter {
*/
#define NAF_HOST_RINGS 64 /* the adapter supports the host rings */
#define NAF_FORCE_NATIVE 128 /* the adapter is always NATIVE */
#define NAF_PTNETMAP_HOST 256 /* the adapter supports ptnetmap in the host */
/* free */
#define NAF_MOREFRAG 512 /* the adapter supports NS_MOREFRAG */
#define NAF_ZOMBIE (1U<<30) /* the nic driver has been unloaded */
#define NAF_BUSY (1U<<31) /* the adapter is used internally and
@ -718,9 +735,9 @@ struct netmap_adapter {
u_int num_tx_desc; /* number of descriptor in each queue */
u_int num_rx_desc;
/* tx_rings and rx_rings are private but allocated
* as a contiguous chunk of memory. Each array has
* N+1 entries, for the adapter queues and for the host queue.
/* tx_rings and rx_rings are private but allocated as a
* contiguous chunk of memory. Each array has N+K entries,
* N for the hardware rings and K for the host rings.
*/
struct netmap_kring **tx_rings; /* array of TX rings. */
struct netmap_kring **rx_rings; /* array of RX rings. */
@ -1080,12 +1097,12 @@ struct netmap_bwrap_adapter {
*/
struct netmap_vp_adapter *saved_na_vp;
};
int nm_bdg_ctl_attach(struct nmreq_header *hdr, void *auth_token);
int nm_bdg_ctl_detach(struct nmreq_header *hdr, void *auth_token);
int nm_bdg_polling(struct nmreq_header *hdr);
int netmap_bdg_list(struct nmreq_header *hdr);
#ifdef WITH_VALE
int netmap_vale_attach(struct nmreq_header *hdr, void *auth_token);
int netmap_vale_detach(struct nmreq_header *hdr, void *auth_token);
int netmap_vale_list(struct nmreq_header *hdr);
int netmap_vi_create(struct nmreq_header *hdr, int);
int nm_vi_create(struct nmreq_header *);
int nm_vi_destroy(const char *name);
@ -1115,6 +1132,12 @@ struct netmap_pipe_adapter {
#endif /* WITH_PIPES */
#ifdef WITH_NMNULL
/*
* Adapter type for "null" netmap ports: it holds no state besides
* the generic netmap_adapter embedded as its only member.
*/
struct netmap_null_adapter {
struct netmap_adapter up;
};
#endif /* WITH_NMNULL */
/* return slots reserved to rx clients; used in drivers */
static inline uint32_t
@ -1442,51 +1465,8 @@ void netmap_unget_na(struct netmap_adapter *na, struct ifnet *ifp);
int netmap_get_hw_na(struct ifnet *ifp,
struct netmap_mem_d *nmd, struct netmap_adapter **na);
/*
* The following bridge-related functions are used by other
* kernel modules.
*
* VALE only supports unicast or broadcast. The lookup
* function can return 0 .. NM_BDG_MAXPORTS-1 for regular ports,
* NM_BDG_MAXPORTS for broadcast, NM_BDG_MAXPORTS+1 to indicate
* drop.
*/
typedef uint32_t (*bdg_lookup_fn_t)(struct nm_bdg_fwd *ft, uint8_t *ring_nr,
struct netmap_vp_adapter *, void *private_data);
typedef int (*bdg_config_fn_t)(struct nm_ifreq *);
typedef void (*bdg_dtor_fn_t)(const struct netmap_vp_adapter *);
typedef void *(*bdg_update_private_data_fn_t)(void *private_data, void *callback_data, int *error);
typedef int (*bdg_vp_create_fn_t)(struct nmreq_header *hdr,
struct ifnet *ifp, struct netmap_mem_d *nmd,
struct netmap_vp_adapter **ret);
typedef int (*bdg_bwrap_attach_fn_t)(const char *nr_name, struct netmap_adapter *hwna);
struct netmap_bdg_ops {
bdg_lookup_fn_t lookup;
bdg_config_fn_t config;
bdg_dtor_fn_t dtor;
bdg_vp_create_fn_t vp_create;
bdg_bwrap_attach_fn_t bwrap_attach;
char name[IFNAMSIZ];
};
int netmap_bwrap_attach(const char *name, struct netmap_adapter *, struct netmap_bdg_ops *);
int netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token);
#define NM_BRIDGES 8 /* number of bridges */
#define NM_BDG_MAXPORTS 254 /* up to 254 */
#define NM_BDG_BROADCAST NM_BDG_MAXPORTS
#define NM_BDG_NOPORT (NM_BDG_MAXPORTS+1)
struct nm_bridge *netmap_init_bridges2(u_int);
void netmap_uninit_bridges2(struct nm_bridge *, u_int);
int netmap_init_bridges(void);
void netmap_uninit_bridges(void);
int nm_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback,
void *callback_data, void *auth_token);
int netmap_bdg_config(struct nm_ifreq *nifr);
#ifdef WITH_VALE
uint32_t netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
uint32_t netmap_vale_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
struct netmap_vp_adapter *, void *private_data);
/* these are redefined in case of no VALE support */
@ -1525,11 +1505,20 @@ void netmap_monitor_stop(struct netmap_adapter *na);
(((struct nmreq_register *)(uintptr_t)hdr->nr_body)->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX) ? EOPNOTSUPP : 0)
#endif
#ifdef WITH_NMNULL
int netmap_get_null_na(struct nmreq_header *hdr, struct netmap_adapter **na,
struct netmap_mem_d *nmd, int create);
#else /* !WITH_NMNULL */
#define netmap_get_null_na(hdr, _2, _3, _4) \
(((struct nmreq_register *)(uintptr_t)hdr->nr_body)->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX) ? EOPNOTSUPP : 0)
#endif /* WITH_NMNULL */
#ifdef CONFIG_NET_NS
struct net *netmap_bns_get(void);
void netmap_bns_put(struct net *);
void netmap_bns_getbridges(struct nm_bridge **, u_int *);
#else
extern struct nm_bridge *nm_bridges;
#define netmap_bns_get()
#define netmap_bns_put(_1)
#define netmap_bns_getbridges(b, n) \
@ -1591,16 +1580,24 @@ int netmap_adapter_put(struct netmap_adapter *na);
#define NETMAP_BUF_SIZE(_na) ((_na)->na_lut.objsize)
extern int netmap_no_pendintr;
extern int netmap_mitigate;
extern int netmap_verbose; /* for debugging */
enum { /* verbose flags */
NM_VERB_ON = 1, /* generic verbose */
NM_VERB_HOST = 0x2, /* verbose host stack */
NM_VERB_RXSYNC = 0x10, /* verbose on rxsync/txsync */
NM_VERB_TXSYNC = 0x20,
NM_VERB_RXINTR = 0x100, /* verbose on rx/tx intr (driver) */
NM_VERB_TXINTR = 0x200,
NM_VERB_NIC_RXSYNC = 0x1000, /* verbose on rx/tx intr (driver) */
NM_VERB_NIC_TXSYNC = 0x2000,
extern int netmap_verbose;
#ifdef CONFIG_NETMAP_DEBUG
extern int netmap_debug; /* for debugging */
#else /* !CONFIG_NETMAP_DEBUG */
#define netmap_debug (0)
#endif /* !CONFIG_NETMAP_DEBUG */
/* Bit flags to be tested against netmap_debug
* (e.g. netmap_debug & NM_DEBUG_RXINTR). */
enum { /* debug flags */
NM_DEBUG_ON = 1, /* generic debug messages */
NM_DEBUG_HOST = 0x2, /* debug host stack */
NM_DEBUG_RXSYNC = 0x10, /* debug on rxsync/txsync */
NM_DEBUG_TXSYNC = 0x20,
NM_DEBUG_RXINTR = 0x100, /* debug on rx/tx intr (driver) */
NM_DEBUG_TXINTR = 0x200,
NM_DEBUG_NIC_RXSYNC = 0x1000, /* debug on NIC rxsync/txsync (driver) */
NM_DEBUG_NIC_TXSYNC = 0x2000,
NM_DEBUG_MEM = 0x4000, /* debug memory allocations/deallocations */
NM_DEBUG_VALE = 0x8000, /* debug messages from the VALE/bridge code */
NM_DEBUG_BDG = NM_DEBUG_VALE, /* alias of NM_DEBUG_VALE */
};
extern int netmap_txsync_retry;
@ -1612,7 +1609,6 @@ extern int netmap_generic_rings;
#ifdef linux
extern int netmap_generic_txqdisc;
#endif
extern int ptnetmap_tx_workers;
/*
* NA returns a pointer to the struct netmap adapter from the ifp.
@ -1809,6 +1805,11 @@ static inline int
netmap_idx_n2k(struct netmap_kring *kr, int idx)
{
int n = kr->nkr_num_slots;
if (likely(kr->nkr_hwofs == 0)) {
return idx;
}
idx += kr->nkr_hwofs;
if (idx < 0)
return idx + n;
@ -1823,6 +1824,11 @@ static inline int
netmap_idx_k2n(struct netmap_kring *kr, int idx)
{
int n = kr->nkr_num_slots;
if (likely(kr->nkr_hwofs == 0)) {
return idx;
}
idx -= kr->nkr_hwofs;
if (idx < 0)
return idx + n;
@ -1911,6 +1917,9 @@ struct netmap_priv_d {
u_int np_qfirst[NR_TXRX],
np_qlast[NR_TXRX]; /* range of tx/rx rings to scan */
uint16_t np_txpoll;
uint16_t np_kloop_state; /* use with NMG_LOCK held */
#define NM_SYNC_KLOOP_RUNNING (1 << 0)
#define NM_SYNC_KLOOP_STOPPING (1 << 1)
int np_sync_flags; /* to be passed to nm_sync */
int np_refs; /* use with NMG_LOCK held */
@ -1920,7 +1929,26 @@ struct netmap_priv_d {
* number of rings.
*/
NM_SELINFO_T *np_si[NR_TXRX];
/* In the optional CSB mode, the user must specify the start address
* of two arrays of Communication Status Block (CSB) entries, for the
* two directions (kernel read application write, and kernel write
* application read).
* The number of entries must agree with the number of rings bound to
* the netmap file descriptor. The entries corresponding to the TX
* rings are laid out before the ones corresponding to the RX rings.
*
* Array of CSB entries for application --> kernel communication
* (N entries). */
struct nm_csb_atok *np_csb_atok_base;
/* Array of CSB entries for kernel --> application communication
* (N entries). */
struct nm_csb_ktoa *np_csb_ktoa_base;
struct thread *np_td; /* kqueue, just debugging */
#ifdef linux
struct file *np_filp; /* used by sync kloop */
#endif /* linux */
};
struct netmap_priv_d *netmap_priv_new(void);
@ -1943,6 +1971,14 @@ static inline int nm_kring_pending(struct netmap_priv_d *np)
return 0;
}
/*
 * Tell whether priv is bound to an adapter and its range of rings of
 * type t spans more than one ring.
 * Must be called with NMG_LOCK held.
 */
static __inline int
nm_si_user(struct netmap_priv_d *priv, enum txrx t)
{
	/* Not bound to any adapter. */
	if (priv->np_na == NULL)
		return 0;

	return (priv->np_qlast[t] - priv->np_qfirst[t] > 1);
}
#ifdef WITH_PIPES
int netmap_pipe_txsync(struct netmap_kring *txkring, int flags);
int netmap_pipe_rxsync(struct netmap_kring *rxkring, int flags);
@ -2143,17 +2179,14 @@ void nm_os_vi_init_index(void);
* kernel thread routines
*/
struct nm_kctx; /* OS-specific kernel context - opaque */
typedef void (*nm_kctx_worker_fn_t)(void *data, int is_kthread);
typedef void (*nm_kctx_notify_fn_t)(void *data);
typedef void (*nm_kctx_worker_fn_t)(void *data);
/* kthread configuration */
struct nm_kctx_cfg {
long type; /* kthread type/identifier */
nm_kctx_worker_fn_t worker_fn; /* worker function */
void *worker_private;/* worker parameter */
nm_kctx_notify_fn_t notify_fn; /* notify function */
int attach_user; /* attach kthread to user process */
int use_kthread; /* use a kthread for the context */
};
/* kthread configuration */
struct nm_kctx *nm_os_kctx_create(struct nm_kctx_cfg *cfg,
@ -2161,47 +2194,24 @@ struct nm_kctx *nm_os_kctx_create(struct nm_kctx_cfg *cfg,
int nm_os_kctx_worker_start(struct nm_kctx *);
void nm_os_kctx_worker_stop(struct nm_kctx *);
void nm_os_kctx_destroy(struct nm_kctx *);
void nm_os_kctx_worker_wakeup(struct nm_kctx *nmk);
void nm_os_kctx_send_irq(struct nm_kctx *);
void nm_os_kctx_worker_setaff(struct nm_kctx *, int);
u_int nm_os_ncpus(void);
#ifdef WITH_PTNETMAP_HOST
int netmap_sync_kloop(struct netmap_priv_d *priv,
struct nmreq_header *hdr);
int netmap_sync_kloop_stop(struct netmap_priv_d *priv);
#ifdef WITH_PTNETMAP
/* ptnetmap guest routines */
/*
* netmap adapter for host ptnetmap ports
* ptnetmap_memdev routines used to talk with ptnetmap_memdev device driver
*/
struct netmap_pt_host_adapter {
struct netmap_adapter up;
/* the passed-through adapter */
struct netmap_adapter *parent;
/* parent->na_flags, saved at NETMAP_PT_HOST_CREATE time,
* and restored at NETMAP_PT_HOST_DELETE time */
uint32_t parent_na_flags;
int (*parent_nm_notify)(struct netmap_kring *kring, int flags);
void *ptns;
};
/* ptnetmap host-side routines */
int netmap_get_pt_host_na(struct nmreq_header *hdr, struct netmap_adapter **na,
struct netmap_mem_d * nmd, int create);
int ptnetmap_ctl(const char *nr_name, int create, struct netmap_adapter *na);
static inline int
nm_ptnetmap_host_on(struct netmap_adapter *na)
{
return na && na->na_flags & NAF_PTNETMAP_HOST;
}
#else /* !WITH_PTNETMAP_HOST */
#define netmap_get_pt_host_na(hdr, _2, _3, _4) \
(((struct nmreq_register *)(uintptr_t)hdr->nr_body)->nr_flags & (NR_PTNETMAP_HOST) ? EOPNOTSUPP : 0)
#define ptnetmap_ctl(_1, _2, _3) EINVAL
#define nm_ptnetmap_host_on(_1) EINVAL
#endif /* !WITH_PTNETMAP_HOST */
#ifdef WITH_PTNETMAP_GUEST
/* ptnetmap GUEST routines */
struct ptnetmap_memdev;
int nm_os_pt_memdev_iomap(struct ptnetmap_memdev *, vm_paddr_t *, void **,
uint64_t *);
void nm_os_pt_memdev_iounmap(struct ptnetmap_memdev *);
uint32_t nm_os_pt_memdev_ioread(struct ptnetmap_memdev *, unsigned int);
/*
* netmap adapter for guest ptnetmap ports
@ -2218,26 +2228,83 @@ struct netmap_pt_guest_adapter {
* network stack and netmap clients.
* Used to decide when we need (de)allocate krings/rings and
* start (stop) ptnetmap kthreads. */
int backend_regifs;
int backend_users;
};
int netmap_pt_guest_attach(struct netmap_adapter *na,
unsigned int nifp_offset,
unsigned int memid);
struct ptnet_csb_gh;
struct ptnet_csb_hg;
bool netmap_pt_guest_txsync(struct ptnet_csb_gh *ptgh,
struct ptnet_csb_hg *pthg,
struct netmap_kring *kring,
int flags);
bool netmap_pt_guest_rxsync(struct ptnet_csb_gh *ptgh,
struct ptnet_csb_hg *pthg,
bool netmap_pt_guest_txsync(struct nm_csb_atok *atok,
struct nm_csb_ktoa *ktoa,
struct netmap_kring *kring, int flags);
bool netmap_pt_guest_rxsync(struct nm_csb_atok *atok,
struct nm_csb_ktoa *ktoa,
struct netmap_kring *kring, int flags);
int ptnet_nm_krings_create(struct netmap_adapter *na);
void ptnet_nm_krings_delete(struct netmap_adapter *na);
void ptnet_nm_dtor(struct netmap_adapter *na);
#endif /* WITH_PTNETMAP_GUEST */
/* Guest driver: Write kring pointers (cur, head) to the CSB.
 * This routine is coupled with ptnetmap_host_read_kring_csb().
 *
 * atok: application --> kernel CSB entry to be updated.
 * cur:  value stored into atok->cur (stored first).
 * head: value stored into atok->head (stored after the barrier). */
static inline void
ptnetmap_guest_write_kring_csb(struct nm_csb_atok *atok, uint32_t cur,
uint32_t head)
{
/*
 * We need to write cur and head to the CSB but we cannot do it atomically.
 * There is no way we can prevent the host from reading the updated value
 * of one of the two and the old value of the other. However, if we make
 * sure that the host never reads a value of head more recent than the
 * value of cur we are safe. We can allow the host to read a value of cur
 * more recent than the value of head, since in the netmap ring cur can be
 * ahead of head and cur cannot wrap around head because it must be behind
 * tail. Inverting the order of the writes below could instead lead the
 * host to think that head went ahead of cur, which would cause the sync
 * prologue to fail.
 *
 * The following memory barrier scheme is used to make this happen:
 *
 *          Guest              Host
 *
 *          STORE(cur)         LOAD(head)
 *          mb() <-----------> mb()
 *          STORE(head)        LOAD(cur)
 */
atok->cur = cur;
nm_stst_barrier();
atok->head = head;
}
/* Guest driver: Read kring pointers (hwcur, hwtail) from the CSB.
 * This routine is coupled with ptnetmap_host_write_kring_csb().
 *
 * ktoa:  kernel --> application CSB entry to read from.
 * kring: destination; nr_hwtail is updated first, then nr_hwcur. */
static inline void
ptnetmap_guest_read_kring_csb(struct nm_csb_ktoa *ktoa,
struct netmap_kring *kring)
{
/*
 * We place a memory barrier to make sure that the update of hwtail never
 * overtakes the update of hwcur.
 * (see explanation in ptnetmap_host_write_kring_csb).
 *
 * NOTE(review): both CSB accesses below are loads, while the name
 * nm_stst_barrier() suggests a store-store barrier -- confirm that it
 * also provides the load ordering required here.
 */
kring->nr_hwtail = ktoa->hwtail;
nm_stst_barrier();
kring->nr_hwcur = ktoa->hwcur;
}
/* Helper function wrapping ptnetmap_guest_read_kring_csb(). */
static inline void
ptnet_sync_tail(struct nm_csb_ktoa *ktoa, struct netmap_kring *kring)
{
struct netmap_ring *ring = kring->ring;
/* Update hwcur and hwtail as known by the host. */
ptnetmap_guest_read_kring_csb(ktoa, kring);
/* nm_sync_finalize */
ring->tail = kring->rtail = kring->nr_hwtail;
}
#endif /* WITH_PTNETMAP */
#ifdef __FreeBSD__
/*
@ -2355,4 +2422,16 @@ nm_os_get_mbuf(struct ifnet *ifp, int len)
struct nmreq_option * nmreq_findoption(struct nmreq_option *, uint16_t);
int nmreq_checkduplicate(struct nmreq_option *);
int netmap_init_bridges(void);
void netmap_uninit_bridges(void);
/*
 * Functions to read and write CSB fields from the kernel.
 *
 * CSB_READ(csb, field, r):  load csb->field into the lvalue r.
 * CSB_WRITE(csb, field, v): store v into csb->field.
 *
 * The csb, r and v arguments are parenthesized so that arbitrary
 * expressions (e.g. "base + i") can be passed safely; each argument is
 * evaluated exactly once.
 */
#if defined (linux)
#define CSB_READ(csb, field, r) (get_user((r), &(csb)->field))
#define CSB_WRITE(csb, field, v) (put_user((v), &(csb)->field))
#else /* ! linux */
#define CSB_READ(csb, field, r) ((r) = fuword32(&(csb)->field))
#define CSB_WRITE(csb, field, v) (suword32(&(csb)->field, (v)))
#endif /* ! linux */
#endif /* _NET_NETMAP_KERN_H_ */

View File

@ -0,0 +1,916 @@
/*
* Copyright (C) 2016-2018 Vincenzo Maffione
* Copyright (C) 2015 Stefano Garzarella
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
/*
* common headers
*/
#if defined(__FreeBSD__)
#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/types.h>
#include <sys/selinfo.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/if_var.h>
#include <machine/bus.h>
#define usleep_range(_1, _2) \
pause_sbt("sync-kloop-sleep", SBT_1US * _1, SBT_1US * 1, C_ABSOLUTE)
#elif defined(linux)
#include <bsd_glue.h>
#include <linux/file.h>
#include <linux/eventfd.h>
#endif
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <net/netmap_virt.h>
#include <dev/netmap/netmap_mem2.h>
/* Support for eventfd-based notifications. */
#if defined(linux)
#define SYNC_KLOOP_POLL
#endif
/* Write kring pointers (hwcur, hwtail) to the CSB.
 * This routine is coupled with ptnetmap_guest_read_kring_csb().
 *
 * ptr:    kernel --> application CSB entry to be updated.
 * hwcur:  value stored into the hwcur field (stored first).
 * hwtail: value stored into the hwtail field (stored after the barrier).
 *
 * The results of the CSB_WRITE() operations are not checked here. */
static inline void
sync_kloop_kernel_write(struct nm_csb_ktoa __user *ptr, uint32_t hwcur,
uint32_t hwtail)
{
/*
 * The same scheme used in ptnetmap_guest_write_kring_csb() applies here.
 * We allow the application to read a value of hwcur more recent than the value
 * of hwtail, since this would anyway result in a consistent view of the
 * ring state (and hwcur can never wraparound hwtail, since hwcur must be
 * behind head).
 *
 * The following memory barrier scheme is used to make this happen:
 *
 *          Kernel               Application
 *
 *          STORE(hwcur)         LOAD(hwtail)
 *          mb() <-------------> mb()
 *          STORE(hwtail)        LOAD(hwcur)
 */
CSB_WRITE(ptr, hwcur, hwcur);
nm_stst_barrier();
CSB_WRITE(ptr, hwtail, hwtail);
}
/* Read kring pointers (head, cur, sync_flags) from the CSB.
 * This routine is coupled with ptnetmap_guest_write_kring_csb().
 *
 * ptr:         application --> kernel CSB entry to read from.
 * shadow_ring: destination; its head, cur and flags fields are overwritten
 *              (head is read first, then cur, then sync_flags).
 * num_slots:   not used by this function. */
static inline void
sync_kloop_kernel_read(struct nm_csb_atok __user *ptr,
struct netmap_ring *shadow_ring,
uint32_t num_slots)
{
/*
 * We place a memory barrier to make sure that the update of head never
 * overtakes the update of cur.
 * (see explanation in ptnetmap_guest_write_kring_csb).
 *
 * NOTE(review): the CSB accesses below are loads, while the name
 * nm_stst_barrier() suggests a store-store barrier -- confirm that it
 * also provides the load ordering required here.
 */
CSB_READ(ptr, head, shadow_ring->head);
nm_stst_barrier();
CSB_READ(ptr, cur, shadow_ring->cur);
CSB_READ(ptr, sync_flags, shadow_ring->flags);
}
/* Enable or disable application --> kernel kicks.
 *
 * csb_ktoa: kernel --> application CSB entry of the ring.
 * val:      value stored into the kern_need_kick field; the callers in this
 *           file pass 1 to enable kicks and 0 to disable them. */
static inline void
csb_ktoa_kick_enable(struct nm_csb_ktoa __user *csb_ktoa, uint32_t val)
{
CSB_WRITE(csb_ktoa, kern_need_kick, val);
}
/*
 * Are application interrupts enabled or disabled?
 * Returns the current value of the appl_need_kick field of the CSB entry.
 */
static inline uint32_t
csb_atok_intr_enabled(struct nm_csb_atok __user *csb_atok)
{
	uint32_t need_kick;

	CSB_READ(csb_atok, appl_need_kick, need_kick);

	return need_kick;
}
/*
 * Debug helper: print the name and the hwcur/hwtail/rhead/rcur/rtail
 * pointers of a kring, prefixed by a caller-supplied title.
 */
static inline void
sync_kloop_kring_dump(const char *title, const struct netmap_kring *kring)
{
	nm_prinf("%s - name: %s hwcur: %d hwtail: %d rhead: %d rcur: %d rtail: %d",
		 title,
		 kring->name,
		 kring->nr_hwcur,
		 kring->nr_hwtail,
		 kring->rhead,
		 kring->rcur,
		 kring->rtail);
}
/* Per-ring arguments handed to netmap_sync_kloop_tx_ring() and
 * netmap_sync_kloop_rx_ring(). */
struct sync_kloop_ring_args {
/* The kring to be synchronized. */
struct netmap_kring *kring;
/* CSB entry for application --> kernel communication for this ring. */
struct nm_csb_atok *csb_atok;
/* CSB entry for kernel --> application communication for this ring. */
struct nm_csb_ktoa *csb_ktoa;
#ifdef SYNC_KLOOP_POLL
/* Eventfd context signalled to kick the application; the ring routines
 * skip the kick when this is NULL. */
struct eventfd_ctx *irq_ctx;
#endif /* SYNC_KLOOP_POLL */
};
/*
 * Run the TX synchronization cycle for one kring on behalf of the
 * application.
 *
 * The application pointers (head, cur, sync_flags) are read from the
 * application --> kernel CSB entry into a shadow ring, the netmap TX
 * prologue and kring->nm_sync() are executed, and the resulting
 * hwcur/hwtail are published in the kernel --> application CSB entry.
 * The cycle repeats until the application has nothing more to transmit,
 * the ring has no more TX space, or an error occurs.
 * Application --> kernel kicks are disabled while working and re-enabled
 * before returning, except on the "no more TX space" exit.
 *
 * The caller in this file acquires the kring with nm_kr_tryget() before
 * calling and releases it with nm_kr_put() afterwards.
 */
static void
netmap_sync_kloop_tx_ring(const struct sync_kloop_ring_args *a)
{
struct netmap_kring *kring = a->kring;
struct nm_csb_atok *csb_atok = a->csb_atok;
struct nm_csb_ktoa *csb_ktoa = a->csb_ktoa;
struct netmap_ring shadow_ring; /* shadow copy of the netmap_ring */
bool more_txspace = false;
uint32_t num_slots;
int batch;
num_slots = kring->nkr_num_slots;
/* Disable application --> kernel notifications. */
csb_ktoa_kick_enable(csb_ktoa, 0);
/* Copy the application kring pointers from the CSB */
sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
for (;;) {
/* Number of slots between the kernel hwcur and the application head,
 * modulo the ring size. */
batch = shadow_ring.head - kring->nr_hwcur;
if (batch < 0)
batch += num_slots;
#ifdef PTN_TX_BATCH_LIM
if (batch > PTN_TX_BATCH_LIM(num_slots)) {
/* If application moves ahead too fast, let's cut the move so
 * that we don't exceed our batch limit. */
uint32_t head_lim = kring->nr_hwcur + PTN_TX_BATCH_LIM(num_slots);
if (head_lim >= num_slots)
head_lim -= num_slots;
nm_prdis(1, "batch: %d head: %d head_lim: %d", batch, shadow_ring.head,
head_lim);
shadow_ring.head = head_lim;
batch = PTN_TX_BATCH_LIM(num_slots);
}
#endif /* PTN_TX_BATCH_LIM */
/* Ask for reclaiming of completed transmissions when at most half of
 * the ring is available. */
if (nm_kr_txspace(kring) <= (num_slots >> 1)) {
shadow_ring.flags |= NAF_FORCE_RECLAIM;
}
/* Netmap prologue */
shadow_ring.tail = kring->rtail;
if (unlikely(nm_txsync_prologue(kring, &shadow_ring) >= num_slots)) {
/* Reinit ring and enable notifications. */
netmap_ring_reinit(kring);
csb_ktoa_kick_enable(csb_ktoa, 1);
break;
}
if (unlikely(netmap_debug & NM_DEBUG_TXSYNC)) {
sync_kloop_kring_dump("pre txsync", kring);
}
if (unlikely(kring->nm_sync(kring, shadow_ring.flags))) {
/* Reenable notifications. */
csb_ktoa_kick_enable(csb_ktoa, 1);
nm_prerr("txsync() failed");
break;
}
/*
 * Finalize
 * Copy kernel hwcur and hwtail into the CSB for the application sync(), and
 * do the nm_sync_finalize.
 */
sync_kloop_kernel_write(csb_ktoa, kring->nr_hwcur,
kring->nr_hwtail);
if (kring->rtail != kring->nr_hwtail) {
/* Some more room available in the parent adapter. */
kring->rtail = kring->nr_hwtail;
more_txspace = true;
}
if (unlikely(netmap_debug & NM_DEBUG_TXSYNC)) {
sync_kloop_kring_dump("post txsync", kring);
}
/* Interrupt the application if needed. */
#ifdef SYNC_KLOOP_POLL
if (a->irq_ctx && more_txspace && csb_atok_intr_enabled(csb_atok)) {
/* Kick the application, then clear more_txspace so that the same
 * progress is not signalled twice. */
eventfd_signal(a->irq_ctx, 1);
more_txspace = false;
}
#endif /* SYNC_KLOOP_POLL */
/* Read CSB to see if there is more work to do. */
sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
if (shadow_ring.head == kring->rhead) {
/*
 * No more packets to transmit. We enable notifications and
 * go to sleep, waiting for a kick from the application when
 * new slots are ready for transmission.
 */
/* Reenable notifications. */
csb_ktoa_kick_enable(csb_ktoa, 1);
/* Doublecheck. */
sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
if (shadow_ring.head != kring->rhead) {
/* We won the race condition, there are more packets to
 * transmit. Disable notifications and do another cycle */
csb_ktoa_kick_enable(csb_ktoa, 0);
continue;
}
break;
}
if (nm_kr_txempty(kring)) {
/* No more available TX slots. We stop, waiting for a notification
 * from the backend (netmap_tx_irq). */
nm_prdis(1, "TX ring");
break;
}
}
#ifdef SYNC_KLOOP_POLL
/* Deliver a kick that is still pending when the loop terminates. */
if (a->irq_ctx && more_txspace && csb_atok_intr_enabled(csb_atok)) {
eventfd_signal(a->irq_ctx, 1);
}
#endif /* SYNC_KLOOP_POLL */
}
/* Maximum number of consecutive RX cycles that receive no packets. */
#define SYNC_LOOP_RX_DRY_CYCLES_MAX 2
/*
 * Return nonzero when the kring hwtail is equal to the slot that precedes
 * g_head (as computed by nm_prev()), which the callers treat as "no more
 * slots available for reception".
 */
static inline int
sync_kloop_norxslots(struct netmap_kring *kring, uint32_t g_head)
{
	const uint32_t lim = kring->nkr_num_slots - 1;
	const uint32_t before_head = nm_prev(g_head, lim);

	return (NM_ACCESS_ONCE(kring->nr_hwtail) == before_head);
}
/*
 * Run the RX synchronization cycle for one kring on behalf of the
 * application.
 *
 * The application pointers (head, cur, sync_flags) are read from the
 * application --> kernel CSB entry into a shadow ring, the netmap RX
 * prologue and kring->nm_sync() are executed, and the resulting
 * hwcur/hwtail are published in the kernel --> application CSB entry.
 * The cycle repeats until the application has released no more slots,
 * the backend has no more packets (or SYNC_LOOP_RX_DRY_CYCLES_MAX
 * consecutive cycles received nothing), or an error occurs.
 *
 * The kring must have been acquired by the caller with nm_kr_tryget();
 * the caller is also responsible for the matching nm_kr_put(). This
 * function must not release the kring itself, otherwise it would be
 * released twice.
 */
static void
netmap_sync_kloop_rx_ring(const struct sync_kloop_ring_args *a)
{
	struct netmap_kring *kring = a->kring;
	struct nm_csb_atok *csb_atok = a->csb_atok;
	struct nm_csb_ktoa *csb_ktoa = a->csb_ktoa;
	struct netmap_ring shadow_ring; /* shadow copy of the netmap_ring */
	int dry_cycles = 0;
	bool some_recvd = false;
	uint32_t num_slots;

	num_slots = kring->nkr_num_slots;

	/* Disable notifications. */
	csb_ktoa_kick_enable(csb_ktoa, 0);
	/* Copy the application kring pointers from the CSB */
	sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);

	for (;;) {
		uint32_t hwtail;

		/* Netmap prologue */
		shadow_ring.tail = kring->rtail;
		if (unlikely(nm_rxsync_prologue(kring, &shadow_ring) >= num_slots)) {
			/* Reinit ring and enable notifications. */
			netmap_ring_reinit(kring);
			csb_ktoa_kick_enable(csb_ktoa, 1);
			break;
		}

		if (unlikely(netmap_debug & NM_DEBUG_RXSYNC)) {
			sync_kloop_kring_dump("pre rxsync", kring);
		}

		if (unlikely(kring->nm_sync(kring, shadow_ring.flags))) {
			/* Reenable notifications. */
			csb_ktoa_kick_enable(csb_ktoa, 1);
			nm_prerr("rxsync() failed");
			break;
		}

		/*
		 * Finalize
		 * Copy kernel hwcur and hwtail into the CSB for the application sync()
		 */
		hwtail = NM_ACCESS_ONCE(kring->nr_hwtail);
		sync_kloop_kernel_write(csb_ktoa, kring->nr_hwcur, hwtail);
		if (kring->rtail != hwtail) {
			kring->rtail = hwtail;
			some_recvd = true;
			dry_cycles = 0;
		} else {
			dry_cycles++;
		}

		if (unlikely(netmap_debug & NM_DEBUG_RXSYNC)) {
			sync_kloop_kring_dump("post rxsync", kring);
		}

#ifdef SYNC_KLOOP_POLL
		/* Interrupt the application if needed. */
		if (a->irq_ctx && some_recvd && csb_atok_intr_enabled(csb_atok)) {
			/* Kick the application, then clear some_recvd so that
			 * the same progress is not signalled twice. */
			eventfd_signal(a->irq_ctx, 1);
			some_recvd = false;
		}
#endif /* SYNC_KLOOP_POLL */

		/* Read CSB to see if there is more work to do. */
		sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
		if (sync_kloop_norxslots(kring, shadow_ring.head)) {
			/*
			 * No more slots available for reception. We enable notification and
			 * go to sleep, waiting for a kick from the application when new receive
			 * slots are available.
			 */
			/* Reenable notifications. */
			csb_ktoa_kick_enable(csb_ktoa, 1);
			/* Doublecheck. */
			sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
			if (!sync_kloop_norxslots(kring, shadow_ring.head)) {
				/* We won the race condition, more slots are available. Disable
				 * notifications and do another cycle. */
				csb_ktoa_kick_enable(csb_ktoa, 0);
				continue;
			}
			break;
		}

		hwtail = NM_ACCESS_ONCE(kring->nr_hwtail);
		if (unlikely(hwtail == kring->rhead ||
					dry_cycles >= SYNC_LOOP_RX_DRY_CYCLES_MAX)) {
			/* No more packets to be read from the backend. We stop and
			 * wait for a notification from the backend (netmap_rx_irq). */
			nm_prdis(1, "nr_hwtail: %d rhead: %d dry_cycles: %d",
					hwtail, kring->rhead, dry_cycles);
			break;
		}
	}

	/* The kring is released by the caller: no nm_kr_put() here. */

#ifdef SYNC_KLOOP_POLL
	/* Interrupt the application if needed. */
	if (a->irq_ctx && some_recvd && csb_atok_intr_enabled(csb_atok)) {
		eventfd_signal(a->irq_ctx, 1);
	}
#endif /* SYNC_KLOOP_POLL */
}
#ifdef SYNC_KLOOP_POLL
/* One slot of the poll context. The filp/wait/wqh fields are filled by
 * sync_kloop_poll_table_queue_proc(); the irq_* fields are filled by
 * netmap_sync_kloop() for the rings bound to the file descriptor. */
struct sync_kloop_poll_entry {
/* Support for receiving notifications from
 * a netmap ring or from the application. */
struct file *filp; /* file being polled */
wait_queue_t wait; /* wait queue entry added to wqh */
wait_queue_head_t *wqh; /* wait queue head the entry was added to */
/* Support for sending notifications to the application. */
struct eventfd_ctx *irq_ctx; /* obtained from irq_filp */
struct file *irq_filp; /* file of the irqfd passed by the application */
};
/* Poll context used by the sync kloop; it is allocated together with its
 * trailing array of entries. */
struct sync_kloop_poll_ctx {
/* Poll table handed to the poll methods; the queue callback recovers the
 * enclosing context from it with container_of(). */
poll_table wait_table;
/* Index of the next entry to be filled by the queue callback. */
unsigned int next_entry;
/* Number of entries available in the array below. */
unsigned int num_entries;
struct sync_kloop_poll_entry entries[0];
};
/*
 * Queue callback installed in the poll table of the sync kloop.
 * Each invocation takes the next free entry of the poll context, records
 * the file and the wait queue head in it, and adds the current task to
 * that wait queue. Running out of entries triggers BUG_ON().
 */
static void
sync_kloop_poll_table_queue_proc(struct file *file, wait_queue_head_t *wqh,
				poll_table *pt)
{
	struct sync_kloop_poll_ctx *ctx;
	struct sync_kloop_poll_entry *slot;

	ctx = container_of(pt, struct sync_kloop_poll_ctx, wait_table);
	slot = &ctx->entries[ctx->next_entry];

	BUG_ON(ctx->next_entry >= ctx->num_entries);

	slot->wqh = wqh;
	slot->filp = file;
	/* Use the default wake up function. */
	init_waitqueue_entry(&slot->wait, current);
	add_wait_queue(wqh, &slot->wait);

	ctx->next_entry++;
}
#endif /* SYNC_KLOOP_POLL */
int
netmap_sync_kloop(struct netmap_priv_d *priv, struct nmreq_header *hdr)
{
struct nmreq_sync_kloop_start *req =
(struct nmreq_sync_kloop_start *)(uintptr_t)hdr->nr_body;
struct nmreq_opt_sync_kloop_eventfds *eventfds_opt = NULL;
#ifdef SYNC_KLOOP_POLL
struct sync_kloop_poll_ctx *poll_ctx = NULL;
#endif /* SYNC_KLOOP_POLL */
int num_rx_rings, num_tx_rings, num_rings;
uint32_t sleep_us = req->sleep_us;
struct nm_csb_atok* csb_atok_base;
struct nm_csb_ktoa* csb_ktoa_base;
struct netmap_adapter *na;
struct nmreq_option *opt;
int err = 0;
int i;
if (sleep_us > 1000000) {
/* We do not accept sleeping for more than a second. */
return EINVAL;
}
if (priv->np_nifp == NULL) {
return ENXIO;
}
mb(); /* make sure following reads are not from cache */
na = priv->np_na;
if (!nm_netmap_on(na)) {
return ENXIO;
}
NMG_LOCK();
/* Make sure the application is working in CSB mode. */
if (!priv->np_csb_atok_base || !priv->np_csb_ktoa_base) {
NMG_UNLOCK();
nm_prerr("sync-kloop on %s requires "
"NETMAP_REQ_OPT_CSB option", na->name);
return EINVAL;
}
csb_atok_base = priv->np_csb_atok_base;
csb_ktoa_base = priv->np_csb_ktoa_base;
/* Make sure that no kloop is currently running. */
if (priv->np_kloop_state & NM_SYNC_KLOOP_RUNNING) {
err = EBUSY;
}
priv->np_kloop_state |= NM_SYNC_KLOOP_RUNNING;
NMG_UNLOCK();
if (err) {
return err;
}
num_rx_rings = priv->np_qlast[NR_RX] - priv->np_qfirst[NR_RX];
num_tx_rings = priv->np_qlast[NR_TX] - priv->np_qfirst[NR_TX];
num_rings = num_tx_rings + num_rx_rings;
/* Validate notification options. */
opt = nmreq_findoption((struct nmreq_option *)(uintptr_t)hdr->nr_options,
NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS);
if (opt != NULL) {
err = nmreq_checkduplicate(opt);
if (err) {
opt->nro_status = err;
goto out;
}
if (opt->nro_size != sizeof(*eventfds_opt) +
sizeof(eventfds_opt->eventfds[0]) * num_rings) {
/* Option size not consistent with the number of
* entries. */
opt->nro_status = err = EINVAL;
goto out;
}
#ifdef SYNC_KLOOP_POLL
eventfds_opt = (struct nmreq_opt_sync_kloop_eventfds *)opt;
opt->nro_status = 0;
/* We need 2 poll entries for TX and RX notifications coming
* from the netmap adapter, plus one entries per ring for the
* notifications coming from the application. */
poll_ctx = nm_os_malloc(sizeof(*poll_ctx) +
(2 + num_rings) * sizeof(poll_ctx->entries[0]));
init_poll_funcptr(&poll_ctx->wait_table,
sync_kloop_poll_table_queue_proc);
poll_ctx->num_entries = 2 + num_rings;
poll_ctx->next_entry = 0;
/* Poll for notifications coming from the applications through
* eventfds . */
for (i = 0; i < num_rings; i++) {
struct eventfd_ctx *irq;
struct file *filp;
unsigned long mask;
filp = eventfd_fget(eventfds_opt->eventfds[i].ioeventfd);
if (IS_ERR(filp)) {
err = PTR_ERR(filp);
goto out;
}
mask = filp->f_op->poll(filp, &poll_ctx->wait_table);
if (mask & POLLERR) {
err = EINVAL;
goto out;
}
filp = eventfd_fget(eventfds_opt->eventfds[i].irqfd);
if (IS_ERR(filp)) {
err = PTR_ERR(filp);
goto out;
}
poll_ctx->entries[i].irq_filp = filp;
irq = eventfd_ctx_fileget(filp);
if (IS_ERR(irq)) {
err = PTR_ERR(irq);
goto out;
}
poll_ctx->entries[i].irq_ctx = irq;
}
/* Poll for notifications coming from the netmap rings bound to
* this file descriptor. */
{
NM_SELINFO_T *si[NR_TXRX];
NMG_LOCK();
si[NR_RX] = nm_si_user(priv, NR_RX) ? &na->si[NR_RX] :
&na->rx_rings[priv->np_qfirst[NR_RX]]->si;
si[NR_TX] = nm_si_user(priv, NR_TX) ? &na->si[NR_TX] :
&na->tx_rings[priv->np_qfirst[NR_TX]]->si;
NMG_UNLOCK();
poll_wait(priv->np_filp, si[NR_RX], &poll_ctx->wait_table);
poll_wait(priv->np_filp, si[NR_TX], &poll_ctx->wait_table);
}
#else /* SYNC_KLOOP_POLL */
opt->nro_status = EOPNOTSUPP;
goto out;
#endif /* SYNC_KLOOP_POLL */
}
/* Main loop. */
for (;;) {
if (unlikely(NM_ACCESS_ONCE(priv->np_kloop_state) & NM_SYNC_KLOOP_STOPPING)) {
break;
}
#ifdef SYNC_KLOOP_POLL
if (poll_ctx)
__set_current_state(TASK_INTERRUPTIBLE);
#endif /* SYNC_KLOOP_POLL */
/* Process all the TX rings bound to this file descriptor. */
for (i = 0; i < num_tx_rings; i++) {
struct sync_kloop_ring_args a = {
.kring = NMR(na, NR_TX)[i + priv->np_qfirst[NR_TX]],
.csb_atok = csb_atok_base + i,
.csb_ktoa = csb_ktoa_base + i,
};
#ifdef SYNC_KLOOP_POLL
if (poll_ctx)
a.irq_ctx = poll_ctx->entries[i].irq_ctx;
#endif /* SYNC_KLOOP_POLL */
if (unlikely(nm_kr_tryget(a.kring, 1, NULL))) {
continue;
}
netmap_sync_kloop_tx_ring(&a);
nm_kr_put(a.kring);
}
/* Process all the RX rings bound to this file descriptor. */
for (i = 0; i < num_rx_rings; i++) {
struct sync_kloop_ring_args a = {
.kring = NMR(na, NR_RX)[i + priv->np_qfirst[NR_RX]],
.csb_atok = csb_atok_base + num_tx_rings + i,
.csb_ktoa = csb_ktoa_base + num_tx_rings + i,
};
#ifdef SYNC_KLOOP_POLL
if (poll_ctx)
a.irq_ctx = poll_ctx->entries[num_tx_rings + i].irq_ctx;
#endif /* SYNC_KLOOP_POLL */
if (unlikely(nm_kr_tryget(a.kring, 1, NULL))) {
continue;
}
netmap_sync_kloop_rx_ring(&a);
nm_kr_put(a.kring);
}
#ifdef SYNC_KLOOP_POLL
if (poll_ctx) {
/* If a poll context is present, yield to the scheduler
* waiting for a notification to come either from
* netmap or the application. */
schedule_timeout_interruptible(msecs_to_jiffies(1000));
} else
#endif /* SYNC_KLOOP_POLL */
{
/* Default synchronization method: sleep for a while. */
usleep_range(sleep_us, sleep_us);
}
}
out:
#ifdef SYNC_KLOOP_POLL
if (poll_ctx) {
/* Stop polling from netmap and the eventfds, and deallocate
* the poll context. */
__set_current_state(TASK_RUNNING);
for (i = 0; i < poll_ctx->next_entry; i++) {
struct sync_kloop_poll_entry *entry =
poll_ctx->entries + i;
if (entry->wqh)
remove_wait_queue(entry->wqh, &entry->wait);
/* We did not get a reference to the eventfds, but
* don't do that on netmap file descriptors (since
* a reference was not taken. */
if (entry->filp && entry->filp != priv->np_filp)
fput(entry->filp);
if (entry->irq_ctx)
eventfd_ctx_put(entry->irq_ctx);
if (entry->irq_filp)
fput(entry->irq_filp);
}
nm_os_free(poll_ctx);
poll_ctx = NULL;
}
#endif /* SYNC_KLOOP_POLL */
/* Reset the kloop state. */
NMG_LOCK();
priv->np_kloop_state = 0;
NMG_UNLOCK();
return err;
}
/*
 * Body of the SYNC_KLOOP_STOP command: raise the NM_SYNC_KLOOP_STOPPING
 * flag and then poll np_kloop_state (sleeping between checks) until the
 * NM_SYNC_KLOOP_RUNNING flag is cleared. Always returns 0.
 */
int
netmap_sync_kloop_stop(struct netmap_priv_d *priv)
{
	bool still_running;

	/* Ask the kloop to terminate. */
	NMG_LOCK();
	priv->np_kloop_state |= NM_SYNC_KLOOP_STOPPING;
	NMG_UNLOCK();

	/* Wait until the running flag goes away. */
	do {
		usleep_range(1000, 1500);
		NMG_LOCK();
		still_running = (NM_ACCESS_ONCE(priv->np_kloop_state)
				& NM_SYNC_KLOOP_RUNNING);
		NMG_UNLOCK();
	} while (still_running);

	return 0;
}
#ifdef WITH_PTNETMAP
/*
* Guest ptnetmap txsync()/rxsync() routines, used in ptnet device drivers.
* These routines are reused across the different operating systems supported
* by netmap.
*/
/*
 * Reconcile host and guest views of the transmit ring.
 *
 * Guest user wants to transmit packets up to the one before ring->head,
 * and guest kernel knows tx_ring->hwcur is the first packet unsent
 * by the host kernel.
 *
 * We push out as many packets as possible, and possibly
 * reclaim buffers from previously completed transmission.
 *
 * Notifications from the host are enabled only if the user guest would
 * block (no space in the ring).
 *
 * atok:  application --> kernel CSB entry of the ring (written here).
 * ktoa:  kernel --> application CSB entry of the ring (read here).
 * kring: the TX kring being synchronized.
 * flags: sync flags; they are copied to atok->sync_flags when a kick is
 *        requested, and NAF_FORCE_RECLAIM forces both the kick and the
 *        reclaim step.
 *
 * Returns true if the caller should kick the host, false otherwise.
 */
bool
netmap_pt_guest_txsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa,
struct netmap_kring *kring, int flags)
{
bool notify = false;
/* Disable notifications */
atok->appl_need_kick = 0;
/*
 * First part: tell the host (updating the CSB) to process the new
 * packets.
 */
kring->nr_hwcur = ktoa->hwcur;
ptnetmap_guest_write_kring_csb(atok, kring->rcur, kring->rhead);
/* Ask for a kick from a guest to the host if needed. */
if (((kring->rhead != kring->nr_hwcur || nm_kr_txempty(kring))
&& NM_ACCESS_ONCE(ktoa->kern_need_kick)) ||
(flags & NAF_FORCE_RECLAIM)) {
atok->sync_flags = flags;
notify = true;
}
/*
 * Second part: reclaim buffers for completed transmissions.
 */
if (nm_kr_txempty(kring) || (flags & NAF_FORCE_RECLAIM)) {
ptnetmap_guest_read_kring_csb(ktoa, kring);
}
/*
 * No more room in the ring for new transmissions. The user thread will
 * go to sleep and we need to be notified by the host when more free
 * space is available.
 */
if (nm_kr_txempty(kring) && !(kring->nr_kflags & NKR_NOINTR)) {
/* Reenable notifications. */
atok->appl_need_kick = 1;
/* Double check: re-read the CSB after enabling notifications, in
 * case the host updated it in the meantime. */
ptnetmap_guest_read_kring_csb(ktoa, kring);
/* If there is new free space, disable notifications */
if (unlikely(!nm_kr_txempty(kring))) {
atok->appl_need_kick = 0;
}
}
nm_prdis(1, "%s CSB(head:%u cur:%u hwtail:%u) KRING(head:%u cur:%u tail:%u)",
kring->name, atok->head, atok->cur, ktoa->hwtail,
kring->rhead, kring->rcur, kring->nr_hwtail);
return notify;
}
/*
 * Reconcile host and guest view of the receive ring.
 *
 * Update hwcur/hwtail from host (reading from CSB).
 *
 * If guest user has released buffers up to the one before ring->head, we
 * also give them to the host.
 *
 * Notifications from the host are enabled only if the user guest would
 * block (no more completed slots in the ring).
 *
 * atok:  application --> kernel CSB entry of the ring (written here).
 * ktoa:  kernel --> application CSB entry of the ring (read here).
 * kring: the RX kring being synchronized.
 * flags: sync flags; they are copied to atok->sync_flags when a kick is
 *        requested.
 *
 * Returns true if the caller should kick the host, false otherwise.
 */
bool
netmap_pt_guest_rxsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa,
struct netmap_kring *kring, int flags)
{
bool notify = false;
/* Disable notifications */
atok->appl_need_kick = 0;
/*
 * First part: import newly received packets, by updating the kring
 * hwtail to the hwtail known from the host (read from the CSB).
 * This also updates the kring hwcur.
 */
ptnetmap_guest_read_kring_csb(ktoa, kring);
kring->nr_kflags &= ~NKR_PENDINTR;
/*
 * Second part: tell the host about the slots that guest user has
 * released, by updating cur and head in the CSB.
 */
if (kring->rhead != kring->nr_hwcur) {
ptnetmap_guest_write_kring_csb(atok, kring->rcur,
kring->rhead);
/* Ask for a kick from the guest to the host if needed. */
if (NM_ACCESS_ONCE(ktoa->kern_need_kick)) {
atok->sync_flags = flags;
notify = true;
}
}
/*
 * No more completed RX slots. The user thread will go to sleep and
 * we need to be notified by the host when more RX slots have been
 * completed.
 */
if (nm_kr_rxempty(kring) && !(kring->nr_kflags & NKR_NOINTR)) {
/* Reenable notifications. */
atok->appl_need_kick = 1;
/* Double check: re-read the CSB after enabling notifications, in
 * case the host updated it in the meantime. */
ptnetmap_guest_read_kring_csb(ktoa, kring);
/* If there are new slots, disable notifications. */
if (!nm_kr_rxempty(kring)) {
atok->appl_need_kick = 0;
}
}
nm_prdis(1, "%s CSB(head:%u cur:%u hwtail:%u) KRING(head:%u cur:%u tail:%u)",
kring->name, atok->head, atok->cur, ktoa->hwtail,
kring->rhead, kring->rcur, kring->nr_hwtail);
return notify;
}
/*
* Callbacks for ptnet drivers: nm_krings_create, nm_krings_delete, nm_dtor.
*/
int
ptnet_nm_krings_create(struct netmap_adapter *na)
{
struct netmap_pt_guest_adapter *ptna =
(struct netmap_pt_guest_adapter *)na; /* Upcast. */
struct netmap_adapter *na_nm = &ptna->hwup.up;
struct netmap_adapter *na_dr = &ptna->dr.up;
int ret;
if (ptna->backend_users) {
return 0;
}
/* Create krings on the public netmap adapter. */
ret = netmap_hw_krings_create(na_nm);
if (ret) {
return ret;
}
/* Copy krings into the netmap adapter private to the driver. */
na_dr->tx_rings = na_nm->tx_rings;
na_dr->rx_rings = na_nm->rx_rings;
return 0;
}
/*
 * nm_krings_delete callback for ptnet drivers.
 * When the backend has no users, detach the rings from the
 * driver-private adapter and delete the krings of the public adapter.
 */
void
ptnet_nm_krings_delete(struct netmap_adapter *na)
{
	struct netmap_pt_guest_adapter *ptna =
			(struct netmap_pt_guest_adapter *)na; /* Upcast. */
	struct netmap_adapter *pub_na = &ptna->hwup.up;
	struct netmap_adapter *drv_na = &ptna->dr.up;

	if (!ptna->backend_users) {
		drv_na->tx_rings = NULL;
		drv_na->rx_rings = NULL;
		netmap_hw_krings_delete(pub_na);
	}
}
/*
 * nm_dtor callback for ptnet drivers.
 * Drop the memory allocator reference held by the driver-private adapter,
 * clear that adapter, and remove the interface from the pass-through
 * guest allocator.
 */
void
ptnet_nm_dtor(struct netmap_adapter *na)
{
	struct netmap_pt_guest_adapter *ptna;

	ptna = (struct netmap_pt_guest_adapter *)na;

	netmap_mem_put(ptna->dr.up.nm_mem);
	memset(&ptna->dr, 0, sizeof(ptna->dr));

	netmap_mem_pt_guest_ifp_del(na->nm_mem, na->ifp);
}
/*
 * Attach a pass-through guest netmap adapter.
 *
 * arg:         template adapter describing the port; its nm_mem and
 *              na_flags fields are updated here. Must not be NULL.
 * nifp_offset: passed to netmap_mem_pt_guest_new().
 * memid:       passed to netmap_mem_pt_guest_new().
 *
 * On success the driver-private adapter (ptna->dr) is initialized with the
 * same ifp, memory allocator (with an extra reference) and nm_config
 * callback as the public one, and backend_users is reset to 0.
 *
 * Returns 0 on success, EINVAL if arg is NULL, ENOMEM if the allocator
 * cannot be obtained, or the error returned by netmap_attach_ext().
 */
int
netmap_pt_guest_attach(struct netmap_adapter *arg,
		       unsigned int nifp_offset, unsigned int memid)
{
	struct netmap_pt_guest_adapter *ptna;
	struct ifnet *ifp;
	int error;

	/* arg is dereferenced right below: reject a NULL adapter instead of
	 * faulting. */
	if (arg == NULL)
		return EINVAL;
	ifp = arg->ifp;

	/* get allocator */
	arg->nm_mem = netmap_mem_pt_guest_new(ifp, nifp_offset, memid);
	if (arg->nm_mem == NULL)
		return ENOMEM;
	arg->na_flags |= NAF_MEM_OWNER;
	error = netmap_attach_ext(arg, sizeof(struct netmap_pt_guest_adapter), 1);
	if (error)
		return error;

	/* get the netmap_pt_guest_adapter */
	ptna = (struct netmap_pt_guest_adapter *) NA(ifp);

	/* Initialize a separate pass-through netmap adapter that is going to
	 * be used by the ptnet driver only, and so never exposed to netmap
	 * applications. We only need a subset of the available fields. */
	memset(&ptna->dr, 0, sizeof(ptna->dr));
	ptna->dr.up.ifp = ifp;
	ptna->dr.up.nm_mem = netmap_mem_get(ptna->hwup.up.nm_mem);
	ptna->dr.up.nm_config = ptna->hwup.up.nm_config;

	ptna->backend_users = 0;

	return 0;
}
#endif /* WITH_PTNETMAP */

View File

@ -56,6 +56,7 @@
*/
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_bdg.h>
static int
nmreq_register_from_legacy(struct nmreq *nmr, struct nmreq_header *hdr,
@ -80,10 +81,11 @@ nmreq_register_from_legacy(struct nmreq *nmr, struct nmreq_header *hdr,
} else {
regmode = NR_REG_ALL_NIC;
}
nmr->nr_flags = regmode |
(nmr->nr_flags & (~NR_REG_MASK));
req->nr_mode = regmode;
} else {
req->nr_mode = nmr->nr_flags & NR_REG_MASK;
}
req->nr_mode = nmr->nr_flags & NR_REG_MASK;
/* Fix nr_name, nr_mode and nr_ringid to handle pipe requests. */
if (req->nr_mode == NR_REG_PIPE_MASTER ||
req->nr_mode == NR_REG_PIPE_SLAVE) {
@ -131,7 +133,7 @@ nmreq_from_legacy(struct nmreq *nmr, u_long ioctl_cmd)
/* First prepare the request header. */
hdr->nr_version = NETMAP_API; /* new API */
strncpy(hdr->nr_name, nmr->nr_name, sizeof(nmr->nr_name));
strlcpy(hdr->nr_name, nmr->nr_name, sizeof(nmr->nr_name));
hdr->nr_options = (uintptr_t)NULL;
hdr->nr_body = (uintptr_t)NULL;
@ -221,7 +223,7 @@ nmreq_from_legacy(struct nmreq *nmr, u_long ioctl_cmd)
}
case NETMAP_PT_HOST_CREATE:
case NETMAP_PT_HOST_DELETE: {
D("Netmap passthrough not supported yet");
nm_prerr("Netmap passthrough not supported yet");
return NULL;
break;
}
@ -242,7 +244,6 @@ nmreq_from_legacy(struct nmreq *nmr, u_long ioctl_cmd)
if (!req) { goto oom; }
hdr->nr_body = (uintptr_t)req;
hdr->nr_reqtype = NETMAP_REQ_PORT_INFO_GET;
req->nr_offset = nmr->nr_offset;
req->nr_memsize = nmr->nr_memsize;
req->nr_tx_slots = nmr->nr_tx_slots;
req->nr_rx_slots = nmr->nr_rx_slots;
@ -262,7 +263,7 @@ oom:
}
nm_os_free(hdr);
}
D("Failed to allocate memory for nmreq_xyz struct");
nm_prerr("Failed to allocate memory for nmreq_xyz struct");
return NULL;
}
@ -300,7 +301,6 @@ nmreq_to_legacy(struct nmreq_header *hdr, struct nmreq *nmr)
case NETMAP_REQ_PORT_INFO_GET: {
struct nmreq_port_info_get *req =
(struct nmreq_port_info_get *)(uintptr_t)hdr->nr_body;
nmr->nr_offset = req->nr_offset;
nmr->nr_memsize = req->nr_memsize;
nmr->nr_tx_slots = req->nr_tx_slots;
nmr->nr_rx_slots = req->nr_rx_slots;
@ -321,7 +321,7 @@ nmreq_to_legacy(struct nmreq_header *hdr, struct nmreq *nmr)
case NETMAP_REQ_VALE_LIST: {
struct nmreq_vale_list *req =
(struct nmreq_vale_list *)(uintptr_t)hdr->nr_body;
strncpy(nmr->nr_name, hdr->nr_name, sizeof(nmr->nr_name));
strlcpy(nmr->nr_name, hdr->nr_name, sizeof(nmr->nr_name));
nmr->nr_arg1 = req->nr_bridge_idx;
nmr->nr_arg2 = req->nr_port_idx;
break;

View File

@ -318,7 +318,7 @@ netmap_mem_get_id(struct netmap_mem_d *nmd)
#ifdef NM_DEBUG_MEM_PUTGET
#define NM_DBG_REFC(nmd, func, line) \
nm_prinf("%s:%d mem[%d] -> %d\n", func, line, (nmd)->nm_id, (nmd)->refcount);
nm_prinf("%d mem[%d] -> %d", line, (nmd)->nm_id, (nmd)->refcount);
#else
#define NM_DBG_REFC(nmd, func, line)
#endif
@ -397,15 +397,15 @@ netmap_init_obj_allocator_bitmap(struct netmap_obj_pool *p)
if (p->bitmap == NULL) {
/* Allocate the bitmap */
n = (p->objtotal + 31) / 32;
p->bitmap = nm_os_malloc(sizeof(uint32_t) * n);
p->bitmap = nm_os_malloc(sizeof(p->bitmap[0]) * n);
if (p->bitmap == NULL) {
D("Unable to create bitmap (%d entries) for allocator '%s'", (int)n,
nm_prerr("Unable to create bitmap (%d entries) for allocator '%s'", (int)n,
p->name);
return ENOMEM;
}
p->bitmap_slots = n;
} else {
memset(p->bitmap, 0, p->bitmap_slots);
memset(p->bitmap, 0, p->bitmap_slots * sizeof(p->bitmap[0]));
}
p->objfree = 0;
@ -416,16 +416,21 @@ netmap_init_obj_allocator_bitmap(struct netmap_obj_pool *p)
*/
for (j = 0; j < p->objtotal; j++) {
if (p->invalid_bitmap && nm_isset(p->invalid_bitmap, j)) {
D("skipping %s %d", p->name, j);
if (netmap_debug & NM_DEBUG_MEM)
nm_prinf("skipping %s %d", p->name, j);
continue;
}
p->bitmap[ (j>>5) ] |= ( 1U << (j & 31U) );
p->objfree++;
}
ND("%s free %u", p->name, p->objfree);
if (p->objfree == 0)
if (netmap_verbose)
nm_prinf("%s free %u", p->name, p->objfree);
if (p->objfree == 0) {
if (netmap_verbose)
nm_prerr("%s: no objects available", p->name);
return ENOMEM;
}
return 0;
}
@ -447,6 +452,7 @@ netmap_mem_init_bitmaps(struct netmap_mem_d *nmd)
* buffers 0 and 1 are reserved
*/
if (nmd->pools[NETMAP_BUF_POOL].objfree < 2) {
nm_prerr("%s: not enough buffers", nmd->pools[NETMAP_BUF_POOL].name);
return ENOMEM;
}
@ -480,8 +486,10 @@ netmap_mem_deref(struct netmap_mem_d *nmd, struct netmap_adapter *na)
nmd->ops->nmd_deref(nmd);
nmd->active--;
if (!nmd->active)
if (last_user) {
nmd->nm_grp = -1;
nmd->lasterr = 0;
}
NMA_UNLOCK(nmd);
return last_user;
@ -720,16 +728,20 @@ nm_mem_assign_group(struct netmap_mem_d *nmd, struct device *dev)
{
int err = 0, id;
id = nm_iommu_group_id(dev);
if (netmap_verbose)
D("iommu_group %d", id);
if (netmap_debug & NM_DEBUG_MEM)
nm_prinf("iommu_group %d", id);
NMA_LOCK(nmd);
if (nmd->nm_grp < 0)
nmd->nm_grp = id;
if (nmd->nm_grp != id)
if (nmd->nm_grp != id) {
if (netmap_verbose)
nm_prerr("iommu group mismatch: %u vs %u",
nmd->nm_grp, id);
nmd->lasterr = err = ENOMEM;
}
NMA_UNLOCK(nmd);
return err;
@ -805,7 +817,7 @@ netmap_mem2_ofstophys(struct netmap_mem_d* nmd, vm_ooffset_t offset)
return pa;
}
/* this is only in case of errors */
D("invalid ofs 0x%x out of 0x%x 0x%x 0x%x", (u_int)o,
nm_prerr("invalid ofs 0x%x out of 0x%x 0x%x 0x%x", (u_int)o,
p[NETMAP_IF_POOL].memtotal,
p[NETMAP_IF_POOL].memtotal
+ p[NETMAP_RING_POOL].memtotal,
@ -854,13 +866,13 @@ win32_build_user_vm_map(struct netmap_mem_d* nmd)
int i, j;
if (netmap_mem_get_info(nmd, &memsize, &memflags, NULL)) {
D("memory not finalised yet");
nm_prerr("memory not finalised yet");
return NULL;
}
mainMdl = IoAllocateMdl(NULL, memsize, FALSE, FALSE, NULL);
if (mainMdl == NULL) {
D("failed to allocate mdl");
nm_prerr("failed to allocate mdl");
return NULL;
}
@ -876,7 +888,7 @@ win32_build_user_vm_map(struct netmap_mem_d* nmd)
tempMdl = IoAllocateMdl(p->lut[0].vaddr, clsz, FALSE, FALSE, NULL);
if (tempMdl == NULL) {
NMA_UNLOCK(nmd);
D("fail to allocate tempMdl");
nm_prerr("fail to allocate tempMdl");
IoFreeMdl(mainMdl);
return NULL;
}
@ -971,7 +983,7 @@ netmap_obj_offset(struct netmap_obj_pool *p, const void *vaddr)
p->name, ofs, i, vaddr);
return ofs;
}
D("address %p is not contained inside any cluster (%s)",
nm_prerr("address %p is not contained inside any cluster (%s)",
vaddr, p->name);
return 0; /* An error occurred */
}
@ -1002,12 +1014,12 @@ netmap_obj_malloc(struct netmap_obj_pool *p, u_int len, uint32_t *start, uint32_
void *vaddr = NULL;
if (len > p->_objsize) {
D("%s request size %d too large", p->name, len);
nm_prerr("%s request size %d too large", p->name, len);
return NULL;
}
if (p->objfree == 0) {
D("no more %s objects", p->name);
nm_prerr("no more %s objects", p->name);
return NULL;
}
if (start)
@ -1049,13 +1061,13 @@ netmap_obj_free(struct netmap_obj_pool *p, uint32_t j)
uint32_t *ptr, mask;
if (j >= p->objtotal) {
D("invalid index %u, max %u", j, p->objtotal);
nm_prerr("invalid index %u, max %u", j, p->objtotal);
return 1;
}
ptr = &p->bitmap[j / 32];
mask = (1 << (j % 32));
if (*ptr & mask) {
D("ouch, double free on buffer %d", j);
nm_prerr("ouch, double free on buffer %d", j);
return 1;
} else {
*ptr |= mask;
@ -1086,7 +1098,7 @@ netmap_obj_free_va(struct netmap_obj_pool *p, void *vaddr)
netmap_obj_free(p, j);
return;
}
D("address %p is not contained inside any cluster (%s)",
nm_prerr("address %p is not contained inside any cluster (%s)",
vaddr, p->name);
}
@ -1127,7 +1139,7 @@ netmap_extra_alloc(struct netmap_adapter *na, uint32_t *head, uint32_t n)
uint32_t cur = *head; /* save current head */
uint32_t *p = netmap_buf_malloc(nmd, &pos, head);
if (p == NULL) {
D("no more buffers after %d of %d", i, n);
nm_prerr("no more buffers after %d of %d", i, n);
*head = cur; /* restore */
break;
}
@ -1158,9 +1170,9 @@ netmap_extra_free(struct netmap_adapter *na, uint32_t head)
break;
}
if (head != 0)
D("breaking with head %d", head);
if (netmap_verbose)
D("freed %d buffers", i);
nm_prerr("breaking with head %d", head);
if (netmap_debug & NM_DEBUG_MEM)
nm_prinf("freed %d buffers", i);
}
@ -1176,7 +1188,7 @@ netmap_new_bufs(struct netmap_mem_d *nmd, struct netmap_slot *slot, u_int n)
for (i = 0; i < n; i++) {
void *vaddr = netmap_buf_malloc(nmd, &pos, &index);
if (vaddr == NULL) {
D("no more buffers after %d of %d", i, n);
nm_prerr("no more buffers after %d of %d", i, n);
goto cleanup;
}
slot[i].buf_idx = index;
@ -1217,7 +1229,7 @@ netmap_free_buf(struct netmap_mem_d *nmd, uint32_t i)
struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL];
if (i < 2 || i >= p->objtotal) {
D("Cannot free buf#%d: should be in [2, %d[", i, p->objtotal);
nm_prerr("Cannot free buf#%d: should be in [2, %d[", i, p->objtotal);
return;
}
netmap_obj_free(p, i);
@ -1317,22 +1329,22 @@ netmap_config_obj_allocator(struct netmap_obj_pool *p, u_int objtotal, u_int obj
#define LINE_ROUND NM_CACHE_ALIGN // 64
if (objsize >= MAX_CLUSTSIZE) {
/* we could do it but there is no point */
D("unsupported allocation for %d bytes", objsize);
nm_prerr("unsupported allocation for %d bytes", objsize);
return EINVAL;
}
/* make sure objsize is a multiple of LINE_ROUND */
i = (objsize & (LINE_ROUND - 1));
if (i) {
D("XXX aligning object by %d bytes", LINE_ROUND - i);
nm_prinf("aligning object by %d bytes", LINE_ROUND - i);
objsize += LINE_ROUND - i;
}
if (objsize < p->objminsize || objsize > p->objmaxsize) {
D("requested objsize %d out of range [%d, %d]",
nm_prerr("requested objsize %d out of range [%d, %d]",
objsize, p->objminsize, p->objmaxsize);
return EINVAL;
}
if (objtotal < p->nummin || objtotal > p->nummax) {
D("requested objtotal %d out of range [%d, %d]",
nm_prerr("requested objtotal %d out of range [%d, %d]",
objtotal, p->nummin, p->nummax);
return EINVAL;
}
@ -1354,13 +1366,13 @@ netmap_config_obj_allocator(struct netmap_obj_pool *p, u_int objtotal, u_int obj
}
/* exact solution not found */
if (clustentries == 0) {
D("unsupported allocation for %d bytes", objsize);
nm_prerr("unsupported allocation for %d bytes", objsize);
return EINVAL;
}
/* compute clustsize */
clustsize = clustentries * objsize;
if (netmap_verbose)
D("objsize %d clustsize %d objects %d",
if (netmap_debug & NM_DEBUG_MEM)
nm_prinf("objsize %d clustsize %d objects %d",
objsize, clustsize, clustentries);
/*
@ -1403,7 +1415,7 @@ netmap_finalize_obj_allocator(struct netmap_obj_pool *p)
p->lut = nm_alloc_lut(p->objtotal);
if (p->lut == NULL) {
D("Unable to create lookup table for '%s'", p->name);
nm_prerr("Unable to create lookup table for '%s'", p->name);
goto clean;
}
@ -1430,7 +1442,7 @@ netmap_finalize_obj_allocator(struct netmap_obj_pool *p)
* If we get here, there is a severe memory shortage,
* so halve the allocated memory to reclaim some.
*/
D("Unable to create cluster at %d for '%s' allocator",
nm_prerr("Unable to create cluster at %d for '%s' allocator",
i, p->name);
if (i < 2) /* nothing to halve */
goto out;
@ -1466,7 +1478,7 @@ netmap_finalize_obj_allocator(struct netmap_obj_pool *p)
}
p->memtotal = p->numclusters * p->_clustsize;
if (netmap_verbose)
D("Pre-allocated %d clusters (%d/%dKB) for '%s'",
nm_prinf("Pre-allocated %d clusters (%d/%dKB) for '%s'",
p->numclusters, p->_clustsize >> 10,
p->memtotal >> 10, p->name);
@ -1498,8 +1510,8 @@ netmap_mem_reset_all(struct netmap_mem_d *nmd)
{
int i;
if (netmap_verbose)
D("resetting %p", nmd);
if (netmap_debug & NM_DEBUG_MEM)
nm_prinf("resetting %p", nmd);
for (i = 0; i < NETMAP_POOLS_NR; i++) {
netmap_reset_obj_allocator(&nmd->pools[i]);
}
@ -1525,7 +1537,7 @@ netmap_mem_unmap(struct netmap_obj_pool *p, struct netmap_adapter *na)
(void)i;
(void)lim;
(void)lut;
D("unsupported on Windows");
nm_prerr("unsupported on Windows");
#else /* linux */
ND("unmapping and freeing plut for %s", na->name);
if (lut->plut == NULL)
@ -1561,7 +1573,7 @@ netmap_mem_map(struct netmap_obj_pool *p, struct netmap_adapter *na)
(void)i;
(void)lim;
(void)lut;
D("unsupported on Windows");
nm_prerr("unsupported on Windows");
#else /* linux */
if (lut->plut != NULL) {
@ -1572,7 +1584,7 @@ netmap_mem_map(struct netmap_obj_pool *p, struct netmap_adapter *na)
ND("allocating physical lut for %s", na->name);
lut->plut = nm_alloc_plut(lim);
if (lut->plut == NULL) {
D("Failed to allocate physical lut for %s", na->name);
nm_prerr("Failed to allocate physical lut for %s", na->name);
return ENOMEM;
}
@ -1589,7 +1601,7 @@ netmap_mem_map(struct netmap_obj_pool *p, struct netmap_adapter *na)
error = netmap_load_map(na, (bus_dma_tag_t) na->pdev, &lut->plut[i].paddr,
p->lut[i].vaddr, p->_clustsize);
if (error) {
D("Failed to map cluster #%d from the %s pool", i, p->name);
nm_prerr("Failed to map cluster #%d from the %s pool", i, p->name);
break;
}
@ -1627,13 +1639,13 @@ netmap_mem_finalize_all(struct netmap_mem_d *nmd)
nmd->flags |= NETMAP_MEM_FINALIZED;
if (netmap_verbose)
D("interfaces %d KB, rings %d KB, buffers %d MB",
nm_prinf("interfaces %d KB, rings %d KB, buffers %d MB",
nmd->pools[NETMAP_IF_POOL].memtotal >> 10,
nmd->pools[NETMAP_RING_POOL].memtotal >> 10,
nmd->pools[NETMAP_BUF_POOL].memtotal >> 20);
if (netmap_verbose)
D("Free buffers: %d", nmd->pools[NETMAP_BUF_POOL].objfree);
nm_prinf("Free buffers: %d", nmd->pools[NETMAP_BUF_POOL].objfree);
return 0;
@ -1740,7 +1752,7 @@ netmap_mem_private_new(u_int txr, u_int txd, u_int rxr, u_int rxd,
p[NETMAP_BUF_POOL].num = v;
if (netmap_verbose)
D("req if %d*%d ring %d*%d buf %d*%d",
nm_prinf("req if %d*%d ring %d*%d buf %d*%d",
p[NETMAP_IF_POOL].num,
p[NETMAP_IF_POOL].size,
p[NETMAP_RING_POOL].num,
@ -1850,13 +1862,13 @@ netmap_free_rings(struct netmap_adapter *na)
struct netmap_ring *ring = kring->ring;
if (ring == NULL || kring->users > 0 || (kring->nr_kflags & NKR_NEEDRING)) {
if (netmap_verbose)
D("NOT deleting ring %s (ring %p, users %d neekring %d)",
if (netmap_debug & NM_DEBUG_MEM)
nm_prinf("NOT deleting ring %s (ring %p, users %d neekring %d)",
kring->name, ring, kring->users, kring->nr_kflags & NKR_NEEDRING);
continue;
}
if (netmap_verbose)
D("deleting ring %s", kring->name);
if (netmap_debug & NM_DEBUG_MEM)
nm_prinf("deleting ring %s", kring->name);
if (!(kring->nr_kflags & NKR_FAKERING)) {
ND("freeing bufs for %s", kring->name);
netmap_free_bufs(na->nm_mem, ring->slot, kring->nkr_num_slots);
@ -1891,19 +1903,19 @@ netmap_mem2_rings_create(struct netmap_adapter *na)
if (ring || (!kring->users && !(kring->nr_kflags & NKR_NEEDRING))) {
/* unneeded, or already created by somebody else */
if (netmap_verbose)
D("NOT creating ring %s (ring %p, users %d neekring %d)",
if (netmap_debug & NM_DEBUG_MEM)
nm_prinf("NOT creating ring %s (ring %p, users %d neekring %d)",
kring->name, ring, kring->users, kring->nr_kflags & NKR_NEEDRING);
continue;
}
if (netmap_verbose)
D("creating %s", kring->name);
if (netmap_debug & NM_DEBUG_MEM)
nm_prinf("creating %s", kring->name);
ndesc = kring->nkr_num_slots;
len = sizeof(struct netmap_ring) +
ndesc * sizeof(struct netmap_slot);
ring = netmap_ring_malloc(na->nm_mem, len);
if (ring == NULL) {
D("Cannot allocate %s_ring", nm_txrx2str(t));
nm_prerr("Cannot allocate %s_ring", nm_txrx2str(t));
goto cleanup;
}
ND("txring at %p", ring);
@ -1925,14 +1937,16 @@ netmap_mem2_rings_create(struct netmap_adapter *na)
ND("initializing slots for %s_ring", nm_txrx2str(t));
if (!(kring->nr_kflags & NKR_FAKERING)) {
/* this is a real ring */
ND("allocating buffers for %s", kring->name);
if (netmap_debug & NM_DEBUG_MEM)
nm_prinf("allocating buffers for %s", kring->name);
if (netmap_new_bufs(na->nm_mem, ring->slot, ndesc)) {
D("Cannot allocate buffers for %s_ring", nm_txrx2str(t));
nm_prerr("Cannot allocate buffers for %s_ring", nm_txrx2str(t));
goto cleanup;
}
} else {
/* this is a fake ring, set all indices to 0 */
ND("NOT allocating buffers for %s", kring->name);
if (netmap_debug & NM_DEBUG_MEM)
nm_prinf("NOT allocating buffers for %s", kring->name);
netmap_mem_set_ring(na->nm_mem, ring->slot, ndesc, 0);
}
/* ring info */
@ -1998,7 +2012,7 @@ netmap_mem2_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv)
/* initialize base fields -- override const */
*(u_int *)(uintptr_t)&nifp->ni_tx_rings = na->num_tx_rings;
*(u_int *)(uintptr_t)&nifp->ni_rx_rings = na->num_rx_rings;
strncpy(nifp->ni_name, na->name, (size_t)IFNAMSIZ);
strlcpy(nifp->ni_name, na->name, sizeof(nifp->ni_name));
/*
* fill the slots for the rx and tx rings. They contain the offset
@ -2049,8 +2063,8 @@ static void
netmap_mem2_deref(struct netmap_mem_d *nmd)
{
if (netmap_verbose)
D("active = %d", nmd->active);
if (netmap_debug & NM_DEBUG_MEM)
nm_prinf("active = %d", nmd->active);
}
@ -2217,14 +2231,15 @@ netmap_mem_ext_create(uint64_t usrptr, struct nmreq_pools_info *pi, int *perror)
pi->nr_buf_pool_objtotal = netmap_min_priv_params[NETMAP_BUF_POOL].num;
if (pi->nr_buf_pool_objsize == 0)
pi->nr_buf_pool_objsize = netmap_min_priv_params[NETMAP_BUF_POOL].size;
D("if %d %d ring %d %d buf %d %d",
if (netmap_verbose & NM_DEBUG_MEM)
nm_prinf("if %d %d ring %d %d buf %d %d",
pi->nr_if_pool_objtotal, pi->nr_if_pool_objsize,
pi->nr_ring_pool_objtotal, pi->nr_ring_pool_objsize,
pi->nr_buf_pool_objtotal, pi->nr_buf_pool_objsize);
os = nm_os_extmem_create(usrptr, pi, &error);
if (os == NULL) {
D("os extmem creation failed");
nm_prerr("os extmem creation failed");
goto out;
}
@ -2233,7 +2248,8 @@ netmap_mem_ext_create(uint64_t usrptr, struct nmreq_pools_info *pi, int *perror)
nm_os_extmem_delete(os);
return &nme->up;
}
D("not found, creating new");
if (netmap_verbose & NM_DEBUG_MEM)
nm_prinf("not found, creating new");
nme = _netmap_mem_private_new(sizeof(*nme),
(struct netmap_obj_params[]){
@ -2343,7 +2359,7 @@ out:
#endif /* WITH_EXTMEM */
#ifdef WITH_PTNETMAP_GUEST
#ifdef WITH_PTNETMAP
struct mem_pt_if {
struct mem_pt_if *next;
struct ifnet *ifp;
@ -2386,7 +2402,8 @@ netmap_mem_pt_guest_ifp_add(struct netmap_mem_d *nmd, struct ifnet *ifp,
NMA_UNLOCK(nmd);
D("added (ifp=%p,nifp_offset=%u)", ptif->ifp, ptif->nifp_offset);
nm_prinf("ifp=%s,nifp_offset=%u",
ptif->ifp->if_xname, ptif->nifp_offset);
return 0;
}
@ -2667,7 +2684,7 @@ netmap_mem_pt_guest_rings_create(struct netmap_adapter *na)
continue;
kring->ring = (struct netmap_ring *)
((char *)nifp +
nifp->ring_ofs[i + na->num_tx_rings + 1]);
nifp->ring_ofs[netmap_all_rings(na, NR_TX) + i]);
}
error = 0;
@ -2832,4 +2849,4 @@ netmap_mem_pt_guest_new(struct ifnet *ifp,
return nmd;
}
#endif /* WITH_PTNETMAP_GUEST */
#endif /* WITH_PTNETMAP */

View File

@ -158,14 +158,14 @@ struct netmap_mem_d* netmap_mem_ext_create(uint64_t, struct nmreq_pools_info *,
({ int *perr = _perr; if (perr) *(perr) = EOPNOTSUPP; NULL; })
#endif /* WITH_EXTMEM */
#ifdef WITH_PTNETMAP_GUEST
#ifdef WITH_PTNETMAP
struct netmap_mem_d* netmap_mem_pt_guest_new(struct ifnet *,
unsigned int nifp_offset,
unsigned int memid);
struct ptnetmap_memdev;
struct netmap_mem_d* netmap_mem_pt_guest_attach(struct ptnetmap_memdev *, uint16_t);
int netmap_mem_pt_guest_ifp_del(struct netmap_mem_d *, struct ifnet *);
#endif /* WITH_PTNETMAP_GUEST */
#endif /* WITH_PTNETMAP */
int netmap_mem_pools_info_get(struct nmreq_pools_info *,
struct netmap_mem_d *);

View File

@ -0,0 +1,184 @@
/*
* Copyright (C) 2018 Giuseppe Lettieri
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/* $FreeBSD$ */
#if defined(__FreeBSD__)
#include <sys/cdefs.h> /* prerequisite */
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h> /* defines used in kernel.h */
#include <sys/kernel.h> /* types used in module initialization */
#include <sys/malloc.h>
#include <sys/poll.h>
#include <sys/lock.h>
#include <sys/rwlock.h>
#include <sys/selinfo.h>
#include <sys/sysctl.h>
#include <sys/socket.h> /* sockaddrs */
#include <net/if.h>
#include <net/if_var.h>
#include <machine/bus.h> /* bus_dmamap_* */
#include <sys/refcount.h>
#elif defined(linux)
#include "bsd_glue.h"
#elif defined(__APPLE__)
#warning OSX support is only partial
#include "osx_glue.h"
#elif defined(_WIN32)
#include "win_glue.h"
#else
#error Unsupported platform
#endif /* unsupported */
/*
* common headers
*/
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>
#ifdef WITH_NMNULL
/*
 * TX sync routine for null ports.
 *
 * Both arguments are ignored and nothing is done with the ring:
 * the call always reports success (0).
 */
static int
netmap_null_txsync(struct netmap_kring *kring, int flags)
{
	const int status = 0;	/* a null port never fails to sync */

	(void)flags;
	(void)kring;

	return (status);
}
/*
 * RX sync routine for null ports.
 *
 * Both arguments are ignored and nothing is done with the ring:
 * the call always reports success (0).
 */
static int
netmap_null_rxsync(struct netmap_kring *kring, int flags)
{
	const int status = 0;	/* a null port never fails to sync */

	(void)flags;
	(void)kring;

	return (status);
}
/*
 * krings-create routine for null ports.
 *
 * Defers entirely to netmap_krings_create(), passing 0 as its second
 * argument, and hands back whatever that call returns.
 */
static int
netmap_null_krings_create(struct netmap_adapter *na)
{
	int rc;

	rc = netmap_krings_create(na, 0);
	return (rc);
}
/*
 * krings-delete routine for null ports.
 *
 * Defers entirely to netmap_krings_delete(); no extra work is done here.
 */
static void
netmap_null_krings_delete(struct netmap_adapter *na)
{
	netmap_krings_delete(na);
	return;
}
/*
 * Register routine for null ports.
 *
 * While na->active_fds is non-zero the adapter flags are left untouched.
 * Otherwise NAF_NETMAP_ON is set in na->na_flags when 'onoff' is non-zero
 * and cleared when it is zero.  Always returns 0.
 */
static int
netmap_null_reg(struct netmap_adapter *na, int onoff)
{
	/* Only flip the flag when active_fds is zero. */
	if (na->active_fds != 0)
		return (0);

	if (onoff != 0)
		na->na_flags |= NAF_NETMAP_ON;
	else
		na->na_flags &= ~NAF_NETMAP_ON;

	return (0);
}
/*
 * Bridge-attach routine for null ports.
 *
 * All three arguments are ignored and the request is always refused
 * with EINVAL.
 */
static int
netmap_null_bdg_attach(const char *name, struct netmap_adapter *na,
	struct nm_bridge *b)
{
	/* Nothing is inspected: the answer does not depend on the inputs. */
	(void)b;
	(void)na;
	(void)name;

	return (EINVAL);
}
/*
 * Create a "null" netmap adapter when the request in 'hdr' asks for one.
 *
 * hdr:    request header; hdr->nr_body is read as a struct nmreq_register
 *         and hdr->nr_name is used to build the adapter name ("null:<name>").
 * na:     out parameter; set to the new adapter, with a reference taken
 *         through netmap_adapter_get(), only on successful creation.
 * nmd:    memory allocator to bind to the adapter; must not be NULL.
 * create: must be non-zero, since null ports cannot be re-opened.
 *
 * Returns 0 without touching *na when the request mode is not NR_REG_NULL,
 * 0 on successful creation, EINVAL when 'create' is zero or 'nmd' is NULL,
 * ENOMEM when the adapter cannot be allocated, or the error reported by
 * netmap_attach_common().
 */
int
netmap_get_null_na(struct nmreq_header *hdr, struct netmap_adapter **na,
	struct netmap_mem_d *nmd, int create)
{
	struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body;
	struct netmap_null_adapter *nna;
	int error;

	if (req->nr_mode != NR_REG_NULL) {
		nm_prdis("not a null port");
		return 0;
	}

	if (!create) {
		nm_prerr("null ports cannot be re-opened");
		return EINVAL;
	}

	if (nmd == NULL) {
		nm_prerr("null ports must use an existing allocator");
		return EINVAL;
	}

	nna = nm_os_malloc(sizeof(*nna));
	if (nna == NULL) {
		error = ENOMEM;
		goto err;
	}
	snprintf(nna->up.name, sizeof(nna->up.name), "null:%s", hdr->nr_name);

	/* Callbacks: sync routines are no-ops, bridge attach is refused. */
	nna->up.nm_txsync = netmap_null_txsync;
	nna->up.nm_rxsync = netmap_null_rxsync;
	nna->up.nm_register = netmap_null_reg;
	nna->up.nm_krings_create = netmap_null_krings_create;
	nna->up.nm_krings_delete = netmap_null_krings_delete;
	nna->up.nm_bdg_attach = netmap_null_bdg_attach;

	/* The adapter holds its own reference on the allocator. */
	nna->up.nm_mem = netmap_mem_get(nmd);

	/* Ring geometry comes straight from the register request. */
	nna->up.num_tx_rings = req->nr_tx_rings;
	nna->up.num_rx_rings = req->nr_rx_rings;
	nna->up.num_tx_desc = req->nr_tx_slots;
	nna->up.num_rx_desc = req->nr_rx_slots;

	error = netmap_attach_common(&nna->up);
	if (error)
		goto put_mem;

	*na = &nna->up;
	netmap_adapter_get(*na);
	nm_prdis("created null %s", nna->up.name);

	return 0;

put_mem:
	/*
	 * Drop the allocator reference taken above before discarding the
	 * adapter, otherwise the allocator refcount is leaked.
	 */
	netmap_mem_put(nna->up.nm_mem);
	nm_os_free(nna);
err:
	return error;
}
#endif /* WITH_NMNULL */

View File

@ -443,7 +443,7 @@ netmap_pipe_reg(struct netmap_adapter *na, int onoff)
/* In case of no error we put our rings in netmap mode */
for_rx_tx(t) {
for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
for (i = 0; i < nma_get_nrings(na, t); i++) {
struct netmap_kring *kring = NMR(na, t)[i];
if (nm_kring_pending_on(kring)) {
struct netmap_kring *sring, *dring;
@ -490,7 +490,7 @@ netmap_pipe_reg(struct netmap_adapter *na, int onoff)
if (na->active_fds == 0)
na->na_flags &= ~NAF_NETMAP_ON;
for_rx_tx(t) {
for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
for (i = 0; i < nma_get_nrings(na, t); i++) {
struct netmap_kring *kring = NMR(na, t)[i];
if (nm_kring_pending_off(kring)) {
@ -567,7 +567,7 @@ netmap_pipe_krings_delete(struct netmap_adapter *na)
sna = na;
cleanup:
for_rx_tx(t) {
for (i = 0; i < nma_get_nrings(sna, t) + 1; i++) {
for (i = 0; i < nma_get_nrings(sna, t); i++) {
struct netmap_kring *kring = NMR(sna, t)[i];
struct netmap_ring *ring = kring->ring;
uint32_t j, lim = kring->nkr_num_slots - 1;
@ -674,11 +674,11 @@ netmap_get_pipe_na(struct nmreq_header *hdr, struct netmap_adapter **na,
int create_error;
/* Temporarily remove the pipe suffix. */
strncpy(nr_name_orig, hdr->nr_name, sizeof(nr_name_orig));
strlcpy(nr_name_orig, hdr->nr_name, sizeof(nr_name_orig));
*cbra = '\0';
error = netmap_get_na(hdr, &pna, &ifp, nmd, create);
/* Restore the pipe suffix. */
strncpy(hdr->nr_name, nr_name_orig, sizeof(hdr->nr_name));
strlcpy(hdr->nr_name, nr_name_orig, sizeof(hdr->nr_name));
if (!error)
break;
if (error != ENXIO || retries++) {
@ -691,7 +691,7 @@ netmap_get_pipe_na(struct nmreq_header *hdr, struct netmap_adapter **na,
NMG_UNLOCK();
create_error = netmap_vi_create(hdr, 1 /* autodelete */);
NMG_LOCK();
strncpy(hdr->nr_name, nr_name_orig, sizeof(hdr->nr_name));
strlcpy(hdr->nr_name, nr_name_orig, sizeof(hdr->nr_name));
if (create_error && create_error != EEXIST) {
if (create_error != EOPNOTSUPP) {
D("failed to create a persistent vale port: %d", create_error);

View File

@ -121,18 +121,18 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0,
"Max batch size to be used in the bridge");
SYSEND;
static int netmap_vp_create(struct nmreq_header *hdr, struct ifnet *,
static int netmap_vale_vp_create(struct nmreq_header *hdr, struct ifnet *,
struct netmap_mem_d *nmd, struct netmap_vp_adapter **);
static int netmap_vp_bdg_attach(const char *, struct netmap_adapter *,
static int netmap_vale_vp_bdg_attach(const char *, struct netmap_adapter *,
struct nm_bridge *);
static int netmap_vale_bwrap_attach(const char *, struct netmap_adapter *);
/*
* For each output interface, nm_bdg_q is used to construct a list.
* For each output interface, nm_vale_q is used to construct a list.
* bq_len is the number of output buffers (we can have coalescing
* during the copy).
*/
struct nm_bdg_q {
struct nm_vale_q {
uint16_t bq_head;
uint16_t bq_tail;
uint32_t bq_len; /* number of buffers */
@ -140,10 +140,10 @@ struct nm_bdg_q {
/* Holds the default callbacks */
struct netmap_bdg_ops vale_bdg_ops = {
.lookup = netmap_bdg_learning,
.lookup = netmap_vale_learning,
.config = NULL,
.dtor = NULL,
.vp_create = netmap_vp_create,
.vp_create = netmap_vale_vp_create,
.bwrap_attach = netmap_vale_bwrap_attach,
.name = NM_BDG_NAME,
};
@ -212,14 +212,14 @@ nm_alloc_bdgfwd(struct netmap_adapter *na)
/* all port:rings + broadcast */
num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
l += sizeof(struct nm_bdg_q) * num_dstq;
l += sizeof(struct nm_vale_q) * num_dstq;
l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;
nrings = netmap_real_rings(na, NR_TX);
kring = na->tx_rings;
for (i = 0; i < nrings; i++) {
struct nm_bdg_fwd *ft;
struct nm_bdg_q *dstq;
struct nm_vale_q *dstq;
int j;
ft = nm_os_malloc(l);
@ -227,7 +227,7 @@ nm_alloc_bdgfwd(struct netmap_adapter *na)
nm_free_bdgfwd(na);
return ENOMEM;
}
dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
dstq = (struct nm_vale_q *)(ft + NM_BDG_BATCH_MAX);
for (j = 0; j < num_dstq; j++) {
dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
dstq[j].bq_len = 0;
@ -307,11 +307,228 @@ unlock_bdg_free:
return ret;
}
/* Process NETMAP_REQ_VALE_LIST.
 *
 * The request body (hdr->nr_body) is a struct nmreq_vale_list.
 *
 * - Non-empty hdr->nr_name: the name must start with NM_BDG_NAME. The
 *   bridge returned by nm_find_bridge() is reported in nr_bridge_idx, and
 *   nr_port_idx is set to the index of the active port whose adapter name
 *   equals hdr->nr_name, or NM_BDG_NOPORT if no port matches.
 * - Empty hdr->nr_name: scan forward from (nr_bridge_idx, nr_port_idx)
 *   for the first non-NULL port, copy its adapter name into hdr->nr_name
 *   and write its position back into the request.
 *
 * Returns 0 on success, EINVAL if a non-empty name lacks the NM_BDG_NAME
 * prefix, ENOENT if the bridge is not found or the scan finds no port.
 */
int
netmap_vale_list(struct nmreq_header *hdr)
{
struct nmreq_vale_list *req =
(struct nmreq_vale_list *)(uintptr_t)hdr->nr_body;
int namelen = strlen(hdr->nr_name);
struct nm_bridge *b, *bridges;
struct netmap_vp_adapter *vpna;
int error = 0, i, j;
u_int num_bridges;
/* NOTE(review): num_bridges is filled in here but the scan below is
 * bounded by NM_BRIDGES, not by num_bridges -- confirm this is intended.
 */
netmap_bns_getbridges(&bridges, &num_bridges);
/* this is used to enumerate bridges and ports */
if (namelen) { /* look up indexes of bridge and port */
/* prefix check is done before taking NMG_LOCK */
if (strncmp(hdr->nr_name, NM_BDG_NAME,
strlen(NM_BDG_NAME))) {
return EINVAL;
}
NMG_LOCK();
b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
if (!b) {
NMG_UNLOCK();
return ENOENT;
}
/* the bridge index is its offset in the bridges array */
req->nr_bridge_idx = b - bridges; /* bridge index */
/* stays NM_BDG_NOPORT unless a port name matches below */
req->nr_port_idx = NM_BDG_NOPORT;
for (j = 0; j < b->bdg_active_ports; j++) {
i = b->bdg_port_index[j];
vpna = b->bdg_ports[i];
if (vpna == NULL) {
nm_prerr("This should not happen");
continue;
}
/* the former and the latter identify a
* virtual port and a NIC, respectively
*/
if (!strcmp(vpna->up.name, hdr->nr_name)) {
req->nr_port_idx = i; /* port index */
break;
}
}
NMG_UNLOCK();
} else {
/* return the first non-empty entry starting from
* bridge nr_bridge_idx and port nr_port_idx.
*
* Users can detect the end of a bridge by comparing
* the new and old values of nr_bridge_idx, and can
* detect the end of all the bridges by error != 0.
*/
i = req->nr_bridge_idx;
j = req->nr_port_idx;
NMG_LOCK();
/* error stays ENOENT unless a port is found */
for (error = ENOENT; i < NM_BRIDGES; i++) {
b = bridges + i;
for ( ; j < NM_BDG_MAXPORTS; j++) {
if (b->bdg_ports[j] == NULL)
continue;
vpna = b->bdg_ports[j];
/* write back the VALE switch name */
strlcpy(hdr->nr_name, vpna->up.name,
sizeof(hdr->nr_name));
error = 0;
goto out;
}
j = 0; /* following bridges scan from 0 */
}
out:
/* indices are written back whether or not a port was found */
req->nr_bridge_idx = i;
req->nr_port_idx = j;
NMG_UNLOCK();
}
return error;
}
/* Process NETMAP_REQ_VALE_ATTACH.
 *
 * The request body (hdr->nr_body) is a struct nmreq_vale_attach. The whole
 * operation runs under NMG_LOCK.
 *
 * On success returns 0 and stores the port's bdg_port in req->port_index;
 * the adapter reference obtained from netmap_get_vale_na() is not released
 * on that path. On failure returns:
 *   EACCES - the bridge exists and auth_token is not valid for it;
 *   EINVAL - req->reg.nr_mem_id is set but no allocator is found, or
 *            netmap_get_vale_na() returned no adapter;
 *   EBUSY  - an adapter already exists for this name, or the adapter is
 *            owned (NETMAP_OWNED_BY_ANY);
 *   or the error from netmap_get_vale_na() / na->nm_bdg_ctl().
 */
int
netmap_vale_attach(struct nmreq_header *hdr, void *auth_token)
{
struct nmreq_vale_attach *req =
(struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
struct netmap_vp_adapter * vpna;
struct netmap_adapter *na = NULL;
struct netmap_mem_d *nmd = NULL;
struct nm_bridge *b = NULL;
int error;
NMG_LOCK();
/* permission check for modified bridges */
b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
error = EACCES;
goto unlock_exit;
}
/* optional allocator lookup; released with netmap_mem_put() on every exit */
if (req->reg.nr_mem_id) {
nmd = netmap_mem_find(req->reg.nr_mem_id);
if (nmd == NULL) {
error = EINVAL;
goto unlock_exit;
}
}
/* check for existing one */
/* the return value of this first lookup is not checked: only 'na' matters */
error = netmap_get_vale_na(hdr, &na, nmd, 0);
if (na) {
error = EBUSY;
goto unref_exit;
}
error = netmap_get_vale_na(hdr, &na,
nmd, 1 /* create if not exists */);
if (error) { /* no device */
goto unlock_exit;
}
if (na == NULL) { /* VALE prefix missing */
error = EINVAL;
goto unlock_exit;
}
if (NETMAP_OWNED_BY_ANY(na)) {
error = EBUSY;
goto unref_exit;
}
if (na->nm_bdg_ctl) {
/* nop for VALE ports. The bwrap needs to put the hwna
* in netmap mode (see netmap_bwrap_bdg_ctl)
*/
error = na->nm_bdg_ctl(hdr, na);
if (error)
goto unref_exit;
ND("registered %s to netmap-mode", na->name);
}
vpna = (struct netmap_vp_adapter *)na;
/* report the port index back to the caller */
req->port_index = vpna->bdg_port;
/* success: drop only the allocator reference, keep the adapter one */
if (nmd)
netmap_mem_put(nmd);
NMG_UNLOCK();
return 0;
unref_exit:
/* error after an adapter was obtained: release it */
netmap_adapter_put(na);
unlock_exit:
if (nmd)
netmap_mem_put(nmd);
NMG_UNLOCK();
return error;
}
/* Process NETMAP_REQ_VALE_DETACH.
 *
 * The request body (hdr->nr_body) is a struct nmreq_vale_detach. The whole
 * operation runs under NMG_LOCK.
 *
 * Stores the port's bdg_port in the request's port_index and, if the
 * adapter has an nm_bdg_ctl callback, returns its result. The adapter
 * reference obtained from netmap_get_vale_na() is released on both the
 * success and the error paths. Errors returned before the callback:
 *   EACCES - the bridge exists and auth_token is not valid for it;
 *   EINVAL - netmap_get_vale_na() returned no adapter;
 *   EBUSY  - the adapter is a bwrap with na_polling_state set, or
 *            na->na_vp does not match the adapter itself;
 *   or the error from netmap_get_vale_na().
 */
int
netmap_vale_detach(struct nmreq_header *hdr, void *auth_token)
{
struct nmreq_vale_detach *nmreq_det = (void *)(uintptr_t)hdr->nr_body;
struct netmap_vp_adapter *vpna;
struct netmap_adapter *na;
struct nm_bridge *b = NULL;
int error;
NMG_LOCK();
/* permission check for modified bridges */
b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
error = EACCES;
goto unlock_exit;
}
/* NOTE(review): 'na' is not initialized here; this relies on
 * netmap_get_vale_na() always writing *na -- confirm.
 */
error = netmap_get_vale_na(hdr, &na, NULL, 0 /* don't create */);
if (error) { /* no device, or another bridge or user owns the device */
goto unlock_exit;
}
if (na == NULL) { /* VALE prefix missing */
error = EINVAL;
goto unlock_exit;
} else if (nm_is_bwrap(na) &&
((struct netmap_bwrap_adapter *)na)->na_polling_state) {
/* Don't detach a NIC with polling */
error = EBUSY;
goto unref_exit;
}
vpna = (struct netmap_vp_adapter *)na;
if (na->na_vp != vpna) {
/* trying to detach first attach of VALE persistent port attached
* to 2 bridges
*/
error = EBUSY;
goto unref_exit;
}
/* report the port index back to the caller */
nmreq_det->port_index = vpna->bdg_port;
if (na->nm_bdg_ctl) {
/* remove the port from bridge. The bwrap
* also needs to put the hwna in normal mode
*/
error = na->nm_bdg_ctl(hdr, na);
}
/* the success path falls through: the reference is dropped either way */
unref_exit:
netmap_adapter_put(na);
unlock_exit:
NMG_UNLOCK();
return error;
}
/* nm_dtor callback for ephemeral VALE ports */
static void
netmap_vp_dtor(struct netmap_adapter *na)
netmap_vale_vp_dtor(struct netmap_adapter *na)
{
struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
struct nm_bridge *b = vpna->na_bdg;
@ -334,47 +551,13 @@ netmap_vp_dtor(struct netmap_adapter *na)
}
/* Called by external kernel modules (e.g., Openvswitch).
* to modify the private data previously given to regops().
* 'name' may be just bridge's name (including ':' if it
* is not just NM_BDG_NAME).
* Called without NMG_LOCK.
*/
int
nm_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback,
void *callback_data, void *auth_token)
{
void *private_data = NULL;
struct nm_bridge *b;
int error = 0;
NMG_LOCK();
b = nm_find_bridge(name, 0 /* don't create */, NULL);
if (!b) {
error = EINVAL;
goto unlock_update_priv;
}
if (!nm_bdg_valid_auth_token(b, auth_token)) {
error = EACCES;
goto unlock_update_priv;
}
BDG_WLOCK(b);
private_data = callback(b->private_data, callback_data, &error);
b->private_data = private_data;
BDG_WUNLOCK(b);
unlock_update_priv:
NMG_UNLOCK();
return error;
}
/* nm_krings_create callback for VALE ports.
* Calls the standard netmap_krings_create, then adds leases on rx
* rings and bdgfwd on tx rings.
*/
static int
netmap_vp_krings_create(struct netmap_adapter *na)
netmap_vale_vp_krings_create(struct netmap_adapter *na)
{
u_int tailroom;
int error, i;
@ -409,7 +592,7 @@ netmap_vp_krings_create(struct netmap_adapter *na)
/* nm_krings_delete callback for VALE ports. */
static void
netmap_vp_krings_delete(struct netmap_adapter *na)
netmap_vale_vp_krings_delete(struct netmap_adapter *na)
{
nm_free_bdgfwd(na);
netmap_krings_delete(na);
@ -417,7 +600,7 @@ netmap_vp_krings_delete(struct netmap_adapter *na)
static int
nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n,
nm_vale_flush(struct nm_bdg_fwd *ft, u_int n,
struct netmap_vp_adapter *na, u_int ring_nr);
@ -429,7 +612,7 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n,
* Returns the next position in the ring.
*/
static int
nm_bdg_preflush(struct netmap_kring *kring, u_int end)
nm_vale_preflush(struct netmap_kring *kring, u_int end)
{
struct netmap_vp_adapter *na =
(struct netmap_vp_adapter*)kring->na;
@ -470,7 +653,7 @@ nm_bdg_preflush(struct netmap_kring *kring, u_int end)
buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
(void *)(uintptr_t)slot->ptr : NMB(&na->up, slot);
if (unlikely(buf == NULL)) {
RD(5, "NULL %s buffer pointer from %s slot %d len %d",
nm_prlim(5, "NULL %s buffer pointer from %s slot %d len %d",
(slot->flags & NS_INDIRECT) ? "INDIRECT" : "DIRECT",
kring->name, j, ft[ft_i].ft_len);
buf = ft[ft_i].ft_buf = NETMAP_BUF_BASE(&na->up);
@ -488,7 +671,7 @@ nm_bdg_preflush(struct netmap_kring *kring, u_int end)
ft[ft_i - frags].ft_frags = frags;
frags = 1;
if (unlikely((int)ft_i >= bridge_batch))
ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
ft_i = nm_vale_flush(ft, ft_i, na, ring_nr);
}
if (frags > 1) {
/* Here ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG, and we
@ -496,10 +679,10 @@ nm_bdg_preflush(struct netmap_kring *kring, u_int end)
frags--;
ft[ft_i - 1].ft_flags &= ~NS_MOREFRAG;
ft[ft_i - frags].ft_frags = frags;
D("Truncate incomplete fragment at %d (%d frags)", ft_i, frags);
nm_prlim(5, "Truncate incomplete fragment at %d (%d frags)", ft_i, frags);
}
if (ft_i)
ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
ft_i = nm_vale_flush(ft, ft_i, na, ring_nr);
BDG_RUNLOCK(b);
return j;
}
@ -528,7 +711,7 @@ do { \
static __inline uint32_t
nm_bridge_rthash(const uint8_t *addr)
nm_vale_rthash(const uint8_t *addr)
{
uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hask key
@ -554,7 +737,7 @@ nm_bridge_rthash(const uint8_t *addr)
* ring in *dst_ring (at the moment, always use ring 0)
*/
uint32_t
netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
netmap_vale_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
struct netmap_vp_adapter *na, void *private_data)
{
uint8_t *buf = ((uint8_t *)ft->ft_buf) + ft->ft_offset;
@ -586,17 +769,17 @@ netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
*/
if (((buf[6] & 1) == 0) && (na->last_smac != smac)) { /* valid src */
uint8_t *s = buf+6;
sh = nm_bridge_rthash(s); /* hash of source */
sh = nm_vale_rthash(s); /* hash of source */
/* update source port forwarding entry */
na->last_smac = ht[sh].mac = smac; /* XXX expire ? */
ht[sh].ports = mysrc;
if (netmap_verbose)
D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
if (netmap_debug & NM_DEBUG_VALE)
nm_prinf("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
}
dst = NM_BDG_BROADCAST;
if ((buf[0] & 1) == 0) { /* unicast */
dh = nm_bridge_rthash(buf); /* hash of dst */
dh = nm_vale_rthash(buf); /* hash of dst */
if (ht[dh].mac == dmac) { /* found dst */
dst = ht[dh].ports;
}
@ -655,24 +838,28 @@ nm_kr_lease(struct netmap_kring *k, u_int n, int is_rx)
k->nkr_leases[lease_idx] = NR_NOSLOT;
k->nkr_lease_idx = nm_next(lease_idx, lim);
#ifdef CONFIG_NETMAP_DEBUG
if (n > nm_kr_space(k, is_rx)) {
D("invalid request for %d slots", n);
nm_prerr("invalid request for %d slots", n);
panic("x");
}
#endif /* CONFIG NETMAP_DEBUG */
/* XXX verify that there are n slots */
k->nkr_hwlease += n;
if (k->nkr_hwlease > lim)
k->nkr_hwlease -= lim + 1;
#ifdef CONFIG_NETMAP_DEBUG
if (k->nkr_hwlease >= k->nkr_num_slots ||
k->nr_hwcur >= k->nkr_num_slots ||
k->nr_hwtail >= k->nkr_num_slots ||
k->nkr_lease_idx >= k->nkr_num_slots) {
D("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d",
nm_prerr("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d",
k->na->name,
k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
k->nkr_lease_idx, k->nkr_num_slots);
}
#endif /* CONFIG_NETMAP_DEBUG */
return lease_idx;
}
@ -682,10 +869,10 @@ nm_kr_lease(struct netmap_kring *k, u_int n, int is_rx)
* number of ports, and lets us replace the learn and dispatch functions.
*/
int
nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
nm_vale_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
u_int ring_nr)
{
struct nm_bdg_q *dst_ents, *brddst;
struct nm_vale_q *dst_ents, *brddst;
uint16_t num_dsts = 0, *dsts;
struct nm_bridge *b = na->na_bdg;
u_int i, me = na->bdg_port;
@ -696,14 +883,14 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
* queues per port plus one for the broadcast traffic.
* Then we have an array of destination indexes.
*/
dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
dst_ents = (struct nm_vale_q *)(ft + NM_BDG_BATCH_MAX);
dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);
/* first pass: find a destination for each packet in the batch */
for (i = 0; likely(i < n); i += ft[i].ft_frags) {
uint8_t dst_ring = ring_nr; /* default, same ring as origin */
uint16_t dst_port, d_i;
struct nm_bdg_q *d;
struct nm_vale_q *d;
struct nm_bdg_fwd *start_ft = NULL;
ND("slot %d frags %d", i, ft[i].ft_frags);
@ -720,7 +907,7 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
*/
continue;
}
dst_port = b->bdg_ops->lookup(start_ft, &dst_ring, na, b->private_data);
dst_port = b->bdg_ops.lookup(start_ft, &dst_ring, na, b->private_data);
if (netmap_verbose > 255)
RD(5, "slot %d port %d -> %d", i, me, dst_port);
if (dst_port >= NM_BDG_NOPORT)
@ -778,7 +965,7 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
u_int dst_nr, lim, j, d_i, next, brd_next;
u_int needed, howmany;
int retry = netmap_txsync_retry;
struct nm_bdg_q *d;
struct nm_vale_q *d;
uint32_t my_start = 0, lease_idx = 0;
int nrings;
int virt_hdr_mismatch = 0;
@ -862,7 +1049,7 @@ retry:
if (dst_na->retry && retry) {
/* try to get some free slot from the previous run */
kring->nm_notify(kring, 0);
kring->nm_notify(kring, NAF_FORCE_RECLAIM);
/* actually useful only for bwraps, since there
* the notify will trigger a txsync on the hwna. VALE ports
* have dst_na->retry == 0
@ -1030,7 +1217,7 @@ cleanup:
/* nm_txsync callback for VALE ports */
static int
netmap_vp_txsync(struct netmap_kring *kring, int flags)
netmap_vale_vp_txsync(struct netmap_kring *kring, int flags)
{
struct netmap_vp_adapter *na =
(struct netmap_vp_adapter *)kring->na;
@ -1049,17 +1236,17 @@ netmap_vp_txsync(struct netmap_kring *kring, int flags)
if (bridge_batch > NM_BDG_BATCH)
bridge_batch = NM_BDG_BATCH;
done = nm_bdg_preflush(kring, head);
done = nm_vale_preflush(kring, head);
done:
if (done != head)
D("early break at %d/ %d, tail %d", done, head, kring->nr_hwtail);
nm_prerr("early break at %d/ %d, tail %d", done, head, kring->nr_hwtail);
/*
* packets between 'done' and 'cur' are left unsent.
*/
kring->nr_hwcur = done;
kring->nr_hwtail = nm_prev(done, lim);
if (netmap_verbose)
D("%s ring %d flags %d", na->up.name, kring->ring_id, flags);
if (netmap_debug & NM_DEBUG_TXSYNC)
nm_prinf("%s ring %d flags %d", na->up.name, kring->ring_id, flags);
return 0;
}
@ -1068,7 +1255,7 @@ done:
* Only persistent VALE ports have a non-null ifp.
*/
static int
netmap_vp_create(struct nmreq_header *hdr, struct ifnet *ifp,
netmap_vale_vp_create(struct nmreq_header *hdr, struct ifnet *ifp,
struct netmap_mem_d *nmd, struct netmap_vp_adapter **ret)
{
struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body;
@ -1089,7 +1276,7 @@ netmap_vp_create(struct nmreq_header *hdr, struct ifnet *ifp,
na = &vpna->up;
na->ifp = ifp;
strncpy(na->name, hdr->nr_name, sizeof(na->name));
strlcpy(na->name, hdr->nr_name, sizeof(na->name));
/* bound checking */
na->num_tx_rings = req->nr_tx_rings;
@ -1109,6 +1296,7 @@ netmap_vp_create(struct nmreq_header *hdr, struct ifnet *ifp,
*/
nm_bound_var(&npipes, 2, 1, NM_MAXPIPES, NULL);
/* validate extra bufs */
extrabufs = req->nr_extra_bufs;
nm_bound_var(&extrabufs, 0, 0,
128*NM_BDG_MAXSLOTS, NULL);
req->nr_extra_bufs = extrabufs; /* write back */
@ -1121,7 +1309,7 @@ netmap_vp_create(struct nmreq_header *hdr, struct ifnet *ifp,
/*if (vpna->mfs > netmap_buf_size) TODO netmap_buf_size is zero??
vpna->mfs = netmap_buf_size; */
if (netmap_verbose)
D("max frame size %u", vpna->mfs);
nm_prinf("max frame size %u", vpna->mfs);
na->na_flags |= NAF_BDG_MAYSLEEP;
/* persistent VALE ports look like hw devices
@ -1129,12 +1317,12 @@ netmap_vp_create(struct nmreq_header *hdr, struct ifnet *ifp,
*/
if (ifp)
na->na_flags |= NAF_NATIVE;
na->nm_txsync = netmap_vp_txsync;
na->nm_rxsync = netmap_vp_rxsync;
na->nm_register = netmap_vp_reg;
na->nm_krings_create = netmap_vp_krings_create;
na->nm_krings_delete = netmap_vp_krings_delete;
na->nm_dtor = netmap_vp_dtor;
na->nm_txsync = netmap_vale_vp_txsync;
na->nm_rxsync = netmap_vp_rxsync; /* use the one provided by bdg */
na->nm_register = netmap_vp_reg; /* use the one provided by bdg */
na->nm_krings_create = netmap_vale_vp_krings_create;
na->nm_krings_delete = netmap_vale_vp_krings_delete;
na->nm_dtor = netmap_vale_vp_dtor;
ND("nr_mem_id %d", req->nr_mem_id);
na->nm_mem = nmd ?
netmap_mem_get(nmd):
@ -1144,7 +1332,7 @@ netmap_vp_create(struct nmreq_header *hdr, struct ifnet *ifp,
req->nr_extra_bufs, npipes, &error);
if (na->nm_mem == NULL)
goto err;
na->nm_bdg_attach = netmap_vp_bdg_attach;
na->nm_bdg_attach = netmap_vale_vp_bdg_attach;
/* other nmd fields are set in the common routine */
error = netmap_attach_common(na);
if (error)
@ -1163,19 +1351,16 @@ err:
* The na_vp port is this same netmap_adapter. There is no host port.
*/
static int
netmap_vp_bdg_attach(const char *name, struct netmap_adapter *na,
netmap_vale_vp_bdg_attach(const char *name, struct netmap_adapter *na,
struct nm_bridge *b)
{
struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
if (b->bdg_ops != &vale_bdg_ops) {
return NM_NEED_BWRAP;
}
if (vpna->na_bdg) {
if ((b->bdg_flags & NM_BDG_NEED_BWRAP) || vpna->na_bdg) {
return NM_NEED_BWRAP;
}
na->na_vp = vpna;
strncpy(na->name, name, sizeof(na->name));
strlcpy(na->name, name, sizeof(na->name));
na->na_hostvp = NULL;
return 0;
}
@ -1186,12 +1371,12 @@ netmap_vale_bwrap_krings_create(struct netmap_adapter *na)
int error;
/* impersonate a netmap_vp_adapter */
error = netmap_vp_krings_create(na);
error = netmap_vale_vp_krings_create(na);
if (error)
return error;
error = netmap_bwrap_krings_create_common(na);
if (error) {
netmap_vp_krings_delete(na);
netmap_vale_vp_krings_delete(na);
}
return error;
}
@ -1200,7 +1385,7 @@ static void
netmap_vale_bwrap_krings_delete(struct netmap_adapter *na)
{
netmap_bwrap_krings_delete_common(na);
netmap_vp_krings_delete(na);
netmap_vale_vp_krings_delete(na);
}
static int
@ -1216,9 +1401,9 @@ netmap_vale_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
return ENOMEM;
}
na = &bna->up.up;
strncpy(na->name, nr_name, sizeof(na->name));
strlcpy(na->name, nr_name, sizeof(na->name));
na->nm_register = netmap_bwrap_reg;
na->nm_txsync = netmap_vp_txsync;
na->nm_txsync = netmap_vale_vp_txsync;
// na->nm_rxsync = netmap_bwrap_rxsync;
na->nm_krings_create = netmap_vale_bwrap_krings_create;
na->nm_krings_delete = netmap_vale_bwrap_krings_delete;
@ -1313,7 +1498,8 @@ nm_vi_destroy(const char *name)
NMG_UNLOCK();
D("destroying a persistent vale interface %s", ifp->if_xname);
if (netmap_verbose)
nm_prinf("destroying a persistent vale interface %s", ifp->if_xname);
/* Linux requires all the references are released
* before unregister
*/
@ -1389,9 +1575,10 @@ netmap_vi_create(struct nmreq_header *hdr, int autodelete)
}
}
/* netmap_vp_create creates a struct netmap_vp_adapter */
error = netmap_vp_create(hdr, ifp, nmd, &vpna);
error = netmap_vale_vp_create(hdr, ifp, nmd, &vpna);
if (error) {
D("error %d", error);
if (netmap_debug & NM_DEBUG_VALE)
nm_prerr("error %d", error);
goto err_1;
}
/* persist-specific routines */

View File

@ -3,12 +3,12 @@
# Compile netmap as a module, useful if you want a netmap bridge
# or loadable drivers.
SYSDIR?=${SRCTOP}/sys
.include "${SYSDIR}/conf/kern.opts.mk"
.include <bsd.own.mk> # FreeBSD 10 and earlier
# .include "${SYSDIR}/conf/kern.opts.mk"
.PATH: ${SYSDIR}/dev/netmap
.PATH.h: ${SYSDIR}/net
CFLAGS += -I${SYSDIR}/ -D INET
.PATH: ${.CURDIR}/../../dev/netmap
.PATH.h: ${.CURDIR}/../../net
CFLAGS += -I${.CURDIR}/../../ -D INET -D VIMAGE
KMOD = netmap
SRCS = device_if.h bus_if.h pci_if.h opt_netmap.h
SRCS += netmap.c netmap.h netmap_kern.h
@ -20,8 +20,10 @@ SRCS += netmap_freebsd.c
SRCS += netmap_offloadings.c
SRCS += netmap_pipe.c
SRCS += netmap_monitor.c
SRCS += netmap_pt.c
SRCS += netmap_kloop.c
SRCS += netmap_legacy.c
SRCS += netmap_bdg.c
SRCS += netmap_null.c
SRCS += if_ptnet.c
SRCS += opt_inet.h opt_inet6.h

View File

@ -41,9 +41,9 @@
#ifndef _NET_NETMAP_H_
#define _NET_NETMAP_H_
#define NETMAP_API 12 /* current API version */
#define NETMAP_API 13 /* current API version */
#define NETMAP_MIN_API 11 /* min and max versions accepted */
#define NETMAP_MIN_API 13 /* min and max versions accepted */
#define NETMAP_MAX_API 15
/*
* Some fields should be cache-aligned to reduce contention.
@ -333,12 +333,17 @@ struct netmap_ring {
*/
/*
 * Check if space is available in the ring. We use ring->head, which
 * points to the next netmap slot to be published to netmap. It is
 * possible that the application moves ring->cur ahead of ring->tail
 * (e.g., by setting ring->cur <== ring->tail), if it wants more slots
 * than the ones currently available, and it wants to be notified when
 * more arrive. See netmap(4) for more details and examples.
 *
 * Returns non-zero when ring->head has caught up with ring->tail,
 * zero otherwise.
 */
static inline int
nm_ring_empty(struct netmap_ring *ring)
{
	return (ring->head == ring->tail);
}
/*
@ -479,6 +484,10 @@ struct nmreq_option {
* !=0: errno value
*/
uint32_t nro_status;
/* Option size, used only for options that can have variable size
* (e.g. because they contain arrays). For fixed-size options this
* field should be set to zero. */
uint64_t nro_size;
};
/* Header common to all requests. Do not reorder these fields, as we need
@ -518,12 +527,32 @@ enum {
NETMAP_REQ_VALE_POLLING_DISABLE,
/* Get info about the pools of a memory allocator. */
NETMAP_REQ_POOLS_INFO_GET,
/* Start an in-kernel loop that syncs the rings periodically or
* on notifications. The loop runs in the context of the ioctl
* syscall, and only stops on NETMAP_REQ_SYNC_KLOOP_STOP. */
NETMAP_REQ_SYNC_KLOOP_START,
/* Stops the thread executing the in-kernel loop. The thread
* returns from the ioctl syscall. */
NETMAP_REQ_SYNC_KLOOP_STOP,
/* Enable CSB mode on a registered netmap control device. */
NETMAP_REQ_CSB_ENABLE,
};
enum {
/* On NETMAP_REQ_REGISTER, ask netmap to use memory allocated
* from user-space allocated memory pools (e.g. hugepages). */
NETMAP_REQ_OPT_EXTMEM = 1,
/* On NETMAP_REQ_SYNC_KLOOP_START, ask netmap to use eventfd-based
* notifications to synchronize the kernel loop with the application.
*/
NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS,
/* On NETMAP_REQ_REGISTER, ask netmap to work in CSB mode, where
* head, cur and tail pointers are not exchanged through the
* struct netmap_ring header, but rather using an user-provided
* memory area (see struct nm_csb_atok and struct nm_csb_ktoa). */
NETMAP_REQ_OPT_CSB,
};
/*
@ -541,6 +570,7 @@ struct nmreq_register {
uint16_t nr_mem_id; /* id of the memory allocator */
uint16_t nr_ringid; /* ring(s) we care about */
uint32_t nr_mode; /* specify NR_REG_* modes */
uint32_t nr_extra_bufs; /* number of requested extra buffers */
uint64_t nr_flags; /* additional flags (see below) */
/* monitors use nr_ringid and nr_mode to select the rings to monitor */
@ -549,9 +579,7 @@ struct nmreq_register {
#define NR_ZCOPY_MON 0x400
/* request exclusive access to the selected rings */
#define NR_EXCLUSIVE 0x800
/* request ptnetmap host support */
#define NR_PASSTHROUGH_HOST NR_PTNETMAP_HOST /* deprecated */
#define NR_PTNETMAP_HOST 0x1000
/* 0x1000 unused */
#define NR_RX_RINGS_ONLY 0x2000
#define NR_TX_RINGS_ONLY 0x4000
/* Applications set this flag if they are able to deal with virtio-net headers,
@ -564,8 +592,6 @@ struct nmreq_register {
* NETMAP_DO_RX_POLL. */
#define NR_DO_RX_POLL 0x10000
#define NR_NO_TX_POLL 0x20000
uint32_t nr_extra_bufs; /* number of requested extra buffers */
};
/* Valid values for nmreq_register.nr_mode (see above). */
@ -576,10 +602,11 @@ enum { NR_REG_DEFAULT = 0, /* backward compat, should not be used. */
NR_REG_ONE_NIC = 4,
NR_REG_PIPE_MASTER = 5, /* deprecated, use "x{y" port name syntax */
NR_REG_PIPE_SLAVE = 6, /* deprecated, use "x}y" port name syntax */
NR_REG_NULL = 7,
};
/* A single ioctl number is shared by all the new API command.
* Demultiplexing is done using the nr_hdr.nr_reqtype field.
* Demultiplexing is done using the hdr.nr_reqtype field.
* FreeBSD uses the size value embedded in the _IOWR to determine
* how much to copy in/out, so we define the ioctl() command
* specifying only nmreq_header, and copyin/copyout the rest. */
@ -595,16 +622,18 @@ enum { NR_REG_DEFAULT = 0, /* backward compat, should not be used. */
/*
* nr_reqtype: NETMAP_REQ_PORT_INFO_GET
* Get information about a netmap port, including number of rings.
* slots per ring, id of the memory allocator, etc.
* slots per ring, id of the memory allocator, etc. The netmap
* control device used for this operation does not need to be bound
* to a netmap port.
*/
struct nmreq_port_info_get {
uint64_t nr_offset; /* nifp offset in the shared region */
uint64_t nr_memsize; /* size of the shared region */
uint32_t nr_tx_slots; /* slots in tx rings */
uint32_t nr_rx_slots; /* slots in rx rings */
uint16_t nr_tx_rings; /* number of tx rings */
uint16_t nr_rx_rings; /* number of rx rings */
uint16_t nr_mem_id; /* id of the memory allocator */
uint16_t nr_mem_id; /* memory allocator id (in/out) */
uint16_t pad1;
};
#define NM_BDG_NAME "vale" /* prefix for bridge port name */
@ -620,6 +649,7 @@ struct nmreq_port_info_get {
struct nmreq_vale_attach {
struct nmreq_register reg;
uint32_t port_index;
uint32_t pad1;
};
/*
@ -630,6 +660,7 @@ struct nmreq_vale_attach {
*/
struct nmreq_vale_detach {
uint32_t port_index;
uint32_t pad1;
};
/*
@ -639,15 +670,18 @@ struct nmreq_vale_detach {
struct nmreq_vale_list {
/* Name of the VALE port (valeXXX:YYY) or empty. */
uint16_t nr_bridge_idx;
uint16_t pad1;
uint32_t nr_port_idx;
};
/*
* nr_reqtype: NETMAP_REQ_PORT_HDR_SET or NETMAP_REQ_PORT_HDR_GET
* Set the port header length.
* Set or get the port header length of the port identified by hdr.nr_name.
* The control device does not need to be bound to a netmap port.
*/
struct nmreq_port_hdr {
uint32_t nr_hdr_len;
uint32_t pad1;
};
/*
@ -660,6 +694,7 @@ struct nmreq_vale_newif {
uint16_t nr_tx_rings; /* number of tx rings */
uint16_t nr_rx_rings; /* number of rx rings */
uint16_t nr_mem_id; /* id of the memory allocator */
uint16_t pad1;
};
/*
@ -672,17 +707,20 @@ struct nmreq_vale_polling {
#define NETMAP_POLLING_MODE_MULTI_CPU 2
uint32_t nr_first_cpu_id;
uint32_t nr_num_polling_cpus;
uint32_t pad1;
};
/*
* nr_reqtype: NETMAP_REQ_POOLS_INFO_GET
* Get info about the pools of the memory allocator of the port bound
* to a given netmap control device (used i.e. by a ptnetmap-enabled
* hypervisor). The nr_hdr.nr_name field is ignored.
* Get info about the pools of the memory allocator of the netmap
* port specified by hdr.nr_name and nr_mem_id. The netmap control
* device used for this operation does not need to be bound to a netmap
* port.
*/
struct nmreq_pools_info {
uint64_t nr_memsize;
uint16_t nr_mem_id;
uint16_t nr_mem_id; /* in/out argument */
uint16_t pad1[3];
uint64_t nr_if_pool_offset;
uint32_t nr_if_pool_objtotal;
uint32_t nr_if_pool_objsize;
@ -694,14 +732,152 @@ struct nmreq_pools_info {
uint32_t nr_buf_pool_objsize;
};
/*
* nr_reqtype: NETMAP_REQ_SYNC_KLOOP_START
* Start an in-kernel loop that syncs the rings periodically or on
* notifications. The loop runs in the context of the ioctl syscall,
* and only stops on NETMAP_REQ_SYNC_KLOOP_STOP.
* The registered netmap port must be open in CSB mode.
*/
struct nmreq_sync_kloop_start {
/* Sleeping is the default synchronization method for the kloop.
* The 'sleep_us' field specifies how many microseconds to sleep for
* when there is no work to do, before doing another kloop iteration.
*/
uint32_t sleep_us;
uint32_t pad1;
};
/* A CSB entry for the application --> kernel direction.
 * AW+/KR+ mark fields written by the application and read by the kernel.
 * The entry is 16 32-bit words, i.e. 64 bytes. */
struct nm_csb_atok {
	uint32_t head;		/* AW+ KR+ the head of the appl netmap_ring */
	uint32_t cur;		/* AW+ KR+ the cur of the appl netmap_ring */
	uint32_t appl_need_kick; /* AW+ KR+ kern --> appl notification enable */
	uint32_t sync_flags;	/* AW+ KR+ the flags of the appl [tx|rx]sync() */
	uint32_t pad[12];	/* pad to a 64 bytes cacheline */
};

/* A CSB entry for the application <-- kernel direction.
 * AR+/KW+ mark fields read by the application and written by the kernel.
 * The entry is 16 32-bit words, i.e. 64 bytes. */
struct nm_csb_ktoa {
	uint32_t hwcur;		/* AR+ KW+ the hwcur of the kern netmap_kring */
	uint32_t hwtail;	/* AR+ KW+ the hwtail of the kern netmap_kring */
	uint32_t kern_need_kick; /* AR+ KW+ appl-->kern notification enable */
	uint32_t pad[13];	/* pad to a 64 bytes cacheline */
};

/*
 * Memory barriers used by the CSB accessors below:
 *   nm_stst_barrier()	orders earlier stores before later stores;
 *   nm_ldld_barrier()	orders earlier loads before later loads.
 * A store-store barrier must not be used to separate two loads: it gives
 * no load ordering on weakly ordered CPUs.
 */
#ifdef __linux__

#ifdef __KERNEL__
#define nm_stst_barrier smp_wmb
#define nm_ldld_barrier smp_rmb
#else  /* !__KERNEL__ */
static inline void nm_stst_barrier(void)
{
	/* A memory barrier with release semantic has the combined
	 * effect of a store-store barrier and a load-store barrier,
	 * which is fine for us. */
	__atomic_thread_fence(__ATOMIC_RELEASE);
}
static inline void nm_ldld_barrier(void)
{
	/* A memory barrier with acquire semantic has the combined
	 * effect of a load-load barrier and a load-store barrier,
	 * which is fine for us. */
	__atomic_thread_fence(__ATOMIC_ACQUIRE);
}
#endif /* !__KERNEL__ */

#elif defined(__FreeBSD__)

#ifdef _KERNEL
#define nm_stst_barrier	atomic_thread_fence_rel
#define nm_ldld_barrier	atomic_thread_fence_acq
#else  /* !_KERNEL */
static inline void nm_stst_barrier(void)
{
	__atomic_thread_fence(__ATOMIC_RELEASE);
}
static inline void nm_ldld_barrier(void)
{
	__atomic_thread_fence(__ATOMIC_ACQUIRE);
}
#endif /* !_KERNEL */

#else  /* !__linux__ && !__FreeBSD__ */
#error "OS not supported"
#endif /* !__linux__ && !__FreeBSD__ */

/* Application side of sync-kloop: Write ring pointers (cur, head) to the CSB.
 * This routine is coupled with sync_kloop_kernel_read().
 *
 * atok: CSB entry (application --> kernel) to update.
 * cur:  new value for atok->cur, stored first.
 * head: new value for atok->head, stored after the store-store barrier.
 */
static inline void
nm_sync_kloop_appl_write(struct nm_csb_atok *atok, uint32_t cur,
			 uint32_t head)
{
	/*
	 * We need to write cur and head to the CSB but we cannot do it
	 * atomically. There is no way we can prevent the kernel from reading
	 * the updated value of one of the two and the old value of the other.
	 * However, if we make sure that the kernel never reads a value of
	 * head more recent than the value of cur we are safe. We can allow
	 * the kernel to read a value of cur more recent than the value of
	 * head, since in the netmap ring cur can be ahead of head and cur
	 * cannot wrap around head because it must be behind tail. Inverting
	 * the order of writes below could instead result into the kernel to
	 * think head went ahead of cur, which would cause the sync prologue
	 * to fail.
	 *
	 * The following memory barrier scheme is used to make this happen:
	 *
	 *     Application          Kernel
	 *
	 *     STORE(cur)           LOAD(head)
	 *     mb() <-----------> mb()
	 *     STORE(head)          LOAD(cur)
	 */
	atok->cur = cur;
	nm_stst_barrier();
	atok->head = head;
}

/* Application side of sync-kloop: Read kring pointers (hwcur, hwtail) from
 * the CSB. This routine is coupled with sync_kloop_kernel_write().
 *
 * ktoa:   CSB entry (kernel --> application) to read.
 * hwtail: output, receives ktoa->hwtail (loaded first).
 * hwcur:  output, receives ktoa->hwcur (loaded after the barrier).
 */
static inline void
nm_sync_kloop_appl_read(struct nm_csb_ktoa *ktoa, uint32_t *hwtail,
			uint32_t *hwcur)
{
	/*
	 * We place a memory barrier to make sure that the update of hwtail
	 * never overtakes the update of hwcur
	 * (see explanation in sync_kloop_kernel_write).
	 * Both accesses are loads, so the barrier must be a load-load one.
	 */
	*hwtail = ktoa->hwtail;
	nm_ldld_barrier();
	*hwcur = ktoa->hwcur;
}
/*
* data for NETMAP_REQ_OPT_* options
*/
/* Option payload carrying one (ioeventfd, irqfd) pair per CSB entry, so
* that the kernel loop and the application can notify each other.
* The struct is variable-sized: the entries follow the common header.
* NOTE(review): presumably nro_opt.nro_size must describe the size of the
* option including the array — confirm against the option parser. */
struct nmreq_opt_sync_kloop_eventfds {
struct nmreq_option nro_opt; /* common header */
/* An array of N entries for bidirectional notifications between
* the kernel loop and the application. The number of entries and
* their order must agree with the CSB arrays passed in the
* NETMAP_REQ_OPT_CSB option. Each entry contains a file descriptor
* backed by an eventfd.
*/
struct {
/* Notifier for the application --> kernel loop direction. */
int32_t ioeventfd;
/* Notifier for the kernel loop --> application direction. */
int32_t irqfd;
} eventfds[0]; /* zero-length trailing array, sized by the caller */
};
struct nmreq_opt_extmem {
struct nmreq_option nro_opt; /* common header */
uint64_t nro_usrptr; /* (in) ptr to usr memory */
struct nmreq_pools_info nro_info; /* (in/out) */
};
/* Option payload describing the two CSB arrays used in CSB mode.
* NOTE(review): the two 64-bit fields presumably hold the addresses of
* user-provided arrays of struct nm_csb_atok and struct nm_csb_ktoa —
* confirm against the code that consumes this option. */
struct nmreq_opt_csb {
struct nmreq_option nro_opt; /* common header */
/* Array of CSB entries for application --> kernel communication
* (N entries). */
uint64_t csb_atok;
/* Array of CSB entries for kernel --> application communication
* (N entries). */
uint64_t csb_ktoa;
};
#endif /* _NET_NETMAP_H_ */

View File

@ -138,11 +138,12 @@ nm_tx_pending(struct netmap_ring *r)
return nm_ring_next(r, r->tail) != r->head;
}
/* Compute the number of slots available in the netmap ring. We use
* ring->head as explained in the comment above nm_ring_empty(). */
static inline uint32_t
nm_ring_space(struct netmap_ring *ring)
{
int ret = ring->tail - ring->cur;
int ret = ring->tail - ring->head;
if (ret < 0)
ret += ring->num_slots;
return ret;
@ -1091,18 +1092,36 @@ nm_dispatch(struct nm_desc *d, int cnt, nm_cb_t cb, u_char *arg)
ring = NETMAP_RXRING(d->nifp, ri);
for ( ; !nm_ring_empty(ring) && cnt != got; got++) {
u_int idx, i;
u_char *oldbuf;
struct netmap_slot *slot;
if (d->hdr.buf) { /* from previous round */
cb(arg, &d->hdr, d->hdr.buf);
}
i = ring->cur;
idx = ring->slot[i].buf_idx;
slot = &ring->slot[i];
idx = slot->buf_idx;
/* d->cur_rx_ring doesn't change inside this loop, but
* set it here, so it reflects d->hdr.buf's ring */
d->cur_rx_ring = ri;
d->hdr.slot = &ring->slot[i];
d->hdr.buf = (u_char *)NETMAP_BUF(ring, idx);
d->hdr.slot = slot;
oldbuf = d->hdr.buf = (u_char *)NETMAP_BUF(ring, idx);
// __builtin_prefetch(buf);
d->hdr.len = d->hdr.caplen = ring->slot[i].len;
d->hdr.len = d->hdr.caplen = slot->len;
while (slot->flags & NS_MOREFRAG) {
u_char *nbuf;
u_int oldlen = slot->len;
i = nm_ring_next(ring, i);
slot = &ring->slot[i];
d->hdr.len += slot->len;
nbuf = (u_char *)NETMAP_BUF(ring, slot->buf_idx);
if (oldbuf != NULL && nbuf - oldbuf == ring->nr_buf_size &&
oldlen == ring->nr_buf_size) {
d->hdr.caplen += slot->len;
oldbuf = nbuf;
} else {
oldbuf = NULL;
}
}
d->hdr.ts = ring->ts;
ring->head = ring->cur = nm_ring_next(ring, i);
}

View File

@ -1,7 +1,7 @@
/*
* Copyright (C) 2013-2016 Luigi Rizzo
* Copyright (C) 2013-2016 Giuseppe Lettieri
* Copyright (C) 2013-2016 Vincenzo Maffione
* Copyright (C) 2013-2018 Vincenzo Maffione
* Copyright (C) 2015 Stefano Garzarella
* All rights reserved.
*
@ -33,14 +33,15 @@
#define NETMAP_VIRT_H
/*
* ptnetmap_memdev: device used to expose memory into the guest VM
* Register offsets and other macros for the ptnetmap paravirtual devices:
* ptnetmap-memdev: device used to expose memory into the guest
* ptnet: paravirtualized NIC exposing a netmap port in the guest
*
* These macros are used in the hypervisor frontend (QEMU, bhyve) and in the
* guest device driver.
*/
/* PCI identifiers and PCI BARs for the ptnetmap memdev
* and ptnetmap network interface. */
/* PCI identifiers and PCI BARs for ptnetmap-memdev and ptnet. */
#define PTNETMAP_MEMDEV_NAME "ptnetmap-memdev"
#define PTNETMAP_PCI_VENDOR_ID 0x1b36 /* QEMU virtual devices */
#define PTNETMAP_PCI_DEVICE_ID 0x000c /* memory device */
@ -49,7 +50,7 @@
#define PTNETMAP_MEM_PCI_BAR 1
#define PTNETMAP_MSIX_PCI_BAR 2
/* Registers for the ptnetmap memdev */
/* Device registers for ptnetmap-memdev */
#define PTNET_MDEV_IO_MEMSIZE_LO 0 /* netmap memory size (low) */
#define PTNET_MDEV_IO_MEMSIZE_HI 4 /* netmap_memory_size (high) */
#define PTNET_MDEV_IO_MEMID 8 /* memory allocator ID in the host */
@ -64,74 +65,10 @@
#define PTNET_MDEV_IO_BUF_POOL_OBJSZ 96
#define PTNET_MDEV_IO_END 100
/*
* ptnetmap configuration
*
* The ptnet kthreads (running in host kernel-space) need to be configured
* in order to know how to intercept guest kicks (I/O register writes) and
* how to inject MSI-X interrupts to the guest. The configuration may vary
* depending on the hypervisor. Currently, we support QEMU/KVM on Linux
* and bhyve on FreeBSD.
* The configuration is passed by the hypervisor to the host netmap module
* by means of an ioctl() with nr_cmd=NETMAP_PT_HOST_CREATE, and it is
* specified by the ptnetmap_cfg struct. This struct contains a header
* with general information and an array of entries whose size depends
* on the hypervisor. The NETMAP_PT_HOST_CREATE command is issued every
* time the kthreads are started.
*/
struct ptnetmap_cfg {
#define PTNETMAP_CFGTYPE_QEMU 0x1
#define PTNETMAP_CFGTYPE_BHYVE 0x2
uint16_t cfgtype; /* how to interpret the cfg entries */
uint16_t entry_size; /* size of a config entry */
uint32_t num_rings; /* number of config entries */
void *csb_gh; /* CSB for guest --> host communication */
void *csb_hg; /* CSB for host --> guest communication */
/* Configuration entries are allocated right after the struct. */
};
/* Configuration of a ptnetmap ring for QEMU. */
struct ptnetmap_cfgentry_qemu {
uint32_t ioeventfd; /* to intercept guest register access */
uint32_t irqfd; /* to inject guest interrupts */
};
/* Configuration of a ptnetmap ring for bhyve. */
struct ptnetmap_cfgentry_bhyve {
uint64_t wchan; /* tsleep() parameter, to wake up kthread */
uint32_t ioctl_fd; /* ioctl fd */
/* ioctl parameters to send irq */
uint32_t ioctl_cmd;
/* vmm.ko MSIX parameters for IOCTL */
struct {
uint64_t msg_data;
uint64_t addr;
} ioctl_data;
};
/*
 * Stash a userspace buffer pointer inside a struct nmreq so that it can
 * be handed to kernelspace for write or read. The pointer is stored as
 * a uintptr_t at the address of the nr_arg1 field.
 * Used by NETMAP_PT_HOST_CREATE.
 * XXX deprecated
 * NOTE(review): the store is sizeof(uintptr_t) bytes wide starting at
 * nr_arg1; confirm against the struct nmreq layout that this is intended.
 */
static inline void
nmreq_pointer_put(struct nmreq *nmr, void *userptr)
{
	*(uintptr_t *)&nmr->nr_arg1 = (uintptr_t)userptr;
}
/*
 * Fetch the pointer stored in a struct nmreq: reads a uintptr_t located
 * at the address of the nr_arg1 field and returns it as a void pointer.
 */
static inline void *
nmreq_pointer_get(const struct nmreq *nmr)
{
	return (void *)*(const uintptr_t *)&nmr->nr_arg1;
}
/* ptnetmap features */
#define PTNETMAP_F_VNET_HDR 1
/* I/O registers for the ptnet device. */
/* Device registers for the ptnet network device. */
#define PTNET_IO_PTFEAT 0
#define PTNET_IO_PTCTL 4
#define PTNET_IO_MAC_LO 8
@ -153,140 +90,11 @@ nmreq_pointer_get(const struct nmreq *nmr)
#define PTNET_IO_KICK_BASE 128
#define PTNET_IO_MASK 0xff
/* ptnetmap control commands (values for PTCTL register) */
/* ptnet control commands (values for PTCTL register):
* - CREATE starts the host sync-kloop
* - DELETE stops the host sync-kloop
*/
#define PTNETMAP_PTCTL_CREATE 1
#define PTNETMAP_PTCTL_DELETE 2
/* ptnetmap synchronization variables shared between guest and host */
struct ptnet_csb_gh {
uint32_t head; /* GW+ HR+ the head of the guest netmap_ring */
uint32_t cur; /* GW+ HR+ the cur of the guest netmap_ring */
uint32_t guest_need_kick; /* GW+ HR+ host-->guest notification enable */
uint32_t sync_flags; /* GW+ HR+ the flags of the guest [tx|rx]sync() */
char pad[48]; /* pad to a 64 bytes cacheline */
};
struct ptnet_csb_hg {
uint32_t hwcur; /* GR+ HW+ the hwcur of the host netmap_kring */
uint32_t hwtail; /* GR+ HW+ the hwtail of the host netmap_kring */
uint32_t host_need_kick; /* GR+ HW+ guest-->host notification enable */
char pad[4+48];
};
#ifdef WITH_PTNETMAP_GUEST
/* ptnetmap_memdev routines used to talk with ptnetmap_memdev device driver */
struct ptnetmap_memdev;
int nm_os_pt_memdev_iomap(struct ptnetmap_memdev *, vm_paddr_t *, void **,
uint64_t *);
void nm_os_pt_memdev_iounmap(struct ptnetmap_memdev *);
uint32_t nm_os_pt_memdev_ioread(struct ptnetmap_memdev *, unsigned int);
/* Guest driver: Write kring pointers (cur, head) to the CSB.
* This routine is coupled with ptnetmap_host_read_kring_csb().
*
* ptr: guest --> host CSB entry to update.
* cur: value stored into ptr->cur, before the barrier.
* head: value stored into ptr->head, after the barrier.
*/
static inline void
ptnetmap_guest_write_kring_csb(struct ptnet_csb_gh *ptr, uint32_t cur,
uint32_t head)
{
/*
* We need to write cur and head to the CSB but we cannot do it atomically.
* There is no way we can prevent the host from reading the updated value
* of one of the two and the old value of the other. However, if we make
* sure that the host never reads a value of head more recent than the
* value of cur we are safe. We can allow the host to read a value of cur
* more recent than the value of head, since in the netmap ring cur can be
* ahead of head and cur cannot wrap around head because it must be behind
* tail. Inverting the order of writes below could instead result into the
* host to think head went ahead of cur, which would cause the sync
* prologue to fail.
*
* The following memory barrier scheme is used to make this happen:
*
* Guest Host
*
* STORE(cur) LOAD(head)
* mb() <-----------> mb()
* STORE(head) LOAD(cur)
*/
/* The order of the two stores must not be changed: cur first. */
ptr->cur = cur;
mb();
ptr->head = head;
}
/* Guest driver: Read kring pointers (hwcur, hwtail) from the CSB.
 * This routine is coupled with ptnetmap_host_write_kring_csb().
 *
 * pthg:  CSB block the values are read from
 * kring: destination; nr_hwtail and nr_hwcur are overwritten
 */
static inline void
ptnetmap_guest_read_kring_csb(struct ptnet_csb_hg *pthg, struct netmap_kring *kring)
{
/*
 * We place a memory barrier to make sure that the update of hwtail never
 * overtakes the update of hwcur.
 * (see explanation in ptnetmap_host_write_kring_csb).
 */
/* Order matters: hwtail is loaded first, hwcur only after the barrier. */
kring->nr_hwtail = pthg->hwtail;
mb();
kring->nr_hwcur = pthg->hwcur;
}
#endif /* WITH_PTNETMAP_GUEST */
#ifdef WITH_PTNETMAP_HOST
/*
* ptnetmap kernel thread routines
* */
/*
 * Accessors used to read and write CSB fields in the host.
 *
 *   CSB_READ(csb, field, r)   load csb->field into the lvalue r
 *   CSB_WRITE(csb, field, v)  store v into csb->field
 *
 * On Linux they map to get_user()/put_user(), elsewhere to
 * fuword32()/suword32(). The csb argument is parenthesized before the
 * member access, and r before the assignment, so that any pointer or
 * lvalue expression (e.g. "base + idx") can be passed, not only a plain
 * identifier.
 */
#if defined (linux)
#define CSB_READ(csb, field, r) (get_user(r, &(csb)->field))
#define CSB_WRITE(csb, field, v) (put_user(v, &(csb)->field))
#else /* ! linux */
#define CSB_READ(csb, field, r) ((r) = fuword32(&(csb)->field))
#define CSB_WRITE(csb, field, v) (suword32(&(csb)->field, v))
#endif /* ! linux */
/* Host netmap: Write kring pointers (hwcur, hwtail) to the CSB.
 * This routine is coupled with ptnetmap_guest_read_kring_csb().
 *
 * ptr:    CSB block that receives the two values
 * hwcur:  value written to the hwcur field (written first)
 * hwtail: value written to the hwtail field (written after the barrier)
 */
static inline void
ptnetmap_host_write_kring_csb(struct ptnet_csb_hg __user *ptr, uint32_t hwcur,
uint32_t hwtail)
{
/*
 * The same scheme used in ptnetmap_guest_write_kring_csb() applies here.
 * We allow the guest to read a value of hwcur more recent than the value
 * of hwtail, since this would anyway result in a consistent view of the
 * ring state (and hwcur can never wraparound hwtail, since hwcur must be
 * behind head).
 *
 * The following memory barrier scheme is used to make this happen
 * (the host is the writer here, the guest is the reader):
 *
 *          Host                 Guest
 *
 *          STORE(hwcur)         LOAD(hwtail)
 *          mb() <-------------> mb()
 *          STORE(hwtail)        LOAD(hwcur)
 */
/* Order matters: hwcur must be written before hwtail (see above).
 * NOTE(review): the value of each CSB_WRITE expression is discarded, so a
 * failed write would go unnoticed -- confirm this is intended. */
CSB_WRITE(ptr, hwcur, hwcur);
mb();
CSB_WRITE(ptr, hwtail, hwtail);
}
/* Host netmap: Read kring pointers (head, cur, sync_flags) from the CSB.
 * This routine is coupled with ptnetmap_guest_write_kring_csb().
 *
 * ptr:         CSB block the values are read from
 * shadow_ring: destination; its head, cur and flags fields are overwritten
 *              (sync_flags from the CSB lands in shadow_ring->flags)
 * num_slots:   not used by this function
 */
static inline void
ptnetmap_host_read_kring_csb(struct ptnet_csb_gh __user *ptr,
struct netmap_ring *shadow_ring,
uint32_t num_slots)
{
/*
 * We place a memory barrier to make sure that the update of head never
 * overtakes the update of cur.
 * (see explanation in ptnetmap_guest_write_kring_csb).
 */
/* Order matters: head is read first, cur only after the barrier.
 * NOTE(review): the value of each CSB_READ expression is discarded, so a
 * failed read would go unnoticed -- confirm this is intended. */
CSB_READ(ptr, head, shadow_ring->head);
mb();
CSB_READ(ptr, cur, shadow_ring->cur);
CSB_READ(ptr, sync_flags, shadow_ring->flags);
}
#endif /* WITH_PTNETMAP_HOST */
#endif /* NETMAP_VIRT_H */