diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c
index 63b766bc6f0b..35d530390b25 100644
--- a/sys/dev/netmap/netmap.c
+++ b/sys/dev/netmap/netmap.c
@@ -120,10 +120,12 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, no_pendintr,
 int netmap_drop = 0;	/* debugging */
 int netmap_flags = 0;	/* debug flags */
+int netmap_fwd = 0;	/* force transparent mode */
 int netmap_copy = 0;	/* debugging, copy content */
 
 SYSCTL_INT(_dev_netmap, OID_AUTO, drop, CTLFLAG_RW, &netmap_drop, 0 , "");
 SYSCTL_INT(_dev_netmap, OID_AUTO, flags, CTLFLAG_RW, &netmap_flags, 0 , "");
+SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0 , "");
 SYSCTL_INT(_dev_netmap, OID_AUTO, copy, CTLFLAG_RW, &netmap_copy, 0 , "");
 
 #ifdef NM_BRIDGE /* support for netmap bridge */
 
@@ -647,20 +649,152 @@ netmap_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
 
 /*
  * Handlers for synchronization of the queues from/to the host.
- *
+ * Netmap has two operating modes:
+ * - in the default mode, the rings connected to the host stack are
+ *   just another ring pair managed by userspace;
+ * - in transparent mode (XXX to be defined) incoming packets
+ *   (from the host or the NIC) are marked as NS_FORWARD upon
+ *   arrival, and the user application has a chance to reset the
+ *   flag for packets that should be dropped.
+ *   On the RXSYNC or poll(), packets in RX rings between
+ *   kring->nr_hwcur and ring->cur with NS_FORWARD still set are moved
+ *   to the other side.
+ * The transfer NIC --> host is relatively easy, just encapsulate
+ * into mbufs and we are done. The host --> NIC side is slightly
+ * harder because there might not be room in the tx ring so it
+ * might take a while before releasing the buffer.
+ */
+
+/*
+ * pass a chain of buffers to the host stack as coming from 'dst'
+ */
+static void
+netmap_send_up(struct ifnet *dst, struct mbuf *head)
+{
+	struct mbuf *m;
+
+	/* send packets up, outside the lock */
+	while ((m = head) != NULL) {
+		head = head->m_nextpkt;
+		m->m_nextpkt = NULL;
+		if (netmap_verbose & NM_VERB_HOST)
+			D("sending up pkt %p size %d", m, MBUF_LEN(m));
+		NM_SEND_UP(dst, m);
+	}
+}
+
+struct mbq {
+	struct mbuf *head;
+	struct mbuf *tail;
+	int count;
+};
+
+/*
+ * put a copy of the buffers marked NS_FORWARD into an mbuf chain.
+ * Run from hwcur to cur - reserved
+ */
+static void
+netmap_grab_packets(struct netmap_kring *kring, struct mbq *q, int force)
+{
+	/* Take packets from hwcur to cur-reserved and pass them up.
+	 * In case of no buffers we give up. At the end of the loop,
+	 * the queue is drained in all cases.
+	 * XXX handle reserved
+	 */
+	int k = kring->ring->cur - kring->ring->reserved;
+	u_int n, lim = kring->nkr_num_slots - 1;
+	struct mbuf *m, *tail = q->tail;
+
+	if (k < 0)
+		k = k + kring->nkr_num_slots;
+	for (n = kring->nr_hwcur; n != k;) {
+		struct netmap_slot *slot = &kring->ring->slot[n];
+
+		n = (n == lim) ? 0 : n + 1;
+		if ((slot->flags & NS_FORWARD) == 0 && !force)
+			continue;
+		if (slot->len < 14 || slot->len > NETMAP_BUF_SIZE) {
+			D("bad pkt at %d len %d", n, slot->len);
+			continue;
+		}
+		slot->flags &= ~NS_FORWARD; // XXX needed ?
+		m = m_devget(NMB(slot), slot->len, 0, kring->na->ifp, NULL);
+
+		if (m == NULL)
+			break;
+		if (tail)
+			tail->m_nextpkt = m;
+		else
+			q->head = m;
+		tail = m;
+		q->count++;
+		m->m_nextpkt = NULL;
+	}
+	q->tail = tail;
+}
+
+/*
+ * called under main lock to send packets from the host to the NIC
+ * The host ring has packets from nr_hwcur to (cur - reserved)
+ * to be sent down. We scan the tx rings, which have just been
+ * flushed so nr_hwcur == cur. Pushing packets down means
+ * increment cur and decrement avail.
+ * XXX to be verified
+ */
+static void
+netmap_sw_to_nic(struct netmap_adapter *na)
+{
+	struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
+	struct netmap_kring *k1 = &na->tx_rings[0];
+	int i, howmany, src_lim, dst_lim;
+
+	howmany = kring->nr_hwavail;	/* XXX otherwise cur - reserved - nr_hwcur */
+
+	src_lim = kring->nkr_num_slots;
+	for (i = 0; howmany > 0 && i < na->num_tx_rings; i++) {
+		ND("%d packets left to ring %d (space %d)", howmany, i, k1->nr_hwavail);
+		dst_lim = k1->nkr_num_slots;
+		while (howmany > 0 && k1->ring->avail > 0) {
+			struct netmap_slot *src, *dst, tmp;
+			src = &kring->ring->slot[kring->nr_hwcur];
+			dst = &k1->ring->slot[k1->ring->cur];
+			tmp = *src;
+			src->buf_idx = dst->buf_idx;
+			src->flags = NS_BUF_CHANGED;
+
+			dst->buf_idx = tmp.buf_idx;
+			dst->len = tmp.len;
+			dst->flags = NS_BUF_CHANGED;
+			ND("out len %d buf %d from %d to %d",
+				dst->len, dst->buf_idx,
+				kring->nr_hwcur, k1->ring->cur);
+
+			if (++kring->nr_hwcur >= src_lim)
+				kring->nr_hwcur = 0;
+			howmany--;
+			kring->nr_hwavail--;
+			if (++k1->ring->cur >= dst_lim)
+				k1->ring->cur = 0;
+			k1->ring->avail--;
+		}
+		kring->ring->cur = kring->nr_hwcur; // XXX
+		k1++;
+	}
+}
+
+/*
  * netmap_sync_to_host() passes packets up. We are called from a
  * system call in user process context, and the only contention
  * can be among multiple user threads erroneously calling
- * this routine concurrently. In principle we should not even
- * need to lock.
+ * this routine concurrently.
  */
 static void
 netmap_sync_to_host(struct netmap_adapter *na)
 {
 	struct netmap_kring *kring = &na->tx_rings[na->num_tx_rings];
 	struct netmap_ring *ring = kring->ring;
-	struct mbuf *head = NULL, *tail = NULL, *m;
-	u_int k, n, lim = kring->nkr_num_slots - 1;
+	u_int k, lim = kring->nkr_num_slots - 1;
+	struct mbq q = { NULL, NULL, 0 };
 
 	k = ring->cur;
 	if (k > lim) {
@@ -673,37 +807,12 @@ netmap_sync_to_host(struct netmap_adapter *na)
 	 * In case of no buffers we give up. At the end of the loop,
 	 * the queue is drained in all cases.
 	 */
-	for (n = kring->nr_hwcur; n != k;) {
-		struct netmap_slot *slot = &ring->slot[n];
-
-		n = (n == lim) ? 0 : n + 1;
-		if (slot->len < 14 || slot->len > NETMAP_BUF_SIZE) {
-			D("bad pkt at %d len %d", n, slot->len);
-			continue;
-		}
-		m = m_devget(NMB(slot), slot->len, 0, na->ifp, NULL);
-
-		if (m == NULL)
-			break;
-		if (tail)
-			tail->m_nextpkt = m;
-		else
-			head = m;
-		tail = m;
-		m->m_nextpkt = NULL;
-	}
+	netmap_grab_packets(kring, &q, 1);
 	kring->nr_hwcur = k;
 	kring->nr_hwavail = ring->avail = lim;
 	// na->nm_lock(na->ifp, NETMAP_CORE_UNLOCK, 0);
 
-	/* send packets up, outside the lock */
-	while ((m = head) != NULL) {
-		head = head->m_nextpkt;
-		m->m_nextpkt = NULL;
-		if (netmap_verbose & NM_VERB_HOST)
-			D("sending up pkt %p size %d", m, MBUF_LEN(m));
-		NM_SEND_UP(na->ifp, m);
-	}
+	netmap_send_up(na->ifp, q.head);
 }
 
 /*
@@ -1236,7 +1345,8 @@ netmap_poll(struct cdev *dev, int events, struct thread *td)
 	struct ifnet *ifp;
 	struct netmap_kring *kring;
 	u_int core_lock, i, check_all, want_tx, want_rx, revents = 0;
-	u_int lim_tx, lim_rx;
+	u_int lim_tx, lim_rx, host_forwarded = 0;
+	struct mbq q = { NULL, NULL, 0 };
 	enum {NO_CL, NEED_CL, LOCKED_CL }; /* see below */
 	void *pwait = dev;	/* linux compatibility */
 
@@ -1284,6 +1394,17 @@ netmap_poll(struct cdev *dev, int events, struct thread *td)
 		return (revents);
 	}
 
+	/* if we are in transparent mode, check also the host rx ring */
+	kring = &na->rx_rings[lim_rx];
+	if ( (priv->np_qlast == NETMAP_HW_RING) // XXX check_all
+			&& want_rx
+			&& (netmap_fwd || kring->ring->flags & NR_FORWARD) ) {
+		if (kring->ring->avail == 0)
+			netmap_sync_from_host(na, td, dev);
+		if (kring->ring->avail > 0)
+			revents |= want_rx;
+	}
+
 	/*
 	 * check_all is set if the card has more than one queue and
 	 * the client is polling all of them. If true, we sleep on
@@ -1359,6 +1480,7 @@ netmap_poll(struct cdev *dev, int events, struct thread *td)
 	 * to avoid that the tx rings stall).
 	 */
 	if (priv->np_txpoll || want_tx) {
+flush_tx:
 		for (i = priv->np_qfirst; i < lim_tx; i++) {
 			kring = &na->tx_rings[i];
 			/*
@@ -1411,6 +1533,11 @@ netmap_poll(struct cdev *dev, int events, struct thread *td)
 			}
 			if (na->separate_locks)
 				na->nm_lock(ifp, NETMAP_RX_LOCK, i);
+			if (netmap_fwd || kring->ring->flags & NR_FORWARD) {
+				ND(10, "forwarding some buffers up %d to %d",
+				    kring->nr_hwcur, kring->ring->cur);
+				netmap_grab_packets(kring, &q, netmap_fwd);
+			}
 			if (na->nm_rxsync(ifp, i, 0 /* no lock */))
 				revents |= POLLERR;
 
@@ -1433,8 +1560,28 @@ netmap_poll(struct cdev *dev, int events, struct thread *td)
 		if (want_rx)
 			selrecord(td, &na->rx_si);
 	}
+
+	/* forward host to the netmap ring */
+	kring = &na->rx_rings[lim_rx];
+	if (kring->nr_hwavail > 0)
+		ND("host rx %d has %d packets", lim_rx, kring->nr_hwavail);
+	if ( (priv->np_qlast == NETMAP_HW_RING) // XXX check_all
+			&& (netmap_fwd || kring->ring->flags & NR_FORWARD)
+			&& kring->nr_hwavail > 0 && !host_forwarded) {
+		if (core_lock == NEED_CL) {
+			na->nm_lock(ifp, NETMAP_CORE_LOCK, 0);
+			core_lock = LOCKED_CL;
+		}
+		netmap_sw_to_nic(na);
+		host_forwarded = 1; /* prevent another pass */
+		want_rx = 0;
+		goto flush_tx;
+	}
+
 	if (core_lock == LOCKED_CL)
 		na->nm_lock(ifp, NETMAP_CORE_UNLOCK, 0);
+	if (q.head)
+		netmap_send_up(na->ifp, q.head);
 	return (revents);
 }
 
@@ -1586,7 +1733,8 @@ netmap_start(struct ifnet *ifp, struct mbuf *m)
 		goto done;	/* no space */
 	}
 	if (len > NETMAP_BUF_SIZE) {
-		D("drop packet size %d > %d", len, NETMAP_BUF_SIZE);
+		D("%s from_host, drop packet size %d > %d", ifp->if_xname,
+			len, NETMAP_BUF_SIZE);
 		goto done;	/* too long for us */
 	}
 
@@ -1597,6 +1745,7 @@ netmap_start(struct ifnet *ifp, struct mbuf *m)
 	slot = &kring->ring->slot[i];
 	m_copydata(m, 0, len, NMB(slot));
 	slot->len = len;
+	slot->flags = kring->nkr_slot_flags;
 	kring->nr_hwavail++;
 	if (netmap_verbose & NM_VERB_HOST)
 		D("wake up host ring %s %d", na->ifp->if_xname, na->num_rx_rings);
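
Editor's usage note (a sketch, not part of the patch). With this change transparent mode can be requested in two ways: globally, by setting the new sysctl dev.netmap.fwd=1, which makes netmap_grab_packets() run with force=1 and forward every released rx buffer, or per ring, by setting NR_FORWARD in ring->flags, in which case only slots whose NS_FORWARD flag is still set when the application advances cur are moved to the other side.

The fragment below sketches a hypothetical userspace counterpart. It assumes the nmreq/NIOCREGIF binding API of this netmap generation, the NETMAP_IF/NETMAP_RXRING/NETMAP_BUF/NETMAP_RING_NEXT accessors and the ni_rx_rings count from <net/netmap_user.h> (the host rx ring sits one past the last hardware ring), and that arriving slots are pre-marked NS_FORWARD when NR_FORWARD is set, as netmap_start() above does for the host ring via nkr_slot_flags, with drivers expected to do the same on hardware rx rings. transparent_loop() and should_drop() are invented names, and error handling is omitted.

#include <fcntl.h>
#include <poll.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/netmap.h>
#include <net/netmap_user.h>

extern int should_drop(const char *buf, u_int len);	/* user-supplied filter */

void
transparent_loop(const char *ifname)
{
	struct nmreq req;
	struct netmap_if *nifp;
	struct pollfd pfd;
	char *mem;
	u_int ri;
	int fd = open("/dev/netmap", O_RDWR);

	memset(&req, 0, sizeof(req));
	strncpy(req.nr_name, ifname, sizeof(req.nr_name));
	req.nr_version = NETMAP_API;
	req.nr_ringid = 0;	/* bind all hw rings plus the host rings */
	ioctl(fd, NIOCREGIF, &req);	/* error handling omitted */
	mem = mmap(NULL, req.nr_memsize, PROT_READ | PROT_WRITE,
	    MAP_SHARED, fd, 0);
	nifp = NETMAP_IF(mem, req.nr_offset);

	/* request transparent mode on every rx ring, host ring included */
	for (ri = 0; ri <= nifp->ni_rx_rings; ri++)
		NETMAP_RXRING(nifp, ri)->flags |= NR_FORWARD;

	pfd.fd = fd;
	pfd.events = POLLIN;
	for (;;) {
		poll(&pfd, 1, -1);
		for (ri = 0; ri <= nifp->ni_rx_rings; ri++) {
			struct netmap_ring *ring = NETMAP_RXRING(nifp, ri);

			while (ring->avail > 0) {
				struct netmap_slot *slot =
				    &ring->slot[ring->cur];

				/* clearing NS_FORWARD drops the packet;
				 * slots still flagged when we advance cur
				 * are forwarded on the next poll() */
				if (should_drop(NETMAP_BUF(ring,
				    slot->buf_idx), slot->len))
					slot->flags &= ~NS_FORWARD;
				ring->cur = NETMAP_RING_NEXT(ring, ring->cur);
				ring->avail--;
			}
		}
	}
}

Note the asymmetry the patch itself points out: the NIC --> host direction copies data into mbufs (netmap_grab_packets() plus netmap_send_up()), while the host --> NIC direction (netmap_sw_to_nic()) swaps buffer indices between the host rx ring and a tx ring and marks both slots NS_BUF_CHANGED, avoiding a data copy at the cost of holding the buffer until the NIC consumes it.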