Update the netmap example programs, merging some common code into nm_util.c.

pkt-gen now implements several functions (unlimited transmit, receive,
ping-pong) and can operate on a 'tap' device.
This commit is contained in:
Luigi Rizzo 2013-02-17 04:43:22 +00:00
parent 17001e0b94
commit f8e4e36a0a
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=246896
6 changed files with 1368 additions and 933 deletions

View File

@ -5,7 +5,7 @@
# we can just define 'progs' and create custom targets.
PROGS = pkt-gen bridge testpcap libnetmap.so
CLEANFILES = $(PROGS) pcap.o
CLEANFILES = $(PROGS) pcap.o nm_util.o
NO_MAN=
CFLAGS += -Werror -Wall -nostdinc -I/usr/include -I../../../sys
CFLAGS += -Wextra
@ -17,9 +17,12 @@ LDFLAGS += -lpthread -lpcap
all: $(PROGS)
pkt-gen bridge: nm_util.o
$(CC) $(CFLAGS) -o ${.TARGET} ${.TARGET:=.c} nm_util.o $(LDFLAGS)
testpcap: pcap.c libnetmap.so
$(CC) $(CFLAGS) -L. -lnetmap -o ${.TARGET} pcap.c
$(CC) $(CFLAGS) -DTEST -L. -lnetmap -o ${.TARGET} pcap.c
libnetmap.so: pcap.c
libnetmap.so: pcap.c nm_util.c
$(CC) $(CFLAGS) -fpic -c ${.ALLSRC}
$(CC) -shared -o ${.TARGET} ${.ALLSRC:.c=.o}

View File

@ -9,195 +9,24 @@
* $FreeBSD$
*/
#include <errno.h>
#include <signal.h> /* signal */
#include <stdlib.h>
#include <stdio.h>
#include <string.h> /* strcmp */
#include <fcntl.h> /* open */
#include <unistd.h> /* close */
#include "nm_util.h"
#include <sys/endian.h> /* le64toh */
#include <sys/mman.h> /* PROT_* */
#include <sys/ioctl.h> /* ioctl */
#include <machine/param.h>
#include <sys/poll.h>
#include <sys/socket.h> /* sockaddr.. */
#include <arpa/inet.h> /* ntohs */
#include <net/if.h> /* ifreq */
#include <net/ethernet.h>
#include <net/netmap.h>
#include <net/netmap_user.h>
#include <netinet/in.h> /* sockaddr_in */
#define MIN(a, b) ((a) < (b) ? (a) : (b))
int verbose = 0;
/* debug support */
#define ND(format, ...) {}
#define D(format, ...) do { \
if (!verbose) break; \
struct timeval _xxts; \
gettimeofday(&_xxts, NULL); \
fprintf(stderr, "%03d.%06d %s [%d] " format "\n", \
(int)_xxts.tv_sec %1000, (int)_xxts.tv_usec, \
__FUNCTION__, __LINE__, ##__VA_ARGS__); \
} while (0)
char *version = "$Id: bridge.c 10857 2012-04-06 12:18:22Z luigi $";
char *version = "$Id: bridge.c 12016 2013-01-23 17:24:22Z luigi $";
static int do_abort = 0;
/*
* info on a ring we handle
*/
struct my_ring {
const char *ifname;
int fd;
char *mem; /* userspace mmap address */
u_int memsize;
u_int queueid;
u_int begin, end; /* first..last+1 rings to check */
struct netmap_if *nifp;
struct netmap_ring *tx, *rx; /* shortcuts */
uint32_t if_flags;
uint32_t if_reqcap;
uint32_t if_curcap;
};
static void
sigint_h(__unused int sig)
sigint_h(int sig)
{
(void)sig; /* UNUSED */
do_abort = 1;
signal(SIGINT, SIG_DFL);
}
static int
do_ioctl(struct my_ring *me, unsigned long what)
{
struct ifreq ifr;
int error;
bzero(&ifr, sizeof(ifr));
strncpy(ifr.ifr_name, me->ifname, sizeof(ifr.ifr_name));
switch (what) {
case SIOCSIFFLAGS:
ifr.ifr_flagshigh = me->if_flags >> 16;
ifr.ifr_flags = me->if_flags & 0xffff;
break;
case SIOCSIFCAP:
ifr.ifr_reqcap = me->if_reqcap;
ifr.ifr_curcap = me->if_curcap;
break;
}
error = ioctl(me->fd, what, &ifr);
if (error) {
D("ioctl error 0x%lx", what);
return error;
}
switch (what) {
case SIOCGIFFLAGS:
me->if_flags = (ifr.ifr_flagshigh << 16) |
(0xffff & ifr.ifr_flags);
if (verbose)
D("flags are 0x%x", me->if_flags);
break;
case SIOCGIFCAP:
me->if_reqcap = ifr.ifr_reqcap;
me->if_curcap = ifr.ifr_curcap;
if (verbose)
D("curcap are 0x%x", me->if_curcap);
break;
}
return 0;
}
/*
* open a device. if me->mem is null then do an mmap.
*/
static int
netmap_open(struct my_ring *me, int ringid)
{
int fd, err, l;
struct nmreq req;
me->fd = fd = open("/dev/netmap", O_RDWR);
if (fd < 0) {
D("Unable to open /dev/netmap");
return (-1);
}
bzero(&req, sizeof(req));
strncpy(req.nr_name, me->ifname, sizeof(req.nr_name));
req.nr_ringid = ringid;
req.nr_version = NETMAP_API;
err = ioctl(fd, NIOCGINFO, &req);
if (err) {
D("cannot get info on %s", me->ifname);
goto error;
}
me->memsize = l = req.nr_memsize;
if (verbose)
D("memsize is %d MB", l>>20);
err = ioctl(fd, NIOCREGIF, &req);
if (err) {
D("Unable to register %s", me->ifname);
goto error;
}
if (me->mem == NULL) {
me->mem = mmap(0, l, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0);
if (me->mem == MAP_FAILED) {
D("Unable to mmap");
me->mem = NULL;
goto error;
}
}
me->nifp = NETMAP_IF(me->mem, req.nr_offset);
me->queueid = ringid;
if (ringid & NETMAP_SW_RING) {
me->begin = req.nr_rx_rings;
me->end = me->begin + 1;
me->tx = NETMAP_TXRING(me->nifp, req.nr_tx_rings);
me->rx = NETMAP_RXRING(me->nifp, req.nr_rx_rings);
} else if (ringid & NETMAP_HW_RING) {
D("XXX check multiple threads");
me->begin = ringid & NETMAP_RING_MASK;
me->end = me->begin + 1;
me->tx = NETMAP_TXRING(me->nifp, me->begin);
me->rx = NETMAP_RXRING(me->nifp, me->begin);
} else {
me->begin = 0;
me->end = req.nr_rx_rings; // XXX max of the two
me->tx = NETMAP_TXRING(me->nifp, 0);
me->rx = NETMAP_RXRING(me->nifp, 0);
}
return (0);
error:
close(me->fd);
return -1;
}
static int
netmap_close(struct my_ring *me)
{
D("");
if (me->mem)
munmap(me->mem, me->memsize);
ioctl(me->fd, NIOCUNREGIF, NULL);
close(me->fd);
return (0);
}
/*
* move up to 'limit' pkts from rxring to txring swapping buffers.
*/
@ -237,7 +66,7 @@ process_rings(struct netmap_ring *rxring, struct netmap_ring *txring,
if (rs->len < 14 || rs->len > 2048)
D("wrong len %d rx[%d] -> tx[%d]", rs->len, j, k);
else if (verbose > 1)
D("send len %d rx[%d] -> tx[%d]", rs->len, j, k);
D("%s send len %d rx[%d] -> tx[%d]", msg, rs->len, j, k);
ts->len = rs->len;
/* report the buffer change. */
@ -251,7 +80,7 @@ process_rings(struct netmap_ring *rxring, struct netmap_ring *txring,
rxring->cur = j;
txring->cur = k;
if (verbose && m > 0)
D("sent %d packets to %p", m, txring);
D("%s sent %d packets to %p", msg, m, txring);
return (m);
}
@ -287,7 +116,7 @@ move(struct my_ring *src, struct my_ring *dst, u_int limit)
* how many packets on this set of queues ?
*/
static int
howmany(struct my_ring *me, int tx)
pkt_queued(struct my_ring *me, int tx)
{
u_int i, tot = 0;
@ -337,6 +166,7 @@ main(int argc, char **argv)
while ( (ch = getopt(argc, argv, "b:i:vw:")) != -1) {
switch (ch) {
default:
D("bad option %c %s", ch, optarg);
usage();
break;
@ -361,6 +191,7 @@ main(int argc, char **argv)
}
}
argc -= optind;
argv += optind;
@ -394,44 +225,12 @@ main(int argc, char **argv)
/* two different interfaces. Take all rings on if1 */
i = 0; // all hw rings
}
if (netmap_open(me, i))
if (netmap_open(me, i, 1))
return (1);
me[1].mem = me[0].mem; /* copy the pointer, so only one mmap */
if (netmap_open(me+1, 0))
if (netmap_open(me+1, 0, 1))
return (1);
/* if bridging two interfaces, set promisc mode */
if (i != NETMAP_SW_RING) {
do_ioctl(me, SIOCGIFFLAGS);
if ((me[0].if_flags & IFF_UP) == 0) {
D("%s is down, bringing up...", me[0].ifname);
me[0].if_flags |= IFF_UP;
}
me[0].if_flags |= IFF_PPROMISC;
do_ioctl(me, SIOCSIFFLAGS);
do_ioctl(me+1, SIOCGIFFLAGS);
me[1].if_flags |= IFF_PPROMISC;
do_ioctl(me+1, SIOCSIFFLAGS);
/* also disable checksums etc. */
do_ioctl(me, SIOCGIFCAP);
me[0].if_reqcap = me[0].if_curcap;
me[0].if_reqcap &= ~(IFCAP_HWCSUM | IFCAP_TSO | IFCAP_TOE);
do_ioctl(me+0, SIOCSIFCAP);
}
do_ioctl(me+1, SIOCGIFFLAGS);
if ((me[1].if_flags & IFF_UP) == 0) {
D("%s is down, bringing up...", me[1].ifname);
me[1].if_flags |= IFF_UP;
}
do_ioctl(me+1, SIOCSIFFLAGS);
do_ioctl(me+1, SIOCGIFCAP);
me[1].if_reqcap = me[1].if_curcap;
me[1].if_reqcap &= ~(IFCAP_HWCSUM | IFCAP_TSO | IFCAP_TOE);
do_ioctl(me+1, SIOCSIFCAP);
/* setup poll(2) variables. */
memset(pollfd, 0, sizeof(pollfd));
for (i = 0; i < 2; i++) {
@ -451,8 +250,8 @@ main(int argc, char **argv)
int n0, n1, ret;
pollfd[0].events = pollfd[1].events = 0;
pollfd[0].revents = pollfd[1].revents = 0;
n0 = howmany(me, 0);
n1 = howmany(me + 1, 0);
n0 = pkt_queued(me, 0);
n1 = pkt_queued(me + 1, 0);
if (n0)
pollfd[1].events |= POLLOUT;
else
@ -468,14 +267,14 @@ main(int argc, char **argv)
ret <= 0 ? "timeout" : "ok",
pollfd[0].events,
pollfd[0].revents,
howmany(me, 0),
pkt_queued(me, 0),
me[0].rx->cur,
howmany(me, 1),
pkt_queued(me, 1),
pollfd[1].events,
pollfd[1].revents,
howmany(me+1, 0),
pkt_queued(me+1, 0),
me[1].rx->cur,
howmany(me+1, 1)
pkt_queued(me+1, 1)
);
if (ret < 0)
continue;

View File

@ -0,0 +1,251 @@
/*
* Copyright (C) 2012 Luigi Rizzo. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* $FreeBSD$
* $Id$
*
* utilities to use netmap devices.
* This does the basic functions of opening a device and issuing
* ioctls()
*/
#include "nm_util.h"
extern int verbose;
/*
 * Issue an interface ioctl on behalf of the ring descriptor 'me'.
 *
 * me      ring descriptor: me->ifname selects the interface, and the
 *         if_flags / if_reqcap / if_curcap fields supply the values for
 *         the SIOCS* requests and receive the results of the SIOCG* ones.
 * what    the ioctl request (SIOCGIFFLAGS, SIOCSIFFLAGS and, depending
 *         on the platform, SIOCGIFCAP / SIOCSIFCAP or SIOCETHTOOL).
 * subcmd  ethtool command; only used for SIOCETHTOOL on Linux.
 *
 * Returns 0 on success, the non-zero ioctl() result on failure, or -1
 * when the Linux control socket cannot be created.
 */
int
nm_do_ioctl(struct my_ring *me, u_long what, int subcmd)
{
	struct ifreq ifr;
	int error;
#if defined( __FreeBSD__ ) || defined (__APPLE__)
	int fd = me->fd;
#endif
#ifdef linux
	struct ethtool_value eval;
	int fd;

	/* on Linux the request goes through a temporary datagram socket */
	fd = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd < 0) {
		printf("Error: cannot get device control socket.\n");
		return -1;
	}
#endif /* linux */

	(void)subcmd;	// unused
	bzero(&ifr, sizeof(ifr));
	/*
	 * ifr has just been zeroed, so copying at most sizeof - 1 bytes
	 * leaves the name NUL-terminated even if ifname is too long.
	 */
	strncpy(ifr.ifr_name, me->ifname, sizeof(ifr.ifr_name) - 1);

	/* fill in the request for the 'set' commands */
	switch (what) {
	case SIOCSIFFLAGS:
#ifndef __APPLE__
		ifr.ifr_flagshigh = me->if_flags >> 16;
#endif
		ifr.ifr_flags = me->if_flags & 0xffff;
		break;

#if defined( __FreeBSD__ )
	case SIOCSIFCAP:
		ifr.ifr_reqcap = me->if_reqcap;
		ifr.ifr_curcap = me->if_curcap;
		break;
#endif

#ifdef linux
	case SIOCETHTOOL:
		eval.cmd = subcmd;
		eval.data = 0;
		ifr.ifr_data = (caddr_t)&eval;
		break;
#endif /* linux */

	default:
		/* 'get' requests need no additional setup */
		break;
	}

	error = ioctl(fd, what, &ifr);
	if (error)
		goto done;

	/* store the results of the 'get' commands */
	switch (what) {
	case SIOCGIFFLAGS:
#ifndef __APPLE__
		me->if_flags = (ifr.ifr_flagshigh << 16) |
			(0xffff & ifr.ifr_flags);
#else
		/*
		 * Only the 16-bit ifr_flags field is used on this platform
		 * (ifr_flagshigh is an alias for it); still record what the
		 * kernel returned so a later SIOCSIFFLAGS does not work on
		 * stale flags.
		 */
		me->if_flags = 0xffff & ifr.ifr_flags;
#endif
		if (verbose)
			D("flags are 0x%x", me->if_flags);
		break;

#if defined( __FreeBSD__ )
	case SIOCGIFCAP:
		me->if_reqcap = ifr.ifr_reqcap;
		me->if_curcap = ifr.ifr_curcap;
		if (verbose)
			D("curcap are 0x%x", me->if_curcap);
		break;
#endif /* __FreeBSD__ */

	default:
		break;
	}
done:
#ifdef linux
	close(fd);	/* the control socket was created just for this call */
#endif
	if (error)
		D("ioctl error %d %lu", error, what);
	return error;
}
/*
 * Open /dev/netmap and register the interface named by me->ifname.
 *
 * If me->mem is NULL the netmap memory region is mmap()ed and stored in
 * me->mem; otherwise the mapping already recorded there is reused.
 * Unless only the software ring is requested (ringid == NETMAP_SW_RING),
 * the interface is brought up if it is down, switched to promiscuous mode
 * when 'promisc' is non-zero, and has its checksum/segmentation offloads
 * turned off.
 *
 * On success the descriptor is left in me->fd, the ring range and the
 * tx/rx shortcuts are filled in, and 0 is returned.
 * On failure the descriptor is closed and -1 is returned.
 */
int
netmap_open(struct my_ring *me, int ringid, int promisc)
{
	int fd, err, l;
	int set_flags = 0;	/* non-zero if if_flags must be written back */
	struct nmreq req;

	me->fd = fd = open("/dev/netmap", O_RDWR);
	if (fd < 0) {
		D("Unable to open /dev/netmap");
		return (-1);
	}
	bzero(&req, sizeof(req));
	req.nr_version = NETMAP_API;
	/* req is zeroed: copying sizeof - 1 bytes keeps nr_name terminated */
	strncpy(req.nr_name, me->ifname, sizeof(req.nr_name) - 1);
	req.nr_ringid = ringid;
	err = ioctl(fd, NIOCGINFO, &req);
	if (err) {
		D("cannot get info on %s, errno %d ver %d",
			me->ifname, errno, req.nr_version);
		goto error;
	}
	me->memsize = l = req.nr_memsize;
	if (verbose)
		D("memsize is %d MB", l>>20);
	err = ioctl(fd, NIOCREGIF, &req);
	if (err) {
		D("Unable to register %s", me->ifname);
		goto error;
	}

	if (me->mem == NULL) {
		me->mem = mmap(0, l, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0);
		if (me->mem == MAP_FAILED) {
			D("Unable to mmap");
			me->mem = NULL;
			goto error;
		}
	}

	/* Set the operating mode. */
	if (ringid != NETMAP_SW_RING) {
		nm_do_ioctl(me, SIOCGIFFLAGS, 0);
		if ((me[0].if_flags & IFF_UP) == 0) {
			D("%s is down, bringing up...", me[0].ifname);
			me[0].if_flags |= IFF_UP;
			set_flags = 1;
		}
		if (promisc) {
			me[0].if_flags |= IFF_PPROMISC;
			set_flags = 1;
		}
		/*
		 * Write the flags back whenever they changed, so that a down
		 * interface is really brought up even without promisc mode.
		 */
		if (set_flags)
			nm_do_ioctl(me, SIOCSIFFLAGS, 0);

#ifdef __FreeBSD__
		/* also disable checksums etc. */
		nm_do_ioctl(me, SIOCGIFCAP, 0);
		me[0].if_reqcap = me[0].if_curcap;
		me[0].if_reqcap &= ~(IFCAP_HWCSUM | IFCAP_TSO | IFCAP_TOE);
		nm_do_ioctl(me+0, SIOCSIFCAP, 0);
#endif
#ifdef linux
		/* disable:
		 * - generic-segmentation-offload
		 * - tcp-segmentation-offload
		 * - rx-checksumming
		 * - tx-checksumming
		 * XXX check how to set back the caps.
		 */
		nm_do_ioctl(me, SIOCETHTOOL, ETHTOOL_SGSO);
		nm_do_ioctl(me, SIOCETHTOOL, ETHTOOL_STSO);
		nm_do_ioctl(me, SIOCETHTOOL, ETHTOOL_SRXCSUM);
		nm_do_ioctl(me, SIOCETHTOOL, ETHTOOL_STXCSUM);
#endif /* linux */
	}

	me->nifp = NETMAP_IF(me->mem, req.nr_offset);
	me->queueid = ringid;
	if (ringid & NETMAP_SW_RING) {
		/* the software ring sits right after the hardware rings */
		me->begin = req.nr_rx_rings;
		me->end = me->begin + 1;
		me->tx = NETMAP_TXRING(me->nifp, req.nr_tx_rings);
		me->rx = NETMAP_RXRING(me->nifp, req.nr_rx_rings);
	} else if (ringid & NETMAP_HW_RING) {
		/* a single hardware ring, selected by the low bits of ringid */
		D("XXX check multiple threads");
		me->begin = ringid & NETMAP_RING_MASK;
		me->end = me->begin + 1;
		me->tx = NETMAP_TXRING(me->nifp, me->begin);
		me->rx = NETMAP_RXRING(me->nifp, me->begin);
	} else {
		/* all hardware rings */
		me->begin = 0;
		me->end = req.nr_rx_rings; // XXX max of the two
		me->tx = NETMAP_TXRING(me->nifp, 0);
		me->rx = NETMAP_RXRING(me->nifp, 0);
	}
	return (0);
error:
	close(me->fd);
	return -1;
}
/*
 * Tear down a ring descriptor: unmap me->mem when a mapping is recorded,
 * issue NIOCUNREGIF on the descriptor and close it.
 * Always returns 0.
 */
int
netmap_close(struct my_ring *me)
{
	D("");
	if (me->mem != NULL)
		munmap(me->mem, me->memsize);

	ioctl(me->fd, NIOCUNREGIF, NULL);
	close(me->fd);

	return 0;
}
/*
 * Add up the 'avail' counters of rings me->begin .. me->end - 1,
 * looking at the tx rings when 'tx' is non-zero and at the rx rings
 * otherwise, and return the total.
 */
int
pkt_queued(struct my_ring *me, int tx)
{
	u_int total = 0;
	u_int idx = me->begin;

	ND("me %p begin %d end %d", me, me->begin, me->end);
	while (idx < me->end) {
		struct netmap_ring *r;

		if (tx)
			r = NETMAP_TXRING(me->nifp, idx);
		else
			r = NETMAP_RXRING(me->nifp, idx);
		total += r->avail;
		idx++;
	}
	/* debugging aid: still compiled, but switched off by the leading 0 */
	if (0 && verbose && total && !tx)
		D("ring %s %s %s has %d avail at %d",
			me->ifname, tx ? "tx": "rx",
			me->end >= me->nifp->ni_tx_rings ? // XXX who comes first ?
				"host":"net",
			total, NETMAP_TXRING(me->nifp, me->begin)->cur);
	return total;
}

View File

@ -0,0 +1,183 @@
/*
* Copyright (C) 2012 Luigi Rizzo. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* $FreeBSD$
* $Id$
*
* Some utilities to build netmap-based programs.
*/
#ifndef _NM_UTIL_H
#define _NM_UTIL_H
#include <errno.h>
#include <signal.h> /* signal */
#include <stdlib.h>
#include <stdio.h>
#include <inttypes.h> /* PRI* macros */
#include <string.h> /* strcmp */
#include <fcntl.h> /* open */
#include <unistd.h> /* close */
#include <ifaddrs.h> /* getifaddrs */
#include <sys/mman.h> /* PROT_* */
#include <sys/ioctl.h> /* ioctl */
#include <sys/poll.h>
#include <sys/socket.h> /* sockaddr.. */
#include <arpa/inet.h> /* ntohs */
#include <sys/param.h>
#include <sys/sysctl.h> /* sysctl */
#include <sys/time.h> /* timersub */
#include <net/ethernet.h>
#include <net/if.h> /* ifreq */
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/udp.h>
#include <net/netmap.h>
#include <net/netmap_user.h>
#ifndef MY_PCAP /* use the system's pcap if available */
#ifdef NO_PCAP
#define PCAP_ERRBUF_SIZE 512
typedef void pcap_t;
struct pcap_pkthdr;
#define pcap_inject(a,b,c) ((void)a, (void)b, (void)c, -1)
#define pcap_dispatch(a, b, c, d) (void)c
#define pcap_open_live(a, b, c, d, e) ((void)e, NULL)
#else /* !NO_PCAP */
#include <pcap/pcap.h> // XXX do we need it ?
#endif /* !NO_PCAP */
#endif // XXX hack
#include <pthread.h> /* pthread_* */
#ifdef linux
#define ifr_flagshigh ifr_flags
#define ifr_curcap ifr_flags
#define ifr_reqcap ifr_flags
#define IFF_PPROMISC IFF_PROMISC
#include <linux/ethtool.h>
#include <linux/sockios.h>
#define CLOCK_REALTIME_PRECISE CLOCK_REALTIME
#include <netinet/ether.h> /* ether_aton */
#include <linux/if_packet.h> /* sockaddr_ll */
#endif /* linux */
#ifdef __FreeBSD__
#include <sys/endian.h> /* le64toh */
#include <machine/param.h>
#include <pthread_np.h> /* pthread w/ affinity */
#include <sys/cpuset.h> /* cpu_set */
#include <net/if_dl.h> /* LLADDR */
#endif /* __FreeBSD__ */
#ifdef __APPLE__
#define ifr_flagshigh ifr_flags // XXX
#define IFF_PPROMISC IFF_PROMISC
#include <net/if_dl.h> /* LLADDR */
#define clock_gettime(a,b) \
do {struct timespec t0 = {0,0}; *(b) = t0; } while (0)
#endif /* __APPLE__ */
/* Return the smaller of the two arguments (b when they compare equal). */
static inline int
min(int a, int b)
{
	if (a < b)
		return a;
	return b;
}
/* current time in seconds, read by RD() to start a new rate-limit window */
extern int time_second;

/* debug support */

/* ND(): a disabled debug statement; its arguments are discarded. */
#define ND(format, ...)	do {} while(0)

/* D(): print a message on stderr, prefixed by function name and line. */
#define D(format, ...)					\
	fprintf(stderr, "%s [%d] " format "\n",		\
	__FUNCTION__, __LINE__, ##__VA_ARGS__)

/*
 * RD(): rate-limited D(). Each use of the macro has its own counter and
 * prints at most 'lps' messages for each distinct value of time_second.
 * 'lps' is parenthesized so that any expression can be passed safely.
 */
#define RD(lps, format, ...)				\
	do {						\
		static int t0, cnt;			\
		if (t0 != time_second) {		\
			t0 = time_second;		\
			cnt = 0;			\
		}					\
		if (cnt++ < (lps))			\
			D(format, ##__VA_ARGS__);	\
	} while (0)
/*
 * Hint that the memory at 'x' is about to be read.
 * The compiler builtin is used instead of x86 inline assembly, so the
 * header also builds on other architectures and on 32-bit targets. With
 * its default arguments the builtin requests a read prefetch with the
 * highest temporal locality.
 */
static inline void
prefetch(const void *x)
{
	__builtin_prefetch(x);
}
#ifndef likely
#define likely(x)	__builtin_expect(!!(x), 1)
#endif
#ifndef unlikely
#define unlikely(x)	__builtin_expect(!!(x), 0)
#endif

/*
 * Copy 'l' bytes from _src to _dst.
 *
 * Lengths of 1024 bytes or more are handed to the C library. Shorter
 * lengths are copied 64 bytes at a time with eight 64-bit moves, and the
 * remaining 0..63 bytes are copied with memcpy() so that exactly 'l'
 * bytes are read and written (no rounding up to a multiple of 64).
 * A zero or negative 'l' copies nothing.
 * The fast path uses 64-bit accesses and expects non-overlapping,
 * suitably aligned buffers.
 */
static inline void
pkt_copy(const void *_src, void *_dst, int l)
{
	const uint64_t *src = _src;
	uint64_t *dst = _dst;

	if (unlikely(l >= 1024)) {
		memmove(_dst, _src, (size_t)l);
		return;
	}
	for (; l >= 64; l -= 64) {
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
		*dst++ = *src++;
	}
	/* tail shorter than one 64-byte chunk */
	if (l > 0)
		memcpy(dst, src, (size_t)l);
}
/*
 * info on a ring we handle
 *
 * One descriptor per opened netmap port. netmap_open() fills in every
 * field except ifname (and mem, when the caller supplies an existing
 * mapping); nm_do_ioctl() reads and updates the if_* fields.
 */
struct my_ring {
const char *ifname; /* interface name, copied into the ioctl requests */
int fd; /* descriptor obtained by opening /dev/netmap */
char *mem; /* userspace mmap address */
u_int memsize; /* size of the mapping, as reported in nr_memsize */
u_int queueid; /* the ringid value passed to netmap_open() */
u_int begin, end; /* first..last+1 rings to check */
struct netmap_if *nifp; /* NETMAP_IF() of mem at the registered offset */
struct netmap_ring *tx, *rx; /* shortcuts */
uint32_t if_flags; /* interface flags for SIOCGIFFLAGS/SIOCSIFFLAGS */
uint32_t if_reqcap; /* requested capabilities for SIOCGIFCAP/SIOCSIFCAP */
uint32_t if_curcap; /* current capabilities for SIOCGIFCAP/SIOCSIFCAP */
};
/* open /dev/netmap and register me->ifname; returns 0 on success, -1 on error */
int netmap_open(struct my_ring *me, int ringid, int promisc);
/* unmap the memory (if mapped), unregister and close me->fd; returns 0 */
int netmap_close(struct my_ring *me);
/* issue the interface ioctl 'what' for me->ifname; returns 0 on success */
int nm_do_ioctl(struct my_ring *me, u_long what, int subcmd);
#endif /* _NM_UTIL_H */

View File

@ -1,5 +1,5 @@
/*
* (C) 2011 Luigi Rizzo
* (C) 2011-2012 Luigi Rizzo
*
* BSD license
*
@ -10,81 +10,18 @@
* $FreeBSD$
*/
#include <errno.h>
#include <signal.h> /* signal */
#include <stdlib.h>
#include <stdio.h>
#include <string.h> /* strcmp */
#include <fcntl.h> /* open */
#include <unistd.h> /* close */
#define MY_PCAP
#include "nm_util.h"
#include <sys/endian.h> /* le64toh */
#include <sys/mman.h> /* PROT_* */
#include <sys/ioctl.h> /* ioctl */
#include <machine/param.h>
#include <sys/poll.h>
#include <sys/socket.h> /* sockaddr.. */
#include <arpa/inet.h> /* ntohs */
#include <net/if.h> /* ifreq */
#include <net/ethernet.h>
#include <net/netmap.h>
#include <net/netmap_user.h>
#include <netinet/in.h> /* sockaddr_in */
#include <sys/socket.h>
#include <ifaddrs.h>
#define MIN(a, b) ((a) < (b) ? (a) : (b))
const char *version = "$Id$";
char *version = "$Id: pcap.c 11463 2012-07-30 15:26:02Z luigi $";
int verbose = 0;
/* debug support */
#define ND(format, ...) do {} while (0)
#define D(format, ...) do { \
if (verbose) \
fprintf(stderr, "--- %s [%d] " format "\n", \
__FUNCTION__, __LINE__, ##__VA_ARGS__); \
} while (0)
static inline void prefetch (const void *x)
{
__asm volatile("prefetcht0 %0" :: "m" (*(const unsigned long *)x));
}
// XXX only for multiples of 64 bytes, non overlapped.
static inline void
pkt_copy(const void *_src, void *_dst, int l)
{
const uint64_t *src = _src;
uint64_t *dst = _dst;
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
if (unlikely(l >= 1024)) {
bcopy(src, dst, l);
return;
}
for (; l > 0; l-=64) {
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
}
}
/*
* We redefine here a number of structures that are in pcap.h
* so we can compile this file without the system header.
*/
#ifndef PCAP_ERRBUF_SIZE
#define PCAP_ERRBUF_SIZE 128
/*
* Each packet is accompanied by a header including the timestamp,
* captured size and actual size.
@ -135,12 +72,13 @@ typedef enum {
PCAP_D_OUT
} pcap_direction_t;
struct bpf_program;
typedef void (*pcap_handler)(u_char *user,
const struct pcap_pkthdr *h, const u_char *bytes);
char errbuf[PCAP_ERRBUF_SIZE];
pcap_t *pcap_open_live(const char *device, int snaplen,
int promisc, int to_ms, char *errbuf);
@ -154,24 +92,6 @@ char *pcap_lookupdev(char *errbuf);
int pcap_inject(pcap_t *p, const void *buf, size_t size);
int pcap_fileno(pcap_t *p);
const char *pcap_lib_version(void);
void pcap_freealldevs(pcap_if_t *);
pcap_t *pcap_create(const char *, char *);
int pcap_activate(pcap_t *);
int pcap_can_set_rfmon(pcap_t *);
int pcap_set_snaplen(pcap_t *, int);
int pcap_snapshot(pcap_t *);
int pcap_lookupnet(const char *, uint32_t *, uint32_t *, char *);
int pcap_set_promisc(pcap_t *, int);
int pcap_set_timeout(pcap_t *, int);
int pcap_compile(pcap_t *, struct bpf_program *, const char *, int,
uint32_t);
int pcap_setfilter(pcap_t *, struct bpf_program *);
int pcap_datalink(pcap_t *);
const char *pcap_datalink_val_to_name(int);
const char *pcap_datalink_val_to_description(int);
int pcap_stats(pcap_t *, struct pcap_stat *);
int pcap_loop(pcap_t *, int, pcap_handler, u_char *);
char *pcap_geterr(pcap_t *);
struct eproto {
@ -180,7 +100,7 @@ struct eproto {
};
#endif /* !PCAP_ERRBUF_SIZE */
#ifdef __PIC__
#ifndef TEST
/*
* build as a shared library
*/
@ -190,8 +110,12 @@ char pcap_version[] = "libnetmap version 0.3";
/*
* Our equivalent of pcap_t
*/
struct my_ring {
struct nmreq nmr;
struct pcap_ring {
struct my_ring me;
#if 0
const char *ifname;
//struct nmreq nmr;
int fd;
char *mem; /* userspace mmap address */
@ -200,6 +124,10 @@ struct my_ring {
u_int begin, end; /* first..last+1 rings to check */
struct netmap_if *nifp;
uint32_t if_flags;
uint32_t if_reqcap;
uint32_t if_curcap;
#endif
int snaplen;
char *errbuf;
int promisc;
@ -207,9 +135,6 @@ struct my_ring {
struct pcap_pkthdr hdr;
uint32_t if_flags;
uint32_t if_reqcap;
uint32_t if_curcap;
struct pcap_stat st;
@ -217,114 +142,6 @@ struct my_ring {
};
static int
do_ioctl(struct my_ring *me, unsigned long what)
{
struct ifreq ifr;
int error;
bzero(&ifr, sizeof(ifr));
strncpy(ifr.ifr_name, me->nmr.nr_name, sizeof(ifr.ifr_name));
switch (what) {
case SIOCSIFFLAGS:
D("call SIOCSIFFLAGS 0x%x", me->if_flags);
ifr.ifr_flagshigh = (me->if_flags >> 16) & 0xffff;
ifr.ifr_flags = me->if_flags & 0xffff;
break;
case SIOCSIFCAP:
ifr.ifr_reqcap = me->if_reqcap;
ifr.ifr_curcap = me->if_curcap;
break;
}
error = ioctl(me->fd, what, &ifr);
if (error) {
D("ioctl 0x%lx error %d", what, error);
return error;
}
switch (what) {
case SIOCSIFFLAGS:
case SIOCGIFFLAGS:
me->if_flags = (ifr.ifr_flagshigh << 16) |
(0xffff & ifr.ifr_flags);
D("flags are L 0x%x H 0x%x 0x%x",
(uint16_t)ifr.ifr_flags,
(uint16_t)ifr.ifr_flagshigh, me->if_flags);
break;
case SIOCGIFCAP:
me->if_reqcap = ifr.ifr_reqcap;
me->if_curcap = ifr.ifr_curcap;
D("curcap are 0x%x", me->if_curcap);
break;
}
return 0;
}
/*
* open a device. if me->mem is null then do an mmap.
*/
static int
netmap_open(struct my_ring *me, int ringid)
{
int fd, err, l;
u_int i;
struct nmreq req;
me->fd = fd = open("/dev/netmap", O_RDWR);
if (fd < 0) {
D("Unable to open /dev/netmap");
return (-1);
}
bzero(&req, sizeof(req));
strncpy(req.nr_name, me->nmr.nr_name, sizeof(req.nr_name));
req.nr_ringid = ringid;
req.nr_version = NETMAP_API;
err = ioctl(fd, NIOCGINFO, &req);
if (err) {
D("cannot get info on %s", me->nmr.nr_name);
goto error;
}
me->memsize = l = req.nr_memsize;
ND("memsize is %d MB", l>>20);
err = ioctl(fd, NIOCREGIF, &req);
if (err) {
D("Unable to register %s", me->nmr.nr_name);
goto error;
}
if (me->mem == NULL) {
me->mem = mmap(0, l, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0);
if (me->mem == MAP_FAILED) {
D("Unable to mmap");
me->mem = NULL;
goto error;
}
}
me->nifp = NETMAP_IF(me->mem, req.nr_offset);
me->queueid = ringid;
if (ringid & NETMAP_SW_RING) {
me->begin = req.nr_rx_rings;
me->end = me->begin + 1;
} else if (ringid & NETMAP_HW_RING) {
me->begin = ringid & NETMAP_RING_MASK;
me->end = me->begin + 1;
} else {
me->begin = 0;
me->end = req.nr_rx_rings;
}
/* request timestamps for packets */
for (i = me->begin; i < me->end; i++) {
struct netmap_ring *ring = NETMAP_RXRING(me->nifp, i);
ring->flags = NR_TIMESTAMP;
}
//me->tx = NETMAP_TXRING(me->nifp, 0);
return (0);
error:
close(me->fd);
return -1;
}
/*
* There is a set of functions that tcpdump expects even if probably
@ -343,10 +160,12 @@ const char *pcap_lib_version(void)
}
int
pcap_findalldevs(pcap_if_t **alldevsp, __unused char *errbuf)
pcap_findalldevs(pcap_if_t **alldevsp, char *errbuf)
{
pcap_if_t *top = NULL;
#ifndef linux
struct ifaddrs *i_head, *i;
pcap_if_t *top = NULL, *cur;
pcap_if_t *cur;
struct pcap_addr *tail = NULL;
int l;
@ -397,7 +216,7 @@ pcap_findalldevs(pcap_if_t **alldevsp, __unused char *errbuf)
}
#define SA_NEXT(x) ((struct sockaddr *)((char *)(x) + (x)->sa_len))
pca->addr = (struct sockaddr *)(pca + 1);
bcopy(i->ifa_addr, pca->addr, i->ifa_addr->sa_len);
pkt_copy(i->ifa_addr, pca->addr, i->ifa_addr->sa_len);
if (i->ifa_netmask) {
pca->netmask = SA_NEXT(pca->addr);
bcopy(i->ifa_netmask, pca->netmask, i->ifa_netmask->sa_len);
@ -415,12 +234,15 @@ pcap_findalldevs(pcap_if_t **alldevsp, __unused char *errbuf)
}
freeifaddrs(i_head);
#endif /* !linux */
(void)errbuf; /* UNUSED */
*alldevsp = top;
return 0;
}
void pcap_freealldevs(__unused pcap_if_t *alldevs)
void pcap_freealldevs(pcap_if_t *alldevs)
{
(void)alldevs; /* UNUSED */
D("unimplemented");
}
@ -447,8 +269,9 @@ pcap_activate(pcap_t *p)
}
int
pcap_can_set_rfmon(__unused pcap_t *p)
pcap_can_set_rfmon(pcap_t *p)
{
(void)p; /* UNUSED */
D("");
return 0; /* no we can't */
}
@ -456,7 +279,7 @@ pcap_can_set_rfmon(__unused pcap_t *p)
int
pcap_set_snaplen(pcap_t *p, int snaplen)
{
struct my_ring *me = p;
struct pcap_ring *me = p;
D("len %d", snaplen);
me->snaplen = snaplen;
@ -466,7 +289,7 @@ pcap_set_snaplen(pcap_t *p, int snaplen)
int
pcap_snapshot(pcap_t *p)
{
struct my_ring *me = p;
struct pcap_ring *me = p;
D("len %d", me->snaplen);
return me->snaplen;
@ -474,9 +297,10 @@ pcap_snapshot(pcap_t *p)
int
pcap_lookupnet(const char *device, uint32_t *netp,
uint32_t *maskp, __unused char *errbuf)
uint32_t *maskp, char *errbuf)
{
(void)errbuf; /* UNUSED */
D("device %s", device);
inet_aton("10.0.0.255", (struct in_addr *)netp);
inet_aton("255.255.255.0",(struct in_addr *) maskp);
@ -486,17 +310,17 @@ pcap_lookupnet(const char *device, uint32_t *netp,
int
pcap_set_promisc(pcap_t *p, int promisc)
{
struct my_ring *me = p;
struct pcap_ring *me = p;
D("promisc %d", promisc);
if (do_ioctl(me, SIOCGIFFLAGS))
if (nm_do_ioctl(&me->me, SIOCGIFFLAGS, 0))
D("SIOCGIFFLAGS failed");
if (promisc) {
me->if_flags |= IFF_PPROMISC;
me->me.if_flags |= IFF_PPROMISC;
} else {
me->if_flags &= ~IFF_PPROMISC;
me->me.if_flags &= ~IFF_PPROMISC;
}
if (do_ioctl(me, SIOCSIFFLAGS))
if (nm_do_ioctl(&me->me, SIOCSIFFLAGS, 0))
D("SIOCSIFFLAGS failed");
return 0;
}
@ -504,7 +328,7 @@ pcap_set_promisc(pcap_t *p, int promisc)
int
pcap_set_timeout(pcap_t *p, int to_ms)
{
struct my_ring *me = p;
struct pcap_ring *me = p;
D("%d ms", to_ms);
me->to_ms = to_ms;
@ -514,23 +338,30 @@ pcap_set_timeout(pcap_t *p, int to_ms)
struct bpf_program;
int
pcap_compile(__unused pcap_t *p, __unused struct bpf_program *fp,
const char *str, __unused int optimize, __unused uint32_t netmask)
pcap_compile(pcap_t *p, struct bpf_program *fp,
const char *str, int optimize, uint32_t netmask)
{
(void)p; /* UNUSED */
(void)fp; /* UNUSED */
(void)optimize; /* UNUSED */
(void)netmask; /* UNUSED */
D("%s", str);
return 0;
}
int
pcap_setfilter(__unused pcap_t *p, __unused struct bpf_program *fp)
pcap_setfilter(pcap_t *p, struct bpf_program *fp)
{
(void)p; /* UNUSED */
(void)fp; /* UNUSED */
D("");
return 0;
}
int
pcap_datalink(__unused pcap_t *p)
pcap_datalink(pcap_t *p)
{
(void)p; /* UNUSED */
D("returns 1");
return 1; // ethernet
}
@ -553,7 +384,7 @@ struct pcap_stat;
int
pcap_stats(pcap_t *p, struct pcap_stat *ps)
{
struct my_ring *me = p;
struct pcap_ring *me = p;
ND("");
*ps = me->st;
@ -563,44 +394,42 @@ pcap_stats(pcap_t *p, struct pcap_stat *ps)
char *
pcap_geterr(pcap_t *p)
{
struct my_ring *me = p;
struct pcap_ring *me = p;
D("");
return me->msg;
}
pcap_t *
pcap_open_live(const char *device, __unused int snaplen,
int promisc, int to_ms, __unused char *errbuf)
pcap_open_live(const char *device, int snaplen,
int promisc, int to_ms, char *errbuf)
{
struct my_ring *me;
struct pcap_ring *me;
int l;
(void)snaplen; /* UNUSED */
(void)errbuf; /* UNUSED */
if (!device) {
D("missing device name");
return NULL;
}
l = strlen(device) + 1;
D("request to open %s snaplen %d promisc %d timeout %dms",
device, snaplen, promisc, to_ms);
me = calloc(1, sizeof(*me));
me = calloc(1, sizeof(*me) + l);
if (me == NULL) {
D("failed to allocate struct for %s", device);
return NULL;
}
strncpy(me->nmr.nr_name, device, sizeof(me->nmr.nr_name));
if (netmap_open(me, 0)) {
me->me.ifname = (char *)(me + 1);
strcpy((char *)me->me.ifname, device);
if (netmap_open(&me->me, 0, promisc)) {
D("error opening %s", device);
free(me);
return NULL;
}
me->to_ms = to_ms;
if (do_ioctl(me, SIOCGIFFLAGS))
D("SIOCGIFFLAGS failed");
if (promisc) {
me->if_flags |= IFF_PPROMISC;
if (do_ioctl(me, SIOCSIFFLAGS))
D("SIOCSIFFLAGS failed");
}
if (do_ioctl(me, SIOCGIFCAP))
D("SIOCGIFCAP failed");
me->if_reqcap &= ~(IFCAP_HWCSUM | IFCAP_TSO | IFCAP_TOE);
if (do_ioctl(me, SIOCSIFCAP))
D("SIOCSIFCAP failed");
return (pcap_t *)me;
}
@ -640,15 +469,19 @@ pcap_get_selectable_fd(pcap_t *p)
}
int
pcap_setnonblock(__unused pcap_t *p, int nonblock, __unused char *errbuf)
pcap_setnonblock(pcap_t *p, int nonblock, char *errbuf)
{
(void)p; /* UNUSED */
(void)errbuf; /* UNUSED */
D("mode is %d", nonblock);
return 0; /* ignore */
}
int
pcap_setdirection(__unused pcap_t *p, __unused pcap_direction_t d)
pcap_setdirection(pcap_t *p, pcap_direction_t d)
{
(void)p; /* UNUSED */
(void)d; /* UNUSED */
D("");
return 0; /* ignore */
};
@ -656,7 +489,8 @@ pcap_setdirection(__unused pcap_t *p, __unused pcap_direction_t d)
int
pcap_dispatch(pcap_t *p, int cnt, pcap_handler callback, u_char *user)
{
struct my_ring *me = p;
struct pcap_ring *pme = p;
struct my_ring *me = &pme->me;
int got = 0;
u_int si;
@ -669,7 +503,7 @@ pcap_dispatch(pcap_t *p, int cnt, pcap_handler callback, u_char *user)
ND("ring has %d pkts", ring->avail);
if (ring->avail == 0)
continue;
me->hdr.ts = ring->ts;
pme->hdr.ts = ring->ts;
/*
* XXX a proper prefetch should be done as
* prefetch(i); callback(i-1); ...
@ -684,15 +518,15 @@ pcap_dispatch(pcap_t *p, int cnt, pcap_handler callback, u_char *user)
}
u_char *buf = (u_char *)NETMAP_BUF(ring, idx);
prefetch(buf);
me->hdr.len = me->hdr.caplen = ring->slot[i].len;
pme->hdr.len = pme->hdr.caplen = ring->slot[i].len;
// D("call %p len %d", p, me->hdr.len);
callback(user, &me->hdr, buf);
callback(user, &pme->hdr, buf);
ring->cur = NETMAP_RING_NEXT(ring, i);
ring->avail--;
got++;
}
}
me->st.ps_recv += got;
pme->st.ps_recv += got;
return got;
}
@ -732,13 +566,13 @@ pcap_inject(pcap_t *p, const void *buf, size_t size)
int
pcap_loop(pcap_t *p, int cnt, pcap_handler callback, u_char *user)
{
struct my_ring *me = p;
struct pcap_ring *me = p;
struct pollfd fds[1];
int i;
ND("cnt %d", cnt);
memset(fds, 0, sizeof(fds));
fds[0].fd = me->fd;
fds[0].fd = me->me.fd;
fds[0].events = (POLLIN);
while (cnt == -1 || cnt > 0) {
@ -753,11 +587,10 @@ pcap_loop(pcap_t *p, int cnt, pcap_handler callback, u_char *user)
return 0;
}
#endif /* __PIC__ */
#endif /* !TEST */
#ifndef __PIC__
static void
do_send(u_char *user, const struct pcap_pkthdr *h, const u_char *buf)
#ifdef TEST /* build test code */
void do_send(u_char *user, const struct pcap_pkthdr *h, const u_char *buf)
{
pcap_inject((pcap_t *)user, buf, h->caplen);
}
@ -819,4 +652,4 @@ main(int argc, char **argv)
return (0);
}
#endif /* !__PIC__ */
#endif /* TEST */

File diff suppressed because it is too large Load Diff