- correct a bug in pcap_dispatch(): a count of 0 means infinity.

- in pcap_dispatch(), issue a prefetch on the buffer before the
  callback, this may save a little bit of time if the client
  is very fast.

- in pcap_inject(), use a fast copy routine, which also helps
  saving a few nanoseconds with fast clients.
This commit is contained in:
luigi 2012-05-03 15:34:44 +00:00
parent 48f59d28b6
commit 5bceb861dc

View File

@ -49,6 +49,34 @@ int verbose = 0;
__FUNCTION__, __LINE__, ##__VA_ARGS__); \
} while (0)
inline void prefetch (const void *x)
{
__asm volatile("prefetcht0 %0" :: "m" (*(const unsigned long *)x));
}
// XXX only for multiples of 64 bytes, non overlapped.
static inline void
pkt_copy(const void *_src, void *_dst, int l)
{
const uint64_t *src = _src;
uint64_t *dst = _dst;
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
if (unlikely(l >= 1024)) {
bcopy(src, dst, l);
return;
}
for (; l > 0; l-=64) {
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
}
}
/*
* We redefine here a number of structures that are in pcap.h
@ -479,21 +507,21 @@ pcap_setfilter(__unused pcap_t *p, __unused struct bpf_program *fp)
int
pcap_datalink(__unused pcap_t *p)
{
D("");
D("returns 1");
return 1; // ethernet
}
const char *
pcap_datalink_val_to_name(int dlt)
{
D("%d", dlt);
D("%d returns DLT_EN10MB", dlt);
return "DLT_EN10MB";
}
const char *
pcap_datalink_val_to_description(int dlt)
{
D("%d", dlt);
D("%d returns Ethernet link", dlt);
return "Ethernet link";
}
@ -525,7 +553,8 @@ pcap_open_live(const char *device, __unused int snaplen,
{
struct my_ring *me;
D("request to open %s", device);
D("request to open %s snaplen %d promisc %d timeout %dms",
device, snaplen, promisc, to_ms);
me = calloc(1, sizeof(*me));
if (me == NULL) {
D("failed to allocate struct for %s", device);
@ -610,6 +639,8 @@ pcap_dispatch(pcap_t *p, int cnt, pcap_handler callback, u_char *user)
u_int si;
ND("cnt %d", cnt);
if (cnt == 0)
cnt = -1;
/* scan all rings */
for (si = me->begin; si < me->end; si++) {
struct netmap_ring *ring = NETMAP_RXRING(me->nifp, si);
@ -617,6 +648,10 @@ pcap_dispatch(pcap_t *p, int cnt, pcap_handler callback, u_char *user)
if (ring->avail == 0)
continue;
me->hdr.ts = ring->ts;
/*
* XXX a proper prefetch should be done as
* prefetch(i); callback(i-1); ...
*/
while ((cnt == -1 || cnt != got) && ring->avail > 0) {
u_int i = ring->cur;
u_int idx = ring->slot[i].buf_idx;
@ -626,6 +661,7 @@ pcap_dispatch(pcap_t *p, int cnt, pcap_handler callback, u_char *user)
sleep(2);
}
u_char *buf = (u_char *)NETMAP_BUF(ring, idx);
prefetch(buf);
me->hdr.len = me->hdr.caplen = ring->slot[i].len;
// D("call %p len %d", p, me->hdr.len);
callback(user, &me->hdr, buf);
@ -660,7 +696,7 @@ pcap_inject(pcap_t *p, const void *buf, size_t size)
}
u_char *dst = (u_char *)NETMAP_BUF(ring, idx);
ring->slot[i].len = size;
bcopy(buf, dst, size);
pkt_copy(buf, dst, size);
ring->cur = NETMAP_RING_NEXT(ring, i);
ring->avail--;
// if (ring->avail == 0) ioctl(me->fd, NIOCTXSYNC, NULL);