Maintain and observe a ZBUF_FLAG_IMMUTABLE flag on zero-copy BPF

buffer kernel descriptors, which is used to allow the buffer
currently in the BPF "store" position to be assigned to userspace
when it fills, even if userspace hasn't acknowledged the buffer
in the "hold" position yet.  To implement this, notify the buffer
model when a buffer becomes full, and check that the store buffer
is writable, not just for it being full, before trying to append
new packet data.  Shared memory buffers will be assigned to
userspace at most once per fill, be it in the store or in the
hold position.

This removes the restriction that at most one shared memory buffer can
be owned by userspace, reducing the chances that userspace will
need to call select() after acknowledging one buffer in order to
wait for the next buffer when under high load.  This more fully
realizes the goal of zero system calls in order to process a
high-speed packet stream from BPF.

Update bpf.4 to reflect that both buffers may be owned by userspace
at once; caution against assuming this.
This commit is contained in:
Robert Watson 2008-04-07 02:51:00 +00:00
parent 08304c1617
commit a7a91e6592
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=177966
4 changed files with 133 additions and 22 deletions

View File

@ -259,14 +259,14 @@ may be used to sleep awaiting the availbility of a completed buffer.
They will return a readable file descriptor when ownership of the next buffer
is assigned to user space.
.Pp
In the current implementation, the kernel will assign ownership of at most
one buffer at a time to the user process.
The user processes must acknowledge the current buffer in order to be
notified that the next buffer is ready for processing.
Programs should not rely on this as an invariant, as it may change in future
versions; in particular, they must maintain their own notion of which buffer
is "next" so that if both buffers are owned by userspace, it can process them
in the correct order.
In the current implementation, the kernel may assign zero, one, or both
buffers to the user process; however, an earlier implementation maintained
the invariant that at most one buffer could be assigned to the user process
at a time.
In order to both ensure progress and high performance, user processes should
acknowledge a completely processed buffer as quickly as possible, returning
it for reuse, and not block waiting on a second buffer while holding another
buffer.
.Sh IOCTLS
The
.Xr ioctl 2

View File

@ -218,6 +218,45 @@ bpf_canfreebuf(struct bpf_d *d)
return (0);
}
/*
 * Ask the buffer model whether the current store buffer may be written to,
 * regardless of how much free space it appears to have.  Returns (1) when
 * the buffer is writable and (0) when the buffer model says it is not.
 */
static int
bpf_canwritebuf(struct bpf_d *d)
{
	int writable;

	BPFD_LOCK_ASSERT(d);

	/* Only zero-copy mode is consulted; every other mode is writable. */
	if (d->bd_bufmode == BPF_BUFMODE_ZBUF)
		writable = bpf_zerocopy_canwritebuf(d);
	else
		writable = 1;
	return (writable);
}
/*
 * Tell the buffer model that an attempt to write to the store buffer ended
 * in a dropped packet, so the store buffer may be treated as full.
 */
static void
bpf_buffull(struct bpf_d *d)
{

	BPFD_LOCK_ASSERT(d);

	/* Zero-copy is the only buffer mode that is notified here. */
	if (d->bd_bufmode == BPF_BUFMODE_ZBUF)
		bpf_zerocopy_buffull(d);
}
/*
* Notify the buffer model that a buffer has moved into the hold position.
*/
void
bpf_bufheld(struct bpf_d *d)
{
@ -1691,27 +1730,28 @@ catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
/*
* Round up the end of the previous packet to the next longword.
*
* Drop the packet if there's no room and no hope of room
* If the packet would overflow the storage buffer or the storage
* buffer is considered immutable by the buffer model, try to rotate
* the buffer and wakeup pending processes.
*/
curlen = BPF_WORDALIGN(d->bd_slen);
if (curlen + totlen > d->bd_bufsize) {
/*
* This packet will overflow the storage buffer.
* Rotate the buffers if we can, then wakeup any
* pending reads.
*/
if (curlen + totlen > d->bd_bufsize || !bpf_canwritebuf(d)) {
if (d->bd_fbuf == NULL) {
/*
* We haven't completed the previous read yet,
* so drop the packet.
* There's no room in the store buffer, and no
* prospect of room, so drop the packet. Notify the
* buffer model.
*/
bpf_buffull(d);
++d->bd_dcount;
return;
}
ROTATE_BUFFERS(d);
do_wakeup = 1;
curlen = 0;
}
else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
/*
* Immediate mode is set, or the read timeout has already
* expired during a select call. A packet arrived, so the

View File

@ -85,7 +85,7 @@ __FBSDID("$FreeBSD$");
* scatter-gather copying. One significant mitigating factor is that on
* systems with a direct memory map, we can avoid TLB misses.
*
* At the front of the shared memor region is a bpf_zbuf_header, which
* At the front of the shared memory region is a bpf_zbuf_header, which
* contains shared control data to allow user space and the kernel to
* synchronize; this is included in zb_size, but not bpf_bufsize, so that BPF
* knows that the space is not available.
@ -94,10 +94,18 @@ struct zbuf {
vm_offset_t zb_uaddr; /* User address, may be stale. */
size_t zb_size; /* Size of buffer, incl. header. */
u_int zb_numpages; /* Number of pages. */
int zb_flags; /* Flags on zbuf. */
struct sf_buf **zb_pages; /* Pages themselves. */
struct bpf_zbuf_header *zb_header; /* Shared header. */
};
/*
* When a buffer has been assigned to userspace, flag it as such, as the
* buffer may remain in the store position as a result of the user process
* not yet having acknowledged the buffer in the hold position.
*/
#define ZBUF_FLAG_IMMUTABLE 0x00000001 /* Set when owned by user. */
/*
* Release a page we've previously wired.
*/
@ -254,6 +262,9 @@ bpf_zerocopy_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset,
src_bytes = (u_char *)src;
zb = (struct zbuf *)buf;
KASSERT((zb->zb_flags & ZBUF_FLAG_IMMUTABLE) == 0,
("bpf_zerocopy_append_bytes: ZBUF_FLAG_IMMUTABLE"));
/*
* Scatter-gather copy to user pages mapped into kernel address space
* using sf_bufs: copy up to a page at a time.
@ -303,6 +314,9 @@ bpf_zerocopy_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset,
m = (struct mbuf *)src;
zb = (struct zbuf *)buf;
KASSERT((zb->zb_flags & ZBUF_FLAG_IMMUTABLE) == 0,
("bpf_zerocopy_append_mbuf: ZBUF_FLAG_IMMUTABLE"));
/*
* Scatter gather both from an mbuf chain and to a user page set
* mapped into kernel address space using sf_bufs. If we're lucky,
@ -343,10 +357,39 @@ bpf_zerocopy_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset,
}
}
/*
 * Called by the BPF framework when the buffer in the store position is
 * rejecting packets and may be treated as full.  The first time this
 * happens for a given fill, the buffer is flagged immutable and assigned to
 * userspace so the user process can access it immediately; later calls for
 * an already-immutable buffer do nothing.
 */
void
bpf_zerocopy_buffull(struct bpf_d *d)
{
	struct zbuf *zb;

	KASSERT(d->bd_bufmode == BPF_BUFMODE_ZBUF,
	    ("bpf_zerocopy_buffull: not in zbuf mode"));

	zb = (struct zbuf *)d->bd_sbuf;
	KASSERT(zb != NULL, ("bpf_zerocopy_buffull: zb == NULL"));

	/* Already flagged immutable: nothing further to publish. */
	if ((zb->zb_flags & ZBUF_FLAG_IMMUTABLE) != 0)
		return;

	zb->zb_flags |= ZBUF_FLAG_IMMUTABLE;

	/*
	 * Store the length in the shared header first, then bump the kernel
	 * generation with the _rel (release) atomic; keep this order.
	 */
	zb->zb_header->bzh_kernel_len = d->bd_slen;
	atomic_add_rel_int(&zb->zb_header->bzh_kernel_gen, 1);
}
/*
* Notification from the BPF framework that a buffer has moved into the held
* slot on a descriptor. Zero-copy BPF will update the shared page to let
* the user process know.
* the user process know and flag the buffer as immutable if it hasn't
* already been marked immutable due to filling while it was in the store
* position.
*
* Note: identical logic as in bpf_zerocopy_buffull(), except that we operate
* on bd_hbuf and bd_hlen.
*/
void
bpf_zerocopy_bufheld(struct bpf_d *d)
@ -358,8 +401,12 @@ bpf_zerocopy_bufheld(struct bpf_d *d)
zb = (struct zbuf *)d->bd_hbuf;
KASSERT(zb != NULL, ("bpf_zerocopy_bufheld: zb == NULL"));
zb->zb_header->bzh_kernel_len = d->bd_hlen;
atomic_add_rel_int(&zb->zb_header->bzh_kernel_gen, 1);
if ((zb->zb_flags & ZBUF_FLAG_IMMUTABLE) == 0) {
zb->zb_flags |= ZBUF_FLAG_IMMUTABLE;
zb->zb_header->bzh_kernel_len = d->bd_hlen;
atomic_add_rel_int(&zb->zb_header->bzh_kernel_gen, 1);
}
}
/*
@ -385,6 +432,28 @@ bpf_zerocopy_canfreebuf(struct bpf_d *d)
return (0);
}
/*
 * Query from the BPF framework as to whether the buffer currently in the
 * store position can actually be written to.  Returns (0) when the store
 * buffer carries ZBUF_FLAG_IMMUTABLE, which can happen if it was assigned to
 * userspace before the hold buffer was acknowledged, and (1) otherwise.
 */
int
bpf_zerocopy_canwritebuf(struct bpf_d *d)
{
	struct zbuf *zb;

	KASSERT(d->bd_bufmode == BPF_BUFMODE_ZBUF,
	    ("bpf_zerocopy_canwritebuf: not in zbuf mode"));

	zb = (struct zbuf *)d->bd_sbuf;
	KASSERT(zb != NULL, ("bpf_zerocopy_canwritebuf: bd_sbuf NULL"));

	/* Writable exactly when the immutable flag is clear. */
	return ((zb->zb_flags & ZBUF_FLAG_IMMUTABLE) == 0);
}
/*
* Free zero copy buffers at request of descriptor.
*/

View File

@ -40,8 +40,10 @@ void bpf_zerocopy_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset,
void *src, u_int len);
void bpf_zerocopy_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset,
void *src, u_int len);
void bpf_zerocopy_buffull(struct bpf_d *);
void bpf_zerocopy_bufheld(struct bpf_d *);
int bpf_zerocopy_canfreebuf(struct bpf_d *);
int bpf_zerocopy_canwritebuf(struct bpf_d *);
void bpf_zerocopy_free(struct bpf_d *d);
int bpf_zerocopy_ioctl_getzmax(struct thread *td, struct bpf_d *d,
size_t *i);