2005-01-07 01:45:51 +00:00
|
|
|
/*-
|
2017-11-20 19:43:44 +00:00
|
|
|
* SPDX-License-Identifier: BSD-3-Clause
|
|
|
|
*
|
1994-05-24 10:09:53 +00:00
|
|
|
* Copyright (c) 1990, 1991, 1993
|
2011-12-31 07:21:28 +00:00
|
|
|
* The Regents of the University of California. All rights reserved.
|
2019-05-13 13:45:28 +00:00
|
|
|
* Copyright (c) 2019 Andrey V. Elsukov <ae@FreeBSD.org>
|
1994-05-24 10:09:53 +00:00
|
|
|
*
|
|
|
|
* This code is derived from the Stanford/CMU enet packet filter,
|
|
|
|
* (net/enet.c) distributed as part of 4.3BSD, and code contributed
|
1995-05-30 08:16:23 +00:00
|
|
|
* to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
|
1994-05-24 10:09:53 +00:00
|
|
|
* Berkeley Laboratory.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
2017-02-28 23:42:47 +00:00
|
|
|
* 3. Neither the name of the University nor the names of its contributors
|
1994-05-24 10:09:53 +00:00
|
|
|
* may be used to endorse or promote products derived from this software
|
|
|
|
* without specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*
|
2001-10-17 10:18:42 +00:00
|
|
|
* @(#)bpf.c 8.4 (Berkeley) 1/9/95
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
|
|
|
|
2007-12-25 13:24:02 +00:00
|
|
|
#include <sys/cdefs.h>
|
|
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
|
2001-01-29 13:26:14 +00:00
|
|
|
#include "opt_bpf.h"
|
2016-04-11 10:00:38 +00:00
|
|
|
#include "opt_ddb.h"
|
2001-01-29 13:26:14 +00:00
|
|
|
#include "opt_netgraph.h"
|
1994-05-24 10:09:53 +00:00
|
|
|
|
|
|
|
#include <sys/param.h>
|
1995-12-02 19:38:06 +00:00
|
|
|
#include <sys/conf.h>
|
Extract eventfilter declarations to sys/_eventfilter.h
This allows replacing "sys/eventfilter.h" includes with "sys/_eventfilter.h"
in other header files (e.g., sys/{bus,conf,cpu}.h) and reduces header
pollution substantially.
EVENTHANDLER_DECLARE and EVENTHANDLER_LIST_DECLAREs were moved out of .c
files into appropriate headers (e.g., sys/proc.h, powernv/opal.h).
As a side effect of reduced header pollution, many .c files and headers no
longer contain needed definitions. The remainder of the patch addresses
adding appropriate includes to fix those files.
LOCK_DEBUG and LOCK_FILE_LINE_ARG are moved to sys/_lock.h, as required by
sys/mutex.h since r326106 (but silently protected by header pollution prior
to this change).
No functional change (intended). Of course, any out of tree modules that
relied on header pollution for sys/eventhandler.h, sys/lock.h, or
sys/mutex.h inclusion need to be fixed. __FreeBSD_version has been bumped.
2019-05-20 00:38:23 +00:00
|
|
|
#include <sys/eventhandler.h>
|
2004-12-22 17:37:57 +00:00
|
|
|
#include <sys/fcntl.h>
|
2009-06-17 15:01:01 +00:00
|
|
|
#include <sys/jail.h>
|
Extract eventfilter declarations to sys/_eventfilter.h
This allows replacing "sys/eventfilter.h" includes with "sys/_eventfilter.h"
in other header files (e.g., sys/{bus,conf,cpu}.h) and reduces header
pollution substantially.
EVENTHANDLER_DECLARE and EVENTHANDLER_LIST_DECLAREs were moved out of .c
files into appropriate headers (e.g., sys/proc.h, powernv/opal.h).
As a side effect of reduced header pollution, many .c files and headers no
longer contain needed definitions. The remainder of the patch addresses
adding appropriate includes to fix those files.
LOCK_DEBUG and LOCK_FILE_LINE_ARG are moved to sys/_lock.h, as required by
sys/mutex.h since r326106 (but silently protected by header pollution prior
to this change).
No functional change (intended). Of course, any out of tree modules that
relied on header pollution for sys/eventhandler.h, sys/lock.h, or
sys/mutex.h inclusion need to be fixed. __FreeBSD_version has been bumped.
2019-05-20 00:38:23 +00:00
|
|
|
#include <sys/ktr.h>
|
|
|
|
#include <sys/lock.h>
|
1997-09-02 01:19:47 +00:00
|
|
|
#include <sys/malloc.h>
|
1994-05-24 10:09:53 +00:00
|
|
|
#include <sys/mbuf.h>
|
Extract eventfilter declarations to sys/_eventfilter.h
This allows replacing "sys/eventfilter.h" includes with "sys/_eventfilter.h"
in other header files (e.g., sys/{bus,conf,cpu}.h) and reduces header
pollution substantially.
EVENTHANDLER_DECLARE and EVENTHANDLER_LIST_DECLAREs were moved out of .c
files into appropriate headers (e.g., sys/proc.h, powernv/opal.h).
As a side effect of reduced header pollution, many .c files and headers no
longer contain needed definitions. The remainder of the patch addresses
adding appropriate includes to fix those files.
LOCK_DEBUG and LOCK_FILE_LINE_ARG are moved to sys/_lock.h, as required by
sys/mutex.h since r326106 (but silently protected by header pollution prior
to this change).
No functional change (intended). Of course, any out of tree modules that
relied on header pollution for sys/eventhandler.h, sys/lock.h, or
sys/mutex.h inclusion need to be fixed. __FreeBSD_version has been bumped.
2019-05-20 00:38:23 +00:00
|
|
|
#include <sys/mutex.h>
|
1994-05-24 10:09:53 +00:00
|
|
|
#include <sys/time.h>
|
2006-11-06 13:42:10 +00:00
|
|
|
#include <sys/priv.h>
|
1994-05-24 10:09:53 +00:00
|
|
|
#include <sys/proc.h>
|
1995-12-06 23:52:35 +00:00
|
|
|
#include <sys/signalvar.h>
|
1997-03-24 12:12:36 +00:00
|
|
|
#include <sys/filio.h>
|
|
|
|
#include <sys/sockio.h>
|
|
|
|
#include <sys/ttycom.h>
|
2004-12-22 17:37:57 +00:00
|
|
|
#include <sys/uio.h>
|
2018-01-25 12:13:41 +00:00
|
|
|
#include <sys/sysent.h>
|
Extract eventfilter declarations to sys/_eventfilter.h
This allows replacing "sys/eventfilter.h" includes with "sys/_eventfilter.h"
in other header files (e.g., sys/{bus,conf,cpu}.h) and reduces header
pollution substantially.
EVENTHANDLER_DECLARE and EVENTHANDLER_LIST_DECLAREs were moved out of .c
files into appropriate headers (e.g., sys/proc.h, powernv/opal.h).
As a side effect of reduced header pollution, many .c files and headers no
longer contain needed definitions. The remainder of the patch addresses
adding appropriate includes to fix those files.
LOCK_DEBUG and LOCK_FILE_LINE_ARG are moved to sys/_lock.h, as required by
sys/mutex.h since r326106 (but silently protected by header pollution prior
to this change).
No functional change (intended). Of course, any out of tree modules that
relied on header pollution for sys/eventhandler.h, sys/lock.h, or
sys/mutex.h inclusion need to be fixed. __FreeBSD_version has been bumped.
2019-05-20 00:38:23 +00:00
|
|
|
#include <sys/systm.h>
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2003-08-05 07:12:49 +00:00
|
|
|
#include <sys/event.h>
|
|
|
|
#include <sys/file.h>
|
1997-09-14 03:03:05 +00:00
|
|
|
#include <sys/poll.h>
|
2003-08-05 07:12:49 +00:00
|
|
|
#include <sys/proc.h>
|
1994-05-24 10:09:53 +00:00
|
|
|
|
|
|
|
#include <sys/socket.h>
|
|
|
|
|
2016-04-11 10:00:38 +00:00
|
|
|
#ifdef DDB
|
|
|
|
#include <ddb/ddb.h>
|
|
|
|
#endif
|
|
|
|
|
1997-11-18 16:29:53 +00:00
|
|
|
#include <net/if.h>
|
2013-10-26 17:58:36 +00:00
|
|
|
#include <net/if_var.h>
|
2015-12-31 05:03:27 +00:00
|
|
|
#include <net/if_dl.h>
|
1994-05-24 10:09:53 +00:00
|
|
|
#include <net/bpf.h>
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patches to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstract
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring changes that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental and are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
#include <net/bpf_buffer.h>
|
2005-12-06 02:58:12 +00:00
|
|
|
#ifdef BPF_JITTER
|
|
|
|
#include <net/bpf_jitter.h>
|
|
|
|
#endif
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patches to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstract
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring changes that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental and are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
#include <net/bpf_zerocopy.h>
|
1994-05-24 10:09:53 +00:00
|
|
|
#include <net/bpfdesc.h>
|
2015-12-31 05:03:27 +00:00
|
|
|
#include <net/route.h>
|
2009-08-01 19:26:27 +00:00
|
|
|
#include <net/vnet.h>
|
1994-05-24 10:09:53 +00:00
|
|
|
|
|
|
|
#include <netinet/in.h>
|
|
|
|
#include <netinet/if_ether.h>
|
|
|
|
#include <sys/kernel.h>
|
1995-12-14 09:55:16 +00:00
|
|
|
#include <sys/sysctl.h>
|
1998-01-24 02:54:56 +00:00
|
|
|
|
2006-07-26 03:15:16 +00:00
|
|
|
#include <net80211/ieee80211_freebsd.h>
|
|
|
|
|
2006-10-22 11:52:19 +00:00
|
|
|
#include <security/mac/mac_framework.h>
|
|
|
|
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patches to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstract
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring changes that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental and are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
MALLOC_DEFINE(M_BPF, "BPF", "BPF data");
|
1995-12-08 11:19:42 +00:00
|
|
|
|
2018-04-20 09:57:31 +00:00
|
|
|
static struct bpf_if_ext dead_bpf_if = {
|
2019-05-13 13:45:28 +00:00
|
|
|
.bif_dlist = CK_LIST_HEAD_INITIALIZER()
|
2018-04-20 09:57:31 +00:00
|
|
|
};
|
|
|
|
|
2015-04-20 22:08:11 +00:00
|
|
|
struct bpf_if {
|
|
|
|
#define bif_next bif_ext.bif_next
|
|
|
|
#define bif_dlist bif_ext.bif_dlist
|
|
|
|
struct bpf_if_ext bif_ext; /* public members */
|
|
|
|
u_int bif_dlt; /* link layer type */
|
|
|
|
u_int bif_hdrlen; /* length of link header */
|
2019-05-13 13:45:28 +00:00
|
|
|
struct bpfd_list bif_wlist; /* writer-only list */
|
2015-04-20 22:08:11 +00:00
|
|
|
struct ifnet *bif_ifp; /* corresponding interface */
|
2017-08-16 19:40:07 +00:00
|
|
|
struct bpf_if **bif_bpf; /* Pointer to pointer to us */
|
2019-05-13 13:45:28 +00:00
|
|
|
volatile u_int bif_refcnt;
|
|
|
|
struct epoch_context epoch_ctx;
|
2015-04-20 22:08:11 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
/* Compile-time check: bif_ext must sit at offset 0 of struct bpf_if. */
CTASSERT(offsetof(struct bpf_if, bif_ext) == 0);
|
|
|
|
|
2019-05-13 13:45:28 +00:00
|
|
|
struct bpf_program_buffer {
|
|
|
|
struct epoch_context epoch_ctx;
|
|
|
|
#ifdef BPF_JITTER
|
|
|
|
bpf_jit_filter *func;
|
|
|
|
#endif
|
|
|
|
void *buffer[0];
|
|
|
|
};
|
2018-06-19 10:34:45 +00:00
|
|
|
|
2001-01-29 13:26:14 +00:00
|
|
|
#if defined(DEV_BPF) || defined(NETGRAPH_BPF)
|
1995-11-29 10:49:16 +00:00
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
 * Numeric priority constant used by this file.
 * NOTE(review): presumably the priority passed to the sleep primitive when a
 * descriptor waits for packets -- confirm at the call site.
 */
#define PRINET 26 /* interruptible */
|
|
|
|
|
Implement flexible BPF timestamping framework.
- Allow setting format, resolution and accuracy of BPF time stamps per
listener. Previously, we were only able to use microtime(9). Now we can
set various resolutions and accuracies with ioctl(2) BIOCSTSTAMP command.
Similarly, we can get the current resolution and accuracy with BIOCGTSTAMP
command. Document all supported options in bpf(4) and their uses.
- Introduce new time stamp 'struct bpf_ts' and header 'struct bpf_xhdr'.
The new time stamp has both 64-bit second and fractional parts. bpf_xhdr
has this time stamp instead of 'struct timeval' for bh_tstamp. The new
structures let us use bh_tstamp of same size on both 32-bit and 64-bit
platforms without adding additional shims for 32-bit binaries. On 64-bit
platforms, size of BPF header does not change compared to bpf_hdr as its
members are already all 64-bit long. On 32-bit platforms, the size may
increase by 8 bytes. For backward compatibility, struct bpf_hdr with
struct timeval is still the default header unless new time stamp format is
explicitly requested. However, the behaviour may change in the future and
all relevant code is wrapped around "#ifdef BURN_BRIDGES" for now.
- Add experimental support for tagging mbufs with time stamps from a lower
layer, e.g., device driver. Currently, mbuf_tags(9) is used to tag mbufs.
The time stamps must be uptime in 'struct bintime' format as binuptime(9)
and getbinuptime(9) do.
Reviewed by: net@
2010-06-15 19:28:44 +00:00
|
|
|
/*
 * Number of bytes from the start of header structure `type' up to and
 * including its bh_hdrlen member, i.e. the structure size without any
 * padding that follows bh_hdrlen.
 */
#define	SIZEOF_BPF_HDR(type)	\
    (sizeof(((type *)0)->bh_hdrlen) + offsetof(type, bh_hdrlen))
|
|
|
|
|
2010-04-25 16:43:41 +00:00
|
|
|
#ifdef COMPAT_FREEBSD32
|
|
|
|
#include <sys/mount.h>
|
|
|
|
#include <compat/freebsd32/freebsd32.h>
|
|
|
|
/*
 * Alignment unit for the 32-bit compat layout (the size of an int32_t) and
 * the helper that rounds a length up to the next multiple of that unit.
 */
#define	BPF_ALIGNMENT32		sizeof(int32_t)
#define	BPF_WORDALIGN32(x)	roundup2(x, BPF_ALIGNMENT32)
|
2010-04-25 16:43:41 +00:00
|
|
|
|
Implement flexible BPF timestamping framework.
- Allow setting format, resolution and accuracy of BPF time stamps per
listener. Previously, we were only able to use microtime(9). Now we can
set various resolutions and accuracies with ioctl(2) BIOCSTSTAMP command.
Similarly, we can get the current resolution and accuracy with BIOCGTSTAMP
command. Document all supported options in bpf(4) and their uses.
- Introduce new time stamp 'struct bpf_ts' and header 'struct bpf_xhdr'.
The new time stamp has both 64-bit second and fractional parts. bpf_xhdr
has this time stamp instead of 'struct timeval' for bh_tstamp. The new
structures let us use bh_tstamp of same size on both 32-bit and 64-bit
platforms without adding additional shims for 32-bit binaries. On 64-bit
platforms, size of BPF header does not change compared to bpf_hdr as its
members are already all 64-bit long. On 32-bit platforms, the size may
increase by 8 bytes. For backward compatibility, struct bpf_hdr with
struct timeval is still the default header unless new time stamp format is
explicitly requested. However, the behaviour may change in the future and
all relevant code is wrapped around "#ifdef BURN_BRIDGES" for now.
- Add experimental support for tagging mbufs with time stamps from a lower
layer, e.g., device driver. Currently, mbuf_tags(9) is used to tag mbufs.
The time stamps must be uptime in 'struct bintime' format as binuptime(9)
and getbinuptime(9) do.
Reviewed by: net@
2010-06-15 19:28:44 +00:00
|
|
|
#ifndef BURN_BRIDGES
|
2010-04-25 16:43:41 +00:00
|
|
|
/*
|
|
|
|
* 32-bit version of structure prepended to each packet. We use this header
|
|
|
|
* instead of the standard one for 32-bit streams. We mark the a stream as
|
|
|
|
* 32-bit the first time we see a 32-bit compat ioctl request.
|
|
|
|
*/
|
|
|
|
struct bpf_hdr32 {
|
|
|
|
struct timeval32 bh_tstamp; /* time stamp */
|
|
|
|
uint32_t bh_caplen; /* length of captured portion */
|
|
|
|
uint32_t bh_datalen; /* original length of packet */
|
|
|
|
uint16_t bh_hdrlen; /* length of bpf header (this struct
|
|
|
|
plus alignment padding) */
|
|
|
|
};
|
2011-12-31 07:21:28 +00:00
|
|
|
#endif
|
2010-04-25 16:43:41 +00:00
|
|
|
|
|
|
|
struct bpf_program32 {
|
|
|
|
u_int bf_len;
|
|
|
|
uint32_t bf_insns;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct bpf_dltlist32 {
|
|
|
|
u_int bfl_len;
|
|
|
|
u_int bfl_list;
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * ioctl request codes whose argument uses one of the 32-bit layouts
 * (struct bpf_program32, struct timeval32, struct bpf_dltlist32).
 */
#define	BIOCSETF32	_IOW('B', 103, struct bpf_program32)
#define	BIOCSRTIMEOUT32	_IOW('B', 109, struct timeval32)
#define	BIOCGRTIMEOUT32	_IOR('B', 110, struct timeval32)
#define	BIOCGDLTLIST32	_IOWR('B', 121, struct bpf_dltlist32)
#define	BIOCSETWF32	_IOW('B', 123, struct bpf_program32)
#define	BIOCSETFNR32	_IOW('B', 130, struct bpf_program32)
|
2011-12-31 07:21:28 +00:00
|
|
|
#endif
|
2010-04-25 16:43:41 +00:00
|
|
|
|
2018-04-10 19:42:50 +00:00
|
|
|
/*
 * Wrappers around the global bpf_sx sx lock: take it exclusively, drop the
 * exclusive hold, and assert that it is currently held exclusively.
 */
#define	BPF_LOCK()		sx_xlock(&bpf_sx)
#define	BPF_UNLOCK()		sx_xunlock(&bpf_sx)
#define	BPF_LOCK_ASSERT()	sx_assert(&bpf_sx, SA_XLOCKED)
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
2005-02-28 12:35:52 +00:00
|
|
|
* bpf_iflist is a list of BPF interface structures, each corresponding to a
|
2019-05-13 13:45:28 +00:00
|
|
|
* specific DLT. The same network interface might have several BPF interface
|
2005-02-28 12:35:52 +00:00
|
|
|
* structures registered by different layers in the stack (i.e., 802.11
|
|
|
|
* frames, ethernet frames, etc).
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
2019-05-13 13:45:28 +00:00
|
|
|
CK_LIST_HEAD(bpf_iflist, bpf_if);
|
|
|
|
static struct bpf_iflist bpf_iflist;
|
2018-04-10 19:42:50 +00:00
|
|
|
static struct sx bpf_sx; /* bpf global lock */
|
2005-07-24 17:21:17 +00:00
|
|
|
static int bpf_bpfd_cnt;
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2019-05-13 13:45:28 +00:00
|
|
|
static void bpfif_ref(struct bpf_if *);
|
|
|
|
static void bpfif_rele(struct bpf_if *);
|
|
|
|
|
|
|
|
static void bpfd_ref(struct bpf_d *);
|
|
|
|
static void bpfd_rele(struct bpf_d *);
|
2006-06-15 15:39:12 +00:00
|
|
|
static void bpf_attachd(struct bpf_d *, struct bpf_if *);
|
|
|
|
static void bpf_detachd(struct bpf_d *);
|
2019-05-13 13:45:28 +00:00
|
|
|
static void bpf_detachd_locked(struct bpf_d *, bool);
|
|
|
|
static void bpfd_free(epoch_context_t);
|
2007-09-10 00:03:06 +00:00
|
|
|
static int bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
|
2015-12-31 05:03:27 +00:00
|
|
|
struct sockaddr *, int *, struct bpf_d *);
|
2002-03-19 21:54:18 +00:00
|
|
|
static int bpf_setif(struct bpf_d *, struct ifreq *);
|
|
|
|
static void bpf_timed_out(void *);
|
2001-02-16 17:10:28 +00:00
|
|
|
static __inline void
|
2002-03-19 21:54:18 +00:00
|
|
|
bpf_wakeup(struct bpf_d *);
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patchs to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstrac
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring hanges that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
static void catchpacket(struct bpf_d *, u_char *, u_int, u_int,
|
|
|
|
void (*)(struct bpf_d *, caddr_t, u_int, void *, u_int),
|
Implement flexible BPF timestamping framework.
- Allow setting format, resolution and accuracy of BPF time stamps per
listener. Previously, we were only able to use microtime(9). Now we can
set various resolutions and accuracies with ioctl(2) BIOCSTSTAMP command.
Similarly, we can get the current resolution and accuracy with BIOCGTSTAMP
command. Document all supported options in bpf(4) and their uses.
- Introduce new time stamp 'struct bpf_ts' and header 'struct bpf_xhdr'.
The new time stamp has both 64-bit second and fractional parts. bpf_xhdr
has this time stamp instead of 'struct timeval' for bh_tstamp. The new
structures let us use bh_tstamp of same size on both 32-bit and 64-bit
platforms without adding additional shims for 32-bit binaries. On 64-bit
platforms, size of BPF header does not change compared to bpf_hdr as its
members are already all 64-bit long. On 32-bit platforms, the size may
increase by 8 bytes. For backward compatibility, struct bpf_hdr with
struct timeval is still the default header unless new time stamp format is
explicitly requested. However, the behaviour may change in the future and
all relevant code is wrapped around "#ifdef BURN_BRIDGES" for now.
- Add experimental support for tagging mbufs with time stamps from a lower
layer, e.g., device driver. Currently, mbuf_tags(9) is used to tag mbufs.
The time stamps must be uptime in 'struct bintime' format as binuptime(9)
and getbinuptime(9) do.
Reviewed by: net@
2010-06-15 19:28:44 +00:00
|
|
|
struct bintime *);
|
2002-03-19 21:54:18 +00:00
|
|
|
static void reset_d(struct bpf_d *);
|
2012-05-29 22:28:46 +00:00
|
|
|
static int bpf_setf(struct bpf_d *, struct bpf_program *, u_long cmd);
|
2003-01-20 19:08:46 +00:00
|
|
|
static int bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
|
|
|
|
static int bpf_setdlt(struct bpf_d *, u_int);
|
2003-08-05 07:12:49 +00:00
|
|
|
static void filt_bpfdetach(struct knote *);
|
|
|
|
static int filt_bpfread(struct knote *, long);
|
2005-05-04 03:09:28 +00:00
|
|
|
static void bpf_drvinit(void *);
|
2005-07-24 17:21:17 +00:00
|
|
|
static int bpf_stats_sysctl(SYSCTL_HANDLER_ARGS);
|
|
|
|
|
|
|
|
SYSCTL_NODE(_net, OID_AUTO, bpf, CTLFLAG_RW, 0, "bpf sysctl");
|
2008-08-29 20:34:06 +00:00
|
|
|
int bpf_maxinsns = BPF_MAXINSNS;
|
2005-07-24 17:21:17 +00:00
|
|
|
SYSCTL_INT(_net_bpf, OID_AUTO, maxinsns, CTLFLAG_RW,
|
|
|
|
&bpf_maxinsns, 0, "Maximum bpf program instructions");
|
2009-03-10 14:28:19 +00:00
|
|
|
static int bpf_zerocopy_enable = 0;
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patchs to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstrac
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring hanges that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
SYSCTL_INT(_net_bpf, OID_AUTO, zerocopy_enable, CTLFLAG_RW,
|
|
|
|
&bpf_zerocopy_enable, 0, "Enable new zero-copy BPF buffer sessions");
|
2011-11-07 15:43:11 +00:00
|
|
|
/* net.bpf.stats: sysctl node whose handler is bpf_stats_sysctl(). */
static SYSCTL_NODE(_net_bpf, OID_AUTO, stats, CTLFLAG_MPSAFE | CTLFLAG_RW,
    bpf_stats_sysctl, "bpf statistics portal");
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2018-07-24 16:35:52 +00:00
|
|
|
VNET_DEFINE_STATIC(int, bpf_optimize_writers) = 0;
|
2012-04-06 06:55:21 +00:00
|
|
|
#define V_bpf_optimize_writers VNET(bpf_optimize_writers)
|
2014-11-07 09:39:05 +00:00
|
|
|
SYSCTL_INT(_net_bpf, OID_AUTO, optimize_writers, CTLFLAG_VNET | CTLFLAG_RW,
|
|
|
|
&VNET_NAME(bpf_optimize_writers), 0,
|
2012-04-06 06:55:21 +00:00
|
|
|
"Do not send packets until BPF program is set");
|
|
|
|
|
1995-12-08 11:19:42 +00:00
|
|
|
static d_open_t bpfopen;
|
|
|
|
static d_read_t bpfread;
|
|
|
|
static d_write_t bpfwrite;
|
|
|
|
static d_ioctl_t bpfioctl;
|
1997-09-14 03:03:05 +00:00
|
|
|
static d_poll_t bpfpoll;
|
2003-08-05 07:12:49 +00:00
|
|
|
static d_kqfilter_t bpfkqfilter;
|
1995-12-08 11:19:42 +00:00
|
|
|
|
1999-05-30 16:53:49 +00:00
|
|
|
static struct cdevsw bpf_cdevsw = {
|
2004-02-21 21:10:55 +00:00
|
|
|
.d_version = D_VERSION,
|
2003-03-03 12:15:54 +00:00
|
|
|
.d_open = bpfopen,
|
|
|
|
.d_read = bpfread,
|
|
|
|
.d_write = bpfwrite,
|
|
|
|
.d_ioctl = bpfioctl,
|
|
|
|
.d_poll = bpfpoll,
|
|
|
|
.d_name = "bpf",
|
2003-08-05 07:12:49 +00:00
|
|
|
.d_kqfilter = bpfkqfilter,
|
1999-05-30 16:53:49 +00:00
|
|
|
};
|
1995-12-08 11:19:42 +00:00
|
|
|
|
2009-09-12 20:03:45 +00:00
|
|
|
static struct filterops bpfread_filtops = {
|
|
|
|
.f_isfd = 1,
|
|
|
|
.f_detach = filt_bpfdetach,
|
|
|
|
.f_event = filt_bpfread,
|
|
|
|
};
|
1995-12-08 11:19:42 +00:00
|
|
|
|
2012-05-21 22:13:48 +00:00
|
|
|
/*
|
2019-05-13 13:45:28 +00:00
|
|
|
* LOCKING MODEL USED BY BPF
|
|
|
|
*
|
2012-05-21 22:13:48 +00:00
|
|
|
* Locks:
|
2019-05-13 13:45:28 +00:00
|
|
|
* 1) global lock (BPF_LOCK). Sx, used to protect some global counters,
|
|
|
|
* every bpf_iflist changes, serializes ioctl access to bpf descriptors.
|
|
|
|
* 2) Descriptor lock. Mutex, used to protect BPF buffers and various
|
|
|
|
* structure fields used by bpf_*tap* code.
|
2012-05-21 22:13:48 +00:00
|
|
|
*
|
2019-05-13 13:45:28 +00:00
|
|
|
* Lock order: global lock, then descriptor lock.
|
2012-05-21 22:13:48 +00:00
|
|
|
*
|
2019-05-13 13:45:28 +00:00
|
|
|
* There are several possible consumers:
|
2012-05-21 22:13:48 +00:00
|
|
|
*
|
2019-05-13 13:45:28 +00:00
|
|
|
* 1. The kernel registers interface pointer with bpfattach().
|
|
|
|
* Each call allocates new bpf_if structure, references ifnet pointer
|
|
|
|
* and links bpf_if into bpf_iflist chain. This is protected with global
|
|
|
|
* lock.
|
2012-05-21 22:13:48 +00:00
|
|
|
*
|
2019-05-13 13:45:28 +00:00
|
|
|
* 2. A userland application issues ioctl() calls on a bpf_d descriptor.
|
|
|
|
* All such calls are serialized with global lock. BPF filters can be
|
|
|
|
* changed, but pointer to old filter will be freed using epoch_call().
|
|
|
|
* Thus it should be safe for bpf_tap/bpf_mtap* code to do access to
|
|
|
|
* filter pointers, even if change will happen during bpf_tap execution.
|
|
|
|
* Destruction of a bpf_d descriptor is also done using epoch_call().
|
2012-05-21 22:13:48 +00:00
|
|
|
*
|
2019-05-13 13:45:28 +00:00
|
|
|
* 3. An userland application can write packets into bpf_d descriptor.
|
|
|
|
* There we need to be sure, that ifnet won't disappear during bpfwrite().
|
2012-05-21 22:13:48 +00:00
|
|
|
*
|
2019-05-13 13:45:28 +00:00
|
|
|
* 4. The kernel invokes bpf_tap/bpf_mtap* functions. The access to
|
|
|
|
* bif_dlist is protected with net_epoch_preempt section. So, it should
|
|
|
|
* be safe to make access to bpf_d descriptor inside the section.
|
|
|
|
*
|
|
|
|
* 5. The kernel invokes bpfdetach() on interface destroying. All lists
|
|
|
|
* are modified with global lock held and actual free() is done using
|
|
|
|
* epoch_call().
|
2012-05-21 22:13:48 +00:00
|
|
|
*/
|
|
|
|
|
2019-05-13 13:45:28 +00:00
|
|
|
static void
|
|
|
|
bpfif_free(epoch_context_t ctx)
|
|
|
|
{
|
|
|
|
struct bpf_if *bp;
|
|
|
|
|
|
|
|
bp = __containerof(ctx, struct bpf_if, epoch_ctx);
|
|
|
|
if_rele(bp->bif_ifp);
|
|
|
|
free(bp, M_BPF);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
bpfif_ref(struct bpf_if *bp)
|
|
|
|
{
|
|
|
|
|
|
|
|
refcount_acquire(&bp->bif_refcnt);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
bpfif_rele(struct bpf_if *bp)
|
|
|
|
{
|
|
|
|
|
|
|
|
if (!refcount_release(&bp->bif_refcnt))
|
|
|
|
return;
|
|
|
|
epoch_call(net_epoch_preempt, &bp->epoch_ctx, bpfif_free);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
bpfd_ref(struct bpf_d *d)
|
|
|
|
{
|
|
|
|
|
|
|
|
refcount_acquire(&d->bd_refcnt);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
bpfd_rele(struct bpf_d *d)
|
|
|
|
{
|
|
|
|
|
|
|
|
if (!refcount_release(&d->bd_refcnt))
|
|
|
|
return;
|
|
|
|
epoch_call(net_epoch_preempt, &d->epoch_ctx, bpfd_free);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct bpf_program_buffer*
|
|
|
|
bpf_program_buffer_alloc(size_t size, int flags)
|
|
|
|
{
|
|
|
|
|
|
|
|
return (malloc(sizeof(struct bpf_program_buffer) + size,
|
|
|
|
M_BPF, flags));
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
bpf_program_buffer_free(epoch_context_t ctx)
|
|
|
|
{
|
|
|
|
struct bpf_program_buffer *ptr;
|
|
|
|
|
|
|
|
ptr = __containerof(ctx, struct bpf_program_buffer, epoch_ctx);
|
|
|
|
#ifdef BPF_JITTER
|
|
|
|
if (ptr->func != NULL)
|
|
|
|
bpf_destroy_jit_filter(ptr->func);
|
|
|
|
#endif
|
|
|
|
free(ptr, M_BPF);
|
|
|
|
}
|
|
|
|
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patches to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstract
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring changes that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental and are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
/*
|
|
|
|
* Wrapper functions for various buffering methods. If the set of buffer
|
|
|
|
* modes expands, we will probably want to introduce a switch data structure
|
|
|
|
* similar to protosw, etc.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
bpf_append_bytes(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
|
|
|
|
u_int len)
|
|
|
|
{
|
|
|
|
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_LOCK_ASSERT(d);
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patchs to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstrac
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring hanges that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
|
|
|
|
switch (d->bd_bufmode) {
|
|
|
|
case BPF_BUFMODE_BUFFER:
|
|
|
|
return (bpf_buffer_append_bytes(d, buf, offset, src, len));
|
|
|
|
|
|
|
|
case BPF_BUFMODE_ZBUF:
|
2018-03-20 22:57:06 +00:00
|
|
|
counter_u64_add(d->bd_zcopy, 1);
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patchs to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstrac
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring hanges that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
return (bpf_zerocopy_append_bytes(d, buf, offset, src, len));
|
|
|
|
|
|
|
|
default:
|
|
|
|
panic("bpf_buf_append_bytes");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
bpf_append_mbuf(struct bpf_d *d, caddr_t buf, u_int offset, void *src,
|
|
|
|
u_int len)
|
|
|
|
{
|
|
|
|
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_LOCK_ASSERT(d);
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patchs to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstrac
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring hanges that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
|
|
|
|
switch (d->bd_bufmode) {
|
|
|
|
case BPF_BUFMODE_BUFFER:
|
|
|
|
return (bpf_buffer_append_mbuf(d, buf, offset, src, len));
|
|
|
|
|
|
|
|
case BPF_BUFMODE_ZBUF:
|
2018-03-20 22:57:06 +00:00
|
|
|
counter_u64_add(d->bd_zcopy, 1);
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patchs to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstrac
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring hanges that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
return (bpf_zerocopy_append_mbuf(d, buf, offset, src, len));
|
|
|
|
|
|
|
|
default:
|
|
|
|
panic("bpf_buf_append_mbuf");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-07-05 20:11:28 +00:00
|
|
|
/*
|
|
|
|
* This function gets called when the free buffer is re-assigned.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
bpf_buf_reclaimed(struct bpf_d *d)
|
|
|
|
{
|
|
|
|
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_LOCK_ASSERT(d);
|
2008-07-05 20:11:28 +00:00
|
|
|
|
|
|
|
switch (d->bd_bufmode) {
|
|
|
|
case BPF_BUFMODE_BUFFER:
|
|
|
|
return;
|
|
|
|
|
|
|
|
case BPF_BUFMODE_ZBUF:
|
|
|
|
bpf_zerocopy_buf_reclaimed(d);
|
|
|
|
return;
|
|
|
|
|
|
|
|
default:
|
|
|
|
panic("bpf_buf_reclaimed");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patches to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstract
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring changes that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental and are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
/*
|
|
|
|
* If the buffer mechanism has a way to decide that a held buffer can be made
|
|
|
|
* free, then it is exposed via the bpf_canfreebuf() interface. (1) is
|
|
|
|
* returned if the buffer can be discarded, (0) is returned if it cannot.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
bpf_canfreebuf(struct bpf_d *d)
|
|
|
|
{
|
|
|
|
|
|
|
|
BPFD_LOCK_ASSERT(d);
|
|
|
|
|
|
|
|
switch (d->bd_bufmode) {
|
|
|
|
case BPF_BUFMODE_ZBUF:
|
|
|
|
return (bpf_zerocopy_canfreebuf(d));
|
|
|
|
}
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2008-04-07 02:51:00 +00:00
|
|
|
/*
|
|
|
|
* Allow the buffer model to indicate that the current store buffer is
|
|
|
|
* immutable, regardless of the appearance of space. Return (1) if the
|
|
|
|
* buffer is writable, and (0) if not.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
bpf_canwritebuf(struct bpf_d *d)
|
|
|
|
{
|
|
|
|
BPFD_LOCK_ASSERT(d);
|
|
|
|
|
|
|
|
switch (d->bd_bufmode) {
|
|
|
|
case BPF_BUFMODE_ZBUF:
|
|
|
|
return (bpf_zerocopy_canwritebuf(d));
|
|
|
|
}
|
|
|
|
return (1);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Notify buffer model that an attempt to write to the store buffer has
|
|
|
|
* resulted in a dropped packet, in which case the buffer may be considered
|
|
|
|
* full.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
bpf_buffull(struct bpf_d *d)
|
|
|
|
{
|
|
|
|
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_LOCK_ASSERT(d);
|
2008-04-07 02:51:00 +00:00
|
|
|
|
|
|
|
switch (d->bd_bufmode) {
|
|
|
|
case BPF_BUFMODE_ZBUF:
|
|
|
|
bpf_zerocopy_buffull(d);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Notify the buffer model that a buffer has moved into the hold position.
|
|
|
|
*/
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patchs to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstrac
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring hanges that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
void
|
|
|
|
bpf_bufheld(struct bpf_d *d)
|
|
|
|
{
|
|
|
|
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_LOCK_ASSERT(d);
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patchs to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstrac
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring hanges that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
|
|
|
|
switch (d->bd_bufmode) {
|
|
|
|
case BPF_BUFMODE_ZBUF:
|
|
|
|
bpf_zerocopy_bufheld(d);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
bpf_free(struct bpf_d *d)
|
|
|
|
{
|
|
|
|
|
|
|
|
switch (d->bd_bufmode) {
|
|
|
|
case BPF_BUFMODE_BUFFER:
|
|
|
|
return (bpf_buffer_free(d));
|
|
|
|
|
|
|
|
case BPF_BUFMODE_ZBUF:
|
|
|
|
return (bpf_zerocopy_free(d));
|
|
|
|
|
|
|
|
default:
|
|
|
|
panic("bpf_buf_free");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
bpf_uiomove(struct bpf_d *d, caddr_t buf, u_int len, struct uio *uio)
|
|
|
|
{
|
|
|
|
|
|
|
|
if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
|
|
|
|
return (EOPNOTSUPP);
|
|
|
|
return (bpf_buffer_uiomove(d, buf, len, uio));
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
bpf_ioctl_sblen(struct bpf_d *d, u_int *i)
|
|
|
|
{
|
|
|
|
|
|
|
|
if (d->bd_bufmode != BPF_BUFMODE_BUFFER)
|
|
|
|
return (EOPNOTSUPP);
|
|
|
|
return (bpf_buffer_ioctl_sblen(d, i));
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
bpf_ioctl_getzmax(struct thread *td, struct bpf_d *d, size_t *i)
|
|
|
|
{
|
|
|
|
|
|
|
|
if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
|
|
|
|
return (EOPNOTSUPP);
|
|
|
|
return (bpf_zerocopy_ioctl_getzmax(td, d, i));
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
bpf_ioctl_rotzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
|
|
|
|
{
|
|
|
|
|
|
|
|
if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
|
|
|
|
return (EOPNOTSUPP);
|
|
|
|
return (bpf_zerocopy_ioctl_rotzbuf(td, d, bz));
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
bpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
|
|
|
|
{
|
|
|
|
|
|
|
|
if (d->bd_bufmode != BPF_BUFMODE_ZBUF)
|
|
|
|
return (EOPNOTSUPP);
|
|
|
|
return (bpf_zerocopy_ioctl_setzbuf(td, d, bz));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* General BPF functions.
|
|
|
|
*/
|
1994-05-24 10:09:53 +00:00
|
|
|
static int
|
2007-09-10 00:03:06 +00:00
|
|
|
bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
|
2015-12-31 05:03:27 +00:00
|
|
|
struct sockaddr *sockp, int *hdrlen, struct bpf_d *d)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
2006-07-26 03:15:16 +00:00
|
|
|
const struct ieee80211_bpf_params *p;
|
2007-09-10 00:03:06 +00:00
|
|
|
struct ether_header *eh;
|
1994-05-24 10:09:53 +00:00
|
|
|
struct mbuf *m;
|
|
|
|
int error;
|
|
|
|
int len;
|
|
|
|
int hlen;
|
2005-08-22 19:35:48 +00:00
|
|
|
int slen;
|
1994-05-24 10:09:53 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Build a sockaddr based on the data link layer type.
|
|
|
|
* We do this at this level because the ethernet header
|
|
|
|
* is copied directly into the data field of the sockaddr.
|
|
|
|
* In the case of SLIP, there is no header and the packet
|
|
|
|
* is forwarded as is.
|
|
|
|
* Also, we are careful to leave room at the front of the mbuf
|
|
|
|
* for the link level header.
|
|
|
|
*/
|
|
|
|
switch (linktype) {
|
|
|
|
|
|
|
|
case DLT_SLIP:
|
|
|
|
sockp->sa_family = AF_INET;
|
|
|
|
hlen = 0;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case DLT_EN10MB:
|
|
|
|
sockp->sa_family = AF_UNSPEC;
|
|
|
|
/* XXX Would MAXLINKHDR be better? */
|
2003-03-03 05:04:57 +00:00
|
|
|
hlen = ETHER_HDR_LEN;
|
1994-05-24 10:09:53 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
case DLT_FDDI:
|
1995-03-14 09:16:07 +00:00
|
|
|
sockp->sa_family = AF_IMPLINK;
|
|
|
|
hlen = 0;
|
1994-05-24 10:09:53 +00:00
|
|
|
break;
|
|
|
|
|
1998-08-18 10:13:11 +00:00
|
|
|
case DLT_RAW:
|
1994-05-24 10:09:53 +00:00
|
|
|
sockp->sa_family = AF_UNSPEC;
|
|
|
|
hlen = 0;
|
|
|
|
break;
|
|
|
|
|
2005-06-26 18:11:11 +00:00
|
|
|
case DLT_NULL:
|
|
|
|
/*
|
|
|
|
* null interface types require a 4 byte pseudo header which
|
|
|
|
* corresponds to the address family of the packet.
|
|
|
|
*/
|
|
|
|
sockp->sa_family = AF_UNSPEC;
|
|
|
|
hlen = 4;
|
|
|
|
break;
|
|
|
|
|
1998-07-29 05:35:16 +00:00
|
|
|
case DLT_ATM_RFC1483:
|
|
|
|
/*
|
|
|
|
* en atm driver requires 4-byte atm pseudo header.
|
|
|
|
* though it isn't standard, vpi:vci needs to be
|
|
|
|
* specified anyway.
|
|
|
|
*/
|
|
|
|
sockp->sa_family = AF_UNSPEC;
|
2003-03-02 15:56:49 +00:00
|
|
|
hlen = 12; /* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
|
1998-07-29 05:35:16 +00:00
|
|
|
break;
|
|
|
|
|
2000-09-16 14:17:15 +00:00
|
|
|
case DLT_PPP:
|
|
|
|
sockp->sa_family = AF_UNSPEC;
|
|
|
|
hlen = 4; /* This should match PPP_HDRLEN */
|
|
|
|
break;
|
|
|
|
|
2006-07-26 03:15:16 +00:00
|
|
|
case DLT_IEEE802_11: /* IEEE 802.11 wireless */
|
|
|
|
sockp->sa_family = AF_IEEE80211;
|
|
|
|
hlen = 0;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case DLT_IEEE802_11_RADIO: /* IEEE 802.11 wireless w/ phy params */
|
|
|
|
sockp->sa_family = AF_IEEE80211;
|
|
|
|
sockp->sa_len = 12; /* XXX != 0 */
|
|
|
|
hlen = sizeof(struct ieee80211_bpf_params);
|
|
|
|
break;
|
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
default:
|
|
|
|
return (EIO);
|
|
|
|
}
|
|
|
|
|
|
|
|
len = uio->uio_resid;
|
2013-01-24 14:29:31 +00:00
|
|
|
if (len < hlen || len - hlen > ifp->if_mtu)
|
2005-06-26 18:11:11 +00:00
|
|
|
return (EMSGSIZE);
|
|
|
|
|
2013-03-12 13:42:47 +00:00
|
|
|
m = m_get2(len, M_WAITOK, MT_DATA, M_PKTHDR);
|
2013-01-24 14:29:31 +00:00
|
|
|
if (m == NULL)
|
1994-05-24 10:09:53 +00:00
|
|
|
return (EIO);
|
1995-09-22 17:57:48 +00:00
|
|
|
m->m_pkthdr.len = m->m_len = len;
|
1994-05-24 10:09:53 +00:00
|
|
|
*mp = m;
|
2002-11-14 23:24:13 +00:00
|
|
|
|
2005-08-22 19:35:48 +00:00
|
|
|
error = uiomove(mtod(m, u_char *), len, uio);
|
|
|
|
if (error)
|
|
|
|
goto bad;
|
|
|
|
|
2015-12-31 05:03:27 +00:00
|
|
|
slen = bpf_filter(d->bd_wfilter, mtod(m, u_char *), len, len);
|
2005-08-22 19:35:48 +00:00
|
|
|
if (slen == 0) {
|
|
|
|
error = EPERM;
|
|
|
|
goto bad;
|
|
|
|
}
|
|
|
|
|
2007-09-10 00:03:06 +00:00
|
|
|
/* Check for multicast destination */
|
|
|
|
switch (linktype) {
|
|
|
|
case DLT_EN10MB:
|
|
|
|
eh = mtod(m, struct ether_header *);
|
|
|
|
if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
|
|
|
|
if (bcmp(ifp->if_broadcastaddr, eh->ether_dhost,
|
|
|
|
ETHER_ADDR_LEN) == 0)
|
|
|
|
m->m_flags |= M_BCAST;
|
|
|
|
else
|
|
|
|
m->m_flags |= M_MCAST;
|
|
|
|
}
|
2015-12-31 05:03:27 +00:00
|
|
|
if (d->bd_hdrcmplt == 0) {
|
|
|
|
memcpy(eh->ether_shost, IF_LLADDR(ifp),
|
|
|
|
sizeof(eh->ether_shost));
|
|
|
|
}
|
2007-09-10 00:03:06 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
2005-08-22 19:35:48 +00:00
|
|
|
* Make room for link header, and copy it to sockaddr
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
|
|
|
if (hlen != 0) {
|
2006-07-26 03:15:16 +00:00
|
|
|
if (sockp->sa_family == AF_IEEE80211) {
|
|
|
|
/*
|
|
|
|
* Collect true length from the parameter header
|
|
|
|
* NB: sockp is known to be zero'd so if we do a
|
|
|
|
* short copy unspecified parameters will be
|
|
|
|
* zero.
|
|
|
|
* NB: packet may not be aligned after stripping
|
|
|
|
* bpf params
|
|
|
|
* XXX check ibp_vers
|
|
|
|
*/
|
|
|
|
p = mtod(m, const struct ieee80211_bpf_params *);
|
|
|
|
hlen = p->ibp_len;
|
|
|
|
if (hlen > sizeof(sockp->sa_data)) {
|
|
|
|
error = EINVAL;
|
|
|
|
goto bad;
|
|
|
|
}
|
|
|
|
}
|
2013-11-01 20:53:49 +00:00
|
|
|
bcopy(mtod(m, const void *), sockp->sa_data, hlen);
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
2007-02-26 22:24:14 +00:00
|
|
|
*hdrlen = hlen;
|
2005-08-22 19:35:48 +00:00
|
|
|
|
|
|
|
return (0);
|
2002-11-14 23:24:13 +00:00
|
|
|
bad:
|
1994-05-24 10:09:53 +00:00
|
|
|
m_freem(m);
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2019-05-13 13:45:28 +00:00
|
|
|
* Attach descriptor to the bpf interface, i.e. make d listen on bp,
|
|
|
|
* then reset its buffers and counters with reset_d().
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
|
|
|
static void
|
2006-06-15 15:39:12 +00:00
|
|
|
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
2012-05-21 22:13:48 +00:00
|
|
|
int op_w;
|
|
|
|
|
|
|
|
BPF_LOCK_ASSERT();
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Save sysctl value to protect from sysctl change
|
|
|
|
* between reads
|
|
|
|
*/
|
2015-04-20 10:44:46 +00:00
|
|
|
op_w = V_bpf_optimize_writers || d->bd_writer;
|
2012-05-21 22:13:48 +00:00
|
|
|
|
|
|
|
if (d->bd_bif != NULL)
|
2019-05-13 13:45:28 +00:00
|
|
|
bpf_detachd_locked(d, false);
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
2012-04-06 06:55:21 +00:00
|
|
|
* Point d at bp, and add d to the interface's list.
|
2015-07-31 21:43:27 +00:00
|
|
|
* Since there are many applications using BPF for
|
2012-04-06 06:55:21 +00:00
|
|
|
* sending raw packets only (dhcpd, cdpd are good examples)
|
|
|
|
* we can delay adding d to the list of active listeners until
|
|
|
|
* some filter is configured.
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
|
|
|
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_LOCK(d);
|
2019-05-13 13:45:28 +00:00
|
|
|
/*
|
|
|
|
* Hold reference to bpif while descriptor uses this interface.
|
|
|
|
*/
|
|
|
|
bpfif_ref(bp);
|
2012-05-21 22:13:48 +00:00
|
|
|
d->bd_bif = bp;
|
|
|
|
if (op_w != 0) {
|
2012-04-06 06:55:21 +00:00
|
|
|
/* Add to writers-only list */
|
2019-05-13 13:45:28 +00:00
|
|
|
CK_LIST_INSERT_HEAD(&bp->bif_wlist, d, bd_next);
|
2012-04-06 06:55:21 +00:00
|
|
|
/*
|
|
|
|
* We decrement bd_writer on every filter set operation.
|
|
|
|
* First BIOCSETF is done by pcap_open_live() to set up
|
2019-05-13 13:45:28 +00:00
|
|
|
* snap length. After that appliation usually sets its own
|
|
|
|
* filter.
|
2012-04-06 06:55:21 +00:00
|
|
|
*/
|
|
|
|
d->bd_writer = 2;
|
|
|
|
} else
|
2019-05-13 13:45:28 +00:00
|
|
|
CK_LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
|
2012-04-06 06:55:21 +00:00
|
|
|
|
2019-05-13 13:45:28 +00:00
|
|
|
reset_d(d);
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_UNLOCK(d);
|
2005-07-24 17:21:17 +00:00
|
|
|
bpf_bpfd_cnt++;
|
2012-04-06 06:55:21 +00:00
|
|
|
|
|
|
|
CTR3(KTR_NET, "%s: bpf_attach called by pid %d, adding to %s list",
|
|
|
|
__func__, d->bd_pid, d->bd_writer ? "writer" : "active");
|
|
|
|
|
2012-05-21 22:13:48 +00:00
|
|
|
if (op_w == 0)
|
2012-04-06 06:55:21 +00:00
|
|
|
EVENTHANDLER_INVOKE(bpf_track, bp->bif_ifp, bp->bif_dlt, 1);
|
|
|
|
}
|
|
|
|
|
2014-06-11 11:27:44 +00:00
|
|
|
/*
|
|
|
|
* Check if we need to upgrade our descriptor @d from write-only mode.
|
|
|
|
*/
|
|
|
|
static int
|
2019-05-13 13:45:28 +00:00
|
|
|
bpf_check_upgrade(u_long cmd, struct bpf_d *d, struct bpf_insn *fcode,
|
|
|
|
int flen)
|
2014-06-11 11:27:44 +00:00
|
|
|
{
|
|
|
|
int is_snap, need_upgrade;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check if we've already upgraded or new filter is empty.
|
|
|
|
*/
|
|
|
|
if (d->bd_writer == 0 || fcode == NULL)
|
|
|
|
return (0);
|
|
|
|
|
|
|
|
need_upgrade = 0;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check if cmd looks like snaplen setting from
|
|
|
|
* pcap_bpf.c:pcap_open_live().
|
|
|
|
* Note we're not checking .k value here:
|
2017-12-27 03:23:21 +00:00
|
|
|
* while pcap_open_live() definitely sets to non-zero value,
|
2014-06-11 11:27:44 +00:00
|
|
|
* we'd prefer to treat k=0 (deny ALL) case the same way: e.g.
|
|
|
|
* do not consider upgrading immediately
|
|
|
|
*/
|
2019-05-13 13:45:28 +00:00
|
|
|
if (cmd == BIOCSETF && flen == 1 &&
|
|
|
|
fcode[0].code == (BPF_RET | BPF_K))
|
2014-06-11 11:27:44 +00:00
|
|
|
is_snap = 1;
|
|
|
|
else
|
|
|
|
is_snap = 0;
|
|
|
|
|
|
|
|
if (is_snap == 0) {
|
|
|
|
/*
|
|
|
|
* We're setting first filter and it doesn't look like
|
|
|
|
* setting snaplen. We're probably using bpf directly.
|
|
|
|
* Upgrade immediately.
|
|
|
|
*/
|
|
|
|
need_upgrade = 1;
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* Do not require upgrade by first BIOCSETF
|
|
|
|
* (used to set snaplen) by pcap_open_live().
|
|
|
|
*/
|
|
|
|
|
|
|
|
if (--d->bd_writer == 0) {
|
|
|
|
/*
|
|
|
|
* First snaplen filter has already
|
|
|
|
* been set. This is probably catch-all
|
|
|
|
* filter
|
|
|
|
*/
|
|
|
|
need_upgrade = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
CTR5(KTR_NET,
|
|
|
|
"%s: filter function set by pid %d, "
|
|
|
|
"bd_writer counter %d, snap %d upgrade %d",
|
|
|
|
__func__, d->bd_pid, d->bd_writer,
|
|
|
|
is_snap, need_upgrade);
|
|
|
|
|
|
|
|
return (need_upgrade);
|
|
|
|
}
|
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
|
|
|
* Detach a file from its interface.
|
|
|
|
*/
|
|
|
|
static void
|
2006-06-15 15:39:12 +00:00
|
|
|
bpf_detachd(struct bpf_d *d)
|
2012-05-21 22:13:48 +00:00
|
|
|
{
|
|
|
|
BPF_LOCK();
|
2019-05-13 13:45:28 +00:00
|
|
|
bpf_detachd_locked(d, false);
|
2012-05-21 22:13:48 +00:00
|
|
|
BPF_UNLOCK();
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2019-05-13 13:45:28 +00:00
|
|
|
bpf_detachd_locked(struct bpf_d *d, bool detached_ifp)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
|
|
|
struct bpf_if *bp;
|
2004-09-09 04:11:12 +00:00
|
|
|
struct ifnet *ifp;
|
2019-05-13 13:45:28 +00:00
|
|
|
int error;
|
2012-04-06 06:55:21 +00:00
|
|
|
|
2012-04-06 06:53:58 +00:00
|
|
|
BPF_LOCK_ASSERT();
|
2019-05-13 13:45:28 +00:00
|
|
|
CTR2(KTR_NET, "%s: detach required by pid %d", __func__, d->bd_pid);
|
2012-04-06 06:53:58 +00:00
|
|
|
|
2012-05-21 22:13:48 +00:00
|
|
|
/* Check if descriptor is attached */
|
|
|
|
if ((bp = d->bd_bif) == NULL)
|
|
|
|
return;
|
|
|
|
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_LOCK(d);
|
Rework r348303 to reduce the time of holding global BPF lock.
It appeared that using NET_EPOCH_WAIT() while holding global BPF lock
can lead to another panic:
spin lock 0xfffff800183c9840 (turnstile lock) held by 0xfffff80018e2c5a0 (tid 100325) too long
panic: spin lock held too long
...
#0 sched_switch (td=0xfffff80018e2c5a0, newtd=0xfffff8000389e000, flags=<optimized out>) at /usr/src/sys/kern/sched_ule.c:2133
#1 0xffffffff80bf9912 in mi_switch (flags=256, newtd=0x0) at /usr/src/sys/kern/kern_synch.c:439
#2 0xffffffff80c21db7 in sched_bind (td=<optimized out>, cpu=<optimized out>) at /usr/src/sys/kern/sched_ule.c:2704
#3 0xffffffff80c34c33 in epoch_block_handler_preempt (global=<optimized out>, cr=0xfffffe00005a1a00, arg=<optimized out>)
at /usr/src/sys/kern/subr_epoch.c:394
#4 0xffffffff803c741b in epoch_block (global=<optimized out>, cr=<optimized out>, cb=<optimized out>, ct=<optimized out>)
at /usr/src/sys/contrib/ck/src/ck_epoch.c:416
#5 ck_epoch_synchronize_wait (global=0xfffff8000380cd80, cb=<optimized out>, ct=<optimized out>) at /usr/src/sys/contrib/ck/src/ck_epoch.c:465
#6 0xffffffff80c3475e in epoch_wait_preempt (epoch=0xfffff8000380cd80) at /usr/src/sys/kern/subr_epoch.c:513
#7 0xffffffff80ce970b in bpf_detachd_locked (d=0xfffff801d309cc00, detached_ifp=<optimized out>) at /usr/src/sys/net/bpf.c:856
#8 0xffffffff80ced166 in bpf_detachd (d=<optimized out>) at /usr/src/sys/net/bpf.c:836
#9 bpf_dtor (data=0xfffff801d309cc00) at /usr/src/sys/net/bpf.c:914
To fix this add the check to the catchpacket() that BPF descriptor was
not detached just before we acquired BPFD_LOCK().
Reported by: slavash
Tested by: slavash
MFC after: 1 week
2019-05-28 11:45:00 +00:00
|
|
|
/* Remove d from the interface's descriptor list. */
|
|
|
|
CK_LIST_REMOVE(d, bd_next);
|
2012-04-06 06:55:21 +00:00
|
|
|
/* Save bd_writer value */
|
|
|
|
error = d->bd_writer;
|
2012-04-06 06:53:58 +00:00
|
|
|
ifp = bp->bif_ifp;
|
2004-07-24 16:58:56 +00:00
|
|
|
d->bd_bif = NULL;
|
2019-05-13 13:45:28 +00:00
|
|
|
if (detached_ifp) {
|
|
|
|
/*
|
|
|
|
* Notify descriptor as it's detached, so that any
|
|
|
|
* sleepers wake up and get ENXIO.
|
|
|
|
*/
|
|
|
|
bpf_wakeup(d);
|
|
|
|
}
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_UNLOCK(d);
|
2012-04-06 06:53:58 +00:00
|
|
|
bpf_bpfd_cnt--;
|
2004-09-09 04:11:12 +00:00
|
|
|
|
2012-04-06 06:55:21 +00:00
|
|
|
/* Call event handler iff d is attached */
|
|
|
|
if (error == 0)
|
|
|
|
EVENTHANDLER_INVOKE(bpf_track, ifp, bp->bif_dlt, 0);
|
2009-05-18 17:18:40 +00:00
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
|
|
|
* Check if this descriptor had requested promiscuous mode.
|
2019-05-13 13:45:28 +00:00
|
|
|
* If so and ifnet is not detached, turn it off.
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
2019-05-13 13:45:28 +00:00
|
|
|
if (d->bd_promisc && !detached_ifp) {
|
1994-05-24 10:09:53 +00:00
|
|
|
d->bd_promisc = 0;
|
2008-11-26 22:32:07 +00:00
|
|
|
CURVNET_SET(ifp->if_vnet);
|
2004-09-09 04:11:12 +00:00
|
|
|
error = ifpromisc(ifp, 0);
|
2008-11-26 22:32:07 +00:00
|
|
|
CURVNET_RESTORE();
|
2000-06-01 21:57:13 +00:00
|
|
|
if (error != 0 && error != ENXIO) {
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
2000-06-01 21:57:13 +00:00
|
|
|
* ENXIO can happen if a pccard is unplugged
|
1994-05-24 10:09:53 +00:00
|
|
|
* Something is really wrong if we were able to put
|
|
|
|
* the driver into promiscuous mode, but can't
|
|
|
|
* take it out.
|
|
|
|
*/
|
2003-01-20 19:08:46 +00:00
|
|
|
if_printf(bp->bif_ifp,
|
|
|
|
"bpf_detach: ifpromisc failed (%d)\n", error);
|
2000-06-01 21:57:13 +00:00
|
|
|
}
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
2019-05-13 13:45:28 +00:00
|
|
|
bpfif_rele(bp);
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
|
2008-08-13 15:41:21 +00:00
|
|
|
/*
|
|
|
|
* Close the descriptor by detaching it from its interface,
|
|
|
|
* deallocating its buffers, and marking it free.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
bpf_dtor(void *data)
|
|
|
|
{
|
|
|
|
struct bpf_d *d = data;
|
|
|
|
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_LOCK(d);
|
2008-08-13 15:41:21 +00:00
|
|
|
if (d->bd_state == BPF_WAITING)
|
|
|
|
callout_stop(&d->bd_callout);
|
|
|
|
d->bd_state = BPF_IDLE;
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_UNLOCK(d);
|
2008-08-13 15:41:21 +00:00
|
|
|
funsetown(&d->bd_sigio);
|
2012-05-21 22:13:48 +00:00
|
|
|
bpf_detachd(d);
|
2008-08-13 15:41:21 +00:00
|
|
|
#ifdef MAC
|
|
|
|
mac_bpfdesc_destroy(d);
|
|
|
|
#endif /* MAC */
|
Fix a deficiency in the selinfo interface:
If a selinfo object is recorded (via selrecord()) and then it is
quickly destroyed, with the waiters missing the opportunity to awake,
at the next iteration they will find the selinfo object destroyed,
causing a PF#.
That happens because the selinfo interface has no way to drain the
waiters before to destroy the registered selinfo object. Also this
race is quite rare to get in practice, because it would require a
selrecord(), a poll request by another thread and a quick destruction
of the selrecord()'ed selinfo object.
Fix this by adding the seldrain() routine which should be called
before to destroy the selinfo objects (in order to avoid such case),
and fix the present cases where it might have already been called.
Sometimes, the context is safe enough to prevent this type of race,
like it happens in device drivers which installs selinfo objects on
poll callbacks. There, the destruction of the selinfo object happens
at driver detach time, when all the filedescriptors should be already
closed, thus there cannot be a race.
For this case, mfi(4) device driver can be set as an example, as it
implements a full correct logic for preventing this from happening.
Sponsored by: Sandvine Incorporated
Reported by: rstone
Tested by: pluknet
Reviewed by: jhb, kib
Approved by: re (bz)
MFC after: 3 weeks
2011-08-25 15:51:54 +00:00
|
|
|
seldrain(&d->bd_sel);
|
2008-08-13 15:41:21 +00:00
|
|
|
knlist_destroy(&d->bd_sel.si_note);
|
2010-03-12 19:14:58 +00:00
|
|
|
callout_drain(&d->bd_callout);
|
2019-05-13 13:45:28 +00:00
|
|
|
bpfd_rele(d);
|
2008-08-13 15:41:21 +00:00
|
|
|
}
|
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
|
|
|
* Open ethernet device. Returns ENXIO for illegal minor device number,
|
|
|
|
* EBUSY if file is open by another process.
|
|
|
|
*/
|
|
|
|
/* ARGSUSED */
|
1995-12-08 11:19:42 +00:00
|
|
|
static int
|
2006-06-15 15:39:12 +00:00
|
|
|
bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
2001-02-16 17:10:28 +00:00
|
|
|
struct bpf_d *d;
|
2015-07-31 20:02:12 +00:00
|
|
|
int error;
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2008-10-23 15:53:51 +00:00
|
|
|
d = malloc(sizeof(*d), M_BPF, M_WAITOK | M_ZERO);
|
2008-08-13 15:41:21 +00:00
|
|
|
error = devfs_set_cdevpriv(d, bpf_dtor);
|
|
|
|
if (error != 0) {
|
|
|
|
free(d, M_BPF);
|
|
|
|
return (error);
|
|
|
|
}
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patchs to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstrac
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring hanges that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
|
2018-03-20 22:57:06 +00:00
|
|
|
/* Setup counters */
|
|
|
|
d->bd_rcount = counter_u64_alloc(M_WAITOK);
|
|
|
|
d->bd_dcount = counter_u64_alloc(M_WAITOK);
|
|
|
|
d->bd_fcount = counter_u64_alloc(M_WAITOK);
|
|
|
|
d->bd_wcount = counter_u64_alloc(M_WAITOK);
|
|
|
|
d->bd_wfcount = counter_u64_alloc(M_WAITOK);
|
|
|
|
d->bd_wdcount = counter_u64_alloc(M_WAITOK);
|
|
|
|
d->bd_zcopy = counter_u64_alloc(M_WAITOK);
|
|
|
|
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patchs to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstrac
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring hanges that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
/*
|
|
|
|
* For historical reasons, perform a one-time initialization call to
|
|
|
|
* the buffer routines, even though we're not yet committed to a
|
|
|
|
* particular buffer method.
|
|
|
|
*/
|
|
|
|
bpf_buffer_init(d);
|
2015-04-20 10:44:46 +00:00
|
|
|
if ((flags & FREAD) == 0)
|
|
|
|
d->bd_writer = 2;
|
2012-12-10 16:14:44 +00:00
|
|
|
d->bd_hbuf_in_use = 0;
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patchs to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstrac
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring hanges that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
d->bd_bufmode = BPF_BUFMODE_BUFFER;
|
1995-06-15 18:11:00 +00:00
|
|
|
d->bd_sig = SIGIO;
|
2007-02-26 22:24:14 +00:00
|
|
|
d->bd_direction = BPF_D_INOUT;
|
2019-05-13 13:45:28 +00:00
|
|
|
d->bd_refcnt = 1;
|
2012-04-06 06:53:58 +00:00
|
|
|
BPF_PID_REFRESH(d, td);
|
2002-07-31 16:09:38 +00:00
|
|
|
#ifdef MAC
|
2007-10-24 19:04:04 +00:00
|
|
|
mac_bpfdesc_init(d);
|
|
|
|
mac_bpfdesc_create(td->td_ucred, d);
|
2002-07-31 16:09:38 +00:00
|
|
|
#endif
|
2012-05-21 22:17:29 +00:00
|
|
|
mtx_init(&d->bd_lock, devtoname(dev), "bpf cdev lock", MTX_DEF);
|
|
|
|
callout_init_mtx(&d->bd_callout, &d->bd_lock, 0);
|
|
|
|
knlist_init_mtx(&d->bd_sel.si_note, &d->bd_lock);
|
1994-05-24 10:09:53 +00:00
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* bpfread - read next chunk of packets from buffers
|
|
|
|
*/
|
1995-12-08 11:19:42 +00:00
|
|
|
static int
|
2006-06-15 15:39:12 +00:00
|
|
|
bpfread(struct cdev *dev, struct uio *uio, int ioflag)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
2008-08-13 15:41:21 +00:00
|
|
|
struct bpf_d *d;
|
1994-05-24 10:09:53 +00:00
|
|
|
int error;
|
2010-02-20 00:19:21 +00:00
|
|
|
int non_block;
|
|
|
|
int timed_out;
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2008-08-13 15:41:21 +00:00
|
|
|
error = devfs_get_cdevpriv((void **)&d);
|
|
|
|
if (error != 0)
|
|
|
|
return (error);
|
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
|
|
|
* Restrict application to use a buffer the same size as
|
|
|
|
* as kernel buffers.
|
|
|
|
*/
|
|
|
|
if (uio->uio_resid != d->bd_bufsize)
|
|
|
|
return (EINVAL);
|
|
|
|
|
2010-02-20 00:19:21 +00:00
|
|
|
non_block = ((ioflag & O_NONBLOCK) != 0);
|
|
|
|
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_LOCK(d);
|
2012-04-06 06:53:58 +00:00
|
|
|
BPF_PID_REFRESH_CUR(d);
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patchs to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstrac
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring hanges that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
if (d->bd_bufmode != BPF_BUFMODE_BUFFER) {
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_UNLOCK(d);
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patchs to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstrac
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring hanges that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
return (EOPNOTSUPP);
|
|
|
|
}
|
2001-12-14 22:17:54 +00:00
|
|
|
if (d->bd_state == BPF_WAITING)
|
|
|
|
callout_stop(&d->bd_callout);
|
|
|
|
timed_out = (d->bd_state == BPF_TIMED_OUT);
|
|
|
|
d->bd_state = BPF_IDLE;
|
2013-05-23 21:33:10 +00:00
|
|
|
while (d->bd_hbuf_in_use) {
|
|
|
|
error = mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
|
2012-12-10 16:14:44 +00:00
|
|
|
PRINET|PCATCH, "bd_hbuf", 0);
|
2013-05-23 21:33:10 +00:00
|
|
|
if (error != 0) {
|
|
|
|
BPFD_UNLOCK(d);
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
}
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
|
|
|
* If the hold buffer is empty, then do a timed sleep, which
|
|
|
|
* ends when the timeout expires or when enough packets
|
|
|
|
* have arrived to fill the store buffer.
|
|
|
|
*/
|
2004-07-24 16:58:56 +00:00
|
|
|
while (d->bd_hbuf == NULL) {
|
2010-02-20 00:19:21 +00:00
|
|
|
if (d->bd_slen != 0) {
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
|
|
|
* A packet(s) either arrived since the previous
|
|
|
|
* read or arrived while we were asleep.
|
|
|
|
*/
|
2010-02-20 00:19:21 +00:00
|
|
|
if (d->bd_immediate || non_block || timed_out) {
|
|
|
|
/*
|
|
|
|
* Rotate the buffers and return what's here
|
|
|
|
* if we are in immediate mode, non-blocking
|
|
|
|
* flag is set, or this descriptor timed out.
|
|
|
|
*/
|
|
|
|
ROTATE_BUFFERS(d);
|
|
|
|
break;
|
|
|
|
}
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
The advent of if_detach, allowing interface removal at runtime, makes it
possible for a panic to occur if BPF is in use on the interface at the
time of the call to if_detach. This happens because BPF maintains pointers
to the struct ifnet describing the interface, which is freed by if_detach.
To correct this problem, a new call, bpfdetach, is introduced. bpfdetach
locates BPF descriptor references to the interface, and NULLs them. Other
BPF code is modified so that discovery of a NULL interface results in
ENXIO (already implemented for some calls). Processes blocked on a BPF
call will also be woken up so that they can receive ENXIO.
Interface drivers that invoke bpfattach and if_detach must be modified to
also call bpfattach(ifp) before calling if_detach(ifp). This is relevant
for buses that support hot removal, such as pccard and usb. Patches to
all effected devices will not be committed, only to if_wi.c, due to
testing limitations. To reproduce the crash, load up tcpdump on you
favorite pccard ethernet card, and then eject the card. As some pccard
drivers do not invoke if_detach(ifp), this bug will not manifest itself
for those drivers.
Reviewed by: wes
2000-03-19 05:42:34 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* No data is available, check to see if the bpf device
|
|
|
|
* is still pointed at a real interface. If not, return
|
|
|
|
* ENXIO so that the userland process knows to rebind
|
|
|
|
* it before using it again.
|
|
|
|
*/
|
|
|
|
if (d->bd_bif == NULL) {
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_UNLOCK(d);
|
The advent of if_detach, allowing interface removal at runtime, makes it
possible for a panic to occur if BPF is in use on the interface at the
time of the call to if_detach. This happens because BPF maintains pointers
to the struct ifnet describing the interface, which is freed by if_detach.
To correct this problem, a new call, bpfdetach, is introduced. bpfdetach
locates BPF descriptor references to the interface, and NULLs them. Other
BPF code is modified so that discovery of a NULL interface results in
ENXIO (already implemented for some calls). Processes blocked on a BPF
call will also be woken up so that they can receive ENXIO.
Interface drivers that invoke bpfattach and if_detach must be modified to
also call bpfattach(ifp) before calling if_detach(ifp). This is relevant
for buses that support hot removal, such as pccard and usb. Patches to
all effected devices will not be committed, only to if_wi.c, due to
testing limitations. To reproduce the crash, load up tcpdump on you
favorite pccard ethernet card, and then eject the card. As some pccard
drivers do not invoke if_detach(ifp), this bug will not manifest itself
for those drivers.
Reviewed by: wes
2000-03-19 05:42:34 +00:00
|
|
|
return (ENXIO);
|
|
|
|
}
|
|
|
|
|
2010-02-20 00:19:21 +00:00
|
|
|
if (non_block) {
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_UNLOCK(d);
|
2000-12-17 20:50:22 +00:00
|
|
|
return (EWOULDBLOCK);
|
|
|
|
}
|
2012-05-21 22:17:29 +00:00
|
|
|
error = msleep(d, &d->bd_lock, PRINET|PCATCH,
|
2001-02-16 17:10:28 +00:00
|
|
|
"bpf", d->bd_rtout);
|
1994-05-24 10:09:53 +00:00
|
|
|
if (error == EINTR || error == ERESTART) {
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_UNLOCK(d);
|
1994-05-24 10:09:53 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
if (error == EWOULDBLOCK) {
|
|
|
|
/*
|
|
|
|
* On a timeout, return what's in the buffer,
|
|
|
|
* which may be nothing. If there is something
|
|
|
|
* in the store buffer, we can rotate the buffers.
|
|
|
|
*/
|
|
|
|
if (d->bd_hbuf)
|
|
|
|
/*
|
|
|
|
* We filled up the buffer in between
|
|
|
|
* getting the timeout and arriving
|
|
|
|
* here, so we don't need to rotate.
|
|
|
|
*/
|
|
|
|
break;
|
|
|
|
|
|
|
|
if (d->bd_slen == 0) {
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_UNLOCK(d);
|
1994-05-24 10:09:53 +00:00
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
ROTATE_BUFFERS(d);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* At this point, we know we have something in the hold slot.
|
|
|
|
*/
|
2012-12-10 16:14:44 +00:00
|
|
|
d->bd_hbuf_in_use = 1;
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_UNLOCK(d);
|
1994-05-24 10:09:53 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Move data from hold buffer into user space.
|
|
|
|
* We know the entire buffer is transferred since
|
|
|
|
* we checked above that the read buffer is bpf_bufsize bytes.
|
2012-12-10 16:14:44 +00:00
|
|
|
*
|
|
|
|
* We do not have to worry about simultaneous reads because
|
|
|
|
* we waited for sole access to the hold buffer above.
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patchs to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstrac
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring hanges that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio);
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_LOCK(d);
|
2012-12-10 16:14:44 +00:00
|
|
|
KASSERT(d->bd_hbuf != NULL, ("bpfread: lost bd_hbuf"));
|
|
|
|
d->bd_fbuf = d->bd_hbuf;
|
|
|
|
d->bd_hbuf = NULL;
|
|
|
|
d->bd_hlen = 0;
|
|
|
|
bpf_buf_reclaimed(d);
|
|
|
|
d->bd_hbuf_in_use = 0;
|
|
|
|
wakeup(&d->bd_hbuf_in_use);
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_UNLOCK(d);
|
1994-05-24 10:09:53 +00:00
|
|
|
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If there are processes sleeping on this descriptor, wake them up.
|
|
|
|
*/
|
2001-02-16 17:10:28 +00:00
|
|
|
static __inline void
|
2006-06-15 15:39:12 +00:00
|
|
|
bpf_wakeup(struct bpf_d *d)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
2005-05-04 03:09:28 +00:00
|
|
|
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_LOCK_ASSERT(d);
|
2001-12-14 22:17:54 +00:00
|
|
|
if (d->bd_state == BPF_WAITING) {
|
|
|
|
callout_stop(&d->bd_callout);
|
|
|
|
d->bd_state = BPF_IDLE;
|
|
|
|
}
|
2003-03-02 16:54:40 +00:00
|
|
|
wakeup(d);
|
Installed the second patch attached to kern/7899 with some changes suggested
by bde, a few other tweaks to get the patch to apply cleanly again and
some improvements to the comments.
This change closes some fairly minor security holes associated with
F_SETOWN, fixes a few bugs, and removes some limitations that F_SETOWN
had on tty devices. For more details, see the description on the PR.
Because this patch increases the size of the proc and pgrp structures,
it is necessary to re-install the includes and recompile libkvm,
the vinum lkm, fstat, gcore, gdb, ipfilter, ps, top, and w.
PR: kern/7899
Reviewed by: bde, elvind
1998-11-11 10:04:13 +00:00
|
|
|
if (d->bd_async && d->bd_sig && d->bd_sigio)
|
2002-05-01 20:44:46 +00:00
|
|
|
pgsigio(&d->bd_sigio, d->bd_sig, 0);
|
1995-06-15 18:11:00 +00:00
|
|
|
|
2003-11-09 09:17:26 +00:00
|
|
|
selwakeuppri(&d->bd_sel, PRINET);
|
2004-08-15 06:24:42 +00:00
|
|
|
KNOTE_LOCKED(&d->bd_sel.si_note, 0);
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
|
2001-12-14 22:17:54 +00:00
|
|
|
static void
|
2006-06-15 15:39:12 +00:00
|
|
|
bpf_timed_out(void *arg)
|
2001-12-14 22:17:54 +00:00
|
|
|
{
|
|
|
|
struct bpf_d *d = (struct bpf_d *)arg;
|
|
|
|
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_LOCK_ASSERT(d);
|
2010-03-12 19:14:58 +00:00
|
|
|
|
2019-05-13 13:45:28 +00:00
|
|
|
if (callout_pending(&d->bd_callout) ||
|
|
|
|
!callout_active(&d->bd_callout))
|
2010-03-12 19:14:58 +00:00
|
|
|
return;
|
2001-12-14 22:17:54 +00:00
|
|
|
if (d->bd_state == BPF_WAITING) {
|
|
|
|
d->bd_state = BPF_TIMED_OUT;
|
|
|
|
if (d->bd_slen != 0)
|
|
|
|
bpf_wakeup(d);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patches to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstract
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring changes that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental and are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
static int
|
|
|
|
bpf_ready(struct bpf_d *d)
|
|
|
|
{
|
|
|
|
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_LOCK_ASSERT(d);
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patchs to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstrac
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring hanges that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
|
|
|
|
if (!bpf_canfreebuf(d) && d->bd_hlen != 0)
|
|
|
|
return (1);
|
|
|
|
if ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
|
|
|
|
d->bd_slen != 0)
|
|
|
|
return (1);
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2006-06-15 15:39:12 +00:00
|
|
|
static int
|
|
|
|
bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
2019-05-13 13:45:28 +00:00
|
|
|
struct route ro;
|
|
|
|
struct sockaddr dst;
|
|
|
|
struct epoch_tracker et;
|
|
|
|
struct bpf_if *bp;
|
2008-08-13 15:41:21 +00:00
|
|
|
struct bpf_d *d;
|
1994-05-24 10:09:53 +00:00
|
|
|
struct ifnet *ifp;
|
2007-02-26 22:24:14 +00:00
|
|
|
struct mbuf *m, *mc;
|
|
|
|
int error, hlen;
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2008-08-13 15:41:21 +00:00
|
|
|
error = devfs_get_cdevpriv((void **)&d);
|
|
|
|
if (error != 0)
|
|
|
|
return (error);
|
|
|
|
|
2019-05-13 13:45:28 +00:00
|
|
|
NET_EPOCH_ENTER(et);
|
|
|
|
BPFD_LOCK(d);
|
2012-04-06 06:53:58 +00:00
|
|
|
BPF_PID_REFRESH_CUR(d);
|
2018-03-20 22:57:06 +00:00
|
|
|
counter_u64_add(d->bd_wcount, 1);
|
2019-05-13 13:45:28 +00:00
|
|
|
if ((bp = d->bd_bif) == NULL) {
|
|
|
|
error = ENXIO;
|
|
|
|
goto out_locked;
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patchs to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstrac
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring hanges that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
}
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2019-05-13 13:45:28 +00:00
|
|
|
ifp = bp->bif_ifp;
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patchs to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstrac
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring hanges that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
if ((ifp->if_flags & IFF_UP) == 0) {
|
2019-05-13 13:45:28 +00:00
|
|
|
error = ENETDOWN;
|
|
|
|
goto out_locked;
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patchs to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstrac
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring hanges that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
}
|
2004-12-08 05:40:02 +00:00
|
|
|
|
2019-05-13 13:45:28 +00:00
|
|
|
if (uio->uio_resid == 0)
|
|
|
|
goto out_locked;
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2004-06-11 03:45:42 +00:00
|
|
|
bzero(&dst, sizeof(dst));
|
2007-06-17 21:51:43 +00:00
|
|
|
m = NULL;
|
|
|
|
hlen = 0;
|
2019-05-13 13:45:28 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Take extra reference, unlock d and exit from epoch section,
|
|
|
|
* since bpf_movein() can sleep.
|
|
|
|
*/
|
|
|
|
bpfd_ref(d);
|
|
|
|
NET_EPOCH_EXIT(et);
|
|
|
|
BPFD_UNLOCK(d);
|
|
|
|
|
|
|
|
error = bpf_movein(uio, (int)bp->bif_dlt, ifp,
|
2015-12-31 05:03:27 +00:00
|
|
|
&m, &dst, &hlen, d);
|
2019-05-13 13:45:28 +00:00
|
|
|
|
|
|
|
if (error != 0) {
|
2018-03-20 22:57:06 +00:00
|
|
|
counter_u64_add(d->bd_wdcount, 1);
|
2019-05-13 13:45:28 +00:00
|
|
|
bpfd_rele(d);
|
1994-05-24 10:09:53 +00:00
|
|
|
return (error);
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patchs to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstrac
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring hanges that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
}
|
2019-05-13 13:45:28 +00:00
|
|
|
|
|
|
|
BPFD_LOCK(d);
|
|
|
|
/*
|
|
|
|
* Check that descriptor is still attached to the interface.
|
|
|
|
* This can happen on bpfdetach(). To avoid access to detached
|
|
|
|
* ifnet, free mbuf and return ENXIO.
|
|
|
|
*/
|
|
|
|
if (d->bd_bif == NULL) {
|
|
|
|
counter_u64_add(d->bd_wdcount, 1);
|
|
|
|
BPFD_UNLOCK(d);
|
|
|
|
bpfd_rele(d);
|
|
|
|
m_freem(m);
|
|
|
|
return (ENXIO);
|
|
|
|
}
|
2018-03-20 22:57:06 +00:00
|
|
|
counter_u64_add(d->bd_wfcount, 1);
|
1999-10-15 05:07:00 +00:00
|
|
|
if (d->bd_hdrcmplt)
|
|
|
|
dst.sa_family = pseudo_AF_HDRCMPLT;
|
|
|
|
|
2007-02-26 22:24:14 +00:00
|
|
|
if (d->bd_feedback) {
|
2012-12-05 08:04:20 +00:00
|
|
|
mc = m_dup(m, M_NOWAIT);
|
2007-02-26 22:24:14 +00:00
|
|
|
if (mc != NULL)
|
|
|
|
mc->m_pkthdr.rcvif = ifp;
|
2008-04-15 17:08:24 +00:00
|
|
|
/* Set M_PROMISC for outgoing packets to be discarded. */
|
|
|
|
if (d->bd_direction == BPF_D_INOUT)
|
|
|
|
m->m_flags |= M_PROMISC;
|
2007-02-26 22:24:14 +00:00
|
|
|
} else
|
|
|
|
mc = NULL;
|
|
|
|
|
|
|
|
m->m_pkthdr.len -= hlen;
|
|
|
|
m->m_len -= hlen;
|
|
|
|
m->m_data += hlen; /* XXX */
|
|
|
|
|
Change the curvnet variable from a global const struct vnet *,
previously always pointing to the default vnet context, to a
dynamically changing thread-local one. The currvnet context
should be set on entry to networking code via CURVNET_SET() macros,
and reverted to previous state via CURVNET_RESTORE(). Recursions
on curvnet are permitted, though strongly discuouraged.
This change should have no functional impact on nooptions VIMAGE
kernel builds, where CURVNET_* macros expand to whitespace.
The curthread->td_vnet (aka curvnet) variable's purpose is to be an
indicator of the vnet context in which the current network-related
operation takes place, in case we cannot deduce the current vnet
context from any other source, such as by looking at mbuf's
m->m_pkthdr.rcvif->if_vnet, sockets's so->so_vnet etc. Moreover, so
far curvnet has turned out to be an invaluable consistency checking
aid: it helps to catch cases when sockets, ifnets or any other
vnet-aware structures may have leaked from one vnet to another.
The exact placement of the CURVNET_SET() / CURVNET_RESTORE() macros
was a result of an empirical iterative process, whith an aim to
reduce recursions on CURVNET_SET() to a minimum, while still reducing
the scope of CURVNET_SET() to networking only operations - the
alternative would be calling CURVNET_SET() on each system call entry.
In general, curvnet has to be set in three typicall cases: when
processing socket-related requests from userspace or from within the
kernel; when processing inbound traffic flowing from device drivers
to upper layers of the networking stack, and when executing
timer-driven networking functions.
This change also introduces a DDB subcommand to show the list of all
vnet instances.
Approved by: julian (mentor)
2009-05-05 10:56:12 +00:00
|
|
|
CURVNET_SET(ifp->if_vnet);
|
2002-07-31 16:09:38 +00:00
|
|
|
#ifdef MAC
|
2007-10-24 19:04:04 +00:00
|
|
|
mac_bpfdesc_create_mbuf(d, m);
|
2007-02-26 22:24:14 +00:00
|
|
|
if (mc != NULL)
|
2007-10-24 19:04:04 +00:00
|
|
|
mac_bpfdesc_create_mbuf(d, mc);
|
2002-07-31 16:09:38 +00:00
|
|
|
#endif
|
2007-02-26 22:24:14 +00:00
|
|
|
|
2015-12-31 05:03:27 +00:00
|
|
|
bzero(&ro, sizeof(ro));
|
|
|
|
if (hlen != 0) {
|
|
|
|
ro.ro_prepend = (u_char *)&dst.sa_data;
|
|
|
|
ro.ro_plen = hlen;
|
|
|
|
ro.ro_flags = RT_HAS_HEADER;
|
|
|
|
}
|
|
|
|
|
2019-05-13 20:17:55 +00:00
|
|
|
/* Avoid possible recursion on BPFD_LOCK(). */
|
|
|
|
NET_EPOCH_ENTER(et);
|
|
|
|
BPFD_UNLOCK(d);
|
2015-12-31 05:03:27 +00:00
|
|
|
error = (*ifp->if_output)(ifp, m, &dst, &ro);
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patchs to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstrac
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring hanges that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
if (error)
|
2018-03-20 22:57:06 +00:00
|
|
|
counter_u64_add(d->bd_wdcount, 1);
|
2007-02-26 22:24:14 +00:00
|
|
|
|
|
|
|
if (mc != NULL) {
|
2007-08-06 14:26:03 +00:00
|
|
|
if (error == 0)
|
2007-02-26 22:24:14 +00:00
|
|
|
(*ifp->if_input)(ifp, mc);
|
2007-08-06 14:26:03 +00:00
|
|
|
else
|
2007-02-26 22:24:14 +00:00
|
|
|
m_freem(mc);
|
|
|
|
}
|
2019-05-13 20:17:55 +00:00
|
|
|
NET_EPOCH_EXIT(et);
|
Change the curvnet variable from a global const struct vnet *,
previously always pointing to the default vnet context, to a
dynamically changing thread-local one. The currvnet context
should be set on entry to networking code via CURVNET_SET() macros,
and reverted to previous state via CURVNET_RESTORE(). Recursions
on curvnet are permitted, though strongly discuouraged.
This change should have no functional impact on nooptions VIMAGE
kernel builds, where CURVNET_* macros expand to whitespace.
The curthread->td_vnet (aka curvnet) variable's purpose is to be an
indicator of the vnet context in which the current network-related
operation takes place, in case we cannot deduce the current vnet
context from any other source, such as by looking at mbuf's
m->m_pkthdr.rcvif->if_vnet, sockets's so->so_vnet etc. Moreover, so
far curvnet has turned out to be an invaluable consistency checking
aid: it helps to catch cases when sockets, ifnets or any other
vnet-aware structures may have leaked from one vnet to another.
The exact placement of the CURVNET_SET() / CURVNET_RESTORE() macros
was a result of an empirical iterative process, whith an aim to
reduce recursions on CURVNET_SET() to a minimum, while still reducing
the scope of CURVNET_SET() to networking only operations - the
alternative would be calling CURVNET_SET() on each system call entry.
In general, curvnet has to be set in three typicall cases: when
processing socket-related requests from userspace or from within the
kernel; when processing inbound traffic flowing from device drivers
to upper layers of the networking stack, and when executing
timer-driven networking functions.
This change also introduces a DDB subcommand to show the list of all
vnet instances.
Approved by: julian (mentor)
2009-05-05 10:56:12 +00:00
|
|
|
CURVNET_RESTORE();
|
2019-05-13 13:45:28 +00:00
|
|
|
bpfd_rele(d);
|
|
|
|
return (error);
|
2007-02-26 22:24:14 +00:00
|
|
|
|
2019-05-13 13:45:28 +00:00
|
|
|
out_locked:
|
|
|
|
counter_u64_add(d->bd_wdcount, 1);
|
|
|
|
NET_EPOCH_EXIT(et);
|
|
|
|
BPFD_UNLOCK(d);
|
1994-05-24 10:09:53 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2009-03-07 22:17:44 +00:00
|
|
|
* Reset a descriptor by flushing its packet buffer and clearing the receive
|
|
|
|
* and drop counts. This is doable for kernel-only buffers, but with
|
|
|
|
* zero-copy buffers, we can't write to (or rotate) buffers that are
|
|
|
|
* currently owned by userspace. It would be nice if we could encapsulate
|
|
|
|
* this logic in the buffer code rather than here.
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
|
|
|
static void
|
2006-06-15 15:39:12 +00:00
|
|
|
reset_d(struct bpf_d *d)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
2001-02-16 17:10:28 +00:00
|
|
|
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_LOCK_ASSERT(d);
|
2009-03-07 22:17:44 +00:00
|
|
|
|
2012-12-10 16:14:44 +00:00
|
|
|
while (d->bd_hbuf_in_use)
|
|
|
|
mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock, PRINET,
|
|
|
|
"bd_hbuf", 0);
|
2009-03-07 22:17:44 +00:00
|
|
|
if ((d->bd_hbuf != NULL) &&
|
|
|
|
(d->bd_bufmode != BPF_BUFMODE_ZBUF || bpf_canfreebuf(d))) {
|
1994-05-24 10:09:53 +00:00
|
|
|
/* Free the hold buffer. */
|
|
|
|
d->bd_fbuf = d->bd_hbuf;
|
2004-07-24 16:58:56 +00:00
|
|
|
d->bd_hbuf = NULL;
|
2009-03-07 22:17:44 +00:00
|
|
|
d->bd_hlen = 0;
|
2008-07-05 20:11:28 +00:00
|
|
|
bpf_buf_reclaimed(d);
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
2009-03-07 22:17:44 +00:00
|
|
|
if (bpf_canwritebuf(d))
|
|
|
|
d->bd_slen = 0;
|
2018-03-20 22:57:06 +00:00
|
|
|
counter_u64_zero(d->bd_rcount);
|
|
|
|
counter_u64_zero(d->bd_dcount);
|
|
|
|
counter_u64_zero(d->bd_fcount);
|
|
|
|
counter_u64_zero(d->bd_wcount);
|
|
|
|
counter_u64_zero(d->bd_wfcount);
|
|
|
|
counter_u64_zero(d->bd_wdcount);
|
|
|
|
counter_u64_zero(d->bd_zcopy);
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* FIONREAD Check for read packet available.
|
|
|
|
* BIOCGBLEN Get buffer len [for read()].
|
2008-07-07 09:25:49 +00:00
|
|
|
* BIOCSETF Set read filter.
|
|
|
|
* BIOCSETFNR Set read filter without resetting descriptor.
|
|
|
|
* BIOCSETWF Set write filter.
|
1994-05-24 10:09:53 +00:00
|
|
|
* BIOCFLUSH Flush read packet buffer.
|
|
|
|
* BIOCPROMISC Put interface into promiscuous mode.
|
|
|
|
* BIOCGDLT Get link layer type.
|
|
|
|
* BIOCGETIF Get interface name.
|
|
|
|
* BIOCSETIF Set interface.
|
|
|
|
* BIOCSRTIMEOUT Set read timeout.
|
|
|
|
* BIOCGRTIMEOUT Get read timeout.
|
|
|
|
* BIOCGSTATS Get packet stats.
|
|
|
|
* BIOCIMMEDIATE Set immediate mode.
|
|
|
|
* BIOCVERSION Get filter language version.
|
1999-10-15 05:07:00 +00:00
|
|
|
* BIOCGHDRCMPLT Get "header already complete" flag
|
|
|
|
* BIOCSHDRCMPLT Set "header already complete" flag
|
2007-02-26 22:24:14 +00:00
|
|
|
* BIOCGDIRECTION Get packet direction flag
|
|
|
|
* BIOCSDIRECTION Set packet direction flag
|
Implement flexible BPF timestamping framework.
- Allow setting format, resolution and accuracy of BPF time stamps per
listener. Previously, we were only able to use microtime(9). Now we can
set various resolutions and accuracies with ioctl(2) BIOCSTSTAMP command.
Similarly, we can get the current resolution and accuracy with BIOCGTSTAMP
command. Document all supported options in bpf(4) and their uses.
- Introduce new time stamp 'struct bpf_ts' and header 'struct bpf_xhdr'.
The new time stamp has both 64-bit second and fractional parts. bpf_xhdr
has this time stamp instead of 'struct timeval' for bh_tstamp. The new
structures let us use bh_tstamp of same size on both 32-bit and 64-bit
platforms without adding additional shims for 32-bit binaries. On 64-bit
platforms, size of BPF header does not change compared to bpf_hdr as its
members are already all 64-bit long. On 32-bit platforms, the size may
increase by 8 bytes. For backward compatibility, struct bpf_hdr with
struct timeval is still the default header unless new time stamp format is
explicitly requested. However, the behaviour may change in the future and
all relevant code is wrapped around "#ifdef BURN_BRIDGES" for now.
- Add experimental support for tagging mbufs with time stamps from a lower
layer, e.g., device driver. Currently, mbuf_tags(9) is used to tag mbufs.
The time stamps must be uptime in 'struct bintime' format as binuptime(9)
and getbinuptime(9) do.
Reviewed by: net@
2010-06-15 19:28:44 +00:00
|
|
|
* BIOCGTSTAMP Get time stamp format and resolution.
|
|
|
|
* BIOCSTSTAMP Set time stamp format and resolution.
|
2005-08-22 19:35:48 +00:00
|
|
|
* BIOCLOCK Set "locked" flag
|
2007-02-26 22:24:14 +00:00
|
|
|
* BIOCFEEDBACK Set packet feedback mode.
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patches to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstract
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring changes that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental and are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
* BIOCSETZBUF Set current zero-copy buffer locations.
|
|
|
|
* BIOCGETZMAX Get maximum zero-copy buffer size.
|
|
|
|
* BIOCROTZBUF Force rotation of zero-copy buffer
|
|
|
|
* BIOCSETBUFMODE Set buffer mode.
|
|
|
|
* BIOCGETBUFMODE Get current buffer mode.
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
|
|
|
/* ARGSUSED */
|
1995-12-08 11:19:42 +00:00
|
|
|
static int
|
2006-06-15 15:39:12 +00:00
|
|
|
bpfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags,
|
|
|
|
struct thread *td)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
2008-08-13 15:41:21 +00:00
|
|
|
struct bpf_d *d;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
error = devfs_get_cdevpriv((void **)&d);
|
|
|
|
if (error != 0)
|
|
|
|
return (error);
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2007-12-23 14:10:33 +00:00
|
|
|
/*
|
2005-09-05 23:08:04 +00:00
|
|
|
* Refresh PID associated with this descriptor.
|
|
|
|
*/
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_LOCK(d);
|
2012-04-06 06:53:58 +00:00
|
|
|
BPF_PID_REFRESH(d, td);
|
2001-12-14 22:17:54 +00:00
|
|
|
if (d->bd_state == BPF_WAITING)
|
|
|
|
callout_stop(&d->bd_callout);
|
|
|
|
d->bd_state = BPF_IDLE;
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_UNLOCK(d);
|
2001-12-14 22:17:54 +00:00
|
|
|
|
2005-08-22 19:35:48 +00:00
|
|
|
if (d->bd_locked == 1) {
|
|
|
|
switch (cmd) {
|
|
|
|
case BIOCGBLEN:
|
|
|
|
case BIOCFLUSH:
|
|
|
|
case BIOCGDLT:
|
2007-12-23 14:10:33 +00:00
|
|
|
case BIOCGDLTLIST:
|
2010-04-25 16:43:41 +00:00
|
|
|
#ifdef COMPAT_FREEBSD32
|
|
|
|
case BIOCGDLTLIST32:
|
|
|
|
#endif
|
2005-08-22 19:35:48 +00:00
|
|
|
case BIOCGETIF:
|
|
|
|
case BIOCGRTIMEOUT:
|
2017-06-27 01:29:10 +00:00
|
|
|
#if defined(COMPAT_FREEBSD32) && defined(__amd64__)
|
2010-04-25 16:43:41 +00:00
|
|
|
case BIOCGRTIMEOUT32:
|
|
|
|
#endif
|
2005-08-22 19:35:48 +00:00
|
|
|
case BIOCGSTATS:
|
|
|
|
case BIOCVERSION:
|
|
|
|
case BIOCGRSIG:
|
|
|
|
case BIOCGHDRCMPLT:
|
Implement flexible BPF timestamping framework.
- Allow setting format, resolution and accuracy of BPF time stamps per
listener. Previously, we were only able to use microtime(9). Now we can
set various resolutions and accuracies with ioctl(2) BIOCSTSTAMP command.
Similarly, we can get the current resolution and accuracy with BIOCGTSTAMP
command. Document all supported options in bpf(4) and their uses.
- Introduce new time stamp 'struct bpf_ts' and header 'struct bpf_xhdr'.
The new time stamp has both 64-bit second and fractional parts. bpf_xhdr
has this time stamp instead of 'struct timeval' for bh_tstamp. The new
structures let us use bh_tstamp of same size on both 32-bit and 64-bit
platforms without adding additional shims for 32-bit binaries. On 64-bit
platforms, size of BPF header does not change compared to bpf_hdr as its
members are already all 64-bit long. On 32-bit platforms, the size may
increase by 8 bytes. For backward compatibility, struct bpf_hdr with
struct timeval is still the default header unless new time stamp format is
explicitly requested. However, the behaviour may change in the future and
all relevant code is wrapped around "#ifdef BURN_BRIDGES" for now.
- Add experimental support for tagging mbufs with time stamps from a lower
layer, e.g., device driver. Currently, mbuf_tags(9) is used to tag mbufs.
The time stamps must be uptime in 'struct bintime' format as binuptime(9)
and getbinuptime(9) do.
Reviewed by: net@
2010-06-15 19:28:44 +00:00
|
|
|
case BIOCSTSTAMP:
|
2007-02-26 22:24:14 +00:00
|
|
|
case BIOCFEEDBACK:
|
2005-08-22 19:35:48 +00:00
|
|
|
case FIONREAD:
|
|
|
|
case BIOCLOCK:
|
|
|
|
case BIOCSRTIMEOUT:
|
2017-06-27 01:29:10 +00:00
|
|
|
#if defined(COMPAT_FREEBSD32) && defined(__amd64__)
|
2010-04-25 16:43:41 +00:00
|
|
|
case BIOCSRTIMEOUT32:
|
|
|
|
#endif
|
2005-08-22 19:35:48 +00:00
|
|
|
case BIOCIMMEDIATE:
|
|
|
|
case TIOCGPGRP:
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patches to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstract
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring changes that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental and are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
case BIOCROTZBUF:
|
2005-08-22 19:35:48 +00:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
return (EPERM);
|
|
|
|
}
|
|
|
|
}
|
2010-04-25 16:43:41 +00:00
|
|
|
#ifdef COMPAT_FREEBSD32
|
|
|
|
/*
|
|
|
|
* If we see a 32-bit compat ioctl, mark the stream as 32-bit so
|
|
|
|
* that it will get 32-bit packet headers.
|
|
|
|
*/
|
|
|
|
switch (cmd) {
|
|
|
|
case BIOCSETF32:
|
|
|
|
case BIOCSETFNR32:
|
|
|
|
case BIOCSETWF32:
|
|
|
|
case BIOCGDLTLIST32:
|
|
|
|
case BIOCGRTIMEOUT32:
|
|
|
|
case BIOCSRTIMEOUT32:
|
2018-01-25 12:13:41 +00:00
|
|
|
if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) {
|
|
|
|
BPFD_LOCK(d);
|
|
|
|
d->bd_compat32 = 1;
|
|
|
|
BPFD_UNLOCK(d);
|
|
|
|
}
|
2010-04-25 16:43:41 +00:00
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2008-11-26 22:32:07 +00:00
|
|
|
CURVNET_SET(TD_TO_VNET(td));
|
1994-05-24 10:09:53 +00:00
|
|
|
switch (cmd) {
|
|
|
|
|
|
|
|
default:
|
|
|
|
error = EINVAL;
|
|
|
|
break;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check for read packet available.
|
|
|
|
*/
|
|
|
|
case FIONREAD:
|
|
|
|
{
|
|
|
|
int n;
|
|
|
|
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_LOCK(d);
|
1994-05-24 10:09:53 +00:00
|
|
|
n = d->bd_slen;
|
2012-12-10 16:14:44 +00:00
|
|
|
while (d->bd_hbuf_in_use)
|
|
|
|
mtx_sleep(&d->bd_hbuf_in_use, &d->bd_lock,
|
|
|
|
PRINET, "bd_hbuf", 0);
|
1994-05-24 10:09:53 +00:00
|
|
|
if (d->bd_hbuf)
|
|
|
|
n += d->bd_hlen;
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_UNLOCK(d);
|
1994-05-24 10:09:53 +00:00
|
|
|
|
|
|
|
*(int *)addr = n;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Get buffer len [for read()].
|
|
|
|
*/
|
|
|
|
case BIOCGBLEN:
|
2012-05-21 22:21:00 +00:00
|
|
|
BPFD_LOCK(d);
|
1994-05-24 10:09:53 +00:00
|
|
|
*(u_int *)addr = d->bd_bufsize;
|
2012-05-21 22:21:00 +00:00
|
|
|
BPFD_UNLOCK(d);
|
1994-05-24 10:09:53 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Set buffer length.
|
|
|
|
*/
|
|
|
|
case BIOCSBLEN:
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patches to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstract
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring changes that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental and are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
error = bpf_ioctl_sblen(d, (u_int *)addr);
|
1994-05-24 10:09:53 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Set link layer read filter.
|
|
|
|
*/
|
|
|
|
case BIOCSETF:
|
2008-07-07 09:25:49 +00:00
|
|
|
case BIOCSETFNR:
|
2005-08-22 19:35:48 +00:00
|
|
|
case BIOCSETWF:
|
2010-04-25 16:43:41 +00:00
|
|
|
#ifdef COMPAT_FREEBSD32
|
|
|
|
case BIOCSETF32:
|
|
|
|
case BIOCSETFNR32:
|
|
|
|
case BIOCSETWF32:
|
|
|
|
#endif
|
2005-08-22 19:35:48 +00:00
|
|
|
error = bpf_setf(d, (struct bpf_program *)addr, cmd);
|
1994-05-24 10:09:53 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Flush read packet buffer.
|
|
|
|
*/
|
|
|
|
case BIOCFLUSH:
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_LOCK(d);
|
1994-05-24 10:09:53 +00:00
|
|
|
reset_d(d);
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_UNLOCK(d);
|
1994-05-24 10:09:53 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Put interface into promiscuous mode.
|
|
|
|
*/
|
|
|
|
case BIOCPROMISC:
|
2004-07-24 16:58:56 +00:00
|
|
|
if (d->bd_bif == NULL) {
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
|
|
|
* No interface attached yet.
|
|
|
|
*/
|
|
|
|
error = EINVAL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (d->bd_promisc == 0) {
|
|
|
|
error = ifpromisc(d->bd_bif->bif_ifp, 1);
|
|
|
|
if (error == 0)
|
|
|
|
d->bd_promisc = 1;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
/*
|
2003-01-20 19:08:46 +00:00
|
|
|
* Get current data link type.
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
|
|
|
case BIOCGDLT:
|
2012-05-21 22:21:00 +00:00
|
|
|
BPF_LOCK();
|
2004-07-24 16:58:56 +00:00
|
|
|
if (d->bd_bif == NULL)
|
1994-05-24 10:09:53 +00:00
|
|
|
error = EINVAL;
|
|
|
|
else
|
|
|
|
*(u_int *)addr = d->bd_bif->bif_dlt;
|
2012-05-21 22:21:00 +00:00
|
|
|
BPF_UNLOCK();
|
1994-05-24 10:09:53 +00:00
|
|
|
break;
|
|
|
|
|
2003-03-02 15:56:49 +00:00
|
|
|
/*
|
2003-01-20 19:08:46 +00:00
|
|
|
* Get a list of supported data link types.
|
|
|
|
*/
|
2010-04-25 16:43:41 +00:00
|
|
|
#ifdef COMPAT_FREEBSD32
|
|
|
|
case BIOCGDLTLIST32:
|
|
|
|
{
|
|
|
|
struct bpf_dltlist32 *list32;
|
|
|
|
struct bpf_dltlist dltlist;
|
|
|
|
|
|
|
|
list32 = (struct bpf_dltlist32 *)addr;
|
|
|
|
dltlist.bfl_len = list32->bfl_len;
|
|
|
|
dltlist.bfl_list = PTRIN(list32->bfl_list);
|
2012-05-21 22:21:00 +00:00
|
|
|
BPF_LOCK();
|
2010-04-25 16:43:41 +00:00
|
|
|
if (d->bd_bif == NULL)
|
|
|
|
error = EINVAL;
|
|
|
|
else {
|
|
|
|
error = bpf_getdltlist(d, &dltlist);
|
|
|
|
if (error == 0)
|
|
|
|
list32->bfl_len = dltlist.bfl_len;
|
|
|
|
}
|
2012-05-21 22:21:00 +00:00
|
|
|
BPF_UNLOCK();
|
2010-04-25 16:43:41 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2003-01-20 19:08:46 +00:00
|
|
|
case BIOCGDLTLIST:
|
2012-05-21 22:21:00 +00:00
|
|
|
BPF_LOCK();
|
2004-07-24 16:58:56 +00:00
|
|
|
if (d->bd_bif == NULL)
|
2003-01-20 19:08:46 +00:00
|
|
|
error = EINVAL;
|
|
|
|
else
|
|
|
|
error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
|
2012-05-21 22:21:00 +00:00
|
|
|
BPF_UNLOCK();
|
2003-01-20 19:08:46 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Set data link type.
|
|
|
|
*/
|
|
|
|
case BIOCSDLT:
|
2012-05-21 22:13:48 +00:00
|
|
|
BPF_LOCK();
|
2004-07-24 16:58:56 +00:00
|
|
|
if (d->bd_bif == NULL)
|
2003-01-20 19:08:46 +00:00
|
|
|
error = EINVAL;
|
|
|
|
else
|
|
|
|
error = bpf_setdlt(d, *(u_int *)addr);
|
2012-05-21 22:13:48 +00:00
|
|
|
BPF_UNLOCK();
|
2003-01-20 19:08:46 +00:00
|
|
|
break;
|
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
1999-11-03 21:32:28 +00:00
|
|
|
* Get interface name.
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
|
|
|
case BIOCGETIF:
|
2012-05-21 22:21:00 +00:00
|
|
|
BPF_LOCK();
|
2004-07-24 16:58:56 +00:00
|
|
|
if (d->bd_bif == NULL)
|
1994-05-24 10:09:53 +00:00
|
|
|
error = EINVAL;
|
1999-11-03 21:32:28 +00:00
|
|
|
else {
|
|
|
|
struct ifnet *const ifp = d->bd_bif->bif_ifp;
|
|
|
|
struct ifreq *const ifr = (struct ifreq *)addr;
|
|
|
|
|
2003-10-31 18:32:15 +00:00
|
|
|
strlcpy(ifr->ifr_name, ifp->if_xname,
|
|
|
|
sizeof(ifr->ifr_name));
|
1999-11-03 21:32:28 +00:00
|
|
|
}
|
2012-05-21 22:21:00 +00:00
|
|
|
BPF_UNLOCK();
|
1994-05-24 10:09:53 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Set interface.
|
|
|
|
*/
|
|
|
|
case BIOCSETIF:
|
2015-07-31 20:02:12 +00:00
|
|
|
{
|
|
|
|
int alloc_buf, size;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Behavior here depends on the buffering model. If
|
|
|
|
* we're using kernel memory buffers, then we can
|
|
|
|
* allocate them here. If we're using zero-copy,
|
|
|
|
* then the user process must have registered buffers
|
|
|
|
* by the time we get here.
|
|
|
|
*/
|
|
|
|
alloc_buf = 0;
|
|
|
|
BPFD_LOCK(d);
|
|
|
|
if (d->bd_bufmode == BPF_BUFMODE_BUFFER &&
|
|
|
|
d->bd_sbuf == NULL)
|
|
|
|
alloc_buf = 1;
|
|
|
|
BPFD_UNLOCK(d);
|
|
|
|
if (alloc_buf) {
|
|
|
|
size = d->bd_bufsize;
|
|
|
|
error = bpf_buffer_ioctl_sblen(d, &size);
|
|
|
|
if (error != 0)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
BPF_LOCK();
|
|
|
|
error = bpf_setif(d, (struct ifreq *)addr);
|
|
|
|
BPF_UNLOCK();
|
|
|
|
break;
|
|
|
|
}
|
1994-05-24 10:09:53 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Set read timeout.
|
|
|
|
*/
|
|
|
|
case BIOCSRTIMEOUT:
|
2017-06-27 01:29:10 +00:00
|
|
|
#if defined(COMPAT_FREEBSD32) && defined(__amd64__)
|
2010-04-25 16:43:41 +00:00
|
|
|
case BIOCSRTIMEOUT32:
|
|
|
|
#endif
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
|
|
|
struct timeval *tv = (struct timeval *)addr;
|
2012-03-03 08:19:18 +00:00
|
|
|
#if defined(COMPAT_FREEBSD32) && !defined(__mips__)
|
2010-04-25 16:43:41 +00:00
|
|
|
struct timeval32 *tv32;
|
|
|
|
struct timeval tv64;
|
|
|
|
|
|
|
|
if (cmd == BIOCSRTIMEOUT32) {
|
|
|
|
tv32 = (struct timeval32 *)addr;
|
|
|
|
tv = &tv64;
|
|
|
|
tv->tv_sec = tv32->tv_sec;
|
|
|
|
tv->tv_usec = tv32->tv_usec;
|
|
|
|
} else
|
|
|
|
#endif
|
|
|
|
tv = (struct timeval *)addr;
|
1998-10-04 17:20:22 +00:00
|
|
|
|
1998-10-08 00:32:08 +00:00
|
|
|
/*
|
|
|
|
* Subtract 1 tick from tvtohz() since this isn't
|
|
|
|
* a one-shot timer.
|
|
|
|
*/
|
|
|
|
if ((error = itimerfix(tv)) == 0)
|
|
|
|
d->bd_rtout = tvtohz(tv) - 1;
|
1994-05-24 10:09:53 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Get read timeout.
|
|
|
|
*/
|
|
|
|
case BIOCGRTIMEOUT:
|
2017-06-27 01:29:10 +00:00
|
|
|
#if defined(COMPAT_FREEBSD32) && defined(__amd64__)
|
2010-04-25 16:43:41 +00:00
|
|
|
case BIOCGRTIMEOUT32:
|
|
|
|
#endif
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
2010-04-25 16:43:41 +00:00
|
|
|
struct timeval *tv;
|
2017-06-27 01:29:10 +00:00
|
|
|
#if defined(COMPAT_FREEBSD32) && defined(__amd64__)
|
2010-04-25 16:43:41 +00:00
|
|
|
struct timeval32 *tv32;
|
|
|
|
struct timeval tv64;
|
|
|
|
|
|
|
|
if (cmd == BIOCGRTIMEOUT32)
|
|
|
|
tv = &tv64;
|
|
|
|
else
|
|
|
|
#endif
|
|
|
|
tv = (struct timeval *)addr;
|
1994-05-24 10:09:53 +00:00
|
|
|
|
1998-10-08 00:32:08 +00:00
|
|
|
tv->tv_sec = d->bd_rtout / hz;
|
|
|
|
tv->tv_usec = (d->bd_rtout % hz) * tick;
|
2017-06-27 01:29:10 +00:00
|
|
|
#if defined(COMPAT_FREEBSD32) && defined(__amd64__)
|
2010-04-25 16:43:41 +00:00
|
|
|
if (cmd == BIOCGRTIMEOUT32) {
|
|
|
|
tv32 = (struct timeval32 *)addr;
|
|
|
|
tv32->tv_sec = tv->tv_sec;
|
|
|
|
tv32->tv_usec = tv->tv_usec;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Get packet stats.
|
|
|
|
*/
|
|
|
|
case BIOCGSTATS:
|
|
|
|
{
|
|
|
|
struct bpf_stat *bs = (struct bpf_stat *)addr;
|
|
|
|
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patches to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstract
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring changes that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental and are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
/* XXXCSJP overflow */
|
2018-03-20 22:57:06 +00:00
|
|
|
bs->bs_recv = (u_int)counter_u64_fetch(d->bd_rcount);
|
|
|
|
bs->bs_drop = (u_int)counter_u64_fetch(d->bd_dcount);
|
1994-05-24 10:09:53 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Set immediate mode.
|
|
|
|
*/
|
|
|
|
case BIOCIMMEDIATE:
|
2012-05-21 22:21:00 +00:00
|
|
|
BPFD_LOCK(d);
|
1994-05-24 10:09:53 +00:00
|
|
|
d->bd_immediate = *(u_int *)addr;
|
2012-05-21 22:21:00 +00:00
|
|
|
BPFD_UNLOCK(d);
|
1994-05-24 10:09:53 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
case BIOCVERSION:
|
|
|
|
{
|
|
|
|
struct bpf_version *bv = (struct bpf_version *)addr;
|
|
|
|
|
|
|
|
bv->bv_major = BPF_MAJOR_VERSION;
|
|
|
|
bv->bv_minor = BPF_MINOR_VERSION;
|
|
|
|
break;
|
|
|
|
}
|
1995-06-15 18:11:00 +00:00
|
|
|
|
1999-10-15 05:07:00 +00:00
|
|
|
/*
|
|
|
|
* Get "header already complete" flag
|
|
|
|
*/
|
|
|
|
case BIOCGHDRCMPLT:
|
2012-05-21 22:21:00 +00:00
|
|
|
BPFD_LOCK(d);
|
1999-10-15 05:07:00 +00:00
|
|
|
*(u_int *)addr = d->bd_hdrcmplt;
|
2012-05-21 22:21:00 +00:00
|
|
|
BPFD_UNLOCK(d);
|
1999-10-15 05:07:00 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Set "header already complete" flag
|
|
|
|
*/
|
|
|
|
case BIOCSHDRCMPLT:
|
2012-05-21 22:21:00 +00:00
|
|
|
BPFD_LOCK(d);
|
1999-10-15 05:07:00 +00:00
|
|
|
d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
|
2012-05-21 22:21:00 +00:00
|
|
|
BPFD_UNLOCK(d);
|
1999-10-15 05:07:00 +00:00
|
|
|
break;
|
|
|
|
|
2000-03-18 06:30:42 +00:00
|
|
|
/*
|
2007-02-26 22:24:14 +00:00
|
|
|
* Get packet direction flag
|
2000-03-18 06:30:42 +00:00
|
|
|
*/
|
2007-02-26 22:24:14 +00:00
|
|
|
case BIOCGDIRECTION:
|
2012-05-21 22:21:00 +00:00
|
|
|
BPFD_LOCK(d);
|
2007-02-26 22:24:14 +00:00
|
|
|
*(u_int *)addr = d->bd_direction;
|
2012-05-21 22:21:00 +00:00
|
|
|
BPFD_UNLOCK(d);
|
2000-03-18 06:30:42 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
/*
|
2007-02-26 22:24:14 +00:00
|
|
|
* Set packet direction flag
|
2000-03-18 06:30:42 +00:00
|
|
|
*/
|
2007-02-26 22:24:14 +00:00
|
|
|
case BIOCSDIRECTION:
|
|
|
|
{
|
|
|
|
u_int direction;
|
|
|
|
|
|
|
|
direction = *(u_int *)addr;
|
|
|
|
switch (direction) {
|
|
|
|
case BPF_D_IN:
|
|
|
|
case BPF_D_INOUT:
|
|
|
|
case BPF_D_OUT:
|
2012-05-21 22:21:00 +00:00
|
|
|
BPFD_LOCK(d);
|
2007-02-26 22:24:14 +00:00
|
|
|
d->bd_direction = direction;
|
2012-05-21 22:21:00 +00:00
|
|
|
BPFD_UNLOCK(d);
|
2007-02-26 22:24:14 +00:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
error = EINVAL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
Implement flexible BPF timestamping framework.
- Allow setting format, resolution and accuracy of BPF time stamps per
listener. Previously, we were only able to use microtime(9). Now we can
set various resolutions and accuracies with ioctl(2) BIOCSTSTAMP command.
Similarly, we can get the current resolution and accuracy with BIOCGTSTAMP
command. Document all supported options in bpf(4) and their uses.
- Introduce new time stamp 'struct bpf_ts' and header 'struct bpf_xhdr'.
The new time stamp has both 64-bit second and fractional parts. bpf_xhdr
has this time stamp instead of 'struct timeval' for bh_tstamp. The new
structures let us use bh_tstamp of same size on both 32-bit and 64-bit
platforms without adding additional shims for 32-bit binaries. On 64-bit
platforms, size of BPF header does not change compared to bpf_hdr as its
members are already all 64-bit long. On 32-bit platforms, the size may
increase by 8 bytes. For backward compatibility, struct bpf_hdr with
struct timeval is still the default header unless new time stamp format is
explicitly requested. However, the behaviour may change in the future and
all relevant code is wrapped around "#ifdef BURN_BRIDGES" for now.
- Add experimental support for tagging mbufs with time stamps from a lower
layer, e.g., device driver. Currently, mbuf_tags(9) is used to tag mbufs.
The time stamps must be uptime in 'struct bintime' format as binuptime(9)
and getbinuptime(9) do.
Reviewed by: net@
2010-06-15 19:28:44 +00:00
|
|
|
/*
|
2010-09-16 18:37:33 +00:00
|
|
|
* Get packet timestamp format and resolution.
|
Implement flexible BPF timestamping framework.
- Allow setting format, resolution and accuracy of BPF time stamps per
listener. Previously, we were only able to use microtime(9). Now we can
set various resolutions and accuracies with ioctl(2) BIOCSTSTAMP command.
Similarly, we can get the current resolution and accuracy with BIOCGTSTAMP
command. Document all supported options in bpf(4) and their uses.
- Introduce new time stamp 'struct bpf_ts' and header 'struct bpf_xhdr'.
The new time stamp has both 64-bit second and fractional parts. bpf_xhdr
has this time stamp instead of 'struct timeval' for bh_tstamp. The new
structures let us use bh_tstamp of same size on both 32-bit and 64-bit
platforms without adding additional shims for 32-bit binaries. On 64-bit
platforms, size of BPF header does not change compared to bpf_hdr as its
members are already all 64-bit long. On 32-bit platforms, the size may
increase by 8 bytes. For backward compatibility, struct bpf_hdr with
struct timeval is still the default header unless new time stamp format is
explicitly requested. However, the behaviour may change in the future and
all relevant code is wrapped around "#ifdef BURN_BRIDGES" for now.
- Add experimental support for tagging mbufs with time stamps from a lower
layer, e.g., device driver. Currently, mbuf_tags(9) is used to tag mbufs.
The time stamps must be uptime in 'struct bintime' format as binuptime(9)
and getbinuptime(9) do.
Reviewed by: net@
2010-06-15 19:28:44 +00:00
|
|
|
*/
|
|
|
|
case BIOCGTSTAMP:
|
2012-05-21 22:21:00 +00:00
|
|
|
BPFD_LOCK(d);
|
Implement flexible BPF timestamping framework.
- Allow setting format, resolution and accuracy of BPF time stamps per
listener. Previously, we were only able to use microtime(9). Now we can
set various resolutions and accuracies with ioctl(2) BIOCSTSTAMP command.
Similarly, we can get the current resolution and accuracy with BIOCGTSTAMP
command. Document all supported options in bpf(4) and their uses.
- Introduce new time stamp 'struct bpf_ts' and header 'struct bpf_xhdr'.
The new time stamp has both 64-bit second and fractional parts. bpf_xhdr
has this time stamp instead of 'struct timeval' for bh_tstamp. The new
structures let us use bh_tstamp of same size on both 32-bit and 64-bit
platforms without adding additional shims for 32-bit binaries. On 64-bit
platforms, size of BPF header does not change compared to bpf_hdr as its
members are already all 64-bit long. On 32-bit platforms, the size may
increase by 8 bytes. For backward compatibility, struct bpf_hdr with
struct timeval is still the default header unless new time stamp format is
explicitly requested. However, the behaviour may change in the future and
all relevant code is wrapped around "#ifdef BURN_BRIDGES" for now.
- Add experimental support for tagging mbufs with time stamps from a lower
layer, e.g., device driver. Currently, mbuf_tags(9) is used to tag mbufs.
The time stamps must be uptime in 'struct bintime' format as binuptime(9)
and getbinuptime(9) do.
Reviewed by: net@
2010-06-15 19:28:44 +00:00
|
|
|
*(u_int *)addr = d->bd_tstamp;
|
2012-05-21 22:21:00 +00:00
|
|
|
BPFD_UNLOCK(d);
|
Implement flexible BPF timestamping framework.
- Allow setting format, resolution and accuracy of BPF time stamps per
listener. Previously, we were only able to use microtime(9). Now we can
set various resolutions and accuracies with ioctl(2) BIOCSTSTAMP command.
Similarly, we can get the current resolution and accuracy with BIOCGTSTAMP
command. Document all supported options in bpf(4) and their uses.
- Introduce new time stamp 'struct bpf_ts' and header 'struct bpf_xhdr'.
The new time stamp has both 64-bit second and fractional parts. bpf_xhdr
has this time stamp instead of 'struct timeval' for bh_tstamp. The new
structures let us use bh_tstamp of same size on both 32-bit and 64-bit
platforms without adding additional shims for 32-bit binaries. On 64-bit
platforms, size of BPF header does not change compared to bpf_hdr as its
members are already all 64-bit long. On 32-bit platforms, the size may
increase by 8 bytes. For backward compatibility, struct bpf_hdr with
struct timeval is still the default header unless new time stamp format is
explicitly requested. However, the behaviour may change in the future and
all relevant code is wrapped around "#ifdef BURN_BRIDGES" for now.
- Add experimental support for tagging mbufs with time stamps from a lower
layer, e.g., device driver. Currently, mbuf_tags(9) is used to tag mbufs.
The time stamps must be uptime in 'struct bintime' format as binuptime(9)
and getbinuptime(9) do.
Reviewed by: net@
2010-06-15 19:28:44 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Set packet timestamp format and resolution.
|
|
|
|
*/
|
|
|
|
case BIOCSTSTAMP:
|
|
|
|
{
|
|
|
|
u_int func;
|
|
|
|
|
|
|
|
func = *(u_int *)addr;
|
|
|
|
if (BPF_T_VALID(func))
|
|
|
|
d->bd_tstamp = func;
|
|
|
|
else
|
|
|
|
error = EINVAL;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
2007-02-26 22:24:14 +00:00
|
|
|
case BIOCFEEDBACK:
|
2012-05-21 22:21:00 +00:00
|
|
|
BPFD_LOCK(d);
|
2007-02-26 22:24:14 +00:00
|
|
|
d->bd_feedback = *(u_int *)addr;
|
2012-05-21 22:21:00 +00:00
|
|
|
BPFD_UNLOCK(d);
|
2007-02-26 22:24:14 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
case BIOCLOCK:
|
2012-05-21 22:21:00 +00:00
|
|
|
BPFD_LOCK(d);
|
2007-02-26 22:24:14 +00:00
|
|
|
d->bd_locked = 1;
|
2012-05-21 22:21:00 +00:00
|
|
|
BPFD_UNLOCK(d);
|
2000-03-18 06:30:42 +00:00
|
|
|
break;
|
|
|
|
|
1995-06-15 18:11:00 +00:00
|
|
|
case FIONBIO: /* Non-blocking I/O */
|
|
|
|
break;
|
|
|
|
|
|
|
|
case FIOASYNC: /* Send signal on receive packets */
|
2012-05-21 22:21:00 +00:00
|
|
|
BPFD_LOCK(d);
|
1995-06-15 18:11:00 +00:00
|
|
|
d->bd_async = *(int *)addr;
|
2012-05-21 22:21:00 +00:00
|
|
|
BPFD_UNLOCK(d);
|
1995-06-15 18:11:00 +00:00
|
|
|
break;
|
|
|
|
|
Installed the second patch attached to kern/7899 with some changes suggested
by bde, a few other tweaks to get the patch to apply cleanly again and
some improvements to the comments.
This change closes some fairly minor security holes associated with
F_SETOWN, fixes a few bugs, and removes some limitations that F_SETOWN
had on tty devices. For more details, see the description on the PR.
Because this patch increases the size of the proc and pgrp structures,
it is necessary to re-install the includes and recompile libkvm,
the vinum lkm, fstat, gcore, gdb, ipfilter, ps, top, and w.
PR: kern/7899
Reviewed by: bde, elvind
1998-11-11 10:04:13 +00:00
|
|
|
case FIOSETOWN:
|
2012-05-21 22:21:00 +00:00
|
|
|
/*
|
|
|
|
* XXX: Add some sort of locking here?
|
|
|
|
* fsetown() can sleep.
|
|
|
|
*/
|
Installed the second patch attached to kern/7899 with some changes suggested
by bde, a few other tweaks to get the patch to apply cleanly again and
some improvements to the comments.
This change closes some fairly minor security holes associated with
F_SETOWN, fixes a few bugs, and removes some limitations that F_SETOWN
had on tty devices. For more details, see the description on the PR.
Because this patch increases the size of the proc and pgrp structures,
it is necessary to re-install the includes and recompile libkvm,
the vinum lkm, fstat, gcore, gdb, ipfilter, ps, top, and w.
PR: kern/7899
Reviewed by: bde, elvind
1998-11-11 10:04:13 +00:00
|
|
|
error = fsetown(*(int *)addr, &d->bd_sigio);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case FIOGETOWN:
|
2012-05-21 22:21:00 +00:00
|
|
|
BPFD_LOCK(d);
|
2002-10-03 02:13:00 +00:00
|
|
|
*(int *)addr = fgetown(&d->bd_sigio);
|
2012-05-21 22:21:00 +00:00
|
|
|
BPFD_UNLOCK(d);
|
Installed the second patch attached to kern/7899 with some changes suggested
by bde, a few other tweaks to get the patch to apply cleanly again and
some improvements to the comments.
This change closes some fairly minor security holes associated with
F_SETOWN, fixes a few bugs, and removes some limitations that F_SETOWN
had on tty devices. For more details, see the description on the PR.
Because this patch increases the size of the proc and pgrp structures,
it is necessary to re-install the includes and recompile libkvm,
the vinum lkm, fstat, gcore, gdb, ipfilter, ps, top, and w.
PR: kern/7899
Reviewed by: bde, elvind
1998-11-11 10:04:13 +00:00
|
|
|
break;
|
1995-06-15 18:11:00 +00:00
|
|
|
|
Installed the second patch attached to kern/7899 with some changes suggested
by bde, a few other tweaks to get the patch to apply cleanly again and
some improvements to the comments.
This change closes some fairly minor security holes associated with
F_SETOWN, fixes a few bugs, and removes some limitations that F_SETOWN
had on tty devices. For more details, see the description on the PR.
Because this patch increases the size of the proc and pgrp structures,
it is necessary to re-install the includes and recompile libkvm,
the vinum lkm, fstat, gcore, gdb, ipfilter, ps, top, and w.
PR: kern/7899
Reviewed by: bde, elvind
1998-11-11 10:04:13 +00:00
|
|
|
/* This is deprecated, FIOSETOWN should be used instead. */
|
|
|
|
case TIOCSPGRP:
|
|
|
|
error = fsetown(-(*(int *)addr), &d->bd_sigio);
|
1995-06-15 18:11:00 +00:00
|
|
|
break;
|
|
|
|
|
Installed the second patch attached to kern/7899 with some changes suggested
by bde, a few other tweaks to get the patch to apply cleanly again and
some improvements to the comments.
This change closes some fairly minor security holes associated with
F_SETOWN, fixes a few bugs, and removes some limitations that F_SETOWN
had on tty devices. For more details, see the description on the PR.
Because this patch increases the size of the proc and pgrp structures,
it is necessary to re-install the includes and recompile libkvm,
the vinum lkm, fstat, gcore, gdb, ipfilter, ps, top, and w.
PR: kern/7899
Reviewed by: bde, elvind
1998-11-11 10:04:13 +00:00
|
|
|
/* This is deprecated, FIOGETOWN should be used instead. */
|
1995-06-15 18:11:00 +00:00
|
|
|
case TIOCGPGRP:
|
2002-10-03 02:13:00 +00:00
|
|
|
*(int *)addr = -fgetown(&d->bd_sigio);
|
1995-06-15 18:11:00 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
case BIOCSRSIG: /* Set receive signal */
|
|
|
|
{
|
2003-03-02 15:56:49 +00:00
|
|
|
u_int sig;
|
1995-06-15 18:11:00 +00:00
|
|
|
|
|
|
|
sig = *(u_int *)addr;
|
|
|
|
|
|
|
|
if (sig >= NSIG)
|
|
|
|
error = EINVAL;
|
2012-05-21 22:21:00 +00:00
|
|
|
else {
|
|
|
|
BPFD_LOCK(d);
|
1995-06-15 18:11:00 +00:00
|
|
|
d->bd_sig = sig;
|
2012-05-21 22:21:00 +00:00
|
|
|
BPFD_UNLOCK(d);
|
|
|
|
}
|
1995-06-15 18:11:00 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case BIOCGRSIG:
|
2012-05-21 22:21:00 +00:00
|
|
|
BPFD_LOCK(d);
|
1995-06-15 18:11:00 +00:00
|
|
|
*(u_int *)addr = d->bd_sig;
|
2012-05-21 22:21:00 +00:00
|
|
|
BPFD_UNLOCK(d);
|
1995-06-15 18:11:00 +00:00
|
|
|
break;
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patches to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstract
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring changes that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental and are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
|
|
|
|
case BIOCGETBUFMODE:
|
2012-05-21 22:21:00 +00:00
|
|
|
BPFD_LOCK(d);
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patches to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstract
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring changes that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental and are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
*(u_int *)addr = d->bd_bufmode;
|
2012-05-21 22:21:00 +00:00
|
|
|
BPFD_UNLOCK(d);
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patches to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstract
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring changes that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental and are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
case BIOCSETBUFMODE:
|
|
|
|
/*
|
|
|
|
* Allow the buffering mode to be changed as long as we
|
|
|
|
* haven't yet committed to a particular mode. Our
|
|
|
|
* definition of commitment, for now, is whether or not a
|
|
|
|
* buffer has been allocated or an interface attached, since
|
|
|
|
* that's the point where things get tricky.
|
|
|
|
*/
|
|
|
|
switch (*(u_int *)addr) {
|
|
|
|
case BPF_BUFMODE_BUFFER:
|
|
|
|
break;
|
|
|
|
|
|
|
|
case BPF_BUFMODE_ZBUF:
|
|
|
|
if (bpf_zerocopy_enable)
|
|
|
|
break;
|
|
|
|
/* FALLTHROUGH */
|
|
|
|
|
|
|
|
default:
|
2010-04-27 15:16:54 +00:00
|
|
|
CURVNET_RESTORE();
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patches to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstract
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring changes that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental and are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
return (EINVAL);
|
|
|
|
}
|
|
|
|
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_LOCK(d);
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patches to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstract
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring changes that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental and are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
if (d->bd_sbuf != NULL || d->bd_hbuf != NULL ||
|
|
|
|
d->bd_fbuf != NULL || d->bd_bif != NULL) {
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_UNLOCK(d);
|
2010-04-27 15:16:54 +00:00
|
|
|
CURVNET_RESTORE();
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patches to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstract
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring changes that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental and are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
return (EBUSY);
|
|
|
|
}
|
|
|
|
d->bd_bufmode = *(u_int *)addr;
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_UNLOCK(d);
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patches to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstract
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring changes that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental and are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
case BIOCGETZMAX:
|
2010-04-27 15:16:54 +00:00
|
|
|
error = bpf_ioctl_getzmax(td, d, (size_t *)addr);
|
|
|
|
break;
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patches to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstract
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring changes that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental and are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
|
|
|
|
case BIOCSETZBUF:
|
2010-04-27 15:16:54 +00:00
|
|
|
error = bpf_ioctl_setzbuf(td, d, (struct bpf_zbuf *)addr);
|
|
|
|
break;
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patches to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstract
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring changes that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental and are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
|
|
|
|
case BIOCROTZBUF:
|
2010-04-27 15:16:54 +00:00
|
|
|
error = bpf_ioctl_rotzbuf(td, d, (struct bpf_zbuf *)addr);
|
|
|
|
break;
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
2008-11-26 22:32:07 +00:00
|
|
|
CURVNET_RESTORE();
|
1994-05-24 10:09:53 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2019-05-13 13:45:28 +00:00
|
|
|
* Set d's packet filter program to fp. If this file already has a filter,
|
|
|
|
* free it and replace it. Returns EINVAL for bogus requests.
|
2012-06-04 12:36:58 +00:00
|
|
|
*
|
2019-05-13 13:45:28 +00:00
|
|
|
* Note we use global lock here to serialize bpf_setf() and bpf_setif()
|
|
|
|
* calls.
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
1995-12-14 09:55:16 +00:00
|
|
|
static int
|
2006-06-15 15:39:12 +00:00
|
|
|
bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
2012-05-29 22:28:46 +00:00
|
|
|
#ifdef COMPAT_FREEBSD32
|
|
|
|
struct bpf_program fp_swab;
|
|
|
|
struct bpf_program32 *fp32;
|
|
|
|
#endif
|
2019-05-13 13:45:28 +00:00
|
|
|
struct bpf_program_buffer *fcode;
|
|
|
|
struct bpf_insn *filter;
|
2006-05-30 19:24:01 +00:00
|
|
|
#ifdef BPF_JITTER
|
2019-05-13 13:45:28 +00:00
|
|
|
bpf_jit_filter *jfunc;
|
2005-12-06 02:58:12 +00:00
|
|
|
#endif
|
2012-05-29 22:21:53 +00:00
|
|
|
size_t size;
|
|
|
|
u_int flen;
|
2019-05-13 13:45:28 +00:00
|
|
|
bool track_event;
|
2010-04-25 16:43:41 +00:00
|
|
|
|
2012-05-29 22:28:46 +00:00
|
|
|
#ifdef COMPAT_FREEBSD32
|
2012-05-29 18:44:53 +00:00
|
|
|
switch (cmd) {
|
|
|
|
case BIOCSETF32:
|
|
|
|
case BIOCSETWF32:
|
|
|
|
case BIOCSETFNR32:
|
2010-04-25 16:43:41 +00:00
|
|
|
fp32 = (struct bpf_program32 *)fp;
|
|
|
|
fp_swab.bf_len = fp32->bf_len;
|
2019-05-13 13:45:28 +00:00
|
|
|
fp_swab.bf_insns =
|
|
|
|
(struct bpf_insn *)(uintptr_t)fp32->bf_insns;
|
2010-04-25 16:43:41 +00:00
|
|
|
fp = &fp_swab;
|
2012-05-29 18:44:53 +00:00
|
|
|
switch (cmd) {
|
|
|
|
case BIOCSETF32:
|
|
|
|
cmd = BIOCSETF;
|
|
|
|
break;
|
|
|
|
case BIOCSETWF32:
|
2010-04-25 16:43:41 +00:00
|
|
|
cmd = BIOCSETWF;
|
2012-05-29 18:44:53 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
2010-04-25 16:43:41 +00:00
|
|
|
}
|
|
|
|
#endif
|
2012-05-29 22:21:53 +00:00
|
|
|
|
2019-05-13 13:45:28 +00:00
|
|
|
filter = NULL;
|
2012-05-29 22:21:53 +00:00
|
|
|
#ifdef BPF_JITTER
|
2019-05-13 13:45:28 +00:00
|
|
|
jfunc = NULL;
|
2012-05-29 22:21:53 +00:00
|
|
|
#endif
|
2012-05-21 22:13:48 +00:00
|
|
|
/*
|
|
|
|
* Check new filter validness before acquiring any locks.
|
|
|
|
* Allocate memory for new filter, if needed.
|
|
|
|
*/
|
|
|
|
flen = fp->bf_len;
|
2012-05-29 22:28:46 +00:00
|
|
|
if (flen > bpf_maxinsns || (fp->bf_insns == NULL && flen != 0))
|
2012-05-21 22:13:48 +00:00
|
|
|
return (EINVAL);
|
|
|
|
size = flen * sizeof(*fp->bf_insns);
|
2012-05-29 12:52:30 +00:00
|
|
|
if (size > 0) {
|
2019-05-13 13:45:28 +00:00
|
|
|
/* We're setting up new filter. Copy and check actual data. */
|
|
|
|
fcode = bpf_program_buffer_alloc(size, M_WAITOK);
|
|
|
|
filter = (struct bpf_insn *)fcode->buffer;
|
|
|
|
if (copyin(fp->bf_insns, filter, size) != 0 ||
|
|
|
|
!bpf_validate(filter, flen)) {
|
2012-05-29 12:52:30 +00:00
|
|
|
free(fcode, M_BPF);
|
|
|
|
return (EINVAL);
|
|
|
|
}
|
2012-05-21 22:19:19 +00:00
|
|
|
#ifdef BPF_JITTER
|
2018-06-11 23:32:06 +00:00
|
|
|
if (cmd != BIOCSETWF) {
|
|
|
|
/*
|
|
|
|
* Filter is copied inside fcode and is
|
|
|
|
* perfectly valid.
|
|
|
|
*/
|
2019-05-13 13:45:28 +00:00
|
|
|
jfunc = bpf_jitter(filter, flen);
|
2018-06-11 23:32:06 +00:00
|
|
|
}
|
2012-05-21 22:19:19 +00:00
|
|
|
#endif
|
2012-05-29 22:21:53 +00:00
|
|
|
}
|
2012-05-21 22:19:19 +00:00
|
|
|
|
2019-05-13 13:45:28 +00:00
|
|
|
track_event = false;
|
|
|
|
fcode = NULL;
|
2012-05-21 22:13:48 +00:00
|
|
|
|
2019-05-13 13:45:28 +00:00
|
|
|
BPF_LOCK();
|
2012-05-29 12:52:30 +00:00
|
|
|
BPFD_LOCK(d);
|
2019-05-13 13:45:28 +00:00
|
|
|
/* Set up new filter. */
|
2012-05-29 22:21:53 +00:00
|
|
|
if (cmd == BIOCSETWF) {
|
2019-05-13 13:45:28 +00:00
|
|
|
if (d->bd_wfilter != NULL) {
|
|
|
|
fcode = __containerof((void *)d->bd_wfilter,
|
|
|
|
struct bpf_program_buffer, buffer);
|
|
|
|
#ifdef BPF_JITTER
|
|
|
|
fcode->func = NULL;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
d->bd_wfilter = filter;
|
2012-05-29 22:21:53 +00:00
|
|
|
} else {
|
2019-05-13 13:45:28 +00:00
|
|
|
if (d->bd_rfilter != NULL) {
|
|
|
|
fcode = __containerof((void *)d->bd_rfilter,
|
|
|
|
struct bpf_program_buffer, buffer);
|
|
|
|
#ifdef BPF_JITTER
|
|
|
|
fcode->func = d->bd_bfilter;
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
d->bd_rfilter = filter;
|
2006-05-30 19:24:01 +00:00
|
|
|
#ifdef BPF_JITTER
|
2012-05-29 12:52:30 +00:00
|
|
|
d->bd_bfilter = jfunc;
|
2005-12-06 02:58:12 +00:00
|
|
|
#endif
|
2012-05-29 12:52:30 +00:00
|
|
|
if (cmd == BIOCSETF)
|
|
|
|
reset_d(d);
|
2012-04-06 06:55:21 +00:00
|
|
|
|
2019-05-13 13:45:28 +00:00
|
|
|
if (bpf_check_upgrade(cmd, d, filter, flen) != 0) {
|
|
|
|
/*
|
|
|
|
* Filter can be set several times without
|
|
|
|
* specifying interface. In this case just mark d
|
|
|
|
* as reader.
|
|
|
|
*/
|
|
|
|
d->bd_writer = 0;
|
|
|
|
if (d->bd_bif != NULL) {
|
|
|
|
/*
|
|
|
|
* Remove descriptor from writers-only list
|
|
|
|
* and add it to active readers list.
|
|
|
|
*/
|
|
|
|
CK_LIST_REMOVE(d, bd_next);
|
|
|
|
CK_LIST_INSERT_HEAD(&d->bd_bif->bif_dlist,
|
|
|
|
d, bd_next);
|
|
|
|
CTR2(KTR_NET,
|
|
|
|
"%s: upgrade required by pid %d",
|
|
|
|
__func__, d->bd_pid);
|
|
|
|
track_event = true;
|
|
|
|
}
|
|
|
|
}
|
2012-05-29 12:52:30 +00:00
|
|
|
}
|
|
|
|
BPFD_UNLOCK(d);
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2019-05-13 13:45:28 +00:00
|
|
|
if (fcode != NULL)
|
|
|
|
epoch_call(net_epoch_preempt, &fcode->epoch_ctx,
|
|
|
|
bpf_program_buffer_free);
|
|
|
|
|
|
|
|
if (track_event)
|
|
|
|
EVENTHANDLER_INVOKE(bpf_track,
|
|
|
|
d->bd_bif->bif_ifp, d->bd_bif->bif_dlt, 1);
|
2012-04-06 06:55:21 +00:00
|
|
|
|
2012-05-21 22:13:48 +00:00
|
|
|
BPF_UNLOCK();
|
2012-05-29 12:52:30 +00:00
|
|
|
return (0);
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Detach a file from its current interface (if attached at all) and attach
|
|
|
|
* to the interface indicated by the name stored in ifr.
|
|
|
|
* Return an errno or 0.
|
|
|
|
*/
|
|
|
|
static int
|
2006-06-15 15:39:12 +00:00
|
|
|
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
|
|
|
struct bpf_if *bp;
|
1996-02-06 18:51:28 +00:00
|
|
|
struct ifnet *theywant;
|
|
|
|
|
2012-05-21 22:13:48 +00:00
|
|
|
BPF_LOCK_ASSERT();
|
|
|
|
|
1996-02-06 18:51:28 +00:00
|
|
|
theywant = ifunit(ifr->ifr_name);
|
2006-06-02 19:59:33 +00:00
|
|
|
if (theywant == NULL || theywant->if_bpf == NULL)
|
|
|
|
return (ENXIO);
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2006-06-02 19:59:33 +00:00
|
|
|
bp = theywant->if_bpf;
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
2015-07-31 20:02:12 +00:00
|
|
|
* At this point, we expect the buffer is already allocated. If not,
|
|
|
|
* return an error.
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patchs to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstrac
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring hanges that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
switch (d->bd_bufmode) {
|
|
|
|
case BPF_BUFMODE_BUFFER:
|
|
|
|
case BPF_BUFMODE_ZBUF:
|
|
|
|
if (d->bd_sbuf == NULL)
|
|
|
|
return (EINVAL);
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
panic("bpf_setif: bufmode %d", d->bd_bufmode);
|
|
|
|
}
|
2012-05-21 22:13:48 +00:00
|
|
|
if (bp != d->bd_bif)
|
2006-06-02 19:59:33 +00:00
|
|
|
bpf_attachd(d, bp);
|
2019-05-13 13:45:28 +00:00
|
|
|
else {
|
|
|
|
BPFD_LOCK(d);
|
|
|
|
reset_d(d);
|
|
|
|
BPFD_UNLOCK(d);
|
|
|
|
}
|
2006-06-02 19:59:33 +00:00
|
|
|
return (0);
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
1997-09-14 03:03:05 +00:00
|
|
|
* Support for select() and poll() system calls
|
1994-05-24 10:09:53 +00:00
|
|
|
*
|
|
|
|
* Return true iff the specific operation will not block indefinitely.
|
|
|
|
* Otherwise, return false but make a note that a selwakeup() must be done.
|
|
|
|
*/
|
2002-09-28 17:15:38 +00:00
|
|
|
static int
|
2006-06-15 15:39:12 +00:00
|
|
|
bpfpoll(struct cdev *dev, int events, struct thread *td)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
2001-02-16 17:10:28 +00:00
|
|
|
struct bpf_d *d;
|
2001-04-04 23:27:35 +00:00
|
|
|
int revents;
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2008-08-13 15:41:21 +00:00
|
|
|
if (devfs_get_cdevpriv((void **)&d) != 0 || d->bd_bif == NULL)
|
|
|
|
return (events &
|
|
|
|
(POLLHUP|POLLIN|POLLRDNORM|POLLOUT|POLLWRNORM));
|
The advent of if_detach, allowing interface removal at runtime, makes it
possible for a panic to occur if BPF is in use on the interface at the
time of the call to if_detach. This happens because BPF maintains pointers
to the struct ifnet describing the interface, which is freed by if_detach.
To correct this problem, a new call, bpfdetach, is introduced. bpfdetach
locates BPF descriptor references to the interface, and NULLs them. Other
BPF code is modified so that discovery of a NULL interface results in
ENXIO (already implemented for some calls). Processes blocked on a BPF
call will also be woken up so that they can receive ENXIO.
Interface drivers that invoke bpfattach and if_detach must be modified to
also call bpfattach(ifp) before calling if_detach(ifp). This is relevant
for buses that support hot removal, such as pccard and usb. Patches to
all effected devices will not be committed, only to if_wi.c, due to
testing limitations. To reproduce the crash, load up tcpdump on you
favorite pccard ethernet card, and then eject the card. As some pccard
drivers do not invoke if_detach(ifp), this bug will not manifest itself
for those drivers.
Reviewed by: wes
2000-03-19 05:42:34 +00:00
|
|
|
|
2005-09-05 23:08:04 +00:00
|
|
|
/*
|
|
|
|
* Refresh PID associated with this descriptor.
|
|
|
|
*/
|
2001-04-04 23:27:35 +00:00
|
|
|
revents = events & (POLLOUT | POLLWRNORM);
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_LOCK(d);
|
2012-04-06 06:53:58 +00:00
|
|
|
BPF_PID_REFRESH(d, td);
|
This Implements the mumbled about "Jail" feature.
This is a seriously beefed up chroot kind of thing. The process
is jailed along the same lines as a chroot does it, but with
additional tough restrictions imposed on what the superuser can do.
For all I know, it is safe to hand over the root bit inside a
prison to the customer living in that prison, this is what
it was developed for in fact: "real virtual servers".
Each prison has an ip number associated with it, which all IP
communications will be coerced to use and each prison has its own
hostname.
Needless to say, you need more RAM this way, but the advantage is
that each customer can run their own particular version of apache
and not stomp on the toes of their neighbors.
It generally does what one would expect, but setting up a jail
still takes a little knowledge.
A few notes:
I have no scripts for setting up a jail, don't ask me for them.
The IP number should be an alias on one of the interfaces.
mount a /proc in each jail, it will make ps more useable.
/proc/<pid>/status tells the hostname of the prison for
jailed processes.
Quotas are only sensible if you have a mountpoint per prison.
There are no privisions for stopping resource-hogging.
Some "#ifdef INET" and similar may be missing (send patches!)
If somebody wants to take it from here and develop it into
more of a "virtual machine" they should be most welcome!
Tools, comments, patches & documentation most welcome.
Have fun...
Sponsored by: http://www.rndassociates.com/
Run for almost a year by: http://www.servetheweb.com/
1999-04-28 11:38:52 +00:00
|
|
|
if (events & (POLLIN | POLLRDNORM)) {
|
2003-08-05 07:12:49 +00:00
|
|
|
if (bpf_ready(d))
|
1997-09-14 03:03:05 +00:00
|
|
|
revents |= events & (POLLIN | POLLRDNORM);
|
2001-12-14 22:17:54 +00:00
|
|
|
else {
|
2001-09-21 22:46:54 +00:00
|
|
|
selrecord(td, &d->bd_sel);
|
2001-12-14 22:17:54 +00:00
|
|
|
/* Start the read timeout if necessary. */
|
|
|
|
if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
|
|
|
|
callout_reset(&d->bd_callout, d->bd_rtout,
|
|
|
|
bpf_timed_out, d);
|
|
|
|
d->bd_state = BPF_WAITING;
|
|
|
|
}
|
|
|
|
}
|
This Implements the mumbled about "Jail" feature.
This is a seriously beefed up chroot kind of thing. The process
is jailed along the same lines as a chroot does it, but with
additional tough restrictions imposed on what the superuser can do.
For all I know, it is safe to hand over the root bit inside a
prison to the customer living in that prison, this is what
it was developed for in fact: "real virtual servers".
Each prison has an ip number associated with it, which all IP
communications will be coerced to use and each prison has its own
hostname.
Needless to say, you need more RAM this way, but the advantage is
that each customer can run their own particular version of apache
and not stomp on the toes of their neighbors.
It generally does what one would expect, but setting up a jail
still takes a little knowledge.
A few notes:
I have no scripts for setting up a jail, don't ask me for them.
The IP number should be an alias on one of the interfaces.
mount a /proc in each jail, it will make ps more useable.
/proc/<pid>/status tells the hostname of the prison for
jailed processes.
Quotas are only sensible if you have a mountpoint per prison.
There are no privisions for stopping resource-hogging.
Some "#ifdef INET" and similar may be missing (send patches!)
If somebody wants to take it from here and develop it into
more of a "virtual machine" they should be most welcome!
Tools, comments, patches & documentation most welcome.
Have fun...
Sponsored by: http://www.rndassociates.com/
Run for almost a year by: http://www.servetheweb.com/
1999-04-28 11:38:52 +00:00
|
|
|
}
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_UNLOCK(d);
|
1997-09-14 03:03:05 +00:00
|
|
|
return (revents);
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
|
2003-08-05 07:12:49 +00:00
|
|
|
/*
|
|
|
|
* Support for kevent() system call. Register EVFILT_READ filters and
|
|
|
|
* reject all others.
|
|
|
|
*/
|
|
|
|
int
|
2006-06-15 15:39:12 +00:00
|
|
|
bpfkqfilter(struct cdev *dev, struct knote *kn)
|
2003-08-05 07:12:49 +00:00
|
|
|
{
|
2008-08-13 15:41:21 +00:00
|
|
|
struct bpf_d *d;
|
2003-08-05 07:12:49 +00:00
|
|
|
|
2008-08-13 15:41:21 +00:00
|
|
|
if (devfs_get_cdevpriv((void **)&d) != 0 ||
|
|
|
|
kn->kn_filter != EVFILT_READ)
|
2003-08-05 07:12:49 +00:00
|
|
|
return (1);
|
|
|
|
|
2007-12-23 14:10:33 +00:00
|
|
|
/*
|
2005-09-05 23:08:04 +00:00
|
|
|
* Refresh PID associated with this descriptor.
|
|
|
|
*/
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_LOCK(d);
|
2012-04-06 06:53:58 +00:00
|
|
|
BPF_PID_REFRESH_CUR(d);
|
2003-08-05 07:12:49 +00:00
|
|
|
kn->kn_fop = &bpfread_filtops;
|
|
|
|
kn->kn_hook = d;
|
2006-07-03 20:02:06 +00:00
|
|
|
knlist_add(&d->bd_sel.si_note, kn, 1);
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_UNLOCK(d);
|
2003-08-05 07:12:49 +00:00
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2006-06-15 15:39:12 +00:00
|
|
|
filt_bpfdetach(struct knote *kn)
|
2003-08-05 07:12:49 +00:00
|
|
|
{
|
|
|
|
struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
|
|
|
|
|
2004-08-15 06:24:42 +00:00
|
|
|
knlist_remove(&d->bd_sel.si_note, kn, 0);
|
2003-08-05 07:12:49 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2006-06-15 15:39:12 +00:00
|
|
|
filt_bpfread(struct knote *kn, long hint)
|
2003-08-05 07:12:49 +00:00
|
|
|
{
|
|
|
|
struct bpf_d *d = (struct bpf_d *)kn->kn_hook;
|
|
|
|
int ready;
|
|
|
|
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_LOCK_ASSERT(d);
|
2003-08-05 07:12:49 +00:00
|
|
|
ready = bpf_ready(d);
|
|
|
|
if (ready) {
|
|
|
|
kn->kn_data = d->bd_slen;
|
2015-08-03 22:14:45 +00:00
|
|
|
/*
|
|
|
|
* Ignore the hold buffer if it is being copied to user space.
|
|
|
|
*/
|
|
|
|
if (!d->bd_hbuf_in_use && d->bd_hbuf)
|
2003-08-05 07:12:49 +00:00
|
|
|
kn->kn_data += d->bd_hlen;
|
2010-03-12 19:42:42 +00:00
|
|
|
} else if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
|
2003-08-05 07:12:49 +00:00
|
|
|
callout_reset(&d->bd_callout, d->bd_rtout,
|
|
|
|
bpf_timed_out, d);
|
|
|
|
d->bd_state = BPF_WAITING;
|
|
|
|
}
|
2004-02-16 18:19:15 +00:00
|
|
|
|
2003-08-05 07:12:49 +00:00
|
|
|
return (ready);
|
|
|
|
}
|
|
|
|
|
2011-12-31 07:21:28 +00:00
|
|
|
#define BPF_TSTAMP_NONE 0
|
|
|
|
#define BPF_TSTAMP_FAST 1
|
|
|
|
#define BPF_TSTAMP_NORMAL 2
|
|
|
|
#define BPF_TSTAMP_EXTERN 3
|
|
|
|
|
|
|
|
static int
|
|
|
|
bpf_ts_quality(int tstype)
|
|
|
|
{
|
|
|
|
|
|
|
|
if (tstype == BPF_T_NONE)
|
|
|
|
return (BPF_TSTAMP_NONE);
|
|
|
|
if ((tstype & BPF_T_FAST) != 0)
|
|
|
|
return (BPF_TSTAMP_FAST);
|
|
|
|
|
|
|
|
return (BPF_TSTAMP_NORMAL);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
bpf_gettime(struct bintime *bt, int tstype, struct mbuf *m)
|
|
|
|
{
|
|
|
|
struct m_tag *tag;
|
|
|
|
int quality;
|
|
|
|
|
|
|
|
quality = bpf_ts_quality(tstype);
|
|
|
|
if (quality == BPF_TSTAMP_NONE)
|
|
|
|
return (quality);
|
|
|
|
|
|
|
|
if (m != NULL) {
|
|
|
|
tag = m_tag_locate(m, MTAG_BPF, MTAG_BPF_TIMESTAMP, NULL);
|
|
|
|
if (tag != NULL) {
|
|
|
|
*bt = *(struct bintime *)(tag + 1);
|
|
|
|
return (BPF_TSTAMP_EXTERN);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (quality == BPF_TSTAMP_NORMAL)
|
|
|
|
binuptime(bt);
|
|
|
|
else
|
|
|
|
getbinuptime(bt);
|
|
|
|
|
|
|
|
return (quality);
|
|
|
|
}
|
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
|
|
|
* Incoming linkage from device drivers. Process the packet pkt, of length
|
|
|
|
* pktlen, which is stored in a contiguous buffer. The packet is parsed
|
|
|
|
* by each process' filter, and if accepted, stashed into the corresponding
|
|
|
|
* buffer.
|
|
|
|
*/
|
|
|
|
void
|
2006-06-15 15:39:12 +00:00
|
|
|
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
2019-05-13 13:45:28 +00:00
|
|
|
struct epoch_tracker et;
|
Implement flexible BPF timestamping framework.
- Allow setting format, resolution and accuracy of BPF time stamps per
listener. Previously, we were only able to use microtime(9). Now we can
set various resolutions and accuracies with ioctl(2) BIOCSTSTAMP command.
Similarly, we can get the current resolution and accuracy with BIOCGTSTAMP
command. Document all supported options in bpf(4) and their uses.
- Introduce new time stamp 'struct bpf_ts' and header 'struct bpf_xhdr'.
The new time stamp has both 64-bit second and fractional parts. bpf_xhdr
has this time stamp instead of 'struct timeval' for bh_tstamp. The new
structures let us use bh_tstamp of same size on both 32-bit and 64-bit
platforms without adding additional shims for 32-bit binaries. On 64-bit
platforms, size of BPF header does not change compared to bpf_hdr as its
members are already all 64-bit long. On 32-bit platforms, the size may
increase by 8 bytes. For backward compatibility, struct bpf_hdr with
struct timeval is still the default header unless new time stamp format is
explicitly requested. However, the behaviour may change in the future and
all relevant code is wrapped around "#ifdef BURN_BRIDGES" for now.
- Add experimental support for tagging mbufs with time stamps from a lower
layer, e.g., device driver. Currently, mbuf_tags(9) is used to tag mbufs.
The time stamps must be uptime in 'struct bintime' format as binuptime(9)
and getbinuptime(9) do.
Reviewed by: net@
2010-06-15 19:28:44 +00:00
|
|
|
struct bintime bt;
|
2003-03-02 15:56:49 +00:00
|
|
|
struct bpf_d *d;
|
2009-08-12 17:28:53 +00:00
|
|
|
#ifdef BPF_JITTER
|
|
|
|
bpf_jit_filter *bf;
|
|
|
|
#endif
|
2011-12-31 07:21:28 +00:00
|
|
|
u_int slen;
|
|
|
|
int gottime;
|
2001-02-16 17:10:28 +00:00
|
|
|
|
2011-12-31 07:21:28 +00:00
|
|
|
gottime = BPF_TSTAMP_NONE;
|
2019-05-13 13:45:28 +00:00
|
|
|
NET_EPOCH_ENTER(et);
|
|
|
|
CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
|
2018-03-20 22:57:06 +00:00
|
|
|
counter_u64_add(d->bd_rcount, 1);
|
2008-08-01 21:38:46 +00:00
|
|
|
/*
|
2019-05-13 13:45:28 +00:00
|
|
|
* NB: We dont call BPF_CHECK_DIRECTION() here since there
|
|
|
|
* is no way for the caller to indiciate to us whether this
|
|
|
|
* packet is inbound or outbound. In the bpf_mtap() routines,
|
|
|
|
* we use the interface pointers on the mbuf to figure it out.
|
2008-08-01 21:38:46 +00:00
|
|
|
*/
|
2005-12-06 02:58:12 +00:00
|
|
|
#ifdef BPF_JITTER
|
2009-08-12 17:28:53 +00:00
|
|
|
bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
|
|
|
|
if (bf != NULL)
|
|
|
|
slen = (*(bf->func))(pkt, pktlen, pktlen);
|
2005-12-06 02:58:12 +00:00
|
|
|
else
|
|
|
|
#endif
|
2005-08-22 19:35:48 +00:00
|
|
|
slen = bpf_filter(d->bd_rfilter, pkt, pktlen, pktlen);
|
2002-07-31 16:11:32 +00:00
|
|
|
if (slen != 0) {
|
2012-04-06 06:53:58 +00:00
|
|
|
/*
|
|
|
|
* Filter matches. Let's to acquire write lock.
|
|
|
|
*/
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_LOCK(d);
|
2018-03-20 22:57:06 +00:00
|
|
|
counter_u64_add(d->bd_fcount, 1);
|
2011-12-31 07:21:28 +00:00
|
|
|
if (gottime < bpf_ts_quality(d->bd_tstamp))
|
2019-05-13 13:45:28 +00:00
|
|
|
gottime = bpf_gettime(&bt, d->bd_tstamp,
|
|
|
|
NULL);
|
2002-07-31 16:11:32 +00:00
|
|
|
#ifdef MAC
|
2007-10-24 19:04:04 +00:00
|
|
|
if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
|
2002-07-31 16:11:32 +00:00
|
|
|
#endif
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patchs to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstrac
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring hanges that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
catchpacket(d, pkt, pktlen, slen,
|
Implement flexible BPF timestamping framework.
- Allow setting format, resolution and accuracy of BPF time stamps per
listener. Previously, we were only able to use microtime(9). Now we can
set various resolutions and accuracies with ioctl(2) BIOCSTSTAMP command.
Similarly, we can get the current resolution and accuracy with BIOCGTSTAMP
command. Document all supported options in bpf(4) and their uses.
- Introduce new time stamp 'struct bpf_ts' and header 'struct bpf_xhdr'.
The new time stamp has both 64-bit second and fractional parts. bpf_xhdr
has this time stamp instead of 'struct timeval' for bh_tstamp. The new
structures let us use bh_tstamp of same size on both 32-bit and 64-bit
platforms without adding additional shims for 32-bit binaries. On 64-bit
platforms, size of BPF header does not change compared to bpf_hdr as its
members are already all 64-bit long. On 32-bit platforms, the size may
increase by 8 bytes. For backward compatibility, struct bpf_hdr with
struct timeval is still the default header unless new time stamp format is
explicitly requested. However, the behaviour may change in the future and
all relevant code is wrapped around "#ifdef BURN_BRIDGES" for now.
- Add experimental support for tagging mbufs with time stamps from a lower
layer, e.g., device driver. Currently, mbuf_tags(9) is used to tag mbufs.
The time stamps must be uptime in 'struct bintime' format as binuptime(9)
and getbinuptime(9) do.
Reviewed by: net@
2010-06-15 19:28:44 +00:00
|
|
|
bpf_append_bytes, &bt);
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_UNLOCK(d);
|
2002-07-31 16:11:32 +00:00
|
|
|
}
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
2019-05-13 13:45:28 +00:00
|
|
|
NET_EPOCH_EXIT(et);
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
|
2008-04-28 19:42:11 +00:00
|
|
|
/*
 * True when descriptor d must skip a packet because of its direction
 * setting: a BPF_D_IN descriptor skips packets whose r differs from i, and
 * a BPF_D_OUT descriptor skips packets whose r equals i.  bpf_mtap() passes
 * the mbuf's receive interface as r and the tapped interface as i.
 *
 * r and i are only evaluated when the descriptor's direction is BPF_D_IN
 * or BPF_D_OUT.
 */
#define	BPF_CHECK_DIRECTION(d, r, i)					\
	(((d)->bd_direction == BPF_D_IN && (r) != (i)) ||		\
	 ((d)->bd_direction == BPF_D_OUT && (r) == (i)))
|
2007-02-26 22:24:14 +00:00
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
|
|
|
* Incoming linkage from device drivers, when packet is in an mbuf chain.
|
2012-04-06 06:53:58 +00:00
|
|
|
* Locking model is explained in bpf_tap().
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
|
|
|
void
|
2006-06-15 15:39:12 +00:00
|
|
|
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
2019-05-13 13:45:28 +00:00
|
|
|
struct epoch_tracker et;
|
Implement flexible BPF timestamping framework.
- Allow setting format, resolution and accuracy of BPF time stamps per
listener. Previously, we were only able to use microtime(9). Now we can
set various resolutions and accuracies with ioctl(2) BIOCSTSTAMP command.
Similarly, we can get the current resolution and accuracy with BIOCGTSTAMP
command. Document all supported options in bpf(4) and their uses.
- Introduce new time stamp 'struct bpf_ts' and header 'struct bpf_xhdr'.
The new time stamp has both 64-bit second and fractional parts. bpf_xhdr
has this time stamp instead of 'struct timeval' for bh_tstamp. The new
structures let us use bh_tstamp of same size on both 32-bit and 64-bit
platforms without adding additional shims for 32-bit binaries. On 64-bit
platforms, size of BPF header does not change compared to bpf_hdr as its
members are already all 64-bit long. On 32-bit platforms, the size may
increase by 8 bytes. For backward compatibility, struct bpf_hdr with
struct timeval is still the default header unless new time stamp format is
explicitly requested. However, the behaviour may change in the future and
all relevant code is wrapped around "#ifdef BURN_BRIDGES" for now.
- Add experimental support for tagging mbufs with time stamps from a lower
layer, e.g., device driver. Currently, mbuf_tags(9) is used to tag mbufs.
The time stamps must be uptime in 'struct bintime' format as binuptime(9)
and getbinuptime(9) do.
Reviewed by: net@
2010-06-15 19:28:44 +00:00
|
|
|
struct bintime bt;
|
1994-05-24 10:09:53 +00:00
|
|
|
struct bpf_d *d;
|
2009-08-12 17:28:53 +00:00
|
|
|
#ifdef BPF_JITTER
|
|
|
|
bpf_jit_filter *bf;
|
|
|
|
#endif
|
2011-12-31 07:21:28 +00:00
|
|
|
u_int pktlen, slen;
|
|
|
|
int gottime;
|
2006-07-24 15:42:04 +00:00
|
|
|
|
2008-04-15 17:08:24 +00:00
|
|
|
/* Skip outgoing duplicate packets. */
|
Restructure mbuf send tags to provide stronger guarantees.
- Perform ifp mismatch checks (to determine if a send tag is allocated
for a different ifp than the one the packet is being output on), in
ip_output() and ip6_output(). This avoids sending packets with send
tags to ifnet drivers that don't support send tags.
Since we are now checking for ifp mismatches before invoking
if_output, we can now try to allocate a new tag before invoking
if_output sending the original packet on the new tag if allocation
succeeds.
To avoid code duplication for the fragment and unfragmented cases,
add ip_output_send() and ip6_output_send() as wrappers around
if_output and nd6_output_ifp, respectively. All of the logic for
setting send tags and dealing with send tag-related errors is done
in these wrapper functions.
For pseudo interfaces that wrap other network interfaces (vlan and
lagg), wrapper send tags are now allocated so that ip*_output see
the wrapper ifp as the ifp in the send tag. The if_transmit
routines rewrite the send tags after performing an ifp mismatch
check. If an ifp mismatch is detected, the transmit routines fail
with EAGAIN.
- To provide clearer life cycle management of send tags, especially
in the presence of vlan and lagg wrapper tags, add a reference count
to send tags managed via m_snd_tag_ref() and m_snd_tag_rele().
Provide a helper function (m_snd_tag_init()) for use by drivers
supporting send tags. m_snd_tag_init() takes care of the if_ref
on the ifp meaning that code allocating send tags via if_snd_tag_alloc
no longer has to manage that manually. Similarly, m_snd_tag_rele
drops the refcount on the ifp after invoking if_snd_tag_free when
the last reference to a send tag is dropped.
This also closes use after free races if there are pending packets in
driver tx rings after the socket is closed (e.g. from tcpdrop).
In order for m_free to work reliably, add a new CSUM_SND_TAG flag in
csum_flags to indicate 'snd_tag' is set (rather than 'rcvif').
Drivers now also check this flag instead of checking snd_tag against
NULL. This avoids false positive matches when a forwarded packet
has a non-NULL rcvif that was treated as a send tag.
- cxgbe was relying on snd_tag_free being called when the inp was
detached so that it could kick the firmware to flush any pending
work on the flow. This is because the driver doesn't require ACK
messages from the firmware for every request, but instead does a
kind of manual interrupt coalescing by only setting a flag to
request a completion on a subset of requests. If all of the
in-flight requests don't have the flag when the tag is detached from
the inp, the flow might never return the credits. The current
snd_tag_free command issues a flush command to force the credits to
return. However, the credit return is what also frees the mbufs,
and since those mbufs now hold references on the tag, this meant
that snd_tag_free would never be called.
To fix, explicitly drop the mbuf's reference on the snd tag when the
mbuf is queued in the firmware work queue. This means that once the
inp's reference on the tag goes away and all in-flight mbufs have
been queued to the firmware, tag's refcount will drop to zero and
snd_tag_free will kick in and send the flush request. Note that we
need to avoid doing this in the middle of ethofld_tx(), so the
driver grabs a temporary reference on the tag around that loop to
defer the free to the end of the function in case it sends the last
mbuf to the queue after the inp has dropped its reference on the
tag.
- mlx5 preallocates send tags and was using the ifp pointer even when
the send tag wasn't in use. Explicitly use the ifp from other data
structures instead.
- Sprinkle some assertions in various places to assert that received
packets don't have a send tag, and that other places that overwrite
rcvif (e.g. 802.11 transmit) don't clobber a send tag pointer.
Reviewed by: gallatin, hselasky, rgrimes, ae
Sponsored by: Netflix
Differential Revision: https://reviews.freebsd.org/D20117
2019-05-24 22:30:40 +00:00
|
|
|
if ((m->m_flags & M_PROMISC) != 0 && m_rcvif(m) == NULL) {
|
2008-04-15 17:08:24 +00:00
|
|
|
m->m_flags &= ~M_PROMISC;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2002-09-18 19:48:59 +00:00
|
|
|
pktlen = m_length(m, NULL);
|
2011-12-31 07:21:28 +00:00
|
|
|
gottime = BPF_TSTAMP_NONE;
|
2012-04-06 06:53:58 +00:00
|
|
|
|
2019-05-13 13:45:28 +00:00
|
|
|
NET_EPOCH_ENTER(et);
|
|
|
|
CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
|
Restructure mbuf send tags to provide stronger guarantees.
- Perform ifp mismatch checks (to determine if a send tag is allocated
for a different ifp than the one the packet is being output on), in
ip_output() and ip6_output(). This avoids sending packets with send
tags to ifnet drivers that don't support send tags.
Since we are now checking for ifp mismatches before invoking
if_output, we can now try to allocate a new tag before invoking
if_output sending the original packet on the new tag if allocation
succeeds.
To avoid code duplication for the fragment and unfragmented cases,
add ip_output_send() and ip6_output_send() as wrappers around
if_output and nd6_output_ifp, respectively. All of the logic for
setting send tags and dealing with send tag-related errors is done
in these wrapper functions.
For pseudo interfaces that wrap other network interfaces (vlan and
lagg), wrapper send tags are now allocated so that ip*_output see
the wrapper ifp as the ifp in the send tag. The if_transmit
routines rewrite the send tags after performing an ifp mismatch
check. If an ifp mismatch is detected, the transmit routines fail
with EAGAIN.
- To provide clearer life cycle management of send tags, especially
in the presence of vlan and lagg wrapper tags, add a reference count
to send tags managed via m_snd_tag_ref() and m_snd_tag_rele().
Provide a helper function (m_snd_tag_init()) for use by drivers
supporting send tags. m_snd_tag_init() takes care of the if_ref
on the ifp meaning that code allocating send tags via if_snd_tag_alloc
no longer has to manage that manually. Similarly, m_snd_tag_rele
drops the refcount on the ifp after invoking if_snd_tag_free when
the last reference to a send tag is dropped.
This also closes use after free races if there are pending packets in
driver tx rings after the socket is closed (e.g. from tcpdrop).
In order for m_free to work reliably, add a new CSUM_SND_TAG flag in
csum_flags to indicate 'snd_tag' is set (rather than 'rcvif').
Drivers now also check this flag instead of checking snd_tag against
NULL. This avoids false positive matches when a forwarded packet
has a non-NULL rcvif that was treated as a send tag.
- cxgbe was relying on snd_tag_free being called when the inp was
detached so that it could kick the firmware to flush any pending
work on the flow. This is because the driver doesn't require ACK
messages from the firmware for every request, but instead does a
kind of manual interrupt coalescing by only setting a flag to
request a completion on a subset of requests. If all of the
in-flight requests don't have the flag when the tag is detached from
the inp, the flow might never return the credits. The current
snd_tag_free command issues a flush command to force the credits to
return. However, the credit return is what also frees the mbufs,
and since those mbufs now hold references on the tag, this meant
that snd_tag_free would never be called.
To fix, explicitly drop the mbuf's reference on the snd tag when the
mbuf is queued in the firmware work queue. This means that once the
inp's reference on the tag goes away and all in-flight mbufs have
been queued to the firmware, tag's refcount will drop to zero and
snd_tag_free will kick in and send the flush request. Note that we
need to avoid doing this in the middle of ethofld_tx(), so the
driver grabs a temporary reference on the tag around that loop to
defer the free to the end of the function in case it sends the last
mbuf to the queue after the inp has dropped its reference on the
tag.
- mlx5 preallocates send tags and was using the ifp pointer even when
the send tag wasn't in use. Explicitly use the ifp from other data
structures instead.
- Sprinkle some assertions in various places to assert that received
packets don't have a send tag, and that other places that overwrite
rcvif (e.g. 802.11 transmit) don't clobber a send tag pointer.
Reviewed by: gallatin, hselasky, rgrimes, ae
Sponsored by: Netflix
Differential Revision: https://reviews.freebsd.org/D20117
2019-05-24 22:30:40 +00:00
|
|
|
if (BPF_CHECK_DIRECTION(d, m_rcvif(m), bp->bif_ifp))
|
2000-03-18 06:30:42 +00:00
|
|
|
continue;
|
2018-03-20 22:57:06 +00:00
|
|
|
counter_u64_add(d->bd_rcount, 1);
|
2005-12-06 02:58:12 +00:00
|
|
|
#ifdef BPF_JITTER
|
2009-08-12 17:28:53 +00:00
|
|
|
bf = bpf_jitter_enable != 0 ? d->bd_bfilter : NULL;
|
2005-12-06 02:58:12 +00:00
|
|
|
/* XXX We cannot handle multiple mbufs. */
|
2009-08-12 17:28:53 +00:00
|
|
|
if (bf != NULL && m->m_next == NULL)
|
2019-05-13 13:45:28 +00:00
|
|
|
slen = (*(bf->func))(mtod(m, u_char *), pktlen,
|
|
|
|
pktlen);
|
2005-12-06 02:58:12 +00:00
|
|
|
else
|
|
|
|
#endif
|
2005-08-22 19:35:48 +00:00
|
|
|
slen = bpf_filter(d->bd_rfilter, (u_char *)m, pktlen, 0);
|
2005-08-18 22:30:52 +00:00
|
|
|
if (slen != 0) {
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_LOCK(d);
|
2012-04-06 06:53:58 +00:00
|
|
|
|
2018-03-20 22:57:06 +00:00
|
|
|
counter_u64_add(d->bd_fcount, 1);
|
2011-12-31 07:21:28 +00:00
|
|
|
if (gottime < bpf_ts_quality(d->bd_tstamp))
|
|
|
|
gottime = bpf_gettime(&bt, d->bd_tstamp, m);
|
2002-09-21 00:59:56 +00:00
|
|
|
#ifdef MAC
|
2007-10-24 19:04:04 +00:00
|
|
|
if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
|
2002-09-21 00:59:56 +00:00
|
|
|
#endif
|
|
|
|
catchpacket(d, (u_char *)m, pktlen, slen,
|
Implement flexible BPF timestamping framework.
- Allow setting format, resolution and accuracy of BPF time stamps per
listener. Previously, we were only able to use microtime(9). Now we can
set various resolutions and accuracies with ioctl(2) BIOCSTSTAMP command.
Similarly, we can get the current resolution and accuracy with BIOCGTSTAMP
command. Document all supported options in bpf(4) and their uses.
- Introduce new time stamp 'struct bpf_ts' and header 'struct bpf_xhdr'.
The new time stamp has both 64-bit second and fractional parts. bpf_xhdr
has this time stamp instead of 'struct timeval' for bh_tstamp. The new
structures let us use bh_tstamp of same size on both 32-bit and 64-bit
platforms without adding additional shims for 32-bit binaries. On 64-bit
platforms, size of BPF header does not change compared to bpf_hdr as its
members are already all 64-bit long. On 32-bit platforms, the size may
increase by 8 bytes. For backward compatibility, struct bpf_hdr with
struct timeval is still the default header unless new time stamp format is
explicitly requested. However, the behaviour may change in the future and
all relevant code is wrapped around "#ifdef BURN_BRIDGES" for now.
- Add experimental support for tagging mbufs with time stamps from a lower
layer, e.g., device driver. Currently, mbuf_tags(9) is used to tag mbufs.
The time stamps must be uptime in 'struct bintime' format as binuptime(9)
and getbinuptime(9) do.
Reviewed by: net@
2010-06-15 19:28:44 +00:00
|
|
|
bpf_append_mbuf, &bt);
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_UNLOCK(d);
|
2005-08-18 22:30:52 +00:00
|
|
|
}
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
2019-05-13 13:45:28 +00:00
|
|
|
NET_EPOCH_EXIT(et);
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
|
2003-12-28 03:56:00 +00:00
|
|
|
/*
|
|
|
|
* Incoming linkage from device drivers, when packet is in
|
|
|
|
* an mbuf chain and to be prepended by a contiguous header.
|
|
|
|
*/
|
|
|
|
void
|
2006-06-15 15:39:12 +00:00
|
|
|
bpf_mtap2(struct bpf_if *bp, void *data, u_int dlen, struct mbuf *m)
|
2003-12-28 03:56:00 +00:00
|
|
|
{
|
2019-05-13 13:45:28 +00:00
|
|
|
struct epoch_tracker et;
|
Implement flexible BPF timestamping framework.
- Allow setting format, resolution and accuracy of BPF time stamps per
listener. Previously, we were only able to use microtime(9). Now we can
set various resolutions and accuracies with ioctl(2) BIOCSTSTAMP command.
Similarly, we can get the current resolution and accuracy with BIOCGTSTAMP
command. Document all supported options in bpf(4) and their uses.
- Introduce new time stamp 'struct bpf_ts' and header 'struct bpf_xhdr'.
The new time stamp has both 64-bit second and fractional parts. bpf_xhdr
has this time stamp instead of 'struct timeval' for bh_tstamp. The new
structures let us use bh_tstamp of same size on both 32-bit and 64-bit
platforms without adding additional shims for 32-bit binaries. On 64-bit
platforms, size of BPF header does not change compared to bpf_hdr as its
members are already all 64-bit long. On 32-bit platforms, the size may
increase by 8 bytes. For backward compatibility, struct bpf_hdr with
struct timeval is still the default header unless new time stamp format is
explicitly requested. However, the behaviour may change in the future and
all relevant code is wrapped around "#ifdef BURN_BRIDGES" for now.
- Add experimental support for tagging mbufs with time stamps from a lower
layer, e.g., device driver. Currently, mbuf_tags(9) is used to tag mbufs.
The time stamps must be uptime in 'struct bintime' format as binuptime(9)
and getbinuptime(9) do.
Reviewed by: net@
2010-06-15 19:28:44 +00:00
|
|
|
struct bintime bt;
|
2003-12-28 03:56:00 +00:00
|
|
|
struct mbuf mb;
|
|
|
|
struct bpf_d *d;
|
2011-12-31 07:21:28 +00:00
|
|
|
u_int pktlen, slen;
|
|
|
|
int gottime;
|
2006-07-24 15:42:04 +00:00
|
|
|
|
2008-04-15 17:08:24 +00:00
|
|
|
/* Skip outgoing duplicate packets. */
|
|
|
|
if ((m->m_flags & M_PROMISC) != 0 && m->m_pkthdr.rcvif == NULL) {
|
|
|
|
m->m_flags &= ~M_PROMISC;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2003-12-28 03:56:00 +00:00
|
|
|
pktlen = m_length(m, NULL);
|
|
|
|
/*
|
|
|
|
* Craft on-stack mbuf suitable for passing to bpf_filter.
|
|
|
|
* Note that we cut corners here; we only setup what's
|
|
|
|
* absolutely needed--this mbuf should never go anywhere else.
|
|
|
|
*/
|
Add an external mbuf buffer type that holds multiple unmapped pages.
Unmapped mbufs allow sendfile to carry multiple pages of data in a
single mbuf, without mapping those pages. It is a requirement for
Netflix's in-kernel TLS, and provides a 5-10% CPU savings on heavy web
serving workloads when used by sendfile, due to effectively
compressing socket buffers by an order of magnitude, and hence
reducing cache misses.
For this new external mbuf buffer type (EXT_PGS), the ext_buf pointer
now points to a struct mbuf_ext_pgs structure instead of a data
buffer. This structure contains an array of physical addresses (this
reduces cache misses compared to an earlier version that stored an
array of vm_page_t pointers). It also stores additional fields needed
for in-kernel TLS such as the TLS header and trailer data that are
currently unused. To more easily detect these mbufs, the M_NOMAP flag
is set in m_flags in addition to M_EXT.
Various functions like m_copydata() have been updated to safely access
packet contents (using uiomove_fromphys()), to make things like BPF
safe.
NIC drivers advertise support for unmapped mbufs on transmit via a new
IFCAP_NOMAP capability. This capability can be toggled via the new
'nomap' and '-nomap' ifconfig(8) commands. For NIC drivers that only
transmit packet contents via DMA and use bus_dma, adding the
capability to if_capabilities and if_capenable should be all that is
required.
If a NIC does not support unmapped mbufs, they are converted to a
chain of mapped mbufs (using sf_bufs to provide the mapping) in
ip_output or ip6_output. If an unmapped mbuf requires software
checksums, it is also converted to a chain of mapped mbufs before
computing the checksum.
Submitted by: gallatin (earlier version)
Reviewed by: gallatin, hselasky, rrs
Discussed with: ae, kp (firewalls)
Relnotes: yes
Sponsored by: Netflix
Differential Revision: https://reviews.freebsd.org/D20616
2019-06-29 00:48:33 +00:00
|
|
|
mb.m_flags = 0;
|
2003-12-28 03:56:00 +00:00
|
|
|
mb.m_next = m;
|
|
|
|
mb.m_data = data;
|
|
|
|
mb.m_len = dlen;
|
|
|
|
pktlen += dlen;
|
|
|
|
|
2011-12-31 07:21:28 +00:00
|
|
|
gottime = BPF_TSTAMP_NONE;
|
2012-04-06 06:53:58 +00:00
|
|
|
|
2019-05-13 13:45:28 +00:00
|
|
|
NET_EPOCH_ENTER(et);
|
|
|
|
CK_LIST_FOREACH(d, &bp->bif_dlist, bd_next) {
|
2008-04-28 19:42:11 +00:00
|
|
|
if (BPF_CHECK_DIRECTION(d, m->m_pkthdr.rcvif, bp->bif_ifp))
|
2003-12-28 03:56:00 +00:00
|
|
|
continue;
|
2018-03-20 22:57:06 +00:00
|
|
|
counter_u64_add(d->bd_rcount, 1);
|
2005-08-22 19:35:48 +00:00
|
|
|
slen = bpf_filter(d->bd_rfilter, (u_char *)&mb, pktlen, 0);
|
2005-08-18 22:30:52 +00:00
|
|
|
if (slen != 0) {
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_LOCK(d);
|
2012-04-06 06:53:58 +00:00
|
|
|
|
2018-03-20 22:57:06 +00:00
|
|
|
counter_u64_add(d->bd_fcount, 1);
|
2011-12-31 07:21:28 +00:00
|
|
|
if (gottime < bpf_ts_quality(d->bd_tstamp))
|
|
|
|
gottime = bpf_gettime(&bt, d->bd_tstamp, m);
|
2003-12-28 03:56:00 +00:00
|
|
|
#ifdef MAC
|
2007-10-24 19:04:04 +00:00
|
|
|
if (mac_bpfdesc_check_receive(d, bp->bif_ifp) == 0)
|
2003-12-28 03:56:00 +00:00
|
|
|
#endif
|
|
|
|
catchpacket(d, (u_char *)&mb, pktlen, slen,
|
Implement flexible BPF timestamping framework.
- Allow setting format, resolution and accuracy of BPF time stamps per
listener. Previously, we were only able to use microtime(9). Now we can
set various resolutions and accuracies with ioctl(2) BIOCSTSTAMP command.
Similarly, we can get the current resolution and accuracy with BIOCGTSTAMP
command. Document all supported options in bpf(4) and their uses.
- Introduce new time stamp 'struct bpf_ts' and header 'struct bpf_xhdr'.
The new time stamp has both 64-bit second and fractional parts. bpf_xhdr
has this time stamp instead of 'struct timeval' for bh_tstamp. The new
structures let us use bh_tstamp of same size on both 32-bit and 64-bit
platforms without adding additional shims for 32-bit binaries. On 64-bit
platforms, size of BPF header does not change compared to bpf_hdr as its
members are already all 64-bit long. On 32-bit platforms, the size may
increase by 8 bytes. For backward compatibility, struct bpf_hdr with
struct timeval is still the default header unless new time stamp format is
explicitly requested. However, the behaviour may change in the future and
all relevant code is wrapped around "#ifdef BURN_BRIDGES" for now.
- Add experimental support for tagging mbufs with time stamps from a lower
layer, e.g., device driver. Currently, mbuf_tags(9) is used to tag mbufs.
The time stamps must be uptime in 'struct bintime' format as binuptime(9)
and getbinuptime(9) do.
Reviewed by: net@
2010-06-15 19:28:44 +00:00
|
|
|
bpf_append_mbuf, &bt);
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_UNLOCK(d);
|
2005-08-18 22:30:52 +00:00
|
|
|
}
|
2003-12-28 03:56:00 +00:00
|
|
|
}
|
2019-05-13 13:45:28 +00:00
|
|
|
NET_EPOCH_EXIT(et);
|
2003-12-28 03:56:00 +00:00
|
|
|
}
|
|
|
|
|
2007-02-26 22:24:14 +00:00
|
|
|
#undef BPF_CHECK_DIRECTION
|
2011-12-31 07:21:28 +00:00
|
|
|
#undef BPF_TSTAMP_NONE
|
|
|
|
#undef BPF_TSTAMP_FAST
|
|
|
|
#undef BPF_TSTAMP_NORMAL
|
|
|
|
#undef BPF_TSTAMP_EXTERN
|
|
|
|
|
Implement flexible BPF timestamping framework.
- Allow setting format, resolution and accuracy of BPF time stamps per
listener. Previously, we were only able to use microtime(9). Now we can
set various resolutions and accuracies with ioctl(2) BIOCSTSTAMP command.
Similarly, we can get the current resolution and accuracy with BIOCGTSTAMP
command. Document all supported options in bpf(4) and their uses.
- Introduce new time stamp 'struct bpf_ts' and header 'struct bpf_xhdr'.
The new time stamp has both 64-bit second and fractional parts. bpf_xhdr
has this time stamp instead of 'struct timeval' for bh_tstamp. The new
structures let us use bh_tstamp of same size on both 32-bit and 64-bit
platforms without adding additional shims for 32-bit binaries. On 64-bit
platforms, size of BPF header does not change compared to bpf_hdr as its
members are already all 64-bit long. On 32-bit platforms, the size may
increase by 8 bytes. For backward compatibility, struct bpf_hdr with
struct timeval is still the default header unless new time stamp format is
explicitly requested. However, the behaviour may change in the future and
all relevant code is wrapped around "#ifdef BURN_BRIDGES" for now.
- Add experimental support for tagging mbufs with time stamps from a lower
layer, e.g., device driver. Currently, mbuf_tags(9) is used to tag mbufs.
The time stamps must be uptime in 'struct bintime' format as binuptime(9)
and getbinuptime(9) do.
Reviewed by: net@
2010-06-15 19:28:44 +00:00
|
|
|
static int
|
|
|
|
bpf_hdrlen(struct bpf_d *d)
|
|
|
|
{
|
|
|
|
int hdrlen;
|
|
|
|
|
|
|
|
hdrlen = d->bd_bif->bif_hdrlen;
|
|
|
|
#ifndef BURN_BRIDGES
|
|
|
|
if (d->bd_tstamp == BPF_T_NONE ||
|
|
|
|
BPF_T_FORMAT(d->bd_tstamp) == BPF_T_MICROTIME)
|
|
|
|
#ifdef COMPAT_FREEBSD32
|
|
|
|
if (d->bd_compat32)
|
|
|
|
hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr32);
|
|
|
|
else
|
|
|
|
#endif
|
|
|
|
hdrlen += SIZEOF_BPF_HDR(struct bpf_hdr);
|
|
|
|
else
|
|
|
|
#endif
|
|
|
|
hdrlen += SIZEOF_BPF_HDR(struct bpf_xhdr);
|
|
|
|
#ifdef COMPAT_FREEBSD32
|
|
|
|
if (d->bd_compat32)
|
|
|
|
hdrlen = BPF_WORDALIGN32(hdrlen);
|
|
|
|
else
|
|
|
|
#endif
|
|
|
|
hdrlen = BPF_WORDALIGN(hdrlen);
|
|
|
|
|
|
|
|
return (hdrlen - d->bd_bif->bif_hdrlen);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
bpf_bintime2ts(struct bintime *bt, struct bpf_ts *ts, int tstype)
|
|
|
|
{
|
2016-07-27 11:08:59 +00:00
|
|
|
struct bintime bt2, boottimebin;
|
Implement flexible BPF timestamping framework.
- Allow setting format, resolution and accuracy of BPF time stamps per
listener. Previously, we were only able to use microtime(9). Now we can
set various resolutions and accuracies with ioctl(2) BIOCSTSTAMP command.
Similarly, we can get the current resolution and accuracy with BIOCGTSTAMP
command. Document all supported options in bpf(4) and their uses.
- Introduce new time stamp 'struct bpf_ts' and header 'struct bpf_xhdr'.
The new time stamp has both 64-bit second and fractional parts. bpf_xhdr
has this time stamp instead of 'struct timeval' for bh_tstamp. The new
structures let us use bh_tstamp of same size on both 32-bit and 64-bit
platforms without adding additional shims for 32-bit binaries. On 64-bit
platforms, size of BPF header does not change compared to bpf_hdr as its
members are already all 64-bit long. On 32-bit platforms, the size may
increase by 8 bytes. For backward compatibility, struct bpf_hdr with
struct timeval is still the default header unless new time stamp format is
explicitly requested. However, the behaviour may change in the future and
all relevant code is wrapped around "#ifdef BURN_BRIDGES" for now.
- Add experimental support for tagging mbufs with time stamps from a lower
layer, e.g., device driver. Currently, mbuf_tags(9) is used to tag mbufs.
The time stamps must be uptime in 'struct bintime' format as binuptime(9)
and getbinuptime(9) do.
Reviewed by: net@
2010-06-15 19:28:44 +00:00
|
|
|
struct timeval tsm;
|
|
|
|
struct timespec tsn;
|
|
|
|
|
2011-12-31 07:21:28 +00:00
|
|
|
if ((tstype & BPF_T_MONOTONIC) == 0) {
|
|
|
|
bt2 = *bt;
|
2016-07-27 11:08:59 +00:00
|
|
|
getboottimebin(&boottimebin);
|
2011-12-31 07:21:28 +00:00
|
|
|
bintime_add(&bt2, &boottimebin);
|
|
|
|
bt = &bt2;
|
|
|
|
}
|
Implement flexible BPF timestamping framework.
- Allow setting format, resolution and accuracy of BPF time stamps per
listener. Previously, we were only able to use microtime(9). Now we can
set various resolutions and accuracies with ioctl(2) BIOCSTSTAMP command.
Similarly, we can get the current resolution and accuracy with BIOCGTSTAMP
command. Document all supported options in bpf(4) and their uses.
- Introduce new time stamp 'struct bpf_ts' and header 'struct bpf_xhdr'.
The new time stamp has both 64-bit second and fractional parts. bpf_xhdr
has this time stamp instead of 'struct timeval' for bh_tstamp. The new
structures let us use bh_tstamp of same size on both 32-bit and 64-bit
platforms without adding additional shims for 32-bit binaries. On 64-bit
platforms, size of BPF header does not change compared to bpf_hdr as its
members are already all 64-bit long. On 32-bit platforms, the size may
increase by 8 bytes. For backward compatibility, struct bpf_hdr with
struct timeval is still the default header unless new time stamp format is
explicitly requested. However, the behaviour may change in the future and
all relevant code is wrapped around "#ifdef BURN_BRIDGES" for now.
- Add experimental support for tagging mbufs with time stamps from a lower
layer, e.g., device driver. Currently, mbuf_tags(9) is used to tag mbufs.
The time stamps must be uptime in 'struct bintime' format as binuptime(9)
and getbinuptime(9) do.
Reviewed by: net@
2010-06-15 19:28:44 +00:00
|
|
|
switch (BPF_T_FORMAT(tstype)) {
|
|
|
|
case BPF_T_MICROTIME:
|
|
|
|
bintime2timeval(bt, &tsm);
|
|
|
|
ts->bt_sec = tsm.tv_sec;
|
|
|
|
ts->bt_frac = tsm.tv_usec;
|
|
|
|
break;
|
|
|
|
case BPF_T_NANOTIME:
|
|
|
|
bintime2timespec(bt, &tsn);
|
|
|
|
ts->bt_sec = tsn.tv_sec;
|
|
|
|
ts->bt_frac = tsn.tv_nsec;
|
|
|
|
break;
|
|
|
|
case BPF_T_BINTIME:
|
|
|
|
ts->bt_sec = bt->sec;
|
|
|
|
ts->bt_frac = bt->frac;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
|
|
|
* Move the packet data from interface memory (pkt) into the
|
2004-02-16 18:19:15 +00:00
|
|
|
* store buffer. "cpfn" is the routine called to do the actual data
|
1994-05-24 10:09:53 +00:00
|
|
|
* transfer. bcopy is passed in to copy contiguous chunks, while
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patches to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstract
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring changes that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental and are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
* bpf_append_mbuf is passed in to copy mbuf chains. In the latter case,
|
1994-05-24 10:09:53 +00:00
|
|
|
* pkt is really an mbuf.
|
|
|
|
*/
|
|
|
|
static void
|
2006-06-15 15:39:12 +00:00
|
|
|
catchpacket(struct bpf_d *d, u_char *pkt, u_int pktlen, u_int snaplen,
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patches to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstract
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring changes that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental and are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
void (*cpfn)(struct bpf_d *, caddr_t, u_int, void *, u_int),
|
Implement flexible BPF timestamping framework.
- Allow setting format, resolution and accuracy of BPF time stamps per
listener. Previously, we were only able to use microtime(9). Now we can
set various resolutions and accuracies with ioctl(2) BIOCSTSTAMP command.
Similarly, we can get the current resolution and accuracy with BIOCGTSTAMP
command. Document all supported options in bpf(4) and their uses.
- Introduce new time stamp 'struct bpf_ts' and header 'struct bpf_xhdr'.
The new time stamp has both 64-bit second and fractional parts. bpf_xhdr
has this time stamp instead of 'struct timeval' for bh_tstamp. The new
structures let us use bh_tstamp of same size on both 32-bit and 64-bit
platforms without adding additional shims for 32-bit binaries. On 64-bit
platforms, size of BPF header does not change compared to bpf_hdr as its
members are already all 64-bit long. On 32-bit platforms, the size may
increase by 8 bytes. For backward compatibility, struct bpf_hdr with
struct timeval is still the default header unless new time stamp format is
explicitly requested. However, the behaviour may change in the future and
all relevant code is wrapped around "#ifdef BURN_BRIDGES" for now.
- Add experimental support for tagging mbufs with time stamps from a lower
layer, e.g., device driver. Currently, mbuf_tags(9) is used to tag mbufs.
The time stamps must be uptime in 'struct bintime' format as binuptime(9)
and getbinuptime(9) do.
Reviewed by: net@
2010-06-15 19:28:44 +00:00
|
|
|
struct bintime *bt)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
Implement flexible BPF timestamping framework.
- Allow setting format, resolution and accuracy of BPF time stamps per
listener. Previously, we were only able to use microtime(9). Now we can
set various resolutions and accuracies with ioctl(2) BIOCSTSTAMP command.
Similarly, we can get the current resolution and accuracy with BIOCGTSTAMP
command. Document all supported options in bpf(4) and their uses.
- Introduce new time stamp 'struct bpf_ts' and header 'struct bpf_xhdr'.
The new time stamp has both 64-bit second and fractional parts. bpf_xhdr
has this time stamp instead of 'struct timeval' for bh_tstamp. The new
structures let us use bh_tstamp of same size on both 32-bit and 64-bit
platforms without adding additional shims for 32-bit binaries. On 64-bit
platforms, size of BPF header does not change compared to bpf_hdr as its
members are already all 64-bit long. On 32-bit platforms, the size may
increase by 8 bytes. For backward compatibility, struct bpf_hdr with
struct timeval is still the default header unless new time stamp format is
explicitly requested. However, the behaviour may change in the future and
all relevant code is wrapped around "#ifdef BURN_BRIDGES" for now.
- Add experimental support for tagging mbufs with time stamps from a lower
layer, e.g., device driver. Currently, mbuf_tags(9) is used to tag mbufs.
The time stamps must be uptime in 'struct bintime' format as binuptime(9)
and getbinuptime(9) do.
Reviewed by: net@
2010-06-15 19:28:44 +00:00
|
|
|
struct bpf_xhdr hdr;
|
|
|
|
#ifndef BURN_BRIDGES
|
|
|
|
struct bpf_hdr hdr_old;
|
2010-04-25 16:43:41 +00:00
|
|
|
#ifdef COMPAT_FREEBSD32
|
Implement flexible BPF timestamping framework.
- Allow setting format, resolution and accuracy of BPF time stamps per
listener. Previously, we were only able to use microtime(9). Now we can
set various resolutions and accuracies with ioctl(2) BIOCSTSTAMP command.
Similarly, we can get the current resolution and accuracy with BIOCGTSTAMP
command. Document all supported options in bpf(4) and their uses.
- Introduce new time stamp 'struct bpf_ts' and header 'struct bpf_xhdr'.
The new time stamp has both 64-bit second and fractional parts. bpf_xhdr
has this time stamp instead of 'struct timeval' for bh_tstamp. The new
structures let us use bh_tstamp of same size on both 32-bit and 64-bit
platforms without adding additional shims for 32-bit binaries. On 64-bit
platforms, size of BPF header does not change compared to bpf_hdr as its
members are already all 64-bit long. On 32-bit platforms, the size may
increase by 8 bytes. For backward compatibility, struct bpf_hdr with
struct timeval is still the default header unless new time stamp format is
explicitly requested. However, the behaviour may change in the future and
all relevant code is wrapped around "#ifdef BURN_BRIDGES" for now.
- Add experimental support for tagging mbufs with time stamps from a lower
layer, e.g., device driver. Currently, mbuf_tags(9) is used to tag mbufs.
The time stamps must be uptime in 'struct bintime' format as binuptime(9)
and getbinuptime(9) do.
Reviewed by: net@
2010-06-15 19:28:44 +00:00
|
|
|
struct bpf_hdr32 hdr32_old;
|
2010-04-25 16:43:41 +00:00
|
|
|
#endif
|
Implement flexible BPF timestamping framework.
- Allow setting format, resolution and accuracy of BPF time stamps per
listener. Previously, we were only able to use microtime(9). Now we can
set various resolutions and accuracies with ioctl(2) BIOCSTSTAMP command.
Similarly, we can get the current resolution and accuracy with BIOCGTSTAMP
command. Document all supported options in bpf(4) and their uses.
- Introduce new time stamp 'struct bpf_ts' and header 'struct bpf_xhdr'.
The new time stamp has both 64-bit second and fractional parts. bpf_xhdr
has this time stamp instead of 'struct timeval' for bh_tstamp. The new
structures let us use bh_tstamp of same size on both 32-bit and 64-bit
platforms without adding additional shims for 32-bit binaries. On 64-bit
platforms, size of BPF header does not change compared to bpf_hdr as its
members are already all 64-bit long. On 32-bit platforms, the size may
increase by 8 bytes. For backward compatibility, struct bpf_hdr with
struct timeval is still the default header unless new time stamp format is
explicitly requested. However, the behaviour may change in the future and
all relevant code is wrapped around "#ifdef BURN_BRIDGES" for now.
- Add experimental support for tagging mbufs with time stamps from a lower
layer, e.g., device driver. Currently, mbuf_tags(9) is used to tag mbufs.
The time stamps must be uptime in 'struct bintime' format as binuptime(9)
and getbinuptime(9) do.
Reviewed by: net@
2010-06-15 19:28:44 +00:00
|
|
|
#endif
|
|
|
|
int caplen, curlen, hdrlen, totlen;
|
2005-03-02 21:59:39 +00:00
|
|
|
int do_wakeup = 0;
|
Implement flexible BPF timestamping framework.
- Allow setting format, resolution and accuracy of BPF time stamps per
listener. Previously, we were only able to use microtime(9). Now we can
set various resolutions and accuracies with ioctl(2) BIOCSTSTAMP command.
Similarly, we can get the current resolution and accuracy with BIOCGTSTAMP
command. Document all supported options in bpf(4) and their uses.
- Introduce new time stamp 'struct bpf_ts' and header 'struct bpf_xhdr'.
The new time stamp has both 64-bit second and fractional parts. bpf_xhdr
has this time stamp instead of 'struct timeval' for bh_tstamp. The new
structures let us use bh_tstamp of same size on both 32-bit and 64-bit
platforms without adding additional shims for 32-bit binaries. On 64-bit
platforms, size of BPF header does not change compared to bpf_hdr as its
members are already all 64-bit long. On 32-bit platforms, the size may
increase by 8 bytes. For backward compatibility, struct bpf_hdr with
struct timeval is still the default header unless new time stamp format is
explicitly requested. However, the behaviour may change in the future and
all relevant code is wrapped around "#ifdef BURN_BRIDGES" for now.
- Add experimental support for tagging mbufs with time stamps from a lower
layer, e.g., device driver. Currently, mbuf_tags(9) is used to tag mbufs.
The time stamps must be uptime in 'struct bintime' format as binuptime(9)
and getbinuptime(9) do.
Reviewed by: net@
2010-06-15 19:28:44 +00:00
|
|
|
int do_timestamp;
|
|
|
|
int tstype;
|
2004-02-16 18:19:15 +00:00
|
|
|
|
2012-05-21 22:17:29 +00:00
|
|
|
BPFD_LOCK_ASSERT(d);
|
Rework r348303 to reduce the time of holding global BPF lock.
It appeared that using NET_EPOCH_WAIT() while holding global BPF lock
can lead to another panic:
spin lock 0xfffff800183c9840 (turnstile lock) held by 0xfffff80018e2c5a0 (tid 100325) too long
panic: spin lock held too long
...
#0 sched_switch (td=0xfffff80018e2c5a0, newtd=0xfffff8000389e000, flags=<optimized out>) at /usr/src/sys/kern/sched_ule.c:2133
#1 0xffffffff80bf9912 in mi_switch (flags=256, newtd=0x0) at /usr/src/sys/kern/kern_synch.c:439
#2 0xffffffff80c21db7 in sched_bind (td=<optimized out>, cpu=<optimized out>) at /usr/src/sys/kern/sched_ule.c:2704
#3 0xffffffff80c34c33 in epoch_block_handler_preempt (global=<optimized out>, cr=0xfffffe00005a1a00, arg=<optimized out>)
at /usr/src/sys/kern/subr_epoch.c:394
#4 0xffffffff803c741b in epoch_block (global=<optimized out>, cr=<optimized out>, cb=<optimized out>, ct=<optimized out>)
at /usr/src/sys/contrib/ck/src/ck_epoch.c:416
#5 ck_epoch_synchronize_wait (global=0xfffff8000380cd80, cb=<optimized out>, ct=<optimized out>) at /usr/src/sys/contrib/ck/src/ck_epoch.c:465
#6 0xffffffff80c3475e in epoch_wait_preempt (epoch=0xfffff8000380cd80) at /usr/src/sys/kern/subr_epoch.c:513
#7 0xffffffff80ce970b in bpf_detachd_locked (d=0xfffff801d309cc00, detached_ifp=<optimized out>) at /usr/src/sys/net/bpf.c:856
#8 0xffffffff80ced166 in bpf_detachd (d=<optimized out>) at /usr/src/sys/net/bpf.c:836
#9 bpf_dtor (data=0xfffff801d309cc00) at /usr/src/sys/net/bpf.c:914
To fix this, add a check to catchpacket() that the BPF descriptor was
not detached just before we acquired BPFD_LOCK().
Reported by: slavash
Tested by: slavash
MFC after: 1 week
2019-05-28 11:45:00 +00:00
|
|
|
if (d->bd_bif == NULL) {
|
|
|
|
/* Descriptor was detached in concurrent thread */
|
|
|
|
counter_u64_add(d->bd_dcount, 1);
|
|
|
|
return;
|
|
|
|
}
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patches to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstract
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring changes that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental and are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Detect whether user space has released a buffer back to us, and if
|
|
|
|
* so, move it from being a hold buffer to a free buffer. This may
|
|
|
|
* not be the best place to do it (for example, we might only want to
|
|
|
|
* run this check if we need the space), but for now it's a reliable
|
|
|
|
* spot to do it.
|
|
|
|
*/
|
2008-03-25 07:41:33 +00:00
|
|
|
if (d->bd_fbuf == NULL && bpf_canfreebuf(d)) {
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patches to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstract
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring changes that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental and are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
d->bd_fbuf = d->bd_hbuf;
|
|
|
|
d->bd_hbuf = NULL;
|
|
|
|
d->bd_hlen = 0;
|
2008-07-05 20:11:28 +00:00
|
|
|
bpf_buf_reclaimed(d);
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patches to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstract
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring changes that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental and are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
}
|
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
|
|
|
* Figure out how many bytes to move. If the packet is
|
|
|
|
* greater or equal to the snapshot length, transfer that
|
|
|
|
* much. Otherwise, transfer the whole packet (unless
|
|
|
|
* we hit the buffer size limit).
|
|
|
|
*/
|
Implement flexible BPF timestamping framework.
- Allow setting format, resolution and accuracy of BPF time stamps per
listener. Previously, we were only able to use microtime(9). Now we can
set various resolutions and accuracies with ioctl(2) BIOCSTSTAMP command.
Similarly, we can get the current resolution and accuracy with BIOCGTSTAMP
command. Document all supported options in bpf(4) and their uses.
- Introduce new time stamp 'struct bpf_ts' and header 'struct bpf_xhdr'.
The new time stamp has both 64-bit second and fractional parts. bpf_xhdr
has this time stamp instead of 'struct timeval' for bh_tstamp. The new
structures let us use bh_tstamp of same size on both 32-bit and 64-bit
platforms without adding additional shims for 32-bit binaries. On 64-bit
platforms, size of BPF header does not change compared to bpf_hdr as its
members are already all 64-bit long. On 32-bit platforms, the size may
increase by 8 bytes. For backward compatibility, struct bpf_hdr with
struct timeval is still the default header unless new time stamp format is
explicitly requested. However, the behaviour may change in the future and
all relevant code is wrapped around "#ifdef BURN_BRIDGES" for now.
- Add experimental support for tagging mbufs with time stamps from a lower
layer, e.g., device driver. Currently, mbuf_tags(9) is used to tag mbufs.
The time stamps must be uptime in 'struct bintime' format as binuptime(9)
and getbinuptime(9) do.
Reviewed by: net@
2010-06-15 19:28:44 +00:00
|
|
|
hdrlen = bpf_hdrlen(d);
|
1994-05-24 10:09:53 +00:00
|
|
|
totlen = hdrlen + min(snaplen, pktlen);
|
|
|
|
if (totlen > d->bd_bufsize)
|
|
|
|
totlen = d->bd_bufsize;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Round up the end of the previous packet to the next longword.
|
2008-04-07 02:51:00 +00:00
|
|
|
*
|
|
|
|
* Drop the packet if there's no room and no hope of room
|
|
|
|
* If the packet would overflow the storage buffer or the storage
|
|
|
|
* buffer is considered immutable by the buffer model, try to rotate
|
|
|
|
* the buffer and wakeup pending processes.
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
2010-04-25 16:43:41 +00:00
|
|
|
#ifdef COMPAT_FREEBSD32
|
|
|
|
if (d->bd_compat32)
|
|
|
|
curlen = BPF_WORDALIGN32(d->bd_slen);
|
|
|
|
else
|
|
|
|
#endif
|
|
|
|
curlen = BPF_WORDALIGN(d->bd_slen);
|
2008-04-07 02:51:00 +00:00
|
|
|
if (curlen + totlen > d->bd_bufsize || !bpf_canwritebuf(d)) {
|
2004-07-24 16:58:56 +00:00
|
|
|
if (d->bd_fbuf == NULL) {
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
2008-04-07 02:51:00 +00:00
|
|
|
* There's no room in the store buffer, and no
|
|
|
|
* prospect of room, so drop the packet. Notify the
|
|
|
|
* buffer model.
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
2008-04-07 02:51:00 +00:00
|
|
|
bpf_buffull(d);
|
2018-03-20 22:57:06 +00:00
|
|
|
counter_u64_add(d->bd_dcount, 1);
|
1994-05-24 10:09:53 +00:00
|
|
|
return;
|
|
|
|
}
|
2015-08-03 18:22:31 +00:00
|
|
|
KASSERT(!d->bd_hbuf_in_use, ("hold buffer is in use"));
|
1994-05-24 10:09:53 +00:00
|
|
|
ROTATE_BUFFERS(d);
|
2005-03-02 21:59:39 +00:00
|
|
|
do_wakeup = 1;
|
1994-05-24 10:09:53 +00:00
|
|
|
curlen = 0;
|
2008-04-07 02:51:00 +00:00
|
|
|
} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT)
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patches to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstract
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring changes that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental and are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
* Immediate mode is set, or the read timeout has already
|
|
|
|
* expired during a select call. A packet arrived, so the
|
|
|
|
* reader should be woken up.
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
2005-03-02 21:59:39 +00:00
|
|
|
do_wakeup = 1;
|
Implement flexible BPF timestamping framework.
- Allow setting format, resolution and accuracy of BPF time stamps per
listener. Previously, we were only able to use microtime(9). Now we can
set various resolutions and accuracies with ioctl(2) BIOCSTSTAMP command.
Similarly, we can get the current resolution and accuracy with BIOCGTSTAMP
command. Document all supported options in bpf(4) and their uses.
- Introduce new time stamp 'struct bpf_ts' and header 'struct bpf_xhdr'.
The new time stamp has both 64-bit second and fractional parts. bpf_xhdr
has this time stamp instead of 'struct timeval' for bh_tstamp. The new
structures let us use bh_tstamp of same size on both 32-bit and 64-bit
platforms without adding additional shims for 32-bit binaries. On 64-bit
platforms, size of BPF header does not change compared to bpf_hdr as its
members are already all 64-bit long. On 32-bit platforms, the size may
increase by 8 bytes. For backward compatibility, struct bpf_hdr with
struct timeval is still the default header unless new time stamp format is
explicitly requested. However, the behaviour may change in the future and
all relevant code is wrapped around "#ifdef BURN_BRIDGES" for now.
- Add experimental support for tagging mbufs with time stamps from a lower
layer, e.g., device driver. Currently, mbuf_tags(9) is used to tag mbufs.
The time stamps must be uptime in 'struct bintime' format as binuptime(9)
and getbinuptime(9) do.
Reviewed by: net@
2010-06-15 19:28:44 +00:00
|
|
|
caplen = totlen - hdrlen;
|
|
|
|
tstype = d->bd_tstamp;
|
|
|
|
do_timestamp = tstype != BPF_T_NONE;
|
|
|
|
#ifndef BURN_BRIDGES
|
|
|
|
if (tstype == BPF_T_NONE || BPF_T_FORMAT(tstype) == BPF_T_MICROTIME) {
|
|
|
|
struct bpf_ts ts;
|
|
|
|
if (do_timestamp)
|
|
|
|
bpf_bintime2ts(bt, &ts, tstype);
|
2010-04-25 16:43:41 +00:00
|
|
|
#ifdef COMPAT_FREEBSD32
|
Implement flexible BPF timestamping framework.
- Allow setting format, resolution and accuracy of BPF time stamps per
listener. Previously, we were only able to use microtime(9). Now we can
set various resolutions and accuracies with ioctl(2) BIOCSTSTAMP command.
Similarly, we can get the current resolution and accuracy with BIOCGTSTAMP
command. Document all supported options in bpf(4) and their uses.
- Introduce new time stamp 'struct bpf_ts' and header 'struct bpf_xhdr'.
The new time stamp has both 64-bit second and fractional parts. bpf_xhdr
has this time stamp instead of 'struct timeval' for bh_tstamp. The new
structures let us use bh_tstamp of same size on both 32-bit and 64-bit
platforms without adding additional shims for 32-bit binaries. On 64-bit
platforms, size of BPF header does not change compared to bpf_hdr as its
members are already all 64-bit long. On 32-bit platforms, the size may
increase by 8 bytes. For backward compatibility, struct bpf_hdr with
struct timeval is still the default header unless new time stamp format is
explicitly requested. However, the behaviour may change in the future and
all relevant code is wrapped around "#ifdef BURN_BRIDGES" for now.
- Add experimental support for tagging mbufs with time stamps from a lower
layer, e.g., device driver. Currently, mbuf_tags(9) is used to tag mbufs.
The time stamps must be uptime in 'struct bintime' format as binuptime(9)
and getbinuptime(9) do.
Reviewed by: net@
2010-06-15 19:28:44 +00:00
|
|
|
if (d->bd_compat32) {
|
|
|
|
bzero(&hdr32_old, sizeof(hdr32_old));
|
|
|
|
if (do_timestamp) {
|
|
|
|
hdr32_old.bh_tstamp.tv_sec = ts.bt_sec;
|
|
|
|
hdr32_old.bh_tstamp.tv_usec = ts.bt_frac;
|
|
|
|
}
|
|
|
|
hdr32_old.bh_datalen = pktlen;
|
|
|
|
hdr32_old.bh_hdrlen = hdrlen;
|
|
|
|
hdr32_old.bh_caplen = caplen;
|
|
|
|
bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr32_old,
|
|
|
|
sizeof(hdr32_old));
|
|
|
|
goto copy;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
bzero(&hdr_old, sizeof(hdr_old));
|
|
|
|
if (do_timestamp) {
|
|
|
|
hdr_old.bh_tstamp.tv_sec = ts.bt_sec;
|
|
|
|
hdr_old.bh_tstamp.tv_usec = ts.bt_frac;
|
|
|
|
}
|
|
|
|
hdr_old.bh_datalen = pktlen;
|
|
|
|
hdr_old.bh_hdrlen = hdrlen;
|
|
|
|
hdr_old.bh_caplen = caplen;
|
|
|
|
bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr_old,
|
|
|
|
sizeof(hdr_old));
|
2010-04-25 16:43:41 +00:00
|
|
|
goto copy;
|
|
|
|
}
|
|
|
|
#endif
|
1994-05-24 10:09:53 +00:00
|
|
|
|
|
|
|
/*
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patches to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstract
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring changes that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental and are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
* Append the bpf header. Note we append the actual header size, but
|
|
|
|
* move forward the length of the header plus padding.
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patches to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstract
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring changes that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental and are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
bzero(&hdr, sizeof(hdr));
|
Implement flexible BPF timestamping framework.
- Allow setting format, resolution and accuracy of BPF time stamps per
listener. Previously, we were only able to use microtime(9). Now we can
set various resolutions and accuracies with ioctl(2) BIOCSTSTAMP command.
Similarly, we can get the current resolution and accuracy with BIOCGTSTAMP
command. Document all supported options in bpf(4) and their uses.
- Introduce new time stamp 'struct bpf_ts' and header 'struct bpf_xhdr'.
The new time stamp has both 64-bit second and fractional parts. bpf_xhdr
has this time stamp instead of 'struct timeval' for bh_tstamp. The new
structures let us use bh_tstamp of same size on both 32-bit and 64-bit
platforms without adding additional shims for 32-bit binaries. On 64-bit
platforms, size of BPF header does not change compared to bpf_hdr as its
members are already all 64-bit long. On 32-bit platforms, the size may
increase by 8 bytes. For backward compatibility, struct bpf_hdr with
struct timeval is still the default header unless new time stamp format is
explicitly requested. However, the behaviour may change in the future and
all relevant code is wrapped around "#ifdef BURN_BRIDGES" for now.
- Add experimental support for tagging mbufs with time stamps from a lower
layer, e.g., device driver. Currently, mbuf_tags(9) is used to tag mbufs.
The time stamps must be uptime in 'struct bintime' format as binuptime(9)
and getbinuptime(9) do.
Reviewed by: net@
2010-06-15 19:28:44 +00:00
|
|
|
if (do_timestamp)
|
|
|
|
bpf_bintime2ts(bt, &hdr.bh_tstamp, tstype);
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patches to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstract
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring changes that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental and are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
hdr.bh_datalen = pktlen;
|
|
|
|
hdr.bh_hdrlen = hdrlen;
|
Implement flexible BPF timestamping framework.
- Allow setting format, resolution and accuracy of BPF time stamps per
listener. Previously, we were only able to use microtime(9). Now we can
set various resolutions and accuracies with ioctl(2) BIOCSTSTAMP command.
Similarly, we can get the current resolution and accuracy with BIOCGTSTAMP
command. Document all supported options in bpf(4) and their uses.
- Introduce new time stamp 'struct bpf_ts' and header 'struct bpf_xhdr'.
The new time stamp has both 64-bit second and fractional parts. bpf_xhdr
has this time stamp instead of 'struct timeval' for bh_tstamp. The new
structures let us use bh_tstamp of same size on both 32-bit and 64-bit
platforms without adding additional shims for 32-bit binaries. On 64-bit
platforms, size of BPF header does not change compared to bpf_hdr as its
members are already all 64-bit long. On 32-bit platforms, the size may
increase by 8 bytes. For backward compatibility, struct bpf_hdr with
struct timeval is still the default header unless new time stamp format is
explicitly requested. However, the behaviour may change in the future and
all relevant code is wrapped around "#ifdef BURN_BRIDGES" for now.
- Add experimental support for tagging mbufs with time stamps from a lower
layer, e.g., device driver. Currently, mbuf_tags(9) is used to tag mbufs.
The time stamps must be uptime in 'struct bintime' format as binuptime(9)
and getbinuptime(9) do.
Reviewed by: net@
2010-06-15 19:28:44 +00:00
|
|
|
hdr.bh_caplen = caplen;
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patches to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstract
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring changes that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental and are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
bpf_append_bytes(d, d->bd_sbuf, curlen, &hdr, sizeof(hdr));
|
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
|
|
|
* Copy the packet data into the store buffer and update its length.
|
|
|
|
*/
|
Implement flexible BPF timestamping framework.
- Allow setting format, resolution and accuracy of BPF time stamps per
listener. Previously, we were only able to use microtime(9). Now we can
set various resolutions and accuracies with ioctl(2) BIOCSTSTAMP command.
Similarly, we can get the current resolution and accuracy with BIOCGTSTAMP
command. Document all supported options in bpf(4) and their uses.
- Introduce new time stamp 'struct bpf_ts' and header 'struct bpf_xhdr'.
The new time stamp has both 64-bit second and fractional parts. bpf_xhdr
has this time stamp instead of 'struct timeval' for bh_tstamp. The new
structures let us use bh_tstamp of same size on both 32-bit and 64-bit
platforms without adding additional shims for 32-bit binaries. On 64-bit
platforms, size of BPF header does not change compared to bpf_hdr as its
members are already all 64-bit long. On 32-bit platforms, the size may
increase by 8 bytes. For backward compatibility, struct bpf_hdr with
struct timeval is still the default header unless new time stamp format is
explicitly requested. However, the behaviour may change in the future and
all relevant code is wrapped around "#ifdef BURN_BRIDGES" for now.
- Add experimental support for tagging mbufs with time stamps from a lower
layer, e.g., device driver. Currently, mbuf_tags(9) is used to tag mbufs.
The time stamps must be uptime in 'struct bintime' format as binuptime(9)
and getbinuptime(9) do.
Reviewed by: net@
2010-06-15 19:28:44 +00:00
|
|
|
#ifndef BURN_BRIDGES
|
|
|
|
copy:
|
2010-04-25 16:43:41 +00:00
|
|
|
#endif
|
Implement flexible BPF timestamping framework.
- Allow setting format, resolution and accuracy of BPF time stamps per
listener. Previously, we were only able to use microtime(9). Now we can
set various resolutions and accuracies with ioctl(2) BIOCSTSTAMP command.
Similarly, we can get the current resolution and accuracy with BIOCGTSTAMP
command. Document all supported options in bpf(4) and their uses.
- Introduce new time stamp 'struct bpf_ts' and header 'struct bpf_xhdr'.
The new time stamp has both 64-bit second and fractional parts. bpf_xhdr
has this time stamp instead of 'struct timeval' for bh_tstamp. The new
structures let us use bh_tstamp of same size on both 32-bit and 64-bit
platforms without adding additional shims for 32-bit binaries. On 64-bit
platforms, size of BPF header does not change compared to bpf_hdr as its
members are already all 64-bit long. On 32-bit platforms, the size may
increase by 8 bytes. For backward compatibility, struct bpf_hdr with
struct timeval is still the default header unless new time stamp format is
explicitly requested. However, the behaviour may change in the future and
all relevant code is wrapped around "#ifdef BURN_BRIDGES" for now.
- Add experimental support for tagging mbufs with time stamps from a lower
layer, e.g., device driver. Currently, mbuf_tags(9) is used to tag mbufs.
The time stamps must be uptime in 'struct bintime' format as binuptime(9)
and getbinuptime(9) do.
Reviewed by: net@
2010-06-15 19:28:44 +00:00
|
|
|
(*cpfn)(d, d->bd_sbuf, curlen + hdrlen, pkt, caplen);
|
1994-05-24 10:09:53 +00:00
|
|
|
d->bd_slen = curlen + totlen;
|
2005-03-02 21:59:39 +00:00
|
|
|
|
|
|
|
if (do_wakeup)
|
|
|
|
bpf_wakeup(d);
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Free buffers currently in use by a descriptor.
|
|
|
|
* Called on close.
|
|
|
|
*/
|
|
|
|
static void
|
2019-05-13 13:45:28 +00:00
|
|
|
bpfd_free(epoch_context_t ctx)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
2019-05-13 13:45:28 +00:00
|
|
|
struct bpf_d *d;
|
|
|
|
struct bpf_program_buffer *p;
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patchs to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstrac
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring hanges that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
|
|
|
* We don't need to lock out interrupts since this descriptor has
|
|
|
|
* been detached from its interface and it yet hasn't been marked
|
|
|
|
* free.
|
|
|
|
*/
|
2019-05-13 13:45:28 +00:00
|
|
|
d = __containerof(ctx, struct bpf_d, epoch_ctx);
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patchs to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstrac
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring hanges that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
bpf_free(d);
|
2010-03-29 20:24:03 +00:00
|
|
|
if (d->bd_rfilter != NULL) {
|
2019-05-13 13:45:28 +00:00
|
|
|
p = __containerof((void *)d->bd_rfilter,
|
|
|
|
struct bpf_program_buffer, buffer);
|
2019-05-13 14:07:02 +00:00
|
|
|
#ifdef BPF_JITTER
|
|
|
|
p->func = d->bd_bfilter;
|
|
|
|
#endif
|
2019-05-13 13:45:28 +00:00
|
|
|
bpf_program_buffer_free(&p->epoch_ctx);
|
|
|
|
}
|
|
|
|
if (d->bd_wfilter != NULL) {
|
|
|
|
p = __containerof((void *)d->bd_wfilter,
|
|
|
|
struct bpf_program_buffer, buffer);
|
2019-05-13 14:07:02 +00:00
|
|
|
#ifdef BPF_JITTER
|
|
|
|
p->func = NULL;
|
|
|
|
#endif
|
2019-05-13 13:45:28 +00:00
|
|
|
bpf_program_buffer_free(&p->epoch_ctx);
|
2005-12-06 02:58:12 +00:00
|
|
|
}
|
2018-03-20 22:57:06 +00:00
|
|
|
|
2019-05-13 13:45:28 +00:00
|
|
|
mtx_destroy(&d->bd_lock);
|
2018-03-20 22:57:06 +00:00
|
|
|
counter_u64_free(d->bd_rcount);
|
|
|
|
counter_u64_free(d->bd_dcount);
|
|
|
|
counter_u64_free(d->bd_fcount);
|
|
|
|
counter_u64_free(d->bd_wcount);
|
|
|
|
counter_u64_free(d->bd_wfcount);
|
|
|
|
counter_u64_free(d->bd_wdcount);
|
|
|
|
counter_u64_free(d->bd_zcopy);
|
2019-05-13 13:45:28 +00:00
|
|
|
free(d, M_BPF);
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
|
2002-11-14 23:24:13 +00:00
|
|
|
/*
|
|
|
|
* Attach an interface to bpf. dlt is the link layer type; hdrlen is the
|
|
|
|
* fixed size of the link header (variable length headers not yet supported).
|
|
|
|
*/
|
|
|
|
void
|
2006-06-15 15:39:12 +00:00
|
|
|
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
|
2002-11-14 23:24:13 +00:00
|
|
|
{
|
|
|
|
|
|
|
|
bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
|
|
|
|
}
|
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
2001-08-23 22:38:08 +00:00
|
|
|
* Attach an interface to bpf. ifp is a pointer to the structure
|
|
|
|
* defining the interface to be attached, dlt is the link layer type,
|
|
|
|
* and hdrlen is the fixed size of the link header (variable length
|
|
|
|
* headers are not yet supporrted).
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
|
|
|
void
|
2019-05-13 13:45:28 +00:00
|
|
|
bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen,
|
|
|
|
struct bpf_if **driverp)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
|
|
|
struct bpf_if *bp;
|
2006-06-15 15:39:12 +00:00
|
|
|
|
2019-05-13 13:45:28 +00:00
|
|
|
KASSERT(*driverp == NULL,
|
|
|
|
("bpfattach2: driverp already initialized"));
|
2019-03-19 10:29:32 +00:00
|
|
|
|
|
|
|
bp = malloc(sizeof(*bp), M_BPF, M_WAITOK | M_ZERO);
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2019-05-13 13:45:28 +00:00
|
|
|
CK_LIST_INIT(&bp->bif_dlist);
|
|
|
|
CK_LIST_INIT(&bp->bif_wlist);
|
1994-05-24 10:09:53 +00:00
|
|
|
bp->bif_ifp = ifp;
|
|
|
|
bp->bif_dlt = dlt;
|
2019-03-19 10:29:32 +00:00
|
|
|
bp->bif_hdrlen = hdrlen;
|
2017-08-16 19:40:07 +00:00
|
|
|
bp->bif_bpf = driverp;
|
2019-05-13 13:45:28 +00:00
|
|
|
bp->bif_refcnt = 1;
|
2006-06-02 19:59:33 +00:00
|
|
|
*driverp = bp;
|
2019-05-13 13:45:28 +00:00
|
|
|
/*
|
|
|
|
* Reference ifnet pointer, so it won't freed until
|
|
|
|
* we release it.
|
|
|
|
*/
|
|
|
|
if_ref(ifp);
|
2012-04-06 06:53:58 +00:00
|
|
|
BPF_LOCK();
|
2019-05-13 13:45:28 +00:00
|
|
|
CK_LIST_INSERT_HEAD(&bpf_iflist, bp, bif_next);
|
2012-04-06 06:53:58 +00:00
|
|
|
BPF_UNLOCK();
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2015-12-22 15:00:04 +00:00
|
|
|
if (bootverbose && IS_DEFAULT_VNET(curvnet))
|
2002-11-14 23:24:13 +00:00
|
|
|
if_printf(ifp, "bpf attached\n");
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
1995-11-29 10:49:16 +00:00
|
|
|
|
2016-04-11 10:00:38 +00:00
|
|
|
#ifdef VIMAGE
/*
 * When an interface is moved between vnet instances we need a way to
 * query its dlt and hdrlen before the detach, so that if_bpf can be
 * re-attached after the vmove.  There is unfortunately no device driver
 * infrastructure to query the interface for these values after
 * creation/attach, so this helper exists as a workaround.
 *
 * Either output pointer may be NULL, in which case that value is simply
 * not reported.  Returns ENXIO when bp is NULL and 0 otherwise.
 */
int
bpf_get_bp_params(struct bpf_if *bp, u_int *bif_dlt, u_int *bif_hdrlen)
{

	if (bp == NULL)
		return (ENXIO);

	/* Fill in only the outputs the caller asked for. */
	if (bif_dlt != NULL)
		*bif_dlt = bp->bif_dlt;
	if (bif_hdrlen != NULL)
		*bif_hdrlen = bp->bif_hdrlen;

	return (0);
}
#endif
|
|
|
|
|
The advent of if_detach, allowing interface removal at runtime, makes it
possible for a panic to occur if BPF is in use on the interface at the
time of the call to if_detach. This happens because BPF maintains pointers
to the struct ifnet describing the interface, which is freed by if_detach.
To correct this problem, a new call, bpfdetach, is introduced. bpfdetach
locates BPF descriptor references to the interface, and NULLs them. Other
BPF code is modified so that discovery of a NULL interface results in
ENXIO (already implemented for some calls). Processes blocked on a BPF
call will also be woken up so that they can receive ENXIO.
Interface drivers that invoke bpfattach and if_detach must be modified to
also call bpfdetach(ifp) before calling if_detach(ifp). This is relevant
for buses that support hot removal, such as pccard and usb. Patches to
all affected devices will not be committed, only to if_wi.c, due to
testing limitations. To reproduce the crash, load up tcpdump on your
favorite pccard ethernet card, and then eject the card. As some pccard
drivers do not invoke if_detach(ifp), this bug will not manifest itself
for those drivers.
Reviewed by: wes
2000-03-19 05:42:34 +00:00
|
|
|
/*
|
2012-05-21 22:13:48 +00:00
|
|
|
* Detach bpf from an interface. This involves detaching each descriptor
|
|
|
|
* associated with the interface. Notify each descriptor as it's detached
|
|
|
|
* so that any sleepers wake up and get ENXIO.
|
The advent of if_detach, allowing interface removal at runtime, makes it
possible for a panic to occur if BPF is in use on the interface at the
time of the call to if_detach. This happens because BPF maintains pointers
to the struct ifnet describing the interface, which is freed by if_detach.
To correct this problem, a new call, bpfdetach, is introduced. bpfdetach
locates BPF descriptor references to the interface, and NULLs them. Other
BPF code is modified so that discovery of a NULL interface results in
ENXIO (already implemented for some calls). Processes blocked on a BPF
call will also be woken up so that they can receive ENXIO.
Interface drivers that invoke bpfattach and if_detach must be modified to
also call bpfattach(ifp) before calling if_detach(ifp). This is relevant
for buses that support hot removal, such as pccard and usb. Patches to
all effected devices will not be committed, only to if_wi.c, due to
testing limitations. To reproduce the crash, load up tcpdump on you
favorite pccard ethernet card, and then eject the card. As some pccard
drivers do not invoke if_detach(ifp), this bug will not manifest itself
for those drivers.
Reviewed by: wes
2000-03-19 05:42:34 +00:00
|
|
|
*/
|
|
|
|
void
|
2006-06-15 15:39:12 +00:00
|
|
|
bpfdetach(struct ifnet *ifp)
|
The advent of if_detach, allowing interface removal at runtime, makes it
possible for a panic to occur if BPF is in use on the interface at the
time of the call to if_detach. This happens because BPF maintains pointers
to the struct ifnet describing the interface, which is freed by if_detach.
To correct this problem, a new call, bpfdetach, is introduced. bpfdetach
locates BPF descriptor references to the interface, and NULLs them. Other
BPF code is modified so that discovery of a NULL interface results in
ENXIO (already implemented for some calls). Processes blocked on a BPF
call will also be woken up so that they can receive ENXIO.
Interface drivers that invoke bpfattach and if_detach must be modified to
also call bpfattach(ifp) before calling if_detach(ifp). This is relevant
for buses that support hot removal, such as pccard and usb. Patches to
all effected devices will not be committed, only to if_wi.c, due to
testing limitations. To reproduce the crash, load up tcpdump on you
favorite pccard ethernet card, and then eject the card. As some pccard
drivers do not invoke if_detach(ifp), this bug will not manifest itself
for those drivers.
Reviewed by: wes
2000-03-19 05:42:34 +00:00
|
|
|
{
|
2019-05-13 13:45:28 +00:00
|
|
|
struct bpf_if *bp, *bp_temp;
|
|
|
|
struct bpf_d *d;
|
The advent of if_detach, allowing interface removal at runtime, makes it
possible for a panic to occur if BPF is in use on the interface at the
time of the call to if_detach. This happens because BPF maintains pointers
to the struct ifnet describing the interface, which is freed by if_detach.
To correct this problem, a new call, bpfdetach, is introduced. bpfdetach
locates BPF descriptor references to the interface, and NULLs them. Other
BPF code is modified so that discovery of a NULL interface results in
ENXIO (already implemented for some calls). Processes blocked on a BPF
call will also be woken up so that they can receive ENXIO.
Interface drivers that invoke bpfattach and if_detach must be modified to
also call bpfattach(ifp) before calling if_detach(ifp). This is relevant
for buses that support hot removal, such as pccard and usb. Patches to
all effected devices will not be committed, only to if_wi.c, due to
testing limitations. To reproduce the crash, load up tcpdump on you
favorite pccard ethernet card, and then eject the card. As some pccard
drivers do not invoke if_detach(ifp), this bug will not manifest itself
for those drivers.
Reviewed by: wes
2000-03-19 05:42:34 +00:00
|
|
|
|
2012-05-21 22:17:29 +00:00
|
|
|
BPF_LOCK();
|
2012-01-10 00:48:29 +00:00
|
|
|
/* Find all bpf_if struct's which reference ifp and detach them. */
|
2019-05-13 13:45:28 +00:00
|
|
|
CK_LIST_FOREACH_SAFE(bp, &bpf_iflist, bif_next, bp_temp) {
|
2012-12-02 21:43:37 +00:00
|
|
|
if (ifp != bp->bif_ifp)
|
|
|
|
continue;
|
The advent of if_detach, allowing interface removal at runtime, makes it
possible for a panic to occur if BPF is in use on the interface at the
time of the call to if_detach. This happens because BPF maintains pointers
to the struct ifnet describing the interface, which is freed by if_detach.
To correct this problem, a new call, bpfdetach, is introduced. bpfdetach
locates BPF descriptor references to the interface, and NULLs them. Other
BPF code is modified so that discovery of a NULL interface results in
ENXIO (already implemented for some calls). Processes blocked on a BPF
call will also be woken up so that they can receive ENXIO.
Interface drivers that invoke bpfattach and if_detach must be modified to
also call bpfattach(ifp) before calling if_detach(ifp). This is relevant
for buses that support hot removal, such as pccard and usb. Patches to
all effected devices will not be committed, only to if_wi.c, due to
testing limitations. To reproduce the crash, load up tcpdump on you
favorite pccard ethernet card, and then eject the card. As some pccard
drivers do not invoke if_detach(ifp), this bug will not manifest itself
for those drivers.
Reviewed by: wes
2000-03-19 05:42:34 +00:00
|
|
|
|
2019-05-13 13:45:28 +00:00
|
|
|
CK_LIST_REMOVE(bp, bif_next);
|
2018-04-24 17:42:25 +00:00
|
|
|
*bp->bif_bpf = (struct bpf_if *)&dead_bpf_if;
|
2012-12-02 21:43:37 +00:00
|
|
|
|
2019-05-13 13:45:28 +00:00
|
|
|
CTR4(KTR_NET,
|
|
|
|
"%s: sheduling free for encap %d (%p) for if %p",
|
2012-12-02 21:43:37 +00:00
|
|
|
__func__, bp->bif_dlt, bp, ifp);
|
|
|
|
|
2019-05-13 13:45:28 +00:00
|
|
|
/* Detach common descriptors */
|
|
|
|
while ((d = CK_LIST_FIRST(&bp->bif_dlist)) != NULL) {
|
|
|
|
bpf_detachd_locked(d, true);
|
2012-01-10 00:48:29 +00:00
|
|
|
}
|
2012-12-02 21:43:37 +00:00
|
|
|
|
2019-05-13 13:45:28 +00:00
|
|
|
/* Detach writer-only descriptors */
|
|
|
|
while ((d = CK_LIST_FIRST(&bp->bif_wlist)) != NULL) {
|
|
|
|
bpf_detachd_locked(d, true);
|
2012-12-02 21:43:37 +00:00
|
|
|
}
|
2019-05-13 13:45:28 +00:00
|
|
|
bpfif_rele(bp);
|
2012-12-02 21:43:37 +00:00
|
|
|
}
|
2012-06-04 12:36:58 +00:00
|
|
|
BPF_UNLOCK();
|
2012-05-21 22:17:29 +00:00
|
|
|
}
|
|
|
|
|
2003-01-20 19:08:46 +00:00
|
|
|
/*
|
|
|
|
* Get a list of available data link type of the interface.
|
|
|
|
*/
|
|
|
|
static int
|
2006-06-15 15:39:12 +00:00
|
|
|
bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
|
2003-01-20 19:08:46 +00:00
|
|
|
{
|
|
|
|
struct ifnet *ifp;
|
|
|
|
struct bpf_if *bp;
|
2016-02-24 22:00:35 +00:00
|
|
|
u_int *lst;
|
|
|
|
int error, n, n1;
|
2003-01-20 19:08:46 +00:00
|
|
|
|
2012-05-21 22:21:00 +00:00
|
|
|
BPF_LOCK_ASSERT();
|
|
|
|
|
2003-01-20 19:08:46 +00:00
|
|
|
ifp = d->bd_bif->bif_ifp;
|
2016-02-24 22:00:35 +00:00
|
|
|
n1 = 0;
|
2019-05-13 13:45:28 +00:00
|
|
|
CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) {
|
2016-02-24 22:00:35 +00:00
|
|
|
if (bp->bif_ifp == ifp)
|
|
|
|
n1++;
|
|
|
|
}
|
|
|
|
if (bfl->bfl_list == NULL) {
|
|
|
|
bfl->bfl_len = n1;
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
if (n1 > bfl->bfl_len)
|
|
|
|
return (ENOMEM);
|
2019-05-13 13:45:28 +00:00
|
|
|
|
2016-02-24 22:00:35 +00:00
|
|
|
lst = malloc(n1 * sizeof(u_int), M_TEMP, M_WAITOK);
|
2003-01-20 19:08:46 +00:00
|
|
|
n = 0;
|
2019-05-13 13:45:28 +00:00
|
|
|
CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) {
|
2003-01-20 19:08:46 +00:00
|
|
|
if (bp->bif_ifp != ifp)
|
|
|
|
continue;
|
2019-05-13 13:45:28 +00:00
|
|
|
lst[n++] = bp->bif_dlt;
|
2003-01-20 19:08:46 +00:00
|
|
|
}
|
2016-02-24 22:00:35 +00:00
|
|
|
error = copyout(lst, bfl->bfl_list, sizeof(u_int) * n);
|
|
|
|
free(lst, M_TEMP);
|
2003-01-20 19:08:46 +00:00
|
|
|
bfl->bfl_len = n;
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Set the data link type of a BPF instance.
|
|
|
|
*/
|
|
|
|
static int
|
2006-06-15 15:39:12 +00:00
|
|
|
bpf_setdlt(struct bpf_d *d, u_int dlt)
|
2003-01-20 19:08:46 +00:00
|
|
|
{
|
|
|
|
int error, opromisc;
|
|
|
|
struct ifnet *ifp;
|
|
|
|
struct bpf_if *bp;
|
The advent of if_detach, allowing interface removal at runtime, makes it
possible for a panic to occur if BPF is in use on the interface at the
time of the call to if_detach. This happens because BPF maintains pointers
to the struct ifnet describing the interface, which is freed by if_detach.
To correct this problem, a new call, bpfdetach, is introduced. bpfdetach
locates BPF descriptor references to the interface, and NULLs them. Other
BPF code is modified so that discovery of a NULL interface results in
ENXIO (already implemented for some calls). Processes blocked on a BPF
call will also be woken up so that they can receive ENXIO.
Interface drivers that invoke bpfattach and if_detach must be modified to
also call bpfattach(ifp) before calling if_detach(ifp). This is relevant
for buses that support hot removal, such as pccard and usb. Patches to
all effected devices will not be committed, only to if_wi.c, due to
testing limitations. To reproduce the crash, load up tcpdump on you
favorite pccard ethernet card, and then eject the card. As some pccard
drivers do not invoke if_detach(ifp), this bug will not manifest itself
for those drivers.
Reviewed by: wes
2000-03-19 05:42:34 +00:00
|
|
|
|
2012-05-21 22:13:48 +00:00
|
|
|
BPF_LOCK_ASSERT();
|
2019-05-13 13:45:28 +00:00
|
|
|
MPASS(d->bd_bif != NULL);
|
2012-05-21 22:13:48 +00:00
|
|
|
|
2019-05-13 13:45:28 +00:00
|
|
|
/*
|
|
|
|
* It is safe to check bd_bif without BPFD_LOCK, it can not be
|
|
|
|
* changed while we hold global lock.
|
|
|
|
*/
|
2003-01-20 19:08:46 +00:00
|
|
|
if (d->bd_bif->bif_dlt == dlt)
|
|
|
|
return (0);
|
2012-05-21 22:13:48 +00:00
|
|
|
|
2019-05-13 13:45:28 +00:00
|
|
|
ifp = d->bd_bif->bif_ifp;
|
|
|
|
CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) {
|
2003-01-20 19:08:46 +00:00
|
|
|
if (bp->bif_ifp == ifp && bp->bif_dlt == dlt)
|
|
|
|
break;
|
|
|
|
}
|
2019-05-13 13:45:28 +00:00
|
|
|
if (bp == NULL)
|
|
|
|
return (EINVAL);
|
2012-05-21 22:13:48 +00:00
|
|
|
|
2019-05-13 13:45:28 +00:00
|
|
|
opromisc = d->bd_promisc;
|
|
|
|
bpf_attachd(d, bp);
|
|
|
|
if (opromisc) {
|
|
|
|
error = ifpromisc(bp->bif_ifp, 1);
|
|
|
|
if (error)
|
|
|
|
if_printf(bp->bif_ifp, "%s: ifpromisc failed (%d)\n",
|
|
|
|
__func__, error);
|
|
|
|
else
|
|
|
|
d->bd_promisc = 1;
|
2003-01-20 19:08:46 +00:00
|
|
|
}
|
2019-05-13 13:45:28 +00:00
|
|
|
return (0);
|
The advent of if_detach, allowing interface removal at runtime, makes it
possible for a panic to occur if BPF is in use on the interface at the
time of the call to if_detach. This happens because BPF maintains pointers
to the struct ifnet describing the interface, which is freed by if_detach.
To correct this problem, a new call, bpfdetach, is introduced. bpfdetach
locates BPF descriptor references to the interface, and NULLs them. Other
BPF code is modified so that discovery of a NULL interface results in
ENXIO (already implemented for some calls). Processes blocked on a BPF
call will also be woken up so that they can receive ENXIO.
Interface drivers that invoke bpfattach and if_detach must be modified to
also call bpfattach(ifp) before calling if_detach(ifp). This is relevant
for buses that support hot removal, such as pccard and usb. Patches to
all effected devices will not be committed, only to if_wi.c, due to
testing limitations. To reproduce the crash, load up tcpdump on you
favorite pccard ethernet card, and then eject the card. As some pccard
drivers do not invoke if_detach(ifp), this bug will not manifest itself
for those drivers.
Reviewed by: wes
2000-03-19 05:42:34 +00:00
|
|
|
}
|
|
|
|
|
1997-09-16 11:44:05 +00:00
|
|
|
static void
|
2006-06-15 15:39:12 +00:00
|
|
|
bpf_drvinit(void *unused)
|
1995-11-29 10:49:16 +00:00
|
|
|
{
|
2008-08-13 15:41:21 +00:00
|
|
|
struct cdev *dev;
|
1996-03-28 14:33:59 +00:00
|
|
|
|
2018-04-10 19:42:50 +00:00
|
|
|
sx_init(&bpf_sx, "bpf global lock");
|
2019-05-13 13:45:28 +00:00
|
|
|
CK_LIST_INIT(&bpf_iflist);
|
2008-08-13 15:41:21 +00:00
|
|
|
|
|
|
|
dev = make_dev(&bpf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "bpf");
|
|
|
|
/* For compatibility */
|
|
|
|
make_dev_alias(dev, "bpf0");
|
1995-11-29 10:49:16 +00:00
|
|
|
}
|
|
|
|
|
2009-06-19 20:31:44 +00:00
|
|
|
/*
|
|
|
|
* Zero out the various packet counters associated with all of the bpf
|
|
|
|
* descriptors. At some point, we will probably want to get a bit more
|
|
|
|
* granular and allow the user to specify descriptors to be zeroed.
|
|
|
|
*/
|
|
|
|
static void
|
|
|
|
bpf_zero_counters(void)
|
|
|
|
{
|
|
|
|
struct bpf_if *bp;
|
|
|
|
struct bpf_d *bd;
|
|
|
|
|
2012-04-06 06:53:58 +00:00
|
|
|
BPF_LOCK();
|
2019-05-13 13:45:28 +00:00
|
|
|
/*
|
|
|
|
* We are protected by global lock here, interfaces and
|
|
|
|
* descriptors can not be deleted while we hold it.
|
|
|
|
*/
|
|
|
|
CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) {
|
|
|
|
CK_LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
|
2018-03-20 22:57:06 +00:00
|
|
|
counter_u64_zero(bd->bd_rcount);
|
|
|
|
counter_u64_zero(bd->bd_dcount);
|
|
|
|
counter_u64_zero(bd->bd_fcount);
|
|
|
|
counter_u64_zero(bd->bd_wcount);
|
|
|
|
counter_u64_zero(bd->bd_wfcount);
|
|
|
|
counter_u64_zero(bd->bd_zcopy);
|
2009-06-19 20:31:44 +00:00
|
|
|
}
|
|
|
|
}
|
2012-04-06 06:53:58 +00:00
|
|
|
BPF_UNLOCK();
|
2009-06-19 20:31:44 +00:00
|
|
|
}
|
|
|
|
|
2012-05-21 22:13:48 +00:00
|
|
|
/*
|
|
|
|
* Fill filter statistics
|
|
|
|
*/
|
2005-07-24 17:21:17 +00:00
|
|
|
static void
|
|
|
|
bpfstats_fill_xbpf(struct xbpf_d *d, struct bpf_d *bd)
|
|
|
|
{
|
|
|
|
|
2019-05-13 13:45:28 +00:00
|
|
|
BPF_LOCK_ASSERT();
|
2005-07-24 17:21:17 +00:00
|
|
|
bzero(d, sizeof(*d));
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patchs to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstrac
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring hanges that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
d->bd_structsize = sizeof(*d);
|
2005-07-24 17:21:17 +00:00
|
|
|
d->bd_immediate = bd->bd_immediate;
|
|
|
|
d->bd_promisc = bd->bd_promisc;
|
|
|
|
d->bd_hdrcmplt = bd->bd_hdrcmplt;
|
2007-02-26 22:24:14 +00:00
|
|
|
d->bd_direction = bd->bd_direction;
|
|
|
|
d->bd_feedback = bd->bd_feedback;
|
2005-07-24 17:21:17 +00:00
|
|
|
d->bd_async = bd->bd_async;
|
2018-03-20 22:57:06 +00:00
|
|
|
d->bd_rcount = counter_u64_fetch(bd->bd_rcount);
|
|
|
|
d->bd_dcount = counter_u64_fetch(bd->bd_dcount);
|
|
|
|
d->bd_fcount = counter_u64_fetch(bd->bd_fcount);
|
2005-07-24 17:21:17 +00:00
|
|
|
d->bd_sig = bd->bd_sig;
|
|
|
|
d->bd_slen = bd->bd_slen;
|
|
|
|
d->bd_hlen = bd->bd_hlen;
|
|
|
|
d->bd_bufsize = bd->bd_bufsize;
|
|
|
|
d->bd_pid = bd->bd_pid;
|
|
|
|
strlcpy(d->bd_ifname,
|
|
|
|
bd->bd_bif->bif_ifp->if_xname, IFNAMSIZ);
|
2005-08-22 19:35:48 +00:00
|
|
|
d->bd_locked = bd->bd_locked;
|
2018-03-20 22:57:06 +00:00
|
|
|
d->bd_wcount = counter_u64_fetch(bd->bd_wcount);
|
|
|
|
d->bd_wdcount = counter_u64_fetch(bd->bd_wdcount);
|
|
|
|
d->bd_wfcount = counter_u64_fetch(bd->bd_wfcount);
|
|
|
|
d->bd_zcopy = counter_u64_fetch(bd->bd_zcopy);
|
Introduce support for zero-copy BPF buffering, which reduces the
overhead of packet capture by allowing a user process to directly "loan"
buffer memory to the kernel rather than using read(2) to explicitly copy
data from kernel address space.
The user process will issue new BPF ioctls to set the shared memory
buffer mode and provide pointers to buffers and their size. The kernel
then wires and maps the pages into kernel address space using sf_buf(9),
which on supporting architectures will use the direct map region. The
current "buffered" access mode remains the default, and support for
zero-copy buffers must, for the time being, be explicitly enabled using
a sysctl for the kernel to accept requests to use it.
The kernel and user process synchronize use of the buffers with atomic
operations, avoiding the need for system calls under load; the user
process may use select()/poll()/kqueue() to manage blocking while
waiting for network data if the user process is able to consume data
faster than the kernel generates it. Patchs to libpcap are available
to allow libpcap applications to transparently take advantage of this
support. Detailed information on the new API may be found in bpf(4),
including specific atomic operations and memory barriers required to
synchronize buffer use safely.
These changes modify the base BPF implementation to (roughly) abstrac
the current buffer model, allowing the new shared memory model to be
added, and add new monitoring statistics for netstat to print. The
implementation, with the exception of some monitoring hanges that break
the netstat monitoring ABI for BPF, will be MFC'd.
Zerocopy bpf buffers are still considered experimental are disabled
by default. To experiment with this new facility, adjust the
net.bpf.zerocopy_enable sysctl variable to 1.
Changes to libpcap will be made available as a patch for the time being,
and further refinements to the implementation are expected.
Sponsored by: Seccuris Inc.
In collaboration with: rwatson
Tested by: pwood, gallatin
MFC after: 4 months [1]
[1] Certain portions will probably not be MFCed, specifically things
that can break the monitoring ABI.
2008-03-24 13:49:17 +00:00
|
|
|
d->bd_bufmode = bd->bd_bufmode;
|
2005-07-24 17:21:17 +00:00
|
|
|
}
|
|
|
|
|
2012-05-21 22:13:48 +00:00
|
|
|
/*
|
|
|
|
* Handle `netstat -B' stats request
|
|
|
|
*/
|
2005-07-24 17:21:17 +00:00
|
|
|
static int
|
|
|
|
bpf_stats_sysctl(SYSCTL_HANDLER_ARGS)
|
|
|
|
{
|
2014-10-28 12:00:39 +00:00
|
|
|
static const struct xbpf_d zerostats;
|
|
|
|
struct xbpf_d *xbdbuf, *xbd, tempstats;
|
2005-07-26 17:21:56 +00:00
|
|
|
int index, error;
|
2005-07-24 17:21:17 +00:00
|
|
|
struct bpf_if *bp;
|
|
|
|
struct bpf_d *bd;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* XXX This is not technically correct. It is possible for non
|
|
|
|
* privileged users to open bpf devices. It would make sense
|
|
|
|
* if the users who opened the devices were able to retrieve
|
|
|
|
* the statistics for them, too.
|
|
|
|
*/
|
2006-11-06 13:42:10 +00:00
|
|
|
error = priv_check(req->td, PRIV_NET_BPF);
|
2005-07-24 17:21:17 +00:00
|
|
|
if (error)
|
|
|
|
return (error);
|
2009-06-19 20:31:44 +00:00
|
|
|
/*
|
|
|
|
* Check to see if the user is requesting that the counters be
|
|
|
|
* zeroed out. Explicitly check that the supplied data is zeroed,
|
|
|
|
* as we aren't allowing the user to set the counters currently.
|
|
|
|
*/
|
|
|
|
if (req->newptr != NULL) {
|
2014-10-28 12:00:39 +00:00
|
|
|
if (req->newlen != sizeof(tempstats))
|
2009-06-19 20:31:44 +00:00
|
|
|
return (EINVAL);
|
2014-10-28 12:00:39 +00:00
|
|
|
memset(&tempstats, 0, sizeof(tempstats));
|
|
|
|
error = SYSCTL_IN(req, &tempstats, sizeof(tempstats));
|
|
|
|
if (error)
|
|
|
|
return (error);
|
|
|
|
if (bcmp(&tempstats, &zerostats, sizeof(tempstats)) != 0)
|
2009-06-19 20:31:44 +00:00
|
|
|
return (EINVAL);
|
|
|
|
bpf_zero_counters();
|
|
|
|
return (0);
|
|
|
|
}
|
2005-07-24 17:21:17 +00:00
|
|
|
if (req->oldptr == NULL)
|
2005-07-26 17:21:56 +00:00
|
|
|
return (SYSCTL_OUT(req, 0, bpf_bpfd_cnt * sizeof(*xbd)));
|
2005-07-24 17:21:17 +00:00
|
|
|
if (bpf_bpfd_cnt == 0)
|
|
|
|
return (SYSCTL_OUT(req, 0, 0));
|
2005-07-26 17:21:56 +00:00
|
|
|
xbdbuf = malloc(req->oldlen, M_BPF, M_WAITOK);
|
2012-04-06 06:53:58 +00:00
|
|
|
BPF_LOCK();
|
2005-07-26 17:21:56 +00:00
|
|
|
if (req->oldlen < (bpf_bpfd_cnt * sizeof(*xbd))) {
|
2012-04-06 06:53:58 +00:00
|
|
|
BPF_UNLOCK();
|
2005-07-26 17:21:56 +00:00
|
|
|
free(xbdbuf, M_BPF);
|
|
|
|
return (ENOMEM);
|
|
|
|
}
|
|
|
|
index = 0;
|
2019-05-13 13:45:28 +00:00
|
|
|
CK_LIST_FOREACH(bp, &bpf_iflist, bif_next) {
|
2012-04-06 06:55:21 +00:00
|
|
|
/* Send writers-only first */
|
2019-05-13 13:45:28 +00:00
|
|
|
CK_LIST_FOREACH(bd, &bp->bif_wlist, bd_next) {
|
2012-04-06 06:55:21 +00:00
|
|
|
xbd = &xbdbuf[index++];
|
|
|
|
bpfstats_fill_xbpf(xbd, bd);
|
|
|
|
}
|
2019-05-13 13:45:28 +00:00
|
|
|
CK_LIST_FOREACH(bd, &bp->bif_dlist, bd_next) {
|
2005-07-26 17:21:56 +00:00
|
|
|
xbd = &xbdbuf[index++];
|
|
|
|
bpfstats_fill_xbpf(xbd, bd);
|
2005-07-24 17:21:17 +00:00
|
|
|
}
|
|
|
|
}
|
2012-04-06 06:53:58 +00:00
|
|
|
BPF_UNLOCK();
|
2005-07-26 17:21:56 +00:00
|
|
|
error = SYSCTL_OUT(req, xbdbuf, index * sizeof(*xbd));
|
|
|
|
free(xbdbuf, M_BPF);
|
2005-07-24 17:21:17 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
2008-03-16 10:58:09 +00:00
|
|
|
/* SYSINIT entry for bpf_drvinit (SI_SUB_DRIVERS, SI_ORDER_MIDDLE). */
SYSINIT(bpfdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, bpf_drvinit, NULL);
|
1995-11-29 10:49:16 +00:00
|
|
|
|
2001-01-29 13:26:14 +00:00
|
|
|
#else /* !DEV_BPF && !NETGRAPH_BPF */
|
2018-04-24 17:42:25 +00:00
|
|
|
|
1999-04-28 01:18:13 +00:00
|
|
|
/*
|
|
|
|
* NOP stubs to allow bpf-using drivers to load and function.
|
|
|
|
*
|
|
|
|
* A 'better' implementation would allow the core bpf functionality
|
|
|
|
* to be loaded at runtime.
|
|
|
|
*/
|
|
|
|
|
|
|
|
void
|
2006-06-15 15:39:12 +00:00
|
|
|
bpf_tap(struct bpf_if *bp, u_char *pkt, u_int pktlen)
|
1999-04-28 01:18:13 +00:00
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
/* NOP stub: the mbuf handed in is ignored. */
void
bpf_mtap(struct bpf_if *bp, struct mbuf *m)
{
	/* Intentionally empty. */
}
|
|
|
|
|
2003-12-28 03:56:00 +00:00
|
|
|
void
|
2006-06-15 15:39:12 +00:00
|
|
|
bpf_mtap2(struct bpf_if *bp, void *d, u_int l, struct mbuf *m)
|
2003-12-28 03:56:00 +00:00
|
|
|
{
|
|
|
|
}
|
|
|
|
|
1999-04-28 01:18:13 +00:00
|
|
|
void
|
2006-06-15 15:39:12 +00:00
|
|
|
bpfattach(struct ifnet *ifp, u_int dlt, u_int hdrlen)
|
1999-04-28 01:18:13 +00:00
|
|
|
{
|
2006-06-14 02:23:28 +00:00
|
|
|
|
|
|
|
bpfattach2(ifp, dlt, hdrlen, &ifp->if_bpf);
|
1999-04-28 01:18:13 +00:00
|
|
|
}
|
|
|
|
|
2003-10-04 01:32:28 +00:00
|
|
|
void
|
2006-06-15 15:39:12 +00:00
|
|
|
bpfattach2(struct ifnet *ifp, u_int dlt, u_int hdrlen, struct bpf_if **driverp)
|
2003-10-04 01:32:28 +00:00
|
|
|
{
|
2006-06-14 02:23:28 +00:00
|
|
|
|
2018-04-24 17:42:25 +00:00
|
|
|
*driverp = (struct bpf_if *)&dead_bpf_if;
|
2003-10-04 01:32:28 +00:00
|
|
|
}
|
|
|
|
|
2000-04-27 15:11:41 +00:00
|
|
|
/* NOP stub: the interface argument is ignored. */
void
bpfdetach(struct ifnet *ifp)
{
	/* Intentionally empty. */
}
|
|
|
|
|
1999-04-28 01:18:13 +00:00
|
|
|
u_int
|
2006-06-15 15:39:12 +00:00
|
|
|
bpf_filter(const struct bpf_insn *pc, u_char *p, u_int wirelen, u_int buflen)
|
1999-04-28 01:18:13 +00:00
|
|
|
{
|
|
|
|
return -1; /* "no filter" behaviour */
|
|
|
|
}
|
|
|
|
|
2001-01-29 13:26:14 +00:00
|
|
|
/* Stub validator: every program is reported as invalid (returns 0). */
int
bpf_validate(const struct bpf_insn *f, int len)
{

	return (0);	/* false */
}
|
|
|
|
|
|
|
|
#endif /* !DEV_BPF && !NETGRAPH_BPF */
|
2016-04-11 10:00:38 +00:00
|
|
|
|
|
|
|
#ifdef DDB
|
|
|
|
static void
|
|
|
|
bpf_show_bpf_if(struct bpf_if *bpf_if)
|
|
|
|
{
|
|
|
|
|
|
|
|
if (bpf_if == NULL)
|
|
|
|
return;
|
|
|
|
db_printf("%p:\n", bpf_if);
|
|
|
|
#define BPF_DB_PRINTF(f, e) db_printf(" %s = " f "\n", #e, bpf_if->e);
|
|
|
|
/* bif_ext.bif_next */
|
|
|
|
/* bif_ext.bif_dlist */
|
|
|
|
BPF_DB_PRINTF("%#x", bif_dlt);
|
|
|
|
BPF_DB_PRINTF("%u", bif_hdrlen);
|
|
|
|
/* bif_wlist */
|
2019-05-13 13:45:28 +00:00
|
|
|
BPF_DB_PRINTF("%p", bif_ifp);
|
|
|
|
BPF_DB_PRINTF("%p", bif_bpf);
|
|
|
|
BPF_DB_PRINTF("%u", bif_refcnt);
|
2016-04-11 10:00:38 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
DB_SHOW_COMMAND(bpf_if, db_show_bpf_if)
|
|
|
|
{
|
|
|
|
|
|
|
|
if (!have_addr) {
|
|
|
|
db_printf("usage: show bpf_if <struct bpf_if *>\n");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
bpf_show_bpf_if((struct bpf_if *)addr);
|
|
|
|
}
|
|
|
|
#endif
|