freebsd-dev/sys/net/bpf.h
Andrey V. Elsukov 699281b545 Rework locking in BPF code to remove rwlock from fast path.
On high packets rate the contention on rwlock in bpf_*tap*() functions
can lead to packets dropping. To avoid this, migrate this code to use
epoch(9) KPI and ConcurrencyKit's lists.

* all lists changed to use CK_LIST;
* reference counting added to bpf_if and bpf_d;
* now bpf_if references ifnet and releases this reference on destroy;
* each bpf_d descriptor references bpf_if when it is attached;
* new struct bpf_program_buffer introduced to keep BPF filter programs;
* bpf_program_buffer, bpf_d and bpf_if structures are freed by
  epoch_call();
* bpf_freelist and ifnet_departure event are no longer needed, thus
  both are removed;

Reviewed by:	melifaro
Sponsored by:	Yandex LLC
Differential Revision:	https://reviews.freebsd.org/D20224
2019-05-13 13:45:28 +00:00

477 lines
15 KiB
C

/*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 1990, 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from the Stanford/CMU enet packet filter,
* (net/enet.c) distributed as part of 4.3BSD, and code contributed
* to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
* Berkeley Laboratory.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)bpf.h 8.1 (Berkeley) 6/10/93
* @(#)bpf.h 1.34 (LBL) 6/16/96
*
* $FreeBSD$
*/
#ifndef _NET_BPF_H_
#define _NET_BPF_H_
#include <sys/ck.h>
#include <net/dlt.h>
/* BSD style release date */
#define BPF_RELEASE 199606
typedef int32_t bpf_int32;
typedef u_int32_t bpf_u_int32;
typedef int64_t bpf_int64;
typedef u_int64_t bpf_u_int64;
/*
* Alignment macros. BPF_WORDALIGN rounds up to the next
* even multiple of BPF_ALIGNMENT.
*/
#define BPF_ALIGNMENT sizeof(long)
#define BPF_WORDALIGN(x) (((x)+(BPF_ALIGNMENT-1))&~(BPF_ALIGNMENT-1))
#define BPF_MAXINSNS 512
#define BPF_MAXBUFSIZE 0x80000
#define BPF_MINBUFSIZE 32
/*
* Structure for BIOCSETF.
*/
struct bpf_program {
u_int bf_len;
struct bpf_insn *bf_insns;
};
/*
* Struct returned by BIOCGSTATS.
*/
struct bpf_stat {
u_int bs_recv; /* number of packets received */
u_int bs_drop; /* number of packets dropped */
};
/*
* Struct return by BIOCVERSION. This represents the version number of
* the filter language described by the instruction encodings below.
* bpf understands a program iff kernel_major == filter_major &&
* kernel_minor >= filter_minor, that is, if the value returned by the
* running kernel has the same major number and a minor number equal
* equal to or less than the filter being downloaded. Otherwise, the
* results are undefined, meaning an error may be returned or packets
* may be accepted haphazardly.
* It has nothing to do with the source code version.
*/
struct bpf_version {
u_short bv_major;
u_short bv_minor;
};
/* Current version number of filter architecture. */
#define BPF_MAJOR_VERSION 1
#define BPF_MINOR_VERSION 1
/*
* Historically, BPF has supported a single buffering model, first using mbuf
* clusters in kernel, and later using malloc(9) buffers in kernel. We now
* support multiple buffering modes, which may be queried and set using
* BIOCGETBUFMODE and BIOCSETBUFMODE. So as to avoid handling the complexity
* of changing modes while sniffing packets, the mode becomes fixed once an
* interface has been attached to the BPF descriptor.
*/
#define BPF_BUFMODE_BUFFER 1 /* Kernel buffers with read(). */
#define BPF_BUFMODE_ZBUF 2 /* Zero-copy buffers. */
/*-
* Struct used by BIOCSETZBUF, BIOCROTZBUF: describes up to two zero-copy
* buffer as used by BPF.
*/
struct bpf_zbuf {
void *bz_bufa; /* Location of 'a' zero-copy buffer. */
void *bz_bufb; /* Location of 'b' zero-copy buffer. */
size_t bz_buflen; /* Size of zero-copy buffers. */
};
#define BIOCGBLEN _IOR('B', 102, u_int)
#define BIOCSBLEN _IOWR('B', 102, u_int)
#define BIOCSETF _IOW('B', 103, struct bpf_program)
#define BIOCFLUSH _IO('B', 104)
#define BIOCPROMISC _IO('B', 105)
#define BIOCGDLT _IOR('B', 106, u_int)
#define BIOCGETIF _IOR('B', 107, struct ifreq)
#define BIOCSETIF _IOW('B', 108, struct ifreq)
#define BIOCSRTIMEOUT _IOW('B', 109, struct timeval)
#define BIOCGRTIMEOUT _IOR('B', 110, struct timeval)
#define BIOCGSTATS _IOR('B', 111, struct bpf_stat)
#define BIOCIMMEDIATE _IOW('B', 112, u_int)
#define BIOCVERSION _IOR('B', 113, struct bpf_version)
#define BIOCGRSIG _IOR('B', 114, u_int)
#define BIOCSRSIG _IOW('B', 115, u_int)
#define BIOCGHDRCMPLT _IOR('B', 116, u_int)
#define BIOCSHDRCMPLT _IOW('B', 117, u_int)
#define BIOCGDIRECTION _IOR('B', 118, u_int)
#define BIOCSDIRECTION _IOW('B', 119, u_int)
#define BIOCSDLT _IOW('B', 120, u_int)
#define BIOCGDLTLIST _IOWR('B', 121, struct bpf_dltlist)
#define BIOCLOCK _IO('B', 122)
#define BIOCSETWF _IOW('B', 123, struct bpf_program)
#define BIOCFEEDBACK _IOW('B', 124, u_int)
#define BIOCGETBUFMODE _IOR('B', 125, u_int)
#define BIOCSETBUFMODE _IOW('B', 126, u_int)
#define BIOCGETZMAX _IOR('B', 127, size_t)
#define BIOCROTZBUF _IOR('B', 128, struct bpf_zbuf)
#define BIOCSETZBUF _IOW('B', 129, struct bpf_zbuf)
#define BIOCSETFNR _IOW('B', 130, struct bpf_program)
#define BIOCGTSTAMP _IOR('B', 131, u_int)
#define BIOCSTSTAMP _IOW('B', 132, u_int)
/* Obsolete */
#define BIOCGSEESENT BIOCGDIRECTION
#define BIOCSSEESENT BIOCSDIRECTION
/* Packet directions */
enum bpf_direction {
BPF_D_IN, /* See incoming packets */
BPF_D_INOUT, /* See incoming and outgoing packets */
BPF_D_OUT /* See outgoing packets */
};
/* Time stamping functions */
#define BPF_T_MICROTIME 0x0000
#define BPF_T_NANOTIME 0x0001
#define BPF_T_BINTIME 0x0002
#define BPF_T_NONE 0x0003
#define BPF_T_FORMAT_MASK 0x0003
#define BPF_T_NORMAL 0x0000
#define BPF_T_FAST 0x0100
#define BPF_T_MONOTONIC 0x0200
#define BPF_T_MONOTONIC_FAST (BPF_T_FAST | BPF_T_MONOTONIC)
#define BPF_T_FLAG_MASK 0x0300
#define BPF_T_FORMAT(t) ((t) & BPF_T_FORMAT_MASK)
#define BPF_T_FLAG(t) ((t) & BPF_T_FLAG_MASK)
#define BPF_T_VALID(t) \
((t) == BPF_T_NONE || (BPF_T_FORMAT(t) != BPF_T_NONE && \
((t) & ~(BPF_T_FORMAT_MASK | BPF_T_FLAG_MASK)) == 0))
#define BPF_T_MICROTIME_FAST (BPF_T_MICROTIME | BPF_T_FAST)
#define BPF_T_NANOTIME_FAST (BPF_T_NANOTIME | BPF_T_FAST)
#define BPF_T_BINTIME_FAST (BPF_T_BINTIME | BPF_T_FAST)
#define BPF_T_MICROTIME_MONOTONIC (BPF_T_MICROTIME | BPF_T_MONOTONIC)
#define BPF_T_NANOTIME_MONOTONIC (BPF_T_NANOTIME | BPF_T_MONOTONIC)
#define BPF_T_BINTIME_MONOTONIC (BPF_T_BINTIME | BPF_T_MONOTONIC)
#define BPF_T_MICROTIME_MONOTONIC_FAST (BPF_T_MICROTIME | BPF_T_MONOTONIC_FAST)
#define BPF_T_NANOTIME_MONOTONIC_FAST (BPF_T_NANOTIME | BPF_T_MONOTONIC_FAST)
#define BPF_T_BINTIME_MONOTONIC_FAST (BPF_T_BINTIME | BPF_T_MONOTONIC_FAST)
/*
* Structure prepended to each packet.
*/
struct bpf_ts {
bpf_int64 bt_sec; /* seconds */
bpf_u_int64 bt_frac; /* fraction */
};
struct bpf_xhdr {
struct bpf_ts bh_tstamp; /* time stamp */
bpf_u_int32 bh_caplen; /* length of captured portion */
bpf_u_int32 bh_datalen; /* original length of packet */
u_short bh_hdrlen; /* length of bpf header (this struct
plus alignment padding) */
};
/* Obsolete */
struct bpf_hdr {
struct timeval bh_tstamp; /* time stamp */
bpf_u_int32 bh_caplen; /* length of captured portion */
bpf_u_int32 bh_datalen; /* original length of packet */
u_short bh_hdrlen; /* length of bpf header (this struct
plus alignment padding) */
};
#ifdef _KERNEL
#define MTAG_BPF 0x627066
#define MTAG_BPF_TIMESTAMP 0
#endif
/*
* When using zero-copy BPF buffers, a shared memory header is present
* allowing the kernel BPF implementation and user process to synchronize
* without using system calls. This structure defines that header. When
* accessing these fields, appropriate atomic operation and memory barriers
* are required in order not to see stale or out-of-order data; see bpf(4)
* for reference code to access these fields from userspace.
*
* The layout of this structure is critical, and must not be changed; if must
* fit in a single page on all architectures.
*/
struct bpf_zbuf_header {
volatile u_int bzh_kernel_gen; /* Kernel generation number. */
volatile u_int bzh_kernel_len; /* Length of data in the buffer. */
volatile u_int bzh_user_gen; /* User generation number. */
u_int _bzh_pad[5];
};
/*
* The instruction encodings.
*
* Please inform tcpdump-workers@lists.tcpdump.org if you use any
* of the reserved values, so that we can note that they're used
* (and perhaps implement it in the reference BPF implementation
* and encourage its implementation elsewhere).
*/
/*
* The upper 8 bits of the opcode aren't used. BSD/OS used 0x8000.
*/
/* instruction classes */
#define BPF_CLASS(code) ((code) & 0x07)
#define BPF_LD 0x00
#define BPF_LDX 0x01
#define BPF_ST 0x02
#define BPF_STX 0x03
#define BPF_ALU 0x04
#define BPF_JMP 0x05
#define BPF_RET 0x06
#define BPF_MISC 0x07
/* ld/ldx fields */
#define BPF_SIZE(code) ((code) & 0x18)
#define BPF_W 0x00
#define BPF_H 0x08
#define BPF_B 0x10
/* 0x18 reserved; used by BSD/OS */
#define BPF_MODE(code) ((code) & 0xe0)
#define BPF_IMM 0x00
#define BPF_ABS 0x20
#define BPF_IND 0x40
#define BPF_MEM 0x60
#define BPF_LEN 0x80
#define BPF_MSH 0xa0
/* 0xc0 reserved; used by BSD/OS */
/* 0xe0 reserved; used by BSD/OS */
/* alu/jmp fields */
#define BPF_OP(code) ((code) & 0xf0)
#define BPF_ADD 0x00
#define BPF_SUB 0x10
#define BPF_MUL 0x20
#define BPF_DIV 0x30
#define BPF_OR 0x40
#define BPF_AND 0x50
#define BPF_LSH 0x60
#define BPF_RSH 0x70
#define BPF_NEG 0x80
#define BPF_MOD 0x90
#define BPF_XOR 0xa0
/* 0xb0 reserved */
/* 0xc0 reserved */
/* 0xd0 reserved */
/* 0xe0 reserved */
/* 0xf0 reserved */
#define BPF_JA 0x00
#define BPF_JEQ 0x10
#define BPF_JGT 0x20
#define BPF_JGE 0x30
#define BPF_JSET 0x40
/* 0x50 reserved; used on BSD/OS */
/* 0x60 reserved */
/* 0x70 reserved */
/* 0x80 reserved */
/* 0x90 reserved */
/* 0xa0 reserved */
/* 0xb0 reserved */
/* 0xc0 reserved */
/* 0xd0 reserved */
/* 0xe0 reserved */
/* 0xf0 reserved */
#define BPF_SRC(code) ((code) & 0x08)
#define BPF_K 0x00
#define BPF_X 0x08
/* ret - BPF_K and BPF_X also apply */
#define BPF_RVAL(code) ((code) & 0x18)
#define BPF_A 0x10
/* 0x18 reserved */
/* misc */
#define BPF_MISCOP(code) ((code) & 0xf8)
#define BPF_TAX 0x00
/* 0x08 reserved */
/* 0x10 reserved */
/* 0x18 reserved */
/* #define BPF_COP 0x20 NetBSD "coprocessor" extensions */
/* 0x28 reserved */
/* 0x30 reserved */
/* 0x38 reserved */
/* #define BPF_COPX 0x40 NetBSD "coprocessor" extensions */
/* also used on BSD/OS */
/* 0x48 reserved */
/* 0x50 reserved */
/* 0x58 reserved */
/* 0x60 reserved */
/* 0x68 reserved */
/* 0x70 reserved */
/* 0x78 reserved */
#define BPF_TXA 0x80
/* 0x88 reserved */
/* 0x90 reserved */
/* 0x98 reserved */
/* 0xa0 reserved */
/* 0xa8 reserved */
/* 0xb0 reserved */
/* 0xb8 reserved */
/* 0xc0 reserved; used on BSD/OS */
/* 0xc8 reserved */
/* 0xd0 reserved */
/* 0xd8 reserved */
/* 0xe0 reserved */
/* 0xe8 reserved */
/* 0xf0 reserved */
/* 0xf8 reserved */
/*
* The instruction data structure.
*/
struct bpf_insn {
u_short code;
u_char jt;
u_char jf;
bpf_u_int32 k;
};
/*
* Macros for insn array initializers.
*/
#define BPF_STMT(code, k) { (u_short)(code), 0, 0, k }
#define BPF_JUMP(code, k, jt, jf) { (u_short)(code), jt, jf, k }
/*
* Structure to retrieve available DLTs for the interface.
*/
struct bpf_dltlist {
u_int bfl_len; /* number of bfd_list array */
u_int *bfl_list; /* array of DLTs */
};
#ifdef _KERNEL
#ifdef MALLOC_DECLARE
MALLOC_DECLARE(M_BPF);
#endif
#ifdef SYSCTL_DECL
SYSCTL_DECL(_net_bpf);
#endif
/*
* Rotate the packet buffers in descriptor d. Move the store buffer into the
* hold slot, and the free buffer into the store slot. Zero the length of the
* new store buffer. Descriptor lock should be held. One must be careful to
* not rotate the buffers twice, i.e. if fbuf != NULL.
*/
#define ROTATE_BUFFERS(d) do { \
(d)->bd_hbuf = (d)->bd_sbuf; \
(d)->bd_hlen = (d)->bd_slen; \
(d)->bd_sbuf = (d)->bd_fbuf; \
(d)->bd_slen = 0; \
(d)->bd_fbuf = NULL; \
bpf_bufheld(d); \
} while (0)
/*
* Descriptor associated with each attached hardware interface.
* Part of this structure is exposed to external callers to speed up
* bpf_peers_present() calls.
*/
struct bpf_if;
CK_LIST_HEAD(bpfd_list, bpf_d);
struct bpf_if_ext {
CK_LIST_ENTRY(bpf_if) bif_next; /* list of all interfaces */
struct bpfd_list bif_dlist; /* descriptor list */
};
void bpf_bufheld(struct bpf_d *d);
int bpf_validate(const struct bpf_insn *, int);
void bpf_tap(struct bpf_if *, u_char *, u_int);
void bpf_mtap(struct bpf_if *, struct mbuf *);
void bpf_mtap2(struct bpf_if *, void *, u_int, struct mbuf *);
void bpfattach(struct ifnet *, u_int, u_int);
void bpfattach2(struct ifnet *, u_int, u_int, struct bpf_if **);
void bpfdetach(struct ifnet *);
#ifdef VIMAGE
int bpf_get_bp_params(struct bpf_if *, u_int *, u_int *);
#endif
void bpfilterattach(int);
u_int bpf_filter(const struct bpf_insn *, u_char *, u_int, u_int);
static __inline int
bpf_peers_present(struct bpf_if *bpf)
{
struct bpf_if_ext *ext;
ext = (struct bpf_if_ext *)bpf;
if (!CK_LIST_EMPTY(&ext->bif_dlist))
return (1);
return (0);
}
#define BPF_TAP(_ifp,_pkt,_pktlen) do { \
if (bpf_peers_present((_ifp)->if_bpf)) \
bpf_tap((_ifp)->if_bpf, (_pkt), (_pktlen)); \
} while (0)
#define BPF_MTAP(_ifp,_m) do { \
if (bpf_peers_present((_ifp)->if_bpf)) { \
M_ASSERTVALID(_m); \
bpf_mtap((_ifp)->if_bpf, (_m)); \
} \
} while (0)
#define BPF_MTAP2(_ifp,_data,_dlen,_m) do { \
if (bpf_peers_present((_ifp)->if_bpf)) { \
M_ASSERTVALID(_m); \
bpf_mtap2((_ifp)->if_bpf,(_data),(_dlen),(_m)); \
} \
} while (0)
#endif
/*
* Number of scratch memory words (for BPF_LD|BPF_MEM and BPF_ST).
*/
#define BPF_MEMWORDS 16
#ifdef _SYS_EVENTHANDLER_H_
/* BPF attach/detach events */
struct ifnet;
typedef void (*bpf_track_fn)(void *, struct ifnet *, int /* dlt */,
int /* 1 =>'s attach */);
EVENTHANDLER_DECLARE(bpf_track, bpf_track_fn);
#endif /* _SYS_EVENTHANDLER_H_ */
#endif /* _NET_BPF_H_ */