/*-
 * Copyright (c) 1982, 1986, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)socketvar.h	8.3 (Berkeley) 2/19/95
 *
 * $FreeBSD$
 */

#ifndef _SYS_SOCKETVAR_H_
#define	_SYS_SOCKETVAR_H_

#include <sys/queue.h>			/* for TAILQ macros */
#include <sys/selinfo.h>		/* for struct selinfo */
#include <sys/_lock.h>
#include <sys/_mutex.h>
#include <sys/_sx.h>
#include <sys/sockbuf.h>
#include <sys/sockstate.h>
#ifdef _KERNEL
#include <sys/sockopt.h>
#endif

struct vnet;

/*
 * Kernel structure per socket.
 * Contains send and receive buffer queues,
 * handle on protocol and pointer to protocol
 * private data and error information.
 */
typedef	u_quad_t so_gen_t;

struct socket;

/*-
 * Locking key to struct socket:
 * (a) constant after allocation, no locking required.
 * (b) locked by SOCK_LOCK(so).
 * (c) locked by SOCKBUF_LOCK(&so->so_rcv).
 * (d) locked by SOCKBUF_LOCK(&so->so_snd).
 * (e) locked by ACCEPT_LOCK().
 * (f) not locked since integer reads/writes are atomic.
 * (g) used only as a sleep/wakeup address, no value.
 * (h) locked by global mutex so_global_mtx.
 */
struct socket {
	int	so_count;		/* (b) reference count */
	short	so_type;		/* (a) generic type, see socket.h */
	short	so_options;		/* from socket call, see socket.h */
	short	so_linger;		/* time to linger while closing */
	short	so_state;		/* (b) internal state flags SS_* */
	int	so_qstate;		/* (e) internal state flags SQ_* */
	void	*so_pcb;		/* protocol control block */
	struct	vnet *so_vnet;		/* network stack instance */
	struct	protosw *so_proto;	/* (a) protocol handle */
/*
 * Variables for connection queuing.
 * Socket where accepts occur is so_head in all subsidiary sockets.
 * If so_head is 0, socket is not related to an accept.
 * For head socket so_incomp queues partially completed connections,
 * while so_comp is a queue of connections ready to be accepted.
 * If a connection is aborted and it has so_head set, then
 * it has to be pulled out of either so_incomp or so_comp.
 * We allow connections to queue up based on current queue lengths
 * and limit on number of queued connections for this socket.
 */
	struct	socket *so_head;	/* (e) back pointer to listen socket */
	TAILQ_HEAD(, socket) so_incomp;	/* (e) queue of partial unaccepted connections */
	TAILQ_HEAD(, socket) so_comp;	/* (e) queue of complete unaccepted connections */
	TAILQ_ENTRY(socket) so_list;	/* (e) list of unaccepted connections */
	u_short	so_qlen;		/* (e) number of unaccepted connections */
	u_short	so_incqlen;		/* (e) number of unaccepted incomplete
					   connections */
	u_short	so_qlimit;		/* (e) max number queued connections */
	short	so_timeo;		/* (g) connection timeout */
	u_short	so_error;		/* (f) error affecting connection */
	struct	sigio *so_sigio;	/* [sg] information for async I/O or
					   out of band data (SIGURG) */
	u_long	so_oobmark;		/* (c) chars to oob mark */
	TAILQ_HEAD(, aiocblist) so_aiojobq;	/* AIO ops waiting on socket */

	struct sockbuf so_rcv, so_snd;

	struct	ucred *so_cred;		/* (a) user credentials */
	struct	label *so_label;	/* (b) MAC label for socket */
	struct	label *so_peerlabel;	/* (b) cached MAC label for peer */
	/* NB: generation count must not be first. */
	so_gen_t so_gencnt;		/* (h) generation count */
	void	*so_emuldata;		/* (b) private data for emulators */
	struct so_accf {
		struct	accept_filter *so_accept_filter;
		void	*so_accept_filter_arg;	/* saved filter args */
		char	*so_accept_filter_str;	/* saved user args */
	} *so_accf;
	/*
	 * so_fibnum, so_user_cookie and friends can be used to attach
	 * some user-specified metadata to a socket, which then can be
	 * used by the kernel for various actions.
	 * so_user_cookie is used by ipfw/dummynet.
	 */
	int	so_fibnum;		/* routing domain for this socket */
	uint32_t so_user_cookie;
};

/*
 * Global accept mutex to serialize access to accept queues and
 * fields associated with multiple sockets.  This allows us to
 * avoid defining a lock order between listen and accept sockets
 * until such time as it proves to be a good idea.
 */
extern struct mtx_padalign accept_mtx;
#define	ACCEPT_LOCK_ASSERT()		mtx_assert(&accept_mtx, MA_OWNED)
#define	ACCEPT_UNLOCK_ASSERT()		mtx_assert(&accept_mtx, MA_NOTOWNED)
#define	ACCEPT_LOCK()			mtx_lock(&accept_mtx)
#define	ACCEPT_UNLOCK()			mtx_unlock(&accept_mtx)
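/*
 * Illustrative sketch (not part of the original header): dequeueing one
 * completed connection from a listen socket's so_comp queue.  Every
 * (e)-annotated field touched here is protected by the accept mutex; the
 * reference-count bump additionally requires SOCK_LOCK().  This mirrors,
 * in simplified form, the pattern used by the kernel accept path.
 *
 *	ACCEPT_LOCK();
 *	so = TAILQ_FIRST(&head->so_comp);
 *	if (so != NULL) {
 *		TAILQ_REMOVE(&head->so_comp, so, so_list);
 *		head->so_qlen--;
 *		so->so_qstate &= ~SQ_COMP;
 *		so->so_head = NULL;
 *		SOCK_LOCK(so);
 *		soref(so);
 *		SOCK_UNLOCK(so);
 *	}
 *	ACCEPT_UNLOCK();
 */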

/*
 * Per-socket mutex: we reuse the receive socket buffer mutex for space
 * efficiency.  This decision should probably be revisited as we optimize
 * locking for the socket code.
 */
#define	SOCK_MTX(_so)			SOCKBUF_MTX(&(_so)->so_rcv)
#define	SOCK_LOCK(_so)			SOCKBUF_LOCK(&(_so)->so_rcv)
#define	SOCK_OWNED(_so)			SOCKBUF_OWNED(&(_so)->so_rcv)
#define	SOCK_UNLOCK(_so)		SOCKBUF_UNLOCK(&(_so)->so_rcv)
#define	SOCK_LOCK_ASSERT(_so)		SOCKBUF_LOCK_ASSERT(&(_so)->so_rcv)
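/*
 * Illustrative sketch (not part of the original header): fields marked (b)
 * in struct socket are read and written under SOCK_LOCK(), which aliases
 * the receive buffer mutex, so it must not be held across a sleep.
 *
 *	SOCK_LOCK(so);
 *	connected = (so->so_state & SS_ISCONNECTED) != 0;
 *	SOCK_UNLOCK(so);
 */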

/*
 * Socket state bits stored in so_qstate.
 */
#define	SQ_INCOMP		0x0800	/* unaccepted, incomplete connection */
#define	SQ_COMP			0x1000	/* unaccepted, complete connection */

/*
 * Externalized form of struct socket used by the sysctl(3) interface.
 */
struct xsocket {
	size_t	xso_len;	/* length of this structure */
	struct	socket *xso_so;	/* makes a convenient handle sometimes */
	short	so_type;
	short	so_options;
	short	so_linger;
	short	so_state;
	caddr_t	so_pcb;		/* another convenient handle */
	int	xso_protocol;
	int	xso_family;
	u_short	so_qlen;
	u_short	so_incqlen;
	u_short	so_qlimit;
	short	so_timeo;
	u_short	so_error;
	pid_t	so_pgid;
	u_long	so_oobmark;
	struct xsockbuf so_rcv, so_snd;
	uid_t	so_uid;		/* XXX */
};
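/*
 * Illustrative sketch (not part of the original header): in-kernel sysctl
 * handlers typically convert a live socket to this externalized form and
 * copy it out; xso_len lets consumers such as netstat(1) detect layout
 * changes.  The sotoxsocket() helper and SYSCTL_OUT() are assumed here,
 * not defined in this excerpt.
 *
 *	struct xsocket xso;
 *
 *	sotoxsocket(so, &xso);
 *	error = SYSCTL_OUT(req, &xso, sizeof(xso));
 */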

#ifdef _KERNEL

/*
 * Macros for sockets and socket buffering.
 */

/*
 * Flags to sblock().
 */
#define	SBL_WAIT	0x00000001	/* Wait if not immediately available. */
#define	SBL_NOINTR	0x00000002	/* Force non-interruptible sleep. */
#define	SBL_VALID	(SBL_WAIT | SBL_NOINTR)
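/*
 * Illustrative sketch (not part of the original header): these flags are
 * passed to sblock() (declared with the socket buffer routines) to
 * serialize I/O on a socket buffer.  A sleepable receive path waits with
 * SBL_WAIT; sorflush() adds SBL_NOINTR so that a signal cannot interrupt
 * the flush.
 *
 *	error = sblock(&so->so_rcv, SBL_WAIT);
 *	if (error)
 *		return (error);
 *	... consume data from so->so_rcv ...
 *	sbunlock(&so->so_rcv);
 */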

/*
 * Do we need to notify the other side when I/O is possible?
 */
#define	sb_notify(sb)	(((sb)->sb_flags & (SB_WAIT | SB_SEL | SB_ASYNC | \
    SB_UPCALL | SB_AIO | SB_KNOTE)) != 0)

/* do we have to send all at once on a socket? */
#define	sosendallatonce(so) \
	((so)->so_proto->pr_flags & PR_ATOMIC)

/* can we read something from so? */
#define	soreadabledata(so) \
	((so)->so_rcv.sb_cc >= (so)->so_rcv.sb_lowat || \
	!TAILQ_EMPTY(&(so)->so_comp) || (so)->so_error)
#define	soreadable(so) \
	(soreadabledata(so) || ((so)->so_rcv.sb_state & SBS_CANTRCVMORE))

/* can we write something to so? */
#define	sowriteable(so) \
	((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat && \
	(((so)->so_state&SS_ISCONNECTED) || \
	((so)->so_proto->pr_flags&PR_CONNREQUIRED)==0)) || \
	((so)->so_snd.sb_state & SBS_CANTSENDMORE) || \
	(so)->so_error)
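/*
 * Illustrative sketch (not part of the original header): poll-style code
 * evaluates these predicates with the corresponding socket buffer locked,
 * as sopoll() does, because they inspect (c)- and (d)-protected fields.
 *
 *	SOCKBUF_LOCK(&so->so_rcv);
 *	if (soreadable(so))
 *		revents |= POLLIN;
 *	SOCKBUF_UNLOCK(&so->so_rcv);
 */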

/*
 * soref()/sorele() ref-count the socket structure.  Note that you must
 * still explicitly close the socket, but the last ref count will free
 * the structure.
 */
#define	soref(so) do {							\
	SOCK_LOCK_ASSERT(so);						\
	++(so)->so_count;						\
} while (0)

#define	sorele(so) do {							\
	ACCEPT_LOCK_ASSERT();						\
	SOCK_LOCK_ASSERT(so);						\
	if ((so)->so_count <= 0)					\
		panic("sorele");					\
	if (--(so)->so_count == 0)					\
		sofree(so);						\
	else {								\
		SOCK_UNLOCK(so);					\
		ACCEPT_UNLOCK();					\
	}								\
} while (0)
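/*
 * Illustrative sketch (not part of the original header): a typical
 * reference acquire/release pair.  soref() needs only SOCK_LOCK();
 * sorele() must be entered with both the accept mutex and SOCK_LOCK()
 * held and consumes them, either by handing the socket to sofree() or by
 * unlocking both itself.
 *
 *	SOCK_LOCK(so);
 *	soref(so);
 *	SOCK_UNLOCK(so);
 *	... the socket cannot be freed while the reference is held ...
 *	ACCEPT_LOCK();
 *	SOCK_LOCK(so);
 *	sorele(so);
 */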

/*
 * In sorwakeup() and sowwakeup(), acquire the socket buffer lock to
 * avoid a non-atomic test-and-wakeup.  However, sowakeup is
 * responsible for releasing the lock if it is called.  We unlock only
 * if we don't call into sowakeup.  If any code is introduced that
 * directly invokes the underlying sowakeup() primitives, it must
 * maintain the same semantics.
 */
#define	sorwakeup_locked(so) do {					\
	SOCKBUF_LOCK_ASSERT(&(so)->so_rcv);				\
	if (sb_notify(&(so)->so_rcv))					\
		sowakeup((so), &(so)->so_rcv);				\
	else								\
		SOCKBUF_UNLOCK(&(so)->so_rcv);				\
} while (0)

#define	sorwakeup(so) do {						\
	SOCKBUF_LOCK(&(so)->so_rcv);					\
	sorwakeup_locked(so);						\
} while (0)

#define	sowwakeup_locked(so) do {					\
	SOCKBUF_LOCK_ASSERT(&(so)->so_snd);				\
	if (sb_notify(&(so)->so_snd))					\
		sowakeup((so), &(so)->so_snd);				\
	else								\
		SOCKBUF_UNLOCK(&(so)->so_snd);				\
} while (0)

#define	sowwakeup(so) do {						\
	SOCKBUF_LOCK(&(so)->so_snd);					\
	sowwakeup_locked(so);						\
} while (0)
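/*
 * Illustrative sketch (not part of the original header): protocol input
 * code that already holds the receive buffer lock appends data and then
 * calls the _locked wakeup, which either passes the lock to sowakeup() or
 * releases it; the unlocked wrappers do the locking themselves.
 * sbappend_locked() is the socket buffer routine from <sys/sockbuf.h>.
 *
 *	SOCKBUF_LOCK(&so->so_rcv);
 *	sbappend_locked(&so->so_rcv, m);
 *	sorwakeup_locked(so);		(the lock is released on return)
 */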

struct accept_filter {
	char	accf_name[16];
	int	(*accf_callback)
		(struct socket *so, void *arg, int waitflag);
	void	*(*accf_create)
		(struct socket *so, char *arg);
	void	(*accf_destroy)
		(struct socket *so);
	SLIST_ENTRY(accept_filter) accf_next;
};
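/*
 * Illustrative sketch (not part of the original header): an accept filter
 * module fills in this structure and registers it; accf_example_callback
 * stands for a hypothetical callback with the accf_callback signature
 * above, following the socket upcall convention described below
 * (SO_RCV/SU_*).  The registration routine (accept_filt_add() in the
 * accept filter code) is assumed here and lies outside this excerpt.
 *
 *	static struct accept_filter accf_example = {
 *		.accf_name = "example",
 *		.accf_callback = accf_example_callback,
 *	};
 */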

#ifdef MALLOC_DECLARE
MALLOC_DECLARE(M_ACCF);
MALLOC_DECLARE(M_PCB);
MALLOC_DECLARE(M_SONAME);
#endif

extern int	maxsockets;
extern u_long	sb_max;
extern so_gen_t so_gencnt;

struct mbuf;
struct sockaddr;
struct ucred;
struct uio;

/* 'which' values for socket upcalls. */
#define	SO_RCV		1
#define	SO_SND		2

/* Return values for socket upcalls. */
#define	SU_OK		0
#define	SU_ISCONNECTED	1
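/*
 * Illustrative sketch (not part of the original header): an upcall is
 * attached to one direction of a socket and reports readiness through the
 * SU_* values; SU_ISCONNECTED is how accept filters signal that a queued
 * connection may be completed, while ordinary upcalls return SU_OK.
 * soupcall_set() is assumed to be declared elsewhere in this header.
 *
 *	static int
 *	example_rcv_upcall(struct socket *so, void *arg, int waitflag)
 *	{
 *		if (so->so_rcv.sb_cc > 0)
 *			return (SU_ISCONNECTED);
 *		return (SU_OK);
 *	}
 *
 *	soupcall_set(so, SO_RCV, example_rcv_upcall, NULL);
 */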

/*
 * From uipc_socket and friends
 */
int	sockargs(struct mbuf **mp, caddr_t buf, int buflen, int type);
int	getsockaddr(struct sockaddr **namp, caddr_t uaddr, size_t len);
void	soabort(struct socket *so);
int	soaccept(struct socket *so, struct sockaddr **nam);
int	socheckuid(struct socket *so, uid_t uid);
int	sobind(struct socket *so, struct sockaddr *nam, struct thread *td);
int	sobindat(int fd, struct socket *so, struct sockaddr *nam,
	    struct thread *td);
int	soclose(struct socket *so);
int	soconnect(struct socket *so, struct sockaddr *nam, struct thread *td);
int	soconnectat(int fd, struct socket *so, struct sockaddr *nam,
	    struct thread *td);
int	soconnect2(struct socket *so1, struct socket *so2);
|
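Purely as a hedged usage sketch (not part of the header): kernel consumers typically drive the routines above, together with socreate() declared just below, in a create/bind/close sequence. All names and the error handling here are illustrative.

/* Hypothetical in-kernel UDP socket setup using the routines above. */
static int
kernel_udp_socket(struct sockaddr *laddr, struct thread *td,
    struct socket **sop)
{
	struct socket *so;
	int error;

	error = socreate(AF_INET, &so, SOCK_DGRAM, IPPROTO_UDP,
	    td->td_ucred, td);
	if (error != 0)
		return (error);
	error = sobind(so, laddr, td);
	if (error != 0) {
		soclose(so);
		return (error);
	}
	*sop = so;
	return (0);
}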
At long last, commit the zero copy sockets code.
MAKEDEV: Add MAKEDEV glue for the ti(4) device nodes.
ti.4: Update the ti(4) man page to include information on the
TI_JUMBO_HDRSPLIT and TI_PRIVATE_JUMBOS kernel options,
and also include information about the new character
device interface and the associated ioctls.
man9/Makefile: Add jumbo.9 and zero_copy.9 man pages and associated
links.
jumbo.9: New man page describing the jumbo buffer allocator
interface and operation.
zero_copy.9: New man page describing the general characteristics of
the zero copy send and receive code, and what an
application author should do to take advantage of the
zero copy functionality.
NOTES: Add entries for ZERO_COPY_SOCKETS, TI_PRIVATE_JUMBOS,
TI_JUMBO_HDRSPLIT, MSIZE, and MCLSHIFT.
conf/files: Add uipc_jumbo.c and uipc_cow.c.
conf/options: Add the 5 options mentioned above.
kern_subr.c: Receive side zero copy implementation. This takes
"disposable" pages attached to an mbuf, gives them to
a user process, and then recycles the user's page.
This is only active when ZERO_COPY_SOCKETS is turned on
and the kern.ipc.zero_copy.receive sysctl variable is
set to 1.
uipc_cow.c: Send side zero copy functions. Takes a page written
by the user and maps it copy on write and assigns it
kernel virtual address space. Removes copy on write
mapping once the buffer has been freed by the network
stack.
uipc_jumbo.c: Jumbo disposable page allocator code. This allocates
(optionally) disposable pages for network drivers that
want to give the user the option of doing zero copy
receive.
uipc_socket.c: Add kern.ipc.zero_copy.{send,receive} sysctls that are
enabled if ZERO_COPY_SOCKETS is turned on.
Add zero copy send support to sosend() -- pages get
mapped into the kernel instead of getting copied if
they meet size and alignment restrictions.
uipc_syscalls.c:Un-staticize some of the sf* functions so that they
can be used elsewhere. (uipc_cow.c)
if_media.c: In the SIOCGIFMEDIA ioctl in ifmedia_ioctl(), avoid
calling malloc() with M_WAITOK. Return an error if
the M_NOWAIT malloc fails.
The ti(4) driver and the wi(4) driver, at least, call
this with a mutex held. This causes witness warnings
for 'ifconfig -a' with a wi(4) or ti(4) board in the
system. (I've only verified for ti(4)).
ip_output.c: Fragment large datagrams so that each segment contains
a multiple of PAGE_SIZE amount of data plus headers.
This allows the receiver to potentially do page
flipping on receives.
if_ti.c: Add zero copy receive support to the ti(4) driver. If
TI_PRIVATE_JUMBOS is not defined, it now uses the
jumbo(9) buffer allocator for jumbo receive buffers.
Add a new character device interface for the ti(4)
driver for the new debugging interface. This allows
(a patched version of) gdb to talk to the Tigon board
and debug the firmware. There are also a few additional
debugging ioctls available through this interface.
Add header splitting support to the ti(4) driver.
Tweak some of the default interrupt coalescing
parameters to more useful defaults.
Add hooks for supporting transmit flow control, but
leave it turned off with a comment describing why it
is turned off.
if_tireg.h: Change the firmware rev to 12.4.11, since we're really
at 12.4.11 plus fixes from 12.4.13.
Add defines needed for debugging.
Remove the ti_stats structure, it is now defined in
sys/tiio.h.
ti_fw.h: 12.4.11 firmware.
ti_fw2.h: 12.4.11 firmware, plus selected fixes from 12.4.13,
and my header splitting patches. Revision 12.4.13
doesn't handle 10/100 negotiation properly. (This
firmware is the same as what was in the tree previously,
with the addition of header splitting support.)
sys/jumbo.h: Jumbo buffer allocator interface.
sys/mbuf.h: Add a new external mbuf type, EXT_DISPOSABLE, to
indicate that the payload buffer can be thrown away /
flipped to a userland process.
socketvar.h: Add prototype for socow_setup.
tiio.h: ioctl interface to the character portion of the ti(4)
driver, plus associated structure/type definitions.
uio.h: Change prototype for uiomoveco() so that we'll know
whether the source page is disposable.
ufs_readwrite.c:Update for new prototype of uiomoveco().
vm_fault.c: In vm_fault(), check to see whether we need to do a page
based copy on write fault.
vm_object.c: Add a new function, vm_object_allocate_wait(). This
does the same thing that vm_object_allocate() does, except
that it gives the caller the opportunity to specify whether
it should wait on the uma_zalloc() of the object structure.
This allows vm objects to be allocated while holding a
mutex. (Without generating WITNESS warnings.)
vm_object_allocate() is implemented as a call to
vm_object_allocate_wait() with the malloc flag set to
M_WAITOK.
vm_object.h: Add prototype for vm_object_allocate_wait().
vm_page.c: Add page-based copy on write setup, clear and fault
routines.
vm_page.h: Add page based COW function prototypes and variable in
the vm_page structure.
Many thanks to Drew Gallatin, who wrote the zero copy send and receive
code, and to all the other folks who have tested and reviewed this code
over the years.
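As a hedged illustration of the kern.ipc.zero_copy.send and kern.ipc.zero_copy.receive sysctls named above: a userland program could enable them with sysctlbyname(3). Only the knob names come from the commit message; everything else is illustrative.

/* Hypothetical userland helper enabling the zero-copy sysctls named above. */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int one = 1;

	if (sysctlbyname("kern.ipc.zero_copy.send", NULL, NULL,
	    &one, sizeof(one)) == -1)
		perror("kern.ipc.zero_copy.send");
	if (sysctlbyname("kern.ipc.zero_copy.receive", NULL, NULL,
	    &one, sizeof(one)) == -1)
		perror("kern.ipc.zero_copy.receive");
	return (0);
}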
2002-06-26 03:37:47 +00:00
int	socow_setup(struct mbuf *m0, struct uio *uio);
int	socreate(int dom, struct socket **aso, int type, int proto,
	    struct ucred *cred, struct thread *td);
int	sodisconnect(struct socket *so);
struct	sockaddr *sodupsockaddr(const struct sockaddr *sa, int mflags);
void	sofree(struct socket *so);
void	sohasoutofband(struct socket *so);
int	solisten(struct socket *so, int backlog, struct thread *td);
void	solisten_proto(struct socket *so, int backlog);
In the current world order, solisten() implements the state transition of
a socket from a regular socket to a listening socket able to accept new
connections. As part of this state transition, solisten() calls into the
protocol to update protocol-layer state. There were several bugs in this
implementation that could result in a race wherein a TCP SYN received
in the interval between the protocol state transition and the shortly
following socket layer transition would result in a panic in the TCP code,
as the socket would be in the TCPS_LISTEN state, but the socket would not
have the SO_ACCEPTCONN flag set.
This change does the following:
- Pushes the socket state transition from the socket layer solisten() to
socket "library" routines called from the protocol. This permits
the socket routines to be called while holding the protocol mutexes,
preventing a race exposing the incomplete socket state transition to TCP
after the TCP state transition has completed. The check for a socket
layer state transition is performed by solisten_proto_check(), and the
actual transition is performed by solisten_proto().
- Holds the socket lock for the duration of the socket state test and set,
and over the protocol layer state transition, which is now possible as
the socket lock is acquired by the protocol layer, rather than vice
versa. This prevents additional state related races in the socket
layer.
This permits the dual transition of socket layer and protocol layer state
to occur while holding locks for both layers, making the two changes
atomic with respect to one another. Similar changes are likely required
elsewhere in the socket/protocol code.
Reported by: Peter Holm <peter@holm.cc>
Review and fixes from: emax, Antoine Brodin <antoine.brodin@laposte.net>
Philosophical head nod: gnn
2005-02-21 21:58:17 +00:00
int	solisten_proto_check(struct socket *so);
struct socket *
	sonewconn(struct socket *head, int connstatus);
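A hedged sketch (not header text) of the call pattern the commit above describes: the protocol's listen entry point performs both the check and the transition while holding the socket lock, with the protocol's own lock already held around the whole sequence. The routine name is hypothetical.

/* Hypothetical protocol listen routine using the solisten_proto*() pair. */
static int
proto_listen(struct socket *so, int backlog, struct thread *td)
{
	int error;

	/* The protocol's own lock would already be held at this point. */
	SOCK_LOCK(so);
	error = solisten_proto_check(so);
	if (error == 0) {
		/* Protocol-specific listen-state setup would go here. */
		solisten_proto(so, backlog);
	}
	SOCK_UNLOCK(so);
	return (error);
}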
Make similar changes to fo_stat() and fo_poll() as made earlier to
fo_read() and fo_write(): explicitly use the cred argument to fo_poll()
as "active_cred" using the passed file descriptor's f_cred reference
to provide access to the file credential. Add an active_cred
argument to fo_stat() so that implementers have access to the active
credential as well as the file credential. Generally modify callers
of fo_stat() to pass in td->td_ucred rather than fp->f_cred, which
was redundantly provided via the fp argument. This set of modifications
also permits threads to perform these operations on behalf of another
thread without modifying their credential.
Trickle this change down into fo_stat/poll() implementations:
- badfo_poll(), badfo_stat(): modify/add arguments.
- kqueue_poll(), kqueue_stat(): modify arguments.
- pipe_poll(), pipe_stat(): modify/add arguments, pass active_cred to
MAC checks rather than td->td_ucred.
- soo_poll(), soo_stat(): modify/add arguments, pass fp->f_cred rather
than cred to pru_sopoll() to maintain current semantics.
- sopoll(): modify arguments.
- vn_poll(), vn_statfile(): modify/add arguments, pass new arguments
to vn_stat(). Pass active_cred to MAC and fp->f_cred to VOP_POLL()
to maintain current semantics.
- vn_close(): rename cred to file_cred to reflect reality while I'm here.
- vn_stat(): Add active_cred and file_cred arguments to vn_stat()
and consumers so that this distinction is maintained at the VFS
as well as 'struct file' layer. Pass active_cred instead of
td->td_ucred to MAC and to VOP_GETATTR() to maintain current semantics.
- fifofs: modify the creation of a "filetemp" so that the file
credential is properly initialized and can be used in the socket
code if desired. Pass ap->a_td->td_ucred as the active
credential to soo_poll(). If we teach the vnop interface about
the distinction between file and active credentials, we would use
the active credential here.
Note that current inconsistent passing of active_cred vs. file_cred to
VOP's is maintained. It's not clear why GETATTR would be authorized
using active_cred while POLL would be authorized using file_cred at
the file system level.
Obtained from: TrustedBSD Project
Sponsored by: DARPA, NAI Labs
2002-08-16 12:52:03 +00:00
int	sopoll(struct socket *so, int events, struct ucred *active_cred,
	    struct thread *td);
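A hedged sketch of the credential split described above: the fileops-level poll handler receives the caller's active credential but, to preserve the old semantics, hands the file credential to the protocol. The function name below marks this as a sketch, not the real soo_poll().

/* Illustrative shape of a socket fileops poll handler after the change. */
static int
soo_poll_sketch(struct file *fp, int events, struct ucred *active_cred,
    struct thread *td)
{
	struct socket *so = fp->f_data;

	/* The file credential, not active_cred, is what pru_sopoll() sees. */
	return (so->so_proto->pr_usrreqs->pru_sopoll(so, events, fp->f_cred,
	    td));
}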
soreceive_generic(), and sopoll_generic(). Add new functions sosend(),
soreceive(), and sopoll(), which are wrappers for pru_sosend,
pru_soreceive, and pru_sopoll, and are now used universally by socket
consumers rather than either directly invoking the old so*() functions
or directly invoking the protocol switch method (about an even split
prior to this commit).
This completes an architectural change that was begun in 1996 to permit
protocols to provide substitute implementations, as now used by UDP.
Consumers now uniformly invoke sosend(), soreceive(), and sopoll() to
perform these operations on sockets -- in particular, distributed file
systems and socket system calls.
Architectural head nod: sam, gnn, wollman
2006-07-24 15:20:08 +00:00
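A hedged sketch of the wrapper shape described above, assuming the pr_usrreqs dispatch table of that era; only the dispatch line is the point, everything else is illustrative.

/* Illustrative only: sosend() as a thin wrapper over the protocol method. */
int
sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
    struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
{

	return (so->so_proto->pr_usrreqs->pru_sosend(so, addr, uio, top,
	    control, flags, td));
}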
int	sopoll_generic(struct socket *so, int events,
	    struct ucred *active_cred, struct thread *td);
int	soreceive(struct socket *so, struct sockaddr **paddr, struct uio *uio,
	    struct mbuf **mp0, struct mbuf **controlp, int *flagsp);
int	soreceive_stream(struct socket *so, struct sockaddr **paddr,
	    struct uio *uio, struct mbuf **mp0, struct mbuf **controlp,
	    int *flagsp);
int	soreceive_dgram(struct socket *so, struct sockaddr **paddr,
	    struct uio *uio, struct mbuf **mp0, struct mbuf **controlp,
	    int *flagsp);
int	soreceive_generic(struct socket *so, struct sockaddr **paddr,
	    struct uio *uio, struct mbuf **mp0, struct mbuf **controlp,
	    int *flagsp);
int	soreserve(struct socket *so, u_long sndcc, u_long rcvcc);
void	sorflush(struct socket *so);
int	sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
	    struct mbuf *top, struct mbuf *control, int flags,
	    struct thread *td);
int	sosend_dgram(struct socket *so, struct sockaddr *addr,
	    struct uio *uio, struct mbuf *top, struct mbuf *control,
	    int flags, struct thread *td);
int	sosend_generic(struct socket *so, struct sockaddr *addr,
	    struct uio *uio, struct mbuf *top, struct mbuf *control,
	    int flags, struct thread *td);
int	soshutdown(struct socket *so, int how);
void	sotoxsocket(struct socket *so, struct xsocket *xso);
void	soupcall_clear(struct socket *so, int which);
void	soupcall_set(struct socket *so, int which,
	    int (*func)(struct socket *, void *, int), void *arg);
void	sowakeup(struct socket *so, struct sockbuf *sb);
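A hedged sketch of the upcall registration that soupcall_set() and the SO_RCV/SU_* constants above support. The callback and helper names are hypothetical; real consumers do more bookkeeping, and registration is normally performed with the receive-buffer lock held.

/* Hypothetical receive upcall: wake a sleeper when the socket is poked. */
static int
example_rcv_upcall(struct socket *so, void *arg, int waitflag)
{

	wakeup(arg);		/* notify whoever sleeps on 'arg' */
	return (SU_OK);		/* nothing further for the socket layer to do */
}

static void
example_register_upcall(struct socket *so, void *waitchan)
{

	SOCKBUF_LOCK(&so->so_rcv);
	soupcall_set(so, SO_RCV, example_rcv_upcall, waitchan);
	SOCKBUF_UNLOCK(&so->so_rcv);
}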

/*
 * Accept filter functions (duh).
 */
int	accept_filt_add(struct accept_filter *filt);
int	accept_filt_del(char *name);
struct	accept_filter *accept_filt_get(char *name);
#ifdef ACCEPT_FILTER_MOD
#ifdef SYSCTL_DECL
SYSCTL_DECL(_net_inet_accf);
#endif
int	accept_filt_generic_mod_event(module_t mod, int event, void *data);
#endif

#endif /* _KERNEL */

#endif /* !_SYS_SOCKETVAR_H_ */