Listening sockets improvements.

o Separate the fields of struct socket that belong to listening sockets
  from the fields that belong to normal dataflow sockets, and unionize
  them.  This shrinks the structure a bit.
  - Move the selinfos out of the socket buffers into the socket itself.
    The first reason is to support the braindamaged scenario where a
    socket is added to kevent(2) and then listen(2) is called on it.
    The second reason is the future plan to make socket buffers
    pluggable, so that the buffers of a dataflow socket can be replaced,
    and in that case we also want to keep the same selinfos through the
    lifetime of the socket.
  - Remove struct so_accf.  Since the listening fields no longer affect
    the size of struct socket, just move them into the listening part of
    the union.
  - Provide the sol_upcall field and enforce that soupcall_set() may be
    called only on a dataflow socket, which has buffers; for listening
    sockets provide solisten_upcall_set().
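
  As an illustration, a minimal sketch of how a kernel consumer arms the
  two kinds of upcalls under the new API.  The callback names here are
  hypothetical; the locking mirrors the converted modules in the diff
  below:

    /* Dataflow socket: the upcall lives in a socket buffer. */
    SOCKBUF_LOCK(&so->so_rcv);
    soupcall_set(so, SO_RCV, my_rcv_upcall, arg);
    SOCKBUF_UNLOCK(&so->so_rcv);

    /*
     * Listening socket: the upcall lives in the listening part of
     * the union and is protected by the socket lock.
     */
    SOLISTEN_LOCK(lso);
    solisten_upcall_set(lso, my_listen_upcall, arg);
    SOLISTEN_UNLOCK(lso);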

o Remove the global ACCEPT_LOCK().
  - Add a mutex to struct socket, to be used instead of the socket buffer
    lock to protect the fields of struct socket that don't belong to a
    socket buffer.
  - Allow two socket locks to be acquired at once, provided that the
    first one belongs to a listening socket.
  - Make soref()/sorele() use atomic(9).  In some situations this allows
    soref() to be done without owning the socket lock (sketched below).
    There is room for improvement here: it should be possible to make
    the locking in sorele() optional as well.
  - Most protocols aren't touched by this change, with the exception of
    UNIX local sockets.  See below for more information.
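
  As a sketch, the new reference counting pattern looks like this,
  assuming the caller already holds something that keeps the socket from
  being freed:

    soref(so);          /* atomic; no socket lock required */
    ...
    SOCK_LOCK(so);
    sorele(so);         /* unlocks the socket, or frees it on last ref */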

o Reduce copy-and-paste in kernel modules that accept connections from
  listening sockets: provide the function solisten_dequeue() and use it
  in the following modules: ctl(4), iscsi(4), ng_btsocket(4),
  ng_ksocket(4), infiniband, rpc.  The typical conversion is sketched
  below.
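
  A condensed sketch of that pattern, as it appears in the converted
  modules (error handling abbreviated; solisten_dequeue() returns with
  the listening socket unlocked):

    SOLISTEN_LOCK(head);
    error = solisten_dequeue(head, &so, 0);
    if (error == EWOULDBLOCK)
        return (error);     /* no completed connection queued yet */
    if (error != 0)
        return (error);     /* error on the listening socket */
    sa = NULL;
    error = soaccept(so, &sa);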

o UNIX local sockets.
  - Removal of the global ACCEPT_LOCK() uncovered several races in the
    UNIX local sockets.  Most of the races exist around spawning a new
    socket, when we are connecting to a local listening socket.  To
    close them, we need to hold the locks on both PCBs when spawning a
    third one, which means holding them across sonewconn() (see the
    sketch after this list).  This creates a LOR between the PCB locks
    and unp_list_lock.
  - To fix the new LOR, abandon the global unp_list_lock in favor of the
    global unp_link_lock.  Separating these two locks didn't provide any
    extra parallelism in the UNIX sockets anyway.
  - A call into uipc_attach() may now happen either with unp_link_lock
    held, if we are accepting, or without unp_link_lock, if we are just
    creating a socket.
  - Another problem in UNIX sockets was that uipc_close() basically did
    nothing for a listening socket: the vnode remained open for
    connections.  This is fixed by detaching the vnode in uipc_close().
    Maybe the right way would be to do it for all sockets (not only
    listening ones), i.e., simply move the vnode teardown from
    uipc_detach() to uipc_close()?
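
  For illustration, a condensation of the new unp_connectat() locking,
  per the uipc_usrreq.c hunk below (error paths omitted):

    UNP_PCB_LOCK(unp);      /* connecting PCB */
    UNP_PCB_LOCK(unp2);     /* listener's PCB */
    if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
        /* Spawn the third socket with both PCB locks held. */
        so2 = sonewconn(so2, 0);
        ...
        unp3 = sotounpcb(so2);
        UNP_PCB_LOCK(unp3);
        ...
        unp2 = unp3;
    }
    error = unp_connect2(so, so2, PRU_CONNECT);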

Sponsored by:		Netflix
Differential Revision:	https://reviews.freebsd.org/D9770
Author:			Gleb Smirnoff
Date:			2017-06-08 21:30:34 +00:00
Parent:			4623e047a7
Commit:			779f106aa1
Notes (svn2git):	2020-12-20 02:59:44 +00:00; svn path=/head/; revision=319722

27 changed files with 1090 additions and 922 deletions


@ -458,44 +458,19 @@ ctl_ha_connect(struct ha_softc *softc)
static int
ctl_ha_accept(struct ha_softc *softc)
{
struct socket *so;
struct socket *lso, *so;
struct sockaddr *sap;
int error;
ACCEPT_LOCK();
if (softc->ha_lso->so_rcv.sb_state & SBS_CANTRCVMORE)
softc->ha_lso->so_error = ECONNABORTED;
if (softc->ha_lso->so_error) {
error = softc->ha_lso->so_error;
softc->ha_lso->so_error = 0;
ACCEPT_UNLOCK();
lso = softc->ha_lso;
SOLISTEN_LOCK(lso);
error = solisten_dequeue(lso, &so, 0);
if (error == EWOULDBLOCK)
return (error);
if (error) {
printf("%s: socket error %d\n", __func__, error);
goto out;
}
so = TAILQ_FIRST(&softc->ha_lso->so_comp);
if (so == NULL) {
ACCEPT_UNLOCK();
return (EWOULDBLOCK);
}
KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
/*
* Before changing the flags on the socket, we have to bump the
* reference count. Otherwise, if the protocol calls sofree(),
* the socket will be released due to a zero refcount.
*/
SOCK_LOCK(so); /* soref() and so_state update */
soref(so); /* file descriptor reference */
TAILQ_REMOVE(&softc->ha_lso->so_comp, so, so_list);
softc->ha_lso->so_qlen--;
so->so_state |= SS_NBIO;
so->so_qstate &= ~SQ_COMP;
so->so_head = NULL;
SOCK_UNLOCK(so);
ACCEPT_UNLOCK();
sap = NULL;
error = soaccept(so, &sap);
@ -556,9 +531,6 @@ ctl_ha_listen(struct ha_softc *softc)
printf("%s: REUSEPORT setting failed %d\n",
__func__, error);
}
SOCKBUF_LOCK(&softc->ha_lso->so_rcv);
soupcall_set(softc->ha_lso, SO_RCV, ctl_ha_lupcall, softc);
SOCKBUF_UNLOCK(&softc->ha_lso->so_rcv);
}
memcpy(&sa, &softc->ha_peer_in, sizeof(sa));
@ -572,6 +544,10 @@ ctl_ha_listen(struct ha_softc *softc)
printf("%s: solisten() error %d\n", __func__, error);
goto out;
}
SOLISTEN_LOCK(softc->ha_lso);
softc->ha_lso->so_state |= SS_NBIO;
solisten_upcall_set(softc->ha_lso, ctl_ha_lupcall, softc);
SOLISTEN_UNLOCK(softc->ha_lso);
return (0);
out:


@ -92,7 +92,6 @@ struct icl_listen_sock {
struct icl_listen *ils_listen;
struct socket *ils_socket;
bool ils_running;
bool ils_disconnecting;
int ils_id;
};
@ -184,7 +183,9 @@ icl_listen_free(struct icl_listen *il)
while (ils->ils_running) {
ICL_DEBUG("waiting for accept thread to terminate");
sx_xunlock(&il->il_lock);
ils->ils_disconnecting = true;
SOLISTEN_LOCK(ils->ils_socket);
ils->ils_socket->so_error = ENOTCONN;
SOLISTEN_UNLOCK(ils->ils_socket);
wakeup(&ils->ils_socket->so_timeo);
pause("icl_unlisten", 1 * hz);
sx_xlock(&il->il_lock);
@ -200,9 +201,9 @@ icl_listen_free(struct icl_listen *il)
}
/*
* XXX: Doing accept in a separate thread in each socket might not be the best way
* to do stuff, but it's pretty clean and debuggable - and you probably won't
* have hundreds of listening sockets anyway.
* XXX: Doing accept in a separate thread in each socket might not be the
* best way to do stuff, but it's pretty clean and debuggable - and you
* probably won't have hundreds of listening sockets anyway.
*/
static void
icl_accept_thread(void *arg)
@ -218,55 +219,22 @@ icl_accept_thread(void *arg)
ils->ils_running = true;
for (;;) {
ACCEPT_LOCK();
while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0 && ils->ils_disconnecting == false) {
if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
head->so_error = ECONNABORTED;
break;
}
error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
"accept", 0);
if (error) {
ACCEPT_UNLOCK();
ICL_WARN("msleep failed with error %d", error);
continue;
}
if (ils->ils_disconnecting) {
ACCEPT_UNLOCK();
ICL_DEBUG("terminating");
ils->ils_running = false;
kthread_exit();
return;
}
SOLISTEN_LOCK(head);
error = solisten_dequeue(head, &so, 0);
if (error == ENOTCONN) {
/*
* XXXGL: ENOTCONN is our mark from icl_listen_free().
* Neither socket code, nor msleep(9) may return it.
*/
ICL_DEBUG("terminating");
ils->ils_running = false;
kthread_exit();
return;
}
if (head->so_error) {
error = head->so_error;
head->so_error = 0;
ACCEPT_UNLOCK();
ICL_WARN("socket error %d", error);
if (error) {
ICL_WARN("solisten_dequeue error %d", error);
continue;
}
so = TAILQ_FIRST(&head->so_comp);
KASSERT(so != NULL, ("NULL so"));
KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
/*
* Before changing the flags on the socket, we have to bump the
* reference count. Otherwise, if the protocol calls sofree(),
* the socket will be released due to a zero refcount.
*/
SOCK_LOCK(so); /* soref() and so_state update */
soref(so); /* file descriptor reference */
TAILQ_REMOVE(&head->so_comp, so, so_list);
head->so_qlen--;
so->so_state |= (head->so_state & SS_NBIO);
so->so_qstate &= ~SQ_COMP;
so->so_head = NULL;
SOCK_UNLOCK(so);
ACCEPT_UNLOCK();
sa = NULL;
error = soaccept(so, &sa);


@ -170,32 +170,36 @@ soo_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred,
break;
case FIOASYNC:
/*
* XXXRW: This code separately acquires SOCK_LOCK(so) and
* SOCKBUF_LOCK(&so->so_rcv) even though they are the same
* mutex to avoid introducing the assumption that they are
* the same.
*/
if (*(int *)data) {
SOCK_LOCK(so);
so->so_state |= SS_ASYNC;
if (SOLISTENING(so)) {
so->sol_sbrcv_flags |= SB_ASYNC;
so->sol_sbsnd_flags |= SB_ASYNC;
} else {
SOCKBUF_LOCK(&so->so_rcv);
so->so_rcv.sb_flags |= SB_ASYNC;
SOCKBUF_UNLOCK(&so->so_rcv);
SOCKBUF_LOCK(&so->so_snd);
so->so_snd.sb_flags |= SB_ASYNC;
SOCKBUF_UNLOCK(&so->so_snd);
}
SOCK_UNLOCK(so);
SOCKBUF_LOCK(&so->so_rcv);
so->so_rcv.sb_flags |= SB_ASYNC;
SOCKBUF_UNLOCK(&so->so_rcv);
SOCKBUF_LOCK(&so->so_snd);
so->so_snd.sb_flags |= SB_ASYNC;
SOCKBUF_UNLOCK(&so->so_snd);
} else {
SOCK_LOCK(so);
so->so_state &= ~SS_ASYNC;
if (SOLISTENING(so)) {
so->sol_sbrcv_flags &= ~SB_ASYNC;
so->sol_sbsnd_flags &= ~SB_ASYNC;
} else {
SOCKBUF_LOCK(&so->so_rcv);
so->so_rcv.sb_flags &= ~SB_ASYNC;
SOCKBUF_UNLOCK(&so->so_rcv);
SOCKBUF_LOCK(&so->so_snd);
so->so_snd.sb_flags &= ~SB_ASYNC;
SOCKBUF_UNLOCK(&so->so_snd);
}
SOCK_UNLOCK(so);
SOCKBUF_LOCK(&so->so_rcv);
so->so_rcv.sb_flags &= ~SB_ASYNC;
SOCKBUF_UNLOCK(&so->so_rcv);
SOCKBUF_LOCK(&so->so_snd);
so->so_snd.sb_flags &= ~SB_ASYNC;
SOCKBUF_UNLOCK(&so->so_snd);
}
break;
@ -706,7 +710,6 @@ soaio_process_sb(struct socket *so, struct sockbuf *sb)
sb->sb_flags &= ~SB_AIO_RUNNING;
SOCKBUF_UNLOCK(sb);
ACCEPT_LOCK();
SOCK_LOCK(so);
sorele(so);
}


@ -173,13 +173,13 @@ accept_filt_getopt(struct socket *so, struct sockopt *sopt)
error = EINVAL;
goto out;
}
if ((so->so_options & SO_ACCEPTFILTER) == 0) {
if (so->sol_accept_filter == NULL) {
error = EINVAL;
goto out;
}
strcpy(afap->af_name, so->so_accf->so_accept_filter->accf_name);
if (so->so_accf->so_accept_filter_str != NULL)
strcpy(afap->af_arg, so->so_accf->so_accept_filter_str);
strcpy(afap->af_name, so->sol_accept_filter->accf_name);
if (so->sol_accept_filter_str != NULL)
strcpy(afap->af_arg, so->sol_accept_filter_str);
out:
SOCK_UNLOCK(so);
if (error == 0)
@ -193,31 +193,57 @@ accept_filt_setopt(struct socket *so, struct sockopt *sopt)
{
struct accept_filter_arg *afap;
struct accept_filter *afp;
struct so_accf *newaf;
int error = 0;
char *accept_filter_str = NULL;
void *accept_filter_arg = NULL;
int error;
/*
* Handle the simple delete case first.
*/
if (sopt == NULL || sopt->sopt_val == NULL) {
struct socket *sp, *sp1;
int wakeup;
SOCK_LOCK(so);
if ((so->so_options & SO_ACCEPTCONN) == 0) {
SOCK_UNLOCK(so);
return (EINVAL);
}
if (so->so_accf != NULL) {
struct so_accf *af = so->so_accf;
if (af->so_accept_filter != NULL &&
af->so_accept_filter->accf_destroy != NULL) {
af->so_accept_filter->accf_destroy(so);
}
if (af->so_accept_filter_str != NULL)
free(af->so_accept_filter_str, M_ACCF);
free(af, M_ACCF);
so->so_accf = NULL;
if (so->sol_accept_filter == NULL) {
SOCK_UNLOCK(so);
return (0);
}
if (so->sol_accept_filter->accf_destroy != NULL)
so->sol_accept_filter->accf_destroy(so);
if (so->sol_accept_filter_str != NULL)
free(so->sol_accept_filter_str, M_ACCF);
so->sol_accept_filter = NULL;
so->sol_accept_filter_arg = NULL;
so->sol_accept_filter_str = NULL;
so->so_options &= ~SO_ACCEPTFILTER;
SOCK_UNLOCK(so);
/*
* Move from incomplete queue to complete only those
* connections, that are blocked by us.
*/
wakeup = 0;
TAILQ_FOREACH_SAFE(sp, &so->sol_incomp, so_list, sp1) {
SOCK_LOCK(sp);
if (sp->so_options & SO_ACCEPTFILTER) {
TAILQ_REMOVE(&so->sol_incomp, sp, so_list);
TAILQ_INSERT_TAIL(&so->sol_comp, sp, so_list);
sp->so_qstate = SQ_COMP;
sp->so_options &= ~SO_ACCEPTFILTER;
so->sol_incqlen--;
so->sol_qlen++;
wakeup = 1;
}
SOCK_UNLOCK(sp);
}
if (wakeup)
solisten_wakeup(so); /* unlocks */
else
SOLISTEN_UNLOCK(so);
return (0);
}
@ -238,17 +264,10 @@ accept_filt_setopt(struct socket *so, struct sockopt *sopt)
free(afap, M_TEMP);
return (ENOENT);
}
/*
* Allocate the new accept filter instance storage. We may
* have to free it again later if we fail to attach it. If
* attached properly, 'newaf' is NULLed to avoid a free()
* while in use.
*/
newaf = malloc(sizeof(*newaf), M_ACCF, M_WAITOK | M_ZERO);
if (afp->accf_create != NULL && afap->af_name[0] != '\0') {
size_t len = strlen(afap->af_name) + 1;
newaf->so_accept_filter_str = malloc(len, M_ACCF, M_WAITOK);
strcpy(newaf->so_accept_filter_str, afap->af_name);
accept_filter_str = malloc(len, M_ACCF, M_WAITOK);
strcpy(accept_filter_str, afap->af_name);
}
/*
@ -256,8 +275,8 @@ accept_filt_setopt(struct socket *so, struct sockopt *sopt)
* without first removing it.
*/
SOCK_LOCK(so);
if (((so->so_options & SO_ACCEPTCONN) == 0) ||
(so->so_accf != NULL)) {
if ((so->so_options & SO_ACCEPTCONN) == 0 ||
so->sol_accept_filter != NULL) {
error = EINVAL;
goto out;
}
@ -268,25 +287,20 @@ accept_filt_setopt(struct socket *so, struct sockopt *sopt)
* can't block.
*/
if (afp->accf_create != NULL) {
newaf->so_accept_filter_arg =
afp->accf_create(so, afap->af_arg);
if (newaf->so_accept_filter_arg == NULL) {
accept_filter_arg = afp->accf_create(so, afap->af_arg);
if (accept_filter_arg == NULL) {
error = EINVAL;
goto out;
}
}
newaf->so_accept_filter = afp;
so->so_accf = newaf;
so->sol_accept_filter = afp;
so->sol_accept_filter_arg = accept_filter_arg;
so->sol_accept_filter_str = accept_filter_str;
so->so_options |= SO_ACCEPTFILTER;
newaf = NULL;
out:
SOCK_UNLOCK(so);
if (newaf != NULL) {
if (newaf->so_accept_filter_str != NULL)
free(newaf->so_accept_filter_str, M_ACCF);
free(newaf, M_ACCF);
}
if (afap != NULL)
free(afap, M_TEMP);
if (accept_filter_str != NULL)
free(accept_filter_str, M_ACCF);
free(afap, M_TEMP);
return (error);
}


@ -448,8 +448,6 @@ db_print_socket(struct socket *so, const char *socketname, int indent)
db_printf(")\n");
db_print_indent(indent);
db_printf("so_qstate: 0x%x (", so->so_qstate);
db_print_soqstate(so->so_qstate);
db_printf(") ");
db_printf("so_pcb: %p ", so->so_pcb);
db_printf("so_proto: %p\n", so->so_proto);
@ -458,24 +456,28 @@ db_print_socket(struct socket *so, const char *socketname, int indent)
db_print_protosw(so->so_proto, "so_proto", indent);
db_print_indent(indent);
db_printf("so_head: %p ", so->so_head);
db_printf("so_incomp first: %p ", TAILQ_FIRST(&so->so_incomp));
db_printf("so_comp first: %p\n", TAILQ_FIRST(&so->so_comp));
if (so->so_options & SO_ACCEPTCONN) {
db_printf("sol_incomp first: %p ",
TAILQ_FIRST(&so->sol_incomp));
db_printf("sol_comp first: %p\n", TAILQ_FIRST(&so->sol_comp));
db_printf("sol_qlen: %d ", so->sol_qlen);
db_printf("sol_incqlen: %d ", so->sol_incqlen);
db_printf("sol_qlimit: %d ", so->sol_qlimit);
} else {
db_printf("so_qstate: 0x%x (", so->so_qstate);
db_print_soqstate(so->so_qstate);
db_printf("so_listen: %p ", so->so_listen);
/* so_list skipped */
db_printf("so_timeo: %d ", so->so_timeo);
db_printf("so_error: %d\n", so->so_error);
db_print_indent(indent);
/* so_list skipped */
db_printf("so_qlen: %u ", so->so_qlen);
db_printf("so_incqlen: %u ", so->so_incqlen);
db_printf("so_qlimit: %u ", so->so_qlimit);
db_printf("so_timeo: %d ", so->so_timeo);
db_printf("so_error: %d\n", so->so_error);
db_print_indent(indent);
db_printf("so_sigio: %p ", so->so_sigio);
db_printf("so_oobmark: %lu ", so->so_oobmark);
db_print_indent(indent);
db_printf("so_sigio: %p ", so->so_sigio);
db_printf("so_oobmark: %lu ", so->so_oobmark);
db_print_sockbuf(&so->so_rcv, "so_rcv", indent);
db_print_sockbuf(&so->so_snd, "so_snd", indent);
db_print_sockbuf(&so->so_rcv, "so_rcv", indent);
db_print_sockbuf(&so->so_snd, "so_snd", indent);
}
}
DB_SHOW_COMMAND(socket, db_show_socket)


@ -314,14 +314,14 @@ sowakeup(struct socket *so, struct sockbuf *sb)
SOCKBUF_LOCK_ASSERT(sb);
selwakeuppri(&sb->sb_sel, PSOCK);
if (!SEL_WAITING(&sb->sb_sel))
selwakeuppri(sb->sb_sel, PSOCK);
if (!SEL_WAITING(sb->sb_sel))
sb->sb_flags &= ~SB_SEL;
if (sb->sb_flags & SB_WAIT) {
sb->sb_flags &= ~SB_WAIT;
wakeup(&sb->sb_acc);
}
KNOTE_LOCKED(&sb->sb_sel.si_note, 0);
KNOTE_LOCKED(&sb->sb_sel->si_note, 0);
if (sb->sb_upcall != NULL && !(so->so_state & SS_ISDISCONNECTED)) {
ret = sb->sb_upcall(so, sb->sb_upcallarg, M_NOWAIT);
if (ret == SU_ISCONNECTED) {

File diff suppressed because it is too large.


@ -68,13 +68,6 @@ __FBSDID("$FreeBSD$");
#include <security/audit/audit.h>
#include <security/mac/mac_framework.h>
/*
* Flags for accept1() and kern_accept4(), in addition to SOCK_CLOEXEC
* and SOCK_NONBLOCK.
*/
#define ACCEPT4_INHERIT 0x1
#define ACCEPT4_COMPAT 0x2
static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);
@ -350,59 +343,22 @@ kern_accept4(struct thread *td, int s, struct sockaddr **name,
(flags & SOCK_CLOEXEC) ? O_CLOEXEC : 0, &fcaps);
if (error != 0)
goto done;
ACCEPT_LOCK();
if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
ACCEPT_UNLOCK();
error = EWOULDBLOCK;
SOCK_LOCK(head);
if (!SOLISTENING(head)) {
SOCK_UNLOCK(head);
error = EINVAL;
goto noconnection;
}
while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
head->so_error = ECONNABORTED;
break;
}
error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
"accept", 0);
if (error != 0) {
ACCEPT_UNLOCK();
goto noconnection;
}
}
if (head->so_error) {
error = head->so_error;
head->so_error = 0;
ACCEPT_UNLOCK();
error = solisten_dequeue(head, &so, flags);
if (error != 0)
goto noconnection;
}
so = TAILQ_FIRST(&head->so_comp);
KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
/*
* Before changing the flags on the socket, we have to bump the
* reference count. Otherwise, if the protocol calls sofree(),
* the socket will be released due to a zero refcount.
*/
SOCK_LOCK(so); /* soref() and so_state update */
soref(so); /* file descriptor reference */
TAILQ_REMOVE(&head->so_comp, so, so_list);
head->so_qlen--;
if (flags & ACCEPT4_INHERIT)
so->so_state |= (head->so_state & SS_NBIO);
else
so->so_state |= (flags & SOCK_NONBLOCK) ? SS_NBIO : 0;
so->so_qstate &= ~SQ_COMP;
so->so_head = NULL;
SOCK_UNLOCK(so);
ACCEPT_UNLOCK();
/* An extra reference on `nfp' has been held for us by falloc(). */
td->td_retval[0] = fd;
/* connection has been removed from the listen queue */
KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);
/* Connection has been removed from the listen queue. */
KNOTE_UNLOCKED(&head->so_rdsel.si_note, 0);
if (flags & ACCEPT4_INHERIT) {
pgid = fgetown(&head->so_sigio);
@ -420,7 +376,6 @@ kern_accept4(struct thread *td, int s, struct sockaddr **name,
(void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
tmp = fflag & FASYNC;
(void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
sa = NULL;
error = soaccept(so, &sa);
if (error != 0)
goto noconnection;
@ -558,7 +513,7 @@ kern_connectat(struct thread *td, int dirfd, int fd, struct sockaddr *sa)
}
SOCK_LOCK(so);
while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
error = msleep(&so->so_timeo, &so->so_lock, PSOCK | PCATCH,
"connec", 0);
if (error != 0) {
if (error == EINTR || error == ERESTART)


@ -189,10 +189,9 @@ SYSCTL_INT(_net_local, OID_AUTO, deferred, CTLFLAG_RD,
/*
* Locking and synchronization:
*
* Three types of locks exit in the local domain socket implementation: a
* global list mutex, a global linkage rwlock, and per-unpcb mutexes. Of the
* global locks, the list lock protects the socket count, global generation
* number, and stream/datagram global lists. The linkage lock protects the
* Two types of locks exist in the local domain socket implementation: a
* a global linkage rwlock and per-unpcb mutexes. The linkage lock protects
* the socket count, global generation number, stream/datagram global lists and
* interconnection of unpcbs, the v_socket and unp_vnode pointers, and can be
* held exclusively over the acquisition of multiple unpcb locks to prevent
* deadlock.
@ -233,7 +232,6 @@ SYSCTL_INT(_net_local, OID_AUTO, deferred, CTLFLAG_RD,
* to perform namei() and other file system operations.
*/
static struct rwlock unp_link_rwlock;
static struct mtx unp_list_lock;
static struct mtx unp_defers_lock;
#define UNP_LINK_LOCK_INIT() rw_init(&unp_link_rwlock, \
@ -250,11 +248,7 @@ static struct mtx unp_defers_lock;
#define UNP_LINK_WUNLOCK() rw_wunlock(&unp_link_rwlock)
#define UNP_LINK_WLOCK_ASSERT() rw_assert(&unp_link_rwlock, \
RA_WLOCKED)
#define UNP_LIST_LOCK_INIT() mtx_init(&unp_list_lock, \
"unp_list_lock", NULL, MTX_DEF)
#define UNP_LIST_LOCK() mtx_lock(&unp_list_lock)
#define UNP_LIST_UNLOCK() mtx_unlock(&unp_list_lock)
#define UNP_LINK_WOWNED() rw_wowned(&unp_link_rwlock)
#define UNP_DEFERRED_LOCK_INIT() mtx_init(&unp_defers_lock, \
"unp_defer", NULL, MTX_DEF)
@ -396,6 +390,7 @@ uipc_attach(struct socket *so, int proto, struct thread *td)
u_long sendspace, recvspace;
struct unpcb *unp;
int error;
bool locked;
KASSERT(so->so_pcb == NULL, ("uipc_attach: so_pcb != NULL"));
if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
@ -430,10 +425,12 @@ uipc_attach(struct socket *so, int proto, struct thread *td)
unp->unp_socket = so;
so->so_pcb = unp;
unp->unp_refcount = 1;
if (so->so_head != NULL)
if (so->so_listen != NULL)
unp->unp_flags |= UNP_NASCENT;
UNP_LIST_LOCK();
if ((locked = UNP_LINK_WOWNED()) == false)
UNP_LINK_WLOCK();
unp->unp_gencnt = ++unp_gencnt;
unp_count++;
switch (so->so_type) {
@ -452,7 +449,9 @@ uipc_attach(struct socket *so, int proto, struct thread *td)
default:
panic("uipc_attach");
}
UNP_LIST_UNLOCK();
if (locked == false)
UNP_LINK_WUNLOCK();
return (0);
}
@ -607,6 +606,7 @@ static void
uipc_close(struct socket *so)
{
struct unpcb *unp, *unp2;
struct vnode *vp = NULL;
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_close: unp == NULL"));
@ -619,8 +619,14 @@ uipc_close(struct socket *so)
unp_disconnect(unp, unp2);
UNP_PCB_UNLOCK(unp2);
}
if (SOLISTENING(so) && ((vp = unp->unp_vnode) != NULL)) {
VOP_UNP_DETACH(vp);
unp->unp_vnode = NULL;
}
UNP_PCB_UNLOCK(unp);
UNP_LINK_WUNLOCK();
if (vp)
vrele(vp);
}
static int
@ -657,18 +663,13 @@ uipc_detach(struct socket *so)
vp = NULL;
local_unp_rights = 0;
UNP_LIST_LOCK();
UNP_LINK_WLOCK();
LIST_REMOVE(unp, unp_link);
unp->unp_gencnt = ++unp_gencnt;
--unp_count;
UNP_LIST_UNLOCK();
if ((unp->unp_flags & UNP_NASCENT) != 0) {
UNP_PCB_LOCK(unp);
goto teardown;
}
UNP_LINK_WLOCK();
UNP_PCB_LOCK(unp);
if ((unp->unp_flags & UNP_NASCENT) != 0)
goto teardown;
if ((vp = unp->unp_vnode) != NULL) {
VOP_UNP_DETACH(vp);
@ -693,8 +694,8 @@ uipc_detach(struct socket *so)
UNP_PCB_UNLOCK(ref);
}
local_unp_rights = unp_rights;
UNP_LINK_WUNLOCK();
teardown:
UNP_LINK_WUNLOCK();
unp->unp_socket->so_pcb = NULL;
saved_unp_addr = unp->unp_addr;
unp->unp_addr = NULL;
@ -1315,7 +1316,7 @@ unp_connectat(int fd, struct socket *so, struct sockaddr *nam,
{
struct sockaddr_un *soun = (struct sockaddr_un *)nam;
struct vnode *vp;
struct socket *so2, *so3;
struct socket *so2;
struct unpcb *unp, *unp2, *unp3;
struct nameidata nd;
char buf[SOCK_MAXADDRLEN];
@ -1392,22 +1393,20 @@ unp_connectat(int fd, struct socket *so, struct sockaddr *nam,
error = EPROTOTYPE;
goto bad2;
}
UNP_PCB_LOCK(unp);
UNP_PCB_LOCK(unp2);
if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
if (so2->so_options & SO_ACCEPTCONN) {
CURVNET_SET(so2->so_vnet);
so3 = sonewconn(so2, 0);
so2 = sonewconn(so2, 0);
CURVNET_RESTORE();
} else
so3 = NULL;
if (so3 == NULL) {
so2 = NULL;
if (so2 == NULL) {
error = ECONNREFUSED;
goto bad2;
goto bad3;
}
unp = sotounpcb(so);
unp2 = sotounpcb(so2);
unp3 = sotounpcb(so3);
UNP_PCB_LOCK(unp);
UNP_PCB_LOCK(unp2);
unp3 = sotounpcb(so2);
UNP_PCB_LOCK(unp3);
if (unp2->unp_addr != NULL) {
bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len);
@ -1433,23 +1432,19 @@ unp_connectat(int fd, struct socket *so, struct sockaddr *nam,
unp->unp_flags |= UNP_HAVEPC;
if (unp2->unp_flags & UNP_WANTCRED)
unp3->unp_flags |= UNP_WANTCRED;
UNP_PCB_UNLOCK(unp3);
UNP_PCB_UNLOCK(unp2);
UNP_PCB_UNLOCK(unp);
unp2 = unp3;
#ifdef MAC
mac_socketpeer_set_from_socket(so, so3);
mac_socketpeer_set_from_socket(so3, so);
mac_socketpeer_set_from_socket(so, so2);
mac_socketpeer_set_from_socket(so2, so);
#endif
so2 = so3;
}
unp = sotounpcb(so);
KASSERT(unp != NULL, ("unp_connect: unp == NULL"));
unp2 = sotounpcb(so2);
KASSERT(unp2 != NULL, ("unp_connect: unp2 == NULL"));
UNP_PCB_LOCK(unp);
UNP_PCB_LOCK(unp2);
KASSERT(unp2 != NULL && so2 != NULL && unp2->unp_socket == so2 &&
sotounpcb(so2) == unp2,
("%s: unp2 %p so2 %p", __func__, unp2, so2));
error = unp_connect2(so, so2, PRU_CONNECT);
bad3:
UNP_PCB_UNLOCK(unp2);
UNP_PCB_UNLOCK(unp);
bad2:
@ -1591,10 +1586,10 @@ unp_pcblist(SYSCTL_HANDLER_ARGS)
* OK, now we're committed to doing something.
*/
xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK);
UNP_LIST_LOCK();
UNP_LINK_RLOCK();
gencnt = unp_gencnt;
n = unp_count;
UNP_LIST_UNLOCK();
UNP_LINK_RUNLOCK();
xug->xug_len = sizeof *xug;
xug->xug_count = n;
@ -1608,7 +1603,7 @@ unp_pcblist(SYSCTL_HANDLER_ARGS)
unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK);
UNP_LIST_LOCK();
UNP_LINK_RLOCK();
for (unp = LIST_FIRST(head), i = 0; unp && i < n;
unp = LIST_NEXT(unp, unp_link)) {
UNP_PCB_LOCK(unp);
@ -1623,7 +1618,7 @@ unp_pcblist(SYSCTL_HANDLER_ARGS)
}
UNP_PCB_UNLOCK(unp);
}
UNP_LIST_UNLOCK();
UNP_LINK_RUNLOCK();
n = i; /* In case we lost some during malloc. */
error = 0;
@ -1881,7 +1876,6 @@ unp_init(void)
TIMEOUT_TASK_INIT(taskqueue_thread, &unp_gc_task, 0, unp_gc, NULL);
TASK_INIT(&unp_defer_task, 0, unp_process_defers, NULL);
UNP_LINK_LOCK_INIT();
UNP_LIST_LOCK_INIT();
UNP_DEFERRED_LOCK_INIT();
}
@ -2232,8 +2226,7 @@ unp_accessable(struct filedescent **fdep, int fdcount)
static void
unp_gc_process(struct unpcb *unp)
{
struct socket *soa;
struct socket *so;
struct socket *so, *soa;
struct file *fp;
/* Already processed. */
@ -2253,28 +2246,30 @@ unp_gc_process(struct unpcb *unp)
return;
}
/*
* Mark all sockets we reference with RIGHTS.
*/
so = unp->unp_socket;
if ((unp->unp_gcflag & UNPGC_IGNORE_RIGHTS) == 0) {
SOCKBUF_LOCK(&so->so_rcv);
unp_scan(so->so_rcv.sb_mb, unp_accessable);
SOCKBUF_UNLOCK(&so->so_rcv);
SOCK_LOCK(so);
if (SOLISTENING(so)) {
/*
* Mark all sockets in our accept queue.
*/
TAILQ_FOREACH(soa, &so->sol_comp, so_list) {
if (sotounpcb(soa)->unp_gcflag & UNPGC_IGNORE_RIGHTS)
continue;
SOCKBUF_LOCK(&soa->so_rcv);
unp_scan(soa->so_rcv.sb_mb, unp_accessable);
SOCKBUF_UNLOCK(&soa->so_rcv);
}
} else {
/*
* Mark all sockets we reference with RIGHTS.
*/
if ((unp->unp_gcflag & UNPGC_IGNORE_RIGHTS) == 0) {
SOCKBUF_LOCK(&so->so_rcv);
unp_scan(so->so_rcv.sb_mb, unp_accessable);
SOCKBUF_UNLOCK(&so->so_rcv);
}
}
/*
* Mark all sockets in our accept queue.
*/
ACCEPT_LOCK();
TAILQ_FOREACH(soa, &so->so_comp, so_list) {
if ((sotounpcb(soa)->unp_gcflag & UNPGC_IGNORE_RIGHTS) != 0)
continue;
SOCKBUF_LOCK(&soa->so_rcv);
unp_scan(soa->so_rcv.sb_mb, unp_accessable);
SOCKBUF_UNLOCK(&soa->so_rcv);
}
ACCEPT_UNLOCK();
SOCK_UNLOCK(so);
unp->unp_gcflag |= UNPGC_SCANNED;
}
@ -2297,7 +2292,7 @@ unp_gc(__unused void *arg, int pending)
int i, total;
unp_taskcount++;
UNP_LIST_LOCK();
UNP_LINK_RLOCK();
/*
* First clear all gc flags from previous runs, apart from
* UNPGC_IGNORE_RIGHTS.
@ -2320,7 +2315,7 @@ unp_gc(__unused void *arg, int pending)
LIST_FOREACH(unp, *head, unp_link)
unp_gc_process(unp);
} while (unp_marked);
UNP_LIST_UNLOCK();
UNP_LINK_RUNLOCK();
if (unp_unreachable == 0)
return;
@ -2335,7 +2330,6 @@ unp_gc(__unused void *arg, int pending)
* as as unreachable and store them locally.
*/
UNP_LINK_RLOCK();
UNP_LIST_LOCK();
for (total = 0, head = heads; *head != NULL; head++)
LIST_FOREACH(unp, *head, unp_link)
if ((unp->unp_gcflag & UNPGC_DEAD) != 0) {
@ -2348,7 +2342,6 @@ unp_gc(__unused void *arg, int pending)
KASSERT(total <= unp_unreachable,
("unp_gc: incorrect unreachable count."));
}
UNP_LIST_UNLOCK();
UNP_LINK_RUNLOCK();
/*
@ -2391,10 +2384,11 @@ unp_dispose(struct socket *so)
struct unpcb *unp;
unp = sotounpcb(so);
UNP_LIST_LOCK();
UNP_LINK_WLOCK();
unp->unp_gcflag |= UNPGC_IGNORE_RIGHTS;
UNP_LIST_UNLOCK();
unp_dispose_mbuf(so->so_rcv.sb_mb);
UNP_LINK_WUNLOCK();
if (!SOLISTENING(so))
unp_dispose_mbuf(so->so_rcv.sb_mb);
}
static void


@ -614,21 +614,13 @@ ng_btsocket_l2cap_process_l2ca_con_ind(struct ng_mesg *msg,
pcb = ng_btsocket_l2cap_pcb_by_addr(&rt->src, ip->psm);
if (pcb != NULL) {
struct socket *so1 = NULL;
struct socket *so1;
mtx_lock(&pcb->pcb_mtx);
/*
* First check the pending connections queue and if we have
* space then create new socket and set proper source address.
*/
if (pcb->so->so_qlen <= pcb->so->so_qlimit) {
CURVNET_SET(pcb->so->so_vnet);
so1 = sonewconn(pcb->so, 0);
CURVNET_RESTORE();
}
CURVNET_SET(pcb->so->so_vnet);
so1 = sonewconn(pcb->so, 0);
CURVNET_RESTORE();
if (so1 == NULL) {
result = NG_L2CAP_NO_RESOURCES;
goto respond;


@ -1149,7 +1149,7 @@ ng_btsocket_rfcomm_connect_ind(ng_btsocket_rfcomm_session_p s, int channel)
{
ng_btsocket_rfcomm_pcb_p pcb = NULL, pcb1 = NULL;
ng_btsocket_l2cap_pcb_p l2pcb = NULL;
struct socket *so1 = NULL;
struct socket *so1;
mtx_assert(&s->session_mtx, MA_OWNED);
@ -1171,11 +1171,9 @@ ng_btsocket_rfcomm_connect_ind(ng_btsocket_rfcomm_session_p s, int channel)
mtx_lock(&pcb->pcb_mtx);
if (pcb->so->so_qlen <= pcb->so->so_qlimit) {
CURVNET_SET(pcb->so->so_vnet);
so1 = sonewconn(pcb->so, 0);
CURVNET_RESTORE();
}
CURVNET_SET(pcb->so->so_vnet);
so1 = sonewconn(pcb->so, 0);
CURVNET_RESTORE();
mtx_unlock(&pcb->pcb_mtx);
@ -1405,47 +1403,25 @@ ng_btsocket_rfcomm_session_create(ng_btsocket_rfcomm_session_p *sp,
static int
ng_btsocket_rfcomm_session_accept(ng_btsocket_rfcomm_session_p s0)
{
struct socket *l2so = NULL;
struct socket *l2so;
struct sockaddr_l2cap *l2sa = NULL;
ng_btsocket_l2cap_pcb_t *l2pcb = NULL;
ng_btsocket_rfcomm_session_p s = NULL;
int error = 0;
int error;
mtx_assert(&ng_btsocket_rfcomm_sessions_mtx, MA_OWNED);
mtx_assert(&s0->session_mtx, MA_OWNED);
/* Check if there is a complete L2CAP connection in the queue */
if ((error = s0->l2so->so_error) != 0) {
SOLISTEN_LOCK(s0->l2so);
error = solisten_dequeue(s0->l2so, &l2so, 0);
if (error == EWOULDBLOCK)
return (error);
if (error) {
NG_BTSOCKET_RFCOMM_ERR(
"%s: Could not accept connection on L2CAP socket, error=%d\n", __func__, error);
s0->l2so->so_error = 0;
return (error);
}
ACCEPT_LOCK();
if (TAILQ_EMPTY(&s0->l2so->so_comp)) {
ACCEPT_UNLOCK();
if (s0->l2so->so_rcv.sb_state & SBS_CANTRCVMORE)
return (ECONNABORTED);
return (EWOULDBLOCK);
}
/* Accept incoming L2CAP connection */
l2so = TAILQ_FIRST(&s0->l2so->so_comp);
if (l2so == NULL)
panic("%s: l2so == NULL\n", __func__);
TAILQ_REMOVE(&s0->l2so->so_comp, l2so, so_list);
s0->l2so->so_qlen --;
l2so->so_qstate &= ~SQ_COMP;
l2so->so_head = NULL;
SOCK_LOCK(l2so);
soref(l2so);
l2so->so_state |= SS_NBIO;
SOCK_UNLOCK(l2so);
ACCEPT_UNLOCK();
error = soaccept(l2so, (struct sockaddr **) &l2sa);
if (error != 0) {
NG_BTSOCKET_RFCOMM_ERR(


@ -471,20 +471,13 @@ ng_btsocket_sco_process_lp_con_ind(struct ng_mesg *msg,
pcb = ng_btsocket_sco_pcb_by_addr(&rt->src);
if (pcb != NULL) {
struct socket *so1 = NULL;
struct socket *so1;
/* pcb is locked */
/*
* First check the pending connections queue and if we have
* space then create new socket and set proper source address.
*/
if (pcb->so->so_qlen <= pcb->so->so_qlimit) {
CURVNET_SET(pcb->so->so_vnet);
so1 = sonewconn(pcb->so, 0);
CURVNET_RESTORE();
}
CURVNET_SET(pcb->so->so_vnet);
so1 = sonewconn(pcb->so, 0);
CURVNET_RESTORE();
if (so1 == NULL) {
status = 0x0d; /* Rejected due to limited resources */


@ -153,8 +153,7 @@ static const struct ng_ksocket_alias ng_ksocket_protos[] = {
};
/* Helper functions */
static int ng_ksocket_check_accept(priv_p);
static void ng_ksocket_finish_accept(priv_p);
static int ng_ksocket_accept(priv_p);
static int ng_ksocket_incoming(struct socket *so, void *arg, int waitflag);
static int ng_ksocket_parse(const struct ng_ksocket_alias *aliases,
const char *s, int family);
@ -698,6 +697,7 @@ ng_ksocket_rcvmsg(node_p node, item_p item, hook_p lasthook)
ERROUT(ENXIO);
/* Listen */
so->so_state |= SS_NBIO;
error = solisten(so, *((int32_t *)msg->data), td);
break;
}
@ -716,21 +716,16 @@ ng_ksocket_rcvmsg(node_p node, item_p item, hook_p lasthook)
if (priv->flags & KSF_ACCEPTING)
ERROUT(EALREADY);
error = ng_ksocket_check_accept(priv);
if (error != 0 && error != EWOULDBLOCK)
ERROUT(error);
/*
* If a connection is already complete, take it.
* Otherwise let the upcall function deal with
* the connection when it comes in.
*/
error = ng_ksocket_accept(priv);
if (error != 0 && error != EWOULDBLOCK)
ERROUT(error);
priv->response_token = msg->header.token;
raddr = priv->response_addr = NGI_RETADDR(item);
if (error == 0) {
ng_ksocket_finish_accept(priv);
} else
priv->flags |= KSF_ACCEPTING;
break;
}
@ -1068,13 +1063,8 @@ ng_ksocket_incoming2(node_p node, hook_p hook, void *arg1, int arg2)
}
/* Check whether a pending accept operation has completed */
if (priv->flags & KSF_ACCEPTING) {
error = ng_ksocket_check_accept(priv);
if (error != EWOULDBLOCK)
priv->flags &= ~KSF_ACCEPTING;
if (error == 0)
ng_ksocket_finish_accept(priv);
}
if (priv->flags & KSF_ACCEPTING)
(void )ng_ksocket_accept(priv);
/*
* If we don't have a hook, we must handle data events later. When
@ -1171,35 +1161,8 @@ ng_ksocket_incoming2(node_p node, hook_p hook, void *arg1, int arg2)
}
}
/*
* Check for a completed incoming connection and return 0 if one is found.
* Otherwise return the appropriate error code.
*/
static int
ng_ksocket_check_accept(priv_p priv)
{
struct socket *const head = priv->so;
int error;
if ((error = head->so_error) != 0) {
head->so_error = 0;
return error;
}
/* Unlocked read. */
if (TAILQ_EMPTY(&head->so_comp)) {
if (head->so_rcv.sb_state & SBS_CANTRCVMORE)
return ECONNABORTED;
return EWOULDBLOCK;
}
return 0;
}
/*
* Handle the first completed incoming connection, assumed to be already
* on the socket's so_comp queue.
*/
static void
ng_ksocket_finish_accept(priv_p priv)
ng_ksocket_accept(priv_p priv)
{
struct socket *const head = priv->so;
struct socket *so;
@ -1211,23 +1174,15 @@ ng_ksocket_finish_accept(priv_p priv)
int len;
int error;
ACCEPT_LOCK();
so = TAILQ_FIRST(&head->so_comp);
if (so == NULL) { /* Should never happen */
ACCEPT_UNLOCK();
return;
SOLISTEN_LOCK(head);
error = solisten_dequeue(head, &so, SOCK_NONBLOCK);
if (error == EWOULDBLOCK) {
priv->flags |= KSF_ACCEPTING;
return (error);
}
TAILQ_REMOVE(&head->so_comp, so, so_list);
head->so_qlen--;
so->so_qstate &= ~SQ_COMP;
so->so_head = NULL;
SOCK_LOCK(so);
soref(so);
so->so_state |= SS_NBIO;
SOCK_UNLOCK(so);
ACCEPT_UNLOCK();
/* XXX KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0); */
priv->flags &= ~KSF_ACCEPTING;
if (error)
return (error);
soaccept(so, &sa);
@ -1288,6 +1243,8 @@ ng_ksocket_finish_accept(priv_p priv)
out:
if (sa != NULL)
free(sa, M_SONAME);
return (0);
}
/*


@ -5200,11 +5200,21 @@ sctp_process_control(struct mbuf *m, int iphlen, int *offset, int length,
* listening responded to a INIT-ACK and then
* closed. We opened and bound.. and are now no
* longer listening.
*
* XXXGL: notes on checking listen queue length.
* 1) SCTP_IS_LISTENING() doesn't necessarily mean
* SOLISTENING(), because a listening "UDP type"
* socket isn't listening in terms of the socket
* layer. It is a normal data flow socket, that
* can fork off new connections. Thus, we should
* look into sol_qlen only in case we are !UDP.
* 2) Checking sol_qlen in general requires locking
* the socket, and this code lacks that.
*/
if ((stcb == NULL) &&
(!SCTP_IS_LISTENING(inp) ||
inp->sctp_socket->so_qlen >= inp->sctp_socket->so_qlimit)) {
(!(inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) &&
inp->sctp_socket->sol_qlen >= inp->sctp_socket->sol_qlimit))) {
if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
(SCTP_BASE_SYSCTL(sctp_abort_if_one_2_one_hits_limit))) {
op_err = sctp_generate_cause(SCTP_CAUSE_OUT_OF_RESC, "");


@ -152,29 +152,11 @@ sys_sctp_peeloff(td, uap)
td->td_retval[0] = fd;
CURVNET_SET(head->so_vnet);
so = sonewconn(head, SS_ISCONNECTED);
so = sopeeloff(head);
if (so == NULL) {
error = ENOMEM;
goto noconnection;
}
/*
* Before changing the flags on the socket, we have to bump the
* reference count. Otherwise, if the protocol calls sofree(),
* the socket will be released due to a zero refcount.
*/
SOCK_LOCK(so);
soref(so); /* file descriptor reference */
SOCK_UNLOCK(so);
ACCEPT_LOCK();
TAILQ_REMOVE(&head->so_comp, so, so_list);
head->so_qlen--;
so->so_state |= (head->so_state & SS_NBIO);
so->so_state &= ~SS_NOFDREF;
so->so_qstate &= ~SQ_COMP;
so->so_head = NULL;
ACCEPT_UNLOCK();
finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name);
if (error != 0)


@ -415,12 +415,12 @@ sctp_sysctl_handle_assoclist(SYSCTL_HANDLER_ARGS)
xinpcb.qlen = 0;
xinpcb.maxqlen = 0;
} else {
xinpcb.qlen = so->so_qlen;
xinpcb.qlen_old = so->so_qlen > USHRT_MAX ?
USHRT_MAX : (uint16_t)so->so_qlen;
xinpcb.maxqlen = so->so_qlimit;
xinpcb.maxqlen_old = so->so_qlimit > USHRT_MAX ?
USHRT_MAX : (uint16_t)so->so_qlimit;
xinpcb.qlen = so->sol_qlen;
xinpcb.qlen_old = so->sol_qlen > USHRT_MAX ?
USHRT_MAX : (uint16_t)so->sol_qlen;
xinpcb.maxqlen = so->sol_qlimit;
xinpcb.maxqlen_old = so->sol_qlimit > USHRT_MAX ?
USHRT_MAX : (uint16_t)so->sol_qlimit;
}
SCTP_INP_INCR_REF(inp);
SCTP_INP_RUNLOCK(inp);


@ -7138,19 +7138,12 @@ sctp_listen(struct socket *so, int backlog, struct thread *p)
}
}
SCTP_INP_WLOCK(inp);
SOCK_LOCK(so);
/* It appears for 7.0 and on, we must always call this. */
solisten_proto(so, backlog);
if (inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) {
/* remove the ACCEPTCONN flag for one-to-many sockets */
so->so_options &= ~SO_ACCEPTCONN;
if ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) == 0) {
SOCK_LOCK(so);
solisten_proto(so, backlog);
SOCK_UNLOCK(so);
}
if (backlog > 0) {
inp->sctp_flags |= SCTP_PCB_FLAGS_ACCEPTING;
} else {
inp->sctp_flags &= ~SCTP_PCB_FLAGS_ACCEPTING;
}
SOCK_UNLOCK(so);
inp->sctp_flags |= SCTP_PCB_FLAGS_ACCEPTING;
SCTP_INP_WUNLOCK(inp);
return (error);
}


@ -1664,7 +1664,6 @@ tcp_close(struct tcpcb *tp)
("tcp_close: !SS_PROTOREF"));
inp->inp_flags &= ~INP_SOCKREF;
INP_WUNLOCK(inp);
ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_state &= ~SS_PROTOREF;
sofree(so);


@ -1264,6 +1264,7 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
* soon as possible.
*/
so = *lsop;
KASSERT(SOLISTENING(so), ("%s: %p not listening", __func__, so));
tp = sototcpcb(so);
cred = crhold(so->so_cred);
@ -1274,7 +1275,7 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
#endif
ip_ttl = inp->inp_ip_ttl;
ip_tos = inp->inp_ip_tos;
win = sbspace(&so->so_rcv);
win = so->sol_sbrcv_hiwat;
ltflags = (tp->t_flags & (TF_NOOPT | TF_SIGNATURE));
#ifdef TCP_RFC7413
@ -1287,7 +1288,7 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
* listen queue with bogus TFO connections.
*/
if (atomic_fetchadd_int(tp->t_tfo_pending, 1) <=
(so->so_qlimit / 2)) {
(so->sol_qlimit / 2)) {
int result;
result = tcp_fastopen_check_cookie(inc,
@ -2115,7 +2116,7 @@ syncookie_lookup(struct in_conninfo *inc, struct syncache_head *sch,
sc->sc_flags |= SCF_WINSCALE;
}
wnd = sbspace(&lso->so_rcv);
wnd = lso->sol_sbrcv_hiwat;
wnd = imax(wnd, 0);
wnd = imin(wnd, TCP_MAXWIN);
sc->sc_wnd = wnd;


@ -352,7 +352,6 @@ tcp_twstart(struct tcpcb *tp)
("tcp_twstart: !SS_PROTOREF"));
inp->inp_flags &= ~INP_SOCKREF;
INP_WUNLOCK(inp);
ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_state &= ~SS_PROTOREF;
sofree(so);
@ -491,7 +490,6 @@ tcp_twclose(struct tcptw *tw, int reuse)
if (inp->inp_flags & INP_SOCKREF) {
inp->inp_flags &= ~INP_SOCKREF;
INP_WUNLOCK(inp);
ACCEPT_LOCK();
SOCK_LOCK(so);
KASSERT(so->so_state & SS_PROTOREF,
("tcp_twclose: INP_SOCKREF && !SS_PROTOREF"));


@ -416,34 +416,19 @@ dequeue_socket(struct socket *head)
{
struct socket *so;
struct sockaddr_in *remote;
int error;
ACCEPT_LOCK();
so = TAILQ_FIRST(&head->so_comp);
if (!so) {
ACCEPT_UNLOCK();
return NULL;
}
SOCK_LOCK(so);
/*
* Before changing the flags on the socket, we have to bump the
* reference count. Otherwise, if the protocol calls sofree(),
* the socket will be released due to a zero refcount.
*/
soref(so);
TAILQ_REMOVE(&head->so_comp, so, so_list);
head->so_qlen--;
so->so_qstate &= ~SQ_COMP;
so->so_head = NULL;
so->so_state |= SS_NBIO;
SOCK_UNLOCK(so);
ACCEPT_UNLOCK();
SOLISTEN_LOCK(head);
error = solisten_dequeue(head, &so, SOCK_NONBLOCK);
if (error == EWOULDBLOCK)
return (NULL);
remote = NULL;
soaccept(so, (struct sockaddr **)&remote);
free(remote, M_SONAME);
return so;
}
static void
iw_so_event_handler(struct work_struct *_work)
{
@ -485,18 +470,17 @@ iw_so_event_handler(struct work_struct *_work)
#endif
return;
}
static int
iw_so_upcall(struct socket *parent_so, void *arg, int waitflag)
{
struct iwcm_listen_work *work;
struct socket *so;
struct iw_cm_id *cm_id = arg;
/* check whether iw_so_event_handler() already dequeued this 'so' */
so = TAILQ_FIRST(&parent_so->so_comp);
if (!so)
if (TAILQ_EMPTY(&parent_so->sol_comp))
return SU_OK;
work = kzalloc(sizeof(*work), M_NOWAIT);
work = kzalloc(sizeof(*work), waitflag);
if (!work)
return -ENOMEM;
work->cm_id = cm_id;
@ -507,17 +491,21 @@ iw_so_upcall(struct socket *parent_so, void *arg, int waitflag)
return SU_OK;
}
static void
iw_init_sock(struct iw_cm_id *cm_id)
static int
iw_create_listen(struct iw_cm_id *cm_id, int backlog)
{
struct sockopt sopt;
struct socket *so = cm_id->so;
int on = 1;
int rc;
SOCK_LOCK(so);
soupcall_set(so, SO_RCV, iw_so_upcall, cm_id);
rc = -solisten(cm_id->so, backlog, curthread);
if (rc != 0)
return (rc);
SOLISTEN_LOCK(so);
solisten_upcall_set(so, iw_so_upcall, cm_id);
so->so_state |= SS_NBIO;
SOCK_UNLOCK(so);
SOLISTEN_UNLOCK(so);
sopt.sopt_dir = SOPT_SET;
sopt.sopt_level = IPPROTO_TCP;
sopt.sopt_name = TCP_NODELAY;
@ -525,37 +513,18 @@ iw_init_sock(struct iw_cm_id *cm_id)
sopt.sopt_valsize = sizeof(on);
sopt.sopt_td = NULL;
sosetopt(so, &sopt);
}
static int
iw_uninit_socket(struct iw_cm_id *cm_id)
{
struct socket *so = cm_id->so;
SOCK_LOCK(so);
soupcall_clear(so, SO_RCV);
SOCK_UNLOCK(so);
return (0);
}
static int
iw_create_listen(struct iw_cm_id *cm_id, int backlog)
{
int rc;
iw_init_sock(cm_id);
rc = -solisten(cm_id->so, backlog, curthread);
if (rc != 0)
iw_uninit_socket(cm_id);
return (rc);
}
static int
iw_destroy_listen(struct iw_cm_id *cm_id)
{
struct socket *so = cm_id->so;
return (iw_uninit_socket(cm_id));
SOLISTEN_LOCK(so);
solisten_upcall_set(so, NULL, NULL);
SOLISTEN_UNLOCK(so);
return (0);
}


@ -310,7 +310,6 @@ sdp_closed(struct sdp_sock *ssk)
("sdp_closed: !SS_PROTOREF"));
ssk->flags &= ~SDP_SOCKREF;
SDP_WUNLOCK(ssk);
ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_state &= ~SS_PROTOREF;
sofree(so);


@ -96,6 +96,7 @@ static SVCXPRT *svc_vc_create_conn(SVCPOOL *pool, struct socket *so,
struct sockaddr *raddr);
static int svc_vc_accept(struct socket *head, struct socket **sop);
static int svc_vc_soupcall(struct socket *so, void *arg, int waitflag);
static int svc_vc_rendezvous_soupcall(struct socket *, void *, int);
static struct xp_ops svc_vc_rendezvous_ops = {
.xp_recv = svc_vc_rendezvous_recv,
@ -183,10 +184,10 @@ svc_vc_create(SVCPOOL *pool, struct socket *so, size_t sendsize,
solisten(so, -1, curthread);
SOCKBUF_LOCK(&so->so_rcv);
SOLISTEN_LOCK(so);
xprt->xp_upcallset = 1;
soupcall_set(so, SO_RCV, svc_vc_soupcall, xprt);
SOCKBUF_UNLOCK(&so->so_rcv);
solisten_upcall_set(so, svc_vc_rendezvous_soupcall, xprt);
SOLISTEN_UNLOCK(so);
return (xprt);
@ -316,9 +317,11 @@ svc_vc_create_backchannel(SVCPOOL *pool)
int
svc_vc_accept(struct socket *head, struct socket **sop)
{
int error = 0;
struct socket *so;
int error = 0;
short nbio;
/* XXXGL: shouldn't that be an assertion? */
if ((head->so_options & SO_ACCEPTCONN) == 0) {
error = EINVAL;
goto done;
@ -328,38 +331,26 @@ svc_vc_accept(struct socket *head, struct socket **sop)
if (error != 0)
goto done;
#endif
ACCEPT_LOCK();
if (TAILQ_EMPTY(&head->so_comp)) {
ACCEPT_UNLOCK();
error = EWOULDBLOCK;
goto done;
}
so = TAILQ_FIRST(&head->so_comp);
KASSERT(!(so->so_qstate & SQ_INCOMP), ("svc_vc_accept: so SQ_INCOMP"));
KASSERT(so->so_qstate & SQ_COMP, ("svc_vc_accept: so not SQ_COMP"));
/*
* Before changing the flags on the socket, we have to bump the
* reference count. Otherwise, if the protocol calls sofree(),
* the socket will be released due to a zero refcount.
* XXX might not need soref() since this is simpler than kern_accept.
* XXXGL: we want non-blocking semantics. The socket could be a
* socket created by kernel as well as socket shared with userland,
* so we can't be sure about presense of SS_NBIO. We also shall not
* toggle it on the socket, since that may surprise userland. So we
* set SS_NBIO only temporarily.
*/
SOCK_LOCK(so); /* soref() and so_state update */
soref(so); /* file descriptor reference */
TAILQ_REMOVE(&head->so_comp, so, so_list);
head->so_qlen--;
so->so_state |= (head->so_state & SS_NBIO);
so->so_qstate &= ~SQ_COMP;
so->so_head = NULL;
SOCK_UNLOCK(so);
ACCEPT_UNLOCK();
SOLISTEN_LOCK(head);
nbio = head->so_state & SS_NBIO;
head->so_state |= SS_NBIO;
error = solisten_dequeue(head, &so, 0);
head->so_state &= (nbio & ~SS_NBIO);
if (error)
goto done;
so->so_state |= nbio;
*sop = so;
/* connection has been removed from the listen queue */
KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);
KNOTE_UNLOCKED(&head->so_rdsel.si_note, 0);
done:
return (error);
}
@ -392,21 +383,21 @@ svc_vc_rendezvous_recv(SVCXPRT *xprt, struct rpc_msg *msg,
* connection arrives after our call to accept fails
* with EWOULDBLOCK.
*/
ACCEPT_LOCK();
if (TAILQ_EMPTY(&xprt->xp_socket->so_comp))
SOLISTEN_LOCK(xprt->xp_socket);
if (TAILQ_EMPTY(&xprt->xp_socket->sol_comp))
xprt_inactive_self(xprt);
ACCEPT_UNLOCK();
SOLISTEN_UNLOCK(xprt->xp_socket);
sx_xunlock(&xprt->xp_lock);
return (FALSE);
}
if (error) {
SOCKBUF_LOCK(&xprt->xp_socket->so_rcv);
SOLISTEN_LOCK(xprt->xp_socket);
if (xprt->xp_upcallset) {
xprt->xp_upcallset = 0;
soupcall_clear(xprt->xp_socket, SO_RCV);
}
SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv);
SOLISTEN_UNLOCK(xprt->xp_socket);
xprt_inactive_self(xprt);
sx_xunlock(&xprt->xp_lock);
return (FALSE);
@ -453,12 +444,6 @@ svc_vc_rendezvous_stat(SVCXPRT *xprt)
static void
svc_vc_destroy_common(SVCXPRT *xprt)
{
SOCKBUF_LOCK(&xprt->xp_socket->so_rcv);
if (xprt->xp_upcallset) {
xprt->xp_upcallset = 0;
soupcall_clear(xprt->xp_socket, SO_RCV);
}
SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv);
if (xprt->xp_socket)
(void)soclose(xprt->xp_socket);
@ -472,6 +457,13 @@ static void
svc_vc_rendezvous_destroy(SVCXPRT *xprt)
{
SOLISTEN_LOCK(xprt->xp_socket);
if (xprt->xp_upcallset) {
xprt->xp_upcallset = 0;
solisten_upcall_set(xprt->xp_socket, NULL, NULL);
}
SOLISTEN_UNLOCK(xprt->xp_socket);
svc_vc_destroy_common(xprt);
}
@ -480,6 +472,13 @@ svc_vc_destroy(SVCXPRT *xprt)
{
struct cf_conn *cd = (struct cf_conn *)xprt->xp_p1;
SOCKBUF_LOCK(&xprt->xp_socket->so_rcv);
if (xprt->xp_upcallset) {
xprt->xp_upcallset = 0;
soupcall_clear(xprt->xp_socket, SO_RCV);
}
SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv);
svc_vc_destroy_common(xprt);
if (cd->mreq)
@ -958,6 +957,16 @@ svc_vc_soupcall(struct socket *so, void *arg, int waitflag)
return (SU_OK);
}
static int
svc_vc_rendezvous_soupcall(struct socket *head, void *arg, int waitflag)
{
SVCXPRT *xprt = (SVCXPRT *) arg;
if (!TAILQ_EMPTY(&head->sol_comp))
xprt_active(xprt);
return (SU_OK);
}
#if 0
/*
* Get the effective UID of the sending process. Used by rpcbind, keyserv


@ -32,7 +32,6 @@
*/
#ifndef _SYS_SOCKBUF_H_
#define _SYS_SOCKBUF_H_
#include <sys/selinfo.h> /* for struct selinfo */
#include <sys/_lock.h>
#include <sys/_mutex.h>
#include <sys/_sx.h>
@ -64,6 +63,7 @@ struct mbuf;
struct sockaddr;
struct socket;
struct thread;
struct selinfo;
struct xsockbuf {
u_int sb_cc;
@ -84,9 +84,9 @@ struct xsockbuf {
* (a) locked by SOCKBUF_LOCK().
*/
struct sockbuf {
struct selinfo sb_sel; /* process selecting read/write */
struct mtx sb_mtx; /* sockbuf lock */
struct sx sb_sx; /* prevent I/O interlacing */
struct mtx sb_mtx; /* sockbuf lock */
struct sx sb_sx; /* prevent I/O interlacing */
struct selinfo *sb_sel; /* process selecting read/write */
short sb_state; /* (a) socket state on sockbuf */
#define sb_startzero sb_mb
struct mbuf *sb_mb; /* (a) the mbuf chain */


@ -111,7 +111,15 @@ typedef __uintptr_t uintptr_t;
*/
#define SOCK_CLOEXEC 0x10000000
#define SOCK_NONBLOCK 0x20000000
#endif
#ifdef _KERNEL
/*
* Flags for accept1(), kern_accept4() and solisten_dequeue, in addition
* to SOCK_CLOEXEC and SOCK_NONBLOCK.
*/
#define ACCEPT4_INHERIT 0x1
#define ACCEPT4_COMPAT 0x2
#endif /* _KERNEL */
#endif /* __BSD_VISIBLE */
/*
* Option flags per-socket.
@ -704,9 +712,5 @@ void so_sowwakeup(struct socket *so);
void so_lock(struct socket *so);
void so_unlock(struct socket *so);
void so_listeners_apply_all(struct socket *so, void (*func)(struct socket *, void *), void *arg);
#endif
#endif /* _KERNEL */
#endif /* !_SYS_SOCKET_H_ */


@ -64,60 +64,35 @@ struct socket;
* Locking key to struct socket:
* (a) constant after allocation, no locking required.
* (b) locked by SOCK_LOCK(so).
* (c) locked by SOCKBUF_LOCK(&so->so_rcv).
* (e) locked by ACCEPT_LOCK().
* (cr) locked by SOCKBUF_LOCK(&so->so_rcv).
* (cs) locked by SOCKBUF_LOCK(&so->so_rcv).
* (e) locked by SOLISTEN_LOCK() of corresponding listening socket.
* (f) not locked since integer reads/writes are atomic.
* (g) used only as a sleep/wakeup address, no value.
* (h) locked by global mutex so_global_mtx.
*/
TAILQ_HEAD(accept_queue, socket);
struct socket {
int so_count; /* (b) reference count */
struct mtx so_lock;
volatile u_int so_count; /* (b / refcount) */
struct selinfo so_rdsel; /* (b/cr) for so_rcv/so_comp */
struct selinfo so_wrsel; /* (b/cs) for so_snd */
short so_type; /* (a) generic type, see socket.h */
short so_options; /* from socket call, see socket.h */
short so_linger; /* time to linger while closing */
short so_options; /* (b) from socket call, see socket.h */
short so_linger; /* time to linger close(2) */
short so_state; /* (b) internal state flags SS_* */
int so_qstate; /* (e) internal state flags SQ_* */
void *so_pcb; /* protocol control block */
struct vnet *so_vnet; /* (a) network stack instance */
struct protosw *so_proto; /* (a) protocol handle */
/*
* Variables for connection queuing.
* Socket where accepts occur is so_head in all subsidiary sockets.
* If so_head is 0, socket is not related to an accept.
* For head socket so_incomp queues partially completed connections,
* while so_comp is a queue of connections ready to be accepted.
* If a connection is aborted and it has so_head set, then
* it has to be pulled out of either so_incomp or so_comp.
* We allow connections to queue up based on current queue lengths
* and limit on number of queued connections for this socket.
*/
struct socket *so_head; /* (e) back pointer to listen socket */
TAILQ_HEAD(, socket) so_incomp; /* (e) queue of partial unaccepted connections */
TAILQ_HEAD(, socket) so_comp; /* (e) queue of complete unaccepted connections */
TAILQ_ENTRY(socket) so_list; /* (e) list of unaccepted connections */
u_int so_qlen; /* (e) number of unaccepted connections */
u_int so_incqlen; /* (e) number of unaccepted incomplete
connections */
u_int so_qlimit; /* (e) max number queued connections */
short so_timeo; /* (g) connection timeout */
u_short so_error; /* (f) error affecting connection */
struct sigio *so_sigio; /* [sg] information for async I/O or
out of band data (SIGURG) */
u_long so_oobmark; /* (c) chars to oob mark */
struct sockbuf so_rcv, so_snd;
struct ucred *so_cred; /* (a) user credentials */
struct label *so_label; /* (b) MAC label for socket */
struct label *so_peerlabel; /* (b) cached MAC label for peer */
/* NB: generation count must not be first. */
so_gen_t so_gencnt; /* (h) generation count */
void *so_emuldata; /* (b) private data for emulators */
struct so_accf {
struct accept_filter *so_accept_filter;
void *so_accept_filter_arg; /* saved filter args */
char *so_accept_filter_str; /* saved user args */
} *so_accf;
struct osd osd; /* Object Specific extensions */
/*
* so_fibnum, so_user_cookie and friends can be used to attach
@ -130,39 +105,93 @@ struct socket {
int so_ts_clock; /* type of the clock used for timestamps */
uint32_t so_max_pacing_rate; /* (f) TX rate limit in bytes/s */
union {
/* Regular (data flow) socket. */
struct {
/* (cr, cs) Receive and send buffers. */
struct sockbuf so_rcv, so_snd;
void *so_pspare[2]; /* general use */
int so_ispare[2]; /* general use */
/* (e) Our place on accept queue. */
TAILQ_ENTRY(socket) so_list;
struct socket *so_listen; /* (b) */
enum {
SQ_NONE = 0,
SQ_INCOMP = 0x0800, /* on sol_incomp */
SQ_COMP = 0x1000, /* on sol_comp */
} so_qstate; /* (b) */
/* (b) cached MAC label for peer */
struct label *so_peerlabel;
u_long so_oobmark; /* chars to oob mark */
};
/*
* Listening socket, where accepts occur, is so_listen in all
* subsidiary sockets. If so_listen is NULL, socket is not
* related to an accept. For a listening socket itself
* sol_incomp queues partially completed connections, while
* sol_comp is a queue of connections ready to be accepted.
* If a connection is aborted and it has so_listen set, then
* it has to be pulled out of either sol_incomp or sol_comp.
* We allow connections to queue up based on current queue
* lengths and limit on number of queued connections for this
* socket.
*/
struct {
/* (e) queue of partial unaccepted connections */
struct accept_queue sol_incomp;
/* (e) queue of complete unaccepted connections */
struct accept_queue sol_comp;
u_int sol_qlen; /* (e) sol_comp length */
u_int sol_incqlen; /* (e) sol_incomp length */
u_int sol_qlimit; /* (e) queue limit */
/* accept_filter(9) optional data */
struct accept_filter *sol_accept_filter;
void *sol_accept_filter_arg; /* saved filter args */
char *sol_accept_filter_str; /* saved user args */
/* Optional upcall, for kernel socket. */
so_upcall_t *sol_upcall; /* (e) */
void *sol_upcallarg; /* (e) */
/* Socket buffer parameters, to be copied to
* dataflow sockets, accepted from this one. */
int sol_sbrcv_lowat;
int sol_sbsnd_lowat;
u_int sol_sbrcv_hiwat;
u_int sol_sbsnd_hiwat;
short sol_sbrcv_flags;
short sol_sbsnd_flags;
sbintime_t sol_sbrcv_timeo;
sbintime_t sol_sbsnd_timeo;
};
};
};
/*
* Global accept mutex to serialize access to accept queues and
* fields associated with multiple sockets. This allows us to
* avoid defining a lock order between listen and accept sockets
* until such time as it proves to be a good idea.
*/
extern struct mtx accept_mtx;
#define ACCEPT_LOCK_ASSERT() mtx_assert(&accept_mtx, MA_OWNED)
#define ACCEPT_UNLOCK_ASSERT() mtx_assert(&accept_mtx, MA_NOTOWNED)
#define ACCEPT_LOCK() mtx_lock(&accept_mtx)
#define ACCEPT_UNLOCK() mtx_unlock(&accept_mtx)
#define SOCK_MTX(so) &(so)->so_lock
#define SOCK_LOCK(so) mtx_lock(&(so)->so_lock)
#define SOCK_OWNED(so) mtx_owned(&(so)->so_lock)
#define SOCK_UNLOCK(so) mtx_unlock(&(so)->so_lock)
#define SOCK_LOCK_ASSERT(so) mtx_assert(&(so)->so_lock, MA_OWNED)
#define SOCK_UNLOCK_ASSERT(so) mtx_assert(&(so)->so_lock, MA_NOTOWNED)
/*
* Per-socket mutex: we reuse the receive socket buffer mutex for space
* efficiency. This decision should probably be revisited as we optimize
* locking for the socket code.
*/
#define SOCK_MTX(_so) SOCKBUF_MTX(&(_so)->so_rcv)
#define SOCK_LOCK(_so) SOCKBUF_LOCK(&(_so)->so_rcv)
#define SOCK_OWNED(_so) SOCKBUF_OWNED(&(_so)->so_rcv)
#define SOCK_UNLOCK(_so) SOCKBUF_UNLOCK(&(_so)->so_rcv)
#define SOCK_LOCK_ASSERT(_so) SOCKBUF_LOCK_ASSERT(&(_so)->so_rcv)
/*
* Socket state bits stored in so_qstate.
*/
#define SQ_INCOMP 0x0800 /* unaccepted, incomplete connection */
#define SQ_COMP 0x1000 /* unaccepted, complete connection */
#define SOLISTENING(sol) (((sol)->so_options & SO_ACCEPTCONN) != 0)
#define SOLISTEN_LOCK(sol) do { \
mtx_lock(&(sol)->so_lock); \
KASSERT(SOLISTENING(sol), \
("%s: %p not listening", __func__, (sol))); \
} while (0)
#define SOLISTEN_TRYLOCK(sol) mtx_trylock(&(sol)->so_lock)
#define SOLISTEN_UNLOCK(sol) do { \
KASSERT(SOLISTENING(sol), \
("%s: %p not listening", __func__, (sol))); \
mtx_unlock(&(sol)->so_lock); \
} while (0)
#define SOLISTEN_LOCK_ASSERT(sol) do { \
mtx_assert(&(sol)->so_lock, MA_OWNED); \
KASSERT(SOLISTENING(sol), \
("%s: %p not listening", __func__, (sol))); \
} while (0)
/*
* Externalized form of struct socket used by the sysctl(3) interface.
@ -213,8 +242,7 @@ struct xsocket {
/* can we read something from so? */
#define soreadabledata(so) \
(sbavail(&(so)->so_rcv) >= (so)->so_rcv.sb_lowat || \
!TAILQ_EMPTY(&(so)->so_comp) || (so)->so_error)
(sbavail(&(so)->so_rcv) >= (so)->so_rcv.sb_lowat || (so)->so_error)
#define soreadable(so) \
(soreadabledata(so) || ((so)->so_rcv.sb_state & SBS_CANTRCVMORE))
@ -227,26 +255,19 @@ struct xsocket {
(so)->so_error)
/*
* soref()/sorele() ref-count the socket structure. Note that you must
* still explicitly close the socket, but the last ref count will free
* the structure.
* soref()/sorele() ref-count the socket structure.
* soref() may be called without owning socket lock, but in that case a
* caller must own something that holds socket, and so_count must be not 0.
* Note that you must still explicitly close the socket, but the last ref
* count will free the structure.
*/
#define soref(so) do { \
SOCK_LOCK_ASSERT(so); \
++(so)->so_count; \
} while (0)
#define soref(so) refcount_acquire(&(so)->so_count)
#define sorele(so) do { \
ACCEPT_LOCK_ASSERT(); \
SOCK_LOCK_ASSERT(so); \
if ((so)->so_count <= 0) \
panic("sorele"); \
if (--(so)->so_count == 0) \
if (refcount_release(&(so)->so_count)) \
sofree(so); \
else { \
else \
SOCK_UNLOCK(so); \
ACCEPT_UNLOCK(); \
} \
} while (0)
/*
@ -369,10 +390,11 @@ void sohasoutofband(struct socket *so);
int solisten(struct socket *so, int backlog, struct thread *td);
void solisten_proto(struct socket *so, int backlog);
int solisten_proto_check(struct socket *so);
int solisten_dequeue(struct socket *, struct socket **, int);
struct socket *
sonewconn(struct socket *head, int connstatus);
struct socket *
sopeeloff(struct socket *);
int sopoll(struct socket *so, int events, struct ucred *active_cred,
struct thread *td);
int sopoll_generic(struct socket *so, int events,
@ -403,8 +425,10 @@ int soshutdown(struct socket *so, int how);
void sotoxsocket(struct socket *so, struct xsocket *xso);
void soupcall_clear(struct socket *, int);
void soupcall_set(struct socket *, int, so_upcall_t, void *);
void solisten_upcall_set(struct socket *, so_upcall_t, void *);
void sowakeup(struct socket *so, struct sockbuf *sb);
void sowakeup_aio(struct socket *so, struct sockbuf *sb);
void solisten_wakeup(struct socket *);
int selsocket(struct socket *so, int events, struct timeval *tv,
struct thread *td);


@ -170,14 +170,17 @@ sotoxsocket(struct socket *so, struct xsocket *xso)
if (kread((uintptr_t)proto.pr_domain, &domain, sizeof(domain)) != 0)
return (-1);
xso->xso_family = domain.dom_family;
xso->so_qlen = so->so_qlen;
xso->so_incqlen = so->so_incqlen;
xso->so_qlimit = so->so_qlimit;
xso->so_timeo = so->so_timeo;
xso->so_error = so->so_error;
xso->so_oobmark = so->so_oobmark;
sbtoxsockbuf(&so->so_snd, &xso->so_snd);
sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
if (SOLISTENING(so)) {
xso->so_qlen = so->sol_qlen;
xso->so_incqlen = so->sol_incqlen;
xso->so_qlimit = so->sol_qlimit;
} else {
sbtoxsockbuf(&so->so_snd, &xso->so_snd);
sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
xso->so_oobmark = so->so_oobmark;
}
return (0);
}