1c7c3c6a86
changes to the VM system to support the new swapper, VM bug fixes, several VM optimizations, and some additional revamping of the VM code. The specific bug fixes will be documented with additional forced commits. This commit is somewhat rough in regards to code cleanup issues. Reviewed by: "John S. Dyson" <root@dyson.iquest.net>, "David Greenman" <dg@root.com>
1187 lines
28 KiB
C
1187 lines
28 KiB
C
/*
|
|
* Copyright (c) 1982, 1986, 1989, 1991, 1993
|
|
* The Regents of the University of California. All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* 3. All advertising materials mentioning features or use of this software
|
|
* must display the following acknowledgement:
|
|
* This product includes software developed by the University of
|
|
* California, Berkeley and its contributors.
|
|
* 4. Neither the name of the University nor the names of its contributors
|
|
* may be used to endorse or promote products derived from this software
|
|
* without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*
|
|
* From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
|
|
* $Id: uipc_usrreq.c,v 1.37 1998/10/25 17:44:51 phk Exp $
|
|
*/
|
|
|
|
#include <sys/param.h>
|
|
#include <sys/systm.h>
|
|
#include <sys/kernel.h>
|
|
#include <sys/domain.h>
|
|
#include <sys/fcntl.h>
|
|
#include <sys/malloc.h> /* XXX must be before <sys/file.h> */
|
|
#include <sys/file.h>
|
|
#include <sys/filedesc.h>
|
|
#include <sys/lock.h>
|
|
#include <sys/mbuf.h>
|
|
#include <sys/namei.h>
|
|
#include <sys/proc.h>
|
|
#include <sys/protosw.h>
|
|
#include <sys/socket.h>
|
|
#include <sys/socketvar.h>
|
|
#include <sys/stat.h>
|
|
#include <sys/sysctl.h>
|
|
#include <sys/un.h>
|
|
#include <sys/unpcb.h>
|
|
#include <sys/vnode.h>
|
|
|
|
#include <vm/vm_zone.h>
|
|
|
|
struct vm_zone *unp_zone;
|
|
static unp_gen_t unp_gencnt;
|
|
static u_int unp_count;
|
|
|
|
static struct unp_head unp_shead, unp_dhead;
|
|
|
|
/*
|
|
* Unix communications domain.
|
|
*
|
|
* TODO:
|
|
* SEQPACKET, RDM
|
|
* rethink name space problems
|
|
* need a proper out-of-band
|
|
* lock pushdown
|
|
*/
|
|
static struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL };
|
|
static ino_t unp_ino; /* prototype for fake inode numbers */
|
|
|
|
static int unp_attach __P((struct socket *));
|
|
static void unp_detach __P((struct unpcb *));
|
|
static int unp_bind __P((struct unpcb *,struct sockaddr *, struct proc *));
|
|
static int unp_connect __P((struct socket *,struct sockaddr *,
|
|
struct proc *));
|
|
static void unp_disconnect __P((struct unpcb *));
|
|
static void unp_shutdown __P((struct unpcb *));
|
|
static void unp_drop __P((struct unpcb *, int));
|
|
static void unp_gc __P((void));
|
|
static void unp_scan __P((struct mbuf *, void (*)(struct file *)));
|
|
static void unp_mark __P((struct file *));
|
|
static void unp_discard __P((struct file *));
|
|
static int unp_internalize __P((struct mbuf *, struct proc *));
|
|
|
|
static int
|
|
uipc_abort(struct socket *so)
|
|
{
|
|
struct unpcb *unp = sotounpcb(so);
|
|
|
|
if (unp == 0)
|
|
return EINVAL;
|
|
unp_drop(unp, ECONNABORTED);
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
uipc_accept(struct socket *so, struct sockaddr **nam)
|
|
{
|
|
struct unpcb *unp = sotounpcb(so);
|
|
|
|
if (unp == 0)
|
|
return EINVAL;
|
|
|
|
/*
|
|
* Pass back name of connected socket,
|
|
* if it was bound and we are still connected
|
|
* (our peer may have closed already!).
|
|
*/
|
|
if (unp->unp_conn && unp->unp_conn->unp_addr) {
|
|
*nam = dup_sockaddr((struct sockaddr *)unp->unp_conn->unp_addr,
|
|
1);
|
|
} else {
|
|
*nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
uipc_attach(struct socket *so, int proto, struct proc *p)
|
|
{
|
|
struct unpcb *unp = sotounpcb(so);
|
|
|
|
if (unp != 0)
|
|
return EISCONN;
|
|
return unp_attach(so);
|
|
}
|
|
|
|
static int
|
|
uipc_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
|
|
{
|
|
struct unpcb *unp = sotounpcb(so);
|
|
|
|
if (unp == 0)
|
|
return EINVAL;
|
|
|
|
return unp_bind(unp, nam, p);
|
|
}
|
|
|
|
static int
|
|
uipc_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
|
|
{
|
|
struct unpcb *unp = sotounpcb(so);
|
|
|
|
if (unp == 0)
|
|
return EINVAL;
|
|
return unp_connect(so, nam, curproc);
|
|
}
|
|
|
|
static int
|
|
uipc_connect2(struct socket *so1, struct socket *so2)
|
|
{
|
|
struct unpcb *unp = sotounpcb(so1);
|
|
|
|
if (unp == 0)
|
|
return EINVAL;
|
|
|
|
return unp_connect2(so1, so2);
|
|
}
|
|
|
|
/* control is EOPNOTSUPP */
|
|
|
|
static int
|
|
uipc_detach(struct socket *so)
|
|
{
|
|
struct unpcb *unp = sotounpcb(so);
|
|
|
|
if (unp == 0)
|
|
return EINVAL;
|
|
|
|
unp_detach(unp);
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
uipc_disconnect(struct socket *so)
|
|
{
|
|
struct unpcb *unp = sotounpcb(so);
|
|
|
|
if (unp == 0)
|
|
return EINVAL;
|
|
unp_disconnect(unp);
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
uipc_listen(struct socket *so, struct proc *p)
|
|
{
|
|
struct unpcb *unp = sotounpcb(so);
|
|
|
|
if (unp == 0 || unp->unp_vnode == 0)
|
|
return EINVAL;
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
uipc_peeraddr(struct socket *so, struct sockaddr **nam)
|
|
{
|
|
struct unpcb *unp = sotounpcb(so);
|
|
|
|
if (unp == 0)
|
|
return EINVAL;
|
|
if (unp->unp_conn && unp->unp_conn->unp_addr)
|
|
*nam = dup_sockaddr((struct sockaddr *)unp->unp_conn->unp_addr,
|
|
1);
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
uipc_rcvd(struct socket *so, int flags)
|
|
{
|
|
struct unpcb *unp = sotounpcb(so);
|
|
struct socket *so2;
|
|
|
|
if (unp == 0)
|
|
return EINVAL;
|
|
switch (so->so_type) {
|
|
case SOCK_DGRAM:
|
|
panic("uipc_rcvd DGRAM?");
|
|
/*NOTREACHED*/
|
|
|
|
case SOCK_STREAM:
|
|
#define rcv (&so->so_rcv)
|
|
#define snd (&so2->so_snd)
|
|
if (unp->unp_conn == 0)
|
|
break;
|
|
so2 = unp->unp_conn->unp_socket;
|
|
/*
|
|
* Adjust backpressure on sender
|
|
* and wakeup any waiting to write.
|
|
*/
|
|
snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
|
|
unp->unp_mbcnt = rcv->sb_mbcnt;
|
|
snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
|
|
unp->unp_cc = rcv->sb_cc;
|
|
sowwakeup(so2);
|
|
#undef snd
|
|
#undef rcv
|
|
break;
|
|
|
|
default:
|
|
panic("uipc_rcvd unknown socktype");
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/* pru_rcvoob is EOPNOTSUPP */
|
|
|
|
static int
|
|
uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
|
|
struct mbuf *control, struct proc *p)
|
|
{
|
|
int error = 0;
|
|
struct unpcb *unp = sotounpcb(so);
|
|
struct socket *so2;
|
|
|
|
if (unp == 0) {
|
|
error = EINVAL;
|
|
goto release;
|
|
}
|
|
if (flags & PRUS_OOB) {
|
|
error = EOPNOTSUPP;
|
|
goto release;
|
|
}
|
|
|
|
if (control && (error = unp_internalize(control, p)))
|
|
goto release;
|
|
|
|
switch (so->so_type) {
|
|
case SOCK_DGRAM:
|
|
{
|
|
struct sockaddr *from;
|
|
|
|
if (nam) {
|
|
if (unp->unp_conn) {
|
|
error = EISCONN;
|
|
break;
|
|
}
|
|
error = unp_connect(so, nam, p);
|
|
if (error)
|
|
break;
|
|
} else {
|
|
if (unp->unp_conn == 0) {
|
|
error = ENOTCONN;
|
|
break;
|
|
}
|
|
}
|
|
so2 = unp->unp_conn->unp_socket;
|
|
if (unp->unp_addr)
|
|
from = (struct sockaddr *)unp->unp_addr;
|
|
else
|
|
from = &sun_noname;
|
|
if (sbappendaddr(&so2->so_rcv, from, m, control)) {
|
|
sorwakeup(so2);
|
|
m = 0;
|
|
control = 0;
|
|
} else
|
|
error = ENOBUFS;
|
|
if (nam)
|
|
unp_disconnect(unp);
|
|
break;
|
|
}
|
|
|
|
case SOCK_STREAM:
|
|
#define rcv (&so2->so_rcv)
|
|
#define snd (&so->so_snd)
|
|
/* Connect if not connected yet. */
|
|
/*
|
|
* Note: A better implementation would complain
|
|
* if not equal to the peer's address.
|
|
*/
|
|
if ((so->so_state & SS_ISCONNECTED) == 0) {
|
|
if (nam) {
|
|
error = unp_connect(so, nam, p);
|
|
if (error)
|
|
break; /* XXX */
|
|
} else {
|
|
error = ENOTCONN;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (so->so_state & SS_CANTSENDMORE) {
|
|
error = EPIPE;
|
|
break;
|
|
}
|
|
if (unp->unp_conn == 0)
|
|
panic("uipc_send connected but no connection?");
|
|
so2 = unp->unp_conn->unp_socket;
|
|
/*
|
|
* Send to paired receive port, and then reduce
|
|
* send buffer hiwater marks to maintain backpressure.
|
|
* Wake up readers.
|
|
*/
|
|
if (control) {
|
|
if (sbappendcontrol(rcv, m, control))
|
|
control = 0;
|
|
} else
|
|
sbappend(rcv, m);
|
|
snd->sb_mbmax -=
|
|
rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
|
|
unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
|
|
snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
|
|
unp->unp_conn->unp_cc = rcv->sb_cc;
|
|
sorwakeup(so2);
|
|
m = 0;
|
|
#undef snd
|
|
#undef rcv
|
|
break;
|
|
|
|
default:
|
|
panic("uipc_send unknown socktype");
|
|
}
|
|
|
|
/*
|
|
* SEND_EOF is equivalent to a SEND followed by
|
|
* a SHUTDOWN.
|
|
*/
|
|
if (flags & PRUS_EOF) {
|
|
socantsendmore(so);
|
|
unp_shutdown(unp);
|
|
}
|
|
|
|
release:
|
|
if (control)
|
|
m_freem(control);
|
|
if (m)
|
|
m_freem(m);
|
|
return error;
|
|
}
|
|
|
|
static int
|
|
uipc_sense(struct socket *so, struct stat *sb)
|
|
{
|
|
struct unpcb *unp = sotounpcb(so);
|
|
struct socket *so2;
|
|
|
|
if (unp == 0)
|
|
return EINVAL;
|
|
sb->st_blksize = so->so_snd.sb_hiwat;
|
|
if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
|
|
so2 = unp->unp_conn->unp_socket;
|
|
sb->st_blksize += so2->so_rcv.sb_cc;
|
|
}
|
|
sb->st_dev = NODEV;
|
|
if (unp->unp_ino == 0)
|
|
unp->unp_ino = unp_ino++;
|
|
sb->st_ino = unp->unp_ino;
|
|
return (0);
|
|
}
|
|
|
|
static int
|
|
uipc_shutdown(struct socket *so)
|
|
{
|
|
struct unpcb *unp = sotounpcb(so);
|
|
|
|
if (unp == 0)
|
|
return EINVAL;
|
|
socantsendmore(so);
|
|
unp_shutdown(unp);
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
uipc_sockaddr(struct socket *so, struct sockaddr **nam)
|
|
{
|
|
struct unpcb *unp = sotounpcb(so);
|
|
|
|
if (unp == 0)
|
|
return EINVAL;
|
|
if (unp->unp_addr)
|
|
*nam = dup_sockaddr((struct sockaddr *)unp->unp_addr, 1);
|
|
return 0;
|
|
}
|
|
|
|
struct pr_usrreqs uipc_usrreqs = {
|
|
uipc_abort, uipc_accept, uipc_attach, uipc_bind, uipc_connect,
|
|
uipc_connect2, pru_control_notsupp, uipc_detach, uipc_disconnect,
|
|
uipc_listen, uipc_peeraddr, uipc_rcvd, pru_rcvoob_notsupp,
|
|
uipc_send, uipc_sense, uipc_shutdown, uipc_sockaddr,
|
|
sosend, soreceive, sopoll
|
|
};
|
|
|
|
/*
|
|
* Both send and receive buffers are allocated PIPSIZ bytes of buffering
|
|
* for stream sockets, although the total for sender and receiver is
|
|
* actually only PIPSIZ.
|
|
* Datagram sockets really use the sendspace as the maximum datagram size,
|
|
* and don't really want to reserve the sendspace. Their recvspace should
|
|
* be large enough for at least one max-size datagram plus address.
|
|
*/
|
|
#ifndef PIPSIZ
|
|
#define PIPSIZ 8192
|
|
#endif
|
|
static u_long unpst_sendspace = PIPSIZ;
|
|
static u_long unpst_recvspace = PIPSIZ;
|
|
static u_long unpdg_sendspace = 2*1024; /* really max datagram size */
|
|
static u_long unpdg_recvspace = 4*1024;
|
|
|
|
static int unp_rights; /* file descriptors in flight */
|
|
|
|
SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
|
|
&unpst_sendspace, 0, "");
|
|
SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
|
|
&unpst_recvspace, 0, "");
|
|
SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
|
|
&unpdg_sendspace, 0, "");
|
|
SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
|
|
&unpdg_recvspace, 0, "");
|
|
SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, "");
|
|
|
|
static int
|
|
unp_attach(so)
|
|
struct socket *so;
|
|
{
|
|
register struct unpcb *unp;
|
|
int error;
|
|
|
|
if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
|
|
switch (so->so_type) {
|
|
|
|
case SOCK_STREAM:
|
|
error = soreserve(so, unpst_sendspace, unpst_recvspace);
|
|
break;
|
|
|
|
case SOCK_DGRAM:
|
|
error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
|
|
break;
|
|
|
|
default:
|
|
panic("unp_attach");
|
|
}
|
|
if (error)
|
|
return (error);
|
|
}
|
|
unp = zalloc(unp_zone);
|
|
if (unp == NULL)
|
|
return (ENOBUFS);
|
|
bzero(unp, sizeof *unp);
|
|
unp->unp_gencnt = ++unp_gencnt;
|
|
unp_count++;
|
|
LIST_INIT(&unp->unp_refs);
|
|
unp->unp_socket = so;
|
|
LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead
|
|
: &unp_shead, unp, unp_link);
|
|
so->so_pcb = (caddr_t)unp;
|
|
return (0);
|
|
}
|
|
|
|
static void
|
|
unp_detach(unp)
|
|
register struct unpcb *unp;
|
|
{
|
|
LIST_REMOVE(unp, unp_link);
|
|
unp->unp_gencnt = ++unp_gencnt;
|
|
--unp_count;
|
|
if (unp->unp_vnode) {
|
|
unp->unp_vnode->v_socket = 0;
|
|
vrele(unp->unp_vnode);
|
|
unp->unp_vnode = 0;
|
|
}
|
|
if (unp->unp_conn)
|
|
unp_disconnect(unp);
|
|
while (unp->unp_refs.lh_first)
|
|
unp_drop(unp->unp_refs.lh_first, ECONNRESET);
|
|
soisdisconnected(unp->unp_socket);
|
|
unp->unp_socket->so_pcb = 0;
|
|
if (unp_rights) {
|
|
/*
|
|
* Normally the receive buffer is flushed later,
|
|
* in sofree, but if our receive buffer holds references
|
|
* to descriptors that are now garbage, we will dispose
|
|
* of those descriptor references after the garbage collector
|
|
* gets them (resulting in a "panic: closef: count < 0").
|
|
*/
|
|
sorflush(unp->unp_socket);
|
|
unp_gc();
|
|
}
|
|
if (unp->unp_addr)
|
|
FREE(unp->unp_addr, M_SONAME);
|
|
zfree(unp_zone, unp);
|
|
}
|
|
|
|
static int
|
|
unp_bind(unp, nam, p)
|
|
struct unpcb *unp;
|
|
struct sockaddr *nam;
|
|
struct proc *p;
|
|
{
|
|
struct sockaddr_un *soun = (struct sockaddr_un *)nam;
|
|
register struct vnode *vp;
|
|
struct vattr vattr;
|
|
int error, namelen;
|
|
struct nameidata nd;
|
|
char buf[SOCK_MAXADDRLEN];
|
|
|
|
if (unp->unp_vnode != NULL)
|
|
return (EINVAL);
|
|
#define offsetof(s, e) ((char *)&((s *)0)->e - (char *)((s *)0))
|
|
namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
|
|
if (namelen <= 0)
|
|
return EINVAL;
|
|
strncpy(buf, soun->sun_path, namelen);
|
|
buf[namelen] = 0; /* null-terminate the string */
|
|
NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
|
|
buf, p);
|
|
/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
|
|
error = namei(&nd);
|
|
if (error)
|
|
return (error);
|
|
vp = nd.ni_vp;
|
|
if (vp != NULL) {
|
|
VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
|
|
if (nd.ni_dvp == vp)
|
|
vrele(nd.ni_dvp);
|
|
else
|
|
vput(nd.ni_dvp);
|
|
vrele(vp);
|
|
return (EADDRINUSE);
|
|
}
|
|
VATTR_NULL(&vattr);
|
|
vattr.va_type = VSOCK;
|
|
vattr.va_mode = (ACCESSPERMS & ~p->p_fd->fd_cmask);
|
|
VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
|
|
error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
|
|
vput(nd.ni_dvp);
|
|
if (error)
|
|
return (error);
|
|
vp = nd.ni_vp;
|
|
vp->v_socket = unp->unp_socket;
|
|
unp->unp_vnode = vp;
|
|
unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, 1);
|
|
VOP_UNLOCK(vp, 0, p);
|
|
return (0);
|
|
}
|
|
|
|
static int
|
|
unp_connect(so, nam, p)
|
|
struct socket *so;
|
|
struct sockaddr *nam;
|
|
struct proc *p;
|
|
{
|
|
register struct sockaddr_un *soun = (struct sockaddr_un *)nam;
|
|
register struct vnode *vp;
|
|
register struct socket *so2, *so3;
|
|
struct unpcb *unp2, *unp3;
|
|
int error, len;
|
|
struct nameidata nd;
|
|
char buf[SOCK_MAXADDRLEN];
|
|
|
|
len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
|
|
if (len <= 0)
|
|
return EINVAL;
|
|
strncpy(buf, soun->sun_path, len);
|
|
buf[len] = 0;
|
|
|
|
NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, buf, p);
|
|
error = namei(&nd);
|
|
if (error)
|
|
return (error);
|
|
vp = nd.ni_vp;
|
|
if (vp->v_type != VSOCK) {
|
|
error = ENOTSOCK;
|
|
goto bad;
|
|
}
|
|
error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p);
|
|
if (error)
|
|
goto bad;
|
|
so2 = vp->v_socket;
|
|
if (so2 == 0) {
|
|
error = ECONNREFUSED;
|
|
goto bad;
|
|
}
|
|
if (so->so_type != so2->so_type) {
|
|
error = EPROTOTYPE;
|
|
goto bad;
|
|
}
|
|
if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
|
|
if ((so2->so_options & SO_ACCEPTCONN) == 0 ||
|
|
(so3 = sonewconn(so2, 0)) == 0) {
|
|
error = ECONNREFUSED;
|
|
goto bad;
|
|
}
|
|
unp2 = sotounpcb(so2);
|
|
unp3 = sotounpcb(so3);
|
|
if (unp2->unp_addr)
|
|
unp3->unp_addr = (struct sockaddr_un *)
|
|
dup_sockaddr((struct sockaddr *)
|
|
unp2->unp_addr, 1);
|
|
so2 = so3;
|
|
}
|
|
error = unp_connect2(so, so2);
|
|
bad:
|
|
vput(vp);
|
|
return (error);
|
|
}
|
|
|
|
int
|
|
unp_connect2(so, so2)
|
|
register struct socket *so;
|
|
register struct socket *so2;
|
|
{
|
|
register struct unpcb *unp = sotounpcb(so);
|
|
register struct unpcb *unp2;
|
|
|
|
if (so2->so_type != so->so_type)
|
|
return (EPROTOTYPE);
|
|
unp2 = sotounpcb(so2);
|
|
unp->unp_conn = unp2;
|
|
switch (so->so_type) {
|
|
|
|
case SOCK_DGRAM:
|
|
LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
|
|
soisconnected(so);
|
|
break;
|
|
|
|
case SOCK_STREAM:
|
|
unp2->unp_conn = unp;
|
|
soisconnected(so);
|
|
soisconnected(so2);
|
|
break;
|
|
|
|
default:
|
|
panic("unp_connect2");
|
|
}
|
|
return (0);
|
|
}
|
|
|
|
static void
|
|
unp_disconnect(unp)
|
|
struct unpcb *unp;
|
|
{
|
|
register struct unpcb *unp2 = unp->unp_conn;
|
|
|
|
if (unp2 == 0)
|
|
return;
|
|
unp->unp_conn = 0;
|
|
switch (unp->unp_socket->so_type) {
|
|
|
|
case SOCK_DGRAM:
|
|
LIST_REMOVE(unp, unp_reflink);
|
|
unp->unp_socket->so_state &= ~SS_ISCONNECTED;
|
|
break;
|
|
|
|
case SOCK_STREAM:
|
|
soisdisconnected(unp->unp_socket);
|
|
unp2->unp_conn = 0;
|
|
soisdisconnected(unp2->unp_socket);
|
|
break;
|
|
}
|
|
}
|
|
|
|
#ifdef notdef
|
|
void
|
|
unp_abort(unp)
|
|
struct unpcb *unp;
|
|
{
|
|
|
|
unp_detach(unp);
|
|
}
|
|
#endif
|
|
|
|
static int
|
|
unp_pcblist SYSCTL_HANDLER_ARGS
|
|
{
|
|
int error, i, n;
|
|
struct unpcb *unp, **unp_list;
|
|
unp_gen_t gencnt;
|
|
struct xunpgen xug;
|
|
struct unp_head *head;
|
|
|
|
head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
|
|
|
|
/*
|
|
* The process of preparing the PCB list is too time-consuming and
|
|
* resource-intensive to repeat twice on every request.
|
|
*/
|
|
if (req->oldptr == 0) {
|
|
n = unp_count;
|
|
req->oldidx = 2 * (sizeof xug)
|
|
+ (n + n/8) * sizeof(struct xunpcb);
|
|
return 0;
|
|
}
|
|
|
|
if (req->newptr != 0)
|
|
return EPERM;
|
|
|
|
/*
|
|
* OK, now we're committed to doing something.
|
|
*/
|
|
gencnt = unp_gencnt;
|
|
n = unp_count;
|
|
|
|
xug.xug_len = sizeof xug;
|
|
xug.xug_count = n;
|
|
xug.xug_gen = gencnt;
|
|
xug.xug_sogen = so_gencnt;
|
|
error = SYSCTL_OUT(req, &xug, sizeof xug);
|
|
if (error)
|
|
return error;
|
|
|
|
unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK);
|
|
if (unp_list == 0)
|
|
return ENOMEM;
|
|
|
|
for (unp = head->lh_first, i = 0; unp && i < n;
|
|
unp = unp->unp_link.le_next) {
|
|
if (unp->unp_gencnt <= gencnt)
|
|
unp_list[i++] = unp;
|
|
}
|
|
n = i; /* in case we lost some during malloc */
|
|
|
|
error = 0;
|
|
for (i = 0; i < n; i++) {
|
|
unp = unp_list[i];
|
|
if (unp->unp_gencnt <= gencnt) {
|
|
struct xunpcb xu;
|
|
xu.xu_len = sizeof xu;
|
|
xu.xu_unpp = unp;
|
|
/*
|
|
* XXX - need more locking here to protect against
|
|
* connect/disconnect races for SMP.
|
|
*/
|
|
if (unp->unp_addr)
|
|
bcopy(unp->unp_addr, &xu.xu_addr,
|
|
unp->unp_addr->sun_len);
|
|
if (unp->unp_conn && unp->unp_conn->unp_addr)
|
|
bcopy(unp->unp_conn->unp_addr,
|
|
&xu.xu_caddr,
|
|
unp->unp_conn->unp_addr->sun_len);
|
|
bcopy(unp, &xu.xu_unp, sizeof *unp);
|
|
sotoxsocket(unp->unp_socket, &xu.xu_socket);
|
|
error = SYSCTL_OUT(req, &xu, sizeof xu);
|
|
}
|
|
}
|
|
if (!error) {
|
|
/*
|
|
* Give the user an updated idea of our state.
|
|
* If the generation differs from what we told
|
|
* her before, she knows that something happened
|
|
* while we were processing this request, and it
|
|
* might be necessary to retry.
|
|
*/
|
|
xug.xug_gen = unp_gencnt;
|
|
xug.xug_sogen = so_gencnt;
|
|
xug.xug_count = unp_count;
|
|
error = SYSCTL_OUT(req, &xug, sizeof xug);
|
|
}
|
|
free(unp_list, M_TEMP);
|
|
return error;
|
|
}
|
|
|
|
SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD,
|
|
(caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
|
|
"List of active local datagram sockets");
|
|
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD,
|
|
(caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
|
|
"List of active local stream sockets");
|
|
|
|
static void
|
|
unp_shutdown(unp)
|
|
struct unpcb *unp;
|
|
{
|
|
struct socket *so;
|
|
|
|
if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
|
|
(so = unp->unp_conn->unp_socket))
|
|
socantrcvmore(so);
|
|
}
|
|
|
|
static void
|
|
unp_drop(unp, errno)
|
|
struct unpcb *unp;
|
|
int errno;
|
|
{
|
|
struct socket *so = unp->unp_socket;
|
|
|
|
so->so_error = errno;
|
|
unp_disconnect(unp);
|
|
if (so->so_head) {
|
|
LIST_REMOVE(unp, unp_link);
|
|
unp->unp_gencnt = ++unp_gencnt;
|
|
unp_count--;
|
|
so->so_pcb = (caddr_t) 0;
|
|
if (unp->unp_addr)
|
|
FREE(unp->unp_addr, M_SONAME);
|
|
zfree(unp_zone, unp);
|
|
sofree(so);
|
|
}
|
|
}
|
|
|
|
#ifdef notdef
|
|
void
|
|
unp_drain()
|
|
{
|
|
|
|
}
|
|
#endif
|
|
|
|
int
|
|
unp_externalize(rights)
|
|
struct mbuf *rights;
|
|
{
|
|
struct proc *p = curproc; /* XXX */
|
|
register int i;
|
|
register struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
|
|
register struct file **rp = (struct file **)(cm + 1);
|
|
register struct file *fp;
|
|
int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof (int);
|
|
int f;
|
|
|
|
/*
|
|
* if the new FD's will not fit, then we free them all
|
|
*/
|
|
if (!fdavail(p, newfds)) {
|
|
for (i = 0; i < newfds; i++) {
|
|
fp = *rp;
|
|
unp_discard(fp);
|
|
*rp++ = 0;
|
|
}
|
|
return (EMSGSIZE);
|
|
}
|
|
/*
|
|
* now change each pointer to an fd in the global table to
|
|
* an integer that is the index to the local fd table entry
|
|
* that we set up to point to the global one we are transferring.
|
|
* XXX this assumes a pointer and int are the same size...!
|
|
*/
|
|
for (i = 0; i < newfds; i++) {
|
|
if (fdalloc(p, 0, &f))
|
|
panic("unp_externalize");
|
|
fp = *rp;
|
|
p->p_fd->fd_ofiles[f] = fp;
|
|
fp->f_msgcount--;
|
|
unp_rights--;
|
|
*(int *)rp++ = f;
|
|
}
|
|
return (0);
|
|
}
|
|
|
|
void
|
|
unp_init(void)
|
|
{
|
|
unp_zone = zinit("unpcb", sizeof(struct unpcb), nmbclusters, 0, 0);
|
|
if (unp_zone == 0)
|
|
panic("unp_init");
|
|
LIST_INIT(&unp_dhead);
|
|
LIST_INIT(&unp_shead);
|
|
}
|
|
|
|
#ifndef MIN
|
|
#define MIN(a,b) (((a)<(b))?(a):(b))
|
|
#endif
|
|
|
|
static int
|
|
unp_internalize(control, p)
|
|
struct mbuf *control;
|
|
struct proc *p;
|
|
{
|
|
struct filedesc *fdp = p->p_fd;
|
|
register struct cmsghdr *cm = mtod(control, struct cmsghdr *);
|
|
register struct file **rp;
|
|
register struct file *fp;
|
|
register int i, fd;
|
|
register struct cmsgcred *cmcred;
|
|
int oldfds;
|
|
|
|
if ((cm->cmsg_type != SCM_RIGHTS && cm->cmsg_type != SCM_CREDS) ||
|
|
cm->cmsg_level != SOL_SOCKET || cm->cmsg_len != control->m_len)
|
|
return (EINVAL);
|
|
|
|
/*
|
|
* Fill in credential information.
|
|
*/
|
|
if (cm->cmsg_type == SCM_CREDS) {
|
|
cmcred = (struct cmsgcred *)(cm + 1);
|
|
cmcred->cmcred_pid = p->p_pid;
|
|
cmcred->cmcred_uid = p->p_cred->p_ruid;
|
|
cmcred->cmcred_gid = p->p_cred->p_rgid;
|
|
cmcred->cmcred_euid = p->p_ucred->cr_uid;
|
|
cmcred->cmcred_ngroups = MIN(p->p_ucred->cr_ngroups,
|
|
CMGROUP_MAX);
|
|
for (i = 0; i < cmcred->cmcred_ngroups; i++)
|
|
cmcred->cmcred_groups[i] = p->p_ucred->cr_groups[i];
|
|
return(0);
|
|
}
|
|
|
|
oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);
|
|
/*
|
|
* check that all the FDs passed in refer to legal OPEN files
|
|
* If not, reject the entire operation.
|
|
*/
|
|
rp = (struct file **)(cm + 1);
|
|
for (i = 0; i < oldfds; i++) {
|
|
fd = *(int *)rp++;
|
|
if ((unsigned)fd >= fdp->fd_nfiles ||
|
|
fdp->fd_ofiles[fd] == NULL)
|
|
return (EBADF);
|
|
}
|
|
/*
|
|
* Now replace the integer FDs with pointers to
|
|
* the associated global file table entry..
|
|
* XXX this assumes a pointer and an int are the same size!
|
|
*/
|
|
rp = (struct file **)(cm + 1);
|
|
for (i = 0; i < oldfds; i++) {
|
|
fp = fdp->fd_ofiles[*(int *)rp];
|
|
*rp++ = fp;
|
|
fp->f_count++;
|
|
fp->f_msgcount++;
|
|
unp_rights++;
|
|
}
|
|
return (0);
|
|
}
|
|
|
|
static int unp_defer, unp_gcing;
|
|
|
|
static void
|
|
unp_gc()
|
|
{
|
|
register struct file *fp, *nextfp;
|
|
register struct socket *so;
|
|
struct file **extra_ref, **fpp;
|
|
int nunref, i;
|
|
|
|
if (unp_gcing)
|
|
return;
|
|
unp_gcing = 1;
|
|
unp_defer = 0;
|
|
/*
|
|
* before going through all this, set all FDs to
|
|
* be NOT defered and NOT externally accessible
|
|
*/
|
|
for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next)
|
|
fp->f_flag &= ~(FMARK|FDEFER);
|
|
do {
|
|
for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) {
|
|
/*
|
|
* If the file is not open, skip it
|
|
*/
|
|
if (fp->f_count == 0)
|
|
continue;
|
|
/*
|
|
* If we already marked it as 'defer' in a
|
|
* previous pass, then try process it this time
|
|
* and un-mark it
|
|
*/
|
|
if (fp->f_flag & FDEFER) {
|
|
fp->f_flag &= ~FDEFER;
|
|
unp_defer--;
|
|
} else {
|
|
/*
|
|
* if it's not defered, then check if it's
|
|
* already marked.. if so skip it
|
|
*/
|
|
if (fp->f_flag & FMARK)
|
|
continue;
|
|
/*
|
|
* If all references are from messages
|
|
* in transit, then skip it. it's not
|
|
* externally accessible.
|
|
*/
|
|
if (fp->f_count == fp->f_msgcount)
|
|
continue;
|
|
/*
|
|
* If it got this far then it must be
|
|
* externally accessible.
|
|
*/
|
|
fp->f_flag |= FMARK;
|
|
}
|
|
/*
|
|
* either it was defered, or it is externally
|
|
* accessible and not already marked so.
|
|
* Now check if it is possibly one of OUR sockets.
|
|
*/
|
|
if (fp->f_type != DTYPE_SOCKET ||
|
|
(so = (struct socket *)fp->f_data) == 0)
|
|
continue;
|
|
if (so->so_proto->pr_domain != &localdomain ||
|
|
(so->so_proto->pr_flags&PR_RIGHTS) == 0)
|
|
continue;
|
|
#ifdef notdef
|
|
if (so->so_rcv.sb_flags & SB_LOCK) {
|
|
/*
|
|
* This is problematical; it's not clear
|
|
* we need to wait for the sockbuf to be
|
|
* unlocked (on a uniprocessor, at least),
|
|
* and it's also not clear what to do
|
|
* if sbwait returns an error due to receipt
|
|
* of a signal. If sbwait does return
|
|
* an error, we'll go into an infinite
|
|
* loop. Delete all of this for now.
|
|
*/
|
|
(void) sbwait(&so->so_rcv);
|
|
goto restart;
|
|
}
|
|
#endif
|
|
/*
|
|
* So, Ok, it's one of our sockets and it IS externally
|
|
* accessible (or was defered). Now we look
|
|
* to see if we hold any file descriptors in its
|
|
* message buffers. Follow those links and mark them
|
|
* as accessible too.
|
|
*/
|
|
unp_scan(so->so_rcv.sb_mb, unp_mark);
|
|
}
|
|
} while (unp_defer);
|
|
/*
|
|
* We grab an extra reference to each of the file table entries
|
|
* that are not otherwise accessible and then free the rights
|
|
* that are stored in messages on them.
|
|
*
|
|
* The bug in the orginal code is a little tricky, so I'll describe
|
|
* what's wrong with it here.
|
|
*
|
|
* It is incorrect to simply unp_discard each entry for f_msgcount
|
|
* times -- consider the case of sockets A and B that contain
|
|
* references to each other. On a last close of some other socket,
|
|
* we trigger a gc since the number of outstanding rights (unp_rights)
|
|
* is non-zero. If during the sweep phase the gc code un_discards,
|
|
* we end up doing a (full) closef on the descriptor. A closef on A
|
|
* results in the following chain. Closef calls soo_close, which
|
|
* calls soclose. Soclose calls first (through the switch
|
|
* uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply
|
|
* returns because the previous instance had set unp_gcing, and
|
|
* we return all the way back to soclose, which marks the socket
|
|
* with SS_NOFDREF, and then calls sofree. Sofree calls sorflush
|
|
* to free up the rights that are queued in messages on the socket A,
|
|
* i.e., the reference on B. The sorflush calls via the dom_dispose
|
|
* switch unp_dispose, which unp_scans with unp_discard. This second
|
|
* instance of unp_discard just calls closef on B.
|
|
*
|
|
* Well, a similar chain occurs on B, resulting in a sorflush on B,
|
|
* which results in another closef on A. Unfortunately, A is already
|
|
* being closed, and the descriptor has already been marked with
|
|
* SS_NOFDREF, and soclose panics at this point.
|
|
*
|
|
* Here, we first take an extra reference to each inaccessible
|
|
* descriptor. Then, we call sorflush ourself, since we know
|
|
* it is a Unix domain socket anyhow. After we destroy all the
|
|
* rights carried in messages, we do a last closef to get rid
|
|
* of our extra reference. This is the last close, and the
|
|
* unp_detach etc will shut down the socket.
|
|
*
|
|
* 91/09/19, bsy@cs.cmu.edu
|
|
*/
|
|
extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK);
|
|
for (nunref = 0, fp = filehead.lh_first, fpp = extra_ref; fp != 0;
|
|
fp = nextfp) {
|
|
nextfp = fp->f_list.le_next;
|
|
/*
|
|
* If it's not open, skip it
|
|
*/
|
|
if (fp->f_count == 0)
|
|
continue;
|
|
/*
|
|
* If all refs are from msgs, and it's not marked accessible
|
|
* then it must be referenced from some unreachable cycle
|
|
* of (shut-down) FDs, so include it in our
|
|
* list of FDs to remove
|
|
*/
|
|
if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
|
|
*fpp++ = fp;
|
|
nunref++;
|
|
fp->f_count++;
|
|
}
|
|
}
|
|
/*
|
|
* for each FD on our hit list, do the following two things
|
|
*/
|
|
for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
|
|
struct file *tfp = *fpp;
|
|
if (tfp->f_type == DTYPE_SOCKET && tfp->f_data != NULL)
|
|
sorflush((struct socket *)(tfp->f_data));
|
|
}
|
|
for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
|
|
closef(*fpp, (struct proc *) NULL);
|
|
free((caddr_t)extra_ref, M_FILE);
|
|
unp_gcing = 0;
|
|
}
|
|
|
|
void
|
|
unp_dispose(m)
|
|
struct mbuf *m;
|
|
{
|
|
|
|
if (m)
|
|
unp_scan(m, unp_discard);
|
|
}
|
|
|
|
static void
|
|
unp_scan(m0, op)
|
|
register struct mbuf *m0;
|
|
void (*op) __P((struct file *));
|
|
{
|
|
register struct mbuf *m;
|
|
register struct file **rp;
|
|
register struct cmsghdr *cm;
|
|
register int i;
|
|
int qfds;
|
|
|
|
while (m0) {
|
|
for (m = m0; m; m = m->m_next)
|
|
if (m->m_type == MT_CONTROL &&
|
|
m->m_len >= sizeof(*cm)) {
|
|
cm = mtod(m, struct cmsghdr *);
|
|
if (cm->cmsg_level != SOL_SOCKET ||
|
|
cm->cmsg_type != SCM_RIGHTS)
|
|
continue;
|
|
qfds = (cm->cmsg_len - sizeof *cm)
|
|
/ sizeof (struct file *);
|
|
rp = (struct file **)(cm + 1);
|
|
for (i = 0; i < qfds; i++)
|
|
(*op)(*rp++);
|
|
break; /* XXX, but saves time */
|
|
}
|
|
m0 = m0->m_act;
|
|
}
|
|
}
|
|
|
|
static void
|
|
unp_mark(fp)
|
|
struct file *fp;
|
|
{
|
|
|
|
if (fp->f_flag & FMARK)
|
|
return;
|
|
unp_defer++;
|
|
fp->f_flag |= (FMARK|FDEFER);
|
|
}
|
|
|
|
static void
|
|
unp_discard(fp)
|
|
struct file *fp;
|
|
{
|
|
|
|
fp->f_msgcount--;
|
|
unp_rights--;
|
|
(void) closef(fp, (struct proc *)NULL);
|
|
}
|