- Close a race between enumerating UNIX domain socket pcb structures via

sysctl and socket teardown by adding a reference count to the UNIX domain
  pcb object and fixing the sysctl that enumerates unpcbs to grab a
  reference on each unpcb while it builds the list to copy out to userland.
- Close a race between UNIX domain pcb garbage collection (unp_gc()) and
  file descriptor teardown (fdrop()) by adding a new garbage collection
  flag FWAIT.  unp_gc() sets FWAIT while it walks the message buffers
  in a UNIX domain socket looking for nested file descriptor references
  and clears the flag when it is finished.  fdrop() checks to see if the
  flag is set on a file descriptor whose refcount just dropped to 0 and
  waits for unp_gc() to clear the flag before completely destroying the
  file descriptor.

MFC after:	1 week
Reviewed by:	rwatson
Submitted by:	ups
Hopefully makes the panics go away:	mx1
This commit is contained in:
John Baldwin 2007-01-05 19:59:46 +00:00
parent 663b416f16
commit 9ae328fc8f
4 changed files with 63 additions and 15 deletions

View File

@ -2181,6 +2181,17 @@ fdrop_locked(struct file *fp, struct thread *td)
FILE_UNLOCK(fp);
return (0);
}
/*
* We might have just dropped the last reference to a file
* object that is for a UNIX domain socket whose message
* buffers are being examined in unp_gc(). If that is the
* case, FWAIT will be set in f_gcflag and we need to wait for
* unp_gc() to finish its scan.
*/
while (fp->f_gcflag & FWAIT)
msleep(&fp->f_gcflag, fp->f_mtxp, 0, "fpdrop", 0);
/* We have the last ref so we can proceed without the file lock. */
FILE_UNLOCK(fp);
if (fp->f_count < 0)

View File

@ -285,6 +285,7 @@ uipc_attach(struct socket *so, int proto, struct thread *td)
unp->unp_socket = so;
so->so_pcb = unp;
unp->unp_refcount = 1;
UNP_LOCK();
unp->unp_gencnt = ++unp_gencnt;
unp_count++;
@ -451,9 +452,10 @@ uipc_connect2(struct socket *so1, struct socket *so2)
static void
uipc_detach(struct socket *so)
{
int local_unp_rights;
struct sockaddr_un *saved_unp_addr;
struct unpcb *unp;
struct vnode *vp;
int freeunp, local_unp_rights;
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_detach: unp == NULL"));
@ -477,10 +479,15 @@ uipc_detach(struct socket *so)
}
unp->unp_socket->so_pcb = NULL;
local_unp_rights = unp_rights;
saved_unp_addr = unp->unp_addr;
unp->unp_addr = NULL;
unp->unp_refcount--;
freeunp = (unp->unp_refcount == 0);
UNP_UNLOCK();
if (unp->unp_addr != NULL)
FREE(unp->unp_addr, M_SONAME);
uma_zfree(unp_zone, unp);
if (saved_unp_addr != NULL)
FREE(saved_unp_addr, M_SONAME);
if (freeunp)
uma_zfree(unp_zone, unp);
if (vp) {
int vfslocked;
@ -1120,6 +1127,7 @@ static int
unp_pcblist(SYSCTL_HANDLER_ARGS)
{
int error, i, n;
int freeunp;
struct unpcb *unp, **unp_list;
unp_gen_t gencnt;
struct xunpgen *xug;
@ -1171,6 +1179,7 @@ unp_pcblist(SYSCTL_HANDLER_ARGS)
unp->unp_socket->so_cred))
continue;
unp_list[i++] = unp;
unp->unp_refcount++;
}
}
UNP_UNLOCK();
@ -1180,7 +1189,9 @@ unp_pcblist(SYSCTL_HANDLER_ARGS)
xu = malloc(sizeof(*xu), M_TEMP, M_WAITOK | M_ZERO);
for (i = 0; i < n; i++) {
unp = unp_list[i];
if (unp->unp_gencnt <= gencnt) {
UNP_LOCK();
unp->unp_refcount--;
if (unp->unp_refcount != 0 && unp->unp_gencnt <= gencnt) {
xu->xu_len = sizeof *xu;
xu->xu_unpp = unp;
/*
@ -1197,7 +1208,13 @@ unp_pcblist(SYSCTL_HANDLER_ARGS)
unp->unp_conn->unp_addr->sun_len);
bcopy(unp, &xu->xu_unp, sizeof *unp);
sotoxsocket(unp->unp_socket, &xu->xu_socket);
UNP_UNLOCK();
error = SYSCTL_OUT(req, xu, sizeof *xu);
} else {
freeunp = (unp->unp_refcount == 0);
UNP_UNLOCK();
if (freeunp)
uma_zfree(unp_zone, unp);
}
}
free(xu, M_TEMP);
@ -1390,7 +1407,7 @@ unp_init(void)
{
unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb), NULL, NULL,
NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
NULL, NULL, UMA_ALIGN_PTR, 0);
if (unp_zone == NULL)
panic("unp_init");
uma_zone_set_max(unp_zone, maxsockets);
@ -1623,7 +1640,7 @@ unp_gc(__unused void *arg, int pending)
unp_taskcount++;
unp_defer = 0;
/*
* Before going through all this, set all FDs to be NOT defered and
* Before going through all this, set all FDs to be NOT deferred and
* NOT externally accessible.
*/
sx_slock(&filelist_lock);
@ -1648,16 +1665,16 @@ unp_gc(__unused void *arg, int pending)
continue;
}
/*
* If we already marked it as 'defer' in a previous
* pass, then try process it this time and un-mark
* it.
* If we already marked it as 'defer' in a
* previous pass, then try to process it this
* time and un-mark it.
*/
if (fp->f_gcflag & FDEFER) {
fp->f_gcflag &= ~FDEFER;
unp_defer--;
} else {
/*
* if it's not defered, then check if it's
* if it's not deferred, then check if it's
* already marked.. if so skip it
*/
if (fp->f_gcflag & FMARK) {
@ -1680,7 +1697,7 @@ unp_gc(__unused void *arg, int pending)
fp->f_gcflag |= FMARK;
}
/*
* Either it was defered, or it is externally
* Either it was deferred, or it is externally
* accessible and not already marked so. Now check
* if it is possibly one of OUR sockets.
*/
@ -1689,13 +1706,23 @@ unp_gc(__unused void *arg, int pending)
FILE_UNLOCK(fp);
continue;
}
FILE_UNLOCK(fp);
if (so->so_proto->pr_domain != &localdomain ||
(so->so_proto->pr_flags&PR_RIGHTS) == 0)
(so->so_proto->pr_flags & PR_RIGHTS) == 0) {
FILE_UNLOCK(fp);
continue;
}
/*
* Tell any other threads that do a subsequent
* fdrop() that we are scanning the message
* buffers.
*/
fp->f_gcflag |= FWAIT;
FILE_UNLOCK(fp);
/*
* So, Ok, it's one of our sockets and it IS
* externally accessible (or was defered). Now we
* externally accessible (or was deferred). Now we
* look to see if we hold any file descriptors in its
* message buffers. Follow those links and mark them
* as accessible too.
@ -1703,6 +1730,14 @@ unp_gc(__unused void *arg, int pending)
SOCKBUF_LOCK(&so->so_rcv);
unp_scan(so->so_rcv.sb_mb, unp_mark);
SOCKBUF_UNLOCK(&so->so_rcv);
/*
* Wake up any threads waiting in fdrop().
*/
FILE_LOCK(fp);
fp->f_gcflag &= ~FWAIT;
wakeup(&fp->f_gcflag);
FILE_UNLOCK(fp);
}
} while (unp_defer);
sx_sunlock(&filelist_lock);

View File

@ -128,6 +128,7 @@ struct file {
short f_gcflag; /* used by thread doing fd garbage collection */
#define FMARK 0x1 /* mark during gc() */
#define FDEFER 0x2 /* defer for next gc pass */
#define FWAIT 0x4 /* gc is scanning message buffers */
int f_msgcount; /* (f) references from message queue */
/* DTYPE_VNODE specific fields */

View File

@ -78,6 +78,7 @@ struct unpcb {
unp_gen_t unp_gencnt; /* generation count of this instance */
int unp_flags; /* flags */
struct xucred unp_peercred; /* peer credentials, if applicable */
u_int unp_refcount;
};
/*