Add tracking of process leaders sharing a file descriptor table and

allow a file descriptor table to be shared between multiple process
leaders.

PR:		50923
This commit is contained in:
tegge 2003-06-02 16:05:32 +00:00
parent a7f09ca192
commit d233f8d61a
5 changed files with 256 additions and 19 deletions

View File

@ -407,6 +407,7 @@ proc0_init(void *dummy __unused)
/* Create the file descriptor table. */
fdp = &filedesc0;
p->p_fd = &fdp->fd_fd;
p->p_fdtol = NULL;
mtx_init(&fdp->fd_fd.fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
fdp->fd_fd.fd_refcnt = 1;
fdp->fd_fd.fd_cmask = cmask;

View File

@ -73,6 +73,8 @@
#include <vm/uma.h>
static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "file desc to leader",
"file desc to leader structures");
static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
static uma_zone_t file_zone;
@ -456,6 +458,7 @@ do_dup(td, type, old, new, retval)
struct file *fp;
struct file *delfp;
int error, newfd;
int holdleaders;
p = td->td_proc;
fdp = p->p_fd;
@ -520,6 +523,15 @@ do_dup(td, type, old, new, retval)
* introducing an ownership race for the slot.
*/
delfp = fdp->fd_ofiles[new];
if (delfp != NULL && p->p_fdtol != NULL) {
/*
* Ask fdfree() to sleep to ensure that all relevant
* process leaders can be traversed in closef().
*/
fdp->fd_holdleaderscount++;
holdleaders = 1;
} else
holdleaders = 0;
KASSERT(delfp == NULL || type == DUP_FIXED,
("dup() picked an open file"));
#if 0
@ -546,6 +558,16 @@ do_dup(td, type, old, new, retval)
mtx_lock(&Giant);
(void) closef(delfp, td);
mtx_unlock(&Giant);
if (holdleaders) {
FILEDESC_LOCK(fdp);
fdp->fd_holdleaderscount--;
if (fdp->fd_holdleaderscount == 0 &&
fdp->fd_holdleaderswakeup != 0) {
fdp->fd_holdleaderswakeup = 0;
wakeup(&fdp->fd_holdleaderscount);
}
FILEDESC_UNLOCK(fdp);
}
}
return (0);
}
@ -793,9 +815,11 @@ close(td, uap)
struct filedesc *fdp;
struct file *fp;
int fd, error;
int holdleaders;
fd = uap->fd;
error = 0;
holdleaders = 0;
fdp = td->td_proc->p_fd;
mtx_lock(&Giant);
FILEDESC_LOCK(fdp);
@ -811,6 +835,14 @@ close(td, uap)
#endif
fdp->fd_ofiles[fd] = NULL;
fdp->fd_ofileflags[fd] = 0;
if (td->td_proc->p_fdtol != NULL) {
/*
* Ask fdfree() to sleep to ensure that all relevant
* process leaders can be traversed in closef().
*/
fdp->fd_holdleaderscount++;
holdleaders = 1;
}
/*
* we now hold the fp reference that used to be owned by the descriptor
@ -829,6 +861,16 @@ close(td, uap)
error = closef(fp, td);
done2:
mtx_unlock(&Giant);
if (holdleaders) {
FILEDESC_LOCK(fdp);
fdp->fd_holdleaderscount--;
if (fdp->fd_holdleaderscount == 0 &&
fdp->fd_holdleaderswakeup != 0) {
fdp->fd_holdleaderswakeup = 0;
wakeup(&fdp->fd_holdleaderscount);
}
FILEDESC_UNLOCK(fdp);
}
return (error);
}
@ -1382,12 +1424,88 @@ fdfree(td)
struct filedesc *fdp;
struct file **fpp;
int i;
struct filedesc_to_leader *fdtol;
struct file *fp;
struct vnode *vp;
struct flock lf;
/* Certain daemons might not have file descriptors. */
fdp = td->td_proc->p_fd;
if (fdp == NULL)
return;
/* Check for special need to clear POSIX style locks */
fdtol = td->td_proc->p_fdtol;
if (fdtol != NULL) {
FILEDESC_LOCK(fdp);
KASSERT(fdtol->fdl_refcount > 0,
("filedesc_to_refcount botch: fdl_refcount=%d",
fdtol->fdl_refcount));
if (fdtol->fdl_refcount == 1 &&
(td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
i = 0;
fpp = fdp->fd_ofiles;
for (i = 0, fpp = fdp->fd_ofiles;
i < fdp->fd_lastfile;
i++, fpp++) {
if (*fpp == NULL ||
(*fpp)->f_type != DTYPE_VNODE)
continue;
fp = *fpp;
fhold(fp);
FILEDESC_UNLOCK(fdp);
lf.l_whence = SEEK_SET;
lf.l_start = 0;
lf.l_len = 0;
lf.l_type = F_UNLCK;
vp = fp->f_data;
(void) VOP_ADVLOCK(vp,
(caddr_t)td->td_proc->
p_leader,
F_UNLCK,
&lf,
F_POSIX);
FILEDESC_LOCK(fdp);
fdrop(fp, td);
fpp = fdp->fd_ofiles + i;
}
}
retry:
if (fdtol->fdl_refcount == 1) {
if (fdp->fd_holdleaderscount > 0 &&
(td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
/*
* close() or do_dup() has cleared a reference
* in a shared file descriptor table.
*/
fdp->fd_holdleaderswakeup = 1;
msleep(&fdp->fd_holdleaderscount, &fdp->fd_mtx,
PLOCK, "fdlhold", 0);
goto retry;
}
if (fdtol->fdl_holdcount > 0) {
/*
* Ensure that fdtol->fdl_leader
* remains valid in closef().
*/
fdtol->fdl_wakeup = 1;
msleep(fdtol, &fdp->fd_mtx,
PLOCK, "fdlhold", 0);
goto retry;
}
}
fdtol->fdl_refcount--;
if (fdtol->fdl_refcount == 0 &&
fdtol->fdl_holdcount == 0) {
fdtol->fdl_next->fdl_prev = fdtol->fdl_prev;
fdtol->fdl_prev->fdl_next = fdtol->fdl_next;
} else
fdtol = NULL;
td->td_proc->p_fdtol = NULL;
FILEDESC_UNLOCK(fdp);
if (fdtol != NULL)
FREE(fdtol, M_FILEDESC_TO_LEADER);
}
FILEDESC_LOCK(fdp);
if (--fdp->fd_refcnt > 0) {
FILEDESC_UNLOCK(fdp);
@ -1625,6 +1743,8 @@ closef(fp, td)
{
struct vnode *vp;
struct flock lf;
struct filedesc_to_leader *fdtol;
struct filedesc *fdp;
if (fp == NULL)
return (0);
@ -1636,15 +1756,51 @@ closef(fp, td)
* If the descriptor was in a message, POSIX-style locks
* aren't passed with the descriptor.
*/
if (td != NULL && (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0 &&
if (td != NULL &&
fp->f_type == DTYPE_VNODE) {
lf.l_whence = SEEK_SET;
lf.l_start = 0;
lf.l_len = 0;
lf.l_type = F_UNLCK;
vp = fp->f_data;
(void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader,
F_UNLCK, &lf, F_POSIX);
if ((td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
lf.l_whence = SEEK_SET;
lf.l_start = 0;
lf.l_len = 0;
lf.l_type = F_UNLCK;
vp = fp->f_data;
(void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader,
F_UNLCK, &lf, F_POSIX);
}
fdtol = td->td_proc->p_fdtol;
if (fdtol != NULL) {
/*
* Handle special case where file descriptor table
* is shared between multiple process leaders.
*/
fdp = td->td_proc->p_fd;
FILEDESC_LOCK(fdp);
for (fdtol = fdtol->fdl_next;
fdtol != td->td_proc->p_fdtol;
fdtol = fdtol->fdl_next) {
if ((fdtol->fdl_leader->p_flag &
P_ADVLOCK) == 0)
continue;
fdtol->fdl_holdcount++;
FILEDESC_UNLOCK(fdp);
lf.l_whence = SEEK_SET;
lf.l_start = 0;
lf.l_len = 0;
lf.l_type = F_UNLCK;
vp = fp->f_data;
(void) VOP_ADVLOCK(vp,
(caddr_t)fdtol->fdl_leader,
F_UNLCK, &lf, F_POSIX);
FILEDESC_LOCK(fdp);
fdtol->fdl_holdcount--;
if (fdtol->fdl_holdcount == 0 &&
fdtol->fdl_wakeup != 0) {
fdtol->fdl_wakeup = 0;
wakeup(fdtol);
}
}
FILEDESC_UNLOCK(fdp);
}
}
return (fdrop(fp, td));
}
@ -2079,6 +2235,36 @@ dupfdopen(td, fdp, indx, dfd, mode, error)
/* NOTREACHED */
}
/*
 * Allocate and initialize a filedesc_to_leader node for "leader".
 *
 * The new node starts with one reference, no holds and no pending wakeup.
 * When "old" is NULL the node becomes a ring of one and "fdp" is not
 * touched.  Otherwise the node is spliced into the circular list right
 * after "old" while the mutex of "fdp" is held.
 *
 * The allocation uses M_WAITOK.  The caller owns the returned node.
 */
struct filedesc_to_leader *
filedesc_to_leader_alloc(struct filedesc_to_leader *old,
    struct filedesc *fdp, struct proc *leader)
{
	struct filedesc_to_leader *node;

	MALLOC(node, struct filedesc_to_leader *, sizeof(*node),
	    M_FILEDESC_TO_LEADER, M_WAITOK);
	node->fdl_leader = leader;
	node->fdl_wakeup = 0;
	node->fdl_holdcount = 0;
	node->fdl_refcount = 1;

	if (old == NULL) {
		/* No existing ring: the node is its own neighbour. */
		node->fdl_prev = node;
		node->fdl_next = node;
		return (node);
	}

	/* Link the node between "old" and the former successor of "old". */
	FILEDESC_LOCK(fdp);
	node->fdl_prev = old;
	node->fdl_next = old->fdl_next;
	node->fdl_next->fdl_prev = node;
	old->fdl_next = node;
	FILEDESC_UNLOCK(fdp);
	return (node);
}
/*
* Get file structures.
*/

View File

@ -139,13 +139,6 @@ rfork(td, uap)
/* Don't allow kernel only flags. */
if ((uap->flags & RFKERNELONLY) != 0)
return (EINVAL);
/*
* Don't allow sharing of file descriptor table unless
* RFTHREAD flag is supplied
*/
if ((uap->flags & (RFPROC | RFTHREAD | RFFDG | RFCFDG)) ==
RFPROC)
return(EINVAL);
error = fork1(td, uap->flags, 0, &p2);
if (error == 0) {
td->td_retval[0] = p2 ? p2->p_pid : 0;
@ -209,6 +202,7 @@ fork1(td, flags, pages, procp)
int ok;
static int pidchecked = 0;
struct filedesc *fd;
struct filedesc_to_leader *fdtol;
struct proc *p1 = td->td_proc;
struct thread *td2;
struct kse *ke2;
@ -419,15 +413,40 @@ fork1(td, flags, pages, procp)
/*
* Copy filedesc.
*/
if (flags & RFCFDG)
if (flags & RFCFDG) {
fd = fdinit(td->td_proc->p_fd);
else if (flags & RFFDG) {
fdtol = NULL;
} else if (flags & RFFDG) {
FILEDESC_LOCK(p1->p_fd);
fd = fdcopy(td->td_proc->p_fd);
FILEDESC_UNLOCK(p1->p_fd);
} else
fdtol = NULL;
} else {
fd = fdshare(p1->p_fd);
if (p1->p_fdtol == NULL)
p1->p_fdtol =
filedesc_to_leader_alloc(NULL,
NULL,
p1->p_leader);
if ((flags & RFTHREAD) != 0) {
/*
* Shared file descriptor table and
* shared process leaders.
*/
fdtol = p1->p_fdtol;
FILEDESC_LOCK(p1->p_fd);
fdtol->fdl_refcount++;
FILEDESC_UNLOCK(p1->p_fd);
} else {
/*
* Shared file descriptor table, and
* different process leaders
*/
fdtol = filedesc_to_leader_alloc(p1->p_fdtol,
p1->p_fd,
p2);
}
}
/*
* Make a proc table entry for the new process.
* Start by zeroing the section of proc that is zero-initialized,
@ -506,6 +525,7 @@ fork1(td, flags, pages, procp)
if (p2->p_textvp)
VREF(p2->p_textvp);
p2->p_fd = fd;
p2->p_fdtol = fdtol;
PROC_UNLOCK(p1);
PROC_UNLOCK(p2);

View File

@ -75,6 +75,8 @@ struct filedesc {
u_long fd_knhashmask; /* size of knhash */
struct klist *fd_knhash; /* hash table for attached knotes */
struct mtx fd_mtx; /* mtx to protect the members of struct filedesc */
int fd_holdleaderscount; /* block fdfree() for shared close() */
int fd_holdleaderswakeup; /* fdfree() needs wakeup */
};
/*
@ -91,6 +93,27 @@ struct filedesc0 {
char fd_dfileflags[NDFILE];
};
/*
 * Structure to keep track of (process leader, struct filedesc) tuples.
 * Each process has a pointer to such a structure when detailed tracking
 * is needed. e.g. when rfork(RFPROC | RFMEM) causes a file descriptor
 * table to be shared by processes having different "p_leader" pointers
 * and thus distinct POSIX style locks.
 *
 * The nodes created for one shared file descriptor table are linked into
 * a circular, doubly linked list; a node allocated without a predecessor
 * points at itself.  closef() walks this list to release the POSIX locks
 * held on behalf of the other process leaders.
 *
 * fdl_refcount and fdl_holdcount are protected by struct filedesc mtx.
 */
struct filedesc_to_leader {
int fdl_refcount; /* references from struct proc; dropped in fdfree() */
int fdl_holdcount; /* temporary hold during closef, taken around VOP_ADVLOCK() */
int fdl_wakeup; /* fdfree() waits on closef(); set before sleeping on this node */
struct proc *fdl_leader; /* owner of POSIX locks; passed as the lock id on unlock */
/* Circular list */
struct filedesc_to_leader *fdl_prev;
struct filedesc_to_leader *fdl_next;
};
/*
* Per-process open flags.
*/
@ -131,6 +154,12 @@ static __inline struct file * fget_locked(struct filedesc *fdp, int fd);
int getvnode(struct filedesc *fdp, int fd, struct file **fpp);
void setugidsafety(struct thread *td);
/*
 * Allocate a tracking node for "leader".  If "old" is non-NULL the new
 * node is linked into the circular list directly after it, with "fdp"
 * locked for the insertion; if "old" is NULL the node forms a list of
 * its own and "fdp" is not used.
 */
struct filedesc_to_leader *
filedesc_to_leader_alloc(struct filedesc_to_leader *old,
struct filedesc *fdp,
struct proc *leader);
static __inline struct file *
fget_locked(struct filedesc *fdp, int fd)
{

View File

@ -510,6 +510,7 @@ struct proc {
TAILQ_HEAD(, thread) p_suspended; /* (td_runq) Suspended threads. */
struct ucred *p_ucred; /* (c) Process owner's identity. */
struct filedesc *p_fd; /* (b) Ptr to open files structure. */
struct filedesc_to_leader *p_fdtol; /* (b) Ptr to tracking node */
/* Accumulated stats for all KSEs? */
struct pstats *p_stats; /* (b) Accounting/statistics (CPU). */
struct plimit *p_limit; /* (c*) Process limits. */