- Implement a lockless file descriptor lookup algorithm in
fget_unlocked(). - Save old file descriptor tables created on expansion until the entire descriptor table is freed so that pointers may be followed without regard for expanders. - Mark the file zone as NOFREE so we may attempt to reference potentially freed files. - Convert several fget_locked() users to fget_unlocked(). This requires us to manage reference counts explicitly but reduces locking overhead in the common case.
This commit is contained in:
parent
b40ce02a2f
commit
bf422e5f27
@ -124,12 +124,24 @@ static void fdused(struct filedesc *fdp, int fd);
|
||||
*/
|
||||
#define OFILESIZE (sizeof(struct file *) + sizeof(char))
|
||||
|
||||
/*
|
||||
* Storage to hold unused ofiles that need to be reclaimed.
|
||||
*/
|
||||
struct freetable {
|
||||
struct file **ft_table;
|
||||
SLIST_ENTRY(freetable) ft_next;
|
||||
};
|
||||
|
||||
/*
|
||||
* Basic allocation of descriptors:
|
||||
* one of the above, plus arrays for NDFILE descriptors.
|
||||
*/
|
||||
struct filedesc0 {
|
||||
struct filedesc fd_fd;
|
||||
/*
|
||||
* ofiles which need to be reclaimed on free.
|
||||
*/
|
||||
SLIST_HEAD(,freetable) fd_free;
|
||||
/*
|
||||
* These arrays are used when the number of open files is
|
||||
* <= NDFILE, and are then pointed to by the pointers above.
|
||||
@ -1268,7 +1280,10 @@ fpathconf(struct thread *td, struct fpathconf_args *uap)
|
||||
static void
|
||||
fdgrowtable(struct filedesc *fdp, int nfd)
|
||||
{
|
||||
struct filedesc0 *fdp0;
|
||||
struct freetable *fo;
|
||||
struct file **ntable;
|
||||
struct file **otable;
|
||||
char *nfileflags;
|
||||
int nnfiles, onfiles;
|
||||
NDSLOTTYPE *nmap;
|
||||
@ -1287,7 +1302,7 @@ fdgrowtable(struct filedesc *fdp, int nfd)
|
||||
|
||||
/* allocate a new table and (if required) new bitmaps */
|
||||
FILEDESC_XUNLOCK(fdp);
|
||||
ntable = malloc(nnfiles * OFILESIZE,
|
||||
ntable = malloc((nnfiles * OFILESIZE) + sizeof(struct freetable),
|
||||
M_FILEDESC, M_ZERO | M_WAITOK);
|
||||
nfileflags = (char *)&ntable[nnfiles];
|
||||
if (NDSLOTS(nnfiles) > NDSLOTS(onfiles))
|
||||
@ -1311,10 +1326,20 @@ fdgrowtable(struct filedesc *fdp, int nfd)
|
||||
}
|
||||
bcopy(fdp->fd_ofiles, ntable, onfiles * sizeof(*ntable));
|
||||
bcopy(fdp->fd_ofileflags, nfileflags, onfiles);
|
||||
if (onfiles > NDFILE)
|
||||
free(fdp->fd_ofiles, M_FILEDESC);
|
||||
fdp->fd_ofiles = ntable;
|
||||
otable = fdp->fd_ofiles;
|
||||
fdp->fd_ofileflags = nfileflags;
|
||||
fdp->fd_ofiles = ntable;
|
||||
/*
|
||||
* We must preserve ofiles until the process exits because we can't
|
||||
* be certain that no threads have references to the old table via
|
||||
* _fget().
|
||||
*/
|
||||
if (onfiles > NDFILE) {
|
||||
fo = (struct freetable *)&otable[onfiles];
|
||||
fdp0 = (struct filedesc0 *)fdp;
|
||||
fo->ft_table = otable;
|
||||
SLIST_INSERT_HEAD(&fdp0->fd_free, fo, ft_next);
|
||||
}
|
||||
if (NDSLOTS(nnfiles) > NDSLOTS(onfiles)) {
|
||||
bcopy(fdp->fd_map, nmap, NDSLOTS(onfiles) * sizeof(*nmap));
|
||||
if (NDSLOTS(onfiles) > NDSLOTS(NDFILE))
|
||||
@ -1512,6 +1537,8 @@ fdhold(struct proc *p)
|
||||
static void
|
||||
fddrop(struct filedesc *fdp)
|
||||
{
|
||||
struct filedesc0 *fdp0;
|
||||
struct freetable *ft;
|
||||
int i;
|
||||
|
||||
mtx_lock(&fdesc_mtx);
|
||||
@ -1521,6 +1548,11 @@ fddrop(struct filedesc *fdp)
|
||||
return;
|
||||
|
||||
FILEDESC_LOCK_DESTROY(fdp);
|
||||
fdp0 = (struct filedesc0 *)fdp;
|
||||
while ((ft = SLIST_FIRST(&fdp0->fd_free)) != NULL) {
|
||||
SLIST_REMOVE_HEAD(&fdp0->fd_free, ft_next);
|
||||
free(ft->ft_table, M_FILEDESC);
|
||||
}
|
||||
free(fdp, M_FILEDESC);
|
||||
}
|
||||
|
||||
@ -2022,6 +2054,38 @@ finit(struct file *fp, u_int flag, short type, void *data, struct fileops *ops)
|
||||
atomic_store_rel_ptr((volatile uintptr_t *)&fp->f_ops, (uintptr_t)ops);
|
||||
}
|
||||
|
||||
struct file *
|
||||
fget_unlocked(struct filedesc *fdp, int fd)
|
||||
{
|
||||
struct file *fp;
|
||||
u_int count;
|
||||
|
||||
if (fd < 0 || fd >= fdp->fd_nfiles)
|
||||
return (NULL);
|
||||
/*
|
||||
* Fetch the descriptor locklessly. We avoid fdrop() races by
|
||||
* never raising a refcount above 0. To accomplish this we have
|
||||
* to use a cmpset loop rather than an atomic_add. The descriptor
|
||||
* must be re-verified once we acquire a reference to be certain
|
||||
* that the identity is still correct and we did not lose a race
|
||||
* due to preemption.
|
||||
*/
|
||||
for (;;) {
|
||||
fp = fdp->fd_ofiles[fd];
|
||||
if (fp == NULL)
|
||||
break;
|
||||
count = fp->f_count;
|
||||
if (count == 0)
|
||||
continue;
|
||||
if (atomic_cmpset_int(&fp->f_count, count, count + 1) != 1)
|
||||
continue;
|
||||
if (fp == ((struct file *volatile*)fdp->fd_ofiles)[fd])
|
||||
break;
|
||||
fdrop(fp, curthread);
|
||||
}
|
||||
|
||||
return (fp);
|
||||
}
|
||||
|
||||
/*
|
||||
* Extract the file pointer associated with the specified descriptor for the
|
||||
@ -2030,16 +2094,12 @@ finit(struct file *fp, u_int flag, short type, void *data, struct fileops *ops)
|
||||
* If the descriptor doesn't exist or doesn't match 'flags', EBADF is
|
||||
* returned.
|
||||
*
|
||||
* If 'hold' is set (non-zero) the file's refcount will be bumped on return.
|
||||
* It should be dropped with fdrop(). If it is not set, then the refcount
|
||||
* will not be bumped however the thread's filedesc struct will be returned
|
||||
* locked (for fgetsock).
|
||||
*
|
||||
* If an error occured the non-zero error is returned and *fpp is set to
|
||||
* NULL. Otherwise *fpp is set and zero is returned.
|
||||
* NULL. Otherwise *fpp is held and set and zero is returned. Caller is
|
||||
* responsible for fdrop().
|
||||
*/
|
||||
static __inline int
|
||||
_fget(struct thread *td, int fd, struct file **fpp, int flags, int hold)
|
||||
_fget(struct thread *td, int fd, struct file **fpp, int flags)
|
||||
{
|
||||
struct filedesc *fdp;
|
||||
struct file *fp;
|
||||
@ -2047,29 +2107,22 @@ _fget(struct thread *td, int fd, struct file **fpp, int flags, int hold)
|
||||
*fpp = NULL;
|
||||
if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
|
||||
return (EBADF);
|
||||
FILEDESC_SLOCK(fdp);
|
||||
if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) {
|
||||
FILEDESC_SUNLOCK(fdp);
|
||||
if ((fp = fget_unlocked(fdp, fd)) == NULL)
|
||||
return (EBADF);
|
||||
if (fp->f_ops == &badfileops) {
|
||||
fdrop(fp, td);
|
||||
return (EBADF);
|
||||
}
|
||||
|
||||
/*
|
||||
* FREAD and FWRITE failure return EBADF as per POSIX.
|
||||
*
|
||||
* Only one flag, or 0, may be specified.
|
||||
*/
|
||||
if (flags == FREAD && (fp->f_flag & FREAD) == 0) {
|
||||
FILEDESC_SUNLOCK(fdp);
|
||||
if ((flags == FREAD && (fp->f_flag & FREAD) == 0) ||
|
||||
(flags == FWRITE && (fp->f_flag & FWRITE) == 0)) {
|
||||
fdrop(fp, td);
|
||||
return (EBADF);
|
||||
}
|
||||
if (flags == FWRITE && (fp->f_flag & FWRITE) == 0) {
|
||||
FILEDESC_SUNLOCK(fdp);
|
||||
return (EBADF);
|
||||
}
|
||||
if (hold) {
|
||||
fhold(fp);
|
||||
FILEDESC_SUNLOCK(fdp);
|
||||
}
|
||||
*fpp = fp;
|
||||
return (0);
|
||||
}
|
||||
@ -2078,21 +2131,21 @@ int
|
||||
fget(struct thread *td, int fd, struct file **fpp)
|
||||
{
|
||||
|
||||
return(_fget(td, fd, fpp, 0, 1));
|
||||
return(_fget(td, fd, fpp, 0));
|
||||
}
|
||||
|
||||
int
|
||||
fget_read(struct thread *td, int fd, struct file **fpp)
|
||||
{
|
||||
|
||||
return(_fget(td, fd, fpp, FREAD, 1));
|
||||
return(_fget(td, fd, fpp, FREAD));
|
||||
}
|
||||
|
||||
int
|
||||
fget_write(struct thread *td, int fd, struct file **fpp)
|
||||
{
|
||||
|
||||
return(_fget(td, fd, fpp, FWRITE, 1));
|
||||
return(_fget(td, fd, fpp, FWRITE));
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2109,7 +2162,7 @@ _fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags)
|
||||
int error;
|
||||
|
||||
*vpp = NULL;
|
||||
if ((error = _fget(td, fd, &fp, flags, 0)) != 0)
|
||||
if ((error = _fget(td, fd, &fp, flags)) != 0)
|
||||
return (error);
|
||||
if (fp->f_vnode == NULL) {
|
||||
error = EINVAL;
|
||||
@ -2117,7 +2170,8 @@ _fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags)
|
||||
*vpp = fp->f_vnode;
|
||||
vref(*vpp);
|
||||
}
|
||||
FILEDESC_SUNLOCK(td->td_proc->p_fd);
|
||||
fdrop(fp, td);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
@ -2164,7 +2218,7 @@ fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp)
|
||||
*spp = NULL;
|
||||
if (fflagp != NULL)
|
||||
*fflagp = 0;
|
||||
if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
|
||||
if ((error = _fget(td, fd, &fp, 0)) != 0)
|
||||
return (error);
|
||||
if (fp->f_type != DTYPE_SOCKET) {
|
||||
error = ENOTSOCK;
|
||||
@ -2176,7 +2230,8 @@ fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp)
|
||||
soref(*spp);
|
||||
SOCK_UNLOCK(*spp);
|
||||
}
|
||||
FILEDESC_SUNLOCK(td->td_proc->p_fd);
|
||||
fdrop(fp, td);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
@ -3147,7 +3202,7 @@ filelistinit(void *dummy)
|
||||
{
|
||||
|
||||
file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL,
|
||||
NULL, NULL, UMA_ALIGN_PTR, 0);
|
||||
NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
|
||||
mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF);
|
||||
mtx_init(&fdesc_mtx, "fdesc", NULL, MTX_DEF);
|
||||
}
|
||||
|
@ -988,7 +988,6 @@ selrescan(struct thread *td, fd_mask **ibits, fd_mask **obits)
|
||||
fdp = td->td_proc->p_fd;
|
||||
stp = td->td_sel;
|
||||
n = 0;
|
||||
FILEDESC_SLOCK(fdp);
|
||||
STAILQ_FOREACH_SAFE(sfp, &stp->st_selq, sf_link, sfn) {
|
||||
fd = (int)(uintptr_t)sfp->sf_cookie;
|
||||
si = sfp->sf_si;
|
||||
@ -996,17 +995,15 @@ selrescan(struct thread *td, fd_mask **ibits, fd_mask **obits)
|
||||
/* If the selinfo wasn't cleared the event didn't fire. */
|
||||
if (si != NULL)
|
||||
continue;
|
||||
if ((fp = fget_locked(fdp, fd)) == NULL) {
|
||||
FILEDESC_SUNLOCK(fdp);
|
||||
if ((fp = fget_unlocked(fdp, fd)) == NULL)
|
||||
return (EBADF);
|
||||
}
|
||||
idx = fd / NFDBITS;
|
||||
bit = (fd_mask)1 << (fd % NFDBITS);
|
||||
ev = fo_poll(fp, selflags(ibits, idx, bit), td->td_ucred, td);
|
||||
fdrop(fp, td);
|
||||
if (ev != 0)
|
||||
n += selsetbits(ibits, obits, idx, bit, ev);
|
||||
}
|
||||
FILEDESC_SUNLOCK(fdp);
|
||||
stp->st_flags = 0;
|
||||
td->td_retval[0] = n;
|
||||
return (0);
|
||||
@ -1030,7 +1027,6 @@ selscan(td, ibits, obits, nfd)
|
||||
|
||||
fdp = td->td_proc->p_fd;
|
||||
n = 0;
|
||||
FILEDESC_SLOCK(fdp);
|
||||
for (idx = 0, fd = 0; fd < nfd; idx++) {
|
||||
end = imin(fd + NFDBITS, nfd);
|
||||
for (bit = 1; fd < end; bit <<= 1, fd++) {
|
||||
@ -1038,18 +1034,16 @@ selscan(td, ibits, obits, nfd)
|
||||
flags = selflags(ibits, idx, bit);
|
||||
if (flags == 0)
|
||||
continue;
|
||||
if ((fp = fget_locked(fdp, fd)) == NULL) {
|
||||
FILEDESC_SUNLOCK(fdp);
|
||||
if ((fp = fget_unlocked(fdp, fd)) == NULL)
|
||||
return (EBADF);
|
||||
}
|
||||
selfdalloc(td, (void *)(uintptr_t)fd);
|
||||
ev = fo_poll(fp, flags, td->td_ucred, td);
|
||||
fdrop(fp, td);
|
||||
if (ev != 0)
|
||||
n += selsetbits(ibits, obits, idx, bit, ev);
|
||||
}
|
||||
}
|
||||
|
||||
FILEDESC_SUNLOCK(fdp);
|
||||
td->td_retval[0] = n;
|
||||
return (0);
|
||||
}
|
||||
|
@ -1720,10 +1720,13 @@ ttyhook_register(struct tty **rtp, struct proc *p, int fd,
|
||||
/* Validate the file descriptor. */
|
||||
if ((fdp = p->p_fd) == NULL)
|
||||
return (EBADF);
|
||||
FILEDESC_SLOCK(fdp);
|
||||
if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) {
|
||||
FILEDESC_SUNLOCK(fdp);
|
||||
|
||||
fp = fget_unlocked(fdp, fd);
|
||||
if (fp == NULL)
|
||||
return (EBADF);
|
||||
if (fp->f_ops == &badfileops) {
|
||||
error = EBADF;
|
||||
goto done1;
|
||||
}
|
||||
|
||||
/* Make sure the vnode is bound to a character device. */
|
||||
@ -1763,7 +1766,7 @@ ttyhook_register(struct tty **rtp, struct proc *p, int fd,
|
||||
|
||||
done3: tty_unlock(tp);
|
||||
done2: dev_relthread(dev);
|
||||
done1: FILEDESC_SUNLOCK(fdp);
|
||||
done1: fdrop(fp, curthread);
|
||||
return (error);
|
||||
}
|
||||
|
||||
|
@ -122,23 +122,16 @@ getsock(struct filedesc *fdp, int fd, struct file **fpp, u_int *fflagp)
|
||||
int error;
|
||||
|
||||
fp = NULL;
|
||||
if (fdp == NULL)
|
||||
if (fdp == NULL || (fp = fget_unlocked(fdp, fd)) == NULL) {
|
||||
error = EBADF;
|
||||
else {
|
||||
FILEDESC_SLOCK(fdp);
|
||||
fp = fget_locked(fdp, fd);
|
||||
if (fp == NULL)
|
||||
error = EBADF;
|
||||
else if (fp->f_type != DTYPE_SOCKET) {
|
||||
fp = NULL;
|
||||
error = ENOTSOCK;
|
||||
} else {
|
||||
fhold(fp);
|
||||
if (fflagp != NULL)
|
||||
*fflagp = fp->f_flag;
|
||||
error = 0;
|
||||
}
|
||||
FILEDESC_SUNLOCK(fdp);
|
||||
} else if (fp->f_type != DTYPE_SOCKET) {
|
||||
fdrop(fp, curthread);
|
||||
fp = NULL;
|
||||
error = ENOTSOCK;
|
||||
} else {
|
||||
if (fflagp != NULL)
|
||||
*fflagp = fp->f_flag;
|
||||
error = 0;
|
||||
}
|
||||
*fpp = fp;
|
||||
return (error);
|
||||
|
@ -4235,22 +4235,13 @@ getvnode(fdp, fd, fpp)
|
||||
int error;
|
||||
struct file *fp;
|
||||
|
||||
error = 0;
|
||||
fp = NULL;
|
||||
if (fdp == NULL)
|
||||
if (fdp == NULL || (fp = fget_unlocked(fdp, fd)) == NULL)
|
||||
error = EBADF;
|
||||
else {
|
||||
FILEDESC_SLOCK(fdp);
|
||||
if ((u_int)fd >= fdp->fd_nfiles ||
|
||||
(fp = fdp->fd_ofiles[fd]) == NULL)
|
||||
error = EBADF;
|
||||
else if (fp->f_vnode == NULL) {
|
||||
fp = NULL;
|
||||
error = EINVAL;
|
||||
} else {
|
||||
fhold(fp);
|
||||
error = 0;
|
||||
}
|
||||
FILEDESC_SUNLOCK(fdp);
|
||||
else if (fp->f_vnode == NULL) {
|
||||
error = EINVAL;
|
||||
fdrop(fp, curthread);
|
||||
}
|
||||
*fpp = fp;
|
||||
return (error);
|
||||
|
@ -129,6 +129,10 @@ int getvnode(struct filedesc *fdp, int fd, struct file **fpp);
|
||||
void mountcheckdirs(struct vnode *olddp, struct vnode *newdp);
|
||||
void setugidsafety(struct thread *td);
|
||||
|
||||
/* Return a referenced file from an unlocked descriptor. */
|
||||
struct file *fget_unlocked(struct filedesc *fdp, int fd);
|
||||
|
||||
/* Requires a FILEDESC_{S,X}LOCK held and returns without a ref. */
|
||||
static __inline struct file *
|
||||
fget_locked(struct filedesc *fdp, int fd)
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user