Attempt to fixup select(2) and poll(2), this should fix some races with

other threads as well as speed up the interfaces.

To fix the race and accomplish the speedup, remove selholddrop and
pollholddrop.  The entire concept is somewhat bogus because holding
the individual struct file pointers offers us no guarantees that
another thread context won't close it on us thereby removing our
access to our own reference.

Selholddrop and pollholddrop also would do multiple locks and unlocks
of mutexes _per-file_ in the fd arrays to be scanned, this needed to
be sped up.

Instead of using selholddrop and pollholddrop, simply hold the
filedesc lock over the selscan and pollscan functions.  This should
protect us against close(2)'s on the files as reduce the multiple
lock/unlock pairs per fd into a single lock over the filedesc.
This commit is contained in:
Alfred Perlstein 2002-01-29 22:54:19 +00:00
parent 7cdcc9fecd
commit eb20931127
3 changed files with 22 additions and 125 deletions

View File

@ -1492,9 +1492,7 @@ _fget(struct thread *td, int fd, struct file **fpp, int flags, int hold)
if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
return(EBADF);
FILEDESC_LOCK(fdp);
if (fd < 0 || (u_int)fd >= fdp->fd_nfiles ||
(fp = fdp->fd_ofiles[fd]) == NULL ||
fp->f_ops == &badfileops) {
if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) {
FILEDESC_UNLOCK(fdp);
return(EBADF);
}

View File

@ -75,9 +75,7 @@ static MALLOC_DEFINE(M_SELECT, "select", "select() buffer");
MALLOC_DEFINE(M_IOV, "iov", "large iov's");
static int pollscan __P((struct thread *, struct pollfd *, u_int));
static int pollholddrop __P((struct thread *, struct pollfd *, u_int, int));
static int selscan __P((struct thread *, fd_mask **, fd_mask **, int));
static int selholddrop __P((struct thread *, fd_mask *, fd_mask *, int, int));
static int dofileread __P((struct thread *, struct file *, int, void *,
size_t, off_t, int));
static int dofilewrite __P((struct thread *, struct file *, int,
@ -729,7 +727,7 @@ select(td, uap)
*/
fd_mask s_selbits[howmany(2048, NFDBITS)];
fd_mask s_heldbits[howmany(2048, NFDBITS)];
fd_mask *ibits[3], *obits[3], *selbits, *sbp, *heldbits, *hibits, *hobits;
fd_mask *ibits[3], *obits[3], *selbits, *sbp;
struct timeval atv, rtv, ttv;
int ncoll, error, timo, i;
u_int nbufbytes, ncpbytes, nfdbits;
@ -761,11 +759,6 @@ select(td, uap)
selbits = &s_selbits[0];
else
selbits = malloc(nbufbytes, M_SELECT, M_WAITOK);
if (2 * ncpbytes <= sizeof s_heldbits) {
bzero(s_heldbits, sizeof(s_heldbits));
heldbits = &s_heldbits[0];
} else
heldbits = malloc(2 * ncpbytes, M_SELECT, M_WAITOK | M_ZERO);
/*
* Assign pointers into the bit buffers and fetch the input bits.
@ -773,8 +766,6 @@ select(td, uap)
* together.
*/
sbp = selbits;
hibits = heldbits + ncpbytes / sizeof *heldbits;
hobits = heldbits;
#define getbits(name, x) \
do { \
if (uap->name == NULL) \
@ -786,10 +777,6 @@ select(td, uap)
error = copyin(uap->name, ibits[x], ncpbytes); \
if (error != 0) \
goto done_noproclock; \
for (i = 0; \
i < ncpbytes / sizeof ibits[i][0]; \
i++) \
hibits[i] |= ibits[x][i]; \
} \
} while (0)
getbits(in, 0);
@ -814,7 +801,6 @@ select(td, uap)
atv.tv_sec = 0;
atv.tv_usec = 0;
}
selholddrop(td, hibits, hobits, uap->nd, 1);
timo = 0;
PROC_LOCK(td->td_proc);
retry:
@ -870,7 +856,6 @@ done:
td->td_flags &= ~TDF_SELECT;
mtx_unlock_spin(&sched_lock);
PROC_UNLOCK(td->td_proc);
selholddrop(td, hibits, hobits, uap->nd, 0);
done_noproclock:
/* select is not restarted after signals... */
if (error == ERESTART)
@ -890,65 +875,11 @@ done_noproclock:
}
if (selbits != &s_selbits[0])
free(selbits, M_SELECT);
if (heldbits != &s_heldbits[0])
free(heldbits, M_SELECT);
mtx_unlock(&Giant);
return (error);
}
/*
* Used to hold then release a group of fds for select(2).
* Hold (hold == 1) or release (hold == 0) a group of filedescriptors.
* if holding then use ibits setting the bits in obits, otherwise use obits.
*/
static int
selholddrop(td, ibits, obits, nfd, hold)
struct thread *td;
fd_mask *ibits, *obits;
int nfd, hold;
{
struct filedesc *fdp = td->td_proc->p_fd;
int i, fd;
fd_mask bits;
struct file *fp;
FILEDESC_LOCK(fdp);
for (i = 0; i < nfd; i += NFDBITS) {
if (hold)
bits = ibits[i/NFDBITS];
else
bits = obits[i/NFDBITS];
/* ffs(int mask) not portable, fd_mask is long */
for (fd = i; bits && fd < nfd; fd++, bits >>= 1) {
if (!(bits & 1))
continue;
fp = fdp->fd_ofiles[fd];
if (fp == NULL) {
FILEDESC_UNLOCK(fdp);
return (EBADF);
}
if (hold) {
fhold(fp);
obits[(fd)/NFDBITS] |=
((fd_mask)1 << ((fd) % NFDBITS));
} else {
/* XXX: optimize by making a special
* version of fdrop that only unlocks
* the filedesc if needed? This would
* redcuce the number of lock/unlock
* pairs by quite a bit.
*/
FILEDESC_UNLOCK(fdp);
fdrop(fp, td);
FILEDESC_LOCK(fdp);
}
}
}
FILEDESC_UNLOCK(fdp);
return (0);
}
static int
selscan(td, ibits, obits, nfd)
struct thread *td;
@ -961,7 +892,9 @@ selscan(td, ibits, obits, nfd)
int n = 0;
/* Note: backend also returns POLLHUP/POLLERR if appropriate. */
static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND };
struct filedesc *fdp = td->td_proc->p_fd;
FILEDESC_LOCK(fdp);
for (msk = 0; msk < 3; msk++) {
if (ibits[msk] == NULL)
continue;
@ -971,17 +904,19 @@ selscan(td, ibits, obits, nfd)
for (fd = i; bits && fd < nfd; fd++, bits >>= 1) {
if (!(bits & 1))
continue;
if (fget(td, fd, &fp))
if ((fp = fget_locked(fdp, fd)) == NULL) {
FILEDESC_UNLOCK(fdp);
return (EBADF);
}
if (fo_poll(fp, flag[msk], fp->f_cred, td)) {
obits[msk][(fd)/NFDBITS] |=
((fd_mask)1 << ((fd) % NFDBITS));
n++;
}
fdrop(fp, td);
}
}
}
FILEDESC_UNLOCK(fdp);
td->td_retval[0] = n;
return (0);
}
@ -1010,8 +945,6 @@ poll(td, uap)
int ncoll, error = 0, timo;
u_int nfds;
size_t ni;
struct pollfd p_heldbits[32];
struct pollfd *heldbits;
nfds = SCARG(uap, nfds);
@ -1033,16 +966,9 @@ poll(td, uap)
bits = malloc(ni, M_TEMP, M_WAITOK);
else
bits = smallbits;
if (ni > sizeof(p_heldbits))
heldbits = malloc(ni, M_TEMP, M_WAITOK);
else {
bzero(p_heldbits, sizeof(p_heldbits));
heldbits = p_heldbits;
}
error = copyin(SCARG(uap, fds), bits, ni);
if (error)
goto done_noproclock;
bcopy(bits, heldbits, ni);
if (SCARG(uap, timeout) != INFTIM) {
atv.tv_sec = SCARG(uap, timeout) / 1000;
atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
@ -1056,7 +982,6 @@ poll(td, uap)
atv.tv_sec = 0;
atv.tv_usec = 0;
}
pollholddrop(td, heldbits, nfds, 1);
timo = 0;
PROC_LOCK(td->td_proc);
retry:
@ -1110,7 +1035,6 @@ done:
td->td_flags &= ~TDF_SELECT;
mtx_unlock_spin(&sched_lock);
PROC_UNLOCK(td->td_proc);
pollholddrop(td, heldbits, nfds, 0);
done_noproclock:
/* poll is not restarted after signals... */
if (error == ERESTART)
@ -1125,46 +1049,11 @@ done_noproclock:
out:
if (ni > sizeof(smallbits))
free(bits, M_TEMP);
if (ni > sizeof(p_heldbits))
free(heldbits, M_TEMP);
done2:
mtx_unlock(&Giant);
return (error);
}
static int
pollholddrop(td, fds, nfd, hold)
struct thread *td;
struct pollfd *fds;
u_int nfd;
int hold;
{
register struct filedesc *fdp = td->td_proc->p_fd;
int i;
struct file *fp;
FILEDESC_LOCK(fdp);
for (i = 0; i < nfd; i++, fds++) {
if (0 <= fds->fd && fds->fd < fdp->fd_nfiles) {
fp = fdp->fd_ofiles[fds->fd];
if (hold) {
if (fp != NULL) {
fhold(fp);
fds->revents = 1;
} else
fds->revents = 0;
} else if(fp != NULL && fds->revents) {
FILE_LOCK(fp);
FILEDESC_UNLOCK(fdp);
fdrop_locked(fp, td);
FILEDESC_LOCK(fdp);
}
}
}
FILEDESC_UNLOCK(fdp);
return (0);
}
static int
pollscan(td, fds, nfd)
struct thread *td;
@ -1176,18 +1065,15 @@ pollscan(td, fds, nfd)
struct file *fp;
int n = 0;
FILEDESC_LOCK(fdp);
for (i = 0; i < nfd; i++, fds++) {
FILEDESC_LOCK(fdp);
if (fds->fd >= fdp->fd_nfiles) {
fds->revents = POLLNVAL;
n++;
FILEDESC_UNLOCK(fdp);
} else if (fds->fd < 0) {
fds->revents = 0;
FILEDESC_UNLOCK(fdp);
} else {
fp = fdp->fd_ofiles[fds->fd];
FILEDESC_UNLOCK(fdp);
if (fp == NULL) {
fds->revents = POLLNVAL;
n++;
@ -1203,6 +1089,7 @@ pollscan(td, fds, nfd)
}
}
}
FILEDESC_UNLOCK(fdp);
td->td_retval[0] = n;
return (0);
}

View File

@ -153,8 +153,20 @@ int fsetown __P((pid_t pgid, struct sigio **sigiop));
void funsetown __P((struct sigio *sigio));
void funsetownlst __P((struct sigiolst *sigiolst));
int getvnode __P((struct filedesc *fdp, int fd, struct file **fpp));
static __inline struct file * fget_locked(struct filedesc *, int);
void setugidsafety __P((struct thread *td));
static __inline struct file *
fget_locked(fdp, fd)
struct filedesc *fdp;
int fd;
{
if (fd < 0 || (u_int)fd >= fdp->fd_nfiles)
return (NULL);
return (fdp->fd_ofiles[fd]);
}
#endif /* _KERNEL */
#endif /* !_SYS_FILEDESC_H_ */