- Convert msleep(9) in select(2) and poll(2) to cv_*wait*(9).

- Since polling should not involve sleeping, keep holding a
  process lock upon scanning file descriptors.

- Hold a reference to every file descriptor prior to entering
  polling loop in order to avoid lock order reversal between
  lockmgr and p_mtx upon calling fdrop() in fo_poll().
  (NOTE: this work has not been done for netncp and netsmb
  yet because a socket itself has no reference counts.)

Reviewed by:	jhb
This commit is contained in:
tanimura 2001-05-14 05:26:48 +00:00
parent ee36805984
commit 90ac553bec
4 changed files with 158 additions and 59 deletions

View File

@ -61,6 +61,7 @@
#include <sys/sysent.h> #include <sys/sysent.h>
#include <sys/bio.h> #include <sys/bio.h>
#include <sys/buf.h> #include <sys/buf.h>
#include <sys/condvar.h>
#ifdef KTRACE #ifdef KTRACE
#include <sys/ktrace.h> #include <sys/ktrace.h>
#endif #endif
@ -74,7 +75,9 @@ static MALLOC_DEFINE(M_SELECT, "select", "select() buffer");
MALLOC_DEFINE(M_IOV, "iov", "large iov's"); MALLOC_DEFINE(M_IOV, "iov", "large iov's");
static int pollscan __P((struct proc *, struct pollfd *, u_int)); static int pollscan __P((struct proc *, struct pollfd *, u_int));
static int pollholddrop __P((struct proc *, struct pollfd *, u_int, int));
static int selscan __P((struct proc *, fd_mask **, fd_mask **, int)); static int selscan __P((struct proc *, fd_mask **, fd_mask **, int));
static int selholddrop __P((struct proc *, fd_mask *, fd_mask *, int, int));
static int dofileread __P((struct proc *, struct file *, int, void *, static int dofileread __P((struct proc *, struct file *, int, void *,
size_t, off_t, int)); size_t, off_t, int));
static int dofilewrite __P((struct proc *, struct file *, int, static int dofilewrite __P((struct proc *, struct file *, int,
@ -653,7 +656,7 @@ ioctl(p, uap)
} }
static int nselcoll; /* Select collisions since boot */ static int nselcoll; /* Select collisions since boot */
int selwait; struct cv selwait;
SYSCTL_INT(_kern, OID_AUTO, nselcoll, CTLFLAG_RD, &nselcoll, 0, ""); SYSCTL_INT(_kern, OID_AUTO, nselcoll, CTLFLAG_RD, &nselcoll, 0, "");
/* /*
@ -678,9 +681,10 @@ select(p, uap)
* of 256. * of 256.
*/ */
fd_mask s_selbits[howmany(2048, NFDBITS)]; fd_mask s_selbits[howmany(2048, NFDBITS)];
fd_mask *ibits[3], *obits[3], *selbits, *sbp; fd_mask s_heldbits[howmany(2048, NFDBITS)];
fd_mask *ibits[3], *obits[3], *selbits, *sbp, *heldbits, *hibits, *hobits;
struct timeval atv, rtv, ttv; struct timeval atv, rtv, ttv;
int s, ncoll, error, timo; int ncoll, error, timo, i;
u_int nbufbytes, ncpbytes, nfdbits; u_int nbufbytes, ncpbytes, nfdbits;
if (uap->nd < 0) if (uap->nd < 0)
@ -705,6 +709,11 @@ select(p, uap)
selbits = &s_selbits[0]; selbits = &s_selbits[0];
else else
selbits = malloc(nbufbytes, M_SELECT, M_WAITOK); selbits = malloc(nbufbytes, M_SELECT, M_WAITOK);
if (2 * ncpbytes <= sizeof s_heldbits) {
bzero(s_heldbits, sizeof(s_heldbits));
heldbits = &s_heldbits[0];
} else
heldbits = malloc(2 * ncpbytes, M_SELECT, M_WAITOK | M_ZERO);
/* /*
* Assign pointers into the bit buffers and fetch the input bits. * Assign pointers into the bit buffers and fetch the input bits.
@ -712,6 +721,8 @@ select(p, uap)
* together. * together.
*/ */
sbp = selbits; sbp = selbits;
hibits = heldbits + ncpbytes / sizeof *heldbits;
hobits = heldbits;
#define getbits(name, x) \ #define getbits(name, x) \
do { \ do { \
if (uap->name == NULL) \ if (uap->name == NULL) \
@ -721,10 +732,12 @@ select(p, uap)
obits[x] = sbp; \ obits[x] = sbp; \
sbp += ncpbytes / sizeof *sbp; \ sbp += ncpbytes / sizeof *sbp; \
error = copyin(uap->name, ibits[x], ncpbytes); \ error = copyin(uap->name, ibits[x], ncpbytes); \
if (error != 0) { \ if (error != 0) \
PROC_LOCK(p); \ goto done_noproclock; \
goto done; \ for (i = 0; \
} \ i < ncpbytes / sizeof ibits[i][0]; \
i++) \
hibits[i] |= ibits[x][i]; \
} \ } \
} while (0) } while (0)
getbits(in, 0); getbits(in, 0);
@ -737,14 +750,11 @@ select(p, uap)
if (uap->tv) { if (uap->tv) {
error = copyin((caddr_t)uap->tv, (caddr_t)&atv, error = copyin((caddr_t)uap->tv, (caddr_t)&atv,
sizeof (atv)); sizeof (atv));
if (error) { if (error)
PROC_LOCK(p); goto done_noproclock;
goto done;
}
if (itimerfix(&atv)) { if (itimerfix(&atv)) {
error = EINVAL; error = EINVAL;
PROC_LOCK(p); goto done_noproclock;
goto done;
} }
getmicrouptime(&rtv); getmicrouptime(&rtv);
timevaladd(&atv, &rtv); timevaladd(&atv, &rtv);
@ -752,41 +762,39 @@ select(p, uap)
atv.tv_sec = 0; atv.tv_sec = 0;
atv.tv_usec = 0; atv.tv_usec = 0;
} }
selholddrop(p, hibits, hobits, uap->nd, 1);
timo = 0; timo = 0;
PROC_LOCK(p); PROC_LOCK(p);
retry: retry:
ncoll = nselcoll; ncoll = nselcoll;
p->p_flag |= P_SELECT; p->p_flag |= P_SELECT;
PROC_UNLOCK(p);
error = selscan(p, ibits, obits, uap->nd); error = selscan(p, ibits, obits, uap->nd);
PROC_LOCK(p);
if (error || p->p_retval[0]) if (error || p->p_retval[0])
goto done; goto done;
if (atv.tv_sec || atv.tv_usec) { if (atv.tv_sec || atv.tv_usec) {
getmicrouptime(&rtv); getmicrouptime(&rtv);
if (timevalcmp(&rtv, &atv, >=)) if (timevalcmp(&rtv, &atv, >=))
goto done; goto done;
ttv = atv; ttv = atv;
timevalsub(&ttv, &rtv); timevalsub(&ttv, &rtv);
timo = ttv.tv_sec > 24 * 60 * 60 ? timo = ttv.tv_sec > 24 * 60 * 60 ?
24 * 60 * 60 * hz : tvtohz(&ttv); 24 * 60 * 60 * hz : tvtohz(&ttv);
} }
s = splhigh();
if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
splx(s);
goto retry;
}
p->p_flag &= ~P_SELECT; p->p_flag &= ~P_SELECT;
error = msleep((caddr_t)&selwait, &p->p_mtx, PSOCK | PCATCH, "select", if (timo > 0)
timo); error = cv_timedwait_sig(&selwait, &p->p_mtx, timo);
else
error = cv_wait_sig(&selwait, &p->p_mtx);
splx(s);
if (error == 0) if (error == 0)
goto retry; goto retry;
done: done:
p->p_flag &= ~P_SELECT; p->p_flag &= ~P_SELECT;
PROC_UNLOCK(p); PROC_UNLOCK(p);
selholddrop(p, hibits, hobits, uap->nd, 0);
done_noproclock:
/* select is not restarted after signals... */ /* select is not restarted after signals... */
if (error == ERESTART) if (error == ERESTART)
error = EINTR; error = EINTR;
@ -805,9 +813,45 @@ done:
} }
if (selbits != &s_selbits[0]) if (selbits != &s_selbits[0])
free(selbits, M_SELECT); free(selbits, M_SELECT);
if (heldbits != &s_heldbits[0])
free(heldbits, M_SELECT);
return (error); return (error);
} }
static int
selholddrop(p, ibits, obits, nfd, hold)
struct proc *p;
fd_mask *ibits, *obits;
int nfd, hold;
{
struct filedesc *fdp = p->p_fd;
int i, fd;
fd_mask bits;
struct file *fp;
for (i = 0; i < nfd; i += NFDBITS) {
if (hold)
bits = ibits[i/NFDBITS];
else
bits = obits[i/NFDBITS];
/* ffs(int mask) not portable, fd_mask is long */
for (fd = i; bits && fd < nfd; fd++, bits >>= 1) {
if (!(bits & 1))
continue;
fp = fdp->fd_ofiles[fd];
if (fp == NULL)
return (EBADF);
if (hold) {
fhold(fp);
obits[(fd)/NFDBITS] |=
((fd_mask)1 << ((fd) % NFDBITS));
} else
fdrop(fp, p);
}
}
return (0);
}
static int static int
selscan(p, ibits, obits, nfd) selscan(p, ibits, obits, nfd)
struct proc *p; struct proc *p;
@ -864,9 +908,11 @@ poll(p, uap)
caddr_t bits; caddr_t bits;
char smallbits[32 * sizeof(struct pollfd)]; char smallbits[32 * sizeof(struct pollfd)];
struct timeval atv, rtv, ttv; struct timeval atv, rtv, ttv;
int s, ncoll, error = 0, timo; int ncoll, error = 0, timo;
u_int nfds; u_int nfds;
size_t ni; size_t ni;
struct pollfd p_heldbits[32];
struct pollfd *heldbits;
nfds = SCARG(uap, nfds); nfds = SCARG(uap, nfds);
/* /*
@ -883,16 +929,22 @@ poll(p, uap)
bits = malloc(ni, M_TEMP, M_WAITOK); bits = malloc(ni, M_TEMP, M_WAITOK);
else else
bits = smallbits; bits = smallbits;
if (ni > sizeof(p_heldbits))
heldbits = malloc(ni, M_TEMP, M_WAITOK);
else {
bzero(p_heldbits, sizeof(p_heldbits));
heldbits = p_heldbits;
}
error = copyin(SCARG(uap, fds), bits, ni); error = copyin(SCARG(uap, fds), bits, ni);
PROC_LOCK(p);
if (error) if (error)
goto done; goto done_noproclock;
bcopy(bits, heldbits, ni);
if (SCARG(uap, timeout) != INFTIM) { if (SCARG(uap, timeout) != INFTIM) {
atv.tv_sec = SCARG(uap, timeout) / 1000; atv.tv_sec = SCARG(uap, timeout) / 1000;
atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000; atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
if (itimerfix(&atv)) { if (itimerfix(&atv)) {
error = EINVAL; error = EINVAL;
goto done; goto done_noproclock;
} }
getmicrouptime(&rtv); getmicrouptime(&rtv);
timevaladd(&atv, &rtv); timevaladd(&atv, &rtv);
@ -900,13 +952,13 @@ poll(p, uap)
atv.tv_sec = 0; atv.tv_sec = 0;
atv.tv_usec = 0; atv.tv_usec = 0;
} }
pollholddrop(p, heldbits, nfds, 1);
timo = 0; timo = 0;
PROC_LOCK(p);
retry: retry:
ncoll = nselcoll; ncoll = nselcoll;
p->p_flag |= P_SELECT; p->p_flag |= P_SELECT;
PROC_UNLOCK(p);
error = pollscan(p, (struct pollfd *)bits, nfds); error = pollscan(p, (struct pollfd *)bits, nfds);
PROC_LOCK(p);
if (error || p->p_retval[0]) if (error || p->p_retval[0])
goto done; goto done;
if (atv.tv_sec || atv.tv_usec) { if (atv.tv_sec || atv.tv_usec) {
@ -917,21 +969,20 @@ retry:
timevalsub(&ttv, &rtv); timevalsub(&ttv, &rtv);
timo = ttv.tv_sec > 24 * 60 * 60 ? timo = ttv.tv_sec > 24 * 60 * 60 ?
24 * 60 * 60 * hz : tvtohz(&ttv); 24 * 60 * 60 * hz : tvtohz(&ttv);
}
s = splhigh();
if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
splx(s);
goto retry;
} }
p->p_flag &= ~P_SELECT; p->p_flag &= ~P_SELECT;
error = msleep((caddr_t)&selwait, &p->p_mtx, PSOCK | PCATCH, "poll", if (timo > 0)
timo); error = cv_timedwait_sig(&selwait, &p->p_mtx, timo);
splx(s); else
error = cv_wait_sig(&selwait, &p->p_mtx);
if (error == 0) if (error == 0)
goto retry; goto retry;
done: done:
p->p_flag &= ~P_SELECT; p->p_flag &= ~P_SELECT;
PROC_UNLOCK(p); PROC_UNLOCK(p);
pollholddrop(p, heldbits, nfds, 0);
done_noproclock:
/* poll is not restarted after signals... */ /* poll is not restarted after signals... */
if (error == ERESTART) if (error == ERESTART)
error = EINTR; error = EINTR;
@ -945,9 +996,38 @@ done:
out: out:
if (ni > sizeof(smallbits)) if (ni > sizeof(smallbits))
free(bits, M_TEMP); free(bits, M_TEMP);
if (ni > sizeof(p_heldbits))
free(heldbits, M_TEMP);
return (error); return (error);
} }
static int
pollholddrop(p, fds, nfd, hold)
struct proc *p;
struct pollfd *fds;
u_int nfd;
int hold;
{
register struct filedesc *fdp = p->p_fd;
int i;
struct file *fp;
for (i = 0; i < nfd; i++, fds++) {
if (0 <= fds->fd && fds->fd < fdp->fd_nfiles) {
fp = fdp->fd_ofiles[fds->fd];
if (hold) {
if (fp != NULL) {
fhold(fp);
fds->revents = 1;
} else
fds->revents = 0;
} else if(fp != NULL && fds->revents)
fdrop(fp, p);
}
}
return (0);
}
static int static int
pollscan(p, fds, nfd) pollscan(p, fds, nfd)
struct proc *p; struct proc *p;
@ -1058,7 +1138,7 @@ selwakeup(sip)
if (sip->si_flags & SI_COLL) { if (sip->si_flags & SI_COLL) {
nselcoll++; nselcoll++;
sip->si_flags &= ~SI_COLL; sip->si_flags &= ~SI_COLL;
wakeup((caddr_t)&selwait); cv_broadcast(&selwait);
} }
p = pfind(sip->si_pid); p = pfind(sip->si_pid);
sip->si_pid = 0; sip->si_pid = 0;
@ -1068,10 +1148,21 @@ selwakeup(sip)
if (p->p_stat == SSLEEP) if (p->p_stat == SSLEEP)
setrunnable(p); setrunnable(p);
else else
unsleep(p); cv_waitq_remove(p);
} else } else
p->p_flag &= ~P_SELECT; p->p_flag &= ~P_SELECT;
mtx_unlock_spin(&sched_lock); mtx_unlock_spin(&sched_lock);
PROC_UNLOCK(p); PROC_UNLOCK(p);
} }
} }
static void selectinit __P((void *));
SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, selectinit, NULL)
/* ARGSUSED*/
static void
selectinit(dummy)
void *dummy;
{
cv_init(&selwait, "select");
}

View File

@ -46,6 +46,7 @@
#include <sys/uio.h> #include <sys/uio.h>
#include <sys/syslog.h> #include <sys/syslog.h>
#include <sys/mbuf.h> #include <sys/mbuf.h>
#include <sys/condvar.h>
#include <net/route.h> #include <net/route.h>
#include <netipx/ipx.h> #include <netipx/ipx.h>
@ -188,18 +189,19 @@ int
ncp_sock_rselect(struct socket *so,struct proc *p, struct timeval *tv, int events) ncp_sock_rselect(struct socket *so,struct proc *p, struct timeval *tv, int events)
{ {
struct timeval atv,rtv,ttv; struct timeval atv,rtv,ttv;
int s,timo,error=0; int timo,error=0;
if (tv) { if (tv) {
atv=*tv; atv=*tv;
if (itimerfix(&atv)) { if (itimerfix(&atv)) {
error = EINVAL; error = EINVAL;
goto done; goto done_noproclock;
} }
getmicrouptime(&rtv); getmicrouptime(&rtv);
timevaladd(&atv, &rtv); timevaladd(&atv, &rtv);
} }
timo = 0; timo = 0;
PROC_LOCK(p);
retry: retry:
p->p_flag |= P_SELECT; p->p_flag |= P_SELECT;
error = ncp_poll(so, events); error = ncp_poll(so, events);
@ -215,16 +217,18 @@ retry:
timevalsub(&ttv, &rtv); timevalsub(&ttv, &rtv);
timo = tvtohz(&ttv); timo = tvtohz(&ttv);
} }
s = splhigh();
if ((p->p_flag & P_SELECT) == 0) {
splx(s);
goto retry;
}
p->p_flag &= ~P_SELECT; p->p_flag &= ~P_SELECT;
error = tsleep((caddr_t)&selwait, PSOCK, "ncpslt", timo); if (timo > 0)
splx(s); error = cv_timedwait(&selwait, &p->p_mtx, timo);
else {
cv_wait(&selwait, &p->p_mtx);
error = 0;
}
done: done:
p->p_flag &= ~P_SELECT; p->p_flag &= ~P_SELECT;
PROC_UNLOCK(p);
done_noproclock:
if (error == ERESTART) { if (error == ERESTART) {
/* printf("Signal: %x", CURSIG(p));*/ /* printf("Signal: %x", CURSIG(p));*/
error = 0; error = 0;

View File

@ -43,6 +43,7 @@
#include <sys/poll.h> #include <sys/poll.h>
#include <sys/uio.h> #include <sys/uio.h>
#include <sys/sysctl.h> #include <sys/sysctl.h>
#include <sys/condvar.h>
#include <net/if.h> #include <net/if.h>
#include <net/route.h> #include <net/route.h>
@ -100,18 +101,19 @@ static int
nbssn_rselect(struct nbpcb *nbp, struct timeval *tv, int events, struct proc *p) nbssn_rselect(struct nbpcb *nbp, struct timeval *tv, int events, struct proc *p)
{ {
struct timeval atv, rtv, ttv; struct timeval atv, rtv, ttv;
int s, timo, error; int timo, error;
if (tv) { if (tv) {
atv = *tv; atv = *tv;
if (itimerfix(&atv)) { if (itimerfix(&atv)) {
error = EINVAL; error = EINVAL;
goto done; goto done_noproclock;
} }
getmicrouptime(&rtv); getmicrouptime(&rtv);
timevaladd(&atv, &rtv); timevaladd(&atv, &rtv);
} }
timo = 0; timo = 0;
PROC_LOCK(p);
retry: retry:
p->p_flag |= P_SELECT; p->p_flag |= P_SELECT;
error = nb_poll(nbp, events, p); error = nb_poll(nbp, events, p);
@ -127,16 +129,18 @@ retry:
timevalsub(&ttv, &rtv); timevalsub(&ttv, &rtv);
timo = tvtohz(&ttv); timo = tvtohz(&ttv);
} }
s = splhigh(); p->p_flag &= ~P_SELECT;
if ((p->p_flag & P_SELECT) == 0) { if (timo > 0)
splx(s); error = cv_timedwait(&selwait, &p->p_mtx, timo);
goto retry; else {
cv_wait(&selwait, &p->p_mtx);
error = 0;
} }
p->p_flag &= ~P_SELECT;
error = tsleep((caddr_t)&selwait, PSOCK, "nbsel", timo);
splx(s);
done: done:
PROC_UNLOCK(p);
p->p_flag &= ~P_SELECT; p->p_flag &= ~P_SELECT;
done_noproclock:
if (error == ERESTART) if (error == ERESTART)
return 0; return 0;
return error; return error;

View File

@ -56,7 +56,7 @@ extern char copyright[]; /* system copyright */
extern int nswap; /* size of swap space */ extern int nswap; /* size of swap space */
extern int selwait; /* select timeout address */ extern struct cv selwait; /* select conditional variable */
extern int physmem; /* physical memory */ extern int physmem; /* physical memory */