- Convert msleep(9) in select(2) and poll(2) to cv_*wait*(9).

- Since polling should not involve sleeping, keep holding the
  process lock while scanning file descriptors.

- Hold a reference to every file descriptor before entering the
  polling loop in order to avoid a lock order reversal between
  lockmgr and p_mtx when fdrop() is called in fo_poll().
  (NOTE: this work has not been done for netncp and netsmb
  yet because a socket itself has no reference count.)

Reviewed by:	jhb
This commit is contained in:
Seigo Tanimura 2001-05-14 05:26:48 +00:00
parent 2988afca88
commit 265fc98f36
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=76564
4 changed files with 158 additions and 59 deletions

View File

@ -61,6 +61,7 @@
#include <sys/sysent.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/condvar.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif
@ -74,7 +75,9 @@ static MALLOC_DEFINE(M_SELECT, "select", "select() buffer");
MALLOC_DEFINE(M_IOV, "iov", "large iov's");
static int pollscan __P((struct proc *, struct pollfd *, u_int));
static int pollholddrop __P((struct proc *, struct pollfd *, u_int, int));
static int selscan __P((struct proc *, fd_mask **, fd_mask **, int));
static int selholddrop __P((struct proc *, fd_mask *, fd_mask *, int, int));
static int dofileread __P((struct proc *, struct file *, int, void *,
size_t, off_t, int));
static int dofilewrite __P((struct proc *, struct file *, int,
@ -653,7 +656,7 @@ ioctl(p, uap)
}
static int nselcoll; /* Select collisions since boot */
int selwait;
struct cv selwait;
SYSCTL_INT(_kern, OID_AUTO, nselcoll, CTLFLAG_RD, &nselcoll, 0, "");
/*
@ -678,9 +681,10 @@ select(p, uap)
* of 256.
*/
fd_mask s_selbits[howmany(2048, NFDBITS)];
fd_mask *ibits[3], *obits[3], *selbits, *sbp;
fd_mask s_heldbits[howmany(2048, NFDBITS)];
fd_mask *ibits[3], *obits[3], *selbits, *sbp, *heldbits, *hibits, *hobits;
struct timeval atv, rtv, ttv;
int s, ncoll, error, timo;
int ncoll, error, timo, i;
u_int nbufbytes, ncpbytes, nfdbits;
if (uap->nd < 0)
@ -705,6 +709,11 @@ select(p, uap)
selbits = &s_selbits[0];
else
selbits = malloc(nbufbytes, M_SELECT, M_WAITOK);
if (2 * ncpbytes <= sizeof s_heldbits) {
bzero(s_heldbits, sizeof(s_heldbits));
heldbits = &s_heldbits[0];
} else
heldbits = malloc(2 * ncpbytes, M_SELECT, M_WAITOK | M_ZERO);
/*
* Assign pointers into the bit buffers and fetch the input bits.
@ -712,6 +721,8 @@ select(p, uap)
* together.
*/
sbp = selbits;
hibits = heldbits + ncpbytes / sizeof *heldbits;
hobits = heldbits;
#define getbits(name, x) \
do { \
if (uap->name == NULL) \
@ -721,10 +732,12 @@ select(p, uap)
obits[x] = sbp; \
sbp += ncpbytes / sizeof *sbp; \
error = copyin(uap->name, ibits[x], ncpbytes); \
if (error != 0) { \
PROC_LOCK(p); \
goto done; \
} \
if (error != 0) \
goto done_noproclock; \
for (i = 0; \
i < ncpbytes / sizeof ibits[i][0]; \
i++) \
hibits[i] |= ibits[x][i]; \
} \
} while (0)
getbits(in, 0);
@ -737,14 +750,11 @@ select(p, uap)
if (uap->tv) {
error = copyin((caddr_t)uap->tv, (caddr_t)&atv,
sizeof (atv));
if (error) {
PROC_LOCK(p);
goto done;
}
if (error)
goto done_noproclock;
if (itimerfix(&atv)) {
error = EINVAL;
PROC_LOCK(p);
goto done;
goto done_noproclock;
}
getmicrouptime(&rtv);
timevaladd(&atv, &rtv);
@ -752,41 +762,39 @@ select(p, uap)
atv.tv_sec = 0;
atv.tv_usec = 0;
}
selholddrop(p, hibits, hobits, uap->nd, 1);
timo = 0;
PROC_LOCK(p);
retry:
ncoll = nselcoll;
p->p_flag |= P_SELECT;
PROC_UNLOCK(p);
error = selscan(p, ibits, obits, uap->nd);
PROC_LOCK(p);
if (error || p->p_retval[0])
goto done;
if (atv.tv_sec || atv.tv_usec) {
getmicrouptime(&rtv);
if (timevalcmp(&rtv, &atv, >=))
if (timevalcmp(&rtv, &atv, >=))
goto done;
ttv = atv;
timevalsub(&ttv, &rtv);
timo = ttv.tv_sec > 24 * 60 * 60 ?
24 * 60 * 60 * hz : tvtohz(&ttv);
}
s = splhigh();
if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
splx(s);
goto retry;
}
p->p_flag &= ~P_SELECT;
error = msleep((caddr_t)&selwait, &p->p_mtx, PSOCK | PCATCH, "select",
timo);
if (timo > 0)
error = cv_timedwait_sig(&selwait, &p->p_mtx, timo);
else
error = cv_wait_sig(&selwait, &p->p_mtx);
splx(s);
if (error == 0)
goto retry;
done:
p->p_flag &= ~P_SELECT;
PROC_UNLOCK(p);
selholddrop(p, hibits, hobits, uap->nd, 0);
done_noproclock:
/* select is not restarted after signals... */
if (error == ERESTART)
error = EINTR;
@ -805,9 +813,45 @@ select(p, uap)
}
if (selbits != &s_selbits[0])
free(selbits, M_SELECT);
if (heldbits != &s_heldbits[0])
free(heldbits, M_SELECT);
return (error);
}
/*
 * Take (hold != 0) or release (hold == 0) a reference on the file behind
 * each descriptor selected in a descriptor bitmap covering [0, nfd).
 *
 * hold != 0: the descriptors to process are read from ibits.  Each file
 *	found is fhold()'ed and its descriptor is recorded in obits, so
 *	that a later drop pass releases exactly the references taken here.
 *	Scanning stops at the first selected descriptor with no open file
 *	and EBADF is returned.
 * hold == 0: the descriptors to process are read from obits and each
 *	file found is fdrop()'ed.  An empty slot is skipped (and reported
 *	as EBADF at the end) rather than ending the scan, so the remaining
 *	held references are still released.
 *
 * NOTE(review): the drop pass looks files up by descriptor number again;
 * it presumably relies on the table not having changed since the hold
 * pass -- confirm against the callers' locking.
 *
 * Returns 0 on success, or EBADF as described above.
 */
static int
selholddrop(p, ibits, obits, nfd, hold)
	struct proc *p;
	fd_mask *ibits, *obits;
	int nfd, hold;
{
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	unsigned long bits;
	int i, fd, error;

	error = 0;
	for (i = 0; i < nfd; i += NFDBITS) {
		/*
		 * fd_mask is a (signed) long.  Shift an unsigned copy so that
		 * a word with its top bit set is not sign-extended: otherwise
		 * "bits" never reaches zero and the inner loop walks on into
		 * descriptors that belong to the following words.
		 */
		if (hold)
			bits = (unsigned long)ibits[i / NFDBITS];
		else
			bits = (unsigned long)obits[i / NFDBITS];
		/* ffs(int mask) not portable, fd_mask is long */
		for (fd = i; bits != 0 && fd < nfd; fd++, bits >>= 1) {
			if ((bits & 1) == 0)
				continue;
			fp = fdp->fd_ofiles[fd];
			if (fp == NULL) {
				if (hold)
					return (EBADF);
				/* Keep dropping the rest; do not leak them. */
				error = EBADF;
				continue;
			}
			if (hold) {
				fhold(fp);
				obits[fd / NFDBITS] |=
				    ((fd_mask)1 << (fd % NFDBITS));
			} else
				fdrop(fp, p);
		}
	}
	return (error);
}
static int
selscan(p, ibits, obits, nfd)
struct proc *p;
@ -864,9 +908,11 @@ poll(p, uap)
caddr_t bits;
char smallbits[32 * sizeof(struct pollfd)];
struct timeval atv, rtv, ttv;
int s, ncoll, error = 0, timo;
int ncoll, error = 0, timo;
u_int nfds;
size_t ni;
struct pollfd p_heldbits[32];
struct pollfd *heldbits;
nfds = SCARG(uap, nfds);
/*
@ -883,16 +929,22 @@ poll(p, uap)
bits = malloc(ni, M_TEMP, M_WAITOK);
else
bits = smallbits;
if (ni > sizeof(p_heldbits))
heldbits = malloc(ni, M_TEMP, M_WAITOK);
else {
bzero(p_heldbits, sizeof(p_heldbits));
heldbits = p_heldbits;
}
error = copyin(SCARG(uap, fds), bits, ni);
PROC_LOCK(p);
if (error)
goto done;
goto done_noproclock;
bcopy(bits, heldbits, ni);
if (SCARG(uap, timeout) != INFTIM) {
atv.tv_sec = SCARG(uap, timeout) / 1000;
atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
if (itimerfix(&atv)) {
error = EINVAL;
goto done;
goto done_noproclock;
}
getmicrouptime(&rtv);
timevaladd(&atv, &rtv);
@ -900,13 +952,13 @@ poll(p, uap)
atv.tv_sec = 0;
atv.tv_usec = 0;
}
pollholddrop(p, heldbits, nfds, 1);
timo = 0;
PROC_LOCK(p);
retry:
ncoll = nselcoll;
p->p_flag |= P_SELECT;
PROC_UNLOCK(p);
error = pollscan(p, (struct pollfd *)bits, nfds);
PROC_LOCK(p);
if (error || p->p_retval[0])
goto done;
if (atv.tv_sec || atv.tv_usec) {
@ -917,21 +969,20 @@ poll(p, uap)
timevalsub(&ttv, &rtv);
timo = ttv.tv_sec > 24 * 60 * 60 ?
24 * 60 * 60 * hz : tvtohz(&ttv);
}
s = splhigh();
if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
splx(s);
goto retry;
}
p->p_flag &= ~P_SELECT;
error = msleep((caddr_t)&selwait, &p->p_mtx, PSOCK | PCATCH, "poll",
timo);
splx(s);
if (timo > 0)
error = cv_timedwait_sig(&selwait, &p->p_mtx, timo);
else
error = cv_wait_sig(&selwait, &p->p_mtx);
if (error == 0)
goto retry;
done:
p->p_flag &= ~P_SELECT;
PROC_UNLOCK(p);
pollholddrop(p, heldbits, nfds, 0);
done_noproclock:
/* poll is not restarted after signals... */
if (error == ERESTART)
error = EINTR;
@ -945,9 +996,38 @@ poll(p, uap)
out:
if (ni > sizeof(smallbits))
free(bits, M_TEMP);
if (ni > sizeof(p_heldbits))
free(heldbits, M_TEMP);
return (error);
}
/*
 * Take (hold != 0) or release (hold == 0) a reference on the file behind
 * each entry of a pollfd array.
 *
 * The revents member of each entry is used here as a private "reference
 * held" marker, not as a poll result; poll() in this file passes a
 * scratch copy of the user's array for this purpose.
 *
 * hold != 0: every entry's marker is cleared first, then set to 1 for
 *	each in-range descriptor whose file was fhold()'ed.  Clearing
 *	unconditionally matters: the array is copied in from userland, so
 *	an out-of-range entry could otherwise keep a caller-supplied
 *	non-zero revents and be mistaken for a held reference later.
 * hold == 0: fdrop() the file of every in-range entry whose marker is
 *	set and whose slot is still occupied.
 *
 * Always returns 0.
 */
static int
pollholddrop(p, fds, nfd, hold)
	struct proc *p;
	struct pollfd *fds;
	u_int nfd;
	int hold;
{
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	u_int i;	/* unsigned, to match nfd */

	for (i = 0; i < nfd; i++, fds++) {
		if (hold)
			fds->revents = 0;
		if (fds->fd < 0 || fds->fd >= fdp->fd_nfiles)
			continue;
		fp = fdp->fd_ofiles[fds->fd];
		if (hold) {
			if (fp != NULL) {
				fhold(fp);
				fds->revents = 1;
			}
		} else if (fp != NULL && fds->revents)
			fdrop(fp, p);
	}
	return (0);
}
static int
pollscan(p, fds, nfd)
struct proc *p;
@ -1058,7 +1138,7 @@ selwakeup(sip)
if (sip->si_flags & SI_COLL) {
nselcoll++;
sip->si_flags &= ~SI_COLL;
wakeup((caddr_t)&selwait);
cv_broadcast(&selwait);
}
p = pfind(sip->si_pid);
sip->si_pid = 0;
@ -1068,10 +1148,21 @@ selwakeup(sip)
if (p->p_stat == SSLEEP)
setrunnable(p);
else
unsleep(p);
cv_waitq_remove(p);
} else
p->p_flag &= ~P_SELECT;
mtx_unlock_spin(&sched_lock);
PROC_UNLOCK(p);
}
}
static void selectinit __P((void *));
SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, selectinit, NULL)
/*
 * Initialise the selwait condition variable, giving it the description
 * string "select".  Registered through the SYSINIT() entry for this
 * function; the argument is not used.
 */
/* ARGSUSED */
static void
selectinit(dummy)
	void *dummy;
{

	cv_init(&selwait, "select");
}

View File

@ -46,6 +46,7 @@
#include <sys/uio.h>
#include <sys/syslog.h>
#include <sys/mbuf.h>
#include <sys/condvar.h>
#include <net/route.h>
#include <netipx/ipx.h>
@ -188,18 +189,19 @@ int
ncp_sock_rselect(struct socket *so,struct proc *p, struct timeval *tv, int events)
{
struct timeval atv,rtv,ttv;
int s,timo,error=0;
int timo,error=0;
if (tv) {
atv=*tv;
if (itimerfix(&atv)) {
error = EINVAL;
goto done;
goto done_noproclock;
}
getmicrouptime(&rtv);
timevaladd(&atv, &rtv);
}
timo = 0;
PROC_LOCK(p);
retry:
p->p_flag |= P_SELECT;
error = ncp_poll(so, events);
@ -215,16 +217,18 @@ ncp_sock_rselect(struct socket *so,struct proc *p, struct timeval *tv, int event
timevalsub(&ttv, &rtv);
timo = tvtohz(&ttv);
}
s = splhigh();
if ((p->p_flag & P_SELECT) == 0) {
splx(s);
goto retry;
}
p->p_flag &= ~P_SELECT;
error = tsleep((caddr_t)&selwait, PSOCK, "ncpslt", timo);
splx(s);
if (timo > 0)
error = cv_timedwait(&selwait, &p->p_mtx, timo);
else {
cv_wait(&selwait, &p->p_mtx);
error = 0;
}
done:
p->p_flag &= ~P_SELECT;
PROC_UNLOCK(p);
done_noproclock:
if (error == ERESTART) {
/* printf("Signal: %x", CURSIG(p));*/
error = 0;

View File

@ -43,6 +43,7 @@
#include <sys/poll.h>
#include <sys/uio.h>
#include <sys/sysctl.h>
#include <sys/condvar.h>
#include <net/if.h>
#include <net/route.h>
@ -100,18 +101,19 @@ static int
nbssn_rselect(struct nbpcb *nbp, struct timeval *tv, int events, struct proc *p)
{
struct timeval atv, rtv, ttv;
int s, timo, error;
int timo, error;
if (tv) {
atv = *tv;
if (itimerfix(&atv)) {
error = EINVAL;
goto done;
goto done_noproclock;
}
getmicrouptime(&rtv);
timevaladd(&atv, &rtv);
}
timo = 0;
PROC_LOCK(p);
retry:
p->p_flag |= P_SELECT;
error = nb_poll(nbp, events, p);
@ -127,16 +129,18 @@ nbssn_rselect(struct nbpcb *nbp, struct timeval *tv, int events, struct proc *p)
timevalsub(&ttv, &rtv);
timo = tvtohz(&ttv);
}
s = splhigh();
if ((p->p_flag & P_SELECT) == 0) {
splx(s);
goto retry;
p->p_flag &= ~P_SELECT;
if (timo > 0)
error = cv_timedwait(&selwait, &p->p_mtx, timo);
else {
cv_wait(&selwait, &p->p_mtx);
error = 0;
}
p->p_flag &= ~P_SELECT;
error = tsleep((caddr_t)&selwait, PSOCK, "nbsel", timo);
splx(s);
done:
PROC_UNLOCK(p);
p->p_flag &= ~P_SELECT;
done_noproclock:
if (error == ERESTART)
return 0;
return error;

View File

@ -56,7 +56,7 @@ extern char copyright[]; /* system copyright */
extern int nswap; /* size of swap space */
extern int selwait; /* select timeout address */
extern struct cv selwait; /* select conditional variable */
extern int physmem; /* physical memory */