Fixes to make select/poll mpsafe.

Problem:
  selwakeup required calling pfind which would cause lock order
  reversals with the allproc_lock and the per-process filedesc lock.
Solution:
  Instead of recording the pid of the select()'ing process into the
  selinfo structure, actually record a pointer to the thread.  To
  avoid dereferencing a bad address all the selinfo structures that
  are in use by a thread are kept in a list hung off the thread
  (protected by sellock).  When a selwakeup occurs the selinfo is
  removed from that thread's list; it is also removed on the way out
  of select or poll, where the thread will traverse its list, removing
  all the selinfos from its own list.

Problem:
  Previously the PROC_LOCK was used to provide the mutual exclusion
  needed to ensure proper locking; this couldn't work because there
  was a single condvar used for select and poll and condvars can
  only be used with a single mutex.
Solution:
  Introduce a global mutex 'sellock' which is used to provide mutual
  exclusion when recording events to wait on as well as performing
  notification when an event occurs.

Interesting note:
  schedlock is required to manipulate the per-thread TDF_SELECT
  flag, however if given its own field it would not need schedlock,
  also because TDF_SELECT is only manipulated under sellock one
  doesn't actually use schedlock for synchronization, only to protect
  against corruption.

Proc locks are no longer used in select/poll.

Portions contributed by: davidc
This commit is contained in:
alfred 2002-03-14 01:32:30 +00:00
parent ebb034597a
commit 2c16fbdd2a
15 changed files with 135 additions and 139 deletions

View File

@ -1314,8 +1314,6 @@ psmopen(dev_t dev, int flag, int fmt, struct thread *td)
device_busy(devclass_get_device(psm_devclass, unit));
/* Initialize state */
sc->rsel.si_flags = 0;
sc->rsel.si_pid = 0;
sc->mode.level = sc->dflt_mode.level;
sc->mode.protocol = sc->dflt_mode.protocol;
sc->watchdog = FALSE;

View File

@ -809,7 +809,7 @@ common_bktr_intr( void *arg )
}
/* If someone has a select() on /dev/vbi, inform them */
if (bktr->vbi_select.si_pid) {
if (SEL_WAITING(&bktr->vbi_select)) {
selwakeup(&bktr->vbi_select);
}

View File

@ -523,8 +523,6 @@ genkbdopen(dev_t dev, int mode, int flag, struct thread *td)
bzero(&sc->gkb_q, sizeof(sc->gkb_q));
#endif
clist_alloc_cblocks(&sc->gkb_q, KB_QSIZE, KB_QSIZE/2); /* XXX */
sc->gkb_rsel.si_flags = 0;
sc->gkb_rsel.si_pid = 0;
splx(s);
return 0;

View File

@ -374,7 +374,6 @@ snp_in(snp, buf, n)
wakeup((caddr_t)snp);
}
selwakeup(&snp->snp_sel);
snp->snp_sel.si_pid = 0;
return (n);
}
@ -449,7 +448,6 @@ snp_detach(snp)
detach_notty:
selwakeup(&snp->snp_sel);
snp->snp_sel.si_pid = 0;
if ((snp->snp_flags & SNOOP_OPEN) == 0)
free(snp, M_SNP);

View File

@ -357,7 +357,7 @@ queuerawdata(midi_dbuf *dbuf, char *data, int len)
/* Wake up the processes sleeping on input data. */
cv_broadcast(&dbuf->cv_in);
if (dbuf->sel.si_pid && dbuf->rl >= dbuf->blocksize)
if (SEL_WAITING(&dbuf->sel) && dbuf->rl >= dbuf->blocksize)
selwakeup(&dbuf->sel);
}
@ -398,6 +398,6 @@ deleterawdata(midi_dbuf *dbuf, int len)
/* Wake up the processes sleeping on queueing. */
cv_broadcast(&dbuf->cv_out);
if (dbuf->sel.si_pid && dbuf->fl >= dbuf->blocksize)
if (SEL_WAITING(&dbuf->sel) && dbuf->fl >= dbuf->blocksize)
selwakeup(&dbuf->sel);
}

View File

@ -116,7 +116,7 @@ chn_wakeup(struct pcm_channel *c)
struct snd_dbuf *bs = c->bufsoft;
CHN_LOCKASSERT(c);
if (sndbuf_getsel(bs)->si_pid && chn_polltrigger(c))
if (SEL_WAITING(sndbuf_getsel(bs)) && chn_polltrigger(c))
selwakeup(sndbuf_getsel(bs));
wakeup(bs);
}

View File

@ -344,8 +344,10 @@ USB_ATTACH(ums)
sc->status.button = sc->status.obutton = 0;
sc->status.dx = sc->status.dy = sc->status.dz = 0;
#ifndef __FreeBSD__
sc->rsel.si_flags = 0;
sc->rsel.si_pid = 0;
#endif
sc->dev = make_dev(&ums_cdevsw, device_get_unit(self),
UID_ROOT, GID_OPERATOR,

View File

@ -484,7 +484,6 @@ ascattach(struct isa_device *isdp)
scu->flags &= ~FLAG_DEBUG;
scu->selp.si_flags=0;
scu->selp.si_pid=(pid_t)0;
#define ASC_UID 0
#define ASC_GID 13
make_dev(&asc_cdevsw, unit<<6, ASC_UID, ASC_GID, 0666, "asc%d", unit);
@ -531,10 +530,8 @@ ascintr(int unit)
if (scu->sbuf.size - scu->sbuf.count >= scu->linesize) {
dma_restart(scu);
}
if (scu->selp.si_pid) {
if (SEL_WAITING(&scu->selp)) {
selwakeup(&scu->selp);
scu->selp.si_pid=(pid_t)0;
scu->selp.si_flags = 0;
}
}
}

View File

@ -542,11 +542,8 @@ pcaintr(struct clockframe *frame)
pca_status.buffer = pca_status.buf[pca_status.current];
if (pca_sleep)
wakeup(&pca_sleep);
if (pca_status.wsel.si_pid) {
selwakeup((struct selinfo *)&pca_status.wsel.si_pid);
pca_status.wsel.si_pid = 0;
pca_status.wsel.si_flags = 0;
}
if (SEL_WAITING(&pca_status.wsel))
selwakeup(&pca_status.wsel);
}
}

View File

@ -1314,8 +1314,6 @@ psmopen(dev_t dev, int flag, int fmt, struct thread *td)
device_busy(devclass_get_device(psm_devclass, unit));
/* Initialize state */
sc->rsel.si_flags = 0;
sc->rsel.si_pid = 0;
sc->mode.level = sc->dflt_mode.level;
sc->mode.protocol = sc->dflt_mode.protocol;
sc->watchdog = FALSE;

View File

@ -696,8 +696,12 @@ ioctl(td, uap)
return (error);
}
static int nselcoll; /* Select collisions since boot */
/*
* sellock and selwait are initialized in selectinit() via SYSINIT.
*/
struct mtx sellock;
struct cv selwait;
int nselcoll; /* Select collisions since boot */
SYSCTL_INT(_kern, OID_AUTO, nselcoll, CTLFLAG_RD, &nselcoll, 0, "");
/*
@ -775,7 +779,7 @@ select(td, uap)
sbp += ncpbytes / sizeof *sbp; \
error = copyin(uap->name, ibits[x], ncpbytes); \
if (error != 0) \
goto done_noproclock; \
goto done_nosellock; \
} \
} while (0)
getbits(in, 0);
@ -789,10 +793,10 @@ select(td, uap)
error = copyin((caddr_t)uap->tv, (caddr_t)&atv,
sizeof (atv));
if (error)
goto done_noproclock;
goto done_nosellock;
if (itimerfix(&atv)) {
error = EINVAL;
goto done_noproclock;
goto done_nosellock;
}
getmicrouptime(&rtv);
timevaladd(&atv, &rtv);
@ -801,61 +805,59 @@ select(td, uap)
atv.tv_usec = 0;
}
timo = 0;
PROC_LOCK(td->td_proc);
mtx_lock(&sellock);
retry:
ncoll = nselcoll;
mtx_lock_spin(&sched_lock);
td->td_flags |= TDF_SELECT;
mtx_unlock_spin(&sched_lock);
PROC_UNLOCK(td->td_proc);
mtx_unlock(&sellock);
/* XXX Is there a better place for this? */
TAILQ_INIT(&td->td_selq);
error = selscan(td, ibits, obits, uap->nd);
PROC_LOCK(td->td_proc);
mtx_lock(&sellock);
if (error || td->td_retval[0])
goto done;
if (atv.tv_sec || atv.tv_usec) {
getmicrouptime(&rtv);
if (timevalcmp(&rtv, &atv, >=)) {
/*
* An event of our interest may occur during locking a process.
* In order to avoid missing the event that occured during locking
* the process, test TDF_SELECT and rescan file descriptors if
* necessary.
*/
mtx_lock_spin(&sched_lock);
if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) {
ncoll = nselcoll;
td->td_flags |= TDF_SELECT;
mtx_unlock_spin(&sched_lock);
PROC_UNLOCK(td->td_proc);
error = selscan(td, ibits, obits, uap->nd);
PROC_LOCK(td->td_proc);
} else
mtx_unlock_spin(&sched_lock);
if (timevalcmp(&rtv, &atv, >=))
goto done;
}
ttv = atv;
timevalsub(&ttv, &rtv);
timo = ttv.tv_sec > 24 * 60 * 60 ?
24 * 60 * 60 * hz : tvtohz(&ttv);
}
/*
* An event of interest may occur while we do not hold
* sellock, so check TDF_SELECT and the number of
* collisions and rescan the file descriptors if
* necessary.
*/
mtx_lock_spin(&sched_lock);
td->td_flags &= ~TDF_SELECT;
if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) {
mtx_unlock_spin(&sched_lock);
goto retry;
}
mtx_unlock_spin(&sched_lock);
if (timo > 0)
error = cv_timedwait_sig(&selwait, &td->td_proc->p_mtx, timo);
error = cv_timedwait_sig(&selwait, &sellock, timo);
else
error = cv_wait_sig(&selwait, &td->td_proc->p_mtx);
error = cv_wait_sig(&selwait, &sellock);
if (error == 0)
goto retry;
done:
clear_selinfo_list(td);
mtx_lock_spin(&sched_lock);
td->td_flags &= ~TDF_SELECT;
mtx_unlock_spin(&sched_lock);
PROC_UNLOCK(td->td_proc);
done_noproclock:
mtx_unlock(&sellock);
done_nosellock:
/* select is not restarted after signals... */
if (error == ERESTART)
error = EINTR;
@ -967,13 +969,13 @@ poll(td, uap)
bits = smallbits;
error = copyin(SCARG(uap, fds), bits, ni);
if (error)
goto done_noproclock;
goto done_nosellock;
if (SCARG(uap, timeout) != INFTIM) {
atv.tv_sec = SCARG(uap, timeout) / 1000;
atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
if (itimerfix(&atv)) {
error = EINVAL;
goto done_noproclock;
goto done_nosellock;
}
getmicrouptime(&rtv);
timevaladd(&atv, &rtv);
@ -982,59 +984,57 @@ poll(td, uap)
atv.tv_usec = 0;
}
timo = 0;
PROC_LOCK(td->td_proc);
mtx_lock(&sellock);
retry:
ncoll = nselcoll;
mtx_lock_spin(&sched_lock);
td->td_flags |= TDF_SELECT;
mtx_unlock_spin(&sched_lock);
PROC_UNLOCK(td->td_proc);
mtx_unlock(&sellock);
/* XXX Is there a better place for this? */
TAILQ_INIT(&td->td_selq);
error = pollscan(td, (struct pollfd *)bits, nfds);
PROC_LOCK(td->td_proc);
mtx_lock(&sellock);
if (error || td->td_retval[0])
goto done;
if (atv.tv_sec || atv.tv_usec) {
getmicrouptime(&rtv);
if (timevalcmp(&rtv, &atv, >=)) {
/*
* An event of our interest may occur during locking a process.
* In order to avoid missing the event that occured during locking
* the process, test TDF_SELECT and rescan file descriptors if
* necessary.
*/
mtx_lock_spin(&sched_lock);
if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) {
ncoll = nselcoll;
td->td_flags |= TDF_SELECT;
mtx_unlock_spin(&sched_lock);
PROC_UNLOCK(td->td_proc);
error = pollscan(td, (struct pollfd *)bits, nfds);
PROC_LOCK(td->td_proc);
} else
mtx_unlock_spin(&sched_lock);
if (timevalcmp(&rtv, &atv, >=))
goto done;
}
ttv = atv;
timevalsub(&ttv, &rtv);
timo = ttv.tv_sec > 24 * 60 * 60 ?
24 * 60 * 60 * hz : tvtohz(&ttv);
}
/*
* An event of interest may occur while we do not hold
* sellock, so check TDF_SELECT and the number of collisions
* and rescan the file descriptors if necessary.
*/
mtx_lock_spin(&sched_lock);
td->td_flags &= ~TDF_SELECT;
if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) {
mtx_unlock_spin(&sched_lock);
goto retry;
}
mtx_unlock_spin(&sched_lock);
if (timo > 0)
error = cv_timedwait_sig(&selwait, &td->td_proc->p_mtx, timo);
error = cv_timedwait_sig(&selwait, &sellock, timo);
else
error = cv_wait_sig(&selwait, &td->td_proc->p_mtx);
error = cv_wait_sig(&selwait, &sellock);
if (error == 0)
goto retry;
done:
clear_selinfo_list(td);
mtx_lock_spin(&sched_lock);
td->td_flags &= ~TDF_SELECT;
mtx_unlock_spin(&sched_lock);
PROC_UNLOCK(td->td_proc);
done_noproclock:
mtx_unlock(&sellock);
done_nosellock:
/* poll is not restarted after signals... */
if (error == ERESTART)
error = EINTR;
@ -1115,6 +1115,26 @@ openbsd_poll(td, uap)
return (poll(td, (struct poll_args *)uap));
}
/*
* Remove the references to the thread from all of the objects
* we were polling.
*
* This code assumes that the underlying owner of the selinfo
* structure will hold sellock before it changes it, and that
* it will unlink itself from our list if it goes away.
*/
void
clear_selinfo_list(td)
struct thread *td;
{
struct selinfo *si;
mtx_assert(&sellock, MA_OWNED);
TAILQ_FOREACH(si, &td->td_selq, si_thrlist)
si->si_thread = NULL;
TAILQ_INIT(&td->td_selq);
}
/*ARGSUSED*/
int
seltrue(dev, events, td)
@ -1126,18 +1146,6 @@ seltrue(dev, events, td)
return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}
static int
find_thread_in_proc(struct proc *p, struct thread *td)
{
struct thread *td2;
FOREACH_THREAD_IN_PROC(p, td2) {
if (td2 == td) {
return (1);
}
}
return (0);
}
/*
* Record a select request.
*/
@ -1146,29 +1154,22 @@ selrecord(selector, sip)
struct thread *selector;
struct selinfo *sip;
{
struct proc *p;
pid_t mypid;
mypid = selector->td_proc->p_pid;
if ((sip->si_pid == mypid) &&
(sip->si_thread == selector)) { /* XXXKSE should be an ID? */
return;
mtx_lock(&sellock);
/*
* If the thread is NULL then take ownership of selinfo
* however if the thread is not NULL and the thread points to
* someone else, then we have a collision, otherwise leave it alone
* as we've owned it in a previous selrecord on this selinfo.
*/
if (sip->si_thread == NULL) {
sip->si_thread = selector;
TAILQ_INSERT_TAIL(&selector->td_selq, sip, si_thrlist);
} else if (sip->si_thread != selector) {
sip->si_flags |= SI_COLL;
}
if (sip->si_pid &&
(p = pfind(sip->si_pid)) &&
(find_thread_in_proc(p, sip->si_thread))) {
mtx_lock_spin(&sched_lock);
if (sip->si_thread->td_wchan == (caddr_t)&selwait) {
mtx_unlock_spin(&sched_lock);
PROC_UNLOCK(p);
sip->si_flags |= SI_COLL;
return;
}
mtx_unlock_spin(&sched_lock);
PROC_UNLOCK(p);
}
sip->si_pid = mypid;
sip->si_thread = selector;
mtx_unlock(&sellock);
}
/*
@ -1176,37 +1177,33 @@ selrecord(selector, sip)
*/
void
selwakeup(sip)
register struct selinfo *sip;
struct selinfo *sip;
{
struct thread *td;
register struct proc *p;
if (sip->si_pid == 0)
return;
if (sip->si_flags & SI_COLL) {
mtx_lock(&sellock);
td = sip->si_thread;
if ((sip->si_flags & SI_COLL) != 0) {
nselcoll++;
sip->si_flags &= ~SI_COLL;
cv_broadcast(&selwait);
}
p = pfind(sip->si_pid);
sip->si_pid = 0;
td = sip->si_thread;
if (p != NULL) {
if (!find_thread_in_proc(p, td)) {
PROC_UNLOCK(p); /* lock is in pfind() */;
return;
}
mtx_lock_spin(&sched_lock);
if (td->td_wchan == (caddr_t)&selwait) {
if (td->td_proc->p_stat == SSLEEP)
setrunnable(td);
else
cv_waitq_remove(td);
} else
td->td_flags &= ~TDF_SELECT;
mtx_unlock_spin(&sched_lock);
PROC_UNLOCK(p); /* Lock is in pfind() */
if (td == NULL) {
mtx_unlock(&sellock);
return;
}
TAILQ_REMOVE(&td->td_selq, sip, si_thrlist);
sip->si_thread = NULL;
mtx_lock_spin(&sched_lock);
if (td->td_wchan == (caddr_t)&selwait) {
if (td->td_proc->p_stat == SSLEEP)
setrunnable(td);
else
cv_waitq_remove(td);
} else
td->td_flags &= ~TDF_SELECT;
mtx_unlock_spin(&sched_lock);
mtx_unlock(&sellock);
}
static void selectinit __P((void *));
@ -1218,4 +1215,5 @@ selectinit(dummy)
void *dummy;
{
cv_init(&selwait, "select");
mtx_init(&sellock, "sellck", MTX_DEF);
}

View File

@ -2273,7 +2273,7 @@ ttwakeup(tp)
register struct tty *tp;
{
if (tp->t_rsel.si_pid != 0)
if (SEL_WAITING(&tp->t_rsel))
selwakeup(&tp->t_rsel);
if (ISSET(tp->t_state, TS_ASYNC) && tp->t_sigio != NULL)
pgsigio(tp->t_sigio, SIGIO, (tp->t_session != NULL));
@ -2289,7 +2289,7 @@ ttwwakeup(tp)
register struct tty *tp;
{
if (tp->t_wsel.si_pid != 0 && tp->t_outq.c_cc <= tp->t_olowat)
if (SEL_WAITING(&tp->t_wsel) && tp->t_outq.c_cc <= tp->t_olowat)
selwakeup(&tp->t_wsel);
if (ISSET(tp->t_state, TS_ASYNC) && tp->t_sigio != NULL)
pgsigio(tp->t_sigio, SIGIO, (tp->t_session != NULL));

View File

@ -147,6 +147,7 @@ struct pargs {
* m - Giant
* n - not locked, lazy
* o - locked by pgrpsess_lock sx
* p - select lock (sellock)
*
* If the locking key specifies two identifiers (for example, p_pptr) then
* either lock is sufficient for read access, but both locks must be held
@ -260,6 +261,8 @@ struct thread {
TAILQ_ENTRY(thread) td_blkq; /* (j) Mutex queue. XXXKSE */
TAILQ_ENTRY(thread) td_runq; /* (j) Run queue(s). XXXKSE */
TAILQ_HEAD(, selinfo) td_selq; /* (p) List of selinfos. */
#define td_startzero td_flags
int td_flags; /* (j) TDF_* flags. */
int td_dupfd; /* (k) Ret value from fdopen. XXX */

View File

@ -45,18 +45,23 @@ struct thread;
* notified when I/O becomes possible.
*/
struct selinfo {
pid_t si_pid; /* process to be notified */
struct thread *si_thread; /* thread in that process XXXKSE */
TAILQ_ENTRY(selinfo) si_thrlist; /* list hung off of thread */
struct thread *si_thread; /* thread waiting */
struct klist si_note; /* kernel note list */
short si_flags; /* see below */
};
#define SI_COLL 0x0001 /* collision occurred */
#define SEL_WAITING(si) \
((si)->si_thread != NULL || ((si)->si_flags & SI_COLL) != 0)
#ifdef _KERNEL
struct thread;
void clear_selinfo_list(struct thread *);
void selrecord __P((struct thread *selector, struct selinfo *));
void selwakeup __P((struct selinfo *));
#endif
#endif /* !_SYS_SELINFO_H_ */

View File

@ -57,6 +57,8 @@ extern char copyright[]; /* system copyright */
extern int nswap; /* size of swap space */
extern int nselcoll; /* select collisions since boot */
extern struct mtx sellock; /* select lock variable */
extern struct cv selwait; /* select conditional variable */
extern int physmem; /* physical memory */