Fix a deficiency in the selinfo interface:

If a selinfo object is recorded (via selrecord()) and then quickly
destroyed before the waiters have had the opportunity to wake up,
at their next iteration the waiters will find the selinfo object
destroyed, causing a page fault.

That happens because the selinfo interface has no way to drain the
waiters before destroying the registered selinfo object. This race
is quite rare in practice, however, because it requires a
selrecord(), a poll request by another thread and a quick destruction
of the selrecord()'ed selinfo object.

Fix this by adding the seldrain() routine, which should be called
before destroying a selinfo object (in order to avoid such a case),
and fix the existing cases where it should already have been called.
Sometimes the context is safe enough to prevent this type of race,
as happens in device drivers which install selinfo objects in their
poll callbacks. There, the destruction of the selinfo object happens
at driver detach time, when all the file descriptors should already
be closed, thus there cannot be a race.
For this case, the mfi(4) device driver can be taken as an example,
as it implements fully correct logic for preventing this from happening.

Sponsored by:	Sandvine Incorporated
Reported by:	rstone
Tested by:	pluknet
Reviewed by:	jhb, kib
Approved by:	re (bz)
MFC after:	3 weeks
This commit is contained in:
Attilio Rao 2011-08-25 15:51:54 +00:00
parent 745e9ba27d
commit 6aba400a70
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=225177
16 changed files with 54 additions and 4 deletions

View File

@ -1075,7 +1075,7 @@ MLINKS+=scheduler.9 curpriority_cmp.9 \
scheduler.9 setrunnable.9 \
scheduler.9 updatepri.9
MLINKS+=securelevel_gt.9 securelevel_ge.9
MLINKS+=selrecord.9 selwakeup.9
# MLINKS entries are "<source> <link>" pairs; seldrain(9) and selwakeup(9)
# are both documented in selrecord.9.
MLINKS+=selrecord.9 seldrain.9 \
	selrecord.9 selwakeup.9
MLINKS+=sema.9 sema_destroy.9 \
sema.9 sema_init.9 \
sema.9 sema_post.9 \

View File

@ -26,10 +26,11 @@
.\"
.\" $FreeBSD$
.\"
.Dd June 13, 2007
.Dd August 25, 2011
.Dt SELRECORD 9
.Os
.Sh NAME
.Nm seldrain ,
.Nm selrecord ,
.Nm selwakeup
.Nd "record and wakeup select requests"
@ -37,14 +38,17 @@
.In sys/param.h
.In sys/selinfo.h
.Ft void
.Fn seldrain "struct selinfo *sip"
.Ft void
.Fn selrecord "struct thread *td" "struct selinfo *sip"
.Ft void
.Fn selwakeup "struct selinfo *sip"
.Sh DESCRIPTION
.Fn seldrain ,
.Fn selrecord
and
.Fn selwakeup
are the two central functions used by
are the three central functions used by
.Xr select 2 ,
.Xr poll 2
and the objects that are being selected on.
@ -86,6 +90,15 @@ and
.Xr poll 2
when they wake up.
.Pp
.Fn seldrain
will flush the waiters queue on a specified object before its
destruction.
The object handling code must ensure that
.Fa *sip
cannot be used once
.Fn seldrain
has been called.
.Pp
The contents of
.Fa *sip
must be zeroed, such as by softc initialization, before any call to
@ -98,6 +111,10 @@ acquires and releases
.Va sellock
and may acquire and release
.Va sched_lock .
.Fn seldrain
is usually just a wrapper for
.Fn selwakeup ,
but consumers should not rely on this implementation detail.
.Sh SEE ALSO
.Xr poll 2 ,
.Xr select 2

View File

@ -1704,6 +1704,7 @@ kqueue_close(struct file *fp, struct thread *td)
SLIST_REMOVE(&fdp->fd_kqlist, kq, kqueue, kq_list);
FILEDESC_XUNLOCK(fdp);
seldrain(&kq->kq_sel);
knlist_destroy(&kq->kq_sel.si_note);
mtx_destroy(&kq->kq_lock);
kq->kq_fdp = NULL;

View File

@ -1490,6 +1490,23 @@ selfdfree(struct seltd *stp, struct selfd *sfp)
uma_zfree(selfd_zone, sfp);
}
/*
 * Drain the waiters tied to all the selfd belonging to the specified
 * selinfo.
 *
 * sip: the selinfo object whose waiters are to be drained.
 */
void
seldrain(struct selinfo *sip)
{

	/*
	 * This feature is already provided by doselwakeup(), thus it is
	 * enough to call it.
	 * The surrounding context should still take care to avoid races
	 * between threads calling select()/poll() and file descriptor
	 * detaching but, again, the races are just the same as for
	 * selwakeup().
	 *
	 * NOTE(review): the -1 second argument is presumably the
	 * "no priority" value -- confirm against doselwakeup().
	 */
	doselwakeup(sip, -1);
}
/*
* Record a select request.
*/

View File

@ -1517,6 +1517,7 @@ pipeclose(cpipe)
*/
knlist_clear(&cpipe->pipe_sel.si_note, 1);
cpipe->pipe_present = PIPE_FINALIZED;
seldrain(&cpipe->pipe_sel);
knlist_destroy(&cpipe->pipe_sel.si_note);
/*

View File

@ -1022,6 +1022,8 @@ tty_dealloc(void *arg)
MPASS(ttyinq_getsize(&tp->t_inq) == 0);
MPASS(ttyoutq_getsize(&tp->t_outq) == 0);
seldrain(&tp->t_inpoll);
seldrain(&tp->t_outpoll);
knlist_destroy(&tp->t_inpoll.si_note);
knlist_destroy(&tp->t_outpoll.si_note);

View File

@ -688,6 +688,8 @@ ptsdrv_free(void *softc)
racct_sub_cred(psc->pts_cred, RACCT_NPTS, 1);
crfree(psc->pts_cred);
seldrain(&psc->pts_inpoll);
seldrain(&psc->pts_outpoll);
knlist_destroy(&psc->pts_inpoll.si_note);
knlist_destroy(&psc->pts_outpoll.si_note);

View File

@ -1562,6 +1562,8 @@ mqueue_free(struct mqueue *mq)
}
mtx_destroy(&mq->mq_mutex);
seldrain(&mq->mq_rsel);
seldrain(&mq->mq_wsel);
knlist_destroy(&mq->mq_rsel.si_note);
knlist_destroy(&mq->mq_wsel.si_note);
uma_zfree(mqueue_zone, mq);

View File

@ -661,6 +661,8 @@ sofree(struct socket *so)
*/
sbdestroy(&so->so_snd, so);
sbdestroy(&so->so_rcv, so);
seldrain(&so->so_snd.sb_sel);
seldrain(&so->so_rcv.sb_sel);
knlist_destroy(&so->so_rcv.sb_sel.si_note);
knlist_destroy(&so->so_snd.sb_sel.si_note);
sodealloc(so);

View File

@ -3312,6 +3312,7 @@ vbusy(struct vnode *vp)
static void
destroy_vpollinfo(struct vpollinfo *vi)
{
seldrain(&vi->vpi_selinfo);
knlist_destroy(&vi->vpi_selinfo.si_note);
mtx_destroy(&vi->vpi_lock);
uma_zfree(vnodepoll_zone, vi);

View File

@ -652,10 +652,10 @@ bpf_dtor(void *data)
if (d->bd_bif)
bpf_detachd(d);
mtx_unlock(&bpf_mtx);
selwakeuppri(&d->bd_sel, PRINET);
#ifdef MAC
mac_bpfdesc_destroy(d);
#endif /* MAC */
seldrain(&d->bd_sel);
knlist_destroy(&d->bd_sel.si_note);
callout_drain(&d->bd_callout);
bpf_freed(d);

View File

@ -214,6 +214,7 @@ tap_destroy(struct tap_softc *tp)
KASSERT(!(tp->tap_flags & TAP_OPEN),
("%s flags is out of sync", ifp->if_xname));
seldrain(&tp->tap_rsel);
knlist_destroy(&tp->tap_rsel.si_note);
destroy_dev(tp->tap_dev);
ether_ifdetach(ifp);

View File

@ -259,6 +259,7 @@ tun_destroy(struct tun_softc *tp)
if_detach(TUN2IFP(tp));
if_free(TUN2IFP(tp));
destroy_dev(dev);
seldrain(&tp->tun_rsel);
knlist_destroy(&tp->tun_rsel.si_note);
mtx_destroy(&tp->tun_mtx);
cv_destroy(&tp->tun_cv);

View File

@ -646,6 +646,7 @@ audit_pipe_free(struct audit_pipe *ap)
cv_destroy(&ap->ap_cv);
AUDIT_PIPE_SX_LOCK_DESTROY(ap);
AUDIT_PIPE_LOCK_DESTROY(ap);
seldrain(&ap->ap_selinfo);
knlist_destroy(&ap->ap_selinfo.si_note);
TAILQ_REMOVE(&audit_pipe_list, ap, ap_list);
free(ap, M_AUDIT_PIPE);

View File

@ -51,6 +51,7 @@ struct selinfo {
#define SEL_WAITING(si) (!TAILQ_EMPTY(&(si)->si_tdlist))
#ifdef _KERNEL
void seldrain(struct selinfo *sip);
void selrecord(struct thread *selector, struct selinfo *sip);
void selwakeup(struct selinfo *sip);
void selwakeuppri(struct selinfo *sip, int pri);

View File

@ -297,6 +297,7 @@ apmclose(struct cdev *dev, int flag, int fmt, struct thread *td)
/* Remove this clone's data from the list and free it. */
ACPI_LOCK(acpi);
STAILQ_REMOVE(&acpi_sc->apm_cdevs, clone, apm_clone_data, entries);
seldrain(&clone->sel_read);
knlist_destroy(&clone->sel_read.si_note);
ACPI_UNLOCK(acpi);
free(clone, M_APMDEV);