Adapt vfs kqfilter to the shared vnode lock used by zfs write vop. Use
vnode interlock to protect the knote fields [1]. The locking assumes
that the shared vnode lock is held, thus we get exclusive access to the
knote either by exclusive vnode lock protection, or by shared vnode
lock + vnode interlock.

Do not use kl_locked() method to assert either lock ownership or the
fact that curthread does not own the lock. For shared locks, ownership
is not recorded, e.g. VOP_ISLOCKED can return LK_SHARED for the shared
lock not owned by curthread, causing false positives in kqueue subsystem
assertions about knlist lock.

Remove kl_locked method from knlist lock vector, and add two separate
assertion methods kl_assert_locked and kl_assert_unlocked, that are
supposed to use proper asserts. Change knlist_init accordingly.

Add convenience function knlist_init_mtx to reduce the number of
arguments for typical knlist initialization.

Submitted by:	jhb [1]
Noted by:	jhb [2]
Reviewed by:	jhb
Tested by:	rnoland
This commit is contained in:
Konstantin Belousov 2009-06-10 20:59:32 +00:00
parent 5591364265
commit d8b0556c6d
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=193951
18 changed files with 99 additions and 57 deletions

View File

@ -194,7 +194,7 @@ targopen(struct cdev *dev, int flags, int fmt, struct thread *td)
TAILQ_INIT(&softc->work_queue);
TAILQ_INIT(&softc->abort_queue);
TAILQ_INIT(&softc->user_ccb_queue);
knlist_init(&softc->read_select.si_note, NULL, NULL, NULL, NULL);
knlist_init_mtx(&softc->read_select.si_note, NULL);
return (0);
}

View File

@ -260,7 +260,7 @@ apm_create_clone(struct cdev *dev, struct acpi_softc *acpi_sc)
clone->acpi_sc = acpi_sc;
clone->notify_status = APM_EV_NONE;
bzero(&clone->sel_read, sizeof(clone->sel_read));
knlist_init(&clone->sel_read.si_note, &acpi_mutex, NULL, NULL, NULL);
knlist_init_mtx(&clone->sel_read.si_note, &acpi_mutex);
/*
* The acpi device is always managed by devd(8) and is considered

View File

@ -423,7 +423,7 @@ proc0_init(void *dummy __unused)
p->p_sysent = &null_sysvec;
p->p_flag = P_SYSTEM | P_INMEM;
p->p_state = PRS_NORMAL;
knlist_init(&p->p_klist, &p->p_mtx, NULL, NULL, NULL);
knlist_init_mtx(&p->p_klist, &p->p_mtx);
STAILQ_INIT(&p->p_ktr);
p->p_nice = NZERO;
td->td_tid = PID_MAX + 1;

View File

@ -208,12 +208,10 @@ SYSCTL_INT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW,
} while (0)
#ifdef INVARIANTS
#define KNL_ASSERT_LOCKED(knl) do { \
if (!knl->kl_locked((knl)->kl_lockarg)) \
panic("knlist not locked, but should be"); \
knl->kl_assert_locked((knl)->kl_lockarg); \
} while (0)
#define KNL_ASSERT_UNLOCKED(knl) do { \
if (knl->kl_locked((knl)->kl_lockarg)) \
panic("knlist locked, but should not be"); \
#define KNL_ASSERT_UNLOCKED(knl) do { \
knl->kl_assert_unlocked((knl)->kl_lockarg); \
} while (0)
#else /* !INVARIANTS */
#define KNL_ASSERT_LOCKED(knl) do {} while(0)
@ -577,7 +575,7 @@ kqueue(struct thread *td, struct kqueue_args *uap)
mtx_init(&kq->kq_lock, "kqueue", NULL, MTX_DEF|MTX_DUPOK);
TAILQ_INIT(&kq->kq_head);
kq->kq_fdp = fdp;
knlist_init(&kq->kq_sel.si_note, &kq->kq_lock, NULL, NULL, NULL);
knlist_init_mtx(&kq->kq_sel.si_note, &kq->kq_lock);
TASK_INIT(&kq->kq_task, 0, kqueue_task, kq);
FILEDESC_XLOCK(fdp);
@ -1723,7 +1721,6 @@ MTX_SYSINIT(knlist_lock, &knlist_lock, "knlist lock for lockless objects",
MTX_DEF);
static void knlist_mtx_lock(void *arg);
static void knlist_mtx_unlock(void *arg);
static int knlist_mtx_locked(void *arg);
static void
knlist_mtx_lock(void *arg)
@ -1737,15 +1734,22 @@ knlist_mtx_unlock(void *arg)
mtx_unlock((struct mtx *)arg);
}
static int
knlist_mtx_locked(void *arg)
static void
knlist_mtx_assert_locked(void *arg)
{
return (mtx_owned((struct mtx *)arg));
mtx_assert((struct mtx *)arg, MA_OWNED);
}
/*
 * Mutex-backed kl_assert_unlocked method: knlist_init() installs it when
 * the caller passes NULL for kl_assert_unlocked.  'arg' is the knlist
 * lock argument, treated here as a struct mtx pointer; the check itself
 * is delegated to mtx_assert() with MA_NOTOWNED.
 */
static void
knlist_mtx_assert_unlocked(void *arg)
{
mtx_assert((struct mtx *)arg, MA_NOTOWNED);
}
void
knlist_init(struct knlist *knl, void *lock, void (*kl_lock)(void *),
void (*kl_unlock)(void *), int (*kl_locked)(void *))
void (*kl_unlock)(void *),
void (*kl_assert_locked)(void *), void (*kl_assert_unlocked)(void *))
{
if (lock == NULL)
@ -1761,14 +1765,25 @@ knlist_init(struct knlist *knl, void *lock, void (*kl_lock)(void *),
knl->kl_unlock = knlist_mtx_unlock;
else
knl->kl_unlock = kl_unlock;
if (kl_locked == NULL)
knl->kl_locked = knlist_mtx_locked;
if (kl_assert_locked == NULL)
knl->kl_assert_locked = knlist_mtx_assert_locked;
else
knl->kl_locked = kl_locked;
knl->kl_assert_locked = kl_assert_locked;
if (kl_assert_unlocked == NULL)
knl->kl_assert_unlocked = knlist_mtx_assert_unlocked;
else
knl->kl_assert_unlocked = kl_assert_unlocked;
SLIST_INIT(&knl->kl_list);
}
/*
 * Convenience wrapper for the common case of a knlist protected by a
 * mutex: forwards 'knl' and 'lock' to knlist_init() and passes NULL for
 * all four lock methods (lock, unlock, assert_locked, assert_unlocked),
 * leaving the choice of methods to knlist_init().  Several callers pass
 * a NULL 'lock'; that case is handled inside knlist_init().
 */
void
knlist_init_mtx(struct knlist *knl, struct mtx *lock)
{
knlist_init(knl, lock, NULL, NULL, NULL, NULL);
}
void
knlist_destroy(struct knlist *knl)
{

View File

@ -304,7 +304,7 @@ fork1(td, flags, pages, procp)
#ifdef MAC
mac_proc_init(newproc);
#endif
knlist_init(&newproc->p_klist, &newproc->p_mtx, NULL, NULL, NULL);
knlist_init_mtx(&newproc->p_klist, &newproc->p_mtx);
STAILQ_INIT(&newproc->p_ktr);
/* We have to lock the process tree while we look for a pid. */

View File

@ -328,10 +328,8 @@ kern_pipe(struct thread *td, int fildes[2])
rpipe = &pp->pp_rpipe;
wpipe = &pp->pp_wpipe;
knlist_init(&rpipe->pipe_sel.si_note, PIPE_MTX(rpipe), NULL, NULL,
NULL);
knlist_init(&wpipe->pipe_sel.si_note, PIPE_MTX(wpipe), NULL, NULL,
NULL);
knlist_init_mtx(&rpipe->pipe_sel.si_note, PIPE_MTX(rpipe));
knlist_init_mtx(&wpipe->pipe_sel.si_note, PIPE_MTX(wpipe));
/* Only the forward direction pipe is backed by default */
if ((error = pipe_create(rpipe, 1)) != 0 ||

View File

@ -933,8 +933,8 @@ tty_alloc_mutex(struct ttydevsw *tsw, void *sc, struct mtx *mutex)
mtx_init(&tp->t_mtxobj, "ttymtx", NULL, MTX_DEF);
}
knlist_init(&tp->t_inpoll.si_note, tp->t_mtx, NULL, NULL, NULL);
knlist_init(&tp->t_outpoll.si_note, tp->t_mtx, NULL, NULL, NULL);
knlist_init_mtx(&tp->t_inpoll.si_note, tp->t_mtx);
knlist_init_mtx(&tp->t_outpoll.si_note, tp->t_mtx);
sx_xlock(&tty_list_sx);
TAILQ_INSERT_TAIL(&tty_list, tp, t_list);

View File

@ -742,8 +742,8 @@ pts_alloc(int fflags, struct thread *td, struct file *fp)
uihold(uid);
tp = tty_alloc(&pts_class, psc);
knlist_init(&psc->pts_inpoll.si_note, tp->t_mtx, NULL, NULL, NULL);
knlist_init(&psc->pts_outpoll.si_note, tp->t_mtx, NULL, NULL, NULL);
knlist_init_mtx(&psc->pts_inpoll.si_note, tp->t_mtx);
knlist_init_mtx(&psc->pts_outpoll.si_note, tp->t_mtx);
/* Expose the slave device as well. */
tty_makedev(tp, td->td_ucred, "pts/%u", psc->pts_unit);
@ -782,8 +782,8 @@ pts_alloc_external(int fflags, struct thread *td, struct file *fp,
uihold(uid);
tp = tty_alloc(&pts_class, psc);
knlist_init(&psc->pts_inpoll.si_note, tp->t_mtx, NULL, NULL, NULL);
knlist_init(&psc->pts_outpoll.si_note, tp->t_mtx, NULL, NULL, NULL);
knlist_init_mtx(&psc->pts_inpoll.si_note, tp->t_mtx);
knlist_init_mtx(&psc->pts_outpoll.si_note, tp->t_mtx);
/* Expose the slave device as well. */
tty_makedev(tp, td->td_ucred, "%s", name);

View File

@ -1531,8 +1531,8 @@ mqueue_alloc(const struct mq_attr *attr)
mq->mq_msgsize = default_msgsize;
}
mtx_init(&mq->mq_mutex, "mqueue lock", NULL, MTX_DEF);
knlist_init(&mq->mq_rsel.si_note, &mq->mq_mutex, NULL, NULL, NULL);
knlist_init(&mq->mq_wsel.si_note, &mq->mq_mutex, NULL, NULL, NULL);
knlist_init_mtx(&mq->mq_rsel.si_note, &mq->mq_mutex);
knlist_init_mtx(&mq->mq_wsel.si_note, &mq->mq_mutex);
atomic_add_int(&curmq, 1);
return (mq);
}

View File

@ -376,10 +376,8 @@ socreate(int dom, struct socket **aso, int type, int proto,
#ifdef MAC
mac_socket_create(cred, so);
#endif
knlist_init(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv),
NULL, NULL, NULL);
knlist_init(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd),
NULL, NULL, NULL);
knlist_init_mtx(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv));
knlist_init_mtx(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd));
so->so_count = 1;
/*
* Auto-sizing of socket buffers is managed by the protocols and
@ -445,10 +443,8 @@ sonewconn(struct socket *head, int connstatus)
#ifdef MAC
mac_socket_newconn(head, so);
#endif
knlist_init(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv),
NULL, NULL, NULL);
knlist_init(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd),
NULL, NULL, NULL);
knlist_init_mtx(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv));
knlist_init_mtx(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd));
if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat) ||
(*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) {
sodealloc(so);

View File

@ -1485,7 +1485,7 @@ aio_aqueue(struct thread *td, struct aiocb *job, struct aioliojob *lj,
aiocbe = uma_zalloc(aiocb_zone, M_WAITOK | M_ZERO);
aiocbe->inputcharge = 0;
aiocbe->outputcharge = 0;
knlist_init(&aiocbe->klist, AIO_MTX(ki), NULL, NULL, NULL);
knlist_init_mtx(&aiocbe->klist, AIO_MTX(ki));
error = ops->copyin(job, &aiocbe->uaiocb);
if (error) {
@ -2107,7 +2107,7 @@ kern_lio_listio(struct thread *td, int mode, struct aiocb * const *uacb_list,
lj->lioj_flags = 0;
lj->lioj_count = 0;
lj->lioj_finished_count = 0;
knlist_init(&lj->klist, AIO_MTX(ki), NULL, NULL, NULL);
knlist_init_mtx(&lj->klist, AIO_MTX(ki));
ksiginfo_init(&lj->lioj_ksi);
/*

View File

@ -110,7 +110,8 @@ static void vnlru_free(int);
static void vgonel(struct vnode *);
static void vfs_knllock(void *arg);
static void vfs_knlunlock(void *arg);
static int vfs_knllocked(void *arg);
static void vfs_knl_assert_locked(void *arg);
static void vfs_knl_assert_unlocked(void *arg);
static void destroy_vpollinfo(struct vpollinfo *vi);
/*
@ -3271,7 +3272,7 @@ v_addpollinfo(struct vnode *vp)
vi = uma_zalloc(vnodepoll_zone, M_WAITOK);
mtx_init(&vi->vpi_lock, "vnode pollinfo", NULL, MTX_DEF);
knlist_init(&vi->vpi_selinfo.si_note, vp, vfs_knllock,
vfs_knlunlock, vfs_knllocked);
vfs_knlunlock, vfs_knl_assert_locked, vfs_knl_assert_unlocked);
VI_LOCK(vp);
if (vp->v_pollinfo != NULL) {
VI_UNLOCK(vp);
@ -3986,7 +3987,7 @@ static struct knlist fs_knlist;
static void
vfs_event_init(void *arg)
{
knlist_init(&fs_knlist, NULL, NULL, NULL, NULL);
knlist_init_mtx(&fs_knlist, NULL);
}
/* XXX - correct order? */
SYSINIT(vfs_knlist, SI_SUB_VFS, SI_ORDER_ANY, vfs_event_init, NULL);
@ -4099,12 +4100,24 @@ vfs_knlunlock(void *arg)
VOP_UNLOCK(vp, 0);
}
static int
vfs_knllocked(void *arg)
static void
vfs_knl_assert_locked(void *arg)
{
#ifdef DEBUG_VFS_LOCKS
struct vnode *vp = arg;
return (VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
ASSERT_VOP_LOCKED(vp, "vfs_knl_assert_locked");
#endif
}
/*
 * kl_assert_unlocked method for vnode knlists; 'arg' is the vnode.
 * The check is only compiled in under DEBUG_VFS_LOCKS, where it is
 * delegated to ASSERT_VOP_UNLOCKED(); otherwise this function is empty.
 */
static void
vfs_knl_assert_unlocked(void *arg)
{
#ifdef DEBUG_VFS_LOCKS
struct vnode *vp = arg;
ASSERT_VOP_UNLOCKED(vp, "vfs_knl_assert_unlocked");
#endif
}
int
@ -4157,27 +4170,37 @@ filt_vfsread(struct knote *kn, long hint)
{
struct vnode *vp = (struct vnode *)kn->kn_hook;
struct vattr va;
int res;
/*
* filesystem is gone, so set the EOF flag and schedule
* the knote for deletion.
*/
if (hint == NOTE_REVOKE) {
VI_LOCK(vp);
kn->kn_flags |= (EV_EOF | EV_ONESHOT);
VI_UNLOCK(vp);
return (1);
}
if (VOP_GETATTR(vp, &va, curthread->td_ucred))
return (0);
VI_LOCK(vp);
kn->kn_data = va.va_size - kn->kn_fp->f_offset;
return (kn->kn_data != 0);
res = (kn->kn_data != 0);
VI_UNLOCK(vp);
return (res);
}
/*ARGSUSED*/
static int
filt_vfswrite(struct knote *kn, long hint)
{
struct vnode *vp = (struct vnode *)kn->kn_hook;
VI_LOCK(vp);
/*
* filesystem is gone, so set the EOF flag and schedule
* the knote for deletion.
@ -4186,19 +4209,27 @@ filt_vfswrite(struct knote *kn, long hint)
kn->kn_flags |= (EV_EOF | EV_ONESHOT);
kn->kn_data = 0;
VI_UNLOCK(vp);
return (1);
}
static int
filt_vfsvnode(struct knote *kn, long hint)
{
struct vnode *vp = (struct vnode *)kn->kn_hook;
int res;
VI_LOCK(vp);
if (kn->kn_sfflags & hint)
kn->kn_fflags |= hint;
if (hint == NOTE_REVOKE) {
kn->kn_flags |= EV_EOF;
VI_UNLOCK(vp);
return (1);
}
return (kn->kn_fflags != 0);
res = (kn->kn_fflags != 0);
VI_UNLOCK(vp);
return (res);
}
int

View File

@ -648,7 +648,7 @@ bpfopen(struct cdev *dev, int flags, int fmt, struct thread *td)
#endif
mtx_init(&d->bd_mtx, devtoname(dev), "bpf cdev lock", MTX_DEF);
callout_init(&d->bd_callout, CALLOUT_MPSAFE);
knlist_init(&d->bd_sel.si_note, &d->bd_mtx, NULL, NULL, NULL);
knlist_init_mtx(&d->bd_sel.si_note, &d->bd_mtx);
return (0);
}

View File

@ -445,7 +445,7 @@ vnet_net_iattach(const void *unused __unused)
TAILQ_INIT(&V_ifnet);
TAILQ_INIT(&V_ifg_head);
knlist_init(&V_ifklist, NULL, NULL, NULL, NULL);
knlist_init_mtx(&V_ifklist, NULL);
if_grow(); /* create initial table */
return (0);
@ -548,7 +548,7 @@ if_alloc(u_char type)
TAILQ_INIT(&ifp->if_prefixhead);
TAILQ_INIT(&ifp->if_multiaddrs);
TAILQ_INIT(&ifp->if_groups);
knlist_init(&ifp->if_klist, NULL, NULL, NULL, NULL);
knlist_init_mtx(&ifp->if_klist, NULL);
#ifdef MAC
mac_ifnet_init(ifp);
#endif

View File

@ -462,7 +462,7 @@ tapcreate(struct cdev *dev)
tp->tap_flags |= TAP_INITED;
mtx_unlock(&tp->tap_mtx);
knlist_init(&tp->tap_rsel.si_note, NULL, NULL, NULL, NULL);
knlist_init_mtx(&tp->tap_rsel.si_note, NULL);
TAPDEBUG("interface %s is created. minor = %#x\n",
ifp->if_xname, dev2unit(dev));

View File

@ -391,7 +391,7 @@ tuncreate(const char *name, struct cdev *dev)
IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
ifp->if_snd.ifq_drv_maxlen = 0;
IFQ_SET_READY(&ifp->if_snd);
knlist_init(&sc->tun_rsel.si_note, NULL, NULL, NULL, NULL);
knlist_init_mtx(&sc->tun_rsel.si_note, NULL);
if_attach(ifp);
bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));

View File

@ -579,8 +579,7 @@ audit_pipe_alloc(void)
return (NULL);
ap->ap_qlimit = AUDIT_PIPE_QLIMIT_DEFAULT;
TAILQ_INIT(&ap->ap_queue);
knlist_init(&ap->ap_selinfo.si_note, AUDIT_PIPE_MTX(ap), NULL, NULL,
NULL);
knlist_init_mtx(&ap->ap_selinfo.si_note, AUDIT_PIPE_MTX(ap));
AUDIT_PIPE_LOCK_INIT(ap);
AUDIT_PIPE_SX_LOCK_INIT(ap);
cv_init(&ap->ap_cv, "audit_pipe");

View File

@ -124,7 +124,8 @@ struct knlist {
struct klist kl_list;
void (*kl_lock)(void *); /* lock function */
void (*kl_unlock)(void *);
int (*kl_locked)(void *);
void (*kl_assert_locked)(void *);
void (*kl_assert_unlocked)(void *);
void *kl_lockarg; /* argument passed to kl_lockf() */
};
@ -203,6 +204,7 @@ struct kevent_copyops {
struct thread;
struct proc;
struct knlist;
struct mtx;
extern void knote(struct knlist *list, long hint, int islocked);
extern void knote_fork(struct knlist *list, int pid);
@ -212,7 +214,8 @@ extern void knlist_remove_inevent(struct knlist *knl, struct knote *kn);
extern int knlist_empty(struct knlist *knl);
extern void knlist_init(struct knlist *knl, void *lock,
void (*kl_lock)(void *), void (*kl_unlock)(void *),
int (*kl_locked)(void *));
void (*kl_assert_locked)(void *), void (*kl_assert_unlocked)(void *));
extern void knlist_init_mtx(struct knlist *knl, struct mtx *lock);
extern void knlist_destroy(struct knlist *knl);
extern void knlist_cleardel(struct knlist *knl, struct thread *td,
int islocked, int killkn);