When we traverse the vnodes on a mountpoint we need to look out for
our cached 'next vnode' being removed from this mountpoint.  If we
find that it was recycled, we restart our traversal from the start
of the list.

Code to do that is in all local disk filesystems (and a few other
places) and looks roughly like this:

		MNT_ILOCK(mp);
	loop:
		for (vp = TAILQ_FIRST(&mp...); vp != NULL; vp = nvp) {
			if (vp->v_mount != mp)
				goto loop;
			nvp = TAILQ_NEXT(vp, ...);
			MNT_IUNLOCK(mp);
			...
			MNT_ILOCK(mp);
		}
		MNT_IUNLOCK(mp);

The code which takes vnodes off a mountpoint looks like this:

	MNT_ILOCK(vp->v_mount);
	...
	TAILQ_REMOVE(&vp->v_mount->mnt_nvnodelist, vp, v_nmntvnodes);
	...
	MNT_IUNLOCK(vp->v_mount);
	...
	vp->v_mount = something;

(Take a moment and try to spot the locking error before you read on.)

On an SMP system, one CPU could have removed nvp from our mountlist
but not yet assigned a new value to its v_mount field, while another
CPU simultaneously gets to the top of the traversal loop, where it
finds that (vp->v_mount != mp) is false despite the fact that the
vnode has indeed been removed from our mountpoint.
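
One possible interleaving, where vp is our cached nvp:

	CPU A (removing vp)			CPU B (traversing)
	MNT_ILOCK(mp);
	TAILQ_REMOVE(..., vp, ...);
	MNT_IUNLOCK(mp);
						MNT_ILOCK(mp);
						vp->v_mount is still mp,
						so the check does not fire
						and the stale vnode is used
	vp->v_mount = something;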

Fix:

Introduce the macro MNT_VNODE_FOREACH() to traverse the list of
vnodes on a mountpoint while taking into account that vnodes may
be removed from the list as we go.  This saves approx 65 lines of
duplicated code.
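
With the macro, each of those traversals collapses to something like
the following sketch (condensed from the converted loops below; the
vnode locking and the actual work inside the body stay filesystem
specific):

		struct vnode *vp, *nvp;

		MNT_ILOCK(mp);
		MNT_VNODE_FOREACH(vp, mp, nvp) {
			VI_LOCK(vp);
			MNT_IUNLOCK(mp);
			...
			MNT_ILOCK(mp);
		}
		MNT_IUNLOCK(mp);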

Split insmntque(), which potentially moves a vnode from one mount
point to another, into delmntque() and insmntque(), which do just
what their names say.

Make delmntque() set vp->v_mount to NULL while still holding the
mountpoint lock, so the (vp->v_mount != mp) check above can no
longer be fooled.
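
For reference, the new delmntque() as it appears in the vfs_subr.c
change below (condensed, KASSERT omitted):

	static void
	delmntque(struct vnode *vp)
	{
		struct mount *mp;

		if (vp->v_mount == NULL)
			return;
		mp = vp->v_mount;
		MNT_ILOCK(mp);
		vp->v_mount = NULL;
		TAILQ_REMOVE(&mp->mnt_nvnodelist, vp, v_nmntvnodes);
		mp->mnt_nvnodelistsize--;
		MNT_IUNLOCK(mp);
	}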

Poul-Henning Kamp	2004-07-04 08:52:35 +00:00
commit e3c5a7a4dd	parent f5e16e6131
15 changed files with 92 additions and 168 deletions


@@ -308,15 +308,12 @@ void
 coda_checkunmounting(mp)
 	struct mount *mp;
 {
-	register struct vnode *vp, *nvp;
+	struct vnode *vp, *nvp;
 	struct cnode *cp;
 	int count = 0, bad = 0;
 	MNT_ILOCK(mp);
-	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp; vp = nvp) {
-		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
-		if (vp->v_mount != mp)
-			continue;
+	MNT_VNODE_FOREACH(vp, mp, nvp) {
 		VI_LOCK(vp);
 		if (vp->v_iflag & VI_XLOCK) {
 			VI_UNLOCK(vp);


@@ -308,15 +308,12 @@ void
 coda_checkunmounting(mp)
 	struct mount *mp;
 {
-	register struct vnode *vp, *nvp;
+	struct vnode *vp, *nvp;
 	struct cnode *cp;
 	int count = 0, bad = 0;
 	MNT_ILOCK(mp);
-	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp; vp = nvp) {
-		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
-		if (vp->v_mount != mp)
-			continue;
+	MNT_VNODE_FOREACH(vp, mp, nvp) {
 		VI_LOCK(vp);
 		if (vp->v_iflag & VI_XLOCK) {
 			VI_UNLOCK(vp);


@@ -812,15 +812,7 @@ msdosfs_sync(mp, waitfor, cred, td)
 	 */
 	MNT_ILOCK(mp);
 loop:
-	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
-		/*
-		 * If the vnode that we are about to sync is no longer
-		 * associated with this mount point, start over.
-		 */
-		if (vp->v_mount != mp)
-			goto loop;
-		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
+	MNT_VNODE_FOREACH(vp, mp, nvp) {
 		VI_LOCK(vp);
 		if (vp->v_iflag & VI_XLOCK) {
 			VI_UNLOCK(vp);


@@ -572,12 +572,7 @@ ext2_reload(mp, cred, td)
 loop:
 	MNT_ILOCK(mp);
-	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
-		if (vp->v_mount != mp) {
-			MNT_IUNLOCK(mp);
-			goto loop;
-		}
-		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
+	MNT_VNODE_FOREACH(vp, mp, nvp) {
 		VI_LOCK(vp);
 		if (vp->v_iflag & VI_XLOCK) {
 			VI_UNLOCK(vp);
@@ -924,14 +919,7 @@ ext2_sync(mp, waitfor, cred, td)
 	 */
 	MNT_ILOCK(mp);
 loop:
-	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
-		/*
-		 * If the vnode that we are about to sync is no longer
-		 * associated with this mount point, start over.
-		 */
-		if (vp->v_mount != mp)
-			goto loop;
-		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
+	MNT_VNODE_FOREACH(vp, mp, nvp) {
 		VI_LOCK(vp);
 		if (vp->v_iflag & VI_XLOCK) {
 			VI_UNLOCK(vp);


@@ -572,12 +572,7 @@ ext2_reload(mp, cred, td)
 loop:
 	MNT_ILOCK(mp);
-	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
-		if (vp->v_mount != mp) {
-			MNT_IUNLOCK(mp);
-			goto loop;
-		}
-		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
+	MNT_VNODE_FOREACH(vp, mp, nvp) {
 		VI_LOCK(vp);
 		if (vp->v_iflag & VI_XLOCK) {
 			VI_UNLOCK(vp);
@@ -924,14 +919,7 @@ ext2_sync(mp, waitfor, cred, td)
 	 */
 	MNT_ILOCK(mp);
 loop:
-	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
-		/*
-		 * If the vnode that we are about to sync is no longer
-		 * associated with this mount point, start over.
-		 */
-		if (vp->v_mount != mp)
-			goto loop;
-		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
+	MNT_VNODE_FOREACH(vp, mp, nvp) {
 		VI_LOCK(vp);
 		if (vp->v_iflag & VI_XLOCK) {
 			VI_UNLOCK(vp);


@@ -717,15 +717,7 @@ vfs_stdsync(mp, waitfor, cred, td)
 	 */
 	MNT_ILOCK(mp);
 loop:
-	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
-		/*
-		 * If the vnode that we are about to sync is no longer
-		 * associated with this mount point, start over.
-		 */
-		if (vp->v_mount != mp)
-			goto loop;
-		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
+	MNT_VNODE_FOREACH(vp, mp, nvp) {
 		VI_LOCK(vp);
 		if (TAILQ_EMPTY(&vp->v_dirtyblkhd)) {


@@ -1601,3 +1601,30 @@ vfs_copyopt(opts, name, dest, len)
 	}
 	return (ENOENT);
 }
+
+/*
+ * This is a helper function for filesystems to traverse their
+ * vnodes. See MNT_VNODE_FOREACH() in sys/mount.h
+ */
+struct vnode *
+__mnt_vnode_next(struct vnode **nvp, struct mount *mp)
+{
+	struct vnode *vp;
+
+	mtx_assert(&mp->mnt_mtx, MA_OWNED);
+	vp = *nvp;
+	/* Check if we are done */
+	if (vp == NULL)
+		return (NULL);
+	/* If our next vnode is no longer ours, start over */
+	if (vp->v_mount != mp)
+		vp = TAILQ_FIRST(&mp->mnt_nvnodelist);
+	/* Save pointer to next vnode in list */
+	if (vp != NULL)
+		*nvp = TAILQ_NEXT(vp, v_nmntvnodes);
+	else
+		*nvp = NULL;
+	return (vp);
+}


@@ -77,6 +77,7 @@ __FBSDID("$FreeBSD$");
 static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure");
 
 static void	addalias(struct vnode *vp, struct cdev *nvp_rdev);
+static void	delmntque(struct vnode *vp);
 static void	insmntque(struct vnode *vp, struct mount *mp);
 static void	vclean(struct vnode *vp, int flags, struct thread *td);
 static void	vlruvp(struct vnode *vp);
@@ -836,40 +837,45 @@ getnewvnode(tag, mp, vops, vpp)
 	if (mp != NULL && (mp->mnt_flag & MNT_MULTILABEL) == 0)
 		mac_associate_vnode_singlelabel(mp, vp);
 #endif
+	delmntque(vp);
 	insmntque(vp, mp);
 
 	return (0);
 }
 
 /*
- * Move a vnode from one mount queue to another.
+ * Delete from old mount point vnode list, if on one.
  */
 static void
-insmntque(vp, mp)
-	register struct vnode *vp;
-	register struct mount *mp;
+delmntque(struct vnode *vp)
 {
+	struct mount *mp;
+
+	if (vp->v_mount == NULL)
+		return;
+	mp = vp->v_mount;
+	MNT_ILOCK(mp);
+	vp->v_mount = NULL;
+	KASSERT(mp->mnt_nvnodelistsize > 0,
+		("bad mount point vnode list size"));
+	TAILQ_REMOVE(&mp->mnt_nvnodelist, vp, v_nmntvnodes);
+	mp->mnt_nvnodelistsize--;
+	MNT_IUNLOCK(mp);
+}
+
+/*
+ * Insert into list of vnodes for the new mount point, if available.
+ */
+static void
+insmntque(struct vnode *vp, struct mount *mp)
+{
-	/*
-	 * Delete from old mount point vnode list, if on one.
-	 */
-	if (vp->v_mount != NULL) {
-		MNT_ILOCK(vp->v_mount);
-		KASSERT(vp->v_mount->mnt_nvnodelistsize > 0,
-			("bad mount point vnode list size"));
-		TAILQ_REMOVE(&vp->v_mount->mnt_nvnodelist, vp, v_nmntvnodes);
-		vp->v_mount->mnt_nvnodelistsize--;
-		MNT_IUNLOCK(vp->v_mount);
-	}
-	/*
-	 * Insert into list of vnodes for the new mount point, if available.
-	 */
-	if ((vp->v_mount = mp) != NULL) {
-		MNT_ILOCK(vp->v_mount);
-		TAILQ_INSERT_TAIL(&mp->mnt_nvnodelist, vp, v_nmntvnodes);
-		mp->mnt_nvnodelistsize++;
-		MNT_IUNLOCK(vp->v_mount);
-	}
+	vp->v_mount = mp;
+	KASSERT(mp != NULL, ("Don't call insmntque(foo, NULL)"));
+	MNT_ILOCK(vp->v_mount);
+	TAILQ_INSERT_TAIL(&mp->mnt_nvnodelist, vp, v_nmntvnodes);
+	mp->mnt_nvnodelistsize++;
+	MNT_IUNLOCK(vp->v_mount);
 }
@@ -1885,6 +1891,7 @@ addaliasu(nvp, nvp_rdev)
 		vn_lock(ovp, LK_EXCLUSIVE | LK_RETRY, curthread);
 	}
 	nvp->v_op = ops;
+	delmntque(ovp);
 	insmntque(ovp, nvp->v_mount);
 	vrele(nvp);
 	vgone(nvp);
@@ -2219,14 +2226,7 @@ vflush(mp, rootrefs, flags)
 	}
 	MNT_ILOCK(mp);
 loop:
-	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp; vp = nvp) {
-		/*
-		 * Make sure this vnode wasn't reclaimed in getnewvnode().
-		 * Start over if it has (it won't be on the list anymore).
-		 */
-		if (vp->v_mount != mp)
-			goto loop;
-		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
+	MNT_VNODE_FOREACH(vp, mp, nvp) {
 		VI_LOCK(vp);
 		MNT_IUNLOCK(mp);
@@ -2466,8 +2466,7 @@ vclean(vp, flags, td)
 	/*
 	 * Delete from old mount point vnode list.
 	 */
-	if (vp->v_mount != NULL)
-		insmntque(vp, (struct mount *)0);
+	delmntque(vp);
 	cache_purge(vp);
 	VI_LOCK(vp);
 	if (VSHOULDFREE(vp))
@@ -2594,8 +2593,7 @@ vgonechrl(struct vnode *vp, struct thread *td)
 	vp->v_vnlock = &vp->v_lock;
 	vp->v_tag = "orphanchr";
 	vp->v_op = spec_vnodeop_p;
-	if (vp->v_mount != NULL)
-		insmntque(vp, (struct mount *)0);
+	delmntque(vp);
 	cache_purge(vp);
 	vrele(vp);
 	VI_LOCK(vp);
@@ -3082,13 +3080,12 @@ vfs_msync(struct mount *mp, int flags)
 	tries = 5;
 	MNT_ILOCK(mp);
 loop:
-	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
+	TAILQ_FOREACH_SAFE(vp, &mp->mnt_nvnodelist, v_nmntvnodes, nvp) {
 		if (vp->v_mount != mp) {
 			if (--tries > 0)
 				goto loop;
 			break;
 		}
-		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
 		VI_LOCK(vp);
 		if (vp->v_iflag & VI_XLOCK) {


@@ -716,7 +716,7 @@ nfs_root(struct mount *mp, struct vnode **vpp)
 static int
 nfs_sync(struct mount *mp, int waitfor, struct ucred *cred, struct thread *td)
 {
-	struct vnode *vp, *vnp;
+	struct vnode *vp, *nvp;
 	int error, allerror = 0;
 
 	/*
@@ -724,16 +724,7 @@ nfs_sync(struct mount *mp, int waitfor, struct ucred *cred, struct thread *td)
 	 */
 	MNT_ILOCK(mp);
 loop:
-	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist);
-	    vp != NULL;
-	    vp = vnp) {
-		/*
-		 * If the vnode that we are about to sync is no longer
-		 * associated with this mount point, start over.
-		 */
-		if (vp->v_mount != mp)
-			goto loop;
-		vnp = TAILQ_NEXT(vp, v_nmntvnodes);
+	MNT_VNODE_FOREACH(vp, mp, nvp) {
 		VI_LOCK(vp);
 		MNT_IUNLOCK(mp);
 		if (VOP_ISLOCKED(vp, NULL) || TAILQ_EMPTY(&vp->v_dirtyblkhd) ||


@@ -813,11 +813,7 @@ nfs_clearcommit(struct mount *mp)
 	s = splbio();
 	MNT_ILOCK(mp);
-loop:
-	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp; vp = nvp) {
-		if (vp->v_mount != mp)	/* Paranoia */
-			goto loop;
-		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
+	MNT_VNODE_FOREACH(vp, mp, nvp) {
 		VI_LOCK(vp);
 		if (vp->v_iflag & VI_XLOCK) {
 			VI_UNLOCK(vp);


@@ -930,7 +930,7 @@ nfs_root(struct mount *mp, struct vnode **vpp)
 static int
 nfs_sync(struct mount *mp, int waitfor, struct ucred *cred, struct thread *td)
 {
-	struct vnode *vp, *vnp;
+	struct vnode *vp, *nvp;
 	int error, allerror = 0;
 
 	/*
@@ -938,16 +938,7 @@ nfs_sync(struct mount *mp, int waitfor, struct ucred *cred, struct thread *td)
 	 */
 	MNT_ILOCK(mp);
 loop:
-	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist);
-	    vp != NULL;
-	    vp = vnp) {
-		/*
-		 * If the vnode that we are about to sync is no longer
-		 * associated with this mount point, start over.
-		 */
-		if (vp->v_mount != mp)
-			goto loop;
-		vnp = TAILQ_NEXT(vp, v_nmntvnodes);
+	MNT_VNODE_FOREACH(vp, mp, nvp) {
 		VI_LOCK(vp);
 		MNT_IUNLOCK(mp);
 		if (VOP_ISLOCKED(vp, NULL) || TAILQ_EMPTY(&vp->v_dirtyblkhd) ||


@@ -165,6 +165,7 @@ struct mount {
 	int		mnt_nvnodelistsize;	/* # of vnodes on this mount */
 };
 
+struct vnode *__mnt_vnode_next(struct vnode **nvp, struct mount *mp);
+#define MNT_VNODE_FOREACH(vp, mp, vp2) \
+	for ((vp2) = TAILQ_FIRST(&(mp)->mnt_nvnodelist); \
+	    (vp = __mnt_vnode_next(&(vp2), (mp))) != NULL;)
+
 #define MNT_ILOCK(mp)	mtx_lock(&(mp)->mnt_mtx)
 #define MNT_IUNLOCK(mp)	mtx_unlock(&(mp)->mnt_mtx)


@@ -407,14 +407,7 @@ ffs_snapshot(mp, snapfile)
 	mp->mnt_kern_flag &= ~MNTK_SUSPENDED;
 	MNT_ILOCK(mp);
 loop:
-	for (xvp = TAILQ_FIRST(&mp->mnt_nvnodelist); xvp; xvp = nvp) {
-		/*
-		 * Make sure this vnode wasn't reclaimed in getnewvnode().
-		 * Start over if it has (it won't be on the list anymore).
-		 */
-		if (xvp->v_mount != mp)
-			goto loop;
-		nvp = TAILQ_NEXT(xvp, v_nmntvnodes);
+	MNT_VNODE_FOREACH(xvp, mp, nvp) {
 		VI_LOCK(xvp);
 		MNT_IUNLOCK(mp);
 		if ((xvp->v_iflag & VI_XLOCK) ||


@@ -489,12 +489,7 @@ ffs_reload(mp, cred, td)
 loop:
 	MNT_ILOCK(mp);
-	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
-		if (vp->v_mount != mp) {
-			MNT_IUNLOCK(mp);
-			goto loop;
-		}
-		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
+	MNT_VNODE_FOREACH(vp, mp, nvp) {
 		VI_LOCK(vp);
 		if (vp->v_iflag & VI_XLOCK) {
 			VI_UNLOCK(vp);
@@ -1129,21 +1124,13 @@ ffs_sync(mp, waitfor, cred, td)
 	lockreq |= LK_INTERLOCK;
 	MNT_ILOCK(mp);
 loop:
-	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
-		/*
-		 * If the vnode that we are about to sync is no longer
-		 * associated with this mount point, start over.
-		 */
-		if (vp->v_mount != mp)
-			goto loop;
+	MNT_VNODE_FOREACH(vp, mp, nvp) {
 		/*
 		 * Depend on the mntvnode_slock to keep things stable enough
 		 * for a quick test. Since there might be hundreds of
 		 * thousands of vnodes, we cannot afford even a subroutine
 		 * call unless there's a good chance that we have work to do.
 		 */
-		nvp = TAILQ_NEXT(vp, v_nmntvnodes);
 		VI_LOCK(vp);
 		if (vp->v_iflag & VI_XLOCK) {
 			VI_UNLOCK(vp);
@@ -1168,8 +1155,6 @@ ffs_sync(mp, waitfor, cred, td)
 		VOP_UNLOCK(vp, 0, td);
 		vrele(vp);
 		MNT_ILOCK(mp);
-		if (TAILQ_NEXT(vp, v_nmntvnodes) != nvp)
-			goto loop;
 	}
 	MNT_IUNLOCK(mp);
 	/*


@@ -403,7 +403,7 @@ quotaon(td, mp, type, fname)
 {
 	struct ufsmount *ump = VFSTOUFS(mp);
 	struct vnode *vp, **vpp;
-	struct vnode *nextvp;
+	struct vnode *nvp;
 	struct dquot *dq;
 	int error, flags;
 	struct nameidata nd;
@@ -453,10 +453,7 @@ quotaon(td, mp, type, fname)
 	 */
 	MNT_ILOCK(mp);
 again:
-	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nextvp) {
-		if (vp->v_mount != mp)
-			goto again;
-		nextvp = TAILQ_NEXT(vp, v_nmntvnodes);
+	MNT_VNODE_FOREACH(vp, mp, nvp) {
 		VI_LOCK(vp);
 		MNT_IUNLOCK(mp);
 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
@@ -475,8 +472,6 @@ quotaon(td, mp, type, fname)
 		MNT_ILOCK(mp);
 		if (error)
 			break;
-		if (TAILQ_NEXT(vp, v_nmntvnodes) != nextvp)
-			goto again;
 	}
 	MNT_IUNLOCK(mp);
 	ump->um_qflags[type] &= ~QTF_OPENING;
@@ -495,7 +490,7 @@ quotaoff(td, mp, type)
 	int type;
 {
 	struct vnode *vp;
-	struct vnode *qvp, *nextvp;
+	struct vnode *qvp, *nvp;
 	struct ufsmount *ump = VFSTOUFS(mp);
 	struct dquot *dq;
 	struct inode *ip;
@@ -514,11 +509,7 @@ quotaoff(td, mp, type)
 	 */
 	MNT_ILOCK(mp);
 again:
-	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nextvp) {
-		if (vp->v_mount != mp)
-			goto again;
-		nextvp = TAILQ_NEXT(vp, v_nmntvnodes);
+	MNT_VNODE_FOREACH(vp, mp, nvp) {
 		VI_LOCK(vp);
 		MNT_IUNLOCK(mp);
 		if (vp->v_type == VNON) {
@@ -537,8 +528,6 @@ quotaoff(td, mp, type)
 		VOP_UNLOCK(vp, 0, td);
 		vrele(vp);
 		MNT_ILOCK(mp);
-		if (TAILQ_NEXT(vp, v_nmntvnodes) != nextvp)
-			goto again;
 	}
 	MNT_IUNLOCK(mp);
 	dqflush(qvp);
@@ -728,7 +717,7 @@ qsync(mp)
 {
 	struct ufsmount *ump = VFSTOUFS(mp);
 	struct thread *td = curthread;		/* XXX */
-	struct vnode *vp, *nextvp;
+	struct vnode *vp, *nvp;
 	struct dquot *dq;
 	int i, error;
@@ -747,10 +736,7 @@ qsync(mp)
 	 */
 	MNT_ILOCK(mp);
 again:
-	for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nextvp) {
-		if (vp->v_mount != mp)
-			goto again;
-		nextvp = TAILQ_NEXT(vp, v_nmntvnodes);
+	MNT_VNODE_FOREACH(vp, mp, nvp) {
 		VI_LOCK(vp);
 		MNT_IUNLOCK(mp);
 		if (vp->v_type == VNON) {
@@ -772,8 +758,6 @@ qsync(mp)
 		}
 		vput(vp);
 		MNT_ILOCK(mp);
-		if (TAILQ_NEXT(vp, v_nmntvnodes) != nextvp)
-			goto again;
 	}
 	MNT_IUNLOCK(mp);
 	return (0);