This change creates a new list of active vnodes associated with
a mount point. Active vnodes are those with a non-zero use or hold
count, i.e., those vnodes that are not on the free list. Note that
this list is in addition to the list of all the vnodes associated
with a mount point.

To avoid adding another set of linkage pointers to the vnode
structure, the active list shares the linkage pointers already used
by the free list (the field previously named v_freelist, now renamed
v_actfreelist).

This update adds the MNT_VNODE_FOREACH_ACTIVE interface, which loops
over just the active vnodes of a mount point (typically less than 1%
of all the vnodes associated with that mount point).
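
As a quick illustration of the new interface, here is a minimal sketch
(not part of this commit) of how a filesystem routine might walk only
the active vnodes of a mount point. The function name
example_count_active, the VNON filter, and the "per-vnode work"
placeholder are hypothetical; MNT_VNODE_FOREACH_ACTIVE,
MNT_VNODE_FOREACH_ACTIVE_ABORT, VI_UNLOCK, vget, and vput are existing
or newly added kernel interfaces. The iterator returns each vnode with
its interlock held, so the loop body must release it (or hand it off
via LK_INTERLOCK), and any early exit from the loop must go through
MNT_VNODE_FOREACH_ACTIVE_ABORT so the marker vnode is freed.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/mount.h>
#include <sys/vnode.h>

/*
 * Hypothetical example: visit every active vnode on a mount point and
 * count how many could not be locked.  A real user would do per-vnode
 * work (e.g., syncing) where indicated.
 */
static int
example_count_active(struct mount *mp)
{
	struct vnode *vp, *mvp;	/* mvp is the marker managed by the macro */
	int busy;

	busy = 0;
	MNT_VNODE_FOREACH_ACTIVE(vp, mp, mvp) {
		if (vp->v_type == VNON) {
			/* Nothing to do; just drop the vnode interlock. */
			VI_UNLOCK(vp);
			continue;
		}
		/*
		 * Take a use reference before doing real work; LK_INTERLOCK
		 * tells vget() to consume the interlock held by the iterator.
		 */
		if (vget(vp, LK_SHARED | LK_INTERLOCK, curthread) != 0) {
			busy++;
			continue;
		}
		/* ... per-vnode work would go here ... */
		vput(vp);
	}
	return (busy);
}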

Reviewed by: kib
Tested by:   Peter Holm
MFC after:   2 weeks
Kirk McKusick, 2012-04-20 06:50:44 +00:00
commit f257ebbb2e (parent f1c05d0b46)
6 changed files with 203 additions and 12 deletions

sys/fs/msdosfs/msdosfs_vfsops.c

@@ -834,7 +834,7 @@ msdosfs_unmount(struct mount *mp, int mntflags)
vn_printf(vp,
"msdosfs_umount(): just before calling VOP_CLOSE()\n");
printf("freef %p, freeb %p, mount %p\n",
TAILQ_NEXT(vp, v_freelist), vp->v_freelist.tqe_prev,
TAILQ_NEXT(vp, v_actfreelist), vp->v_actfreelist.tqe_prev,
vp->v_mount);
printf("cleanblkhd %p, dirtyblkhd %p, numoutput %ld, type %d\n",
TAILQ_FIRST(&vp->v_bufobj.bo_clean.bv_hd),

sys/fs/nfsserver/nfs_nfsdport.c

@@ -2907,12 +2907,14 @@ nfsd_mntinit(void)
inited = 1;
nfsv4root_mnt.mnt_flag = (MNT_RDONLY | MNT_EXPORTED);
TAILQ_INIT(&nfsv4root_mnt.mnt_nvnodelist);
TAILQ_INIT(&nfsv4root_mnt.mnt_activevnodelist);
nfsv4root_mnt.mnt_export = NULL;
TAILQ_INIT(&nfsv4root_opt);
TAILQ_INIT(&nfsv4root_newopt);
nfsv4root_mnt.mnt_opt = &nfsv4root_opt;
nfsv4root_mnt.mnt_optnew = &nfsv4root_newopt;
nfsv4root_mnt.mnt_nvnodelistsize = 0;
nfsv4root_mnt.mnt_activevnodelistsize = 0;
}
/*

sys/kern/vfs_mount.c

@@ -461,6 +461,8 @@ vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp, const char *fspath,
__rangeof(struct mount, mnt_startzero, mnt_endzero));
TAILQ_INIT(&mp->mnt_nvnodelist);
mp->mnt_nvnodelistsize = 0;
TAILQ_INIT(&mp->mnt_activevnodelist);
mp->mnt_activevnodelistsize = 0;
mp->mnt_ref = 0;
(void) vfs_busy(mp, MBF_NOWAIT);
mp->mnt_op = vfsp->vfc_vfsops;
@@ -514,6 +516,8 @@ vfs_mount_destroy(struct mount *mp)
}
if (mp->mnt_nvnodelistsize != 0)
panic("vfs_mount_destroy: nonzero nvnodelistsize");
if (mp->mnt_activevnodelistsize != 0)
panic("vfs_mount_destroy: nonzero activevnodelistsize");
if (mp->mnt_lockref != 0)
panic("vfs_mount_destroy: nonzero lock refcount");
MNT_IUNLOCK(mp);

sys/kern/vfs_subr.c

@@ -775,12 +775,16 @@ vnlru_free(int count)
break;
VNASSERT(vp->v_op != NULL, vp,
("vnlru_free: vnode already reclaimed."));
TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
KASSERT((vp->v_iflag & VI_FREE) != 0,
("Removing vnode not on freelist"));
KASSERT((vp->v_iflag & VI_ACTIVE) == 0,
("Mangling active vnode"));
TAILQ_REMOVE(&vnode_free_list, vp, v_actfreelist);
/*
* Don't recycle if we can't get the interlock.
*/
if (!VI_TRYLOCK(vp)) {
TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_actfreelist);
continue;
}
VNASSERT(VCANRECYCLE(vp), vp,
@@ -1035,12 +1039,26 @@ static void
delmntque(struct vnode *vp)
{
struct mount *mp;
int active;
mp = vp->v_mount;
if (mp == NULL)
return;
MNT_ILOCK(mp);
VI_LOCK(vp);
KASSERT(mp->mnt_activevnodelistsize <= mp->mnt_nvnodelistsize,
("Active vnode list size %d > Vnode list size %d",
mp->mnt_activevnodelistsize, mp->mnt_nvnodelistsize));
active = vp->v_iflag & VI_ACTIVE;
vp->v_iflag &= ~VI_ACTIVE;
if (active) {
mtx_lock(&vnode_free_list_mtx);
TAILQ_REMOVE(&mp->mnt_activevnodelist, vp, v_actfreelist);
mp->mnt_activevnodelistsize--;
mtx_unlock(&vnode_free_list_mtx);
}
vp->v_mount = NULL;
VI_UNLOCK(vp);
VNASSERT(mp->mnt_nvnodelistsize > 0, vp,
("bad mount point vnode list size"));
TAILQ_REMOVE(&mp->mnt_nvnodelist, vp, v_nmntvnodes);
@@ -1080,13 +1098,24 @@ insmntque1(struct vnode *vp, struct mount *mp,
ASSERT_VOP_ELOCKED(vp,
"insmntque: mp-safe fs and non-locked vp");
#endif
/*
* We acquire the vnode interlock early to ensure that the
* vnode cannot be recycled by another process releasing a
* holdcnt on it before we get it on both the vnode list
* and the active vnode list. The mount mutex protects only
* manipulation of the vnode list and the vnode freelist
* mutex protects only manipulation of the active vnode list.
* Hence the need to hold the vnode interlock throughout.
*/
MNT_ILOCK(mp);
VI_LOCK(vp);
if ((mp->mnt_kern_flag & MNTK_NOINSMNTQ) != 0 &&
((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0 ||
mp->mnt_nvnodelistsize == 0)) {
locked = VOP_ISLOCKED(vp);
if (!locked || (locked == LK_EXCLUSIVE &&
(vp->v_vflag & VV_FORCEINSMQ) == 0)) {
VI_UNLOCK(vp);
MNT_IUNLOCK(mp);
if (dtr != NULL)
dtr(vp, dtr_arg);
@@ -1099,6 +1128,14 @@ insmntque1(struct vnode *vp, struct mount *mp,
VNASSERT(mp->mnt_nvnodelistsize >= 0, vp,
("neg mount point vnode list size"));
mp->mnt_nvnodelistsize++;
KASSERT((vp->v_iflag & VI_ACTIVE) == 0,
("Activating already active vnode"));
vp->v_iflag |= VI_ACTIVE;
mtx_lock(&vnode_free_list_mtx);
TAILQ_INSERT_HEAD(&mp->mnt_activevnodelist, vp, v_actfreelist);
mp->mnt_activevnodelistsize++;
mtx_unlock(&vnode_free_list_mtx);
VI_UNLOCK(vp);
MNT_IUNLOCK(mp);
return (0);
}
@@ -2309,6 +2346,7 @@ vhold(struct vnode *vp)
void
vholdl(struct vnode *vp)
{
struct mount *mp;
CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
vp->v_holdcnt++;
@@ -2318,12 +2356,19 @@ vholdl(struct vnode *vp)
VNASSERT((vp->v_iflag & VI_FREE) != 0, vp, ("vnode not free"));
VNASSERT(vp->v_op != NULL, vp, ("vholdl: vnode already reclaimed."));
/*
* Remove a vnode from the free list and mark it as in use.
* Remove a vnode from the free list, mark it as in use,
* and put it on the active list.
*/
mtx_lock(&vnode_free_list_mtx);
TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
TAILQ_REMOVE(&vnode_free_list, vp, v_actfreelist);
freevnodes--;
vp->v_iflag &= ~(VI_FREE|VI_AGE);
KASSERT((vp->v_iflag & VI_ACTIVE) == 0,
("Activating already active vnode"));
vp->v_iflag |= VI_ACTIVE;
mp = vp->v_mount;
TAILQ_INSERT_HEAD(&mp->mnt_activevnodelist, vp, v_actfreelist);
mp->mnt_activevnodelistsize++;
mtx_unlock(&vnode_free_list_mtx);
}
@@ -2348,6 +2393,8 @@ void
vdropl(struct vnode *vp)
{
struct bufobj *bo;
struct mount *mp;
int active;
ASSERT_VI_LOCKED(vp, "vdropl");
CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
@@ -2360,19 +2407,28 @@ vdropl(struct vnode *vp)
}
if ((vp->v_iflag & VI_DOOMED) == 0) {
/*
* Mark a vnode as free, putting it up for recycling.
* Mark a vnode as free: remove it from its active list
* and put it up for recycling on the freelist.
*/
mtx_lock(&vnode_free_list_mtx);
VNASSERT(vp->v_op != NULL, vp,
("vdropl: vnode already reclaimed."));
VNASSERT((vp->v_iflag & VI_FREE) == 0, vp,
("vnode already free"));
VNASSERT(VSHOULDFREE(vp), vp,
("vdropl: freeing when we shouldn't"));
active = vp->v_iflag & VI_ACTIVE;
vp->v_iflag &= ~VI_ACTIVE;
mp = vp->v_mount;
mtx_lock(&vnode_free_list_mtx);
if (active) {
TAILQ_REMOVE(&mp->mnt_activevnodelist, vp,
v_actfreelist);
mp->mnt_activevnodelistsize--;
}
if (vp->v_iflag & VI_AGE) {
TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_actfreelist);
} else {
TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_actfreelist);
}
freevnodes++;
vp->v_iflag &= ~VI_AGE;
@@ -3010,6 +3066,8 @@ DB_SHOW_COMMAND(mount, db_show_mount)
db_printf(" mnt_ref = %d\n", mp->mnt_ref);
db_printf(" mnt_gen = %d\n", mp->mnt_gen);
db_printf(" mnt_nvnodelistsize = %d\n", mp->mnt_nvnodelistsize);
db_printf(" mnt_activevnodelistsize = %d\n",
mp->mnt_activevnodelistsize);
db_printf(" mnt_writeopcount = %d\n", mp->mnt_writeopcount);
db_printf(" mnt_maxsymlinklen = %d\n", mp->mnt_maxsymlinklen);
db_printf(" mnt_iosize_max = %d\n", mp->mnt_iosize_max);
@@ -3019,15 +3077,23 @@ DB_SHOW_COMMAND(mount, db_show_mount)
mp->mnt_secondary_accwrites);
db_printf(" mnt_gjprovider = %s\n",
mp->mnt_gjprovider != NULL ? mp->mnt_gjprovider : "NULL");
db_printf("\n");
TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) {
db_printf("\n\nList of active vnodes\n");
TAILQ_FOREACH(vp, &mp->mnt_activevnodelist, v_actfreelist) {
if (vp->v_type != VMARKER) {
vn_printf(vp, "vnode ");
if (db_pager_quit)
break;
}
}
db_printf("\n\nList of inactive vnodes\n");
TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) {
if (vp->v_type != VMARKER && (vp->v_iflag & VI_ACTIVE) == 0) {
vn_printf(vp, "vnode ");
if (db_pager_quit)
break;
}
}
}
#endif /* DDB */
@@ -4558,3 +4624,100 @@ __mnt_vnode_markerfree_all(struct vnode **mvp, struct mount *mp)
free(*mvp, M_VNODE_MARKER);
*mvp = NULL;
}
/*
* These are helper functions for filesystems to traverse their
* active vnodes. See MNT_VNODE_FOREACH_ACTIVE() in sys/mount.h
*/
struct vnode *
__mnt_vnode_next_active(struct vnode **mvp, struct mount *mp)
{
struct vnode *vp, *nvp;
if (should_yield())
kern_yield(PRI_UNCHANGED);
MNT_ILOCK(mp);
KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
vp = TAILQ_NEXT(*mvp, v_actfreelist);
while (vp != NULL) {
VI_LOCK(vp);
if (vp->v_mount == mp && vp->v_type != VMARKER &&
(vp->v_iflag & VI_DOOMED) == 0)
break;
nvp = TAILQ_NEXT(vp, v_actfreelist);
VI_UNLOCK(vp);
vp = nvp;
}
/* Check if we are done */
if (vp == NULL) {
__mnt_vnode_markerfree_active(mvp, mp);
/* MNT_IUNLOCK(mp); -- done in above function */
mtx_assert(MNT_MTX(mp), MA_NOTOWNED);
return (NULL);
}
mtx_lock(&vnode_free_list_mtx);
TAILQ_REMOVE(&mp->mnt_activevnodelist, *mvp, v_actfreelist);
TAILQ_INSERT_AFTER(&mp->mnt_activevnodelist, vp, *mvp, v_actfreelist);
mtx_unlock(&vnode_free_list_mtx);
MNT_IUNLOCK(mp);
return (vp);
}
struct vnode *
__mnt_vnode_first_active(struct vnode **mvp, struct mount *mp)
{
struct vnode *vp, *nvp;
*mvp = malloc(sizeof(struct vnode), M_VNODE_MARKER, M_WAITOK | M_ZERO);
MNT_ILOCK(mp);
MNT_REF(mp);
(*mvp)->v_type = VMARKER;
vp = TAILQ_NEXT(*mvp, v_actfreelist);
while (vp != NULL) {
VI_LOCK(vp);
if (vp->v_mount == mp && vp->v_type != VMARKER &&
(vp->v_iflag & VI_DOOMED) == 0)
break;
nvp = TAILQ_NEXT(vp, v_actfreelist);
VI_UNLOCK(vp);
vp = nvp;
}
/* Check if we are done */
if (vp == NULL) {
MNT_REL(mp);
MNT_IUNLOCK(mp);
free(*mvp, M_VNODE_MARKER);
*mvp = NULL;
return (NULL);
}
(*mvp)->v_mount = mp;
mtx_lock(&vnode_free_list_mtx);
TAILQ_INSERT_AFTER(&mp->mnt_activevnodelist, vp, *mvp, v_actfreelist);
mtx_unlock(&vnode_free_list_mtx);
MNT_IUNLOCK(mp);
return (vp);
}
void
__mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *mp)
{
if (*mvp == NULL) {
MNT_IUNLOCK(mp);
return;
}
mtx_assert(MNT_MTX(mp), MA_OWNED);
KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
mtx_lock(&vnode_free_list_mtx);
TAILQ_REMOVE(&mp->mnt_activevnodelist, *mvp, v_actfreelist);
mtx_unlock(&vnode_free_list_mtx);
MNT_REL(mp);
MNT_IUNLOCK(mp);
free(*mvp, M_VNODE_MARKER);
*mvp = NULL;
}

sys/sys/mount.h

@@ -164,6 +164,8 @@ struct mount {
int mnt_ref; /* (i) Reference count */
struct vnodelst mnt_nvnodelist; /* (i) list of vnodes */
int mnt_nvnodelistsize; /* (i) # of vnodes */
struct vnodelst mnt_activevnodelist; /* (i) list of active vnodes */
int mnt_activevnodelistsize;/* (i) # of active vnodes */
int mnt_writeopcount; /* (i) write syscalls pending */
int mnt_kern_flag; /* (i) kernel only flags */
uint64_t mnt_flag; /* (i) flags shared with user */
@@ -206,6 +208,25 @@ void __mnt_vnode_markerfree_all(struct vnode **mvp, struct mount *mp);
mtx_assert(MNT_MTX(mp), MA_NOTOWNED); \
} while (0)
/*
* Definitions for MNT_VNODE_FOREACH_ACTIVE.
*/
struct vnode *__mnt_vnode_next_active(struct vnode **mvp, struct mount *mp);
struct vnode *__mnt_vnode_first_active(struct vnode **mvp, struct mount *mp);
void __mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *);
#define MNT_VNODE_FOREACH_ACTIVE(vp, mp, mvp) \
for (vp = __mnt_vnode_first_active(&(mvp), (mp)); \
(vp) != NULL; vp = __mnt_vnode_next_active(&(mvp), (mp)))
#define MNT_VNODE_FOREACH_ACTIVE_ABORT(mp, mvp) \
do { \
MNT_ILOCK(mp); \
__mnt_vnode_markerfree_active(&(mvp), (mp)); \
/* MNT_IUNLOCK(mp); -- done in above function */ \
mtx_assert(MNT_MTX(mp), MA_NOTOWNED); \
} while (0)
/*
* Definitions for MNT_VNODE_FOREACH.
*

sys/sys/vnode.h

@@ -156,7 +156,7 @@ struct vnode {
/*
* The machinery of being a vnode
*/
TAILQ_ENTRY(vnode) v_freelist; /* f vnode freelist */
TAILQ_ENTRY(vnode) v_actfreelist; /* f vnode active/free lists */
struct bufobj v_bufobj; /* * Buffer cache object */
/*
@@ -232,6 +232,7 @@ struct xvnode {
#define VI_AGE 0x0040 /* Insert vnode at head of free list */
#define VI_DOOMED 0x0080 /* This vnode is being recycled */
#define VI_FREE 0x0100 /* This vnode is on the freelist */
#define VI_ACTIVE 0x0200 /* This vnode is on the active list */
#define VI_DOINGINACT 0x0800 /* VOP_INACTIVE is in progress */
#define VI_OWEINACT 0x1000 /* Need to call inactive */