After extensive testing it has been determined that adding complexity

to avoid removing higher level directory vnodes from the namecache has
no perceivable effect, so that complexity will be removed.  This is
especially true when vmiodirenable is turned on, which it is by default
now (vmiodirenable makes a huge difference in directory caching).  The
vfs.vmiodirenable and vfs.nameileafonly sysctls have been left in to allow
further testing, but I expect to rip out vfs.nameileafonly soon too.

I have also determined through testing that the real problem with numvnodes
getting too large is due to the VM Page cache preventing the vnode from
being reclaimed.  The directory stuff made only a tiny dent relative
to Poul's original code, enough so that some tests succeeded.  But tests
with several million small files show that the bigger problem is the VM Page
cache.  This will have to be addressed by a future commit.

MFC after:	3 days
This commit is contained in:
Matthew Dillon 2001-10-01 04:33:35 +00:00
parent e4fea9d1dd
commit b5810bab2d
3 changed files with 100 additions and 30 deletions

View File

@ -101,8 +101,10 @@ static u_long numcache; /* number of cache entries allocated */
SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, "");
static u_long numcachehv; /* number of cache entries with vnodes held */
SYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0, "");
#if 0
static u_long numcachepl; /* number of cache purge for leaf entries */
SYSCTL_ULONG(_debug, OID_AUTO, numcachepl, CTLFLAG_RD, &numcachepl, 0, "");
#endif
struct nchstats nchstats; /* cache effectiveness statistics */
static int doingcache = 1; /* 1 => enable the cache */
@ -246,6 +248,31 @@ cache_zap(ncp)
free(ncp, M_VFSCACHE);
}
/*
 * cache_leaf_test()
 *
 *	Report whether a (directory) vnode is a leaf as far as the namei
 *	cache is concerned: walk every cache entry sourced from the vnode
 *	and look for one that resolves to a directory.  Entries that
 *	resolve to non-directory vnodes, or that have no vnode attached,
 *	do not affect the result, so a "leaf" may still have files cached
 *	beneath it.
 *
 *	Returns 0 if the directory is a leaf, -1 if it isn't.
 */
int
cache_leaf_test(struct vnode *vp)
{
	struct namecache *entry;
	struct vnode *child;

	entry = LIST_FIRST(&vp->v_cache_src);
	while (entry != NULL) {
		child = entry->nc_vp;
		if (child != NULL && child->v_type == VDIR)
			return (-1);
		entry = LIST_NEXT(entry, nc_src);
	}
	return (0);
}
/*
* Lookup an entry in the cache
*
@ -499,6 +526,8 @@ cache_purgevfs(mp)
}
}
#if 0
/*
* Flush all directory entries with no child directories held in
* the cache.
@ -555,6 +584,8 @@ cache_purgeleafdirs(ndir)
numcachepl++;
}
#endif
/*
* Perform canonical checks and cache lookup and pass on to filesystem
* through the vop_cachedlookup only if needed.

View File

@ -110,6 +110,8 @@ SYSCTL_LONG(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, ""
/* Number of vnodes in the free list. */
static u_long freevnodes = 0;
SYSCTL_LONG(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, "");
#if 0
/* Number of vnode allocation. */
static u_long vnodeallocs = 0;
SYSCTL_LONG(_debug, OID_AUTO, vnodeallocs, CTLFLAG_RD, &vnodeallocs, 0, "");
@ -125,6 +127,7 @@ SYSCTL_LONG(_debug, OID_AUTO, vnoderecycleminfreevn, CTLFLAG_RW, &vnoderecyclemi
/* Number of vnodes attempted to recycle at a time. */
static u_long vnoderecyclenumber = 3000;
SYSCTL_LONG(_debug, OID_AUTO, vnoderecyclenumber, CTLFLAG_RW, &vnoderecyclenumber, 0, "");
#endif
/*
* Various variables used for debugging the new implementation of
@ -142,6 +145,8 @@ SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortbad, CTLFLAG_RW, &reassignbufsortbad,
/* Set to 0 for old insertion-sort based reassignbuf, 1 for modern method. */
static int reassignbufmethod = 1;
SYSCTL_INT(_vfs, OID_AUTO, reassignbufmethod, CTLFLAG_RW, &reassignbufmethod, 0, "");
/*
 * Temporary debugging knob consulted by getnewvnode() when a free vnode
 * still has entries on its v_cache_src list:
 *   > 0  do not reuse it if cache_leaf_test() finds a cached subdirectory;
 *   < 0  never reuse it;
 *   0    do not reuse it only when vmiodirenable is 0.
 */
static int nameileafonly = 0;
SYSCTL_INT(_vfs, OID_AUTO, nameileafonly, CTLFLAG_RW, &nameileafonly, 0, "");
#ifdef ENABLE_VFS_IOOPT
/* See NOTES for a description of this setting. */
@ -238,6 +243,9 @@ SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0,
int desiredvnodes;
SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW,
&desiredvnodes, 0, "Maximum number of vnodes");
/*
 * Threshold below which getnewvnode() does not attempt to recycle free
 * vnodes (it checks numvnodes >= minvnodes).  Set to desiredvnodes / 4
 * in vntblinit().
 *
 * The OID number must be OID_AUTO: KERN_MAXVNODES is the fixed number
 * already assigned to kern.maxvnodes, and two sibling nodes must not
 * share one number.
 */
static int minvnodes;
SYSCTL_INT(_kern, OID_AUTO, minvnodes, CTLFLAG_RW,
    &minvnodes, 0, "Minimum number of vnodes");
/*
* Initialize the vnode management data structures.
@ -247,6 +255,7 @@ vntblinit(void *dummy __unused)
{
desiredvnodes = maxproc + cnt.v_page_count / 4;
minvnodes = desiredvnodes / 4;
mtx_init(&mountlist_mtx, "mountlist", MTX_DEF);
mtx_init(&mntvnode_mtx, "mntvnode", MTX_DEF);
mtx_init(&mntid_mtx, "mntid", MTX_DEF);
@ -539,40 +548,68 @@ getnewvnode(tag, mp, vops, vpp)
s = splbio();
mtx_lock(&vnode_free_list_mtx);
if (wantfreevnodes && freevnodes < wantfreevnodes) {
if (freevnodes < wantfreevnodes) {
vp = NULL;
} else if (!wantfreevnodes && freevnodes <= desiredvnodes) {
/*
* XXX: this is only here to be backwards compatible
*/
vp = NULL;
} else for (count = 0; count < freevnodes; count++) {
vp = TAILQ_FIRST(&vnode_free_list);
if (vp == NULL || vp->v_usecount)
panic("getnewvnode: free vnode isn't");
TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
} else if (numvnodes >= minvnodes) {
for (count = 0; count < freevnodes; count++) {
vp = TAILQ_FIRST(&vnode_free_list);
if (vp == NULL || vp->v_usecount)
panic("getnewvnode: free vnode isn't");
TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
/*
* Don't recycle if active in the namecache or
* if it still has cached pages or we cannot get
* its interlock.
*/
if (LIST_FIRST(&vp->v_cache_src) != NULL ||
(VOP_GETVOBJECT(vp, &object) == 0 &&
(object->resident_page_count || object->ref_count)) ||
!mtx_trylock(&vp->v_interlock)) {
/*
* Don't recycle if we still have cached pages or if
* we cannot get the interlock.
*/
if ((VOP_GETVOBJECT(vp, &object) == 0 &&
(object->resident_page_count ||
object->ref_count)) ||
!mtx_trylock(&vp->v_interlock)) {
TAILQ_INSERT_TAIL(&vnode_free_list, vp,
v_freelist);
vp = NULL;
continue;
}
if (LIST_FIRST(&vp->v_cache_src)) {
/*
* note: nameileafonly sysctl is temporary,
* for debugging only, and will eventually be
* removed.
*/
if (nameileafonly > 0) {
/*
* Do not reuse namei-cached directory
* vnodes that have cached
* subdirectories.
*/
if (cache_leaf_test(vp) < 0) {
mtx_unlock(&vp->v_interlock);
vp = NULL;
continue;
}
} else if (nameileafonly < 0 ||
vmiodirenable == 0) {
/*
* Do not reuse namei-cached directory
* vnodes if nameileafonly is -1 or
* if VMIO backing for directories is
* turned off (otherwise we reuse them
* too quickly).
*/
mtx_unlock(&vp->v_interlock);
vp = NULL;
continue;
}
}
/*
* Skip over it if its filesystem is being suspended.
*/
if (vn_start_write(vp, &vnmp, V_NOWAIT) == 0)
break;
mtx_unlock(&vp->v_interlock);
TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
vp = NULL;
continue;
}
/*
* Skip over it if its filesystem is being suspended.
*/
if (vn_start_write(vp, &vnmp, V_NOWAIT) == 0)
break;
mtx_unlock(&vp->v_interlock);
TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
vp = NULL;
}
if (vp) {
vp->v_flag |= VDOOMED;
@ -636,6 +673,7 @@ getnewvnode(tag, mp, vops, vpp)
vfs_object_create(vp, td, td->td_proc->p_ucred);
#if 0
vnodeallocs++;
if (vnodeallocs % vnoderecycleperiod == 0 &&
freevnodes < vnoderecycleminfreevn &&
@ -643,6 +681,7 @@ getnewvnode(tag, mp, vops, vpp)
/* Recycle vnodes. */
cache_purgeleafdirs(vnoderecyclenumber);
}
#endif
return (0);
}

View File

@ -559,7 +559,7 @@ int cache_lookup __P((struct vnode *dvp, struct vnode **vpp,
struct componentname *cnp));
void cache_purge __P((struct vnode *vp));
void cache_purgevfs __P((struct mount *mp));
void cache_purgeleafdirs __P((int ndir));
int cache_leaf_test __P((struct vnode *vp));
void cvtstat __P((struct stat *st, struct ostat *ost));
void cvtnstat __P((struct stat *sb, struct nstat *nsb));
int getnewvnode __P((enum vtagtype tag,