After extensive testing it has been determined that adding complexity to avoid removing higher-level directory vnodes from the namecache has no perceivable effect, so that complexity will be removed. This is especially true when vmiodirenable is turned on, which it now is by default (vmiodirenable makes a huge difference in directory caching). The vfs.vmiodirenable and vfs.nameileafonly sysctls have been left in to allow further testing, but I expect to rip out vfs.nameileafonly soon as well.

I have also determined through testing that the real problem with numvnodes growing too large is the VM page cache preventing vnodes from being reclaimed. The directory changes made only a tiny dent relative to Poul's original code, enough that some tests succeeded, but tests with several million small files show that the bigger problem is the VM page cache. That will have to be addressed by a future commit.

MFC after: 3 days
commit b5810bab2d (parent e4fea9d1dd)
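The knobs discussed above are ordinary sysctls, so their effect can be watched from userland while testing. Below is a minimal sketch using sysctlbyname(3); the sysctl names come from this commit and the surrounding tree, while the program itself is illustrative only.

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
        int vmiodirenable = 0, nameileafonly = 0;
        u_long numvnodes = 0, freevnodes = 0;
        size_t len;

        /* vfs.vmiodirenable: VMIO backing for directories (default 1). */
        len = sizeof(vmiodirenable);
        if (sysctlbyname("vfs.vmiodirenable", &vmiodirenable, &len, NULL, 0) == -1)
                perror("vfs.vmiodirenable");

        /* vfs.nameileafonly: debug knob added by this commit (default 0). */
        len = sizeof(nameileafonly);
        if (sysctlbyname("vfs.nameileafonly", &nameileafonly, &len, NULL, 0) == -1)
                perror("vfs.nameileafonly");

        /* debug.numvnodes / debug.freevnodes: current vnode counts. */
        len = sizeof(numvnodes);
        if (sysctlbyname("debug.numvnodes", &numvnodes, &len, NULL, 0) == -1)
                perror("debug.numvnodes");
        len = sizeof(freevnodes);
        if (sysctlbyname("debug.freevnodes", &freevnodes, &len, NULL, 0) == -1)
                perror("debug.freevnodes");

        printf("vmiodirenable=%d nameileafonly=%d numvnodes=%lu freevnodes=%lu\n",
            vmiodirenable, nameileafonly, numvnodes, freevnodes);
        return (0);
}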
--- sys/kern/vfs_cache.c
+++ sys/kern/vfs_cache.c
@@ -101,8 +101,10 @@ static u_long numcache; /* number of cache entries allocated */
 SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, "");
 static u_long numcachehv; /* number of cache entries with vnodes held */
 SYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0, "");
+#if 0
 static u_long numcachepl; /* number of cache purge for leaf entries */
 SYSCTL_ULONG(_debug, OID_AUTO, numcachepl, CTLFLAG_RD, &numcachepl, 0, "");
+#endif
 struct nchstats nchstats; /* cache effectiveness statistics */
 
 static int doingcache = 1; /* 1 => enable the cache */
@@ -246,6 +248,31 @@ cache_zap(ncp)
 	free(ncp, M_VFSCACHE);
 }
 
+/*
+ * cache_leaf_test()
+ *
+ *	Test whether this (directory) vnode's namei cache entry contains
+ *	subdirectories or not.  Used to determine whether the directory is
+ *	a leaf in the namei cache or not.  Note: the directory may still
+ *	contain files in the namei cache.
+ *
+ *	Returns 0 if the directory is a leaf, -1 if it isn't.
+ */
+int
+cache_leaf_test(struct vnode *vp)
+{
+	struct namecache *ncpc;
+
+	for (ncpc = LIST_FIRST(&vp->v_cache_src);
+	     ncpc != NULL;
+	     ncpc = LIST_NEXT(ncpc, nc_src)
+	) {
+		if (ncpc->nc_vp != NULL && ncpc->nc_vp->v_type == VDIR)
+			return(-1);
+	}
+	return(0);
+}
+
 /*
  * Lookup an entry in the cache
  *
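The function added above is a plain walk of the vnode's v_cache_src list. The userland sketch below models the same traversal with the <sys/queue.h> LIST macros so it can be compiled and run outside the kernel; the structures are simplified stand-ins for struct namecache and struct vnode, not the kernel definitions.

#include <sys/queue.h>
#include <stdio.h>

enum vtype { VREG, VDIR };

struct vnode;

struct namecache {
        LIST_ENTRY(namecache) nc_src;   /* entries with the same source vnode */
        struct vnode *nc_vp;            /* vnode the cached name resolves to */
};

struct vnode {
        enum vtype v_type;
        LIST_HEAD(, namecache) v_cache_src;
};

/* Returns 0 if the directory is a leaf, -1 if a cached child is a directory. */
static int
leaf_test(struct vnode *vp)
{
        struct namecache *ncp;

        LIST_FOREACH(ncp, &vp->v_cache_src, nc_src) {
                if (ncp->nc_vp != NULL && ncp->nc_vp->v_type == VDIR)
                        return (-1);
        }
        return (0);
}

int
main(void)
{
        struct vnode dir = { VDIR, LIST_HEAD_INITIALIZER(dir.v_cache_src) };
        struct vnode file = { VREG, LIST_HEAD_INITIALIZER(file.v_cache_src) };
        struct namecache nc = { .nc_vp = &file };

        LIST_INSERT_HEAD(&dir.v_cache_src, &nc, nc_src);
        printf("leaf: %d\n", leaf_test(&dir)); /* prints 0: only a file below */
        return (0);
}

A vnode whose namecache children are all non-directories is a leaf, and with vfs.nameileafonly > 0 only such leaves are eligible for reuse in getnewvnode() below.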
@@ -499,6 +526,8 @@ cache_purgevfs(mp)
 	}
 }
 
+#if 0
+
 /*
  * Flush all directory entries with no child directories held in
  * the cache.
@@ -555,6 +584,8 @@ cache_purgeleafdirs(ndir)
 	numcachepl++;
 }
 
+#endif
+
 /*
  * Perform canonical checks and cache lookup and pass on to filesystem
  * through the vop_cachedlookup only if needed.
--- sys/kern/vfs_subr.c
+++ sys/kern/vfs_subr.c
@@ -110,6 +110,8 @@ SYSCTL_LONG(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, ""
 /* Number of vnodes in the free list. */
 static u_long freevnodes = 0;
 SYSCTL_LONG(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, "");
+
+#if 0
 /* Number of vnode allocations. */
 static u_long vnodeallocs = 0;
 SYSCTL_LONG(_debug, OID_AUTO, vnodeallocs, CTLFLAG_RD, &vnodeallocs, 0, "");
@@ -125,6 +127,7 @@ SYSCTL_LONG(_debug, OID_AUTO, vnoderecycleminfreevn, CTLFLAG_RW, &vnoderecyclemi
 /* Number of vnodes attempted to recycle at a time. */
 static u_long vnoderecyclenumber = 3000;
 SYSCTL_LONG(_debug, OID_AUTO, vnoderecyclenumber, CTLFLAG_RW, &vnoderecyclenumber, 0, "");
+#endif
 
 /*
  * Various variables used for debugging the new implementation of
@@ -142,6 +145,8 @@ SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortbad, CTLFLAG_RW, &reassignbufsortbad,
 /* Set to 0 for old insertion-sort based reassignbuf, 1 for modern method. */
 static int reassignbufmethod = 1;
 SYSCTL_INT(_vfs, OID_AUTO, reassignbufmethod, CTLFLAG_RW, &reassignbufmethod, 0, "");
+static int nameileafonly = 0;
+SYSCTL_INT(_vfs, OID_AUTO, nameileafonly, CTLFLAG_RW, &nameileafonly, 0, "");
 
 #ifdef ENABLE_VFS_IOOPT
 /* See NOTES for a description of this setting. */
@@ -238,6 +243,9 @@ SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0,
 int desiredvnodes;
 SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW,
     &desiredvnodes, 0, "Maximum number of vnodes");
+static int minvnodes;
+SYSCTL_INT(_kern, KERN_MAXVNODES, minvnodes, CTLFLAG_RW,
+    &minvnodes, 0, "Minimum number of vnodes");
 
 /*
  * Initialize the vnode management data structures.
@@ -247,6 +255,7 @@ vntblinit(void *dummy __unused)
 {
 
 	desiredvnodes = maxproc + cnt.v_page_count / 4;
+	minvnodes = desiredvnodes / 4;
 	mtx_init(&mountlist_mtx, "mountlist", MTX_DEF);
 	mtx_init(&mntvnode_mtx, "mntvnode", MTX_DEF);
 	mtx_init(&mntid_mtx, "mntid", MTX_DEF);
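To make the sizing above concrete: desiredvnodes scales with both process count and physical memory, and the new minvnodes floor is a quarter of that. A worked example for a hypothetical machine with 512 MB of RAM (4 KB pages, so 131072 pages) and an assumed maxproc of 1044; both figures are made up for illustration:

#include <stdio.h>

int
main(void)
{
        int maxproc = 1044;             /* assumed; varies with maxusers */
        int v_page_count = 131072;      /* 512 MB / 4 KB, assumed */
        int desiredvnodes = maxproc + v_page_count / 4;
        int minvnodes = desiredvnodes / 4;

        /* Prints: desiredvnodes=33812 minvnodes=8453 */
        printf("desiredvnodes=%d minvnodes=%d\n", desiredvnodes, minvnodes);
        return (0);
}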
@@ -539,40 +548,68 @@ getnewvnode(tag, mp, vops, vpp)
 	s = splbio();
 	mtx_lock(&vnode_free_list_mtx);
 
-	if (wantfreevnodes && freevnodes < wantfreevnodes) {
+	if (freevnodes < wantfreevnodes) {
 		vp = NULL;
-	} else if (!wantfreevnodes && freevnodes <= desiredvnodes) {
-		/*
-		 * XXX: this is only here to be backwards compatible
-		 */
-		vp = NULL;
-	} else for (count = 0; count < freevnodes; count++) {
-		vp = TAILQ_FIRST(&vnode_free_list);
-		if (vp == NULL || vp->v_usecount)
-			panic("getnewvnode: free vnode isn't");
-		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+	} else if (numvnodes >= minvnodes) {
+		for (count = 0; count < freevnodes; count++) {
+			vp = TAILQ_FIRST(&vnode_free_list);
+			if (vp == NULL || vp->v_usecount)
+				panic("getnewvnode: free vnode isn't");
+			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
 
-		/*
-		 * Don't recycle if active in the namecache or
-		 * if it still has cached pages or we cannot get
-		 * its interlock.
-		 */
-		if (LIST_FIRST(&vp->v_cache_src) != NULL ||
-		    (VOP_GETVOBJECT(vp, &object) == 0 &&
-		     (object->resident_page_count || object->ref_count)) ||
-		    !mtx_trylock(&vp->v_interlock)) {
-			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
-			vp = NULL;
-			continue;
-		}
-		/*
-		 * Skip over it if its filesystem is being suspended.
-		 */
-		if (vn_start_write(vp, &vnmp, V_NOWAIT) == 0)
-			break;
-		mtx_unlock(&vp->v_interlock);
-		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
-		vp = NULL;
+			/*
+			 * Don't recycle if we still have cached pages or if
+			 * we cannot get the interlock.
+			 */
+			if ((VOP_GETVOBJECT(vp, &object) == 0 &&
+			     (object->resident_page_count ||
+			      object->ref_count)) ||
+			    !mtx_trylock(&vp->v_interlock)) {
+				TAILQ_INSERT_TAIL(&vnode_free_list, vp,
+				    v_freelist);
+				vp = NULL;
+				continue;
+			}
+			if (LIST_FIRST(&vp->v_cache_src)) {
+				/*
+				 * note: nameileafonly sysctl is temporary,
+				 * for debugging only, and will eventually be
+				 * removed.
+				 */
+				if (nameileafonly > 0) {
+					/*
+					 * Do not reuse namei-cached directory
+					 * vnodes that have cached
+					 * subdirectories.
+					 */
+					if (cache_leaf_test(vp) < 0) {
+						mtx_unlock(&vp->v_interlock);
+						vp = NULL;
+						continue;
+					}
+				} else if (nameileafonly < 0 ||
+					    vmiodirenable == 0) {
+					/*
+					 * Do not reuse namei-cached directory
+					 * vnodes if nameileafonly is -1 or
+					 * if VMIO backing for directories is
+					 * turned off (otherwise we reuse them
+					 * too quickly).
+					 */
+					mtx_unlock(&vp->v_interlock);
+					vp = NULL;
+					continue;
+				}
+			}
+			/*
+			 * Skip over it if its filesystem is being suspended.
+			 */
+			if (vn_start_write(vp, &vnmp, V_NOWAIT) == 0)
+				break;
+			mtx_unlock(&vp->v_interlock);
+			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
+			vp = NULL;
+		}
 	}
 	if (vp) {
 		vp->v_flag |= VDOOMED;
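The net effect of the rewritten loop is a reuse policy: a free-list vnode is only torn down when the vnode count is above minvnodes, its VM object holds no pages or references, and (depending on the debug sysctls) it is not an active non-leaf directory in the namecache. A condensed, userland-compilable sketch of that decision, with stand-in fields replacing the real vnode state and the locking omitted:

#include <stdio.h>

/* Tunables mirroring the sysctls in this commit (defaults as committed). */
static int vmiodirenable = 1;
static int nameileafonly = 0;

/* Stand-in for the per-vnode state getnewvnode() inspects. */
struct candidate {
        int has_namecache_entries;      /* LIST_FIRST(&vp->v_cache_src) != NULL */
        int resident_pages;             /* object->resident_page_count */
        int object_refs;                /* object->ref_count */
        int cached_subdirs;             /* cache_leaf_test(vp) < 0 */
};

/* Return 1 if the vnode may be recycled, 0 if the scan should skip it. */
static int
may_recycle(const struct candidate *c)
{
        /* Never recycle while the VM object still holds pages or refs. */
        if (c->resident_pages || c->object_refs)
                return (0);
        if (c->has_namecache_entries) {
                /* nameileafonly > 0: keep directories with cached subdirs. */
                if (nameileafonly > 0 && c->cached_subdirs)
                        return (0);
                /* nameileafonly < 0, or non-VMIO directories: keep them all. */
                if (nameileafonly < 0 || vmiodirenable == 0)
                        return (0);
        }
        return (1);
}

int
main(void)
{
        struct candidate clean = { 1, 0, 0, 0 };
        struct candidate cached = { 1, 42, 1, 0 };

        /* Prints: clean=1 cached=0 */
        printf("clean=%d cached=%d\n", may_recycle(&clean), may_recycle(&cached));
        return (0);
}

With the committed defaults (nameileafonly=0, vmiodirenable=1), namecache activity no longer blocks reuse at all; only the VM object check does, which is exactly the simplification the commit message describes.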
@@ -636,6 +673,7 @@ getnewvnode(tag, mp, vops, vpp)
 
 	vfs_object_create(vp, td, td->td_proc->p_ucred);
 
+#if 0
 	vnodeallocs++;
 	if (vnodeallocs % vnoderecycleperiod == 0 &&
 	    freevnodes < vnoderecycleminfreevn &&
@@ -643,6 +681,7 @@ getnewvnode(tag, mp, vops, vpp)
 		/* Recycle vnodes. */
 		cache_purgeleafdirs(vnoderecyclenumber);
 	}
+#endif
 
 	return (0);
 }
--- sys/sys/vnode.h
+++ sys/sys/vnode.h
@@ -559,7 +559,7 @@ int cache_lookup __P((struct vnode *dvp, struct vnode **vpp,
 	    struct componentname *cnp));
 void	cache_purge __P((struct vnode *vp));
 void	cache_purgevfs __P((struct mount *mp));
-void	cache_purgeleafdirs __P((int ndir));
+int	cache_leaf_test __P((struct vnode *vp));
 void	cvtstat __P((struct stat *st, struct ostat *ost));
 void	cvtnstat __P((struct stat *sb, struct nstat *nsb));
 int	getnewvnode __P((enum vtagtype tag,