As the kernel allocates and frees vnodes, it fully initializes them on
every allocation and fully releases them on every free. These are not
trivial costs: each allocation starts by zeroing a large structure, then
initializes a mutex, a lock manager lock, an rw lock, four lists, and six
pointers. And looking at vfs.vnodes_created, these operations are being
done millions of times an hour on a busy machine.

As a performance optimization, this change uses the uma_init and uma_fini
routines to do these initializations and cleanups only as vnodes enter and
leave the vnode_zone. With this change the initializations are done only
kern.maxvnodes times at system startup and then only rarely again. The
frees are done only if the vnode_zone shrinks, which never happens in
practice. For those curious about the avoided work, see the vnode_init()
and vnode_fini() functions in kern/vfs_subr.c for the code that has been
removed from the main vnode allocation/free path.

Reviewed by:	kib
Tested by:	Peter Holm
commit 41d4f10391
parent 43a993bb7d
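Background for the change below: UMA's uma_zcreate() accepts both a ctor/dtor pair, which runs on every uma_zalloc()/uma_zfree(), and an init/fini pair, which runs only when an item is imported into the zone's cache from the VM system or released back to it. The commit moves the expensive per-vnode setup out of the allocation path and into such init/fini hooks. What follows is a minimal sketch of that pattern using a hypothetical "foo" zone; struct foo, its fields, and the foo_* functions are illustrative only, while the UMA and mutex(9) calls are the real kernel interfaces. The actual hooks added by this commit are vnode_init() and vnode_fini() in the diff further down.

/*
 * Sketch only: an object whose costly setup runs once, when the item
 * enters the zone, rather than on every allocation.  The "foo" names
 * are hypothetical; the UMA and mutex calls are the real APIs.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <vm/uma.h>

struct foo {				/* hypothetical object */
	struct mtx	f_lock;		/* expensive to init/destroy */
	int		f_busy;
};

static uma_zone_t foo_zone;

/* Called when an item first enters the zone, not on every uma_zalloc(). */
static int
foo_init(void *mem, int size, int flags)
{
	struct foo *fp;

	fp = mem;
	bzero(fp, size);
	mtx_init(&fp->f_lock, "foo lock", NULL, MTX_DEF);
	return (0);
}

/* Called only when the zone releases the item back to the VM system. */
static void
foo_fini(void *mem, int size)
{
	struct foo *fp;

	fp = mem;
	mtx_destroy(&fp->f_lock);
}

static void
foo_zone_create(void)
{
	/* No ctor/dtor: uma_zalloc()/uma_zfree() stay on the fast path. */
	foo_zone = uma_zcreate("FOO", sizeof(struct foo), NULL, NULL,
	    foo_init, foo_fini, UMA_ALIGN_PTR, 0);
}

The trade-off, visible in the _vdrop() hunks below, is that every caller of uma_zfree() must leave the item in a clean, reusable state, because the next uma_zalloc() hands it back without re-running the init hook.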
kern/vfs_subr.c

@@ -346,6 +346,66 @@ PCTRIE_DEFINE(BUF, buf, b_lblkno, buf_trie_alloc, buf_trie_free);
 #ifndef MAXVNODES_MAX
 #define MAXVNODES_MAX (512 * 1024 * 1024 / 64) /* 8M */
 #endif
+
+/*
+ * Initialize a vnode as it first enters the zone.
+ */
+static int
+vnode_init(void *mem, int size, int flags)
+{
+	struct vnode *vp;
+	struct bufobj *bo;
+
+	vp = mem;
+	bzero(vp, size);
+	/*
+	 * Setup locks.
+	 */
+	vp->v_vnlock = &vp->v_lock;
+	mtx_init(&vp->v_interlock, "vnode interlock", NULL, MTX_DEF);
+	/*
+	 * By default, don't allow shared locks unless filesystems opt-in.
+	 */
+	lockinit(vp->v_vnlock, PVFS, "vnode", VLKTIMEOUT,
+	    LK_NOSHARE | LK_IS_VNODE);
+	/*
+	 * Initialize bufobj.
+	 */
+	bo = &vp->v_bufobj;
+	bo->__bo_vnode = vp;
+	rw_init(BO_LOCKPTR(bo), "bufobj interlock");
+	bo->bo_private = vp;
+	TAILQ_INIT(&bo->bo_clean.bv_hd);
+	TAILQ_INIT(&bo->bo_dirty.bv_hd);
+	/*
+	 * Initialize namecache.
+	 */
+	LIST_INIT(&vp->v_cache_src);
+	TAILQ_INIT(&vp->v_cache_dst);
+	/*
+	 * Initialize rangelocks.
+	 */
+	rangelock_init(&vp->v_rl);
+	return (0);
+}
+
+/*
+ * Free a vnode when it is cleared from the zone.
+ */
+static void
+vnode_fini(void *mem, int size)
+{
+	struct vnode *vp;
+	struct bufobj *bo;
+
+	vp = mem;
+	rangelock_destroy(&vp->v_rl);
+	lockdestroy(vp->v_vnlock);
+	mtx_destroy(&vp->v_interlock);
+	bo = &vp->v_bufobj;
+	rw_destroy(BO_LOCKPTR(bo));
+}
+
 static void
 vntblinit(void *dummy __unused)
 {
@@ -379,7 +439,7 @@ vntblinit(void *dummy __unused)
 	TAILQ_INIT(&vnode_free_list);
 	mtx_init(&vnode_free_list_mtx, "vnode_free_list", NULL, MTX_DEF);
 	vnode_zone = uma_zcreate("VNODE", sizeof (struct vnode), NULL, NULL,
-	    NULL, NULL, UMA_ALIGN_PTR, 0);
+	    vnode_init, vnode_fini, UMA_ALIGN_PTR, 0);
 	vnodepoll_zone = uma_zcreate("VNODEPOLL", sizeof (struct vpollinfo),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 	/*
@@ -1223,8 +1283,8 @@ getnewvnode(const char *tag, struct mount *mp, struct vop_vector *vops,
     struct vnode **vpp)
 {
 	struct vnode *vp;
-	struct bufobj *bo;
 	struct thread *td;
+	struct lock_object *lo;
 	static int cyclecount;
 	int error;
 
@@ -1271,40 +1331,42 @@ getnewvnode(const char *tag, struct mount *mp, struct vop_vector *vops,
 	mtx_unlock(&vnode_free_list_mtx);
 alloc:
 	atomic_add_long(&vnodes_created, 1);
-	vp = (struct vnode *) uma_zalloc(vnode_zone, M_WAITOK|M_ZERO);
+	vp = (struct vnode *) uma_zalloc(vnode_zone, M_WAITOK);
 	/*
-	 * Setup locks.
+	 * Locks are given the generic name "vnode" when created.
+	 * Follow the historic practice of using the filesystem
+	 * name when they allocated, e.g., "zfs", "ufs", "nfs, etc.
+	 *
+	 * Locks live in a witness group keyed on their name. Thus,
+	 * when a lock is renamed, it must also move from the witness
+	 * group of its old name to the witness group of its new name.
+	 *
+	 * The change only needs to be made when the vnode moves
+	 * from one filesystem type to another. We ensure that each
+	 * filesystem use a single static name pointer for its tag so
+	 * that we can compare pointers rather than doing a strcmp().
 	 */
-	vp->v_vnlock = &vp->v_lock;
-	mtx_init(&vp->v_interlock, "vnode interlock", NULL, MTX_DEF);
+	lo = &vp->v_vnlock->lock_object;
+	if (lo->lo_name != tag) {
+		lo->lo_name = tag;
+		WITNESS_DESTROY(lo);
+		WITNESS_INIT(lo, tag);
+	}
 	/*
-	 * By default, don't allow shared locks unless filesystems
-	 * opt-in.
+	 * By default, don't allow shared locks unless filesystems opt-in.
 	 */
-	lockinit(vp->v_vnlock, PVFS, tag, VLKTIMEOUT, LK_NOSHARE | LK_IS_VNODE);
-	/*
-	 * Initialize bufobj.
-	 */
-	bo = &vp->v_bufobj;
-	bo->__bo_vnode = vp;
-	rw_init(BO_LOCKPTR(bo), "bufobj interlock");
-	bo->bo_ops = &buf_ops_bio;
-	bo->bo_private = vp;
-	TAILQ_INIT(&bo->bo_clean.bv_hd);
-	TAILQ_INIT(&bo->bo_dirty.bv_hd);
-	/*
-	 * Initialize namecache.
-	 */
-	LIST_INIT(&vp->v_cache_src);
-	TAILQ_INIT(&vp->v_cache_dst);
+	vp->v_vnlock->lock_object.lo_flags |= LK_NOSHARE;
 	/*
 	 * Finalize various vnode identity bits.
 	 */
+	KASSERT(vp->v_object == NULL, ("stale v_object %p", vp));
+	KASSERT(vp->v_lockf == NULL, ("stale v_lockf %p", vp));
+	KASSERT(vp->v_pollinfo == NULL, ("stale v_pollinfo %p", vp));
 	vp->v_type = VNON;
 	vp->v_tag = tag;
 	vp->v_op = vops;
 	v_init_counters(vp);
-	vp->v_data = NULL;
+	vp->v_bufobj.bo_ops = &buf_ops_bio;
 #ifdef MAC
 	mac_vnode_init(vp);
 	if (mp != NULL && (mp->mnt_flag & MNT_MULTILABEL) == 0)
@@ -1313,11 +1375,10 @@ alloc:
 		printf("NULL mp in getnewvnode()\n");
 #endif
 	if (mp != NULL) {
-		bo->bo_bsize = mp->mnt_stat.f_iosize;
+		vp->v_bufobj.bo_bsize = mp->mnt_stat.f_iosize;
 		if ((mp->mnt_kern_flag & MNTK_NOKNOTE) != 0)
 			vp->v_vflag |= VV_NOKNOTE;
 	}
-	rangelock_init(&vp->v_rl);
 
 	/*
 	 * For the filesystems which do not use vfs_hash_insert(),
@@ -2683,6 +2744,12 @@ _vdrop(struct vnode *vp, bool locked)
 	}
 	/*
 	 * The vnode has been marked for destruction, so free it.
+	 *
+	 * The vnode will be returned to the zone where it will
+	 * normally remain until it is needed for another vnode. We
+	 * need to cleanup (or verify that the cleanup has already
+	 * been done) any residual data left from its current use
+	 * so as not to contaminate the freshly allocated vnode.
 	 */
 	CTR2(KTR_VFS, "%s: destroying the vnode %p", __func__, vp);
 	atomic_subtract_long(&numvnodes, 1);
@@ -2707,16 +2774,17 @@ _vdrop(struct vnode *vp, bool locked)
 #ifdef MAC
 	mac_vnode_destroy(vp);
 #endif
-	if (vp->v_pollinfo != NULL)
+	if (vp->v_pollinfo != NULL) {
 		destroy_vpollinfo(vp->v_pollinfo);
+		vp->v_pollinfo = NULL;
+	}
 #ifdef INVARIANTS
 	/* XXX Elsewhere we detect an already freed vnode via NULL v_op. */
 	vp->v_op = NULL;
 #endif
-	rangelock_destroy(&vp->v_rl);
-	lockdestroy(vp->v_vnlock);
-	mtx_destroy(&vp->v_interlock);
-	rw_destroy(BO_LOCKPTR(bo));
+	vp->v_iflag = 0;
+	vp->v_vflag = 0;
+	bo->bo_flag = 0;
 	uma_zfree(vnode_zone, vp);
 }
 
@@ -3081,6 +3149,7 @@ vgonel(struct vnode *vp)
 	 * Clear the advisory locks and wake up waiting threads.
 	 */
 	(void)VOP_ADVLOCKPURGE(vp);
+	vp->v_lockf = NULL;
 	/*
 	 * Delete from old mount point vnode list.
 	 */