From 8947be9ba004add6deae3c56398bed461c4995f1 Mon Sep 17 00:00:00 2001
From: Jeff Roberson
Date: Mon, 5 Aug 2002 10:15:56 +0000
Subject: [PATCH] - Move some logic from getnewvnode() to a new function
 vcanrecycle()

- Unlock the free list mutex around vcanrecycle to prevent a lock order
  reversal.
---
 sys/kern/vfs_subr.c | 166 +++++++++++++++++++++++++-------------------
 1 file changed, 96 insertions(+), 70 deletions(-)

diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 1cc90eab0ef6..bf71d049425b 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -80,6 +80,8 @@ static void vclean(struct vnode *vp, int flags, struct thread *td);
 static void	vlruvp(struct vnode *vp);
 static int	flushbuflist(struct buf *blist, int flags, struct vnode *vp,
 		    int slpflag, int slptimeo, int *errorp);
+static int	vcanrecycle(struct vnode *vp);
+
 
 /*
  * Number of vnodes in existence.  Increased whenever getnewvnode()
@@ -659,6 +661,70 @@ SYSINIT(vnlru, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &vnlru_kp)
  * Routines having to do with the management of the vnode table.
  */
 
+/*
+ * Check to see if a free vnode can be recycled.  If it can, return it locked
+ * with the vn lock, but not interlock.  Otherwise indicate the error.
+ */
+static int
+vcanrecycle(struct vnode *vp)
+{
+	struct thread *td = curthread;
+	vm_object_t object;
+	int error;
+
+	/* Don't recycle if we can't get the interlock */
+	if (!mtx_trylock(&vp->v_interlock))
+		return (EWOULDBLOCK);
+
+	/* We should be able to immediately acquire this */
+	/* XXX This looks like it should panic if it fails */
+	if (vn_lock(vp, LK_INTERLOCK | LK_EXCLUSIVE, td) != 0)
+		return (EWOULDBLOCK);
+	/*
+	 * Don't recycle if we still have cached pages.
+	 */
+	if (VOP_GETVOBJECT(vp, &object) == 0 &&
+	    (object->resident_page_count ||
+	     object->ref_count)) {
+		VOP_UNLOCK(vp, 0, td);
+		error = EBUSY;
+		goto done;
+	}
+	if (LIST_FIRST(&vp->v_cache_src)) {
+		/*
+		 * note: nameileafonly sysctl is temporary,
+		 * for debugging only, and will eventually be
+		 * removed.
+		 */
+		if (nameileafonly > 0) {
+			/*
+			 * Do not reuse namei-cached directory
+			 * vnodes that have cached
+			 * subdirectories.
+			 */
+			if (cache_leaf_test(vp) < 0) {
+				error = EISDIR;
+				goto done;
+			}
+		} else if (nameileafonly < 0 ||
+			    vmiodirenable == 0) {
+			/*
+			 * Do not reuse namei-cached directory
+			 * vnodes if nameileafonly is -1 or
+			 * if VMIO backing for directories is
+			 * turned off (otherwise we reuse them
+			 * too quickly).
+			 */
+			error = EBUSY;
+			goto done;
+		}
+	}
+	return (0);
+done:
+	VOP_UNLOCK(vp, 0, td);
+	return (error);
+}
+
 /*
  * Return the next vnode from the free list.
  */
@@ -673,7 +739,6 @@ getnewvnode(tag, mp, vops, vpp)
 	struct thread *td = curthread;	/* XXX */
 	struct vnode *vp = NULL;
 	struct mount *vnmp;
-	vm_object_t object;
 
 	s = splbio();
 	/*
@@ -696,84 +761,47 @@ getnewvnode(tag, mp, vops, vpp)
 	mtx_lock(&vnode_free_list_mtx);
 
 	if (freevnodes >= wantfreevnodes && numvnodes >= minvnodes) {
+		int error;
 		int count;
 
 		for (count = 0; count < freevnodes; count++) {
 			vp = TAILQ_FIRST(&vnode_free_list);
-			if (vp == NULL || vp->v_usecount)
-				panic("getnewvnode: free vnode isn't");
+
+			KASSERT(vp->v_usecount == 0,
+			    ("getnewvnode: free vnode isn't"));
+
 			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
-
-			/* Don't recycle if we can't get the interlock */
-			if (!mtx_trylock(&vp->v_interlock)) {
-				vp = NULL;
-				continue;
-			}
-
-			/* We should be able to immediately acquire this */
-			if (vn_lock(vp, LK_INTERLOCK | LK_EXCLUSIVE, td) != 0)
-				continue;
 			/*
-			 * Don't recycle if we still have cached pages.
+			 * We have to drop the free list mtx to avoid lock
+			 * order reversals with interlock.
 			 */
-			if (VOP_GETVOBJECT(vp, &object) == 0 &&
-			    (object->resident_page_count ||
-			     object->ref_count)) {
-				TAILQ_INSERT_TAIL(&vnode_free_list, vp,
-				    v_freelist);
-				VOP_UNLOCK(vp, 0, td);
-				vp = NULL;
-				continue;
-			}
-			if (LIST_FIRST(&vp->v_cache_src)) {
-				/*
-				 * note: nameileafonly sysctl is temporary,
-				 * for debugging only, and will eventually be
-				 * removed.
-				 */
-				if (nameileafonly > 0) {
-					/*
-					 * Do not reuse namei-cached directory
-					 * vnodes that have cached
-					 * subdirectories.
-					 */
-					if (cache_leaf_test(vp) < 0) {
-						VOP_UNLOCK(vp, 0, td);
-						TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
-						vp = NULL;
-						continue;
-					}
-				} else if (nameileafonly < 0 ||
-					    vmiodirenable == 0) {
-					/*
-					 * Do not reuse namei-cached directory
-					 * vnodes if nameileafonly is -1 or
-					 * if VMIO backing for directories is
-					 * turned off (otherwise we reuse them
-					 * too quickly).
-					 */
-					VOP_UNLOCK(vp, 0, td);
-					TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
-					vp = NULL;
-					continue;
-				}
-			}
+			mtx_unlock(&vnode_free_list_mtx);
+			error = vcanrecycle(vp);
 			/*
 			 * Skip over it if its filesystem is being suspended.
 			 */
-			if (vn_start_write(vp, &vnmp, V_NOWAIT) == 0)
+			if (error == 0 &&
+			    vn_start_write(vp, &vnmp, V_NOWAIT) != 0)
+				error = EBUSY;
+
+			mtx_lock(&vnode_free_list_mtx);
+			if (error != 0)
+				TAILQ_INSERT_TAIL(&vnode_free_list, vp,
+				    v_freelist);
+			else
 				break;
-			VOP_UNLOCK(vp, 0, td);
-			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
-			vp = NULL;
 		}
 	}
+	/*
+	 * Unlocked access to this vp is ok because we are assured that there
+	 * are no other references to it.
+	 */
 	if (vp) {
-		mp_fixme("Unlocked v_iflags access.\n");
-		vp->v_iflag |= VI_DOOMED;
-		vp->v_iflag &= ~VI_FREE;
 		freevnodes--;
 		mtx_unlock(&vnode_free_list_mtx);
+
+		vp->v_iflag |= VI_DOOMED;
+		vp->v_iflag &= ~VI_FREE;
 		cache_purge(vp);
 		if (vp->v_type != VBAD) {
 			VOP_UNLOCK(vp, 0, td);
@@ -785,16 +813,14 @@ getnewvnode(tag, mp, vops, vpp)
 
 #ifdef INVARIANTS
 	{
-		int s;
-
 		if (vp->v_data)
 			panic("cleaned vnode isn't");
-		s = splbio();
+		VI_LOCK(vp);
 		if (vp->v_numoutput)
 			panic("Clean vnode has pending I/O's");
-		splx(s);
 		if (vp->v_writecount != 0)
 			panic("Non-zero write count");
+		VI_UNLOCK(vp);
 	}
 #endif
 	if (vp->v_pollinfo) {
@@ -815,15 +841,15 @@ getnewvnode(tag, mp, vops, vpp)
 		KASSERT(vp->v_cleanblkroot == NULL, ("cleanblkroot not NULL"));
 		KASSERT(vp->v_dirtyblkroot == NULL, ("dirtyblkroot not NULL"));
 	} else {
+		numvnodes++;
 		mtx_unlock(&vnode_free_list_mtx);
-		vp = (struct vnode *) uma_zalloc(vnode_zone, M_WAITOK);
-		bzero((char *) vp, sizeof *vp);
+
+		vp = (struct vnode *) uma_zalloc(vnode_zone, M_WAITOK|M_ZERO);
 		mtx_init(&vp->v_interlock, "vnode interlock", NULL, MTX_DEF);
 		vp->v_dd = vp;
 		cache_purge(vp);
 		LIST_INIT(&vp->v_cache_src);
 		TAILQ_INIT(&vp->v_cache_dst);
-		numvnodes++;
 	}
 	TAILQ_INIT(&vp->v_cleanblkhd);
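
Reviewer note on the locking pattern: the old loop took a vnode's interlock and vn
lock while still holding vnode_free_list_mtx, establishing the order "free list,
then interlock"; other paths take the interlock first, so this is a lock order
reversal. The new loop removes the vnode from the list, drops the free list mutex,
lets vcanrecycle() try the per-vnode locks without blocking, then retakes the free
list mutex and reinserts the vnode at the tail on failure. The following is a
minimal userspace sketch of that pattern with POSIX threads, for illustration
only; every name in it (struct entry, free_list_mtx, can_recycle, get_free_entry,
MAX_SCAN) is a hypothetical stand-in, not a kernel API.

#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <sys/queue.h>

#define MAX_SCAN	8	/* bound the scan, like 'count < freevnodes' */

struct entry {
	pthread_mutex_t	lock;	/* plays the role of the vnode lock */
	int		busy;	/* stand-in for "still has cached pages" */
	TAILQ_ENTRY(entry) link;
};

static TAILQ_HEAD(, entry) free_list = TAILQ_HEAD_INITIALIZER(free_list);
static pthread_mutex_t free_list_mtx = PTHREAD_MUTEX_INITIALIZER;

/*
 * Analogue of vcanrecycle(): called WITHOUT the list mutex held, never
 * blocks, and on success returns 0 with the entry lock held.
 */
static int
can_recycle(struct entry *e)
{
	if (pthread_mutex_trylock(&e->lock) != 0)
		return (EWOULDBLOCK);	/* someone else owns it; skip it */
	if (e->busy) {
		pthread_mutex_unlock(&e->lock);
		return (EBUSY);		/* not recyclable right now */
	}
	return (0);
}

/* Analogue of the getnewvnode() free list scan. */
static struct entry *
get_free_entry(void)
{
	struct entry *e = NULL;
	int error, tries;

	pthread_mutex_lock(&free_list_mtx);
	for (tries = 0; tries < MAX_SCAN; tries++) {
		if ((e = TAILQ_FIRST(&free_list)) == NULL)
			break;
		TAILQ_REMOVE(&free_list, e, link);
		/*
		 * Drop the list mutex before touching the entry lock, so
		 * the only order ever used is entry -> list, never
		 * list -> entry.
		 */
		pthread_mutex_unlock(&free_list_mtx);
		error = can_recycle(e);
		pthread_mutex_lock(&free_list_mtx);
		if (error == 0)
			break;			/* e is locked and ours */
		TAILQ_INSERT_TAIL(&free_list, e, link);
		e = NULL;			/* failed; keep scanning */
	}
	pthread_mutex_unlock(&free_list_mtx);
	return (e);
}

int
main(void)
{
	struct entry a = { .lock = PTHREAD_MUTEX_INITIALIZER, .busy = 1 };
	struct entry b = { .lock = PTHREAD_MUTEX_INITIALIZER, .busy = 0 };
	struct entry *e;

	pthread_mutex_lock(&free_list_mtx);
	TAILQ_INSERT_TAIL(&free_list, &a, link);
	TAILQ_INSERT_TAIL(&free_list, &b, link);
	pthread_mutex_unlock(&free_list_mtx);

	e = get_free_entry();		/* skips busy 'a', recycles 'b' */
	printf("recycled %s\n", e == &b ? "b" : "nothing");
	if (e != NULL)
		pthread_mutex_unlock(&e->lock);
	return (0);
}

Unlike the loop in the patch, the sketch clears e after each failed pass, so an
exhausted scan returns NULL rather than the last rejected (and reinserted) entry.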
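
One smaller change worth calling out: the allocation path now passes M_ZERO to
uma_zalloc() instead of following the M_WAITOK allocation with a separate bzero().
For illustration, the same idea in portable C, where struct obj and both
allocation styles are hypothetical stand-ins for the vnode_zone allocation:

#include <stdlib.h>
#include <string.h>

struct obj {
	int	flags;
	void	*data;
};

int
main(void)
{
	struct obj *a, *b;

	/* Old style: allocate, then zero the object in a second pass. */
	a = malloc(sizeof(*a));
	if (a != NULL)
		memset(a, 0, sizeof(*a));

	/*
	 * New style: ask the allocator for zeroed memory up front, the
	 * portable analogue of uma_zalloc(vnode_zone, M_WAITOK | M_ZERO).
	 */
	b = calloc(1, sizeof(*b));

	free(a);
	free(b);
	return (0);
}

Folding the zeroing into the allocation keeps the "a fresh vnode starts zeroed"
invariant in one place instead of relying on the caller to remember the bzero().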