 - Remove vx_lock, vx_unlock, vx_wait, etc.

 - Add a vn_start_write/vn_finished_write around vlrureclaim so we don't do
   writing ops without suspending (first sketch below).  This could suspend
   the vlruproc, which should not be a problem under normal circumstances.
 - Manually implement VMIGHTFREE in vlrureclaim as this was the only instance
   where it was used.
 - Acquire the vnode lock before calling vgone(), as it now requires it
   (second sketch below).
 - Move the acquisition of the vnode interlock from vtryrecycle() to
   getnewvnode() so that if it fails we don't drop and reacquire the
   vnode_free_list_mtx.
 - Check for a usecount or holdcount at the end of vtryrecycle() in case
   someone grabbed a ref while we were recycling.  If so, abort the recycle;
   on the final ref drop the vnode will be placed at the head of the free
   list.
 - Move the redundant VOP_INACTIVE protection code into the local
   vinactive() routine to avoid code bloat (third sketch below).
 - Keep the vnode lock held across calls to vgone() in several places.
 - vgonel() no longer uses XLOCK; callers must instead hold an exclusive
   vnode lock.  The VI_DOOMED flag is set to allow other threads to detect
   a vnode which is no longer valid.  This flag remains set until the last
   reference is gone and there is no chance of a new ref.  vgonel() holds
   the vnode lock across the entire function, which greatly simplifies the
   logic.
 - Only vfree() in one place in vgone(), not three.
 - Adjust vget() to check the VI_DOOMED flag prior to waiting on the lock
   in the LK_NOWAIT case.  In other cases, check after we have slept and
   acquired an exclusive lock.  This simulates the old vx_wait() behavior
   (fourth sketch below).
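
First sketch: the write-suspension bracket added around the vlrureclaim()
scan.  This is a condensed, illustrative fragment of the hunk further down,
not a drop-in routine; mp is assumed to be the mount point being scanned.

	vn_start_write(NULL, &mp, V_WAIT);	/* may block while the fs suspends */
	MNT_ILOCK(mp);
	/* ... walk mp->mnt_nvnodelist and reclaim eligible vnodes ... */
	MNT_IUNLOCK(mp);
	vn_finished_write(mp);			/* release the write count taken above */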
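
Second sketch: the locking contract callers of vgone() now follow (mirroring
the vlrureclaim() and vflush() hunks below).  Illustrative only; vp is a
vnode whose interlock is held and td is curthread.

	VOP_LOCK(vp, LK_INTERLOCK | LK_EXCLUSIVE, td);
	if ((vp->v_iflag & VI_DOOMED) == 0)	/* not already reclaimed by someone else */
		vgone(vp);			/* reclaim with the exclusive lock held */
	VOP_UNLOCK(vp, 0, td);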
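
Third sketch: how vrele()/vput() now defer the VI_DOINGINACT handling to the
local vinactive() helper.  Condensed from the vrele() hunk; the usecount
bookkeeping around it is omitted.

	if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, td) == 0) {
		VI_LOCK(vp);
		vinactive(vp, td);	/* sets and clears VI_DOINGINACT itself */
		VOP_UNLOCK(vp, 0, td);
	}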
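
Fourth sketch: the VI_DOOMED checks that replace vx_wait() in vget().  A
simplified outline of the new control flow, not the verbatim routine; the
reference-count cleanup on the vn_lock() error path is elided.

	VI_LOCK(vp);
	if ((vp->v_iflag & VI_DOOMED) && vp->v_vxthread != td &&
	    (flags & LK_NOWAIT)) {
		VI_UNLOCK(vp);			/* refuse to block on a dying vnode */
		return (EBUSY);
	}
	v_incr_usecount(vp, 1);
	if ((error = vn_lock(vp, flags | LK_INTERLOCK, td)) != 0)
		return (error);			/* real code also drops the ref here */
	if ((vp->v_iflag & VI_DOOMED) && vp->v_vxthread != td) {
		VOP_UNLOCK(vp, 0, td);		/* doomed while we slept */
		return (ENOENT);
	}
	return (0);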

Sponsored by:	Isilon Systems, Inc.
jeff 2005-03-13 11:54:28 +00:00
parent 7b0b6888cc
commit d29b61a365

@@ -88,11 +88,10 @@ static int flushbuflist(struct bufv *bufv, int flags, struct bufobj *bo,
int slpflag, int slptimeo);
static void syncer_shutdown(void *arg, int howto);
static int vtryrecycle(struct vnode *vp);
static void vx_lock(struct vnode *vp);
static void vx_unlock(struct vnode *vp);
static void vbusy(struct vnode *vp);
static void vdropl(struct vnode *vp);
static void vholdl(struct vnode *);
static void vinactive(struct vnode *, struct thread *);
/*
* Enable Giant pushdown based on whether or not the vm is mpsafe in this
@@ -545,6 +544,7 @@ vlrureclaim(struct mount *mp)
trigger = cnt.v_page_count * 2 / usevnodes;
done = 0;
vn_start_write(NULL, &mp, V_WAIT);
MNT_ILOCK(mp);
count = mp->mnt_nvnodelistsize / 10 + 1;
while (count && (vp = TAILQ_FIRST(&mp->mnt_nvnodelist)) != NULL) {
@@ -554,11 +554,19 @@ vlrureclaim(struct mount *mp)
if (vp->v_type != VNON &&
vp->v_type != VBAD &&
VI_TRYLOCK(vp)) {
if (VMIGHTFREE(vp) && /* critical path opt */
/* critical path opt */
if (LIST_EMPTY(&(vp)->v_cache_src) &&
!(vp)->v_usecount &&
(vp->v_object == NULL ||
vp->v_object->resident_page_count < trigger)) {
struct thread *td;
td = curthread;
MNT_IUNLOCK(mp);
vgonel(vp, curthread);
VOP_LOCK(vp, LK_INTERLOCK|LK_EXCLUSIVE, td);
if ((vp->v_iflag & VI_DOOMED) == 0)
vgone(vp);
VOP_UNLOCK(vp, 0, td);
done++;
MNT_ILOCK(mp);
} else
@@ -567,6 +575,7 @@ vlrureclaim(struct mount *mp)
--count;
}
MNT_IUNLOCK(mp);
vn_finished_write(mp);
return done;
}
@@ -636,7 +645,6 @@ static struct kproc_desc vnlru_kp = {
};
SYSINIT(vnlru, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &vnlru_kp)
/*
* Routines having to do with the management of the vnode table.
*/
@@ -653,18 +661,13 @@ vtryrecycle(struct vnode *vp)
struct mount *vnmp;
int error;
/* Don't recycle if we can't get the interlock */
if (!VI_TRYLOCK(vp))
return (EWOULDBLOCK);
if (!VCANRECYCLE(vp)) {
VI_UNLOCK(vp);
return (EBUSY);
}
ASSERT_VI_LOCKED(vp, "vtryrecycle");
error = 0;
/*
* This vnode may be found and locked via some other list; if so we
* can't recycle it yet.
*/
if (vn_lock(vp, LK_INTERLOCK | LK_EXCLUSIVE | LK_NOWAIT, td) != 0)
if (VOP_LOCK(vp, LK_INTERLOCK | LK_EXCLUSIVE | LK_NOWAIT, td) != 0)
return (EWOULDBLOCK);
/*
* Don't recycle if its filesystem is being suspended.
@@ -733,14 +736,18 @@ vtryrecycle(struct vnode *vp)
TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
vp->v_iflag &= ~VI_FREE;
mtx_unlock(&vnode_free_list_mtx);
vp->v_iflag |= VI_DOOMED;
if ((vp->v_type != VBAD) || (vp->v_data != NULL)) {
VOP_UNLOCK(vp, 0, td);
if ((vp->v_iflag & VI_DOOMED) == 0) {
vp->v_iflag |= VI_DOOMED;
vgonel(vp, td);
} else
VOP_UNLOCK(vp, LK_INTERLOCK, td);
vn_finished_write(vnmp);
return (0);
VI_LOCK(vp);
}
/*
* If someone ref'd the vnode while we were cleaning, we have to
* free it once the last ref is dropped.
*/
if (vp->v_usecount || vp->v_holdcnt)
error = EBUSY;
VI_UNLOCK(vp);
done:
VOP_UNLOCK(vp, 0, td);
vn_finished_write(vnmp);
@@ -788,16 +795,30 @@ getnewvnode(tag, mp, vops, vpp)
int error;
int count;
for (count = 0; count < freevnodes; count++) {
for (count = 0; count < freevnodes; vp = NULL, count++) {
vp = TAILQ_FIRST(&vnode_free_list);
/*
* The list can be modified while the free_list_mtx
* has been dropped and vp could be NULL here.
*/
if (!vp)
break;
TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
/*
* Don't recycle if we can't get the interlock.
*/
if (!VI_TRYLOCK(vp))
continue;
if (!VCANRECYCLE(vp)) {
VI_UNLOCK(vp);
continue;
}
mtx_unlock(&vnode_free_list_mtx);
error = vtryrecycle(vp);
mtx_lock(&vnode_free_list_mtx);
if (error == 0)
break;
vp = NULL;
}
}
if (vp) {
@@ -1783,49 +1804,52 @@ vget(vp, flags, td)
{
int error;
error = 0;
if ((flags & LK_INTERLOCK) == 0)
VI_LOCK(vp);
/*
* If the vnode is in the process of being cleaned out for
* another use, we wait for the cleaning to finish and then
* return failure. Cleaning is determined by checking that
* the VI_XLOCK flag is set.
* the VI_DOOMED flag is set.
*/
if ((flags & LK_INTERLOCK) == 0)
VI_LOCK(vp);
if (vp->v_iflag & VI_XLOCK && vp->v_vxthread != curthread) {
if ((flags & LK_NOWAIT) == 0) {
vx_waitl(vp);
VI_UNLOCK(vp);
return (ENOENT);
}
if (vp->v_iflag & VI_DOOMED && vp->v_vxthread != td &&
(flags & LK_NOWAIT)) {
VI_UNLOCK(vp);
return (EBUSY);
}
v_incr_usecount(vp, 1);
if (VSHOULDBUSY(vp))
vbusy(vp);
if (flags & LK_TYPE_MASK) {
if ((error = vn_lock(vp, flags | LK_INTERLOCK, td)) != 0) {
/*
* must expand vrele here because we do not want
* to call VOP_INACTIVE if the reference count
* drops back to zero since it was never really
* active. We must remove it from the free list
* before sleeping so that multiple processes do
* not try to recycle it.
*/
VI_LOCK(vp);
v_incr_usecount(vp, -1);
if (VSHOULDFREE(vp))
vfree(vp);
else
vlruvp(vp);
VI_UNLOCK(vp);
}
if ((flags & LK_TYPE_MASK) == 0) {
if (vp->v_iflag & VI_DOOMED)
error = ENOENT;
VI_UNLOCK(vp);
return (error);
}
VI_UNLOCK(vp);
if ((error = vn_lock(vp, flags | LK_INTERLOCK, td)) != 0) {
/*
* must expand vrele here because we do not want
* to call VOP_INACTIVE if the reference count
* drops back to zero since it was never really
* active. We must remove it from the free list
* before sleeping so that multiple processes do
* not try to recycle it.
*/
VI_LOCK(vp);
v_incr_usecount(vp, -1);
if (VSHOULDFREE(vp))
vfree(vp);
else
vlruvp(vp);
VI_UNLOCK(vp);
return (error);
}
if (vp->v_iflag & VI_DOOMED && vp->v_vxthread != td) {
VOP_UNLOCK(vp, 0, td);
return (ENOENT);
}
return (0);
}
@@ -1897,15 +1921,8 @@ vrele(vp)
*/
if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, td) == 0) {
VI_LOCK(vp);
VNASSERT((vp->v_iflag & VI_DOINGINACT) == 0, vp,
("vrele: recursed on VI_DOINGINACT"));
vp->v_iflag |= VI_DOINGINACT;
VI_UNLOCK(vp);
VOP_INACTIVE(vp, td);
VI_LOCK(vp);
VNASSERT(vp->v_iflag & VI_DOINGINACT, vp,
("vrele: lost VI_DOINGINACT"));
vp->v_iflag &= ~VI_DOINGINACT;
vinactive(vp, td);
VOP_UNLOCK(vp, 0, td);
} else
VI_LOCK(vp);
if (VSHOULDFREE(vp))
@@ -1949,20 +1966,8 @@ vput(vp)
if (vp->v_usecount == 1) {
v_incr_usecount(vp, -1);
/*
* We must call VOP_INACTIVE with the node locked, so
* we just need to release the vnode mutex. Mark as
* as VI_DOINGINACT to avoid recursion.
*/
VNASSERT((vp->v_iflag & VI_DOINGINACT) == 0, vp,
("vput: recursed on VI_DOINGINACT"));
vp->v_iflag |= VI_DOINGINACT;
VI_UNLOCK(vp);
VOP_INACTIVE(vp, td);
VI_LOCK(vp);
VNASSERT(vp->v_iflag & VI_DOINGINACT, vp,
("vput: lost VI_DOINGINACT"));
vp->v_iflag &= ~VI_DOINGINACT;
vinactive(vp, td);
VOP_UNLOCK(vp, 0, td);
if (VSHOULDFREE(vp))
vfree(vp);
else
@@ -2024,6 +2029,22 @@ vdropl(struct vnode *vp)
vlruvp(vp);
}
static void
vinactive(struct vnode *vp, struct thread *td)
{
ASSERT_VOP_LOCKED(vp, "vinactive");
ASSERT_VI_LOCKED(vp, "vinactive");
VNASSERT((vp->v_iflag & VI_DOINGINACT) == 0, vp,
("vinactive: recursed on VI_DOINGINACT"));
vp->v_iflag |= VI_DOINGINACT;
VI_UNLOCK(vp);
VOP_INACTIVE(vp, td);
VI_LOCK(vp);
VNASSERT(vp->v_iflag & VI_DOINGINACT, vp,
("vinactive: lost VI_DOINGINACT"));
vp->v_iflag &= ~VI_DOINGINACT;
}
/*
* Remove any vnodes in the vnode table belonging to mount point mp.
*
@@ -2109,19 +2130,16 @@ vflush(mp, rootrefs, flags, td)
}
} else
VI_LOCK(vp);
VOP_UNLOCK(vp, 0, td);
/*
* With v_usecount == 0, all we need to do is clear out the
* vnode data structures and we are done.
*/
if (vp->v_usecount == 0) {
vgonel(vp, td);
VOP_UNLOCK(vp, 0, td);
MNT_ILOCK(mp);
continue;
}
/*
* If FORCECLOSE is set, forcibly close the vnode. For block
* or character devices, revert to an anonymous device. For
@@ -2131,9 +2149,11 @@ vflush(mp, rootrefs, flags, td)
VNASSERT(vp->v_type != VCHR && vp->v_type != VBLK, vp,
("device VNODE %p is FORCECLOSED", vp));
vgonel(vp, td);
VOP_UNLOCK(vp, 0, td);
MNT_ILOCK(mp);
continue;
}
VOP_UNLOCK(vp, 0, td);
#ifdef DIAGNOSTIC
if (busyprt)
vprint("vflush: busy vnode", vp);
@@ -2154,7 +2174,9 @@ vflush(mp, rootrefs, flags, td)
("vflush: usecount %d < rootrefs %d",
rootvp->v_usecount, rootrefs));
if (busy == 1 && rootvp->v_usecount == rootrefs) {
vgonel(rootvp, td);
VOP_LOCK(rootvp, LK_EXCLUSIVE|LK_INTERLOCK, td);
vgone(rootvp);
VOP_UNLOCK(rootvp, 0, td);
busy = 0;
} else
VI_UNLOCK(rootvp);
@@ -2187,60 +2209,6 @@ vlruvp(struct vnode *vp)
#endif
}
static void
vx_lock(struct vnode *vp)
{
ASSERT_VI_LOCKED(vp, "vx_lock");
/*
* Prevent the vnode from being recycled or brought into use while we
* clean it out.
*/
if (vp->v_iflag & VI_XLOCK)
panic("vx_lock: deadlock");
vp->v_iflag |= VI_XLOCK;
vp->v_vxthread = curthread;
}
static void
vx_unlock(struct vnode *vp)
{
ASSERT_VI_LOCKED(vp, "vx_unlock");
vp->v_iflag &= ~VI_XLOCK;
vp->v_vxthread = NULL;
if (vp->v_iflag & VI_XWANT) {
vp->v_iflag &= ~VI_XWANT;
wakeup(vp);
}
}
int
vx_wait(struct vnode *vp)
{
int locked;
ASSERT_VI_UNLOCKED(vp, "vx_wait");
VI_LOCK(vp);
locked = vx_waitl(vp);
VI_UNLOCK(vp);
return (locked);
}
int
vx_waitl(struct vnode *vp)
{
int locked = 0;
ASSERT_VI_LOCKED(vp, "vx_wait");
while (vp->v_iflag & VI_XLOCK) {
locked = 1;
vp->v_iflag |= VI_XWANT;
msleep(vp, VI_MTX(vp), PINOD, "vxwait", 0);
}
return (locked);
}
/*
* Recycle an unused vnode to the front of the free list.
* Release the passed interlock if the vnode will be recycled.
@@ -2249,6 +2217,7 @@ int
vrecycle(struct vnode *vp, struct thread *td)
{
ASSERT_VOP_LOCKED(vp, "vrecycle");
VI_LOCK(vp);
if (vp->v_usecount == 0) {
vgonel(vp, td);
@@ -2266,6 +2235,7 @@ void
vgone(struct vnode *vp)
{
struct thread *td = curthread; /* XXX */
ASSERT_VOP_LOCKED(vp, "vgone");
VI_LOCK(vp);
vgonel(vp, td);
@@ -2278,18 +2248,10 @@ void
vgonel(struct vnode *vp, struct thread *td)
{
int active;
int doomed;
/*
* If a vgone (or vclean) is already in progress,
* wait until it is done and return.
*/
ASSERT_VOP_LOCKED(vp, "vgonel");
ASSERT_VI_LOCKED(vp, "vgonel");
if (vx_waitl(vp)) {
VI_UNLOCK(vp);
return;
}
vx_lock(vp);
/*
* Check to see if the vnode is in use. If so we have to reference it
@@ -2300,13 +2262,13 @@ vgonel(struct vnode *vp, struct thread *td)
v_incr_usecount(vp, 1);
/*
* Even if the count is zero, the VOP_INACTIVE routine may still
* have the object locked while it cleans it out. The VOP_LOCK
* ensures that the VOP_INACTIVE routine is done with its work.
* For active vnodes, it ensures that no other activity can
* occur while the underlying object is being cleaned out.
* See if we're already doomed, if so, this is coming from a
* successful vtryrecycle();
*/
VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, td);
doomed = (vp->v_iflag & VI_DOOMED);
vp->v_iflag |= VI_DOOMED;
vp->v_vxthread = curthread;
VI_UNLOCK(vp);
/*
* Clean out any buffers associated with the vnode.
@@ -2317,72 +2279,36 @@ vgonel(struct vnode *vp, struct thread *td)
if (vinvalbuf(vp, V_SAVE, td, 0, 0) != 0)
vinvalbuf(vp, 0, td, 0, 0);
/*
* Any other processes trying to obtain this lock must first
* wait for VXLOCK to clear, then call the new lock operation.
*/
VOP_UNLOCK(vp, 0, td);
/*
* If purging an active vnode, it must be closed and
* deactivated before being reclaimed. Note that the
* VOP_INACTIVE will unlock the vnode.
* deactivated before being reclaimed.
*/
if (active) {
VOP_CLOSE(vp, FNONBLOCK, NOCRED, td);
VI_LOCK(vp);
if ((vp->v_iflag & VI_DOINGINACT) == 0) {
VNASSERT((vp->v_iflag & VI_DOINGINACT) == 0, vp,
("vclean: recursed on VI_DOINGINACT"));
vp->v_iflag |= VI_DOINGINACT;
VI_UNLOCK(vp);
if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT, td) != 0)
panic("vclean: cannot relock.");
VOP_INACTIVE(vp, td);
VI_LOCK(vp);
VNASSERT(vp->v_iflag & VI_DOINGINACT, vp,
("vclean: lost VI_DOINGINACT"));
vp->v_iflag &= ~VI_DOINGINACT;
}
if ((vp->v_iflag & VI_DOINGINACT) == 0)
vinactive(vp, td);
VI_UNLOCK(vp);
}
/*
* Reclaim the vnode.
*/
if (VOP_RECLAIM(vp, td))
panic("vclean: cannot reclaim");
panic("vgone: cannot reclaim");
VNASSERT(vp->v_object == NULL, vp,
("vop_reclaim left v_object vp=%p, tag=%s", vp, vp->v_tag));
if (active) {
/*
* Inline copy of vrele() since VOP_INACTIVE
* has already been called.
*/
VI_LOCK(vp);
v_incr_usecount(vp, -1);
if (vp->v_usecount <= 0) {
#ifdef INVARIANTS
if (vp->v_usecount < 0 || vp->v_writecount != 0) {
vprint("vclean: bad ref count", vp);
panic("vclean: ref cnt");
}
#endif
if (VSHOULDFREE(vp))
vfree(vp);
}
VI_UNLOCK(vp);
}
/*
* Delete from old mount point vnode list.
*/
delmntque(vp);
cache_purge(vp);
VI_LOCK(vp);
if (VSHOULDFREE(vp))
vfree(vp);
if (active) {
v_incr_usecount(vp, -1);
VNASSERT(vp->v_usecount >= 0, vp, ("vgone: bad ref count"));
}
/*
* Done with purge, reset to the standard lock and
* notify sleepers of the grim news.
@@ -2390,14 +2316,8 @@ vgonel(struct vnode *vp, struct thread *td)
vp->v_vnlock = &vp->v_lock;
vp->v_op = &dead_vnodeops;
vp->v_tag = "none";
VI_UNLOCK(vp);
/*
* If special device, remove it from special device alias list
* if it is on one.
*/
VI_LOCK(vp);
vp->v_type = VBAD;
vp->v_vxthread = NULL;
/*
* If it is on the freelist and not already at the head,
@@ -2409,7 +2329,7 @@ vgonel(struct vnode *vp, struct thread *td)
* incremented first, vgone would (incorrectly) try to
* close the previous instance of the underlying object.
*/
if (vp->v_usecount == 0 && !(vp->v_iflag & VI_DOOMED)) {
if (vp->v_usecount == 0 && vp->v_holdcnt == 0 && !doomed) {
mtx_lock(&vnode_free_list_mtx);
if (vp->v_iflag & VI_FREE) {
TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
@@ -2420,9 +2340,6 @@ vgonel(struct vnode *vp, struct thread *td)
TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
mtx_unlock(&vnode_free_list_mtx);
}
vp->v_type = VBAD;
vx_unlock(vp);
VI_UNLOCK(vp);
}
@@ -2483,10 +2400,6 @@ vn_printf(struct vnode *vp, const char *fmt, ...)
strcat(buf, "|VV_TEXT");
if (vp->v_vflag & VV_SYSTEM)
strcat(buf, "|VV_SYSTEM");
if (vp->v_iflag & VI_XLOCK)
strcat(buf, "|VI_XLOCK");
if (vp->v_iflag & VI_XWANT)
strcat(buf, "|VI_XWANT");
if (vp->v_iflag & VI_DOOMED)
strcat(buf, "|VI_DOOMED");
if (vp->v_iflag & VI_FREE)
@@ -2495,7 +2408,9 @@ vn_printf(struct vnode *vp, const char *fmt, ...)
if (mtx_owned(VI_MTX(vp)))
printf(" VI_LOCKed");
if (vp->v_object != NULL);
printf(" v_object %p\n", vp->v_object);
printf(" v_object %p ref %d pages %d\n",
vp->v_object, vp->v_object->ref_count,
vp->v_object->resident_page_count);
printf(" ");
lockmgr_printinfo(vp->v_vnlock);
printf("\n");
@@ -2795,11 +2710,6 @@ vfs_msync(struct mount *mp, int flags)
}
VI_LOCK(vp);
if (vp->v_iflag & VI_XLOCK) {
VI_UNLOCK(vp);
continue;
}
if ((vp->v_iflag & VI_OBJDIRTY) &&
(flags == MNT_WAIT || VOP_ISLOCKED(vp, NULL) == 0)) {
MNT_IUNLOCK(mp);
@@ -2844,14 +2754,15 @@ vfree(struct vnode *vp)
ASSERT_VI_LOCKED(vp, "vfree");
mtx_lock(&vnode_free_list_mtx);
VNASSERT((vp->v_iflag & VI_FREE) == 0, vp, ("vnode already free"));
if (vp->v_iflag & VI_AGE) {
VNASSERT(VSHOULDFREE(vp), vp, ("vfree: freeing when we shouldn't"));
if (vp->v_iflag & (VI_AGE|VI_DOOMED)) {
TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
} else {
TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
}
freevnodes++;
mtx_unlock(&vnode_free_list_mtx);
vp->v_iflag &= ~VI_AGE;
vp->v_iflag &= ~(VI_AGE|VI_DOOMED);
vp->v_iflag |= VI_FREE;
}
@@ -3061,7 +2972,6 @@ sync_inactive(ap)
} */ *ap;
{
VOP_UNLOCK(ap->a_vp, 0, ap->a_td);
vgone(ap->a_vp);
return (0);
}
@@ -3376,8 +3286,7 @@ void
assert_vop_locked(struct vnode *vp, const char *str)
{
if (vp && !IGNORE_LOCK(vp) && VOP_ISLOCKED(vp, NULL) == 0 &&
!((vp->v_iflag & VI_XLOCK) && vp->v_vxthread == curthread))
if (vp && !IGNORE_LOCK(vp) && VOP_ISLOCKED(vp, NULL) == 0)
vfs_badlock("is not locked but should be", str, vp);
}
@@ -3515,11 +3424,6 @@ vop_lock_pre(void *ap)
{
struct vop_lock_args *a = ap;
if (a->a_vp->v_iflag & VI_XLOCK &&
a->a_vp->v_vxthread != curthread) {
vprint("vop_lock_pre:", a->a_vp);
panic("vop_lock_pre: locked while xlock held.\n");
}
if ((a->a_flags & LK_INTERLOCK) == 0)
ASSERT_VI_UNLOCKED(a->a_vp, "VOP_LOCK");
else