MFp4: Fix two bugs causing possible deadlocks or panics, and one nit:

- Emulate lock draining (LK_DRAIN) in null_lock() to avoid deadlocks
  when the vnode is being recycled.

- Don't allow null_nodeget() to return a nullfs vnode from the wrong
  mount when multiple nullfs's are mounted.  It's unclear why these
  checks were removed in null_subr.c 1.35, but they are definitely
  necessary.  Without the checks, trying to unmount a nullfs mount
  will erroneously return EBUSY, and forcibly unmounting with -f will
  cause a panic.

- Bump LOG2_SIZEVNODE up to 8, since vnodes are >256 bytes now.  The
  old value (7) didn't cause any problems, but made the hash algorithm
  suboptimal (see the sketch below).

These changes fix nullfs enough that a parallel buildworld succeeds.

Submitted by:	tegge (partially; LK_DRAIN)
Tested by:	kris
commit 35c71928a0
parent 35c71928a0's parent: b73ccc7a1b
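For context on the LOG2_SIZEVNODE nit: nullfs picks a hash bucket from the lower vnode's address, shifting out the low address bits before masking. If the shift is smaller than log2 of the real allocation size, consecutive vnode addresses only ever land in a subset of the buckets, so some chains stay empty while others grow twice as long. The following is a stand-alone user-space sketch of that effect, not the kernel code: struct fake_vnode, the exact 256-byte size, the bucket() helper, and NBUCKETS are assumptions made for the demo (NNULLNODECACHE happens to be 16 in the diff below).

/*
 * Illustration only: why shifting by log2(sizeof struct vnode) matters.
 * fake_vnode, bucket() and the 256-byte size are invented for the demo.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define NBUCKETS	16		/* power of two, like NNULLNODECACHE */

struct fake_vnode { char pad[256]; };	/* stands in for struct vnode (>256 bytes) */

/* Bucket index: shift out the low address bits, then mask. */
static unsigned
bucket(const void *vp, int log2size)
{
	return (unsigned)(((uintptr_t)vp >> log2size) & (NBUCKETS - 1));
}

int
main(void)
{
	struct fake_vnode *pool;
	int used7[NBUCKETS] = { 0 }, used8[NBUCKETS] = { 0 };
	int i, n7 = 0, n8 = 0;

	/* A contiguous pool mimics vnodes allocated back to back. */
	pool = calloc(64, sizeof(*pool));
	if (pool == NULL)
		return (1);

	for (i = 0; i < 64; i++) {
		used7[bucket(&pool[i], 7)]++;	/* old shift: too small */
		used8[bucket(&pool[i], 8)]++;	/* new shift: log2(256) */
	}
	for (i = 0; i < NBUCKETS; i++) {
		n7 += (used7[i] != 0);
		n8 += (used8[i] != 0);
	}
	/* With the 7-bit shift only every other bucket is ever hit. */
	printf("buckets used: shift 7 -> %d/%d, shift 8 -> %d/%d\n",
	    n7, NBUCKETS, n8, NBUCKETS);
	free(pool);
	return (0);
}

Run on a typical system this prints "shift 7 -> 8/16, shift 8 -> 16/16": the old shift wasted half the hash chains, which is the "suboptimal" behaviour the commit message mentions.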
--- a/sys/fs/nullfs/null.h
+++ b/sys/fs/nullfs/null.h
@@ -51,6 +51,8 @@ struct null_node {
 	LIST_ENTRY(null_node)	null_hash;	/* Hash list */
 	struct vnode		*null_lowervp;	/* VREFed once */
 	struct vnode		*null_vnode;	/* Back pointer */
+	int			null_pending_locks;
+	int			null_drain_wakeup;
 };
 
 #define	MOUNTTONULLMOUNT(mp) ((struct null_mount *)((mp)->mnt_data))
--- a/sys/fs/nullfs/null_subr.c
+++ b/sys/fs/nullfs/null_subr.c
@@ -50,7 +50,7 @@
 
 #include <fs/nullfs/null.h>
 
-#define	LOG2_SIZEVNODE 7		/* log2(sizeof struct vnode) */
+#define	LOG2_SIZEVNODE 8		/* log2(sizeof struct vnode) */
 #define	NNULLNODECACHE 16
 
 /*
@@ -71,8 +71,8 @@ struct mtx null_hashmtx;
 static MALLOC_DEFINE(M_NULLFSHASH, "NULLFS hash", "NULLFS hash table");
 MALLOC_DEFINE(M_NULLFSNODE, "NULLFS node", "NULLFS vnode private part");
 
-static struct vnode * null_hashget(struct vnode *);
-static struct vnode * null_hashins(struct null_node *);
+static struct vnode * null_hashget(struct mount *, struct vnode *);
+static struct vnode * null_hashins(struct mount *, struct null_node *);
 
 /*
  * Initialise cache headers
@@ -103,7 +103,8 @@ nullfs_uninit(vfsp)
  * Lower vnode should be locked on entry and will be left locked on exit.
  */
 static struct vnode *
-null_hashget(lowervp)
+null_hashget(mp, lowervp)
+	struct mount *mp;
 	struct vnode *lowervp;
 {
 	struct thread *td = curthread;	/* XXX */
@@ -121,9 +122,20 @@ null_hashget(lowervp)
 loop:
 	mtx_lock(&null_hashmtx);
 	LIST_FOREACH(a, hd, null_hash) {
-		if (a->null_lowervp == lowervp) {
+		if (a->null_lowervp == lowervp && NULLTOV(a)->v_mount == mp) {
 			vp = NULLTOV(a);
 			mtx_lock(&vp->v_interlock);
+			/*
+			 * Don't block if nullfs vnode is being recycled.
+			 * We already hold a lock on the lower vnode, thus
+			 * waiting might deadlock against the thread
+			 * recycling the nullfs vnode or another thread
+			 * in vrele() waiting for the vnode lock.
+			 */
+			if ((vp->v_iflag & VI_XLOCK) != 0) {
+				VI_UNLOCK(vp);
+				continue;
+			}
 			mtx_unlock(&null_hashmtx);
 			/*
 			 * We need vget for the VXLOCK
@@ -145,7 +157,8 @@ null_hashget(lowervp)
  * node found.
  */
 static struct vnode *
-null_hashins(xp)
+null_hashins(mp, xp)
+	struct mount *mp;
 	struct null_node *xp;
 {
 	struct thread *td = curthread;	/* XXX */
@@ -157,9 +170,21 @@ null_hashins(xp)
 loop:
 	mtx_lock(&null_hashmtx);
 	LIST_FOREACH(oxp, hd, null_hash) {
-		if (oxp->null_lowervp == xp->null_lowervp) {
+		if (oxp->null_lowervp == xp->null_lowervp &&
+		    NULLTOV(oxp)->v_mount == mp) {
 			ovp = NULLTOV(oxp);
 			mtx_lock(&ovp->v_interlock);
+			/*
+			 * Don't block if nullfs vnode is being recycled.
+			 * We already hold a lock on the lower vnode, thus
+			 * waiting might deadlock against the thread
+			 * recycling the nullfs vnode or another thread
+			 * in vrele() waiting for the vnode lock.
+			 */
+			if ((ovp->v_iflag & VI_XLOCK) != 0) {
+				VI_UNLOCK(ovp);
+				continue;
+			}
 			mtx_unlock(&null_hashmtx);
 			if (vget(ovp, LK_EXCLUSIVE | LK_THISLAYER | LK_INTERLOCK, td))
 				goto loop;
@@ -192,7 +217,7 @@ null_nodeget(mp, lowervp, vpp)
 	int error;
 
 	/* Lookup the hash firstly */
-	*vpp = null_hashget(lowervp);
+	*vpp = null_hashget(mp, lowervp);
 	if (*vpp != NULL) {
 		vrele(lowervp);
 		return (0);
@@ -222,6 +247,8 @@ null_nodeget(mp, lowervp, vpp)
 
 	xp->null_vnode = vp;
 	xp->null_lowervp = lowervp;
+	xp->null_pending_locks = 0;
+	xp->null_drain_wakeup = 0;
 
 	vp->v_type = lowervp->v_type;
 	vp->v_data = xp;
@@ -244,7 +271,7 @@ null_nodeget(mp, lowervp, vpp)
 	 * Atomically insert our new node into the hash or vget existing
 	 * if someone else has beaten us to it.
 	 */
-	*vpp = null_hashins(xp);
+	*vpp = null_hashins(mp, xp);
 	if (*vpp != NULL) {
 		vrele(lowervp);
 		VOP_UNLOCK(vp, LK_THISLAYER, td);
--- a/sys/fs/nullfs/null_vnops.c
+++ b/sys/fs/nullfs/null_vnops.c
@@ -592,6 +592,7 @@ null_lock(ap)
 	struct thread *td = ap->a_td;
 	struct vnode *lvp;
 	int error;
+	struct null_node *nn;
 
 	if (flags & LK_THISLAYER) {
 		if (vp->v_vnlock != NULL) {
@@ -614,13 +615,65 @@ null_lock(ap)
 		 * going away doesn't mean the struct lock below us is.
 		 * LK_EXCLUSIVE is fine.
 		 */
+		if ((flags & LK_INTERLOCK) == 0) {
+			VI_LOCK(vp);
+			flags |= LK_INTERLOCK;
+		}
+		nn = VTONULL(vp);
 		if ((flags & LK_TYPE_MASK) == LK_DRAIN) {
 			NULLFSDEBUG("null_lock: avoiding LK_DRAIN\n");
-			return(lockmgr(vp->v_vnlock,
-				(flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE,
-				&vp->v_interlock, td));
+			/*
+			 * Emulate lock draining by waiting for all other
+			 * pending locks to complete.  Afterwards the
+			 * lockmgr call might block, but no other threads
+			 * will attempt to use this nullfs vnode due to the
+			 * VI_XLOCK flag.
+			 */
+			while (nn->null_pending_locks > 0) {
+				nn->null_drain_wakeup = 1;
+				msleep(&nn->null_pending_locks,
+				       VI_MTX(vp),
+				       PVFS,
+				       "nuldr", 0);
+			}
+			error = lockmgr(vp->v_vnlock,
+					(flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE,
+					VI_MTX(vp), td);
+			return error;
 		}
-		return(lockmgr(vp->v_vnlock, flags, &vp->v_interlock, td));
+		nn->null_pending_locks++;
+		error = lockmgr(vp->v_vnlock, flags, &vp->v_interlock, td);
+		VI_LOCK(vp);
+		/*
+		 * If we're called from vrele then v_usecount can have been 0
+		 * and another process might have initiated a recycle
+		 * operation.  When that happens, just back out.
+		 */
+		if (error == 0 && (vp->v_iflag & VI_XLOCK) != 0 &&
+		    td != vp->v_vxproc) {
+			lockmgr(vp->v_vnlock,
+				(flags & ~LK_TYPE_MASK) | LK_RELEASE,
+				VI_MTX(vp), td);
+			VI_LOCK(vp);
+			error = ENOENT;
+		}
+		nn->null_pending_locks--;
+		/*
+		 * Wakeup the process draining the vnode after all
+		 * pending lock attempts has been failed.
+		 */
+		if (nn->null_pending_locks == 0 &&
+		    nn->null_drain_wakeup != 0) {
+			nn->null_drain_wakeup = 0;
+			wakeup(&nn->null_pending_locks);
+		}
+		if (error == ENOENT && (vp->v_iflag & VI_XLOCK) != 0 &&
+		    vp->v_vxproc != curthread) {
+			vp->v_iflag |= VI_XWANT;
+			msleep(vp, VI_MTX(vp), PINOD, "nulbo", 0);
+		}
+		VI_UNLOCK(vp);
+		return error;
 	} else {
 		/*
 		 * To prevent race conditions involving doing a lookup
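The null_lock() change above emulates LK_DRAIN with a pending-locks counter, a drain-wakeup flag, and msleep()/wakeup() under the vnode interlock: lockers register themselves before calling lockmgr() and the drainer waits until the count hits zero. The following is a minimal user-space sketch of that handshake only, not the kernel code: a pthread mutex and condition variable stand in for VI_MTX(vp) and the sleep channel, fake_node/fake_lock/fake_drain are invented names, and unlike the kernel there is no VI_XLOCK here to keep new lockers out once the drain completes.

/*
 * Sketch of the drain handshake: lockers bump a pending counter, the
 * drainer waits until it reaches zero.  Illustrative names only; the
 * kernel uses msleep()/wakeup() on &nn->null_pending_locks instead of
 * a condition variable.
 */
#include <pthread.h>
#include <stdio.h>

struct fake_node {
	pthread_mutex_t	interlock;	/* plays the role of VI_MTX(vp) */
	pthread_cond_t	drain_cv;	/* plays the role of the sleep channel */
	int		pending_locks;	/* null_pending_locks */
	int		drain_wakeup;	/* null_drain_wakeup */
};

/* A regular lock attempt: register as pending, do the work, wake a drainer. */
static void
fake_lock(struct fake_node *nn)
{
	pthread_mutex_lock(&nn->interlock);
	nn->pending_locks++;
	pthread_mutex_unlock(&nn->interlock);

	/* ... acquire and use the underlying lock here ... */

	pthread_mutex_lock(&nn->interlock);
	nn->pending_locks--;
	if (nn->pending_locks == 0 && nn->drain_wakeup != 0) {
		nn->drain_wakeup = 0;
		pthread_cond_broadcast(&nn->drain_cv);	/* wakeup() equivalent */
	}
	pthread_mutex_unlock(&nn->interlock);
}

/* Drain: wait until no other lock attempts are in flight. */
static void
fake_drain(struct fake_node *nn)
{
	pthread_mutex_lock(&nn->interlock);
	while (nn->pending_locks > 0) {
		nn->drain_wakeup = 1;
		pthread_cond_wait(&nn->drain_cv, &nn->interlock);  /* msleep() equivalent */
	}
	pthread_mutex_unlock(&nn->interlock);
	/* In the kernel, VI_XLOCK now prevents new lockers from arriving. */
}

static void *
worker(void *arg)
{
	fake_lock(arg);
	return (NULL);
}

int
main(void)
{
	struct fake_node nn = {
		PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0, 0
	};
	pthread_t tds[4];
	int i;

	for (i = 0; i < 4; i++)
		pthread_create(&tds[i], NULL, worker, &nn);
	fake_drain(&nn);
	for (i = 0; i < 4; i++)
		pthread_join(tds[i], NULL);
	printf("drained with %d pending locks\n", nn.pending_locks);
	return (0);
}

Build with cc -pthread. The drain_wakeup flag is not strictly needed with a condition variable, but it mirrors the kernel pattern, where the wakeup() is skipped entirely unless a drainer is actually waiting.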