Do not drop NFS vnode lock when performing consistency checks.

Currently several paths in the NFS client upgrade the shared vnode
lock to exclusive, which might cause temporary dropping of the lock.
This action appears to be fatal for nullfs mounts over NFS. If the
operation is performed over nullfs vnode, then bypassed down to NFS
VOP, and the lock is dropped, another thread might reclaim the upper
nullfs vnode.  Since on reclaim the nullfs vnode lock and NFS vnode
lock are split, the original lock state of the nullfs vnode is not
restored.  As a result, VFS operations receive an unlocked vnode after a
VOP call.

Stop upgrading the vnode lock when we check the consistency or flush
buffers as a result of a detected inconsistency.  Instead, allocate a new
lockmgr lock for each NFS node, which is locked exclusive instead of
the vnode lock upgrade.  In other words, the other parallel
modifications of the vnode are excluded by either vnode lock conflict
or exclusivity of the new lock when the vnode lock is shared.

Also revert r316529 because now the vnode cannot be reclaimed during
ncl_vinvalbuf().

In collaboration with:	pho
Reviewed by:	rmacklem
Reported and tested by:	pho
Sponsored by:	The FreeBSD Foundation
MFC after:	2 weeks
Differential revision:	https://reviews.freebsd.org/D12083
This commit is contained in:
Konstantin Belousov 2017-08-20 10:08:45 +00:00
parent b59ea73029
commit e5cffdd34b
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=322722
6 changed files with 48 additions and 78 deletions

View File

@ -365,20 +365,13 @@ nfs_bioread_check_cons(struct vnode *vp, struct thread *td, struct ucred *cred)
int error = 0;
struct vattr vattr;
struct nfsnode *np = VTONFS(vp);
int old_lock;
bool old_lock;
/*
* Grab the exclusive lock before checking whether the cache is
* consistent.
* XXX - We can make this cheaper later (by acquiring cheaper locks).
* But for now, this suffices.
* Ensure the exclusove access to the node before checking
* whether the cache is consistent.
*/
old_lock = ncl_upgrade_vnlock(vp);
if (vp->v_iflag & VI_DOOMED) {
error = EBADF;
goto out;
}
old_lock = ncl_excl_start(vp);
mtx_lock(&np->n_mtx);
if (np->n_flag & NMODIFIED) {
mtx_unlock(&np->n_mtx);
@ -386,9 +379,7 @@ nfs_bioread_check_cons(struct vnode *vp, struct thread *td, struct ucred *cred)
if (vp->v_type != VDIR)
panic("nfs: bioread, not dir");
ncl_invaldir(vp);
error = ncl_vinvalbuf(vp, V_SAVE, td, 1);
if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
error = EBADF;
error = ncl_vinvalbuf(vp, V_SAVE | V_ALLOWCLEAN, td, 1);
if (error != 0)
goto out;
}
@ -404,16 +395,14 @@ nfs_bioread_check_cons(struct vnode *vp, struct thread *td, struct ucred *cred)
mtx_unlock(&np->n_mtx);
error = VOP_GETATTR(vp, &vattr, cred);
if (error)
return (error);
goto out;
mtx_lock(&np->n_mtx);
if ((np->n_flag & NSIZECHANGED)
|| (NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime))) {
mtx_unlock(&np->n_mtx);
if (vp->v_type == VDIR)
ncl_invaldir(vp);
error = ncl_vinvalbuf(vp, V_SAVE, td, 1);
if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
error = EBADF;
error = ncl_vinvalbuf(vp, V_SAVE | V_ALLOWCLEAN, td, 1);
if (error != 0)
goto out;
mtx_lock(&np->n_mtx);
@ -423,7 +412,7 @@ nfs_bioread_check_cons(struct vnode *vp, struct thread *td, struct ucred *cred)
mtx_unlock(&np->n_mtx);
}
out:
ncl_downgrade_vnlock(vp, old_lock);
ncl_excl_finish(vp, old_lock);
return (error);
}
@ -608,8 +597,6 @@ ncl_bioread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred)
while (error == NFSERR_BAD_COOKIE) {
ncl_invaldir(vp);
error = ncl_vinvalbuf(vp, 0, td, 1);
if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
return (EBADF);
/*
* Yuck! The directory has been modified on the
@ -933,8 +920,6 @@ ncl_write(struct vop_write_args *ap)
KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
error = ncl_vinvalbuf(vp, V_SAVE | ((ioflag &
IO_VMIO) != 0 ? V_VMIO : 0), td, 1);
if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
error = EBADF;
if (error != 0)
return (error);
} else
@ -1016,9 +1001,6 @@ ncl_write(struct vop_write_args *ap)
KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
error = ncl_vinvalbuf(vp, V_SAVE | ((ioflag &
IO_VMIO) != 0 ? V_VMIO : 0), td, 1);
if (error == 0 &&
(vp->v_iflag & VI_DOOMED) != 0)
error = EBADF;
if (error != 0)
return (error);
wouldcommit = biosize;
@ -1336,7 +1318,7 @@ ncl_vinvalbuf(struct vnode *vp, int flags, struct thread *td, int intrflg)
struct nfsnode *np = VTONFS(vp);
struct nfsmount *nmp = VFSTONFS(vp->v_mount);
int error = 0, slpflag, slptimeo;
int old_lock = 0;
bool old_lock;
ASSERT_VOP_LOCKED(vp, "ncl_vinvalbuf");
@ -1352,16 +1334,9 @@ ncl_vinvalbuf(struct vnode *vp, int flags, struct thread *td, int intrflg)
slptimeo = 0;
}
old_lock = ncl_upgrade_vnlock(vp);
if (vp->v_iflag & VI_DOOMED) {
/*
* Since vgonel() uses the generic vinvalbuf() to flush
* dirty buffers and it does not call this function, it
* is safe to just return OK when VI_DOOMED is set.
*/
ncl_downgrade_vnlock(vp, old_lock);
return (0);
}
old_lock = ncl_excl_start(vp);
if (old_lock)
flags |= V_ALLOWCLEAN;
/*
* Now, flush as required.
@ -1400,7 +1375,7 @@ ncl_vinvalbuf(struct vnode *vp, int flags, struct thread *td, int intrflg)
np->n_flag &= ~NMODIFIED;
mtx_unlock(&np->n_mtx);
out:
ncl_downgrade_vnlock(vp, old_lock);
ncl_excl_finish(vp, old_lock);
return error;
}

View File

@ -141,6 +141,9 @@ ncl_nget(struct mount *mntp, u_int8_t *fhp, int fhsize, struct nfsnode **npp,
* happened to return an error no special casing is needed).
*/
mtx_init(&np->n_mtx, "NEWNFSnode lock", NULL, MTX_DEF | MTX_DUPOK);
lockinit(&np->n_excl, PVFS, "nfsupg", VLKTIMEOUT, LK_NOSHARE |
LK_CANRECURSE);
/*
* NFS supports recursive and shared locking.
*/
@ -167,6 +170,7 @@ ncl_nget(struct mount *mntp, u_int8_t *fhp, int fhsize, struct nfsnode **npp,
*npp = NULL;
FREE((caddr_t)np->n_fhp, M_NFSFH);
mtx_destroy(&np->n_mtx);
lockdestroy(&np->n_excl);
uma_zfree(newnfsnode_zone, np);
return (error);
}
@ -332,6 +336,7 @@ ncl_reclaim(struct vop_reclaim_args *ap)
if (np->n_v4 != NULL)
FREE((caddr_t)np->n_v4, M_NFSV4NODE);
mtx_destroy(&np->n_mtx);
lockdestroy(&np->n_excl);
uma_zfree(newnfsnode_zone, vp->v_data);
vp->v_data = NULL;
return (0);

View File

@ -230,6 +230,8 @@ nfscl_nget(struct mount *mntp, struct vnode *dvp, struct nfsfh *nfhp,
* happened to return an error no special casing is needed).
*/
mtx_init(&np->n_mtx, "NEWNFSnode lock", NULL, MTX_DEF | MTX_DUPOK);
lockinit(&np->n_excl, PVFS, "nfsupg", VLKTIMEOUT, LK_NOSHARE |
LK_CANRECURSE);
/*
* Are we getting the root? If so, make sure the vnode flags
@ -271,6 +273,7 @@ nfscl_nget(struct mount *mntp, struct vnode *dvp, struct nfsfh *nfhp,
if (error != 0) {
*npp = NULL;
mtx_destroy(&np->n_mtx);
lockdestroy(&np->n_excl);
FREE((caddr_t)nfhp, M_NFSFH);
if (np->n_v4 != NULL)
FREE((caddr_t)np->n_v4, M_NFSV4NODE);

View File

@ -135,30 +135,33 @@ ncl_dircookie_unlock(struct nfsnode *np)
mtx_unlock(&np->n_mtx);
}
int
ncl_upgrade_vnlock(struct vnode *vp)
bool
ncl_excl_start(struct vnode *vp)
{
int old_lock;
struct nfsnode *np;
int vn_lk;
ASSERT_VOP_LOCKED(vp, "ncl_upgrade_vnlock");
old_lock = NFSVOPISLOCKED(vp);
if (old_lock != LK_EXCLUSIVE) {
KASSERT(old_lock == LK_SHARED,
("ncl_upgrade_vnlock: wrong old_lock %d", old_lock));
/* Upgrade to exclusive lock, this might block */
NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY);
}
return (old_lock);
ASSERT_VOP_LOCKED(vp, "ncl_excl_start");
vn_lk = NFSVOPISLOCKED(vp);
if (vn_lk == LK_EXCLUSIVE)
return (false);
KASSERT(vn_lk == LK_SHARED,
("ncl_excl_start: wrong vnode lock %d", vn_lk));
/* Ensure exclusive access, this might block */
np = VTONFS(vp);
lockmgr(&np->n_excl, LK_EXCLUSIVE, NULL);
return (true);
}
void
ncl_downgrade_vnlock(struct vnode *vp, int old_lock)
ncl_excl_finish(struct vnode *vp, bool old_lock)
{
if (old_lock != LK_EXCLUSIVE) {
KASSERT(old_lock == LK_SHARED, ("wrong old_lock %d", old_lock));
/* Downgrade from exclusive lock. */
NFSVOPLOCK(vp, LK_DOWNGRADE | LK_RETRY);
}
struct nfsnode *np;
if (!old_lock)
return;
np = VTONFS(vp);
lockmgr(&np->n_excl, LK_RELEASE, NULL);
}
#ifdef NFS_ACDEBUG

View File

@ -520,8 +520,6 @@ nfs_open(struct vop_open_args *ap)
if (np->n_flag & NMODIFIED) {
mtx_unlock(&np->n_mtx);
error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
return (EBADF);
if (error == EINTR || error == EIO) {
if (NFS_ISV4(vp))
(void) nfsrpc_close(vp, 0, ap->a_td);
@ -558,8 +556,6 @@ nfs_open(struct vop_open_args *ap)
np->n_direofoffset = 0;
mtx_unlock(&np->n_mtx);
error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
return (EBADF);
if (error == EINTR || error == EIO) {
if (NFS_ISV4(vp))
(void) nfsrpc_close(vp, 0, ap->a_td);
@ -580,8 +576,6 @@ nfs_open(struct vop_open_args *ap)
if (np->n_directio_opens == 0) {
mtx_unlock(&np->n_mtx);
error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
return (EBADF);
if (error) {
if (NFS_ISV4(vp))
(void) nfsrpc_close(vp, 0, ap->a_td);
@ -722,8 +716,6 @@ nfs_close(struct vop_close_args *ap)
}
} else {
error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
return (EBADF);
}
mtx_lock(&np->n_mtx);
}
@ -942,9 +934,7 @@ nfs_setattr(struct vop_setattr_args *ap)
mtx_unlock(&np->n_mtx);
error = ncl_vinvalbuf(vp, vap->va_size == 0 ?
0 : V_SAVE, td, 1);
if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
error = EBADF;
if (error != 0) {
if (error != 0) {
vnode_pager_setsize(vp, tsize);
return (error);
}
@ -971,8 +961,6 @@ nfs_setattr(struct vop_setattr_args *ap)
(np->n_flag & NMODIFIED) && vp->v_type == VREG) {
mtx_unlock(&np->n_mtx);
error = ncl_vinvalbuf(vp, V_SAVE, td, 1);
if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
return (EBADF);
if (error == EINTR || error == EIO)
return (error);
} else
@ -1676,9 +1664,7 @@ nfs_remove(struct vop_remove_args *ap)
* unnecessary delayed writes later.
*/
error = ncl_vinvalbuf(vp, 0, cnp->cn_thread, 1);
if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
error = EBADF;
else if (error != EINTR && error != EIO)
if (error != EINTR && error != EIO)
/* Do the rpc */
error = nfs_removerpc(dvp, vp, cnp->cn_nameptr,
cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread);
@ -3089,10 +3075,6 @@ nfs_advlock(struct vop_advlock_args *ap)
if ((np->n_flag & NMODIFIED) || ret ||
np->n_change != va.va_filerev) {
(void) ncl_vinvalbuf(vp, V_SAVE, td, 1);
if ((vp->v_iflag & VI_DOOMED) != 0) {
NFSVOPUNLOCK(vp, 0);
return (EBADF);
}
np->n_attrstamp = 0;
KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
ret = VOP_GETATTR(vp, &va, cred);

View File

@ -92,6 +92,8 @@ struct nfs_accesscache {
*/
struct nfsnode {
struct mtx n_mtx; /* Protects all of these members */
struct lock n_excl; /* Exclusive helper for shared
vnode lock */
u_quad_t n_size; /* Current size of file */
u_quad_t n_brev; /* Modify rev when cached */
u_quad_t n_lrev; /* Modify rev for lease */
@ -184,8 +186,8 @@ int ncl_removeit(struct sillyrename *, struct vnode *);
int ncl_nget(struct mount *, u_int8_t *, int, struct nfsnode **, int);
nfsuint64 *ncl_getcookie(struct nfsnode *, off_t, int);
void ncl_invaldir(struct vnode *);
int ncl_upgrade_vnlock(struct vnode *);
void ncl_downgrade_vnlock(struct vnode *, int);
bool ncl_excl_start(struct vnode *);
void ncl_excl_finish(struct vnode *, bool old_lock);
void ncl_dircookie_lock(struct nfsnode *);
void ncl_dircookie_unlock(struct nfsnode *);