Do not drop NFS vnode lock when performing consistency checks.

Currently several paths in the NFS client upgrade the shared vnode
lock to exclusive, which might cause temporary dropping of the lock.
This action appears to be fatal for nullfs mounts over NFS. If the
operation is performed over nullfs vnode, then bypassed down to NFS
VOP, and the lock is dropped, another thread might reclaim the upper
nullfs vnode.  Since on reclaim the nullfs vnode lock and NFS vnode
lock are split, the original lock state of the nullfs vnode is not
restored.  As a result, VFS operations receive an unlocked vnode after a
VOP call.

Stop upgrading the vnode lock when we check the consistency or flush
buffers as a result of a detected inconsistency.  Instead, allocate a new
lockmgr lock for each NFS node, which is locked exclusive instead of
the vnode lock upgrade.  In other words, the other parallel
modifications of the vnode are excluded by either vnode lock conflict
or exclusivity of the new lock when the vnode lock is shared.

Also revert r316529 because now the vnode cannot be reclaimed during
ncl_vinvalbuf().

In collaboration with:	pho
Reviewed by:	rmacklem
Reported and tested by:	pho
Sponsored by:	The FreeBSD Foundation
MFC after:	2 weeks
Differential revision:	https://reviews.freebsd.org/D12083
This commit is contained in:
Konstantin Belousov 2017-08-20 10:08:45 +00:00
parent b59ea73029
commit e5cffdd34b
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=322722
6 changed files with 48 additions and 78 deletions

View File

@ -365,20 +365,13 @@ nfs_bioread_check_cons(struct vnode *vp, struct thread *td, struct ucred *cred)
int error = 0;
struct vattr vattr;
struct nfsnode *np = VTONFS(vp);
int old_lock;
bool old_lock;
/*
* Grab the exclusive lock before checking whether the cache is
* consistent.
* XXX - We can make this cheaper later (by acquiring cheaper locks).
* But for now, this suffices.
* Ensure the exclusove access to the node before checking
* whether the cache is consistent.
*/
old_lock = ncl_upgrade_vnlock(vp);
if (vp->v_iflag & VI_DOOMED) {
error = EBADF;
goto out;
}
old_lock = ncl_excl_start(vp);
mtx_lock(&np->n_mtx);
if (np->n_flag & NMODIFIED) {
mtx_unlock(&np->n_mtx);
@ -386,9 +379,7 @@ nfs_bioread_check_cons(struct vnode *vp, struct thread *td, struct ucred *cred)
if (vp->v_type != VDIR)
panic("nfs: bioread, not dir");
ncl_invaldir(vp);
error = ncl_vinvalbuf(vp, V_SAVE, td, 1);
if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
error = EBADF;
error = ncl_vinvalbuf(vp, V_SAVE | V_ALLOWCLEAN, td, 1);
if (error != 0)
goto out;
}
@ -404,16 +395,14 @@ nfs_bioread_check_cons(struct vnode *vp, struct thread *td, struct ucred *cred)
mtx_unlock(&np->n_mtx);
error = VOP_GETATTR(vp, &vattr, cred);
if (error)
return (error);
goto out;
mtx_lock(&np->n_mtx);
if ((np->n_flag & NSIZECHANGED)
|| (NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime))) {
mtx_unlock(&np->n_mtx);
if (vp->v_type == VDIR)
ncl_invaldir(vp);
error = ncl_vinvalbuf(vp, V_SAVE, td, 1);
if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
error = EBADF;
error = ncl_vinvalbuf(vp, V_SAVE | V_ALLOWCLEAN, td, 1);
if (error != 0)
goto out;
mtx_lock(&np->n_mtx);
@ -423,7 +412,7 @@ nfs_bioread_check_cons(struct vnode *vp, struct thread *td, struct ucred *cred)
mtx_unlock(&np->n_mtx);
}
out:
ncl_downgrade_vnlock(vp, old_lock);
ncl_excl_finish(vp, old_lock);
return (error);
}
@ -608,8 +597,6 @@ ncl_bioread(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *cred)
while (error == NFSERR_BAD_COOKIE) {
ncl_invaldir(vp);
error = ncl_vinvalbuf(vp, 0, td, 1);
if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
return (EBADF);
/*
* Yuck! The directory has been modified on the
@ -933,8 +920,6 @@ ncl_write(struct vop_write_args *ap)
KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
error = ncl_vinvalbuf(vp, V_SAVE | ((ioflag &
IO_VMIO) != 0 ? V_VMIO : 0), td, 1);
if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
error = EBADF;
if (error != 0)
return (error);
} else
@ -1016,9 +1001,6 @@ ncl_write(struct vop_write_args *ap)
KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
error = ncl_vinvalbuf(vp, V_SAVE | ((ioflag &
IO_VMIO) != 0 ? V_VMIO : 0), td, 1);
if (error == 0 &&
(vp->v_iflag & VI_DOOMED) != 0)
error = EBADF;
if (error != 0)
return (error);
wouldcommit = biosize;
@ -1336,7 +1318,7 @@ ncl_vinvalbuf(struct vnode *vp, int flags, struct thread *td, int intrflg)
struct nfsnode *np = VTONFS(vp);
struct nfsmount *nmp = VFSTONFS(vp->v_mount);
int error = 0, slpflag, slptimeo;
int old_lock = 0;
bool old_lock;
ASSERT_VOP_LOCKED(vp, "ncl_vinvalbuf");
@ -1352,16 +1334,9 @@ ncl_vinvalbuf(struct vnode *vp, int flags, struct thread *td, int intrflg)
slptimeo = 0;
}
old_lock = ncl_upgrade_vnlock(vp);
if (vp->v_iflag & VI_DOOMED) {
/*
* Since vgonel() uses the generic vinvalbuf() to flush
* dirty buffers and it does not call this function, it
* is safe to just return OK when VI_DOOMED is set.
*/
ncl_downgrade_vnlock(vp, old_lock);
return (0);
}
old_lock = ncl_excl_start(vp);
if (old_lock)
flags |= V_ALLOWCLEAN;
/*
* Now, flush as required.
@ -1400,7 +1375,7 @@ ncl_vinvalbuf(struct vnode *vp, int flags, struct thread *td, int intrflg)
np->n_flag &= ~NMODIFIED;
mtx_unlock(&np->n_mtx);
out:
ncl_downgrade_vnlock(vp, old_lock);
ncl_excl_finish(vp, old_lock);
return error;
}

View File

@ -141,6 +141,9 @@ ncl_nget(struct mount *mntp, u_int8_t *fhp, int fhsize, struct nfsnode **npp,
* happened to return an error no special casing is needed).
*/
mtx_init(&np->n_mtx, "NEWNFSnode lock", NULL, MTX_DEF | MTX_DUPOK);
lockinit(&np->n_excl, PVFS, "nfsupg", VLKTIMEOUT, LK_NOSHARE |
LK_CANRECURSE);
/*
* NFS supports recursive and shared locking.
*/
@ -167,6 +170,7 @@ ncl_nget(struct mount *mntp, u_int8_t *fhp, int fhsize, struct nfsnode **npp,
*npp = NULL;
FREE((caddr_t)np->n_fhp, M_NFSFH);
mtx_destroy(&np->n_mtx);
lockdestroy(&np->n_excl);
uma_zfree(newnfsnode_zone, np);
return (error);
}
@ -332,6 +336,7 @@ ncl_reclaim(struct vop_reclaim_args *ap)
if (np->n_v4 != NULL)
FREE((caddr_t)np->n_v4, M_NFSV4NODE);
mtx_destroy(&np->n_mtx);
lockdestroy(&np->n_excl);
uma_zfree(newnfsnode_zone, vp->v_data);
vp->v_data = NULL;
return (0);

View File

@ -230,6 +230,8 @@ nfscl_nget(struct mount *mntp, struct vnode *dvp, struct nfsfh *nfhp,
* happened to return an error no special casing is needed).
*/
mtx_init(&np->n_mtx, "NEWNFSnode lock", NULL, MTX_DEF | MTX_DUPOK);
lockinit(&np->n_excl, PVFS, "nfsupg", VLKTIMEOUT, LK_NOSHARE |
LK_CANRECURSE);
/*
* Are we getting the root? If so, make sure the vnode flags
@ -271,6 +273,7 @@ nfscl_nget(struct mount *mntp, struct vnode *dvp, struct nfsfh *nfhp,
if (error != 0) {
*npp = NULL;
mtx_destroy(&np->n_mtx);
lockdestroy(&np->n_excl);
FREE((caddr_t)nfhp, M_NFSFH);
if (np->n_v4 != NULL)
FREE((caddr_t)np->n_v4, M_NFSV4NODE);

View File

@ -135,30 +135,33 @@ ncl_dircookie_unlock(struct nfsnode *np)
mtx_unlock(&np->n_mtx);
}
int
ncl_upgrade_vnlock(struct vnode *vp)
bool
ncl_excl_start(struct vnode *vp)
{
int old_lock;
struct nfsnode *np;
int vn_lk;
ASSERT_VOP_LOCKED(vp, "ncl_upgrade_vnlock");
old_lock = NFSVOPISLOCKED(vp);
if (old_lock != LK_EXCLUSIVE) {
KASSERT(old_lock == LK_SHARED,
("ncl_upgrade_vnlock: wrong old_lock %d", old_lock));
/* Upgrade to exclusive lock, this might block */
NFSVOPLOCK(vp, LK_UPGRADE | LK_RETRY);
}
return (old_lock);
ASSERT_VOP_LOCKED(vp, "ncl_excl_start");
vn_lk = NFSVOPISLOCKED(vp);
if (vn_lk == LK_EXCLUSIVE)
return (false);
KASSERT(vn_lk == LK_SHARED,
("ncl_excl_start: wrong vnode lock %d", vn_lk));
/* Ensure exclusive access, this might block */
np = VTONFS(vp);
lockmgr(&np->n_excl, LK_EXCLUSIVE, NULL);
return (true);
}
void
ncl_downgrade_vnlock(struct vnode *vp, int old_lock)
ncl_excl_finish(struct vnode *vp, bool old_lock)
{
if (old_lock != LK_EXCLUSIVE) {
KASSERT(old_lock == LK_SHARED, ("wrong old_lock %d", old_lock));
/* Downgrade from exclusive lock. */
NFSVOPLOCK(vp, LK_DOWNGRADE | LK_RETRY);
}
struct nfsnode *np;
if (!old_lock)
return;
np = VTONFS(vp);
lockmgr(&np->n_excl, LK_RELEASE, NULL);
}
#ifdef NFS_ACDEBUG

View File

@ -520,8 +520,6 @@ nfs_open(struct vop_open_args *ap)
if (np->n_flag & NMODIFIED) {
mtx_unlock(&np->n_mtx);
error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
return (EBADF);
if (error == EINTR || error == EIO) {
if (NFS_ISV4(vp))
(void) nfsrpc_close(vp, 0, ap->a_td);
@ -558,8 +556,6 @@ nfs_open(struct vop_open_args *ap)
np->n_direofoffset = 0;
mtx_unlock(&np->n_mtx);
error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
return (EBADF);
if (error == EINTR || error == EIO) {
if (NFS_ISV4(vp))
(void) nfsrpc_close(vp, 0, ap->a_td);
@ -580,8 +576,6 @@ nfs_open(struct vop_open_args *ap)
if (np->n_directio_opens == 0) {
mtx_unlock(&np->n_mtx);
error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
return (EBADF);
if (error) {
if (NFS_ISV4(vp))
(void) nfsrpc_close(vp, 0, ap->a_td);
@ -722,8 +716,6 @@ nfs_close(struct vop_close_args *ap)
}
} else {
error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
return (EBADF);
}
mtx_lock(&np->n_mtx);
}
@ -942,9 +934,7 @@ nfs_setattr(struct vop_setattr_args *ap)
mtx_unlock(&np->n_mtx);
error = ncl_vinvalbuf(vp, vap->va_size == 0 ?
0 : V_SAVE, td, 1);
if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
error = EBADF;
if (error != 0) {
if (error != 0) {
vnode_pager_setsize(vp, tsize);
return (error);
}
@ -971,8 +961,6 @@ nfs_setattr(struct vop_setattr_args *ap)
(np->n_flag & NMODIFIED) && vp->v_type == VREG) {
mtx_unlock(&np->n_mtx);
error = ncl_vinvalbuf(vp, V_SAVE, td, 1);
if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
return (EBADF);
if (error == EINTR || error == EIO)
return (error);
} else
@ -1676,9 +1664,7 @@ nfs_remove(struct vop_remove_args *ap)
* unnecessary delayed writes later.
*/
error = ncl_vinvalbuf(vp, 0, cnp->cn_thread, 1);
if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
error = EBADF;
else if (error != EINTR && error != EIO)
if (error != EINTR && error != EIO)
/* Do the rpc */
error = nfs_removerpc(dvp, vp, cnp->cn_nameptr,
cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread);
@ -3089,10 +3075,6 @@ nfs_advlock(struct vop_advlock_args *ap)
if ((np->n_flag & NMODIFIED) || ret ||
np->n_change != va.va_filerev) {
(void) ncl_vinvalbuf(vp, V_SAVE, td, 1);
if ((vp->v_iflag & VI_DOOMED) != 0) {
NFSVOPUNLOCK(vp, 0);
return (EBADF);
}
np->n_attrstamp = 0;
KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
ret = VOP_GETATTR(vp, &va, cred);

View File

@ -92,6 +92,8 @@ struct nfs_accesscache {
*/
struct nfsnode {
struct mtx n_mtx; /* Protects all of these members */
struct lock n_excl; /* Exclusive helper for shared
vnode lock */
u_quad_t n_size; /* Current size of file */
u_quad_t n_brev; /* Modify rev when cached */
u_quad_t n_lrev; /* Modify rev for lease */
@ -184,8 +186,8 @@ int ncl_removeit(struct sillyrename *, struct vnode *);
int ncl_nget(struct mount *, u_int8_t *, int, struct nfsnode **, int);
nfsuint64 *ncl_getcookie(struct nfsnode *, off_t, int);
void ncl_invaldir(struct vnode *);
int ncl_upgrade_vnlock(struct vnode *);
void ncl_downgrade_vnlock(struct vnode *, int);
bool ncl_excl_start(struct vnode *);
void ncl_excl_finish(struct vnode *, bool old_lock);
void ncl_dircookie_lock(struct nfsnode *);
void ncl_dircookie_unlock(struct nfsnode *);