Fixes up the handling of shared vnode lock lookups in the NFS client,
adds an FS-type-specific flag indicating that the FS supports shared vnode lock lookups, and adds some logic in vfs_lookup.c to test this flag and set the lock flags appropriately. - amd on 6.x is a non-starter without this change: using amd under heavy load very quickly results in a deadlock (with cascading vnode locks all the way to the root). - This change should also fix the more general problem of cascading vnode deadlocks when an NFS server goes down. Ideally, we wouldn't need these changes, as enabling shared vnode lock lookups globally would work. Unfortunately, UFS, for example, isn't ready for shared vnode lock lookups and crashes pretty quickly. This change is the result of discussions with Stephan Uphoff (ups@). Reviewed by: ups@
This commit is contained in:
parent
cec65ede6c
commit
7d7d9e2242
@ -303,6 +303,16 @@ namei(struct nameidata *ndp)
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
compute_cn_lkflags(struct mount *mp, int lkflags)
|
||||
{
|
||||
if ((lkflags & LK_SHARED) && !(mp->mnt_kern_flag & MNTK_LOOKUP_SHARED)) {
|
||||
lkflags &= ~LK_SHARED;
|
||||
lkflags |= LK_EXCLUSIVE;
|
||||
}
|
||||
return lkflags;
|
||||
}
|
||||
|
||||
/*
|
||||
* Search a pathname.
|
||||
* This is a very central and rather complicated routine.
|
||||
@ -359,6 +369,7 @@ lookup(struct nameidata *ndp)
|
||||
int vfslocked; /* VFS Giant state for child */
|
||||
int dvfslocked; /* VFS Giant state for parent */
|
||||
int tvfslocked;
|
||||
int lkflags_save;
|
||||
|
||||
/*
|
||||
* Setup: break out flag bits into variables.
|
||||
@ -387,7 +398,7 @@ lookup(struct nameidata *ndp)
|
||||
cnp->cn_lkflags = LK_EXCLUSIVE;
|
||||
dp = ndp->ni_startdir;
|
||||
ndp->ni_startdir = NULLVP;
|
||||
vn_lock(dp, cnp->cn_lkflags | LK_RETRY, td);
|
||||
vn_lock(dp, compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY), td);
|
||||
|
||||
dirloop:
|
||||
/*
|
||||
@ -524,7 +535,7 @@ lookup(struct nameidata *ndp)
|
||||
VREF(dp);
|
||||
vput(tdp);
|
||||
VFS_UNLOCK_GIANT(tvfslocked);
|
||||
vn_lock(dp, cnp->cn_lkflags | LK_RETRY, td);
|
||||
vn_lock(dp, compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY), td);
|
||||
}
|
||||
}
|
||||
|
||||
@ -560,7 +571,10 @@ lookup(struct nameidata *ndp)
|
||||
#ifdef NAMEI_DIAGNOSTIC
|
||||
vprint("lookup in", dp);
|
||||
#endif
|
||||
lkflags_save = cnp->cn_lkflags;
|
||||
cnp->cn_lkflags = compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags);
|
||||
if ((error = VOP_LOOKUP(dp, &ndp->ni_vp, cnp)) != 0) {
|
||||
cnp->cn_lkflags = lkflags_save;
|
||||
KASSERT(ndp->ni_vp == NULL, ("leaf should be empty"));
|
||||
#ifdef NAMEI_DIAGNOSTIC
|
||||
printf("not found\n");
|
||||
@ -575,7 +589,7 @@ lookup(struct nameidata *ndp)
|
||||
VREF(dp);
|
||||
vput(tdp);
|
||||
VFS_UNLOCK_GIANT(tvfslocked);
|
||||
vn_lock(dp, cnp->cn_lkflags | LK_RETRY, td);
|
||||
vn_lock(dp, compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY), td);
|
||||
goto unionlookup;
|
||||
}
|
||||
|
||||
@ -612,7 +626,8 @@ lookup(struct nameidata *ndp)
|
||||
VREF(ndp->ni_startdir);
|
||||
}
|
||||
goto success;
|
||||
}
|
||||
} else
|
||||
cnp->cn_lkflags = lkflags_save;
|
||||
#ifdef NAMEI_DIAGNOSTIC
|
||||
printf("found\n");
|
||||
#endif
|
||||
@ -643,9 +658,9 @@ lookup(struct nameidata *ndp)
|
||||
vfslocked = VFS_LOCK_GIANT(mp);
|
||||
if (dp != ndp->ni_dvp)
|
||||
VOP_UNLOCK(ndp->ni_dvp, 0, td);
|
||||
error = VFS_ROOT(mp, cnp->cn_lkflags, &tdp, td);
|
||||
error = VFS_ROOT(mp, compute_cn_lkflags(mp, cnp->cn_lkflags), &tdp, td);
|
||||
vfs_unbusy(mp, td);
|
||||
vn_lock(ndp->ni_dvp, cnp->cn_lkflags | LK_RETRY, td);
|
||||
vn_lock(ndp->ni_dvp, compute_cn_lkflags(mp, cnp->cn_lkflags | LK_RETRY), td);
|
||||
if (error) {
|
||||
dpunlocked = 1;
|
||||
goto bad2;
|
||||
@ -859,6 +874,7 @@ relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
|
||||
*/
|
||||
return (0);
|
||||
}
|
||||
|
||||
dp = *vpp;
|
||||
|
||||
/*
|
||||
|
@ -200,7 +200,7 @@ nfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td)
|
||||
#ifndef nolint
|
||||
sfp = NULL;
|
||||
#endif
|
||||
error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np);
|
||||
error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
|
||||
if (error)
|
||||
return (error);
|
||||
vp = NFSTOV(np);
|
||||
@ -724,7 +724,7 @@ nfs_root(struct mount *mp, int flags, struct vnode **vpp, struct thread *td)
|
||||
int error;
|
||||
|
||||
nmp = VFSTONFS(mp);
|
||||
error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np);
|
||||
error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
|
||||
if (error)
|
||||
return (error);
|
||||
vp = NFSTOV(np);
|
||||
|
@ -497,7 +497,7 @@ nfs4_openrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
|
||||
if (vp == NULL) {
|
||||
/* New file */
|
||||
error = nfs_nget(dvp->v_mount, &getfh.fh_val,
|
||||
getfh.fh_len, &np);
|
||||
getfh.fh_len, &np, LK_EXCLUSIVE);
|
||||
if (error != 0)
|
||||
goto nfsmout;
|
||||
|
||||
@ -1031,7 +1031,7 @@ nfs4_lookup(struct vop_lookup_args *ap)
|
||||
if (NFS_CMPFH(np, fhp, fhsize))
|
||||
return (EISDIR);
|
||||
|
||||
error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
|
||||
error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, LK_EXCLUSIVE);
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
@ -1047,7 +1047,7 @@ nfs4_lookup(struct vop_lookup_args *ap)
|
||||
if (flags & ISDOTDOT) {
|
||||
VOP_UNLOCK(dvp, 0, td);
|
||||
|
||||
error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
|
||||
error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, LK_EXCLUSIVE);
|
||||
vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
|
||||
if (error)
|
||||
return (error);
|
||||
@ -1058,7 +1058,7 @@ nfs4_lookup(struct vop_lookup_args *ap)
|
||||
VREF(dvp);
|
||||
newvp = dvp;
|
||||
} else {
|
||||
error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
|
||||
error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, LK_EXCLUSIVE);
|
||||
if (error)
|
||||
return (error);
|
||||
newvp = NFSTOV(np);
|
||||
@ -1431,7 +1431,7 @@ nfs4_createrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
|
||||
nfsm_v4dissect_getattr(&cp, &ga);
|
||||
nfsm_v4dissect_getfh(&cp, &gfh);
|
||||
|
||||
error = nfs_nget(dvp->v_mount, &gfh.fh_val, gfh.fh_len, &np);
|
||||
error = nfs_nget(dvp->v_mount, &gfh.fh_val, gfh.fh_len, &np, LK_EXCLUSIVE);
|
||||
if (error != 0)
|
||||
goto nfsmout;
|
||||
|
||||
@ -2336,7 +2336,7 @@ nfs4_lookitup(struct vnode *dvp, const char *name, int len, struct ucred *cred,
|
||||
VREF(dvp);
|
||||
newvp = dvp;
|
||||
} else {
|
||||
error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np);
|
||||
error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np, LK_EXCLUSIVE);
|
||||
if (error) {
|
||||
m_freem(mrep);
|
||||
return (error);
|
||||
|
@ -99,7 +99,7 @@ nfs_vncmpf(struct vnode *vp, void *arg)
|
||||
* nfsnode structure is returned.
|
||||
*/
|
||||
int
|
||||
nfs_nget(struct mount *mntp, nfsfh_t *fhp, int fhsize, struct nfsnode **npp)
|
||||
nfs_nget(struct mount *mntp, nfsfh_t *fhp, int fhsize, struct nfsnode **npp, int flags)
|
||||
{
|
||||
struct thread *td = curthread; /* XXX */
|
||||
struct nfsnode *np;
|
||||
@ -117,7 +117,7 @@ nfs_nget(struct mount *mntp, nfsfh_t *fhp, int fhsize, struct nfsnode **npp)
|
||||
ncmp.fhsize = fhsize;
|
||||
ncmp.fh = fhp;
|
||||
|
||||
error = vfs_hash_get(mntp, hash, LK_EXCLUSIVE,
|
||||
error = vfs_hash_get(mntp, hash, flags,
|
||||
td, &nvp, nfs_vncmpf, &ncmp);
|
||||
if (error)
|
||||
return (error);
|
||||
@ -153,7 +153,7 @@ nfs_nget(struct mount *mntp, nfsfh_t *fhp, int fhsize, struct nfsnode **npp)
|
||||
*/
|
||||
vp->v_vnlock->lk_flags |= LK_CANRECURSE;
|
||||
vp->v_vnlock->lk_flags &= ~LK_NOSHARE;
|
||||
error = vfs_hash_insert(vp, hash, LK_EXCLUSIVE,
|
||||
error = vfs_hash_insert(vp, hash, flags,
|
||||
td, &nvp, nfs_vncmpf, &ncmp);
|
||||
if (error)
|
||||
return (error);
|
||||
|
@ -930,7 +930,7 @@ nfsm_mtofh_xx(struct vnode *d, struct vnode **v, int v3, int *f,
|
||||
t1 = nfsm_getfh_xx(&ttfhp, &ttfhsize, (v3), md, dpos);
|
||||
if (t1 != 0)
|
||||
return t1;
|
||||
t1 = nfs_nget(d->v_mount, ttfhp, ttfhsize, &ttnp);
|
||||
t1 = nfs_nget(d->v_mount, ttfhp, ttfhsize, &ttnp, LK_EXCLUSIVE);
|
||||
if (t1 != 0)
|
||||
return t1;
|
||||
*v = NFSTOV(ttnp);
|
||||
|
@ -254,7 +254,7 @@ nfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td)
|
||||
error = vfs_busy(mp, LK_NOWAIT, NULL, td);
|
||||
if (error)
|
||||
return (error);
|
||||
error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np);
|
||||
error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
|
||||
if (error) {
|
||||
vfs_unbusy(mp, td);
|
||||
return (error);
|
||||
@ -785,7 +785,7 @@ nfs_mount(struct mount *mp, struct thread *td)
|
||||
error = mountnfs(&args, mp, nam, hst, &vp, td->td_ucred);
|
||||
out:
|
||||
if (!error)
|
||||
mp->mnt_kern_flag |= MNTK_MPSAFE;
|
||||
mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED);
|
||||
return (error);
|
||||
}
|
||||
|
||||
@ -913,7 +913,7 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
|
||||
* this problem, because one can identify root inodes by their
|
||||
* number == ROOTINO (2).
|
||||
*/
|
||||
error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np);
|
||||
error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
|
||||
if (error)
|
||||
goto bad;
|
||||
*vpp = NFSTOV(np);
|
||||
@ -995,7 +995,7 @@ nfs_root(struct mount *mp, int flags, struct vnode **vpp, struct thread *td)
|
||||
int error;
|
||||
|
||||
nmp = VFSTONFS(mp);
|
||||
error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np);
|
||||
error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, flags);
|
||||
if (error)
|
||||
return error;
|
||||
vp = NFSTOV(np);
|
||||
|
@ -899,7 +899,7 @@ nfs_lookup(struct vop_lookup_args *ap)
|
||||
m_freem(mrep);
|
||||
return (EISDIR);
|
||||
}
|
||||
error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
|
||||
error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, LK_EXCLUSIVE);
|
||||
if (error) {
|
||||
m_freem(mrep);
|
||||
return (error);
|
||||
@ -918,7 +918,7 @@ nfs_lookup(struct vop_lookup_args *ap)
|
||||
|
||||
if (flags & ISDOTDOT) {
|
||||
VOP_UNLOCK(dvp, 0, td);
|
||||
error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
|
||||
error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, cnp->cn_lkflags);
|
||||
vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
|
||||
if (error)
|
||||
return (error);
|
||||
@ -927,7 +927,7 @@ nfs_lookup(struct vop_lookup_args *ap)
|
||||
VREF(dvp);
|
||||
newvp = dvp;
|
||||
} else {
|
||||
error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
|
||||
error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, cnp->cn_lkflags);
|
||||
if (error) {
|
||||
m_freem(mrep);
|
||||
return (error);
|
||||
@ -2410,7 +2410,7 @@ nfs_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
|
||||
np = dnp;
|
||||
} else {
|
||||
error = nfs_nget(vp->v_mount, fhp,
|
||||
fhsize, &np);
|
||||
fhsize, &np, LK_EXCLUSIVE);
|
||||
if (error)
|
||||
doit = 0;
|
||||
else
|
||||
@ -2604,7 +2604,7 @@ nfs_lookitup(struct vnode *dvp, const char *name, int len, struct ucred *cred,
|
||||
VREF(dvp);
|
||||
newvp = dvp;
|
||||
} else {
|
||||
error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np);
|
||||
error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np, LK_EXCLUSIVE);
|
||||
if (error) {
|
||||
m_freem(mrep);
|
||||
return (error);
|
||||
|
@ -189,7 +189,7 @@ int nfs_reclaim(struct vop_reclaim_args *);
|
||||
/* other stuff */
|
||||
int nfs_removeit(struct sillyrename *);
|
||||
int nfs4_removeit(struct sillyrename *);
|
||||
int nfs_nget(struct mount *, nfsfh_t *, int, struct nfsnode **);
|
||||
int nfs_nget(struct mount *, nfsfh_t *, int, struct nfsnode **, int flags);
|
||||
nfsuint64 *nfs_getcookie(struct nfsnode *, off_t, int);
|
||||
uint64_t *nfs4_getcookie(struct nfsnode *, off_t, int);
|
||||
void nfs_invaldir(struct vnode *);
|
||||
|
@ -310,6 +310,7 @@ void __mnt_vnode_markerfree(struct vnode **mvp, struct mount *mp);
|
||||
#define MNTK_SUSPENDED 0x10000000 /* write operations are suspended */
|
||||
#define MNTK_MPSAFE 0x20000000 /* Filesystem is MPSAFE. */
|
||||
#define MNTK_NOKNOTE 0x80000000 /* Don't send KNOTEs from VOP hooks */
|
||||
#define MNTK_LOOKUP_SHARED 0x40000000 /* FS supports shared lock lookups */
|
||||
|
||||
/*
|
||||
* Sysctl CTL_VFS definitions.
|
||||
|
Loading…
Reference in New Issue
Block a user