Fixes up the handling of shared vnode lock lookups in the NFS client,

adds a FS type specific flag indicating that the FS supports shared
vnode lock lookups, adds some logic in vfs_lookup.c to test this flag
and set lock flags appropriately.

- amd on 6.x is a non-starter (without this change). Using amd under
  heavy load results in a deadlock (with cascading vnode locks all the
  way to the root) very quickly.
- This change should also fix the more general problem of cascading
  vnode deadlocks when an NFS server goes down.

Ideally, we wouldn't need these changes, as enabling shared vnode lock
lookups globally would work. Unfortunately, UFS, for example isn't
ready for shared vnode lock lookups, crashing pretty quickly.

This change is the result of discussions with Stephan Uphoff (ups@).

Reviewed by:	ups@
This commit is contained in:
Mohan Srinivasan 2006-09-13 18:39:09 +00:00
parent cec65ede6c
commit 7d7d9e2242
9 changed files with 46 additions and 29 deletions

View File

@ -303,6 +303,16 @@ namei(struct nameidata *ndp)
return (error);
}
static int
compute_cn_lkflags(struct mount *mp, int lkflags)
{
if ((lkflags & LK_SHARED) && !(mp->mnt_kern_flag & MNTK_LOOKUP_SHARED)) {
lkflags &= ~LK_SHARED;
lkflags |= LK_EXCLUSIVE;
}
return lkflags;
}
/*
* Search a pathname.
* This is a very central and rather complicated routine.
@ -359,7 +369,8 @@ lookup(struct nameidata *ndp)
int vfslocked; /* VFS Giant state for child */
int dvfslocked; /* VFS Giant state for parent */
int tvfslocked;
int lkflags_save;
/*
* Setup: break out flag bits into variables.
*/
@ -387,7 +398,7 @@ lookup(struct nameidata *ndp)
cnp->cn_lkflags = LK_EXCLUSIVE;
dp = ndp->ni_startdir;
ndp->ni_startdir = NULLVP;
vn_lock(dp, cnp->cn_lkflags | LK_RETRY, td);
vn_lock(dp, compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY), td);
dirloop:
/*
@ -524,7 +535,7 @@ dirloop:
VREF(dp);
vput(tdp);
VFS_UNLOCK_GIANT(tvfslocked);
vn_lock(dp, cnp->cn_lkflags | LK_RETRY, td);
vn_lock(dp, compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY), td);
}
}
@ -560,7 +571,10 @@ unionlookup:
#ifdef NAMEI_DIAGNOSTIC
vprint("lookup in", dp);
#endif
lkflags_save = cnp->cn_lkflags;
cnp->cn_lkflags = compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags);
if ((error = VOP_LOOKUP(dp, &ndp->ni_vp, cnp)) != 0) {
cnp->cn_lkflags = lkflags_save;
KASSERT(ndp->ni_vp == NULL, ("leaf should be empty"));
#ifdef NAMEI_DIAGNOSTIC
printf("not found\n");
@ -575,7 +589,7 @@ unionlookup:
VREF(dp);
vput(tdp);
VFS_UNLOCK_GIANT(tvfslocked);
vn_lock(dp, cnp->cn_lkflags | LK_RETRY, td);
vn_lock(dp, compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY), td);
goto unionlookup;
}
@ -612,7 +626,8 @@ unionlookup:
VREF(ndp->ni_startdir);
}
goto success;
}
} else
cnp->cn_lkflags = lkflags_save;
#ifdef NAMEI_DIAGNOSTIC
printf("found\n");
#endif
@ -643,9 +658,9 @@ unionlookup:
vfslocked = VFS_LOCK_GIANT(mp);
if (dp != ndp->ni_dvp)
VOP_UNLOCK(ndp->ni_dvp, 0, td);
error = VFS_ROOT(mp, cnp->cn_lkflags, &tdp, td);
error = VFS_ROOT(mp, compute_cn_lkflags(mp, cnp->cn_lkflags), &tdp, td);
vfs_unbusy(mp, td);
vn_lock(ndp->ni_dvp, cnp->cn_lkflags | LK_RETRY, td);
vn_lock(ndp->ni_dvp, compute_cn_lkflags(mp, cnp->cn_lkflags | LK_RETRY), td);
if (error) {
dpunlocked = 1;
goto bad2;
@ -859,6 +874,7 @@ relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
*/
return (0);
}
dp = *vpp;
/*

View File

@ -200,7 +200,7 @@ nfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td)
#ifndef nolint
sfp = NULL;
#endif
error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np);
error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
if (error)
return (error);
vp = NFSTOV(np);
@ -724,7 +724,7 @@ nfs_root(struct mount *mp, int flags, struct vnode **vpp, struct thread *td)
int error;
nmp = VFSTONFS(mp);
error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np);
error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
if (error)
return (error);
vp = NFSTOV(np);

View File

@ -497,7 +497,7 @@ nfs4_openrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
if (vp == NULL) {
/* New file */
error = nfs_nget(dvp->v_mount, &getfh.fh_val,
getfh.fh_len, &np);
getfh.fh_len, &np, LK_EXCLUSIVE);
if (error != 0)
goto nfsmout;
@ -1031,7 +1031,7 @@ nfs4_lookup(struct vop_lookup_args *ap)
if (NFS_CMPFH(np, fhp, fhsize))
return (EISDIR);
error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, LK_EXCLUSIVE);
if (error)
return (error);
@ -1047,7 +1047,7 @@ nfs4_lookup(struct vop_lookup_args *ap)
if (flags & ISDOTDOT) {
VOP_UNLOCK(dvp, 0, td);
error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, LK_EXCLUSIVE);
vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
if (error)
return (error);
@ -1058,7 +1058,7 @@ nfs4_lookup(struct vop_lookup_args *ap)
VREF(dvp);
newvp = dvp;
} else {
error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, LK_EXCLUSIVE);
if (error)
return (error);
newvp = NFSTOV(np);
@ -1431,7 +1431,7 @@ nfs4_createrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
nfsm_v4dissect_getattr(&cp, &ga);
nfsm_v4dissect_getfh(&cp, &gfh);
error = nfs_nget(dvp->v_mount, &gfh.fh_val, gfh.fh_len, &np);
error = nfs_nget(dvp->v_mount, &gfh.fh_val, gfh.fh_len, &np, LK_EXCLUSIVE);
if (error != 0)
goto nfsmout;
@ -2336,7 +2336,7 @@ nfs4_lookitup(struct vnode *dvp, const char *name, int len, struct ucred *cred,
VREF(dvp);
newvp = dvp;
} else {
error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np);
error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np, LK_EXCLUSIVE);
if (error) {
m_freem(mrep);
return (error);

View File

@ -99,7 +99,7 @@ nfs_vncmpf(struct vnode *vp, void *arg)
* nfsnode structure is returned.
*/
int
nfs_nget(struct mount *mntp, nfsfh_t *fhp, int fhsize, struct nfsnode **npp)
nfs_nget(struct mount *mntp, nfsfh_t *fhp, int fhsize, struct nfsnode **npp, int flags)
{
struct thread *td = curthread; /* XXX */
struct nfsnode *np;
@ -117,7 +117,7 @@ nfs_nget(struct mount *mntp, nfsfh_t *fhp, int fhsize, struct nfsnode **npp)
ncmp.fhsize = fhsize;
ncmp.fh = fhp;
error = vfs_hash_get(mntp, hash, LK_EXCLUSIVE,
error = vfs_hash_get(mntp, hash, flags,
td, &nvp, nfs_vncmpf, &ncmp);
if (error)
return (error);
@ -153,7 +153,7 @@ nfs_nget(struct mount *mntp, nfsfh_t *fhp, int fhsize, struct nfsnode **npp)
*/
vp->v_vnlock->lk_flags |= LK_CANRECURSE;
vp->v_vnlock->lk_flags &= ~LK_NOSHARE;
error = vfs_hash_insert(vp, hash, LK_EXCLUSIVE,
error = vfs_hash_insert(vp, hash, flags,
td, &nvp, nfs_vncmpf, &ncmp);
if (error)
return (error);

View File

@ -930,7 +930,7 @@ nfsm_mtofh_xx(struct vnode *d, struct vnode **v, int v3, int *f,
t1 = nfsm_getfh_xx(&ttfhp, &ttfhsize, (v3), md, dpos);
if (t1 != 0)
return t1;
t1 = nfs_nget(d->v_mount, ttfhp, ttfhsize, &ttnp);
t1 = nfs_nget(d->v_mount, ttfhp, ttfhsize, &ttnp, LK_EXCLUSIVE);
if (t1 != 0)
return t1;
*v = NFSTOV(ttnp);

View File

@ -254,7 +254,7 @@ nfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td)
error = vfs_busy(mp, LK_NOWAIT, NULL, td);
if (error)
return (error);
error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np);
error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
if (error) {
vfs_unbusy(mp, td);
return (error);
@ -785,7 +785,7 @@ nfs_mount(struct mount *mp, struct thread *td)
error = mountnfs(&args, mp, nam, hst, &vp, td->td_ucred);
out:
if (!error)
mp->mnt_kern_flag |= MNTK_MPSAFE;
mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED);
return (error);
}
@ -913,7 +913,7 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam,
* this problem, because one can identify root inodes by their
* number == ROOTINO (2).
*/
error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np);
error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE);
if (error)
goto bad;
*vpp = NFSTOV(np);
@ -995,7 +995,7 @@ nfs_root(struct mount *mp, int flags, struct vnode **vpp, struct thread *td)
int error;
nmp = VFSTONFS(mp);
error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np);
error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, flags);
if (error)
return error;
vp = NFSTOV(np);

View File

@ -899,7 +899,7 @@ nfs_lookup(struct vop_lookup_args *ap)
m_freem(mrep);
return (EISDIR);
}
error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, LK_EXCLUSIVE);
if (error) {
m_freem(mrep);
return (error);
@ -918,7 +918,7 @@ nfs_lookup(struct vop_lookup_args *ap)
if (flags & ISDOTDOT) {
VOP_UNLOCK(dvp, 0, td);
error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, cnp->cn_lkflags);
vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
if (error)
return (error);
@ -927,7 +927,7 @@ nfs_lookup(struct vop_lookup_args *ap)
VREF(dvp);
newvp = dvp;
} else {
error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, cnp->cn_lkflags);
if (error) {
m_freem(mrep);
return (error);
@ -2410,7 +2410,7 @@ nfs_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
np = dnp;
} else {
error = nfs_nget(vp->v_mount, fhp,
fhsize, &np);
fhsize, &np, LK_EXCLUSIVE);
if (error)
doit = 0;
else
@ -2604,7 +2604,7 @@ nfs_lookitup(struct vnode *dvp, const char *name, int len, struct ucred *cred,
VREF(dvp);
newvp = dvp;
} else {
error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np);
error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np, LK_EXCLUSIVE);
if (error) {
m_freem(mrep);
return (error);

View File

@ -189,7 +189,7 @@ int nfs_reclaim(struct vop_reclaim_args *);
/* other stuff */
int nfs_removeit(struct sillyrename *);
int nfs4_removeit(struct sillyrename *);
int nfs_nget(struct mount *, nfsfh_t *, int, struct nfsnode **);
int nfs_nget(struct mount *, nfsfh_t *, int, struct nfsnode **, int flags);
nfsuint64 *nfs_getcookie(struct nfsnode *, off_t, int);
uint64_t *nfs4_getcookie(struct nfsnode *, off_t, int);
void nfs_invaldir(struct vnode *);

View File

@ -310,6 +310,7 @@ void __mnt_vnode_markerfree(struct vnode **mvp, struct mount *mp);
#define MNTK_SUSPENDED 0x10000000 /* write operations are suspended */
#define MNTK_MPSAFE 0x20000000 /* Filesystem is MPSAFE. */
#define MNTK_NOKNOTE 0x80000000 /* Don't send KNOTEs from VOP hooks */
#define MNTK_LOOKUP_SHARED 0x40000000 /* FS supports shared lock lookups */
/*
* Sysctl CTL_VFS definitions.