This patch adds the "LOCKSHARED" option to namei, which causes it to acquire only shared locks on leaves.
The stat() and open() calls have been changed to make use of this new functionality. Using shared locks in these cases is sufficient and can significantly reduce their latency if I/O is pending on these vnodes. Also, this reduces the number of exclusive locks that are floating around in the system, which helps reduce the number of deadlocks that occur. A new kernel option, "LOOKUP_SHARED", has been added. It defaults to off so that this patch can be turned on for testing, and the option should eventually go away once the patch is proven to be stable. I have personally been running this patch for over a year now, so it is believed to be fully stable. Reviewed by: jake, obrien. Approved by: jake.
This commit is contained in:
parent
b6412800f5
commit
e6d26e8880
@ -383,6 +383,7 @@ BLKDEV_IOSIZE opt_global.h
|
||||
DEBUG opt_global.h
|
||||
DEBUG_LOCKS opt_global.h
|
||||
DEBUG_VFS_LOCKS opt_global.h
|
||||
LOOKUP_SHARED opt_global.h
|
||||
DIAGNOSTIC opt_global.h
|
||||
ENABLE_VFS_IOOPT opt_global.h
|
||||
INVARIANT_SUPPORT opt_global.h
|
||||
|
@ -570,8 +570,35 @@ vfs_cache_lookup(ap)
|
||||
|
||||
error = cache_lookup(dvp, vpp, cnp);
|
||||
|
||||
#ifdef LOOKUP_SHARED
|
||||
if (!error) {
|
||||
/* We do this because the rest of the system now expects to get
|
||||
* a shared lock, which is later upgraded if LOCKSHARED is not
|
||||
* set. We have so many cases here because of bugs that yield
|
||||
* inconsistent lock states. This all badly needs to be fixed
|
||||
*/
|
||||
error = VOP_CACHEDLOOKUP(dvp, vpp, cnp);
|
||||
if (!error) {
|
||||
int flock;
|
||||
|
||||
flock = VOP_ISLOCKED(*vpp, td);
|
||||
if (flock != LK_EXCLUSIVE) {
|
||||
if (flock == 0) {
|
||||
if ((flags & ISLASTCN) &&
|
||||
(flags & LOCKSHARED))
|
||||
VOP_LOCK(*vpp, LK_SHARED, td);
|
||||
else
|
||||
VOP_LOCK(*vpp, LK_EXCLUSIVE, td);
|
||||
}
|
||||
} else if ((flags & ISLASTCN) && (flags & LOCKSHARED))
|
||||
VOP_LOCK(*vpp, LK_DOWNGRADE, td);
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
#else
|
||||
if (!error)
|
||||
return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
|
||||
#endif
|
||||
|
||||
if (error == ENOENT)
|
||||
return (error);
|
||||
@ -585,13 +612,28 @@ vfs_cache_lookup(ap)
|
||||
} else if (flags & ISDOTDOT) {
|
||||
VOP_UNLOCK(dvp, 0, td);
|
||||
cnp->cn_flags |= PDIRUNLOCK;
|
||||
#ifdef LOOKUP_SHARED
|
||||
if ((flags & ISLASTCN) && (flags & LOCKSHARED))
|
||||
error = vget(vp, LK_SHARED, td);
|
||||
else
|
||||
error = vget(vp, LK_EXCLUSIVE, td);
|
||||
#else
|
||||
error = vget(vp, LK_EXCLUSIVE, td);
|
||||
#endif
|
||||
|
||||
if (!error && lockparent && (flags & ISLASTCN)) {
|
||||
if ((error = vn_lock(dvp, LK_EXCLUSIVE, td)) == 0)
|
||||
cnp->cn_flags &= ~PDIRUNLOCK;
|
||||
}
|
||||
} else {
|
||||
#ifdef LOOKUP_SHARED
|
||||
if ((flags & ISLASTCN) && (flags & LOCKSHARED))
|
||||
error = vget(vp, LK_SHARED, td);
|
||||
else
|
||||
error = vget(vp, LK_EXCLUSIVE, td);
|
||||
#else
|
||||
error = vget(vp, LK_EXCLUSIVE, td);
|
||||
#endif
|
||||
if (!lockparent || error || !(flags & ISLASTCN)) {
|
||||
VOP_UNLOCK(dvp, 0, td);
|
||||
cnp->cn_flags |= PDIRUNLOCK;
|
||||
@ -616,7 +658,28 @@ vfs_cache_lookup(ap)
|
||||
return (error);
|
||||
cnp->cn_flags &= ~PDIRUNLOCK;
|
||||
}
|
||||
#ifdef LOOKUP_SHARED
|
||||
error = VOP_CACHEDLOOKUP(dvp, vpp, cnp);
|
||||
|
||||
if (!error) {
|
||||
int flock = 0;
|
||||
|
||||
flock = VOP_ISLOCKED(*vpp, td);
|
||||
if (flock != LK_EXCLUSIVE) {
|
||||
if (flock == 0) {
|
||||
if ((flags & ISLASTCN) && (flags & LOCKSHARED))
|
||||
VOP_LOCK(*vpp, LK_SHARED, td);
|
||||
else
|
||||
VOP_LOCK(*vpp, LK_EXCLUSIVE, td);
|
||||
}
|
||||
} else if ((flags & ISLASTCN) && (flags & LOCKSHARED))
|
||||
VOP_LOCK(*vpp, LK_DOWNGRADE, td);
|
||||
}
|
||||
|
||||
return (error);
|
||||
#else
|
||||
return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
@ -2029,8 +2029,13 @@ stat(td, uap)
|
||||
int error;
|
||||
struct nameidata nd;
|
||||
|
||||
#ifdef LOOKUP_SHARED
|
||||
NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | NOOBJ,
|
||||
UIO_USERSPACE, SCARG(uap, path), td);
|
||||
#else
|
||||
NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
|
||||
SCARG(uap, path), td);
|
||||
#endif
|
||||
if ((error = namei(&nd)) != 0)
|
||||
return (error);
|
||||
error = vn_stat(nd.ni_vp, &sb, td);
|
||||
|
@ -2029,8 +2029,13 @@ stat(td, uap)
|
||||
int error;
|
||||
struct nameidata nd;
|
||||
|
||||
#ifdef LOOKUP_SHARED
|
||||
NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | NOOBJ,
|
||||
UIO_USERSPACE, SCARG(uap, path), td);
|
||||
#else
|
||||
NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
|
||||
SCARG(uap, path), td);
|
||||
#endif
|
||||
if ((error = namei(&nd)) != 0)
|
||||
return (error);
|
||||
error = vn_stat(nd.ni_vp, &sb, td);
|
||||
|
@ -105,6 +105,11 @@ vn_open_cred(ndp, flagp, cmode, cred)
|
||||
struct vattr vat;
|
||||
struct vattr *vap = &vat;
|
||||
int mode, fmode, error;
|
||||
#ifdef LOOKUP_SHARED
|
||||
int exclusive; /* The current intended lock state */
|
||||
|
||||
exclusive = 0;
|
||||
#endif
|
||||
|
||||
restart:
|
||||
fmode = *flagp;
|
||||
@ -143,6 +148,9 @@ vn_open_cred(ndp, flagp, cmode, cred)
|
||||
ASSERT_VOP_LOCKED(ndp->ni_vp, "create");
|
||||
fmode &= ~O_TRUNC;
|
||||
vp = ndp->ni_vp;
|
||||
#ifdef LOOKUP_SHARED
|
||||
exclusive = 1;
|
||||
#endif
|
||||
} else {
|
||||
if (ndp->ni_dvp == ndp->ni_vp)
|
||||
vrele(ndp->ni_dvp);
|
||||
@ -158,8 +166,14 @@ vn_open_cred(ndp, flagp, cmode, cred)
|
||||
}
|
||||
} else {
|
||||
ndp->ni_cnd.cn_nameiop = LOOKUP;
|
||||
#ifdef LOOKUP_SHARED
|
||||
ndp->ni_cnd.cn_flags =
|
||||
((fmode & O_NOFOLLOW) ? NOFOLLOW : FOLLOW) |
|
||||
LOCKSHARED | LOCKLEAF;
|
||||
#else
|
||||
ndp->ni_cnd.cn_flags =
|
||||
((fmode & O_NOFOLLOW) ? NOFOLLOW : FOLLOW) | LOCKLEAF;
|
||||
#endif
|
||||
if ((error = namei(ndp)) != 0)
|
||||
return (error);
|
||||
vp = ndp->ni_vp;
|
||||
@ -198,6 +212,21 @@ vn_open_cred(ndp, flagp, cmode, cred)
|
||||
* Make sure that a VM object is created for VMIO support.
|
||||
*/
|
||||
if (vn_canvmio(vp) == TRUE) {
|
||||
#ifdef LOOKUP_SHARED
|
||||
int flock;
|
||||
|
||||
if (!exclusive && vp->v_object == NULL)
|
||||
VOP_LOCK(vp, LK_UPGRADE, td);
|
||||
/*
|
||||
* In cases where the object is marked as dead object_create
|
||||
* will unlock and relock exclusive. It is safe to call in
|
||||
* here with a shared lock because we only examine fields that
|
||||
* the shared lock guarantees will be stable. In the UPGRADE
|
||||
* case it is not likely that anyone has used this vnode yet
|
||||
* so there will be no contention. The logic after this call
|
||||
* restores the requested locking state.
|
||||
*/
|
||||
#endif
|
||||
if ((error = vfs_object_create(vp, td, cred)) != 0) {
|
||||
VOP_UNLOCK(vp, 0, td);
|
||||
VOP_CLOSE(vp, fmode, cred, td);
|
||||
@ -206,6 +235,11 @@ vn_open_cred(ndp, flagp, cmode, cred)
|
||||
*flagp = fmode;
|
||||
return (error);
|
||||
}
|
||||
#ifdef LOOKUP_SHARED
|
||||
flock = VOP_ISLOCKED(vp, td);
|
||||
if (!exclusive && flock == LK_EXCLUSIVE)
|
||||
VOP_LOCK(vp, LK_DOWNGRADE, td);
|
||||
#endif
|
||||
}
|
||||
|
||||
if (fmode & FWRITE)
|
||||
|
@ -113,8 +113,9 @@ struct nameidata {
|
||||
#define NOCACHE 0x0020 /* name must not be left in cache */
|
||||
#define FOLLOW 0x0040 /* follow symbolic links */
|
||||
#define NOOBJ 0x0080 /* don't create object */
|
||||
#define LOCKSHARED 0x0100 /* Shared lock leaf */
|
||||
#define NOFOLLOW 0x0000 /* do not follow symbolic links (pseudo) */
|
||||
#define MODMASK 0x00fc /* mask of operational modifiers */
|
||||
#define MODMASK 0x01fc /* mask of operational modifiers */
|
||||
/*
|
||||
* Namei parameter descriptors.
|
||||
*
|
||||
@ -129,7 +130,6 @@ struct nameidata {
|
||||
* name being sought. The caller is responsible for releasing the
|
||||
* buffer and for vrele'ing ni_startdir.
|
||||
*/
|
||||
#define NOCROSSMOUNT 0x000100 /* do not cross mount points */
|
||||
#define RDONLY 0x000200 /* lookup with read-only semantics */
|
||||
#define HASBUF 0x000400 /* has allocated pathname buffer */
|
||||
#define SAVENAME 0x000800 /* save pathname buffer */
|
||||
@ -143,7 +143,8 @@ struct nameidata {
|
||||
#define WILLBEDIR 0x080000 /* new files will be dirs; allow trailing / */
|
||||
#define ISUNICODE 0x100000 /* current component name is unicode*/
|
||||
#define PDIRUNLOCK 0x200000 /* file system lookup() unlocked parent dir */
|
||||
#define PARAMASK 0x1fff00 /* mask of parameter descriptors */
|
||||
#define NOCROSSMOUNT 0x400000 /* do not cross mount points */
|
||||
#define PARAMASK 0x3ffe00 /* mask of parameter descriptors */
|
||||
|
||||
/*
|
||||
* Initialization of an nameidata structure.
|
||||
|
Loading…
Reference in New Issue
Block a user