This patch adds the "LOCKSHARED" flag to namei, which causes it to acquire only shared locks on leaf vnodes.

The stat() and open() calls have been changed to make use of this new functionality.  Shared locks are
sufficient in these cases and can significantly reduce latency when IO is pending on the vnodes involved.
This also reduces the number of exclusive locks floating around in the system, which in turn reduces the
number of deadlocks that occur.
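
For illustration, a lookup that only needs to read the leaf can request a shared lock the same way the
changed stat() below does.  This is a minimal sketch, not part of the patch; it assumes a kernel built with
LOOKUP_SHARED and a caller that already has a userland "path" and the thread pointer "td" in scope:

        struct nameidata nd;
        int error;

        /* Ask namei for a shared (rather than exclusive) lock on the leaf. */
        NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | NOOBJ,
            UIO_USERSPACE, path, td);
        if ((error = namei(&nd)) != 0)
                return (error);
        /* nd.ni_vp is returned locked; read-only inspection is safe here. */
        vput(nd.ni_vp);

Note that LOCKSHARED only affects the last component of the path; directories traversed along the way are
locked as before.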

A new kernel option, "LOOKUP_SHARED", has been added.  It defaults to off so that this functionality can be
enabled selectively for testing, and it should eventually go away once the code is proven stable.  I have
personally been running this patch for over a year now, so it is believed to be fully stable.
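
For testing, the option is enabled like any other kernel option in the kernel configuration file, e.g.:

        options         LOOKUP_SHARED

Without it, the code paths that honor LOCKSHARED are compiled out and lookups keep taking exclusive leaf
locks.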

Reviewed by:	jake, obrien
Approved by:	jake
jeff committed on 2002-03-12 04:00:11 +00:00
commit e6d26e8880, parent b6412800f5
6 changed files with 112 additions and 3 deletions

sys/conf/options

@@ -383,6 +383,7 @@ BLKDEV_IOSIZE		opt_global.h
DEBUG			opt_global.h
DEBUG_LOCKS		opt_global.h
DEBUG_VFS_LOCKS		opt_global.h
LOOKUP_SHARED		opt_global.h
DIAGNOSTIC		opt_global.h
ENABLE_VFS_IOOPT	opt_global.h
INVARIANT_SUPPORT	opt_global.h

sys/kern/vfs_cache.c

@@ -570,8 +570,35 @@ vfs_cache_lookup(ap)
	error = cache_lookup(dvp, vpp, cnp);
#ifdef LOOKUP_SHARED
	if (!error) {
		/* We do this because the rest of the system now expects to get
		 * a shared lock, which is later upgraded if LOCKSHARED is not
		 * set.  We have so many cases here because of bugs that yield
		 * inconsistent lock states.  This all badly needs to be fixed.
		 */
		error = VOP_CACHEDLOOKUP(dvp, vpp, cnp);
		if (!error) {
			int flock;
			flock = VOP_ISLOCKED(*vpp, td);
			if (flock != LK_EXCLUSIVE) {
				if (flock == 0) {
					if ((flags & ISLASTCN) &&
					    (flags & LOCKSHARED))
						VOP_LOCK(*vpp, LK_SHARED, td);
					else
						VOP_LOCK(*vpp, LK_EXCLUSIVE, td);
				}
			} else if ((flags & ISLASTCN) && (flags & LOCKSHARED))
				VOP_LOCK(*vpp, LK_DOWNGRADE, td);
		}
		return (error);
	}
#else
	if (!error)
		return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
#endif
	if (error == ENOENT)
		return (error);
@@ -585,13 +612,28 @@ vfs_cache_lookup(ap)
	} else if (flags & ISDOTDOT) {
		VOP_UNLOCK(dvp, 0, td);
		cnp->cn_flags |= PDIRUNLOCK;
#ifdef LOOKUP_SHARED
		if ((flags & ISLASTCN) && (flags & LOCKSHARED))
			error = vget(vp, LK_SHARED, td);
		else
			error = vget(vp, LK_EXCLUSIVE, td);
#else
		error = vget(vp, LK_EXCLUSIVE, td);
#endif
		if (!error && lockparent && (flags & ISLASTCN)) {
			if ((error = vn_lock(dvp, LK_EXCLUSIVE, td)) == 0)
				cnp->cn_flags &= ~PDIRUNLOCK;
		}
	} else {
#ifdef LOOKUP_SHARED
		if ((flags & ISLASTCN) && (flags & LOCKSHARED))
			error = vget(vp, LK_SHARED, td);
		else
			error = vget(vp, LK_EXCLUSIVE, td);
#else
		error = vget(vp, LK_EXCLUSIVE, td);
#endif
		if (!lockparent || error || !(flags & ISLASTCN)) {
			VOP_UNLOCK(dvp, 0, td);
			cnp->cn_flags |= PDIRUNLOCK;
@@ -616,7 +658,28 @@ vfs_cache_lookup(ap)
			return (error);
		cnp->cn_flags &= ~PDIRUNLOCK;
	}
#ifdef LOOKUP_SHARED
	error = VOP_CACHEDLOOKUP(dvp, vpp, cnp);
	if (!error) {
		int flock = 0;
		flock = VOP_ISLOCKED(*vpp, td);
		if (flock != LK_EXCLUSIVE) {
			if (flock == 0) {
				if ((flags & ISLASTCN) && (flags & LOCKSHARED))
					VOP_LOCK(*vpp, LK_SHARED, td);
				else
					VOP_LOCK(*vpp, LK_EXCLUSIVE, td);
			}
		} else if ((flags & ISLASTCN) && (flags & LOCKSHARED))
			VOP_LOCK(*vpp, LK_DOWNGRADE, td);
	}
	return (error);
#else
	return (VOP_CACHEDLOOKUP(dvp, vpp, cnp));
#endif
}

sys/kern/vfs_extattr.c

@@ -2029,8 +2029,13 @@ stat(td, uap)
	int error;
	struct nameidata nd;
#ifdef LOOKUP_SHARED
	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | NOOBJ,
	    UIO_USERSPACE, SCARG(uap, path), td);
#else
	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
	    SCARG(uap, path), td);
#endif
	if ((error = namei(&nd)) != 0)
		return (error);
	error = vn_stat(nd.ni_vp, &sb, td);

sys/kern/vfs_syscalls.c

@@ -2029,8 +2029,13 @@ stat(td, uap)
	int error;
	struct nameidata nd;
#ifdef LOOKUP_SHARED
	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | NOOBJ,
	    UIO_USERSPACE, SCARG(uap, path), td);
#else
	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE,
	    SCARG(uap, path), td);
#endif
	if ((error = namei(&nd)) != 0)
		return (error);
	error = vn_stat(nd.ni_vp, &sb, td);

sys/kern/vfs_vnops.c

@@ -105,6 +105,11 @@ vn_open_cred(ndp, flagp, cmode, cred)
	struct vattr vat;
	struct vattr *vap = &vat;
	int mode, fmode, error;
#ifdef LOOKUP_SHARED
	int exclusive;		/* The current intended lock state */
	exclusive = 0;
#endif

restart:
	fmode = *flagp;
@@ -143,6 +148,9 @@ vn_open_cred(ndp, flagp, cmode, cred)
			ASSERT_VOP_LOCKED(ndp->ni_vp, "create");
			fmode &= ~O_TRUNC;
			vp = ndp->ni_vp;
#ifdef LOOKUP_SHARED
			exclusive = 1;
#endif
		} else {
			if (ndp->ni_dvp == ndp->ni_vp)
				vrele(ndp->ni_dvp);
@@ -158,8 +166,14 @@ vn_open_cred(ndp, flagp, cmode, cred)
		}
	} else {
		ndp->ni_cnd.cn_nameiop = LOOKUP;
#ifdef LOOKUP_SHARED
		ndp->ni_cnd.cn_flags =
		    ((fmode & O_NOFOLLOW) ? NOFOLLOW : FOLLOW) |
		    LOCKSHARED | LOCKLEAF;
#else
		ndp->ni_cnd.cn_flags =
		    ((fmode & O_NOFOLLOW) ? NOFOLLOW : FOLLOW) | LOCKLEAF;
#endif
		if ((error = namei(ndp)) != 0)
			return (error);
		vp = ndp->ni_vp;
@@ -198,6 +212,21 @@ vn_open_cred(ndp, flagp, cmode, cred)
	 * Make sure that a VM object is created for VMIO support.
	 */
	if (vn_canvmio(vp) == TRUE) {
#ifdef LOOKUP_SHARED
		int flock;
		if (!exclusive && vp->v_object == NULL)
			VOP_LOCK(vp, LK_UPGRADE, td);
		/*
		 * In cases where the object is marked as dead, object_create
		 * will unlock and relock exclusive.  It is safe to call in
		 * here with a shared lock because we only examine fields that
		 * the shared lock guarantees will be stable.  In the UPGRADE
		 * case it is not likely that anyone has used this vnode yet,
		 * so there will be no contention.  The logic after this call
		 * restores the requested locking state.
		 */
#endif
		if ((error = vfs_object_create(vp, td, cred)) != 0) {
			VOP_UNLOCK(vp, 0, td);
			VOP_CLOSE(vp, fmode, cred, td);
@@ -206,6 +235,11 @@ vn_open_cred(ndp, flagp, cmode, cred)
			*flagp = fmode;
			return (error);
		}
#ifdef LOOKUP_SHARED
		flock = VOP_ISLOCKED(vp, td);
		if (!exclusive && flock == LK_EXCLUSIVE)
			VOP_LOCK(vp, LK_DOWNGRADE, td);
#endif
	}
	if (fmode & FWRITE)

sys/sys/namei.h

@@ -113,8 +113,9 @@ struct nameidata {
#define	NOCACHE		0x0020	/* name must not be left in cache */
#define	FOLLOW		0x0040	/* follow symbolic links */
#define	NOOBJ		0x0080	/* don't create object */
#define	LOCKSHARED	0x0100	/* Shared lock leaf */
#define	NOFOLLOW	0x0000	/* do not follow symbolic links (pseudo) */
#define	MODMASK		0x00fc	/* mask of operational modifiers */
#define	MODMASK		0x01fc	/* mask of operational modifiers */
/*
* Namei parameter descriptors.
*
@@ -129,7 +130,6 @@ struct nameidata {
 * name being sought.  The caller is responsible for releasing the
 * buffer and for vrele'ing ni_startdir.
 */
#define	NOCROSSMOUNT	0x000100 /* do not cross mount points */
#define	RDONLY		0x000200 /* lookup with read-only semantics */
#define	HASBUF		0x000400 /* has allocated pathname buffer */
#define	SAVENAME	0x000800 /* save pathname buffer */
@@ -143,7 +143,8 @@ struct nameidata {
#define	WILLBEDIR	0x080000 /* new files will be dirs; allow trailing / */
#define	ISUNICODE	0x100000 /* current component name is unicode */
#define	PDIRUNLOCK	0x200000 /* file system lookup() unlocked parent dir */
#define	PARAMASK	0x1fff00 /* mask of parameter descriptors */
#define	NOCROSSMOUNT	0x400000 /* do not cross mount points */
#define	PARAMASK	0x3ffe00 /* mask of parameter descriptors */
/*
 * Initialization of a nameidata structure.