Implement NFS ACCESS RPC result caching.

This yields startling performance increases for NFS clients for many
access profiles, due to the fact that ACCESS results are persistently
cached in the namecache in many cases.

Note that the code is somewhat conservative in that it requires an
exact credential match for a cache hit.  This bloats the nfsnode
structure by sizeof(struct ucred) (96 bytes).  Any less conservative
approach opens the possibility for a false veto in eg. setuid
applications.  Alternative suggestions would be welcomed.

The cache is normally disabled, to activate set the sysctl variable
vfs.nfs.access_cache_timeout to a nonzero value.  This is the time in
seconds that a cached entry will be considered valid; useful values appear
to be 2-10 seconds.  Performance of the cache can be monitored with the
vfs.nfs.access_cache_hits and vfs.nfs.access_cache_hits variables.
This commit is contained in:
msmith 1998-11-13 02:39:09 +00:00
parent 2f1d2c0d26
commit aebc5396ec
4 changed files with 198 additions and 50 deletions

View File

@ -34,7 +34,7 @@
* SUCH DAMAGE.
*
* @(#)nfs_vnops.c 8.16 (Berkeley) 5/27/95
* $Id: nfs_vnops.c,v 1.110 1998/10/31 15:31:26 peter Exp $
* $Id: nfs_vnops.c,v 1.111 1998/11/09 07:00:14 peter Exp $
*/
@ -60,6 +60,7 @@
#include <sys/fcntl.h>
#include <sys/lockf.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
@ -247,6 +248,35 @@ struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON];
int nfs_numasync = 0;
#define DIRHDSIZ (sizeof (struct dirent) - (MAXNAMLEN + 1))
static int nfsaccess_cache_timeout;
SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW,
&nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout");
static int nfsaccess_cache_hits;
SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_hits, CTLFLAG_RD,
&nfsaccess_cache_hits, 0, "NFS ACCESS cache hit count");
static int nfsaccess_cache_fills;
SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_fills, CTLFLAG_RD,
&nfsaccess_cache_fills, 0, "NFS ACCESS cache fill count");
/*
* Compare two ucred structures, returns zero on equality, nonzero
* otherwise.
*/
static int
nfsa_ucredcmp(struct ucred *c1, struct ucred *c2)
{
int i;
if ((c1->cr_uid != c2->cr_uid) || (c1->cr_ngroups != c2->cr_ngroups))
return(1);
for (i = 0; i < c1->cr_ngroups; i++)
if (c1->cr_groups[i] != c2->cr_groups[i])
return(1);
return(0);
}
/*
* nfs access vnode op.
* For nfs version 2, just return ok. File accesses may fail later.
@ -269,8 +299,9 @@ nfs_access(ap)
caddr_t bpos, dpos, cp2;
int error = 0, attrflag;
struct mbuf *mreq, *mrep, *md, *mb, *mb2;
u_int32_t mode, rmode;
u_int32_t mode, rmode, wmode;
int v3 = NFS_ISV3(vp);
struct nfsnode *np = VTONFS(vp);
/*
* Disallow write attempts on filesystems mounted read-only;
@ -288,18 +319,14 @@ nfs_access(ap)
}
}
/*
* For nfs v3, do an access rpc, otherwise you are stuck emulating
* For nfs v3, check to see if we have done this recently, and if
* so return our cached result instead of making an ACCESS call.
* If not, do an access rpc, otherwise you are stuck emulating
* ufs_access() locally using the vattr. This may not be correct,
* since the server may apply other access criteria such as
* client uid-->server uid mapping that we do not know about, but
* this is better than just returning anything that is lying about
* in the cache.
* client uid-->server uid mapping that we do not know about.
*/
if (v3) {
nfsstats.rpccnt[NFSPROC_ACCESS]++;
nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED);
nfsm_fhtom(vp, v3);
nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
if (ap->a_mode & VREAD)
mode = NFSV3ACCESS_READ;
else
@ -316,21 +343,49 @@ nfs_access(ap)
if (ap->a_mode & VEXEC)
mode |= NFSV3ACCESS_LOOKUP;
}
*tl = txdr_unsigned(mode);
nfsm_request(vp, NFSPROC_ACCESS, ap->a_p, ap->a_cred);
nfsm_postop_attr(vp, attrflag);
if (!error) {
nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
rmode = fxdr_unsigned(u_int32_t, *tl);
/*
* The NFS V3 spec does not clarify whether or not
* the returned access bits can be a superset of
* the ones requested, so...
*/
if ((rmode & mode) != mode)
error = EACCES;
/* XXX safety belt, only make blanket request if caching */
if (nfsaccess_cache_timeout > 0) {
wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY |
NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE |
NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP;
} else {
wmode = mode;
}
/* do we have a cached result? */
if ((time_second < (np->n_modestamp + nfsaccess_cache_timeout)) &&
!nfsa_ucredcmp(ap->a_cred, &np->n_modecred)) {
nfsaccess_cache_hits++;
if ((np->n_mode & mode) != mode)
error = EACCES;
} else {
nfsstats.rpccnt[NFSPROC_ACCESS]++;
nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED);
nfsm_fhtom(vp, v3);
nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
*tl = txdr_unsigned(wmode);
nfsm_request(vp, NFSPROC_ACCESS, ap->a_p, ap->a_cred);
nfsm_postop_attr(vp, attrflag);
if (!error) {
nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
rmode = fxdr_unsigned(u_int32_t, *tl);
/*
* The NFS V3 spec does not clarify whether or not
* the returned access bits can be a superset of
* the ones requested, so...
*/
if ((rmode & mode) != mode) {
error = EACCES;
} else if (nfsaccess_cache_timeout > 0) {
/* cache the result */
nfsaccess_cache_fills++;
np->n_mode = rmode;
np->n_modecred = *ap->a_cred;
np->n_modestamp = time_second;
}
}
nfsm_reqdone;
}
nfsm_reqdone;
return (error);
} else {
if (error = nfsspec_access(ap))
@ -645,6 +700,11 @@ nfs_setattr(ap)
ap->a_p, 1)) == EINTR)
return (error);
error = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_p);
/*
* Attributes on server may have changed, make no assumptions about
* the server's reaction to these changes.
*/
np->n_modestamp = 0;
if (error && vap->va_size != VNOVAL) {
np->n_size = np->n_vattr.va_size = tsize;
vnode_pager_setsize(vp, np->n_size);
@ -1502,6 +1562,7 @@ nfs_remove(ap)
error = nfs_sillyrename(dvp, vp, cnp);
zfree(namei_zone, cnp->cn_pnbuf);
np->n_attrstamp = 0;
np->n_modestamp = 0;
return (error);
}
@ -1605,6 +1666,16 @@ nfs_rename(ap)
cache_purge(tdvp);
cache_purge(fdvp);
}
/*
* We can't presume too much on the server's access control method(s), and
* it may use rules based on filenames or locations. Moving to a more
* restrictive location would be harmless, but moving to a less restrictive
* location you would be forced to wait for the cache entry to time out.
*/
VTONFS(fvp)->n_modestamp = 0;
VTONFS(tvp)->n_modestamp = 0;
out:
if (tdvp == tvp)
vrele(tdvp);

View File

@ -34,7 +34,7 @@
* SUCH DAMAGE.
*
* @(#)nfsnode.h 8.9 (Berkeley) 5/14/95
* $Id: nfsnode.h,v 1.25 1998/03/06 09:46:52 msmith Exp $
* $Id: nfsnode.h,v 1.26 1998/05/31 18:32:23 peter Exp $
*/
@ -93,6 +93,9 @@ struct nfsnode {
u_quad_t n_lrev; /* Modify rev for lease */
struct vattr n_vattr; /* Vnode attribute cache */
time_t n_attrstamp; /* Attr. cache timestamp */
u_int32_t n_mode; /* ACCESS mode cache */
struct ucred n_modecred; /* credentials having mode */
time_t n_modestamp; /* mode cache timestamp */
time_t n_mtime; /* Prev modify time. */
time_t n_ctime; /* Prev create time. */
time_t n_expiry; /* Lease expiry time */

View File

@ -34,7 +34,7 @@
* SUCH DAMAGE.
*
* @(#)nfs_vnops.c 8.16 (Berkeley) 5/27/95
* $Id: nfs_vnops.c,v 1.110 1998/10/31 15:31:26 peter Exp $
* $Id: nfs_vnops.c,v 1.111 1998/11/09 07:00:14 peter Exp $
*/
@ -60,6 +60,7 @@
#include <sys/fcntl.h>
#include <sys/lockf.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
@ -247,6 +248,35 @@ struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON];
int nfs_numasync = 0;
#define DIRHDSIZ (sizeof (struct dirent) - (MAXNAMLEN + 1))
static int nfsaccess_cache_timeout;
SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW,
&nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout");
static int nfsaccess_cache_hits;
SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_hits, CTLFLAG_RD,
&nfsaccess_cache_hits, 0, "NFS ACCESS cache hit count");
static int nfsaccess_cache_fills;
SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_fills, CTLFLAG_RD,
&nfsaccess_cache_fills, 0, "NFS ACCESS cache fill count");
/*
* Compare two ucred structures, returns zero on equality, nonzero
* otherwise.
*/
static int
nfsa_ucredcmp(struct ucred *c1, struct ucred *c2)
{
int i;
if ((c1->cr_uid != c2->cr_uid) || (c1->cr_ngroups != c2->cr_ngroups))
return(1);
for (i = 0; i < c1->cr_ngroups; i++)
if (c1->cr_groups[i] != c2->cr_groups[i])
return(1);
return(0);
}
/*
* nfs access vnode op.
* For nfs version 2, just return ok. File accesses may fail later.
@ -269,8 +299,9 @@ nfs_access(ap)
caddr_t bpos, dpos, cp2;
int error = 0, attrflag;
struct mbuf *mreq, *mrep, *md, *mb, *mb2;
u_int32_t mode, rmode;
u_int32_t mode, rmode, wmode;
int v3 = NFS_ISV3(vp);
struct nfsnode *np = VTONFS(vp);
/*
* Disallow write attempts on filesystems mounted read-only;
@ -288,18 +319,14 @@ nfs_access(ap)
}
}
/*
* For nfs v3, do an access rpc, otherwise you are stuck emulating
* For nfs v3, check to see if we have done this recently, and if
* so return our cached result instead of making an ACCESS call.
* If not, do an access rpc, otherwise you are stuck emulating
* ufs_access() locally using the vattr. This may not be correct,
* since the server may apply other access criteria such as
* client uid-->server uid mapping that we do not know about, but
* this is better than just returning anything that is lying about
* in the cache.
* client uid-->server uid mapping that we do not know about.
*/
if (v3) {
nfsstats.rpccnt[NFSPROC_ACCESS]++;
nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED);
nfsm_fhtom(vp, v3);
nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
if (ap->a_mode & VREAD)
mode = NFSV3ACCESS_READ;
else
@ -316,21 +343,49 @@ nfs_access(ap)
if (ap->a_mode & VEXEC)
mode |= NFSV3ACCESS_LOOKUP;
}
*tl = txdr_unsigned(mode);
nfsm_request(vp, NFSPROC_ACCESS, ap->a_p, ap->a_cred);
nfsm_postop_attr(vp, attrflag);
if (!error) {
nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
rmode = fxdr_unsigned(u_int32_t, *tl);
/*
* The NFS V3 spec does not clarify whether or not
* the returned access bits can be a superset of
* the ones requested, so...
*/
if ((rmode & mode) != mode)
error = EACCES;
/* XXX safety belt, only make blanket request if caching */
if (nfsaccess_cache_timeout > 0) {
wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY |
NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE |
NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP;
} else {
wmode = mode;
}
/* do we have a cached result? */
if ((time_second < (np->n_modestamp + nfsaccess_cache_timeout)) &&
!nfsa_ucredcmp(ap->a_cred, &np->n_modecred)) {
nfsaccess_cache_hits++;
if ((np->n_mode & mode) != mode)
error = EACCES;
} else {
nfsstats.rpccnt[NFSPROC_ACCESS]++;
nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED);
nfsm_fhtom(vp, v3);
nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
*tl = txdr_unsigned(wmode);
nfsm_request(vp, NFSPROC_ACCESS, ap->a_p, ap->a_cred);
nfsm_postop_attr(vp, attrflag);
if (!error) {
nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
rmode = fxdr_unsigned(u_int32_t, *tl);
/*
* The NFS V3 spec does not clarify whether or not
* the returned access bits can be a superset of
* the ones requested, so...
*/
if ((rmode & mode) != mode) {
error = EACCES;
} else if (nfsaccess_cache_timeout > 0) {
/* cache the result */
nfsaccess_cache_fills++;
np->n_mode = rmode;
np->n_modecred = *ap->a_cred;
np->n_modestamp = time_second;
}
}
nfsm_reqdone;
}
nfsm_reqdone;
return (error);
} else {
if (error = nfsspec_access(ap))
@ -645,6 +700,11 @@ nfs_setattr(ap)
ap->a_p, 1)) == EINTR)
return (error);
error = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_p);
/*
* Attributes on server may have changed, make no assumptions about
* the server's reaction to these changes.
*/
np->n_modestamp = 0;
if (error && vap->va_size != VNOVAL) {
np->n_size = np->n_vattr.va_size = tsize;
vnode_pager_setsize(vp, np->n_size);
@ -1502,6 +1562,7 @@ nfs_remove(ap)
error = nfs_sillyrename(dvp, vp, cnp);
zfree(namei_zone, cnp->cn_pnbuf);
np->n_attrstamp = 0;
np->n_modestamp = 0;
return (error);
}
@ -1605,6 +1666,16 @@ nfs_rename(ap)
cache_purge(tdvp);
cache_purge(fdvp);
}
/*
* We can't presume too much on the server's access control method(s), and
* it may use rules based on filenames or locations. Moving to a more
* restrictive location would be harmless, but moving to a less restrictive
* location you would be forced to wait for the cache entry to time out.
*/
VTONFS(fvp)->n_modestamp = 0;
VTONFS(tvp)->n_modestamp = 0;
out:
if (tdvp == tvp)
vrele(tdvp);

View File

@ -34,7 +34,7 @@
* SUCH DAMAGE.
*
* @(#)nfsnode.h 8.9 (Berkeley) 5/14/95
* $Id: nfsnode.h,v 1.25 1998/03/06 09:46:52 msmith Exp $
* $Id: nfsnode.h,v 1.26 1998/05/31 18:32:23 peter Exp $
*/
@ -93,6 +93,9 @@ struct nfsnode {
u_quad_t n_lrev; /* Modify rev for lease */
struct vattr n_vattr; /* Vnode attribute cache */
time_t n_attrstamp; /* Attr. cache timestamp */
u_int32_t n_mode; /* ACCESS mode cache */
struct ucred n_modecred; /* credentials having mode */
time_t n_modestamp; /* mode cache timestamp */
time_t n_mtime; /* Prev modify time. */
time_t n_ctime; /* Prev create time. */
time_t n_expiry; /* Lease expiry time */