Implement NFS ACCESS RPC result caching.

This yields startling performance increases for NFS clients for many access profiles, due to the fact that ACCESS results are persistently cached in the namecache in many cases. Note that the code is somewhat conservative in that it requires an exact credential match for a cache hit. This bloats the nfsnode structure by sizeof(struct ucred) (96 bytes). Any less conservative approach opens the possibility for a false veto in eg. setuid applications. Alternative suggestions would be welcomed. The cache is normally disabled, to activate set the sysctl variable vfs.nfs.access_cache_timeout to a nonzero value. This is the time in seconds that a cached entry will be considered valid; useful values appear to be 2-10 seconds. Performance of the cache can be monitored with the vfs.nfs.access_cache_hits and vfs.nfs.access_cache_hits variables.
svn path=/head/; revision=41127
1998-11-13 02:39:09 +00:00 · 1998-11-13 02:39:09 +00:00 · c5118de899 · 2020-12-20 02:59:44 +00:00
commit c5118de899
parent 1c680b45a2
4 changed files with 198 additions and 50 deletions
--- a/sys/nfs/nfs_vnops.c
+++ b/sys/nfs/nfs_vnops.c
@ -34,7 +34,7 @@
 * SUCH DAMAGE.
 *
 *	@(#)nfs_vnops.c	8.16 (Berkeley) 5/27/95
- * $Id: nfs_vnops.c,v 1.110 1998/10/31 15:31:26 peter Exp $
+ * $Id: nfs_vnops.c,v 1.111 1998/11/09 07:00:14 peter Exp $
 */


@ -60,6 +60,7 @@
 #include <sys/fcntl.h>
 #include <sys/lockf.h>
 #include <sys/stat.h>
+#include <sys/sysctl.h>

 #include <vm/vm.h>
 #include <vm/vm_extern.h>
@ -247,6 +248,35 @@ struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON];
 int nfs_numasync = 0;
 #define	DIRHDSIZ	(sizeof (struct dirent) - (MAXNAMLEN + 1))

+static int	nfsaccess_cache_timeout;
+SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW, 
+	   &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout");
+
+static int	nfsaccess_cache_hits;
+SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_hits, CTLFLAG_RD, 
+	   &nfsaccess_cache_hits, 0, "NFS ACCESS cache hit count");
+
+static int	nfsaccess_cache_fills;
+SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_fills, CTLFLAG_RD, 
+	   &nfsaccess_cache_fills, 0, "NFS ACCESS cache fill count");
+
+/*
+ * Compare two ucred structures, returns zero on equality, nonzero
+ * otherwise.
+ */
+static int
+nfsa_ucredcmp(struct ucred *c1, struct ucred *c2)
+{
+    int		i;
+    
+    if ((c1->cr_uid != c2->cr_uid) || (c1->cr_ngroups != c2->cr_ngroups))
+	return(1);
+    for (i = 0; i < c1->cr_ngroups; i++)
+	if (c1->cr_groups[i] != c2->cr_groups[i])
+	    return(1);
+    return(0);
+}
+
 /*
 * nfs access vnode op.
 * For nfs version 2, just return ok. File accesses may fail later.
@ -269,8 +299,9 @@ nfs_access(ap)
 	caddr_t bpos, dpos, cp2;
 	int error = 0, attrflag;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
-	u_int32_t mode, rmode;
+	u_int32_t mode, rmode, wmode;
 	int v3 = NFS_ISV3(vp);
+	struct nfsnode *np = VTONFS(vp);

 	/*
 	 * Disallow write attempts on filesystems mounted read-only;
@ -288,18 +319,14 @@ nfs_access(ap)
 		}
 	}
 	/*
-	 * For nfs v3, do an access rpc, otherwise you are stuck emulating
+	 * For nfs v3, check to see if we have done this recently, and if
+	 * so return our cached result instead of making an ACCESS call.
+	 * If not, do an access rpc, otherwise you are stuck emulating
 	 * ufs_access() locally using the vattr. This may not be correct,
 	 * since the server may apply other access criteria such as
-	 * client uid-->server uid mapping that we do not know about, but
-	 * this is better than just returning anything that is lying about
-	 * in the cache.
+	 * client uid-->server uid mapping that we do not know about.
 	 */
 	if (v3) {
-		nfsstats.rpccnt[NFSPROC_ACCESS]++;
-		nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED);
-		nfsm_fhtom(vp, v3);
-		nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
 		if (ap->a_mode & VREAD)
 			mode = NFSV3ACCESS_READ;
 		else
@ -316,21 +343,49 @@ nfs_access(ap)
 			if (ap->a_mode & VEXEC)
 				mode |= NFSV3ACCESS_LOOKUP;
 		}
-		*tl = txdr_unsigned(mode);
-		nfsm_request(vp, NFSPROC_ACCESS, ap->a_p, ap->a_cred);
-		nfsm_postop_attr(vp, attrflag);
-		if (!error) {
-			nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
-			rmode = fxdr_unsigned(u_int32_t, *tl);
-			/*
-			 * The NFS V3 spec does not clarify whether or not
-			 * the returned access bits can be a superset of
-			 * the ones requested, so...
-			 */
-			if ((rmode & mode) != mode)
-				error = EACCES;
+		/* XXX safety belt, only make blanket request if caching */
+		if (nfsaccess_cache_timeout > 0) {
+			wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY | 
+				NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE | 
+				NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP;
+		} else {
+			wmode = mode;
+		}
+
+		/* do we have a cached result? */
+		if ((time_second < (np->n_modestamp + nfsaccess_cache_timeout)) &&
+		    !nfsa_ucredcmp(ap->a_cred, &np->n_modecred)) {
+			nfsaccess_cache_hits++;
+			if ((np->n_mode & mode) != mode)
+				error = EACCES;
+		} else {
+			nfsstats.rpccnt[NFSPROC_ACCESS]++;
+			nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED);
+			nfsm_fhtom(vp, v3);
+			nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
+			*tl = txdr_unsigned(wmode); 
+			nfsm_request(vp, NFSPROC_ACCESS, ap->a_p, ap->a_cred);
+			nfsm_postop_attr(vp, attrflag);
+			if (!error) {
+				nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
+				rmode = fxdr_unsigned(u_int32_t, *tl);
+				/*
+				 * The NFS V3 spec does not clarify whether or not
+				 * the returned access bits can be a superset of
+				 * the ones requested, so...
+				 */
+				if ((rmode & mode) != mode) {
+					error = EACCES;
+				} else if (nfsaccess_cache_timeout > 0) {
+					/* cache the result */
+					nfsaccess_cache_fills++;
+					np->n_mode = rmode;
+					np->n_modecred = *ap->a_cred;
+					np->n_modestamp = time_second;
+				}
+			}
+			nfsm_reqdone;
 		}
-		nfsm_reqdone;
 		return (error);
 	} else {
 		if (error = nfsspec_access(ap))
@ -645,6 +700,11 @@ nfs_setattr(ap)
 		 ap->a_p, 1)) == EINTR)
 		return (error);
 	error = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_p);
+	/* 
+	 * Attributes on server may have changed, make no assumptions about 
+	 * the server's reaction to these changes.
+	 */
+	np->n_modestamp = 0;
 	if (error && vap->va_size != VNOVAL) {
 		np->n_size = np->n_vattr.va_size = tsize;
 		vnode_pager_setsize(vp, np->n_size);
@ -1502,6 +1562,7 @@ nfs_remove(ap)
 		error = nfs_sillyrename(dvp, vp, cnp);
 	zfree(namei_zone, cnp->cn_pnbuf);
 	np->n_attrstamp = 0;
+	np->n_modestamp = 0;
 	return (error);
 }

@ -1605,6 +1666,16 @@ nfs_rename(ap)
 			cache_purge(tdvp);
 		cache_purge(fdvp);
 	}
+
+	/* 
+	 * We can't presume too much on the server's access control method(s), and
+	 * it may use rules based on filenames or locations.  Moving to a more
+	 * restrictive location would be harmless, but moving to a less restrictive
+	 * location you would be forced to wait for the cache entry to time out.
+	 */
+	VTONFS(fvp)->n_modestamp = 0;
+	VTONFS(tvp)->n_modestamp = 0;
+
 out:
 	if (tdvp == tvp)
 		vrele(tdvp);
--- a/sys/nfs/nfsnode.h
+++ b/sys/nfs/nfsnode.h
@ -34,7 +34,7 @@
 * SUCH DAMAGE.
 *
 *	@(#)nfsnode.h	8.9 (Berkeley) 5/14/95
- * $Id: nfsnode.h,v 1.25 1998/03/06 09:46:52 msmith Exp $
+ * $Id: nfsnode.h,v 1.26 1998/05/31 18:32:23 peter Exp $
 */


@ -93,6 +93,9 @@ struct nfsnode {
 	u_quad_t		n_lrev;		/* Modify rev for lease */
 	struct vattr		n_vattr;	/* Vnode attribute cache */
 	time_t			n_attrstamp;	/* Attr. cache timestamp */
+	u_int32_t		n_mode;		/* ACCESS mode cache */
+	struct ucred		n_modecred;	/* credentials having mode */
+	time_t			n_modestamp;	/* mode cache timestamp */
 	time_t			n_mtime;	/* Prev modify time. */
 	time_t			n_ctime;	/* Prev create time. */
 	time_t			n_expiry;	/* Lease expiry time */
--- a/sys/nfsclient/nfs_vnops.c
+++ b/sys/nfsclient/nfs_vnops.c
@ -34,7 +34,7 @@
 * SUCH DAMAGE.
 *
 *	@(#)nfs_vnops.c	8.16 (Berkeley) 5/27/95
- * $Id: nfs_vnops.c,v 1.110 1998/10/31 15:31:26 peter Exp $
+ * $Id: nfs_vnops.c,v 1.111 1998/11/09 07:00:14 peter Exp $
 */


@ -60,6 +60,7 @@
 #include <sys/fcntl.h>
 #include <sys/lockf.h>
 #include <sys/stat.h>
+#include <sys/sysctl.h>

 #include <vm/vm.h>
 #include <vm/vm_extern.h>
@ -247,6 +248,35 @@ struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON];
 int nfs_numasync = 0;
 #define	DIRHDSIZ	(sizeof (struct dirent) - (MAXNAMLEN + 1))

+static int	nfsaccess_cache_timeout;
+SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW, 
+	   &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout");
+
+static int	nfsaccess_cache_hits;
+SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_hits, CTLFLAG_RD, 
+	   &nfsaccess_cache_hits, 0, "NFS ACCESS cache hit count");
+
+static int	nfsaccess_cache_fills;
+SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_fills, CTLFLAG_RD, 
+	   &nfsaccess_cache_fills, 0, "NFS ACCESS cache fill count");
+
+/*
+ * Compare two ucred structures, returns zero on equality, nonzero
+ * otherwise.
+ */
+static int
+nfsa_ucredcmp(struct ucred *c1, struct ucred *c2)
+{
+    int		i;
+    
+    if ((c1->cr_uid != c2->cr_uid) || (c1->cr_ngroups != c2->cr_ngroups))
+	return(1);
+    for (i = 0; i < c1->cr_ngroups; i++)
+	if (c1->cr_groups[i] != c2->cr_groups[i])
+	    return(1);
+    return(0);
+}
+
 /*
 * nfs access vnode op.
 * For nfs version 2, just return ok. File accesses may fail later.
@ -269,8 +299,9 @@ nfs_access(ap)
 	caddr_t bpos, dpos, cp2;
 	int error = 0, attrflag;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
-	u_int32_t mode, rmode;
+	u_int32_t mode, rmode, wmode;
 	int v3 = NFS_ISV3(vp);
+	struct nfsnode *np = VTONFS(vp);

 	/*
 	 * Disallow write attempts on filesystems mounted read-only;
@ -288,18 +319,14 @@ nfs_access(ap)
 		}
 	}
 	/*
-	 * For nfs v3, do an access rpc, otherwise you are stuck emulating
+	 * For nfs v3, check to see if we have done this recently, and if
+	 * so return our cached result instead of making an ACCESS call.
+	 * If not, do an access rpc, otherwise you are stuck emulating
 	 * ufs_access() locally using the vattr. This may not be correct,
 	 * since the server may apply other access criteria such as
-	 * client uid-->server uid mapping that we do not know about, but
-	 * this is better than just returning anything that is lying about
-	 * in the cache.
+	 * client uid-->server uid mapping that we do not know about.
 	 */
 	if (v3) {
-		nfsstats.rpccnt[NFSPROC_ACCESS]++;
-		nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED);
-		nfsm_fhtom(vp, v3);
-		nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
 		if (ap->a_mode & VREAD)
 			mode = NFSV3ACCESS_READ;
 		else
@ -316,21 +343,49 @@ nfs_access(ap)
 			if (ap->a_mode & VEXEC)
 				mode |= NFSV3ACCESS_LOOKUP;
 		}
-		*tl = txdr_unsigned(mode);
-		nfsm_request(vp, NFSPROC_ACCESS, ap->a_p, ap->a_cred);
-		nfsm_postop_attr(vp, attrflag);
-		if (!error) {
-			nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
-			rmode = fxdr_unsigned(u_int32_t, *tl);
-			/*
-			 * The NFS V3 spec does not clarify whether or not
-			 * the returned access bits can be a superset of
-			 * the ones requested, so...
-			 */
-			if ((rmode & mode) != mode)
-				error = EACCES;
+		/* XXX safety belt, only make blanket request if caching */
+		if (nfsaccess_cache_timeout > 0) {
+			wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY | 
+				NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE | 
+				NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP;
+		} else {
+			wmode = mode;
+		}
+
+		/* do we have a cached result? */
+		if ((time_second < (np->n_modestamp + nfsaccess_cache_timeout)) &&
+		    !nfsa_ucredcmp(ap->a_cred, &np->n_modecred)) {
+			nfsaccess_cache_hits++;
+			if ((np->n_mode & mode) != mode)
+				error = EACCES;
+		} else {
+			nfsstats.rpccnt[NFSPROC_ACCESS]++;
+			nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED);
+			nfsm_fhtom(vp, v3);
+			nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
+			*tl = txdr_unsigned(wmode); 
+			nfsm_request(vp, NFSPROC_ACCESS, ap->a_p, ap->a_cred);
+			nfsm_postop_attr(vp, attrflag);
+			if (!error) {
+				nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
+				rmode = fxdr_unsigned(u_int32_t, *tl);
+				/*
+				 * The NFS V3 spec does not clarify whether or not
+				 * the returned access bits can be a superset of
+				 * the ones requested, so...
+				 */
+				if ((rmode & mode) != mode) {
+					error = EACCES;
+				} else if (nfsaccess_cache_timeout > 0) {
+					/* cache the result */
+					nfsaccess_cache_fills++;
+					np->n_mode = rmode;
+					np->n_modecred = *ap->a_cred;
+					np->n_modestamp = time_second;
+				}
+			}
+			nfsm_reqdone;
 		}
-		nfsm_reqdone;
 		return (error);
 	} else {
 		if (error = nfsspec_access(ap))
@ -645,6 +700,11 @@ nfs_setattr(ap)
 		 ap->a_p, 1)) == EINTR)
 		return (error);
 	error = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_p);
+	/* 
+	 * Attributes on server may have changed, make no assumptions about 
+	 * the server's reaction to these changes.
+	 */
+	np->n_modestamp = 0;
 	if (error && vap->va_size != VNOVAL) {
 		np->n_size = np->n_vattr.va_size = tsize;
 		vnode_pager_setsize(vp, np->n_size);
@ -1502,6 +1562,7 @@ nfs_remove(ap)
 		error = nfs_sillyrename(dvp, vp, cnp);
 	zfree(namei_zone, cnp->cn_pnbuf);
 	np->n_attrstamp = 0;
+	np->n_modestamp = 0;
 	return (error);
 }

@ -1605,6 +1666,16 @@ nfs_rename(ap)
 			cache_purge(tdvp);
 		cache_purge(fdvp);
 	}
+
+	/* 
+	 * We can't presume too much on the server's access control method(s), and
+	 * it may use rules based on filenames or locations.  Moving to a more
+	 * restrictive location would be harmless, but moving to a less restrictive
+	 * location you would be forced to wait for the cache entry to time out.
+	 */
+	VTONFS(fvp)->n_modestamp = 0;
+	VTONFS(tvp)->n_modestamp = 0;
+
 out:
 	if (tdvp == tvp)
 		vrele(tdvp);
--- a/sys/nfsclient/nfsnode.h
+++ b/sys/nfsclient/nfsnode.h
@ -34,7 +34,7 @@
 * SUCH DAMAGE.
 *
 *	@(#)nfsnode.h	8.9 (Berkeley) 5/14/95
- * $Id: nfsnode.h,v 1.25 1998/03/06 09:46:52 msmith Exp $
+ * $Id: nfsnode.h,v 1.26 1998/05/31 18:32:23 peter Exp $
 */


@ -93,6 +93,9 @@ struct nfsnode {
 	u_quad_t		n_lrev;		/* Modify rev for lease */
 	struct vattr		n_vattr;	/* Vnode attribute cache */
 	time_t			n_attrstamp;	/* Attr. cache timestamp */
+	u_int32_t		n_mode;		/* ACCESS mode cache */
+	struct ucred		n_modecred;	/* credentials having mode */
+	time_t			n_modestamp;	/* mode cache timestamp */
 	time_t			n_mtime;	/* Prev modify time. */
 	time_t			n_ctime;	/* Prev create time. */
 	time_t			n_expiry;	/* Lease expiry time */