From f54e0fab4c381ffac266474cd866b849f62dce62 Mon Sep 17 00:00:00 2001 From: rmacklem Date: Sun, 31 Jan 2010 19:12:24 +0000 Subject: [PATCH] Patch the experimental NFS client so that there is a timeout for negative name cache entries in a manner analogous to r202767 for the regular NFS client. Also, make the code in nfs_lookup() compatible with that of the regular client and replace the sysctl variable that enabled negative name caching with the mount point option. MFC after: 2 weeks --- sys/fs/nfsclient/nfs_clvfsops.c | 22 ++++- sys/fs/nfsclient/nfs_clvnops.c | 170 +++++++++++++++++++++----------- sys/fs/nfsclient/nfsmount.h | 5 + sys/fs/nfsclient/nfsnode.h | 1 + 4 files changed, 139 insertions(+), 59 deletions(-) diff --git a/sys/fs/nfsclient/nfs_clvfsops.c b/sys/fs/nfsclient/nfs_clvfsops.c index 1f7d20868b3d..0bf2bf4b4e50 100644 --- a/sys/fs/nfsclient/nfs_clvfsops.c +++ b/sys/fs/nfsclient/nfs_clvfsops.c @@ -99,7 +99,7 @@ static void nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp, struct ucred *, struct thread *); static int mountnfs(struct nfs_args *, struct mount *, struct sockaddr *, char *, u_char *, u_char *, u_char *, - struct vnode **, struct ucred *, struct thread *); + struct vnode **, struct ucred *, struct thread *, int); static vfs_mount_t nfs_mount; static vfs_cmount_t nfs_cmount; static vfs_unmount_t nfs_unmount; @@ -498,7 +498,7 @@ nfs_mountdiskless(char *path, nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK); if ((error = mountnfs(args, mp, nam, path, NULL, NULL, NULL, vpp, - td->td_ucred, td)) != 0) { + td->td_ucred, td, NFS_DEFAULT_NEGNAMETIMEO)) != 0) { printf("nfs_mountroot: mount %s on /: %d\n", path, error); return (error); } @@ -669,6 +669,7 @@ static const char *nfs_opts[] = { "from", "retrans", "acregmin", "acregmax", "acdirmin", "acdirmax", "resvport", "readahead", "hostname", "timeout", "addr", "fh", "nfsv3", "sec", "principal", "nfsv4", "gssname", "allgssname", "dirpath", + "negnametimeo", NULL }; /* @@ -717,6 +718,7 @@ nfs_mount(struct mount *mp) char hst[MNAMELEN]; u_char nfh[NFSX_FHMAX], krbname[100], dirpath[100], srvkrbname[100]; char *opt, *name, *secname; + int negnametimeo = NFS_DEFAULT_NEGNAMETIMEO; if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) { error = EINVAL; @@ -891,6 +893,16 @@ nfs_mount(struct mount *mp) } args.flags |= NFSMNT_TIMEO; } + if (vfs_getopt(mp->mnt_optnew, "negnametimeo", (void **)&opt, NULL) + == 0) { + ret = sscanf(opt, "%d", &negnametimeo); + if (ret != 1 || negnametimeo < 0) { + vfs_mount_error(mp, "illegal negnametimeo: %s", + opt); + error = EINVAL; + goto out; + } + } if (vfs_getopt(mp->mnt_optnew, "sec", (void **) &secname, NULL) == 0) nfs_sec_name(secname, &args.flags); @@ -990,7 +1002,7 @@ nfs_mount(struct mount *mp) args.fh = nfh; error = mountnfs(&args, mp, nam, hst, krbname, dirpath, srvkrbname, - &vp, td->td_ucred, td); + &vp, td->td_ucred, td, negnametimeo); out: if (!error) { MNT_ILOCK(mp); @@ -1033,7 +1045,8 @@ nfs_cmount(struct mntarg *ma, void *data, int flags) static int mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam, char *hst, u_char *krbname, u_char *dirpath, u_char *srvkrbname, - struct vnode **vpp, struct ucred *cred, struct thread *td) + struct vnode **vpp, struct ucred *cred, struct thread *td, + int negnametimeo) { struct nfsmount *nmp; struct nfsnode *np; @@ -1101,6 +1114,7 @@ mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam, vfs_getnewfsid(mp); nmp->nm_mountp = mp; mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF | MTX_DUPOK); + nmp->nm_negnametimeo = negnametimeo; nfs_decode_args(mp, nmp, argp, cred, td); diff --git a/sys/fs/nfsclient/nfs_clvnops.c b/sys/fs/nfsclient/nfs_clvnops.c index 251966df56f7..3be823f1969b 100644 --- a/sys/fs/nfsclient/nfs_clvnops.c +++ b/sys/fs/nfsclient/nfs_clvnops.c @@ -220,10 +220,6 @@ int newnfs_directio_enable = 0; SYSCTL_INT(_vfs_newnfs, OID_AUTO, directio_enable, CTLFLAG_RW, &newnfs_directio_enable, 0, "Enable NFS directio"); -static int newnfs_neglookup_enable = 1; -SYSCTL_INT(_vfs_newnfs, OID_AUTO, neglookup_enable, CTLFLAG_RW, - &newnfs_neglookup_enable, 0, "Enable NFS negative lookup caching"); - /* * This sysctl allows other processes to mmap a file that has been opened * O_DIRECT by a process. In general, having processes mmap the file while @@ -702,11 +698,11 @@ nfs_close(struct vop_close_args *ap) * enabled. (What does this have to do with negative lookup * caching? Well nothing, except it was reported by the * same user that needed negative lookup caching and I wanted - * there to be a way to disable it via sysctl to see if it + * there to be a way to disable it to see if it * is the cause of some caching/coherency issue that might * crop up.) */ - if (newnfs_neglookup_enable == 0) + if (VFSTONFS(vp->v_mount)->nm_negnametimeo == 0) np->n_attrstamp = 0; if (np->n_flag & NWRITEERR) { np->n_flag &= ~NWRITEERR; @@ -996,6 +992,8 @@ nfs_lookup(struct vop_lookup_args *ap) struct thread *td = cnp->cn_thread; struct nfsfh *nfhp; struct nfsvattr dnfsva, nfsva; + struct vattr vattr; + time_t dmtime; *vpp = NULLVP; if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) && @@ -1016,37 +1014,68 @@ nfs_lookup(struct vop_lookup_args *ap) if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0) return (error); - if ((error = cache_lookup(dvp, vpp, cnp)) && - (error != ENOENT || newnfs_neglookup_enable != 0)) { - struct vattr vattr; - - if (error == ENOENT) { - if (!VOP_GETATTR(dvp, &vattr, cnp->cn_cred) && - vattr.va_mtime.tv_sec == np->n_dmtime) { - NFSINCRGLOBAL(newnfsstats.lookupcache_hits); - return (ENOENT); - } - cache_purge_negative(dvp); - np->n_dmtime = 0; - } else { - newvp = *vpp; - if (nfscl_nodeleg(newvp, 0) == 0 || - (!VOP_GETATTR(newvp, &vattr, cnp->cn_cred) && - vattr.va_ctime.tv_sec==VTONFS(newvp)->n_ctime)) { - NFSINCRGLOBAL(newnfsstats.lookupcache_hits); - if (cnp->cn_nameiop != LOOKUP && - (flags & ISLASTCN)) - cnp->cn_flags |= SAVENAME; - return (0); - } - cache_purge(newvp); - if (dvp != newvp) - vput(newvp); - else - vrele(newvp); - *vpp = NULLVP; + error = cache_lookup(dvp, vpp, cnp); + if (error > 0 && error != ENOENT) + return (error); + if (error == -1) { + /* + * We only accept a positive hit in the cache if the + * change time of the file matches our cached copy. + * Otherwise, we discard the cache entry and fallback + * to doing a lookup RPC. + */ + newvp = *vpp; + if (nfscl_nodeleg(newvp, 0) == 0 || + (!VOP_GETATTR(newvp, &vattr, cnp->cn_cred) + && vattr.va_ctime.tv_sec == VTONFS(newvp)->n_ctime)) { + NFSINCRGLOBAL(newnfsstats.lookupcache_hits); + if (cnp->cn_nameiop != LOOKUP && + (flags & ISLASTCN)) + cnp->cn_flags |= SAVENAME; + return (0); } + cache_purge(newvp); + if (dvp != newvp) + vput(newvp); + else + vrele(newvp); + *vpp = NULLVP; + } else if (error == ENOENT) { + if (dvp->v_iflag & VI_DOOMED) + return (ENOENT); + /* + * We only accept a negative hit in the cache if the + * modification time of the parent directory matches + * our cached copy. Otherwise, we discard all of the + * negative cache entries for this directory. We also + * only trust -ve cache entries for less than + * nm_negative_namecache_timeout seconds. + */ + if ((u_int)(ticks - np->n_dmtime_ticks) < + (nmp->nm_negnametimeo * hz) && + VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0 && + vattr.va_mtime.tv_sec == np->n_dmtime) { + NFSINCRGLOBAL(newnfsstats.lookupcache_hits); + return (ENOENT); + } + cache_purge_negative(dvp); + mtx_lock(&np->n_mtx); + np->n_dmtime = 0; + mtx_unlock(&np->n_mtx); } + + /* + * Cache the modification time of the parent directory in case + * the lookup fails and results in adding the first negative + * name cache entry for the directory. Since this is reading + * a single time_t, don't bother with locking. The + * modification time may be a bit stale, but it must be read + * before performing the lookup RPC to prevent a race where + * another lookup updates the timestamp on the directory after + * the lookup RPC has been performed on the server but before + * n_dmtime is set at the end of this function. + */ + dmtime = np->n_vattr.na_mtime.tv_sec; error = 0; newvp = NULLVP; NFSINCRGLOBAL(newnfsstats.lookupcache_misses); @@ -1056,29 +1085,60 @@ nfs_lookup(struct vop_lookup_args *ap) if (dattrflag) (void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1); if (error) { - if (newnfs_neglookup_enable != 0 && - error == ENOENT && (cnp->cn_flags & MAKEENTRY) && - cnp->cn_nameiop != CREATE) { - if (np->n_dmtime == 0) - np->n_dmtime = np->n_vattr.na_mtime.tv_sec; - cache_enter(dvp, NULL, cnp); - } if (newvp != NULLVP) { vput(newvp); *vpp = NULLVP; } - if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) && - (flags & ISLASTCN) && error == ENOENT) { - if (mp->mnt_flag & MNT_RDONLY) - error = EROFS; - else - error = EJUSTRETURN; + + if (error != ENOENT) { + if (NFS_ISV4(dvp)) + error = nfscl_maperr(td, error, (uid_t)0, + (gid_t)0); + return (error); } - if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) + + /* The requested file was not found. */ + if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) && + (flags & ISLASTCN)) { + /* + * XXX: UFS does a full VOP_ACCESS(dvp, + * VWRITE) here instead of just checking + * MNT_RDONLY. + */ + if (mp->mnt_flag & MNT_RDONLY) + return (EROFS); cnp->cn_flags |= SAVENAME; - if (NFS_ISV4(dvp)) - error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0); - return (error); + return (EJUSTRETURN); + } + + if ((cnp->cn_flags & MAKEENTRY) && cnp->cn_nameiop != CREATE) { + /* + * Maintain n_dmtime as the modification time + * of the parent directory when the oldest -ve + * name cache entry for this directory was + * added. If a -ve cache entry has already + * been added with a newer modification time + * by a concurrent lookup, then don't bother + * adding a cache entry. The modification + * time of the directory might have changed + * due to the file this lookup failed to find + * being created. In that case a subsequent + * lookup would incorrectly use the entry + * added here instead of doing an extra + * lookup. + */ + mtx_lock(&np->n_mtx); + if (np->n_dmtime <= dmtime) { + if (np->n_dmtime == 0) { + np->n_dmtime = dmtime; + np->n_dmtime_ticks = ticks; + } + mtx_unlock(&np->n_mtx); + cache_enter(dvp, NULL, cnp); + } else + mtx_unlock(&np->n_mtx); + } + return (ENOENT); } /* @@ -1829,7 +1889,7 @@ nfs_link(struct vop_link_args *ap) * but if negative caching is enabled, then the system * must care about lookup caching hit rate, so... */ - if (newnfs_neglookup_enable != 0 && + if (VFSTONFS(vp->v_mount)->nm_negnametimeo != 0 && (cnp->cn_flags & MAKEENTRY)) cache_enter(tdvp, vp, cnp); if (error && NFS_ISV4(vp)) @@ -1893,7 +1953,7 @@ nfs_symlink(struct vop_symlink_args *ap) * but if negative caching is enabled, then the system * must care about lookup caching hit rate, so... */ - if (newnfs_neglookup_enable != 0 && + if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 && (cnp->cn_flags & MAKEENTRY)) cache_enter(dvp, newvp, cnp); *ap->a_vpp = newvp; @@ -1973,7 +2033,7 @@ nfs_mkdir(struct vop_mkdir_args *ap) * but if negative caching is enabled, then the system * must care about lookup caching hit rate, so... */ - if (newnfs_neglookup_enable != 0 && + if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 && (cnp->cn_flags & MAKEENTRY)) cache_enter(dvp, newvp, cnp); *ap->a_vpp = newvp; diff --git a/sys/fs/nfsclient/nfsmount.h b/sys/fs/nfsclient/nfsmount.h index 2f0ada61dc7d..9fd93311e683 100644 --- a/sys/fs/nfsclient/nfsmount.h +++ b/sys/fs/nfsclient/nfsmount.h @@ -69,6 +69,7 @@ struct nfsmount { u_int64_t nm_maxfilesize; /* maximum file size */ int nm_tprintf_initial_delay; /* initial delay */ int nm_tprintf_delay; /* interval for messages */ + int nm_negnametimeo; /* timeout for -ve entries (sec) */ /* Newnfs additions */ struct nfsclclient *nm_clp; @@ -99,6 +100,10 @@ struct nfsmount { */ #define VFSTONFS(mp) ((struct nfsmount *)((mp)->mnt_data)) +#ifndef NFS_DEFAULT_NEGNAMETIMEO +#define NFS_DEFAULT_NEGNAMETIMEO 60 +#endif + #endif /* _KERNEL */ #endif /* _NFSCLIENT_NFSMOUNT_H_ */ diff --git a/sys/fs/nfsclient/nfsnode.h b/sys/fs/nfsclient/nfsnode.h index c0610c96cff5..3a5731a3bf1d 100644 --- a/sys/fs/nfsclient/nfsnode.h +++ b/sys/fs/nfsclient/nfsnode.h @@ -108,6 +108,7 @@ struct nfsnode { struct timespec n_mtime; /* Prev modify time. */ time_t n_ctime; /* Prev create time. */ time_t n_dmtime; /* Prev dir modify time. */ + int n_dmtime_ticks; /* Tick of -ve cache entry */ time_t n_expiry; /* Lease expiry time */ struct nfsfh *n_fhp; /* NFS File Handle */ struct vnode *n_vnode; /* associated vnode */