Close a race in NFS lookup processing that could result in stale name cache

entries on one client when a directory was renamed on another client.  The
root cause for the stale entry being trusted is that each per-vnode nfsnode
structure has a single 'n_ctime' timestamp used to validate positive name
cache entries.  However, if there are multiple entries for a single vnode,
they all share a single timestamp.  To fix this, extend the name cache
to allow filesystems to optionally store a timestamp value in each name
cache entry.  The NFS clients now fetch the timestamp associated with
each name cache entry and use that to validate cache hits instead of the
timestamps previously stored in the nfsnode.  Another part of the fix is
that the NFS clients now use timestamps from the post-op attributes of
RPCs when adding name cache entries rather than pulling the timestamps out
of the file's attribute cache.  The latter is subject to races with other
lookups updating the attribute cache concurrently.  Some more details:
- Add a variant of nfsm_postop_attr() to the old NFS client that can return
  a vattr structure with a copy of the post-op attributes.
- Handle lookups of "." as a special case in the NFS clients since the name
  cache does not store name cache entries for ".", so we cannot get a
  useful timestamp.  It didn't really make much sense to recheck the
  attributes on the directory to validate the namecache hit for "."
  anyway.
- ABI compat shims for the name cache routines are present in this commit
  so that it is safe to MFC.

MFC after:	2 weeks
This commit is contained in:
John Baldwin 2012-01-20 20:02:01 +00:00
parent 462386c6b1
commit 5aefb4cbbf
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=230394
9 changed files with 172 additions and 139 deletions

View File

@ -3317,8 +3317,9 @@ nfsrpc_readdirplus(vnode_t vp, struct uio *uiop, nfsuint64 *cookiep,
ndp->ni_vp = newvp; ndp->ni_vp = newvp;
NFSCNHASH(cnp, HASHINIT); NFSCNHASH(cnp, HASHINIT);
if (cnp->cn_namelen <= NCHNAMLEN) { if (cnp->cn_namelen <= NCHNAMLEN) {
np->n_ctime = np->n_vattr.na_ctime; cache_enter_time(ndp->ni_dvp,
cache_enter(ndp->ni_dvp,ndp->ni_vp,cnp); ndp->ni_vp, cnp,
&nfsva.na_ctime);
} }
if (unlocknewvp) if (unlocknewvp)
vput(newvp); vput(newvp);

View File

@ -1016,12 +1016,12 @@ nfs_lookup(struct vop_lookup_args *ap)
struct vnode *newvp; struct vnode *newvp;
struct nfsmount *nmp; struct nfsmount *nmp;
struct nfsnode *np, *newnp; struct nfsnode *np, *newnp;
int error = 0, attrflag, dattrflag, ltype; int error = 0, attrflag, dattrflag, ltype, ncticks;
struct thread *td = cnp->cn_thread; struct thread *td = cnp->cn_thread;
struct nfsfh *nfhp; struct nfsfh *nfhp;
struct nfsvattr dnfsva, nfsva; struct nfsvattr dnfsva, nfsva;
struct vattr vattr; struct vattr vattr;
struct timespec dmtime; struct timespec nctime;
*vpp = NULLVP; *vpp = NULLVP;
if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) && if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) &&
@ -1042,10 +1042,23 @@ nfs_lookup(struct vop_lookup_args *ap)
if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0) if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0)
return (error); return (error);
error = cache_lookup(dvp, vpp, cnp); error = cache_lookup_times(dvp, vpp, cnp, &nctime, &ncticks);
if (error > 0 && error != ENOENT) if (error > 0 && error != ENOENT)
return (error); return (error);
if (error == -1) { if (error == -1) {
/*
* Lookups of "." are special and always return the
* current directory. cache_lookup() already handles
* associated locking bookkeeping, etc.
*/
if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
/* XXX: Is this really correct? */
if (cnp->cn_nameiop != LOOKUP &&
(flags & ISLASTCN))
cnp->cn_flags |= SAVENAME;
return (0);
}
/* /*
* We only accept a positive hit in the cache if the * We only accept a positive hit in the cache if the
* change time of the file matches our cached copy. * change time of the file matches our cached copy.
@ -1073,7 +1086,7 @@ nfs_lookup(struct vop_lookup_args *ap)
} }
if (nfscl_nodeleg(newvp, 0) == 0 || if (nfscl_nodeleg(newvp, 0) == 0 ||
(VOP_GETATTR(newvp, &vattr, cnp->cn_cred) == 0 && (VOP_GETATTR(newvp, &vattr, cnp->cn_cred) == 0 &&
timespeccmp(&vattr.va_ctime, &newnp->n_ctime, ==))) { timespeccmp(&vattr.va_ctime, &nctime, ==))) {
NFSINCRGLOBAL(newnfsstats.lookupcache_hits); NFSINCRGLOBAL(newnfsstats.lookupcache_hits);
if (cnp->cn_nameiop != LOOKUP && if (cnp->cn_nameiop != LOOKUP &&
(flags & ISLASTCN)) (flags & ISLASTCN))
@ -1092,36 +1105,21 @@ nfs_lookup(struct vop_lookup_args *ap)
/* /*
* We only accept a negative hit in the cache if the * We only accept a negative hit in the cache if the
* modification time of the parent directory matches * modification time of the parent directory matches
* our cached copy. Otherwise, we discard all of the * the cached copy in the name cache entry.
* negative cache entries for this directory. We also * Otherwise, we discard all of the negative cache
* only trust -ve cache entries for less than * entries for this directory. We also only trust
* nm_negative_namecache_timeout seconds. * negative cache entries for up to nm_negnametimeo
* seconds.
*/ */
if ((u_int)(ticks - np->n_dmtime_ticks) < if ((u_int)(ticks - ncticks) < (nmp->nm_negnametimeo * hz) &&
(nmp->nm_negnametimeo * hz) &&
VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0 && VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0 &&
timespeccmp(&vattr.va_mtime, &np->n_dmtime, ==)) { timespeccmp(&vattr.va_mtime, &nctime, ==)) {
NFSINCRGLOBAL(newnfsstats.lookupcache_hits); NFSINCRGLOBAL(newnfsstats.lookupcache_hits);
return (ENOENT); return (ENOENT);
} }
cache_purge_negative(dvp); cache_purge_negative(dvp);
mtx_lock(&np->n_mtx);
timespecclear(&np->n_dmtime);
mtx_unlock(&np->n_mtx);
} }
/*
* Cache the modification time of the parent directory in case
* the lookup fails and results in adding the first negative
* name cache entry for the directory. Since this is reading
* a single time_t, don't bother with locking. The
* modification time may be a bit stale, but it must be read
* before performing the lookup RPC to prevent a race where
* another lookup updates the timestamp on the directory after
* the lookup RPC has been performed on the server but before
* n_dmtime is set at the end of this function.
*/
dmtime = np->n_vattr.na_mtime;
error = 0; error = 0;
newvp = NULLVP; newvp = NULLVP;
NFSINCRGLOBAL(newnfsstats.lookupcache_misses); NFSINCRGLOBAL(newnfsstats.lookupcache_misses);
@ -1157,30 +1155,22 @@ nfs_lookup(struct vop_lookup_args *ap)
return (EJUSTRETURN); return (EJUSTRETURN);
} }
if ((cnp->cn_flags & MAKEENTRY) && cnp->cn_nameiop != CREATE) { if ((cnp->cn_flags & MAKEENTRY) && cnp->cn_nameiop != CREATE &&
dattrflag) {
/* /*
* Maintain n_dmtime as the modification time * Cache the modification time of the parent
* of the parent directory when the oldest -ve * directory from the post-op attributes in
* name cache entry for this directory was * the name cache entry. The negative cache
* added. If a -ve cache entry has already * entry will be ignored once the directory
* been added with a newer modification time * has changed. Don't bother adding the entry
* by a concurrent lookup, then don't bother * if the directory has already changed.
* adding a cache entry. The modification
* time of the directory might have changed
* due to the file this lookup failed to find
* being created. In that case a subsequent
* lookup would incorrectly use the entry
* added here instead of doing an extra
* lookup.
*/ */
mtx_lock(&np->n_mtx); mtx_lock(&np->n_mtx);
if (timespeccmp(&np->n_dmtime, &dmtime, <=)) { if (timespeccmp(&np->n_vattr.na_mtime,
if (!timespecisset(&np->n_dmtime)) { &dnfsva.na_mtime, ==)) {
np->n_dmtime = dmtime;
np->n_dmtime_ticks = ticks;
}
mtx_unlock(&np->n_mtx); mtx_unlock(&np->n_mtx);
cache_enter(dvp, NULL, cnp); cache_enter_time(dvp, NULL, cnp,
&dnfsva.na_mtime);
} else } else
mtx_unlock(&np->n_mtx); mtx_unlock(&np->n_mtx);
} }
@ -1279,9 +1269,8 @@ nfs_lookup(struct vop_lookup_args *ap)
if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
cnp->cn_flags |= SAVENAME; cnp->cn_flags |= SAVENAME;
if ((cnp->cn_flags & MAKEENTRY) && if ((cnp->cn_flags & MAKEENTRY) &&
(cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) { (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN)) && attrflag) {
np->n_ctime = np->n_vattr.na_vattr.va_ctime; cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime);
cache_enter(dvp, newvp, cnp);
} }
*vpp = newvp; *vpp = newvp;
return (0); return (0);

View File

@ -99,9 +99,6 @@ struct nfsnode {
time_t n_attrstamp; /* Attr. cache timestamp */ time_t n_attrstamp; /* Attr. cache timestamp */
struct nfs_accesscache n_accesscache[NFS_ACCESSCACHESIZE]; struct nfs_accesscache n_accesscache[NFS_ACCESSCACHESIZE];
struct timespec n_mtime; /* Prev modify time. */ struct timespec n_mtime; /* Prev modify time. */
struct timespec n_ctime; /* Prev create time. */
struct timespec n_dmtime; /* Prev dir modify time. */
int n_dmtime_ticks; /* Tick of -ve cache entry */
struct nfsfh *n_fhp; /* NFS File Handle */ struct nfsfh *n_fhp; /* NFS File Handle */
struct vnode *n_vnode; /* associated vnode */ struct vnode *n_vnode; /* associated vnode */
struct vnode *n_dvp; /* parent vnode */ struct vnode *n_dvp; /* parent vnode */

View File

@ -97,6 +97,8 @@ struct namecache {
TAILQ_ENTRY(namecache) nc_dst; /* destination vnode list */ TAILQ_ENTRY(namecache) nc_dst; /* destination vnode list */
struct vnode *nc_dvp; /* vnode of parent of name */ struct vnode *nc_dvp; /* vnode of parent of name */
struct vnode *nc_vp; /* vnode the name refers to */ struct vnode *nc_vp; /* vnode the name refers to */
struct timespec nc_time; /* timespec provided by fs */
int nc_ticks; /* ticks value when entry was added */
u_char nc_flag; /* flag bits */ u_char nc_flag; /* flag bits */
u_char nc_nlen; /* length of name */ u_char nc_nlen; /* length of name */
char nc_name[0]; /* segment name + nul */ char nc_name[0]; /* segment name + nul */
@ -394,10 +396,12 @@ cache_zap(ncp)
*/ */
int int
cache_lookup(dvp, vpp, cnp) cache_lookup_times(dvp, vpp, cnp, tsp, ticksp)
struct vnode *dvp; struct vnode *dvp;
struct vnode **vpp; struct vnode **vpp;
struct componentname *cnp; struct componentname *cnp;
struct timespec *tsp;
int *ticksp;
{ {
struct namecache *ncp; struct namecache *ncp;
uint32_t hash; uint32_t hash;
@ -422,6 +426,10 @@ cache_lookup(dvp, vpp, cnp)
dothits++; dothits++;
SDT_PROBE(vfs, namecache, lookup, hit, dvp, ".", SDT_PROBE(vfs, namecache, lookup, hit, dvp, ".",
*vpp, 0, 0); *vpp, 0, 0);
if (tsp != NULL)
timespecclear(tsp);
if (ticksp != NULL)
*ticksp = ticks;
goto success; goto success;
} }
if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
@ -440,19 +448,22 @@ cache_lookup(dvp, vpp, cnp)
CACHE_WUNLOCK(); CACHE_WUNLOCK();
return (0); return (0);
} }
if (dvp->v_cache_dd->nc_flag & NCF_ISDOTDOT) ncp = dvp->v_cache_dd;
*vpp = dvp->v_cache_dd->nc_vp; if (ncp->nc_flag & NCF_ISDOTDOT)
*vpp = ncp->nc_vp;
else else
*vpp = dvp->v_cache_dd->nc_dvp; *vpp = ncp->nc_dvp;
/* Return failure if negative entry was found. */ /* Return failure if negative entry was found. */
if (*vpp == NULL) { if (*vpp == NULL)
ncp = dvp->v_cache_dd;
goto negative_success; goto negative_success;
}
CTR3(KTR_VFS, "cache_lookup(%p, %s) found %p via ..", CTR3(KTR_VFS, "cache_lookup(%p, %s) found %p via ..",
dvp, cnp->cn_nameptr, *vpp); dvp, cnp->cn_nameptr, *vpp);
SDT_PROBE(vfs, namecache, lookup, hit, dvp, "..", SDT_PROBE(vfs, namecache, lookup, hit, dvp, "..",
*vpp, 0, 0); *vpp, 0, 0);
if (tsp != NULL)
*tsp = ncp->nc_time;
if (ticksp != NULL)
*ticksp = ncp->nc_ticks;
goto success; goto success;
} }
} }
@ -499,6 +510,10 @@ cache_lookup(dvp, vpp, cnp)
dvp, cnp->cn_nameptr, *vpp, ncp); dvp, cnp->cn_nameptr, *vpp, ncp);
SDT_PROBE(vfs, namecache, lookup, hit, dvp, ncp->nc_name, SDT_PROBE(vfs, namecache, lookup, hit, dvp, ncp->nc_name,
*vpp, 0, 0); *vpp, 0, 0);
if (tsp != NULL)
*tsp = ncp->nc_time;
if (ticksp != NULL)
*ticksp = ncp->nc_ticks;
goto success; goto success;
} }
@ -530,6 +545,10 @@ cache_lookup(dvp, vpp, cnp)
cnp->cn_flags |= ISWHITEOUT; cnp->cn_flags |= ISWHITEOUT;
SDT_PROBE(vfs, namecache, lookup, hit_negative, dvp, ncp->nc_name, SDT_PROBE(vfs, namecache, lookup, hit_negative, dvp, ncp->nc_name,
0, 0, 0); 0, 0, 0);
if (tsp != NULL)
*tsp = ncp->nc_time;
if (ticksp != NULL)
*ticksp = ncp->nc_ticks;
CACHE_WUNLOCK(); CACHE_WUNLOCK();
return (ENOENT); return (ENOENT);
@ -616,10 +635,11 @@ cache_lookup(dvp, vpp, cnp)
* Add an entry to the cache. * Add an entry to the cache.
*/ */
void void
cache_enter(dvp, vp, cnp) cache_enter_time(dvp, vp, cnp, tsp)
struct vnode *dvp; struct vnode *dvp;
struct vnode *vp; struct vnode *vp;
struct componentname *cnp; struct componentname *cnp;
struct timespec *tsp;
{ {
struct namecache *ncp, *n2; struct namecache *ncp, *n2;
struct nchashhead *ncpp; struct nchashhead *ncpp;
@ -692,6 +712,11 @@ cache_enter(dvp, vp, cnp)
ncp->nc_vp = vp; ncp->nc_vp = vp;
ncp->nc_dvp = dvp; ncp->nc_dvp = dvp;
ncp->nc_flag = flag; ncp->nc_flag = flag;
if (tsp != NULL)
ncp->nc_time = *tsp;
else
timespecclear(&ncp->nc_time);
ncp->nc_ticks = ticks;
len = ncp->nc_nlen = cnp->cn_namelen; len = ncp->nc_nlen = cnp->cn_namelen;
hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT); hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT);
strlcpy(ncp->nc_name, cnp->cn_nameptr, len + 1); strlcpy(ncp->nc_name, cnp->cn_nameptr, len + 1);
@ -708,6 +733,8 @@ cache_enter(dvp, vp, cnp)
if (n2->nc_dvp == dvp && if (n2->nc_dvp == dvp &&
n2->nc_nlen == cnp->cn_namelen && n2->nc_nlen == cnp->cn_namelen &&
!bcmp(n2->nc_name, cnp->cn_nameptr, n2->nc_nlen)) { !bcmp(n2->nc_name, cnp->cn_nameptr, n2->nc_nlen)) {
n2->nc_time = ncp->nc_time;
n2->nc_ticks = ncp->nc_ticks;
CACHE_WUNLOCK(); CACHE_WUNLOCK();
cache_free(ncp); cache_free(ncp);
return; return;
@ -1280,6 +1307,29 @@ vn_commname(struct vnode *vp, char *buf, u_int buflen)
return (0); return (0);
} }
/* ABI compat shims for old kernel modules. */
#undef cache_enter
#undef cache_lookup
void cache_enter(struct vnode *dvp, struct vnode *vp,
struct componentname *cnp);
int cache_lookup(struct vnode *dvp, struct vnode **vpp,
struct componentname *cnp);
/*
 * ABI compat shim for old kernel modules: a real cache_enter() function
 * (the macro of the same name is #undef'd above) that forwards to
 * cache_enter_time() with a NULL timestamp pointer.
 */
void
cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
{
cache_enter_time(dvp, vp, cnp, NULL);
}
/*
 * ABI compat shim for old kernel modules: a real cache_lookup() function
 * (the macro of the same name is #undef'd above) that forwards to
 * cache_lookup_times() with NULL timestamp and ticks pointers and
 * returns its result unchanged.
 */
int
cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
{
return (cache_lookup_times(dvp, vpp, cnp, NULL, NULL));
}
/* /*
* This function updates path string to vnode's full global path * This function updates path string to vnode's full global path
* and checks the size of the new path string against the pathlen argument. * and checks the size of the new path string against the pathlen argument.

View File

@ -978,8 +978,8 @@ nfsm_loadattr_xx(struct vnode **v, struct vattr *va, struct mbuf **md,
} }
int int
nfsm_postop_attr_xx(struct vnode **v, int *f, struct mbuf **md, nfsm_postop_attr_xx(struct vnode **v, int *f, struct vattr *va,
caddr_t *dpos) struct mbuf **md, caddr_t *dpos)
{ {
u_int32_t *tl; u_int32_t *tl;
int t1; int t1;
@ -990,7 +990,7 @@ nfsm_postop_attr_xx(struct vnode **v, int *f, struct mbuf **md,
return EBADRPC; return EBADRPC;
*f = fxdr_unsigned(int, *tl); *f = fxdr_unsigned(int, *tl);
if (*f != 0) { if (*f != 0) {
t1 = nfs_loadattrcache(&ttvp, md, dpos, NULL, 1); t1 = nfs_loadattrcache(&ttvp, md, dpos, va, 1);
if (t1 != 0) { if (t1 != 0) {
*f = 0; *f = 0;
return t1; return t1;
@ -1020,7 +1020,7 @@ nfsm_wcc_data_xx(struct vnode **v, int *f, struct mbuf **md, caddr_t *dpos)
VTONFS(*v)->n_mtime.tv_nsec == fxdr_unsigned(u_int32_t, *(tl + 3))); VTONFS(*v)->n_mtime.tv_nsec == fxdr_unsigned(u_int32_t, *(tl + 3)));
mtx_unlock(&(VTONFS(*v))->n_mtx); mtx_unlock(&(VTONFS(*v))->n_mtx);
} }
t1 = nfsm_postop_attr_xx(v, &ttattrf, md, dpos); t1 = nfsm_postop_attr_xx(v, &ttattrf, NULL, md, dpos);
if (t1) if (t1)
return t1; return t1;
if (*f) if (*f)

View File

@ -913,7 +913,7 @@ nfs_lookup(struct vop_lookup_args *ap)
struct vnode **vpp = ap->a_vpp; struct vnode **vpp = ap->a_vpp;
struct mount *mp = dvp->v_mount; struct mount *mp = dvp->v_mount;
struct vattr vattr; struct vattr vattr;
struct timespec dmtime; struct timespec nctime;
int flags = cnp->cn_flags; int flags = cnp->cn_flags;
struct vnode *newvp; struct vnode *newvp;
struct nfsmount *nmp; struct nfsmount *nmp;
@ -922,7 +922,7 @@ nfs_lookup(struct vop_lookup_args *ap)
long len; long len;
nfsfh_t *fhp; nfsfh_t *fhp;
struct nfsnode *np, *newnp; struct nfsnode *np, *newnp;
int error = 0, attrflag, fhsize, ltype; int error = 0, attrflag, dattrflag, fhsize, ltype, ncticks;
int v3 = NFS_ISV3(dvp); int v3 = NFS_ISV3(dvp);
struct thread *td = cnp->cn_thread; struct thread *td = cnp->cn_thread;
@ -938,10 +938,23 @@ nfs_lookup(struct vop_lookup_args *ap)
*vpp = NULLVP; *vpp = NULLVP;
return (error); return (error);
} }
error = cache_lookup(dvp, vpp, cnp); error = cache_lookup_times(dvp, vpp, cnp, &nctime, &ncticks);
if (error > 0 && error != ENOENT) if (error > 0 && error != ENOENT)
return (error); return (error);
if (error == -1) { if (error == -1) {
/*
* Lookups of "." are special and always return the
* current directory. cache_lookup() already handles
* associated locking bookkeeping, etc.
*/
if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
/* XXX: Is this really correct? */
if (cnp->cn_nameiop != LOOKUP &&
(flags & ISLASTCN))
cnp->cn_flags |= SAVENAME;
return (0);
}
/* /*
* We only accept a positive hit in the cache if the * We only accept a positive hit in the cache if the
* change time of the file matches our cached copy. * change time of the file matches our cached copy.
@ -968,7 +981,7 @@ nfs_lookup(struct vop_lookup_args *ap)
mtx_unlock(&newnp->n_mtx); mtx_unlock(&newnp->n_mtx);
} }
if (VOP_GETATTR(newvp, &vattr, cnp->cn_cred) == 0 && if (VOP_GETATTR(newvp, &vattr, cnp->cn_cred) == 0 &&
timespeccmp(&vattr.va_ctime, &newnp->n_ctime, ==)) { timespeccmp(&vattr.va_ctime, &nctime, ==)) {
nfsstats.lookupcache_hits++; nfsstats.lookupcache_hits++;
if (cnp->cn_nameiop != LOOKUP && if (cnp->cn_nameiop != LOOKUP &&
(flags & ISLASTCN)) (flags & ISLASTCN))
@ -987,36 +1000,22 @@ nfs_lookup(struct vop_lookup_args *ap)
/* /*
* We only accept a negative hit in the cache if the * We only accept a negative hit in the cache if the
* modification time of the parent directory matches * modification time of the parent directory matches
* our cached copy. Otherwise, we discard all of the * the cached copy in the name cache entry.
* negative cache entries for this directory. We also * Otherwise, we discard all of the negative cache
* only trust -ve cache entries for less than * entries for this directory. We also only trust
* nm_negative_namecache_timeout seconds. * negative cache entries for up to nm_negnametimeo
* seconds.
*/ */
if ((u_int)(ticks - np->n_dmtime_ticks) < if ((u_int)(ticks - ncticks) < (nmp->nm_negnametimeo * hz) &&
(nmp->nm_negnametimeo * hz) &&
VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0 && VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0 &&
timespeccmp(&vattr.va_mtime, &np->n_dmtime, ==)) { timespeccmp(&vattr.va_mtime, &nctime, ==)) {
nfsstats.lookupcache_hits++; nfsstats.lookupcache_hits++;
return (ENOENT); return (ENOENT);
} }
cache_purge_negative(dvp); cache_purge_negative(dvp);
mtx_lock(&np->n_mtx);
timespecclear(&np->n_dmtime);
mtx_unlock(&np->n_mtx);
} }
/* attrflag = dattrflag = 0;
* Cache the modification time of the parent directory in case
* the lookup fails and results in adding the first negative
* name cache entry for the directory. Since this is reading
* a single time_t, don't bother with locking. The
* modification time may be a bit stale, but it must be read
* before performing the lookup RPC to prevent a race where
* another lookup updates the timestamp on the directory after
* the lookup RPC has been performed on the server but before
* n_dmtime is set at the end of this function.
*/
dmtime = np->n_vattr.va_mtime;
error = 0; error = 0;
newvp = NULLVP; newvp = NULLVP;
nfsstats.lookupcache_misses++; nfsstats.lookupcache_misses++;
@ -1031,7 +1030,7 @@ nfs_lookup(struct vop_lookup_args *ap)
nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_thread, cnp->cn_cred); nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_thread, cnp->cn_cred);
if (error) { if (error) {
if (v3) { if (v3) {
nfsm_postop_attr(dvp, attrflag); nfsm_postop_attr_va(dvp, dattrflag, &vattr);
m_freem(mrep); m_freem(mrep);
} }
goto nfsmout; goto nfsmout;
@ -1127,16 +1126,17 @@ nfs_lookup(struct vop_lookup_args *ap)
} }
} }
if (v3) { if (v3) {
nfsm_postop_attr(newvp, attrflag); nfsm_postop_attr_va(newvp, attrflag, &vattr);
nfsm_postop_attr(dvp, attrflag); nfsm_postop_attr(dvp, dattrflag);
} else } else {
nfsm_loadattr(newvp, NULL); nfsm_loadattr(newvp, &vattr);
attrflag = 1;
}
if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
cnp->cn_flags |= SAVENAME; cnp->cn_flags |= SAVENAME;
if ((cnp->cn_flags & MAKEENTRY) && if ((cnp->cn_flags & MAKEENTRY) &&
(cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) { (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN)) && attrflag) {
np->n_ctime = np->n_vattr.va_ctime; cache_enter_time(dvp, newvp, cnp, &vattr.va_ctime);
cache_enter(dvp, newvp, cnp);
} }
*vpp = newvp; *vpp = newvp;
m_freem(mrep); m_freem(mrep);
@ -1164,30 +1164,22 @@ nfs_lookup(struct vop_lookup_args *ap)
return (EJUSTRETURN); return (EJUSTRETURN);
} }
if ((cnp->cn_flags & MAKEENTRY) && cnp->cn_nameiop != CREATE) { if ((cnp->cn_flags & MAKEENTRY) && cnp->cn_nameiop != CREATE &&
dattrflag) {
/* /*
* Maintain n_dmtime as the modification time * Cache the modification time of the parent
* of the parent directory when the oldest -ve * directory from the post-op attributes in
* name cache entry for this directory was * the name cache entry. The negative cache
* added. If a -ve cache entry has already * entry will be ignored once the directory
* been added with a newer modification time * has changed. Don't bother adding the entry
* by a concurrent lookup, then don't bother * if the directory has already changed.
* adding a cache entry. The modification
* time of the directory might have changed
* due to the file this lookup failed to find
* being created. In that case a subsequent
* lookup would incorrectly use the entry
* added here instead of doing an extra
* lookup.
*/ */
mtx_lock(&np->n_mtx); mtx_lock(&np->n_mtx);
if (timespeccmp(&np->n_dmtime, &dmtime, <=)) { if (timespeccmp(&np->n_vattr.va_mtime,
if (!timespecisset(&np->n_dmtime)) { &vattr.va_mtime, ==)) {
np->n_dmtime = dmtime;
np->n_dmtime_ticks = ticks;
}
mtx_unlock(&np->n_mtx); mtx_unlock(&np->n_mtx);
cache_enter(dvp, NULL, cnp); cache_enter_time(dvp, NULL, cnp,
&vattr.va_mtime);
} else } else
mtx_unlock(&np->n_mtx); mtx_unlock(&np->n_mtx);
} }
@ -2473,6 +2465,7 @@ nfs_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
nfsuint64 cookie; nfsuint64 cookie;
struct nfsmount *nmp = VFSTONFS(vp->v_mount); struct nfsmount *nmp = VFSTONFS(vp->v_mount);
struct nfsnode *dnp = VTONFS(vp), *np; struct nfsnode *dnp = VTONFS(vp), *np;
struct vattr vattr;
nfsfh_t *fhp; nfsfh_t *fhp;
u_quad_t fileno; u_quad_t fileno;
int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i; int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i;
@ -2653,18 +2646,13 @@ nfs_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
dpos = dpossav1; dpos = dpossav1;
mdsav2 = md; mdsav2 = md;
md = mdsav1; md = mdsav1;
nfsm_loadattr(newvp, NULL); nfsm_loadattr(newvp, &vattr);
dpos = dpossav2; dpos = dpossav2;
md = mdsav2; md = mdsav2;
dp->d_type = dp->d_type = IFTODT(VTTOIF(vattr.va_type));
IFTODT(VTTOIF(np->n_vattr.va_type));
ndp->ni_vp = newvp; ndp->ni_vp = newvp;
/* cache_enter_time(ndp->ni_dvp, ndp->ni_vp, cnp,
* Update n_ctime so subsequent lookup &vattr.va_ctime);
* doesn't purge entry.
*/
np->n_ctime = np->n_vattr.va_ctime;
cache_enter(ndp->ni_dvp, ndp->ni_vp, cnp);
} }
} else { } else {
/* Just skip over the file handle */ /* Just skip over the file handle */

View File

@ -152,8 +152,8 @@ int nfsm_getfh_xx(nfsfh_t **f, int *s, int v3, struct mbuf **md,
caddr_t *dpos); caddr_t *dpos);
int nfsm_loadattr_xx(struct vnode **v, struct vattr *va, struct mbuf **md, int nfsm_loadattr_xx(struct vnode **v, struct vattr *va, struct mbuf **md,
caddr_t *dpos); caddr_t *dpos);
int nfsm_postop_attr_xx(struct vnode **v, int *f, struct mbuf **md, int nfsm_postop_attr_xx(struct vnode **v, int *f, struct vattr *va,
caddr_t *dpos); struct mbuf **md, caddr_t *dpos);
int nfsm_wcc_data_xx(struct vnode **v, int *f, struct mbuf **md, int nfsm_wcc_data_xx(struct vnode **v, int *f, struct mbuf **md,
caddr_t *dpos); caddr_t *dpos);
@ -181,7 +181,14 @@ do { \
#define nfsm_postop_attr(v, f) \ #define nfsm_postop_attr(v, f) \
do { \ do { \
int32_t t1; \ int32_t t1; \
t1 = nfsm_postop_attr_xx(&v, &f, &md, &dpos); \ t1 = nfsm_postop_attr_xx(&v, &f, NULL, &md, &dpos); \
nfsm_dcheck(t1, mrep); \
} while (0)
#define nfsm_postop_attr_va(v, f, va) \
do { \
int32_t t1; \
t1 = nfsm_postop_attr_xx(&v, &f, va, &md, &dpos); \
nfsm_dcheck(t1, mrep); \ nfsm_dcheck(t1, mrep); \
} while (0) } while (0)

View File

@ -104,9 +104,6 @@ struct nfsnode {
time_t n_attrstamp; /* Attr. cache timestamp */ time_t n_attrstamp; /* Attr. cache timestamp */
struct nfs_accesscache n_accesscache[NFS_ACCESSCACHESIZE]; struct nfs_accesscache n_accesscache[NFS_ACCESSCACHESIZE];
struct timespec n_mtime; /* Prev modify time. */ struct timespec n_mtime; /* Prev modify time. */
struct timespec n_ctime; /* Prev create time. */
struct timespec n_dmtime; /* Prev dir modify time. */
int n_dmtime_ticks; /* Tick of -ve cache entry */
nfsfh_t *n_fhp; /* NFS File Handle */ nfsfh_t *n_fhp; /* NFS File Handle */
struct vnode *n_vnode; /* associated vnode */ struct vnode *n_vnode; /* associated vnode */
struct vnode *n_dvp; /* parent vnode */ struct vnode *n_dvp; /* parent vnode */

View File

@ -578,10 +578,14 @@ struct vattr;
struct vnode; struct vnode;
/* cache_* may belong in namei.h. */ /* cache_* may belong in namei.h. */
void cache_enter(struct vnode *dvp, struct vnode *vp, #define cache_enter(dvp, vp, cnp) \
struct componentname *cnp); cache_enter_time(dvp, vp, cnp, NULL)
int cache_lookup(struct vnode *dvp, struct vnode **vpp, void cache_enter_time(struct vnode *dvp, struct vnode *vp,
struct componentname *cnp); struct componentname *cnp, struct timespec *tsp);
#define cache_lookup(dvp, vpp, cnp) \
cache_lookup_times(dvp, vpp, cnp, NULL, NULL)
int cache_lookup_times(struct vnode *dvp, struct vnode **vpp,
struct componentname *cnp, struct timespec *tsp, int *ticksp);
void cache_purge(struct vnode *vp); void cache_purge(struct vnode *vp);
void cache_purge_negative(struct vnode *vp); void cache_purge_negative(struct vnode *vp);
void cache_purgevfs(struct mount *mp); void cache_purgevfs(struct mount *mp);