vfs: prevent avoidable evictions on mkdir of existing directories
mkdir -p /foo/bar/baz will mkdir each path component and ignore EEXIST. The NOCACHE lookup will make the namecache unnecessarily evict the existing entry, and then fallback to the fs lookup routine eventually leading namei to return an error as the directory is already there. For invocations like mkdir -p /usr/obj/usr/src/sys/GENERIC/modules this triggers fallbacks to the slowpath for concurrently executing lookups. Tested by: pho Discussed with: kib
This commit is contained in:
parent
ad5e10ec0c
commit
c7520caa4f
@ -1676,7 +1676,8 @@ cache_lookup_fallback(struct vnode *dvp, struct vnode **vpp, struct componentnam
|
||||
int error;
|
||||
bool whiteout;
|
||||
|
||||
MPASS((cnp->cn_flags & (MAKEENTRY | ISDOTDOT)) == MAKEENTRY);
|
||||
MPASS((cnp->cn_flags & ISDOTDOT) == 0);
|
||||
MPASS((cnp->cn_flags & (MAKEENTRY | NC_KEEPPOSENTRY)) != 0);
|
||||
|
||||
retry:
|
||||
hash = cache_get_hash(cnp->cn_nameptr, cnp->cn_namelen, dvp);
|
||||
@ -1768,7 +1769,7 @@ cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
|
||||
|
||||
MPASS((cnp->cn_flags & ISDOTDOT) == 0);
|
||||
|
||||
if ((cnp->cn_flags & MAKEENTRY) == 0) {
|
||||
if ((cnp->cn_flags & (MAKEENTRY | NC_KEEPPOSENTRY)) == 0) {
|
||||
cache_remove_cnp(dvp, cnp);
|
||||
return (0);
|
||||
}
|
||||
@ -2596,6 +2597,35 @@ cache_rename(struct vnode *fdvp, struct vnode *fvp, struct vnode *tdvp,
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef INVARIANTS
|
||||
/*
|
||||
* Validate that if an entry exists it matches.
|
||||
*/
|
||||
void
|
||||
cache_validate(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
|
||||
{
|
||||
struct namecache *ncp;
|
||||
struct mtx *blp;
|
||||
uint32_t hash;
|
||||
|
||||
hash = cache_get_hash(cnp->cn_nameptr, cnp->cn_namelen, dvp);
|
||||
if (CK_SLIST_EMPTY(NCHHASH(hash)))
|
||||
return;
|
||||
blp = HASH2BUCKETLOCK(hash);
|
||||
mtx_lock(blp);
|
||||
CK_SLIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
|
||||
if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
|
||||
!bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen)) {
|
||||
if (ncp->nc_vp != vp)
|
||||
panic("%s: mismatch (%p != %p); ncp %p [%s] dvp %p vp %p\n",
|
||||
__func__, vp, ncp->nc_vp, ncp, ncp->nc_name, ncp->nc_dvp,
|
||||
ncp->nc_vp);
|
||||
}
|
||||
}
|
||||
mtx_unlock(blp);
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Flush all entries referencing a particular filesystem.
|
||||
*/
|
||||
|
@ -5587,6 +5587,18 @@ vop_mkdir_post(void *ap, int rc)
|
||||
VFS_KNOTE_LOCKED(dvp, NOTE_WRITE | NOTE_LINK);
|
||||
}
|
||||
|
||||
#ifdef DEBUG_VFS_LOCKS
|
||||
void
|
||||
vop_mkdir_debugpost(void *ap, int rc)
|
||||
{
|
||||
struct vop_mkdir_args *a;
|
||||
|
||||
a = ap;
|
||||
if (!rc)
|
||||
cache_validate(a->a_dvp, *a->a_vpp, a->a_cnp);
|
||||
}
|
||||
#endif
|
||||
|
||||
void
|
||||
vop_mknod_pre(void *ap)
|
||||
{
|
||||
|
@ -3758,8 +3758,8 @@ kern_mkdirat(struct thread *td, int fd, const char *path, enum uio_seg segflg,
|
||||
restart:
|
||||
bwillwrite();
|
||||
NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
|
||||
NOCACHE, segflg, path, fd, &cap_mkdirat_rights,
|
||||
td);
|
||||
NC_NOMAKEENTRY | NC_KEEPPOSENTRY, segflg, path, fd,
|
||||
&cap_mkdirat_rights, td);
|
||||
nd.ni_cnd.cn_flags |= WILLBEDIR;
|
||||
if ((error = namei(&nd)) != 0)
|
||||
return (error);
|
||||
|
@ -336,6 +336,7 @@ vop_rename {
|
||||
%% mkdir vpp - E -
|
||||
%! mkdir pre vop_mkdir_pre
|
||||
%! mkdir post vop_mkdir_post
|
||||
%! mkdir debugpost vop_mkdir_debugpost
|
||||
|
||||
vop_mkdir {
|
||||
IN struct vnode *dvp;
|
||||
|
@ -125,16 +125,19 @@ int cache_fplookup(struct nameidata *ndp, enum cache_fpl_status *status,
|
||||
/*
|
||||
* namei operational modifier flags, stored in ni_cnd.flags
|
||||
*/
|
||||
#define NC_NOMAKEENTRY 0x0001 /* name must not be added to cache */
|
||||
#define NC_KEEPPOSENTRY 0x0002 /* don't evict a positive entry */
|
||||
#define NOCACHE NC_NOMAKEENTRY /* for compatibility with older code */
|
||||
#define LOCKLEAF 0x0004 /* lock vnode on return */
|
||||
#define LOCKPARENT 0x0008 /* want parent vnode returned locked */
|
||||
#define WANTPARENT 0x0010 /* want parent vnode returned unlocked */
|
||||
#define NOCACHE 0x0020 /* name must not be left in cache */
|
||||
/* UNUSED 0x0020 */
|
||||
#define FOLLOW 0x0040 /* follow symbolic links */
|
||||
#define BENEATH 0x0080 /* No escape from the start dir */
|
||||
#define LOCKSHARED 0x0100 /* Shared lock leaf */
|
||||
#define NOFOLLOW 0x0000 /* do not follow symbolic links (pseudo) */
|
||||
#define RBENEATH 0x100000000ULL /* No escape, even tmp, from start dir */
|
||||
#define MODMASK 0xf000001fcULL /* mask of operational modifiers */
|
||||
#define MODMASK 0xf000001ffULL /* mask of operational modifiers */
|
||||
/*
|
||||
* Namei parameter descriptors.
|
||||
*
|
||||
|
@ -644,6 +644,15 @@ void cache_purge_negative(struct vnode *vp);
|
||||
void cache_rename(struct vnode *fdvp, struct vnode *fvp, struct vnode *tdvp,
|
||||
struct vnode *tvp, struct componentname *fcnp, struct componentname *tcnp);
|
||||
void cache_purgevfs(struct mount *mp);
|
||||
#ifdef INVARIANTS
|
||||
void cache_validate(struct vnode *dvp, struct vnode *vp,
|
||||
struct componentname *cnp);
|
||||
#else
|
||||
static inline void
|
||||
cache_validate(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
int change_dir(struct vnode *vp, struct thread *td);
|
||||
void cvtstat(struct stat *st, struct ostat *ost);
|
||||
void freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb);
|
||||
@ -880,6 +889,7 @@ void vop_lock_debugpost(void *a, int rc);
|
||||
void vop_unlock_debugpre(void *a);
|
||||
void vop_need_inactive_debugpre(void *a);
|
||||
void vop_need_inactive_debugpost(void *a, int rc);
|
||||
void vop_mkdir_debugpost(void *a, int rc);
|
||||
#else
|
||||
#define vop_fplookup_vexec_debugpre(x) do { } while (0)
|
||||
#define vop_fplookup_vexec_debugpost(x, y) do { } while (0)
|
||||
@ -889,6 +899,7 @@ void vop_need_inactive_debugpost(void *a, int rc);
|
||||
#define vop_unlock_debugpre(x) do { } while (0)
|
||||
#define vop_need_inactive_debugpre(x) do { } while (0)
|
||||
#define vop_need_inactive_debugpost(x, y) do { } while (0)
|
||||
#define vop_mkdir_debugpost(x, y) do { } while (0)
|
||||
#endif
|
||||
|
||||
void vop_rename_fail(struct vop_rename_args *ap);
|
||||
|
Loading…
Reference in New Issue
Block a user