vfs: prevent avoidable evictions on mkdir of existing directories

mkdir -p /foo/bar/baz will mkdir each path component and ignore EEXIST.

The NOCACHE lookup will make the namecache unnecessarily evict the existing entry,
and then fallback to the fs lookup routine eventually leading namei to return an
error as the directory is already there.

For invocations like mkdir -p /usr/obj/usr/src/sys/GENERIC/modules this triggers
fallbacks to the slowpath for concurrently executing lookups.

Tested by:	pho
Discussed with:	kib
This commit is contained in:
Mateusz Guzik 2020-10-22 19:28:12 +00:00
parent ad5e10ec0c
commit c7520caa4f
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=366950
6 changed files with 63 additions and 6 deletions

View File

@ -1676,7 +1676,8 @@ cache_lookup_fallback(struct vnode *dvp, struct vnode **vpp, struct componentnam
int error;
bool whiteout;
MPASS((cnp->cn_flags & (MAKEENTRY | ISDOTDOT)) == MAKEENTRY);
MPASS((cnp->cn_flags & ISDOTDOT) == 0);
MPASS((cnp->cn_flags & (MAKEENTRY | NC_KEEPPOSENTRY)) != 0);
retry:
hash = cache_get_hash(cnp->cn_nameptr, cnp->cn_namelen, dvp);
@ -1768,7 +1769,7 @@ cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
MPASS((cnp->cn_flags & ISDOTDOT) == 0);
if ((cnp->cn_flags & MAKEENTRY) == 0) {
if ((cnp->cn_flags & (MAKEENTRY | NC_KEEPPOSENTRY)) == 0) {
cache_remove_cnp(dvp, cnp);
return (0);
}
@ -2596,6 +2597,35 @@ cache_rename(struct vnode *fdvp, struct vnode *fvp, struct vnode *tdvp,
}
}
#ifdef INVARIANTS
/*
* Validate that if an entry exists it matches.
*/
void
cache_validate(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
{
struct namecache *ncp;
struct mtx *blp;
uint32_t hash;
hash = cache_get_hash(cnp->cn_nameptr, cnp->cn_namelen, dvp);
if (CK_SLIST_EMPTY(NCHHASH(hash)))
return;
blp = HASH2BUCKETLOCK(hash);
mtx_lock(blp);
CK_SLIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
!bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen)) {
if (ncp->nc_vp != vp)
panic("%s: mismatch (%p != %p); ncp %p [%s] dvp %p vp %p\n",
__func__, vp, ncp->nc_vp, ncp, ncp->nc_name, ncp->nc_dvp,
ncp->nc_vp);
}
}
mtx_unlock(blp);
}
#endif
/*
* Flush all entries referencing a particular filesystem.
*/

View File

@ -5587,6 +5587,18 @@ vop_mkdir_post(void *ap, int rc)
VFS_KNOTE_LOCKED(dvp, NOTE_WRITE | NOTE_LINK);
}
#ifdef DEBUG_VFS_LOCKS
void
vop_mkdir_debugpost(void *ap, int rc)
{
struct vop_mkdir_args *a;
a = ap;
if (!rc)
cache_validate(a->a_dvp, *a->a_vpp, a->a_cnp);
}
#endif
void
vop_mknod_pre(void *ap)
{

View File

@ -3758,8 +3758,8 @@ kern_mkdirat(struct thread *td, int fd, const char *path, enum uio_seg segflg,
restart:
bwillwrite();
NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
NOCACHE, segflg, path, fd, &cap_mkdirat_rights,
td);
NC_NOMAKEENTRY | NC_KEEPPOSENTRY, segflg, path, fd,
&cap_mkdirat_rights, td);
nd.ni_cnd.cn_flags |= WILLBEDIR;
if ((error = namei(&nd)) != 0)
return (error);

View File

@ -336,6 +336,7 @@ vop_rename {
%% mkdir vpp - E -
%! mkdir pre vop_mkdir_pre
%! mkdir post vop_mkdir_post
%! mkdir debugpost vop_mkdir_debugpost
vop_mkdir {
IN struct vnode *dvp;

View File

@ -125,16 +125,19 @@ int cache_fplookup(struct nameidata *ndp, enum cache_fpl_status *status,
/*
* namei operational modifier flags, stored in ni_cnd.flags
*/
#define NC_NOMAKEENTRY 0x0001 /* name must not be added to cache */
#define NC_KEEPPOSENTRY 0x0002 /* don't evict a positive entry */
#define NOCACHE NC_NOMAKEENTRY /* for compatibility with older code */
#define LOCKLEAF 0x0004 /* lock vnode on return */
#define LOCKPARENT 0x0008 /* want parent vnode returned locked */
#define WANTPARENT 0x0010 /* want parent vnode returned unlocked */
#define NOCACHE 0x0020 /* name must not be left in cache */
/* UNUSED 0x0020 */
#define FOLLOW 0x0040 /* follow symbolic links */
#define BENEATH 0x0080 /* No escape from the start dir */
#define LOCKSHARED 0x0100 /* Shared lock leaf */
#define NOFOLLOW 0x0000 /* do not follow symbolic links (pseudo) */
#define RBENEATH 0x100000000ULL /* No escape, even tmp, from start dir */
#define MODMASK 0xf000001fcULL /* mask of operational modifiers */
#define MODMASK 0xf000001ffULL /* mask of operational modifiers */
/*
* Namei parameter descriptors.
*

View File

@ -644,6 +644,15 @@ void cache_purge_negative(struct vnode *vp);
void cache_rename(struct vnode *fdvp, struct vnode *fvp, struct vnode *tdvp,
struct vnode *tvp, struct componentname *fcnp, struct componentname *tcnp);
void cache_purgevfs(struct mount *mp);
#ifdef INVARIANTS
void cache_validate(struct vnode *dvp, struct vnode *vp,
struct componentname *cnp);
#else
static inline void
cache_validate(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
{
}
#endif
int change_dir(struct vnode *vp, struct thread *td);
void cvtstat(struct stat *st, struct ostat *ost);
void freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb);
@ -880,6 +889,7 @@ void vop_lock_debugpost(void *a, int rc);
void vop_unlock_debugpre(void *a);
void vop_need_inactive_debugpre(void *a);
void vop_need_inactive_debugpost(void *a, int rc);
void vop_mkdir_debugpost(void *a, int rc);
#else
#define vop_fplookup_vexec_debugpre(x) do { } while (0)
#define vop_fplookup_vexec_debugpost(x, y) do { } while (0)
@ -889,6 +899,7 @@ void vop_need_inactive_debugpost(void *a, int rc);
#define vop_unlock_debugpre(x) do { } while (0)
#define vop_need_inactive_debugpre(x) do { } while (0)
#define vop_need_inactive_debugpost(x, y) do { } while (0)
#define vop_mkdir_debugpost(x, y) do { } while (0)
#endif
void vop_rename_fail(struct vop_rename_args *ap);