diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c
index da15237ab3c6..79b2a370438a 100644
--- a/sys/kern/vfs_cache.c
+++ b/sys/kern/vfs_cache.c
@@ -56,11 +56,13 @@ __FBSDID("$FreeBSD$");
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 #include <sys/sdt.h>
+#include <sys/smr.h>
 #include <sys/smp.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/sysproto.h>
 #include <sys/vnode.h>
+#include <ck_queue.h>
 #ifdef KTRACE
 #include <sys/ktrace.h>
 #endif
@@ -104,7 +106,7 @@ SDT_PROBE_DEFINE2(vfs, namecache, shrink_negative, done, "struct vnode *",
  */
 struct namecache {
-	LIST_ENTRY(namecache) nc_hash;	/* hash chain */
+	CK_LIST_ENTRY(namecache) nc_hash;/* hash chain */
 	LIST_ENTRY(namecache) nc_src;	/* source vnode list */
 	TAILQ_ENTRY(namecache) nc_dst;	/* destination vnode list */
 	struct vnode *nc_dvp;		/* vnode of parent of name */
@@ -143,6 +145,25 @@ struct namecache_ts {
 #define NCF_DVDROP	0x10
 #define NCF_NEGATIVE	0x20
 #define NCF_HOTNEGATIVE	0x40
+#define NCF_INVALID	0x80
+
+static bool
+cache_ncp_invalid(struct namecache *ncp)
+{
+
+	atomic_thread_fence_acq();
+	return ((ncp->nc_flag & NCF_INVALID) != 0);
+}
+
+static void
+cache_ncp_invalidate(struct namecache *ncp)
+{
+
+	atomic_thread_fence_rel();
+	KASSERT((ncp->nc_flag & NCF_INVALID) == 0,
+	    ("%s: entry %p already invalid", __func__, ncp));
+	ncp->nc_flag |= NCF_INVALID;
+}
 
 /*
  * Name caching works as follows:
@@ -192,12 +213,14 @@ struct namecache_ts {
  * the first node, locking everything in order and revalidating the state.
  */
 
+VFS_SMR_DECLARE;
+
 /*
  * Structures associated with name caching.
  */
 #define NCHHASH(hash) \
 	(&nchashtbl[(hash) & nchash])
-static __read_mostly LIST_HEAD(nchashhead, namecache) *nchashtbl;/* Hash Table */
+static __read_mostly CK_LIST_HEAD(nchashhead, namecache) *nchashtbl;/* Hash Table */
 static u_long __read_mostly	nchash;			/* size of hash table */
 SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0,
     "Size of namecache hash table");
@@ -275,15 +298,15 @@ cache_alloc(int len, int ts)
 
 	if (__predict_false(ts)) {
 		if (len <= CACHE_PATH_CUTOFF)
-			ncp_ts = uma_zalloc(cache_zone_small_ts, M_WAITOK);
+			ncp_ts = uma_zalloc_smr(cache_zone_small_ts, M_WAITOK);
 		else
-			ncp_ts = uma_zalloc(cache_zone_large_ts, M_WAITOK);
+			ncp_ts = uma_zalloc_smr(cache_zone_large_ts, M_WAITOK);
 		ncp = &ncp_ts->nc_nc;
 	} else {
 		if (len <= CACHE_PATH_CUTOFF)
-			ncp = uma_zalloc(cache_zone_small, M_WAITOK);
+			ncp = uma_zalloc_smr(cache_zone_small, M_WAITOK);
 		else
-			ncp = uma_zalloc(cache_zone_large, M_WAITOK);
+			ncp = uma_zalloc_smr(cache_zone_large, M_WAITOK);
 	}
 	return (ncp);
 }
@@ -300,14 +323,14 @@ cache_free(struct namecache *ncp)
 	if (__predict_false(ncp->nc_flag & NCF_TS)) {
 		ncp_ts = __containerof(ncp, struct namecache_ts, nc_nc);
 		if (ncp->nc_nlen <= CACHE_PATH_CUTOFF)
-			uma_zfree(cache_zone_small_ts, ncp_ts);
+			uma_zfree_smr(cache_zone_small_ts, ncp_ts);
 		else
-			uma_zfree(cache_zone_large_ts, ncp_ts);
+			uma_zfree_smr(cache_zone_large_ts, ncp_ts);
 	} else {
 		if (ncp->nc_nlen <= CACHE_PATH_CUTOFF)
-			uma_zfree(cache_zone_small, ncp);
+			uma_zfree_smr(cache_zone_small, ncp);
 		else
-			uma_zfree(cache_zone_large, ncp);
+			uma_zfree_smr(cache_zone_large, ncp);
 	}
 }
@@ -606,7 +629,7 @@ sysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS)
 	}
 	/* Scan hash tables counting entries */
 	for (ncpp = nchashtbl, i = 0; i < n_nchash; ncpp++, i++)
-		LIST_FOREACH(ncp, ncpp, nc_hash)
+		CK_LIST_FOREACH(ncp, ncpp, nc_hash)
 			cntbuf[i]++;
 	cache_unlock_all_buckets();
 	for (error = 0, i = 0; i < n_nchash; i++)
@@ -639,7 +662,7 @@ sysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS)
 	/* Scan hash tables for applicable entries */
 	for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
 		count = 0;
-		LIST_FOREACH(ncp, ncpp, nc_hash) {
+		CK_LIST_FOREACH(ncp, ncpp, nc_hash) {
 			count++;
 		}
 		if (count)
@@ -860,7 +883,10 @@ cache_zap_locked(struct namecache *ncp, bool neg_locked)
 
 	CTR2(KTR_VFS, "cache_zap(%p) vp %p", ncp,
 	    (ncp->nc_flag & NCF_NEGATIVE) ? NULL : ncp->nc_vp);
-	LIST_REMOVE(ncp, nc_hash);
+
+	cache_ncp_invalidate(ncp);
+
+	CK_LIST_REMOVE(ncp, nc_hash);
 	if (!(ncp->nc_flag & NCF_NEGATIVE)) {
 		SDT_PROBE3(vfs, namecache, zap, done, ncp->nc_dvp,
 		    ncp->nc_name, ncp->nc_vp);
@@ -1013,7 +1039,7 @@ cache_zap_unlocked_bucket(struct namecache *ncp, struct componentname *cnp,
 	cache_sort_vnodes(&dvlp, &vlp);
 	cache_lock_vnodes(dvlp, vlp);
 	rw_wlock(blp);
-	LIST_FOREACH(rncp, (NCHHASH(hash)), nc_hash) {
+	CK_LIST_FOREACH(rncp, (NCHHASH(hash)), nc_hash) {
 		if (rncp == ncp && rncp->nc_dvp == dvp &&
 		    rncp->nc_nlen == cnp->cn_namelen &&
 		    !bcmp(rncp->nc_name, cnp->cn_nameptr, rncp->nc_nlen))
@@ -1225,12 +1251,12 @@ cache_lookup_nomakeentry(struct vnode *dvp, struct vnode **vpp,
 	hash = cache_get_hash(cnp->cn_nameptr, cnp->cn_namelen, dvp);
 	blp = HASH2BUCKETLOCK(hash);
 retry:
-	if (LIST_EMPTY(NCHHASH(hash)))
+	if (CK_LIST_EMPTY(NCHHASH(hash)))
 		goto out_no_entry;
 
 	rw_wlock(blp);
 
-	LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
+	CK_LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
 		counter_u64_add(numchecks, 1);
 		if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
 		    !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen))
@@ -1305,6 +1331,7 @@ cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
 	uint32_t hash;
 	enum vgetstate vs;
 	int error, ltype;
+	bool try_smr, doing_smr;
 
 #ifdef DEBUG_CACHE
 	if (__predict_false(!doingcache)) {
@@ -1321,7 +1348,11 @@ cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
 	if ((cnp->cn_flags & MAKEENTRY) == 0)
 		return (cache_lookup_nomakeentry(dvp, vpp, cnp, tsp, ticksp));
 
+	try_smr = true;
+	if (cnp->cn_nameiop == CREATE)
+		try_smr = false;
 retry:
+	doing_smr = false;
 	blp = NULL;
 	dvlp = NULL;
 	error = 0;
@@ -1361,10 +1392,17 @@ cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
 	}
 
 	hash = cache_get_hash(cnp->cn_nameptr, cnp->cn_namelen, dvp);
-	blp = HASH2BUCKETLOCK(hash);
-	rw_rlock(blp);
+retry_hashed:
+	if (try_smr) {
+		vfs_smr_enter();
+		doing_smr = true;
+		try_smr = false;
+	} else {
+		blp = HASH2BUCKETLOCK(hash);
+		rw_rlock(blp);
+	}
 
-	LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
+	CK_LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
 		counter_u64_add(numchecks, 1);
 		if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
 		    !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen))
@@ -1373,7 +1411,10 @@ cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
 
 	/* We failed to find an entry */
 	if (__predict_false(ncp == NULL)) {
-		rw_runlock(blp);
+		if (doing_smr)
+			vfs_smr_exit();
+		else
+			rw_runlock(blp);
 		SDT_PROBE3(vfs, namecache, lookup, miss, dvp, cnp->cn_nameptr,
 		    NULL);
 		counter_u64_add(nummiss, 1);
@@ -1402,8 +1443,22 @@ cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
 		ltype = VOP_ISLOCKED(dvp);
 		VOP_UNLOCK(dvp);
 	}
-	vs = vget_prep(*vpp);
-	cache_lookup_unlock(blp, dvlp);
+	if (doing_smr) {
+		if (cache_ncp_invalid(ncp)) {
+			vfs_smr_exit();
+			*vpp = NULL;
+			goto retry;
+		}
+		vs = vget_prep_smr(*vpp);
+		vfs_smr_exit();
+		if (vs == VGET_NONE) {
+			*vpp = NULL;
+			goto retry;
+		}
+	} else {
+		vs = vget_prep(*vpp);
+		cache_lookup_unlock(blp, dvlp);
+	}
 	error = vget_finish(*vpp, cnp->cn_lkflags, vs);
 	if (cnp->cn_flags & ISDOTDOT) {
 		vn_lock(dvp, ltype | LK_RETRY);
 	}
@@ -1427,21 +1482,37 @@ cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
 negative_success:
 	/* We found a negative match, and want to create it, so purge */
 	if (cnp->cn_nameiop == CREATE) {
+		MPASS(!doing_smr);
 		counter_u64_add(numnegzaps, 1);
 		goto zap_and_exit;
 	}
 
+	if (doing_smr) {
+		if ((ncp->nc_flag & NCF_HOTNEGATIVE) == 0) {
+			/*
+			 * We need to take locks to promote the entry.
+			 */
+			vfs_smr_exit();
+			doing_smr = false;
+			goto retry_hashed;
+		}
+	} else {
+		cache_negative_hit(ncp);
+	}
 	counter_u64_add(numneghits, 1);
-	cache_negative_hit(ncp);
 	if (ncp->nc_flag & NCF_WHITE)
 		cnp->cn_flags |= ISWHITEOUT;
 	SDT_PROBE2(vfs, namecache, lookup, hit__negative, dvp,
 	    ncp->nc_name);
 	cache_out_ts(ncp, tsp, ticksp);
-	cache_lookup_unlock(blp, dvlp);
+	if (doing_smr)
+		vfs_smr_exit();
+	else
+		cache_lookup_unlock(blp, dvlp);
 	return (ENOENT);
 
 zap_and_exit:
+	MPASS(!doing_smr);
 	if (blp != NULL)
 		error = cache_zap_rlocked_bucket(ncp, cnp, hash, blp);
 	else
@@ -1775,7 +1846,7 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
 	 * the same path name.
 	 */
 	ncpp = NCHHASH(hash);
-	LIST_FOREACH(n2, ncpp, nc_hash) {
+	CK_LIST_FOREACH(n2, ncpp, nc_hash) {
 		if (n2->nc_dvp == dvp &&
 		    n2->nc_nlen == cnp->cn_namelen &&
 		    !bcmp(n2->nc_name, cnp->cn_nameptr, n2->nc_nlen)) {
@@ -1839,12 +1910,6 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
 		LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src);
 	}
 
-	/*
-	 * Insert the new namecache entry into the appropriate chain
-	 * within the cache entries table.
-	 */
-	LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
-
 	/*
 	 * If the entry is "negative", we place it into the
 	 * "negative" cache queue, otherwise, we place it into the
@@ -1861,6 +1926,14 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
 		SDT_PROBE2(vfs, namecache, enter_negative, done, dvp,
 		    ncp->nc_name);
 	}
+
+	atomic_thread_fence_rel();
+	/*
+	 * Insert the new namecache entry into the appropriate chain
+	 * within the cache entries table.
+	 */
+	CK_LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
+
 	cache_enter_unlock(&cel);
 	if (numneg * ncnegfactor > lnumcache)
 		cache_negative_zap_one();
@@ -1908,6 +1981,11 @@ nchinit(void *dummy __unused)
 	    NULL, NULL, NULL, NULL, UMA_ALIGNOF(struct namecache_ts),
 	    UMA_ZONE_ZINIT);
 
+	VFS_SMR_ZONE_SET(cache_zone_small);
+	VFS_SMR_ZONE_SET(cache_zone_small_ts);
+	VFS_SMR_ZONE_SET(cache_zone_large);
+	VFS_SMR_ZONE_SET(cache_zone_large_ts);
+
 	ncsize = desiredvnodes * ncsizefactor;
 	nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash);
 	ncbuckethash = cache_roundup_2(mp_ncpus * mp_ncpus) - 1;
@@ -1973,11 +2051,11 @@ cache_changesize(u_long newmaxvnodes)
 	nchashtbl = new_nchashtbl;
 	nchash = new_nchash;
 	for (i = 0; i <= old_nchash; i++) {
-		while ((ncp = LIST_FIRST(&old_nchashtbl[i])) != NULL) {
+		while ((ncp = CK_LIST_FIRST(&old_nchashtbl[i])) != NULL) {
 			hash = cache_get_hash(ncp->nc_name, ncp->nc_nlen,
 			    ncp->nc_dvp);
-			LIST_REMOVE(ncp, nc_hash);
-			LIST_INSERT_HEAD(NCHHASH(hash), ncp, nc_hash);
+			CK_LIST_REMOVE(ncp, nc_hash);
+			CK_LIST_INSERT_HEAD(NCHHASH(hash), ncp, nc_hash);
 		}
 	}
 	ncsize = newncsize;
@@ -2091,7 +2169,7 @@ cache_purgevfs(struct mount *mp, bool force)
 		for (j = i; j < n_nchash; j += numbucketlocks) {
 retry:
 			bucket = &nchashtbl[j];
-			LIST_FOREACH_SAFE(ncp, bucket, nc_hash, nnp) {
+			CK_LIST_FOREACH_SAFE(ncp, bucket, nc_hash, nnp) {
 				cache_assert_bucket_locked(ncp, RA_WLOCKED);
 				if (ncp->nc_dvp->v_mount != mp)
 					continue;
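
The ordering contract in the patch is easy to miss in the diff noise: cache_enter_time() issues a release fence before publishing the entry with CK_LIST_INSERT_HEAD(), cache_zap_locked() calls cache_ncp_invalidate() (release fence, then NCF_INVALID) before unlinking, and lockless readers pair with both through the acquire fence in cache_ncp_invalid() before trusting an entry found under vfs_smr_enter(). Freeing through uma_zfree_smr() then keeps the memory from being reused while any such reader is still inside its read section. Below is a minimal user-space sketch of the same invalidate-then-check handshake; it is not FreeBSD code: smr_enter()/smr_exit(), struct entry, and the entry_* helpers are hypothetical stand-ins, and C11 fences replace atomic_thread_fence_acq()/atomic_thread_fence_rel().

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define FLAG_INVALID	0x80	/* NCF_INVALID analogue */

struct entry {
	_Atomic unsigned int flag;	/* nc_flag analogue */
	int value;
	struct entry *next;		/* hash chain; CK_LIST in the patch */
};

/* Stand-ins for vfs_smr_enter()/vfs_smr_exit(); real SMR pins an epoch. */
static void smr_enter(void) { }
static void smr_exit(void) { }

/*
 * Writer side, as in cache_zap_locked(): order all prior stores before
 * the flag store, then mark the entry dying before unlinking it.
 */
static void
entry_invalidate(struct entry *ep)
{
	atomic_thread_fence(memory_order_release);
	atomic_fetch_or_explicit(&ep->flag, FLAG_INVALID,
	    memory_order_relaxed);
	/* list removal and SMR-deferred free would follow here */
}

/*
 * Reader side, as in cache_ncp_invalid(): fence, then inspect the flag
 * of an entry found on the chain without holding any lock.
 */
static bool
entry_invalid(struct entry *ep)
{
	atomic_thread_fence(memory_order_acquire);
	return ((atomic_load_explicit(&ep->flag, memory_order_relaxed) &
	    FLAG_INVALID) != 0);
}

static int
lookup(struct entry *head, int *out)
{
	struct entry *ep;
	int error;

	error = -1;
	smr_enter();
	for (ep = head; ep != NULL; ep = ep->next) {
		if (entry_invalid(ep))
			break;	/* the patch restarts on the locked path */
		*out = ep->value;
		error = 0;
		break;
	}
	smr_exit();
	return (error);
}

int
main(void)
{
	struct entry e = { .value = 42, .next = NULL };
	int v;

	if (lookup(&e, &v) == 0)
		printf("found %d\n", v);
	entry_invalidate(&e);
	printf("invalid now: %d\n", (int)entry_invalid(&e));
	return (0);
}

The design point the sketch mirrors is that the lockless reader never blocks writers: on seeing the invalid flag, a failed vget_prep_smr() (VGET_NONE), or a cold negative entry needing promotion, the patched cache_lookup() simply bails out of the SMR section and retries on the locked path.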