diff --git a/sys/kern/subr_witness.c b/sys/kern/subr_witness.c
index ed1203578c2c..a916bdd4be8e 100644
--- a/sys/kern/subr_witness.c
+++ b/sys/kern/subr_witness.c
@@ -625,7 +625,7 @@ static struct witness_order_list_entry order_lists[] = {
 	/*
 	 * VFS namecache
 	 */
-	{ "ncglobal", &lock_class_rw },
+	{ "ncvn", &lock_class_mtx_sleep },
 	{ "ncbuc", &lock_class_rw },
 	{ "vnode interlock", &lock_class_mtx_sleep },
 	{ "ncneg", &lock_class_mtx_sleep },
diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c
index 42ad0ac0ae07..2734c259dd92 100644
--- a/sys/kern/vfs_cache.c
+++ b/sys/kern/vfs_cache.c
@@ -151,21 +151,35 @@ struct namecache_ts {
  * name is located in the cache, it will be dropped.
  *
  * These locks are used (in the order in which they can be taken):
- * NAME		TYPE	ROLE
- * cache_lock	rwlock	global, needed for all modifications
- * bucketlock	rwlock	for access to given hash bucket
- * ncneg_mtx	mtx	negative entry LRU management
+ * NAME		TYPE	ROLE
+ * vnodelock	mtx	vnode lists and v_cache_dd field protection
+ * bucketlock	rwlock	for access to given set of hash buckets
+ * ncneg_mtx	mtx	negative entry LRU management
  *
- * A name -> vnode lookup can be safely performed by either locking cache_lock
- * or the relevant hash bucket.
+ * Additionally, ncneg_shrink_lock mtx is used to have at most one thread
+ * shrinking the LRU list.
  *
- * ".." and vnode -> name lookups require cache_lock.
+ * It is legal to take multiple vnodelock and bucketlock locks. The locking
+ * order is lower address first. Both are recursive.
  *
- * Modifications require both cache_lock and relevant bucketlock taken for
- * writing.
+ * "." lookups are lockless.
  *
- * Negative entry LRU management requires ncneg_mtx taken on top of either
- * cache_lock or bucketlock.
+ * ".." and vnode -> name lookups require vnodelock.
+ *
+ * name -> vnode lookup requires the relevant bucketlock to be held for reading.
+ *
+ * Insertions and removals of entries require involved vnodes and bucketlocks
+ * to be write-locked to prevent other threads from seeing the entry.
+ *
+ * Some lookups result in removal of the found entry (e.g. getting rid of a
+ * negative entry with the intent to create a positive one), which poses a
+ * problem when multiple threads reach the state. Similarly, two different
+ * threads can purge two different vnodes and try to remove the same name.
+ *
+ * If the already held vnode lock is lower than the second required lock, we
+ * can just take the other lock. However, in the opposite case, this could
+ * deadlock. As such, this is resolved by trylocking and if that fails unlocking
+ * the first node, locking everything in order and revalidating the state.
  */

 /*
@@ -196,15 +210,9 @@ SYSCTL_UINT(_vfs, OID_AUTO, ncsizefactor, CTLFLAG_RW, &ncsizefactor, 0,

 struct nchstats	nchstats;		/* cache effectiveness statistics */

-static struct rwlock cache_lock;
-RW_SYSINIT(vfscache, &cache_lock, "ncglobal");
-
-#define	CACHE_TRY_WLOCK()	rw_try_wlock(&cache_lock)
-#define	CACHE_UPGRADE_LOCK()	rw_try_upgrade(&cache_lock)
-#define	CACHE_RLOCK()		rw_rlock(&cache_lock)
-#define	CACHE_RUNLOCK()		rw_runlock(&cache_lock)
-#define	CACHE_WLOCK()		rw_wlock(&cache_lock)
-#define	CACHE_WUNLOCK()		rw_wunlock(&cache_lock)
+static struct mtx	ncneg_shrink_lock;
+MTX_SYSINIT(vfscache_shrink_neg, &ncneg_shrink_lock, "Name Cache shrink neg",
+    MTX_DEF);

 static struct mtx_padalign ncneg_mtx;
 MTX_SYSINIT(vfscache_neg, &ncneg_mtx, "ncneg", MTX_DEF);
@@ -214,6 +222,19 @@ static struct rwlock_padalign  *bucketlocks;
 #define	HASH2BUCKETLOCK(hash) \
 	((struct rwlock *)(&bucketlocks[((hash) % numbucketlocks)]))

+static u_int	numvnodelocks;
+static struct mtx *vnodelocks;
+static inline struct mtx *
+VP2VNODELOCK(struct vnode *vp)
+{
+	struct mtx *vlp;
+
+	if (vp == NULL)
+		return (NULL);
+	vlp = &vnodelocks[(((uintptr_t)(vp) >> 8) % numvnodelocks)];
+	return (vlp);
+}
+
 /*
  * UMA zones for the VFS cache.
  *
@@ -329,19 +350,49 @@ STATNODE_COUNTER(numfullpathfail2,
     "Number of fullpath search errors (VOP_VPTOCNP failures)");
 STATNODE_COUNTER(numfullpathfail4, "Number of fullpath search errors (ENOMEM)");
 STATNODE_COUNTER(numfullpathfound, "Number of successful fullpath calls");
-static long numupgrades; STATNODE_ULONG(numupgrades,
-    "Number of updates of the cache after lookup (write lock + retry)");
 static long zap_and_exit_bucket_fail; STATNODE_ULONG(zap_and_exit_bucket_fail,
-    "Number of times bucketlocked zap_and_exit case failed to writelock");
+    "Number of times zap_and_exit failed to lock");
+static long cache_lock_vnodes_cel_3_failures;
+STATNODE_ULONG(cache_lock_vnodes_cel_3_failures,
+    "Number of times 3-way vnode locking failed");

-static void cache_zap(struct namecache *ncp);
-static int vn_vptocnp_locked(struct vnode **vp, struct ucred *cred, char *buf,
-    u_int *buflen);
+static void cache_zap_locked(struct namecache *ncp, bool neg_locked);
 static int vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
     char *buf, char **retbuf, u_int buflen);

 static MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries");

+static int cache_yield;
+SYSCTL_INT(_vfs_cache, OID_AUTO, yield, CTLFLAG_RD, &cache_yield, 0,
+    "Number of times cache called yield");
+
+static void
+cache_maybe_yield(void)
+{
+
+	if (should_yield()) {
+		cache_yield++;
+		kern_yield(PRI_USER);
+	}
+}
+
+static inline void
+cache_assert_vlp_locked(struct mtx *vlp)
+{
+
+	if (vlp != NULL)
+		mtx_assert(vlp, MA_OWNED);
+}
+
+static inline void
+cache_assert_vnode_locked(struct vnode *vp)
+{
+	struct mtx *vlp;
+
+	vlp = VP2VNODELOCK(vp);
+	cache_assert_vlp_locked(vlp);
+}
+
 static uint32_t
 cache_get_hash(char *name, u_char len, struct vnode *dvp)
 {
@@ -352,21 +403,41 @@ cache_get_hash(char *name, u_char len, struct vnode *dvp)
 	return (hash);
 }

+static inline struct rwlock *
+NCP2BUCKETLOCK(struct namecache *ncp)
+{
+	uint32_t hash;
+
+	hash = cache_get_hash(nc_get_name(ncp), ncp->nc_nlen, ncp->nc_dvp);
+	return (HASH2BUCKETLOCK(hash));
+}
+
 #ifdef INVARIANTS
 static void
 cache_assert_bucket_locked(struct namecache *ncp, int mode)
 {
-	struct rwlock *bucketlock;
-	uint32_t hash;
+	struct rwlock *blp;

-	hash = cache_get_hash(nc_get_name(ncp), ncp->nc_nlen, ncp->nc_dvp);
-	bucketlock = HASH2BUCKETLOCK(hash);
-	rw_assert(bucketlock, mode);
+	blp = NCP2BUCKETLOCK(ncp);
+	rw_assert(blp, mode);
 }
 #else
 #define cache_assert_bucket_locked(x, y) do { } while (0)
 #endif

+#define cache_sort(x, y)	_cache_sort((void **)(x), (void **)(y))
+static void
+_cache_sort(void **p1, void **p2)
+{
+	void *tmp;
+
+	if (*p1 > *p2) {
+		tmp = *p2;
+		*p2 = *p1;
+		*p1 = tmp;
+	}
+}
+
 static void
 cache_lock_all_buckets(void)
 {
@@ -385,6 +456,56 @@ cache_unlock_all_buckets(void)
 		rw_wunlock(&bucketlocks[i]);
 }

+static void
+cache_lock_all_vnodes(void)
+{
+	u_int i;
+
+	for (i = 0; i < numvnodelocks; i++)
+		mtx_lock(&vnodelocks[i]);
+}
+
+static void
+cache_unlock_all_vnodes(void)
+{
+	u_int i;
+
+	for (i = 0; i < numvnodelocks; i++)
+		mtx_unlock(&vnodelocks[i]);
+}
+
+static int
+cache_trylock_vnodes(struct mtx *vlp1, struct mtx *vlp2)
+{
+
+	cache_sort(&vlp1, &vlp2);
+	MPASS(vlp2 != NULL);
+
+	if (vlp1 != NULL) {
+		if (!mtx_trylock(vlp1))
+			return (EAGAIN);
+	}
+	if (!mtx_trylock(vlp2)) {
+		if (vlp1 != NULL)
+			mtx_unlock(vlp1);
+		return (EAGAIN);
+	}
+
+	return (0);
+}
+
+static void
+cache_unlock_vnodes(struct mtx *vlp1, struct mtx *vlp2)
+{
+
+	MPASS(vlp1 != NULL || vlp2 != NULL);
+
+	if (vlp1 != NULL)
+		mtx_unlock(vlp1);
+	if (vlp2 != NULL)
+		mtx_unlock(vlp2);
+}
+
 static int
 sysctl_nchstats(SYSCTL_HANDLER_ARGS)
 {
@@ -426,9 +547,9 @@ retry:
 	if (req->oldptr == NULL)
 		return SYSCTL_OUT(req, 0, n_nchash * sizeof(int));
 	cntbuf = malloc(n_nchash * sizeof(int), M_TEMP, M_ZERO | M_WAITOK);
-	CACHE_RLOCK();
+	cache_lock_all_buckets();
 	if (n_nchash != nchash + 1) {
-		CACHE_RUNLOCK();
+		cache_unlock_all_buckets();
 		free(cntbuf, M_TEMP);
 		goto retry;
 	}
@@ -436,7 +557,7 @@ retry:
 	for (ncpp = nchashtbl, i = 0; i < n_nchash; ncpp++, i++)
 		LIST_FOREACH(ncp, ncpp, nc_hash)
 			cntbuf[i]++;
-	CACHE_RUNLOCK();
+	cache_unlock_all_buckets();
 	for (error = 0, i = 0; i < n_nchash; i++)
 		if ((error = SYSCTL_OUT(req, &cntbuf[i], sizeof(int))) != 0)
 			break;
@@ -459,7 +580,7 @@ sysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS)

 	if (!req->oldptr)
 		return SYSCTL_OUT(req, 0, 4 * sizeof(int));
-	CACHE_RLOCK();
+	cache_lock_all_buckets();
 	n_nchash = nchash + 1;	/* nchash is max index, not count */
 	used = 0;
 	maxlength = 0;
@@ -476,7 +597,7 @@ sysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS)
 			maxlength = count;
 	}
 	n_nchash = nchash + 1;
-	CACHE_RUNLOCK();
+	cache_unlock_all_buckets();
 	pct = (used * 100) / (n_nchash / 100);
 	error = SYSCTL_OUT(req, &n_nchash, sizeof(n_nchash));
 	if (error)
@@ -504,6 +625,7 @@ static void
 cache_negative_hit(struct namecache *ncp)
 {

+	MPASS(ncp->nc_vp == NULL);
 	mtx_lock(&ncneg_mtx);
 	TAILQ_REMOVE(&ncneg, ncp, nc_dst);
 	TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
@@ -514,9 +636,8 @@ static void
 cache_negative_insert(struct namecache *ncp)
 {

-	rw_assert(&cache_lock, RA_WLOCKED);
-	cache_assert_bucket_locked(ncp, RA_WLOCKED);
 	MPASS(ncp->nc_vp == NULL);
+	cache_assert_bucket_locked(ncp, RA_WLOCKED);
 	mtx_lock(&ncneg_mtx);
 	TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst);
 	numneg++;
@@ -524,43 +645,74 @@ cache_negative_insert(struct namecache *ncp)
 }

 static void
-cache_negative_remove(struct namecache *ncp)
+cache_negative_remove(struct namecache *ncp, bool neg_locked)
 {

-	rw_assert(&cache_lock, RA_WLOCKED);
-	cache_assert_bucket_locked(ncp, RA_WLOCKED);
 	MPASS(ncp->nc_vp == NULL);
-	mtx_lock(&ncneg_mtx);
+	cache_assert_bucket_locked(ncp, RA_WLOCKED);
+	if (!neg_locked)
+		mtx_lock(&ncneg_mtx);
+	else
+		mtx_assert(&ncneg_mtx, MA_OWNED);
 	TAILQ_REMOVE(&ncneg, ncp, nc_dst);
 	numneg--;
-	mtx_unlock(&ncneg_mtx);
+	if (!neg_locked)
+		mtx_unlock(&ncneg_mtx);
 }

-static struct namecache *
+static void
 cache_negative_zap_one(void)
 {
-	struct namecache *ncp;
+	struct namecache *ncp, *ncp2;
+	struct mtx *dvlp;
+	struct rwlock *blp;

-	rw_assert(&cache_lock, RA_WLOCKED);
+	if (!mtx_trylock(&ncneg_shrink_lock))
+		return;
+
+	mtx_lock(&ncneg_mtx);
 	ncp = TAILQ_FIRST(&ncneg);
-	KASSERT(ncp->nc_vp == NULL, ("ncp %p vp %p on ncneg",
-	    ncp, ncp->nc_vp));
-	cache_zap(ncp);
-	return (ncp);
+	if (ncp == NULL) {
+		mtx_unlock(&ncneg_mtx);
+		goto out;
+	}
+	MPASS(ncp->nc_vp == NULL);
+	dvlp = VP2VNODELOCK(ncp->nc_dvp);
+	blp = NCP2BUCKETLOCK(ncp);
+	mtx_unlock(&ncneg_mtx);
+	mtx_lock(dvlp);
+	rw_wlock(blp);
+	mtx_lock(&ncneg_mtx);
+	ncp2 = TAILQ_FIRST(&ncneg);
+	if (ncp != ncp2 || dvlp != VP2VNODELOCK(ncp2->nc_dvp) ||
+	    blp != NCP2BUCKETLOCK(ncp2) || ncp2->nc_vp != NULL) {
+		ncp = NULL;
+		goto out_unlock_all;
+	}
+	cache_zap_locked(ncp, true);
+out_unlock_all:
+	mtx_unlock(&ncneg_mtx);
+	rw_wunlock(blp);
+	mtx_unlock(dvlp);
+out:
+	mtx_unlock(&ncneg_shrink_lock);
+	cache_free(ncp);
 }

 /*
- * cache_zap():
+ * cache_zap_locked():
  *
  *   Removes a namecache entry from cache, whether it contains an actual
  *   pointer to a vnode or if it is just a negative cache entry.
  */
 static void
-cache_zap_locked(struct namecache *ncp)
+cache_zap_locked(struct namecache *ncp, bool neg_locked)
 {

-	rw_assert(&cache_lock, RA_WLOCKED);
+	cache_assert_vnode_locked(ncp->nc_vp);
+	cache_assert_vnode_locked(ncp->nc_dvp);
 	cache_assert_bucket_locked(ncp, RA_WLOCKED);
+
 	CTR2(KTR_VFS, "cache_zap(%p) vp %p", ncp, ncp->nc_vp);
 	if (ncp->nc_vp != NULL) {
 		SDT_PROBE3(vfs, namecache, zap, done, ncp->nc_dvp,
@@ -577,7 +729,7 @@ cache_zap_locked(struct namecache *ncp)
 		LIST_REMOVE(ncp, nc_src);
 		if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) {
 			ncp->nc_flag |= NCF_DVDROP;
-			numcachehv--;
+			atomic_subtract_rel_long(&numcachehv, 1);
 		}
 	}
 	if (ncp->nc_vp) {
@@ -585,24 +737,198 @@ cache_zap_locked(struct namecache *ncp)
 		if (ncp == ncp->nc_vp->v_cache_dd)
 			ncp->nc_vp->v_cache_dd = NULL;
 	} else {
-		cache_negative_remove(ncp);
+		cache_negative_remove(ncp, neg_locked);
 	}
-	numcache--;
+	atomic_subtract_rel_long(&numcache, 1);
 }

 static void
-cache_zap(struct namecache *ncp)
+cache_zap_negative_locked_vnode_kl(struct namecache *ncp, struct vnode *vp)
 {
-	struct rwlock *bucketlock;
-	uint32_t hash;
+	struct rwlock *blp;

-	rw_assert(&cache_lock, RA_WLOCKED);
+	MPASS(ncp->nc_dvp == vp);
+	MPASS(ncp->nc_vp == NULL);
+	cache_assert_vnode_locked(vp);

-	hash = cache_get_hash(nc_get_name(ncp), ncp->nc_nlen, ncp->nc_dvp);
-	bucketlock = HASH2BUCKETLOCK(hash);
-	rw_wlock(bucketlock);
-	cache_zap_locked(ncp);
-	rw_wunlock(bucketlock);
+	blp = NCP2BUCKETLOCK(ncp);
+	rw_wlock(blp);
+	cache_zap_locked(ncp, false);
+	rw_wunlock(blp);
+}
+
+static bool
+cache_zap_locked_vnode_kl2(struct namecache *ncp, struct vnode *vp,
+    struct mtx **vlpp)
+{
+	struct mtx *pvlp, *vlp1, *vlp2, *to_unlock;
+	struct rwlock *blp;
+
+	MPASS(vp == ncp->nc_dvp || vp == ncp->nc_vp);
+	cache_assert_vnode_locked(vp);
+
+	if (ncp->nc_vp == NULL) {
+		if (*vlpp != NULL) {
+			mtx_unlock(*vlpp);
+			*vlpp = NULL;
+		}
+		cache_zap_negative_locked_vnode_kl(ncp, vp);
+		return (true);
+	}
+
+	pvlp = VP2VNODELOCK(vp);
+	blp = NCP2BUCKETLOCK(ncp);
+	vlp1 = VP2VNODELOCK(ncp->nc_dvp);
+	vlp2 = VP2VNODELOCK(ncp->nc_vp);
+
+	if (*vlpp == vlp1 || *vlpp == vlp2) {
+		to_unlock = *vlpp;
+		*vlpp = NULL;
+	} else {
+		if (*vlpp != NULL) {
+			mtx_unlock(*vlpp);
+			*vlpp = NULL;
+		}
+		cache_sort(&vlp1, &vlp2);
+		if (vlp1 == pvlp) {
+			mtx_lock(vlp2);
+			to_unlock = vlp2;
+		} else {
+			if (!mtx_trylock(vlp1))
+				goto out_relock;
+			to_unlock = vlp1;
+		}
+	}
+	rw_wlock(blp);
+	cache_zap_locked(ncp, false);
+	rw_wunlock(blp);
+	if (to_unlock != NULL)
+		mtx_unlock(to_unlock);
+	return (true);
+
+out_relock:
+	mtx_unlock(vlp2);
+	mtx_lock(vlp1);
+	mtx_lock(vlp2);
+	MPASS(*vlpp == NULL);
+	*vlpp = vlp1;
+	return (false);
+}
+
+static int
+cache_zap_locked_vnode(struct namecache *ncp, struct vnode *vp)
+{
+	struct mtx *pvlp, *vlp1, *vlp2, *to_unlock;
+	struct rwlock *blp;
+	int error = 0;
+
+	MPASS(vp == ncp->nc_dvp || vp == ncp->nc_vp);
+	cache_assert_vnode_locked(vp);
+
+	pvlp = VP2VNODELOCK(vp);
+	if (ncp->nc_vp == NULL) {
+		cache_zap_negative_locked_vnode_kl(ncp, vp);
+		goto out;
+	}
+
+	blp = NCP2BUCKETLOCK(ncp);
+	vlp1 = VP2VNODELOCK(ncp->nc_dvp);
+	vlp2 = VP2VNODELOCK(ncp->nc_vp);
+	cache_sort(&vlp1, &vlp2);
+	if (vlp1 == pvlp) {
+		mtx_lock(vlp2);
+		to_unlock = vlp2;
+	} else {
+		if (!mtx_trylock(vlp1)) {
+			error = EAGAIN;
+			goto out;
+		}
+		to_unlock = vlp1;
+	}
+	rw_wlock(blp);
+	cache_zap_locked(ncp, false);
+	rw_wunlock(blp);
+	mtx_unlock(to_unlock);
+out:
+	mtx_unlock(pvlp);
+	return (error);
+}
+
+static int
+cache_zap_rlocked_bucket(struct namecache *ncp, struct rwlock *blp)
+{
+	struct mtx *dvlp, *vlp;
+
+	cache_assert_bucket_locked(ncp, RA_RLOCKED);
+
+	dvlp = VP2VNODELOCK(ncp->nc_dvp);
+	vlp = VP2VNODELOCK(ncp->nc_vp);
+	if (cache_trylock_vnodes(dvlp, vlp) == 0) {
+		rw_runlock(blp);
+		rw_wlock(blp);
+		cache_zap_locked(ncp, false);
+		rw_wunlock(blp);
+		cache_unlock_vnodes(dvlp, vlp);
+		return (0);
+	}
+
+	rw_runlock(blp);
+	return (EAGAIN);
+}
+
+static int
+cache_zap_wlocked_bucket_kl(struct namecache *ncp, struct rwlock *blp,
+    struct mtx **vlpp1, struct mtx **vlpp2)
+{
+	struct mtx *dvlp, *vlp;
+
+	cache_assert_bucket_locked(ncp, RA_WLOCKED);
+
+	dvlp = VP2VNODELOCK(ncp->nc_dvp);
+	vlp = VP2VNODELOCK(ncp->nc_vp);
+	cache_sort(&dvlp, &vlp);
+
+	if (*vlpp1 == dvlp && *vlpp2 == vlp) {
+		cache_zap_locked(ncp, false);
+		cache_unlock_vnodes(dvlp, vlp);
+		*vlpp1 = NULL;
+		*vlpp2 = NULL;
+		return (0);
+	}
+
+	if (*vlpp1 != NULL)
+		mtx_unlock(*vlpp1);
+	if (*vlpp2 != NULL)
+		mtx_unlock(*vlpp2);
+	*vlpp1 = NULL;
+	*vlpp2 = NULL;
+
+	if (cache_trylock_vnodes(dvlp, vlp) == 0) {
+		cache_zap_locked(ncp, false);
+		cache_unlock_vnodes(dvlp, vlp);
+		return (0);
+	}
+
+	rw_wunlock(blp);
+	*vlpp1 = dvlp;
+	*vlpp2 = vlp;
+	if (*vlpp1 != NULL)
+		mtx_lock(*vlpp1);
+	mtx_lock(*vlpp2);
+	rw_wlock(blp);
+	return (EAGAIN);
+}
+
+static void
+cache_lookup_unlock(struct rwlock *blp, struct mtx *vlp)
+{
+
+	if (blp != NULL) {
+		rw_runlock(blp);
+		mtx_assert(vlp, MA_NOTOWNED);
+	} else {
+		mtx_unlock(vlp);
+	}
 }

 /*
@@ -622,44 +948,26 @@ cache_zap(struct namecache *ncp)
  * not recursively acquired.
  */
-enum { UNLOCKED, WLOCKED, RLOCKED };
-
-static void
-cache_unlock(int cache_locked)
-{
-
-	switch (cache_locked) {
-	case UNLOCKED:
-		break;
-	case WLOCKED:
-		CACHE_WUNLOCK();
-		break;
-	case RLOCKED:
-		CACHE_RUNLOCK();
-		break;
-	}
-}
-
 int
 cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
     struct timespec *tsp, int *ticksp)
 {
-	struct rwlock *bucketlock;
 	struct namecache *ncp;
+	struct rwlock *blp;
+	struct mtx *dvlp, *dvlp2;
 	uint32_t hash;
-	int error, ltype, cache_locked;
+	int error, ltype;

 	if (!doingcache) {
 		cnp->cn_flags &= ~MAKEENTRY;
 		return (0);
 	}
 retry:
-	bucketlock = NULL;
-	cache_locked = UNLOCKED;
+	blp = NULL;
+	dvlp = VP2VNODELOCK(dvp);
 	error = 0;
 	counter_u64_add(numcalls, 1);

-retry_wlocked:
 	if (cnp->cn_nameptr[0] == '.') {
 		if (cnp->cn_namelen == 1) {
 			*vpp = dvp;
@@ -693,32 +1001,37 @@ retry_wlocked:
 		}
 		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
 			counter_u64_add(dotdothits, 1);
-			if (cache_locked == UNLOCKED) {
-				CACHE_RLOCK();
-				cache_locked = RLOCKED;
-			}
-
-			if (dvp->v_cache_dd == NULL) {
+			dvlp2 = NULL;
+			mtx_lock(dvlp);
+retry_dotdot:
+			ncp = dvp->v_cache_dd;
+			if (ncp == NULL) {
 				SDT_PROBE3(vfs, namecache, lookup, miss, dvp,
 				    "..", NULL);
-				goto unlock;
-			}
-			if ((cnp->cn_flags & MAKEENTRY) == 0) {
-				if (cache_locked != WLOCKED &&
-				    !CACHE_UPGRADE_LOCK())
-					goto wlock;
-				ncp = NULL;
-				if (dvp->v_cache_dd->nc_flag & NCF_ISDOTDOT) {
-					ncp = dvp->v_cache_dd;
-					cache_zap(ncp);
-				}
-				dvp->v_cache_dd = NULL;
-				CACHE_WUNLOCK();
-				cache_free(ncp);
+				mtx_unlock(dvlp);
 				return (0);
 			}
-			ncp = dvp->v_cache_dd;
-			if (ncp->nc_flag & NCF_ISDOTDOT)
+			if ((cnp->cn_flags & MAKEENTRY) == 0) {
+				if ((ncp->nc_flag & NCF_ISDOTDOT) != 0) {
+					if (ncp->nc_dvp != dvp)
+						panic("dvp %p v_cache_dd %p\n", dvp, ncp);
+					if (!cache_zap_locked_vnode_kl2(ncp,
+					    dvp, &dvlp2))
+						goto retry_dotdot;
+					MPASS(dvp->v_cache_dd == NULL);
+					mtx_unlock(dvlp);
+					if (dvlp2 != NULL)
+						mtx_unlock(dvlp2);
+					cache_free(ncp);
+				} else {
+					dvp->v_cache_dd = NULL;
+					mtx_unlock(dvlp);
+					if (dvlp2 != NULL)
+						mtx_unlock(dvlp2);
+				}
+				return (0);
+			}
+			if ((ncp->nc_flag & NCF_ISDOTDOT) != 0)
 				*vpp = ncp->nc_vp;
 			else
 				*vpp = ncp->nc_dvp;
@@ -739,10 +1052,8 @@ retry_wlocked:
 	}

 	hash = cache_get_hash(cnp->cn_nameptr, cnp->cn_namelen, dvp);
-	if (cache_locked == UNLOCKED) {
-		bucketlock = HASH2BUCKETLOCK(hash);
-		rw_rlock(bucketlock);
-	}
+	blp = HASH2BUCKETLOCK(hash);
+	rw_rlock(blp);

 	LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
 		counter_u64_add(numchecks, 1);
@@ -795,24 +1106,9 @@ negative_success:
 	SDT_PROBE2(vfs, namecache, lookup, hit__negative, dvp,
 	    nc_get_name(ncp));
 	cache_out_ts(ncp, tsp, ticksp);
-	MPASS(bucketlock != NULL || cache_locked != UNLOCKED);
-	if (bucketlock != NULL)
-		rw_runlock(bucketlock);
-	cache_unlock(cache_locked);
+	cache_lookup_unlock(blp, dvlp);
 	return (ENOENT);

-wlock:
-	/*
-	 * We need to update the cache after our lookup, so upgrade to
-	 * a write lock and retry the operation.
-	 */
-	CACHE_RUNLOCK();
-wlock_unlocked:
-	CACHE_WLOCK();
-	numupgrades++;
-	cache_locked = WLOCKED;
-	goto retry_wlocked;
-
 success:
 	/*
 	 * On success we return a locked and ref'd vnode as per the lookup
@@ -825,10 +1121,7 @@ success:
 		VOP_UNLOCK(dvp, 0);
 	}
 	vhold(*vpp);
-	MPASS(bucketlock != NULL || cache_locked != UNLOCKED);
-	if (bucketlock != NULL)
-		rw_runlock(bucketlock);
-	cache_unlock(cache_locked);
+	cache_lookup_unlock(blp, dvlp);
 	error = vget(*vpp, cnp->cn_lkflags | LK_VNHELD, cnp->cn_thread);
 	if (cnp->cn_flags & ISDOTDOT) {
 		vn_lock(dvp, ltype | LK_RETRY);
@@ -850,32 +1143,232 @@ success:
 	return (-1);

 unlock:
-	MPASS(bucketlock != NULL || cache_locked != UNLOCKED);
-	if (bucketlock != NULL)
-		rw_runlock(bucketlock);
-	cache_unlock(cache_locked);
+	cache_lookup_unlock(blp, dvlp);
 	return (0);

 zap_and_exit:
-	if (bucketlock != NULL) {
-		rw_assert(&cache_lock, RA_UNLOCKED);
-		if (!CACHE_TRY_WLOCK()) {
-			rw_runlock(bucketlock);
-			bucketlock = NULL;
-			zap_and_exit_bucket_fail++;
-			goto wlock_unlocked;
-		}
-		cache_locked = WLOCKED;
-		rw_runlock(bucketlock);
-		bucketlock = NULL;
-	} else if (cache_locked != WLOCKED && !CACHE_UPGRADE_LOCK())
-		goto wlock;
-	cache_zap(ncp);
-	CACHE_WUNLOCK();
+	if (blp != NULL)
+		error = cache_zap_rlocked_bucket(ncp, blp);
+	else
+		error = cache_zap_locked_vnode(ncp, dvp);
+	if (error != 0) {
+		zap_and_exit_bucket_fail++;
+		cache_maybe_yield();
+		goto retry;
+	}
 	cache_free(ncp);
 	return (0);
 }

+struct celockstate {
+	struct mtx *vlp[3];
+	struct rwlock *blp[2];
+};
+CTASSERT((nitems(((struct celockstate *)0)->vlp) == 3));
+CTASSERT((nitems(((struct celockstate *)0)->blp) == 2));
+
+static inline void
+cache_celockstate_init(struct celockstate *cel)
+{
+
+	bzero(cel, sizeof(*cel));
+}
+
+static void
+cache_lock_vnodes_cel(struct celockstate *cel, struct vnode *vp,
+    struct vnode *dvp)
+{
+	struct mtx *vlp1, *vlp2;
+
+	MPASS(cel->vlp[0] == NULL);
+	MPASS(cel->vlp[1] == NULL);
+	MPASS(cel->vlp[2] == NULL);
+
+	MPASS(vp != NULL || dvp != NULL);
+
+	vlp1 = VP2VNODELOCK(vp);
+	vlp2 = VP2VNODELOCK(dvp);
+	cache_sort(&vlp1, &vlp2);
+
+	if (vlp1 != NULL) {
+		mtx_lock(vlp1);
+		cel->vlp[0] = vlp1;
+	}
+	mtx_lock(vlp2);
+	cel->vlp[1] = vlp2;
+}
+
+static void
+cache_unlock_vnodes_cel(struct celockstate *cel)
+{
+
+	MPASS(cel->vlp[0] != NULL || cel->vlp[1] != NULL);
+
+	if (cel->vlp[0] != NULL)
+		mtx_unlock(cel->vlp[0]);
+	if (cel->vlp[1] != NULL)
+		mtx_unlock(cel->vlp[1]);
+	if (cel->vlp[2] != NULL)
+		mtx_unlock(cel->vlp[2]);
+}
+
+static bool
+cache_lock_vnodes_cel_3(struct celockstate *cel, struct vnode *vp)
+{
+	struct mtx *vlp;
+	bool ret;
+
+	cache_assert_vlp_locked(cel->vlp[0]);
+	cache_assert_vlp_locked(cel->vlp[1]);
+	MPASS(cel->vlp[2] == NULL);
+
+	vlp = VP2VNODELOCK(vp);
+	if (vlp == NULL)
+		return (true);
+
+	ret = true;
+	if (vlp >= cel->vlp[1]) {
+		mtx_lock(vlp);
+	} else {
+		if (mtx_trylock(vlp))
+			goto out;
+		cache_lock_vnodes_cel_3_failures++;
+		cache_unlock_vnodes_cel(cel);
+		if (vlp < cel->vlp[0]) {
+			mtx_lock(vlp);
+			mtx_lock(cel->vlp[0]);
+			mtx_lock(cel->vlp[1]);
+		} else {
+			if (cel->vlp[0] != NULL)
+				mtx_lock(cel->vlp[0]);
+			mtx_lock(vlp);
+			mtx_lock(cel->vlp[1]);
+		}
+		ret = false;
+	}
+out:
+	cel->vlp[2] = vlp;
+	return (ret);
+}
+
+static void
+cache_lock_buckets_cel(struct celockstate *cel, struct rwlock *blp1,
+    struct rwlock *blp2)
+{
+
+	MPASS(cel->blp[0] == NULL);
+	MPASS(cel->blp[1] == NULL);
+
+	cache_sort(&blp1, &blp2);
+
+	if (blp1 != NULL) {
+		rw_wlock(blp1);
+		cel->blp[0] = blp1;
+	}
+	rw_wlock(blp2);
+	cel->blp[1] = blp2;
+}
+
+static void
+cache_unlock_buckets_cel(struct celockstate *cel)
+{
+
+	if (cel->blp[0] != NULL)
+		rw_wunlock(cel->blp[0]);
+	rw_wunlock(cel->blp[1]);
+}
+
+/*
+ * Lock part of the cache affected by the insertion.
+ *
+ * This means vnodelocks for dvp, vp and the relevant bucketlock.
+ * However, insertion can result in removal of an old entry. In this
+ * case we have an additional vnode and bucketlock pair to lock. If the
+ * entry is negative, ncelock is locked instead of the vnode.
+ *
+ * That is, in the worst case we have to lock 3 vnodes and 2 bucketlocks, while
+ * preserving the locking order (smaller address first).
+ */
+static void
+cache_enter_lock(struct celockstate *cel, struct vnode *dvp, struct vnode *vp,
+    uint32_t hash)
+{
+	struct namecache *ncp;
+	struct rwlock *blps[2];
+
+	blps[0] = HASH2BUCKETLOCK(hash);
+	for (;;) {
+		blps[1] = NULL;
+		cache_lock_vnodes_cel(cel, dvp, vp);
+		if (vp == NULL || vp->v_type != VDIR)
+			break;
+		ncp = vp->v_cache_dd;
+		if (ncp == NULL)
+			break;
+		if ((ncp->nc_flag & NCF_ISDOTDOT) == 0)
+			break;
+		MPASS(ncp->nc_dvp == vp);
+		blps[1] = NCP2BUCKETLOCK(ncp);
+		if (cache_lock_vnodes_cel_3(cel, ncp->nc_vp))
+			break;
+		/*
+		 * All vnodes got re-locked. Re-validate the state and if
+		 * nothing changed we are done. Otherwise restart.
+		 */
+		if (ncp == vp->v_cache_dd &&
+		    (ncp->nc_flag & NCF_ISDOTDOT) != 0 &&
+		    blps[1] == NCP2BUCKETLOCK(ncp) &&
+		    VP2VNODELOCK(ncp->nc_vp) == cel->vlp[2])
+			break;
+		cache_unlock_vnodes_cel(cel);
+		cel->vlp[0] = NULL;
+		cel->vlp[1] = NULL;
+		cel->vlp[2] = NULL;
+	}
+	cache_lock_buckets_cel(cel, blps[0], blps[1]);
+}
+
+static void
+cache_enter_lock_dd(struct celockstate *cel, struct vnode *dvp, struct vnode *vp,
+    uint32_t hash)
+{
+	struct namecache *ncp;
+	struct rwlock *blps[2];
+
+	blps[0] = HASH2BUCKETLOCK(hash);
+	for (;;) {
+		blps[1] = NULL;
+		cache_lock_vnodes_cel(cel, dvp, vp);
+		ncp = dvp->v_cache_dd;
+		if (ncp == NULL)
+			break;
+		if ((ncp->nc_flag & NCF_ISDOTDOT) == 0)
+			break;
+		MPASS(ncp->nc_dvp == dvp);
+		blps[1] = NCP2BUCKETLOCK(ncp);
+		if (cache_lock_vnodes_cel_3(cel, ncp->nc_vp))
+			break;
+		if (ncp == dvp->v_cache_dd &&
+		    (ncp->nc_flag & NCF_ISDOTDOT) != 0 &&
+		    blps[1] == NCP2BUCKETLOCK(ncp) &&
+		    VP2VNODELOCK(ncp->nc_vp) == cel->vlp[2])
+			break;
+		cache_unlock_vnodes_cel(cel);
+		cel->vlp[0] = NULL;
+		cel->vlp[1] = NULL;
+		cel->vlp[2] = NULL;
+	}
+	cache_lock_buckets_cel(cel, blps[0], blps[1]);
+}
+
+static void
+cache_enter_unlock(struct celockstate *cel)
+{
+
+	cache_unlock_buckets_cel(cel);
+	cache_unlock_vnodes_cel(cel);
+}
+
 /*
  * Add an entry to the cache.
  */
@@ -883,8 +1376,8 @@ void
 cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
     struct timespec *tsp, struct timespec *dtsp)
 {
-	struct rwlock *bucketlock;
-	struct namecache *ncp, *n2, *ndd, *nneg;
+	struct celockstate cel;
+	struct namecache *ncp, *n2, *ndd;
 	struct namecache_ts *n3;
 	struct nchashhead *ncpp;
 	uint32_t hash;
@@ -906,13 +1399,16 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
 	if (numcache >= desiredvnodes * ncsizefactor)
 		return;

-	ndd = nneg = NULL;
+	cache_celockstate_init(&cel);
+	ndd = NULL;
 	flag = 0;
 	if (cnp->cn_nameptr[0] == '.') {
 		if (cnp->cn_namelen == 1)
 			return;
 		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
-			CACHE_WLOCK();
+			len = cnp->cn_namelen;
+			hash = cache_get_hash(cnp->cn_nameptr, len, dvp);
+			cache_enter_lock_dd(&cel, dvp, vp, hash);
 			/*
 			 * If dotdot entry already exists, just retarget it
 			 * to new parent vnode, otherwise continue with new
@@ -926,7 +1422,7 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
 					TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst,
 					    ncp, nc_dst);
 				} else {
-					cache_negative_remove(ncp);
+					cache_negative_remove(ncp, false);
 				}
 				if (vp != NULL) {
 					TAILQ_INSERT_HEAD(&vp->v_cache_dst,
@@ -935,12 +1431,13 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
 					cache_negative_insert(ncp);
 				}
 				ncp->nc_vp = vp;
-				CACHE_WUNLOCK();
+				cache_enter_unlock(&cel);
 				return;
 			}
 			dvp->v_cache_dd = NULL;
+			cache_enter_unlock(&cel);
+			cache_celockstate_init(&cel);
 			SDT_PROBE3(vfs, namecache, enter, done, dvp, "..", vp);
-			CACHE_WUNLOCK();
 			flag = NCF_ISDOTDOT;
 		}
 	}
@@ -966,7 +1463,7 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
 	len = ncp->nc_nlen = cnp->cn_namelen;
 	hash = cache_get_hash(cnp->cn_nameptr, len, dvp);
 	strlcpy(nc_get_name(ncp), cnp->cn_nameptr, len + 1);
-	CACHE_WLOCK();
+	cache_enter_lock(&cel, dvp, vp, hash);

 	/*
 	 * See if this vnode or negative entry is already in the cache
@@ -993,9 +1490,7 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
 					n3->nc_flag |= NCF_DTS;
 				}
 			}
-			CACHE_WUNLOCK();
-			cache_free(ncp);
-			return;
+			goto out_unlock_free;
 		}
 	}

@@ -1004,17 +1499,14 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
 		 * See if we are trying to add .. entry, but some other lookup
 		 * has populated v_cache_dd pointer already.
 		 */
-		if (dvp->v_cache_dd != NULL) {
-			CACHE_WUNLOCK();
-			cache_free(ncp);
-			return;
-		}
+		if (dvp->v_cache_dd != NULL)
+			goto out_unlock_free;
 		KASSERT(vp == NULL || vp->v_type == VDIR,
 		    ("wrong vnode type %p", vp));
 		dvp->v_cache_dd = ncp;
 	}

-	numcache++;
+	atomic_add_rel_long(&numcache, 1);
 	if (vp != NULL) {
 		if (vp->v_type == VDIR) {
 			if (flag != NCF_ISDOTDOT) {
@@ -1025,7 +1517,7 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
 				 */
 				if ((ndd = vp->v_cache_dd) != NULL) {
 					if ((ndd->nc_flag & NCF_ISDOTDOT) != 0)
-						cache_zap(ndd);
+						cache_zap_locked(ndd, false);
 					else
 						ndd = NULL;
 				}
@@ -1039,14 +1531,11 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
 	if (flag != NCF_ISDOTDOT) {
 		if (LIST_EMPTY(&dvp->v_cache_src)) {
 			vhold(dvp);
-			numcachehv++;
+			atomic_add_rel_long(&numcachehv, 1);
 		}
 		LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src);
 	}

-	bucketlock = HASH2BUCKETLOCK(hash);
-	rw_wlock(bucketlock);
-
 	/*
 	 * Insert the new namecache entry into the appropriate chain
 	 * within the cache entries table.
@@ -1069,12 +1558,15 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
 		SDT_PROBE2(vfs, namecache, enter_negative, done, dvp,
 		    nc_get_name(ncp));
 	}
-	rw_wunlock(bucketlock);
+	cache_enter_unlock(&cel);
 	if (numneg * ncnegfactor > numcache)
-		nneg = cache_negative_zap_one();
-	CACHE_WUNLOCK();
+		cache_negative_zap_one();
 	cache_free(ndd);
-	cache_free(nneg);
+	return;
+out_unlock_free:
+	cache_enter_unlock(&cel);
+	cache_free(ncp);
+	return;
 }

 static u_int
@@ -1112,13 +1604,16 @@ nchinit(void *dummy __unused)
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT);

 	nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash);
-	numbucketlocks = cache_roundup_2(mp_ncpus * 16);
-	if (numbucketlocks > nchash)
-		numbucketlocks = nchash;
+	numbucketlocks = cache_roundup_2(mp_ncpus * 64);
 	bucketlocks = malloc(sizeof(*bucketlocks) * numbucketlocks, M_VFSCACHE,
 	    M_WAITOK | M_ZERO);
 	for (i = 0; i < numbucketlocks; i++)
-		rw_init_flags(&bucketlocks[i], "ncbuc", RW_DUPOK);
+		rw_init_flags(&bucketlocks[i], "ncbuc", RW_DUPOK | RW_RECURSE);
+	numvnodelocks = cache_roundup_2(mp_ncpus * 64);
+	vnodelocks = malloc(sizeof(*vnodelocks) * numvnodelocks, M_VFSCACHE,
+	    M_WAITOK | M_ZERO);
+	for (i = 0; i < numvnodelocks; i++)
+		mtx_init(&vnodelocks[i], "ncvn", NULL, MTX_DUPOK | MTX_RECURSE);

 	numcalls = counter_u64_alloc(M_WAITOK);
 	dothits = counter_u64_alloc(M_WAITOK);
@@ -1158,7 +1653,7 @@ cache_changesize(int newmaxvnodes)
 	 * None of the namecache entries in the table can be removed
 	 * because to do so, they have to be removed from the hash table.
 	 */
-	CACHE_WLOCK();
+	cache_lock_all_vnodes();
 	cache_lock_all_buckets();
 	old_nchashtbl = nchashtbl;
 	old_nchash = nchash;
@@ -1173,7 +1668,7 @@ cache_changesize(int newmaxvnodes)
 		}
 	}
 	cache_unlock_all_buckets();
-	CACHE_WUNLOCK();
+	cache_unlock_all_vnodes();
 	free(old_nchashtbl, M_VFSCACHE);
 }

@@ -1185,30 +1680,42 @@ cache_purge(struct vnode *vp)
 {
 	TAILQ_HEAD(, namecache) ncps;
 	struct namecache *ncp, *nnp;
+	struct mtx *vlp, *vlp2;

 	CTR1(KTR_VFS, "cache_purge(%p)", vp);
 	SDT_PROBE1(vfs, namecache, purge, done, vp);
+	if (LIST_EMPTY(&vp->v_cache_src) && TAILQ_EMPTY(&vp->v_cache_dst) &&
+	    vp->v_cache_dd == NULL)
+		return;
 	TAILQ_INIT(&ncps);
-	CACHE_WLOCK();
+	vlp = VP2VNODELOCK(vp);
+	vlp2 = NULL;
+	mtx_lock(vlp);
+retry:
 	while (!LIST_EMPTY(&vp->v_cache_src)) {
 		ncp = LIST_FIRST(&vp->v_cache_src);
-		cache_zap(ncp);
+		if (!cache_zap_locked_vnode_kl2(ncp, vp, &vlp2))
+			goto retry;
 		TAILQ_INSERT_TAIL(&ncps, ncp, nc_dst);
 	}
 	while (!TAILQ_EMPTY(&vp->v_cache_dst)) {
 		ncp = TAILQ_FIRST(&vp->v_cache_dst);
-		cache_zap(ncp);
+		if (!cache_zap_locked_vnode_kl2(ncp, vp, &vlp2))
+			goto retry;
 		TAILQ_INSERT_TAIL(&ncps, ncp, nc_dst);
 	}
-	if (vp->v_cache_dd != NULL) {
-		ncp = vp->v_cache_dd;
+	ncp = vp->v_cache_dd;
+	if (ncp != NULL) {
 		KASSERT(ncp->nc_flag & NCF_ISDOTDOT, ("lost dotdot link"));
-		cache_zap(ncp);
+		if (!cache_zap_locked_vnode_kl2(ncp, vp, &vlp2))
+			goto retry;
 		TAILQ_INSERT_TAIL(&ncps, ncp, nc_dst);
 	}
 	KASSERT(vp->v_cache_dd == NULL, ("incomplete purge"));
-	CACHE_WUNLOCK();
+	mtx_unlock(vlp);
+	if (vlp2 != NULL)
+		mtx_unlock(vlp2);
 	TAILQ_FOREACH_SAFE(ncp, &ncps, nc_dst, nnp) {
 		cache_free(ncp);
 	}
@@ -1222,18 +1729,20 @@ cache_purge_negative(struct vnode *vp)
 {
 	TAILQ_HEAD(, namecache) ncps;
 	struct namecache *ncp, *nnp;
+	struct mtx *vlp;

 	CTR1(KTR_VFS, "cache_purge_negative(%p)", vp);
 	SDT_PROBE1(vfs, namecache, purge_negative, done, vp);
 	TAILQ_INIT(&ncps);
-	CACHE_WLOCK();
+	vlp = VP2VNODELOCK(vp);
+	mtx_lock(vlp);
 	LIST_FOREACH_SAFE(ncp, &vp->v_cache_src, nc_src, nnp) {
 		if (ncp->nc_vp != NULL)
 			continue;
-		cache_zap(ncp);
+		cache_zap_negative_locked_vnode_kl(ncp, vp);
 		TAILQ_INSERT_TAIL(&ncps, ncp, nc_dst);
 	}
-	CACHE_WUNLOCK();
+	mtx_unlock(vlp);
 	TAILQ_FOREACH_SAFE(ncp, &ncps, nc_dst, nnp) {
 		cache_free(ncp);
 	}
@@ -1246,32 +1755,44 @@ void
 cache_purgevfs(struct mount *mp)
 {
 	TAILQ_HEAD(, namecache) ncps;
-	struct rwlock *bucketlock;
+	struct mtx *vlp1, *vlp2;
+	struct rwlock *blp;
 	struct nchashhead *bucket;
 	struct namecache *ncp, *nnp;
 	u_long i, j, n_nchash;
+	int error;

 	/* Scan hash tables for applicable entries */
 	SDT_PROBE1(vfs, namecache, purgevfs, done, mp);
 	TAILQ_INIT(&ncps);
-	CACHE_WLOCK();
 	n_nchash = nchash + 1;
+	vlp1 = vlp2 = NULL;
 	for (i = 0; i < numbucketlocks; i++) {
-		bucketlock = (struct rwlock *)&bucketlocks[i];
-		rw_wlock(bucketlock);
+		blp = (struct rwlock *)&bucketlocks[i];
+		rw_wlock(blp);
 		for (j = i; j < n_nchash; j += numbucketlocks) {
+retry:
 			bucket = &nchashtbl[j];
 			LIST_FOREACH_SAFE(ncp, bucket, nc_hash, nnp) {
 				cache_assert_bucket_locked(ncp, RA_WLOCKED);
 				if (ncp->nc_dvp->v_mount != mp)
 					continue;
-				cache_zap_locked(ncp);
+				error = cache_zap_wlocked_bucket_kl(ncp, blp,
+				    &vlp1, &vlp2);
+				if (error != 0)
+					goto retry;
 				TAILQ_INSERT_HEAD(&ncps, ncp, nc_dst);
 			}
 		}
-		rw_wunlock(bucketlock);
+		rw_wunlock(blp);
+		if (vlp1 == NULL && vlp2 == NULL)
+			cache_maybe_yield();
 	}
-	CACHE_WUNLOCK();
+	if (vlp1 != NULL)
+		mtx_unlock(vlp1);
+	if (vlp2 != NULL)
+		mtx_unlock(vlp2);
+
 	TAILQ_FOREACH_SAFE(ncp, &ncps, nc_dst, nnp) {
 		cache_free(ncp);
 	}
@@ -1442,31 +1963,21 @@ vn_fullpath_global(struct thread *td, struct vnode *vn,

 int
 vn_vptocnp(struct vnode **vp, struct ucred *cred, char *buf, u_int *buflen)
-{
-	int error;
-
-	CACHE_RLOCK();
-	error = vn_vptocnp_locked(vp, cred, buf, buflen);
-	if (error == 0)
-		CACHE_RUNLOCK();
-	return (error);
-}
-
-static int
-vn_vptocnp_locked(struct vnode **vp, struct ucred *cred, char *buf,
-    u_int *buflen)
 {
 	struct vnode *dvp;
 	struct namecache *ncp;
+	struct mtx *vlp;
 	int error;

+	vlp = VP2VNODELOCK(*vp);
+	mtx_lock(vlp);
 	TAILQ_FOREACH(ncp, &((*vp)->v_cache_dst), nc_dst) {
 		if ((ncp->nc_flag & NCF_ISDOTDOT) == 0)
 			break;
 	}
 	if (ncp != NULL) {
 		if (*buflen < ncp->nc_nlen) {
-			CACHE_RUNLOCK();
+			mtx_unlock(vlp);
 			vrele(*vp);
 			counter_u64_add(numfullpathfail4, 1);
 			error = ENOMEM;
@@ -1481,14 +1992,13 @@ vn_vptocnp_locked(struct vnode **vp, struct ucred *cred, char *buf,
 		dvp = *vp;
 		*vp = ncp->nc_dvp;
 		vref(*vp);
-		CACHE_RUNLOCK();
+		mtx_unlock(vlp);
 		vrele(dvp);
-		CACHE_RLOCK();
 		return (0);
 	}
 	SDT_PROBE1(vfs, namecache, fullpath, miss, vp);

-	CACHE_RUNLOCK();
+	mtx_unlock(vlp);
 	vn_lock(*vp, LK_SHARED | LK_RETRY);
 	error = VOP_VPTOCNP(*vp, &dvp, cred, buf, buflen);
 	vput(*vp);
@@ -1499,10 +2009,8 @@ vn_vptocnp_locked(struct vnode **vp, struct ucred *cred, char *buf,
 	}

 	*vp = dvp;
-	CACHE_RLOCK();
 	if (dvp->v_iflag & VI_DOOMED) {
 		/* forced unmount */
-		CACHE_RUNLOCK();
 		vrele(dvp);
 		error = ENOENT;
 		SDT_PROBE3(vfs, namecache, fullpath, return, error, vp, NULL);
@@ -1536,13 +2044,11 @@ vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
 	SDT_PROBE1(vfs, namecache, fullpath, entry, vp);
 	counter_u64_add(numfullpathcalls, 1);
 	vref(vp);
-	CACHE_RLOCK();
 	if (vp->v_type != VDIR) {
-		error = vn_vptocnp_locked(&vp, td->td_ucred, buf, &buflen);
+		error = vn_vptocnp(&vp, td->td_ucred, buf, &buflen);
 		if (error)
 			return (error);
 		if (buflen == 0) {
-			CACHE_RUNLOCK();
 			vrele(vp);
 			return (ENOMEM);
 		}
@@ -1552,7 +2058,6 @@ vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
 	while (vp != rdir && vp != rootvnode) {
 		if (vp->v_vflag & VV_ROOT) {
 			if (vp->v_iflag & VI_DOOMED) {	/* forced unmount */
-				CACHE_RUNLOCK();
 				vrele(vp);
 				error = ENOENT;
 				SDT_PROBE3(vfs, namecache, fullpath, return,
@@ -1561,14 +2066,11 @@ vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
 			}
 			vp1 = vp->v_mount->mnt_vnodecovered;
 			vref(vp1);
-			CACHE_RUNLOCK();
 			vrele(vp);
 			vp = vp1;
-			CACHE_RLOCK();
 			continue;
 		}
 		if (vp->v_type != VDIR) {
-			CACHE_RUNLOCK();
 			vrele(vp);
 			counter_u64_add(numfullpathfail1, 1);
 			error = ENOTDIR;
@@ -1576,11 +2078,10 @@ vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
 			    error, vp, NULL);
 			break;
 		}
-		error = vn_vptocnp_locked(&vp, td->td_ucred, buf, &buflen);
+		error = vn_vptocnp(&vp, td->td_ucred, buf, &buflen);
 		if (error)
 			break;
 		if (buflen == 0) {
-			CACHE_RUNLOCK();
 			vrele(vp);
 			error = ENOMEM;
 			SDT_PROBE3(vfs, namecache, fullpath, return, error,
@@ -1594,7 +2095,6 @@ vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
 		return (error);
 	if (!slash_prefixed) {
 		if (buflen == 0) {
-			CACHE_RUNLOCK();
 			vrele(vp);
 			counter_u64_add(numfullpathfail4, 1);
 			SDT_PROBE3(vfs, namecache, fullpath, return, ENOMEM,
@@ -1604,7 +2104,6 @@ vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
 		buf[--buflen] = '/';
 	}
 	counter_u64_add(numfullpathfound, 1);
-	CACHE_RUNLOCK();
 	vrele(vp);

 	SDT_PROBE3(vfs, namecache, fullpath, return, 0, startvp, buf + buflen);
@@ -1617,20 +2116,22 @@ vn_dir_dd_ino(struct vnode *vp)
 {
 	struct namecache *ncp;
 	struct vnode *ddvp;
+	struct mtx *vlp;

 	ASSERT_VOP_LOCKED(vp, "vn_dir_dd_ino");
-	CACHE_RLOCK();
+	vlp = VP2VNODELOCK(vp);
+	mtx_lock(vlp);
 	TAILQ_FOREACH(ncp, &(vp->v_cache_dst), nc_dst) {
 		if ((ncp->nc_flag & NCF_ISDOTDOT) != 0)
 			continue;
 		ddvp = ncp->nc_dvp;
 		vhold(ddvp);
-		CACHE_RUNLOCK();
+		mtx_unlock(vlp);
 		if (vget(ddvp, LK_SHARED | LK_NOWAIT | LK_VNHELD, curthread))
 			return (NULL);
 		return (ddvp);
 	}
-	CACHE_RUNLOCK();
+	mtx_unlock(vlp);
 	return (NULL);
 }

@@ -1638,19 +2139,21 @@ int
 vn_commname(struct vnode *vp, char *buf, u_int buflen)
 {
 	struct namecache *ncp;
+	struct mtx *vlp;
 	int l;

-	CACHE_RLOCK();
+	vlp = VP2VNODELOCK(vp);
+	mtx_lock(vlp);
 	TAILQ_FOREACH(ncp, &vp->v_cache_dst, nc_dst)
 		if ((ncp->nc_flag & NCF_ISDOTDOT) == 0)
 			break;
 	if (ncp == NULL) {
-		CACHE_RUNLOCK();
+		mtx_unlock(vlp);
 		return (ENOENT);
 	}
 	l = min(ncp->nc_nlen, buflen - 1);
 	memcpy(buf, nc_get_name(ncp), l);
-	CACHE_RUNLOCK();
+	mtx_unlock(vlp);
 	buf[l] = '\0';
 	return (0);
 }