cache: lockless forward lookup with smr

This eliminates the need to take bucket locks in the common case.

Concurrent lookups utilizing the same vnodes are still bottlenecked on
referencing and locking path components; this will be taken care of
separately.
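
In outline, the fast path now follows the usual SMR read-side pattern. A
simplified sketch of that pattern (not the exact code; the hunks below have
the real control flow and validation):

	vfs_smr_enter();			/* lockless read section */
	CK_LIST_FOREACH(ncp, NCHHASH(hash), nc_hash) {
		if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
		    !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen))
			break;			/* candidate entry */
	}
	/* validate the entry, take a vnode reference, then: */
	vfs_smr_exit();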

Reviewed by:	kib
Tested by:	pho
Differential Revision:	https://reviews.freebsd.org/D23913
Author:	Mateusz Guzik
Date:	2020-07-01 05:59:08 +00:00
Commit:	5d1c042d32 (parent f8022be3e6)
Notes:	svn2git 2020-12-20 02:59:44 +00:00
	svn path=/head/; revision=362828

@@ -56,11 +56,13 @@ __FBSDID("$FreeBSD$");
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 #include <sys/sdt.h>
+#include <sys/smr.h>
 #include <sys/smp.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/sysproto.h>
 #include <sys/vnode.h>
+#include <ck_queue.h>
 #ifdef KTRACE
 #include <sys/ktrace.h>
 #endif
@@ -104,7 +106,7 @@ SDT_PROBE_DEFINE2(vfs, namecache, shrink_negative, done, "struct vnode *",
  */
 struct namecache {
-	LIST_ENTRY(namecache) nc_hash;	/* hash chain */
+	CK_LIST_ENTRY(namecache) nc_hash;/* hash chain */
 	LIST_ENTRY(namecache) nc_src;	/* source vnode list */
 	TAILQ_ENTRY(namecache) nc_dst;	/* destination vnode list */
 	struct vnode *nc_dvp;		/* vnode of parent of name */
@@ -143,6 +145,25 @@ struct namecache_ts {
 #define	NCF_DVDROP	0x10
 #define	NCF_NEGATIVE	0x20
 #define	NCF_HOTNEGATIVE	0x40
+#define	NCF_INVALID	0x80
+
+static bool
+cache_ncp_invalid(struct namecache *ncp)
+{
+
+	atomic_thread_fence_acq();
+	return ((ncp->nc_flag & NCF_INVALID) != 0);
+}
+
+static void
+cache_ncp_invalidate(struct namecache *ncp)
+{
+
+	atomic_thread_fence_rel();
+	KASSERT((ncp->nc_flag & NCF_INVALID) == 0,
+	    ("%s: entry %p already invalid", __func__, ncp));
+	ncp->nc_flag |= NCF_INVALID;
+}
 
 /*
  * Name caching works as follows:
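
The NCF_INVALID flag and the fence pair above are what let the lockless walk
trust what it read: the zapping code marks the entry invalid (behind a
release fence) before unlinking it, while a lockless reader copies the
fields it needs and only then checks the flag (behind an acquire fence).
Schematically, combining the cache_zap_locked() and cache_lookup() hunks
below:

	/* writer, bucket lock held */
	cache_ncp_invalidate(ncp);	/* fence_rel; set NCF_INVALID */
	CK_LIST_REMOVE(ncp, nc_hash);

	/* lockless reader */
	*vpp = ncp->nc_vp;		/* read fields first */
	if (cache_ncp_invalid(ncp)) {	/* fence_acq; test NCF_INVALID */
		vfs_smr_exit();
		*vpp = NULL;
		goto retry;		/* entry was zapped; restart */
	}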
@@ -192,12 +213,14 @@ struct namecache_ts {
  * the first node, locking everything in order and revalidating the state.
  */
 
+VFS_SMR_DECLARE;
+
 /*
  * Structures associated with name caching.
  */
 #define	NCHHASH(hash) \
 	(&nchashtbl[(hash) & nchash])
-static __read_mostly LIST_HEAD(nchashhead, namecache) *nchashtbl;/* Hash Table */
+static __read_mostly CK_LIST_HEAD(nchashhead, namecache) *nchashtbl;/* Hash Table */
 static u_long __read_mostly	nchash;			/* size of hash table */
 SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0,
     "Size of namecache hash table");
@@ -275,15 +298,15 @@ cache_alloc(int len, int ts)
 	if (__predict_false(ts)) {
 		if (len <= CACHE_PATH_CUTOFF)
-			ncp_ts = uma_zalloc(cache_zone_small_ts, M_WAITOK);
+			ncp_ts = uma_zalloc_smr(cache_zone_small_ts, M_WAITOK);
 		else
-			ncp_ts = uma_zalloc(cache_zone_large_ts, M_WAITOK);
+			ncp_ts = uma_zalloc_smr(cache_zone_large_ts, M_WAITOK);
 		ncp = &ncp_ts->nc_nc;
 	} else {
 		if (len <= CACHE_PATH_CUTOFF)
-			ncp = uma_zalloc(cache_zone_small, M_WAITOK);
+			ncp = uma_zalloc_smr(cache_zone_small, M_WAITOK);
 		else
-			ncp = uma_zalloc(cache_zone_large, M_WAITOK);
+			ncp = uma_zalloc_smr(cache_zone_large, M_WAITOK);
 	}
 	return (ncp);
 }
@@ -300,14 +323,14 @@ cache_free(struct namecache *ncp)
 	if (__predict_false(ncp->nc_flag & NCF_TS)) {
 		ncp_ts = __containerof(ncp, struct namecache_ts, nc_nc);
 		if (ncp->nc_nlen <= CACHE_PATH_CUTOFF)
-			uma_zfree(cache_zone_small_ts, ncp_ts);
+			uma_zfree_smr(cache_zone_small_ts, ncp_ts);
 		else
-			uma_zfree(cache_zone_large_ts, ncp_ts);
+			uma_zfree_smr(cache_zone_large_ts, ncp_ts);
 	} else {
 		if (ncp->nc_nlen <= CACHE_PATH_CUTOFF)
-			uma_zfree(cache_zone_small, ncp);
+			uma_zfree_smr(cache_zone_small, ncp);
 		else
-			uma_zfree(cache_zone_large, ncp);
+			uma_zfree_smr(cache_zone_large, ncp);
 	}
 }
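
The switch to uma_zalloc_smr()/uma_zfree_smr() is what makes the lockless
walk safe in the first place: for SMR-enabled zones, memory freed with
uma_zfree_smr() is not reused until every read section that could still
hold a pointer to it has exited. A sketch of the property being relied on
(illustrative, not real code):

	vfs_smr_enter();
	ncp = CK_LIST_FIRST(NCHHASH(hash));
	/*
	 * Another thread may cache_free() ncp right now, but the backing
	 * memory cannot be recycled until we leave the read section, so
	 * dereferencing ncp is safe (its contents may merely be stale).
	 */
	vfs_smr_exit();			/* after this, ncp may be reused */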
@@ -606,7 +629,7 @@ sysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS)
 	}
 	/* Scan hash tables counting entries */
 	for (ncpp = nchashtbl, i = 0; i < n_nchash; ncpp++, i++)
-		LIST_FOREACH(ncp, ncpp, nc_hash)
+		CK_LIST_FOREACH(ncp, ncpp, nc_hash)
 			cntbuf[i]++;
 	cache_unlock_all_buckets();
 	for (error = 0, i = 0; i < n_nchash; i++)
@@ -639,7 +662,7 @@ sysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS)
 	/* Scan hash tables for applicable entries */
 	for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) {
 		count = 0;
-		LIST_FOREACH(ncp, ncpp, nc_hash) {
+		CK_LIST_FOREACH(ncp, ncpp, nc_hash) {
 			count++;
 		}
 		if (count)
@@ -860,7 +883,10 @@ cache_zap_locked(struct namecache *ncp, bool neg_locked)
 	CTR2(KTR_VFS, "cache_zap(%p) vp %p", ncp,
 	    (ncp->nc_flag & NCF_NEGATIVE) ? NULL : ncp->nc_vp);
-	LIST_REMOVE(ncp, nc_hash);
+
+	cache_ncp_invalidate(ncp);
+
+	CK_LIST_REMOVE(ncp, nc_hash);
 	if (!(ncp->nc_flag & NCF_NEGATIVE)) {
 		SDT_PROBE3(vfs, namecache, zap, done, ncp->nc_dvp,
 		    ncp->nc_name, ncp->nc_vp);
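
Note the ordering here: the entry is marked invalid before it is unlinked
from the hash chain. A lockless reader may already hold a pointer to the
entry when the removal happens; the subsequent cache_ncp_invalid() check is
what tells it to discard the result and retry rather than act on a dying
entry.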
@@ -1013,7 +1039,7 @@ cache_zap_unlocked_bucket(struct namecache *ncp, struct componentname *cnp,
 	cache_sort_vnodes(&dvlp, &vlp);
 	cache_lock_vnodes(dvlp, vlp);
 	rw_wlock(blp);
-	LIST_FOREACH(rncp, (NCHHASH(hash)), nc_hash) {
+	CK_LIST_FOREACH(rncp, (NCHHASH(hash)), nc_hash) {
 		if (rncp == ncp && rncp->nc_dvp == dvp &&
 		    rncp->nc_nlen == cnp->cn_namelen &&
 		    !bcmp(rncp->nc_name, cnp->cn_nameptr, rncp->nc_nlen))
@@ -1225,12 +1251,12 @@ cache_lookup_nomakeentry(struct vnode *dvp, struct vnode **vpp,
 	hash = cache_get_hash(cnp->cn_nameptr, cnp->cn_namelen, dvp);
 	blp = HASH2BUCKETLOCK(hash);
 retry:
-	if (LIST_EMPTY(NCHHASH(hash)))
+	if (CK_LIST_EMPTY(NCHHASH(hash)))
 		goto out_no_entry;
 
 	rw_wlock(blp);
 
-	LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
+	CK_LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
 		counter_u64_add(numchecks, 1);
 		if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
 		    !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen))
@@ -1305,6 +1331,7 @@ cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
 	uint32_t hash;
 	enum vgetstate vs;
 	int error, ltype;
+	bool try_smr, doing_smr;
 
 #ifdef DEBUG_CACHE
 	if (__predict_false(!doingcache)) {
@@ -1321,7 +1348,11 @@ cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
 	if ((cnp->cn_flags & MAKEENTRY) == 0)
 		return (cache_lookup_nomakeentry(dvp, vpp, cnp, tsp, ticksp));
 
+	try_smr = true;
+	if (cnp->cn_nameiop == CREATE)
+		try_smr = false;
 retry:
+	doing_smr = false;
 	blp = NULL;
 	dvlp = NULL;
 	error = 0;
@@ -1361,10 +1392,17 @@ cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
 	}
 
 	hash = cache_get_hash(cnp->cn_nameptr, cnp->cn_namelen, dvp);
-	blp = HASH2BUCKETLOCK(hash);
-	rw_rlock(blp);
+retry_hashed:
+	if (try_smr) {
+		vfs_smr_enter();
+		doing_smr = true;
+		try_smr = false;
+	} else {
+		blp = HASH2BUCKETLOCK(hash);
+		rw_rlock(blp);
+	}
 
-	LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
+	CK_LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
 		counter_u64_add(numchecks, 1);
 		if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
 		    !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen))
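
try_smr is deliberately consumed on the first pass: entering the SMR
section clears it, so a later "goto retry" (or the retry_hashed fallback)
repeats the walk under the bucket lock. Schematically:

	/* pass 1: try_smr was true  -> vfs_smr_enter(), doing_smr = true */
	/* pass 2: try_smr now false -> rw_rlock(blp),   doing_smr = false */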
@@ -1373,7 +1411,10 @@ cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
 
 	/* We failed to find an entry */
 	if (__predict_false(ncp == NULL)) {
-		rw_runlock(blp);
+		if (doing_smr)
+			vfs_smr_exit();
+		else
+			rw_runlock(blp);
 		SDT_PROBE3(vfs, namecache, lookup, miss, dvp, cnp->cn_nameptr,
 		    NULL);
 		counter_u64_add(nummiss, 1);
@@ -1402,8 +1443,22 @@ cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
 		ltype = VOP_ISLOCKED(dvp);
 		VOP_UNLOCK(dvp);
 	}
-	vs = vget_prep(*vpp);
-	cache_lookup_unlock(blp, dvlp);
+	if (doing_smr) {
+		if (cache_ncp_invalid(ncp)) {
+			vfs_smr_exit();
+			*vpp = NULL;
+			goto retry;
+		}
+		vs = vget_prep_smr(*vpp);
+		vfs_smr_exit();
+		if (vs == VGET_NONE) {
+			*vpp = NULL;
+			goto retry;
+		}
+	} else {
+		vs = vget_prep(*vpp);
+		cache_lookup_unlock(blp, dvlp);
+	}
 	error = vget_finish(*vpp, cnp->cn_lkflags, vs);
 	if (cnp->cn_flags & ISDOTDOT) {
 		vn_lock(dvp, ltype | LK_RETRY);
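
Taking the vnode reference is the delicate step on the lockless path. The
entry is re-checked for invalidation first, and only then is a reference
attempted with vget_prep_smr(). Unlike vget_prep(), which runs with the
bucket lock held and cannot fail, vget_prep_smr() has to cope with the
vnode being concurrently reclaimed; when a reference can no longer be
safely acquired it returns VGET_NONE and the whole lookup restarts.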
@@ -1427,21 +1482,37 @@ cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
 negative_success:
 	/* We found a negative match, and want to create it, so purge */
 	if (cnp->cn_nameiop == CREATE) {
+		MPASS(!doing_smr);
 		counter_u64_add(numnegzaps, 1);
 		goto zap_and_exit;
 	}
 
+	if (doing_smr) {
+		if ((ncp->nc_flag & NCF_HOTNEGATIVE) == 0) {
+			/*
+			 * We need to take locks to promote the entry.
+			 */
+			vfs_smr_exit();
+			doing_smr = false;
+			goto retry_hashed;
+		}
+	} else {
+		cache_negative_hit(ncp);
+	}
+
 	counter_u64_add(numneghits, 1);
-	cache_negative_hit(ncp);
 	if (ncp->nc_flag & NCF_WHITE)
 		cnp->cn_flags |= ISWHITEOUT;
 	SDT_PROBE2(vfs, namecache, lookup, hit__negative, dvp,
 	    ncp->nc_name);
 	cache_out_ts(ncp, tsp, ticksp);
-	cache_lookup_unlock(blp, dvlp);
+	if (doing_smr)
+		vfs_smr_exit();
+	else
+		cache_lookup_unlock(blp, dvlp);
 	return (ENOENT);
 
 zap_and_exit:
+	MPASS(!doing_smr);
 	if (blp != NULL)
 		error = cache_zap_rlocked_bucket(ncp, cnp, hash, blp);
 	else
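
Negative entries get the same treatment with one wrinkle: promoting an
entry to the hot list (cache_negative_hit()) modifies shared state and so
needs locks. If the SMR pass finds a negative entry that is not already
NCF_HOTNEGATIVE, it leaves the read section and redoes the hash walk locked
via retry_hashed. CREATE lookups never run under SMR at all (hence the
MPASS(!doing_smr) assertions), since they may need to zap the entry.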
@@ -1775,7 +1846,7 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
 	 * the same path name.
 	 */
 	ncpp = NCHHASH(hash);
-	LIST_FOREACH(n2, ncpp, nc_hash) {
+	CK_LIST_FOREACH(n2, ncpp, nc_hash) {
 		if (n2->nc_dvp == dvp &&
 		    n2->nc_nlen == cnp->cn_namelen &&
 		    !bcmp(n2->nc_name, cnp->cn_nameptr, n2->nc_nlen)) {
@@ -1839,12 +1910,6 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
 		LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src);
 	}
 
-	/*
-	 * Insert the new namecache entry into the appropriate chain
-	 * within the cache entries table.
-	 */
-	LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
-
 	/*
 	 * If the entry is "negative", we place it into the
 	 * "negative" cache queue, otherwise, we place it into the
@@ -1861,6 +1926,14 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
 		SDT_PROBE2(vfs, namecache, enter_negative, done, dvp,
 		    ncp->nc_name);
 	}
+
+	atomic_thread_fence_rel();
+
+	/*
+	 * Insert the new namecache entry into the appropriate chain
+	 * within the cache entries table.
+	 */
+	CK_LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
 	cache_enter_unlock(&cel);
 	if (numneg * ncnegfactor > lnumcache)
 		cache_negative_zap_one();
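
This is the publication side of the fence pairing: the entry is fully
constructed while the relevant locks are held, a release fence is issued,
and only then is the entry linked into the chain, so a lockless reader that
finds it via CK_LIST_FOREACH is guaranteed to see initialized fields.
Schematically (field names per the struct above):

	ncp->nc_dvp = dvp;		/* initialize everything... */
	ncp->nc_vp = vp;
	memcpy(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen);
	atomic_thread_fence_rel();	/* ...order those stores before: */
	CK_LIST_INSERT_HEAD(ncpp, ncp, nc_hash);	/* publish */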
@@ -1908,6 +1981,11 @@ nchinit(void *dummy __unused)
 	    NULL, NULL, NULL, NULL, UMA_ALIGNOF(struct namecache_ts),
 	    UMA_ZONE_ZINIT);
 
+	VFS_SMR_ZONE_SET(cache_zone_small);
+	VFS_SMR_ZONE_SET(cache_zone_small_ts);
+	VFS_SMR_ZONE_SET(cache_zone_large);
+	VFS_SMR_ZONE_SET(cache_zone_large_ts);
+
 	ncsize = desiredvnodes * ncsizefactor;
 	nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash);
 	ncbuckethash = cache_roundup_2(mp_ncpus * mp_ncpus) - 1;
@@ -1973,11 +2051,11 @@ cache_changesize(u_long newmaxvnodes)
 	nchashtbl = new_nchashtbl;
 	nchash = new_nchash;
 	for (i = 0; i <= old_nchash; i++) {
-		while ((ncp = LIST_FIRST(&old_nchashtbl[i])) != NULL) {
+		while ((ncp = CK_LIST_FIRST(&old_nchashtbl[i])) != NULL) {
 			hash = cache_get_hash(ncp->nc_name, ncp->nc_nlen,
 			    ncp->nc_dvp);
-			LIST_REMOVE(ncp, nc_hash);
-			LIST_INSERT_HEAD(NCHHASH(hash), ncp, nc_hash);
+			CK_LIST_REMOVE(ncp, nc_hash);
+			CK_LIST_INSERT_HEAD(NCHHASH(hash), ncp, nc_hash);
 		}
 	}
 	ncsize = newncsize;
@@ -2091,7 +2169,7 @@ cache_purgevfs(struct mount *mp, bool force)
 		for (j = i; j < n_nchash; j += numbucketlocks) {
 retry:
 			bucket = &nchashtbl[j];
-			LIST_FOREACH_SAFE(ncp, bucket, nc_hash, nnp) {
+			CK_LIST_FOREACH_SAFE(ncp, bucket, nc_hash, nnp) {
 				cache_assert_bucket_locked(ncp, RA_WLOCKED);
 				if (ncp->nc_dvp->v_mount != mp)
 					continue;