cache: refactor alloc/free

In particular, this centralizes manipulation of numcache.
Mateusz Guzik 2020-10-24 01:14:17 +00:00
parent 1d44405690
commit 208cb7c4b6
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=366987


@@ -174,6 +174,19 @@ struct namecache_ts {
*/
#define CACHE_ZONE_ALIGNMENT UMA_ALIGNOF(time_t)
/*
* TODO: the initial value of CACHE_PATH_CUTOFF was inherited from the
* 4.4 BSD codebase. Later on struct namecache was tweaked to become
* smaller and the value was bumped to retain the total size, but it
* was never re-evaluated for suitability. A simple test counting
* lengths during package building shows that the value of 45 covers
* about 86% of all added entries, reaching 99% at 65.
*
* Regardless of the above, use of dedicated zones instead of malloc may be
* inducing additional waste. This may be hard to address as said zones are
* tied to VFS SMR. Even if retaining them, the current split should be
* reevaluated.
*/
#ifdef __LP64__
#define CACHE_PATH_CUTOFF 45
#define CACHE_LARGE_PAD 6
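
A minimal userspace sketch (not part of this commit) of how the coverage
figures cited in the comment above could be re-measured: fts(3) walks a
tree and histograms component name lengths; the cutoffs 45 and 65 are
taken from the comment.

/* namelen-hist.c: histogram of directory entry name lengths. */
#include <sys/types.h>
#include <fts.h>
#include <stdio.h>

int
main(int argc, char **argv)
{
	unsigned long hist[256] = { 0 };
	unsigned long total = 0, running = 0;
	FTS *fts;
	FTSENT *ent;
	size_t i;

	if (argc < 2)
		return (1);
	fts = fts_open(&argv[1], FTS_PHYSICAL | FTS_NOSTAT, NULL);
	if (fts == NULL)
		return (1);
	while ((ent = fts_read(fts)) != NULL) {
		if (ent->fts_info == FTS_DP)	/* skip post-order visits */
			continue;
		if (ent->fts_namelen < 256)
			hist[ent->fts_namelen]++;
		total++;
	}
	fts_close(fts);
	for (i = 0; i < 256 && total != 0; i++) {
		running += hist[i];
		if (i == 45 || i == 65)
			printf("len <= %zu: %.1f%%\n", i,
			    100.0 * running / total);
	}
	return (0);
}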
@@ -212,6 +225,8 @@ _Static_assert((CACHE_ZONE_LARGE_TS_SIZE % (CACHE_ZONE_ALIGNMENT + 1)) == 0, "ba
*/
#define NEG_HOT 0x01
static bool cache_neg_evict_cond(u_long lnumcache);
/*
* Mark an entry as invalid.
*
@@ -380,61 +395,6 @@ VP2VNODELOCK(struct vnode *vp)
return (&vnodelocks[(((uintptr_t)(vp) >> 8) & ncvnodehash)]);
}
/*
* UMA zones for the VFS cache.
*
* The small cache is used for entries with short names, which are the
* most common. The large cache is used for entries which are too big to
* fit in the small cache.
*/
static uma_zone_t __read_mostly cache_zone_small;
static uma_zone_t __read_mostly cache_zone_small_ts;
static uma_zone_t __read_mostly cache_zone_large;
static uma_zone_t __read_mostly cache_zone_large_ts;
static struct namecache *
cache_alloc(int len, int ts)
{
struct namecache_ts *ncp_ts;
struct namecache *ncp;
if (__predict_false(ts)) {
if (len <= CACHE_PATH_CUTOFF)
ncp_ts = uma_zalloc_smr(cache_zone_small_ts, M_WAITOK);
else
ncp_ts = uma_zalloc_smr(cache_zone_large_ts, M_WAITOK);
ncp = &ncp_ts->nc_nc;
} else {
if (len <= CACHE_PATH_CUTOFF)
ncp = uma_zalloc_smr(cache_zone_small, M_WAITOK);
else
ncp = uma_zalloc_smr(cache_zone_large, M_WAITOK);
}
return (ncp);
}
static void
cache_free(struct namecache *ncp)
{
struct namecache_ts *ncp_ts;
MPASS(ncp != NULL);
if ((ncp->nc_flag & NCF_DVDROP) != 0)
vdrop(ncp->nc_dvp);
if (__predict_false(ncp->nc_flag & NCF_TS)) {
ncp_ts = __containerof(ncp, struct namecache_ts, nc_nc);
if (ncp->nc_nlen <= CACHE_PATH_CUTOFF)
uma_zfree_smr(cache_zone_small_ts, ncp_ts);
else
uma_zfree_smr(cache_zone_large_ts, ncp_ts);
} else {
if (ncp->nc_nlen <= CACHE_PATH_CUTOFF)
uma_zfree_smr(cache_zone_small, ncp);
else
uma_zfree_smr(cache_zone_large, ncp);
}
}
static void
cache_out_ts(struct namecache *ncp, struct timespec *tsp, int *ticksp)
{
@@ -546,6 +506,126 @@ cache_assert_vnode_locked(struct vnode *vp)
cache_assert_vlp_locked(vlp);
}
/*
* Directory vnodes with entries are held for two reasons:
* 1. make them less of a target for reclamation in vnlru
* 2. suffer a smaller performance penalty in locked lookup as requeueing is avoided
*
* Note this preferably would not be done and it's a holdover. It will be
* feasible to eliminate altogether if all filesystems start supporting
* lockless lookup.
*/
static void
cache_hold_vnode(struct vnode *vp)
{
cache_assert_vnode_locked(vp);
VNPASS(LIST_EMPTY(&vp->v_cache_src), vp);
vhold(vp);
counter_u64_add(numcachehv, 1);
}
static void
cache_drop_vnode(struct vnode *vp)
{
/*
* Called after all locks are dropped, meaning we can't assert
* on the state of v_cache_src.
*/
vdrop(vp);
counter_u64_add(numcachehv, -1);
}
/*
* UMA zones.
*/
static uma_zone_t __read_mostly cache_zone_small;
static uma_zone_t __read_mostly cache_zone_small_ts;
static uma_zone_t __read_mostly cache_zone_large;
static uma_zone_t __read_mostly cache_zone_large_ts;
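
For context, a rough sketch of how an SMR-enabled zone of this kind can
be created. The helper name is hypothetical, the actual setup lives
elsewhere in this file and its flags may differ, and vfs_smr is assumed
to be the VFS-wide SMR domain (VFS_SMR_DECLARE):

static uma_zone_t
cache_zone_init_sketch(const char *name, size_t size)
{
	uma_zone_t zone;

	/* UMA_ZONE_SMR defers reuse of freed items until concurrent
	 * readers are done, which is what makes the uma_zalloc_smr() and
	 * uma_zfree_smr() calls below safe. */
	zone = uma_zcreate(name, size, NULL, NULL, NULL, NULL,
	    CACHE_ZONE_ALIGNMENT, UMA_ZONE_ZINIT | UMA_ZONE_SMR);
	/* Share the VFS-wide domain so frees synchronize with
	 * vfs_smr_enter()/vfs_smr_exit() read sections. */
	uma_zone_set_smr(zone, vfs_smr);
	return (zone);
}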
static struct namecache *
cache_alloc_uma(int len, bool ts)
{
struct namecache_ts *ncp_ts;
struct namecache *ncp;
if (__predict_false(ts)) {
if (len <= CACHE_PATH_CUTOFF)
ncp_ts = uma_zalloc_smr(cache_zone_small_ts, M_WAITOK);
else
ncp_ts = uma_zalloc_smr(cache_zone_large_ts, M_WAITOK);
ncp = &ncp_ts->nc_nc;
} else {
if (len <= CACHE_PATH_CUTOFF)
ncp = uma_zalloc_smr(cache_zone_small, M_WAITOK);
else
ncp = uma_zalloc_smr(cache_zone_large, M_WAITOK);
}
return (ncp);
}
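
For readers skimming the diff, the selection above reduces to the
following table (a restatement, with the cutoff of 45 assuming __LP64__
per the macros earlier in the file):

	ts	len <= CACHE_PATH_CUTOFF	zone
	false	yes				cache_zone_small
	false	no				cache_zone_large
	true	yes				cache_zone_small_ts
	true	no				cache_zone_large_ts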
static void
cache_free_uma(struct namecache *ncp)
{
struct namecache_ts *ncp_ts;
if (__predict_false(ncp->nc_flag & NCF_TS)) {
ncp_ts = __containerof(ncp, struct namecache_ts, nc_nc);
if (ncp->nc_nlen <= CACHE_PATH_CUTOFF)
uma_zfree_smr(cache_zone_small_ts, ncp_ts);
else
uma_zfree_smr(cache_zone_large_ts, ncp_ts);
} else {
if (ncp->nc_nlen <= CACHE_PATH_CUTOFF)
uma_zfree_smr(cache_zone_small, ncp);
else
uma_zfree_smr(cache_zone_large, ncp);
}
}
static struct namecache *
cache_alloc(int len, bool ts)
{
u_long lnumcache;
/*
* Avoid blowout in namecache entries.
*
* Bugs:
* 1. filesystems may end up trying to add an already existing entry
* (for example this can happen after a cache miss during concurrent
* lookup), in which case we will call cache_neg_evict despite not
* adding anything.
* 2. the routine may fail to free anything and no provisions are made
* to make it try harder (see the inside for failure modes)
* 3. it only ever looks at negative entries.
*/
lnumcache = atomic_fetchadd_long(&numcache, 1) + 1;
if (cache_neg_evict_cond(lnumcache)) {
lnumcache = atomic_load_long(&numcache);
}
if (__predict_false(lnumcache >= ncsize)) {
atomic_subtract_long(&numcache, 1);
counter_u64_add(numdrops, 1);
return (NULL);
}
return (cache_alloc_uma(len, ts));
}
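
Callers now follow a single pattern: allocate, and bail out on NULL,
since the numcache increment has already been backed out on failure.
This is exactly what cache_enter_time() switches to further down;
restated as a minimal sketch:

	ncp = cache_alloc(cnp->cn_namelen, tsp != NULL);
	if (ncp == NULL)
		return;		/* capped at ncsize; accounting undone */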
static void
cache_free(struct namecache *ncp)
{
MPASS(ncp != NULL);
if ((ncp->nc_flag & NCF_DVDROP) != 0) {
cache_drop_vnode(ncp->nc_dvp);
}
cache_free_uma(ncp);
atomic_subtract_long(&numcache, 1);
}
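
With the subtraction folded into cache_free(), every exit path stays
balanced; the error path at the end of this diff reduces to a plain
cache_free() call (restated from the last hunk):

out_unlock_free:
	cache_enter_unlock(&cel);
	/* Frees the zone memory, performs any deferred vdrop, and undoes
	 * the numcache increment taken in cache_alloc(). */
	cache_free(ncp);
	return;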
/*
* TODO: With the value stored we can do better than computing the hash based
* on the address. The choice of FNV should also be revisited.
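
For reference, a sketch of the scheme this TODO refers to, assuming the
stock fnv_32_buf()/FNV1_32_INIT helpers from sys/fnv_hash.h (the
function name is illustrative; the file's real hash helper may differ):

static uint32_t
cache_get_hash_sketch(char *name, u_char len, struct vnode *dvp)
{
	uint32_t hash;

	/* Hash the component name, then mix in the directory vnode
	 * address; storing and reusing the value is the suggested
	 * improvement. */
	hash = fnv_32_buf(name, len, FNV1_32_INIT);
	hash = fnv_32_buf(&dvp, sizeof(dvp), hash);
	return (hash);
}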
@@ -1298,10 +1378,8 @@ cache_zap_locked(struct namecache *ncp)
LIST_REMOVE(ncp, nc_src);
if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) {
ncp->nc_flag |= NCF_DVDROP;
counter_u64_add(numcachehv, -1);
}
}
atomic_subtract_long(&numcache, 1);
}
static void
@@ -2110,7 +2188,6 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
uint32_t hash;
int flag;
int len;
u_long lnumcache;
VNPASS(dvp != vp, dvp);
VNPASS(!VN_IS_DOOMED(dvp), dvp);
@@ -2135,27 +2212,9 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
}
}
/*
* Avoid blowout in namecache entries.
*
* Bugs:
* 1. filesystems may end up trying to add an already existing entry
* (for example this can happen after a cache miss during concurrent
* lookup), in which case we will call cache_neg_evict despite not
* adding anything.
* 2. the routine may fail to free anything and no provisions are made
* to make it try harder (see the inside for failure modes)
* 3. it only ever looks at negative entries.
*/
lnumcache = atomic_fetchadd_long(&numcache, 1) + 1;
if (cache_neg_evict_cond(lnumcache)) {
lnumcache = atomic_load_long(&numcache);
}
if (__predict_false(lnumcache >= ncsize)) {
atomic_subtract_long(&numcache, 1);
counter_u64_add(numdrops, 1);
ncp = cache_alloc(cnp->cn_namelen, tsp != NULL);
if (ncp == NULL)
return;
}
cache_celockstate_init(&cel);
ndd = NULL;
@@ -2165,7 +2224,6 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
* Calculate the hash key and setup as much of the new
* namecache entry as possible before acquiring the lock.
*/
ncp = cache_alloc(cnp->cn_namelen, tsp != NULL);
ncp->nc_flag = flag | NCF_WIP;
ncp->nc_vp = vp;
if (vp == NULL)
@@ -2276,8 +2334,7 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
if (flag != NCF_ISDOTDOT) {
if (LIST_EMPTY(&dvp->v_cache_src)) {
vhold(dvp);
counter_u64_add(numcachehv, 1);
cache_hold_vnode(dvp);
}
LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src);
}
@@ -2318,7 +2375,6 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp,
return;
out_unlock_free:
cache_enter_unlock(&cel);
atomic_subtract_long(&numcache, 1);
cache_free(ncp);
return;
}