From f9955a0614d1eb8c967aca8aa64f4b769359c224 Mon Sep 17 00:00:00 2001 From: mm Date: Sun, 16 May 2010 07:16:28 +0000 Subject: [PATCH] Fix perfomance problem with ZFS prefetch caching [1] Add statistics for ZFS prefetch (sysctl kstat.zfs.misc.zfetchstats) Partial import of OpenSolaris onnv revision 10474:0e96dd3b905a Reported by: jhell@dataix.net (private e-mail) [1] Approved by: pjd, delphij (mentor) Obtained from: OpenSolaris (Bug ID 6859997, 6868951) MFC after: 3 days --- .../opensolaris/uts/common/fs/zfs/dmu.c | 2 + .../uts/common/fs/zfs/dmu_zfetch.c | 99 ++++++++++++++++--- .../uts/common/fs/zfs/sys/dmu_zfetch.h | 7 +- 3 files changed, 92 insertions(+), 16 deletions(-) diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c index 377efb9d105e..55501e917d24 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c @@ -1192,6 +1192,7 @@ dmu_init(void) { dbuf_init(); dnode_init(); + zfetch_init(); arc_init(); l2arc_init(); } @@ -1200,6 +1201,7 @@ void dmu_fini(void) { arc_fini(); + zfetch_fini(); dnode_fini(); dbuf_fini(); l2arc_fini(); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_zfetch.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_zfetch.c index 5cc56a9f5d8a..283b4ca3d021 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_zfetch.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_zfetch.c @@ -19,18 +19,17 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include #include #include #include #include #include +#include /* * I'm against tune-ables, but these should probably exist as tweakable globals @@ -77,6 +76,41 @@ static zstream_t *dmu_zfetch_stream_reclaim(zfetch_t *); static void dmu_zfetch_stream_remove(zfetch_t *, zstream_t *); static int dmu_zfetch_streams_equal(zstream_t *, zstream_t *); +typedef struct zfetch_stats { + kstat_named_t zfetchstat_hits; + kstat_named_t zfetchstat_misses; + kstat_named_t zfetchstat_colinear_hits; + kstat_named_t zfetchstat_colinear_misses; + kstat_named_t zfetchstat_stride_hits; + kstat_named_t zfetchstat_stride_misses; + kstat_named_t zfetchstat_reclaim_successes; + kstat_named_t zfetchstat_reclaim_failures; + kstat_named_t zfetchstat_stream_resets; + kstat_named_t zfetchstat_stream_noresets; + kstat_named_t zfetchstat_bogus_streams; +} zfetch_stats_t; + +static zfetch_stats_t zfetch_stats = { + { "hits", KSTAT_DATA_UINT64 }, + { "misses", KSTAT_DATA_UINT64 }, + { "colinear_hits", KSTAT_DATA_UINT64 }, + { "colinear_misses", KSTAT_DATA_UINT64 }, + { "stride_hits", KSTAT_DATA_UINT64 }, + { "stride_misses", KSTAT_DATA_UINT64 }, + { "reclaim_successes", KSTAT_DATA_UINT64 }, + { "reclaim_failures", KSTAT_DATA_UINT64 }, + { "streams_resets", KSTAT_DATA_UINT64 }, + { "streams_noresets", KSTAT_DATA_UINT64 }, + { "bogus_streams", KSTAT_DATA_UINT64 }, +}; + +#define ZFETCHSTAT_INCR(stat, val) \ + atomic_add_64(&zfetch_stats.stat.value.ui64, (val)); + +#define ZFETCHSTAT_BUMP(stat) ZFETCHSTAT_INCR(stat, 1); + +kstat_t *zfetch_ksp; + /* * Given a zfetch structure and a zstream structure, determine whether the * blocks to be read are part of a co-linear pair of existing prefetch @@ -213,6 +247,29 @@ dmu_zfetch_dofetch(zfetch_t *zf, zstream_t *zs) zs->zst_last = LBOLT; } +void +zfetch_init(void) +{ + + zfetch_ksp = kstat_create("zfs", 0, "zfetchstats", "misc", + KSTAT_TYPE_NAMED, sizeof (zfetch_stats) / sizeof (kstat_named_t), + KSTAT_FLAG_VIRTUAL); + + if (zfetch_ksp != NULL) { + zfetch_ksp->ks_data = &zfetch_stats; + kstat_install(zfetch_ksp); + } +} + +void +zfetch_fini(void) +{ + if (zfetch_ksp != NULL) { + kstat_delete(zfetch_ksp); + zfetch_ksp = NULL; + } +} + /* * This takes a pointer to a zfetch structure and a dnode. It performs the * necessary setup for the zfetch structure, grokking data from the @@ -283,7 +340,7 @@ dmu_zfetch_fetchsz(dnode_t *dn, uint64_t blkid, uint64_t nblks) } /* - * given a zfetch and a zsearch structure, see if there is an associated zstream + * given a zfetch and a zstream structure, see if there is an associated zstream * for this block read. If so, it starts a prefetch for the stream it * located and returns true, otherwise it returns false */ @@ -315,6 +372,7 @@ dmu_zfetch_find(zfetch_t *zf, zstream_t *zh, int prefetched) */ if (zs->zst_len == 0) { /* bogus stream */ + ZFETCHSTAT_BUMP(zfetchstat_bogus_streams); continue; } @@ -324,9 +382,14 @@ dmu_zfetch_find(zfetch_t *zf, zstream_t *zh, int prefetched) */ if (zh->zst_offset >= zs->zst_offset && zh->zst_offset < zs->zst_offset + zs->zst_len) { - /* already fetched */ - rc = 1; - goto out; + if (prefetched) { + /* already fetched */ + ZFETCHSTAT_BUMP(zfetchstat_stride_hits); + rc = 1; + goto out; + } else { + ZFETCHSTAT_BUMP(zfetchstat_stride_misses); + } } /* @@ -439,6 +502,7 @@ dmu_zfetch_find(zfetch_t *zf, zstream_t *zh, int prefetched) if (reset) { zstream_t *remove = zs; + ZFETCHSTAT_BUMP(zfetchstat_stream_resets); rc = 0; mutex_exit(&zs->zst_lock); rw_exit(&zf->zf_rwlock); @@ -457,6 +521,7 @@ dmu_zfetch_find(zfetch_t *zf, zstream_t *zh, int prefetched) } } } else { + ZFETCHSTAT_BUMP(zfetchstat_stream_noresets); rc = 1; dmu_zfetch_dofetch(zf, zs); mutex_exit(&zs->zst_lock); @@ -513,13 +578,12 @@ dmu_zfetch_stream_insert(zfetch_t *zf, zstream_t *zs) zs_next = list_next(&zf->zf_stream, zs_walk); if (dmu_zfetch_streams_equal(zs_walk, zs)) { - return (0); + return (0); } } list_insert_head(&zf->zf_stream, zs); zf->zf_stream_cnt++; - return (1); } @@ -623,8 +687,15 @@ dmu_zfetch(zfetch_t *zf, uint64_t offset, uint64_t size, int prefetched) P2ALIGN(offset, blksz)) >> blkshft; fetched = dmu_zfetch_find(zf, &zst, prefetched); - if (!fetched) { - fetched = dmu_zfetch_colinear(zf, &zst); + if (fetched) { + ZFETCHSTAT_BUMP(zfetchstat_hits); + } else { + ZFETCHSTAT_BUMP(zfetchstat_misses); + if (fetched = dmu_zfetch_colinear(zf, &zst)) { + ZFETCHSTAT_BUMP(zfetchstat_colinear_hits); + } else { + ZFETCHSTAT_BUMP(zfetchstat_colinear_misses); + } } if (!fetched) { @@ -634,11 +705,14 @@ dmu_zfetch(zfetch_t *zf, uint64_t offset, uint64_t size, int prefetched) * we still couldn't find a stream, drop the lock, and allocate * one if possible. Otherwise, give up and go home. */ - if (newstream == NULL) { + if (newstream) { + ZFETCHSTAT_BUMP(zfetchstat_reclaim_successes); + } else { uint64_t maxblocks; uint32_t max_streams; uint32_t cur_streams; + ZFETCHSTAT_BUMP(zfetchstat_reclaim_failures); cur_streams = zf->zf_stream_cnt; maxblocks = zf->zf_dnode->dn_maxblkid; @@ -651,7 +725,6 @@ dmu_zfetch(zfetch_t *zf, uint64_t offset, uint64_t size, int prefetched) if (cur_streams >= max_streams) { return; } - newstream = kmem_zalloc(sizeof (zstream_t), KM_SLEEP); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_zfetch.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_zfetch.h index c94bced933af..78cadd2b1ee1 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_zfetch.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_zfetch.h @@ -19,15 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _DFETCH_H #define _DFETCH_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include #ifdef __cplusplus @@ -63,6 +61,9 @@ typedef struct zfetch { uint64_t zf_alloc_fail; /* # of failed attempts to alloc strm */ } zfetch_t; +void zfetch_init(void); +void zfetch_fini(void); + void dmu_zfetch_init(zfetch_t *, struct dnode *); void dmu_zfetch_rele(zfetch_t *); void dmu_zfetch(zfetch_t *, uint64_t, uint64_t, int);