zfs: merge openzfs/zfs@c4c162c1e (master) into main

Notable upstream pull request merges:
  #12054 Avoid deadlock when removing L2ARC devices under I/O
  #12172 Use wmsum for arc, abd, dbuf and zfetch statistics
  #12221 vdev_draid_min_asize() ignores reserved space

Obtained from:	OpenZFS
OpenZFS commit:	c4c162c1e8
This commit is contained in:
Martin Matuska 2021-06-20 09:18:44 +02:00
commit 0d8fe23735
13 changed files with 788 additions and 232 deletions

View File

@ -14,7 +14,7 @@ ConditionPathIsDirectory=/sys/module/zfs
[Service]
Type=oneshot
RemainAfterExit=yes
ExecStart=@sbindir@/zpool import -c @sysconfdir@/zfs/zpool.cache -aN ${ZPOOL_IMPORT_OPTS}
ExecStart=@sbindir@/zpool import -c @sysconfdir@/zfs/zpool.cache -aN $ZPOOL_IMPORT_OPTS
[Install]
WantedBy=zfs-import.target

View File

@ -13,7 +13,7 @@ ConditionPathIsDirectory=/sys/module/zfs
[Service]
Type=oneshot
RemainAfterExit=yes
ExecStart=@sbindir@/zpool import -aN -o cachefile=none ${ZPOOL_IMPORT_OPTS}
ExecStart=@sbindir@/zpool import -aN -o cachefile=none $ZPOOL_IMPORT_OPTS
[Install]
WantedBy=zfs-import.target

View File

@ -27,6 +27,7 @@
#define _ABD_IMPL_H
#include <sys/abd.h>
#include <sys/wmsum.h>
#ifdef __cplusplus
extern "C" {
@ -82,9 +83,8 @@ void abd_iter_unmap(struct abd_iter *);
/*
* Helper macros
*/
#define ABDSTAT(stat) (abd_stats.stat.value.ui64)
#define ABDSTAT_INCR(stat, val) \
atomic_add_64(&abd_stats.stat.value.ui64, (val))
wmsum_add(&abd_sums.stat, (val))
#define ABDSTAT_BUMP(stat) ABDSTAT_INCR(stat, 1)
#define ABDSTAT_BUMPDOWN(stat) ABDSTAT_INCR(stat, -1)

View File

@ -33,6 +33,7 @@
#include <sys/zio_crypt.h>
#include <sys/zthr.h>
#include <sys/aggsum.h>
#include <sys/wmsum.h>
#ifdef __cplusplus
extern "C" {
@ -563,7 +564,6 @@ typedef struct arc_stats {
kstat_named_t arcstat_c;
kstat_named_t arcstat_c_min;
kstat_named_t arcstat_c_max;
/* Not updated directly; only synced in arc_kstat_update. */
kstat_named_t arcstat_size;
/*
* Number of compressed bytes stored in the arc_buf_hdr_t's b_pabd.
@ -592,14 +592,12 @@ typedef struct arc_stats {
* (allocated via arc_buf_hdr_t_full and arc_buf_hdr_t_l2only
* caches), and arc_buf_t structures (allocated via arc_buf_t
* cache).
* Not updated directly; only synced in arc_kstat_update.
*/
kstat_named_t arcstat_hdr_size;
/*
* Number of bytes consumed by ARC buffers of type equal to
* ARC_BUFC_DATA. This is generally consumed by buffers backing
* on disk user data (e.g. plain file contents).
* Not updated directly; only synced in arc_kstat_update.
*/
kstat_named_t arcstat_data_size;
/*
@ -607,22 +605,18 @@ typedef struct arc_stats {
* ARC_BUFC_METADATA. This is generally consumed by buffers
* backing on disk data that is used for internal ZFS
* structures (e.g. ZAP, dnode, indirect blocks, etc).
* Not updated directly; only synced in arc_kstat_update.
*/
kstat_named_t arcstat_metadata_size;
/*
* Number of bytes consumed by dmu_buf_impl_t objects.
* Not updated directly; only synced in arc_kstat_update.
*/
kstat_named_t arcstat_dbuf_size;
/*
* Number of bytes consumed by dnode_t objects.
* Not updated directly; only synced in arc_kstat_update.
*/
kstat_named_t arcstat_dnode_size;
/*
* Number of bytes consumed by bonus buffers.
* Not updated directly; only synced in arc_kstat_update.
*/
kstat_named_t arcstat_bonus_size;
#if defined(COMPAT_FREEBSD11)
@ -637,7 +631,6 @@ typedef struct arc_stats {
* arc_anon state. This includes *all* buffers in the arc_anon
* state; e.g. data, metadata, evictable, and unevictable buffers
* are all included in this value.
* Not updated directly; only synced in arc_kstat_update.
*/
kstat_named_t arcstat_anon_size;
/*
@ -645,7 +638,6 @@ typedef struct arc_stats {
* following criteria: backing buffers of type ARC_BUFC_DATA,
* residing in the arc_anon state, and are eligible for eviction
* (e.g. have no outstanding holds on the buffer).
* Not updated directly; only synced in arc_kstat_update.
*/
kstat_named_t arcstat_anon_evictable_data;
/*
@ -653,7 +645,6 @@ typedef struct arc_stats {
* following criteria: backing buffers of type ARC_BUFC_METADATA,
* residing in the arc_anon state, and are eligible for eviction
* (e.g. have no outstanding holds on the buffer).
* Not updated directly; only synced in arc_kstat_update.
*/
kstat_named_t arcstat_anon_evictable_metadata;
/*
@ -661,7 +652,6 @@ typedef struct arc_stats {
* arc_mru state. This includes *all* buffers in the arc_mru
* state; e.g. data, metadata, evictable, and unevictable buffers
* are all included in this value.
* Not updated directly; only synced in arc_kstat_update.
*/
kstat_named_t arcstat_mru_size;
/*
@ -669,7 +659,6 @@ typedef struct arc_stats {
* following criteria: backing buffers of type ARC_BUFC_DATA,
* residing in the arc_mru state, and are eligible for eviction
* (e.g. have no outstanding holds on the buffer).
* Not updated directly; only synced in arc_kstat_update.
*/
kstat_named_t arcstat_mru_evictable_data;
/*
@ -677,7 +666,6 @@ typedef struct arc_stats {
* following criteria: backing buffers of type ARC_BUFC_METADATA,
* residing in the arc_mru state, and are eligible for eviction
* (e.g. have no outstanding holds on the buffer).
* Not updated directly; only synced in arc_kstat_update.
*/
kstat_named_t arcstat_mru_evictable_metadata;
/*
@ -688,21 +676,18 @@ typedef struct arc_stats {
* don't actually have ARC buffers linked off of these headers.
* Thus, *if* the headers had associated ARC buffers, these
* buffers *would have* consumed this number of bytes.
* Not updated directly; only synced in arc_kstat_update.
*/
kstat_named_t arcstat_mru_ghost_size;
/*
* Number of bytes that *would have been* consumed by ARC
* buffers that are eligible for eviction, of type
* ARC_BUFC_DATA, and linked off the arc_mru_ghost state.
* Not updated directly; only synced in arc_kstat_update.
*/
kstat_named_t arcstat_mru_ghost_evictable_data;
/*
* Number of bytes that *would have been* consumed by ARC
* buffers that are eligible for eviction, of type
* ARC_BUFC_METADATA, and linked off the arc_mru_ghost state.
* Not updated directly; only synced in arc_kstat_update.
*/
kstat_named_t arcstat_mru_ghost_evictable_metadata;
/*
@ -710,42 +695,36 @@ typedef struct arc_stats {
* arc_mfu state. This includes *all* buffers in the arc_mfu
* state; e.g. data, metadata, evictable, and unevictable buffers
* are all included in this value.
* Not updated directly; only synced in arc_kstat_update.
*/
kstat_named_t arcstat_mfu_size;
/*
* Number of bytes consumed by ARC buffers that are eligible for
* eviction, of type ARC_BUFC_DATA, and reside in the arc_mfu
* state.
* Not updated directly; only synced in arc_kstat_update.
*/
kstat_named_t arcstat_mfu_evictable_data;
/*
* Number of bytes consumed by ARC buffers that are eligible for
* eviction, of type ARC_BUFC_METADATA, and reside in the
* arc_mfu state.
* Not updated directly; only synced in arc_kstat_update.
*/
kstat_named_t arcstat_mfu_evictable_metadata;
/*
* Total number of bytes that *would have been* consumed by ARC
* buffers in the arc_mfu_ghost state. See the comment above
* arcstat_mru_ghost_size for more details.
* Not updated directly; only synced in arc_kstat_update.
*/
kstat_named_t arcstat_mfu_ghost_size;
/*
* Number of bytes that *would have been* consumed by ARC
* buffers that are eligible for eviction, of type
* ARC_BUFC_DATA, and linked off the arc_mfu_ghost state.
* Not updated directly; only synced in arc_kstat_update.
*/
kstat_named_t arcstat_mfu_ghost_evictable_data;
/*
* Number of bytes that *would have been* consumed by ARC
* buffers that are eligible for eviction, of type
* ARC_BUFC_METADATA, and linked off the arc_mru_ghost state.
* Not updated directly; only synced in arc_kstat_update.
*/
kstat_named_t arcstat_mfu_ghost_evictable_metadata;
kstat_named_t arcstat_l2_hits;
@ -779,7 +758,6 @@ typedef struct arc_stats {
kstat_named_t arcstat_l2_io_error;
kstat_named_t arcstat_l2_lsize;
kstat_named_t arcstat_l2_psize;
/* Not updated directly; only synced in arc_kstat_update. */
kstat_named_t arcstat_l2_hdr_size;
/*
* Number of L2ARC log blocks written. These are used for restoring the
@ -860,7 +838,6 @@ typedef struct arc_stats {
kstat_named_t arcstat_tempreserve;
kstat_named_t arcstat_loaned_bytes;
kstat_named_t arcstat_prune;
/* Not updated directly; only synced in arc_kstat_update. */
kstat_named_t arcstat_meta_used;
kstat_named_t arcstat_meta_limit;
kstat_named_t arcstat_dnode_limit;
@ -876,6 +853,96 @@ typedef struct arc_stats {
kstat_named_t arcstat_abd_chunk_waste_size;
} arc_stats_t;
typedef struct arc_sums {
wmsum_t arcstat_hits;
wmsum_t arcstat_misses;
wmsum_t arcstat_demand_data_hits;
wmsum_t arcstat_demand_data_misses;
wmsum_t arcstat_demand_metadata_hits;
wmsum_t arcstat_demand_metadata_misses;
wmsum_t arcstat_prefetch_data_hits;
wmsum_t arcstat_prefetch_data_misses;
wmsum_t arcstat_prefetch_metadata_hits;
wmsum_t arcstat_prefetch_metadata_misses;
wmsum_t arcstat_mru_hits;
wmsum_t arcstat_mru_ghost_hits;
wmsum_t arcstat_mfu_hits;
wmsum_t arcstat_mfu_ghost_hits;
wmsum_t arcstat_deleted;
wmsum_t arcstat_mutex_miss;
wmsum_t arcstat_access_skip;
wmsum_t arcstat_evict_skip;
wmsum_t arcstat_evict_not_enough;
wmsum_t arcstat_evict_l2_cached;
wmsum_t arcstat_evict_l2_eligible;
wmsum_t arcstat_evict_l2_eligible_mfu;
wmsum_t arcstat_evict_l2_eligible_mru;
wmsum_t arcstat_evict_l2_ineligible;
wmsum_t arcstat_evict_l2_skip;
wmsum_t arcstat_hash_collisions;
wmsum_t arcstat_hash_chains;
aggsum_t arcstat_size;
wmsum_t arcstat_compressed_size;
wmsum_t arcstat_uncompressed_size;
wmsum_t arcstat_overhead_size;
wmsum_t arcstat_hdr_size;
wmsum_t arcstat_data_size;
wmsum_t arcstat_metadata_size;
wmsum_t arcstat_dbuf_size;
aggsum_t arcstat_dnode_size;
wmsum_t arcstat_bonus_size;
wmsum_t arcstat_l2_hits;
wmsum_t arcstat_l2_misses;
wmsum_t arcstat_l2_prefetch_asize;
wmsum_t arcstat_l2_mru_asize;
wmsum_t arcstat_l2_mfu_asize;
wmsum_t arcstat_l2_bufc_data_asize;
wmsum_t arcstat_l2_bufc_metadata_asize;
wmsum_t arcstat_l2_feeds;
wmsum_t arcstat_l2_rw_clash;
wmsum_t arcstat_l2_read_bytes;
wmsum_t arcstat_l2_write_bytes;
wmsum_t arcstat_l2_writes_sent;
wmsum_t arcstat_l2_writes_done;
wmsum_t arcstat_l2_writes_error;
wmsum_t arcstat_l2_writes_lock_retry;
wmsum_t arcstat_l2_evict_lock_retry;
wmsum_t arcstat_l2_evict_reading;
wmsum_t arcstat_l2_evict_l1cached;
wmsum_t arcstat_l2_free_on_write;
wmsum_t arcstat_l2_abort_lowmem;
wmsum_t arcstat_l2_cksum_bad;
wmsum_t arcstat_l2_io_error;
wmsum_t arcstat_l2_lsize;
wmsum_t arcstat_l2_psize;
aggsum_t arcstat_l2_hdr_size;
wmsum_t arcstat_l2_log_blk_writes;
wmsum_t arcstat_l2_log_blk_asize;
wmsum_t arcstat_l2_log_blk_count;
wmsum_t arcstat_l2_rebuild_success;
wmsum_t arcstat_l2_rebuild_abort_unsupported;
wmsum_t arcstat_l2_rebuild_abort_io_errors;
wmsum_t arcstat_l2_rebuild_abort_dh_errors;
wmsum_t arcstat_l2_rebuild_abort_cksum_lb_errors;
wmsum_t arcstat_l2_rebuild_abort_lowmem;
wmsum_t arcstat_l2_rebuild_size;
wmsum_t arcstat_l2_rebuild_asize;
wmsum_t arcstat_l2_rebuild_bufs;
wmsum_t arcstat_l2_rebuild_bufs_precached;
wmsum_t arcstat_l2_rebuild_log_blks;
wmsum_t arcstat_memory_throttle_count;
wmsum_t arcstat_memory_direct_count;
wmsum_t arcstat_memory_indirect_count;
wmsum_t arcstat_prune;
aggsum_t arcstat_meta_used;
wmsum_t arcstat_async_upgrade_sync;
wmsum_t arcstat_demand_hit_predictive_prefetch;
wmsum_t arcstat_demand_hit_prescient_prefetch;
wmsum_t arcstat_raw_size;
wmsum_t arcstat_cached_only_in_progress;
wmsum_t arcstat_abd_chunk_waste_size;
} arc_sums_t;
typedef struct arc_evict_waiter {
list_node_t aew_node;
kcondvar_t aew_cv;
@ -885,7 +952,7 @@ typedef struct arc_evict_waiter {
#define ARCSTAT(stat) (arc_stats.stat.value.ui64)
#define ARCSTAT_INCR(stat, val) \
atomic_add_64(&arc_stats.stat.value.ui64, (val))
wmsum_add(&arc_sums.stat, (val))
#define ARCSTAT_BUMP(stat) ARCSTAT_INCR(stat, 1)
#define ARCSTAT_BUMPDOWN(stat) ARCSTAT_INCR(stat, -1)
@ -899,6 +966,7 @@ typedef struct arc_evict_waiter {
extern taskq_t *arc_prune_taskq;
extern arc_stats_t arc_stats;
extern arc_sums_t arc_sums;
extern hrtime_t arc_growtime;
extern boolean_t arc_warm;
extern int arc_grow_retry;
@ -906,7 +974,6 @@ extern int arc_no_grow_shift;
extern int arc_shrink_shift;
extern kmutex_t arc_prune_mtx;
extern list_t arc_prune_list;
extern aggsum_t arc_size;
extern arc_state_t *arc_mfu;
extern arc_state_t *arc_mru;
extern uint_t zfs_arc_pc_percent;

View File

@ -1260,6 +1260,7 @@ zpool_find_import_scan_dir(libpc_handle_t *hdl, pthread_mutex_t *lock,
switch (dp->d_type) {
case DT_UNKNOWN:
case DT_BLK:
case DT_LNK:
#ifdef __FreeBSD__
case DT_CHR:
#endif

View File

@ -69,6 +69,15 @@ static abd_stats_t abd_stats = {
{ "linear_data_size", KSTAT_DATA_UINT64 },
};
struct {
wmsum_t abdstat_struct_size;
wmsum_t abdstat_scatter_cnt;
wmsum_t abdstat_scatter_data_size;
wmsum_t abdstat_scatter_chunk_waste;
wmsum_t abdstat_linear_cnt;
wmsum_t abdstat_linear_data_size;
} abd_sums;
/*
* The size of the chunks ABD allocates. Because the sizes allocated from the
* kmem_cache can't change, this tunable can only be modified at boot. Changing
@ -271,16 +280,46 @@ abd_free_zero_scatter(void)
kmem_free(abd_zero_buf, zfs_abd_chunk_size);
}
static int
abd_kstats_update(kstat_t *ksp, int rw)
{
abd_stats_t *as = ksp->ks_data;
if (rw == KSTAT_WRITE)
return (EACCES);
as->abdstat_struct_size.value.ui64 =
wmsum_value(&abd_sums.abdstat_struct_size);
as->abdstat_scatter_cnt.value.ui64 =
wmsum_value(&abd_sums.abdstat_scatter_cnt);
as->abdstat_scatter_data_size.value.ui64 =
wmsum_value(&abd_sums.abdstat_scatter_data_size);
as->abdstat_scatter_chunk_waste.value.ui64 =
wmsum_value(&abd_sums.abdstat_scatter_chunk_waste);
as->abdstat_linear_cnt.value.ui64 =
wmsum_value(&abd_sums.abdstat_linear_cnt);
as->abdstat_linear_data_size.value.ui64 =
wmsum_value(&abd_sums.abdstat_linear_data_size);
return (0);
}
void
abd_init(void)
{
abd_chunk_cache = kmem_cache_create("abd_chunk", zfs_abd_chunk_size, 0,
NULL, NULL, NULL, NULL, 0, KMC_NODEBUG);
wmsum_init(&abd_sums.abdstat_struct_size, 0);
wmsum_init(&abd_sums.abdstat_scatter_cnt, 0);
wmsum_init(&abd_sums.abdstat_scatter_data_size, 0);
wmsum_init(&abd_sums.abdstat_scatter_chunk_waste, 0);
wmsum_init(&abd_sums.abdstat_linear_cnt, 0);
wmsum_init(&abd_sums.abdstat_linear_data_size, 0);
abd_ksp = kstat_create("zfs", 0, "abdstats", "misc", KSTAT_TYPE_NAMED,
sizeof (abd_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
if (abd_ksp != NULL) {
abd_ksp->ks_data = &abd_stats;
abd_ksp->ks_update = abd_kstats_update;
kstat_install(abd_ksp);
}
@ -297,6 +336,13 @@ abd_fini(void)
abd_ksp = NULL;
}
wmsum_fini(&abd_sums.abdstat_struct_size);
wmsum_fini(&abd_sums.abdstat_scatter_cnt);
wmsum_fini(&abd_sums.abdstat_scatter_data_size);
wmsum_fini(&abd_sums.abdstat_scatter_chunk_waste);
wmsum_fini(&abd_sums.abdstat_linear_cnt);
wmsum_fini(&abd_sums.abdstat_linear_data_size);
kmem_cache_destroy(abd_chunk_cache);
abd_chunk_cache = NULL;
}

View File

@ -132,6 +132,20 @@ static abd_stats_t abd_stats = {
{ "scatter_sg_table_retry", KSTAT_DATA_UINT64 },
};
struct {
wmsum_t abdstat_struct_size;
wmsum_t abdstat_linear_cnt;
wmsum_t abdstat_linear_data_size;
wmsum_t abdstat_scatter_cnt;
wmsum_t abdstat_scatter_data_size;
wmsum_t abdstat_scatter_chunk_waste;
wmsum_t abdstat_scatter_orders[MAX_ORDER];
wmsum_t abdstat_scatter_page_multi_chunk;
wmsum_t abdstat_scatter_page_multi_zone;
wmsum_t abdstat_scatter_page_alloc_retry;
wmsum_t abdstat_scatter_sg_table_retry;
} abd_sums;
#define abd_for_each_sg(abd, sg, n, i) \
for_each_sg(ABD_SCATTER(abd).abd_sgl, sg, n, i)
@ -687,6 +701,40 @@ abd_free_zero_scatter(void)
#endif /* _KERNEL */
}
static int
abd_kstats_update(kstat_t *ksp, int rw)
{
abd_stats_t *as = ksp->ks_data;
if (rw == KSTAT_WRITE)
return (EACCES);
as->abdstat_struct_size.value.ui64 =
wmsum_value(&abd_sums.abdstat_struct_size);
as->abdstat_linear_cnt.value.ui64 =
wmsum_value(&abd_sums.abdstat_linear_cnt);
as->abdstat_linear_data_size.value.ui64 =
wmsum_value(&abd_sums.abdstat_linear_data_size);
as->abdstat_scatter_cnt.value.ui64 =
wmsum_value(&abd_sums.abdstat_scatter_cnt);
as->abdstat_scatter_data_size.value.ui64 =
wmsum_value(&abd_sums.abdstat_scatter_data_size);
as->abdstat_scatter_chunk_waste.value.ui64 =
wmsum_value(&abd_sums.abdstat_scatter_chunk_waste);
for (int i = 0; i < MAX_ORDER; i++) {
as->abdstat_scatter_orders[i].value.ui64 =
wmsum_value(&abd_sums.abdstat_scatter_orders[i]);
}
as->abdstat_scatter_page_multi_chunk.value.ui64 =
wmsum_value(&abd_sums.abdstat_scatter_page_multi_chunk);
as->abdstat_scatter_page_multi_zone.value.ui64 =
wmsum_value(&abd_sums.abdstat_scatter_page_multi_zone);
as->abdstat_scatter_page_alloc_retry.value.ui64 =
wmsum_value(&abd_sums.abdstat_scatter_page_alloc_retry);
as->abdstat_scatter_sg_table_retry.value.ui64 =
wmsum_value(&abd_sums.abdstat_scatter_sg_table_retry);
return (0);
}
void
abd_init(void)
{
@ -695,6 +743,19 @@ abd_init(void)
abd_cache = kmem_cache_create("abd_t", sizeof (abd_t),
0, NULL, NULL, NULL, NULL, NULL, 0);
wmsum_init(&abd_sums.abdstat_struct_size, 0);
wmsum_init(&abd_sums.abdstat_linear_cnt, 0);
wmsum_init(&abd_sums.abdstat_linear_data_size, 0);
wmsum_init(&abd_sums.abdstat_scatter_cnt, 0);
wmsum_init(&abd_sums.abdstat_scatter_data_size, 0);
wmsum_init(&abd_sums.abdstat_scatter_chunk_waste, 0);
for (i = 0; i < MAX_ORDER; i++)
wmsum_init(&abd_sums.abdstat_scatter_orders[i], 0);
wmsum_init(&abd_sums.abdstat_scatter_page_multi_chunk, 0);
wmsum_init(&abd_sums.abdstat_scatter_page_multi_zone, 0);
wmsum_init(&abd_sums.abdstat_scatter_page_alloc_retry, 0);
wmsum_init(&abd_sums.abdstat_scatter_sg_table_retry, 0);
abd_ksp = kstat_create("zfs", 0, "abdstats", "misc", KSTAT_TYPE_NAMED,
sizeof (abd_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
if (abd_ksp != NULL) {
@ -705,6 +766,7 @@ abd_init(void)
KSTAT_DATA_UINT64;
}
abd_ksp->ks_data = &abd_stats;
abd_ksp->ks_update = abd_kstats_update;
kstat_install(abd_ksp);
}
@ -721,6 +783,19 @@ abd_fini(void)
abd_ksp = NULL;
}
wmsum_fini(&abd_sums.abdstat_struct_size);
wmsum_fini(&abd_sums.abdstat_linear_cnt);
wmsum_fini(&abd_sums.abdstat_linear_data_size);
wmsum_fini(&abd_sums.abdstat_scatter_cnt);
wmsum_fini(&abd_sums.abdstat_scatter_data_size);
wmsum_fini(&abd_sums.abdstat_scatter_chunk_waste);
for (int i = 0; i < MAX_ORDER; i++)
wmsum_fini(&abd_sums.abdstat_scatter_orders[i]);
wmsum_fini(&abd_sums.abdstat_scatter_page_multi_chunk);
wmsum_fini(&abd_sums.abdstat_scatter_page_multi_zone);
wmsum_fini(&abd_sums.abdstat_scatter_page_alloc_retry);
wmsum_fini(&abd_sums.abdstat_scatter_sg_table_retry);
if (abd_cache) {
kmem_cache_destroy(abd_cache);
abd_cache = NULL;

View File

@ -135,7 +135,7 @@ arc_available_memory(void)
static uint64_t
arc_evictable_memory(void)
{
int64_t asize = aggsum_value(&arc_size);
int64_t asize = aggsum_value(&arc_sums.arcstat_size);
uint64_t arc_clean =
zfs_refcount_count(&arc_mru->arcs_esize[ARC_BUFC_DATA]) +
zfs_refcount_count(&arc_mru->arcs_esize[ARC_BUFC_METADATA]) +

View File

@ -600,6 +600,8 @@ arc_stats_t arc_stats = {
{ "abd_chunk_waste_size", KSTAT_DATA_UINT64 },
};
arc_sums_t arc_sums;
#define ARCSTAT_MAX(stat, val) { \
uint64_t m; \
while ((val) > (m = arc_stats.stat.value.ui64) && \
@ -607,9 +609,6 @@ arc_stats_t arc_stats = {
continue; \
}
#define ARCSTAT_MAXSTAT(stat) \
ARCSTAT_MAX(stat##_max, arc_stats.stat.value.ui64)
/*
* We define a macro to allow ARC hits/misses to be easily broken down by
* two separate conditions, giving a total of four different subtypes for
@ -671,37 +670,8 @@ arc_state_t *arc_mfu;
/* max size for dnodes */
#define arc_dnode_size_limit ARCSTAT(arcstat_dnode_limit)
#define arc_meta_min ARCSTAT(arcstat_meta_min) /* min size for metadata */
#define arc_meta_max ARCSTAT(arcstat_meta_max) /* max size of metadata */
#define arc_need_free ARCSTAT(arcstat_need_free) /* waiting to be evicted */
/* size of all b_rabd's in entire arc */
#define arc_raw_size ARCSTAT(arcstat_raw_size)
/* compressed size of entire arc */
#define arc_compressed_size ARCSTAT(arcstat_compressed_size)
/* uncompressed size of entire arc */
#define arc_uncompressed_size ARCSTAT(arcstat_uncompressed_size)
/* number of bytes in the arc from arc_buf_t's */
#define arc_overhead_size ARCSTAT(arcstat_overhead_size)
/*
* There are also some ARC variables that we want to export, but that are
* updated so often that having the canonical representation be the statistic
* variable causes a performance bottleneck. We want to use aggsum_t's for these
* instead, but still be able to export the kstat in the same way as before.
* The solution is to always use the aggsum version, except in the kstat update
* callback.
*/
aggsum_t arc_size;
aggsum_t arc_meta_used;
wmsum_t astat_data_size;
wmsum_t astat_metadata_size;
wmsum_t astat_dbuf_size;
aggsum_t astat_dnode_size;
wmsum_t astat_bonus_size;
wmsum_t astat_hdr_size;
aggsum_t astat_l2_hdr_size;
wmsum_t astat_abd_chunk_waste_size;
hrtime_t arc_growtime;
list_t arc_prune_list;
kmutex_t arc_prune_mtx;
@ -819,9 +789,6 @@ uint64_t zfs_crc64_table[256];
*/
#define L2ARC_FEED_TYPES 4
#define l2arc_writes_sent ARCSTAT(arcstat_l2_writes_sent)
#define l2arc_writes_done ARCSTAT(arcstat_l2_writes_done)
/* L2ARC Performance Tunables */
unsigned long l2arc_write_max = L2ARC_WRITE_SIZE; /* def max write size */
unsigned long l2arc_write_boost = L2ARC_WRITE_SIZE; /* extra warmup write */
@ -1085,9 +1052,9 @@ buf_hash_insert(arc_buf_hdr_t *hdr, kmutex_t **lockp)
ARCSTAT_MAX(arcstat_hash_chain_max, i);
}
ARCSTAT_BUMP(arcstat_hash_elements);
ARCSTAT_MAXSTAT(arcstat_hash_elements);
uint64_t he = atomic_inc_64_nv(
&arc_stats.arcstat_hash_elements.value.ui64);
ARCSTAT_MAX(arcstat_hash_elements_max, he);
return (NULL);
}
@ -1111,7 +1078,7 @@ buf_hash_remove(arc_buf_hdr_t *hdr)
arc_hdr_clear_flags(hdr, ARC_FLAG_IN_HASH_TABLE);
/* collect some hash table performance data */
ARCSTAT_BUMPDOWN(arcstat_hash_elements);
atomic_dec_64(&arc_stats.arcstat_hash_elements.value.ui64);
if (buf_hash_table.ht_table[idx] &&
buf_hash_table.ht_table[idx]->b_hash_next == NULL)
@ -2646,25 +2613,25 @@ arc_space_consume(uint64_t space, arc_space_type_t type)
default:
break;
case ARC_SPACE_DATA:
wmsum_add(&astat_data_size, space);
ARCSTAT_INCR(arcstat_data_size, space);
break;
case ARC_SPACE_META:
wmsum_add(&astat_metadata_size, space);
ARCSTAT_INCR(arcstat_metadata_size, space);
break;
case ARC_SPACE_BONUS:
wmsum_add(&astat_bonus_size, space);
ARCSTAT_INCR(arcstat_bonus_size, space);
break;
case ARC_SPACE_DNODE:
aggsum_add(&astat_dnode_size, space);
aggsum_add(&arc_sums.arcstat_dnode_size, space);
break;
case ARC_SPACE_DBUF:
wmsum_add(&astat_dbuf_size, space);
ARCSTAT_INCR(arcstat_dbuf_size, space);
break;
case ARC_SPACE_HDRS:
wmsum_add(&astat_hdr_size, space);
ARCSTAT_INCR(arcstat_hdr_size, space);
break;
case ARC_SPACE_L2HDRS:
aggsum_add(&astat_l2_hdr_size, space);
aggsum_add(&arc_sums.arcstat_l2_hdr_size, space);
break;
case ARC_SPACE_ABD_CHUNK_WASTE:
/*
@ -2673,14 +2640,14 @@ arc_space_consume(uint64_t space, arc_space_type_t type)
* scatter ABD's come from the ARC, because other users are
* very short-lived.
*/
wmsum_add(&astat_abd_chunk_waste_size, space);
ARCSTAT_INCR(arcstat_abd_chunk_waste_size, space);
break;
}
if (type != ARC_SPACE_DATA && type != ARC_SPACE_ABD_CHUNK_WASTE)
aggsum_add(&arc_meta_used, space);
aggsum_add(&arc_sums.arcstat_meta_used, space);
aggsum_add(&arc_size, space);
aggsum_add(&arc_sums.arcstat_size, space);
}
void
@ -2692,45 +2659,41 @@ arc_space_return(uint64_t space, arc_space_type_t type)
default:
break;
case ARC_SPACE_DATA:
wmsum_add(&astat_data_size, -space);
ARCSTAT_INCR(arcstat_data_size, -space);
break;
case ARC_SPACE_META:
wmsum_add(&astat_metadata_size, -space);
ARCSTAT_INCR(arcstat_metadata_size, -space);
break;
case ARC_SPACE_BONUS:
wmsum_add(&astat_bonus_size, -space);
ARCSTAT_INCR(arcstat_bonus_size, -space);
break;
case ARC_SPACE_DNODE:
aggsum_add(&astat_dnode_size, -space);
aggsum_add(&arc_sums.arcstat_dnode_size, -space);
break;
case ARC_SPACE_DBUF:
wmsum_add(&astat_dbuf_size, -space);
ARCSTAT_INCR(arcstat_dbuf_size, -space);
break;
case ARC_SPACE_HDRS:
wmsum_add(&astat_hdr_size, -space);
ARCSTAT_INCR(arcstat_hdr_size, -space);
break;
case ARC_SPACE_L2HDRS:
aggsum_add(&astat_l2_hdr_size, -space);
aggsum_add(&arc_sums.arcstat_l2_hdr_size, -space);
break;
case ARC_SPACE_ABD_CHUNK_WASTE:
wmsum_add(&astat_abd_chunk_waste_size, -space);
ARCSTAT_INCR(arcstat_abd_chunk_waste_size, -space);
break;
}
if (type != ARC_SPACE_DATA && type != ARC_SPACE_ABD_CHUNK_WASTE) {
ASSERT(aggsum_compare(&arc_meta_used, space) >= 0);
/*
* We use the upper bound here rather than the precise value
* because the arc_meta_max value doesn't need to be
* precise. It's only consumed by humans via arcstats.
*/
if (arc_meta_max < aggsum_upper_bound(&arc_meta_used))
arc_meta_max = aggsum_upper_bound(&arc_meta_used);
aggsum_add(&arc_meta_used, -space);
ASSERT(aggsum_compare(&arc_sums.arcstat_meta_used,
space) >= 0);
ARCSTAT_MAX(arcstat_meta_max,
aggsum_upper_bound(&arc_sums.arcstat_meta_used));
aggsum_add(&arc_sums.arcstat_meta_used, -space);
}
ASSERT(aggsum_compare(&arc_size, space) >= 0);
aggsum_add(&arc_size, -space);
ASSERT(aggsum_compare(&arc_sums.arcstat_size, space) >= 0);
aggsum_add(&arc_sums.arcstat_size, -space);
}
/*
@ -4246,9 +4209,10 @@ arc_evict_state(arc_state_t *state, uint64_t spa, int64_t bytes,
* Request that 10% of the LRUs be scanned by the superblock
* shrinker.
*/
if (type == ARC_BUFC_DATA && aggsum_compare(&astat_dnode_size,
arc_dnode_size_limit) > 0) {
arc_prune_async((aggsum_upper_bound(&astat_dnode_size) -
if (type == ARC_BUFC_DATA && aggsum_compare(
&arc_sums.arcstat_dnode_size, arc_dnode_size_limit) > 0) {
arc_prune_async((aggsum_upper_bound(
&arc_sums.arcstat_dnode_size) -
arc_dnode_size_limit) / sizeof (dnode_t) /
zfs_arc_dnode_reduce_percent);
}
@ -4478,7 +4442,7 @@ arc_evict_meta_balanced(uint64_t meta_used)
}
/*
* Evict metadata buffers from the cache, such that arc_meta_used is
* Evict metadata buffers from the cache, such that arcstat_meta_used is
* capped by the arc_meta_limit tunable.
*/
static uint64_t
@ -4599,7 +4563,7 @@ arc_evict_type(arc_state_t *state)
}
/*
* Evict buffers from the cache, such that arc_size is capped by arc_c.
* Evict buffers from the cache, such that arcstat_size is capped by arc_c.
*/
static uint64_t
arc_evict(void)
@ -4607,8 +4571,8 @@ arc_evict(void)
uint64_t total_evicted = 0;
uint64_t bytes;
int64_t target;
uint64_t asize = aggsum_value(&arc_size);
uint64_t ameta = aggsum_value(&arc_meta_used);
uint64_t asize = aggsum_value(&arc_sums.arcstat_size);
uint64_t ameta = aggsum_value(&arc_sums.arcstat_meta_used);
/*
* If we're over arc_meta_limit, we want to correct that before
@ -4668,8 +4632,8 @@ arc_evict(void)
/*
* Re-sum ARC stats after the first round of evictions.
*/
asize = aggsum_value(&arc_size);
ameta = aggsum_value(&arc_meta_used);
asize = aggsum_value(&arc_sums.arcstat_size);
ameta = aggsum_value(&arc_sums.arcstat_meta_used);
/*
@ -4783,7 +4747,7 @@ arc_flush(spa_t *spa, boolean_t retry)
void
arc_reduce_target_size(int64_t to_free)
{
uint64_t asize = aggsum_value(&arc_size);
uint64_t asize = aggsum_value(&arc_sums.arcstat_size);
/*
* All callers want the ARC to actually evict (at least) this much
@ -4836,8 +4800,8 @@ arc_kmem_reap_soon(void)
extern kmem_cache_t *zio_data_buf_cache[];
#ifdef _KERNEL
if ((aggsum_compare(&arc_meta_used, arc_meta_limit) >= 0) &&
zfs_arc_meta_prune) {
if ((aggsum_compare(&arc_sums.arcstat_meta_used,
arc_meta_limit) >= 0) && zfs_arc_meta_prune) {
/*
* We are exceeding our meta-data cache limit.
* Prune some entries to release holds on meta-data.
@ -4940,7 +4904,7 @@ arc_evict_cb(void *arg, zthr_t *zthr)
*/
mutex_enter(&arc_evict_lock);
arc_evict_needed = !zthr_iscancelled(arc_evict_zthr) &&
evicted > 0 && aggsum_compare(&arc_size, arc_c) > 0;
evicted > 0 && aggsum_compare(&arc_sums.arcstat_size, arc_c) > 0;
if (!arc_evict_needed) {
/*
* We're either no longer overflowing, or we
@ -5153,7 +5117,7 @@ arc_adapt(int bytes, arc_state_t *state)
* cache size, increment the target cache size
*/
ASSERT3U(arc_c, >=, 2ULL << SPA_MAXBLOCKSHIFT);
if (aggsum_upper_bound(&arc_size) >=
if (aggsum_upper_bound(&arc_sums.arcstat_size) >=
arc_c - (2ULL << SPA_MAXBLOCKSHIFT)) {
atomic_add_64(&arc_c, (int64_t)bytes);
if (arc_c > arc_c_max)
@ -5186,7 +5150,8 @@ arc_is_overflowing(void)
* in the ARC. In practice, that's in the tens of MB, which is low
* enough to be safe.
*/
return (aggsum_lower_bound(&arc_size) >= (int64_t)arc_c + overflow);
return (aggsum_lower_bound(&arc_sums.arcstat_size) >=
(int64_t)arc_c + overflow);
}
static abd_t *
@ -5355,7 +5320,7 @@ arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag,
* If we are growing the cache, and we are adding anonymous
* data, and we have outgrown arc_p, update arc_p
*/
if (aggsum_upper_bound(&arc_size) < arc_c &&
if (aggsum_upper_bound(&arc_sums.arcstat_size) < arc_c &&
hdr->b_l1hdr.b_state == arc_anon &&
(zfs_refcount_count(&arc_anon->arcs_size) +
zfs_refcount_count(&arc_mru->arcs_size) > arc_p))
@ -5944,6 +5909,12 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
* Embedded BP's have no DVA and require no I/O to "read".
* Create an anonymous arc buf to back it.
*/
if (!zfs_blkptr_verify(spa, bp, zio_flags &
ZIO_FLAG_CONFIG_WRITER, BLK_VERIFY_LOG)) {
rc = SET_ERROR(ECKSUM);
goto out;
}
hdr = buf_hash_find(guid, bp, &hash_lock);
}
@ -6115,17 +6086,6 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
goto out;
}
/*
* Gracefully handle a damaged logical block size as a
* checksum error.
*/
if (lsize > spa_maxblocksize(spa)) {
rc = SET_ERROR(ECKSUM);
if (hash_lock != NULL)
mutex_exit(hash_lock);
goto out;
}
if (hdr == NULL) {
/*
* This block is not in the cache or it has
@ -7245,9 +7205,86 @@ arc_kstat_update(kstat_t *ksp, int rw)
{
arc_stats_t *as = ksp->ks_data;
if (rw == KSTAT_WRITE) {
if (rw == KSTAT_WRITE)
return (SET_ERROR(EACCES));
} else {
as->arcstat_hits.value.ui64 =
wmsum_value(&arc_sums.arcstat_hits);
as->arcstat_misses.value.ui64 =
wmsum_value(&arc_sums.arcstat_misses);
as->arcstat_demand_data_hits.value.ui64 =
wmsum_value(&arc_sums.arcstat_demand_data_hits);
as->arcstat_demand_data_misses.value.ui64 =
wmsum_value(&arc_sums.arcstat_demand_data_misses);
as->arcstat_demand_metadata_hits.value.ui64 =
wmsum_value(&arc_sums.arcstat_demand_metadata_hits);
as->arcstat_demand_metadata_misses.value.ui64 =
wmsum_value(&arc_sums.arcstat_demand_metadata_misses);
as->arcstat_prefetch_data_hits.value.ui64 =
wmsum_value(&arc_sums.arcstat_prefetch_data_hits);
as->arcstat_prefetch_data_misses.value.ui64 =
wmsum_value(&arc_sums.arcstat_prefetch_data_misses);
as->arcstat_prefetch_metadata_hits.value.ui64 =
wmsum_value(&arc_sums.arcstat_prefetch_metadata_hits);
as->arcstat_prefetch_metadata_misses.value.ui64 =
wmsum_value(&arc_sums.arcstat_prefetch_metadata_misses);
as->arcstat_mru_hits.value.ui64 =
wmsum_value(&arc_sums.arcstat_mru_hits);
as->arcstat_mru_ghost_hits.value.ui64 =
wmsum_value(&arc_sums.arcstat_mru_ghost_hits);
as->arcstat_mfu_hits.value.ui64 =
wmsum_value(&arc_sums.arcstat_mfu_hits);
as->arcstat_mfu_ghost_hits.value.ui64 =
wmsum_value(&arc_sums.arcstat_mfu_ghost_hits);
as->arcstat_deleted.value.ui64 =
wmsum_value(&arc_sums.arcstat_deleted);
as->arcstat_mutex_miss.value.ui64 =
wmsum_value(&arc_sums.arcstat_mutex_miss);
as->arcstat_access_skip.value.ui64 =
wmsum_value(&arc_sums.arcstat_access_skip);
as->arcstat_evict_skip.value.ui64 =
wmsum_value(&arc_sums.arcstat_evict_skip);
as->arcstat_evict_not_enough.value.ui64 =
wmsum_value(&arc_sums.arcstat_evict_not_enough);
as->arcstat_evict_l2_cached.value.ui64 =
wmsum_value(&arc_sums.arcstat_evict_l2_cached);
as->arcstat_evict_l2_eligible.value.ui64 =
wmsum_value(&arc_sums.arcstat_evict_l2_eligible);
as->arcstat_evict_l2_eligible_mfu.value.ui64 =
wmsum_value(&arc_sums.arcstat_evict_l2_eligible_mfu);
as->arcstat_evict_l2_eligible_mru.value.ui64 =
wmsum_value(&arc_sums.arcstat_evict_l2_eligible_mru);
as->arcstat_evict_l2_ineligible.value.ui64 =
wmsum_value(&arc_sums.arcstat_evict_l2_ineligible);
as->arcstat_evict_l2_skip.value.ui64 =
wmsum_value(&arc_sums.arcstat_evict_l2_skip);
as->arcstat_hash_collisions.value.ui64 =
wmsum_value(&arc_sums.arcstat_hash_collisions);
as->arcstat_hash_chains.value.ui64 =
wmsum_value(&arc_sums.arcstat_hash_chains);
as->arcstat_size.value.ui64 =
aggsum_value(&arc_sums.arcstat_size);
as->arcstat_compressed_size.value.ui64 =
wmsum_value(&arc_sums.arcstat_compressed_size);
as->arcstat_uncompressed_size.value.ui64 =
wmsum_value(&arc_sums.arcstat_uncompressed_size);
as->arcstat_overhead_size.value.ui64 =
wmsum_value(&arc_sums.arcstat_overhead_size);
as->arcstat_hdr_size.value.ui64 =
wmsum_value(&arc_sums.arcstat_hdr_size);
as->arcstat_data_size.value.ui64 =
wmsum_value(&arc_sums.arcstat_data_size);
as->arcstat_metadata_size.value.ui64 =
wmsum_value(&arc_sums.arcstat_metadata_size);
as->arcstat_dbuf_size.value.ui64 =
wmsum_value(&arc_sums.arcstat_dbuf_size);
#if defined(COMPAT_FREEBSD11)
as->arcstat_other_size.value.ui64 =
wmsum_value(&arc_sums.arcstat_bonus_size) +
aggsum_value(&arc_sums.arcstat_dnode_size) +
wmsum_value(&arc_sums.arcstat_dbuf_size);
#endif
arc_kstat_update_state(arc_anon,
&as->arcstat_anon_size,
&as->arcstat_anon_evictable_data,
@ -7269,23 +7306,94 @@ arc_kstat_update(kstat_t *ksp, int rw)
&as->arcstat_mfu_ghost_evictable_data,
&as->arcstat_mfu_ghost_evictable_metadata);
ARCSTAT(arcstat_size) = aggsum_value(&arc_size);
ARCSTAT(arcstat_meta_used) = aggsum_value(&arc_meta_used);
ARCSTAT(arcstat_data_size) = wmsum_value(&astat_data_size);
ARCSTAT(arcstat_metadata_size) =
wmsum_value(&astat_metadata_size);
ARCSTAT(arcstat_hdr_size) = wmsum_value(&astat_hdr_size);
ARCSTAT(arcstat_l2_hdr_size) = aggsum_value(&astat_l2_hdr_size);
ARCSTAT(arcstat_dbuf_size) = wmsum_value(&astat_dbuf_size);
#if defined(COMPAT_FREEBSD11)
ARCSTAT(arcstat_other_size) = wmsum_value(&astat_bonus_size) +
aggsum_value(&astat_dnode_size) +
wmsum_value(&astat_dbuf_size);
#endif
ARCSTAT(arcstat_dnode_size) = aggsum_value(&astat_dnode_size);
ARCSTAT(arcstat_bonus_size) = wmsum_value(&astat_bonus_size);
ARCSTAT(arcstat_abd_chunk_waste_size) =
wmsum_value(&astat_abd_chunk_waste_size);
as->arcstat_dnode_size.value.ui64 =
aggsum_value(&arc_sums.arcstat_dnode_size);
as->arcstat_bonus_size.value.ui64 =
wmsum_value(&arc_sums.arcstat_bonus_size);
as->arcstat_l2_hits.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_hits);
as->arcstat_l2_misses.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_misses);
as->arcstat_l2_prefetch_asize.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_prefetch_asize);
as->arcstat_l2_mru_asize.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_mru_asize);
as->arcstat_l2_mfu_asize.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_mfu_asize);
as->arcstat_l2_bufc_data_asize.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_bufc_data_asize);
as->arcstat_l2_bufc_metadata_asize.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_bufc_metadata_asize);
as->arcstat_l2_feeds.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_feeds);
as->arcstat_l2_rw_clash.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_rw_clash);
as->arcstat_l2_read_bytes.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_read_bytes);
as->arcstat_l2_write_bytes.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_write_bytes);
as->arcstat_l2_writes_sent.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_writes_sent);
as->arcstat_l2_writes_done.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_writes_done);
as->arcstat_l2_writes_error.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_writes_error);
as->arcstat_l2_writes_lock_retry.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_writes_lock_retry);
as->arcstat_l2_evict_lock_retry.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_evict_lock_retry);
as->arcstat_l2_evict_reading.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_evict_reading);
as->arcstat_l2_evict_l1cached.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_evict_l1cached);
as->arcstat_l2_free_on_write.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_free_on_write);
as->arcstat_l2_abort_lowmem.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_abort_lowmem);
as->arcstat_l2_cksum_bad.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_cksum_bad);
as->arcstat_l2_io_error.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_io_error);
as->arcstat_l2_lsize.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_lsize);
as->arcstat_l2_psize.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_psize);
as->arcstat_l2_hdr_size.value.ui64 =
aggsum_value(&arc_sums.arcstat_l2_hdr_size);
as->arcstat_l2_log_blk_writes.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_log_blk_writes);
as->arcstat_l2_log_blk_asize.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_log_blk_asize);
as->arcstat_l2_log_blk_count.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_log_blk_count);
as->arcstat_l2_rebuild_success.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_rebuild_success);
as->arcstat_l2_rebuild_abort_unsupported.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_rebuild_abort_unsupported);
as->arcstat_l2_rebuild_abort_io_errors.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_rebuild_abort_io_errors);
as->arcstat_l2_rebuild_abort_dh_errors.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_rebuild_abort_dh_errors);
as->arcstat_l2_rebuild_abort_cksum_lb_errors.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_rebuild_abort_cksum_lb_errors);
as->arcstat_l2_rebuild_abort_lowmem.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_rebuild_abort_lowmem);
as->arcstat_l2_rebuild_size.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_rebuild_size);
as->arcstat_l2_rebuild_asize.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_rebuild_asize);
as->arcstat_l2_rebuild_bufs.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_rebuild_bufs);
as->arcstat_l2_rebuild_bufs_precached.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_rebuild_bufs_precached);
as->arcstat_l2_rebuild_log_blks.value.ui64 =
wmsum_value(&arc_sums.arcstat_l2_rebuild_log_blks);
as->arcstat_memory_throttle_count.value.ui64 =
wmsum_value(&arc_sums.arcstat_memory_throttle_count);
as->arcstat_memory_direct_count.value.ui64 =
wmsum_value(&arc_sums.arcstat_memory_direct_count);
as->arcstat_memory_indirect_count.value.ui64 =
wmsum_value(&arc_sums.arcstat_memory_indirect_count);
as->arcstat_memory_all_bytes.value.ui64 =
arc_all_memory();
@ -7293,7 +7401,23 @@ arc_kstat_update(kstat_t *ksp, int rw)
arc_free_memory();
as->arcstat_memory_available_bytes.value.i64 =
arc_available_memory();
}
as->arcstat_prune.value.ui64 =
wmsum_value(&arc_sums.arcstat_prune);
as->arcstat_meta_used.value.ui64 =
aggsum_value(&arc_sums.arcstat_meta_used);
as->arcstat_async_upgrade_sync.value.ui64 =
wmsum_value(&arc_sums.arcstat_async_upgrade_sync);
as->arcstat_demand_hit_predictive_prefetch.value.ui64 =
wmsum_value(&arc_sums.arcstat_demand_hit_predictive_prefetch);
as->arcstat_demand_hit_prescient_prefetch.value.ui64 =
wmsum_value(&arc_sums.arcstat_demand_hit_prescient_prefetch);
as->arcstat_raw_size.value.ui64 =
wmsum_value(&arc_sums.arcstat_raw_size);
as->arcstat_cached_only_in_progress.value.ui64 =
wmsum_value(&arc_sums.arcstat_cached_only_in_progress);
as->arcstat_abd_chunk_waste_size.value.ui64 =
wmsum_value(&arc_sums.arcstat_abd_chunk_waste_size);
return (0);
}
@ -7516,16 +7640,93 @@ arc_state_init(void)
zfs_refcount_create(&arc_mfu_ghost->arcs_size);
zfs_refcount_create(&arc_l2c_only->arcs_size);
aggsum_init(&arc_meta_used, 0);
aggsum_init(&arc_size, 0);
wmsum_init(&astat_data_size, 0);
wmsum_init(&astat_metadata_size, 0);
wmsum_init(&astat_hdr_size, 0);
aggsum_init(&astat_l2_hdr_size, 0);
wmsum_init(&astat_bonus_size, 0);
aggsum_init(&astat_dnode_size, 0);
wmsum_init(&astat_dbuf_size, 0);
wmsum_init(&astat_abd_chunk_waste_size, 0);
wmsum_init(&arc_sums.arcstat_hits, 0);
wmsum_init(&arc_sums.arcstat_misses, 0);
wmsum_init(&arc_sums.arcstat_demand_data_hits, 0);
wmsum_init(&arc_sums.arcstat_demand_data_misses, 0);
wmsum_init(&arc_sums.arcstat_demand_metadata_hits, 0);
wmsum_init(&arc_sums.arcstat_demand_metadata_misses, 0);
wmsum_init(&arc_sums.arcstat_prefetch_data_hits, 0);
wmsum_init(&arc_sums.arcstat_prefetch_data_misses, 0);
wmsum_init(&arc_sums.arcstat_prefetch_metadata_hits, 0);
wmsum_init(&arc_sums.arcstat_prefetch_metadata_misses, 0);
wmsum_init(&arc_sums.arcstat_mru_hits, 0);
wmsum_init(&arc_sums.arcstat_mru_ghost_hits, 0);
wmsum_init(&arc_sums.arcstat_mfu_hits, 0);
wmsum_init(&arc_sums.arcstat_mfu_ghost_hits, 0);
wmsum_init(&arc_sums.arcstat_deleted, 0);
wmsum_init(&arc_sums.arcstat_mutex_miss, 0);
wmsum_init(&arc_sums.arcstat_access_skip, 0);
wmsum_init(&arc_sums.arcstat_evict_skip, 0);
wmsum_init(&arc_sums.arcstat_evict_not_enough, 0);
wmsum_init(&arc_sums.arcstat_evict_l2_cached, 0);
wmsum_init(&arc_sums.arcstat_evict_l2_eligible, 0);
wmsum_init(&arc_sums.arcstat_evict_l2_eligible_mfu, 0);
wmsum_init(&arc_sums.arcstat_evict_l2_eligible_mru, 0);
wmsum_init(&arc_sums.arcstat_evict_l2_ineligible, 0);
wmsum_init(&arc_sums.arcstat_evict_l2_skip, 0);
wmsum_init(&arc_sums.arcstat_hash_collisions, 0);
wmsum_init(&arc_sums.arcstat_hash_chains, 0);
aggsum_init(&arc_sums.arcstat_size, 0);
wmsum_init(&arc_sums.arcstat_compressed_size, 0);
wmsum_init(&arc_sums.arcstat_uncompressed_size, 0);
wmsum_init(&arc_sums.arcstat_overhead_size, 0);
wmsum_init(&arc_sums.arcstat_hdr_size, 0);
wmsum_init(&arc_sums.arcstat_data_size, 0);
wmsum_init(&arc_sums.arcstat_metadata_size, 0);
wmsum_init(&arc_sums.arcstat_dbuf_size, 0);
aggsum_init(&arc_sums.arcstat_dnode_size, 0);
wmsum_init(&arc_sums.arcstat_bonus_size, 0);
wmsum_init(&arc_sums.arcstat_l2_hits, 0);
wmsum_init(&arc_sums.arcstat_l2_misses, 0);
wmsum_init(&arc_sums.arcstat_l2_prefetch_asize, 0);
wmsum_init(&arc_sums.arcstat_l2_mru_asize, 0);
wmsum_init(&arc_sums.arcstat_l2_mfu_asize, 0);
wmsum_init(&arc_sums.arcstat_l2_bufc_data_asize, 0);
wmsum_init(&arc_sums.arcstat_l2_bufc_metadata_asize, 0);
wmsum_init(&arc_sums.arcstat_l2_feeds, 0);
wmsum_init(&arc_sums.arcstat_l2_rw_clash, 0);
wmsum_init(&arc_sums.arcstat_l2_read_bytes, 0);
wmsum_init(&arc_sums.arcstat_l2_write_bytes, 0);
wmsum_init(&arc_sums.arcstat_l2_writes_sent, 0);
wmsum_init(&arc_sums.arcstat_l2_writes_done, 0);
wmsum_init(&arc_sums.arcstat_l2_writes_error, 0);
wmsum_init(&arc_sums.arcstat_l2_writes_lock_retry, 0);
wmsum_init(&arc_sums.arcstat_l2_evict_lock_retry, 0);
wmsum_init(&arc_sums.arcstat_l2_evict_reading, 0);
wmsum_init(&arc_sums.arcstat_l2_evict_l1cached, 0);
wmsum_init(&arc_sums.arcstat_l2_free_on_write, 0);
wmsum_init(&arc_sums.arcstat_l2_abort_lowmem, 0);
wmsum_init(&arc_sums.arcstat_l2_cksum_bad, 0);
wmsum_init(&arc_sums.arcstat_l2_io_error, 0);
wmsum_init(&arc_sums.arcstat_l2_lsize, 0);
wmsum_init(&arc_sums.arcstat_l2_psize, 0);
aggsum_init(&arc_sums.arcstat_l2_hdr_size, 0);
wmsum_init(&arc_sums.arcstat_l2_log_blk_writes, 0);
wmsum_init(&arc_sums.arcstat_l2_log_blk_asize, 0);
wmsum_init(&arc_sums.arcstat_l2_log_blk_count, 0);
wmsum_init(&arc_sums.arcstat_l2_rebuild_success, 0);
wmsum_init(&arc_sums.arcstat_l2_rebuild_abort_unsupported, 0);
wmsum_init(&arc_sums.arcstat_l2_rebuild_abort_io_errors, 0);
wmsum_init(&arc_sums.arcstat_l2_rebuild_abort_dh_errors, 0);
wmsum_init(&arc_sums.arcstat_l2_rebuild_abort_cksum_lb_errors, 0);
wmsum_init(&arc_sums.arcstat_l2_rebuild_abort_lowmem, 0);
wmsum_init(&arc_sums.arcstat_l2_rebuild_size, 0);
wmsum_init(&arc_sums.arcstat_l2_rebuild_asize, 0);
wmsum_init(&arc_sums.arcstat_l2_rebuild_bufs, 0);
wmsum_init(&arc_sums.arcstat_l2_rebuild_bufs_precached, 0);
wmsum_init(&arc_sums.arcstat_l2_rebuild_log_blks, 0);
wmsum_init(&arc_sums.arcstat_memory_throttle_count, 0);
wmsum_init(&arc_sums.arcstat_memory_direct_count, 0);
wmsum_init(&arc_sums.arcstat_memory_indirect_count, 0);
wmsum_init(&arc_sums.arcstat_prune, 0);
aggsum_init(&arc_sums.arcstat_meta_used, 0);
wmsum_init(&arc_sums.arcstat_async_upgrade_sync, 0);
wmsum_init(&arc_sums.arcstat_demand_hit_predictive_prefetch, 0);
wmsum_init(&arc_sums.arcstat_demand_hit_prescient_prefetch, 0);
wmsum_init(&arc_sums.arcstat_raw_size, 0);
wmsum_init(&arc_sums.arcstat_cached_only_in_progress, 0);
wmsum_init(&arc_sums.arcstat_abd_chunk_waste_size, 0);
arc_anon->arcs_state = ARC_STATE_ANON;
arc_mru->arcs_state = ARC_STATE_MRU;
@ -7569,16 +7770,93 @@ arc_state_fini(void)
multilist_destroy(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA]);
multilist_destroy(&arc_l2c_only->arcs_list[ARC_BUFC_DATA]);
aggsum_fini(&arc_meta_used);
aggsum_fini(&arc_size);
wmsum_fini(&astat_data_size);
wmsum_fini(&astat_metadata_size);
wmsum_fini(&astat_hdr_size);
aggsum_fini(&astat_l2_hdr_size);
wmsum_fini(&astat_bonus_size);
aggsum_fini(&astat_dnode_size);
wmsum_fini(&astat_dbuf_size);
wmsum_fini(&astat_abd_chunk_waste_size);
wmsum_fini(&arc_sums.arcstat_hits);
wmsum_fini(&arc_sums.arcstat_misses);
wmsum_fini(&arc_sums.arcstat_demand_data_hits);
wmsum_fini(&arc_sums.arcstat_demand_data_misses);
wmsum_fini(&arc_sums.arcstat_demand_metadata_hits);
wmsum_fini(&arc_sums.arcstat_demand_metadata_misses);
wmsum_fini(&arc_sums.arcstat_prefetch_data_hits);
wmsum_fini(&arc_sums.arcstat_prefetch_data_misses);
wmsum_fini(&arc_sums.arcstat_prefetch_metadata_hits);
wmsum_fini(&arc_sums.arcstat_prefetch_metadata_misses);
wmsum_fini(&arc_sums.arcstat_mru_hits);
wmsum_fini(&arc_sums.arcstat_mru_ghost_hits);
wmsum_fini(&arc_sums.arcstat_mfu_hits);
wmsum_fini(&arc_sums.arcstat_mfu_ghost_hits);
wmsum_fini(&arc_sums.arcstat_deleted);
wmsum_fini(&arc_sums.arcstat_mutex_miss);
wmsum_fini(&arc_sums.arcstat_access_skip);
wmsum_fini(&arc_sums.arcstat_evict_skip);
wmsum_fini(&arc_sums.arcstat_evict_not_enough);
wmsum_fini(&arc_sums.arcstat_evict_l2_cached);
wmsum_fini(&arc_sums.arcstat_evict_l2_eligible);
wmsum_fini(&arc_sums.arcstat_evict_l2_eligible_mfu);
wmsum_fini(&arc_sums.arcstat_evict_l2_eligible_mru);
wmsum_fini(&arc_sums.arcstat_evict_l2_ineligible);
wmsum_fini(&arc_sums.arcstat_evict_l2_skip);
wmsum_fini(&arc_sums.arcstat_hash_collisions);
wmsum_fini(&arc_sums.arcstat_hash_chains);
aggsum_fini(&arc_sums.arcstat_size);
wmsum_fini(&arc_sums.arcstat_compressed_size);
wmsum_fini(&arc_sums.arcstat_uncompressed_size);
wmsum_fini(&arc_sums.arcstat_overhead_size);
wmsum_fini(&arc_sums.arcstat_hdr_size);
wmsum_fini(&arc_sums.arcstat_data_size);
wmsum_fini(&arc_sums.arcstat_metadata_size);
wmsum_fini(&arc_sums.arcstat_dbuf_size);
aggsum_fini(&arc_sums.arcstat_dnode_size);
wmsum_fini(&arc_sums.arcstat_bonus_size);
wmsum_fini(&arc_sums.arcstat_l2_hits);
wmsum_fini(&arc_sums.arcstat_l2_misses);
wmsum_fini(&arc_sums.arcstat_l2_prefetch_asize);
wmsum_fini(&arc_sums.arcstat_l2_mru_asize);
wmsum_fini(&arc_sums.arcstat_l2_mfu_asize);
wmsum_fini(&arc_sums.arcstat_l2_bufc_data_asize);
wmsum_fini(&arc_sums.arcstat_l2_bufc_metadata_asize);
wmsum_fini(&arc_sums.arcstat_l2_feeds);
wmsum_fini(&arc_sums.arcstat_l2_rw_clash);
wmsum_fini(&arc_sums.arcstat_l2_read_bytes);
wmsum_fini(&arc_sums.arcstat_l2_write_bytes);
wmsum_fini(&arc_sums.arcstat_l2_writes_sent);
wmsum_fini(&arc_sums.arcstat_l2_writes_done);
wmsum_fini(&arc_sums.arcstat_l2_writes_error);
wmsum_fini(&arc_sums.arcstat_l2_writes_lock_retry);
wmsum_fini(&arc_sums.arcstat_l2_evict_lock_retry);
wmsum_fini(&arc_sums.arcstat_l2_evict_reading);
wmsum_fini(&arc_sums.arcstat_l2_evict_l1cached);
wmsum_fini(&arc_sums.arcstat_l2_free_on_write);
wmsum_fini(&arc_sums.arcstat_l2_abort_lowmem);
wmsum_fini(&arc_sums.arcstat_l2_cksum_bad);
wmsum_fini(&arc_sums.arcstat_l2_io_error);
wmsum_fini(&arc_sums.arcstat_l2_lsize);
wmsum_fini(&arc_sums.arcstat_l2_psize);
aggsum_fini(&arc_sums.arcstat_l2_hdr_size);
wmsum_fini(&arc_sums.arcstat_l2_log_blk_writes);
wmsum_fini(&arc_sums.arcstat_l2_log_blk_asize);
wmsum_fini(&arc_sums.arcstat_l2_log_blk_count);
wmsum_fini(&arc_sums.arcstat_l2_rebuild_success);
wmsum_fini(&arc_sums.arcstat_l2_rebuild_abort_unsupported);
wmsum_fini(&arc_sums.arcstat_l2_rebuild_abort_io_errors);
wmsum_fini(&arc_sums.arcstat_l2_rebuild_abort_dh_errors);
wmsum_fini(&arc_sums.arcstat_l2_rebuild_abort_cksum_lb_errors);
wmsum_fini(&arc_sums.arcstat_l2_rebuild_abort_lowmem);
wmsum_fini(&arc_sums.arcstat_l2_rebuild_size);
wmsum_fini(&arc_sums.arcstat_l2_rebuild_asize);
wmsum_fini(&arc_sums.arcstat_l2_rebuild_bufs);
wmsum_fini(&arc_sums.arcstat_l2_rebuild_bufs_precached);
wmsum_fini(&arc_sums.arcstat_l2_rebuild_log_blks);
wmsum_fini(&arc_sums.arcstat_memory_throttle_count);
wmsum_fini(&arc_sums.arcstat_memory_direct_count);
wmsum_fini(&arc_sums.arcstat_memory_indirect_count);
wmsum_fini(&arc_sums.arcstat_prune);
aggsum_fini(&arc_sums.arcstat_meta_used);
wmsum_fini(&arc_sums.arcstat_async_upgrade_sync);
wmsum_fini(&arc_sums.arcstat_demand_hit_predictive_prefetch);
wmsum_fini(&arc_sums.arcstat_demand_hit_prescient_prefetch);
wmsum_fini(&arc_sums.arcstat_raw_size);
wmsum_fini(&arc_sums.arcstat_cached_only_in_progress);
wmsum_fini(&arc_sums.arcstat_abd_chunk_waste_size);
}
uint64_t
@ -7628,8 +7906,6 @@ arc_init(void)
/* Set min to 1/2 of arc_c_min */
arc_meta_min = 1ULL << SPA_MAXBLOCKSHIFT;
/* Initialize maximum observed usage to zero */
arc_meta_max = 0;
/*
* Set arc_meta_limit to a percent of arc_c_max with a floor of
* arc_meta_min, and a ceiling of arc_c_max.
@ -8355,7 +8631,7 @@ l2arc_write_done(zio_t *zio)
}
}
atomic_inc_64(&l2arc_writes_done);
ARCSTAT_BUMP(arcstat_l2_writes_done);
list_remove(buflist, head);
ASSERT(!HDR_HAS_L1HDR(head));
kmem_cache_free(hdr_l2only_cache, head);
@ -9327,7 +9603,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
static boolean_t
l2arc_hdr_limit_reached(void)
{
int64_t s = aggsum_upper_bound(&astat_l2_hdr_size);
int64_t s = aggsum_upper_bound(&arc_sums.arcstat_l2_hdr_size);
return (arc_reclaim_needed() || (s > arc_meta_limit * 3 / 4) ||
(s > (arc_warm ? arc_c : arc_c_max) * l2arc_meta_percent / 100));
@ -9661,8 +9937,6 @@ l2arc_init(void)
{
l2arc_thread_exit = 0;
l2arc_ndev = 0;
l2arc_writes_sent = 0;
l2arc_writes_done = 0;
mutex_init(&l2arc_feed_thr_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&l2arc_feed_thr_cv, NULL, CV_DEFAULT, NULL);

View File

@ -52,6 +52,7 @@
#include <sys/vdev.h>
#include <cityhash.h>
#include <sys/spa_impl.h>
#include <sys/wmsum.h>
kstat_t *dbuf_ksp;
@ -135,8 +136,22 @@ dbuf_stats_t dbuf_stats = {
{ "metadata_cache_overflow", KSTAT_DATA_UINT64 }
};
struct {
wmsum_t cache_count;
wmsum_t cache_total_evicts;
wmsum_t cache_levels[DN_MAX_LEVELS];
wmsum_t cache_levels_bytes[DN_MAX_LEVELS];
wmsum_t hash_hits;
wmsum_t hash_misses;
wmsum_t hash_collisions;
wmsum_t hash_chains;
wmsum_t hash_insert_race;
wmsum_t metadata_cache_count;
wmsum_t metadata_cache_overflow;
} dbuf_sums;
#define DBUF_STAT_INCR(stat, val) \
atomic_add_64(&dbuf_stats.stat.value.ui64, (val));
wmsum_add(&dbuf_sums.stat, val);
#define DBUF_STAT_DECR(stat, val) \
DBUF_STAT_INCR(stat, -(val));
#define DBUF_STAT_BUMP(stat) \
@ -297,8 +312,6 @@ dbuf_dest(void *vdb, void *unused)
*/
static dbuf_hash_table_t dbuf_hash_table;
static uint64_t dbuf_hash_count;
/*
* We use Cityhash for this. It's fast, and has good hash properties without
* requiring any large static buffers.
@ -409,8 +422,8 @@ dbuf_hash_insert(dmu_buf_impl_t *db)
db->db_hash_next = h->hash_table[idx];
h->hash_table[idx] = db;
mutex_exit(DBUF_HASH_MUTEX(h, idx));
atomic_inc_64(&dbuf_hash_count);
DBUF_STAT_MAX(hash_elements_max, dbuf_hash_count);
uint64_t he = atomic_inc_64_nv(&dbuf_stats.hash_elements.value.ui64);
DBUF_STAT_MAX(hash_elements_max, he);
return (NULL);
}
@ -483,7 +496,7 @@ dbuf_hash_remove(dmu_buf_impl_t *db)
h->hash_table[idx]->db_hash_next == NULL)
DBUF_STAT_BUMPDOWN(hash_chains);
mutex_exit(DBUF_HASH_MUTEX(h, idx));
atomic_dec_64(&dbuf_hash_count);
atomic_dec_64(&dbuf_stats.hash_elements.value.ui64);
}
typedef enum {
@ -767,19 +780,40 @@ dbuf_kstat_update(kstat_t *ksp, int rw)
{
dbuf_stats_t *ds = ksp->ks_data;
if (rw == KSTAT_WRITE) {
if (rw == KSTAT_WRITE)
return (SET_ERROR(EACCES));
} else {
ds->metadata_cache_size_bytes.value.ui64 = zfs_refcount_count(
&dbuf_caches[DB_DBUF_METADATA_CACHE].size);
ds->cache_count.value.ui64 =
wmsum_value(&dbuf_sums.cache_count);
ds->cache_size_bytes.value.ui64 =
zfs_refcount_count(&dbuf_caches[DB_DBUF_CACHE].size);
ds->cache_target_bytes.value.ui64 = dbuf_cache_target_bytes();
ds->cache_hiwater_bytes.value.ui64 = dbuf_cache_hiwater_bytes();
ds->cache_lowater_bytes.value.ui64 = dbuf_cache_lowater_bytes();
ds->hash_elements.value.ui64 = dbuf_hash_count;
ds->cache_total_evicts.value.ui64 =
wmsum_value(&dbuf_sums.cache_total_evicts);
for (int i = 0; i < DN_MAX_LEVELS; i++) {
ds->cache_levels[i].value.ui64 =
wmsum_value(&dbuf_sums.cache_levels[i]);
ds->cache_levels_bytes[i].value.ui64 =
wmsum_value(&dbuf_sums.cache_levels_bytes[i]);
}
ds->hash_hits.value.ui64 =
wmsum_value(&dbuf_sums.hash_hits);
ds->hash_misses.value.ui64 =
wmsum_value(&dbuf_sums.hash_misses);
ds->hash_collisions.value.ui64 =
wmsum_value(&dbuf_sums.hash_collisions);
ds->hash_chains.value.ui64 =
wmsum_value(&dbuf_sums.hash_chains);
ds->hash_insert_race.value.ui64 =
wmsum_value(&dbuf_sums.hash_insert_race);
ds->metadata_cache_count.value.ui64 =
wmsum_value(&dbuf_sums.metadata_cache_count);
ds->metadata_cache_size_bytes.value.ui64 = zfs_refcount_count(
&dbuf_caches[DB_DBUF_METADATA_CACHE].size);
ds->metadata_cache_overflow.value.ui64 =
wmsum_value(&dbuf_sums.metadata_cache_overflow);
return (0);
}
@ -846,6 +880,20 @@ dbuf_init(void)
dbuf_cache_evict_thread = thread_create(NULL, 0, dbuf_evict_thread,
NULL, 0, &p0, TS_RUN, minclsyspri);
wmsum_init(&dbuf_sums.cache_count, 0);
wmsum_init(&dbuf_sums.cache_total_evicts, 0);
for (i = 0; i < DN_MAX_LEVELS; i++) {
wmsum_init(&dbuf_sums.cache_levels[i], 0);
wmsum_init(&dbuf_sums.cache_levels_bytes[i], 0);
}
wmsum_init(&dbuf_sums.hash_hits, 0);
wmsum_init(&dbuf_sums.hash_misses, 0);
wmsum_init(&dbuf_sums.hash_collisions, 0);
wmsum_init(&dbuf_sums.hash_chains, 0);
wmsum_init(&dbuf_sums.hash_insert_race, 0);
wmsum_init(&dbuf_sums.metadata_cache_count, 0);
wmsum_init(&dbuf_sums.metadata_cache_overflow, 0);
dbuf_ksp = kstat_create("zfs", 0, "dbufstats", "misc",
KSTAT_TYPE_NAMED, sizeof (dbuf_stats) / sizeof (kstat_named_t),
KSTAT_FLAG_VIRTUAL);
@ -908,6 +956,20 @@ dbuf_fini(void)
kstat_delete(dbuf_ksp);
dbuf_ksp = NULL;
}
wmsum_fini(&dbuf_sums.cache_count);
wmsum_fini(&dbuf_sums.cache_total_evicts);
for (i = 0; i < DN_MAX_LEVELS; i++) {
wmsum_fini(&dbuf_sums.cache_levels[i]);
wmsum_fini(&dbuf_sums.cache_levels_bytes[i]);
}
wmsum_fini(&dbuf_sums.hash_hits);
wmsum_fini(&dbuf_sums.hash_misses);
wmsum_fini(&dbuf_sums.hash_collisions);
wmsum_fini(&dbuf_sums.hash_chains);
wmsum_fini(&dbuf_sums.hash_insert_race);
wmsum_fini(&dbuf_sums.metadata_cache_count);
wmsum_fini(&dbuf_sums.metadata_cache_overflow);
}
/*
@ -3708,9 +3770,11 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag, boolean_t evicting)
db->db_caching_status = dcs;
multilist_insert(&dbuf_caches[dcs].cache, db);
uint64_t db_size = db->db.db_size;
size = zfs_refcount_add_many(
&dbuf_caches[dcs].size,
db->db.db_size, db);
&dbuf_caches[dcs].size, db_size, db);
uint8_t db_level = db->db_level;
mutex_exit(&db->db_mtx);
if (dcs == DB_DBUF_METADATA_CACHE) {
DBUF_STAT_BUMP(metadata_cache_count);
@ -3718,16 +3782,14 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag, boolean_t evicting)
metadata_cache_size_bytes_max,
size);
} else {
DBUF_STAT_BUMP(
cache_levels[db->db_level]);
DBUF_STAT_BUMP(cache_count);
DBUF_STAT_INCR(
cache_levels_bytes[db->db_level],
db->db.db_size);
DBUF_STAT_MAX(cache_size_bytes_max,
size);
DBUF_STAT_BUMP(cache_levels[db_level]);
DBUF_STAT_INCR(
cache_levels_bytes[db_level],
db_size);
}
mutex_exit(&db->db_mtx);
if (dcs == DB_DBUF_CACHE && !evicting)
dbuf_evict_notify(size);

View File

@ -34,6 +34,7 @@
#include <sys/dmu.h>
#include <sys/dbuf.h>
#include <sys/kstat.h>
#include <sys/wmsum.h>
/*
* This tunable disables predictive prefetch. Note that it leaves "prescient"
@ -69,27 +70,54 @@ static zfetch_stats_t zfetch_stats = {
{ "io_issued", KSTAT_DATA_UINT64 },
};
struct {
wmsum_t zfetchstat_hits;
wmsum_t zfetchstat_misses;
wmsum_t zfetchstat_max_streams;
wmsum_t zfetchstat_io_issued;
} zfetch_sums;
#define ZFETCHSTAT_BUMP(stat) \
atomic_inc_64(&zfetch_stats.stat.value.ui64)
wmsum_add(&zfetch_sums.stat, 1)
#define ZFETCHSTAT_ADD(stat, val) \
atomic_add_64(&zfetch_stats.stat.value.ui64, val)
#define ZFETCHSTAT_SET(stat, val) \
zfetch_stats.stat.value.ui64 = val
#define ZFETCHSTAT_GET(stat) \
zfetch_stats.stat.value.ui64
wmsum_add(&zfetch_sums.stat, val)
kstat_t *zfetch_ksp;
static int
zfetch_kstats_update(kstat_t *ksp, int rw)
{
zfetch_stats_t *zs = ksp->ks_data;
if (rw == KSTAT_WRITE)
return (EACCES);
zs->zfetchstat_hits.value.ui64 =
wmsum_value(&zfetch_sums.zfetchstat_hits);
zs->zfetchstat_misses.value.ui64 =
wmsum_value(&zfetch_sums.zfetchstat_misses);
zs->zfetchstat_max_streams.value.ui64 =
wmsum_value(&zfetch_sums.zfetchstat_max_streams);
zs->zfetchstat_io_issued.value.ui64 =
wmsum_value(&zfetch_sums.zfetchstat_io_issued);
return (0);
}
void
zfetch_init(void)
{
wmsum_init(&zfetch_sums.zfetchstat_hits, 0);
wmsum_init(&zfetch_sums.zfetchstat_misses, 0);
wmsum_init(&zfetch_sums.zfetchstat_max_streams, 0);
wmsum_init(&zfetch_sums.zfetchstat_io_issued, 0);
zfetch_ksp = kstat_create("zfs", 0, "zfetchstats", "misc",
KSTAT_TYPE_NAMED, sizeof (zfetch_stats) / sizeof (kstat_named_t),
KSTAT_FLAG_VIRTUAL);
if (zfetch_ksp != NULL) {
zfetch_ksp->ks_data = &zfetch_stats;
zfetch_ksp->ks_update = zfetch_kstats_update;
kstat_install(zfetch_ksp);
}
}
@ -101,6 +129,11 @@ zfetch_fini(void)
kstat_delete(zfetch_ksp);
zfetch_ksp = NULL;
}
wmsum_fini(&zfetch_sums.zfetchstat_hits);
wmsum_fini(&zfetch_sums.zfetchstat_misses);
wmsum_fini(&zfetch_sums.zfetchstat_max_streams);
wmsum_fini(&zfetch_sums.zfetchstat_io_issued);
}
/*

View File

@ -1132,7 +1132,8 @@ vdev_draid_min_asize(vdev_t *vd)
ASSERT3P(vd->vdev_ops, ==, &vdev_draid_ops);
return ((vd->vdev_min_asize + vdc->vdc_ndisks - 1) / (vdc->vdc_ndisks));
return (VDEV_DRAID_REFLOW_RESERVE +
(vd->vdev_min_asize + vdc->vdc_ndisks - 1) / (vdc->vdc_ndisks));
}
/*

View File

@ -1107,9 +1107,6 @@ zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
{
zio_t *zio;
(void) zfs_blkptr_verify(spa, bp, flags & ZIO_FLAG_CONFIG_WRITER,
BLK_VERIFY_HALT);
zio = zio_create(pio, spa, BP_PHYSICAL_BIRTH(bp), bp,
data, size, size, done, private,
ZIO_TYPE_READ, priority, flags, NULL, 0, zb,