From 570827e129ed81e066e894530bbe24642f473154 Mon Sep 17 00:00:00 2001
From: Brian Behlendorf
Date: Fri, 20 Jan 2012 10:58:57 -0800
Subject: [PATCH] Add 'dmu_tx' kstats entry

Keep counters for the various reasons that a thread may end up
in txg_wait_open() waiting on a new txg.  This can be useful when
attempting to determine why a particular workload is underperforming.

Signed-off-by: Brian Behlendorf
---
 include/sys/dmu_tx.h     | 28 ++++++++++++++++++++++
 module/zfs/arc.c         |  8 ++++++-
 module/zfs/dmu.c         |  2 ++
 module/zfs/dmu_tx.c      | 51 ++++++++++++++++++++++++++++++++++++++--
 module/zfs/dsl_dataset.c |  2 ++
 module/zfs/dsl_pool.c    |  4 +++-
 module/zfs/txg.c         |  2 ++
 7 files changed, 93 insertions(+), 4 deletions(-)

diff --git a/include/sys/dmu_tx.h b/include/sys/dmu_tx.h
index c5ea50fa8d82..d87a09beca3e 100644
--- a/include/sys/dmu_tx.h
+++ b/include/sys/dmu_tx.h
@@ -104,6 +104,31 @@ typedef struct dmu_tx_callback {
 	void		*dcb_data;	/* caller private data */
 } dmu_tx_callback_t;
 
+/*
+ * Used for dmu tx kstat.
+ */
+typedef struct dmu_tx_stats {
+	kstat_named_t dmu_tx_assigned;
+	kstat_named_t dmu_tx_delay;
+	kstat_named_t dmu_tx_error;
+	kstat_named_t dmu_tx_suspended;
+	kstat_named_t dmu_tx_group;
+	kstat_named_t dmu_tx_how;
+	kstat_named_t dmu_tx_memory_reserve;
+	kstat_named_t dmu_tx_memory_reclaim;
+	kstat_named_t dmu_tx_memory_inflight;
+	kstat_named_t dmu_tx_dirty_throttle;
+	kstat_named_t dmu_tx_write_limit;
+	kstat_named_t dmu_tx_quota;
+} dmu_tx_stats_t;
+
+extern dmu_tx_stats_t dmu_tx_stats;
+
+#define	DMU_TX_STAT_INCR(stat, val) \
+	atomic_add_64(&dmu_tx_stats.stat.value.ui64, (val));
+#define	DMU_TX_STAT_BUMP(stat) \
+	DMU_TX_STAT_INCR(stat, 1);
+
 /*
  * These routines are defined in dmu.h, and are called by the user.
  */
@@ -141,6 +166,9 @@ void dmu_tx_hold_space(dmu_tx_t *tx, uint64_t space);
 #define	DMU_TX_DIRTY_BUF(tx, db)
 #endif
 
+void dmu_tx_init(void);
+void dmu_tx_fini(void);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/module/zfs/arc.c b/module/zfs/arc.c
index cfd4c7981e1d..10317b64242b 100644
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@@ -138,6 +138,7 @@
 #endif
 #include
 #include
+#include <sys/dmu_tx.h>
 #include
 
 static kmutex_t arc_reclaim_thr_lock;
@@ -3584,6 +3585,7 @@
 		} else if (page_load > 0 && arc_reclaim_needed()) {
 			/* memory is low, delay before restarting */
 			ARCSTAT_INCR(arcstat_memory_throttle_count, 1);
+			DMU_TX_STAT_BUMP(dmu_tx_memory_reclaim);
 			return (EAGAIN);
 		}
 		page_load = 0;
@@ -3599,6 +3601,7 @@
 
 	if (inflight_data > available_memory / 4) {
 		ARCSTAT_INCR(arcstat_memory_throttle_count, 1);
+		DMU_TX_STAT_BUMP(dmu_tx_memory_inflight);
 		return (ERESTART);
 	}
 #endif
@@ -3629,8 +3632,10 @@ arc_tempreserve_space(uint64_t reserve, uint64_t txg)
 #endif
 	if (reserve > arc_c/4 && !arc_no_grow)
 		arc_c = MIN(arc_c_max, reserve * 4);
-	if (reserve > arc_c)
+	if (reserve > arc_c) {
+		DMU_TX_STAT_BUMP(dmu_tx_memory_reserve);
 		return (ENOMEM);
+	}
 
 	/*
 	 * Don't count loaned bufs as in flight dirty data to prevent long
@@ -3663,6 +3668,7 @@ arc_tempreserve_space(uint64_t reserve, uint64_t txg)
 		    arc_anon->arcs_lsize[ARC_BUFC_METADATA]>>10,
 		    arc_anon->arcs_lsize[ARC_BUFC_DATA]>>10,
 		    reserve>>10, arc_c>>10);
+		DMU_TX_STAT_BUMP(dmu_tx_dirty_throttle);
 		return (ERESTART);
 	}
 	atomic_add_64(&arc_tempreserve, reserve);
diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c
index ae68c717bdee..9702d50224d0 100644
--- a/module/zfs/dmu.c
+++ b/module/zfs/dmu.c
@@ -1861,6 +1861,7 @@ dmu_init(void)
 	dnode_init();
 	dbuf_init();
 	zfetch_init();
+	dmu_tx_init();
 	arc_init();
 	l2arc_init();
 }
@@ -1870,6 +1871,7 @@ dmu_fini(void)
 {
 	l2arc_fini();
 	arc_fini();
+	dmu_tx_fini();
 	zfetch_fini();
 	dbuf_fini();
 	dnode_fini();
diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c
index fa64b3af602b..17eb52708fbb 100644
--- a/module/zfs/dmu_tx.c
+++ b/module/zfs/dmu_tx.c
@@ -40,6 +40,22 @@
 typedef void (*dmu_tx_hold_func_t)(dmu_tx_t *tx, struct dnode *dn,
     uint64_t arg1, uint64_t arg2);
 
+dmu_tx_stats_t dmu_tx_stats = {
+	{ "dmu_tx_assigned", KSTAT_DATA_UINT64 },
+	{ "dmu_tx_delay", KSTAT_DATA_UINT64 },
+	{ "dmu_tx_error", KSTAT_DATA_UINT64 },
+	{ "dmu_tx_suspended", KSTAT_DATA_UINT64 },
+	{ "dmu_tx_group", KSTAT_DATA_UINT64 },
+	{ "dmu_tx_how", KSTAT_DATA_UINT64 },
+	{ "dmu_tx_memory_reserve", KSTAT_DATA_UINT64 },
+	{ "dmu_tx_memory_reclaim", KSTAT_DATA_UINT64 },
+	{ "dmu_tx_memory_inflight", KSTAT_DATA_UINT64 },
+	{ "dmu_tx_dirty_throttle", KSTAT_DATA_UINT64 },
+	{ "dmu_tx_write_limit", KSTAT_DATA_UINT64 },
+	{ "dmu_tx_quota", KSTAT_DATA_UINT64 },
+};
+
+static kstat_t *dmu_tx_ksp;
 
 dmu_tx_t *
 dmu_tx_create_dd(dsl_dir_t *dd)
@@ -899,10 +915,14 @@ dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how)
 
 	ASSERT3U(tx->tx_txg, ==, 0);
 
-	if (tx->tx_err)
+	if (tx->tx_err) {
+		DMU_TX_STAT_BUMP(dmu_tx_error);
 		return (tx->tx_err);
+	}
 
 	if (spa_suspended(spa)) {
+		DMU_TX_STAT_BUMP(dmu_tx_suspended);
+
 		/*
 		 * If the user has indicated a blocking failure mode
 		 * then return ERESTART which will block in dmu_tx_wait().
@@ -937,6 +957,7 @@ dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how)
 		if (dn->dn_assigned_txg == tx->tx_txg - 1) {
 			mutex_exit(&dn->dn_mtx);
 			tx->tx_needassign_txh = txh;
+			DMU_TX_STAT_BUMP(dmu_tx_group);
 			return (ERESTART);
 		}
 		if (dn->dn_assigned_txg == 0)
@@ -957,8 +978,10 @@ dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how)
 	 * NB: This check must be after we've held the dnodes, so that
 	 * the dmu_tx_unassign() logic will work properly
 	 */
-	if (txg_how >= TXG_INITIAL && txg_how != tx->tx_txg)
+	if (txg_how >= TXG_INITIAL && txg_how != tx->tx_txg) {
+		DMU_TX_STAT_BUMP(dmu_tx_how);
 		return (ERESTART);
+	}
 
 	/*
 	 * If a snapshot has been taken since we made our estimates,
@@ -1000,6 +1023,8 @@
 		return (err);
 	}
 
+	DMU_TX_STAT_BUMP(dmu_tx_assigned);
+
 	return (0);
 }
@@ -1382,6 +1407,28 @@ dmu_tx_hold_sa(dmu_tx_t *tx, sa_handle_t *hdl, boolean_t may_grow)
 	}
 }
 
+void
+dmu_tx_init(void)
+{
+	dmu_tx_ksp = kstat_create("zfs", 0, "dmu_tx", "misc",
+	    KSTAT_TYPE_NAMED, sizeof (dmu_tx_stats) / sizeof (kstat_named_t),
+	    KSTAT_FLAG_VIRTUAL);
+
+	if (dmu_tx_ksp != NULL) {
+		dmu_tx_ksp->ks_data = &dmu_tx_stats;
+		kstat_install(dmu_tx_ksp);
+	}
+}
+
+void
+dmu_tx_fini(void)
+{
+	if (dmu_tx_ksp != NULL) {
+		kstat_delete(dmu_tx_ksp);
+		dmu_tx_ksp = NULL;
+	}
+}
+
 #if defined(_KERNEL) && defined(HAVE_SPL)
 EXPORT_SYMBOL(dmu_tx_create);
 EXPORT_SYMBOL(dmu_tx_hold_write);
diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c
index 26362c95c1b6..718c3ad52b27 100644
--- a/module/zfs/dsl_dataset.c
+++ b/module/zfs/dsl_dataset.c
@@ -3290,6 +3290,8 @@ dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota,
 			error = ERESTART;
 		else
 			error = EDQUOT;
+
+		DMU_TX_STAT_BUMP(dmu_tx_quota);
 	}
 	mutex_exit(&ds->ds_lock);
diff --git a/module/zfs/dsl_pool.c b/module/zfs/dsl_pool.c
index ba42f0bdddb9..d428b7ad7398 100644
--- a/module/zfs/dsl_pool.c
+++ b/module/zfs/dsl_pool.c
@@ -517,8 +517,10 @@ dsl_pool_tempreserve_space(dsl_pool_t *dp, uint64_t space, dmu_tx_t *tx)
 		reserved = dp->dp_space_towrite[tx->tx_txg & TXG_MASK]
 		    + dp->dp_tempreserved[tx->tx_txg & TXG_MASK] / 2;
 
-		if (reserved && reserved > write_limit)
+		if (reserved && reserved > write_limit) {
+			DMU_TX_STAT_BUMP(dmu_tx_write_limit);
 			return (ERESTART);
+		}
 	}
 
 	atomic_add_64(&dp->dp_tempreserved[tx->tx_txg & TXG_MASK], space);
diff --git a/module/zfs/txg.c b/module/zfs/txg.c
index d0d2b1716e88..6e64adf9376e 100644
--- a/module/zfs/txg.c
+++ b/module/zfs/txg.c
@@ -524,6 +524,8 @@ txg_delay(dsl_pool_t *dp, uint64_t txg, int ticks)
 		(void) cv_timedwait(&tx->tx_quiesce_more_cv,
 		    &tx->tx_sync_lock, timeout);
 
+	DMU_TX_STAT_BUMP(dmu_tx_delay);
+
 	mutex_exit(&tx->tx_sync_lock);
 }
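
Usage note (not part of the patch): on Linux the SPL exports named kstats as
text files, so the "dmu_tx" entry registered by dmu_tx_init() is expected to
show up as /proc/spl/kstat/zfs/dmu_tx. A minimal user-space sketch for dumping
the counters, assuming that path and the usual "name type value" text layout,
might look like the following; the file name dump_dmu_tx.c is only
illustrative.

/* dump_dmu_tx.c - print the dmu_tx kstat counters (assumed procfs path) */
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	/* Path assumed from the SPL kstat convention; adjust if different. */
	FILE *fp = fopen("/proc/spl/kstat/zfs/dmu_tx", "r");
	char line[256];

	if (fp == NULL) {
		perror("fopen /proc/spl/kstat/zfs/dmu_tx");
		return (EXIT_FAILURE);
	}

	/* Each row should hold a counter name, its kstat type, and value. */
	while (fgets(line, sizeof (line), fp) != NULL)
		fputs(line, stdout);

	(void) fclose(fp);
	return (EXIT_SUCCESS);
}

A counter that climbs quickly under load, for example dmu_tx_write_limit or
dmu_tx_dirty_throttle, points at the matching ERESTART/EAGAIN path above as
the reason transactions are stalling in txg_wait_open().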