9617 too-frequent TXG sync causes excessive write inflation

illumos/illumos-gate@7928f4baf4

Reviewed by: Serapheim Dimitropoulos <serapheim.dimitro@delphix.com>
Reviewed by: Brad Lewis <brad.lewis@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Andrew Stormont <andyjstormont@gmail.com>
Approved by: Robert Mustacchi <rm@joyent.com>
Author:     Matthew Ahrens <mahrens@delphix.com>
This commit is contained in:
Alexander Motin 2019-06-03 16:57:06 +00:00
parent ba6517c8a4
commit 380d790f2b
3 changed files with 11 additions and 5 deletions

View File

@@ -103,9 +103,11 @@ uint64_t zfs_dirty_data_max_max = 4ULL * 1024 * 1024 * 1024;
 int zfs_dirty_data_max_percent = 10;
 /*
-* If there is at least this much dirty data, push out a txg.
+* If there's at least this much dirty data (as a percentage of
+* zfs_dirty_data_max), push out a txg. This should be less than
+* zfs_vdev_async_write_active_min_dirty_percent.
 */
-uint64_t zfs_dirty_data_sync = 64 * 1024 * 1024;
+uint64_t zfs_dirty_data_sync_pct = 20;
 /*
 * Once there is this amount of dirty data, the dmu_tx_delay() will kick in
@@ -824,10 +826,12 @@ dsl_pool_need_dirty_delay(dsl_pool_t *dp)
 {
 uint64_t delay_min_bytes =
 zfs_dirty_data_max * zfs_delay_min_dirty_percent / 100;
+uint64_t dirty_min_bytes =
+zfs_dirty_data_max * zfs_dirty_data_sync_pct / 100;
 boolean_t rv;
 mutex_enter(&dp->dp_lock);
-if (dp->dp_dirty_total > zfs_dirty_data_sync)
+if (dp->dp_dirty_total > dirty_min_bytes)
 txg_kick(dp);
 rv = (dp->dp_dirty_total > delay_min_bytes);
 mutex_exit(&dp->dp_lock);

View File

@@ -53,7 +53,7 @@ struct dsl_scan;
 extern uint64_t zfs_dirty_data_max;
 extern uint64_t zfs_dirty_data_max_max;
-extern uint64_t zfs_dirty_data_sync;
+extern uint64_t zfs_dirty_data_sync_pct;
 extern int zfs_dirty_data_max_percent;
 extern int zfs_delay_min_dirty_percent;
 extern uint64_t zfs_delay_scale;

View File

@@ -484,6 +484,8 @@ txg_sync_thread(void *arg)
 uint64_t timeout = zfs_txg_timeout * hz;
 uint64_t timer;
 uint64_t txg;
+uint64_t dirty_min_bytes =
+zfs_dirty_data_max * zfs_dirty_data_sync_pct / 100;
 /*
 * We sync when we're scanning, there's someone waiting
@@ -495,7 +497,7 @@ txg_sync_thread(void *arg)
 !tx->tx_exiting && timer > 0 &&
 tx->tx_synced_txg >= tx->tx_sync_txg_waiting &&
 !txg_has_quiesced_to_sync(dp) &&
-dp->dp_dirty_total < zfs_dirty_data_sync) {
+dp->dp_dirty_total < dirty_min_bytes) {
 dprintf("waiting; tx_synced=%llu waiting=%llu dp=%p\n",
 tx->tx_synced_txg, tx->tx_sync_txg_waiting, dp);
 txg_thread_wait(tx, &cpr, &tx->tx_sync_more_cv, timer);