MFC r275738: MFV r275546:

Reduce scrub activity when the system has enough dirty data, namely when dirty data is at or above zfs_vdev_async_write_active_min_dirty_percent (the point at which we start to increase the number of concurrent async writes). While here, also correct a rounding error which would make scrub run for (zfs_txg_timeout + 1) seconds before pausing instead of the desired zfs_txg_timeout seconds. Illumos issues: 5351 scrub goes for an extra second each txg; 5352 scrub should pause when there is some dirty data.
2015-01-23 17:41:34 +00:00 · 2015-01-23 17:41:34 +00:00 · 9a50f3f3a8
commit 9a50f3f3a8
parent 85cd2fc0bf
1 changed files with 22 additions and 7 deletions
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c
@@ -423,12 +423,11 @@ dsl_scan_sync_state(dsl_scan_t *scn, dmu_tx_t *tx)
 	    &scn->scn_phys, tx));
 }

+extern int zfs_vdev_async_write_active_min_dirty_percent;
+
 static boolean_t
 dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_phys_t *zb)
 {
-	uint64_t elapsed_nanosecs;
-	unsigned int mintime;
-
 	/* we never skip user/group accounting objects */
 	if (zb && (int64_t)zb->zb_object < 0)
 		return (B_FALSE);
@@ -443,12 +442,28 @@ dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_phys_t *zb)
 	if (zb && zb->zb_level != 0)
 		return (B_FALSE);

-	mintime = (scn->scn_phys.scn_func == POOL_SCAN_RESILVER) ?
+	/*
+	 * We pause if:
+	 *  - we have scanned for the maximum time: an entire txg
+	 *    timeout (default 5 sec)
+	 *  or
+	 *  - we have scanned for at least the minimum time (default 1 sec
+	 *    for scrub, 3 sec for resilver), and either we have sufficient
+	 *    dirty data that we are starting to write more quickly
+	 *    (default 30%), or someone is explicitly waiting for this txg
+	 *    to complete.
+	 *  or
+	 *  - the spa is shutting down because this pool is being exported
+	 *    or the machine is rebooting.
+	 */
+	int mintime = (scn->scn_phys.scn_func == POOL_SCAN_RESILVER) ?
 	    zfs_resilver_min_time_ms : zfs_scan_min_time_ms;
-	elapsed_nanosecs = gethrtime() - scn->scn_sync_start_time;
-	if (elapsed_nanosecs / NANOSEC > zfs_txg_timeout ||
+	uint64_t elapsed_nanosecs = gethrtime() - scn->scn_sync_start_time;
+	int dirty_pct = scn->scn_dp->dp_dirty_total * 100 / zfs_dirty_data_max;
+	if (elapsed_nanosecs / NANOSEC >= zfs_txg_timeout ||
 	    (NSEC2MSEC(elapsed_nanosecs) > mintime &&
-	    txg_sync_waiting(scn->scn_dp)) ||
+	    (txg_sync_waiting(scn->scn_dp) ||
+	    dirty_pct >= zfs_vdev_async_write_active_min_dirty_percent)) ||
 	    spa_shutting_down(scn->scn_dp->dp_spa)) {
 		if (zb) {
 			dprintf("pausing at bookmark %llx/%llx/%llx/%llx\n",