Fix possible hang when replaying large truncations.

OpenSolaris onnv revision: 7904:6a124a4ca9c5

Approved by: pjd, delphij (mentor)
Obtained from: OpenSolaris (Bug ID 6761624)
MFC after: 3 days
2010-05-12 09:51:57 +00:00 · 2010-05-12 09:51:57 +00:00 · ee02e5dd0e
commit ee02e5dd0e
parent 08f80e1af0
1 changed files with 24 additions and 0 deletions
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c
@@ -1566,6 +1566,29 @@ zil_replay_log_record(zilog_t *zilog, lr_t *lr, void *zra, uint64_t claim_txg)
 		}
 	}

+	/*
+	 * Replay of large truncates can end up needing additional txs
+	 * and a different txg. If they are nested within the replay tx
+	 * as below then a hang is possible. So we do the truncate here
+	 * and redo the truncate later (a no-op) and update the sequence
+	 * number whilst in the replay tx. Fortunately, it's safe to repeat
+	 * a truncate if we crash and the truncate commits. A create over
+	 * an existing file will also come in as a TX_TRUNCATE record.
+	 *
+	 * Note, removal of large files and renames over large files are
+	 * handled by putting the deleted object on a stable list
+	 * and, if necessary, force deleting the object outside of the replay
+	 * transaction using the zr_replay_cleaner.
+	 */
+	if (txtype == TX_TRUNCATE) {
+		*zr->zr_txgp = TXG_NOWAIT;
+		error = zr->zr_replay[TX_TRUNCATE](zr->zr_arg, zr->zr_lrbuf,
+		    zr->zr_byteswap);
+		if (error)
+			goto bad;
+		zr->zr_byteswap = 0; /* only byteswap once */
+	}
+
 	/*
 	 * We must now do two things atomically: replay this log record,
 	 * and update the log header to reflect the fact that we did so.
@@ -1636,6 +1659,7 @@ zil_replay_log_record(zilog_t *zilog, lr_t *lr, void *zra, uint64_t claim_txg)
 		dprintf("pass %d, retrying\n", pass);
 	}

+bad:
 	ASSERT(error && error != ERESTART);
 	name = kmem_alloc(MAXNAMELEN, KM_SLEEP);
 	dmu_objset_name(zr->zr_os, name);