From ee02e5dd0ee6cca1f60bddf090ec30d378fd0c01 Mon Sep 17 00:00:00 2001 From: mm Date: Wed, 12 May 2010 09:51:57 +0000 Subject: [PATCH] Fix possible hang when replaying large truncations. OpenSolaris onnv revision: 7904:6a124a4ca9c5 Approved by: pjd, delphij (mentor) Obtained from: OpenSolaris (Bug ID 6761624) MFC after: 3 days --- .../opensolaris/uts/common/fs/zfs/zil.c | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c index 030e56c29edc..1e7909167cec 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c @@ -1566,6 +1566,29 @@ zil_replay_log_record(zilog_t *zilog, lr_t *lr, void *zra, uint64_t claim_txg) } } + /* + * Replay of large truncates can end up needing additional txs + * and a different txg. If they are nested within the replay tx + * as below then a hang is possible. So we do the truncate here + * and redo the truncate later (a no-op) and update the sequence + * number whilst in the replay tx. Fortunately, it's safe to repeat + * a truncate if we crash and the truncate commits. A create over + * an existing file will also come in as a TX_TRUNCATE record. + * + * Note, remove of large files and renames over large files is + * handled by putting the deleted object on a stable list + * and if necessary force deleting the object outside of the replay + * transaction using the zr_replay_cleaner. + */ + if (txtype == TX_TRUNCATE) { + *zr->zr_txgp = TXG_NOWAIT; + error = zr->zr_replay[TX_TRUNCATE](zr->zr_arg, zr->zr_lrbuf, + zr->zr_byteswap); + if (error) + goto bad; + zr->zr_byteswap = 0; /* only byteswap once */ + } + /* * We must now do two things atomically: replay this log record, * and update the log header to reflect the fact that we did so. @@ -1636,6 +1659,7 @@ zil_replay_log_record(zilog_t *zilog, lr_t *lr, void *zra, uint64_t claim_txg) dprintf("pass %d, retrying\n", pass); } +bad: ASSERT(error && error != ERESTART); name = kmem_alloc(MAXNAMELEN, KM_SLEEP); dmu_objset_name(zr->zr_os, name);