From ee45fbd89465f12b39e97173a088175d4b712b5f Mon Sep 17 00:00:00 2001 From: LOLi <loli10K@users.noreply.github.com> Date: Fri, 27 Oct 2017 01:58:38 +0200 Subject: [PATCH] ZFS send fails to dump objects larger than 128PiB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When dumping objects larger than 128PiB it's possible for do_dump() to miscalculate the FREE_RECORD offset due to an integer overflow condition: this prevents the receiving end from correctly restoring the dumped object. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com> Signed-off-by: loli10K <ezomori.nozomu@gmail.com> Closes #6760 --- module/zfs/bpobj.c | 4 +- module/zfs/dmu.c | 2 +- module/zfs/dmu_send.c | 33 ++++---- tests/runfiles/linux.run | 3 +- .../functional/cli_root/zfs_send/Makefile.am | 3 +- .../cli_root/zfs_send/zfs_send_sparse.ksh | 83 +++++++++++++++++++ 6 files changed, 109 insertions(+), 19 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_sparse.ksh diff --git a/module/zfs/bpobj.c b/module/zfs/bpobj.c index 82ca94e1d11b..32459c9a8305 100644 --- a/module/zfs/bpobj.c +++ b/module/zfs/bpobj.c @@ -261,7 +261,7 @@ bpobj_iterate_impl(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx, } if (free) { VERIFY3U(0, ==, dmu_free_range(bpo->bpo_os, bpo->bpo_object, - (i + 1) * sizeof (blkptr_t), -1ULL, tx)); + (i + 1) * sizeof (blkptr_t), DMU_OBJECT_END, tx)); } if (err || !bpo->bpo_havesubobj || bpo->bpo_phys->bpo_subobjs == 0) goto out; @@ -339,7 +339,7 @@ bpobj_iterate_impl(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx, if (free) { VERIFY3U(0, ==, dmu_free_range(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, - (i + 1) * sizeof (uint64_t), -1ULL, tx)); + (i + 1) * sizeof (uint64_t), DMU_OBJECT_END, tx)); } out: diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index 42889504f3f3..0a7b398f5f3a 100644 --- a/module/zfs/dmu.c +++ 
b/module/zfs/dmu.c @@ -967,7 +967,7 @@ dmu_free_range(objset_t *os, uint64_t object, uint64_t offset, if (err) return (err); ASSERT(offset < UINT64_MAX); - ASSERT(size == -1ULL || size <= UINT64_MAX - offset); + ASSERT(size == DMU_OBJECT_END || size <= UINT64_MAX - offset); dnode_free_range(dn, offset, size, tx); dnode_rele(dn, FTAG); return (0); diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c index cc6b97d53028..1984e71b1529 100644 --- a/module/zfs/dmu_send.c +++ b/module/zfs/dmu_send.c @@ -223,9 +223,6 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, (object == dsp->dsa_last_data_object && offset > dsp->dsa_last_data_offset)); - if (length != -1ULL && offset + length < offset) - length = -1ULL; - /* * If there is a pending op, but it's not PENDING_FREE, push it out, * since free block aggregation can only be done for blocks of the @@ -242,19 +239,22 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, if (dsp->dsa_pending_op == PENDING_FREE) { /* - * There should never be a PENDING_FREE if length is -1 - * (because dump_dnode is the only place where this - * function is called with a -1, and only after flushing - * any pending record). + * There should never be a PENDING_FREE if length is + * DMU_OBJECT_END (because dump_dnode is the only place where + * this function is called with a DMU_OBJECT_END, and only after + * flushing any pending record). */ - ASSERT(length != -1ULL); + ASSERT(length != DMU_OBJECT_END); /* * Check to see whether this free block can be aggregated * with pending one. */ if (drrf->drr_object == object && drrf->drr_offset + drrf->drr_length == offset) { - drrf->drr_length += length; + if (offset + length < offset) + drrf->drr_length = DMU_OBJECT_END; + else + drrf->drr_length += length; return (0); } else { /* not a continuation. 
Push out pending record */ @@ -268,9 +268,12 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset, dsp->dsa_drr->drr_type = DRR_FREE; drrf->drr_object = object; drrf->drr_offset = offset; - drrf->drr_length = length; + if (offset + length < offset) + drrf->drr_length = DMU_OBJECT_END; + else + drrf->drr_length = length; drrf->drr_toguid = dsp->dsa_toguid; - if (length == -1ULL) { + if (length == DMU_OBJECT_END) { if (dump_record(dsp, NULL, 0) != 0) return (SET_ERROR(EINTR)); } else { @@ -587,7 +590,7 @@ dump_dnode(dmu_sendarg_t *dsp, const blkptr_t *bp, uint64_t object, /* Free anything past the end of the file. */ if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) * - (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL) != 0) + (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), DMU_OBJECT_END) != 0) return (SET_ERROR(EINTR)); if (dsp->dsa_err != 0) return (SET_ERROR(EINTR)); @@ -771,7 +774,9 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data) } else if (BP_IS_HOLE(bp)) { uint64_t span = BP_SPAN(dblkszsec, indblkshift, zb->zb_level); uint64_t offset = zb->zb_blkid * span; - err = dump_free(dsa, zb->zb_object, offset, span); + /* Don't dump free records for offsets > DMU_OBJECT_END */ + if (zb->zb_blkid == 0 || span <= DMU_OBJECT_END / zb->zb_blkid) + err = dump_free(dsa, zb->zb_object, offset, span); } else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) { return (0); } else if (type == DMU_OT_DNODE) { @@ -2860,7 +2865,7 @@ receive_free(struct receive_writer_arg *rwa, struct drr_free *drrf) { int err; - if (drrf->drr_length != -1ULL && + if (drrf->drr_length != DMU_OBJECT_END && drrf->drr_offset + drrf->drr_length < drrf->drr_offset) return (SET_ERROR(EINVAL)); diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 6edb2e1a01a6..19b093a7cf5d 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -167,7 +167,8 @@ tests = ['zfs_rollback_001_pos', 'zfs_rollback_002_pos', [tests/functional/cli_root/zfs_send] tests = 
['zfs_send_001_pos', 'zfs_send_002_pos', 'zfs_send_003_pos', 'zfs_send_004_neg', 'zfs_send_005_pos', 'zfs_send_006_pos', - 'zfs_send_007_pos', 'zfs_send_encrypted', 'zfs_send_raw'] + 'zfs_send_007_pos', 'zfs_send_encrypted', 'zfs_send_raw', + 'zfs_send_sparse'] [tests/functional/cli_root/zfs_set] tests = ['cache_001_pos', 'cache_002_neg', 'canmount_001_pos', diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_send/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zfs_send/Makefile.am index 08ab72a3dc22..e82df61c7364 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_send/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_send/Makefile.am @@ -11,4 +11,5 @@ dist_pkgdata_SCRIPTS = \ zfs_send_006_pos.ksh \ zfs_send_007_pos.ksh \ zfs_send_encrypted.ksh \ - zfs_send_raw.ksh + zfs_send_raw.ksh \ + zfs_send_sparse.ksh diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_sparse.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_sparse.ksh new file mode 100755 index 000000000000..735430506642 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_send/zfs_send_sparse.ksh @@ -0,0 +1,83 @@ +#!/bin/ksh -p +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2017, loli10K <ezomori.nozomu@gmail.com>. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# 'zfs send' should be able to send (big) sparse files correctly. +# +# STRATEGY: +# 1. Create sparse files of various size +# 2. Snapshot and send these sparse files +# 3. 
Verify these files are received correctly and we don't trigger any issue
+# like the one described in https://github.com/zfsonlinux/zfs/pull/6760
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+	datasetexists $SENDFS && log_must zfs destroy -r $SENDFS
+	datasetexists $RECVFS && log_must zfs destroy -r $RECVFS
+}
+
+#
+# Write 1 random byte at $offset of "source" file in $sendfs dataset
+# Snapshot and send $sendfs dataset to $recvfs
+# Compare the received file with its source
+#
+function write_compare_files # <sendfs> <recvfs> <offset>
+{
+	typeset sendfs="$1"
+	typeset recvfs="$2"
+	typeset offset="$3"
+
+	# create source filesystem
+	log_must zfs create $sendfs
+	# write sparse file
+	sendfile="$(get_prop mountpoint $sendfs)/data.bin"
+	log_must dd if=/dev/urandom of=$sendfile bs=1 count=1 seek=$offset
+	# send/receive the file
+	log_must zfs snapshot $sendfs@snap
+	log_must eval "zfs send $sendfs@snap | zfs receive $recvfs"
+	# compare sparse files
+	recvfile="$(get_prop mountpoint $recvfs)/data.bin"
+	log_must cmp $sendfile $recvfile $offset $offset
+	sendsz=$(stat -c '%s' $sendfile)
+	recvsz=$(stat -c '%s' $recvfile)
+	if [[ $sendsz -ne $recvsz ]]; then
+		log_fail "$sendfile ($sendsz) and $recvfile ($recvsz) differ."
+	fi
+	# cleanup
+	log_must zfs destroy -r $sendfs
+	log_must zfs destroy -r $recvfs
+}
+
+log_assert "'zfs send' should be able to send (big) sparse files correctly."
+log_onexit cleanup
+
+SENDFS="$TESTPOOL/sendfs"
+RECVFS="$TESTPOOL/recvfs"
+OFF_T_MAX="$(echo '2 ^ 40 * 8 - 1' | bc)"
+
+for i in {1..60}; do
+	offset=$(echo "2 ^ $i" | bc)
+	is_32bit && [[ $offset -ge $OFF_T_MAX ]] && continue; # run is_32bit (assumed from libtest.shlib); "[[ is_32bit ]]" is always true
+	write_compare_files $SENDFS $RECVFS $offset
+done
+
+log_pass "'zfs send' sends (big) sparse files correctly."