From 2d3fcc82a17619cdcc7a432dc1ba0e17be2c3307 Mon Sep 17 00:00:00 2001 From: Andriy Gapon Date: Fri, 14 Apr 2017 18:27:12 +0000 Subject: [PATCH 1/3] 7990 libzfs: snapspec_cb() does not need to call zfs_strdup() illumos/illumos-gate@d8584ba6fb7a5e46da1725845b99ae5fab5a4baf https://github.com/illumos/illumos-gate/commit/d8584ba6fb7a5e46da1725845b99ae5fab5a4baf https://www.illumos.org/issues/7990 The snapspec_cb() callback function in libzfs does not need to call zfs_strdup(). Reviewed by: Yuri Pankov Reviewed by: Toomas Soome Approved by: Matthew Ahrens Author: Marcel Telka --- lib/libzfs/common/libzfs_iter.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/lib/libzfs/common/libzfs_iter.c b/lib/libzfs/common/libzfs_iter.c index 91ae6f302b28..f5ac68fc94e4 100644 --- a/lib/libzfs/common/libzfs_iter.c +++ b/lib/libzfs/common/libzfs_iter.c @@ -317,16 +317,17 @@ static int snapspec_cb(zfs_handle_t *zhp, void *arg) { snapspec_arg_t *ssa = arg; - char *shortsnapname; + const char *shortsnapname; int err = 0; if (ssa->ssa_seenlast) return (0); - shortsnapname = zfs_strdup(zhp->zfs_hdl, - strchr(zfs_get_name(zhp), '@') + 1); + shortsnapname = strchr(zfs_get_name(zhp), '@') + 1; if (!ssa->ssa_seenfirst && strcmp(shortsnapname, ssa->ssa_first) == 0) ssa->ssa_seenfirst = B_TRUE; + if (strcmp(shortsnapname, ssa->ssa_last) == 0) + ssa->ssa_seenlast = B_TRUE; if (ssa->ssa_seenfirst) { err = ssa->ssa_func(zhp, ssa->ssa_arg); @@ -334,10 +335,6 @@ snapspec_cb(zfs_handle_t *zhp, void *arg) zfs_close(zhp); } - if (strcmp(shortsnapname, ssa->ssa_last) == 0) - ssa->ssa_seenlast = B_TRUE; - free(shortsnapname); - return (err); } From 99988b024d2ea59a4cb1c0b1d4fa6fe6196e2e06 Mon Sep 17 00:00:00 2001 From: Andriy Gapon Date: Fri, 14 Apr 2017 18:28:40 +0000 Subject: [PATCH 2/3] 7885 zpool list can report 16.0e for expandsz illumos/illumos-gate@c040c10cdd1e4eab0fc88203758367dd81e057b7 
https://github.com/illumos/illumos-gate/commit/c040c10cdd1e4eab0fc88203758367dd81e057b7 https://www.illumos.org/issues/7885 When a member of a RAIDZ has been replaced with a device smaller than the original, then the top level vdev can report its expand size as 16.0E. The reduced child asize causes the RAIDZ to have a vdev_asize lower than its vdev_max_asize which then results in an underflow during the calculation of the parent's expand size. Also for RAIDZ vdevs the sum of their child vdev_min_asize could be smaller than the parent's vdev_min_asize. Fixed by: https://github.com/openzfs/openzfs/pull/296 Reviewed by: Matthew Ahrens Reviewed by: George Wilson Approved by: Gordon Ross Author: Steven Hartland --- uts/common/fs/zfs/vdev.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/uts/common/fs/zfs/vdev.c b/uts/common/fs/zfs/vdev.c index 41812b895374..a081deb7ea6f 100644 --- a/uts/common/fs/zfs/vdev.c +++ b/uts/common/fs/zfs/vdev.c @@ -135,7 +135,8 @@ vdev_get_min_asize(vdev_t *vd) * so each child must provide at least 1/Nth of its asize. */ if (pvd->vdev_ops == &vdev_raidz_ops) - return (pvd->vdev_min_asize / pvd->vdev_children); + return ((pvd->vdev_min_asize + pvd->vdev_children - 1) / + pvd->vdev_children); return (pvd->vdev_min_asize); } @@ -1273,7 +1274,7 @@ vdev_open(vdev_t *vd) vd->vdev_psize = psize; /* - * Make sure the allocatable size hasn't shrunk. + * Make sure the allocatable size hasn't shrunk too much. */ if (asize < vd->vdev_min_asize) { vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN, @@ -1306,12 +1307,21 @@ vdev_open(vdev_t *vd) } /* - * If all children are healthy and the asize has increased, - * then we've experienced dynamic LUN growth. If automatic - * expansion is enabled then use the additional space. 
+ * If all children are healthy we update asize if either: + * The asize has increased, due to a device expansion caused by dynamic + * LUN growth or vdev replacement, and automatic expansion is enabled; + * making the additional space available. + * + * The asize has decreased, due to a device shrink usually caused by a + * vdev replace with a smaller device. This ensures that calculations + * based on max_asize and asize e.g. esize are always valid. It's safe + * to do this as we've already validated that asize is greater than + * vdev_min_asize. */ - if (vd->vdev_state == VDEV_STATE_HEALTHY && asize > vd->vdev_asize && - (vd->vdev_expanding || spa->spa_autoexpand)) + if (vd->vdev_state == VDEV_STATE_HEALTHY && + ((asize > vd->vdev_asize && + (vd->vdev_expanding || spa->spa_autoexpand)) || + (asize < vd->vdev_asize))) vd->vdev_asize = asize; vdev_set_min_asize(vd); From bf644e9b6f7033b08a39df87cc83a037ebc37822 Mon Sep 17 00:00:00 2001 From: Andriy Gapon Date: Fri, 14 Apr 2017 18:29:13 +0000 Subject: [PATCH 3/3] 8023 Panic destroying a metaslab deferred range tree illumos/illumos-gate@3991b535a8e990c0369be677746a87c259b13e9f https://github.com/illumos/illumos-gate/commit/3991b535a8e990c0369be677746a87c259b13e9f https://www.illumos.org/issues/8023 $C ffffff0011bc0970 vpanic() ffffff0011bc0a00 strlog() ffffff0011bc0a30 range_tree_destroy+0x72(ffffff043769ad00) ffffff0011bc0a70 metaslab_fini+0xd5(ffffff0449acf380) ffffff0011bc0ab0 vdev_metaslab_fini+0x56(ffffff0462bae800) ffffff0011bc0af0 spa_unload+0x9b(ffffff03e3dac000) ffffff0011bc0b70 spa_export_common+0x115(ffffff047f4b4000, 2, 0, 0, 0) ffffff0011bc0b90 spa_destroy+0x1d(ffffff047f4b4000) ffffff0011bc0bd0 zfs_ioc_pool_destroy+0x20(ffffff047f4b4000) ffffff0011bc0c80 zfsdev_ioctl+0x4d7(11400000000, 5a01, 8040190, 100003, ffffff03e1956b10, ffffff0011bc0e68) ffffff0011bc0cc0 cdev_ioctl+0x39(11400000000, 5a01, 8040190, 100003, ffffff03e1956b10, ffffff0011bc0e68) ffffff0011bc0d10 spec_ioctl+0x60(ffffff03d9153b00, 5a01, 
8040190, 100003, ffffff03e1956b10, ffffff0011bc0e68, 0) ffffff0011bc0da0 fop_ioctl+0x55(ffffff03d9153b00, 5a01, 8040190, 100003, ffffff03e1956b10, ffffff0011bc0e68, 0) ffffff0011bc0ec0 ioctl+0x9b(3, 5a01, 8040190) ffffff0011bc0f10 _sys_sysenter_post_swapgs+0x149() Reviewed by: Brad Lewis Reviewed by: Matt Ahrens Reviewed by: Dan Kimmel Reviewed by: Saso Kiselkov Approved by: Dan McDonald Author: George Wilson --- uts/common/fs/zfs/dbuf.c | 1 + uts/common/fs/zfs/metaslab.c | 20 ++++++++++++++++---- uts/common/fs/zfs/spa_misc.c | 10 ++++++++++ uts/common/fs/zfs/space_map.c | 10 ++++++---- uts/common/fs/zfs/sys/spa.h | 1 + 5 files changed, 34 insertions(+), 8 deletions(-) diff --git a/uts/common/fs/zfs/dbuf.c b/uts/common/fs/zfs/dbuf.c index b96cc83bf998..c8a981d512b8 100644 --- a/uts/common/fs/zfs/dbuf.c +++ b/uts/common/fs/zfs/dbuf.c @@ -1552,6 +1552,7 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx) * this assertion only if we're not already dirty. */ os = dn->dn_objset; + VERIFY3U(tx->tx_txg, <=, spa_final_dirty_txg(os->os_spa)); #ifdef DEBUG if (dn->dn_objset->os_dsl_dataset != NULL) rrw_enter(&os->os_dsl_dataset->ds_bp_rwlock, RW_READER, FTAG); diff --git a/uts/common/fs/zfs/metaslab.c b/uts/common/fs/zfs/metaslab.c index f9a691c7d893..5eb43fa7c450 100644 --- a/uts/common/fs/zfs/metaslab.c +++ b/uts/common/fs/zfs/metaslab.c @@ -1563,11 +1563,19 @@ metaslab_set_fragmentation(metaslab_t *msp) uint64_t txg = spa_syncing_txg(spa); vdev_t *vd = msp->ms_group->mg_vd; - if (spa_writeable(spa)) { + /* + * If we've reached the final dirty txg, then we must + * be shutting down the pool. We don't want to dirty + * any data past this point so skip setting the condense + * flag. We can retry this action the next time the pool + * is imported. 
+ */ + if (spa_writeable(spa) && txg < spa_final_dirty_txg(spa)) { msp->ms_condense_wanted = B_TRUE; vdev_dirty(vd, VDD_METASLAB, msp, txg + 1); spa_dbgmsg(spa, "txg %llu, requesting force condense: " - "msp %p, vd %p", txg, msp, vd); + "ms_id %llu, vdev_id %llu", txg, msp->ms_id, + vd->vdev_id); } msp->ms_fragmentation = ZFS_FRAG_INVALID; return; @@ -2189,13 +2197,17 @@ metaslab_sync(metaslab_t *msp, uint64_t txg) /* * Normally, we don't want to process a metaslab if there * are no allocations or frees to perform. However, if the metaslab - * is being forced to condense we need to let it through. + * is being forced to condense and it's loaded, we need to let it + * through. */ if (range_tree_space(alloctree) == 0 && range_tree_space(msp->ms_freeingtree) == 0 && - !msp->ms_condense_wanted) + !(msp->ms_loaded && msp->ms_condense_wanted)) return; + + VERIFY(txg <= spa_final_dirty_txg(spa)); + /* * The only state that can actually be changing concurrently with * metaslab_sync() is the metaslab's ms_tree. No other thread can diff --git a/uts/common/fs/zfs/spa_misc.c b/uts/common/fs/zfs/spa_misc.c index 2ec8057e59db..1aebbca741a4 100644 --- a/uts/common/fs/zfs/spa_misc.c +++ b/uts/common/fs/zfs/spa_misc.c @@ -1621,6 +1621,16 @@ spa_syncing_txg(spa_t *spa) return (spa->spa_syncing_txg); } +/* + * Return the last txg where data can be dirtied. The final txgs + * will be used to just clear out any deferred frees that remain. + */ +uint64_t +spa_final_dirty_txg(spa_t *spa) +{ + return (spa->spa_final_txg - TXG_DEFER_SIZE); +} + pool_state_t spa_state(spa_t *spa) { diff --git a/uts/common/fs/zfs/space_map.c b/uts/common/fs/zfs/space_map.c index 0b3af50a11fd..45a4071101cc 100644 --- a/uts/common/fs/zfs/space_map.c +++ b/uts/common/fs/zfs/space_map.c @@ -23,7 +23,7 @@ * Use is subject to license terms. */ /* - * Copyright (c) 2012, 2014 by Delphix. All rights reserved. + * Copyright (c) 2012, 2016 by Delphix. All rights reserved. 
*/ #include @@ -403,6 +403,7 @@ space_map_truncate(space_map_t *sm, dmu_tx_t *tx) ASSERT(dsl_pool_sync_context(dmu_objset_pool(os))); ASSERT(dmu_tx_is_syncing(tx)); + VERIFY3U(dmu_tx_get_txg(tx), <=, spa_final_dirty_txg(spa)); dmu_object_info_from_db(sm->sm_dbuf, &doi); @@ -417,9 +418,10 @@ space_map_truncate(space_map_t *sm, dmu_tx_t *tx) if ((spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM) && doi.doi_bonus_size != sizeof (space_map_phys_t)) || doi.doi_data_block_size != space_map_blksz) { - zfs_dbgmsg("txg %llu, spa %s, reallocating: " - "old bonus %u, old blocksz %u", dmu_tx_get_txg(tx), - spa_name(spa), doi.doi_bonus_size, doi.doi_data_block_size); + zfs_dbgmsg("txg %llu, spa %s, sm %p, reallocating " + "object[%llu]: old bonus %u, old blocksz %u", + dmu_tx_get_txg(tx), spa_name(spa), sm, sm->sm_object, + doi.doi_bonus_size, doi.doi_data_block_size); space_map_free(sm, tx); dmu_buf_rele(sm->sm_dbuf, sm); diff --git a/uts/common/fs/zfs/sys/spa.h b/uts/common/fs/zfs/sys/spa.h index 50ffe676a38e..d0bb43186623 100644 --- a/uts/common/fs/zfs/sys/spa.h +++ b/uts/common/fs/zfs/sys/spa.h @@ -771,6 +771,7 @@ extern uint64_t spa_load_guid(spa_t *spa); extern uint64_t spa_last_synced_txg(spa_t *spa); extern uint64_t spa_first_txg(spa_t *spa); extern uint64_t spa_syncing_txg(spa_t *spa); +extern uint64_t spa_final_dirty_txg(spa_t *spa); extern uint64_t spa_version(spa_t *spa); extern pool_state_t spa_state(spa_t *spa); extern spa_load_state_t spa_load_state(spa_t *spa);