From fc592d9aa839312c3774967ab850de1346a8b5b6 Mon Sep 17 00:00:00 2001 From: Andriy Gapon Date: Tue, 12 Jul 2016 11:16:43 +0000 Subject: [PATCH] 6874 rollback and receive need to reset ZPL state to what's on disk illumos/illumos-gate@1fdcbd00c9cbac286b5f92e08877e8cb3c448420 https://github.com/illumos/illumos-gate/commit/1fdcbd00c9cbac286b5f92e08877e8cb3c448420 https://www.illumos.org/issues/6874 When we do a clone swap (caused by "zfs rollback" or "zfs receive"), the ZPL doesn't completely reload the state from the DMU; some values remain cached in the zfsvfs_t. steps to reproduce: ``` #!/bin/bash -x zfs destroy -R test/fs zfs destroy -R test/recvd zfs create test/fs zfs snapshot test/fs@a zfs set userquota@$USER=1m test/fs zfs snapshot test/fs@b zfs send test/fs@a | zfs recv test/recvd zfs send -i @a test/fs@b | zfs recv test/recvd zfs userspace test/recvd 1. should show 1m quota dd if=/dev/urandom of=/test/recvd/file bs=1k count=1024 sync dd if=/dev/urandom of=/test/recvd/file2 bs=1k count=1024 2. should fail with ENOSPC sync zfs unmount test/recvd zfs mount test/recvd zfs userspace test/recvd 3. if bug above, now shows 1m quota dd if=/dev/urandom of=/test/recvd/file3 bs=1k count=1024 4. if bug above, now fails with ENOSPC ``` Reviewed by: George Wilson Reviewed by: Paul Dagnelie Approved by: Garrett D'Amore Author: Matthew Ahrens --- uts/common/fs/zfs/zfs_vfsops.c | 267 ++++++++++++++++----------------- 1 file changed, 131 insertions(+), 136 deletions(-) diff --git a/uts/common/fs/zfs/zfs_vfsops.c b/uts/common/fs/zfs/zfs_vfsops.c index ebacf850ec16..0d02fd5bec4b 100644 --- a/uts/common/fs/zfs/zfs_vfsops.c +++ b/uts/common/fs/zfs/zfs_vfsops.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2014 by Delphix. All rights reserved. + * Copyright (c) 2012, 2015 by Delphix. All rights reserved. * Copyright (c) 2014 Integros [integros.com] */ @@ -852,14 +852,125 @@ zfs_owner_overquota(zfsvfs_t *zfsvfs, znode_t *zp, boolean_t isgroup) return (zfs_fuid_overquota(zfsvfs, isgroup, fuid)); } +/* + * Associate this zfsvfs with the given objset, which must be owned. + * This will cache a bunch of on-disk state from the objset in the + * zfsvfs. + */ +static int +zfsvfs_init(zfsvfs_t *zfsvfs, objset_t *os) +{ + int error; + uint64_t val; + + zfsvfs->z_max_blksz = SPA_OLD_MAXBLOCKSIZE; + zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; + zfsvfs->z_os = os; + + error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version); + if (error != 0) + return (error); + if (zfsvfs->z_version > + zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) { + (void) printf("Can't mount a version %lld file system " + "on a version %lld pool\n. Pool must be upgraded to mount " + "this file system.", (u_longlong_t)zfsvfs->z_version, + (u_longlong_t)spa_version(dmu_objset_spa(os))); + return (SET_ERROR(ENOTSUP)); + } + error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &val); + if (error != 0) + return (error); + zfsvfs->z_norm = (int)val; + + error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &val); + if (error != 0) + return (error); + zfsvfs->z_utf8 = (val != 0); + + error = zfs_get_zplprop(os, ZFS_PROP_CASE, &val); + if (error != 0) + return (error); + zfsvfs->z_case = (uint_t)val; + + /* + * Fold case on file systems that are always or sometimes case + * insensitive. + */ + if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE || + zfsvfs->z_case == ZFS_CASE_MIXED) + zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER; + + zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); + zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os); + + uint64_t sa_obj = 0; + if (zfsvfs->z_use_sa) { + /* should either have both of these objects or none */ + error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, + &sa_obj); + if (error != 0) + return (error); + } + + error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, + &zfsvfs->z_attr_table); + if (error != 0) + return (error); + + if (zfsvfs->z_version >= ZPL_VERSION_SA) + sa_register_update_callback(os, zfs_sa_upgrade); + + error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, + &zfsvfs->z_root); + if (error != 0) + return (error); + ASSERT(zfsvfs->z_root != 0); + + error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1, + &zfsvfs->z_unlinkedobj); + if (error != 0) + return (error); + + error = zap_lookup(os, MASTER_NODE_OBJ, + zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA], + 8, 1, &zfsvfs->z_userquota_obj); + if (error == ENOENT) + zfsvfs->z_userquota_obj = 0; + else if (error != 0) + return (error); + + error = zap_lookup(os, MASTER_NODE_OBJ, + zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA], + 8, 1, &zfsvfs->z_groupquota_obj); + if (error == ENOENT) + zfsvfs->z_groupquota_obj = 0; + else if (error != 0) + return (error); + + error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1, + &zfsvfs->z_fuid_obj); + if (error == ENOENT) + zfsvfs->z_fuid_obj = 0; + else if (error != 0) + return (error); + + error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1, + &zfsvfs->z_shares_dir); + if (error == ENOENT) + zfsvfs->z_shares_dir = 0; + else if (error != 0) + return (error); + + return (0); +} + int zfsvfs_create(const char *osname, zfsvfs_t **zfvp) { objset_t *os; zfsvfs_t *zfsvfs; - uint64_t zval; - int i, error; - uint64_t sa_obj; + int error; zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); @@ -873,106 +984,8 @@ zfsvfs_create(const char *osname, zfsvfs_t **zfvp) return (error); } - /* - * Initialize the zfs-specific filesystem structure. - * Should probably make this a kmem cache, shuffle fields, - * and just bzero up to z_hold_mtx[]. - */ zfsvfs->z_vfs = NULL; zfsvfs->z_parent = zfsvfs; - zfsvfs->z_max_blksz = SPA_OLD_MAXBLOCKSIZE; - zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; - zfsvfs->z_os = os; - - error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version); - if (error) { - goto out; - } else if (zfsvfs->z_version > - zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) { - (void) printf("Can't mount a version %lld file system " - "on a version %lld pool\n. Pool must be upgraded to mount " - "this file system.", (u_longlong_t)zfsvfs->z_version, - (u_longlong_t)spa_version(dmu_objset_spa(os))); - error = SET_ERROR(ENOTSUP); - goto out; - } - if ((error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &zval)) != 0) - goto out; - zfsvfs->z_norm = (int)zval; - - if ((error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &zval)) != 0) - goto out; - zfsvfs->z_utf8 = (zval != 0); - - if ((error = zfs_get_zplprop(os, ZFS_PROP_CASE, &zval)) != 0) - goto out; - zfsvfs->z_case = (uint_t)zval; - - /* - * Fold case on file systems that are always or sometimes case - * insensitive. - */ - if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE || - zfsvfs->z_case == ZFS_CASE_MIXED) - zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER; - - zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); - zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os); - - if (zfsvfs->z_use_sa) { - /* should either have both of these objects or none */ - error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, - &sa_obj); - if (error) - goto out; - } else { - /* - * Pre SA versions file systems should never touch - * either the attribute registration or layout objects. - */ - sa_obj = 0; - } - - error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, - &zfsvfs->z_attr_table); - if (error) - goto out; - - if (zfsvfs->z_version >= ZPL_VERSION_SA) - sa_register_update_callback(os, zfs_sa_upgrade); - - error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, - &zfsvfs->z_root); - if (error) - goto out; - ASSERT(zfsvfs->z_root != 0); - - error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1, - &zfsvfs->z_unlinkedobj); - if (error) - goto out; - - error = zap_lookup(os, MASTER_NODE_OBJ, - zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA], - 8, 1, &zfsvfs->z_userquota_obj); - if (error && error != ENOENT) - goto out; - - error = zap_lookup(os, MASTER_NODE_OBJ, - zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA], - 8, 1, &zfsvfs->z_groupquota_obj); - if (error && error != ENOENT) - goto out; - - error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1, - &zfsvfs->z_fuid_obj); - if (error && error != ENOENT) - goto out; - - error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1, - &zfsvfs->z_shares_dir); - if (error && error != ENOENT) - goto out; mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL); @@ -981,17 +994,19 @@ zfsvfs_create(const char *osname, zfsvfs_t **zfvp) rrm_init(&zfsvfs->z_teardown_lock, B_FALSE); rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL); rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL); - for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) + for (int i = 0; i != ZFS_OBJ_MTX_SZ; i++) mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); + error = zfsvfs_init(zfsvfs, os); + if (error != 0) { + dmu_objset_disown(os, zfsvfs); + *zfvp = NULL; + kmem_free(zfsvfs, sizeof (zfsvfs_t)); + return (error); + } + *zfvp = zfsvfs; return (0); - -out: - dmu_objset_disown(os, zfsvfs); - *zfvp = NULL; - kmem_free(zfsvfs, sizeof (zfsvfs_t)); - return (error); } static int @@ -2008,7 +2023,6 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname) { int err; znode_t *zp; - uint64_t sa_obj = 0; ASSERT(RRM_WRITE_HELD(&zfsvfs->z_teardown_lock)); ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)); @@ -2017,35 +2031,16 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname) * We already own this, so just hold and rele it to update the * objset_t, as the one we had before may have been evicted. */ - VERIFY0(dmu_objset_hold(osname, zfsvfs, &zfsvfs->z_os)); - VERIFY3P(zfsvfs->z_os->os_dsl_dataset->ds_owner, ==, zfsvfs); - VERIFY(dsl_dataset_long_held(zfsvfs->z_os->os_dsl_dataset)); - dmu_objset_rele(zfsvfs->z_os, zfsvfs); + objset_t *os; + VERIFY0(dmu_objset_hold(osname, zfsvfs, &os)); + VERIFY3P(os->os_dsl_dataset->ds_owner, ==, zfsvfs); + VERIFY(dsl_dataset_long_held(os->os_dsl_dataset)); + dmu_objset_rele(os, zfsvfs); - /* - * Make sure version hasn't changed - */ - - err = zfs_get_zplprop(zfsvfs->z_os, ZFS_PROP_VERSION, - &zfsvfs->z_version); - - if (err) + err = zfsvfs_init(zfsvfs, os); + if (err != 0) goto bail; - err = zap_lookup(zfsvfs->z_os, MASTER_NODE_OBJ, - ZFS_SA_ATTRS, 8, 1, &sa_obj); - - if (err && zfsvfs->z_version >= ZPL_VERSION_SA) - goto bail; - - if ((err = sa_setup(zfsvfs->z_os, sa_obj, - zfs_attr_table, ZPL_END, &zfsvfs->z_attr_table)) != 0) - goto bail; - - if (zfsvfs->z_version >= ZPL_VERSION_SA) - sa_register_update_callback(zfsvfs->z_os, - zfs_sa_upgrade); - VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0); zfs_set_fuid_feature(zfsvfs);