Illumos #3875
3875 panic in zfs_root() after failed rollback Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com> Reviewed by: Matthew Ahrens <mahrens@delphix.com> Approved by: Gordon Ross <gwr@nexenta.com> References: https://www.illumos.org/issues/3875 illumos/illumos-gate@91948b51b8 Ported-by: Richard Yao <ryao@gentoo.org> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Issue #1775
This commit is contained in:
parent
1958067629
commit
831baf06ef
@ -136,6 +136,7 @@ struct objset {
|
||||
int dmu_objset_hold(const char *name, void *tag, objset_t **osp);
|
||||
int dmu_objset_own(const char *name, dmu_objset_type_t type,
|
||||
boolean_t readonly, void *tag, objset_t **osp);
|
||||
void dmu_objset_refresh_ownership(objset_t *os, void *tag);
|
||||
void dmu_objset_rele(objset_t *os, void *tag);
|
||||
void dmu_objset_disown(objset_t *os, void *tag);
|
||||
int dmu_objset_from_ds(struct dsl_dataset *ds, objset_t **osp);
|
||||
|
@ -55,12 +55,13 @@ typedef struct dmu_recv_cookie {
|
||||
struct avl_tree *drc_guid_to_ds_map;
|
||||
zio_cksum_t drc_cksum;
|
||||
uint64_t drc_newsnapobj;
|
||||
void *drc_owner;
|
||||
} dmu_recv_cookie_t;
|
||||
|
||||
int dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb,
|
||||
boolean_t force, char *origin, dmu_recv_cookie_t *drc);
|
||||
int dmu_recv_stream(dmu_recv_cookie_t *drc, struct vnode *vp, offset_t *voffp,
|
||||
int cleanup_fd, uint64_t *action_handlep);
|
||||
int dmu_recv_end(dmu_recv_cookie_t *drc);
|
||||
int dmu_recv_end(dmu_recv_cookie_t *drc, void *owner);
|
||||
|
||||
#endif /* _DMU_SEND_H */
|
||||
|
@ -248,7 +248,7 @@ void dsl_dataset_long_rele(dsl_dataset_t *ds, void *tag);
|
||||
boolean_t dsl_dataset_long_held(dsl_dataset_t *ds);
|
||||
|
||||
int dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone,
|
||||
dsl_dataset_t *origin_head, boolean_t force);
|
||||
dsl_dataset_t *origin_head, boolean_t force, void *owner, dmu_tx_t *tx);
|
||||
void dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone,
|
||||
dsl_dataset_t *origin_head, dmu_tx_t *tx);
|
||||
int dsl_dataset_snapshot_check_impl(dsl_dataset_t *ds, const char *snapname,
|
||||
@ -265,7 +265,7 @@ int dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name,
|
||||
int dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx);
|
||||
void dsl_dataset_set_refreservation_sync_impl(dsl_dataset_t *ds,
|
||||
zprop_source_t source, uint64_t value, dmu_tx_t *tx);
|
||||
int dsl_dataset_rollback(const char *fsname);
|
||||
int dsl_dataset_rollback(const char *fsname, void *owner);
|
||||
|
||||
#ifdef ZFS_DEBUG
|
||||
#define dprintf_ds(ds, fmt, ...) do { \
|
||||
|
@ -517,6 +517,38 @@ dmu_objset_rele(objset_t *os, void *tag)
|
||||
dsl_pool_rele(dp, tag);
|
||||
}
|
||||
|
||||
/*
|
||||
* When we are called, os MUST refer to an objset associated with a dataset
|
||||
* that is owned by 'tag'; that is, is held and long held by 'tag' and ds_owner
|
||||
* == tag. We will then release and reacquire ownership of the dataset while
|
||||
* holding the pool config_rwlock to avoid intervening namespace or ownership
|
||||
* changes that may occur.
|
||||
*
|
||||
* This exists solely to accommodate zfs_ioc_userspace_upgrade()'s desire to
|
||||
* release the hold on its dataset and acquire a new one on the dataset of the
|
||||
* same name so that it can be partially torn down and reconstructed.
|
||||
*/
|
||||
void
|
||||
dmu_objset_refresh_ownership(objset_t *os, void *tag)
|
||||
{
|
||||
dsl_pool_t *dp;
|
||||
dsl_dataset_t *ds, *newds;
|
||||
char name[MAXNAMELEN];
|
||||
|
||||
ds = os->os_dsl_dataset;
|
||||
VERIFY3P(ds, !=, NULL);
|
||||
VERIFY3P(ds->ds_owner, ==, tag);
|
||||
VERIFY(dsl_dataset_long_held(ds));
|
||||
|
||||
dsl_dataset_name(ds, name);
|
||||
dp = dmu_objset_pool(os);
|
||||
dsl_pool_config_enter(dp, FTAG);
|
||||
dmu_objset_disown(os, tag);
|
||||
VERIFY0(dsl_dataset_own(dp, name, tag, &newds));
|
||||
VERIFY3P(newds, ==, os->os_dsl_dataset);
|
||||
dsl_pool_config_exit(dp, FTAG);
|
||||
}
|
||||
|
||||
void
|
||||
dmu_objset_disown(objset_t *os, void *tag)
|
||||
{
|
||||
|
@ -1612,7 +1612,7 @@ dmu_recv_end_check(void *arg, dmu_tx_t *tx)
|
||||
}
|
||||
}
|
||||
error = dsl_dataset_clone_swap_check_impl(drc->drc_ds,
|
||||
origin_head, drc->drc_force);
|
||||
origin_head, drc->drc_force, drc->drc_owner, tx);
|
||||
if (error != 0) {
|
||||
dsl_dataset_rele(origin_head, FTAG);
|
||||
return (error);
|
||||
@ -1685,6 +1685,9 @@ dmu_recv_end_sync(void *arg, dmu_tx_t *tx)
|
||||
|
||||
dsl_dataset_rele(origin_head, FTAG);
|
||||
dsl_destroy_head_sync_impl(drc->drc_ds, tx);
|
||||
|
||||
if (drc->drc_owner != NULL)
|
||||
VERIFY3P(origin_head->ds_owner, ==, drc->drc_owner);
|
||||
} else {
|
||||
dsl_dataset_t *ds = drc->drc_ds;
|
||||
|
||||
@ -1787,8 +1790,10 @@ dmu_recv_new_end(dmu_recv_cookie_t *drc)
|
||||
}
|
||||
|
||||
int
|
||||
dmu_recv_end(dmu_recv_cookie_t *drc)
|
||||
dmu_recv_end(dmu_recv_cookie_t *drc, void *owner)
|
||||
{
|
||||
drc->drc_owner = owner;
|
||||
|
||||
if (drc->drc_newfs)
|
||||
return (dmu_recv_new_end(drc));
|
||||
else
|
||||
|
@ -1669,16 +1669,52 @@ dsl_dataset_rename_snapshot(const char *fsname,
|
||||
dsl_dataset_rename_snapshot_sync, &ddrsa, 1));
|
||||
}
|
||||
|
||||
/*
|
||||
* If we're doing an ownership handoff, we need to make sure that there is
|
||||
* only one long hold on the dataset. We're not allowed to change anything here
|
||||
* so we don't permanently release the long hold or regular hold here. We want
|
||||
* to do this only when syncing to avoid the dataset unexpectedly going away
|
||||
* when we release the long hold.
|
||||
*/
|
||||
static int
|
||||
dsl_dataset_handoff_check(dsl_dataset_t *ds, void *owner, dmu_tx_t *tx)
|
||||
{
|
||||
boolean_t held;
|
||||
|
||||
if (!dmu_tx_is_syncing(tx))
|
||||
return (0);
|
||||
|
||||
if (owner != NULL) {
|
||||
VERIFY3P(ds->ds_owner, ==, owner);
|
||||
dsl_dataset_long_rele(ds, owner);
|
||||
}
|
||||
|
||||
held = dsl_dataset_long_held(ds);
|
||||
|
||||
if (owner != NULL)
|
||||
dsl_dataset_long_hold(ds, owner);
|
||||
|
||||
if (held)
|
||||
return (SET_ERROR(EBUSY));
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
typedef struct dsl_dataset_rollback_arg {
|
||||
const char *ddra_fsname;
|
||||
void *ddra_owner;
|
||||
} dsl_dataset_rollback_arg_t;
|
||||
|
||||
static int
|
||||
dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx)
|
||||
{
|
||||
const char *fsname = arg;
|
||||
dsl_dataset_rollback_arg_t *ddra = arg;
|
||||
dsl_pool_t *dp = dmu_tx_pool(tx);
|
||||
dsl_dataset_t *ds;
|
||||
int64_t unused_refres_delta;
|
||||
int error;
|
||||
|
||||
error = dsl_dataset_hold(dp, fsname, FTAG, &ds);
|
||||
error = dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
|
||||
@ -1694,9 +1730,10 @@ dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx)
|
||||
return (SET_ERROR(EINVAL));
|
||||
}
|
||||
|
||||
if (dsl_dataset_long_held(ds)) {
|
||||
error = dsl_dataset_handoff_check(ds, ddra->ddra_owner, tx);
|
||||
if (error != 0) {
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
return (SET_ERROR(EBUSY));
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1733,12 +1770,12 @@ dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx)
|
||||
static void
|
||||
dsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx)
|
||||
{
|
||||
const char *fsname = arg;
|
||||
dsl_dataset_rollback_arg_t *ddra = arg;
|
||||
dsl_pool_t *dp = dmu_tx_pool(tx);
|
||||
dsl_dataset_t *ds, *clone;
|
||||
uint64_t cloneobj;
|
||||
|
||||
VERIFY0(dsl_dataset_hold(dp, fsname, FTAG, &ds));
|
||||
VERIFY0(dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds));
|
||||
|
||||
cloneobj = dsl_dataset_create_sync(ds->ds_dir, "%rollback",
|
||||
ds->ds_prev, DS_CREATE_FLAG_NODIRTY, kcred, tx);
|
||||
@ -1754,11 +1791,26 @@ dsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx)
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
}
|
||||
|
||||
/*
|
||||
* If owner != NULL:
|
||||
*
|
||||
* - The existing dataset MUST be owned by the specified owner at entry
|
||||
* - Upon return, dataset will still be held by the same owner, whether we
|
||||
* succeed or not.
|
||||
*
|
||||
* This mode is required any time the existing filesystem is mounted. See
|
||||
* notes above zfs_suspend_fs() for further details.
|
||||
*/
|
||||
int
|
||||
dsl_dataset_rollback(const char *fsname)
|
||||
dsl_dataset_rollback(const char *fsname, void *owner)
|
||||
{
|
||||
dsl_dataset_rollback_arg_t ddra;
|
||||
|
||||
ddra.ddra_fsname = fsname;
|
||||
ddra.ddra_owner = owner;
|
||||
|
||||
return (dsl_sync_task(fsname, dsl_dataset_rollback_check,
|
||||
dsl_dataset_rollback_sync, (void *)fsname, 1));
|
||||
dsl_dataset_rollback_sync, (void *)&ddra, 1));
|
||||
}
|
||||
|
||||
struct promotenode {
|
||||
@ -2276,7 +2328,7 @@ dsl_dataset_promote(const char *name, char *conflsnap)
|
||||
|
||||
int
|
||||
dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone,
|
||||
dsl_dataset_t *origin_head, boolean_t force)
|
||||
dsl_dataset_t *origin_head, boolean_t force, void *owner, dmu_tx_t *tx)
|
||||
{
|
||||
int64_t unused_refres_delta;
|
||||
|
||||
@ -2305,7 +2357,7 @@ dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone,
|
||||
return (SET_ERROR(ETXTBSY));
|
||||
|
||||
/* origin_head should have no long holds (e.g. is not mounted) */
|
||||
if (dsl_dataset_long_held(origin_head))
|
||||
if (dsl_dataset_handoff_check(origin_head, owner, tx))
|
||||
return (SET_ERROR(EBUSY));
|
||||
|
||||
/* check amount of any unconsumed refreservation */
|
||||
|
@ -1349,7 +1349,7 @@ zfs_sb_hold(const char *name, void *tag, zfs_sb_t **zsbp, boolean_t writer)
|
||||
/*
|
||||
* XXX we could probably try again, since the unmounting
|
||||
* thread should be just about to disassociate the
|
||||
* objset from the zfsvfs.
|
||||
* objset from the zsb.
|
||||
*/
|
||||
rrw_exit(&(*zsbp)->z_teardown_lock, tag);
|
||||
return (SET_ERROR(EBUSY));
|
||||
@ -3504,13 +3504,13 @@ zfs_ioc_rollback(zfs_cmd_t *zc)
|
||||
if (error == 0) {
|
||||
int resume_err;
|
||||
|
||||
error = dsl_dataset_rollback(zc->zc_name);
|
||||
error = dsl_dataset_rollback(zc->zc_name, zsb);
|
||||
resume_err = zfs_resume_fs(zsb, zc->zc_name);
|
||||
error = error ? error : resume_err;
|
||||
}
|
||||
deactivate_super(zsb->z_sb);
|
||||
} else {
|
||||
error = dsl_dataset_rollback(zc->zc_name);
|
||||
error = dsl_dataset_rollback(zc->zc_name, NULL);
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
@ -4038,13 +4038,13 @@ zfs_ioc_recv(zfs_cmd_t *zc)
|
||||
* If the suspend fails, then the recv_end will
|
||||
* likely also fail, and clean up after itself.
|
||||
*/
|
||||
end_err = dmu_recv_end(&drc);
|
||||
end_err = dmu_recv_end(&drc, zsb);
|
||||
if (error == 0)
|
||||
error = zfs_resume_fs(zsb, tofs);
|
||||
error = error ? error : end_err;
|
||||
deactivate_super(zsb->z_sb);
|
||||
} else {
|
||||
error = dmu_recv_end(&drc);
|
||||
error = dmu_recv_end(&drc, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
@ -4528,8 +4528,11 @@ zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
|
||||
* objset_phys_t). Suspend/resume the fs will do that.
|
||||
*/
|
||||
error = zfs_suspend_fs(zsb);
|
||||
if (error == 0)
|
||||
if (error == 0) {
|
||||
dmu_objset_refresh_ownership(zsb->z_os,
|
||||
zsb);
|
||||
error = zfs_resume_fs(zsb, zc->zc_name);
|
||||
}
|
||||
}
|
||||
if (error == 0)
|
||||
error = dmu_objset_userspace_upgrade(zsb->z_os);
|
||||
|
@ -1453,7 +1453,9 @@ EXPORT_SYMBOL(zfs_vget);
|
||||
* Block out VFS ops and close zfs_sb_t
|
||||
*
|
||||
* Note, if successful, then we return with the 'z_teardown_lock' and
|
||||
* 'z_teardown_inactive_lock' write held.
|
||||
* 'z_teardown_inactive_lock' write held. We leave ownership of the underlying
|
||||
* dataset and objset intact so that they can be atomically handed off during
|
||||
* a subsequent rollback or recv operation and the resume thereafter.
|
||||
*/
|
||||
int
|
||||
zfs_suspend_fs(zfs_sb_t *zsb)
|
||||
@ -1463,8 +1465,6 @@ zfs_suspend_fs(zfs_sb_t *zsb)
|
||||
if ((error = zfs_sb_teardown(zsb, B_FALSE)) != 0)
|
||||
return (error);
|
||||
|
||||
dmu_objset_disown(zsb->z_os, zsb);
|
||||
|
||||
return (0);
|
||||
}
|
||||
EXPORT_SYMBOL(zfs_suspend_fs);
|
||||
@ -1476,66 +1476,69 @@ int
|
||||
zfs_resume_fs(zfs_sb_t *zsb, const char *osname)
|
||||
{
|
||||
int err;
|
||||
znode_t *zp;
|
||||
uint64_t sa_obj = 0;
|
||||
|
||||
ASSERT(RRW_WRITE_HELD(&zsb->z_teardown_lock));
|
||||
ASSERT(RW_WRITE_HELD(&zsb->z_teardown_inactive_lock));
|
||||
|
||||
err = dmu_objset_own(osname, DMU_OST_ZFS, B_FALSE, zsb, &zsb->z_os);
|
||||
if (err) {
|
||||
zsb->z_os = NULL;
|
||||
} else {
|
||||
znode_t *zp;
|
||||
uint64_t sa_obj = 0;
|
||||
/*
|
||||
* We already own this, so just hold and rele it to update the
|
||||
* objset_t, as the one we had before may have been evicted.
|
||||
*/
|
||||
VERIFY0(dmu_objset_hold(osname, zsb, &zsb->z_os));
|
||||
VERIFY3P(zsb->z_os->os_dsl_dataset->ds_owner, ==, zsb);
|
||||
VERIFY(dsl_dataset_long_held(zsb->z_os->os_dsl_dataset));
|
||||
dmu_objset_rele(zsb->z_os, zsb);
|
||||
|
||||
/*
|
||||
* Make sure version hasn't changed
|
||||
*/
|
||||
/*
|
||||
* Make sure version hasn't changed
|
||||
*/
|
||||
|
||||
err = zfs_get_zplprop(zsb->z_os, ZFS_PROP_VERSION,
|
||||
&zsb->z_version);
|
||||
err = zfs_get_zplprop(zsb->z_os, ZFS_PROP_VERSION,
|
||||
&zsb->z_version);
|
||||
|
||||
if (err)
|
||||
goto bail;
|
||||
if (err)
|
||||
goto bail;
|
||||
|
||||
err = zap_lookup(zsb->z_os, MASTER_NODE_OBJ,
|
||||
ZFS_SA_ATTRS, 8, 1, &sa_obj);
|
||||
err = zap_lookup(zsb->z_os, MASTER_NODE_OBJ,
|
||||
ZFS_SA_ATTRS, 8, 1, &sa_obj);
|
||||
|
||||
if (err && zsb->z_version >= ZPL_VERSION_SA)
|
||||
goto bail;
|
||||
if (err && zsb->z_version >= ZPL_VERSION_SA)
|
||||
goto bail;
|
||||
|
||||
if ((err = sa_setup(zsb->z_os, sa_obj,
|
||||
zfs_attr_table, ZPL_END, &zsb->z_attr_table)) != 0)
|
||||
goto bail;
|
||||
if ((err = sa_setup(zsb->z_os, sa_obj,
|
||||
zfs_attr_table, ZPL_END, &zsb->z_attr_table)) != 0)
|
||||
goto bail;
|
||||
|
||||
if (zsb->z_version >= ZPL_VERSION_SA)
|
||||
sa_register_update_callback(zsb->z_os,
|
||||
zfs_sa_upgrade);
|
||||
if (zsb->z_version >= ZPL_VERSION_SA)
|
||||
sa_register_update_callback(zsb->z_os,
|
||||
zfs_sa_upgrade);
|
||||
|
||||
VERIFY(zfs_sb_setup(zsb, B_FALSE) == 0);
|
||||
VERIFY(zfs_sb_setup(zsb, B_FALSE) == 0);
|
||||
|
||||
zfs_set_fuid_feature(zsb);
|
||||
zsb->z_rollback_time = jiffies;
|
||||
zfs_set_fuid_feature(zsb);
|
||||
zsb->z_rollback_time = jiffies;
|
||||
|
||||
/*
|
||||
* Attempt to re-establish all the active inodes with their
|
||||
* dbufs. If a zfs_rezget() fails, then we unhash the inode
|
||||
* and mark it stale. This prevents a collision if a new
|
||||
* inode/object is created which must use the same inode
|
||||
* number. The stale inode will be released when the
|
||||
* VFS prunes the dentry holding the remaining references
|
||||
* on the stale inode.
|
||||
*/
|
||||
mutex_enter(&zsb->z_znodes_lock);
|
||||
for (zp = list_head(&zsb->z_all_znodes); zp;
|
||||
zp = list_next(&zsb->z_all_znodes, zp)) {
|
||||
err2 = zfs_rezget(zp);
|
||||
if (err2) {
|
||||
remove_inode_hash(ZTOI(zp));
|
||||
zp->z_is_stale = B_TRUE;
|
||||
}
|
||||
/*
|
||||
* Attempt to re-establish all the active inodes with their
|
||||
* dbufs. If a zfs_rezget() fails, then we unhash the inode
|
||||
* and mark it stale. This prevents a collision if a new
|
||||
* inode/object is created which must use the same inode
|
||||
* number. The stale inode will be released when the
|
||||
* VFS prunes the dentry holding the remaining references
|
||||
* on the stale inode.
|
||||
*/
|
||||
mutex_enter(&zsb->z_znodes_lock);
|
||||
for (zp = list_head(&zsb->z_all_znodes); zp;
|
||||
zp = list_next(&zsb->z_all_znodes, zp)) {
|
||||
err = zfs_rezget(zp);
|
||||
if (err) {
|
||||
remove_inode_hash(ZTOI(zp));
|
||||
zp->z_is_stale = B_TRUE;
|
||||
}
|
||||
mutex_exit(&zsb->z_znodes_lock);
|
||||
}
|
||||
mutex_exit(&zsb->z_znodes_lock);
|
||||
|
||||
bail:
|
||||
/* release the VFS ops */
|
||||
@ -1544,8 +1547,8 @@ zfs_resume_fs(zfs_sb_t *zsb, const char *osname)
|
||||
|
||||
if (err) {
|
||||
/*
|
||||
* Since we couldn't reopen zfs_sb_t or, or
|
||||
* setup the sa framework force unmount this file system.
|
||||
* Since we couldn't setup the sa framework, try to force
|
||||
* unmount this file system.
|
||||
*/
|
||||
if (zsb->z_os)
|
||||
(void) zfs_umount(zsb->z_sb);
|
||||
|
Loading…
Reference in New Issue
Block a user