Add 'zfs wait' command
Add a mechanism to wait for the delete queue to drain.

When doing redacted send/recv, many workflows involve deleting files that
contain sensitive data. Because of the way ZFS handles file deletions,
snapshots taken quickly after an rm operation can sometimes still contain the
file in question, especially if the file is very large. This can result in
issues for redacted send/recv users who expect the deleted files to be
redacted in the send streams and not appear in their clones.

This change duplicates much of the zpool wait related logic into a 'zfs wait'
command, which can be used to wait until the internal deleteq has been
drained. Additional wait activities may be added in the future.

Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: John Gallagher <john.gallagher@delphix.com>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
Closes #9707
Parent: c9e3efdb3a
Commit: 5a42ef04fd
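As a rough illustration of the workflow described in the commit message (not
part of this change), a userland consumer of the new lzc_wait_fs() interface
might remove a sensitive file, wait for the delete queue to drain, and only
then snapshot the filesystem. The dataset name, mountpoint path, and snapshot
name below are assumptions made for the example; build against libzfs_core
and libnvpair.

/*
 * Illustrative sketch only: unlink a file, wait for the dataset's internal
 * delete queue to empty via lzc_wait_fs(), then take the snapshot that a
 * redacted send would be based on.
 */
#include <stdio.h>
#include <unistd.h>
#include <libnvpair.h>
#include <libzfs_core.h>

int
main(void)
{
    boolean_t waited = B_FALSE;
    nvlist_t *snaps, *errlist = NULL;
    int err;

    if (libzfs_core_init() != 0)
        return (1);

    /* Assumed mountpoint and file name. */
    (void) unlink("/tank/secure/secret.dat");

    /* Block until the filesystem's delete queue is empty. */
    err = lzc_wait_fs("tank/secure", ZFS_WAIT_DELETEQ, &waited);
    if (err == 0) {
        (void) printf("waiting was %snecessary\n", waited ? "" : "not ");

        /* Snapshot names are passed to lzc_snapshot() as boolean nvpairs. */
        snaps = fnvlist_alloc();
        fnvlist_add_boolean(snaps, "tank/secure@post-rm");
        err = lzc_snapshot(snaps, NULL, &errlist);
        fnvlist_free(snaps);
        fnvlist_free(errlist);
    }

    libzfs_core_fini();
    return (err == 0 ? 0 : 1);
}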
@@ -122,6 +122,7 @@ static int zfs_do_change_key(int argc, char **argv);
static int zfs_do_project(int argc, char **argv);
static int zfs_do_version(int argc, char **argv);
static int zfs_do_redact(int argc, char **argv);
static int zfs_do_wait(int argc, char **argv);

#ifdef __FreeBSD__
static int zfs_do_jail(int argc, char **argv);
@@ -183,7 +184,8 @@ typedef enum {
    HELP_VERSION,
    HELP_REDACT,
    HELP_JAIL,
    HELP_UNJAIL
    HELP_UNJAIL,
    HELP_WAIT,
} zfs_help_t;

typedef struct zfs_command {
@@ -248,6 +250,7 @@ static zfs_command_t command_table[] = {
    { "unload-key", zfs_do_unload_key, HELP_UNLOAD_KEY },
    { "change-key", zfs_do_change_key, HELP_CHANGE_KEY },
    { "redact", zfs_do_redact, HELP_REDACT },
    { "wait", zfs_do_wait, HELP_WAIT },

#ifdef __FreeBSD__
    { "jail", zfs_do_jail, HELP_JAIL },
@@ -410,6 +413,8 @@ get_usage(zfs_help_t idx)
        return (gettext("\tjail <jailid|jailname> <filesystem>\n"));
    case HELP_UNJAIL:
        return (gettext("\tunjail <jailid|jailname> <filesystem>\n"));
    case HELP_WAIT:
        return (gettext("\twait [-t <activity>] <filesystem>\n"));
    }

    abort();
@@ -8317,6 +8322,90 @@ zfs_do_project(int argc, char **argv)
    return (ret);
}

static int
zfs_do_wait(int argc, char **argv)
{
    boolean_t enabled[ZFS_WAIT_NUM_ACTIVITIES];
    int error, i;
    char c;

    /* By default, wait for all types of activity. */
    for (i = 0; i < ZFS_WAIT_NUM_ACTIVITIES; i++)
        enabled[i] = B_TRUE;

    while ((c = getopt(argc, argv, "t:")) != -1) {
        switch (c) {
        case 't':
        {
            static char *col_subopts[] = { "deleteq", NULL };
            char *value;

            /* Reset activities array */
            bzero(&enabled, sizeof (enabled));
            while (*optarg != '\0') {
                int activity = getsubopt(&optarg, col_subopts,
                    &value);

                if (activity < 0) {
                    (void) fprintf(stderr,
                        gettext("invalid activity '%s'\n"),
                        value);
                    usage(B_FALSE);
                }

                enabled[activity] = B_TRUE;
            }
            break;
        }
        case '?':
            (void) fprintf(stderr, gettext("invalid option '%c'\n"),
                optopt);
            usage(B_FALSE);
        }
    }

    argv += optind;
    argc -= optind;
    if (argc < 1) {
        (void) fprintf(stderr, gettext("missing 'filesystem' "
            "argument\n"));
        usage(B_FALSE);
    }
    if (argc > 1) {
        (void) fprintf(stderr, gettext("too many arguments\n"));
        usage(B_FALSE);
    }

    zfs_handle_t *zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_FILESYSTEM);
    if (zhp == NULL)
        return (1);

    for (;;) {
        boolean_t missing = B_FALSE;
        boolean_t any_waited = B_FALSE;

        for (int i = 0; i < ZFS_WAIT_NUM_ACTIVITIES; i++) {
            boolean_t waited;

            if (!enabled[i])
                continue;

            error = zfs_wait_status(zhp, i, &missing, &waited);
            if (error != 0 || missing)
                break;

            any_waited = (any_waited || waited);
        }

        if (error != 0 || missing || !any_waited)
            break;
    }

    zfs_close(zhp);

    return (error);
}

/*
 * Display version message
 */
@@ -264,6 +264,7 @@ AC_CONFIG_FILES([
    tests/zfs-tests/tests/functional/cli_root/zfs_unmount/Makefile
    tests/zfs-tests/tests/functional/cli_root/zfs_unshare/Makefile
    tests/zfs-tests/tests/functional/cli_root/zfs_upgrade/Makefile
    tests/zfs-tests/tests/functional/cli_root/zfs_wait/Makefile
    tests/zfs-tests/tests/functional/cli_root/zpool/Makefile
    tests/zfs-tests/tests/functional/cli_root/zpool_add/Makefile
    tests/zfs-tests/tests/functional/cli_root/zpool_attach/Makefile
@@ -507,6 +507,9 @@ extern nvlist_t *zfs_get_user_props(zfs_handle_t *);
extern nvlist_t *zfs_get_recvd_props(zfs_handle_t *);
extern nvlist_t *zfs_get_clones_nvl(zfs_handle_t *);

extern int zfs_wait_status(zfs_handle_t *, zfs_wait_activity_t,
    boolean_t *, boolean_t *);

/*
 * zfs encryption management
 */
@@ -133,6 +133,7 @@ int lzc_pool_checkpoint_discard(const char *);

int lzc_wait(const char *, zpool_wait_activity_t, boolean_t *);
int lzc_wait_tag(const char *, zpool_wait_activity_t, uint64_t, boolean_t *);
int lzc_wait_fs(const char *, zfs_wait_activity_t, boolean_t *);

#ifdef __cplusplus
}
@@ -121,6 +121,11 @@ struct dsl_dir {
    bplist_t dd_pending_frees;
    bplist_t dd_pending_allocs;

    kmutex_t dd_activity_lock;
    kcondvar_t dd_activity_cv;
    boolean_t dd_activity_cancelled;
    uint64_t dd_activity_waiters;

    /* protected by dd_lock; keep at end of struct for better locality */
    char dd_myname[ZFS_MAX_DATASET_NAME_LEN];
};
@@ -192,6 +197,9 @@ boolean_t dsl_dir_is_zapified(dsl_dir_t *dd);
void dsl_dir_livelist_open(dsl_dir_t *dd, uint64_t obj);
void dsl_dir_livelist_close(dsl_dir_t *dd);
void dsl_dir_remove_livelist(dsl_dir_t *dd, dmu_tx_t *tx, boolean_t total);
int dsl_dir_wait(dsl_dir_t *dd, dsl_dataset_t *ds, zfs_wait_activity_t activity,
    boolean_t *waited);
void dsl_dir_cancel_waiters(dsl_dir_t *dd);

/* internal reserved dir name */
#define MOS_DIR_NAME "$MOS"
@@ -1282,6 +1282,7 @@ typedef enum zfs_ioc {
    ZFS_IOC_REDACT,                 /* 0x5a51 */
    ZFS_IOC_GET_BOOKMARK_PROPS,     /* 0x5a52 */
    ZFS_IOC_WAIT,                   /* 0x5a53 */
    ZFS_IOC_WAIT_FS,                /* 0x5a54 */

    /*
     * Per-platform (Optional) - 6/128 numbers reserved.
@@ -1358,6 +1359,11 @@ typedef enum {
    ZPOOL_WAIT_NUM_ACTIVITIES
} zpool_wait_activity_t;

typedef enum {
    ZFS_WAIT_DELETEQ,
    ZFS_WAIT_NUM_ACTIVITIES
} zfs_wait_activity_t;

/*
 * Bookmark name values.
 */
@@ -1415,6 +1421,12 @@ typedef enum {
#define ZPOOL_WAIT_TAG "wait_tag"
#define ZPOOL_WAIT_WAITED "wait_waited"

/*
 * The following are names used when invoking ZFS_IOC_WAIT_FS.
 */
#define ZFS_WAIT_ACTIVITY "wait_activity"
#define ZFS_WAIT_WAITED "wait_waited"

/*
 * Flags for ZFS_IOC_VDEV_SET_STATE
 */
@@ -5599,3 +5599,31 @@ zvol_volsize_to_reservation(zpool_handle_t *zph, uint64_t volsize,
    volsize += numdb;
    return (volsize);
}

/*
 * Wait for the given activity and return the status of the wait (whether or not
 * any waiting was done) in the 'waited' parameter. Non-existent fses are
 * reported via the 'missing' parameter, rather than by printing an error
 * message. This is convenient when this function is called in a loop over a
 * long period of time (as it is, for example, by zfs's wait cmd). In that
 * scenario, a fs being exported or destroyed should be considered a normal
 * event, so we don't want to print an error when we find that the fs doesn't
 * exist.
 */
int
zfs_wait_status(zfs_handle_t *zhp, zfs_wait_activity_t activity,
    boolean_t *missing, boolean_t *waited)
{
    int error = lzc_wait_fs(zhp->zfs_name, activity, waited);
    *missing = (error == ENOENT);
    if (*missing)
        return (0);

    if (error != 0) {
        (void) zfs_standard_error_fmt(zhp->zfs_hdl, error,
            dgettext(TEXT_DOMAIN, "error waiting in fs '%s'"),
            zhp->zfs_name);
    }

    return (error);
}
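The comment above spells out the 'missing'/'waited' contract of the new
zfs_wait_status() entry point. As a hedged sketch (not part of the diff), a
libzfs consumer could drive it for a single activity as follows; the function
name and surrounding error handling are illustrative assumptions.

/*
 * Illustrative only: block until the delete queue of the given filesystem is
 * empty, treating a dataset that disappears mid-wait as success, mirroring
 * the loop in the new zfs_do_wait() command handler.
 */
#include <libzfs.h>

static int
wait_for_deleteq(libzfs_handle_t *hdl, const char *fsname)
{
    boolean_t missing = B_FALSE;
    boolean_t waited;
    int error;
    zfs_handle_t *zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM);

    if (zhp == NULL)
        return (-1);

    /* Repeat until a call reports that no waiting was needed. */
    do {
        waited = B_FALSE;
        error = zfs_wait_status(zhp, ZFS_WAIT_DELETEQ, &missing, &waited);
    } while (error == 0 && !missing && waited);

    zfs_close(zhp);
    return (error);
}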
@@ -1621,3 +1621,23 @@ lzc_wait_tag(const char *pool, zpool_wait_activity_t activity, uint64_t tag,
{
    return (wait_common(pool, activity, B_TRUE, tag, waited));
}

int
lzc_wait_fs(const char *fs, zfs_wait_activity_t activity, boolean_t *waited)
{
    nvlist_t *args = fnvlist_alloc();
    nvlist_t *result = NULL;

    fnvlist_add_int32(args, ZFS_WAIT_ACTIVITY, activity);

    int error = lzc_ioctl(ZFS_IOC_WAIT_FS, fs, args, &result);

    if (error == 0 && waited != NULL)
        *waited = fnvlist_lookup_boolean_value(result,
            ZFS_WAIT_WAITED);

    fnvlist_free(args);
    fnvlist_free(result);

    return (error);
}
@@ -41,6 +41,7 @@ dist_man_MANS = \
    zfs-unmount.8 \
    zfs-upgrade.8 \
    zfs-userspace.8 \
    zfs-wait.8 \
    zgenhostid.8 \
    zinject.8 \
    zpool.8 \
man/man8/zfs-wait.8 (new file, 71 lines)
@@ -0,0 +1,71 @@
.\"
.\" CDDL HEADER START
.\"
.\" The contents of this file are subject to the terms of the
.\" Common Development and Distribution License (the "License").
.\" You may not use this file except in compliance with the License.
.\"
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
.\" or http://www.opensolaris.org/os/licensing.
.\" See the License for the specific language governing permissions
.\" and limitations under the License.
.\"
.\" When distributing Covered Code, include this CDDL HEADER in each
.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
.\" If applicable, add the following below this CDDL HEADER, with the
.\" fields enclosed by brackets "[]" replaced with your own identifying
.\" information: Portions Copyright [yyyy] [name of copyright owner]
.\"
.\" CDDL HEADER END
.\"
.\"
.\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved.
.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
.\" Copyright (c) 2012 Cyril Plisko. All Rights Reserved.
.\" Copyright (c) 2017 Datto Inc.
.\" Copyright (c) 2018 George Melikov. All Rights Reserved.
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
.Dd August 9, 2019
.Dt ZFS-WAIT 8
.Os Linux
.Sh NAME
.Nm zfs Ns Pf - Cm wait
.Nd Wait for background activity to stop in a ZFS filesystem
.Sh SYNOPSIS
.Nm
.Cm wait
.Op Fl t Ar activity Ns Oo , Ns Ar activity Ns Oc Ns ...
.Ar fs
.Sh DESCRIPTION
.Bl -tag -width Ds
.It Xo
.Nm
.Cm wait
.Op Fl t Ar activity Ns Oo , Ns Ar activity Ns Oc Ns ...
.Ar fs
.Xc
Waits until all background activity of the given types has ceased in the given
filesystem.
The activity could cease because it has completed or because the filesystem has
been destroyed or unmounted.
If no activities are specified, the command waits until background activity of
every type listed below has ceased.
If there is no activity of the given types in progress, the command returns
immediately.
.Pp
These are the possible values for
.Ar activity ,
along with what each one waits for:
.Bd -literal
        deleteq   The filesystem's internal delete queue to empty
.Ed
.Pp
Note that the internal delete queue does not finish draining until
all large files have had time to be fully destroyed and all open file
handles to unlinked files are closed.
.El
.El
.Sh SEE ALSO
.Xr lsof 8
@@ -281,6 +281,11 @@ Attaches a filesystem to a jail.
.It Xr zfs-unjail 8
Detaches a filesystem from a jail.
.El
.Ss Waiting
.Bl -tag -width ""
.It Xr zfs-wait 8
Wait for background activity in a filesystem to complete.
.El
.Sh EXIT STATUS
The
.Nm
@@ -52,6 +52,8 @@
#include <sys/zfs_fuid.h>
#include <sys/sa.h>
#include <sys/zfs_sa.h>
#include <sys/dmu_objset.h>
#include <sys/dsl_dir.h>

/*
 * zfs_match_find() is used by zfs_dirent_lock() to perform zap lookups
@@ -739,6 +741,8 @@ zfs_rmnode(znode_t *zp)
        zfs_unlinked_add(xzp, tx);
    }

    mutex_enter(&os->os_dsl_dataset->ds_dir->dd_activity_lock);

    /*
     * Remove this znode from the unlinked set. If a rollback has
     * occurred while a file is open and unlinked, then when the file
@@ -749,6 +753,13 @@ zfs_rmnode(znode_t *zp)
        zp->z_id, tx);
    VERIFY(error == 0 || error == ENOENT);

    uint64_t count;
    if (zap_count(os, zfsvfs->z_unlinkedobj, &count) == 0 && count == 0) {
        cv_broadcast(&os->os_dsl_dataset->ds_dir->dd_activity_cv);
    }

    mutex_exit(&os->os_dsl_dataset->ds_dir->dd_activity_lock);

    dataset_kstats_update_nunlinked_kstat(&zfsvfs->z_kstat, 1);

    zfs_znode_delete(zp, tx);
@@ -55,6 +55,7 @@
#include <sys/zfs_quota.h>
#include <sys/sunddi.h>
#include <sys/dmu_objset.h>
#include <sys/dsl_dir.h>
#include <sys/spa_boot.h>
#include <sys/objlist.h>
#include <sys/zpl.h>
@@ -872,6 +873,8 @@ zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
            "num_entries in unlinked set: %llu",
            zs.zs_num_entries);
        zfs_unlinked_drain(zfsvfs);
        dsl_dir_t *dd = zfsvfs->z_os->os_dsl_dataset->ds_dir;
        dd->dd_activity_cancelled = B_FALSE;
    }

    /*
@@ -1423,6 +1426,8 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
        txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
    }
    dmu_objset_evict_dbufs(zfsvfs->z_os);
    dsl_dir_t *dd = os->os_dsl_dataset->ds_dir;
    dsl_dir_cancel_waiters(dd);

    return (0);
}
@@ -1813,6 +1818,7 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
    if (err != 0)
        goto bail;

    ds->ds_dir->dd_activity_cancelled = B_FALSE;
    VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0);

    zfs_set_fuid_feature(zfsvfs);
@@ -3077,20 +3077,26 @@ dsl_dataset_rename_snapshot(const char *fsname,
static int
dsl_dataset_handoff_check(dsl_dataset_t *ds, void *owner, dmu_tx_t *tx)
{
    boolean_t held;
    boolean_t held = B_FALSE;

    if (!dmu_tx_is_syncing(tx))
        return (0);

    if (owner != NULL) {
        VERIFY3P(ds->ds_owner, ==, owner);
        dsl_dataset_long_rele(ds, owner);
    }

    held = dsl_dataset_long_held(ds);

    if (owner != NULL)
        dsl_dataset_long_hold(ds, owner);
    dsl_dir_t *dd = ds->ds_dir;
    mutex_enter(&dd->dd_activity_lock);
    uint64_t holds = zfs_refcount_count(&ds->ds_longholds) -
        (owner != NULL ? 1 : 0);
    /*
     * The value of dd_activity_waiters can change as soon as we drop the
     * lock, but we're fine with that; new waiters coming in or old
     * waiters leaving doesn't cause problems, since we're going to cancel
     * waiters later anyway. The goal of this check is to verify that no
     * non-waiters have long-holds, and all new long-holds will be
     * prevented because we're holding the pool config as writer.
     */
    if (holds != dd->dd_activity_waiters)
        held = B_TRUE;
    mutex_exit(&dd->dd_activity_lock);

    if (held)
        return (SET_ERROR(EBUSY));
@@ -4036,6 +4042,8 @@ dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone,
        DMU_MAX_ACCESS * spa_asize_inflation);
    ASSERT3P(clone->ds_prev, ==, origin_head->ds_prev);

    dsl_dir_cancel_waiters(origin_head->ds_dir);

    /*
     * Swap per-dataset feature flags.
     */
@@ -766,6 +766,8 @@ dsl_destroy_head_check_impl(dsl_dataset_t *ds, int expected_holds)
    if (zfs_refcount_count(&ds->ds_longholds) != expected_holds)
        return (SET_ERROR(EBUSY));

    ASSERT0(ds->ds_dir->dd_activity_waiters);

    mos = ds->ds_dir->dd_pool->dp_meta_objset;

    /*
@@ -1002,6 +1004,8 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
    /* We need to log before removing it from the namespace. */
    spa_history_log_internal_ds(ds, "destroy", tx, " ");

    dsl_dir_cancel_waiters(ds->ds_dir);

    rmorigin = (dsl_dir_is_clone(ds->ds_dir) &&
        DS_IS_DEFER_DESTROY(ds->ds_prev) &&
        dsl_dataset_phys(ds->ds_prev)->ds_num_children == 2 &&
@@ -51,6 +51,9 @@
#include <sys/zthr.h>
#include "zfs_namecheck.h"
#include "zfs_prop.h"
#ifdef _KERNEL
#include <sys/zfs_vfsops.h>
#endif

/*
 * Filesystem and Snapshot Limits
@@ -160,6 +163,8 @@ dsl_dir_evict_async(void *dbu)
        dsl_dir_livelist_close(dd);

    dsl_prop_fini(dd);
    cv_destroy(&dd->dd_activity_cv);
    mutex_destroy(&dd->dd_activity_lock);
    mutex_destroy(&dd->dd_lock);
    kmem_free(dd, sizeof (dsl_dir_t));
}
@@ -207,6 +212,8 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
        }

        mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL);
        mutex_init(&dd->dd_activity_lock, NULL, MUTEX_DEFAULT, NULL);
        cv_init(&dd->dd_activity_cv, NULL, CV_DEFAULT, NULL);
        dsl_prop_init(dd);

        dsl_dir_snap_cmtime_update(dd);
@@ -280,6 +287,8 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
            if (dsl_deadlist_is_open(&dd->dd_livelist))
                dsl_dir_livelist_close(dd);
            dsl_prop_fini(dd);
            cv_destroy(&dd->dd_activity_cv);
            mutex_destroy(&dd->dd_activity_lock);
            mutex_destroy(&dd->dd_lock);
            kmem_free(dd, sizeof (dsl_dir_t));
            dd = winner;
@@ -310,6 +319,8 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
    if (dsl_deadlist_is_open(&dd->dd_livelist))
        dsl_dir_livelist_close(dd);
    dsl_prop_fini(dd);
    cv_destroy(&dd->dd_activity_cv);
    mutex_destroy(&dd->dd_activity_lock);
    mutex_destroy(&dd->dd_lock);
    kmem_free(dd, sizeof (dsl_dir_t));
    dmu_buf_rele(dbuf, tag);
@@ -2282,6 +2293,108 @@ dsl_dir_remove_livelist(dsl_dir_t *dd, dmu_tx_t *tx, boolean_t total)
    }
}

static int
dsl_dir_activity_in_progress(dsl_dir_t *dd, dsl_dataset_t *ds,
    zfs_wait_activity_t activity, boolean_t *in_progress)
{
    int error = 0;

    ASSERT(MUTEX_HELD(&dd->dd_activity_lock));

    switch (activity) {
    case ZFS_WAIT_DELETEQ: {
#ifdef _KERNEL
        objset_t *os;
        error = dmu_objset_from_ds(ds, &os);
        if (error != 0)
            break;

        mutex_enter(&os->os_user_ptr_lock);
        void *user = dmu_objset_get_user(os);
        mutex_exit(&os->os_user_ptr_lock);
        if (dmu_objset_type(os) != DMU_OST_ZFS ||
            user == NULL || zfs_get_vfs_flag_unmounted(os)) {
            *in_progress = B_FALSE;
            return (0);
        }

        uint64_t readonly = B_FALSE;
        error = zfs_get_temporary_prop(ds, ZFS_PROP_READONLY, &readonly,
            NULL);

        if (error != 0)
            break;

        if (readonly || !spa_writeable(dd->dd_pool->dp_spa)) {
            *in_progress = B_FALSE;
            return (0);
        }

        uint64_t count, unlinked_obj;
        error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1,
            &unlinked_obj);
        if (error != 0) {
            dsl_dataset_rele(ds, FTAG);
            break;
        }
        error = zap_count(os, unlinked_obj, &count);

        if (error == 0)
            *in_progress = (count != 0);
        break;
#else
        /*
         * The delete queue is ZPL specific, and libzpool doesn't have
         * it. It doesn't make sense to wait for it.
         */
        *in_progress = B_FALSE;
        break;
#endif
    }
    default:
        panic("unrecognized value for activity %d", activity);
    }

    return (error);
}

int
dsl_dir_wait(dsl_dir_t *dd, dsl_dataset_t *ds, zfs_wait_activity_t activity,
    boolean_t *waited)
{
    int error = 0;
    boolean_t in_progress;
    dsl_pool_t *dp = dd->dd_pool;
    for (;;) {
        dsl_pool_config_enter(dp, FTAG);
        error = dsl_dir_activity_in_progress(dd, ds, activity,
            &in_progress);
        dsl_pool_config_exit(dp, FTAG);
        if (error != 0 || !in_progress)
            break;

        *waited = B_TRUE;

        if (cv_wait_sig(&dd->dd_activity_cv, &dd->dd_activity_lock) ==
            0 || dd->dd_activity_cancelled) {
            error = SET_ERROR(EINTR);
            break;
        }
    }
    return (error);
}

void
dsl_dir_cancel_waiters(dsl_dir_t *dd)
{
    mutex_enter(&dd->dd_activity_lock);
    dd->dd_activity_cancelled = B_TRUE;
    cv_broadcast(&dd->dd_activity_cv);
    while (dd->dd_activity_waiters > 0)
        cv_wait(&dd->dd_activity_cv, &dd->dd_activity_lock);
    mutex_exit(&dd->dd_activity_lock);
}

#if defined(_KERNEL)
EXPORT_SYMBOL(dsl_dir_set_quota);
EXPORT_SYMBOL(dsl_dir_set_reservation);
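The commit message notes that additional wait activities may be added in the
future. Purely as a hypothetical skeleton (nothing below exists in this
commit or in the tree), a second activity would follow the same pattern as
ZFS_WAIT_DELETEQ: a new value ahead of ZFS_WAIT_NUM_ACTIVITIES in the
zfs_wait_activity_t enum shown earlier, a new entry in the col_subopts[]
table in zfs_do_wait(), and a new case in dsl_dir_activity_in_progress().
The ZFS_WAIT_EXAMPLE name is invented for illustration and the fragment is a
sketch, not compilable on its own.

/* Hypothetical only: extend the enum shown earlier in this diff ... */
typedef enum {
    ZFS_WAIT_DELETEQ,
    ZFS_WAIT_EXAMPLE,        /* invented name, not part of this commit */
    ZFS_WAIT_NUM_ACTIVITIES
} zfs_wait_activity_t;

/* ... and handle it in the switch in dsl_dir_activity_in_progress(): */
    case ZFS_WAIT_EXAMPLE:
        /*
         * Decide, while holding dd_activity_lock, whether the activity is
         * still in progress and set *in_progress accordingly. Whatever code
         * path finishes the activity must cv_broadcast(&dd->dd_activity_cv)
         * so that dsl_dir_wait() re-checks.
         */
        *in_progress = B_FALSE;
        break;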
@@ -4072,6 +4072,83 @@ zfs_ioc_wait(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
    return (error);
}

/*
 * This ioctl waits for activity of a particular type to complete. If there is
 * no activity of that type in progress, it returns immediately, and the
 * returned value "waited" is false. If there is activity in progress, and no
 * tag is passed in, the ioctl blocks until all activity of that type is
 * complete, and then returns with "waited" set to true.
 *
 * If a thread waiting in the ioctl receives a signal, the call will return
 * immediately, and the return value will be EINTR.
 *
 * innvl: {
 *     "wait_activity" -> int32_t
 * }
 *
 * outnvl: "waited" -> boolean_t
 */
static const zfs_ioc_key_t zfs_keys_fs_wait[] = {
    {ZFS_WAIT_ACTIVITY, DATA_TYPE_INT32, 0},
};

static int
zfs_ioc_wait_fs(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
{
    int32_t activity;
    boolean_t waited = B_FALSE;
    int error;
    dsl_pool_t *dp;
    dsl_dir_t *dd;
    dsl_dataset_t *ds;

    if (nvlist_lookup_int32(innvl, ZFS_WAIT_ACTIVITY, &activity) != 0)
        return (SET_ERROR(EINVAL));

    if (activity >= ZFS_WAIT_NUM_ACTIVITIES || activity < 0)
        return (SET_ERROR(EINVAL));

    if ((error = dsl_pool_hold(name, FTAG, &dp)) != 0)
        return (error);

    if ((error = dsl_dataset_hold(dp, name, FTAG, &ds)) != 0) {
        dsl_pool_rele(dp, FTAG);
        return (error);
    }

    dd = ds->ds_dir;
    mutex_enter(&dd->dd_activity_lock);
    dd->dd_activity_waiters++;

    /*
     * We get a long-hold here so that the dsl_dataset_t and dsl_dir_t
     * aren't evicted while we're waiting. Normally this is prevented by
     * holding the pool, but we can't do that while we're waiting since
     * that would prevent TXGs from syncing out. Some of the functionality
     * of long-holds (e.g. preventing deletion) is unnecessary for this
     * case, since we would cancel the waiters before proceeding with a
     * deletion. An alternative mechanism for keeping the dataset around
     * could be developed but this is simpler.
     */
    dsl_dataset_long_hold(ds, FTAG);
    dsl_pool_rele(dp, FTAG);

    error = dsl_dir_wait(dd, ds, activity, &waited);

    dsl_dataset_long_rele(ds, FTAG);
    dd->dd_activity_waiters--;
    if (dd->dd_activity_waiters == 0)
        cv_signal(&dd->dd_activity_cv);
    mutex_exit(&dd->dd_activity_lock);

    dsl_dataset_rele(ds, FTAG);

    if (error == 0)
        fnvlist_add_boolean_value(outnvl, ZFS_WAIT_WAITED, waited);

    return (error);
}

/*
 * fsname is name of dataset to rollback (to most recent snapshot)
 *
@@ -6915,6 +6992,11 @@ zfs_ioctl_init(void)
        POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
        zfs_keys_pool_wait, ARRAY_SIZE(zfs_keys_pool_wait));

    zfs_ioctl_register("wait_fs", ZFS_IOC_WAIT_FS,
        zfs_ioc_wait_fs, zfs_secpolicy_none, DATASET_NAME,
        POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
        zfs_keys_fs_wait, ARRAY_SIZE(zfs_keys_fs_wait));

    /* IOCTLS that use the legacy function signature */

    zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
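The block comment above documents the nvlist contract of ZFS_IOC_WAIT_FS. As a
small illustrative sketch (not part of the change), the same nvpair names can
be exercised from userland with libnvpair alone; lzc_wait_fs(), added earlier
in this commit, is the real wrapper that sends innvl to the kernel and reads
the reply. The locally faked outnvl below is an assumption made so the sketch
runs without issuing an ioctl.

/*
 * Illustrative only: build the documented innvl and read a "wait_waited"
 * answer the way lzc_wait_fs() does; no ioctl is actually issued here.
 */
#include <stdio.h>
#include <libnvpair.h>

int
main(void)
{
    nvlist_t *innvl = fnvlist_alloc();
    nvlist_t *outnvl = fnvlist_alloc();   /* stand-in for the kernel's reply */

    fnvlist_add_int32(innvl, "wait_activity", 0);   /* ZFS_WAIT_DELETEQ */

    /* On success the kernel adds the boolean "wait_waited" to outnvl. */
    fnvlist_add_boolean_value(outnvl, "wait_waited", B_TRUE);
    (void) printf("waited: %d\n",
        (int)fnvlist_lookup_boolean_value(outnvl, "wait_waited"));

    fnvlist_free(innvl);
    fnvlist_free(outnvl);
    return (0);
}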
@@ -288,6 +288,10 @@ tests = ['zfs_upgrade_001_pos', 'zfs_upgrade_002_pos', 'zfs_upgrade_003_pos',
    'zfs_upgrade_007_neg']
tags = ['functional', 'cli_root', 'zfs_upgrade']

[tests/functional/cli_root/zfs_wait]
tests = ['zfs_wait_deleteq']
tags = ['functional', 'cli_root', 'zfs_wait']

[tests/functional/cli_root/zpool]
tests = ['zpool_001_neg', 'zpool_002_pos', 'zpool_003_pos', 'zpool_colors']
tags = ['functional', 'cli_root', 'zpool']
@@ -739,6 +739,18 @@ test_wait(const char *pool)
    nvlist_free(optional);
}

static void
test_wait_fs(const char *dataset)
{
    nvlist_t *required = fnvlist_alloc();

    fnvlist_add_int32(required, "wait_activity", 2);

    IOC_INPUT_TEST(ZFS_IOC_WAIT_FS, dataset, required, NULL, EINVAL);

    nvlist_free(required);
}

static void
zfs_ioc_input_tests(const char *pool)
{
@@ -826,6 +838,7 @@ zfs_ioc_input_tests(const char *pool)
    test_vdev_trim(pool);

    test_wait(pool);
    test_wait_fs(dataset);

    /*
     * cleanup
@@ -980,6 +993,7 @@ validate_ioc_values(void)
    CHECK(ZFS_IOC_BASE + 81 == ZFS_IOC_REDACT);
    CHECK(ZFS_IOC_BASE + 82 == ZFS_IOC_GET_BOOKMARK_PROPS);
    CHECK(ZFS_IOC_BASE + 83 == ZFS_IOC_WAIT);
    CHECK(ZFS_IOC_BASE + 84 == ZFS_IOC_WAIT_FS);
    CHECK(ZFS_IOC_PLATFORM_BASE + 1 == ZFS_IOC_EVENTS_NEXT);
    CHECK(ZFS_IOC_PLATFORM_BASE + 2 == ZFS_IOC_EVENTS_CLEAR);
    CHECK(ZFS_IOC_PLATFORM_BASE + 3 == ZFS_IOC_EVENTS_SEEK);
@@ -32,6 +32,7 @@ SUBDIRS = \
    zfs_unmount \
    zfs_unshare \
    zfs_upgrade \
    zfs_wait \
    zpool \
    zpool_add \
    zpool_attach \
@@ -0,0 +1,8 @@
pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cli_root/zfs_wait
dist_pkgdata_SCRIPTS = \
    setup.ksh \
    cleanup.ksh \
    zfs_wait_deleteq.ksh

dist_pkgdata_DATA = \
    zfs_wait.kshlib
tests/zfs-tests/tests/functional/cli_root/zfs_wait/cleanup.ksh (new executable file, 20 lines)
@@ -0,0 +1,20 @@
#!/bin/ksh -p
#
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright (c) 2018 by Delphix. All rights reserved.
#

. $STF_SUITE/include/libtest.shlib

default_cleanup
tests/zfs-tests/tests/functional/cli_root/zfs_wait/setup.ksh (new executable file, 21 lines)
@@ -0,0 +1,21 @@
#!/bin/ksh -p
#
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright (c) 2018 by Delphix. All rights reserved.
#

. $STF_SUITE/include/libtest.shlib
DISK=${DISKS%% *}

default_setup $DISK
@@ -0,0 +1,80 @@
#!/bin/ksh
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright (c) 2018, 2019 by Delphix. All rights reserved.
#

typeset -a disk_array=($(find_disks $DISKS))

typeset -r DISK1=${disk_array[0]}
typeset -r DISK2=${disk_array[1]}
typeset -r DISK3=${disk_array[2]}

#
# When the condition it is waiting for becomes true, 'zfs wait' should return
# promptly. We want to enforce this, but any check will be racy because it will
# take some small but indeterminate amount of time for the waiting thread to be
# woken up and for the process to exit.
#
# To deal with this, we provide a grace period after the condition becomes true
# during which 'zfs wait' can exit. If it hasn't exited by the time the grace
# period expires we assume something is wrong and fail the test. While there is
# no value that can really be correct, the idea is we choose something large
# enough that it shouldn't cause issues in practice.
#
typeset -r WAIT_EXIT_GRACE=2.0

function proc_exists # pid
{
    ps -p $1 >/dev/null
}

function proc_must_exist # pid
{
    proc_exists $1 || log_fail "zfs wait process exited too soon"
}

function proc_must_not_exist # pid
{
    proc_exists $1 && log_fail "zfs wait process took too long to exit"
}

function get_time
{
    date +'%H:%M:%S'
}

function kill_if_running
{
    typeset pid=$1
    [[ $pid ]] && proc_exists $pid && log_must kill -s TERM $pid
}

# Log a command and then start it running in the background
function log_bkgrnd
{
    log_note "$(get_time) Starting cmd in background '$@'"
    "$@" &
}

# Check that a background process has completed and exited with a status of 0
function bkgrnd_proc_succeeded
{
    typeset pid=$1

    log_must sleep $WAIT_EXIT_GRACE

    proc_must_not_exist $pid
    wait $pid || log_fail "process exited with status $?"
    log_note "$(get_time) wait completed successfully"
}
tests/zfs-tests/tests/functional/cli_root/zfs_wait/zfs_wait_deleteq.ksh (new executable file, 57 lines)
@@ -0,0 +1,57 @@
#!/bin/ksh -p
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright (c) 2018 by Delphix. All rights reserved.
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/cli_root/zfs_wait/zfs_wait.kshlib

#
# DESCRIPTION:
# 'zfs wait' works when waiting for the delete queue to drain.
#
# STRATEGY:
# 1. Create a file.
# 2. Open a file descriptor pointing to that file.
# 3. Delete the file.
# 4. Start a background process waiting for the delete queue to empty.
# 5. Verify that the command doesn't return immediately.
# 6. Close the open file descriptor.
# 7. Verify that the command returns soon after the descriptor is closed.
#

function cleanup
{
    kill_if_running $pid
    exec 3<&-
}


typeset -r TESTFILE="/$TESTPOOL/testfile"
typeset pid

log_onexit cleanup

log_must touch $TESTFILE
exec 3<> $TESTFILE
log_must rm $TESTFILE
log_bkgrnd zfs wait -t deleteq $TESTPOOL
pid=$!
proc_must_exist $pid

exec 3<&-
log_must sleep 0.5
bkgrnd_proc_succeeded $pid

log_pass "'zfs wait -t deleteq' works."