MFV r247580:

Merge synctask code restructuring from vendor.

Modify forward and backward compatibility to support new change.

Illumos ZFS issues:
  3464 zfs synctask code needs restructuring

Sponsored by:	Hybrid Logic Ltd.
This commit is contained in:
mm 2013-03-19 12:51:18 +00:00
commit 7c87858955
77 changed files with 6565 additions and 5943 deletions

View File

@ -1692,7 +1692,9 @@ dump_dir(objset_t *os)
int print_header = 1;
int i, error;
dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
dmu_objset_fast_stat(os, &dds);
dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
if (dds.dds_type < DMU_OST_NUMTYPES)
type = objset_types[dds.dds_type];
@ -2101,7 +2103,6 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
NULL, NULL, ZIO_PRIORITY_ASYNC_READ, flags, zb));
free(data);
if (ioerr && !(flags & ZIO_FLAG_SPECULATIVE)) {
zcb->zcb_haderrors = 1;
zcb->zcb_errors[ioerr]++;

View File

@ -1618,7 +1618,11 @@ multiple snapshots.
Destroy (or mark for deferred deletion) all snapshots with this name in
descendent file systems.
.It Fl R
Recursively destroy all dependents.
Recursively destroy all clones of these snapshots, including the clones,
snapshots, and children.
If this flag is specified, the
.Fl d
flag will have no effect.
.It Fl n
Do a dry-run ("No-op") deletion. No data will be deleted. This is useful in
conjunction with the

View File

@ -916,6 +916,7 @@ typedef struct destroy_cbdata {
boolean_t cb_parsable;
boolean_t cb_dryrun;
nvlist_t *cb_nvl;
nvlist_t *cb_batchedsnaps;
/* first snap in contiguous run */
char *cb_firstsnap;
@ -1012,9 +1013,27 @@ destroy_callback(zfs_handle_t *zhp, void *data)
zfs_close(zhp);
return (0);
}
if (cb->cb_dryrun) {
zfs_close(zhp);
return (0);
}
if (!cb->cb_dryrun) {
if (zfs_unmount(zhp, NULL, cb->cb_force ? MS_FORCE : 0) != 0 ||
/*
* We batch up all contiguous snapshots (even of different
* filesystems) and destroy them with one ioctl. We can't
* simply do all snap deletions and then all fs deletions,
* because we must delete a clone before its origin.
*/
if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT) {
fnvlist_add_boolean(cb->cb_batchedsnaps, name);
} else {
int error = zfs_destroy_snaps_nvl(g_zfs,
cb->cb_batchedsnaps, B_FALSE);
fnvlist_free(cb->cb_batchedsnaps);
cb->cb_batchedsnaps = fnvlist_alloc();
if (error != 0 ||
zfs_unmount(zhp, NULL, cb->cb_force ? MS_FORCE : 0) != 0 ||
zfs_destroy(zhp, cb->cb_defer_destroy) != 0) {
zfs_close(zhp);
return (-1);
@ -1170,8 +1189,10 @@ static int
zfs_do_destroy(int argc, char **argv)
{
destroy_cbdata_t cb = { 0 };
int rv = 0;
int err = 0;
int c;
zfs_handle_t *zhp;
zfs_handle_t *zhp = NULL;
char *at;
zfs_type_t type = ZFS_TYPE_DATASET;
@ -1225,11 +1246,9 @@ zfs_do_destroy(int argc, char **argv)
at = strchr(argv[0], '@');
if (at != NULL) {
int err = 0;
/* Build the list of snaps to destroy in cb_nvl. */
if (nvlist_alloc(&cb.cb_nvl, NV_UNIQUE_NAME, 0) != 0)
nomem();
cb.cb_nvl = fnvlist_alloc();
*at = '\0';
zhp = zfs_open(g_zfs, argv[0],
@ -1240,17 +1259,15 @@ zfs_do_destroy(int argc, char **argv)
cb.cb_snapspec = at + 1;
if (gather_snapshots(zfs_handle_dup(zhp), &cb) != 0 ||
cb.cb_error) {
zfs_close(zhp);
nvlist_free(cb.cb_nvl);
return (1);
rv = 1;
goto out;
}
if (nvlist_empty(cb.cb_nvl)) {
(void) fprintf(stderr, gettext("could not find any "
"snapshots to destroy; check snapshot names.\n"));
zfs_close(zhp);
nvlist_free(cb.cb_nvl);
return (1);
rv = 1;
goto out;
}
if (cb.cb_verbose) {
@ -1269,18 +1286,26 @@ zfs_do_destroy(int argc, char **argv)
}
if (!cb.cb_dryrun) {
if (cb.cb_doclones)
if (cb.cb_doclones) {
cb.cb_batchedsnaps = fnvlist_alloc();
err = destroy_clones(&cb);
if (err == 0) {
err = zfs_destroy_snaps_nvl(g_zfs,
cb.cb_batchedsnaps, B_FALSE);
}
if (err != 0) {
rv = 1;
goto out;
}
}
if (err == 0) {
err = zfs_destroy_snaps_nvl(zhp, cb.cb_nvl,
err = zfs_destroy_snaps_nvl(g_zfs, cb.cb_nvl,
cb.cb_defer_destroy);
}
}
zfs_close(zhp);
nvlist_free(cb.cb_nvl);
if (err != 0)
return (1);
rv = 1;
} else {
/* Open the given dataset */
if ((zhp = zfs_open(g_zfs, argv[0], type)) == NULL)
@ -1301,8 +1326,8 @@ zfs_do_destroy(int argc, char **argv)
zfs_get_name(zhp));
(void) fprintf(stderr, gettext("use 'zpool destroy %s' "
"to destroy the pool itself\n"), zfs_get_name(zhp));
zfs_close(zhp);
return (1);
rv = 1;
goto out;
}
/*
@ -1312,30 +1337,42 @@ zfs_do_destroy(int argc, char **argv)
if (!cb.cb_doclones &&
zfs_iter_dependents(zhp, B_TRUE, destroy_check_dependent,
&cb) != 0) {
zfs_close(zhp);
return (1);
rv = 1;
goto out;
}
if (cb.cb_error) {
zfs_close(zhp);
return (1);
rv = 1;
goto out;
}
cb.cb_batchedsnaps = fnvlist_alloc();
if (zfs_iter_dependents(zhp, B_FALSE, destroy_callback,
&cb) != 0) {
zfs_close(zhp);
return (1);
rv = 1;
goto out;
}
/*
* Do the real thing. The callback will close the
* handle regardless of whether it succeeds or not.
*/
if (destroy_callback(zhp, &cb) != 0)
return (1);
err = destroy_callback(zhp, &cb);
zhp = NULL;
if (err == 0) {
err = zfs_destroy_snaps_nvl(g_zfs,
cb.cb_batchedsnaps, cb.cb_defer_destroy);
}
if (err != 0)
rv = 1;
}
return (0);
out:
fnvlist_free(cb.cb_batchedsnaps);
fnvlist_free(cb.cb_nvl);
if (zhp != NULL)
zfs_close(zhp);
return (rv);
}
static boolean_t
@ -5121,28 +5158,12 @@ cleanup2:
return (error);
}
/*
 * zfs allow
 *
 * Command entry point for the 'allow' subcommand.  All of the work is done
 * by zfs_do_allow_unallow_impl(), which is handed argc/argv unchanged and
 * B_FALSE as its third argument; its return value is passed straight back.
 * NOTE(review): B_FALSE presumably selects "allow" rather than "unallow" --
 * confirm against zfs_do_allow_unallow_impl().
 */
static int
zfs_do_allow(int argc, char **argv)
{
return (zfs_do_allow_unallow_impl(argc, argv, B_FALSE));
}
/*
* zfs unallow [-r] [-t] <tag> <snap> ...
*
* -r Recursively hold
* -t Temporary hold (hidden option)
*
* Apply a user-hold with the given tag to the list of snapshots.
*/
static int
zfs_do_unallow(int argc, char **argv)
{
@ -5156,7 +5177,6 @@ zfs_do_hold_rele_impl(int argc, char **argv, boolean_t holding)
int i;
const char *tag;
boolean_t recursive = B_FALSE;
boolean_t temphold = B_FALSE;
const char *opts = holding ? "rt" : "r";
int c;
@ -5166,9 +5186,6 @@ zfs_do_hold_rele_impl(int argc, char **argv, boolean_t holding)
case 'r':
recursive = B_TRUE;
break;
case 't':
temphold = B_TRUE;
break;
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
@ -5217,7 +5234,7 @@ zfs_do_hold_rele_impl(int argc, char **argv, boolean_t holding)
}
if (holding) {
if (zfs_hold(zhp, delim+1, tag, recursive,
temphold, B_FALSE, -1, 0, 0) != 0)
B_FALSE, -1) != 0)
++errors;
} else {
if (zfs_release(zhp, delim+1, tag, recursive) != 0)
@ -5233,7 +5250,6 @@ zfs_do_hold_rele_impl(int argc, char **argv, boolean_t holding)
* zfs hold [-r] [-t] <tag> <snap> ...
*
* -r Recursively hold
* -t Temporary hold (hidden option)
*
* Apply a user-hold with the given tag to the list of snapshots.
*/

View File

@ -46,6 +46,7 @@
#include <sys/zio_checksum.h>
#include <sys/zio_compress.h>
#include <sys/zfeature.h>
#include <sys/dmu_tx.h>
#undef ZFS_MAXNAMELEN
#undef verify
#include <libzfs.h>
@ -273,10 +274,10 @@ zhack_do_feature_stat(int argc, char **argv)
}
static void
feature_enable_sync(void *arg1, void *arg2, dmu_tx_t *tx)
feature_enable_sync(void *arg, dmu_tx_t *tx)
{
spa_t *spa = arg1;
zfeature_info_t *feature = arg2;
spa_t *spa = dmu_tx_pool(tx)->dp_spa;
zfeature_info_t *feature = arg;
spa_feature_enable(spa, feature, tx);
spa_history_log_internal(spa, "zhack enable feature", tx,
@ -344,8 +345,8 @@ zhack_do_feature_enable(int argc, char **argv)
if (0 == zap_contains(mos, spa->spa_feat_desc_obj, feature.fi_guid))
fatal("feature already enabled: %s", feature.fi_guid);
VERIFY3U(0, ==, dsl_sync_task_do(spa->spa_dsl_pool, NULL,
feature_enable_sync, spa, &feature, 5));
VERIFY0(dsl_sync_task(spa_name(spa), NULL,
feature_enable_sync, &feature, 5));
spa_close(spa, FTAG);
@ -353,10 +354,10 @@ zhack_do_feature_enable(int argc, char **argv)
}
static void
feature_incr_sync(void *arg1, void *arg2, dmu_tx_t *tx)
feature_incr_sync(void *arg, dmu_tx_t *tx)
{
spa_t *spa = arg1;
zfeature_info_t *feature = arg2;
spa_t *spa = dmu_tx_pool(tx)->dp_spa;
zfeature_info_t *feature = arg;
spa_feature_incr(spa, feature, tx);
spa_history_log_internal(spa, "zhack feature incr", tx,
@ -364,10 +365,10 @@ feature_incr_sync(void *arg1, void *arg2, dmu_tx_t *tx)
}
static void
feature_decr_sync(void *arg1, void *arg2, dmu_tx_t *tx)
feature_decr_sync(void *arg, dmu_tx_t *tx)
{
spa_t *spa = arg1;
zfeature_info_t *feature = arg2;
spa_t *spa = dmu_tx_pool(tx)->dp_spa;
zfeature_info_t *feature = arg;
spa_feature_decr(spa, feature, tx);
spa_history_log_internal(spa, "zhack feature decr", tx,
@ -442,8 +443,8 @@ zhack_do_feature_ref(int argc, char **argv)
if (decr && !spa_feature_is_active(spa, &feature))
fatal("feature refcount already 0: %s", feature.fi_guid);
VERIFY3U(0, ==, dsl_sync_task_do(spa->spa_dsl_pool, NULL,
decr ? feature_decr_sync : feature_incr_sync, spa, &feature, 5));
VERIFY0(dsl_sync_task(spa_name(spa), NULL,
decr ? feature_decr_sync : feature_incr_sync, &feature, 5));
spa_close(spa, FTAG);
}

View File

@ -104,10 +104,12 @@
#include <sys/metaslab_impl.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_destroy.h>
#include <sys/dsl_scan.h>
#include <sys/zio_checksum.h>
#include <sys/refcount.h>
#include <sys/zfeature.h>
#include <sys/dsl_userhold.h>
#include <stdio.h>
#include <stdio_ext.h>
#include <stdlib.h>
@ -367,7 +369,7 @@ ztest_info_t ztest_info[] = {
{ ztest_scrub, 1, &zopt_rarely },
{ ztest_spa_upgrade, 1, &zopt_rarely },
{ ztest_dsl_dataset_promote_busy, 1, &zopt_rarely },
{ ztest_vdev_attach_detach, 1, &zopt_rarely },
{ ztest_vdev_attach_detach, 1, &zopt_sometimes },
{ ztest_vdev_LUN_growth, 1, &zopt_rarely },
{ ztest_vdev_add_remove, 1,
&ztest_opts.zo_vdevtime },
@ -1008,9 +1010,8 @@ ztest_dsl_prop_set_uint64(char *osname, zfs_prop_t prop, uint64_t value,
uint64_t curval;
int error;
error = dsl_prop_set(osname, propname,
(inherit ? ZPROP_SRC_NONE : ZPROP_SRC_LOCAL),
sizeof (value), 1, &value);
error = dsl_prop_set_int(osname, propname,
(inherit ? ZPROP_SRC_NONE : ZPROP_SRC_LOCAL), value);
if (error == ENOSPC) {
ztest_record_enospc(FTAG);
@ -1018,8 +1019,7 @@ ztest_dsl_prop_set_uint64(char *osname, zfs_prop_t prop, uint64_t value,
}
ASSERT0(error);
VERIFY3U(dsl_prop_get(osname, propname, sizeof (curval),
1, &curval, setpoint), ==, 0);
VERIFY0(dsl_prop_get_integer(osname, propname, &curval, setpoint));
if (ztest_opts.zo_verbose >= 6) {
VERIFY(zfs_prop_index_to_string(prop, curval, &valname) == 0);
@ -2481,8 +2481,7 @@ ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id)
int error;
VERIFY(mutex_lock(&ztest_vdev_lock) == 0);
leaves =
MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raidz;
leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raidz;
spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
@ -3182,7 +3181,7 @@ ztest_objset_destroy_cb(const char *name, void *arg)
/*
* Verify that the dataset contains a directory object.
*/
VERIFY3U(0, ==, dmu_objset_hold(name, FTAG, &os));
VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_TRUE, FTAG, &os));
error = dmu_object_info(os, ZTEST_DIROBJ, &doi);
if (error != ENOENT) {
/* We could have crashed in the middle of destroying it */
@ -3190,12 +3189,16 @@ ztest_objset_destroy_cb(const char *name, void *arg)
ASSERT3U(doi.doi_type, ==, DMU_OT_ZAP_OTHER);
ASSERT3S(doi.doi_physical_blocks_512, >=, 0);
}
dmu_objset_rele(os, FTAG);
dmu_objset_disown(os, FTAG);
/*
* Destroy the dataset.
*/
VERIFY3U(0, ==, dmu_objset_destroy(name, B_FALSE));
if (strchr(name, '@') != NULL) {
VERIFY0(dsl_destroy_snapshot(name, B_FALSE));
} else {
VERIFY0(dsl_destroy_head(name));
}
return (0);
}
@ -3205,16 +3208,17 @@ ztest_snapshot_create(char *osname, uint64_t id)
char snapname[MAXNAMELEN];
int error;
(void) snprintf(snapname, MAXNAMELEN, "%s@%llu", osname,
(u_longlong_t)id);
(void) snprintf(snapname, sizeof (snapname), "%llu", (u_longlong_t)id);
error = dmu_objset_snapshot_one(osname, strchr(snapname, '@') + 1);
error = dmu_objset_snapshot_one(osname, snapname);
if (error == ENOSPC) {
ztest_record_enospc(FTAG);
return (B_FALSE);
}
if (error != 0 && error != EEXIST)
fatal(0, "ztest_snapshot_create(%s) = %d", snapname, error);
if (error != 0 && error != EEXIST) {
fatal(0, "ztest_snapshot_create(%s@%s) = %d", osname,
snapname, error);
}
return (B_TRUE);
}
@ -3227,7 +3231,7 @@ ztest_snapshot_destroy(char *osname, uint64_t id)
(void) snprintf(snapname, MAXNAMELEN, "%s@%llu", osname,
(u_longlong_t)id);
error = dmu_objset_destroy(snapname, B_FALSE);
error = dsl_destroy_snapshot(snapname, B_FALSE);
if (error != 0 && error != ENOENT)
fatal(0, "ztest_snapshot_destroy(%s) = %d", snapname, error);
return (B_TRUE);
@ -3273,7 +3277,8 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id)
/*
* Verify that the destroyed dataset is no longer in the namespace.
*/
VERIFY3U(ENOENT, ==, dmu_objset_hold(name, FTAG, &os));
VERIFY3U(ENOENT, ==, dmu_objset_own(name, DMU_OST_OTHER, B_TRUE,
FTAG, &os));
/*
* Verify that we can create a new dataset.
@ -3288,8 +3293,7 @@ ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id)
fatal(0, "dmu_objset_create(%s) = %d", name, error);
}
VERIFY3U(0, ==,
dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os));
VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os));
ztest_zd_init(&zdtmp, NULL, os);
@ -3365,21 +3369,21 @@ ztest_dsl_dataset_cleanup(char *osname, uint64_t id)
(void) snprintf(clone2name, MAXNAMELEN, "%s/c2_%llu", osname, id);
(void) snprintf(snap3name, MAXNAMELEN, "%s@s3_%llu", clone1name, id);
error = dmu_objset_destroy(clone2name, B_FALSE);
error = dsl_destroy_head(clone2name);
if (error && error != ENOENT)
fatal(0, "dmu_objset_destroy(%s) = %d", clone2name, error);
error = dmu_objset_destroy(snap3name, B_FALSE);
fatal(0, "dsl_destroy_head(%s) = %d", clone2name, error);
error = dsl_destroy_snapshot(snap3name, B_FALSE);
if (error && error != ENOENT)
fatal(0, "dmu_objset_destroy(%s) = %d", snap3name, error);
error = dmu_objset_destroy(snap2name, B_FALSE);
fatal(0, "dsl_destroy_snapshot(%s) = %d", snap3name, error);
error = dsl_destroy_snapshot(snap2name, B_FALSE);
if (error && error != ENOENT)
fatal(0, "dmu_objset_destroy(%s) = %d", snap2name, error);
error = dmu_objset_destroy(clone1name, B_FALSE);
fatal(0, "dsl_destroy_snapshot(%s) = %d", snap2name, error);
error = dsl_destroy_head(clone1name);
if (error && error != ENOENT)
fatal(0, "dmu_objset_destroy(%s) = %d", clone1name, error);
error = dmu_objset_destroy(snap1name, B_FALSE);
fatal(0, "dsl_destroy_head(%s) = %d", clone1name, error);
error = dsl_destroy_snapshot(snap1name, B_FALSE);
if (error && error != ENOENT)
fatal(0, "dmu_objset_destroy(%s) = %d", snap1name, error);
fatal(0, "dsl_destroy_snapshot(%s) = %d", snap1name, error);
}
/*
@ -3388,8 +3392,7 @@ ztest_dsl_dataset_cleanup(char *osname, uint64_t id)
void
ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id)
{
objset_t *clone;
dsl_dataset_t *ds;
objset_t *os;
char snap1name[MAXNAMELEN];
char clone1name[MAXNAMELEN];
char snap2name[MAXNAMELEN];
@ -3417,12 +3420,7 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id)
fatal(0, "dmu_take_snapshot(%s) = %d", snap1name, error);
}
error = dmu_objset_hold(snap1name, FTAG, &clone);
if (error)
fatal(0, "dmu_open_snapshot(%s) = %d", snap1name, error);
error = dmu_objset_clone(clone1name, dmu_objset_ds(clone), 0);
dmu_objset_rele(clone, FTAG);
error = dmu_objset_clone(clone1name, snap1name);
if (error) {
if (error == ENOSPC) {
ztest_record_enospc(FTAG);
@ -3449,12 +3447,7 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id)
fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error);
}
error = dmu_objset_hold(snap3name, FTAG, &clone);
if (error)
fatal(0, "dmu_open_snapshot(%s) = %d", snap3name, error);
error = dmu_objset_clone(clone2name, dmu_objset_ds(clone), 0);
dmu_objset_rele(clone, FTAG);
error = dmu_objset_clone(clone2name, snap3name);
if (error) {
if (error == ENOSPC) {
ztest_record_enospc(FTAG);
@ -3463,14 +3456,14 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id)
fatal(0, "dmu_objset_create(%s) = %d", clone2name, error);
}
error = dsl_dataset_own(snap2name, B_FALSE, FTAG, &ds);
error = dmu_objset_own(snap2name, DMU_OST_ANY, B_TRUE, FTAG, &os);
if (error)
fatal(0, "dsl_dataset_own(%s) = %d", snap2name, error);
fatal(0, "dmu_objset_own(%s) = %d", snap2name, error);
error = dsl_dataset_promote(clone2name, NULL);
if (error != EBUSY)
fatal(0, "dsl_dataset_promote(%s), %d, not EBUSY", clone2name,
error);
dsl_dataset_disown(ds, FTAG);
dmu_objset_disown(os, FTAG);
out:
ztest_dsl_dataset_cleanup(osname, id);
@ -4282,7 +4275,7 @@ ztest_zap_parallel(ztest_ds_t *zd, uint64_t id)
}
count = -1ULL;
VERIFY(zap_count(os, object, &count) == 0);
VERIFY0(zap_count(os, object, &count));
ASSERT(count != -1ULL);
/*
@ -4593,6 +4586,22 @@ ztest_spa_prop_get_set(ztest_ds_t *zd, uint64_t id)
(void) rw_unlock(&ztest_name_lock);
}
/*
 * Release a single user hold from a single snapshot.
 *
 * Builds the request that dsl_dataset_user_release() takes -- an nvlist
 * keyed by snapshot name whose value is an nvlist of hold tags -- containing
 * just the pair (snapname, holdname), issues the release, and hands back
 * whatever error code dsl_dataset_user_release() returned.
 */
static int
user_release_one(const char *snapname, const char *holdname)
{
	nvlist_t *tags = fnvlist_alloc();
	nvlist_t *request = fnvlist_alloc();
	int rc;

	/* Inner list: the one tag to drop. */
	fnvlist_add_boolean(tags, holdname);

	/* Outer list: snapshot name -> tags; the inner list is freed here. */
	fnvlist_add_nvlist(request, snapname, tags);
	fnvlist_free(tags);

	rc = dsl_dataset_user_release(request, NULL);
	fnvlist_free(request);

	return (rc);
}
/*
* Test snapshot hold/release and deferred destroy.
*/
@ -4607,22 +4616,30 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id)
char clonename[100];
char tag[100];
char osname[MAXNAMELEN];
nvlist_t *holds;
(void) rw_rdlock(&ztest_name_lock);
dmu_objset_name(os, osname);
(void) snprintf(snapname, 100, "sh1_%llu", id);
(void) snprintf(fullname, 100, "%s@%s", osname, snapname);
(void) snprintf(clonename, 100, "%s/ch1_%llu", osname, id);
(void) snprintf(tag, 100, "%tag_%llu", id);
(void) snprintf(snapname, sizeof (snapname), "sh1_%llu", id);
(void) snprintf(fullname, sizeof (fullname), "%s@%s", osname, snapname);
(void) snprintf(clonename, sizeof (clonename),
"%s/ch1_%llu", osname, id);
(void) snprintf(tag, sizeof (tag), "tag_%llu", id);
/*
* Clean up from any previous run.
*/
(void) dmu_objset_destroy(clonename, B_FALSE);
(void) dsl_dataset_user_release(osname, snapname, tag, B_FALSE);
(void) dmu_objset_destroy(fullname, B_FALSE);
error = dsl_destroy_head(clonename);
if (error != ENOENT)
ASSERT0(error);
error = user_release_one(fullname, tag);
if (error != ESRCH && error != ENOENT)
ASSERT0(error);
error = dsl_destroy_snapshot(fullname, B_FALSE);
if (error != ENOENT)
ASSERT0(error);
/*
* Create snapshot, clone it, mark snap for deferred destroy,
@ -4637,12 +4654,7 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id)
fatal(0, "dmu_objset_snapshot(%s) = %d", fullname, error);
}
error = dmu_objset_hold(fullname, FTAG, &origin);
if (error)
fatal(0, "dmu_objset_hold(%s) = %d", fullname, error);
error = dmu_objset_clone(clonename, dmu_objset_ds(origin), 0);
dmu_objset_rele(origin, FTAG);
error = dmu_objset_clone(clonename, fullname);
if (error) {
if (error == ENOSPC) {
ztest_record_enospc("dmu_objset_clone");
@ -4651,15 +4663,15 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id)
fatal(0, "dmu_objset_clone(%s) = %d", clonename, error);
}
error = dmu_objset_destroy(fullname, B_TRUE);
error = dsl_destroy_snapshot(fullname, B_TRUE);
if (error) {
fatal(0, "dmu_objset_destroy(%s, B_TRUE) = %d",
fatal(0, "dsl_destroy_snapshot(%s, B_TRUE) = %d",
fullname, error);
}
error = dmu_objset_destroy(clonename, B_FALSE);
error = dsl_destroy_head(clonename);
if (error)
fatal(0, "dmu_objset_destroy(%s) = %d", clonename, error);
fatal(0, "dsl_destroy_head(%s) = %d", clonename, error);
error = dmu_objset_hold(fullname, FTAG, &origin);
if (error != ENOENT)
@ -4679,28 +4691,31 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id)
fatal(0, "dmu_objset_snapshot(%s) = %d", fullname, error);
}
error = dsl_dataset_user_hold(osname, snapname, tag, B_FALSE,
B_TRUE, -1);
holds = fnvlist_alloc();
fnvlist_add_string(holds, fullname, tag);
error = dsl_dataset_user_hold(holds, 0, NULL);
fnvlist_free(holds);
if (error)
fatal(0, "dsl_dataset_user_hold(%s)", fullname, tag);
error = dmu_objset_destroy(fullname, B_FALSE);
error = dsl_destroy_snapshot(fullname, B_FALSE);
if (error != EBUSY) {
fatal(0, "dmu_objset_destroy(%s, B_FALSE) = %d",
fatal(0, "dsl_destroy_snapshot(%s, B_FALSE) = %d",
fullname, error);
}
error = dmu_objset_destroy(fullname, B_TRUE);
error = dsl_destroy_snapshot(fullname, B_TRUE);
if (error) {
fatal(0, "dmu_objset_destroy(%s, B_TRUE) = %d",
fatal(0, "dsl_destroy_snapshot(%s, B_TRUE) = %d",
fullname, error);
}
error = dsl_dataset_user_release(osname, snapname, tag, B_FALSE);
error = user_release_one(fullname, tag);
if (error)
fatal(0, "dsl_dataset_user_release(%s)", fullname, tag);
fatal(0, "user_release_one(%s)", fullname, tag);
VERIFY(dmu_objset_hold(fullname, FTAG, &origin) == ENOENT);
VERIFY3U(dmu_objset_hold(fullname, FTAG, &origin), ==, ENOENT);
out:
(void) rw_unlock(&ztest_name_lock);
@ -4954,8 +4969,12 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id)
*/
for (int i = 0; i < copies; i++) {
uint64_t offset = i * blocksize;
VERIFY0(dmu_buf_hold(os, object, offset, FTAG, &db,
DMU_READ_NO_PREFETCH));
int error = dmu_buf_hold(os, object, offset, FTAG, &db,
DMU_READ_NO_PREFETCH);
if (error != 0) {
fatal(B_FALSE, "dmu_buf_hold(%p, %llu, %llu) = %u",
os, (long long)object, (long long) offset, error);
}
ASSERT(db->db_offset == offset);
ASSERT(db->db_size == blocksize);
ASSERT(ztest_pattern_match(db->db_data, db->db_size, pattern) ||
@ -5166,6 +5185,7 @@ ztest_spa_import_export(char *oldname, char *newname)
nvlist_t *config, *newconfig;
uint64_t pool_guid;
spa_t *spa;
int error;
if (ztest_opts.zo_verbose >= 4) {
(void) printf("import/export: old = %s, new = %s\n",
@ -5210,7 +5230,12 @@ ztest_spa_import_export(char *oldname, char *newname)
/*
* Import it under the new name.
*/
VERIFY3U(0, ==, spa_import(newname, config, NULL, 0));
error = spa_import(newname, config, NULL, 0);
if (error != 0) {
dump_nvlist(config, 0);
fatal(B_FALSE, "couldn't import pool %s as %s: error %u",
oldname, newname, error);
}
ztest_walk_pool_directory("pools after import");
@ -5417,7 +5442,7 @@ ztest_dataset_open(int d)
}
ASSERT(error == 0 || error == EEXIST);
VERIFY0(dmu_objset_hold(name, zd, &os));
VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, zd, &os));
(void) rw_unlock(&ztest_name_lock);
ztest_zd_init(zd, ZTEST_GET_SHARED_DS(d), os);
@ -5458,7 +5483,7 @@ ztest_dataset_close(int d)
ztest_ds_t *zd = &ztest_ds[d];
zil_close(zd->zd_zilog);
dmu_objset_rele(zd->zd_os, zd);
dmu_objset_disown(zd->zd_os, zd);
ztest_zd_fini(zd);
}
@ -5502,13 +5527,14 @@ ztest_run(ztest_shared_t *zs)
* Open our pool.
*/
kernel_init(FREAD | FWRITE);
VERIFY(spa_open(ztest_opts.zo_pool, &spa, FTAG) == 0);
VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG));
spa->spa_debug = B_TRUE;
ztest_spa = spa;
VERIFY3U(0, ==, dmu_objset_hold(ztest_opts.zo_pool, FTAG, &os));
VERIFY0(dmu_objset_own(ztest_opts.zo_pool,
DMU_OST_ANY, B_TRUE, FTAG, &os));
zs->zs_guid = dmu_objset_fsid_guid(os);
dmu_objset_rele(os, FTAG);
dmu_objset_disown(os, FTAG);
spa->spa_dedup_ditto = 2 * ZIO_DEDUPDITTO_MIN;

View File

@ -554,7 +554,7 @@ extern int zfs_create(libzfs_handle_t *, const char *, zfs_type_t,
extern int zfs_create_ancestors(libzfs_handle_t *, const char *);
extern int zfs_destroy(zfs_handle_t *, boolean_t);
extern int zfs_destroy_snaps(zfs_handle_t *, char *, boolean_t);
extern int zfs_destroy_snaps_nvl(zfs_handle_t *, nvlist_t *, boolean_t);
extern int zfs_destroy_snaps_nvl(libzfs_handle_t *, nvlist_t *, boolean_t);
extern int zfs_clone(zfs_handle_t *, const char *, nvlist_t *);
extern int zfs_snapshot(libzfs_handle_t *, const char *, boolean_t, nvlist_t *);
extern int zfs_snapshot_nvl(libzfs_handle_t *hdl, nvlist_t *snaps,
@ -610,8 +610,8 @@ extern int zfs_send(zfs_handle_t *, const char *, const char *,
sendflags_t *, int, snapfilter_cb_t, void *, nvlist_t **);
extern int zfs_promote(zfs_handle_t *);
extern int zfs_hold(zfs_handle_t *, const char *, const char *, boolean_t,
boolean_t, boolean_t, int, uint64_t, uint64_t);
extern int zfs_hold(zfs_handle_t *, const char *, const char *,
boolean_t, boolean_t, int);
extern int zfs_release(zfs_handle_t *, const char *, const char *, boolean_t);
extern int zfs_get_holds(zfs_handle_t *, nvlist_t **);
extern uint64_t zvol_volsize_to_reservation(uint64_t, nvlist_t *);

View File

@ -2016,10 +2016,7 @@ get_clones_cb(zfs_handle_t *zhp, void *arg)
NULL, NULL, 0, B_TRUE) != 0)
goto out;
if (strcmp(gca->buf, gca->origin) == 0) {
if (nvlist_add_boolean(gca->value, zfs_get_name(zhp)) != 0) {
zfs_close(zhp);
return (no_memory(zhp->zfs_hdl));
}
fnvlist_add_boolean(gca->value, zfs_get_name(zhp));
gca->numclones--;
}
@ -3197,45 +3194,49 @@ zfs_destroy_snaps(zfs_handle_t *zhp, char *snapname, boolean_t defer)
dgettext(TEXT_DOMAIN, "cannot destroy '%s@%s'"),
zhp->zfs_name, snapname);
} else {
ret = zfs_destroy_snaps_nvl(zhp, dd.nvl, defer);
ret = zfs_destroy_snaps_nvl(zhp->zfs_hdl, dd.nvl, defer);
}
nvlist_free(dd.nvl);
return (ret);
}
/*
* Destroys all the snapshots named in the nvlist. They must be underneath
* the zhp (either snapshots of it, or snapshots of its descendants).
* Destroys all the snapshots named in the nvlist.
*/
int
zfs_destroy_snaps_nvl(zfs_handle_t *zhp, nvlist_t *snaps, boolean_t defer)
zfs_destroy_snaps_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, boolean_t defer)
{
int ret;
nvlist_t *errlist;
ret = lzc_destroy_snaps(snaps, defer, &errlist);
if (ret != 0) {
for (nvpair_t *pair = nvlist_next_nvpair(errlist, NULL);
pair != NULL; pair = nvlist_next_nvpair(errlist, pair)) {
char errbuf[1024];
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot destroy snapshot %s"),
nvpair_name(pair));
if (ret == 0)
return (0);
switch (fnvpair_value_int32(pair)) {
case EEXIST:
zfs_error_aux(zhp->zfs_hdl,
dgettext(TEXT_DOMAIN,
"snapshot is cloned"));
ret = zfs_error(zhp->zfs_hdl, EZFS_EXISTS,
errbuf);
break;
default:
ret = zfs_standard_error(zhp->zfs_hdl, errno,
errbuf);
break;
}
if (nvlist_next_nvpair(errlist, NULL) == NULL) {
char errbuf[1024];
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot destroy snapshots"));
ret = zfs_standard_error(hdl, ret, errbuf);
}
for (nvpair_t *pair = nvlist_next_nvpair(errlist, NULL);
pair != NULL; pair = nvlist_next_nvpair(errlist, pair)) {
char errbuf[1024];
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot destroy snapshot %s"),
nvpair_name(pair));
switch (fnvpair_value_int32(pair)) {
case EEXIST:
zfs_error_aux(hdl,
dgettext(TEXT_DOMAIN, "snapshot is cloned"));
ret = zfs_error(hdl, EZFS_EXISTS, errbuf);
break;
default:
ret = zfs_standard_error(hdl, errno, errbuf);
break;
}
}
@ -4134,7 +4135,7 @@ zfs_userspace(zfs_handle_t *zhp, zfs_userquota_prop_t type,
zc.zc_nvlist_dst_size = sizeof (buf);
if (zfs_ioctl(hdl, ZFS_IOC_USERSPACE_MANY, &zc) != 0) {
char errbuf[ZFS_MAXNAMELEN + 32];
char errbuf[1024];
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN,
@ -4156,37 +4157,83 @@ zfs_userspace(zfs_handle_t *zhp, zfs_userquota_prop_t type,
return (0);
}
/*
 * Argument bundle handed (as the void *arg) to the zfs_hold_one() and
 * zfs_release_one() iteration callbacks.
 */
struct holdarg {
/* list the callbacks add to, one entry per "<dataset>@<snapname>" found */
nvlist_t *nvl;
/* snapshot component; appended after '@' to each visited dataset name */
const char *snapname;
/* hold tag recorded for every matching snapshot */
const char *tag;
/* when set, the callbacks descend via zfs_iter_filesystems() */
boolean_t recursive;
};
/*
 * Iteration callback that collects hold requests.
 *
 * For the dataset `zhp`, forms "<dataset>@<snapname>" and, if
 * make_dataset_handle() can produce a handle for that name, records
 * (full snapshot name -> tag) as a string pair in the holdarg's nvlist.
 * When the holdarg asks for recursion, the same callback is applied to the
 * child filesystems through zfs_iter_filesystems().
 *
 * `arg` is a struct holdarg.  `zhp` is closed before returning.  The return
 * value is 0, or whatever zfs_iter_filesystems() returned when recursing.
 */
static int
zfs_hold_one(zfs_handle_t *zhp, void *arg)
{
	struct holdarg *args = arg;
	char snapfull[ZFS_MAXNAMELEN];
	zfs_handle_t *snap_hdl;
	int ret;

	(void) snprintf(snapfull, sizeof (snapfull), "%s@%s",
	    zhp->zfs_name, args->snapname);

	snap_hdl = make_dataset_handle(zhp->zfs_hdl, snapfull);
	if (snap_hdl != NULL) {
		fnvlist_add_string(args->nvl, snapfull, args->tag);
		zfs_close(snap_hdl);
	}

	ret = args->recursive ?
	    zfs_iter_filesystems(zhp, zfs_hold_one, args) : 0;

	zfs_close(zhp);
	return (ret);
}
int
zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag,
boolean_t recursive, boolean_t temphold, boolean_t enoent_ok,
int cleanup_fd, uint64_t dsobj, uint64_t createtxg)
boolean_t recursive, boolean_t enoent_ok, int cleanup_fd)
{
zfs_cmd_t zc = { 0 };
int ret;
struct holdarg ha;
nvlist_t *errors;
libzfs_handle_t *hdl = zhp->zfs_hdl;
char errbuf[1024];
nvpair_t *elem;
ASSERT(!recursive || dsobj == 0);
ha.nvl = fnvlist_alloc();
ha.snapname = snapname;
ha.tag = tag;
ha.recursive = recursive;
(void) zfs_hold_one(zfs_handle_dup(zhp), &ha);
ret = lzc_hold(ha.nvl, cleanup_fd, &errors);
fnvlist_free(ha.nvl);
(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
if (strlcpy(zc.zc_string, tag, sizeof (zc.zc_string))
>= sizeof (zc.zc_string))
return (zfs_error(hdl, EZFS_TAGTOOLONG, tag));
zc.zc_cookie = recursive;
zc.zc_temphold = temphold;
zc.zc_cleanup_fd = cleanup_fd;
zc.zc_sendobj = dsobj;
zc.zc_createtxg = createtxg;
if (ret == 0)
return (0);
if (zfs_ioctl(hdl, ZFS_IOC_HOLD, &zc) != 0) {
char errbuf[ZFS_MAXNAMELEN+32];
if (nvlist_next_nvpair(errors, NULL) == NULL) {
/* no hold-specific errors */
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot hold"));
switch (ret) {
case ENOTSUP:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"pool must be upgraded"));
(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
break;
case EINVAL:
(void) zfs_error(hdl, EZFS_BADTYPE, errbuf);
break;
default:
(void) zfs_standard_error(hdl, ret, errbuf);
}
}
/*
* if it was recursive, the one that actually failed will be in
* zc.zc_name.
*/
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot hold '%s@%s'"), zc.zc_name, snapname);
switch (errno) {
for (elem = nvlist_next_nvpair(errors, NULL);
elem != NULL;
elem = nvlist_next_nvpair(errors, elem)) {
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN,
"cannot hold snapshot '%s'"), nvpair_name(elem));
switch (fnvpair_value_int32(elem)) {
case E2BIG:
/*
* Temporary tags wind up having the ds object id
@ -4194,66 +4241,122 @@ zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag,
* above, it's still possible for the tag to wind
* up being slightly too long.
*/
return (zfs_error(hdl, EZFS_TAGTOOLONG, errbuf));
case ENOTSUP:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"pool must be upgraded"));
return (zfs_error(hdl, EZFS_BADVERSION, errbuf));
(void) zfs_error(hdl, EZFS_TAGTOOLONG, errbuf);
break;
case EINVAL:
return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
(void) zfs_error(hdl, EZFS_BADTYPE, errbuf);
break;
case EEXIST:
return (zfs_error(hdl, EZFS_REFTAG_HOLD, errbuf));
(void) zfs_error(hdl, EZFS_REFTAG_HOLD, errbuf);
break;
case ENOENT:
if (enoent_ok)
return (ENOENT);
/* FALLTHROUGH */
default:
return (zfs_standard_error_fmt(hdl, errno, errbuf));
(void) zfs_standard_error(hdl,
fnvpair_value_int32(elem), errbuf);
}
}
return (0);
fnvlist_free(errors);
return (ret);
}
/*
 * Release-side argument bundle: destination nvlist, snapshot component,
 * hold tag, and recursion flag.
 * NOTE(review): the zfs_release_one() and zfs_release() code visible next to
 * this definition uses struct holdarg, not this type -- confirm whether
 * struct releasearg is still referenced anywhere before relying on it.
 */
struct releasearg {
nvlist_t *nvl;
const char *snapname;
const char *tag;
boolean_t recursive;
};
/*
 * Iteration callback that collects release requests.
 *
 * For the dataset `zhp`, forms "<dataset>@<snapname>" and, if
 * make_dataset_handle() can produce a handle for that name, adds an entry
 * (full snapshot name -> nvlist containing the tag) to the holdarg's nvlist.
 * When the holdarg asks for recursion, the same callback is applied to the
 * child filesystems through zfs_iter_filesystems().
 *
 * `arg` is a struct holdarg.  `zhp` is closed before returning.  The return
 * value is 0, or whatever zfs_iter_filesystems() returned when recursing.
 */
static int
zfs_release_one(zfs_handle_t *zhp, void *arg)
{
	struct holdarg *ha = arg;
	zfs_handle_t *szhp;
	char name[ZFS_MAXNAMELEN];
	int rv = 0;

	(void) snprintf(name, sizeof (name),
	    "%s@%s", zhp->zfs_name, ha->snapname);

	szhp = make_dataset_handle(zhp->zfs_hdl, name);
	if (szhp) {
		nvlist_t *holds = fnvlist_alloc();

		fnvlist_add_boolean(holds, ha->tag);
		fnvlist_add_nvlist(ha->nvl, name, holds);
		/*
		 * Adding an nvlist stores a copy in ha->nvl, so the temporary
		 * must be freed here; otherwise one nvlist is leaked for
		 * every snapshot visited.
		 */
		fnvlist_free(holds);
		zfs_close(szhp);
	}

	if (ha->recursive)
		rv = zfs_iter_filesystems(zhp, zfs_release_one, ha);
	zfs_close(zhp);
	return (rv);
}
int
zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag,
boolean_t recursive)
{
zfs_cmd_t zc = { 0 };
int ret;
struct holdarg ha;
nvlist_t *errors;
nvpair_t *elem;
libzfs_handle_t *hdl = zhp->zfs_hdl;
(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
if (strlcpy(zc.zc_string, tag, sizeof (zc.zc_string))
>= sizeof (zc.zc_string))
return (zfs_error(hdl, EZFS_TAGTOOLONG, tag));
zc.zc_cookie = recursive;
ha.nvl = fnvlist_alloc();
ha.snapname = snapname;
ha.tag = tag;
ha.recursive = recursive;
(void) zfs_release_one(zfs_handle_dup(zhp), &ha);
ret = lzc_release(ha.nvl, &errors);
fnvlist_free(ha.nvl);
if (zfs_ioctl(hdl, ZFS_IOC_RELEASE, &zc) != 0) {
char errbuf[ZFS_MAXNAMELEN+32];
if (ret == 0)
return (0);
if (nvlist_next_nvpair(errors, NULL) == NULL) {
/* no hold-specific errors */
char errbuf[1024];
/*
* if it was recursive, the one that actually failed will be in
* zc.zc_name.
*/
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot release '%s' from '%s@%s'"), tag, zc.zc_name,
snapname);
"cannot release"));
switch (errno) {
case ESRCH:
return (zfs_error(hdl, EZFS_REFTAG_RELE, errbuf));
case ENOTSUP:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"pool must be upgraded"));
return (zfs_error(hdl, EZFS_BADVERSION, errbuf));
case EINVAL:
return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
break;
default:
return (zfs_standard_error_fmt(hdl, errno, errbuf));
(void) zfs_standard_error_fmt(hdl, errno, errbuf);
}
}
return (0);
for (elem = nvlist_next_nvpair(errors, NULL);
elem != NULL;
elem = nvlist_next_nvpair(errors, elem)) {
char errbuf[1024];
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN,
"cannot release hold from snapshot '%s'"),
nvpair_name(elem));
switch (fnvpair_value_int32(elem)) {
case ESRCH:
(void) zfs_error(hdl, EZFS_REFTAG_RELE, errbuf);
break;
case EINVAL:
(void) zfs_error(hdl, EZFS_BADTYPE, errbuf);
break;
default:
(void) zfs_standard_error_fmt(hdl,
fnvpair_value_int32(elem), errbuf);
}
}
fnvlist_free(errors);
return (ret);
}
int
@ -4264,7 +4367,7 @@ zfs_get_fsacl(zfs_handle_t *zhp, nvlist_t **nvl)
int nvsz = 2048;
void *nvbuf;
int err = 0;
char errbuf[ZFS_MAXNAMELEN+32];
char errbuf[1024];
assert(zhp->zfs_type == ZFS_TYPE_VOLUME ||
zhp->zfs_type == ZFS_TYPE_FILESYSTEM);
@ -4329,7 +4432,7 @@ zfs_set_fsacl(zfs_handle_t *zhp, boolean_t un, nvlist_t *nvl)
zfs_cmd_t zc = { 0 };
libzfs_handle_t *hdl = zhp->zfs_hdl;
char *nvbuf;
char errbuf[ZFS_MAXNAMELEN+32];
char errbuf[1024];
size_t nvsz;
int err;
@ -4380,38 +4483,18 @@ zfs_set_fsacl(zfs_handle_t *zhp, boolean_t un, nvlist_t *nvl)
int
zfs_get_holds(zfs_handle_t *zhp, nvlist_t **nvl)
{
zfs_cmd_t zc = { 0 };
libzfs_handle_t *hdl = zhp->zfs_hdl;
int nvsz = 2048;
void *nvbuf;
int err = 0;
char errbuf[ZFS_MAXNAMELEN+32];
int err;
char errbuf[1024];
assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
err = lzc_get_holds(zhp->zfs_name, nvl);
tryagain:
if (err != 0) {
libzfs_handle_t *hdl = zhp->zfs_hdl;
nvbuf = malloc(nvsz);
if (nvbuf == NULL) {
err = (zfs_error(hdl, EZFS_NOMEM, strerror(errno)));
goto out;
}
zc.zc_nvlist_dst_size = nvsz;
zc.zc_nvlist_dst = (uintptr_t)nvbuf;
(void) strlcpy(zc.zc_name, zhp->zfs_name, ZFS_MAXNAMELEN);
if (zfs_ioctl(hdl, ZFS_IOC_GET_HOLDS, &zc) != 0) {
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot get holds for '%s'"),
zc.zc_name);
switch (errno) {
case ENOMEM:
free(nvbuf);
nvsz = zc.zc_nvlist_dst_size;
goto tryagain;
zhp->zfs_name);
switch (err) {
case ENOTSUP:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"pool must be upgraded"));
@ -4427,19 +4510,8 @@ tryagain:
err = zfs_standard_error_fmt(hdl, errno, errbuf);
break;
}
} else {
/* success */
int rc = nvlist_unpack(nvbuf, zc.zc_nvlist_dst_size, nvl, 0);
if (rc) {
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot get holds for '%s'"),
zc.zc_name);
err = zfs_standard_error_fmt(hdl, rc, errbuf);
}
}
free(nvbuf);
out:
return (err);
}

View File

@ -53,6 +53,10 @@
#include <sys/zio_checksum.h>
#include <sys/ddt.h>
#ifdef __FreeBSD__
extern int zfs_ioctl_version;
#endif
/* in libzfs_dataset.c */
extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *);
/* We need to use something for ENODATA. */
@ -978,9 +982,7 @@ hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd)
*/
if (pzhp) {
error = zfs_hold(pzhp, thissnap, sdd->holdtag,
B_FALSE, B_TRUE, B_TRUE, sdd->cleanup_fd,
zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID),
zfs_prop_get_int(zhp, ZFS_PROP_CREATETXG));
B_FALSE, B_TRUE, sdd->cleanup_fd);
zfs_close(pzhp);
}
@ -1719,12 +1721,11 @@ recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
err = ENOENT;
}
if (err != 0 && strncmp(name+baselen, "recv-", 5) != 0) {
if (err != 0 && strncmp(name + baselen, "recv-", 5) != 0) {
seq++;
(void) strncpy(newname, name, baselen);
(void) snprintf(newname+baselen, ZFS_MAXNAMELEN-baselen,
"recv-%u-%u", getpid(), seq);
(void) snprintf(newname, ZFS_MAXNAMELEN, "%.*srecv-%u-%u",
baselen, name, getpid(), seq);
(void) strlcpy(zc.zc_value, newname, sizeof (zc.zc_value));
if (flags->verbose) {
@ -2676,9 +2677,17 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
/*
* Determine name of destination snapshot, store in zc_value.
*/
(void) strcpy(zc.zc_top_ds, tosnap);
(void) strcpy(zc.zc_value, tosnap);
(void) strncat(zc.zc_value, chopprefix, sizeof (zc.zc_value));
#ifdef __FreeBSD__
if (zfs_ioctl_version == ZFS_IOCVER_UNDEF)
zfs_ioctl_version = get_zfs_ioctl_version();
/*
* For forward compatibility hide tosnap in zc_value
*/
if (zfs_ioctl_version < ZFS_IOCVER_LZC)
(void) strcpy(zc.zc_value + strlen(zc.zc_value) + 1, tosnap);
#endif
free(cp);
if (!zfs_name_valid(zc.zc_value, ZFS_TYPE_SNAPSHOT)) {
zcmd_free_nvlists(&zc);

View File

@ -156,6 +156,7 @@ lzc_ioctl(zfs_ioc_t ioc, const char *name,
zc.zc_nvlist_src_size = size;
if (resultp != NULL) {
*resultp = NULL;
zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024);
zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
malloc(zc.zc_nvlist_dst_size);
@ -196,8 +197,6 @@ lzc_ioctl(zfs_ioc_t ioc, const char *name,
if (zc.zc_nvlist_dst_filled) {
*resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
zc.zc_nvlist_dst_size);
} else if (resultp != NULL) {
*resultp = NULL;
}
#ifdef __FreeBSD__
if (zfs_ioctl_version < ZFS_IOCVER_LZC)
@ -256,7 +255,7 @@ lzc_clone(const char *fsname, const char *origin,
* The value will be the (int32) error code.
*
* The return value will be 0 if all snapshots were created, otherwise it will
* be the errno of a (undetermined) snapshot that failed.
* be the errno of a (unspecified) snapshot that failed.
*/
int
lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist)
@ -305,7 +304,7 @@ lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist)
* The return value will be 0 if all snapshots were destroyed (or marked for
* later destruction if 'defer' is set) or didn't exist to begin with.
*
* Otherwise the return value will be the errno of a (undetermined) snapshot
* Otherwise the return value will be the errno of a (unspecified) snapshot
* that failed, no snapshots will be destroyed, and the errlist will have an
* entry for each snapshot that failed. The value in the errlist will be
* the (int32) error code.
@ -379,6 +378,101 @@ lzc_exists(const char *dataset)
return (ioctl(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0);
}
/*
* Create "user holds" on snapshots. If there is a hold on a snapshot,
* the snapshot can not be destroyed. (However, it can be marked for deletion
* by lzc_destroy_snaps(defer=B_TRUE).)
*
* The keys in the nvlist are snapshot names.
* The snapshots must all be in the same pool.
* The value is the name of the hold (string type).
*
* If cleanup_fd is not -1, it must be the result of open("/dev/zfs", O_EXCL).
* In this case, when the cleanup_fd is closed (including on process
* termination), the holds will be released. If the system is shut down
* uncleanly, the holds will be released when the pool is next opened
* or imported.
*
* The return value will be 0 if all holds were created. Otherwise the return
* value will be the errno of a (unspecified) hold that failed, no holds will
* be created, and the errlist will have an entry for each hold that
* failed (name = snapshot). The value in the errlist will be the error
* code (int32).
*/
int
lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist)
{
	char poolname[MAXNAMELEN];
	nvpair_t *first = nvlist_next_nvpair(holds, NULL);
	nvlist_t *innvl;
	int rc;

	/* nothing requested: succeed without issuing the ioctl */
	if (first == NULL)
		return (0);

	/* the pool name is the first key, cut at the first '/' or '@' */
	(void) strlcpy(poolname, nvpair_name(first), sizeof (poolname));
	poolname[strcspn(poolname, "/@")] = '\0';

	/* wrap the request; "cleanup_fd" is only sent when one was given */
	innvl = fnvlist_alloc();
	fnvlist_add_nvlist(innvl, "holds", holds);
	if (cleanup_fd != -1)
		fnvlist_add_int32(innvl, "cleanup_fd", cleanup_fd);

	rc = lzc_ioctl(ZFS_IOC_HOLD, poolname, innvl, errlist);
	nvlist_free(innvl);
	return (rc);
}
/*
* Release "user holds" on snapshots. If the snapshot has been marked for
* deferred destroy (by lzc_destroy_snaps(defer=B_TRUE)), it does not have
* any clones, and all the user holds are removed, then the snapshot will be
* destroyed.
*
* The keys in the nvlist are snapshot names.
* The snapshots must all be in the same pool.
* The value is a nvlist whose keys are the holds to remove.
*
* The return value will be 0 if all holds were removed.
* Otherwise the return value will be the errno of a (unspecified) release
* that failed, no holds will be released, and the errlist will have an
* entry for each snapshot that has failed releases (name = snapshot).
* The value in the errlist will be the error code (int32) of a failed release.
*/
int
lzc_release(nvlist_t *holds, nvlist_t **errlist)
{
	char poolname[MAXNAMELEN];
	nvpair_t *first = nvlist_next_nvpair(holds, NULL);
	size_t poollen;

	/* nothing requested: succeed without issuing the ioctl */
	if (first == NULL)
		return (0);

	/* the pool name is the first key, cut at the first '/' or '@' */
	(void) strlcpy(poolname, nvpair_name(first), sizeof (poolname));
	poollen = strcspn(poolname, "/@");
	poolname[poollen] = '\0';

	return (lzc_ioctl(ZFS_IOC_RELEASE, poolname, holds, errlist));
}
/*
* Retrieve list of user holds on the specified snapshot.
*
* On success, *holdsp will be set to a nvlist which the caller must free.
* The keys are the names of the holds, and the value is the creation time
* of the hold (uint64) in seconds since the epoch.
*/
int
lzc_get_holds(const char *snapname, nvlist_t **holdsp)
{
	/* the ioctl takes no arguments, so an empty input list is passed */
	nvlist_t *args = fnvlist_alloc();
	int rc = lzc_ioctl(ZFS_IOC_GET_HOLDS, snapname, args, holdsp);

	fnvlist_free(args);
	return (rc);
}
/*
* If fromsnap is NULL, a full (non-incremental) stream will be sent.
*/

View File

@ -47,6 +47,10 @@ int lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist);
int lzc_snaprange_space(const char *firstsnap, const char *lastsnap,
uint64_t *usedp);
int lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist);
int lzc_release(nvlist_t *holds, nvlist_t **errlist);
int lzc_get_holds(const char *snapname, nvlist_t **holdsp);
int lzc_send(const char *snapname, const char *fromsnap, int fd);
int lzc_receive(const char *snapname, nvlist_t *props, const char *origin,
boolean_t force, int fd);

View File

@ -33,10 +33,11 @@ int
lzc_compat_pre(zfs_cmd_t *zc, zfs_ioc_t *ioc, nvlist_t **source)
{
nvlist_t *nvl = NULL;
nvpair_t *pair;
char *buf;
nvpair_t *pair, *hpair;
char *buf, *val;
zfs_ioc_t vecnum;
uint32_t type32;
int32_t cleanup_fd;
int error = 0;
int pos;
@ -68,7 +69,7 @@ lzc_compat_pre(zfs_cmd_t *zc, zfs_ioc_t *ioc, nvlist_t **source)
strlcpy(zc->zc_name, buf, pos + 1);
strlcpy(zc->zc_value, buf + pos + 1, MAXPATHLEN);
} else
error = EOPNOTSUPP;
error = EINVAL;
/* old kernel cannot create multiple snapshots */
if (!error && nvlist_next_nvpair(nvl, pair) != NULL)
error = EOPNOTSUPP;
@ -88,9 +89,62 @@ lzc_compat_pre(zfs_cmd_t *zc, zfs_ioc_t *ioc, nvlist_t **source)
buf = nvpair_name(pair);
pos = strcspn(buf, "@");
strlcpy(zc->zc_name, buf, pos + 1);
}
} else
error = EINVAL;
/* old kernel cannot atomically destroy multiple snaps */
if (!error && nvlist_next_nvpair(nvl, pair) != NULL)
error = EOPNOTSUPP;
*source = nvl;
break;
case ZFS_IOC_HOLD:
/*
 * Convert a new-style hold request into zfs_cmd_t fields using the
 * first entry of the "holds" list: dataset part of the key goes to
 * zc_name, snapshot part to zc_value, and the string value (the hold
 * tag) to zc_string.
 */
nvl = fnvlist_lookup_nvlist(*source, "holds");
pair = nvlist_next_nvpair(nvl, NULL);
if (pair != NULL) {
buf = nvpair_name(pair);
/* pos is the length of the dataset name preceding '@' */
pos = strcspn(buf, "@");
strlcpy(zc->zc_name, buf, pos + 1);
strlcpy(zc->zc_value, buf + pos + 1, MAXPATHLEN);
if (nvpair_value_string(pair, &val) == 0)
strlcpy(zc->zc_string, val, MAXNAMELEN);
else
error = EINVAL;
} else
error = EINVAL;
/* old kernel cannot atomically create multiple holds */
if (!error && nvlist_next_nvpair(nvl, pair) != NULL)
error = EOPNOTSUPP;
/*
 * NOTE(review): nvl was obtained from fnvlist_lookup_nvlist() above
 * rather than allocated in this arm -- confirm that freeing it here
 * is intended and safe for later users of *source.
 */
nvlist_free(nvl);
/* -1 in zc_cleanup_fd means no cleanup descriptor was supplied */
if (nvlist_lookup_int32(*source, "cleanup_fd",
&cleanup_fd) == 0)
zc->zc_cleanup_fd = cleanup_fd;
else
zc->zc_cleanup_fd = -1;
break;
case ZFS_IOC_RELEASE:
pair = nvlist_next_nvpair(*source, NULL);
if (pair != NULL) {
buf = nvpair_name(pair);
pos = strcspn(buf, "@");
strlcpy(zc->zc_name, buf, pos + 1);
strlcpy(zc->zc_value, buf + pos + 1, MAXPATHLEN);
if (nvpair_value_nvlist(pair, &nvl) == 0) {
hpair = nvlist_next_nvpair(nvl, NULL);
if (hpair != NULL)
strlcpy(zc->zc_string,
nvpair_name(hpair), MAXNAMELEN);
else
error = EINVAL;
if (!error && nvlist_next_nvpair(nvl,
hpair) != NULL)
error = EOPNOTSUPP;
} else
error = EINVAL;
} else
error = EINVAL;
/* old kernel cannot atomically release multiple holds */
if (!error && nvlist_next_nvpair(nvl, pair) != NULL)
error = EOPNOTSUPP;
break;
}
return (error);

View File

@ -33,6 +33,7 @@
#include <sys/stat.h>
#include <sys/processor.h>
#include <sys/zfs_context.h>
#include <sys/rrwlock.h>
#include <sys/zmod.h>
#include <sys/utsname.h>
#include <sys/systeminfo.h>
@ -885,6 +886,8 @@ umem_out_of_memory(void)
void
kernel_init(int mode)
{
extern uint_t rrw_tsd_key;
umem_nofail_callback(umem_out_of_memory);
physmem = sysconf(_SC_PHYS_PAGES);
@ -905,6 +908,8 @@ kernel_init(int mode)
#endif
spa_init(mode);
tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
}
void

View File

@ -60,6 +60,8 @@ extern "C" {
#include <umem.h>
#include <inttypes.h>
#include <fsshare.h>
#include <pthread.h>
#include <sys/debug.h>
#include <sys/note.h>
#include <sys/types.h>
#include <sys/cred.h>
@ -242,6 +244,9 @@ typedef int krw_t;
#define RW_WRITE_HELD(x) ((x)->rw_owner == curthread)
#define RW_LOCK_HELD(x) rw_lock_held(x)
#undef RW_LOCK_HELD
#define RW_LOCK_HELD(x) (RW_READ_HELD(x) || RW_WRITE_HELD(x))
extern void rw_init(krwlock_t *rwlp, char *name, int type, void *arg);
extern void rw_destroy(krwlock_t *rwlp);
extern void rw_enter(krwlock_t *rwlp, krw_t rw);
@ -271,6 +276,14 @@ extern clock_t cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime);
extern void cv_signal(kcondvar_t *cv);
extern void cv_broadcast(kcondvar_t *cv);
/*
* Thread-specific data
*/
#define tsd_get(k) pthread_getspecific(k)
#define tsd_set(k, v) pthread_setspecific(k, v)
#define tsd_create(kp, d) pthread_key_create(kp, d)
#define tsd_destroy(kp) /* nothing */
/*
* Kernel memory
*/
@ -520,7 +533,7 @@ typedef struct callb_cpr {
#define INGLOBALZONE(z) (1)
extern char *kmem_asprintf(const char *fmt, ...);
#define strfree(str) kmem_free((str), strlen(str)+1)
#define strfree(str) kmem_free((str), strlen(str) + 1)
/*
* Hostname information

View File

@ -30,6 +30,8 @@
#else
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/param.h>
#include <sys/debug.h>
#endif
/*
@ -116,6 +118,18 @@ fnvlist_merge(nvlist_t *dst, nvlist_t *src)
VERIFY0(nvlist_merge(dst, src, KM_SLEEP));
}
/*
 * Return the number of pairs in nvl, counted by walking the list from its
 * first pair until nvlist_next_nvpair() reports the end.
 */
size_t
fnvlist_num_pairs(nvlist_t *nvl)
{
	size_t npairs = 0;
	nvpair_t *cur = nvlist_next_nvpair(nvl, NULL);

	while (cur != NULL) {
		npairs++;
		cur = nvlist_next_nvpair(nvl, cur);
	}
	return (npairs);
}
void
fnvlist_add_boolean(nvlist_t *nvl, const char *name)
{

View File

@ -59,7 +59,6 @@ zfs_cmd_compat_get(zfs_cmd_t *zc, caddr_t addr, const int cflag)
strlcpy(zc->zc_name, zcdm_c->zc_name, MAXPATHLEN);
strlcpy(zc->zc_value, zcdm_c->zc_value, MAXPATHLEN * 2);
strlcpy(zc->zc_string, zcdm_c->zc_string, MAXPATHLEN);
strlcpy(zc->zc_top_ds, zcdm_c->zc_top_ds, MAXPATHLEN);
zc->zc_guid = zcdm_c->zc_guid;
zc->zc_nvlist_conf = zcdm_c->zc_nvlist_conf;
zc->zc_nvlist_conf_size = zcdm_c->zc_nvlist_conf_size;
@ -104,7 +103,6 @@ zfs_cmd_compat_get(zfs_cmd_t *zc, caddr_t addr, const int cflag)
strlcpy(zc->zc_name, zc28_c->zc_name, MAXPATHLEN);
strlcpy(zc->zc_value, zc28_c->zc_value, MAXPATHLEN * 2);
strlcpy(zc->zc_string, zc28_c->zc_string, MAXPATHLEN);
strlcpy(zc->zc_top_ds, zc28_c->zc_top_ds, MAXPATHLEN);
zc->zc_guid = zc28_c->zc_guid;
zc->zc_nvlist_conf = zc28_c->zc_nvlist_conf;
zc->zc_nvlist_conf_size = zc28_c->zc_nvlist_conf_size;
@ -220,7 +218,8 @@ zfs_cmd_compat_get(zfs_cmd_t *zc, caddr_t addr, const int cflag)
}
void
zfs_cmd_compat_put(zfs_cmd_t *zc, caddr_t addr, const int cflag)
zfs_cmd_compat_put(zfs_cmd_t *zc, caddr_t addr, const int request,
const int cflag)
{
zfs_cmd_v15_t *zc_c;
zfs_cmd_v28_t *zc28_c;
@ -233,7 +232,6 @@ zfs_cmd_compat_put(zfs_cmd_t *zc, caddr_t addr, const int cflag)
strlcpy(zcdm_c->zc_name, zc->zc_name, MAXPATHLEN);
strlcpy(zcdm_c->zc_value, zc->zc_value, MAXPATHLEN * 2);
strlcpy(zcdm_c->zc_string, zc->zc_string, MAXPATHLEN);
strlcpy(zcdm_c->zc_top_ds, zc->zc_top_ds, MAXPATHLEN);
zcdm_c->zc_guid = zc->zc_guid;
zcdm_c->zc_nvlist_conf = zc->zc_nvlist_conf;
zcdm_c->zc_nvlist_conf_size = zc->zc_nvlist_conf_size;
@ -266,7 +264,12 @@ zfs_cmd_compat_put(zfs_cmd_t *zc, caddr_t addr, const int cflag)
/* zc_inject_record doesn't change in libzfs_core */
zc->zc_inject_record = zcdm_c->zc_inject_record;
#ifndef _KERNEL
if (request == ZFS_IOC_RECV)
strlcpy(zcdm_c->zc_top_ds,
zc->zc_value + strlen(zc->zc_value) + 1,
(MAXPATHLEN * 2) - strlen(zc->zc_value) - 1);
#endif
break;
case ZFS_CMD_COMPAT_V28:
@ -275,7 +278,6 @@ zfs_cmd_compat_put(zfs_cmd_t *zc, caddr_t addr, const int cflag)
strlcpy(zc28_c->zc_name, zc->zc_name, MAXPATHLEN);
strlcpy(zc28_c->zc_value, zc->zc_value, MAXPATHLEN * 2);
strlcpy(zc28_c->zc_string, zc->zc_string, MAXPATHLEN);
strlcpy(zc28_c->zc_top_ds, zc->zc_top_ds, MAXPATHLEN);
zc28_c->zc_guid = zc->zc_guid;
zc28_c->zc_nvlist_conf = zc->zc_nvlist_conf;
zc28_c->zc_nvlist_conf_size = zc->zc_nvlist_conf_size;
@ -305,7 +307,12 @@ zfs_cmd_compat_put(zfs_cmd_t *zc, caddr_t addr, const int cflag)
zc28_c->zc_fromobj = zc->zc_fromobj;
zc28_c->zc_createtxg = zc->zc_createtxg;
zc28_c->zc_stat = zc->zc_stat;
#ifndef _KERNEL
if (request == ZFS_IOC_RECV)
strlcpy(zc28_c->zc_top_ds,
zc->zc_value + strlen(zc->zc_value) + 1,
MAXPATHLEN * 2 - strlen(zc->zc_value) - 1);
#endif
/* zc_inject_record */
zc28_c->zc_inject_record.zi_objset =
zc->zc_inject_record.zi_objset;
@ -596,7 +603,8 @@ zcmd_ioctl_compat(int fd, int request, zfs_cmd_t *zc, const int cflag)
if (ZFS_IOCREQ(ncmd) == ZFS_IOC_COMPAT_FAIL)
return (ENOTSUP);
zfs_cmd_compat_put(zc, (caddr_t)zc_c, cflag);
zfs_cmd_compat_put(zc, (caddr_t)zc_c, request, cflag);
ret = ioctl(fd, ncmd, zc_c);
if (cflag == ZFS_CMD_COMPAT_V15 &&
nc == ZFS_IOC_POOL_IMPORT)
@ -664,7 +672,8 @@ nvlist_t *
zfs_ioctl_compat_innvl(zfs_cmd_t *zc, nvlist_t * innvl, const int vec,
const int cflag)
{
nvlist_t *nvl, *tmpnvl;
nvlist_t *nvl, *tmpnvl, *hnvl;
nvpair_t *elem;
char *poolname, *snapname;
int err;
@ -745,6 +754,69 @@ zfs_ioctl_compat_innvl(zfs_cmd_t *zc, nvlist_t * innvl, const int vec,
zc->zc_name[strcspn(zc->zc_name, "/@")] = '\0';
return (nvl);
break;
case ZFS_IOC_HOLD:
	/*
	 * Build the nvlist form of a hold request from the zfs_cmd_t
	 * fields: { "holds" -> { snapshot name -> tag } } plus an optional
	 * "cleanup_fd" entry.
	 */
	nvl = fnvlist_alloc();
	tmpnvl = fnvlist_alloc();
	if (zc->zc_cleanup_fd != -1)
		fnvlist_add_int32(nvl, "cleanup_fd",
		    (int32_t)zc->zc_cleanup_fd);
	if (zc->zc_cookie) {
		/* non-zero zc_cookie: one hold per snapshot in hnvl */
		hnvl = fnvlist_alloc();
		if (dmu_get_recursive_snaps_nvl(zc->zc_name,
		    zc->zc_value, hnvl) == 0) {
			elem = NULL;
			while ((elem = nvlist_next_nvpair(hnvl,
			    elem)) != NULL) {
				nvlist_add_string(tmpnvl,
				    nvpair_name(elem), zc->zc_string);
			}
		}
		nvlist_free(hnvl);
	} else {
		snapname = kmem_asprintf("%s@%s", zc->zc_name,
		    zc->zc_value);
		nvlist_add_string(tmpnvl, snapname, zc->zc_string);
		/*
		 * Free the whole string including its terminator;
		 * strlen(snapname + 1) would pass length - 1 instead.
		 */
		kmem_free(snapname, strlen(snapname) + 1);
	}
	fnvlist_add_nvlist(nvl, "holds", tmpnvl);
	nvlist_free(tmpnvl);
	if (innvl != NULL)
		nvlist_free(innvl);
	/* strip dataset part from zc->zc_name */
	zc->zc_name[strcspn(zc->zc_name, "/@")] = '\0';
	return (nvl);
	break;
case ZFS_IOC_RELEASE:
	/*
	 * Build the nvlist form of a release request from the zfs_cmd_t
	 * fields: { snapshot name -> { tag } }.
	 */
	nvl = fnvlist_alloc();
	tmpnvl = fnvlist_alloc();
	if (zc->zc_cookie) {
		/* non-zero zc_cookie: one entry per snapshot in hnvl */
		hnvl = fnvlist_alloc();
		if (dmu_get_recursive_snaps_nvl(zc->zc_name,
		    zc->zc_value, hnvl) == 0) {
			elem = NULL;
			while ((elem = nvlist_next_nvpair(hnvl,
			    elem)) != NULL) {
				fnvlist_add_boolean(tmpnvl,
				    zc->zc_string);
				fnvlist_add_nvlist(nvl,
				    nvpair_name(elem), tmpnvl);
			}
		}
		nvlist_free(hnvl);
	} else {
		snapname = kmem_asprintf("%s@%s", zc->zc_name,
		    zc->zc_value);
		fnvlist_add_boolean(tmpnvl, zc->zc_string);
		fnvlist_add_nvlist(nvl, snapname, tmpnvl);
		/*
		 * Free the whole string including its terminator;
		 * strlen(snapname + 1) would pass length - 1 instead.
		 */
		kmem_free(snapname, strlen(snapname) + 1);
	}
	nvlist_free(tmpnvl);
	if (innvl != NULL)
		nvlist_free(innvl);
	/* strip dataset part from zc->zc_name */
	zc->zc_name[strcspn(zc->zc_name, "/@")] = '\0';
	return (nvl);
	break;
}
out:
return (innvl);
@ -773,6 +845,8 @@ zfs_ioctl_compat_outnvl(zfs_cmd_t *zc, nvlist_t * outnvl, const int vec,
break;
case ZFS_IOC_CREATE:
case ZFS_IOC_CLONE:
case ZFS_IOC_HOLD:
case ZFS_IOC_RELEASE:
nvlist_free(outnvl);
/* return empty outnvl */
tmpnvl = fnvlist_alloc();

View File

@ -326,7 +326,7 @@ nvlist_t *zfs_ioctl_compat_outnvl(zfs_cmd_t *, nvlist_t *, const int,
int zcmd_ioctl_compat(int, int, zfs_cmd_t *, const int);
#endif /* _KERNEL */
void zfs_cmd_compat_get(zfs_cmd_t *, caddr_t, const int);
void zfs_cmd_compat_put(zfs_cmd_t *, caddr_t, const int);
void zfs_cmd_compat_put(zfs_cmd_t *, caddr_t, const int, const int);
#ifdef __cplusplus
}

View File

@ -49,8 +49,10 @@ ZFS_COMMON_OBJS += \
dsl_dir.o \
dsl_dataset.o \
dsl_deadlist.o \
dsl_destroy.o \
dsl_pool.o \
dsl_synctask.o \
dsl_userhold.o \
dmu_zfetch.o \
dsl_deleg.o \
dsl_prop.o \
@ -61,6 +63,7 @@ ZFS_COMMON_OBJS += \
lzjb.o \
metaslab.o \
refcount.o \
rrwlock.o \
sa.o \
sha256.o \
spa.o \
@ -120,7 +123,6 @@ ZFS_OBJS += \
zfs_onexit.o \
zfs_replay.o \
zfs_rlock.o \
rrwlock.o \
zfs_vfsops.o \
zfs_vnops.o \
zvol.o

View File

@ -1787,12 +1787,12 @@ arc_buf_free(arc_buf_t *buf, void *tag)
}
}
int
boolean_t
arc_buf_remove_ref(arc_buf_t *buf, void* tag)
{
arc_buf_hdr_t *hdr = buf->b_hdr;
kmutex_t *hash_lock = HDR_LOCK(hdr);
int no_callback = (buf->b_efunc == NULL);
boolean_t no_callback = (buf->b_efunc == NULL);
if (hdr->b_state == arc_anon) {
ASSERT(hdr->b_datacnt == 1);
@ -2042,7 +2042,7 @@ evict_start:
ARCSTAT_INCR(arcstat_mutex_miss, missed);
/*
* We have just evicted some date into the ghost state, make
* We have just evicted some data into the ghost state, make
* sure we also adjust the ghost state size if necessary.
*/
if (arc_no_grow &&
@ -2875,7 +2875,7 @@ arc_bcopy_func(zio_t *zio, arc_buf_t *buf, void *arg)
{
if (zio == NULL || zio->io_error == 0)
bcopy(buf->b_data, arg, buf->b_hdr->b_size);
VERIFY(arc_buf_remove_ref(buf, arg) == 1);
VERIFY(arc_buf_remove_ref(buf, arg));
}
/* a generic arc_done_func_t */
@ -2884,7 +2884,7 @@ arc_getbuf_func(zio_t *zio, arc_buf_t *buf, void *arg)
{
arc_buf_t **bufp = arg;
if (zio && zio->io_error) {
VERIFY(arc_buf_remove_ref(buf, arg) == 1);
VERIFY(arc_buf_remove_ref(buf, arg));
*bufp = NULL;
} else {
*bufp = buf;

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/bplist.h>
@ -52,6 +53,12 @@ bplist_append(bplist_t *bpl, const blkptr_t *bp)
mutex_exit(&bpl->bpl_lock);
}
/*
* To aid debugging, we keep the most recently removed entry. This way if
* we are in the callback, we can easily locate the entry.
*/
static bplist_entry_t *bplist_iterate_last_removed;
void
bplist_iterate(bplist_t *bpl, bplist_itor_t *func, void *arg, dmu_tx_t *tx)
{
@ -59,6 +66,7 @@ bplist_iterate(bplist_t *bpl, bplist_itor_t *func, void *arg, dmu_tx_t *tx)
mutex_enter(&bpl->bpl_lock);
while (bpe = list_head(&bpl->bpl_list)) {
bplist_iterate_last_removed = bpe;
list_remove(&bpl->bpl_list, bpe);
mutex_exit(&bpl->bpl_lock);
func(arg, &bpe->bpe_blk, tx);

View File

@ -392,6 +392,10 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx)
DMU_OT_BPOBJ_SUBOBJ, SPA_MAXBLOCKSIZE, DMU_OT_NONE, 0, tx);
}
dmu_object_info_t doi;
ASSERT0(dmu_object_info(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, &doi));
ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ_SUBOBJ);
mutex_enter(&bpo->bpo_lock);
dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs,
bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj),

View File

@ -39,7 +39,7 @@
#include <sys/sa_impl.h>
static void dbuf_destroy(dmu_buf_impl_t *db);
static int dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
static boolean_t dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx);
/*
@ -499,7 +499,7 @@ dbuf_read_done(zio_t *zio, arc_buf_t *buf, void *vdb)
} else {
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
ASSERT3P(db->db_buf, ==, NULL);
VERIFY(arc_buf_remove_ref(buf, db) == 1);
VERIFY(arc_buf_remove_ref(buf, db));
db->db_state = DB_UNCACHED;
}
cv_broadcast(&db->db_changed);
@ -828,10 +828,12 @@ dbuf_free_range(dnode_t *dn, uint64_t start, uint64_t end, dmu_tx_t *tx)
continue;
/* found a level 0 buffer in the range */
if (dbuf_undirty(db, tx))
continue;
mutex_enter(&db->db_mtx);
if (dbuf_undirty(db, tx)) {
/* mutex has been dropped and dbuf destroyed */
continue;
}
if (db->db_state == DB_UNCACHED ||
db->db_state == DB_NOFILL ||
db->db_state == DB_EVICTING) {
@ -958,7 +960,7 @@ dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx)
mutex_enter(&db->db_mtx);
dbuf_set_data(db, buf);
VERIFY(arc_buf_remove_ref(obuf, db) == 1);
VERIFY(arc_buf_remove_ref(obuf, db));
db->db.db_size = size;
if (db->db_level == 0) {
@ -1258,7 +1260,10 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
return (dr);
}
static int
/*
* Return TRUE if this evicted the dbuf.
*/
static boolean_t
dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
{
dnode_t *dn;
@ -1267,18 +1272,17 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
ASSERT(txg != 0);
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
ASSERT0(db->db_level);
ASSERT(MUTEX_HELD(&db->db_mtx));
mutex_enter(&db->db_mtx);
/*
* If this buffer is not dirty, we're done.
*/
for (drp = &db->db_last_dirty; (dr = *drp) != NULL; drp = &dr->dr_next)
if (dr->dr_txg <= txg)
break;
if (dr == NULL || dr->dr_txg < txg) {
mutex_exit(&db->db_mtx);
return (0);
}
if (dr == NULL || dr->dr_txg < txg)
return (B_FALSE);
ASSERT(dr->dr_txg == txg);
ASSERT(dr->dr_dbuf == db);
@ -1286,24 +1290,12 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
dn = DB_DNODE(db);
/*
* If this buffer is currently held, we cannot undirty
* it, since one of the current holders may be in the
* middle of an update. Note that users of dbuf_undirty()
* should not place a hold on the dbuf before the call.
* Also note: we can get here with a spill block, so
* test for that similar to how dbuf_dirty does.
* Note: This code will probably work even if there are concurrent
* holders, but it is untested in that scenario, as the ZPL and
* ztest have additional locking (the range locks) that prevents
* that type of concurrent access.
*/
if (refcount_count(&db->db_holds) > db->db_dirtycnt) {
mutex_exit(&db->db_mtx);
/* Make sure we don't toss this buffer at sync phase */
if (db->db_blkid != DMU_SPILL_BLKID) {
mutex_enter(&dn->dn_mtx);
dnode_clear_range(dn, db->db_blkid, 1, tx);
mutex_exit(&dn->dn_mtx);
}
DB_DNODE_EXIT(db);
return (0);
}
ASSERT3U(refcount_count(&db->db_holds), ==, db->db_dirtycnt);
dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size);
@ -1332,21 +1324,13 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
}
DB_DNODE_EXIT(db);
if (db->db_level == 0) {
if (db->db_state != DB_NOFILL) {
dbuf_unoverride(dr);
if (db->db_state != DB_NOFILL) {
dbuf_unoverride(dr);
ASSERT(db->db_buf != NULL);
ASSERT(dr->dt.dl.dr_data != NULL);
if (dr->dt.dl.dr_data != db->db_buf)
VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data,
db) == 1);
}
} else {
ASSERT(db->db_buf != NULL);
ASSERT(list_head(&dr->dt.di.dr_children) == NULL);
mutex_destroy(&dr->dt.di.dr_mtx);
list_destroy(&dr->dt.di.dr_children);
ASSERT(dr->dt.dl.dr_data != NULL);
if (dr->dt.dl.dr_data != db->db_buf)
VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data, db));
}
kmem_free(dr, sizeof (dbuf_dirty_record_t));
@ -1358,13 +1342,12 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
ASSERT(db->db_state == DB_NOFILL || arc_released(buf));
dbuf_set_data(db, NULL);
VERIFY(arc_buf_remove_ref(buf, db) == 1);
VERIFY(arc_buf_remove_ref(buf, db));
dbuf_evict(db);
return (1);
return (B_TRUE);
}
mutex_exit(&db->db_mtx);
return (0);
return (B_FALSE);
}
#pragma weak dmu_buf_will_dirty = dbuf_will_dirty
@ -1463,7 +1446,7 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
mutex_exit(&db->db_mtx);
(void) dbuf_dirty(db, tx);
bcopy(buf->b_data, db->db.db_data, db->db.db_size);
VERIFY(arc_buf_remove_ref(buf, db) == 1);
VERIFY(arc_buf_remove_ref(buf, db));
xuio_stat_wbuf_copied();
return;
}
@ -1481,10 +1464,10 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
arc_release(db->db_buf, db);
}
dr->dt.dl.dr_data = buf;
VERIFY(arc_buf_remove_ref(db->db_buf, db) == 1);
VERIFY(arc_buf_remove_ref(db->db_buf, db));
} else if (dr == NULL || dr->dt.dl.dr_data != db->db_buf) {
arc_release(db->db_buf, db);
VERIFY(arc_buf_remove_ref(db->db_buf, db) == 1);
VERIFY(arc_buf_remove_ref(db->db_buf, db));
}
db->db_buf = NULL;
}
@ -2067,10 +2050,10 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
* This dbuf has anonymous data associated with it.
*/
dbuf_set_data(db, NULL);
VERIFY(arc_buf_remove_ref(buf, db) == 1);
VERIFY(arc_buf_remove_ref(buf, db));
dbuf_evict(db);
} else {
VERIFY(arc_buf_remove_ref(db->db_buf, db) == 0);
VERIFY(!arc_buf_remove_ref(db->db_buf, db));
/*
* A dbuf will be eligible for eviction if either the
@ -2571,7 +2554,7 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
if (db->db_state != DB_NOFILL) {
if (dr->dt.dl.dr_data != db->db_buf)
VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data,
db) == 1);
db));
else if (!arc_released(db->db_buf))
arc_set_callback(db->db_buf, dbuf_do_evict, db);
}

View File

@ -1204,7 +1204,7 @@ void
dmu_return_arcbuf(arc_buf_t *buf)
{
arc_return_buf(buf, FTAG);
VERIFY(arc_buf_remove_ref(buf, FTAG) == 1);
VERIFY(arc_buf_remove_ref(buf, FTAG));
}
/*

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/dmu.h>
@ -178,51 +179,53 @@ diff_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
}
int
dmu_diff(objset_t *tosnap, objset_t *fromsnap, struct file *fp, offset_t *offp)
dmu_diff(const char *tosnap_name, const char *fromsnap_name,
#ifdef illumos
struct vnode *vp, offset_t *offp)
#else
struct file *fp, offset_t *offp)
#endif
{
struct diffarg da;
dsl_dataset_t *ds = tosnap->os_dsl_dataset;
dsl_dataset_t *fromds = fromsnap->os_dsl_dataset;
dsl_dataset_t *findds;
dsl_dataset_t *relds;
int err = 0;
dsl_dataset_t *fromsnap;
dsl_dataset_t *tosnap;
dsl_pool_t *dp;
int error;
uint64_t fromtxg;
/* make certain we are looking at snapshots */
if (!dsl_dataset_is_snapshot(ds) || !dsl_dataset_is_snapshot(fromds))
if (strchr(tosnap_name, '@') == NULL ||
strchr(fromsnap_name, '@') == NULL)
return (EINVAL);
/* fromsnap must be earlier and from the same lineage as tosnap */
if (fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg)
return (EXDEV);
error = dsl_pool_hold(tosnap_name, FTAG, &dp);
if (error != 0)
return (error);
relds = NULL;
findds = ds;
while (fromds->ds_dir != findds->ds_dir) {
dsl_pool_t *dp = ds->ds_dir->dd_pool;
if (!dsl_dir_is_clone(findds->ds_dir)) {
if (relds)
dsl_dataset_rele(relds, FTAG);
return (EXDEV);
}
rw_enter(&dp->dp_config_rwlock, RW_READER);
err = dsl_dataset_hold_obj(dp,
findds->ds_dir->dd_phys->dd_origin_obj, FTAG, &findds);
rw_exit(&dp->dp_config_rwlock);
if (relds)
dsl_dataset_rele(relds, FTAG);
if (err)
return (EXDEV);
relds = findds;
error = dsl_dataset_hold(dp, tosnap_name, FTAG, &tosnap);
if (error != 0) {
dsl_pool_rele(dp, FTAG);
return (error);
}
if (relds)
dsl_dataset_rele(relds, FTAG);
error = dsl_dataset_hold(dp, fromsnap_name, FTAG, &fromsnap);
if (error != 0) {
dsl_dataset_rele(tosnap, FTAG);
dsl_pool_rele(dp, FTAG);
return (error);
}
if (!dsl_dataset_is_before(tosnap, fromsnap)) {
dsl_dataset_rele(fromsnap, FTAG);
dsl_dataset_rele(tosnap, FTAG);
dsl_pool_rele(dp, FTAG);
return (EXDEV);
}
fromtxg = fromsnap->ds_phys->ds_creation_txg;
dsl_dataset_rele(fromsnap, FTAG);
dsl_dataset_long_hold(tosnap, FTAG);
dsl_pool_rele(dp, FTAG);
da.da_fp = fp;
da.da_offp = offp;
@ -231,15 +234,18 @@ dmu_diff(objset_t *tosnap, objset_t *fromsnap, struct file *fp, offset_t *offp)
da.da_err = 0;
da.da_td = curthread;
err = traverse_dataset(ds, fromds->ds_phys->ds_creation_txg,
error = traverse_dataset(tosnap, fromtxg,
TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, diff_cb, &da);
if (err) {
da.da_err = err;
if (error != 0) {
da.da_err = error;
} else {
/* we set the da.da_err we return as side-effect */
(void) write_record(&da);
}
dsl_dataset_long_rele(tosnap, FTAG);
dsl_dataset_rele(tosnap, FTAG);
return (da.da_err);
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -265,7 +265,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
if (err)
if (err != 0)
return (err);
cbp = buf->b_data;
@ -282,7 +282,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
zb->zb_level - 1,
zb->zb_blkid * epb + i);
err = traverse_visitbp(td, dnp, &cbp[i], &czb);
if (err) {
if (err != 0) {
if (!hard)
break;
lasterr = err;
@ -295,7 +295,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
if (err)
if (err != 0)
return (err);
dnp = buf->b_data;
@ -308,7 +308,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
for (i = 0; i < epb; i++) {
err = traverse_dnode(td, &dnp[i], zb->zb_objset,
zb->zb_blkid * epb + i);
if (err) {
if (err != 0) {
if (!hard)
break;
lasterr = err;
@ -321,7 +321,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
if (err)
if (err != 0)
return (err);
osp = buf->b_data;
@ -405,7 +405,7 @@ traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp,
for (j = 0; j < dnp->dn_nblkptr; j++) {
SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j);
err = traverse_visitbp(td, dnp, &dnp->dn_blkptr[j], &czb);
if (err) {
if (err != 0) {
if (!hard)
break;
lasterr = err;
@ -415,7 +415,7 @@ traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp,
if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
SET_BOOKMARK(&czb, objset, object, 0, DMU_SPILL_BLKID);
err = traverse_visitbp(td, dnp, &dnp->dn_spill, &czb);
if (err) {
if (err != 0) {
if (!hard)
return (err);
lasterr = err;
@ -514,14 +514,20 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
cv_init(&pd.pd_cv, NULL, CV_DEFAULT, NULL);
/* See comment on ZIL traversal in dsl_scan_visitds. */
if (ds != NULL && !dsl_dataset_is_snapshot(ds)) {
objset_t *os;
if (ds != NULL && !dsl_dataset_is_snapshot(ds) && !BP_IS_HOLE(rootbp)) {
uint32_t flags = ARC_WAIT;
objset_phys_t *osp;
arc_buf_t *buf;
err = dmu_objset_from_ds(ds, &os);
if (err)
err = arc_read(NULL, td.td_spa, rootbp,
arc_getbuf_func, &buf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, NULL);
if (err != 0)
return (err);
traverse_zil(&td, &os->os_zil_header);
osp = buf->b_data;
traverse_zil(&td, &osp->os_zil_header);
(void) arc_buf_remove_ref(buf, &buf);
}
if (!(flags & TRAVERSE_PREFETCH_DATA) ||
@ -583,7 +589,7 @@ traverse_pool(spa_t *spa, uint64_t txg_start, int flags,
/* visit the MOS */
err = traverse_impl(spa, NULL, 0, spa_get_rootblkptr(spa),
txg_start, NULL, flags, func, arg);
if (err)
if (err != 0)
return (err);
/* visit each dataset */
@ -592,7 +598,7 @@ traverse_pool(spa_t *spa, uint64_t txg_start, int flags,
dmu_object_info_t doi;
err = dmu_object_info(mos, obj, &doi);
if (err) {
if (err != 0) {
if (!hard)
return (err);
lasterr = err;
@ -603,10 +609,10 @@ traverse_pool(spa_t *spa, uint64_t txg_start, int flags,
dsl_dataset_t *ds;
uint64_t txg = txg_start;
rw_enter(&dp->dp_config_rwlock, RW_READER);
dsl_pool_config_enter(dp, FTAG);
err = dsl_dataset_hold_obj(dp, obj, FTAG, &ds);
rw_exit(&dp->dp_config_rwlock);
if (err) {
dsl_pool_config_exit(dp, FTAG);
if (err != 0) {
if (!hard)
return (err);
lasterr = err;
@ -616,7 +622,7 @@ traverse_pool(spa_t *spa, uint64_t txg_start, int flags,
txg = ds->ds_phys->ds_prev_snap_txg;
err = traverse_dataset(ds, txg, flags, func, arg);
dsl_dataset_rele(ds, FTAG);
if (err) {
if (err != 0) {
if (!hard)
return (err);
lasterr = err;

View File

@ -898,7 +898,7 @@ dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db)
#endif
static int
dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how)
dmu_tx_try_assign(dmu_tx_t *tx, txg_how_t txg_how)
{
dmu_tx_hold_t *txh;
spa_t *spa = tx->tx_pool->dp_spa;
@ -961,13 +961,6 @@ dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how)
fudge += txh->txh_fudge;
}
/*
* NB: This check must be after we've held the dnodes, so that
* the dmu_tx_unassign() logic will work properly
*/
if (txg_how >= TXG_INITIAL && txg_how != tx->tx_txg)
return (ERESTART);
/*
* If a snapshot has been taken since we made our estimates,
* assume that we won't be able to free or overwrite anything.
@ -1048,26 +1041,25 @@ dmu_tx_unassign(dmu_tx_t *tx)
*
* (1) TXG_WAIT. If the current open txg is full, waits until there's
* a new one. This should be used when you're not holding locks.
* If will only fail if we're truly out of space (or over quota).
* It will only fail if we're truly out of space (or over quota).
*
* (2) TXG_NOWAIT. If we can't assign into the current open txg without
* blocking, returns immediately with ERESTART. This should be used
* whenever you're holding locks. On an ERESTART error, the caller
* should drop locks, do a dmu_tx_wait(tx), and try again.
*
* (3) A specific txg. Use this if you need to ensure that multiple
* transactions all sync in the same txg. Like TXG_NOWAIT, it
* returns ERESTART if it can't assign you into the requested txg.
*/
int
dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how)
dmu_tx_assign(dmu_tx_t *tx, txg_how_t txg_how)
{
int err;
ASSERT(tx->tx_txg == 0);
ASSERT(txg_how != 0);
ASSERT(txg_how == TXG_WAIT || txg_how == TXG_NOWAIT);
ASSERT(!dsl_pool_sync_context(tx->tx_pool));
/* If we might wait, we must not hold the config lock. */
ASSERT(txg_how != TXG_WAIT || !dsl_pool_config_held(tx->tx_pool));
while ((err = dmu_tx_try_assign(tx, txg_how)) != 0) {
dmu_tx_unassign(tx);
@ -1088,6 +1080,7 @@ dmu_tx_wait(dmu_tx_t *tx)
spa_t *spa = tx->tx_pool->dp_spa;
ASSERT(tx->tx_txg == 0);
ASSERT(!dsl_pool_config_held(tx->tx_pool));
/*
* It's possible that the pool has become active after this thread
@ -1214,6 +1207,14 @@ dmu_tx_get_txg(dmu_tx_t *tx)
return (tx->tx_txg);
}
/*
 * Return the DSL pool recorded in the transaction (tx->tx_pool).
 * Asserts that the pool pointer is non-NULL before handing it back.
 */
dsl_pool_t *
dmu_tx_pool(dmu_tx_t *tx)
{
ASSERT(tx->tx_pool != NULL);
return (tx->tx_pool);
}
void
dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *func, void *data)
{

View File

@ -74,7 +74,11 @@ dnode_cons(void *arg, void *unused, int kmflag)
mutex_init(&dn->dn_dbufs_mtx, NULL, MUTEX_DEFAULT, NULL);
cv_init(&dn->dn_notxholds, NULL, CV_DEFAULT, NULL);
refcount_create(&dn->dn_holds);
/*
* Every dbuf has a reference, and dropping a tracked reference is
* O(number of references), so don't track dn_holds.
*/
refcount_create_untracked(&dn->dn_holds);
refcount_create(&dn->dn_tx_holds);
list_link_init(&dn->dn_link);

View File

@ -480,6 +480,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
dnode_undirty_dbufs(&dn->dn_dirty_records[txgoff]);
dnode_evict_dbufs(dn);
ASSERT3P(list_head(&dn->dn_dbufs), ==, NULL);
ASSERT3P(dn->dn_bonus, ==, NULL);
/*
* XXX - It would be nice to assert this, but we may still

File diff suppressed because it is too large Load Diff

View File

@ -147,28 +147,37 @@ dsl_deleg_can_unallow(char *ddname, nvlist_t *nvp, cred_t *cr)
return (0);
}
static void
dsl_deleg_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
dsl_dir_t *dd = arg1;
nvlist_t *nvp = arg2;
objset_t *mos = dd->dd_pool->dp_meta_objset;
nvpair_t *whopair = NULL;
uint64_t zapobj = dd->dd_phys->dd_deleg_zapobj;
/*
 * Argument bundle passed through dsl_sync_task() to the delegation
 * check/sync callbacks.
 */
typedef struct dsl_deleg_arg {
const char *dda_name;	/* name given to dsl_dir_hold() to locate the dir */
nvlist_t *dda_nvlist;	/* nvlist walked pair-by-pair by the sync callbacks */
} dsl_deleg_arg_t;
static void
dsl_deleg_set_sync(void *arg, dmu_tx_t *tx)
{
dsl_deleg_arg_t *dda = arg;
dsl_dir_t *dd;
dsl_pool_t *dp = dmu_tx_pool(tx);
objset_t *mos = dp->dp_meta_objset;
nvpair_t *whopair = NULL;
uint64_t zapobj;
VERIFY0(dsl_dir_hold(dp, dda->dda_name, FTAG, &dd, NULL));
zapobj = dd->dd_phys->dd_deleg_zapobj;
if (zapobj == 0) {
dmu_buf_will_dirty(dd->dd_dbuf, tx);
zapobj = dd->dd_phys->dd_deleg_zapobj = zap_create(mos,
DMU_OT_DSL_PERMS, DMU_OT_NONE, 0, tx);
}
while (whopair = nvlist_next_nvpair(nvp, whopair)) {
while (whopair = nvlist_next_nvpair(dda->dda_nvlist, whopair)) {
const char *whokey = nvpair_name(whopair);
nvlist_t *perms;
nvpair_t *permpair = NULL;
uint64_t jumpobj;
VERIFY(nvpair_value_nvlist(whopair, &perms) == 0);
perms = fnvpair_value_nvlist(whopair);
if (zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj) != 0) {
jumpobj = zap_create_link(mos, DMU_OT_DSL_PERMS,
@ -185,21 +194,27 @@ dsl_deleg_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
"%s %s", whokey, perm);
}
}
dsl_dir_rele(dd, FTAG);
}
static void
dsl_deleg_unset_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_deleg_unset_sync(void *arg, dmu_tx_t *tx)
{
dsl_dir_t *dd = arg1;
nvlist_t *nvp = arg2;
objset_t *mos = dd->dd_pool->dp_meta_objset;
dsl_deleg_arg_t *dda = arg;
dsl_dir_t *dd;
dsl_pool_t *dp = dmu_tx_pool(tx);
objset_t *mos = dp->dp_meta_objset;
nvpair_t *whopair = NULL;
uint64_t zapobj = dd->dd_phys->dd_deleg_zapobj;
uint64_t zapobj;
if (zapobj == 0)
VERIFY0(dsl_dir_hold(dp, dda->dda_name, FTAG, &dd, NULL));
zapobj = dd->dd_phys->dd_deleg_zapobj;
if (zapobj == 0) {
dsl_dir_rele(dd, FTAG);
return;
}
while (whopair = nvlist_next_nvpair(nvp, whopair)) {
while (whopair = nvlist_next_nvpair(dda->dda_nvlist, whopair)) {
const char *whokey = nvpair_name(whopair);
nvlist_t *perms;
nvpair_t *permpair = NULL;
@ -234,35 +249,40 @@ dsl_deleg_unset_sync(void *arg1, void *arg2, dmu_tx_t *tx)
"%s %s", whokey, perm);
}
}
dsl_dir_rele(dd, FTAG);
}
/*
 * Check callback for the delegation sync task.
 *
 * Returns ENOTSUP if the pool's SPA version predates
 * SPA_VERSION_DELEGATED_PERMS; otherwise returns whatever dsl_dir_hold()
 * reports for dda_name (0 when the directory can be held).  The hold is
 * dropped again before returning.
 */
static int
dsl_deleg_check(void *arg, dmu_tx_t *tx)
{
	dsl_deleg_arg_t *dda = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	dsl_dir_t *dd;
	int error;

	if (spa_version(dp->dp_spa) < SPA_VERSION_DELEGATED_PERMS)
		return (ENOTSUP);

	error = dsl_dir_hold(dp, dda->dda_name, FTAG, &dd, NULL);
	if (error != 0)
		return (error);

	/* We only needed to prove the directory exists. */
	dsl_dir_rele(dd, FTAG);
	return (0);
}
int
dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset)
{
dsl_dir_t *dd;
int error;
nvpair_t *whopair = NULL;
int blocks_modified = 0;
dsl_deleg_arg_t dda;
error = dsl_dir_open(ddname, FTAG, &dd, NULL);
if (error)
return (error);
/* nvp must already have been verified to be valid */
if (spa_version(dmu_objset_spa(dd->dd_pool->dp_meta_objset)) <
SPA_VERSION_DELEGATED_PERMS) {
dsl_dir_close(dd, FTAG);
return (ENOTSUP);
}
dda.dda_name = ddname;
dda.dda_nvlist = nvp;
while (whopair = nvlist_next_nvpair(nvp, whopair))
blocks_modified++;
error = dsl_sync_task_do(dd->dd_pool, NULL,
return (dsl_sync_task(ddname, dsl_deleg_check,
unset ? dsl_deleg_unset_sync : dsl_deleg_set_sync,
dd, nvp, blocks_modified);
dsl_dir_close(dd, FTAG);
return (error);
&dda, fnvlist_num_pairs(nvp)));
}
/*
@ -290,16 +310,21 @@ dsl_deleg_get(const char *ddname, nvlist_t **nvp)
int error;
objset_t *mos;
error = dsl_dir_open(ddname, FTAG, &startdd, NULL);
if (error)
error = dsl_pool_hold(ddname, FTAG, &dp);
if (error != 0)
return (error);
error = dsl_dir_hold(dp, ddname, FTAG, &startdd, NULL);
if (error != 0) {
dsl_pool_rele(dp, FTAG);
return (error);
}
dp = startdd->dd_pool;
mos = dp->dp_meta_objset;
VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
rw_enter(&dp->dp_config_rwlock, RW_READER);
for (dd = startdd; dd != NULL; dd = dd->dd_parent) {
zap_cursor_t basezc;
zap_attribute_t baseza;
@ -307,15 +332,12 @@ dsl_deleg_get(const char *ddname, nvlist_t **nvp)
uint64_t n;
char source[MAXNAMELEN];
if (dd->dd_phys->dd_deleg_zapobj &&
(zap_count(mos, dd->dd_phys->dd_deleg_zapobj,
&n) == 0) && n) {
VERIFY(nvlist_alloc(&sp_nvp,
NV_UNIQUE_NAME, KM_SLEEP) == 0);
} else {
if (dd->dd_phys->dd_deleg_zapobj == 0 ||
zap_count(mos, dd->dd_phys->dd_deleg_zapobj, &n) != 0 ||
n == 0)
continue;
}
sp_nvp = fnvlist_alloc();
for (zap_cursor_init(&basezc, mos,
dd->dd_phys->dd_deleg_zapobj);
zap_cursor_retrieve(&basezc, &baseza) == 0;
@ -327,29 +349,26 @@ dsl_deleg_get(const char *ddname, nvlist_t **nvp)
ASSERT(baseza.za_integer_length == 8);
ASSERT(baseza.za_num_integers == 1);
VERIFY(nvlist_alloc(&perms_nvp,
NV_UNIQUE_NAME, KM_SLEEP) == 0);
perms_nvp = fnvlist_alloc();
for (zap_cursor_init(&zc, mos, baseza.za_first_integer);
zap_cursor_retrieve(&zc, &za) == 0;
zap_cursor_advance(&zc)) {
VERIFY(nvlist_add_boolean(perms_nvp,
za.za_name) == 0);
fnvlist_add_boolean(perms_nvp, za.za_name);
}
zap_cursor_fini(&zc);
VERIFY(nvlist_add_nvlist(sp_nvp, baseza.za_name,
perms_nvp) == 0);
nvlist_free(perms_nvp);
fnvlist_add_nvlist(sp_nvp, baseza.za_name, perms_nvp);
fnvlist_free(perms_nvp);
}
zap_cursor_fini(&basezc);
dsl_dir_name(dd, source);
VERIFY(nvlist_add_nvlist(*nvp, source, sp_nvp) == 0);
fnvlist_add_nvlist(*nvp, source, sp_nvp);
nvlist_free(sp_nvp);
}
rw_exit(&dp->dp_config_rwlock);
dsl_dir_close(startdd, FTAG);
dsl_dir_rele(startdd, FTAG);
dsl_pool_rele(dp, FTAG);
return (0);
}
@ -555,7 +574,7 @@ dsl_deleg_access_impl(dsl_dataset_t *ds, const char *perm, cred_t *cr)
avl_create(&permsets, perm_set_compare, sizeof (perm_set_t),
offsetof(perm_set_t, p_node));
rw_enter(&dp->dp_config_rwlock, RW_READER);
ASSERT(dsl_pool_config_held(dp));
for (dd = ds->ds_dir; dd != NULL; dd = dd->dd_parent,
checkflag = ZFS_DELEG_DESCENDENT) {
uint64_t zapobj;
@ -616,7 +635,6 @@ again:
}
error = EPERM;
success:
rw_exit(&dp->dp_config_rwlock);
cookie = NULL;
while ((setnode = avl_destroy_nodes(&permsets, &cookie)) != NULL)
@ -628,15 +646,19 @@ success:
int
dsl_deleg_access(const char *dsname, const char *perm, cred_t *cr)
{
dsl_pool_t *dp;
dsl_dataset_t *ds;
int error;
error = dsl_dataset_hold(dsname, FTAG, &ds);
if (error)
error = dsl_pool_hold(dsname, FTAG, &dp);
if (error != 0)
return (error);
error = dsl_deleg_access_impl(ds, perm, cr);
dsl_dataset_rele(ds, FTAG);
error = dsl_dataset_hold(dp, dsname, FTAG, &ds);
if (error == 0) {
error = dsl_deleg_access_impl(ds, perm, cr);
dsl_dataset_rele(ds, FTAG);
}
dsl_pool_rele(dp, FTAG);
return (error);
}

View File

@ -0,0 +1,926 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
#include <sys/dsl_userhold.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_synctask.h>
#include <sys/dmu_tx.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_dir.h>
#include <sys/dmu_traverse.h>
#include <sys/dsl_scan.h>
#include <sys/dmu_objset.h>
#include <sys/zap.h>
#include <sys/zfeature.h>
#include <sys/zfs_ioctl.h>
#include <sys/dsl_deleg.h>
/*
 * Argument bundle shared by dsl_destroy_snapshot_check() and
 * dsl_destroy_snapshot_sync(); filled in by dsl_destroy_snapshots_nvl().
 */
typedef struct dmu_snapshots_destroy_arg {
nvlist_t *dsda_snaps;	/* in: snapshot names requested by the caller */
nvlist_t *dsda_successful_snaps;	/* names that passed the check; the sync func destroys these */
boolean_t dsda_defer;	/* forwarded to the *_check_impl/*_sync_impl routines */
nvlist_t *dsda_errlist;	/* out: snapshot name -> int32 error from the check */
} dmu_snapshots_destroy_arg_t;
/*
 * Decide whether snapshot `ds` may be destroyed now or, when `defer` is
 * set, whether it may at least be marked for deferred destruction.
 *
 * Caller contract (from the original note): ds must be owned.
 *
 * Returns 0 if the operation may proceed, otherwise:
 *   EINVAL  - ds is not a snapshot
 *   EBUSY   - ds is long-held, or (non-deferred) has user references
 *   ENOTSUP - deferred destroy on a pool older than SPA_VERSION_USERREFS
 *   EEXIST  - (non-deferred) ds is a branch point
 */
static int
dsl_destroy_snapshot_check_impl(dsl_dataset_t *ds, boolean_t defer)
{
	if (!dsl_dataset_is_snapshot(ds))
		return (EINVAL);

	if (dsl_dataset_long_held(ds))
		return (EBUSY);

	if (defer) {
		/*
		 * Deferred destroy only exists for snapshots, and only on
		 * pools new enough to support it.  User references and
		 * clones do not block a deferred destroy.
		 */
		return (spa_version(ds->ds_dir->dd_pool->dp_spa) <
		    SPA_VERSION_USERREFS ? ENOTSUP : 0);
	}

	/* An elevated user reference count pins the snapshot. */
	if (ds->ds_userrefs > 0)
		return (EBUSY);

	/* More than one child means clones hang off this snapshot. */
	return (ds->ds_phys->ds_num_children > 1 ? EEXIST : 0);
}
/*
 * Check callback for the snapshot-destroy sync task.
 *
 * Outside syncing context this returns 0 without looking at anything.
 * In syncing context every name in dsda_snaps is examined:
 *   - names that do not resolve (ENOENT) are skipped silently, i.e.
 *     treated as already destroyed;
 *   - names that pass dsl_destroy_snapshot_check_impl() are recorded in
 *     dsda_successful_snaps;
 *   - any other outcome is recorded in dsda_errlist as name -> errno.
 *
 * Returns the first error stored in dsda_errlist, or 0 if it is empty.
 */
static int
dsl_destroy_snapshot_check(void *arg, dmu_tx_t *tx)
{
	dmu_snapshots_destroy_arg_t *dsda = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	nvpair_t *elem = NULL;
	nvpair_t *first_err;

	if (!dmu_tx_is_syncing(tx))
		return (0);

	while ((elem = nvlist_next_nvpair(dsda->dsda_snaps, elem)) != NULL) {
		const char *snapname = nvpair_name(elem);
		dsl_dataset_t *ds;
		int error;

		error = dsl_dataset_hold(dp, snapname, FTAG, &ds);

		/* A missing snapshot counts as "already destroyed". */
		if (error == ENOENT)
			continue;

		if (error == 0) {
			error = dsl_destroy_snapshot_check_impl(ds,
			    dsda->dsda_defer);
			dsl_dataset_rele(ds, FTAG);
		}

		if (error != 0) {
			fnvlist_add_int32(dsda->dsda_errlist,
			    snapname, error);
		} else {
			fnvlist_add_boolean(dsda->dsda_successful_snaps,
			    snapname);
		}
	}

	first_err = nvlist_next_nvpair(dsda->dsda_errlist, NULL);
	return (first_err != NULL ? fnvpair_value_int32(first_err) : 0);
}
/*
 * State handed from process_old_deadlist() to its per-block callback
 * process_old_cb().
 */
struct process_old_arg {
dsl_dataset_t *ds;	/* snapshot being destroyed */
dsl_dataset_t *ds_prev;	/* previous snapshot; may be NULL (callback checks) */
boolean_t after_branch_point;	/* when set, prev's unique bytes are not adjusted */
zio_t *pio;	/* root zio passed to dsl_free_sync() for freed blocks */
uint64_t used, comp, uncomp;	/* running totals for blocks freed by the callback */
};
/*
 * bpobj_iterate() callback used by process_old_deadlist().
 *
 * A block born after the destroyed snapshot's previous snapshot is freed
 * and its sizes are added to the running totals in `arg`.  Any older
 * block is moved onto the destroyed snapshot's deadlist; if there is a
 * previous snapshot, we are not after a branch point, and the block was
 * born after prev's own previous snapshot, its size is credited to prev's
 * unique bytes.
 *
 * Always returns 0.
 */
static int
process_old_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
{
	struct process_old_arg *poa = arg;
	dsl_dataset_t *ds = poa->ds;
	dsl_dataset_t *prev = poa->ds_prev;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;

	if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) {
		/* No remaining snapshot references it: account and free. */
		poa->used += bp_get_dsize_sync(dp->dp_spa, bp);
		poa->comp += BP_GET_PSIZE(bp);
		poa->uncomp += BP_GET_UCSIZE(bp);
		dsl_free_sync(poa->pio, dp, tx->tx_txg, bp);
		return (0);
	}

	dsl_deadlist_insert(&ds->ds_deadlist, bp, tx);
	if (prev != NULL && !poa->after_branch_point &&
	    bp->blk_birth > prev->ds_phys->ds_prev_snap_txg) {
		prev->ds_phys->ds_unique_bytes +=
		    bp_get_dsize_sync(dp->dp_spa, bp);
	}

	return (0);
}
/*
 * Old-format deadlist handling for snapshot destruction.
 *
 * Walks ds_next's deadlist bpobj with process_old_cb(), which frees some
 * blocks and moves the rest onto ds's deadlist.  The space freed is then
 * subtracted from the directory's DD_USED_SNAP accounting, and finally
 * the deadlist objects of ds and ds_next are exchanged so that ds_next
 * ends up with the merged list.
 *
 * Both ds and ds_next are asserted to use the old deadlist format.
 */
static void
process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev,
    dsl_dataset_t *ds_next, boolean_t after_branch_point, dmu_tx_t *tx)
{
	struct process_old_arg poa = { 0 };
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;
	uint64_t next_obj;

	ASSERT(ds->ds_deadlist.dl_oldfmt);
	ASSERT(ds_next->ds_deadlist.dl_oldfmt);

	poa.ds = ds;
	poa.ds_prev = ds_prev;
	poa.after_branch_point = after_branch_point;
	poa.pio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);

	VERIFY0(bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj,
	    process_old_cb, &poa, tx));
	VERIFY0(zio_wait(poa.pio));
	ASSERT3U(poa.used, ==, ds->ds_phys->ds_unique_bytes);

	/* The freed space no longer counts against snapused. */
	dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
	    -poa.used, -poa.comp, -poa.uncomp, tx);

	/*
	 * Exchange the two on-disk deadlist objects.  Both in-core
	 * deadlists are closed first and reopened on their new objects.
	 */
	dsl_deadlist_close(&ds->ds_deadlist);
	dsl_deadlist_close(&ds_next->ds_deadlist);

	next_obj = ds_next->ds_phys->ds_deadlist_obj;
	ds_next->ds_phys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
	ds->ds_phys->ds_deadlist_obj = next_obj;

	dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj);
	dsl_deadlist_open(&ds_next->ds_deadlist, mos,
	    ds_next->ds_phys->ds_deadlist_obj);
}
/*
 * Remove the `mintxg` key from the deadlist of every clone recorded in
 * ds's directory whose origin txg is greater than mintxg, recursing into
 * each such clone so that clones of clones are handled too.
 */
static void
dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx)
{
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;
	uint64_t clones_zapobj = ds->ds_dir->dd_phys->dd_clones;
	zap_cursor_t cursor;
	zap_attribute_t attr;

	/*
	 * Old pool versions have no dd_clones object, so the clones cannot
	 * be enumerated.  That is harmless: dsl_deadlist_remove_key() is a
	 * no-op there anyway.
	 */
	if (clones_zapobj == 0)
		return;

	zap_cursor_init(&cursor, mos, clones_zapobj);
	while (zap_cursor_retrieve(&cursor, &attr) == 0) {
		dsl_dataset_t *clone;

		VERIFY0(dsl_dataset_hold_obj(dp,
		    attr.za_first_integer, FTAG, &clone));
		if (clone->ds_dir->dd_origin_txg > mintxg) {
			dsl_deadlist_remove_key(&clone->ds_deadlist,
			    mintxg, tx);
			dsl_dataset_remove_clones_key(clone, mintxg, tx);
		}
		dsl_dataset_rele(clone, FTAG);

		zap_cursor_advance(&cursor);
	}
	zap_cursor_fini(&cursor);
}
/*
 * Destroy snapshot `ds` in syncing context.
 *
 * If `defer` is set and the snapshot still has user references or more
 * than one child, the snapshot is only flagged DS_FLAG_DEFER_DESTROY and
 * the function returns.  Otherwise the snapshot is unlinked from its
 * neighbours (previous snapshot, next snapshot or head), deadlists and
 * space accounting are folded into the neighbours, the name is removed
 * from the head's snapshot namespace, and the dataset's MOS objects are
 * freed.
 *
 * Preconditions (asserted): the pool config lock is held as writer, the
 * dataset's root block was born no later than this txg, and ds has no
 * long holds.
 *
 * On return from the non-deferred path ds->ds_dir has been released and
 * set to NULL, and ds->ds_objset (if any) has been evicted.
 */
void
dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
{
	int after_branch_point = FALSE;
	dsl_pool_t *dp = ds->ds_dir->dd_pool;
	objset_t *mos = dp->dp_meta_objset;
	dsl_dataset_t *ds_prev = NULL;
	uint64_t obj;

	ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
	ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);
	ASSERT(refcount_is_zero(&ds->ds_longholds));

	if (defer &&
	    (ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1)) {
		/* Still referenced: mark it and leave it in place. */
		ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
		dmu_buf_will_dirty(ds->ds_dbuf, tx);
		ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY;
		spa_history_log_internal_ds(ds, "defer_destroy", tx, "");
		return;
	}

	ASSERT3U(ds->ds_phys->ds_num_children, <=, 1);

	/* We need to log before removing it from the namespace. */
	spa_history_log_internal_ds(ds, "destroy", tx, "");

	dsl_scan_ds_destroyed(ds, tx);

	obj = ds->ds_object;

	/* Unlink ourselves from the previous snapshot, if there is one. */
	if (ds->ds_phys->ds_prev_snap_obj != 0) {
		ASSERT3P(ds->ds_prev, ==, NULL);
		VERIFY0(dsl_dataset_hold_obj(dp,
		    ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev));
		/* prev's "next" is someone else => we sit on a clone branch */
		after_branch_point =
		    (ds_prev->ds_phys->ds_next_snap_obj != obj);

		dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
		if (after_branch_point &&
		    ds_prev->ds_phys->ds_next_clones_obj != 0) {
			dsl_dataset_remove_from_next_clones(ds_prev, obj, tx);
			if (ds->ds_phys->ds_next_snap_obj != 0) {
				VERIFY0(zap_add_int(mos,
				    ds_prev->ds_phys->ds_next_clones_obj,
				    ds->ds_phys->ds_next_snap_obj, tx));
			}
		}
		if (!after_branch_point) {
			ds_prev->ds_phys->ds_next_snap_obj =
			    ds->ds_phys->ds_next_snap_obj;
		}
	}

	dsl_dataset_t *ds_next;
	uint64_t old_unique;
	uint64_t used = 0, comp = 0, uncomp = 0;

	VERIFY0(dsl_dataset_hold_obj(dp,
	    ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next));
	ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);

	old_unique = ds_next->ds_phys->ds_unique_bytes;

	/* Point next's "previous snapshot" past us. */
	dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
	ds_next->ds_phys->ds_prev_snap_obj =
	    ds->ds_phys->ds_prev_snap_obj;
	ds_next->ds_phys->ds_prev_snap_txg =
	    ds->ds_phys->ds_prev_snap_txg;
	ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
	    ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0);

	if (ds_next->ds_deadlist.dl_oldfmt) {
		process_old_deadlist(ds, ds_prev, ds_next,
		    after_branch_point, tx);
	} else {
		/* Adjust prev's unique space. */
		if (ds_prev && !after_branch_point) {
			dsl_deadlist_space_range(&ds_next->ds_deadlist,
			    ds_prev->ds_phys->ds_prev_snap_txg,
			    ds->ds_phys->ds_prev_snap_txg,
			    &used, &comp, &uncomp);
			ds_prev->ds_phys->ds_unique_bytes += used;
		}

		/* Adjust snapused. */
		dsl_deadlist_space_range(&ds_next->ds_deadlist,
		    ds->ds_phys->ds_prev_snap_txg, UINT64_MAX,
		    &used, &comp, &uncomp);
		dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
		    -used, -comp, -uncomp, tx);

		/* Move blocks to be freed to pool's free list. */
		dsl_deadlist_move_bpobj(&ds_next->ds_deadlist,
		    &dp->dp_free_bpobj, ds->ds_phys->ds_prev_snap_txg,
		    tx);
		dsl_dir_diduse_space(tx->tx_pool->dp_free_dir,
		    DD_USED_HEAD, used, comp, uncomp, tx);

		/* Merge our deadlist into next's and free it. */
		dsl_deadlist_merge(&ds_next->ds_deadlist,
		    ds->ds_phys->ds_deadlist_obj, tx);
	}
	dsl_deadlist_close(&ds->ds_deadlist);
	dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
	dmu_buf_will_dirty(ds->ds_dbuf, tx);
	ds->ds_phys->ds_deadlist_obj = 0;

	/* Collapse range in clone heads */
	dsl_dataset_remove_clones_key(ds,
	    ds->ds_phys->ds_creation_txg, tx);

	if (dsl_dataset_is_snapshot(ds_next)) {
		dsl_dataset_t *ds_nextnext;

		/*
		 * Update next's unique to include blocks which
		 * were previously shared by only this snapshot
		 * and it.  Those blocks will be born after the
		 * prev snap and before this snap, and will have
		 * died after the next snap and before the one
		 * after that (ie. be on the snap after next's
		 * deadlist).
		 */
		VERIFY0(dsl_dataset_hold_obj(dp,
		    ds_next->ds_phys->ds_next_snap_obj, FTAG, &ds_nextnext));
		dsl_deadlist_space_range(&ds_nextnext->ds_deadlist,
		    ds->ds_phys->ds_prev_snap_txg,
		    ds->ds_phys->ds_creation_txg,
		    &used, &comp, &uncomp);
		ds_next->ds_phys->ds_unique_bytes += used;
		dsl_dataset_rele(ds_nextnext, FTAG);
		ASSERT3P(ds_next->ds_prev, ==, NULL);

		/* Collapse range in this head. */
		dsl_dataset_t *hds;
		VERIFY0(dsl_dataset_hold_obj(dp,
		    ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &hds));
		dsl_deadlist_remove_key(&hds->ds_deadlist,
		    ds->ds_phys->ds_creation_txg, tx);
		dsl_dataset_rele(hds, FTAG);

	} else {
		/*
		 * ds_next is the head and holds us as its ds_prev;
		 * swap that hold over to our own previous snapshot.
		 */
		ASSERT3P(ds_next->ds_prev, ==, ds);
		dsl_dataset_rele(ds_next->ds_prev, ds_next);
		ds_next->ds_prev = NULL;
		if (ds_prev) {
			VERIFY0(dsl_dataset_hold_obj(dp,
			    ds->ds_phys->ds_prev_snap_obj,
			    ds_next, &ds_next->ds_prev));
		}

		dsl_dataset_recalc_head_uniq(ds_next);

		/*
		 * Reduce the amount of our unconsumed refreservation
		 * being charged to our parent by the amount of
		 * new unique data we have gained.
		 */
		if (old_unique < ds_next->ds_reserved) {
			int64_t mrsdelta;
			uint64_t new_unique =
			    ds_next->ds_phys->ds_unique_bytes;

			ASSERT(old_unique <= new_unique);
			mrsdelta = MIN(new_unique - old_unique,
			    ds_next->ds_reserved - old_unique);
			dsl_dir_diduse_space(ds->ds_dir,
			    DD_USED_REFRSRV, -mrsdelta, 0, 0, tx);
		}
	}
	dsl_dataset_rele(ds_next, FTAG);

	/*
	 * This must be done after the dsl_traverse(), because it will
	 * re-open the objset.
	 */
	if (ds->ds_objset) {
		dmu_objset_evict(ds->ds_objset);
		ds->ds_objset = NULL;
	}

	/* remove from snapshot namespace */
	dsl_dataset_t *ds_head;
	ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0);
	VERIFY0(dsl_dataset_hold_obj(dp,
	    ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head));
	VERIFY0(dsl_dataset_get_snapname(ds));
#ifdef ZFS_DEBUG
	{
		/* Cross-check that the name really maps to this object. */
		uint64_t val;
		int err;

		err = dsl_dataset_snap_lookup(ds_head,
		    ds->ds_snapname, &val);
		ASSERT0(err);
		ASSERT3U(val, ==, obj);
	}
#endif
	VERIFY0(dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx));
	dsl_dataset_rele(ds_head, FTAG);

	if (ds_prev != NULL)
		dsl_dataset_rele(ds_prev, FTAG);

	spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);

	if (ds->ds_phys->ds_next_clones_obj != 0) {
		uint64_t count;

		/*
		 * The clones ZAP must be readable and empty before it is
		 * freed.  (The previous ASSERT0(zap_count() && count == 0)
		 * form only fired when zap_count() failed, and read an
		 * uninitialized count in that case.)
		 */
		ASSERT(zap_count(mos,
		    ds->ds_phys->ds_next_clones_obj, &count) == 0 &&
		    count == 0);
		VERIFY0(dmu_object_free(mos,
		    ds->ds_phys->ds_next_clones_obj, tx));
	}
	if (ds->ds_phys->ds_props_obj != 0)
		VERIFY0(zap_destroy(mos, ds->ds_phys->ds_props_obj, tx));
	if (ds->ds_phys->ds_userrefs_obj != 0)
		VERIFY0(zap_destroy(mos, ds->ds_phys->ds_userrefs_obj, tx));

	/* Drop the dataset's hold on its directory, then free the object. */
	dsl_dir_rele(ds->ds_dir, ds);
	ds->ds_dir = NULL;
	VERIFY0(dmu_object_free(mos, obj, tx));
}
/*
 * Sync callback for the snapshot-destroy task: destroys (or defers) every
 * snapshot named in dsda_successful_snaps.  Each name must still resolve;
 * a failed hold is fatal (VERIFY0).
 */
static void
dsl_destroy_snapshot_sync(void *arg, dmu_tx_t *tx)
{
	dmu_snapshots_destroy_arg_t *dsda = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	nvlist_t *snaps = dsda->dsda_successful_snaps;
	nvpair_t *elem = NULL;

	while ((elem = nvlist_next_nvpair(snaps, elem)) != NULL) {
		dsl_dataset_t *ds;

		VERIFY0(dsl_dataset_hold(dp, nvpair_name(elem), FTAG, &ds));

		dsl_destroy_snapshot_sync_impl(ds, dsda->dsda_defer, tx);
		dsl_dataset_rele(ds, FTAG);
	}
}
/*
 * Destroy (or defer-destroy) a set of snapshots with one sync task.
 * The full semantics are described in the comment above
 * lzc_destroy_snaps().  In brief:
 *
 *  - All snapshots must be in the same pool; the pool is taken from the
 *    first name in `snaps`.
 *  - Snapshots that do not exist are silently ignored (treated as
 *    "already deleted").
 *  - On success every snapshot is destroyed and 0 is returned.
 *  - On failure no snapshot is destroyed, `errlist` is filled in, and an
 *    errno is returned.
 *
 * An empty `snaps` list is a no-op that returns 0.
 */
int
dsl_destroy_snapshots_nvl(nvlist_t *snaps, boolean_t defer,
    nvlist_t *errlist)
{
	dmu_snapshots_destroy_arg_t dsda;
	nvpair_t *first = nvlist_next_nvpair(snaps, NULL);
	int error;

	if (first == NULL)
		return (0);

	dsda.dsda_snaps = snaps;
	dsda.dsda_defer = defer;
	dsda.dsda_errlist = errlist;
	/* Scratch list filled by the check callback, read by the sync one. */
	dsda.dsda_successful_snaps = fnvlist_alloc();

	error = dsl_sync_task(nvpair_name(first),
	    dsl_destroy_snapshot_check, dsl_destroy_snapshot_sync,
	    &dsda, 0);

	fnvlist_free(dsda.dsda_successful_snaps);
	return (error);
}
/*
 * Convenience wrapper: destroy (or defer-destroy) the single snapshot
 * `name` by building a one-element list for dsl_destroy_snapshots_nvl().
 * The per-snapshot error list is discarded; only the overall errno is
 * returned.
 */
int
dsl_destroy_snapshot(const char *name, boolean_t defer)
{
	nvlist_t *snaps = fnvlist_alloc();
	nvlist_t *errs = fnvlist_alloc();
	int error;

	fnvlist_add_boolean(snaps, name);
	error = dsl_destroy_snapshots_nvl(snaps, defer, errs);

	fnvlist_free(errs);
	fnvlist_free(snaps);
	return (error);
}
/*
 * Argument passed to the kill_blkptr() traversal callback by
 * old_synchronous_dataset_destroy().
 */
struct killarg {
dsl_dataset_t *ds;	/* dataset whose blocks are being killed */
dmu_tx_t *tx;	/* transaction the frees are charged to */
};
/*
 * traverse_dataset() callback that releases one block of a dataset that
 * is being destroyed.  A NULL bp is ignored.  Intent-log blocks
 * (zb_level == ZB_ZIL_LEVEL) are freed directly; every other block goes
 * through dsl_dataset_block_kill().  Always returns 0.
 */
/* ARGSUSED */
static int
kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
    const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
{
	struct killarg *ka = arg;
	dmu_tx_t *tx = ka->tx;

	if (bp == NULL)
		return (0);

	if (zb->zb_level != ZB_ZIL_LEVEL) {
		ASSERT(zilog == NULL);
		ASSERT3U(bp->blk_birth, >, ka->ds->ds_phys->ds_prev_snap_txg);
		(void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE);
		return (0);
	}

	/*
	 * It's a block in the intent log.  It has no accounting,
	 * so just free it.
	 */
	ASSERT(zilog != NULL);
	dsl_free(tx->tx_pool, tx->tx_txg, bp);
	return (0);
}
/*
 * Synchronously free every block of `ds` born after its previous
 * snapshot by traversing the dataset with kill_blkptr().  A traversal
 * failure is fatal (VERIFY0).
 */
static void
old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	struct killarg ka = { .ds = ds, .tx = tx };

	/*
	 * Free everything that we point to (that's born after
	 * the previous snapshot, if we are a clone).
	 *
	 * NB: this should be very quick, because we already
	 * freed all the objects in open context.
	 */
	VERIFY0(traverse_dataset(ds,
	    ds->ds_phys->ds_prev_snap_txg, TRAVERSE_POST,
	    kill_blkptr, &ka));

	/* If the unique-bytes count is trusted, it must now be zero. */
	ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == 0);
}
/*
 * Argument for the head-dataset destroy callbacks; dsl_destroy_head_check()
 * resolves ddha_name with dsl_dataset_hold().
 */
typedef struct dsl_destroy_head_arg {
const char *ddha_name;	/* name of the head dataset to destroy */
} dsl_destroy_head_arg_t;
/*
 * Decide whether the head dataset `ds` may be destroyed.
 *
 * expected_holds is the number of long holds the caller expects to be
 * outstanding on ds; any other count is treated as "busy".
 *
 * Returns 0 if destruction may proceed, otherwise:
 *   EINVAL - ds is a snapshot, not a head
 *   EBUSY  - unexpected long-hold count, ds still has snapshots of its
 *            own, or the origin snapshot that would be removed along
 *            with this clone is long-held
 *   EEXIST - the directory still has child directories
 *   other  - error from zap_count() on the child-directory ZAP
 */
int
dsl_destroy_head_check_impl(dsl_dataset_t *ds, int expected_holds)
{
	objset_t *mos;
	uint64_t nchildren;
	int error;

	if (dsl_dataset_is_snapshot(ds))
		return (EINVAL);

	if (refcount_count(&ds->ds_longholds) != expected_holds)
		return (EBUSY);

	/*
	 * Can't delete a head dataset if there are snapshots of it.
	 * (Except if the only snapshots are from the branch we cloned
	 * from.)
	 */
	if (ds->ds_prev != NULL &&
	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
		return (EBUSY);

	/* Can't delete if there are children of this fs. */
	mos = ds->ds_dir->dd_pool->dp_meta_objset;
	error = zap_count(mos,
	    ds->ds_dir->dd_phys->dd_child_dir_zapobj, &nchildren);
	if (error != 0)
		return (error);
	if (nchildren != 0)
		return (EEXIST);

	/*
	 * If we are the last clone of a defer-destroyed origin with no
	 * user references, the origin snapshot must be removed as well,
	 * so it must not be long-held either.
	 */
	if (dsl_dir_is_clone(ds->ds_dir) &&
	    DS_IS_DEFER_DESTROY(ds->ds_prev) &&
	    ds->ds_prev->ds_phys->ds_num_children == 2 &&
	    ds->ds_prev->ds_userrefs == 0 &&
	    !refcount_is_zero(&ds->ds_prev->ds_longholds))
		return (EBUSY);

	return (0);
}
/*
 * Check callback for destroying a head dataset: holds the dataset named
 * by ddha_name, runs dsl_destroy_head_check_impl() with zero expected
 * long holds, and releases it again.  Returns the hold error or the
 * check result.
 */
static int
dsl_destroy_head_check(void *arg, dmu_tx_t *tx)
{
	dsl_destroy_head_arg_t *ddha = arg;
	dsl_dataset_t *ds;
	int error;

	error = dsl_dataset_hold(dmu_tx_pool(tx), ddha->ddha_name, FTAG, &ds);
	if (error == 0) {
		error = dsl_destroy_head_check_impl(ds, 0);
		dsl_dataset_rele(ds, FTAG);
	}
	return (error);
}
/*
 * Destroy the dsl_dir whose object number is ddobj, in transaction tx.
 *
 * The directory must no longer have a head dataset (asserted).  Its
 * reservation is cleared, its child-dir, props and delegation ZAP objects
 * are destroyed, its entry is removed from the parent's child-dir ZAP,
 * and finally the directory object itself is freed from the MOS.
 * The pool config lock must be held as writer (asserted).
 */
static void
dsl_dir_destroy_sync(uint64_t ddobj, dmu_tx_t *tx)
{
	dsl_pool_t *dp = dmu_tx_pool(tx);
	objset_t *mos = dp->dp_meta_objset;
	dsl_dir_t *dd;
	dd_used_t which;

	ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));

	VERIFY0(dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd));

	ASSERT0(dd->dd_phys->dd_head_dataset_obj);

	/*
	 * Drop the reservation through the impl() routine: setting the
	 * real property would need the dataset, which is already gone.
	 */
	dsl_dir_set_reservation_sync_impl(dd, 0, tx);

	/* Nothing may be charged to this directory any more. */
	ASSERT0(dd->dd_phys->dd_used_bytes);
	ASSERT0(dd->dd_phys->dd_reserved);
	for (which = 0; which < DD_USED_NUM; which++) {
		ASSERT0(dd->dd_phys->dd_used_breakdown[which]);
	}

	/* Tear down the directory's own ZAP objects. */
	VERIFY0(zap_destroy(mos, dd->dd_phys->dd_child_dir_zapobj, tx));
	VERIFY0(zap_destroy(mos, dd->dd_phys->dd_props_zapobj, tx));
	VERIFY0(dsl_deleg_destroy(mos, dd->dd_phys->dd_deleg_zapobj, tx));

	/* Unlink it from the parent's namespace. */
	VERIFY0(zap_remove(mos, dd->dd_parent->dd_phys->dd_child_dir_zapobj,
	    dd->dd_myname, tx));

	/* Release our hold before freeing the underlying object. */
	dsl_dir_rele(dd, FTAG);
	VERIFY0(dmu_object_free(mos, ddobj, tx));
}
/*
* Destroy the head dataset ds in transaction tx.  Invoked from the sync
* task callback dsl_destroy_head_sync() with ds held; the pool config lock
* must be held as writer (asserted below).
*
* In order, this: logs the destroy, drops the refreservation, detaches a
* clone from its origin's bookkeeping, frees the deadlist, frees the
* dataset's blocks (synchronously, or by queueing its block tree on the
* pool's bptree when async_destroy is enabled), releases ds_prev, evicts
* the objset, unlinks and frees the dataset object, destroys the dsl_dir,
* and - if this was the last clone of a defer-destroyed origin - destroys
* the origin snapshot as well.
*
* On return ds->ds_prev and ds->ds_dir have been released and set to NULL.
*/
void
dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
{
dsl_pool_t *dp = dmu_tx_pool(tx);
objset_t *mos = dp->dp_meta_objset;
uint64_t obj, ddobj, prevobj = 0;
boolean_t rmorigin;
ASSERT3U(ds->ds_phys->ds_num_children, <=, 1);
ASSERT(ds->ds_prev == NULL ||
ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);
ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
/* We need to log before removing it from the namespace. */
spa_history_log_internal_ds(ds, "destroy", tx, "");
/*
* Decide up front whether the origin snapshot has to go too: both
* ds->ds_dir and ds->ds_prev are released (and NULLed) further down,
* before the origin is actually destroyed.  This is the same condition
* that dsl_destroy_head_check_impl() tests.
*/
rmorigin = (dsl_dir_is_clone(ds->ds_dir) &&
DS_IS_DEFER_DESTROY(ds->ds_prev) &&
ds->ds_prev->ds_phys->ds_num_children == 2 &&
ds->ds_prev->ds_userrefs == 0);
/* Remove our reservation */
if (ds->ds_reserved != 0) {
dsl_dataset_set_refreservation_sync_impl(ds,
(ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED),
0, tx);
ASSERT0(ds->ds_reserved);
}
dsl_scan_ds_destroyed(ds, tx);
/* Saved now; used for the final dmu_object_free() of the dataset. */
obj = ds->ds_object;
if (ds->ds_phys->ds_prev_snap_obj != 0) {
/* This is a clone */
ASSERT(ds->ds_prev != NULL);
ASSERT3U(ds->ds_prev->ds_phys->ds_next_snap_obj, !=, obj);
ASSERT0(ds->ds_phys->ds_next_snap_obj);
/* The origin's on-disk state is about to change (clone list, count). */
dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
if (ds->ds_prev->ds_phys->ds_next_clones_obj != 0) {
dsl_dataset_remove_from_next_clones(ds->ds_prev,
obj, tx);
}
ASSERT3U(ds->ds_prev->ds_phys->ds_num_children, >, 1);
ds->ds_prev->ds_phys->ds_num_children--;
}
/*
* Feature descriptor used below to choose between the synchronous and
* the asynchronous way of freeing the dataset's blocks.
*/
zfeature_info_t *async_destroy =
&spa_feature_table[SPA_FEATURE_ASYNC_DESTROY];
objset_t *os;
/*
* Destroy the deadlist. Unless it's a clone, the
* deadlist should be empty. (If it's a clone, it's
* safe to ignore the deadlist contents.)
*/
dsl_deadlist_close(&ds->ds_deadlist);
dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
dmu_buf_will_dirty(ds->ds_dbuf, tx);
ds->ds_phys->ds_deadlist_obj = 0;
VERIFY0(dmu_objset_from_ds(ds, &os));
if (!spa_feature_is_enabled(dp->dp_spa, async_destroy)) {
/* Feature not enabled: free the blocks right here, in this txg. */
old_synchronous_dataset_destroy(ds, tx);
} else {
/*
* Move the bptree into the pool's list of trees to
* clean up and update space accounting information.
*/
uint64_t used, comp, uncomp;
zil_destroy_sync(dmu_objset_zil(os), tx);
/*
* First use of the feature on this pool: mark it active via
* spa_feature_incr(), allocate the pool's bptree object and
* record its object number in the pool directory.
*/
if (!spa_feature_is_active(dp->dp_spa, async_destroy)) {
spa_feature_incr(dp->dp_spa, async_destroy, tx);
dp->dp_bptree_obj = bptree_alloc(mos, tx);
VERIFY0(zap_add(mos,
DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
&dp->dp_bptree_obj, tx));
}
used = ds->ds_dir->dd_phys->dd_used_bytes;
comp = ds->ds_dir->dd_phys->dd_compressed_bytes;
uncomp = ds->ds_dir->dd_phys->dd_uncompressed_bytes;
ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
ds->ds_phys->ds_unique_bytes == used);
bptree_add(mos, dp->dp_bptree_obj,
&ds->ds_phys->ds_bp, ds->ds_phys->ds_prev_snap_txg,
used, comp, uncomp, tx);
/*
* Transfer the space accounting: take it off this dataset's
* directory and charge the same amounts to the pool's free dir.
*/
dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
-used, -comp, -uncomp, tx);
dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
used, comp, uncomp, tx);
}
if (ds->ds_prev != NULL) {
/* Drop this clone from the origin directory's dd_clones ZAP. */
if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
VERIFY0(zap_remove_int(mos,
ds->ds_prev->ds_dir->dd_phys->dd_clones,
ds->ds_object, tx));
}
/* Remember the origin so it can be re-held for the rmorigin step. */
prevobj = ds->ds_prev->ds_object;
dsl_dataset_rele(ds->ds_prev, ds);
ds->ds_prev = NULL;
}
/*
* This must be done after the dsl_traverse(), because it will
* re-open the objset.
*/
if (ds->ds_objset) {
dmu_objset_evict(ds->ds_objset);
ds->ds_objset = NULL;
}
/* Erase the link in the dir */
dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
ds->ds_dir->dd_phys->dd_head_dataset_obj = 0;
/* Save the dir's object number; ds->ds_dir is released just below. */
ddobj = ds->ds_dir->dd_object;
ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0);
VERIFY0(zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx));
spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
ASSERT0(ds->ds_phys->ds_next_clones_obj);
ASSERT0(ds->ds_phys->ds_props_obj);
ASSERT0(ds->ds_phys->ds_userrefs_obj);
dsl_dir_rele(ds->ds_dir, ds);
ds->ds_dir = NULL;
/* Free the dataset object itself, then the directory that contained it. */
VERIFY0(dmu_object_free(mos, obj, tx));
dsl_dir_destroy_sync(ddobj, tx);
if (rmorigin) {
/*
* ds_prev was released above, so take a fresh hold on the origin by
* object number and destroy it (B_FALSE: not a deferred destroy).
*/
dsl_dataset_t *prev;
VERIFY0(dsl_dataset_hold_obj(dp, prevobj, FTAG, &prev));
dsl_destroy_snapshot_sync_impl(prev, B_FALSE, tx);
dsl_dataset_rele(prev, FTAG);
}
}
/*
 * Sync-task sync callback for dsl_destroy_head().  Holds the dataset named
 * in the dsl_destroy_head_arg_t (this must succeed), destroys it through
 * dsl_destroy_head_sync_impl(), and drops the hold.
 */
static void
dsl_destroy_head_sync(void *arg, dmu_tx_t *tx)
{
	dsl_destroy_head_arg_t *ddha = arg;
	dsl_dataset_t *head;

	VERIFY0(dsl_dataset_hold(dmu_tx_pool(tx), ddha->ddha_name, FTAG,
	    &head));
	dsl_destroy_head_sync_impl(head, tx);
	dsl_dataset_rele(head, FTAG);
}
/*
 * First-phase sync callback used by dsl_destroy_head() on pools where
 * async_destroy is not enabled.  Sets DS_FLAG_INCONSISTENT on the named
 * dataset and records a "destroy begin" history entry.
 */
static void
dsl_destroy_head_begin_sync(void *arg, dmu_tx_t *tx)
{
	dsl_destroy_head_arg_t *ddha = arg;
	dsl_dataset_t *head;

	VERIFY0(dsl_dataset_hold(dmu_tx_pool(tx), ddha->ddha_name, FTAG,
	    &head));

	/*
	 * Flag the dataset inconsistent on disk so that a crash in the
	 * middle of the destroy leaves a recognizable marker behind.
	 */
	dmu_buf_will_dirty(head->ds_dbuf, tx);
	head->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;

	spa_history_log_internal_ds(head, "destroy begin", tx, "");

	dsl_dataset_rele(head, FTAG);
}
/*
 * Destroy the head dataset called name.
 *
 * On pools with the async_destroy feature enabled this is a single sync
 * task.  On older pools the dataset is first marked inconsistent, then its
 * objects are freed from open context and synced out, and only then is
 * the final destroy sync task issued.
 *
 * Returns 0 on success, or the error from spa_open() or from a sync task.
 * A failure to own the objset for the open-context freeing pass is not
 * returned; the final sync task is attempted regardless.
 */
int
dsl_destroy_head(const char *name)
{
	dsl_destroy_head_arg_t ddha;
	boolean_t async_enabled;
	spa_t *spa;
	int error;

#ifdef _KERNEL
	zfs_destroy_unmount_origin(name);
#endif

	/* Only the feature state is needed; don't keep the pool open. */
	error = spa_open(name, &spa, FTAG);
	if (error != 0)
		return (error);
	async_enabled = spa_feature_is_enabled(spa,
	    &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY]);
	spa_close(spa, FTAG);

	ddha.ddha_name = name;

	if (!async_enabled) {
		objset_t *os;

		error = dsl_sync_task(name, dsl_destroy_head_check,
		    dsl_destroy_head_begin_sync, &ddha, 0);
		if (error != 0)
			return (error);

		/*
		 * Old pools process the head deletion within one txg, so
		 * get rid of the objects from open context first; that
		 * keeps the txg sync from taking too long.
		 */
		error = dmu_objset_own(name, DMU_OST_ANY, B_FALSE, FTAG, &os);
		if (error == 0) {
			uint64_t prev_snap_txg =
			    dmu_objset_ds(os)->ds_phys->ds_prev_snap_txg;
			uint64_t obj = 0;

			/*
			 * Free the current object, then advance; stop as
			 * soon as dmu_object_next() reports an error.
			 */
			do {
				(void) dmu_free_object(os, obj);
				error = dmu_object_next(os, &obj, FALSE,
				    prev_snap_txg);
			} while (error == 0);

			/* sync out all frees */
			txg_wait_synced(dmu_objset_pool(os), 0);
			dmu_objset_disown(os, FTAG);
		}
	}

	return (dsl_sync_task(name, dsl_destroy_head_check,
	    dsl_destroy_head_sync, &ddha, 0));
}
/*
 * Callback for dmu_objset_find(): destroy dsname if it is flagged
 * inconsistent.
 *
 * The return value is always 0, even when holding or destroying the
 * dataset fails, so that the walk carries on and the remaining
 * inconsistent datasets are still found and processed.
 */
/* ARGSUSED */
int
dsl_destroy_inconsistent(const char *dsname, void *arg)
{
	objset_t *os;
	boolean_t inconsistent;

	/* Can't even look at it: skip this one, keep the walk going. */
	if (dmu_objset_hold(dsname, FTAG, &os) != 0)
		return (0);

	/* Sample the flag, then let go of the objset before destroying. */
	inconsistent = DS_IS_INCONSISTENT(dmu_objset_ds(os));
	dmu_objset_rele(os, FTAG);

	if (inconsistent)
		(void) dsl_destroy_head(dsname);

	return (0);
}

View File

@ -46,8 +46,6 @@
#include "zfs_namecheck.h"
static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd);
static void dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd,
uint64_t value, dmu_tx_t *tx);
/* ARGSUSED */
static void
@ -64,7 +62,7 @@ dsl_dir_evict(dmu_buf_t *db, void *arg)
}
if (dd->dd_parent)
dsl_dir_close(dd->dd_parent, dd);
dsl_dir_rele(dd->dd_parent, dd);
spa_close(dd->dd_pool->dp_spa, dd);
@ -78,18 +76,17 @@ dsl_dir_evict(dmu_buf_t *db, void *arg)
}
int
dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
const char *tail, void *tag, dsl_dir_t **ddp)
{
dmu_buf_t *dbuf;
dsl_dir_t *dd;
int err;
ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
dsl_pool_sync_context(dp));
ASSERT(dsl_pool_config_held(dp));
err = dmu_bonus_hold(dp->dp_meta_objset, ddobj, tag, &dbuf);
if (err)
if (err != 0)
return (err);
dd = dmu_buf_get_user(dbuf);
#ifdef ZFS_DEBUG
@ -116,9 +113,9 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
dsl_dir_snap_cmtime_update(dd);
if (dd->dd_phys->dd_parent_obj) {
err = dsl_dir_open_obj(dp, dd->dd_phys->dd_parent_obj,
err = dsl_dir_hold_obj(dp, dd->dd_phys->dd_parent_obj,
NULL, dd, &dd->dd_parent);
if (err)
if (err != 0)
goto errout;
if (tail) {
#ifdef ZFS_DEBUG
@ -135,7 +132,7 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
dd->dd_parent->dd_phys->dd_child_dir_zapobj,
ddobj, 0, dd->dd_myname);
}
if (err)
if (err != 0)
goto errout;
} else {
(void) strcpy(dd->dd_myname, spa_name(dp->dp_spa));
@ -152,7 +149,7 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
*/
err = dmu_bonus_hold(dp->dp_meta_objset,
dd->dd_phys->dd_origin_obj, FTAG, &origin_bonus);
if (err)
if (err != 0)
goto errout;
origin_phys = origin_bonus->db_data;
dd->dd_origin_txg =
@ -164,7 +161,7 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
dsl_dir_evict);
if (winner) {
if (dd->dd_parent)
dsl_dir_close(dd->dd_parent, dd);
dsl_dir_rele(dd->dd_parent, dd);
mutex_destroy(&dd->dd_lock);
kmem_free(dd, sizeof (dsl_dir_t));
dd = winner;
@ -191,7 +188,7 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
errout:
if (dd->dd_parent)
dsl_dir_close(dd->dd_parent, dd);
dsl_dir_rele(dd->dd_parent, dd);
mutex_destroy(&dd->dd_lock);
kmem_free(dd, sizeof (dsl_dir_t));
dmu_buf_rele(dbuf, tag);
@ -199,7 +196,7 @@ errout:
}
void
dsl_dir_close(dsl_dir_t *dd, void *tag)
dsl_dir_rele(dsl_dir_t *dd, void *tag)
{
dprintf_dd(dd, "%s\n", "");
spa_close(dd->dd_pool->dp_spa, tag);
@ -256,6 +253,7 @@ static int
getcomponent(const char *path, char *component, const char **nextp)
{
char *p;
if ((path == NULL) || (path[0] == '\0'))
return (ENOENT);
/* This would be a good place to reserve some namespace... */
@ -278,10 +276,10 @@ getcomponent(const char *path, char *component, const char **nextp)
(void) strcpy(component, path);
p = NULL;
} else if (p[0] == '/') {
if (p-path >= MAXNAMELEN)
if (p - path >= MAXNAMELEN)
return (ENAMETOOLONG);
(void) strncpy(component, path, p - path);
component[p-path] = '\0';
component[p - path] = '\0';
p++;
} else if (p[0] == '@') {
/*
@ -290,65 +288,54 @@ getcomponent(const char *path, char *component, const char **nextp)
*/
if (strchr(path, '/'))
return (EINVAL);
if (p-path >= MAXNAMELEN)
if (p - path >= MAXNAMELEN)
return (ENAMETOOLONG);
(void) strncpy(component, path, p - path);
component[p-path] = '\0';
component[p - path] = '\0';
} else {
ASSERT(!"invalid p");
panic("invalid p=%p", (void *)p);
}
*nextp = p;
return (0);
}
/*
* same as dsl_open_dir, ignore the first component of name and use the
* spa instead
* Return the dsl_dir_t, and possibly the last component which couldn't
* be found in *tail. The name must be in the specified dsl_pool_t. This
* thread must hold the dp_config_rwlock for the pool. Returns NULL if the
* path is bogus, or if tail==NULL and we couldn't parse the whole name.
* (*tail)[0] == '@' means that the last component is a snapshot.
*/
int
dsl_dir_open_spa(spa_t *spa, const char *name, void *tag,
dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag,
dsl_dir_t **ddp, const char **tailp)
{
char buf[MAXNAMELEN];
const char *next, *nextnext = NULL;
const char *spaname, *next, *nextnext = NULL;
int err;
dsl_dir_t *dd;
dsl_pool_t *dp;
uint64_t ddobj;
int openedspa = FALSE;
dprintf("%s\n", name);
err = getcomponent(name, buf, &next);
if (err)
if (err != 0)
return (err);
if (spa == NULL) {
err = spa_open(buf, &spa, FTAG);
if (err) {
dprintf("spa_open(%s) failed\n", buf);
return (err);
}
openedspa = TRUE;
/* XXX this assertion belongs in spa_open */
ASSERT(!dsl_pool_sync_context(spa_get_dsl(spa)));
}
/* Make sure the name is in the specified pool. */
spaname = spa_name(dp->dp_spa);
if (strcmp(buf, spaname) != 0)
return (EINVAL);
dp = spa_get_dsl(spa);
ASSERT(dsl_pool_config_held(dp));
rw_enter(&dp->dp_config_rwlock, RW_READER);
err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd);
if (err) {
rw_exit(&dp->dp_config_rwlock);
if (openedspa)
spa_close(spa, FTAG);
err = dsl_dir_hold_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd);
if (err != 0) {
return (err);
}
while (next != NULL) {
dsl_dir_t *child_ds;
err = getcomponent(next, buf, &nextnext);
if (err)
if (err != 0)
break;
ASSERT(next[0] != '\0');
if (next[0] == '@')
@ -359,25 +346,22 @@ dsl_dir_open_spa(spa_t *spa, const char *name, void *tag,
err = zap_lookup(dp->dp_meta_objset,
dd->dd_phys->dd_child_dir_zapobj,
buf, sizeof (ddobj), 1, &ddobj);
if (err) {
if (err != 0) {
if (err == ENOENT)
err = 0;
break;
}
err = dsl_dir_open_obj(dp, ddobj, buf, tag, &child_ds);
if (err)
err = dsl_dir_hold_obj(dp, ddobj, buf, tag, &child_ds);
if (err != 0)
break;
dsl_dir_close(dd, tag);
dsl_dir_rele(dd, tag);
dd = child_ds;
next = nextnext;
}
rw_exit(&dp->dp_config_rwlock);
if (err) {
dsl_dir_close(dd, tag);
if (openedspa)
spa_close(spa, FTAG);
if (err != 0) {
dsl_dir_rele(dd, tag);
return (err);
}
@ -388,30 +372,16 @@ dsl_dir_open_spa(spa_t *spa, const char *name, void *tag,
if (next != NULL &&
(tailp == NULL || (nextnext && nextnext[0] != '\0'))) {
/* bad path name */
dsl_dir_close(dd, tag);
dsl_dir_rele(dd, tag);
dprintf("next=%p (%s) tail=%p\n", next, next?next:"", tailp);
err = ENOENT;
}
if (tailp)
if (tailp != NULL)
*tailp = next;
if (openedspa)
spa_close(spa, FTAG);
*ddp = dd;
return (err);
}
/*
* Return the dsl_dir_t, and possibly the last component which couldn't
* be found in *tail. Return NULL if the path is bogus, or if
* tail==NULL and we couldn't parse the whole name. (*tail)[0] == '@'
* means that the last component is a snapshot.
*/
int
dsl_dir_open(const char *name, void *tag, dsl_dir_t **ddp, const char **tailp)
{
return (dsl_dir_open_spa(NULL, name, tag, ddp, tailp));
}
uint64_t
dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name,
dmu_tx_t *tx)
@ -449,71 +419,6 @@ dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name,
return (ddobj);
}
/* ARGSUSED */
int
dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
dsl_dir_t *dd = arg1;
dsl_pool_t *dp = dd->dd_pool;
objset_t *mos = dp->dp_meta_objset;
int err;
uint64_t count;
/*
* There should be exactly two holds, both from
* dsl_dataset_destroy: one on the dd directory, and one on its
* head ds. If there are more holds, then a concurrent thread is
* performing a lookup inside this dir while we're trying to destroy
* it. To minimize this possibility, we perform this check only
* in syncing context and fail the operation if we encounter
* additional holds. The dp_config_rwlock ensures that nobody else
* opens it after we check.
*/
if (dmu_tx_is_syncing(tx) && dmu_buf_refcount(dd->dd_dbuf) > 2)
return (EBUSY);
err = zap_count(mos, dd->dd_phys->dd_child_dir_zapobj, &count);
if (err)
return (err);
if (count != 0)
return (EEXIST);
return (0);
}
void
dsl_dir_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
{
dsl_dir_t *dd = arg1;
objset_t *mos = dd->dd_pool->dp_meta_objset;
uint64_t obj;
dd_used_t t;
ASSERT(RW_WRITE_HELD(&dd->dd_pool->dp_config_rwlock));
ASSERT(dd->dd_phys->dd_head_dataset_obj == 0);
/*
* Remove our reservation. The impl() routine avoids setting the
* actual property, which would require the (already destroyed) ds.
*/
dsl_dir_set_reservation_sync_impl(dd, 0, tx);
ASSERT0(dd->dd_phys->dd_used_bytes);
ASSERT0(dd->dd_phys->dd_reserved);
for (t = 0; t < DD_USED_NUM; t++)
ASSERT0(dd->dd_phys->dd_used_breakdown[t]);
VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_child_dir_zapobj, tx));
VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_props_zapobj, tx));
VERIFY(0 == dsl_deleg_destroy(mos, dd->dd_phys->dd_deleg_zapobj, tx));
VERIFY(0 == zap_remove(mos,
dd->dd_parent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, tx));
obj = dd->dd_object;
dsl_dir_close(dd, tag);
VERIFY(0 == dmu_object_free(mos, obj, tx));
}
boolean_t
dsl_dir_is_clone(dsl_dir_t *dd)
{
@ -551,18 +456,16 @@ dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv)
}
mutex_exit(&dd->dd_lock);
rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
if (dsl_dir_is_clone(dd)) {
dsl_dataset_t *ds;
char buf[MAXNAMELEN];
VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool,
VERIFY0(dsl_dataset_hold_obj(dd->dd_pool,
dd->dd_phys->dd_origin_obj, FTAG, &ds));
dsl_dataset_name(ds, buf);
dsl_dataset_rele(ds, FTAG);
dsl_prop_nvlist_add_string(nv, ZFS_PROP_ORIGIN, buf);
}
rw_exit(&dd->dd_pool->dp_config_rwlock);
}
void
@ -572,7 +475,7 @@ dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx)
ASSERT(dd->dd_phys);
if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg) == 0) {
if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg)) {
/* up the hold count until we can be written out */
dmu_buf_add_ref(dd->dd_dbuf, dd);
}
@ -859,7 +762,7 @@ dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize,
FALSE, asize > usize, tr_list, tx, TRUE);
}
if (err)
if (err != 0)
dsl_dir_tempreserve_clear(tr_list, tx);
else
*tr_cookiep = tr_list;
@ -1010,115 +913,123 @@ dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta,
mutex_exit(&dd->dd_lock);
}
typedef struct dsl_dir_set_qr_arg {
const char *ddsqra_name;
zprop_source_t ddsqra_source;
uint64_t ddsqra_value;
} dsl_dir_set_qr_arg_t;
static int
dsl_dir_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_dir_set_quota_check(void *arg, dmu_tx_t *tx)
{
dsl_dataset_t *ds = arg1;
dsl_dir_t *dd = ds->ds_dir;
dsl_prop_setarg_t *psa = arg2;
int err;
uint64_t towrite;
dsl_dir_set_qr_arg_t *ddsqra = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *ds;
int error;
uint64_t towrite, newval;
if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0)
return (err);
error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
if (error != 0)
return (error);
if (psa->psa_effective_value == 0)
error = dsl_prop_predict(ds->ds_dir, "quota",
ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);
if (error != 0) {
dsl_dataset_rele(ds, FTAG);
return (error);
}
if (newval == 0) {
dsl_dataset_rele(ds, FTAG);
return (0);
}
mutex_enter(&dd->dd_lock);
mutex_enter(&ds->ds_dir->dd_lock);
/*
* If we are doing the preliminary check in open context, and
* there are pending changes, then don't fail it, since the
* pending changes could under-estimate the amount of space to be
* freed up.
*/
towrite = dsl_dir_space_towrite(dd);
towrite = dsl_dir_space_towrite(ds->ds_dir);
if ((dmu_tx_is_syncing(tx) || towrite == 0) &&
(psa->psa_effective_value < dd->dd_phys->dd_reserved ||
psa->psa_effective_value < dd->dd_phys->dd_used_bytes + towrite)) {
err = ENOSPC;
(newval < ds->ds_dir->dd_phys->dd_reserved ||
newval < ds->ds_dir->dd_phys->dd_used_bytes + towrite)) {
error = ENOSPC;
}
mutex_exit(&dd->dd_lock);
return (err);
mutex_exit(&ds->ds_dir->dd_lock);
dsl_dataset_rele(ds, FTAG);
return (error);
}
extern dsl_syncfunc_t dsl_prop_set_sync;
static void
dsl_dir_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_dir_set_quota_sync(void *arg, dmu_tx_t *tx)
{
dsl_dataset_t *ds = arg1;
dsl_dir_t *dd = ds->ds_dir;
dsl_prop_setarg_t *psa = arg2;
uint64_t effective_value = psa->psa_effective_value;
dsl_dir_set_qr_arg_t *ddsqra = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *ds;
uint64_t newval;
dsl_prop_set_sync(ds, psa, tx);
DSL_PROP_CHECK_PREDICTION(dd, psa);
VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds));
dmu_buf_will_dirty(dd->dd_dbuf, tx);
dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_QUOTA),
ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1,
&ddsqra->ddsqra_value, tx);
mutex_enter(&dd->dd_lock);
dd->dd_phys->dd_quota = effective_value;
mutex_exit(&dd->dd_lock);
VERIFY0(dsl_prop_get_int_ds(ds,
zfs_prop_to_name(ZFS_PROP_QUOTA), &newval));
dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
mutex_enter(&ds->ds_dir->dd_lock);
ds->ds_dir->dd_phys->dd_quota = newval;
mutex_exit(&ds->ds_dir->dd_lock);
dsl_dataset_rele(ds, FTAG);
}
int
dsl_dir_set_quota(const char *ddname, zprop_source_t source, uint64_t quota)
{
dsl_dir_t *dd;
dsl_dataset_t *ds;
dsl_prop_setarg_t psa;
int err;
dsl_dir_set_qr_arg_t ddsqra;
dsl_prop_setarg_init_uint64(&psa, "quota", source, &quota);
ddsqra.ddsqra_name = ddname;
ddsqra.ddsqra_source = source;
ddsqra.ddsqra_value = quota;
err = dsl_dataset_hold(ddname, FTAG, &ds);
if (err)
return (err);
err = dsl_dir_open(ddname, FTAG, &dd, NULL);
if (err) {
dsl_dataset_rele(ds, FTAG);
return (err);
}
ASSERT(ds->ds_dir == dd);
/*
* If someone removes a file, then tries to set the quota, we want to
* make sure the file freeing takes effect.
*/
txg_wait_open(dd->dd_pool, 0);
err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_quota_check,
dsl_dir_set_quota_sync, ds, &psa, 0);
dsl_dir_close(dd, FTAG);
dsl_dataset_rele(ds, FTAG);
return (err);
return (dsl_sync_task(ddname, dsl_dir_set_quota_check,
dsl_dir_set_quota_sync, &ddsqra, 0));
}
int
dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_dir_set_reservation_check(void *arg, dmu_tx_t *tx)
{
dsl_dataset_t *ds = arg1;
dsl_dir_t *dd = ds->ds_dir;
dsl_prop_setarg_t *psa = arg2;
uint64_t effective_value;
uint64_t used, avail;
int err;
dsl_dir_set_qr_arg_t *ddsqra = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *ds;
dsl_dir_t *dd;
uint64_t newval, used, avail;
int error;
if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0)
return (err);
effective_value = psa->psa_effective_value;
error = dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds);
if (error != 0)
return (error);
dd = ds->ds_dir;
/*
* If we are doing the preliminary check in open context, the
* space estimates may be inaccurate.
*/
if (!dmu_tx_is_syncing(tx))
if (!dmu_tx_is_syncing(tx)) {
dsl_dataset_rele(ds, FTAG);
return (0);
}
error = dsl_prop_predict(ds->ds_dir,
zfs_prop_to_name(ZFS_PROP_RESERVATION),
ddsqra->ddsqra_source, ddsqra->ddsqra_value, &newval);
if (error != 0) {
dsl_dataset_rele(ds, FTAG);
return (error);
}
mutex_enter(&dd->dd_lock);
used = dd->dd_phys->dd_used_bytes;
@ -1131,21 +1042,21 @@ dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
avail = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE) - used;
}
if (MAX(used, effective_value) > MAX(used, dd->dd_phys->dd_reserved)) {
uint64_t delta = MAX(used, effective_value) -
if (MAX(used, newval) > MAX(used, dd->dd_phys->dd_reserved)) {
uint64_t delta = MAX(used, newval) -
MAX(used, dd->dd_phys->dd_reserved);
if (delta > avail)
return (ENOSPC);
if (dd->dd_phys->dd_quota > 0 &&
effective_value > dd->dd_phys->dd_quota)
return (ENOSPC);
if (delta > avail ||
(dd->dd_phys->dd_quota > 0 &&
newval > dd->dd_phys->dd_quota))
error = ENOSPC;
}
return (0);
dsl_dataset_rele(ds, FTAG);
return (error);
}
static void
void
dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, dmu_tx_t *tx)
{
uint64_t used;
@ -1167,51 +1078,38 @@ dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value, dmu_tx_t *tx)
}
static void
dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_dir_set_reservation_sync(void *arg, dmu_tx_t *tx)
{
dsl_dataset_t *ds = arg1;
dsl_dir_t *dd = ds->ds_dir;
dsl_prop_setarg_t *psa = arg2;
uint64_t value = psa->psa_effective_value;
dsl_dir_set_qr_arg_t *ddsqra = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *ds;
uint64_t newval;
dsl_prop_set_sync(ds, psa, tx);
DSL_PROP_CHECK_PREDICTION(dd, psa);
VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds));
dsl_dir_set_reservation_sync_impl(dd, value, tx);
dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_RESERVATION),
ddsqra->ddsqra_source, sizeof (ddsqra->ddsqra_value), 1,
&ddsqra->ddsqra_value, tx);
spa_history_log_internal_dd(dd, "set reservation", tx,
"reservation=%lld", (longlong_t)value);
VERIFY0(dsl_prop_get_int_ds(ds,
zfs_prop_to_name(ZFS_PROP_RESERVATION), &newval));
dsl_dir_set_reservation_sync_impl(ds->ds_dir, newval, tx);
dsl_dataset_rele(ds, FTAG);
}
int
dsl_dir_set_reservation(const char *ddname, zprop_source_t source,
uint64_t reservation)
{
dsl_dir_t *dd;
dsl_dataset_t *ds;
dsl_prop_setarg_t psa;
int err;
dsl_dir_set_qr_arg_t ddsqra;
dsl_prop_setarg_init_uint64(&psa, "reservation", source, &reservation);
ddsqra.ddsqra_name = ddname;
ddsqra.ddsqra_source = source;
ddsqra.ddsqra_value = reservation;
err = dsl_dataset_hold(ddname, FTAG, &ds);
if (err)
return (err);
err = dsl_dir_open(ddname, FTAG, &dd, NULL);
if (err) {
dsl_dataset_rele(ds, FTAG);
return (err);
}
ASSERT(ds->ds_dir == dd);
err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_reservation_check,
dsl_dir_set_reservation_sync, ds, &psa, 0);
dsl_dir_close(dd, FTAG);
dsl_dataset_rele(ds, FTAG);
return (err);
return (dsl_sync_task(ddname, dsl_dir_set_reservation_check,
dsl_dir_set_reservation_sync, &ddsqra, 0));
}
static dsl_dir_t *
@ -1243,84 +1141,125 @@ would_change(dsl_dir_t *dd, int64_t delta, dsl_dir_t *ancestor)
return (would_change(dd->dd_parent, delta, ancestor));
}
struct renamearg {
dsl_dir_t *newparent;
const char *mynewname;
boolean_t allowmounted;
};
typedef struct dsl_dir_rename_arg {
const char *ddra_oldname;
const char *ddra_newname;
} dsl_dir_rename_arg_t;
/* ARGSUSED */
static int
dsl_valid_rename(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
{
int *deltap = arg;
char namebuf[MAXNAMELEN];
dsl_dataset_name(ds, namebuf);
if (strlen(namebuf) + *deltap >= MAXNAMELEN)
return (ENAMETOOLONG);
return (0);
}
static int
dsl_dir_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_dir_rename_check(void *arg, dmu_tx_t *tx)
{
dsl_dir_t *dd = arg1;
struct renamearg *ra = arg2;
dsl_pool_t *dp = dd->dd_pool;
objset_t *mos = dp->dp_meta_objset;
int err;
uint64_t val;
dsl_dir_rename_arg_t *ddra = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dir_t *dd, *newparent;
const char *mynewname;
int error;
int delta = strlen(ddra->ddra_newname) - strlen(ddra->ddra_oldname);
/*
* There should only be one reference, from dmu_objset_rename().
* Fleeting holds are also possible (eg, from "zfs list" getting
* stats), but any that are present in open context will likely
* be gone by syncing context, so only fail from syncing
* context.
* Don't check if we allow renaming of busy (mounted) dataset.
*/
if (!ra->allowmounted && dmu_tx_is_syncing(tx) &&
dmu_buf_refcount(dd->dd_dbuf) > 1) {
return (EBUSY);
/* target dir should exist */
error = dsl_dir_hold(dp, ddra->ddra_oldname, FTAG, &dd, NULL);
if (error != 0)
return (error);
/* new parent should exist */
error = dsl_dir_hold(dp, ddra->ddra_newname, FTAG,
&newparent, &mynewname);
if (error != 0) {
dsl_dir_rele(dd, FTAG);
return (error);
}
/* check for existing name */
err = zap_lookup(mos, ra->newparent->dd_phys->dd_child_dir_zapobj,
ra->mynewname, 8, 1, &val);
if (err == 0)
return (EEXIST);
if (err != ENOENT)
return (err);
/* can't rename to different pool */
if (dd->dd_pool != newparent->dd_pool) {
dsl_dir_rele(newparent, FTAG);
dsl_dir_rele(dd, FTAG);
return (ENXIO);
}
if (ra->newparent != dd->dd_parent) {
/* new name should not already exist */
if (mynewname == NULL) {
dsl_dir_rele(newparent, FTAG);
dsl_dir_rele(dd, FTAG);
return (EEXIST);
}
/* if the name length is growing, validate child name lengths */
if (delta > 0) {
error = dmu_objset_find_dp(dp, dd->dd_object, dsl_valid_rename,
&delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
if (error != 0) {
dsl_dir_rele(newparent, FTAG);
dsl_dir_rele(dd, FTAG);
return (error);
}
}
if (newparent != dd->dd_parent) {
/* is there enough space? */
uint64_t myspace =
MAX(dd->dd_phys->dd_used_bytes, dd->dd_phys->dd_reserved);
/* no rename into our descendant */
if (closest_common_ancestor(dd, ra->newparent) == dd)
if (closest_common_ancestor(dd, newparent) == dd) {
dsl_dir_rele(newparent, FTAG);
dsl_dir_rele(dd, FTAG);
return (EINVAL);
}
if (err = dsl_dir_transfer_possible(dd->dd_parent,
ra->newparent, myspace))
return (err);
error = dsl_dir_transfer_possible(dd->dd_parent,
newparent, myspace);
if (error != 0) {
dsl_dir_rele(newparent, FTAG);
dsl_dir_rele(dd, FTAG);
return (error);
}
}
dsl_dir_rele(newparent, FTAG);
dsl_dir_rele(dd, FTAG);
return (0);
}
static void
dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_dir_rename_sync(void *arg, dmu_tx_t *tx)
{
char oldname[MAXPATHLEN], newname[MAXPATHLEN];
dsl_dir_t *dd = arg1;
struct renamearg *ra = arg2;
dsl_pool_t *dp = dd->dd_pool;
dsl_dir_rename_arg_t *ddra = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dir_t *dd, *newparent;
const char *mynewname;
int error;
objset_t *mos = dp->dp_meta_objset;
int err;
char namebuf[MAXNAMELEN];
ASSERT(ra->allowmounted || dmu_buf_refcount(dd->dd_dbuf) <= 2);
ASSERT(dmu_buf_refcount(dd->dd_dbuf) <= 2);
VERIFY0(dsl_dir_hold(dp, ddra->ddra_oldname, FTAG, &dd, NULL));
VERIFY0(dsl_dir_hold(dp, ddra->ddra_newname, FTAG, &newparent,
&mynewname));
/* Log this before we change the name. */
dsl_dir_name(ra->newparent, namebuf);
spa_history_log_internal_dd(dd, "rename", tx,
"-> %s/%s", namebuf, ra->mynewname);
"-> %s", ddra->ddra_newname);
if (ra->newparent != dd->dd_parent) {
if (newparent != dd->dd_parent) {
dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD,
-dd->dd_phys->dd_used_bytes,
-dd->dd_phys->dd_compressed_bytes,
-dd->dd_phys->dd_uncompressed_bytes, tx);
dsl_dir_diduse_space(ra->newparent, DD_USED_CHILD,
dsl_dir_diduse_space(newparent, DD_USED_CHILD,
dd->dd_phys->dd_used_bytes,
dd->dd_phys->dd_compressed_bytes,
dd->dd_phys->dd_uncompressed_bytes, tx);
@ -1331,7 +1270,7 @@ dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV,
-unused_rsrv, 0, 0, tx);
dsl_dir_diduse_space(ra->newparent, DD_USED_CHILD_RSRV,
dsl_dir_diduse_space(newparent, DD_USED_CHILD_RSRV,
unused_rsrv, 0, 0, tx);
}
}
@ -1339,60 +1278,43 @@ dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dmu_buf_will_dirty(dd->dd_dbuf, tx);
/* remove from old parent zapobj */
dsl_dir_name(dd, oldname);
err = zap_remove(mos, dd->dd_parent->dd_phys->dd_child_dir_zapobj,
error = zap_remove(mos, dd->dd_parent->dd_phys->dd_child_dir_zapobj,
dd->dd_myname, tx);
ASSERT0(err);
ASSERT0(error);
(void) strcpy(dd->dd_myname, ra->mynewname);
dsl_dir_close(dd->dd_parent, dd);
dd->dd_phys->dd_parent_obj = ra->newparent->dd_object;
VERIFY(0 == dsl_dir_open_obj(dd->dd_pool,
ra->newparent->dd_object, NULL, dd, &dd->dd_parent));
(void) strcpy(dd->dd_myname, mynewname);
dsl_dir_rele(dd->dd_parent, dd);
dd->dd_phys->dd_parent_obj = newparent->dd_object;
VERIFY0(dsl_dir_hold_obj(dp,
newparent->dd_object, NULL, dd, &dd->dd_parent));
/* add to new parent zapobj */
err = zap_add(mos, ra->newparent->dd_phys->dd_child_dir_zapobj,
dd->dd_myname, 8, 1, &dd->dd_object, tx);
ASSERT0(err);
dsl_dir_name(dd, newname);
VERIFY0(zap_add(mos, newparent->dd_phys->dd_child_dir_zapobj,
dd->dd_myname, 8, 1, &dd->dd_object, tx));
#ifdef __FreeBSD__
#ifdef _KERNEL
zfsvfs_update_fromname(oldname, newname);
zvol_rename_minors(oldname, newname);
zfsvfs_update_fromname(ddra->ddra_oldname, ddra->ddra_newname);
zvol_rename_minors(ddra->ddra_oldname, ddra->ddra_newname);
#endif
#endif
dsl_prop_notify_all(dd);
dsl_dir_rele(newparent, FTAG);
dsl_dir_rele(dd, FTAG);
}
int
dsl_dir_rename(dsl_dir_t *dd, const char *newname, int flags)
dsl_dir_rename(const char *oldname, const char *newname)
{
struct renamearg ra;
int err;
dsl_dir_rename_arg_t ddra;
/* new parent should exist */
err = dsl_dir_open(newname, FTAG, &ra.newparent, &ra.mynewname);
if (err)
return (err);
ddra.ddra_oldname = oldname;
ddra.ddra_newname = newname;
/* can't rename to different pool */
if (dd->dd_pool != ra.newparent->dd_pool) {
err = ENXIO;
goto out;
}
/* new name should not already exist */
if (ra.mynewname == NULL) {
err = EEXIST;
goto out;
}
ra.allowmounted = !!(flags & ZFS_RENAME_ALLOW_MOUNTED);
err = dsl_sync_task_do(dd->dd_pool,
dsl_dir_rename_check, dsl_dir_rename_sync, dd, &ra, 3);
out:
dsl_dir_close(ra.newparent, FTAG);
return (err);
return (dsl_sync_task(oldname,
dsl_dir_rename_check, dsl_dir_rename_sync, &ddra, 3));
}
int

View File

@ -43,6 +43,7 @@
#include <sys/bptree.h>
#include <sys/zfeature.h>
#include <sys/zil_impl.h>
#include <sys/dsl_userhold.h>
int zfs_no_write_throttle = 0;
int zfs_write_limit_shift = 3; /* 1/8th of physical memory */
@ -94,7 +95,7 @@ dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **ddp)
if (err)
return (err);
return (dsl_dir_open_obj(dp, obj, name, dp, ddp));
return (dsl_dir_hold_obj(dp, obj, name, dp, ddp));
}
static dsl_pool_t *
@ -106,7 +107,7 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
dp = kmem_zalloc(sizeof (dsl_pool_t), KM_SLEEP);
dp->dp_spa = spa;
dp->dp_meta_rootbp = *bp;
rw_init(&dp->dp_config_rwlock, NULL, RW_DEFAULT, NULL);
rrw_init(&dp->dp_config_rwlock, B_TRUE);
dp->dp_write_limit = zfs_write_limit_min;
txg_init(dp, txg);
@ -117,7 +118,7 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
txg_list_create(&dp->dp_dirty_dirs,
offsetof(dsl_dir_t, dd_dirty_link));
txg_list_create(&dp->dp_sync_tasks,
offsetof(dsl_sync_task_group_t, dstg_node));
offsetof(dsl_sync_task_t, dst_node));
mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL);
@ -151,14 +152,14 @@ dsl_pool_open(dsl_pool_t *dp)
dsl_dataset_t *ds;
uint64_t obj;
rw_enter(&dp->dp_config_rwlock, RW_WRITER);
rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG);
err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1,
&dp->dp_root_dir_obj);
if (err)
goto out;
err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj,
err = dsl_dir_hold_obj(dp, dp->dp_root_dir_obj,
NULL, dp, &dp->dp_root_dir);
if (err)
goto out;
@ -179,7 +180,7 @@ dsl_pool_open(dsl_pool_t *dp)
&dp->dp_origin_snap);
dsl_dataset_rele(ds, FTAG);
}
dsl_dir_close(dd, dp);
dsl_dir_rele(dd, dp);
if (err)
goto out;
}
@ -194,7 +195,7 @@ dsl_pool_open(dsl_pool_t *dp)
DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj);
if (err)
goto out;
VERIFY3U(0, ==, bpobj_open(&dp->dp_free_bpobj,
VERIFY0(bpobj_open(&dp->dp_free_bpobj,
dp->dp_meta_objset, obj));
}
@ -227,7 +228,7 @@ dsl_pool_open(dsl_pool_t *dp)
err = dsl_scan_init(dp, dp->dp_tx.tx_open_txg);
out:
rw_exit(&dp->dp_config_rwlock);
rrw_exit(&dp->dp_config_rwlock, FTAG);
return (err);
}
@ -242,13 +243,13 @@ dsl_pool_close(dsl_pool_t *dp)
* and not a hold, so just drop that here.
*/
if (dp->dp_origin_snap)
dsl_dataset_drop_ref(dp->dp_origin_snap, dp);
dsl_dataset_rele(dp->dp_origin_snap, dp);
if (dp->dp_mos_dir)
dsl_dir_close(dp->dp_mos_dir, dp);
dsl_dir_rele(dp->dp_mos_dir, dp);
if (dp->dp_free_dir)
dsl_dir_close(dp->dp_free_dir, dp);
dsl_dir_rele(dp->dp_free_dir, dp);
if (dp->dp_root_dir)
dsl_dir_close(dp->dp_root_dir, dp);
dsl_dir_rele(dp->dp_root_dir, dp);
bpobj_close(&dp->dp_free_bpobj);
@ -264,7 +265,7 @@ dsl_pool_close(dsl_pool_t *dp)
arc_flush(dp->dp_spa);
txg_fini(dp);
dsl_scan_fini(dp);
rw_destroy(&dp->dp_config_rwlock);
rrw_destroy(&dp->dp_config_rwlock);
mutex_destroy(&dp->dp_lock);
taskq_destroy(dp->dp_vnrele_taskq);
if (dp->dp_blkstats)
@ -282,6 +283,8 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg)
dsl_dataset_t *ds;
uint64_t obj;
rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG);
/* create and open the MOS (meta-objset) */
dp->dp_meta_objset = dmu_objset_create_impl(spa,
NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx);
@ -292,30 +295,30 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg)
ASSERT0(err);
/* Initialize scan structures */
VERIFY3U(0, ==, dsl_scan_init(dp, txg));
VERIFY0(dsl_scan_init(dp, txg));
/* create and open the root dir */
dp->dp_root_dir_obj = dsl_dir_create_sync(dp, NULL, NULL, tx);
VERIFY(0 == dsl_dir_open_obj(dp, dp->dp_root_dir_obj,
VERIFY0(dsl_dir_hold_obj(dp, dp->dp_root_dir_obj,
NULL, dp, &dp->dp_root_dir));
/* create and open the meta-objset dir */
(void) dsl_dir_create_sync(dp, dp->dp_root_dir, MOS_DIR_NAME, tx);
VERIFY(0 == dsl_pool_open_special_dir(dp,
VERIFY0(dsl_pool_open_special_dir(dp,
MOS_DIR_NAME, &dp->dp_mos_dir));
if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
/* create and open the free dir */
(void) dsl_dir_create_sync(dp, dp->dp_root_dir,
FREE_DIR_NAME, tx);
VERIFY(0 == dsl_pool_open_special_dir(dp,
VERIFY0(dsl_pool_open_special_dir(dp,
FREE_DIR_NAME, &dp->dp_free_dir));
/* create and open the free_bplist */
obj = bpobj_alloc(dp->dp_meta_objset, SPA_MAXBLOCKSIZE, tx);
VERIFY(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx) == 0);
VERIFY3U(0, ==, bpobj_open(&dp->dp_free_bpobj,
VERIFY0(bpobj_open(&dp->dp_free_bpobj,
dp->dp_meta_objset, obj));
}
@ -326,7 +329,7 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg)
obj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, 0, tx);
/* create the root objset */
VERIFY(0 == dsl_dataset_hold_obj(dp, obj, FTAG, &ds));
VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG, &ds));
os = dmu_objset_create_impl(dp->dp_spa, ds,
dsl_dataset_get_blkptr(ds), DMU_OST_ZFS, tx);
#ifdef _KERNEL
@ -336,6 +339,8 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg)
dmu_tx_commit(tx);
rrw_exit(&dp->dp_config_rwlock, FTAG);
return (dp);
}
@ -358,10 +363,7 @@ static int
deadlist_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
{
dsl_deadlist_t *dl = arg;
dsl_pool_t *dp = dmu_objset_pool(dl->dl_os);
rw_enter(&dp->dp_config_rwlock, RW_READER);
dsl_deadlist_insert(dl, bp, tx);
rw_exit(&dp->dp_config_rwlock);
return (0);
}
@ -383,7 +385,7 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
/*
* We need to copy dp_space_towrite() before doing
* dsl_sync_task_group_sync(), because
* dsl_sync_task_sync(), because
* dsl_dataset_snapshot_reserve_space() will increase
* dp_space_towrite but not actually write anything.
*/
@ -497,14 +499,14 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
*/
DTRACE_PROBE(pool_sync__3task);
if (!txg_list_empty(&dp->dp_sync_tasks, txg)) {
dsl_sync_task_group_t *dstg;
dsl_sync_task_t *dst;
/*
* No more sync tasks should have been added while we
* were syncing.
*/
ASSERT(spa_sync_pass(dp->dp_spa) == 1);
while (dstg = txg_list_remove(&dp->dp_sync_tasks, txg))
dsl_sync_task_group_sync(dstg, tx);
while (dst = txg_list_remove(&dp->dp_sync_tasks, txg))
dsl_sync_task_sync(dst, tx);
}
dmu_tx_commit(tx);
@ -679,14 +681,13 @@ dsl_pool_willuse_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx)
/* ARGSUSED */
static int
upgrade_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
upgrade_clones_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg)
{
dmu_tx_t *tx = arg;
dsl_dataset_t *ds, *prev = NULL;
int err;
dsl_pool_t *dp = spa_get_dsl(spa);
err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
err = dsl_dataset_hold_obj(dp, hds->ds_object, FTAG, &ds);
if (err)
return (err);
@ -712,7 +713,7 @@ upgrade_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
* The $ORIGIN can't have any data, or the accounting
* will be wrong.
*/
ASSERT(prev->ds_phys->ds_bp.blk_birth == 0);
ASSERT0(prev->ds_phys->ds_bp.blk_birth);
/* The origin doesn't get attached to itself */
if (ds->ds_object == prev->ds_object) {
@ -732,13 +733,13 @@ upgrade_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
if (ds->ds_phys->ds_next_snap_obj == 0) {
ASSERT(ds->ds_prev == NULL);
VERIFY(0 == dsl_dataset_hold_obj(dp,
VERIFY0(dsl_dataset_hold_obj(dp,
ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev));
}
}
ASSERT(ds->ds_dir->dd_phys->dd_origin_obj == prev->ds_object);
ASSERT(ds->ds_phys->ds_prev_snap_obj == prev->ds_object);
ASSERT3U(ds->ds_dir->dd_phys->dd_origin_obj, ==, prev->ds_object);
ASSERT3U(ds->ds_phys->ds_prev_snap_obj, ==, prev->ds_object);
if (prev->ds_phys->ds_next_clones_obj == 0) {
dmu_buf_will_dirty(prev->ds_dbuf, tx);
@ -746,7 +747,7 @@ upgrade_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
zap_create(dp->dp_meta_objset,
DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx);
}
VERIFY(0 == zap_add_int(dp->dp_meta_objset,
VERIFY0(zap_add_int(dp->dp_meta_objset,
prev->ds_phys->ds_next_clones_obj, ds->ds_object, tx));
dsl_dataset_rele(ds, FTAG);
@ -761,25 +762,21 @@ dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx)
ASSERT(dmu_tx_is_syncing(tx));
ASSERT(dp->dp_origin_snap != NULL);
VERIFY3U(0, ==, dmu_objset_find_spa(dp->dp_spa, NULL, upgrade_clones_cb,
VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj, upgrade_clones_cb,
tx, DS_FIND_CHILDREN));
}
/* ARGSUSED */
static int
upgrade_dir_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
upgrade_dir_clones_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
{
dmu_tx_t *tx = arg;
dsl_dataset_t *ds;
dsl_pool_t *dp = spa_get_dsl(spa);
objset_t *mos = dp->dp_meta_objset;
VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
if (ds->ds_dir->dd_phys->dd_origin_obj) {
if (ds->ds_dir->dd_phys->dd_origin_obj != 0) {
dsl_dataset_t *origin;
VERIFY3U(0, ==, dsl_dataset_hold_obj(dp,
VERIFY0(dsl_dataset_hold_obj(dp,
ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &origin));
if (origin->ds_dir->dd_phys->dd_clones == 0) {
@ -788,13 +785,11 @@ upgrade_dir_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx);
}
VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset,
origin->ds_dir->dd_phys->dd_clones, dsobj, tx));
VERIFY0(zap_add_int(dp->dp_meta_objset,
origin->ds_dir->dd_phys->dd_clones, ds->ds_object, tx));
dsl_dataset_rele(origin, FTAG);
}
dsl_dataset_rele(ds, FTAG);
return (0);
}
@ -805,7 +800,7 @@ dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx)
uint64_t obj;
(void) dsl_dir_create_sync(dp, dp->dp_root_dir, FREE_DIR_NAME, tx);
VERIFY(0 == dsl_pool_open_special_dir(dp,
VERIFY0(dsl_pool_open_special_dir(dp,
FREE_DIR_NAME, &dp->dp_free_dir));
/*
@ -815,12 +810,11 @@ dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx)
*/
obj = dmu_object_alloc(dp->dp_meta_objset, DMU_OT_BPOBJ,
SPA_MAXBLOCKSIZE, DMU_OT_BPOBJ_HDR, sizeof (bpobj_phys_t), tx);
VERIFY3U(0, ==, zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
VERIFY0(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx));
VERIFY3U(0, ==, bpobj_open(&dp->dp_free_bpobj,
dp->dp_meta_objset, obj));
VERIFY0(bpobj_open(&dp->dp_free_bpobj, dp->dp_meta_objset, obj));
VERIFY3U(0, ==, dmu_objset_find_spa(dp->dp_spa, NULL,
VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj,
upgrade_dir_clones_cb, tx, DS_FIND_CHILDREN));
}
@ -832,17 +826,16 @@ dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx)
ASSERT(dmu_tx_is_syncing(tx));
ASSERT(dp->dp_origin_snap == NULL);
ASSERT(rrw_held(&dp->dp_config_rwlock, RW_WRITER));
/* create the origin dir, ds, & snap-ds */
rw_enter(&dp->dp_config_rwlock, RW_WRITER);
dsobj = dsl_dataset_create_sync(dp->dp_root_dir, ORIGIN_DIR_NAME,
NULL, 0, kcred, tx);
VERIFY(0 == dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
dsl_dataset_snapshot_sync(ds, ORIGIN_DIR_NAME, tx);
VERIFY(0 == dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj,
VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
dsl_dataset_snapshot_sync_impl(ds, ORIGIN_DIR_NAME, tx);
VERIFY0(dsl_dataset_hold_obj(dp, ds->ds_phys->ds_prev_snap_obj,
dp, &dp->dp_origin_snap));
dsl_dataset_rele(ds, FTAG);
rw_exit(&dp->dp_config_rwlock);
}
taskq_t *
@ -877,7 +870,7 @@ dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp)
*htag = '\0';
++htag;
dsobj = strtonum(za.za_name, NULL);
(void) dsl_dataset_user_release_tmp(dp, dsobj, htag, B_FALSE);
dsl_dataset_user_release_tmp(dp, dsobj, htag);
}
zap_cursor_fini(&zc);
}
@ -899,7 +892,7 @@ dsl_pool_user_hold_create_obj(dsl_pool_t *dp, dmu_tx_t *tx)
static int
dsl_pool_user_hold_rele_impl(dsl_pool_t *dp, uint64_t dsobj,
const char *tag, uint64_t *now, dmu_tx_t *tx, boolean_t holding)
const char *tag, uint64_t now, dmu_tx_t *tx, boolean_t holding)
{
objset_t *mos = dp->dp_meta_objset;
uint64_t zapobj = dp->dp_tmp_userrefs_obj;
@ -924,7 +917,7 @@ dsl_pool_user_hold_rele_impl(dsl_pool_t *dp, uint64_t dsobj,
name = kmem_asprintf("%llx-%s", (u_longlong_t)dsobj, tag);
if (holding)
error = zap_add(mos, zapobj, name, 8, 1, now, tx);
error = zap_add(mos, zapobj, name, 8, 1, &now, tx);
else
error = zap_remove(mos, zapobj, name, tx);
strfree(name);
@ -937,7 +930,7 @@ dsl_pool_user_hold_rele_impl(dsl_pool_t *dp, uint64_t dsobj,
*/
int
dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj, const char *tag,
uint64_t *now, dmu_tx_t *tx)
uint64_t now, dmu_tx_t *tx)
{
return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, now, tx, B_TRUE));
}
@ -949,6 +942,109 @@ int
dsl_pool_user_release(dsl_pool_t *dp, uint64_t dsobj, const char *tag,
dmu_tx_t *tx)
{
return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, NULL,
return (dsl_pool_user_hold_rele_impl(dp, dsobj, tag, 0,
tx, B_FALSE));
}
/*
* DSL Pool Configuration Lock
*
* The dp_config_rwlock protects against changes to DSL state (e.g. dataset
* creation / destruction / rename / property setting). It must be held for
* read to hold a dataset or dsl_dir. I.e. you must call
* dsl_pool_config_enter() or dsl_pool_hold() before calling
* dsl_{dataset,dir}_hold{_obj}. In most circumstances, the dp_config_rwlock
* must be held continuously until all datasets and dsl_dirs are released.
*
* The only exception to this rule is that if a "long hold" is placed on
* a dataset, then the dp_config_rwlock may be dropped while the dataset
* is still held. The long hold will prevent the dataset from being
* destroyed -- the destroy will fail with EBUSY. A long hold can be
* obtained by calling dsl_dataset_long_hold(), or by "owning" a dataset
* (by calling dsl_{dataset,objset}_{try}own{_obj}).
*
* Legitimate long-holders (including owners) should be long-running, cancelable
* tasks that should cause "zfs destroy" to fail. This includes DMU
* consumers (i.e. a ZPL filesystem being mounted or ZVOL being open),
* "zfs send", and "zfs diff". There are several other long-holders whose
* uses are suboptimal (e.g. "zfs promote", and zil_suspend()).
*
* The usual formula for long-holding would be:
* dsl_pool_hold()
* dsl_dataset_hold()
* ... perform checks ...
* dsl_dataset_long_hold()
* dsl_pool_rele()
* ... perform long-running task ...
* dsl_dataset_long_rele()
* dsl_dataset_rele()
*
* Note that when the long hold is released, the dataset is still held but
* the pool is not held. The dataset may change arbitrarily during this time
* (e.g. it could be destroyed). Therefore you shouldn't do anything to the
* dataset except release it.
*
* User-initiated operations (e.g. ioctls, zfs_ioc_*()) are either read-only
* or modifying operations.
*
* Modifying operations should generally use dsl_sync_task(). The synctask
* infrastructure enforces proper locking strategy with respect to the
* dp_config_rwlock. See the comment above dsl_sync_task() for details.
*
* Read-only operations will manually hold the pool, then the dataset, obtain
* information from the dataset, then release the pool and dataset.
* dmu_objset_{hold,rele}() are convenience routines that also do the pool
* hold/rele.
*/
/*
 * Open the pool identified by 'name' and enter its configuration lock.
 *
 * On success the pool's dsl_pool_t is stored in *dp, the spa is left open
 * under 'tag', and dsl_pool_config_enter() has been called with the same
 * tag; 0 is returned.  If spa_open() fails, its error is returned and *dp
 * is not written.
 *
 * Undo with dsl_pool_rele(), passing the same tag.
 */
int
dsl_pool_hold(const char *name, void *tag, dsl_pool_t **dp)
{
	spa_t *spa;
	int error = spa_open(name, &spa, tag);

	if (error != 0)
		return (error);

	*dp = spa_get_dsl(spa);
	dsl_pool_config_enter(*dp, tag);
	return (0);
}
/*
 * Release a pool obtained with dsl_pool_hold(): leave the configuration
 * lock first, then close the spa.  'tag' must match the one used for the
 * hold, since it is handed to both dsl_pool_config_exit() and spa_close().
 */
void
dsl_pool_rele(dsl_pool_t *dp, void *tag)
{
	spa_t *spa = dp->dp_spa;

	dsl_pool_config_exit(dp, tag);
	spa_close(spa, tag);
}
/*
 * Take the pool configuration lock (dp_config_rwlock) as a reader,
 * recording 'tag' as the holder.
 *
 * The lock is a "reentrant" rrwlock, but it is deliberately not used
 * reentrantly: the caller must not already hold it for read, which the
 * ASSERT below checks.  An rrwlock is used because, with its track_all
 * flag, it can track every reading thread.  That helps when debugging a
 * code path that failed to release the lock, and it lets us verify that
 * the *current* thread holds the lock.  A plain rwlock only knows that N
 * threads hold it for read, not which ones, so rw_held(RW_READER) returns
 * TRUE whenever any thread holds it for read.
 */
void
dsl_pool_config_enter(dsl_pool_t *dp, void *tag)
{
	ASSERT(!rrw_held(&dp->dp_config_rwlock, RW_READER));

	rrw_enter(&dp->dp_config_rwlock, RW_READER, tag);
}
/*
 * Leave the pool configuration lock by calling rrw_exit() on
 * dp_config_rwlock with 'tag'.  Counterpart of dsl_pool_config_enter(),
 * which enters the same lock as a reader with its tag.
 */
void
dsl_pool_config_exit(dsl_pool_t *dp, void *tag)
{
rrw_exit(&dp->dp_config_rwlock, tag);
}
/*
 * Report whether the pool configuration lock is held, as determined by
 * RRW_LOCK_HELD() on dp_config_rwlock.  Used in ASSERTs by code that
 * requires the lock.
 * NOTE(review): presumably true for either a read or a write hold --
 * confirm against the RRW_LOCK_HELD definition.
 */
boolean_t
dsl_pool_config_held(dsl_pool_t *dp)
{
return (RRW_LOCK_HELD(&dp->dp_config_rwlock));
}

View File

@ -82,7 +82,7 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname,
char *inheritstr;
char *recvdstr;
ASSERT(RW_LOCK_HELD(&dd->dd_pool->dp_config_rwlock));
ASSERT(dsl_pool_config_held(dd->dd_pool));
if (setpoint)
setpoint[0] = '\0';
@ -97,8 +97,6 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname,
* after this loop.
*/
for (; dd != NULL; dd = dd->dd_parent) {
ASSERT(RW_LOCK_HELD(&dd->dd_pool->dp_config_rwlock));
if (dd != target || snapshot) {
if (!inheritable)
break;
@ -167,7 +165,7 @@ dsl_prop_get_ds(dsl_dataset_t *ds, const char *propname,
boolean_t snapshot;
uint64_t zapobj;
ASSERT(RW_LOCK_HELD(&ds->ds_dir->dd_pool->dp_config_rwlock));
ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));
inheritable = (prop == ZPROP_INVAL || zfs_prop_inheritable(prop));
snapshot = (ds->ds_phys != NULL && dsl_dataset_is_snapshot(ds));
zapobj = (ds->ds_phys == NULL ? 0 : ds->ds_phys->ds_props_obj);
@ -235,18 +233,12 @@ dsl_prop_register(dsl_dataset_t *ds, const char *propname,
uint64_t value;
dsl_prop_cb_record_t *cbr;
int err;
int need_rwlock;
need_rwlock = !RW_WRITE_HELD(&dp->dp_config_rwlock);
if (need_rwlock)
rw_enter(&dp->dp_config_rwlock, RW_READER);
ASSERT(dsl_pool_config_held(dp));
err = dsl_prop_get_ds(ds, propname, 8, 1, &value, NULL);
if (err != 0) {
if (need_rwlock)
rw_exit(&dp->dp_config_rwlock);
err = dsl_prop_get_int_ds(ds, propname, &value);
if (err != 0)
return (err);
}
cbr = kmem_alloc(sizeof (dsl_prop_cb_record_t), KM_SLEEP);
cbr->cbr_ds = ds;
@ -259,9 +251,6 @@ dsl_prop_register(dsl_dataset_t *ds, const char *propname,
mutex_exit(&dd->dd_lock);
cbr->cbr_func(cbr->cbr_arg, value);
if (need_rwlock)
rw_exit(&dp->dp_config_rwlock);
return (0);
}
@ -269,19 +258,18 @@ int
dsl_prop_get(const char *dsname, const char *propname,
int intsz, int numints, void *buf, char *setpoint)
{
dsl_dataset_t *ds;
int err;
objset_t *os;
int error;
err = dsl_dataset_hold(dsname, FTAG, &ds);
if (err)
return (err);
error = dmu_objset_hold(dsname, FTAG, &os);
if (error != 0)
return (error);
rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
err = dsl_prop_get_ds(ds, propname, intsz, numints, buf, setpoint);
rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
error = dsl_prop_get_ds(dmu_objset_ds(os), propname,
intsz, numints, buf, setpoint);
dsl_dataset_rele(ds, FTAG);
return (err);
dmu_objset_rele(os, FTAG);
return (error);
}
/*
@ -299,17 +287,11 @@ dsl_prop_get_integer(const char *ddname, const char *propname,
return (dsl_prop_get(ddname, propname, 8, 1, valuep, setpoint));
}
void
dsl_prop_setarg_init_uint64(dsl_prop_setarg_t *psa, const char *propname,
zprop_source_t source, uint64_t *value)
int
dsl_prop_get_int_ds(dsl_dataset_t *ds, const char *propname,
uint64_t *valuep)
{
psa->psa_name = propname;
psa->psa_source = source;
psa->psa_intsz = 8;
psa->psa_numints = 1;
psa->psa_value = value;
psa->psa_effective_value = -1ULL;
return (dsl_prop_get_ds(ds, propname, 8, 1, valuep, NULL));
}
/*
@ -323,11 +305,10 @@ dsl_prop_setarg_init_uint64(dsl_prop_setarg_t *psa, const char *propname,
* a property not handled by this function.
*/
int
dsl_prop_predict_sync(dsl_dir_t *dd, dsl_prop_setarg_t *psa)
dsl_prop_predict(dsl_dir_t *dd, const char *propname,
zprop_source_t source, uint64_t value, uint64_t *newvalp)
{
const char *propname = psa->psa_name;
zfs_prop_t prop = zfs_name_to_prop(propname);
zprop_source_t source = psa->psa_source;
objset_t *mos;
uint64_t zapobj;
uint64_t version;
@ -359,36 +340,33 @@ dsl_prop_predict_sync(dsl_dir_t *dd, dsl_prop_setarg_t *psa)
switch (source) {
case ZPROP_SRC_NONE:
/* Revert to the received value, if any. */
err = zap_lookup(mos, zapobj, recvdstr, 8, 1,
&psa->psa_effective_value);
err = zap_lookup(mos, zapobj, recvdstr, 8, 1, newvalp);
if (err == ENOENT)
psa->psa_effective_value = 0;
*newvalp = 0;
break;
case ZPROP_SRC_LOCAL:
psa->psa_effective_value = *(uint64_t *)psa->psa_value;
*newvalp = value;
break;
case ZPROP_SRC_RECEIVED:
/*
* If there's no local setting, then the new received value will
* be the effective value.
*/
err = zap_lookup(mos, zapobj, propname, 8, 1,
&psa->psa_effective_value);
err = zap_lookup(mos, zapobj, propname, 8, 1, newvalp);
if (err == ENOENT)
psa->psa_effective_value = *(uint64_t *)psa->psa_value;
*newvalp = value;
break;
case (ZPROP_SRC_NONE | ZPROP_SRC_RECEIVED):
/*
* We're clearing the received value, so the local setting (if
* it exists) remains the effective value.
*/
err = zap_lookup(mos, zapobj, propname, 8, 1,
&psa->psa_effective_value);
err = zap_lookup(mos, zapobj, propname, 8, 1, newvalp);
if (err == ENOENT)
psa->psa_effective_value = 0;
*newvalp = 0;
break;
default:
cmn_err(CE_PANIC, "unexpected property source: %d", source);
panic("unexpected property source: %d", source);
}
strfree(recvdstr);
@ -399,37 +377,6 @@ dsl_prop_predict_sync(dsl_dir_t *dd, dsl_prop_setarg_t *psa)
return (err);
}
#ifdef ZFS_DEBUG
/*
 * Debug-only consistency check: compare the effective value recorded in
 * 'psa' (psa_effective_value) against the value dsl_prop_get_dd() actually
 * returns for the same property on 'dd', and panic if they differ.
 *
 * Nothing is checked for the quota and reservation properties on pools
 * older than SPA_VERSION_RECVD_PROPS, nor when the lookup itself fails.
 */
void
dsl_prop_check_prediction(dsl_dir_t *dd, dsl_prop_setarg_t *psa)
{
	zfs_prop_t prop = zfs_name_to_prop(psa->psa_name);
	char setpoint[MAXNAMELEN];
	uint64_t actual;

	if (spa_version(dd->dd_pool->dp_spa) < SPA_VERSION_RECVD_PROPS &&
	    (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION))
		return;

	if (dsl_prop_get_dd(dd, psa->psa_name, 8, 1, &actual,
	    setpoint, B_FALSE) != 0)
		return;

	if (actual == psa->psa_effective_value)
		return;

	cmn_err(CE_PANIC, "%s property, source: %x, "
	    "predicted effective value: %llu, "
	    "actual effective value: %llu (setpoint: %s)",
	    psa->psa_name, psa->psa_source,
	    (unsigned long long)psa->psa_effective_value,
	    (unsigned long long)actual, setpoint);
}
#endif
/*
* Unregister this callback. Return 0 on success, ENOENT if ddname is
* invalid, ENOMSG if no matching callback registered.
@ -464,25 +411,57 @@ dsl_prop_unregister(dsl_dataset_t *ds, const char *propname,
return (0);
}
/*
* Return the number of callbacks that are registered for this dataset.
*/
int
dsl_prop_numcb(dsl_dataset_t *ds)
boolean_t
dsl_prop_hascb(dsl_dataset_t *ds)
{
dsl_dir_t *dd = ds->ds_dir;
boolean_t rv = B_FALSE;
dsl_prop_cb_record_t *cbr;
mutex_enter(&dd->dd_lock);
for (cbr = list_head(&dd->dd_prop_cbs); cbr;
cbr = list_next(&dd->dd_prop_cbs, cbr)) {
if (cbr->cbr_ds == ds) {
rv = B_TRUE;
break;
}
}
mutex_exit(&dd->dd_lock);
return (rv);
}
/* ARGSUSED */
static int
dsl_prop_notify_all_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
{
dsl_dir_t *dd = ds->ds_dir;
dsl_prop_cb_record_t *cbr;
int num = 0;
mutex_enter(&dd->dd_lock);
for (cbr = list_head(&dd->dd_prop_cbs);
cbr; cbr = list_next(&dd->dd_prop_cbs, cbr)) {
if (cbr->cbr_ds == ds)
num++;
for (cbr = list_head(&dd->dd_prop_cbs); cbr;
cbr = list_next(&dd->dd_prop_cbs, cbr)) {
uint64_t value;
if (dsl_prop_get_ds(cbr->cbr_ds, cbr->cbr_propname,
sizeof (value), 1, &value, NULL) == 0)
cbr->cbr_func(cbr->cbr_arg, value);
}
mutex_exit(&dd->dd_lock);
return (num);
return (0);
}
/*
 * Walk 'dd' and its descendants (DS_FIND_CHILDREN), invoking
 * dsl_prop_notify_all_cb() on each dataset found.  This is used when
 * renaming the dir.
 *
 * The caller must hold the pool's dp_config_rwlock as writer.  The result
 * of the traversal is deliberately ignored.
 */
void
dsl_prop_notify_all(dsl_dir_t *dd)
{
	ASSERT(RRW_WRITE_HELD(&dd->dd_pool->dp_config_rwlock));

	(void) dmu_objset_find_dp(dd->dd_pool, dd->dd_object,
	    dsl_prop_notify_all_cb, NULL, DS_FIND_CHILDREN);
}
static void
@ -496,8 +475,8 @@ dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj,
zap_attribute_t *za;
int err;
ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
err = dsl_dir_open_obj(dp, ddobj, NULL, FTAG, &dd);
ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
err = dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd);
if (err)
return;
@ -508,7 +487,7 @@ dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj,
*/
err = zap_contains(mos, dd->dd_phys->dd_props_zapobj, propname);
if (err == 0) {
dsl_dir_close(dd, FTAG);
dsl_dir_rele(dd, FTAG);
return;
}
ASSERT3U(err, ==, ENOENT);
@ -543,26 +522,24 @@ dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj,
}
kmem_free(za, sizeof (zap_attribute_t));
zap_cursor_fini(&zc);
dsl_dir_close(dd, FTAG);
dsl_dir_rele(dd, FTAG);
}
void
dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname,
zprop_source_t source, int intsz, int numints, const void *value,
dmu_tx_t *tx)
{
dsl_dataset_t *ds = arg1;
dsl_prop_setarg_t *psa = arg2;
objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
uint64_t zapobj, intval, dummy;
int isint;
char valbuf[32];
char *valstr = NULL;
const char *valstr = NULL;
char *inheritstr;
char *recvdstr;
char *tbuf = NULL;
int err;
uint64_t version = spa_version(ds->ds_dir->dd_pool->dp_spa);
const char *propname = psa->psa_name;
zprop_source_t source = psa->psa_source;
isint = (dodefault(propname, 8, 1, &intval) == 0);
@ -612,8 +589,8 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
*/
err = zap_remove(mos, zapobj, inheritstr, tx);
ASSERT(err == 0 || err == ENOENT);
VERIFY(0 == zap_update(mos, zapobj, propname,
psa->psa_intsz, psa->psa_numints, psa->psa_value, tx));
VERIFY0(zap_update(mos, zapobj, propname,
intsz, numints, value, tx));
break;
case ZPROP_SRC_INHERITED:
/*
@ -624,12 +601,10 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
err = zap_remove(mos, zapobj, propname, tx);
ASSERT(err == 0 || err == ENOENT);
if (version >= SPA_VERSION_RECVD_PROPS &&
dsl_prop_get_ds(ds, ZPROP_HAS_RECVD, 8, 1, &dummy,
NULL) == 0) {
dsl_prop_get_int_ds(ds, ZPROP_HAS_RECVD, &dummy) == 0) {
dummy = 0;
err = zap_update(mos, zapobj, inheritstr,
8, 1, &dummy, tx);
ASSERT(err == 0);
VERIFY0(zap_update(mos, zapobj, inheritstr,
8, 1, &dummy, tx));
}
break;
case ZPROP_SRC_RECEIVED:
@ -637,7 +612,7 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
* set propname$recvd -> value
*/
err = zap_update(mos, zapobj, recvdstr,
psa->psa_intsz, psa->psa_numints, psa->psa_value, tx);
intsz, numints, value, tx);
ASSERT(err == 0);
break;
case (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED):
@ -667,7 +642,7 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
strfree(recvdstr);
if (isint) {
VERIFY(0 == dsl_prop_get_ds(ds, propname, 8, 1, &intval, NULL));
VERIFY0(dsl_prop_get_int_ds(ds, propname, &intval));
if (ds->ds_phys != NULL && dsl_dataset_is_snapshot(ds)) {
dsl_prop_cb_record_t *cbr;
@ -694,7 +669,7 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
valstr = valbuf;
} else {
if (source == ZPROP_SRC_LOCAL) {
valstr = (char *)psa->psa_value;
valstr = value;
} else {
tbuf = kmem_alloc(ZAP_MAXVALUELEN, KM_SLEEP);
if (dsl_prop_get_ds(ds, propname, 1,
@ -711,118 +686,73 @@ dsl_prop_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
kmem_free(tbuf, ZAP_MAXVALUELEN);
}
void
dsl_props_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
int
dsl_prop_set_int(const char *dsname, const char *propname,
zprop_source_t source, uint64_t value)
{
dsl_dataset_t *ds = arg1;
dsl_props_arg_t *pa = arg2;
nvlist_t *props = pa->pa_props;
dsl_prop_setarg_t psa;
nvpair_t *elem = NULL;
nvlist_t *nvl = fnvlist_alloc();
int error;
psa.psa_source = pa->pa_source;
while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
nvpair_t *pair = elem;
psa.psa_name = nvpair_name(pair);
if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
/*
* dsl_prop_get_all_impl() returns properties in this
* format.
*/
nvlist_t *attrs;
VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
&pair) == 0);
}
if (nvpair_type(pair) == DATA_TYPE_STRING) {
VERIFY(nvpair_value_string(pair,
(char **)&psa.psa_value) == 0);
psa.psa_intsz = 1;
psa.psa_numints = strlen(psa.psa_value) + 1;
} else {
uint64_t intval;
VERIFY(nvpair_value_uint64(pair, &intval) == 0);
psa.psa_intsz = sizeof (intval);
psa.psa_numints = 1;
psa.psa_value = &intval;
}
dsl_prop_set_sync(ds, &psa, tx);
}
fnvlist_add_uint64(nvl, propname, value);
error = dsl_props_set(dsname, source, nvl);
fnvlist_free(nvl);
return (error);
}
int
dsl_prop_set(const char *dsname, const char *propname, zprop_source_t source,
int intsz, int numints, const void *buf)
dsl_prop_set_string(const char *dsname, const char *propname,
zprop_source_t source, const char *value)
{
dsl_dataset_t *ds;
uint64_t version;
int err;
dsl_prop_setarg_t psa;
nvlist_t *nvl = fnvlist_alloc();
int error;
/*
* We must do these checks before we get to the syncfunc, since
* it can't fail.
*/
if (strlen(propname) >= ZAP_MAXNAMELEN)
return (ENAMETOOLONG);
err = dsl_dataset_hold(dsname, FTAG, &ds);
if (err)
return (err);
version = spa_version(ds->ds_dir->dd_pool->dp_spa);
if (intsz * numints >= (version < SPA_VERSION_STMF_PROP ?
ZAP_OLDMAXVALUELEN : ZAP_MAXVALUELEN)) {
dsl_dataset_rele(ds, FTAG);
return (E2BIG);
}
if (dsl_dataset_is_snapshot(ds) &&
version < SPA_VERSION_SNAP_PROPS) {
dsl_dataset_rele(ds, FTAG);
return (ENOTSUP);
}
psa.psa_name = propname;
psa.psa_source = source;
psa.psa_intsz = intsz;
psa.psa_numints = numints;
psa.psa_value = buf;
psa.psa_effective_value = -1ULL;
err = dsl_sync_task_do(ds->ds_dir->dd_pool,
NULL, dsl_prop_set_sync, ds, &psa, 2);
dsl_dataset_rele(ds, FTAG);
return (err);
fnvlist_add_string(nvl, propname, value);
error = dsl_props_set(dsname, source, nvl);
fnvlist_free(nvl);
return (error);
}
int
dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *props)
dsl_prop_inherit(const char *dsname, const char *propname,
zprop_source_t source)
{
nvlist_t *nvl = fnvlist_alloc();
int error;
fnvlist_add_boolean(nvl, propname);
error = dsl_props_set(dsname, source, nvl);
fnvlist_free(nvl);
return (error);
}
/*
 * Argument bundle that dsl_props_set() fills in and hands to
 * dsl_sync_task(); it is consumed by dsl_props_set_check() and
 * dsl_props_set_sync().
 */
typedef struct dsl_props_set_arg {
const char *dpsa_dsname;	/* name of the dataset to hold and modify */
zprop_source_t dpsa_source;	/* source passed on when setting each prop */
nvlist_t *dpsa_props;		/* properties to set, one nvpair each */
} dsl_props_set_arg_t;
static int
dsl_props_set_check(void *arg, dmu_tx_t *tx)
{
dsl_props_set_arg_t *dpsa = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *ds;
uint64_t version;
nvpair_t *elem = NULL;
dsl_props_arg_t pa;
int err;
if (err = dsl_dataset_hold(dsname, FTAG, &ds))
err = dsl_dataset_hold(dp, dpsa->dpsa_dsname, FTAG, &ds);
if (err != 0)
return (err);
/*
* Do these checks before the syncfunc, since it can't fail.
*/
version = spa_version(ds->ds_dir->dd_pool->dp_spa);
while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
while ((elem = nvlist_next_nvpair(dpsa->dpsa_props, elem)) != NULL) {
if (strlen(nvpair_name(elem)) >= ZAP_MAXNAMELEN) {
dsl_dataset_rele(ds, FTAG);
return (ENAMETOOLONG);
}
if (nvpair_type(elem) == DATA_TYPE_STRING) {
char *valstr;
VERIFY(nvpair_value_string(elem, &valstr) == 0);
char *valstr = fnvpair_value_string(elem);
if (strlen(valstr) >= (version <
SPA_VERSION_STMF_PROP ?
ZAP_OLDMAXVALUELEN : ZAP_MAXVALUELEN)) {
@ -832,20 +762,83 @@ dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *props)
}
}
if (dsl_dataset_is_snapshot(ds) &&
version < SPA_VERSION_SNAP_PROPS) {
if (dsl_dataset_is_snapshot(ds) && version < SPA_VERSION_SNAP_PROPS) {
dsl_dataset_rele(ds, FTAG);
return (ENOTSUP);
}
pa.pa_props = props;
pa.pa_source = source;
err = dsl_sync_task_do(ds->ds_dir->dd_pool,
NULL, dsl_props_set_sync, ds, &pa, 2);
dsl_dataset_rele(ds, FTAG);
return (err);
return (0);
}
/*
 * Set every property in 'props' on dataset 'ds', in syncing context.
 *
 * Each element of 'props' is named after the property to set and is one of:
 *   - a string       -> set as a string value (length includes the NUL);
 *   - a uint64       -> set as a single 8-byte integer;
 *   - a boolean      -> passed on with no value (intsz = numints = 0);
 *   - an nvlist      -> the format returned by dsl_prop_get_all_impl(),
 *                       whose ZPROP_VALUE member holds the actual value.
 * Any other type is a programming error and panics.
 *
 * 'source' and 'tx' are passed unchanged to dsl_prop_set_sync_impl() for
 * every property.
 */
void
dsl_props_set_sync_impl(dsl_dataset_t *ds, zprop_source_t source,
    nvlist_t *props, dmu_tx_t *tx)
{
	nvpair_t *elem = NULL;

	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
		nvpair_t *pair = elem;
		/*
		 * Take the property name from the outer element now.  If the
		 * element is an nvlist, 'pair' is redirected below to its
		 * inner ZPROP_VALUE pair, whose own name is ZPROP_VALUE
		 * rather than the name of the property being set.
		 */
		const char *name = nvpair_name(elem);

		if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
			/*
			 * dsl_prop_get_all_impl() returns properties in this
			 * format.
			 */
			nvlist_t *attrs = fnvpair_value_nvlist(pair);
			pair = fnvlist_lookup_nvpair(attrs, ZPROP_VALUE);
		}

		if (nvpair_type(pair) == DATA_TYPE_STRING) {
			const char *value = fnvpair_value_string(pair);
			dsl_prop_set_sync_impl(ds, name,
			    source, 1, strlen(value) + 1, value, tx);
		} else if (nvpair_type(pair) == DATA_TYPE_UINT64) {
			uint64_t intval = fnvpair_value_uint64(pair);
			dsl_prop_set_sync_impl(ds, name,
			    source, sizeof (intval), 1, &intval, tx);
		} else if (nvpair_type(pair) == DATA_TYPE_BOOLEAN) {
			dsl_prop_set_sync_impl(ds, name,
			    source, 0, 0, NULL, tx);
		} else {
			panic("invalid nvpair type");
		}
	}
}
/*
 * Sync-task callback for dsl_props_set(): holds the dataset named in the
 * argument, applies the requested properties, and releases the hold.
 */
static void
dsl_props_set_sync(void *arg, dmu_tx_t *tx)
{
	dsl_props_set_arg_t *args = arg;
	dsl_pool_t *pool = dmu_tx_pool(tx);
	dsl_dataset_t *dataset;

	VERIFY0(dsl_dataset_hold(pool, args->dpsa_dsname, FTAG, &dataset));
	dsl_props_set_sync_impl(dataset, args->dpsa_source,
	    args->dpsa_props, tx);
	dsl_dataset_rele(dataset, FTAG);
}
/*
 * Set all properties in 'props' on the dataset 'dsname' through a sync task.
 *
 * All-or-nothing; if any prop can't be set, nothing will be modified.
 */
int
dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *props)
{
	dsl_props_set_arg_t args;
	int blocks;

	args.dpsa_dsname = dsname;
	args.dpsa_source = source;
	args.dpsa_props = props;

	/*
	 * If the source includes NONE, then we will only be removing entries
	 * from the ZAP object.  In that case don't check for ENOSPC.
	 */
	if ((source & ZPROP_SRC_NONE) != 0)
		blocks = 0;
	else
		blocks = 2 * fnvlist_num_pairs(props);

	return (dsl_sync_task(dsname, dsl_props_set_check, dsl_props_set_sync,
	    &args, blocks));
}
typedef enum dsl_prop_getflags {
@ -995,7 +988,7 @@ dsl_prop_get_all_ds(dsl_dataset_t *ds, nvlist_t **nvp,
if (dsl_dataset_is_snapshot(ds))
flags |= DSL_PROP_GET_SNAPSHOT;
rw_enter(&dp->dp_config_rwlock, RW_READER);
ASSERT(dsl_pool_config_held(dp));
if (ds->ds_phys->ds_props_obj != 0) {
ASSERT(flags & DSL_PROP_GET_SNAPSHOT);
@ -1020,58 +1013,51 @@ dsl_prop_get_all_ds(dsl_dataset_t *ds, nvlist_t **nvp,
break;
}
out:
rw_exit(&dp->dp_config_rwlock);
return (err);
}
boolean_t
dsl_prop_get_hasrecvd(objset_t *os)
dsl_prop_get_hasrecvd(const char *dsname)
{
dsl_dataset_t *ds = os->os_dsl_dataset;
int rc;
uint64_t dummy;
rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
rc = dsl_prop_get_ds(ds, ZPROP_HAS_RECVD, 8, 1, &dummy, NULL);
rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
ASSERT(rc != 0 || spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS);
return (rc == 0);
return (0 ==
dsl_prop_get_integer(dsname, ZPROP_HAS_RECVD, &dummy, NULL));
}
static void
dsl_prop_set_hasrecvd_impl(objset_t *os, zprop_source_t source)
static int
dsl_prop_set_hasrecvd_impl(const char *dsname, zprop_source_t source)
{
dsl_dataset_t *ds = os->os_dsl_dataset;
uint64_t dummy = 0;
dsl_prop_setarg_t psa;
uint64_t version;
spa_t *spa;
int error = 0;
if (spa_version(os->os_spa) < SPA_VERSION_RECVD_PROPS)
return;
VERIFY0(spa_open(dsname, &spa, FTAG));
version = spa_version(spa);
spa_close(spa, FTAG);
dsl_prop_setarg_init_uint64(&psa, ZPROP_HAS_RECVD, source, &dummy);
(void) dsl_sync_task_do(ds->ds_dir->dd_pool, NULL,
dsl_prop_set_sync, ds, &psa, 2);
if (version >= SPA_VERSION_RECVD_PROPS)
error = dsl_prop_set_int(dsname, ZPROP_HAS_RECVD, source, 0);
return (error);
}
/*
* Call after successfully receiving properties to ensure that only the first
* receive on or after SPA_VERSION_RECVD_PROPS blows away local properties.
*/
void
dsl_prop_set_hasrecvd(objset_t *os)
int
dsl_prop_set_hasrecvd(const char *dsname)
{
if (dsl_prop_get_hasrecvd(os)) {
ASSERT(spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS);
return;
}
dsl_prop_set_hasrecvd_impl(os, ZPROP_SRC_LOCAL);
int error = 0;
if (!dsl_prop_get_hasrecvd(dsname))
error = dsl_prop_set_hasrecvd_impl(dsname, ZPROP_SRC_LOCAL);
return (error);
}
void
dsl_prop_unset_hasrecvd(objset_t *os)
dsl_prop_unset_hasrecvd(const char *dsname)
{
dsl_prop_set_hasrecvd_impl(os, ZPROP_SRC_NONE);
VERIFY0(dsl_prop_set_hasrecvd_impl(dsname, ZPROP_SRC_NONE));
}
int
@ -1081,16 +1067,25 @@ dsl_prop_get_all(objset_t *os, nvlist_t **nvp)
}
int
dsl_prop_get_received(objset_t *os, nvlist_t **nvp)
dsl_prop_get_received(const char *dsname, nvlist_t **nvp)
{
objset_t *os;
int error;
/*
* Received properties are not distinguishable from local properties
* until the dataset has received properties on or after
* SPA_VERSION_RECVD_PROPS.
*/
dsl_prop_getflags_t flags = (dsl_prop_get_hasrecvd(os) ?
dsl_prop_getflags_t flags = (dsl_prop_get_hasrecvd(dsname) ?
DSL_PROP_GET_RECEIVED : DSL_PROP_GET_LOCAL);
return (dsl_prop_get_all_ds(os->os_dsl_dataset, nvp, flags));
error = dmu_objset_hold(dsname, FTAG, &os);
if (error != 0)
return (error);
error = dsl_prop_get_all_ds(os->os_dsl_dataset, nvp, flags);
dmu_objset_rele(os, FTAG);
return (error);
}
void

View File

@ -55,7 +55,7 @@ typedef int (scan_cb_t)(dsl_pool_t *, const blkptr_t *, const zbookmark_t *);
static scan_cb_t dsl_scan_defrag_cb;
static scan_cb_t dsl_scan_scrub_cb;
static scan_cb_t dsl_scan_remove_cb;
static dsl_syncfunc_t dsl_scan_cancel_sync;
static void dsl_scan_cancel_sync(void *, dmu_tx_t *);
static void dsl_scan_sync_state(dsl_scan_t *, dmu_tx_t *tx);
unsigned int zfs_top_maxinflight = 32; /* maximum I/Os per top-level */
@ -184,9 +184,9 @@ dsl_scan_fini(dsl_pool_t *dp)
/* ARGSUSED */
static int
dsl_scan_setup_check(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_scan_setup_check(void *arg, dmu_tx_t *tx)
{
dsl_scan_t *scn = arg1;
dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan;
if (scn->scn_phys.scn_state == DSS_SCANNING)
return (EBUSY);
@ -194,12 +194,11 @@ dsl_scan_setup_check(void *arg1, void *arg2, dmu_tx_t *tx)
return (0);
}
/* ARGSUSED */
static void
dsl_scan_setup_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_scan_setup_sync(void *arg, dmu_tx_t *tx)
{
dsl_scan_t *scn = arg1;
pool_scan_func_t *funcp = arg2;
dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan;
pool_scan_func_t *funcp = arg;
dmu_object_type_t ot = 0;
dsl_pool_t *dp = scn->scn_dp;
spa_t *spa = dp->dp_spa;
@ -345,9 +344,9 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
/* ARGSUSED */
static int
dsl_scan_cancel_check(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_scan_cancel_check(void *arg, dmu_tx_t *tx)
{
dsl_scan_t *scn = arg1;
dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan;
if (scn->scn_phys.scn_state != DSS_SCANNING)
return (ENOENT);
@ -356,9 +355,9 @@ dsl_scan_cancel_check(void *arg1, void *arg2, dmu_tx_t *tx)
/* ARGSUSED */
static void
dsl_scan_cancel_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_scan_cancel_sync(void *arg, dmu_tx_t *tx)
{
dsl_scan_t *scn = arg1;
dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan;
dsl_scan_done(scn, B_FALSE, tx);
dsl_scan_sync_state(scn, tx);
@ -367,12 +366,8 @@ dsl_scan_cancel_sync(void *arg1, void *arg2, dmu_tx_t *tx)
int
dsl_scan_cancel(dsl_pool_t *dp)
{
boolean_t complete = B_FALSE;
int err;
err = dsl_sync_task_do(dp, dsl_scan_cancel_check,
dsl_scan_cancel_sync, dp->dp_scan, &complete, 3);
return (err);
return (dsl_sync_task(spa_name(dp->dp_spa), dsl_scan_cancel_check,
dsl_scan_cancel_sync, NULL, 3));
}
static void dsl_scan_visitbp(blkptr_t *bp,
@ -409,7 +404,7 @@ dsl_scan_ds_maxtxg(dsl_dataset_t *ds)
static void
dsl_scan_sync_state(dsl_scan_t *scn, dmu_tx_t *tx)
{
VERIFY(0 == zap_update(scn->scn_dp->dp_meta_objset,
VERIFY0(zap_update(scn->scn_dp->dp_meta_objset,
DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_SCAN, sizeof (uint64_t), SCAN_PHYS_NUMINTS,
&scn->scn_phys, tx));
@ -981,33 +976,33 @@ struct enqueue_clones_arg {
/* ARGSUSED */
static int
enqueue_clones_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
enqueue_clones_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg)
{
struct enqueue_clones_arg *eca = arg;
dsl_dataset_t *ds;
int err;
dsl_pool_t *dp = spa->spa_dsl_pool;
dsl_scan_t *scn = dp->dp_scan;
err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
if (hds->ds_dir->dd_phys->dd_origin_obj != eca->originobj)
return (0);
err = dsl_dataset_hold_obj(dp, hds->ds_object, FTAG, &ds);
if (err)
return (err);
if (ds->ds_dir->dd_phys->dd_origin_obj == eca->originobj) {
while (ds->ds_phys->ds_prev_snap_obj != eca->originobj) {
dsl_dataset_t *prev;
err = dsl_dataset_hold_obj(dp,
ds->ds_phys->ds_prev_snap_obj, FTAG, &prev);
while (ds->ds_phys->ds_prev_snap_obj != eca->originobj) {
dsl_dataset_t *prev;
err = dsl_dataset_hold_obj(dp,
ds->ds_phys->ds_prev_snap_obj, FTAG, &prev);
dsl_dataset_rele(ds, FTAG);
if (err)
return (err);
ds = prev;
}
VERIFY(zap_add_int_key(dp->dp_meta_objset,
scn->scn_phys.scn_queue_obj, ds->ds_object,
ds->ds_phys->ds_prev_snap_txg, eca->tx) == 0);
dsl_dataset_rele(ds, FTAG);
if (err)
return (err);
ds = prev;
}
VERIFY(zap_add_int_key(dp->dp_meta_objset,
scn->scn_phys.scn_queue_obj, ds->ds_object,
ds->ds_phys->ds_prev_snap_txg, eca->tx) == 0);
dsl_dataset_rele(ds, FTAG);
return (0);
}
@ -1096,17 +1091,17 @@ dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx)
}
if (usenext) {
VERIFY(zap_join_key(dp->dp_meta_objset,
VERIFY0(zap_join_key(dp->dp_meta_objset,
ds->ds_phys->ds_next_clones_obj,
scn->scn_phys.scn_queue_obj,
ds->ds_phys->ds_creation_txg, tx) == 0);
ds->ds_phys->ds_creation_txg, tx));
} else {
struct enqueue_clones_arg eca;
eca.tx = tx;
eca.originobj = ds->ds_object;
(void) dmu_objset_find_spa(ds->ds_dir->dd_pool->dp_spa,
NULL, enqueue_clones_cb, &eca, DS_FIND_CHILDREN);
VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj,
enqueue_clones_cb, &eca, DS_FIND_CHILDREN));
}
}
@ -1116,15 +1111,14 @@ out:
/* ARGSUSED */
static int
enqueue_cb(spa_t *spa, uint64_t dsobj, const char *dsname, void *arg)
enqueue_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg)
{
dmu_tx_t *tx = arg;
dsl_dataset_t *ds;
int err;
dsl_pool_t *dp = spa->spa_dsl_pool;
dsl_scan_t *scn = dp->dp_scan;
err = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
err = dsl_dataset_hold_obj(dp, hds->ds_object, FTAG, &ds);
if (err)
return (err);
@ -1279,8 +1273,8 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
return;
if (spa_version(dp->dp_spa) < SPA_VERSION_DSL_SCRUB) {
VERIFY(0 == dmu_objset_find_spa(dp->dp_spa,
NULL, enqueue_cb, tx, DS_FIND_CHILDREN));
VERIFY0(dmu_objset_find_dp(dp, dp->dp_root_dir_obj,
enqueue_cb, tx, DS_FIND_CHILDREN));
} else {
dsl_scan_visitds(scn,
dp->dp_origin_snap->ds_object, tx);
@ -1415,7 +1409,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
func = POOL_SCAN_RESILVER;
zfs_dbgmsg("restarting scan func=%u txg=%llu",
func, tx->tx_txg);
dsl_scan_setup_sync(scn, &func, tx);
dsl_scan_setup_sync(&func, tx);
}
if (!dsl_scan_active(scn) ||
@ -1449,21 +1443,21 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
err = bptree_iterate(dp->dp_meta_objset,
dp->dp_bptree_obj, B_TRUE, dsl_scan_free_block_cb,
scn, tx);
VERIFY3U(0, ==, zio_wait(scn->scn_zio_root));
if (err != 0)
return;
VERIFY0(zio_wait(scn->scn_zio_root));
/* disable async destroy feature */
spa_feature_decr(spa,
&spa_feature_table[SPA_FEATURE_ASYNC_DESTROY], tx);
ASSERT(!spa_feature_is_active(spa,
&spa_feature_table[SPA_FEATURE_ASYNC_DESTROY]));
VERIFY3U(0, ==, zap_remove(dp->dp_meta_objset,
DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_BPTREE_OBJ, tx));
VERIFY3U(0, ==, bptree_free(dp->dp_meta_objset,
dp->dp_bptree_obj, tx));
dp->dp_bptree_obj = 0;
if (err == 0) {
zfeature_info_t *feat = &spa_feature_table
[SPA_FEATURE_ASYNC_DESTROY];
/* finished; deactivate async destroy feature */
spa_feature_decr(spa, feat, tx);
ASSERT(!spa_feature_is_active(spa, feat));
VERIFY0(zap_remove(dp->dp_meta_objset,
DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_BPTREE_OBJ, tx));
VERIFY0(bptree_free(dp->dp_meta_objset,
dp->dp_bptree_obj, tx));
dp->dp_bptree_obj = 0;
}
}
if (scn->scn_visited_this_txg) {
zfs_dbgmsg("freed %llu blocks in %llums from "
@ -1510,7 +1504,9 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
scn->scn_zio_root = zio_root(dp->dp_spa, NULL,
NULL, ZIO_FLAG_CANFAIL);
dsl_pool_config_enter(dp, FTAG);
dsl_scan_visit(scn, tx);
dsl_pool_config_exit(dp, FTAG);
(void) zio_wait(scn->scn_zio_root);
scn->scn_zio_root = NULL;
@ -1746,6 +1742,6 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func)
spa->spa_scrub_reopen = B_FALSE;
(void) spa_vdev_state_exit(spa, NULL, 0);
return (dsl_sync_task_do(dp, dsl_scan_setup_check,
dsl_scan_setup_sync, dp->dp_scan, &func, 0));
return (dsl_sync_task(spa_name(spa), dsl_scan_setup_check,
dsl_scan_setup_sync, &func, 0));
}

View File

@ -34,136 +34,115 @@
/* ARGSUSED */
static int
dsl_null_checkfunc(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_null_checkfunc(void *arg, dmu_tx_t *tx)
{
return (0);
}
dsl_sync_task_group_t *
dsl_sync_task_group_create(dsl_pool_t *dp)
{
dsl_sync_task_group_t *dstg;
dstg = kmem_zalloc(sizeof (dsl_sync_task_group_t), KM_SLEEP);
list_create(&dstg->dstg_tasks, sizeof (dsl_sync_task_t),
offsetof(dsl_sync_task_t, dst_node));
dstg->dstg_pool = dp;
return (dstg);
}
void
dsl_sync_task_create(dsl_sync_task_group_t *dstg,
dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
void *arg1, void *arg2, int blocks_modified)
{
dsl_sync_task_t *dst;
if (checkfunc == NULL)
checkfunc = dsl_null_checkfunc;
dst = kmem_zalloc(sizeof (dsl_sync_task_t), KM_SLEEP);
dst->dst_checkfunc = checkfunc;
dst->dst_syncfunc = syncfunc;
dst->dst_arg1 = arg1;
dst->dst_arg2 = arg2;
list_insert_tail(&dstg->dstg_tasks, dst);
dstg->dstg_space += blocks_modified << DST_AVG_BLKSHIFT;
}
/*
* Called from open context to perform a callback in syncing context. Waits
* for the operation to complete.
*
* The checkfunc will be called from open context as a preliminary check
* which can quickly fail. If it succeeds, it will be called again from
* syncing context. The checkfunc should generally be designed to work
* properly in either context, but if necessary it can check
* dmu_tx_is_syncing(tx).
*
* The synctask infrastructure enforces proper locking strategy with respect
* to the dp_config_rwlock -- the lock will always be held when the callbacks
* are called. It will be held for read during the open-context (preliminary)
* call to the checkfunc, and then held for write from syncing context during
* the calls to the check and sync funcs.
*
* A dataset or pool name can be passed as the first argument. Typically,
* the check func will hold, check the return value of the hold, and then
* release the dataset. The sync func will VERIFY0(hold()) the dataset.
* This is safe because no changes can be made between the check and sync funcs,
* and the sync func will only be called if the check func successfully opened
* the dataset.
*/
int
dsl_sync_task_group_wait(dsl_sync_task_group_t *dstg)
dsl_sync_task(const char *pool, dsl_checkfunc_t *checkfunc,
dsl_syncfunc_t *syncfunc, void *arg, int blocks_modified)
{
spa_t *spa;
dmu_tx_t *tx;
uint64_t txg;
dsl_sync_task_t *dst;
int err;
dsl_sync_task_t dst = { 0 };
dsl_pool_t *dp;
err = spa_open(pool, &spa, FTAG);
if (err != 0)
return (err);
dp = spa_get_dsl(spa);
top:
tx = dmu_tx_create_dd(dstg->dstg_pool->dp_mos_dir);
VERIFY(0 == dmu_tx_assign(tx, TXG_WAIT));
tx = dmu_tx_create_dd(dp->dp_mos_dir);
VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
txg = dmu_tx_get_txg(tx);
dst.dst_pool = dp;
dst.dst_txg = dmu_tx_get_txg(tx);
dst.dst_space = blocks_modified << DST_AVG_BLKSHIFT;
dst.dst_checkfunc = checkfunc != NULL ? checkfunc : dsl_null_checkfunc;
dst.dst_syncfunc = syncfunc;
dst.dst_arg = arg;
dst.dst_error = 0;
dst.dst_nowaiter = B_FALSE;
/* Do a preliminary error check. */
dstg->dstg_err = 0;
#ifdef ZFS_DEBUG
/*
* Only check half the time, otherwise, the sync-context
* check will almost never fail.
*/
if (spa_get_random(2) == 0)
goto skip;
#endif
rw_enter(&dstg->dstg_pool->dp_config_rwlock, RW_READER);
for (dst = list_head(&dstg->dstg_tasks); dst;
dst = list_next(&dstg->dstg_tasks, dst)) {
dst->dst_err =
dst->dst_checkfunc(dst->dst_arg1, dst->dst_arg2, tx);
if (dst->dst_err)
dstg->dstg_err = dst->dst_err;
}
rw_exit(&dstg->dstg_pool->dp_config_rwlock);
dsl_pool_config_enter(dp, FTAG);
err = dst.dst_checkfunc(arg, tx);
dsl_pool_config_exit(dp, FTAG);
if (dstg->dstg_err) {
if (err != 0) {
dmu_tx_commit(tx);
return (dstg->dstg_err);
spa_close(spa, FTAG);
return (err);
}
skip:
/*
* We don't generally have many sync tasks, so pay the price of
* add_tail to get the tasks executed in the right order.
*/
VERIFY(0 == txg_list_add_tail(&dstg->dstg_pool->dp_sync_tasks,
dstg, txg));
VERIFY(txg_list_add_tail(&dp->dp_sync_tasks, &dst, dst.dst_txg));
dmu_tx_commit(tx);
txg_wait_synced(dstg->dstg_pool, txg);
txg_wait_synced(dp, dst.dst_txg);
if (dstg->dstg_err == EAGAIN) {
txg_wait_synced(dstg->dstg_pool, txg + TXG_DEFER_SIZE);
if (dst.dst_error == EAGAIN) {
txg_wait_synced(dp, dst.dst_txg + TXG_DEFER_SIZE);
goto top;
}
return (dstg->dstg_err);
spa_close(spa, FTAG);
return (dst.dst_error);
}
void
dsl_sync_task_group_nowait(dsl_sync_task_group_t *dstg, dmu_tx_t *tx)
dsl_sync_task_nowait(dsl_pool_t *dp, dsl_syncfunc_t *syncfunc, void *arg,
int blocks_modified, dmu_tx_t *tx)
{
uint64_t txg;
dsl_sync_task_t *dst = kmem_zalloc(sizeof (*dst), KM_SLEEP);
dstg->dstg_nowaiter = B_TRUE;
txg = dmu_tx_get_txg(tx);
/*
* We don't generally have many sync tasks, so pay the price of
* add_tail to get the tasks executed in the right order.
*/
VERIFY(0 == txg_list_add_tail(&dstg->dstg_pool->dp_sync_tasks,
dstg, txg));
dst->dst_pool = dp;
dst->dst_txg = dmu_tx_get_txg(tx);
dst->dst_space = blocks_modified << DST_AVG_BLKSHIFT;
dst->dst_checkfunc = dsl_null_checkfunc;
dst->dst_syncfunc = syncfunc;
dst->dst_arg = arg;
dst->dst_error = 0;
dst->dst_nowaiter = B_TRUE;
VERIFY(txg_list_add_tail(&dp->dp_sync_tasks, dst, dst->dst_txg));
}
/*
* Called in syncing context to execute the synctask.
*/
void
dsl_sync_task_group_destroy(dsl_sync_task_group_t *dstg)
dsl_sync_task_sync(dsl_sync_task_t *dst, dmu_tx_t *tx)
{
dsl_sync_task_t *dst;
while (dst = list_head(&dstg->dstg_tasks)) {
list_remove(&dstg->dstg_tasks, dst);
kmem_free(dst, sizeof (dsl_sync_task_t));
}
kmem_free(dstg, sizeof (dsl_sync_task_group_t));
}
void
dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx)
{
dsl_sync_task_t *dst;
dsl_pool_t *dp = dstg->dstg_pool;
dsl_pool_t *dp = dst->dst_pool;
uint64_t quota, used;
ASSERT0(dstg->dstg_err);
ASSERT0(dst->dst_error);
/*
* Check for sufficient space. We just check against what's
@ -175,63 +154,21 @@ dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx)
metaslab_class_get_deferred(spa_normal_class(dp->dp_spa));
used = dp->dp_root_dir->dd_phys->dd_used_bytes;
/* MOS space is triple-dittoed, so we multiply by 3. */
if (dstg->dstg_space > 0 && used + dstg->dstg_space * 3 > quota) {
dstg->dstg_err = ENOSPC;
if (dst->dst_space > 0 && used + dst->dst_space * 3 > quota) {
dst->dst_error = ENOSPC;
if (dst->dst_nowaiter)
kmem_free(dst, sizeof (*dst));
return;
}
/*
* Check for errors by calling checkfuncs.
* Check for errors by calling checkfunc.
*/
rw_enter(&dp->dp_config_rwlock, RW_WRITER);
for (dst = list_head(&dstg->dstg_tasks); dst;
dst = list_next(&dstg->dstg_tasks, dst)) {
dst->dst_err =
dst->dst_checkfunc(dst->dst_arg1, dst->dst_arg2, tx);
if (dst->dst_err)
dstg->dstg_err = dst->dst_err;
}
if (dstg->dstg_err == 0) {
/*
* Execute sync tasks.
*/
for (dst = list_head(&dstg->dstg_tasks); dst;
dst = list_next(&dstg->dstg_tasks, dst)) {
dst->dst_syncfunc(dst->dst_arg1, dst->dst_arg2, tx);
}
}
rw_exit(&dp->dp_config_rwlock);
if (dstg->dstg_nowaiter)
dsl_sync_task_group_destroy(dstg);
}
int
dsl_sync_task_do(dsl_pool_t *dp,
dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
void *arg1, void *arg2, int blocks_modified)
{
dsl_sync_task_group_t *dstg;
int err;
ASSERT(spa_writeable(dp->dp_spa));
dstg = dsl_sync_task_group_create(dp);
dsl_sync_task_create(dstg, checkfunc, syncfunc,
arg1, arg2, blocks_modified);
err = dsl_sync_task_group_wait(dstg);
dsl_sync_task_group_destroy(dstg);
return (err);
}
void
dsl_sync_task_do_nowait(dsl_pool_t *dp,
dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
void *arg1, void *arg2, int blocks_modified, dmu_tx_t *tx)
{
dsl_sync_task_group_t *dstg = dsl_sync_task_group_create(dp);
dsl_sync_task_create(dstg, checkfunc, syncfunc,
arg1, arg2, blocks_modified);
dsl_sync_task_group_nowait(dstg, tx);
rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG);
dst->dst_error = dst->dst_checkfunc(dst->dst_arg, tx);
if (dst->dst_error == 0)
dst->dst_syncfunc(dst->dst_arg, tx);
rrw_exit(&dp->dp_config_rwlock, FTAG);
if (dst->dst_nowaiter)
kmem_free(dst, sizeof (*dst));
}

View File

@ -0,0 +1,536 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
#include <sys/dsl_userhold.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_destroy.h>
#include <sys/dsl_synctask.h>
#include <sys/dmu_tx.h>
#include <sys/zfs_onexit.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_dir.h>
#include <sys/zfs_ioctl.h>
#include <sys/zap.h>
/* Argument bundle passed to the user-hold check and sync functions. */
typedef struct dsl_dataset_user_hold_arg {
nvlist_t *dduha_holds;		/* snapshot name -> hold tag (string) */
nvlist_t *dduha_errlist;	/* filled in with snapshot name -> error */
minor_t dduha_minor;		/* nonzero: holds are temporary */
} dsl_dataset_user_hold_arg_t;
/*
 * Validate a single hold tag, and, when 'ds' is non-NULL, make sure the
 * tag is not already present in the dataset's userrefs object.
 *
 * Returns 0 if the hold may be added, E2BIG if the tag is too long,
 * EEXIST if the tag is already held, or the zap_lookup() error.
 *
 * If you add new checks here, you may need to add additional checks to the
 * "temporary" case in snapshot_check() in dmu_objset.c.
 */
int
dsl_dataset_user_hold_check_one(dsl_dataset_t *ds, const char *htag,
    boolean_t temphold, dmu_tx_t *tx)
{
	dsl_pool_t *pool = dmu_tx_pool(tx);
	objset_t *mos = pool->dp_meta_objset;
	int result = 0;

	/*
	 * NOTE(review): a tag of exactly MAXNAMELEN characters passes this
	 * comparison -- confirm that is intended.
	 */
	if (strlen(htag) > MAXNAMELEN)
		return (E2BIG);

	/* Tempholds have a more restricted length */
	if (temphold && strlen(htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN)
		return (E2BIG);

	/* Nothing more to verify when the dataset does not exist yet. */
	if (ds == NULL)
		return (result);

	/* tags must be unique */
	mutex_enter(&ds->ds_lock);
	if (ds->ds_phys->ds_userrefs_obj != 0) {
		uint64_t value;

		result = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj,
		    htag, 8, 1, &value);
		if (result == ENOENT)
			result = 0;		/* tag is free */
		else if (result == 0)
			result = EEXIST;	/* tag already held */
	}
	mutex_exit(&ds->ds_lock);

	return (result);
}
/*
 * Sync-task check function for dsl_dataset_user_hold().
 *
 * Verifies that the pool supports user references and that every
 * snapshot -> tag pair in dduha_holds names a snapshot that can be held
 * and accepts the tag.  Every failing snapshot is recorded in
 * dduha_errlist (when one was supplied); the last error seen is returned,
 * or 0 if all holds can be taken.
 */
static int
dsl_dataset_user_hold_check(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_user_hold_arg_t *dduha = arg;
	dsl_pool_t *dp = dmu_tx_pool(tx);
	nvpair_t *pair;
	int rv = 0;

	if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS)
		return (ENOTSUP);

	for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL;
	    pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
		int error = 0;
		dsl_dataset_t *ds;
		char *htag;

		/* must be a snapshot */
		if (strchr(nvpair_name(pair), '@') == NULL)
			error = EINVAL;

		if (error == 0)
			error = nvpair_value_string(pair, &htag);
		if (error == 0) {
			error = dsl_dataset_hold(dp,
			    nvpair_name(pair), FTAG, &ds);
		}
		if (error == 0) {
			error = dsl_dataset_user_hold_check_one(ds, htag,
			    dduha->dduha_minor != 0, tx);
			dsl_dataset_rele(ds, FTAG);
		}

		if (error != 0) {
			rv = error;
			/*
			 * The error list is optional, exactly as it is in
			 * dsl_dataset_user_release_check(); don't hand a NULL
			 * nvlist to fnvlist_add_int32().
			 */
			if (dduha->dduha_errlist != NULL) {
				fnvlist_add_int32(dduha->dduha_errlist,
				    nvpair_name(pair), error);
			}
		}
	}
	return (rv);
}
/*
 * Add the user hold 'htag' to 'ds' in syncing context.
 *
 * Creates the dataset's userrefs ZAP object on first use, bumps the
 * in-core reference count, records the tag with timestamp 'now', and,
 * when 'minor' is nonzero, also registers the hold as temporary so it is
 * cleaned up on exit.  The operation is written to the pool history.
 */
void
dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag,
    minor_t minor, uint64_t now, dmu_tx_t *tx)
{
	dsl_pool_t *pool = ds->ds_dir->dd_pool;
	objset_t *mos = pool->dp_meta_objset;
	uint64_t refs_obj;

	mutex_enter(&ds->ds_lock);
	if (ds->ds_phys->ds_userrefs_obj == 0) {
		/*
		 * This is the first user hold for this dataset.  Create
		 * the userrefs zap object.
		 */
		dmu_buf_will_dirty(ds->ds_dbuf, tx);
		ds->ds_phys->ds_userrefs_obj =
		    zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx);
	}
	refs_obj = ds->ds_phys->ds_userrefs_obj;
	ds->ds_userrefs++;
	mutex_exit(&ds->ds_lock);

	VERIFY0(zap_add(mos, refs_obj, htag, 8, 1, &now, tx));

	/* A nonzero minor marks the hold as temporary. */
	if (minor != 0) {
		VERIFY0(dsl_pool_user_hold(pool, ds->ds_object,
		    htag, now, tx));
		dsl_register_onexit_hold_cleanup(ds, htag, minor);
	}

	spa_history_log_internal_ds(ds, "hold", tx,
	    "tag=%s temp=%d refs=%llu",
	    htag, minor != 0, ds->ds_userrefs);
}
/*
 * Sync-task callback for dsl_dataset_user_hold(): takes every requested
 * hold, stamping them all with the same creation time.
 */
static void
dsl_dataset_user_hold_sync(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_user_hold_arg_t *args = arg;
	dsl_pool_t *pool = dmu_tx_pool(tx);
	uint64_t now = gethrestime_sec();
	nvpair_t *cur = NULL;

	while ((cur = nvlist_next_nvpair(args->dduha_holds, cur)) != NULL) {
		dsl_dataset_t *snap;

		/* The check function already proved this hold succeeds. */
		VERIFY0(dsl_dataset_hold(pool, nvpair_name(cur), FTAG, &snap));
		dsl_dataset_user_hold_sync_one(snap,
		    fnvpair_value_string(cur), args->dduha_minor, now, tx);
		dsl_dataset_rele(snap, FTAG);
	}
}
/*
 * Place user holds on a set of snapshots.
 *
 * holds is nvl of snapname -> holdname
 * errlist will be filled in with snapname -> error
 * if cleanup_minor is not 0, the holds will be temporary, cleaned up
 * when the process exits.
 *
 * if any fails, all will fail.  An empty 'holds' list succeeds trivially.
 */
int
dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, nvlist_t *errlist)
{
	dsl_dataset_user_hold_arg_t args;
	nvpair_t *first = nvlist_next_nvpair(holds, NULL);

	if (first == NULL)
		return (0);

	args.dduha_minor = cleanup_minor;
	args.dduha_errlist = errlist;
	args.dduha_holds = holds;

	/* The first snapshot's name is what identifies the pool. */
	return (dsl_sync_task(nvpair_name(first), dsl_dataset_user_hold_check,
	    dsl_dataset_user_hold_sync, &args, fnvlist_num_pairs(holds)));
}
/* Argument bundle passed to the user-release check and sync functions. */
typedef struct dsl_dataset_user_release_arg {
nvlist_t *ddura_holds;		/* snapshot name -> nvlist of hold tags */
nvlist_t *ddura_todelete;	/* snapshots the check marked for destroy */
nvlist_t *ddura_errlist;	/* snapshot name -> error; may be NULL */
} dsl_dataset_user_release_arg_t;
/*
 * Check that every tag named in 'holds' can be released from snapshot 'ds'.
 *
 * On success *todelete tells the caller whether releasing these holds
 * drops the last user reference on a defer-destroyed snapshot with no
 * other children, in which case the snapshot itself must be destroyed.
 *
 * Returns EINVAL if 'ds' is not a snapshot, ESRCH if a hold does not
 * exist, EBUSY if the snapshot would be destroyed but is long-held, or
 * the zap_lookup() error.
 */
static int
dsl_dataset_user_release_check_one(dsl_dataset_t *ds,
    nvlist_t *holds, boolean_t *todelete)
{
	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
	nvpair_t *cur = NULL;
	uint64_t refs_obj;
	int released = 0;

	*todelete = B_FALSE;

	if (!dsl_dataset_is_snapshot(ds))
		return (EINVAL);

	refs_obj = ds->ds_phys->ds_userrefs_obj;
	if (refs_obj == 0)
		return (ESRCH);

	while ((cur = nvlist_next_nvpair(holds, cur)) != NULL) {
		/* Make sure the hold exists */
		uint64_t tmp;
		int err = zap_lookup(mos, refs_obj, nvpair_name(cur),
		    8, 1, &tmp);

		if (err != 0)
			return (err == ENOENT ? ESRCH : err);
		released++;
	}

	/* Unless this drops the final reference, there is nothing to add. */
	if (!DS_IS_DEFER_DESTROY(ds) || ds->ds_phys->ds_num_children != 1 ||
	    ds->ds_userrefs != released)
		return (0);

	/* we need to destroy the snapshot as well */
	if (dsl_dataset_long_held(ds))
		return (EBUSY);
	*todelete = B_TRUE;
	return (0);
}
/*
 * Sync-task check function for dsl_dataset_user_release().
 *
 * Does nothing in open context.  In syncing context it validates each
 * snapshot -> holds entry, records snapshots that must be destroyed in
 * ddura_todelete, and records per-snapshot failures in ddura_errlist
 * (when one was supplied).  Returns the last error seen, or 0.
 */
static int
dsl_dataset_user_release_check(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_user_release_arg_t *args = arg;
	dsl_pool_t *pool;
	nvpair_t *cur = NULL;
	int result = 0;

	pool = dmu_tx_pool(tx);

	if (!dmu_tx_is_syncing(tx))
		return (0);

	while ((cur = nvlist_next_nvpair(args->ddura_holds, cur)) != NULL) {
		const char *snapname = nvpair_name(cur);
		nvlist_t *tags;
		dsl_dataset_t *snap;
		int err;

		/* Each value must itself be an nvlist of hold tags. */
		if (nvpair_value_nvlist(cur, &tags) != 0)
			return (EINVAL);

		err = dsl_dataset_hold(pool, snapname, FTAG, &snap);
		if (err == 0) {
			boolean_t destroy;

			err = dsl_dataset_user_release_check_one(snap,
			    tags, &destroy);
			if (err == 0 && destroy) {
				fnvlist_add_boolean(args->ddura_todelete,
				    snapname);
			}
			dsl_dataset_rele(snap, FTAG);
		}

		if (err == 0)
			continue;

		if (args->ddura_errlist != NULL) {
			fnvlist_add_int32(args->ddura_errlist,
			    snapname, err);
		}
		result = err;
	}
	return (result);
}
/*
 * Release every tag named in 'holds' from snapshot 'ds' in syncing
 * context: drop the in-core reference, remove any temporary-hold record
 * from the pool (a missing record is tolerated), remove the tag from the
 * userrefs object, and log the release.
 */
static void
dsl_dataset_user_release_sync_one(dsl_dataset_t *ds, nvlist_t *holds,
    dmu_tx_t *tx)
{
	dsl_pool_t *pool = ds->ds_dir->dd_pool;
	objset_t *mos = pool->dp_meta_objset;
	nvpair_t *cur = NULL;

	while ((cur = nvlist_next_nvpair(holds, cur)) != NULL) {
		int err;

		ds->ds_userrefs--;

		/* ENOENT just means this was not a temporary hold. */
		err = dsl_pool_user_release(pool, ds->ds_object,
		    nvpair_name(cur), tx);
		VERIFY(err == 0 || err == ENOENT);

		VERIFY0(zap_remove(mos, ds->ds_phys->ds_userrefs_obj,
		    nvpair_name(cur), tx));

		spa_history_log_internal_ds(ds, "release", tx,
		    "tag=%s refs=%lld", nvpair_name(cur),
		    (longlong_t)ds->ds_userrefs);
	}
}
/*
 * Sync-task callback for dsl_dataset_user_release(): releases the holds
 * on each snapshot and destroys the snapshots that the check function
 * placed in ddura_todelete.
 */
static void
dsl_dataset_user_release_sync(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_user_release_arg_t *args = arg;
	dsl_pool_t *pool = dmu_tx_pool(tx);
	nvpair_t *cur = NULL;

	while ((cur = nvlist_next_nvpair(args->ddura_holds, cur)) != NULL) {
		const char *snapname = nvpair_name(cur);
		dsl_dataset_t *snap;

		VERIFY0(dsl_dataset_hold(pool, snapname, FTAG, &snap));
		dsl_dataset_user_release_sync_one(snap,
		    fnvpair_value_nvlist(cur), tx);

		if (nvlist_exists(args->ddura_todelete, snapname)) {
			ASSERT(snap->ds_userrefs == 0 &&
			    snap->ds_phys->ds_num_children == 1 &&
			    DS_IS_DEFER_DESTROY(snap));
			dsl_destroy_snapshot_sync_impl(snap, B_FALSE, tx);
		}
		dsl_dataset_rele(snap, FTAG);
	}
}
/*
 * Release user holds from a set of snapshots.
 *
 * holds is nvl of snapname -> { holdname, ... }
 * errlist will be filled in with snapname -> error
 *
 * if any fails, all will fail.  An empty 'holds' list succeeds trivially.
 */
int
dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist)
{
	dsl_dataset_user_release_arg_t args;
	nvpair_t *first = nvlist_next_nvpair(holds, NULL);
	int result;

	if (first == NULL)
		return (0);

	args.ddura_holds = holds;
	args.ddura_errlist = errlist;
	/* Scratch list the check function fills in for the sync function. */
	args.ddura_todelete = fnvlist_alloc();

	result = dsl_sync_task(nvpair_name(first),
	    dsl_dataset_user_release_check, dsl_dataset_user_release_sync,
	    &args, fnvlist_num_pairs(holds));

	fnvlist_free(args.ddura_todelete);
	return (result);
}
/* Argument bundle for releasing one temporary hold by dataset object. */
typedef struct dsl_dataset_user_release_tmp_arg {
uint64_t ddurta_dsobj;		/* object number of the held dataset */
nvlist_t *ddurta_holds;		/* hold tags to release (boolean pairs) */
boolean_t ddurta_deleteme;	/* set by check: destroy the snapshot too */
} dsl_dataset_user_release_tmp_arg_t;
/*
 * Sync-task check function for dsl_dataset_user_release_tmp().
 *
 * Does nothing in open context.  In syncing context it holds the dataset
 * by object number and validates the release, recording in
 * ddurta_deleteme whether the snapshot must also be destroyed.
 */
static int
dsl_dataset_user_release_tmp_check(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_user_release_tmp_arg_t *args = arg;
	dsl_pool_t *pool = dmu_tx_pool(tx);
	dsl_dataset_t *snap;
	int result;

	if (!dmu_tx_is_syncing(tx))
		return (0);

	result = dsl_dataset_hold_obj(pool, args->ddurta_dsobj, FTAG, &snap);
	if (result != 0)
		return (result);

	result = dsl_dataset_user_release_check_one(snap,
	    args->ddurta_holds, &args->ddurta_deleteme);
	dsl_dataset_rele(snap, FTAG);

	return (result);
}
/*
 * Sync-task callback for dsl_dataset_user_release_tmp(): releases the
 * holds and, if the check function asked for it, destroys the snapshot.
 */
static void
dsl_dataset_user_release_tmp_sync(void *arg, dmu_tx_t *tx)
{
	dsl_dataset_user_release_tmp_arg_t *args = arg;
	dsl_pool_t *pool = dmu_tx_pool(tx);
	dsl_dataset_t *snap;

	VERIFY0(dsl_dataset_hold_obj(pool, args->ddurta_dsobj, FTAG, &snap));

	dsl_dataset_user_release_sync_one(snap, args->ddurta_holds, tx);

	if (args->ddurta_deleteme) {
		ASSERT(snap->ds_userrefs == 0 &&
		    snap->ds_phys->ds_num_children == 1 &&
		    DS_IS_DEFER_DESTROY(snap));
		dsl_destroy_snapshot_sync_impl(snap, B_FALSE, tx);
	}

	dsl_dataset_rele(snap, FTAG);
}
/*
 * Release the temporary user hold 'htag' from the dataset with object
 * number 'dsobj'.  The result of the sync task is deliberately ignored.
 *
 * Called at spa_load time to release a stale temporary user hold.
 * Also called by the onexit code.
 */
void
dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, const char *htag)
{
	dsl_dataset_user_release_tmp_arg_t ddurta;
	dsl_dataset_t *ds;
	int error;

#ifdef _KERNEL
	/* Make sure it is not mounted. */
	{
		char name[MAXNAMELEN];

		dsl_pool_config_enter(dp, FTAG);
		error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
		if (error == 0) {
			dsl_dataset_name(ds, name);
			dsl_dataset_rele(ds, FTAG);
		}
		dsl_pool_config_exit(dp, FTAG);

		/* Unmount only after the pool config lock is dropped. */
		if (error == 0)
			zfs_unmount_snap(name);
	}
#endif

	ddurta.ddurta_dsobj = dsobj;
	ddurta.ddurta_holds = fnvlist_alloc();
	fnvlist_add_boolean(ddurta.ddurta_holds, htag);

	(void) dsl_sync_task(spa_name(dp->dp_spa),
	    dsl_dataset_user_release_tmp_check,
	    dsl_dataset_user_release_tmp_sync, &ddurta, 1);

	fnvlist_free(ddurta.ddurta_holds);
}
/*
 * State captured when a temporary hold is registered, so that the onexit
 * callback can locate and release the hold later.
 */
typedef struct zfs_hold_cleanup_arg {
char zhca_spaname[MAXNAMELEN];	/* name of the pool holding the dataset */
uint64_t zhca_spa_load_guid;	/* pool load guid at registration time */
uint64_t zhca_dsobj;		/* object number of the held dataset */
char zhca_htag[MAXNAMELEN];	/* tag of the hold to release */
} zfs_hold_cleanup_arg_t;
/*
 * Onexit callback registered by dsl_register_onexit_hold_cleanup().
 *
 * Releases the temporary hold described by 'arg', provided the pool can
 * still be opened and is the same loaded instance (matching load guid)
 * that the hold was taken on.  This callback owns 'arg' and frees it on
 * every path, including the cases where the hold cannot be released.
 */
static void
dsl_dataset_user_release_onexit(void *arg)
{
	zfs_hold_cleanup_arg_t *ca = arg;
	spa_t *spa;
	int error;

	error = spa_open(ca->zhca_spaname, &spa, FTAG);
	if (error != 0) {
		zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s "
		    "because pool is no longer loaded",
		    ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag);
		/* Nobody else will free the argument; don't leak it. */
		kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t));
		return;
	}
	if (spa_load_guid(spa) != ca->zhca_spa_load_guid) {
		zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s "
		    "because pool is no longer loaded (guid doesn't match)",
		    ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag);
		spa_close(spa, FTAG);
		/* Nobody else will free the argument; don't leak it. */
		kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t));
		return;
	}

	dsl_dataset_user_release_tmp(spa_get_dsl(spa),
	    ca->zhca_dsobj, ca->zhca_htag);
	kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t));
	spa_close(spa, FTAG);
}
/*
 * Arrange for the temporary hold 'htag' on 'ds' to be released when the
 * onexit callbacks for 'minor' run.  The pool name, pool load guid,
 * dataset object number and tag are copied into a heap-allocated record
 * that is handed to the callback.
 */
void
dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag,
    minor_t minor)
{
	zfs_hold_cleanup_arg_t *cleanup = kmem_alloc(sizeof (*cleanup),
	    KM_SLEEP);
	spa_t *pool_spa = dsl_dataset_get_spa(ds);

	(void) strlcpy(cleanup->zhca_spaname, spa_name(pool_spa),
	    sizeof (cleanup->zhca_spaname));
	cleanup->zhca_spa_load_guid = spa_load_guid(pool_spa);
	cleanup->zhca_dsobj = ds->ds_object;
	(void) strlcpy(cleanup->zhca_htag, htag,
	    sizeof (cleanup->zhca_htag));

	VERIFY0(zfs_onexit_add_cb(minor,
	    dsl_dataset_user_release_onexit, cleanup, NULL));
}
/*
 * Add every user hold on dataset 'dsname' to 'nvl' as a uint64 pair
 * mapping the hold tag to the integer stored with it in the userrefs
 * object.  Returns 0, or the error from holding the pool or the dataset.
 */
int
dsl_dataset_get_holds(const char *dsname, nvlist_t *nvl)
{
	dsl_pool_t *dp;
	dsl_dataset_t *ds;
	int err;

	err = dsl_pool_hold(dsname, FTAG, &dp);
	if (err != 0)
		return (err);

	err = dsl_dataset_hold(dp, dsname, FTAG, &ds);
	if (err != 0) {
		dsl_pool_rele(dp, FTAG);
		return (err);
	}

	/* A dataset that has never been held has no userrefs object. */
	if (ds->ds_phys->ds_userrefs_obj != 0) {
		zap_cursor_t cursor;
		zap_attribute_t *attr =
		    kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);

		zap_cursor_init(&cursor, ds->ds_dir->dd_pool->dp_meta_objset,
		    ds->ds_phys->ds_userrefs_obj);
		while (zap_cursor_retrieve(&cursor, attr) == 0) {
			fnvlist_add_uint64(nvl, attr->za_name,
			    attr->za_first_integer);
			zap_cursor_advance(&cursor);
		}
		zap_cursor_fini(&cursor);

		kmem_free(attr, sizeof (zap_attribute_t));
	}

	dsl_dataset_rele(ds, FTAG);
	dsl_pool_rele(dp, FTAG);
	return (0);
}

View File

@ -1875,3 +1875,41 @@ metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg)
return (error);
}
/*
 * Panic if space map 'sm' contains a segment covering [off, off + size),
 * as reported by space_map_find().  Takes and drops sm->sm_lock around
 * the lookup.
 */
static void
checkmap(space_map_t *sm, uint64_t off, uint64_t size)
{
	avl_index_t idx;
	space_seg_t *seg;

	mutex_enter(sm->sm_lock);
	seg = space_map_find(sm, off, size, &idx);
	if (seg != NULL)
		panic("freeing free block; ss=%p", (void *)seg);
	mutex_exit(sm->sm_lock);
}
/*
 * Debug check, active only when ZFS_DEBUG_ZIO_FREE is set in zfs_flags.
 *
 * For every DVA of 'bp', look up the owning metaslab and run checkmap()
 * against its ms_map (only if loaded), every ms_freemap[] and every
 * ms_defermap[]; checkmap() panics if the range is found in any of them.
 * The SCL_VDEV config lock is held as reader for the duration.
 */
void
metaslab_check_free(spa_t *spa, const blkptr_t *bp)
{
	if (!(zfs_flags & ZFS_DEBUG_ZIO_FREE))
		return;

	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
	for (int d = 0; d < BP_GET_NDVAS(bp); d++) {
		vdev_t *vd = vdev_lookup_top(spa,
		    DVA_GET_VDEV(&bp->blk_dva[d]));
		uint64_t offset = DVA_GET_OFFSET(&bp->blk_dva[d]);
		uint64_t asize = DVA_GET_ASIZE(&bp->blk_dva[d]);
		metaslab_t *msp = vd->vdev_ms[offset >> vd->vdev_ms_shift];
		int t;

		if (msp->ms_map->sm_loaded)
			checkmap(msp->ms_map, offset, asize);

		for (t = 0; t < TXG_SIZE; t++)
			checkmap(msp->ms_freemap[t], offset, asize);

		for (t = 0; t < TXG_DEFER_SIZE; t++)
			checkmap(msp->ms_defermap[t], offset, asize);
	}
	spa_config_exit(spa, SCL_VDEV, FTAG);
}

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@ -32,7 +33,7 @@ int reference_tracking_enable = FALSE; /* runs out of memory too easily */
#else
int reference_tracking_enable = TRUE;
#endif
int reference_history = 4; /* tunable */
int reference_history = 3; /* tunable */
static kmem_cache_t *reference_cache;
static kmem_cache_t *reference_history_cache;
@ -64,6 +65,14 @@ refcount_create(refcount_t *rc)
offsetof(reference_t, ref_link));
rc->rc_count = 0;
rc->rc_removed_count = 0;
rc->rc_tracked = reference_tracking_enable;
}
/*
 * Initialize 'rc' like refcount_create(), then force rc_tracked to B_FALSE
 * regardless of the value refcount_create() assigned to it.
 */
void
refcount_create_untracked(refcount_t *rc)
{
refcount_create(rc);
/* must come after refcount_create(), which sets rc_tracked itself */
rc->rc_tracked = B_FALSE;
}
void
@ -96,14 +105,12 @@ refcount_destroy(refcount_t *rc)
int
refcount_is_zero(refcount_t *rc)
{
ASSERT(rc->rc_count >= 0);
return (rc->rc_count == 0);
}
int64_t
refcount_count(refcount_t *rc)
{
ASSERT(rc->rc_count >= 0);
return (rc->rc_count);
}
@ -113,14 +120,14 @@ refcount_add_many(refcount_t *rc, uint64_t number, void *holder)
reference_t *ref = NULL;
int64_t count;
if (reference_tracking_enable) {
if (rc->rc_tracked) {
ref = kmem_cache_alloc(reference_cache, KM_SLEEP);
ref->ref_holder = holder;
ref->ref_number = number;
}
mutex_enter(&rc->rc_mtx);
ASSERT(rc->rc_count >= 0);
if (reference_tracking_enable)
if (rc->rc_tracked)
list_insert_head(&rc->rc_list, ref);
rc->rc_count += number;
count = rc->rc_count;
@ -144,7 +151,7 @@ refcount_remove_many(refcount_t *rc, uint64_t number, void *holder)
mutex_enter(&rc->rc_mtx);
ASSERT(rc->rc_count >= number);
if (!reference_tracking_enable) {
if (!rc->rc_tracked) {
rc->rc_count -= number;
count = rc->rc_count;
mutex_exit(&rc->rc_mtx);
@ -161,7 +168,7 @@ refcount_remove_many(refcount_t *rc, uint64_t number, void *holder)
KM_SLEEP);
list_insert_head(&rc->rc_removed, ref);
rc->rc_removed_count++;
if (rc->rc_removed_count >= reference_history) {
if (rc->rc_removed_count > reference_history) {
ref = list_tail(&rc->rc_removed);
list_remove(&rc->rc_removed, ref);
kmem_cache_free(reference_history_cache,

View File

@ -75,8 +75,9 @@
uint_t rrw_tsd_key;
typedef struct rrw_node {
struct rrw_node *rn_next;
rrwlock_t *rn_rrl;
struct rrw_node *rn_next;
rrwlock_t *rn_rrl;
void *rn_tag;
} rrw_node_t;
static rrw_node_t *
@ -98,13 +99,14 @@ rrn_find(rrwlock_t *rrl)
* Add a node to the head of the singly linked list.
*/
static void
rrn_add(rrwlock_t *rrl)
rrn_add(rrwlock_t *rrl, void *tag)
{
rrw_node_t *rn;
rn = kmem_alloc(sizeof (*rn), KM_SLEEP);
rn->rn_rrl = rrl;
rn->rn_next = tsd_get(rrw_tsd_key);
rn->rn_tag = tag;
VERIFY(tsd_set(rrw_tsd_key, rn) == 0);
}
@ -113,7 +115,7 @@ rrn_add(rrwlock_t *rrl)
* thread's list and return TRUE; otherwise return FALSE.
*/
static boolean_t
rrn_find_and_remove(rrwlock_t *rrl)
rrn_find_and_remove(rrwlock_t *rrl, void *tag)
{
rrw_node_t *rn;
rrw_node_t *prev = NULL;
@ -122,7 +124,7 @@ rrn_find_and_remove(rrwlock_t *rrl)
return (B_FALSE);
for (rn = tsd_get(rrw_tsd_key); rn != NULL; rn = rn->rn_next) {
if (rn->rn_rrl == rrl) {
if (rn->rn_rrl == rrl && rn->rn_tag == tag) {
if (prev)
prev->rn_next = rn->rn_next;
else
@ -136,7 +138,7 @@ rrn_find_and_remove(rrwlock_t *rrl)
}
void
rrw_init(rrwlock_t *rrl)
rrw_init(rrwlock_t *rrl, boolean_t track_all)
{
mutex_init(&rrl->rr_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&rrl->rr_cv, NULL, CV_DEFAULT, NULL);
@ -144,6 +146,7 @@ rrw_init(rrwlock_t *rrl)
refcount_create(&rrl->rr_anon_rcount);
refcount_create(&rrl->rr_linked_rcount);
rrl->rr_writer_wanted = B_FALSE;
rrl->rr_track_all = track_all;
}
void
@ -156,12 +159,13 @@ rrw_destroy(rrwlock_t *rrl)
refcount_destroy(&rrl->rr_linked_rcount);
}
static void
void
rrw_enter_read(rrwlock_t *rrl, void *tag)
{
mutex_enter(&rrl->rr_lock);
#if !defined(DEBUG) && defined(_KERNEL)
if (!rrl->rr_writer && !rrl->rr_writer_wanted) {
if (rrl->rr_writer == NULL && !rrl->rr_writer_wanted &&
!rrl->rr_track_all) {
rrl->rr_anon_rcount.rc_count++;
mutex_exit(&rrl->rr_lock);
return;
@ -171,14 +175,14 @@ rrw_enter_read(rrwlock_t *rrl, void *tag)
ASSERT(rrl->rr_writer != curthread);
ASSERT(refcount_count(&rrl->rr_anon_rcount) >= 0);
while (rrl->rr_writer || (rrl->rr_writer_wanted &&
while (rrl->rr_writer != NULL || (rrl->rr_writer_wanted &&
refcount_is_zero(&rrl->rr_anon_rcount) &&
rrn_find(rrl) == NULL))
cv_wait(&rrl->rr_cv, &rrl->rr_lock);
if (rrl->rr_writer_wanted) {
if (rrl->rr_writer_wanted || rrl->rr_track_all) {
/* may or may not be a re-entrant enter */
rrn_add(rrl);
rrn_add(rrl, tag);
(void) refcount_add(&rrl->rr_linked_rcount, tag);
} else {
(void) refcount_add(&rrl->rr_anon_rcount, tag);
@ -187,7 +191,7 @@ rrw_enter_read(rrwlock_t *rrl, void *tag)
mutex_exit(&rrl->rr_lock);
}
static void
void
rrw_enter_write(rrwlock_t *rrl)
{
mutex_enter(&rrl->rr_lock);
@ -233,10 +237,12 @@ rrw_exit(rrwlock_t *rrl, void *tag)
if (rrl->rr_writer == NULL) {
int64_t count;
if (rrn_find_and_remove(rrl))
if (rrn_find_and_remove(rrl, tag)) {
count = refcount_remove(&rrl->rr_linked_rcount, tag);
else
} else {
ASSERT(!rrl->rr_track_all);
count = refcount_remove(&rrl->rr_anon_rcount, tag);
}
if (count == 0)
cv_broadcast(&rrl->rr_cv);
} else {
@ -249,6 +255,11 @@ rrw_exit(rrwlock_t *rrl, void *tag)
mutex_exit(&rrl->rr_lock);
}
/*
* If the lock was created with track_all, rrw_held(RW_READER) will return
* B_TRUE iff the current thread has the lock for reader. Otherwise it may
* return B_TRUE if any thread has the lock for reader.
*/
boolean_t
rrw_held(rrwlock_t *rrl, krw_t rw)
{
@ -259,7 +270,7 @@ rrw_held(rrwlock_t *rrl, krw_t rw)
held = (rrl->rr_writer == curthread);
} else {
held = (!refcount_is_zero(&rrl->rr_anon_rcount) ||
!refcount_is_zero(&rrl->rr_linked_rcount));
rrn_find(rrl) != NULL);
}
mutex_exit(&rrl->rr_lock);

View File

@ -1004,10 +1004,10 @@ sa_setup(objset_t *os, uint64_t sa_obj, sa_attr_reg_t *reg_attrs, int count,
sa_attr_type_t *tb;
int error;
mutex_enter(&os->os_lock);
mutex_enter(&os->os_user_ptr_lock);
if (os->os_sa) {
mutex_enter(&os->os_sa->sa_lock);
mutex_exit(&os->os_lock);
mutex_exit(&os->os_user_ptr_lock);
tb = os->os_sa->sa_user_table;
mutex_exit(&os->os_sa->sa_lock);
*user_table = tb;
@ -1020,7 +1020,7 @@ sa_setup(objset_t *os, uint64_t sa_obj, sa_attr_reg_t *reg_attrs, int count,
os->os_sa = sa;
mutex_enter(&sa->sa_lock);
mutex_exit(&os->os_lock);
mutex_exit(&os->os_user_ptr_lock);
avl_create(&sa->sa_layout_num_tree, layout_num_compare,
sizeof (sa_lot_t), offsetof(sa_lot_t, lot_num_node));
avl_create(&sa->sa_layout_hash_tree, layout_hash_compare,

View File

@ -61,6 +61,9 @@
#include <sys/spa_boot.h>
#include <sys/zfs_ioctl.h>
#include <sys/dsl_scan.h>
#include <sys/dmu_send.h>
#include <sys/dsl_destroy.h>
#include <sys/dsl_userhold.h>
#include <sys/zfeature.h>
#include <sys/zvol.h>
#include <sys/trim_map.h>
@ -120,10 +123,8 @@ const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL },
};
static dsl_syncfunc_t spa_sync_version;
static dsl_syncfunc_t spa_sync_props;
static dsl_checkfunc_t spa_change_guid_check;
static dsl_syncfunc_t spa_change_guid_sync;
static void spa_sync_version(void *arg, dmu_tx_t *tx);
static void spa_sync_props(void *arg, dmu_tx_t *tx);
static boolean_t spa_has_active_shared_spare(spa_t *spa);
static int spa_load_impl(spa_t *spa, uint64_t, nvlist_t *config,
spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig,
@ -324,10 +325,10 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp)
dsl_dataset_t *ds = NULL;
dp = spa_get_dsl(spa);
rw_enter(&dp->dp_config_rwlock, RW_READER);
dsl_pool_config_enter(dp, FTAG);
if (err = dsl_dataset_hold_obj(dp,
za.za_first_integer, FTAG, &ds)) {
rw_exit(&dp->dp_config_rwlock);
dsl_pool_config_exit(dp, FTAG);
break;
}
@ -336,7 +337,7 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp)
KM_SLEEP);
dsl_dataset_name(ds, strval);
dsl_dataset_rele(ds, FTAG);
rw_exit(&dp->dp_config_rwlock);
dsl_pool_config_exit(dp, FTAG);
} else {
strval = NULL;
intval = za.za_first_integer;
@ -490,9 +491,10 @@ spa_prop_validate(spa_t *spa, nvlist_t *props)
if (dmu_objset_type(os) != DMU_OST_ZFS) {
error = ENOTSUP;
} else if ((error = dsl_prop_get_integer(strval,
} else if ((error =
dsl_prop_get_int_ds(dmu_objset_ds(os),
zfs_prop_to_name(ZFS_PROP_COMPRESSION),
&compress, NULL)) == 0 &&
&compress)) == 0 &&
!BOOTFS_COMPRESS_VALID(compress)) {
error = ENOTSUP;
} else {
@ -659,8 +661,8 @@ spa_prop_set(spa_t *spa, nvlist_t *nvp)
* read object, the features for write object, or the
* feature descriptions object.
*/
error = dsl_sync_task_do(spa_get_dsl(spa), NULL,
spa_sync_version, spa, &ver, 6);
error = dsl_sync_task(spa->spa_name, NULL,
spa_sync_version, &ver, 6);
if (error)
return (error);
continue;
@ -671,8 +673,8 @@ spa_prop_set(spa_t *spa, nvlist_t *nvp)
}
if (need_sync) {
return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props,
spa, nvp, 6));
return (dsl_sync_task(spa->spa_name, NULL, spa_sync_props,
nvp, 6));
}
return (0);
@ -694,10 +696,10 @@ spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx)
/*ARGSUSED*/
static int
spa_change_guid_check(void *arg1, void *arg2, dmu_tx_t *tx)
spa_change_guid_check(void *arg, dmu_tx_t *tx)
{
spa_t *spa = arg1;
uint64_t *newguid = arg2;
uint64_t *newguid = arg;
spa_t *spa = dmu_tx_pool(tx)->dp_spa;
vdev_t *rvd = spa->spa_root_vdev;
uint64_t vdev_state;
@ -714,10 +716,10 @@ spa_change_guid_check(void *arg1, void *arg2, dmu_tx_t *tx)
}
static void
spa_change_guid_sync(void *arg1, void *arg2, dmu_tx_t *tx)
spa_change_guid_sync(void *arg, dmu_tx_t *tx)
{
spa_t *spa = arg1;
uint64_t *newguid = arg2;
uint64_t *newguid = arg;
spa_t *spa = dmu_tx_pool(tx)->dp_spa;
uint64_t oldguid;
vdev_t *rvd = spa->spa_root_vdev;
@ -751,8 +753,8 @@ spa_change_guid(spa_t *spa)
mutex_enter(&spa_namespace_lock);
guid = spa_generate_guid(NULL);
error = dsl_sync_task_do(spa_get_dsl(spa), spa_change_guid_check,
spa_change_guid_sync, spa, &guid, 5);
error = dsl_sync_task(spa->spa_name, spa_change_guid_check,
spa_change_guid_sync, &guid, 5);
if (error == 0) {
spa_config_sync(spa, B_FALSE, B_TRUE);
@ -1654,21 +1656,22 @@ spa_config_valid(spa_t *spa, nvlist_t *config)
/*
* Check for missing log devices
*/
static int
static boolean_t
spa_check_logs(spa_t *spa)
{
boolean_t rv = B_FALSE;
switch (spa->spa_log_state) {
case SPA_LOG_MISSING:
/* need to recheck in case slog has been restored */
case SPA_LOG_UNKNOWN:
if (dmu_objset_find(spa->spa_name, zil_check_log_chain, NULL,
DS_FIND_CHILDREN)) {
rv = (dmu_objset_find(spa->spa_name, zil_check_log_chain,
NULL, DS_FIND_CHILDREN) != 0);
if (rv)
spa_set_log_state(spa, SPA_LOG_MISSING);
return (1);
}
break;
}
return (0);
return (rv);
}
static boolean_t
@ -1714,11 +1717,11 @@ spa_activate_log(spa_t *spa)
int
spa_offline_log(spa_t *spa)
{
int error = 0;
if ((error = dmu_objset_find(spa_name(spa), zil_vdev_offline,
NULL, DS_FIND_CHILDREN)) == 0) {
int error;
error = dmu_objset_find(spa_name(spa), zil_vdev_offline,
NULL, DS_FIND_CHILDREN);
if (error == 0) {
/*
* We successfully offlined the log device, sync out the
* current txg so that the "stubby" block can be removed
@ -3527,7 +3530,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
if (props != NULL) {
spa_configfile_set(spa, props, B_FALSE);
spa_sync_props(spa, props, tx);
spa_sync_props(props, tx);
}
dmu_tx_commit(tx);
@ -6000,10 +6003,11 @@ spa_sync_config_object(spa_t *spa, dmu_tx_t *tx)
}
static void
spa_sync_version(void *arg1, void *arg2, dmu_tx_t *tx)
spa_sync_version(void *arg, dmu_tx_t *tx)
{
spa_t *spa = arg1;
uint64_t version = *(uint64_t *)arg2;
uint64_t *versionp = arg;
uint64_t version = *versionp;
spa_t *spa = dmu_tx_pool(tx)->dp_spa;
/*
* Setting the version is special cased when first creating the pool.
@ -6022,11 +6026,11 @@ spa_sync_version(void *arg1, void *arg2, dmu_tx_t *tx)
* Set zpool properties.
*/
static void
spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx)
spa_sync_props(void *arg, dmu_tx_t *tx)
{
spa_t *spa = arg1;
nvlist_t *nvp = arg;
spa_t *spa = dmu_tx_pool(tx)->dp_spa;
objset_t *mos = spa->spa_meta_objset;
nvlist_t *nvp = arg2;
nvpair_t *elem = NULL;
mutex_enter(&spa->spa_props_lock);
@ -6177,6 +6181,8 @@ spa_sync_upgrades(spa_t *spa, dmu_tx_t *tx)
ASSERT(spa->spa_sync_pass == 1);
rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG);
if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN &&
spa->spa_uberblock.ub_version >= SPA_VERSION_ORIGIN) {
dsl_pool_create_origin(dp, tx);
@ -6202,6 +6208,7 @@ spa_sync_upgrades(spa_t *spa, dmu_tx_t *tx)
spa->spa_uberblock.ub_version >= SPA_VERSION_FEATURES) {
spa_feature_create_zap_objects(spa, tx);
}
rrw_exit(&dp->dp_config_rwlock, FTAG);
}
/*

View File

@ -194,10 +194,10 @@ spa_history_zone(void)
*/
/*ARGSUSED*/
static void
spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx)
spa_history_log_sync(void *arg, dmu_tx_t *tx)
{
spa_t *spa = arg1;
nvlist_t *nvl = arg2;
nvlist_t *nvl = arg;
spa_t *spa = dmu_tx_pool(tx)->dp_spa;
objset_t *mos = spa->spa_meta_objset;
dmu_buf_t *dbp;
spa_history_phys_t *shpp;
@ -219,7 +219,7 @@ spa_history_log_sync(void *arg1, void *arg2, dmu_tx_t *tx)
* Get the offset of where we need to write via the bonus buffer.
* Update the offset when the write completes.
*/
VERIFY(0 == dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp));
VERIFY0(dmu_bonus_hold(mos, spa->spa_history, FTAG, &dbp));
shpp = dbp->db_data;
dmu_buf_will_dirty(dbp, tx);
@ -323,8 +323,8 @@ spa_history_log_nvl(spa_t *spa, nvlist_t *nvl)
fnvlist_add_uint64(nvarg, ZPOOL_HIST_WHO, crgetruid(CRED()));
/* Kick this off asynchronously; errors are ignored. */
dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL,
spa_history_log_sync, spa, nvarg, 0, tx);
dsl_sync_task_nowait(spa_get_dsl(spa), spa_history_log_sync,
nvarg, 0, tx);
dmu_tx_commit(tx);
/* spa_history_log_sync will free nvl */
@ -462,10 +462,10 @@ log_internal(nvlist_t *nvl, const char *operation, spa_t *spa,
fnvlist_add_uint64(nvl, ZPOOL_HIST_TXG, tx->tx_txg);
if (dmu_tx_is_syncing(tx)) {
spa_history_log_sync(spa, nvl, tx);
spa_history_log_sync(nvl, tx);
} else {
dsl_sync_task_do_nowait(spa_get_dsl(spa), NULL,
spa_history_log_sync, spa, nvl, 0, tx);
dsl_sync_task_nowait(spa_get_dsl(spa),
spa_history_log_sync, nvl, 0, tx);
}
/* spa_history_log_sync() will free nvl */
}
@ -537,17 +537,9 @@ spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation,
void
spa_history_log_version(spa_t *spa, const char *operation)
{
#ifdef _KERNEL
uint64_t current_vers = spa_version(spa);
spa_history_log_internal(spa, operation, NULL,
"pool version %llu; software version %llu/%d; uts %s %s %s %s",
(u_longlong_t)current_vers, SPA_VERSION, ZPL_VERSION,
(u_longlong_t)spa_version(spa), SPA_VERSION, ZPL_VERSION,
utsname.nodename, utsname.release, utsname.version,
utsname.machine);
#ifdef illumos
cmn_err(CE_CONT, "!%s version %llu pool %s using %llu", operation,
(u_longlong_t)current_vers, spa_name(spa), SPA_VERSION);
#endif
#endif
}

View File

@ -238,8 +238,8 @@ kmem_cache_t *spa_buffer_pool;
int spa_mode_global;
#ifdef ZFS_DEBUG
/* Everything except dprintf is on by default in debug builds */
int zfs_flags = ~ZFS_DEBUG_DPRINTF;
/* Everything except dprintf and spa is on by default in debug builds */
int zfs_flags = ~(ZFS_DEBUG_DPRINTF | ZFS_DEBUG_SPA);
#else
int zfs_flags = 0;
#endif
@ -314,7 +314,7 @@ spa_config_lock_init(spa_t *spa)
spa_config_lock_t *scl = &spa->spa_config_lock[i];
mutex_init(&scl->scl_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&scl->scl_cv, NULL, CV_DEFAULT, NULL);
refcount_create(&scl->scl_count);
refcount_create_untracked(&scl->scl_count);
scl->scl_writer = NULL;
scl->scl_write_wanted = 0;
}
@ -367,6 +367,8 @@ spa_config_enter(spa_t *spa, int locks, void *tag, krw_t rw)
{
int wlocks_held = 0;
ASSERT3U(SCL_LOCKS, <, sizeof (wlocks_held) * NBBY);
for (int i = 0; i < SCL_LOCKS; i++) {
spa_config_lock_t *scl = &spa->spa_config_lock[i];
if (scl->scl_writer == curthread)
@ -445,27 +447,22 @@ spa_lookup(const char *name)
static spa_t search; /* spa_t is large; don't allocate on stack */
spa_t *spa;
avl_index_t where;
char c;
char *cp;
ASSERT(MUTEX_HELD(&spa_namespace_lock));
(void) strlcpy(search.spa_name, name, sizeof (search.spa_name));
/*
* If it's a full dataset name, figure out the pool name and
* just use that.
*/
cp = strpbrk(name, "/@");
if (cp) {
c = *cp;
cp = strpbrk(search.spa_name, "/@");
if (cp != NULL)
*cp = '\0';
}
(void) strlcpy(search.spa_name, name, sizeof (search.spa_name));
spa = avl_find(&spa_namespace_avl, &search, &where);
if (cp)
*cp = c;
return (spa);
}
@ -600,6 +597,8 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
KM_SLEEP) == 0);
}
spa->spa_debug = ((zfs_flags & ZFS_DEBUG_SPA) != 0);
return (spa);
}

View File

@ -109,7 +109,7 @@ void
space_map_add(space_map_t *sm, uint64_t start, uint64_t size)
{
avl_index_t where;
space_seg_t ssearch, *ss_before, *ss_after, *ss;
space_seg_t *ss_before, *ss_after, *ss;
uint64_t end = start + size;
int merge_before, merge_after;
@ -122,11 +122,8 @@ space_map_add(space_map_t *sm, uint64_t start, uint64_t size)
VERIFY(P2PHASE(start, 1ULL << sm->sm_shift) == 0);
VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0);
again:
ssearch.ss_start = start;
ssearch.ss_end = end;
ss = avl_find(&sm->sm_root, &ssearch, &where);
if (ss != NULL && ss->ss_start <= start && ss->ss_end >= end) {
ss = space_map_find(sm, start, size, &where);
if (ss != NULL) {
zfs_panic_recover("zfs: allocating allocated segment"
"(offset=%llu size=%llu)\n",
(longlong_t)start, (longlong_t)size);
@ -194,19 +191,19 @@ again:
void
space_map_remove(space_map_t *sm, uint64_t start, uint64_t size)
{
space_seg_t ssearch, *ss, *newseg;
#ifdef illumos
avl_index_t where;
#endif
space_seg_t *ss, *newseg;
uint64_t end = start + size;
int left_over, right_over;
ASSERT(MUTEX_HELD(sm->sm_lock));
VERIFY(!sm->sm_condensing);
VERIFY(size != 0);
VERIFY(P2PHASE(start, 1ULL << sm->sm_shift) == 0);
VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0);
ssearch.ss_start = start;
ssearch.ss_end = end;
ss = avl_find(&sm->sm_root, &ssearch, NULL);
#ifdef illumos
ss = space_map_find(sm, start, size, &where);
#else
ss = space_map_find(sm, start, size, NULL);
#endif
/* Make sure we completely overlap with someone */
if (ss == NULL) {
@ -249,12 +246,11 @@ space_map_remove(space_map_t *sm, uint64_t start, uint64_t size)
sm->sm_space -= size;
}
boolean_t
space_map_contains(space_map_t *sm, uint64_t start, uint64_t size)
space_seg_t *
space_map_find(space_map_t *sm, uint64_t start, uint64_t size,
avl_index_t *wherep)
{
avl_index_t where;
space_seg_t ssearch, *ss;
uint64_t end = start + size;
ASSERT(MUTEX_HELD(sm->sm_lock));
VERIFY(size != 0);
@ -262,10 +258,20 @@ space_map_contains(space_map_t *sm, uint64_t start, uint64_t size)
VERIFY(P2PHASE(size, 1ULL << sm->sm_shift) == 0);
ssearch.ss_start = start;
ssearch.ss_end = end;
ss = avl_find(&sm->sm_root, &ssearch, &where);
ssearch.ss_end = start + size;
ss = avl_find(&sm->sm_root, &ssearch, wherep);
return (ss != NULL && ss->ss_start <= start && ss->ss_end >= end);
if (ss != NULL && ss->ss_start <= start && ss->ss_end >= start + size)
return (ss);
return (NULL);
}
boolean_t
space_map_contains(space_map_t *sm, uint64_t start, uint64_t size)
{
avl_index_t where;
return (space_map_find(sm, start, size, &where) != 0);
}
void

View File

@ -89,7 +89,7 @@ arc_buf_t *arc_loan_buf(spa_t *spa, int size);
void arc_return_buf(arc_buf_t *buf, void *tag);
void arc_loan_inuse_buf(arc_buf_t *buf, void *tag);
void arc_buf_add_ref(arc_buf_t *buf, void *tag);
int arc_buf_remove_ref(arc_buf_t *buf, void *tag);
boolean_t arc_buf_remove_ref(arc_buf_t *buf, void *tag);
int arc_buf_size(arc_buf_t *buf);
void arc_release(arc_buf_t *buf, void *tag);
int arc_released(arc_buf_t *buf);

View File

@ -311,20 +311,17 @@ void dbuf_fini(void);
boolean_t dbuf_is_metadata(dmu_buf_impl_t *db);
#define DBUF_IS_METADATA(_db) \
(dbuf_is_metadata(_db))
#define DBUF_GET_BUFC_TYPE(_db) \
(DBUF_IS_METADATA(_db) ? ARC_BUFC_METADATA : ARC_BUFC_DATA)
(dbuf_is_metadata(_db) ? ARC_BUFC_METADATA : ARC_BUFC_DATA)
#define DBUF_IS_CACHEABLE(_db) \
((_db)->db_objset->os_primary_cache == ZFS_CACHE_ALL || \
(DBUF_IS_METADATA(_db) && \
(dbuf_is_metadata(_db) && \
((_db)->db_objset->os_primary_cache == ZFS_CACHE_METADATA)))
#define DBUF_IS_L2CACHEABLE(_db) \
((_db)->db_objset->os_secondary_cache == ZFS_CACHE_ALL || \
(DBUF_IS_METADATA(_db) && \
(dbuf_is_metadata(_db) && \
((_db)->db_objset->os_secondary_cache == ZFS_CACHE_METADATA)))
#ifdef ZFS_DEBUG

View File

@ -217,6 +217,11 @@ typedef enum dmu_object_type {
DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE),
} dmu_object_type_t;
/*
 * Values accepted by the txg_how argument of dmu_tx_assign().
 */
typedef enum txg_how {
TXG_WAIT = 1,
TXG_NOWAIT,
} txg_how_t;
void byteswap_uint64_array(void *buf, size_t size);
void byteswap_uint32_array(void *buf, size_t size);
void byteswap_uint16_array(void *buf, size_t size);
@ -255,24 +260,21 @@ void dmu_objset_rele(objset_t *os, void *tag);
void dmu_objset_disown(objset_t *os, void *tag);
int dmu_objset_open_ds(struct dsl_dataset *ds, objset_t **osp);
int dmu_objset_evict_dbufs(objset_t *os);
void dmu_objset_evict_dbufs(objset_t *os);
int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
int dmu_objset_clone(const char *name, struct dsl_dataset *clone_origin,
uint64_t flags);
int dmu_objset_destroy(const char *name, boolean_t defer);
int dmu_get_recursive_snaps_nvl(const char *fsname, const char *snapname,
int dmu_get_recursive_snaps_nvl(char *fsname, const char *snapname,
struct nvlist *snaps);
int dmu_snapshots_destroy_nvl(struct nvlist *snaps, boolean_t defer,
int dmu_objset_clone(const char *name, const char *origin);
int dsl_destroy_snapshots_nvl(struct nvlist *snaps, boolean_t defer,
struct nvlist *errlist);
int dmu_objset_snapshot(struct nvlist *snaps, struct nvlist *, struct nvlist *);
int dmu_objset_snapshot_one(const char *fsname, const char *snapname);
int dmu_objset_snapshot_tmp(const char *, const char *, int);
int dmu_objset_rename(const char *name, const char *newname,
boolean_t recursive);
int dmu_objset_find(const char *name, int func(const char *, void *), void *arg,
int dmu_objset_find(char *name, int func(const char *, void *), void *arg,
int flags);
void dmu_objset_byteswap(void *buf, size_t size);
int dsl_dataset_rename_snapshot(const char *fsname,
const char *oldsnapname, const char *newsnapname, boolean_t recursive);
typedef struct dmu_buf {
uint64_t db_object; /* object that this buffer is part of */
@ -547,7 +549,7 @@ void dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object);
void dmu_tx_hold_sa(dmu_tx_t *tx, struct sa_handle *hdl, boolean_t may_grow);
void dmu_tx_hold_sa_create(dmu_tx_t *tx, int total_size);
void dmu_tx_abort(dmu_tx_t *tx);
int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
int dmu_tx_assign(dmu_tx_t *tx, enum txg_how txg_how);
void dmu_tx_wait(dmu_tx_t *tx);
void dmu_tx_commit(dmu_tx_t *tx);
@ -789,37 +791,8 @@ typedef void (*dmu_traverse_cb_t)(objset_t *os, void *arg, struct blkptr *bp,
uint64_t object, uint64_t offset, int len);
void dmu_traverse_objset(objset_t *os, uint64_t txg_start,
dmu_traverse_cb_t cb, void *arg);
int dmu_send(objset_t *tosnap, objset_t *fromsnap,
int outfd, struct file *fp, offset_t *off);
int dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, uint64_t *sizep);
typedef struct dmu_recv_cookie {
/*
* This structure is opaque!
*
* If logical and real are different, we are recving the stream
* into the "real" temporary clone, and then switching it with
* the "logical" target.
*/
struct dsl_dataset *drc_logical_ds;
struct dsl_dataset *drc_real_ds;
struct drr_begin *drc_drrb;
char *drc_tosnap;
char *drc_top_ds;
boolean_t drc_newfs;
boolean_t drc_force;
struct avl_tree *drc_guid_to_ds_map;
} dmu_recv_cookie_t;
int dmu_recv_begin(char *tofs, char *tosnap, char *topds, struct drr_begin *,
boolean_t force, objset_t *origin, dmu_recv_cookie_t *);
int dmu_recv_stream(dmu_recv_cookie_t *drc, struct file *fp, offset_t *voffp,
int cleanup_fd, uint64_t *action_handlep);
int dmu_recv_end(dmu_recv_cookie_t *drc);
int dmu_diff(objset_t *tosnap, objset_t *fromsnap, struct file *fp,
offset_t *off);
int dmu_diff(const char *tosnap_name, const char *fromsnap_name,
struct file *fp, offset_t *offp);
/* CRC64 table */
#define ZFS_CRC64_POLY 0xC96C5795D7870F42ULL /* ECMA-182, reflected form */

View File

@ -43,6 +43,7 @@ extern "C" {
extern krwlock_t os_lock;
struct dsl_pool;
struct dsl_dataset;
struct dmu_tx;
@ -114,8 +115,6 @@ struct objset {
/* stuff we store for the user */
kmutex_t os_user_ptr_lock;
void *os_user_ptr;
/* SA layout/attribute registration */
sa_os_t *os_sa;
};
@ -143,10 +142,11 @@ void dmu_objset_fast_stat(objset_t *os, dmu_objset_stats_t *stat);
void dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
uint64_t *usedobjsp, uint64_t *availobjsp);
uint64_t dmu_objset_fsid_guid(objset_t *os);
int dmu_objset_find_spa(spa_t *spa, const char *name,
int func(spa_t *, uint64_t, const char *, void *), void *arg, int flags);
int dmu_objset_find_dp(struct dsl_pool *dp, uint64_t ddobj,
int func(struct dsl_pool *, struct dsl_dataset *, void *),
void *arg, int flags);
int dmu_objset_prefetch(const char *name, void *arg);
int dmu_objset_evict_dbufs(objset_t *os);
void dmu_objset_evict_dbufs(objset_t *os);
timestruc_t dmu_objset_snap_cmtime(objset_t *os);
/* called from dsl */
@ -162,6 +162,7 @@ void dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx);
boolean_t dmu_objset_userused_enabled(objset_t *os);
int dmu_objset_userspace_upgrade(objset_t *os);
boolean_t dmu_objset_userspace_present(objset_t *os);
int dmu_fsname(const char *snapname, char *buf);
void dmu_objset_init(void);
void dmu_objset_fini(void);

View File

@ -0,0 +1,77 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
#ifndef _DMU_SEND_H
#define _DMU_SEND_H
#include <sys/spa.h>
struct vnode;
struct dsl_dataset;
struct drr_begin;
struct avl_tree;
int dmu_send(const char *tosnap, const char *fromsnap, int outfd,
#ifdef illumos
struct vnode *vp, offset_t *off);
#else
struct file *fp, offset_t *off);
#endif
int dmu_send_estimate(struct dsl_dataset *ds, struct dsl_dataset *fromds,
uint64_t *sizep);
int dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
#ifdef illumos
int outfd, struct vnode *vp, offset_t *off);
#else
int outfd, struct file *fp, offset_t *off);
#endif
/*
 * State passed by pointer to dmu_recv_begin(), dmu_recv_stream() and
 * dmu_recv_end().
 */
typedef struct dmu_recv_cookie {
struct dsl_dataset *drc_ds;
struct drr_begin *drc_drrb;
/* NOTE(review): presumably the tofs/tosnap names given to dmu_recv_begin() -- confirm */
const char *drc_tofs;
const char *drc_tosnap;
boolean_t drc_newfs;
boolean_t drc_byteswap;
boolean_t drc_force;
struct avl_tree *drc_guid_to_ds_map;
zio_cksum_t drc_cksum;
uint64_t drc_newsnapobj;
} dmu_recv_cookie_t;
int dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb,
boolean_t force, char *origin, dmu_recv_cookie_t *drc);
#ifdef illumos
int dmu_recv_stream(dmu_recv_cookie_t *drc, struct vnode *vp, offset_t *voffp,
#else
int dmu_recv_stream(dmu_recv_cookie_t *drc, struct file *fp, offset_t *voffp,
#endif
int cleanup_fd, uint64_t *action_handlep);
int dmu_recv_end(dmu_recv_cookie_t *drc);
#endif /* _DMU_SEND_H */

View File

@ -22,6 +22,9 @@
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_DMU_TX_H
#define _SYS_DMU_TX_H
@ -107,10 +110,11 @@ typedef struct dmu_tx_callback {
* These routines are defined in dmu.h, and are called by the user.
*/
dmu_tx_t *dmu_tx_create(objset_t *dd);
int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
int dmu_tx_assign(dmu_tx_t *tx, txg_how_t txg_how);
void dmu_tx_commit(dmu_tx_t *tx);
void dmu_tx_abort(dmu_tx_t *tx);
uint64_t dmu_tx_get_txg(dmu_tx_t *tx);
struct dsl_pool *dmu_tx_pool(dmu_tx_t *tx);
void dmu_tx_wait(dmu_tx_t *tx);
void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func,

View File

@ -20,8 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>.
* All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
@ -37,6 +35,7 @@
#include <sys/dsl_synctask.h>
#include <sys/zfs_context.h>
#include <sys/dsl_deadlist.h>
#include <sys/refcount.h>
#ifdef __cplusplus
extern "C" {
@ -50,10 +49,8 @@ struct dsl_pool;
#define DS_IS_INCONSISTENT(ds) \
((ds)->ds_phys->ds_flags & DS_FLAG_INCONSISTENT)
/*
* NB: nopromote can not yet be set, but we want support for it in this
* on-disk version, so that we don't need to upgrade for it later. It
* will be needed when we implement 'zfs split' (where the split off
* clone should not be promoted).
* Note: nopromote can not yet be set, but we want support for it in this
* on-disk version, so that we don't need to upgrade for it later.
*/
#define DS_FLAG_NOPROMOTE (1ULL<<1)
@ -78,6 +75,8 @@ struct dsl_pool;
*/
#define DS_FLAG_CI_DATASET (1ULL<<16)
#define DS_CREATE_FLAG_NODIRTY (1ULL<<24)
typedef struct dsl_dataset_phys {
uint64_t ds_dir_obj; /* DMU_OT_DSL_DIR */
uint64_t ds_prev_snap_obj; /* DMU_OT_DSL_DATASET */
@ -127,9 +126,6 @@ typedef struct dsl_dataset {
dsl_deadlist_t ds_deadlist;
bplist_t ds_pending_deadlist;
/* to protect against multiple concurrent incremental recv */
kmutex_t ds_recvlock;
/* protected by lock on pool's dp_dirty_datasets list */
txg_node_t ds_dirty_link;
list_node_t ds_synced_link;
@ -141,13 +137,15 @@ typedef struct dsl_dataset {
kmutex_t ds_lock;
objset_t *ds_objset;
uint64_t ds_userrefs;
void *ds_owner;
/*
* ds_owner is protected by the ds_rwlock and the ds_lock
* Long holds prevent the ds from being destroyed; they allow the
* ds to remain held even after dropping the dp_config_rwlock.
* Owning counts as a long hold. See the comments above
* dsl_pool_hold() for details.
*/
krwlock_t ds_rwlock;
kcondvar_t ds_exclusive_cv;
void *ds_owner;
refcount_t ds_longholds;
/* no locking; only for making guesses */
uint64_t ds_trysnap_txg;
@ -165,82 +163,44 @@ typedef struct dsl_dataset {
char ds_snapname[MAXNAMELEN];
} dsl_dataset_t;
struct dsl_ds_destroyarg {
dsl_dataset_t *ds; /* ds to destroy */
dsl_dataset_t *rm_origin; /* also remove our origin? */
boolean_t is_origin_rm; /* set if removing origin snap */
boolean_t defer; /* destroy -d requested? */
boolean_t releasing; /* destroying due to release? */
boolean_t need_prep; /* do we need to retry due to EBUSY? */
};
/*
* The max length of a temporary tag prefix is the number of hex digits
* required to express UINT64_MAX plus one for the hyphen.
*/
#define MAX_TAG_PREFIX_LEN 17
struct dsl_ds_holdarg {
dsl_sync_task_group_t *dstg;
const char *htag;
char *snapname;
boolean_t recursive;
boolean_t gotone;
boolean_t temphold;
char failed[MAXPATHLEN];
};
/*
* Flags for dsl_dataset_rename().
*/
#define ZFS_RENAME_RECURSIVE 0x01
#define ZFS_RENAME_ALLOW_MOUNTED 0x02
#define dsl_dataset_is_snapshot(ds) \
((ds)->ds_phys->ds_num_children != 0)
#define DS_UNIQUE_IS_ACCURATE(ds) \
(((ds)->ds_phys->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0)
int dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp);
int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj,
void *tag, dsl_dataset_t **);
int dsl_dataset_own(const char *name, boolean_t inconsistentok,
int dsl_dataset_hold(struct dsl_pool *dp, const char *name, void *tag,
dsl_dataset_t **dsp);
int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj, void *tag,
dsl_dataset_t **);
void dsl_dataset_rele(dsl_dataset_t *ds, void *tag);
int dsl_dataset_own(struct dsl_pool *dp, const char *name,
void *tag, dsl_dataset_t **dsp);
int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj,
boolean_t inconsistentok, void *tag, dsl_dataset_t **dsp);
void dsl_dataset_name(dsl_dataset_t *ds, char *name);
void dsl_dataset_rele(dsl_dataset_t *ds, void *tag);
void *tag, dsl_dataset_t **dsp);
void dsl_dataset_disown(dsl_dataset_t *ds, void *tag);
void dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag);
boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok,
void *tag);
void dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *tag);
void dsl_dataset_name(dsl_dataset_t *ds, char *name);
boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, void *tag);
void dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag,
minor_t minor);
uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname,
dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *);
uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
uint64_t flags, dmu_tx_t *tx);
int dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer);
dsl_checkfunc_t dsl_dataset_destroy_check;
dsl_syncfunc_t dsl_dataset_destroy_sync;
dsl_syncfunc_t dsl_dataset_user_hold_sync;
int dsl_dataset_snapshot_check(dsl_dataset_t *ds, const char *, dmu_tx_t *tx);
void dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *, dmu_tx_t *tx);
int dsl_dataset_rename(char *name, const char *newname, int flags);
int dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors);
int dsl_dataset_promote(const char *name, char *conflsnap);
int dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head,
boolean_t force);
int dsl_dataset_user_hold(char *dsname, char *snapname, char *htag,
boolean_t recursive, boolean_t temphold, int cleanup_fd);
int dsl_dataset_user_hold_for_send(dsl_dataset_t *ds, char *htag,
boolean_t temphold);
int dsl_dataset_user_release(char *dsname, char *snapname, char *htag,
boolean_t recursive);
int dsl_dataset_user_release_tmp(struct dsl_pool *dp, uint64_t dsobj,
char *htag, boolean_t retry);
int dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp);
int dsl_dataset_rename_snapshot(const char *fsname,
const char *oldsnapname, const char *newsnapname, boolean_t recursive);
int dsl_dataset_snapshot_tmp(const char *fsname, const char *snapname,
minor_t cleanup_minor, const char *htag);
blkptr_t *dsl_dataset_get_blkptr(dsl_dataset_t *ds);
void dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx);
@ -277,13 +237,35 @@ int dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf);
int dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota,
uint64_t asize, uint64_t inflight, uint64_t *used,
uint64_t *ref_rsrv);
int dsl_dataset_set_quota(const char *dsname, zprop_source_t source,
int dsl_dataset_set_refquota(const char *dsname, zprop_source_t source,
uint64_t quota);
dsl_syncfunc_t dsl_dataset_set_quota_sync;
int dsl_dataset_set_reservation(const char *dsname, zprop_source_t source,
int dsl_dataset_set_refreservation(const char *dsname, zprop_source_t source,
uint64_t reservation);
int dsl_destroy_inconsistent(const char *dsname, void *arg);
boolean_t dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier);
void dsl_dataset_long_hold(dsl_dataset_t *ds, void *tag);
void dsl_dataset_long_rele(dsl_dataset_t *ds, void *tag);
boolean_t dsl_dataset_long_held(dsl_dataset_t *ds);
int dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone,
dsl_dataset_t *origin_head, boolean_t force);
void dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone,
dsl_dataset_t *origin_head, dmu_tx_t *tx);
int dsl_dataset_snapshot_check_impl(dsl_dataset_t *ds, const char *snapname,
dmu_tx_t *tx);
void dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname,
dmu_tx_t *tx);
void dsl_dataset_remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj,
dmu_tx_t *tx);
void dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds);
int dsl_dataset_get_snapname(dsl_dataset_t *ds);
int dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name,
uint64_t *value);
int dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx);
void dsl_dataset_set_refreservation_sync_impl(dsl_dataset_t *ds,
zprop_source_t source, uint64_t value, dmu_tx_t *tx);
int dsl_dataset_rollback(const char *fsname);
#ifdef ZFS_DEBUG
#define dprintf_ds(ds, fmt, ...) do { \

View File

@ -0,0 +1,52 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
/*
* Prototypes for the dsl_destroy_* family of functions.
*
* The include guard below ensures the declarations are processed at most
* once per translation unit.
*
* NOTE(review): boolean_t is used in several prototypes, but this header
* includes nothing itself; it relies on the includer having already
* brought that type into scope.
*/
#ifndef _SYS_DSL_DESTROY_H
#define _SYS_DSL_DESTROY_H
/* Give the declarations C linkage when included from C++. */
#ifdef __cplusplus
extern "C" {
#endif
/*
* Forward declarations. These types are only referenced through pointers
* in the prototypes below, so their full definitions are not needed here.
*/
struct nvlist;
struct dsl_dataset;
struct dmu_tx;
/*
* NOTE(review): judging by their signatures, the functions taking a name
* or an nvlist look like the caller-facing entry points, while the
* *_check_impl/*_sync_impl functions operate on an already-obtained
* dsl_dataset (the sync variants within a dmu_tx) -- confirm against the
* implementation. "defer" presumably requests a deferred destroy
* (zfs destroy -d) -- confirm.
*/
int dsl_destroy_snapshots_nvl(struct nvlist *snaps, boolean_t defer,
struct nvlist *errlist);
int dsl_destroy_snapshot(const char *name, boolean_t defer);
int dsl_destroy_head(const char *name);
int dsl_destroy_head_check_impl(struct dsl_dataset *ds, int expected_holds);
void dsl_destroy_head_sync_impl(struct dsl_dataset *ds, struct dmu_tx *tx);
int dsl_destroy_inconsistent(const char *dsname, void *arg);
void dsl_destroy_snapshot_sync_impl(struct dsl_dataset *ds,
boolean_t defer, struct dmu_tx *tx);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_DSL_DESTROY_H */

View File

@ -20,8 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>.
* All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_DSL_DIR_H
@ -103,18 +102,15 @@ struct dsl_dir {
char dd_myname[MAXNAMELEN];
};
void dsl_dir_close(dsl_dir_t *dd, void *tag);
int dsl_dir_open(const char *name, void *tag, dsl_dir_t **, const char **tail);
int dsl_dir_open_spa(spa_t *spa, const char *name, void *tag, dsl_dir_t **,
const char **tailp);
int dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
void dsl_dir_rele(dsl_dir_t *dd, void *tag);
int dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag,
dsl_dir_t **, const char **tail);
int dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
const char *tail, void *tag, dsl_dir_t **);
void dsl_dir_name(dsl_dir_t *dd, char *buf);
int dsl_dir_namelen(dsl_dir_t *dd);
uint64_t dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds,
const char *name, dmu_tx_t *tx);
dsl_checkfunc_t dsl_dir_destroy_check;
dsl_syncfunc_t dsl_dir_destroy_sync;
void dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv);
uint64_t dsl_dir_space_available(dsl_dir_t *dd,
dsl_dir_t *ancestor, int64_t delta, int ondiskonly);
@ -133,14 +129,15 @@ int dsl_dir_set_quota(const char *ddname, zprop_source_t source,
uint64_t quota);
int dsl_dir_set_reservation(const char *ddname, zprop_source_t source,
uint64_t reservation);
int dsl_dir_rename(dsl_dir_t *dd, const char *newname, int flags);
int dsl_dir_rename(const char *oldname, const char *newname);
int dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space);
int dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx);
boolean_t dsl_dir_is_clone(dsl_dir_t *dd);
void dsl_dir_new_refreservation(dsl_dir_t *dd, struct dsl_dataset *ds,
uint64_t reservation, cred_t *cr, dmu_tx_t *tx);
void dsl_dir_snap_cmtime_update(dsl_dir_t *dd);
timestruc_t dsl_dir_snap_cmtime(dsl_dir_t *dd);
void dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value,
dmu_tx_t *tx);
/* internal reserved dir name */
#define MOS_DIR_NAME "$MOS"

View File

@ -36,6 +36,7 @@
#include <sys/arc.h>
#include <sys/bpobj.h>
#include <sys/bptree.h>
#include <sys/rrwlock.h>
#ifdef __cplusplus
extern "C" {
@ -113,7 +114,7 @@ typedef struct dsl_pool {
* syncing context does not need to ever have it for read, since
* nobody else could possibly have it for write.
*/
krwlock_t dp_config_rwlock;
rrwlock_t dp_config_rwlock;
zfs_all_blkstats_t *dp_blkstats;
} dsl_pool_t;
@ -139,15 +140,20 @@ void dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx);
void dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx);
void dsl_pool_mos_diduse_space(dsl_pool_t *dp,
int64_t used, int64_t comp, int64_t uncomp);
void dsl_pool_config_enter(dsl_pool_t *dp, void *tag);
void dsl_pool_config_exit(dsl_pool_t *dp, void *tag);
boolean_t dsl_pool_config_held(dsl_pool_t *dp);
taskq_t *dsl_pool_vnrele_taskq(dsl_pool_t *dp);
extern int dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj,
const char *tag, uint64_t *now, dmu_tx_t *tx);
extern int dsl_pool_user_release(dsl_pool_t *dp, uint64_t dsobj,
int dsl_pool_user_hold(dsl_pool_t *dp, uint64_t dsobj,
const char *tag, uint64_t now, dmu_tx_t *tx);
int dsl_pool_user_release(dsl_pool_t *dp, uint64_t dsobj,
const char *tag, dmu_tx_t *tx);
extern void dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp);
void dsl_pool_clean_tmp_userrefs(dsl_pool_t *dp);
int dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **);
int dsl_pool_hold(const char *name, void *tag, dsl_pool_t **dp);
void dsl_pool_rele(dsl_pool_t *dp, void *tag);
#ifdef __cplusplus
}

View File

@ -54,58 +54,47 @@ typedef struct dsl_props_arg {
zprop_source_t pa_source;
} dsl_props_arg_t;
typedef struct dsl_prop_set_arg {
const char *psa_name;
zprop_source_t psa_source;
int psa_intsz;
int psa_numints;
const void *psa_value;
/*
* Used to handle the special requirements of the quota and reservation
* properties.
*/
uint64_t psa_effective_value;
} dsl_prop_setarg_t;
int dsl_prop_register(struct dsl_dataset *ds, const char *propname,
dsl_prop_changed_cb_t *callback, void *cbarg);
int dsl_prop_unregister(struct dsl_dataset *ds, const char *propname,
dsl_prop_changed_cb_t *callback, void *cbarg);
int dsl_prop_numcb(struct dsl_dataset *ds);
void dsl_prop_notify_all(struct dsl_dir *dd);
boolean_t dsl_prop_hascb(struct dsl_dataset *ds);
int dsl_prop_get(const char *ddname, const char *propname,
int intsz, int numints, void *buf, char *setpoint);
int dsl_prop_get_integer(const char *ddname, const char *propname,
uint64_t *valuep, char *setpoint);
int dsl_prop_get_all(objset_t *os, nvlist_t **nvp);
int dsl_prop_get_received(objset_t *os, nvlist_t **nvp);
int dsl_prop_get_received(const char *dsname, nvlist_t **nvp);
int dsl_prop_get_ds(struct dsl_dataset *ds, const char *propname,
int intsz, int numints, void *buf, char *setpoint);
int dsl_prop_get_int_ds(struct dsl_dataset *ds, const char *propname,
uint64_t *valuep);
int dsl_prop_get_dd(struct dsl_dir *dd, const char *propname,
int intsz, int numints, void *buf, char *setpoint,
boolean_t snapshot);
dsl_syncfunc_t dsl_props_set_sync;
int dsl_prop_set(const char *ddname, const char *propname,
zprop_source_t source, int intsz, int numints, const void *buf);
void dsl_props_set_sync_impl(struct dsl_dataset *ds, zprop_source_t source,
nvlist_t *props, dmu_tx_t *tx);
void dsl_prop_set_sync_impl(struct dsl_dataset *ds, const char *propname,
zprop_source_t source, int intsz, int numints, const void *value,
dmu_tx_t *tx);
int dsl_props_set(const char *dsname, zprop_source_t source, nvlist_t *nvl);
int dsl_prop_set_int(const char *dsname, const char *propname,
zprop_source_t source, uint64_t value);
int dsl_prop_set_string(const char *dsname, const char *propname,
zprop_source_t source, const char *value);
int dsl_prop_inherit(const char *dsname, const char *propname,
zprop_source_t source);
void dsl_prop_setarg_init_uint64(dsl_prop_setarg_t *psa, const char *propname,
zprop_source_t source, uint64_t *value);
int dsl_prop_predict_sync(dsl_dir_t *dd, dsl_prop_setarg_t *psa);
#ifdef ZFS_DEBUG
void dsl_prop_check_prediction(dsl_dir_t *dd, dsl_prop_setarg_t *psa);
#define DSL_PROP_CHECK_PREDICTION(dd, psa) \
dsl_prop_check_prediction((dd), (psa))
#else
#define DSL_PROP_CHECK_PREDICTION(dd, psa) /* nothing */
#endif
int dsl_prop_predict(dsl_dir_t *dd, const char *propname,
zprop_source_t source, uint64_t value, uint64_t *newvalp);
/* flag first receive on or after SPA_VERSION_RECVD_PROPS */
boolean_t dsl_prop_get_hasrecvd(objset_t *os);
void dsl_prop_set_hasrecvd(objset_t *os);
void dsl_prop_unset_hasrecvd(objset_t *os);
boolean_t dsl_prop_get_hasrecvd(const char *dsname);
int dsl_prop_set_hasrecvd(const char *dsname);
void dsl_prop_unset_hasrecvd(const char *dsname);
void dsl_prop_nvlist_add_uint64(nvlist_t *nv, zfs_prop_t prop, uint64_t value);
void dsl_prop_nvlist_add_string(nvlist_t *nv,

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_DSL_SYNCTASK_H
@ -34,43 +35,26 @@ extern "C" {
struct dsl_pool;
typedef int (dsl_checkfunc_t)(void *, void *, dmu_tx_t *);
typedef void (dsl_syncfunc_t)(void *, void *, dmu_tx_t *);
typedef int (dsl_checkfunc_t)(void *, dmu_tx_t *);
typedef void (dsl_syncfunc_t)(void *, dmu_tx_t *);
typedef struct dsl_sync_task {
list_node_t dst_node;
txg_node_t dst_node;
struct dsl_pool *dst_pool;
uint64_t dst_txg;
int dst_space;
dsl_checkfunc_t *dst_checkfunc;
dsl_syncfunc_t *dst_syncfunc;
void *dst_arg1;
void *dst_arg2;
int dst_err;
void *dst_arg;
int dst_error;
boolean_t dst_nowaiter;
} dsl_sync_task_t;
typedef struct dsl_sync_task_group {
txg_node_t dstg_node;
list_t dstg_tasks;
struct dsl_pool *dstg_pool;
uint64_t dstg_txg;
int dstg_err;
int dstg_space;
boolean_t dstg_nowaiter;
} dsl_sync_task_group_t;
dsl_sync_task_group_t *dsl_sync_task_group_create(struct dsl_pool *dp);
void dsl_sync_task_create(dsl_sync_task_group_t *dstg,
dsl_checkfunc_t *, dsl_syncfunc_t *,
void *arg1, void *arg2, int blocks_modified);
int dsl_sync_task_group_wait(dsl_sync_task_group_t *dstg);
void dsl_sync_task_group_nowait(dsl_sync_task_group_t *dstg, dmu_tx_t *tx);
void dsl_sync_task_group_destroy(dsl_sync_task_group_t *dstg);
void dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx);
int dsl_sync_task_do(struct dsl_pool *dp,
dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
void *arg1, void *arg2, int blocks_modified);
void dsl_sync_task_do_nowait(struct dsl_pool *dp,
dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
void *arg1, void *arg2, int blocks_modified, dmu_tx_t *tx);
void dsl_sync_task_sync(dsl_sync_task_t *dst, dmu_tx_t *tx);
int dsl_sync_task(const char *pool, dsl_checkfunc_t *checkfunc,
dsl_syncfunc_t *syncfunc, void *arg, int blocks_modified);
void dsl_sync_task_nowait(struct dsl_pool *dp, dsl_syncfunc_t *syncfunc,
void *arg, int blocks_modified, dmu_tx_t *tx);
#ifdef __cplusplus
}

View File

@ -0,0 +1,57 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
/*
* Prototypes for the dataset user-hold functions (hold, release, query).
*
* The include guard below ensures the declarations are processed at most
* once per translation unit.
*/
#ifndef _SYS_DSL_USERHOLD_H
#define _SYS_DSL_USERHOLD_H
/*
* NOTE(review): these headers presumably supply the non-struct types used
* in the prototypes below (nvlist_t, minor_t, uint64_t, boolean_t) --
* confirm.
*/
#include <sys/nvpair.h>
#include <sys/types.h>
/* Give the declarations C linkage when included from C++. */
#ifdef __cplusplus
extern "C" {
#endif
/*
* Forward declarations. These types are only referenced through pointers
* in the prototypes below, so their full definitions are not needed here.
*/
struct dsl_pool;
struct dsl_dataset;
struct dmu_tx;
/*
* Hold and release take an nvlist of holds plus an nvlist for errors.
* NOTE(review): the layout of these nvlists, and whether errlist may be
* NULL, is not visible from this header -- confirm against the
* implementation.
*/
int dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor,
nvlist_t *errlist);
int dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist);
int dsl_dataset_get_holds(const char *dsname, nvlist_t *nvl);
/* Identifies the dataset by pool and object number rather than by name. */
void dsl_dataset_user_release_tmp(struct dsl_pool *dp, uint64_t dsobj,
const char *htag);
/*
* Per-dataset check/sync pair operating on an already-obtained
* dsl_dataset and a dmu_tx.
* NOTE(review): presumably the building blocks used by
* dsl_dataset_user_hold() and by other callers needing a single hold --
* confirm.
*/
int dsl_dataset_user_hold_check_one(struct dsl_dataset *ds, const char *htag,
boolean_t temphold, struct dmu_tx *tx);
void dsl_dataset_user_hold_sync_one(struct dsl_dataset *ds, const char *htag,
minor_t minor, uint64_t now, struct dmu_tx *tx);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_DSL_USERHOLD_H */

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_METASLAB_H
@ -56,6 +56,7 @@ extern int metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
extern void metaslab_free(spa_t *spa, const blkptr_t *bp, uint64_t txg,
boolean_t now);
extern int metaslab_claim(spa_t *spa, const blkptr_t *bp, uint64_t txg);
extern void metaslab_check_free(spa_t *spa, const blkptr_t *bp);
extern metaslab_class_t *metaslab_class_create(spa_t *spa,
space_map_ops_t *ops);

View File

@ -53,15 +53,17 @@ typedef struct reference {
typedef struct refcount {
kmutex_t rc_mtx;
boolean_t rc_tracked;
list_t rc_list;
list_t rc_removed;
uint64_t rc_count;
uint64_t rc_removed_count;
} refcount_t;
/* Note: refcount_t must be initialized with refcount_create() */
/* Note: refcount_t must be initialized with refcount_create[_untracked]() */
void refcount_create(refcount_t *rc);
void refcount_create_untracked(refcount_t *rc);
void refcount_destroy(refcount_t *rc);
void refcount_destroy_many(refcount_t *rc, uint64_t number);
int refcount_is_zero(refcount_t *rc);
@ -82,6 +84,7 @@ typedef struct refcount {
} refcount_t;
#define refcount_create(rc) ((rc)->rc_count = 0)
#define refcount_create_untracked(rc) ((rc)->rc_count = 0)
#define refcount_destroy(rc) ((rc)->rc_count = 0)
#define refcount_destroy_many(rc, number) ((rc)->rc_count = 0)
#define refcount_is_zero(rc) ((rc)->rc_count == 0)

View File

@ -57,6 +57,7 @@ typedef struct rrwlock {
refcount_t rr_anon_rcount;
refcount_t rr_linked_rcount;
boolean_t rr_writer_wanted;
boolean_t rr_track_all;
} rrwlock_t;
/*
@ -64,15 +65,19 @@ typedef struct rrwlock {
* 'tag' must be the same in a rrw_enter() as in its
* corresponding rrw_exit().
*/
void rrw_init(rrwlock_t *rrl);
void rrw_init(rrwlock_t *rrl, boolean_t track_all);
void rrw_destroy(rrwlock_t *rrl);
void rrw_enter(rrwlock_t *rrl, krw_t rw, void *tag);
void rrw_enter_read(rrwlock_t *rrl, void *tag);
void rrw_enter_write(rrwlock_t *rrl);
void rrw_exit(rrwlock_t *rrl, void *tag);
boolean_t rrw_held(rrwlock_t *rrl, krw_t rw);
void rrw_tsd_destroy(void *arg);
/*
* Convenience wrappers around rrw_held(). Each passes its argument through
* as the lock and fixes the mode being queried: RW_READER for
* RRW_READ_HELD(), RW_WRITER for RRW_WRITE_HELD().
*
* RRW_LOCK_HELD() is true if either query succeeds. It asks about the
* writer mode first and only then the reader mode, so its argument is
* expanded twice; do not pass an expression with side effects.
*/
#define RRW_READ_HELD(x) rrw_held(x, RW_READER)
#define RRW_WRITE_HELD(x) rrw_held(x, RW_WRITER)
#define RRW_LOCK_HELD(x) \
(rrw_held(x, RW_WRITER) || rrw_held(x, RW_READER))
#ifdef __cplusplus
}

View File

@ -149,6 +149,8 @@ extern void space_map_add(space_map_t *sm, uint64_t start, uint64_t size);
extern void space_map_remove(space_map_t *sm, uint64_t start, uint64_t size);
extern boolean_t space_map_contains(space_map_t *sm,
uint64_t start, uint64_t size);
extern space_seg_t *space_map_find(space_map_t *sm, uint64_t start,
uint64_t size, avl_index_t *wherep);
extern void space_map_swap(space_map_t **msrc, space_map_t **mdest);
extern void space_map_vacate(space_map_t *sm,
space_map_func_t *func, space_map_t *mdest);

View File

@ -45,9 +45,6 @@ extern "C" {
/* Number of txgs worth of frees we defer adding to in-core spacemaps */
#define TXG_DEFER_SIZE 2
#define TXG_WAIT 1ULL
#define TXG_NOWAIT 2ULL
typedef struct tx_cpu tx_cpu_t;
typedef struct txg_handle {
@ -119,11 +116,11 @@ extern boolean_t txg_sync_waiting(struct dsl_pool *dp);
extern void txg_list_create(txg_list_t *tl, size_t offset);
extern void txg_list_destroy(txg_list_t *tl);
extern boolean_t txg_list_empty(txg_list_t *tl, uint64_t txg);
extern int txg_list_add(txg_list_t *tl, void *p, uint64_t txg);
extern int txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg);
extern boolean_t txg_list_add(txg_list_t *tl, void *p, uint64_t txg);
extern boolean_t txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg);
extern void *txg_list_remove(txg_list_t *tl, uint64_t txg);
extern void *txg_list_remove_this(txg_list_t *tl, void *p, uint64_t txg);
extern int txg_list_member(txg_list_t *tl, void *p, uint64_t txg);
extern boolean_t txg_list_member(txg_list_t *tl, void *p, uint64_t txg);
extern void *txg_list_head(txg_list_t *tl, uint64_t txg);
extern void *txg_list_next(txg_list_t *tl, void *p, uint64_t txg);

View File

@ -26,7 +26,6 @@
#ifndef _SYS_ZFEATURE_H
#define _SYS_ZFEATURE_H
#include <sys/dmu.h>
#include <sys/nvpair.h>
#include "zfeature_common.h"
@ -34,14 +33,18 @@
extern "C" {
#endif
extern boolean_t feature_is_supported(objset_t *os, uint64_t obj,
struct spa;
struct dmu_tx;
struct objset;
extern boolean_t feature_is_supported(struct objset *os, uint64_t obj,
uint64_t desc_obj, nvlist_t *unsup_feat, nvlist_t *enabled_feat);
struct spa;
extern void spa_feature_create_zap_objects(struct spa *, dmu_tx_t *);
extern void spa_feature_enable(struct spa *, zfeature_info_t *, dmu_tx_t *);
extern void spa_feature_incr(struct spa *, zfeature_info_t *, dmu_tx_t *);
extern void spa_feature_decr(struct spa *, zfeature_info_t *, dmu_tx_t *);
extern void spa_feature_create_zap_objects(struct spa *, struct dmu_tx *);
extern void spa_feature_enable(struct spa *, zfeature_info_t *,
struct dmu_tx *);
extern void spa_feature_incr(struct spa *, zfeature_info_t *, struct dmu_tx *);
extern void spa_feature_decr(struct spa *, zfeature_info_t *, struct dmu_tx *);
extern boolean_t spa_feature_is_enabled(struct spa *, zfeature_info_t *);
extern boolean_t spa_feature_is_active(struct spa *, zfeature_info_t *);

View File

@ -50,11 +50,13 @@ extern "C" {
extern int zfs_flags;
#define ZFS_DEBUG_DPRINTF 0x0001
#define ZFS_DEBUG_DBUF_VERIFY 0x0002
#define ZFS_DEBUG_DNODE_VERIFY 0x0004
#define ZFS_DEBUG_SNAPNAMES 0x0008
#define ZFS_DEBUG_MODIFY 0x0010
#define ZFS_DEBUG_DPRINTF (1<<0)
#define ZFS_DEBUG_DBUF_VERIFY (1<<1)
#define ZFS_DEBUG_DNODE_VERIFY (1<<2)
#define ZFS_DEBUG_SNAPNAMES (1<<3)
#define ZFS_DEBUG_MODIFY (1<<4)
#define ZFS_DEBUG_SPA (1<<5)
#define ZFS_DEBUG_ZIO_FREE (1<<6)
#ifdef ZFS_DEBUG
extern void __dprintf(const char *file, const char *func,

View File

@ -309,7 +309,6 @@ typedef struct zfs_cmd {
uint64_t zc_history; /* really (char *) */
char zc_value[MAXPATHLEN * 2];
char zc_string[MAXNAMELEN];
char zc_top_ds[MAXPATHLEN];
uint64_t zc_guid;
uint64_t zc_nvlist_conf; /* really (char *) */
uint64_t zc_nvlist_conf_size;
@ -361,7 +360,8 @@ extern int zfs_secpolicy_rename_perms(const char *from,
const char *to, cred_t *cr);
extern int zfs_secpolicy_destroy_perms(const char *name, cred_t *cr);
extern int zfs_busy(void);
extern int zfs_unmount_snap(const char *, void *);
extern void zfs_unmount_snap(const char *);
extern void zfs_destroy_unmount_origin(const char *);
/*
* ZFS minor numbers can refer to either a control device instance or

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_FS_ZFS_ZNODE_H
@ -259,7 +260,7 @@ VTOZ(vnode_t *vp)
*/
#define ZFS_ENTER(zfsvfs) \
{ \
rrw_enter(&(zfsvfs)->z_teardown_lock, RW_READER, FTAG); \
rrw_enter_read(&(zfsvfs)->z_teardown_lock, FTAG); \
if ((zfsvfs)->z_unmounted) { \
ZFS_EXIT(zfsvfs); \
return (EIO); \

View File

@ -411,8 +411,8 @@ extern int zil_check_log_chain(const char *osname, void *txarg);
extern void zil_sync(zilog_t *zilog, dmu_tx_t *tx);
extern void zil_clean(zilog_t *zilog, uint64_t synced_txg);
extern int zil_suspend(zilog_t *zilog);
extern void zil_resume(zilog_t *zilog);
extern int zil_suspend(const char *osname, void **cookiep);
extern void zil_resume(void *cookie);
extern void zil_add_block(zilog_t *zilog, const blkptr_t *bp);
extern int zil_bp_tree_add(zilog_t *zilog, const blkptr_t *bp);

View File

@ -585,6 +585,8 @@ txg_wait_synced(dsl_pool_t *dp, uint64_t txg)
{
tx_state_t *tx = &dp->dp_tx;
ASSERT(!dsl_pool_config_held(dp));
mutex_enter(&tx->tx_sync_lock);
ASSERT(tx->tx_threads == 2);
if (txg == 0)
@ -608,6 +610,8 @@ txg_wait_open(dsl_pool_t *dp, uint64_t txg)
{
tx_state_t *tx = &dp->dp_tx;
ASSERT(!dsl_pool_config_held(dp));
mutex_enter(&tx->tx_sync_lock);
ASSERT(tx->tx_threads == 2);
if (txg == 0)
@ -673,42 +677,43 @@ txg_list_empty(txg_list_t *tl, uint64_t txg)
}
/*
* Add an entry to the list.
* Returns 0 if it's a new entry, 1 if it's already there.
* Add an entry to the list (unless it's already on the list).
* Returns B_TRUE if it was actually added.
*/
int
boolean_t
txg_list_add(txg_list_t *tl, void *p, uint64_t txg)
{
int t = txg & TXG_MASK;
txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);
int already_on_list;
boolean_t add;
mutex_enter(&tl->tl_lock);
already_on_list = tn->tn_member[t];
if (!already_on_list) {
add = (tn->tn_member[t] == 0);
if (add) {
tn->tn_member[t] = 1;
tn->tn_next[t] = tl->tl_head[t];
tl->tl_head[t] = tn;
}
mutex_exit(&tl->tl_lock);
return (already_on_list);
return (add);
}
/*
* Add an entry to the end of the list (walks list to find end).
* Returns 0 if it's a new entry, 1 if it's already there.
* Add an entry to the end of the list, unless it's already on the list.
* (walks list to find end)
* Returns B_TRUE if it was actually added.
*/
int
boolean_t
txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg)
{
int t = txg & TXG_MASK;
txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);
int already_on_list;
boolean_t add;
mutex_enter(&tl->tl_lock);
already_on_list = tn->tn_member[t];
if (!already_on_list) {
add = (tn->tn_member[t] == 0);
if (add) {
txg_node_t **tp;
for (tp = &tl->tl_head[t]; *tp != NULL; tp = &(*tp)->tn_next[t])
@ -720,7 +725,7 @@ txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg)
}
mutex_exit(&tl->tl_lock);
return (already_on_list);
return (add);
}
/*
@ -771,13 +776,13 @@ txg_list_remove_this(txg_list_t *tl, void *p, uint64_t txg)
return (NULL);
}
int
boolean_t
txg_list_member(txg_list_t *tl, void *p, uint64_t txg)
{
int t = txg & TXG_MASK;
txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);
return (tn->tn_member[t]);
return (tn->tn_member[t] != 0);
}
/*

View File

@ -20,8 +20,6 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>.
* All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
@ -74,6 +72,7 @@
#include <sys/gfs.h>
#include <sys/stat.h>
#include <sys/dmu.h>
#include <sys/dsl_destroy.h>
#include <sys/dsl_deleg.h>
#include <sys/mount.h>
#include <sys/sunddi.h>
@ -744,7 +743,7 @@ zfsctl_snapdir_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
zfsvfs_t *zfsvfs;
avl_index_t where;
char from[MAXNAMELEN], to[MAXNAMELEN];
char real[MAXNAMELEN];
char real[MAXNAMELEN], fsname[MAXNAMELEN];
int err;
zfsvfs = sdvp->v_vfsp->vfs_data;
@ -763,12 +762,14 @@ zfsctl_snapdir_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
ZFS_EXIT(zfsvfs);
dmu_objset_name(zfsvfs->z_os, fsname);
err = zfsctl_snapshot_zname(sdvp, snm, MAXNAMELEN, from);
if (!err)
if (err == 0)
err = zfsctl_snapshot_zname(tdvp, tnm, MAXNAMELEN, to);
if (!err)
if (err == 0)
err = zfs_secpolicy_rename_perms(from, to, cr);
if (err)
if (err != 0)
return (err);
/*
@ -788,7 +789,7 @@ zfsctl_snapdir_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
return (ENOENT);
}
err = dmu_objset_rename(from, to, 0);
err = dsl_dataset_rename_snapshot(fsname, snm, tnm, 0);
if (err == 0)
zfsctl_rename_snap(sdp, sep, tnm);
@ -830,9 +831,9 @@ zfsctl_snapdir_remove(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr,
ZFS_EXIT(zfsvfs);
err = zfsctl_snapshot_zname(dvp, name, MAXNAMELEN, snapname);
if (!err)
if (err == 0)
err = zfs_secpolicy_destroy_perms(snapname, cr);
if (err)
if (err != 0)
return (err);
mutex_enter(&sdp->sd_lock);
@ -842,13 +843,10 @@ zfsctl_snapdir_remove(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr,
if (sep) {
avl_remove(&sdp->sd_snaps, sep);
err = zfsctl_unmount_snap(sep, MS_FORCE, cr);
if (err) {
avl_index_t where;
if (avl_find(&sdp->sd_snaps, sep, &where) == NULL)
avl_insert(&sdp->sd_snaps, sep, where);
} else
err = dmu_objset_destroy(snapname, B_FALSE);
if (err != 0)
avl_add(&sdp->sd_snaps, sep);
else
err = dsl_destroy_snapshot(snapname, B_FALSE);
} else {
err = ENOENT;
}
@ -881,12 +879,12 @@ zfsctl_snapdir_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp,
*vpp = NULL;
err = zfs_secpolicy_snapshot_perms(name, cr);
if (err)
if (err != 0)
return (err);
if (err == 0) {
err = dmu_objset_snapshot_one(name, dirname);
if (err)
if (err != 0)
return (err);
err = lookupnameat(dirname, seg, follow, NULL, vpp, dvp);
}
@ -994,7 +992,7 @@ zfsctl_snapdir_lookup(ap)
*vpp = sep->se_root;
VN_HOLD(*vpp);
err = traverse(vpp, LK_EXCLUSIVE | LK_RETRY);
if (err) {
if (err != 0) {
VN_RELE(*vpp);
*vpp = NULL;
} else if (*vpp == sep->se_root) {
@ -1021,7 +1019,7 @@ zfsctl_snapdir_lookup(ap)
* The requested snapshot is not currently mounted, look it up.
*/
err = zfsctl_snapshot_zname(dvp, nm, MAXNAMELEN, snapname);
if (err) {
if (err != 0) {
mutex_exit(&sdp->sd_lock);
ZFS_EXIT(zfsvfs);
/*
@ -1074,8 +1072,20 @@ domount:
}
mutex_exit(&sdp->sd_lock);
ZFS_EXIT(zfsvfs);
#ifdef illumos
/*
* If we had an error, drop our hold on the vnode and
* zfsctl_snapshot_inactive() will clean up.
*/
if (err != 0) {
VN_RELE(*vpp);
*vpp = NULL;
}
#else
if (err != 0)
*vpp = NULL;
#endif
return (err);
}
@ -1133,8 +1143,10 @@ zfsctl_snapdir_readdir_cb(vnode_t *vp, void *dp, int *eofp,
ZFS_ENTER(zfsvfs);
cookie = *offp;
dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG);
error = dmu_snapshot_list_next(zfsvfs->z_os, MAXNAMELEN, snapname, &id,
&cookie, &case_conflict);
dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG);
if (error) {
ZFS_EXIT(zfsvfs);
if (error == ENOENT) {

File diff suppressed because it is too large Load Diff

View File

@ -396,8 +396,10 @@ zfs_register_callbacks(vfs_t *vfsp)
boolean_t do_setuid = B_FALSE;
boolean_t exec = B_FALSE;
boolean_t do_exec = B_FALSE;
#ifdef illumos
boolean_t devices = B_FALSE;
boolean_t do_devices = B_FALSE;
#endif
boolean_t xattr = B_FALSE;
boolean_t do_xattr = B_FALSE;
boolean_t atime = B_FALSE;
@ -493,25 +495,33 @@ zfs_register_callbacks(vfs_t *vfsp)
* overboard...
*/
ds = dmu_objset_ds(os);
error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs);
dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
error = dsl_prop_register(ds,
zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
"xattr", xattr_changed_cb, zfsvfs);
zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
"recordsize", blksz_changed_cb, zfsvfs);
zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
"readonly", readonly_changed_cb, zfsvfs);
zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zfsvfs);
#ifdef illumos
error = error ? error : dsl_prop_register(ds,
"setuid", setuid_changed_cb, zfsvfs);
zfs_prop_to_name(ZFS_PROP_DEVICES), devices_changed_cb, zfsvfs);
#endif
error = error ? error : dsl_prop_register(ds,
"exec", exec_changed_cb, zfsvfs);
zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
"snapdir", snapdir_changed_cb, zfsvfs);
zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
"aclmode", acl_mode_changed_cb, zfsvfs);
zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
"aclinherit", acl_inherit_changed_cb, zfsvfs);
zfs_prop_to_name(ZFS_PROP_ACLMODE), acl_mode_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
"vscan", vscan_changed_cb, zfsvfs);
zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb,
zfsvfs);
error = error ? error : dsl_prop_register(ds,
zfs_prop_to_name(ZFS_PROP_VSCAN), vscan_changed_cb, zfsvfs);
dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
if (error)
goto unregister;
@ -539,27 +549,37 @@ unregister:
* registered, but this is OK; it will simply return ENOMSG,
* which we will ignore.
*/
(void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb,
zfsvfs);
(void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ATIME),
atime_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_XATTR),
xattr_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_RECORDSIZE),
blksz_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_READONLY),
readonly_changed_cb, zfsvfs);
#ifdef illumos
(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_DEVICES),
devices_changed_cb, zfsvfs);
#endif
(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_SETUID),
setuid_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_EXEC),
exec_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_SNAPDIR),
snapdir_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ACLMODE),
acl_mode_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ACLINHERIT),
acl_inherit_changed_cb, zfsvfs);
(void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_VSCAN),
vscan_changed_cb, zfsvfs);
return (error);
}
static int
zfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
uint64_t *userp, uint64_t *groupp)
{
int error = 0;
/*
* Is it a valid type of object to track?
*/
@ -616,7 +636,7 @@ zfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
*groupp = BSWAP_64(*groupp);
}
}
return (error);
return (0);
}
static void
@ -968,7 +988,7 @@ zfsvfs_create(const char *osname, zfsvfs_t **zfvp)
mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
offsetof(znode_t, z_link_node));
rrw_init(&zfsvfs->z_teardown_lock);
rrw_init(&zfsvfs->z_teardown_lock, B_FALSE);
rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
@ -1415,8 +1435,9 @@ zfs_mount_label_policy(vfs_t *vfsp, char *osname)
char *str = NULL;
if (l_to_str_internal(mnt_sl, &str) == 0 &&
dsl_prop_set(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
ZPROP_SRC_LOCAL, 1, strlen(str) + 1, str) == 0)
dsl_prop_set_string(osname,
zfs_prop_to_name(ZFS_PROP_MLSLABEL),
ZPROP_SRC_LOCAL, str) == 0)
retv = 0;
if (str != NULL)
kmem_free(str, strlen(str) + 1);
@ -1876,7 +1897,7 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
if (dsl_dataset_is_dirty(dmu_objset_ds(zfsvfs->z_os)) &&
!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY))
txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
(void) dmu_objset_evict_dbufs(zfsvfs->z_os);
dmu_objset_evict_dbufs(zfsvfs->z_os);
return (0);
}

View File

@ -246,7 +246,7 @@ zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, blkptr_t *nbp, void *dst,
}
}
VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1);
VERIFY(arc_buf_remove_ref(abuf, &abuf));
}
return (error);
@ -343,7 +343,7 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
break;
error = zil_read_log_block(zilog, &blk, &next_blk, lrbuf, &end);
if (error)
if (error != 0)
break;
for (lrp = lrbuf; lrp < end; lrp += reclen) {
@ -478,7 +478,7 @@ zilog_dirty(zilog_t *zilog, uint64_t txg)
if (dsl_dataset_is_snapshot(ds))
panic("dirtying snapshot!");
if (txg_list_add(&dp->dp_dirty_zilogs, zilog, txg) == 0) {
if (txg_list_add(&dp->dp_dirty_zilogs, zilog, txg)) {
/* up the hold count until we can be written out */
dmu_buf_add_ref(ds->ds_dbuf, zilog);
}
@ -637,8 +637,8 @@ zil_claim(const char *osname, void *txarg)
objset_t *os;
int error;
error = dmu_objset_hold(osname, FTAG, &os);
if (error) {
error = dmu_objset_own(osname, DMU_OST_ANY, B_FALSE, FTAG, &os);
if (error != 0) {
cmn_err(CE_WARN, "can't open objset for %s", osname);
return (0);
}
@ -651,7 +651,7 @@ zil_claim(const char *osname, void *txarg)
zio_free_zil(zilog->zl_spa, first_txg, &zh->zh_log);
BP_ZERO(&zh->zh_log);
dsl_dataset_dirty(dmu_objset_ds(os), tx);
dmu_objset_rele(os, FTAG);
dmu_objset_disown(os, FTAG);
return (0);
}
@ -676,7 +676,7 @@ zil_claim(const char *osname, void *txarg)
}
ASSERT3U(first_txg, ==, (spa_last_synced_txg(zilog->zl_spa) + 1));
dmu_objset_rele(os, FTAG);
dmu_objset_disown(os, FTAG);
return (0);
}
@ -696,7 +696,7 @@ zil_check_log_chain(const char *osname, void *tx)
ASSERT(tx == NULL);
error = dmu_objset_hold(osname, FTAG, &os);
if (error) {
if (error != 0) {
cmn_err(CE_WARN, "can't open objset for %s", osname);
return (0);
}
@ -984,7 +984,7 @@ zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb)
/* pass the old blkptr in order to spread log blocks across devs */
error = zio_alloc_zil(spa, txg, bp, &lwb->lwb_blk, zil_blksz,
USE_SLOG(zilog));
if (!error) {
if (error == 0) {
ASSERT3U(bp->blk_birth, ==, txg);
bp->blk_cksum = lwb->lwb_blk.blk_cksum;
bp->blk_cksum.zc_word[ZIL_ZC_SEQ]++;
@ -1095,7 +1095,7 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
txg_wait_synced(zilog->zl_dmu_pool, txg);
return (lwb);
}
if (error) {
if (error != 0) {
ASSERT(error == ENOENT || error == EEXIST ||
error == EALREADY);
return (lwb);
@ -1719,6 +1719,9 @@ zil_free(zilog_t *zilog)
{
zilog->zl_stop_sync = 1;
ASSERT0(zilog->zl_suspend);
ASSERT0(zilog->zl_suspending);
ASSERT(list_is_empty(&zilog->zl_lwb_list));
list_destroy(&zilog->zl_lwb_list);
@ -1814,32 +1817,100 @@ zil_close(zilog_t *zilog)
mutex_exit(&zilog->zl_lock);
}
static char *suspend_tag = "zil suspending";
/*
* Suspend an intent log. While in suspended mode, we still honor
* synchronous semantics, but we rely on txg_wait_synced() to do it.
* We suspend the log briefly when taking a snapshot so that the snapshot
* contains all the data it's supposed to, and has an empty intent log.
* On old version pools, we suspend the log briefly when taking a
* snapshot so that it will have an empty intent log.
*
* Long holds are not really intended to be used the way we do here --
* held for such a short time. A concurrent caller of dsl_dataset_long_held()
* could fail. Therefore we take pains to only put a long hold if it is
* actually necessary. Fortunately, it will only be necessary if the
* objset is currently mounted (or the ZVOL equivalent). In that case it
* will already have a long hold, so we are not really making things any worse.
*
* Ideally, we would locate the existing long-holder (i.e. the zfsvfs_t or
* zvol_state_t), and use their mechanism to prevent their hold from being
* dropped (e.g. VFS_HOLD()). However, that would be even more pain for
* very little gain.
*
* If cookiep == NULL, this does both the suspend & resume.
* Otherwise, it returns with the dataset "long held", and the cookie
* should be passed into zil_resume().
*/
int
zil_suspend(zilog_t *zilog)
zil_suspend(const char *osname, void **cookiep)
{
const zil_header_t *zh = zilog->zl_header;
objset_t *os;
zilog_t *zilog;
const zil_header_t *zh;
int error;
error = dmu_objset_hold(osname, suspend_tag, &os);
if (error != 0)
return (error);
zilog = dmu_objset_zil(os);
mutex_enter(&zilog->zl_lock);
zh = zilog->zl_header;
if (zh->zh_flags & ZIL_REPLAY_NEEDED) { /* unplayed log */
mutex_exit(&zilog->zl_lock);
dmu_objset_rele(os, suspend_tag);
return (EBUSY);
}
if (zilog->zl_suspend++ != 0) {
/*
* Don't put a long hold in the cases where we can avoid it. This
* is when there is no cookie so we are doing a suspend & resume
* (i.e. called from zil_vdev_offline()), and there's nothing to do
* for the suspend because it's already suspended, or there's no ZIL.
*/
if (cookiep == NULL && !zilog->zl_suspending &&
(zilog->zl_suspend > 0 || BP_IS_HOLE(&zh->zh_log))) {
mutex_exit(&zilog->zl_lock);
dmu_objset_rele(os, suspend_tag);
return (0);
}
dsl_dataset_long_hold(dmu_objset_ds(os), suspend_tag);
dsl_pool_rele(dmu_objset_pool(os), suspend_tag);
zilog->zl_suspend++;
if (zilog->zl_suspend > 1) {
/*
* Someone else already began a suspend.
* Someone else is already suspending it.
* Just wait for them to finish.
*/
while (zilog->zl_suspending)
cv_wait(&zilog->zl_cv_suspend, &zilog->zl_lock);
mutex_exit(&zilog->zl_lock);
if (cookiep == NULL)
zil_resume(os);
else
*cookiep = os;
return (0);
}
/*
* If there is no pointer to an on-disk block, this ZIL must not
* be active (e.g. filesystem not mounted), so there's nothing
* to clean up.
*/
if (BP_IS_HOLE(&zh->zh_log)) {
ASSERT(cookiep != NULL); /* fast path already handled */
*cookiep = os;
mutex_exit(&zilog->zl_lock);
return (0);
}
zilog->zl_suspending = B_TRUE;
mutex_exit(&zilog->zl_lock);
@ -1852,16 +1923,25 @@ zil_suspend(zilog_t *zilog)
cv_broadcast(&zilog->zl_cv_suspend);
mutex_exit(&zilog->zl_lock);
if (cookiep == NULL)
zil_resume(os);
else
*cookiep = os;
return (0);
}
void
zil_resume(zilog_t *zilog)
zil_resume(void *cookie)
{
objset_t *os = cookie;
zilog_t *zilog = dmu_objset_zil(os);
mutex_enter(&zilog->zl_lock);
ASSERT(zilog->zl_suspend != 0);
zilog->zl_suspend--;
mutex_exit(&zilog->zl_lock);
dsl_dataset_long_rele(dmu_objset_ds(os), suspend_tag);
dsl_dataset_rele(dmu_objset_ds(os), suspend_tag);
}
typedef struct zil_replay_arg {
@ -1934,7 +2014,7 @@ zil_replay_log_record(zilog_t *zilog, lr_t *lr, void *zra, uint64_t claim_txg)
if (txtype == TX_WRITE && reclen == sizeof (lr_write_t)) {
error = zil_read_log_data(zilog, (lr_write_t *)lr,
zr->zr_lr + reclen);
if (error)
if (error != 0)
return (zil_replay_error(zilog, lr, error));
}
@ -1955,7 +2035,7 @@ zil_replay_log_record(zilog_t *zilog, lr_t *lr, void *zra, uint64_t claim_txg)
* is updated if we are in replay mode.
*/
error = zr->zr_replay[txtype](zr->zr_arg, zr->zr_lr, zr->zr_byteswap);
if (error) {
if (error != 0) {
/*
* The DMU's dnode layer doesn't see removes until the txg
* commits, so a subsequent claim can spuriously fail with
@ -1965,7 +2045,7 @@ zil_replay_log_record(zilog_t *zilog, lr_t *lr, void *zra, uint64_t claim_txg)
*/
txg_wait_synced(spa_get_dsl(zilog->zl_spa), 0);
error = zr->zr_replay[txtype](zr->zr_arg, zr->zr_lr, B_FALSE);
if (error)
if (error != 0)
return (zil_replay_error(zilog, lr, error));
}
return (0);
@ -2039,19 +2119,10 @@ zil_replaying(zilog_t *zilog, dmu_tx_t *tx)
int
zil_vdev_offline(const char *osname, void *arg)
{
objset_t *os;
zilog_t *zilog;
int error;
error = dmu_objset_hold(osname, FTAG, &os);
if (error)
return (error);
zilog = dmu_objset_zil(os);
if (zil_suspend(zilog) != 0)
error = EEXIST;
else
zil_resume(zilog);
dmu_objset_rele(os, FTAG);
return (error);
error = zil_suspend(osname, NULL);
if (error != 0)
return (EEXIST);
return (0);
}

View File

@ -759,6 +759,7 @@ zio_write_override(zio_t *zio, blkptr_t *bp, int copies, boolean_t nopwrite)
/*
 * Record bp on the pool's free bplist for the given txg.
 *
 * The block pointer is first handed to metaslab_check_free() (presumably a
 * sanity check on the block being freed -- its definition is not visible
 * here, confirm against metaslab.c), and is then appended to the
 * spa_free_bplist slot selected by (txg & TXG_MASK).
 */
void
zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp)
{
metaslab_check_free(spa, bp);
bplist_append(&spa->spa_free_bplist[txg & TXG_MASK], bp);
}
@ -775,6 +776,8 @@ zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
ASSERT(spa_syncing_txg(spa) == txg);
ASSERT(spa_sync_pass(spa) < zfs_sync_pass_deferred_free);
metaslab_check_free(spa, bp);
zio = zio_create(pio, spa, txg, bp, NULL, size,
NULL, NULL, ZIO_TYPE_FREE, ZIO_PRIORITY_FREE, flags,
NULL, 0, NULL, ZIO_STAGE_OPEN, ZIO_FREE_PIPELINE);
@ -2079,7 +2082,7 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde)
bcmp(abuf->b_data, zio->io_orig_data,
zio->io_orig_size) != 0)
error = EEXIST;
VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1);
VERIFY(arc_buf_remove_ref(abuf, &abuf));
}
ddt_enter(ddt);
@ -2704,8 +2707,9 @@ zio_vdev_io_assess(zio_t *zio)
* set vdev_cant_write so that we stop trying to allocate from it.
*/
if (zio->io_error == ENXIO && zio->io_type == ZIO_TYPE_WRITE &&
vd != NULL && !vd->vdev_ops->vdev_op_leaf)
vd != NULL && !vd->vdev_ops->vdev_op_leaf) {
vd->vdev_cant_write = B_TRUE;
}
if (zio->io_error)
zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;

View File

@ -695,7 +695,7 @@ zvol_last_close(zvol_state_t *zv)
if (dsl_dataset_is_dirty(dmu_objset_ds(zv->zv_objset)) &&
!(zv->zv_flags & ZVOL_RDONLY))
txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0);
(void) dmu_objset_evict_dbufs(zv->zv_objset);
dmu_objset_evict_dbufs(zv->zv_objset);
dmu_objset_disown(zv->zv_objset, zvol_tag);
zv->zv_objset = NULL;
@ -742,7 +742,7 @@ zvol_prealloc(zvol_state_t *zv)
}
#endif /* sun */
int
static int
zvol_update_volsize(objset_t *os, uint64_t volsize)
{
dmu_tx_t *tx;
@ -1225,6 +1225,9 @@ zvol_dumpio(zvol_state_t *zv, void *addr, uint64_t offset, uint64_t size,
ze = list_next(&zv->zv_extents, ze);
}
if (ze == NULL)
return (EINVAL);
if (!ddi_in_panic())
spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
@ -1355,6 +1358,9 @@ zvol_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblocks)
if (zv == NULL)
return (ENXIO);
if ((zv->zv_flags & ZVOL_DUMPIFIED) == 0)
return (EINVAL);
boff = ldbtob(blkno);
resid = ldbtob(nblocks);
@ -2178,8 +2184,10 @@ zvol_create_snapshots(objset_t *os, const char *name)
cookie = obj = 0;
sname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
#if 0
(void) dmu_objset_find(name, dmu_objset_prefetch, NULL,
DS_FIND_SNAPSHOTS);
#endif
for (;;) {
len = snprintf(sname, MAXPATHLEN, "%s@", name);
@ -2248,12 +2256,14 @@ zvol_create_minors(const char *name)
p = osname + strlen(osname);
len = MAXPATHLEN - (p - osname);
#if 0
/* Prefetch the datasets. */
cookie = 0;
while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0) {
if (!dataset_name_hidden(osname))
(void) dmu_objset_prefetch(osname, NULL);
}
#endif
cookie = 0;
while (dmu_dir_list_next(os, MAXPATHLEN - (p - osname), p, NULL,

View File

@ -283,6 +283,7 @@ void fnvlist_pack_free(char *, size_t);
nvlist_t *fnvlist_unpack(char *, size_t);
nvlist_t *fnvlist_dup(nvlist_t *);
void fnvlist_merge(nvlist_t *, nvlist_t *);
size_t fnvlist_num_pairs(nvlist_t *);
void fnvlist_add_boolean(nvlist_t *, const char *);
void fnvlist_add_boolean_value(nvlist_t *, const char *, boolean_t);