Implement Redacted Send/Receive

Redacted send/receive allows users to send subsets of their data to
a target system. One possible use case for this feature is to avoid
transmitting sensitive information to a data warehousing, test/dev, or
analytics environment. Another is to save space by not replicating
unimportant data within a given dataset, for example in backup tools
like zrepl.

Redacted send/receive is a three-stage process. First, a clone (or 
clones) is made of the snapshot to be sent to the target. In this 
clone (or clones), all unnecessary or unwanted data is removed or
modified. This clone is then snapshotted to create the "redaction 
snapshot" (or snapshots). Second, the new zfs redact command is used 
to create a redaction bookmark. The redaction bookmark stores the 
list of blocks in a snapshot that were modified by the redaction 
snapshot(s). Finally, the redaction bookmark is passed as a parameter 
to zfs send. When sending to the snapshot that was redacted, the
redaction bookmark is used to filter out blocks that contain sensitive 
or unwanted information, and those blocks are not included in the send 
stream.  When sending from the redaction bookmark, the blocks it 
contains are considered as candidate blocks in addition to those 
blocks in the destination snapshot that were modified since the 
creation_txg of the redaction bookmark.  This step is necessary to 
allow the target to rehydrate data in the case where some blocks are 
accidentally or unnecessarily modified in the redaction snapshot.

The changes to bookmarks to enable fast space estimation involve 
adding deadlists to bookmarks. There is also logic to manage the 
life cycles of these deadlists.

The new size estimation process operates in cases where previously 
an accurate estimate could not be provided. In those cases, a send 
is performed where no data blocks are read, reducing the runtime 
significantly and providing a byte-accurate size estimate.

Reviewed-by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed-by: Matt Ahrens <mahrens@delphix.com>
Reviewed-by: Prashanth Sreenivasa <pks@delphix.com>
Reviewed-by: John Kennedy <john.kennedy@delphix.com>
Reviewed-by: George Wilson <george.wilson@delphix.com>
Reviewed-by: Chris Williamson <chris.williamson@delphix.com>
Reviewed-by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed-by: Sebastien Roy <sebastien.roy@delphix.com>
Reviewed-by: Prakash Surya <prakash.surya@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Paul Dagnelie <pcd@delphix.com>
Closes #7958
This commit is contained in:
Paul Dagnelie 2019-06-19 09:48:13 -07:00 committed by Brian Behlendorf
parent c1b5801bb5
commit 30af21b025
103 changed files with 11513 additions and 2668 deletions

View File

@ -50,11 +50,13 @@
#include <sys/dsl_dir.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_bookmark.h>
#include <sys/dbuf.h>
#include <sys/zil.h>
#include <sys/zil_impl.h>
#include <sys/stat.h>
#include <sys/resource.h>
#include <sys/dmu_send.h>
#include <sys/dmu_traverse.h>
#include <sys/zio_checksum.h>
#include <sys/zio_compress.h>
@ -143,6 +145,7 @@ usage(void)
"\t\t[<poolname> [<object> ...]]\n"
"\t%s [-AdiPv] [-e [-V] [-p <path> ...]] [-U <cache>] <dataset>\n"
"\t\t[<object> ...]\n"
"\t%s [-v] <bookmark>\n"
"\t%s -C [-A] [-U <cache>]\n"
"\t%s -l [-Aqu] <device>\n"
"\t%s -m [-AFLPX] [-e [-V] [-p <path> ...]] [-t <txg>] "
@ -154,7 +157,7 @@ usage(void)
"\t%s -S [-AP] [-e [-V] [-p <path> ...]] [-U <cache>] "
"<poolname>\n\n",
cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname,
cmdname, cmdname);
cmdname, cmdname, cmdname);
(void) fprintf(stderr, " Dataset name must include at least one "
"separator character '/' or '@'\n");
@ -420,6 +423,43 @@ dump_uint8(objset_t *os, uint64_t object, void *data, size_t size)
/*
 * Print a buffer as an array of 64-bit hexadecimal values, four per line.
 *
 * os, object:	objset and object number; only consulted when data is NULL.
 * data:	buffer to print, or NULL to read the object's contents
 *		(doi_max_offset bytes) with dmu_read() into a temporary
 *		buffer.
 * size:	size of data in bytes; recomputed from the object when data
 *		is NULL.
 *
 * Nothing is printed unless dump_opt['d'] is at least 6.
 */
static void
dump_uint64(objset_t *os, uint64_t object, void *data, size_t size)
{
	uint64_t *arr;

	if (dump_opt['d'] < 6)
		return;

	if (data == NULL) {
		dmu_object_info_t doi;

		VERIFY0(dmu_object_info(os, object, &doi));
		size = doi.doi_max_offset;
		arr = kmem_alloc(size, KM_SLEEP);

		int err = dmu_read(os, object, 0, size, arr, 0);
		if (err != 0) {
			(void) printf("got error %u from dmu_read\n", err);
			kmem_free(arr, size);
			return;
		}
	} else {
		arr = data;
	}

	if (size == 0) {
		/*
		 * An empty object still went through kmem_alloc() above;
		 * release that buffer before returning so it is not leaked.
		 */
		if (data == NULL)
			kmem_free(arr, size);
		(void) printf("\t\t[]\n");
		return;
	}

	(void) printf("\t\t[%0llx", (u_longlong_t)arr[0]);
	for (size_t i = 1; i * sizeof (uint64_t) < size; i++) {
		/* Start a new output line after every fourth value. */
		if (i % 4 != 0)
			(void) printf(", %0llx", (u_longlong_t)arr[i]);
		else
			(void) printf(",\n\t\t%0llx", (u_longlong_t)arr[i]);
	}
	(void) printf("]\n");

	/* Only free the buffer if we allocated it ourselves. */
	if (data == NULL)
		kmem_free(arr, size);
}
/*ARGSUSED*/
@ -1498,6 +1538,7 @@ visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
arc_buf_t *buf;
uint64_t fill = 0;
ASSERT(!BP_IS_REDACTED(bp));
err = arc_read(NULL, spa, bp, arc_getbuf_func, &buf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
@ -1784,6 +1825,128 @@ dump_full_bpobj(bpobj_t *bpo, const char *name, int indent)
}
}
/*
 * Print a single bookmark, identified by its full "dataset#bookmark" name.
 *
 * The bookmark's guid, creation txg/time and redaction object number are
 * always printed.  When print_redact is set and the bookmark has a redaction
 * object, the redaction progress, the redaction snapshots and the number of
 * list entries are printed as well; print_list (which requires print_redact)
 * additionally dumps every entry of the redaction list.
 *
 * Returns 0 on success, or the error reported by dsl_bookmark_lookup() or
 * dmu_read().
 */
static int
dump_bookmark(dsl_pool_t *dp, char *name, boolean_t print_redact,
    boolean_t print_list)
{
	zfs_bookmark_phys_t bmp;
	int error;

	error = dsl_bookmark_lookup(dp, name, NULL, &bmp);
	if (error != 0)
		return (error);

	/* Only the short name, i.e. the part after '#', is shown. */
	const char *shortname = strchr(name, '#') + 1;

	(void) printf("\t#%s: ", shortname);
	(void) printf("{guid: %llx creation_txg: %llu creation_time: "
	    "%llu redaction_obj: %llu}\n", (u_longlong_t)bmp.zbm_guid,
	    (u_longlong_t)bmp.zbm_creation_txg,
	    (u_longlong_t)bmp.zbm_creation_time,
	    (u_longlong_t)bmp.zbm_redaction_obj);

	IMPLY(print_list, print_redact);
	if (!print_redact)
		return (0);
	if (bmp.zbm_redaction_obj == 0)
		return (0);

	redaction_list_t *rlist;

	VERIFY0(dsl_redaction_list_hold_obj(dp, bmp.zbm_redaction_obj, FTAG,
	    &rlist));
	redaction_list_phys_t *phys = rlist->rl_phys;

	/* Both markers at UINT64_MAX are reported as a completed redaction. */
	(void) printf("\tRedacted:\n\t\tProgress: ");
	if (phys->rlp_last_object == UINT64_MAX &&
	    phys->rlp_last_blkid == UINT64_MAX) {
		(void) printf("complete\n");
	} else {
		(void) printf("%llu %llu (incomplete)\n",
		    (u_longlong_t)phys->rlp_last_object,
		    (u_longlong_t)phys->rlp_last_blkid);
	}

	(void) printf("\t\tSnapshots: [");
	unsigned int snap = 0;
	while (snap < phys->rlp_num_snaps) {
		if (snap != 0)
			(void) printf(", ");
		(void) printf("%0llu",
		    (u_longlong_t)phys->rlp_snaps[snap]);
		snap++;
	}
	(void) printf("]\n\t\tLength: %llu\n",
	    (u_longlong_t)phys->rlp_num_entries);

	if (!print_list) {
		dsl_redaction_list_rele(rlist, FTAG);
		return (0);
	}

	if (phys->rlp_num_entries == 0) {
		dsl_redaction_list_rele(rlist, FTAG);
		(void) printf("\t\tRedaction List: []\n\n");
		return (0);
	}

	/* Read the whole redaction object from the meta objset in one go. */
	objset_t *mos = dp->dp_spa->spa_meta_objset;
	dmu_object_info_t doi;

	VERIFY0(dmu_object_info(mos, bmp.zbm_redaction_obj, &doi));
	uint64_t nbytes = doi.doi_max_offset;
	redact_block_phys_t *entries = kmem_alloc(nbytes, KM_SLEEP);

	error = dmu_read(mos, bmp.zbm_redaction_obj, 0, nbytes, entries, 0);
	if (error != 0) {
		dsl_redaction_list_rele(rlist, FTAG);
		kmem_free(entries, nbytes);
		return (error);
	}

	/* The first entry opens the list; later ones go on their own line. */
	for (size_t i = 0; i < phys->rlp_num_entries; i++) {
		redact_block_phys_t *rbp = &entries[i];

		if (i == 0) {
			(void) printf("\t\tRedaction List: [{object: %llx, offset: "
			    "%llx, blksz: %x, count: %llx}",
			    (u_longlong_t)rbp->rbp_object,
			    (u_longlong_t)rbp->rbp_blkid,
			    (uint_t)(redact_block_get_size(rbp)),
			    (u_longlong_t)redact_block_get_count(rbp));
		} else {
			(void) printf(",\n\t\t{object: %llx, offset: %llx, "
			    "blksz: %x, count: %llx}",
			    (u_longlong_t)rbp->rbp_object,
			    (u_longlong_t)rbp->rbp_blkid,
			    (uint_t)(redact_block_get_size(rbp)),
			    (u_longlong_t)redact_block_get_count(rbp));
		}
	}

	dsl_redaction_list_rele(rlist, FTAG);
	kmem_free(entries, nbytes);
	(void) printf("]\n\n");

	return (0);
}
/*
 * Print every bookmark of the dataset backing os by walking the dataset's
 * bookmark ZAP object (ds_bookmarks_obj) in the pool's meta objset.
 *
 * Does nothing when verbosity is below 4.  Redaction details are requested
 * from dump_bookmark() at verbosity >= 5 and the full redaction list at
 * verbosity >= 6.  Errors from dump_bookmark() are ignored.
 */
static void
dump_bookmarks(objset_t *os, int verbosity)
{
	zap_cursor_t zc;
	zap_attribute_t attr;
	dsl_dataset_t *ds = dmu_objset_ds(os);
	dsl_pool_t *dp = spa_get_dsl(os->os_spa);
	objset_t *mos = os->os_spa->spa_meta_objset;

	if (verbosity < 4)
		return;

	dsl_pool_config_enter(dp, FTAG);

	for (zap_cursor_init(&zc, mos, ds->ds_bookmarks_obj);
	    zap_cursor_retrieve(&zc, &attr) == 0;
	    zap_cursor_advance(&zc)) {
		char osname[ZFS_MAX_DATASET_NAME_LEN];
		char buf[ZFS_MAX_DATASET_NAME_LEN];
		int len;

		dmu_objset_name(os, osname);

		/*
		 * snprintf() returns the length of the formatted string, not
		 * an error code, so it must not be wrapped in VERIFY0().
		 * Instead check that formatting succeeded and that the full
		 * "<dataset>#<bookmark>" name fit into buf.
		 */
		len = snprintf(buf, sizeof (buf), "%s#%s", osname,
		    attr.za_name);
		VERIFY3S(len, >=, 0);
		VERIFY3S(len, <, sizeof (buf));

		(void) dump_bookmark(dp, buf, verbosity >= 5, verbosity >= 6);
	}
	zap_cursor_fini(&zc);

	dsl_pool_config_exit(dp, FTAG);
}
static void
bpobj_count_refd(bpobj_t *bpo)
{
@ -1886,19 +2049,26 @@ static objset_t *sa_os = NULL;
static sa_attr_type_t *sa_attr_table = NULL;
static int
open_objset(const char *path, dmu_objset_type_t type, void *tag, objset_t **osp)
open_objset(const char *path, void *tag, objset_t **osp)
{
int err;
uint64_t sa_attrs = 0;
uint64_t version = 0;
VERIFY3P(sa_os, ==, NULL);
err = dmu_objset_own(path, type, B_TRUE, B_FALSE, tag, osp);
/*
* We can't own an objset if it's redacted. Therefore, we do this
* dance: hold the objset, then acquire a long hold on its dataset, then
* release the pool (which is held as part of holding the objset).
*/
err = dmu_objset_hold(path, tag, osp);
if (err != 0) {
(void) fprintf(stderr, "failed to own dataset '%s': %s\n", path,
strerror(err));
(void) fprintf(stderr, "failed to hold dataset '%s': %s\n",
path, strerror(err));
return (err);
}
dsl_dataset_long_hold(dmu_objset_ds(*osp), tag);
dsl_pool_rele(dmu_objset_pool(*osp), tag);
if (dmu_objset_type(*osp) == DMU_OST_ZFS && !(*osp)->os_encrypted) {
(void) zap_lookup(*osp, MASTER_NODE_OBJ, ZPL_VERSION_STR,
@ -1912,7 +2082,8 @@ open_objset(const char *path, dmu_objset_type_t type, void *tag, objset_t **osp)
if (err != 0) {
(void) fprintf(stderr, "sa_setup failed: %s\n",
strerror(err));
dmu_objset_disown(*osp, B_FALSE, tag);
dsl_dataset_long_rele(dmu_objset_ds(*osp), tag);
dsl_dataset_rele(dmu_objset_ds(*osp), tag);
*osp = NULL;
}
}
@ -1927,7 +2098,8 @@ close_objset(objset_t *os, void *tag)
VERIFY3P(os, ==, sa_os);
if (os->os_sa != NULL)
sa_tear_down(os);
dmu_objset_disown(os, B_FALSE, tag);
dsl_dataset_long_rele(dmu_objset_ds(os), tag);
dsl_dataset_rele(dmu_objset_ds(os), tag);
sa_attr_table = NULL;
sa_os = NULL;
}
@ -2205,8 +2377,8 @@ static object_viewer_t *object_viewer[DMU_OT_NUMTYPES + 1] = {
};
static void
dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header,
uint64_t *dnode_slots_used)
dump_object(objset_t *os, uint64_t object, int verbosity,
boolean_t *print_header, uint64_t *dnode_slots_used)
{
dmu_buf_t *db = NULL;
dmu_object_info_t doi;
@ -2325,7 +2497,7 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header,
(void) printf("\t\t(object encrypted)\n");
}
*print_header = 1;
*print_header = B_TRUE;
}
if (verbosity >= 5)
@ -2396,6 +2568,7 @@ count_ds_mos_objects(dsl_dataset_t *ds)
mos_obj_refd(dsl_dataset_phys(ds)->ds_props_obj);
mos_obj_refd(dsl_dataset_phys(ds)->ds_userrefs_obj);
mos_obj_refd(dsl_dataset_phys(ds)->ds_snapnames_zapobj);
mos_obj_refd(ds->ds_bookmarks_obj);
if (!dsl_dataset_is_snapshot(ds)) {
count_dir_mos_objects(ds->ds_dir);
@ -2416,7 +2589,7 @@ dump_dir(objset_t *os)
char osname[ZFS_MAX_DATASET_NAME_LEN];
const char *type = "UNKNOWN";
int verbosity = dump_opt['d'];
int print_header = 1;
boolean_t print_header;
unsigned i;
int error;
uint64_t total_slots_used = 0;
@ -2430,6 +2603,8 @@ dump_dir(objset_t *os)
dmu_objset_fast_stat(os, &dds);
dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
print_header = B_TRUE;
if (dds.dds_type < DMU_OST_NUMTYPES)
type = objset_types[dds.dds_type];
@ -2464,9 +2639,10 @@ dump_dir(objset_t *os)
(dds.dds_inconsistent) ? " (inconsistent)" : "");
if (zopt_objects != 0) {
for (i = 0; i < zopt_objects; i++)
for (i = 0; i < zopt_objects; i++) {
dump_object(os, zopt_object[i], verbosity,
&print_header, NULL);
}
(void) printf("\n");
return;
}
@ -2485,6 +2661,9 @@ dump_dir(objset_t *os)
count_ds_mos_objects(ds);
}
if (dmu_objset_ds(os) != NULL)
dump_bookmarks(os, verbosity);
if (verbosity < 2)
return;
@ -2962,7 +3141,7 @@ static int
dump_path_impl(objset_t *os, uint64_t obj, char *name)
{
int err;
int header = 1;
boolean_t header = B_TRUE;
uint64_t child_obj;
char *s;
dmu_buf_t *db;
@ -3033,7 +3212,7 @@ dump_path(char *ds, char *path)
objset_t *os;
uint64_t root_obj;
err = open_objset(ds, DMU_OST_ZFS, FTAG, &os);
err = open_objset(ds, FTAG, &os);
if (err != 0)
return (err);
@ -3041,7 +3220,7 @@ dump_path(char *ds, char *path)
if (err != 0) {
(void) fprintf(stderr, "can't lookup root znode: %s\n",
strerror(err));
dmu_objset_disown(os, B_FALSE, FTAG);
close_objset(os, FTAG);
return (EINVAL);
}
@ -3222,6 +3401,7 @@ dump_label(const char *dev)
}
static uint64_t dataset_feature_count[SPA_FEATURES];
static uint64_t global_feature_count[SPA_FEATURES];
static uint64_t remap_deadlist_count = 0;
/*ARGSUSED*/
@ -3232,7 +3412,7 @@ dump_one_dir(const char *dsname, void *arg)
objset_t *os;
spa_feature_t f;
error = open_objset(dsname, DMU_OST_ANY, FTAG, &os);
error = open_objset(dsname, FTAG, &os);
if (error != 0)
return (0);
@ -3248,6 +3428,16 @@ dump_one_dir(const char *dsname, void *arg)
remap_deadlist_count++;
}
for (dsl_bookmark_node_t *dbn =
avl_first(&dmu_objset_ds(os)->ds_bookmarks); dbn != NULL;
dbn = AVL_NEXT(&dmu_objset_ds(os)->ds_bookmarks, dbn)) {
mos_obj_refd(dbn->dbn_phys.zbm_redaction_obj);
if (dbn->dbn_phys.zbm_redaction_obj != 0)
global_feature_count[SPA_FEATURE_REDACTION_BOOKMARKS]++;
if (dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN)
global_feature_count[SPA_FEATURE_BOOKMARK_WRITTEN]++;
}
dump_dir(os);
close_objset(os, FTAG);
fuid_table_destroy();
@ -3484,7 +3674,7 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
dmu_object_type_t type;
boolean_t is_metadata;
if (bp == NULL)
if (zb->zb_level == ZB_DNODE_LEVEL)
return (0);
if (dump_opt['b'] >= 5 && bp->blk_birth > 0) {
@ -3499,7 +3689,7 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
blkbuf);
}
if (BP_IS_HOLE(bp))
if (BP_IS_HOLE(bp) || BP_IS_REDACTED(bp))
return (0);
type = BP_GET_TYPE(bp);
@ -4545,7 +4735,8 @@ zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
avl_index_t where;
zdb_ddt_entry_t *zdde, zdde_search;
if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
if (zb->zb_level == ZB_DNODE_LEVEL || BP_IS_HOLE(bp) ||
BP_IS_EMBEDDED(bp))
return (0);
if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) {
@ -5381,6 +5572,12 @@ dump_zpool(spa_t *spa)
}
dump_dtl(spa->spa_root_vdev, 0);
}
for (spa_feature_t f = 0; f < SPA_FEATURES; f++)
global_feature_count[f] = UINT64_MAX;
global_feature_count[SPA_FEATURE_REDACTION_BOOKMARKS] = 0;
global_feature_count[SPA_FEATURE_BOOKMARK_WRITTEN] = 0;
(void) dmu_objset_find(spa_name(spa), dump_one_dir,
NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
@ -5390,21 +5587,31 @@ dump_zpool(spa_t *spa)
for (f = 0; f < SPA_FEATURES; f++) {
uint64_t refcount;
uint64_t *arr;
if (!(spa_feature_table[f].fi_flags &
ZFEATURE_FLAG_PER_DATASET) ||
!spa_feature_is_enabled(spa, f)) {
ASSERT0(dataset_feature_count[f]);
continue;
ZFEATURE_FLAG_PER_DATASET)) {
if (global_feature_count[f] == UINT64_MAX)
continue;
if (!spa_feature_is_enabled(spa, f)) {
ASSERT0(global_feature_count[f]);
continue;
}
arr = global_feature_count;
} else {
if (!spa_feature_is_enabled(spa, f)) {
ASSERT0(dataset_feature_count[f]);
continue;
}
arr = dataset_feature_count;
}
if (feature_get_refcount(spa, &spa_feature_table[f],
&refcount) == ENOTSUP)
continue;
if (dataset_feature_count[f] != refcount) {
if (arr[f] != refcount) {
(void) printf("%s feature refcount mismatch: "
"%lld datasets != %lld refcount\n",
"%lld consumers != %lld refcount\n",
spa_feature_table[f].fi_uname,
(longlong_t)dataset_feature_count[f],
(longlong_t)refcount);
(longlong_t)arr[f], (longlong_t)refcount);
rc = 2;
} else {
(void) printf("Verified %s feature refcount "
@ -6184,9 +6391,23 @@ main(int argc, char **argv)
FTAG, policy, NULL);
}
}
} else if (strpbrk(target, "#") != NULL) {
dsl_pool_t *dp;
error = dsl_pool_hold(target, FTAG, &dp);
if (error != 0) {
fatal("can't dump '%s': %s", target,
strerror(error));
}
error = dump_bookmark(dp, target, B_TRUE, verbose > 1);
dsl_pool_rele(dp, FTAG);
if (error != 0) {
fatal("can't dump '%s': %s", target,
strerror(error));
}
return (error);
} else {
zdb_set_skip_mmp(target);
error = open_objset(target, DMU_OST_ANY, FTAG, &os);
error = open_objset(target, FTAG, &os);
if (error == 0)
spa = dmu_objset_spa(os);
}
@ -6245,10 +6466,11 @@ main(int argc, char **argv)
free(checkpoint_target);
}
if (os != NULL)
if (os != NULL) {
close_objset(os, FTAG);
else
} else {
spa_close(spa, FTAG);
}
fuid_table_destroy();

View File

@ -33,6 +33,7 @@
#include <assert.h>
#include <ctype.h>
#include <sys/debug.h>
#include <errno.h>
#include <getopt.h>
#include <libgen.h>
@ -119,6 +120,7 @@ static int zfs_do_unload_key(int argc, char **argv);
static int zfs_do_change_key(int argc, char **argv);
static int zfs_do_project(int argc, char **argv);
static int zfs_do_version(int argc, char **argv);
static int zfs_do_redact(int argc, char **argv);
/*
* Enable a reasonable set of defaults for libumem debugging on DEBUG builds.
@ -173,7 +175,8 @@ typedef enum {
HELP_LOAD_KEY,
HELP_UNLOAD_KEY,
HELP_CHANGE_KEY,
HELP_VERSION
HELP_VERSION,
HELP_REDACT,
} zfs_help_t;
typedef struct zfs_command {
@ -238,6 +241,7 @@ static zfs_command_t command_table[] = {
{ "load-key", zfs_do_load_key, HELP_LOAD_KEY },
{ "unload-key", zfs_do_unload_key, HELP_UNLOAD_KEY },
{ "change-key", zfs_do_change_key, HELP_CHANGE_KEY },
{ "redact", zfs_do_redact, HELP_REDACT },
};
#define NCOMMAND (sizeof (command_table) / sizeof (command_table[0]))
@ -279,7 +283,7 @@ get_usage(zfs_help_t idx)
"[filesystem|volume|snapshot] ...\n"));
case HELP_MOUNT:
return (gettext("\tmount\n"
"\tmount [-lvO] [-o opts] <-a | filesystem>\n"));
"\tmount [-flvO] [-o opts] <-a | filesystem>\n"));
case HELP_PROMOTE:
return (gettext("\tpromote <clone-filesystem>\n"));
case HELP_RECEIVE:
@ -302,6 +306,9 @@ get_usage(zfs_help_t idx)
"<snapshot>\n"
"\tsend [-nvPLecw] [-i snapshot|bookmark] "
"<filesystem|volume|snapshot>\n"
"[-i bookmark] <snapshot> <bookmark_name>\n"
"\tsend [-DnPpvLecr] [-i bookmark|snapshot] "
"--redact <bookmark> <snapshot>\n"
"\tsend [-nvPe] -t <receive_resume_token>\n"));
case HELP_SET:
return (gettext("\tset <property=value> ... "
@ -386,6 +393,9 @@ get_usage(zfs_help_t idx)
"\tchange-key -i [-l] <filesystem|volume>\n"));
case HELP_VERSION:
return (gettext("\tversion\n"));
case HELP_REDACT:
return (gettext("\tredact <snapshot> <bookmark> "
"<redaction_snapshot> ..."));
}
abort();
@ -543,6 +553,8 @@ usage(boolean_t requested)
(void) fprintf(fp, "YES NO <size> | none\n");
(void) fprintf(fp, "\t%-15s ", "written@<snap>");
(void) fprintf(fp, " NO NO <size>\n");
(void) fprintf(fp, "\t%-15s ", "written#<bookmark>");
(void) fprintf(fp, " NO NO <size>\n");
(void) fprintf(fp, gettext("\nSizes are specified in bytes "
"with standard units such as K, M, G, etc.\n"));
@ -1501,6 +1513,13 @@ zfs_do_destroy(int argc, char **argv)
return (-1);
}
/*
* Unfortunately, zfs_bookmark() doesn't honor the
* casesensitivity setting. However, we can't simply
* remove this check, because lzc_destroy_bookmarks()
* ignores non-existent bookmarks, so this is necessary
* to get a proper error message.
*/
if (!zfs_bookmark_exists(argv[0])) {
(void) fprintf(stderr, gettext("bookmark '%s' "
"does not exist.\n"), argv[0]);
@ -3595,6 +3614,73 @@ zfs_do_promote(int argc, char **argv)
return (ret);
}
/*
 * zfs redact <snapshot> <bookmark> <redaction_snapshot> ...
 *
 * Creates a redaction bookmark named <bookmark> for <snapshot> with respect
 * to the given redaction snapshot(s) by calling lzc_redact().
 *
 * Returns the value returned by lzc_redact() (0 on success); on failure a
 * diagnostic describing the error is written to stderr.
 */
static int
zfs_do_redact(int argc, char **argv)
{
	char *snap = NULL;
	char *bookname = NULL;
	char **rsnaps = NULL;
	int numrsnaps = 0;

	/* Skip the subcommand name itself. */
	argv++;
	argc--;
	if (argc < 3) {
		(void) fprintf(stderr, gettext("too few arguments\n"));
		usage(B_FALSE);
	}

	snap = argv[0];
	bookname = argv[1];
	rsnaps = argv + 2;
	numrsnaps = argc - 2;

	/* The redaction snapshots are passed as the keys of an nvlist. */
	nvlist_t *rsnapnv = fnvlist_alloc();

	for (int i = 0; i < numrsnaps; i++) {
		fnvlist_add_boolean(rsnapnv, rsnaps[i]);
	}

	int err = lzc_redact(snap, bookname, rsnapnv);
	fnvlist_free(rsnapnv);

	switch (err) {
	case 0:
		break;
	case ENOENT:
		(void) fprintf(stderr,
		    gettext("provided snapshot %s does not exist\n"), snap);
		break;
	case EEXIST:
		(void) fprintf(stderr, gettext("specified redaction bookmark "
		    "(%s) provided already exists\n"), bookname);
		break;
	case ENAMETOOLONG:
		(void) fprintf(stderr, gettext("provided bookmark name cannot "
		    "be used, final name would be too long\n"));
		break;
	case E2BIG:
		(void) fprintf(stderr, gettext("too many redaction snapshots "
		    "specified\n"));
		break;
	case EINVAL:
		(void) fprintf(stderr, gettext("redaction snapshot must be "
		    "descendent of snapshot being redacted\n"));
		break;
	case EALREADY:
		(void) fprintf(stderr, gettext("attempted to redact redacted "
		    "dataset or with respect to redacted dataset\n"));
		break;
	case ENOTSUP:
		(void) fprintf(stderr, gettext("redaction bookmarks feature "
		    "not enabled\n"));
		break;
	default:
		/*
		 * Report the code lzc_redact() returned; errno may have been
		 * overwritten by the calls made since then.
		 */
		(void) fprintf(stderr, gettext("internal error: %s\n"),
		    strerror(err));
	}

	return (err);
}
/*
* zfs rollback [-rRf] <snapshot>
*
@ -4006,6 +4092,7 @@ zfs_do_snapshot(int argc, char **argv)
return (-1);
}
/*
* Send a backup stream to stdout.
*/
@ -4020,10 +4107,11 @@ zfs_do_send(int argc, char **argv)
sendflags_t flags = { 0 };
int c, err;
nvlist_t *dbgnv = NULL;
boolean_t extraverbose = B_FALSE;
char *redactbook = NULL;
struct option long_options[] = {
{"replicate", no_argument, NULL, 'R'},
{"redact", required_argument, NULL, 'd'},
{"props", no_argument, NULL, 'p'},
{"parsable", no_argument, NULL, 'P'},
{"dedup", no_argument, NULL, 'D'},
@ -4040,8 +4128,8 @@ zfs_do_send(int argc, char **argv)
};
/* check options */
while ((c = getopt_long(argc, argv, ":i:I:RDpvnPLeht:cwb", long_options,
NULL)) != -1) {
while ((c = getopt_long(argc, argv, ":i:I:RDpvnPLeht:cwbd:",
long_options, NULL)) != -1) {
switch (c) {
case 'i':
if (fromname)
@ -4057,6 +4145,9 @@ zfs_do_send(int argc, char **argv)
case 'R':
flags.replicate = B_TRUE;
break;
case 'd':
redactbook = optarg;
break;
case 'p':
flags.props = B_TRUE;
break;
@ -4068,12 +4159,9 @@ zfs_do_send(int argc, char **argv)
break;
case 'P':
flags.parsable = B_TRUE;
flags.verbose = B_TRUE;
break;
case 'v':
if (flags.verbose)
extraverbose = B_TRUE;
flags.verbose = B_TRUE;
flags.verbosity++;
flags.progress = B_TRUE;
break;
case 'D':
@ -4141,19 +4229,22 @@ zfs_do_send(int argc, char **argv)
}
}
if (flags.parsable && flags.verbosity == 0)
flags.verbosity = 1;
argc -= optind;
argv += optind;
if (resume_token != NULL) {
if (fromname != NULL || flags.replicate || flags.props ||
flags.backup || flags.dedup) {
flags.backup || flags.dedup || flags.holds ||
redactbook != NULL) {
(void) fprintf(stderr,
gettext("invalid flags combined with -t\n"));
usage(B_FALSE);
}
if (argc != 0) {
(void) fprintf(stderr, gettext("no additional "
"arguments are permitted with -t\n"));
if (argc > 0) {
(void) fprintf(stderr, gettext("too many arguments\n"));
usage(B_FALSE);
}
} else {
@ -4168,6 +4259,12 @@ zfs_do_send(int argc, char **argv)
}
}
if (flags.raw && redactbook != NULL) {
(void) fprintf(stderr,
gettext("Error: raw sends may not be redacted.\n"));
return (1);
}
if (!flags.dryrun && isatty(STDOUT_FILENO)) {
(void) fprintf(stderr,
gettext("Error: Stream can not be written to a terminal.\n"
@ -4181,43 +4278,70 @@ zfs_do_send(int argc, char **argv)
}
/*
* Special case sending a filesystem, or from a bookmark.
* For everything except -R and -I, use the new, cleaner code path.
*/
if (strchr(argv[0], '@') == NULL ||
(fromname && strchr(fromname, '#') != NULL)) {
if (!(flags.replicate || flags.doall)) {
char frombuf[ZFS_MAX_DATASET_NAME_LEN];
if (flags.replicate || flags.doall || flags.props ||
flags.backup || flags.dedup || flags.holds ||
(strchr(argv[0], '@') == NULL &&
(flags.dryrun || flags.verbose || flags.progress))) {
(void) fprintf(stderr, gettext("Error: "
"Unsupported flag with filesystem or bookmark.\n"));
return (1);
if (redactbook != NULL) {
if (strchr(argv[0], '@') == NULL) {
(void) fprintf(stderr, gettext("Error: Cannot "
"do a redacted send to a filesystem.\n"));
return (1);
}
}
zhp = zfs_open(g_zfs, argv[0], ZFS_TYPE_DATASET);
if (zhp == NULL)
return (1);
if (fromname != NULL && (strchr(fromname, '#') == NULL &&
strchr(fromname, '@') == NULL)) {
/*
* Neither bookmark or snapshot was specified. Print a
* warning, and assume snapshot.
*/
(void) fprintf(stderr, "Warning: incremental source "
"didn't specify type, assuming snapshot. Use '@' "
"or '#' prefix to avoid ambiguity.\n");
(void) snprintf(frombuf, sizeof (frombuf), "@%s",
fromname);
fromname = frombuf;
}
if (fromname != NULL &&
(fromname[0] == '#' || fromname[0] == '@')) {
/*
* Incremental source name begins with # or @.
* Default to same fs as target.
*/
char tmpbuf[ZFS_MAX_DATASET_NAME_LEN];
(void) strlcpy(tmpbuf, fromname, sizeof (tmpbuf));
(void) strlcpy(frombuf, argv[0], sizeof (frombuf));
cp = strchr(frombuf, '@');
if (cp != NULL)
*cp = '\0';
(void) strlcat(frombuf, fromname, sizeof (frombuf));
(void) strlcat(frombuf, tmpbuf, sizeof (frombuf));
fromname = frombuf;
}
err = zfs_send_one(zhp, fromname, STDOUT_FILENO, flags);
err = zfs_send_one(zhp, fromname, STDOUT_FILENO, &flags,
redactbook);
zfs_close(zhp);
return (err != 0);
}
if (fromname != NULL && strchr(fromname, '#')) {
(void) fprintf(stderr,
gettext("Error: multiple snapshots cannot be "
"sent from a bookmark.\n"));
return (1);
}
if (redactbook != NULL) {
(void) fprintf(stderr, gettext("Error: multiple snapshots "
"cannot be sent redacted.\n"));
return (1);
}
cp = strchr(argv[0], '@');
*cp = '\0';
toname = cp + 1;
@ -4261,9 +4385,9 @@ zfs_do_send(int argc, char **argv)
flags.doall = B_TRUE;
err = zfs_send(zhp, fromname, toname, &flags, STDOUT_FILENO, NULL, 0,
extraverbose ? &dbgnv : NULL);
flags.verbosity >= 3 ? &dbgnv : NULL);
if (extraverbose && dbgnv != NULL) {
if (flags.verbosity >= 3 && dbgnv != NULL) {
/*
* dump_nvlist prints to stdout, but that's been
* redirected to a file. Make it print to stderr
@ -6379,6 +6503,17 @@ share_mount_one(zfs_handle_t *zhp, int op, int flags, char *protocol,
return (1);
}
if (zfs_prop_get_int(zhp, ZFS_PROP_REDACTED) && !(flags & MS_FORCE)) {
if (!explicit)
return (0);
(void) fprintf(stderr, gettext("cannot %s '%s': "
"Dataset is not complete, was created by receiving "
"a redacted zfs send stream.\n"), cmdname,
zfs_get_name(zhp));
return (1);
}
/*
* At this point, we have verified that the mountpoint and/or
* shareopts are appropriate for auto management. If the
@ -6537,7 +6672,7 @@ share_mount(int op, int argc, char **argv)
int flags = 0;
/* check options */
while ((c = getopt(argc, argv, op == OP_MOUNT ? ":alvo:O" : "al"))
while ((c = getopt(argc, argv, op == OP_MOUNT ? ":alvo:Of" : "al"))
!= -1) {
switch (c) {
case 'a':
@ -6565,6 +6700,9 @@ share_mount(int op, int argc, char **argv)
case 'O':
flags |= MS_OVERLAY;
break;
case 'f':
flags |= MS_FORCE;
break;
case ':':
(void) fprintf(stderr, gettext("missing argument for "
"'%c' option\n"), optopt);

View File

@ -236,6 +236,7 @@ main(int argc, char *argv[])
struct drr_spill *drrs = &thedrr.drr_u.drr_spill;
struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded;
struct drr_object_range *drror = &thedrr.drr_u.drr_object_range;
struct drr_redact *drrr = &thedrr.drr_u.drr_redact;
struct drr_checksum *drrc = &thedrr.drr_u.drr_checksum;
int c;
boolean_t verbose = B_FALSE;
@ -711,6 +712,21 @@ main(int argc, char *argv[])
mac);
}
break;
case DRR_REDACT:
if (do_byteswap) {
drrr->drr_object = BSWAP_64(drrr->drr_object);
drrr->drr_offset = BSWAP_64(drrr->drr_offset);
drrr->drr_length = BSWAP_64(drrr->drr_length);
drrr->drr_toguid = BSWAP_64(drrr->drr_toguid);
}
if (verbose) {
(void) printf("REDACT object = %llu offset = "
"%llu length = %llu\n",
(u_longlong_t)drrr->drr_object,
(u_longlong_t)drrr->drr_offset,
(u_longlong_t)drrr->drr_length);
}
break;
case DRR_NUMTYPES:
/* should never be reached */
exit(1);

View File

@ -171,6 +171,7 @@ AC_CONFIG_FILES([
tests/zfs-tests/cmd/file_check/Makefile
tests/zfs-tests/cmd/file_trunc/Makefile
tests/zfs-tests/cmd/file_write/Makefile
tests/zfs-tests/cmd/get_diff/Makefile
tests/zfs-tests/cmd/largest_file/Makefile
tests/zfs-tests/cmd/libzfs_input_check/Makefile
tests/zfs-tests/cmd/mkbusy/Makefile
@ -188,6 +189,7 @@ AC_CONFIG_FILES([
tests/zfs-tests/cmd/rm_lnkcnt_zero_file/Makefile
tests/zfs-tests/cmd/threadsappend/Makefile
tests/zfs-tests/cmd/xattrtest/Makefile
tests/zfs-tests/cmd/stride_dd/Makefile
tests/zfs-tests/include/Makefile
tests/zfs-tests/tests/Makefile
tests/zfs-tests/tests/functional/Makefile
@ -312,6 +314,7 @@ AC_CONFIG_FILES([
tests/zfs-tests/tests/functional/pyzfs/Makefile
tests/zfs-tests/tests/functional/quota/Makefile
tests/zfs-tests/tests/functional/raidz/Makefile
tests/zfs-tests/tests/functional/redacted_send/Makefile
tests/zfs-tests/tests/functional/redundancy/Makefile
tests/zfs-tests/tests/functional/refquota/Makefile
tests/zfs-tests/tests/functional/refreserv/Makefile

View File

@ -624,8 +624,8 @@ extern int zfs_rollback(zfs_handle_t *, zfs_handle_t *, boolean_t);
extern int zfs_rename(zfs_handle_t *, const char *, boolean_t, boolean_t);
typedef struct sendflags {
/* print informational messages (ie, -v was specified) */
boolean_t verbose;
/* Amount of extra information to print. */
int verbosity;
/* recursive send (ie, -R) */
boolean_t replicate;
@ -674,7 +674,9 @@ typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *);
extern int zfs_send(zfs_handle_t *, const char *, const char *,
sendflags_t *, int, snapfilter_cb_t, void *, nvlist_t **);
extern int zfs_send_one(zfs_handle_t *, const char *, int, sendflags_t flags);
extern int zfs_send_one(zfs_handle_t *, const char *, int, sendflags_t *,
const char *);
extern int zfs_send_progress(zfs_handle_t *, int, uint64_t *, uint64_t *);
extern int zfs_send_resume(libzfs_handle_t *, sendflags_t *, int outfd,
const char *);
extern nvlist_t *zfs_send_resume_token_to_nvlist(libzfs_handle_t *hdl,

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2012, 2017 by Delphix. All rights reserved.
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
* Copyright (c) 2017 Datto Inc.
* Copyright 2017 RackTop Systems.
* Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
@ -58,6 +58,7 @@ int lzc_promote(const char *, char *, int);
int lzc_destroy_snaps(nvlist_t *, boolean_t, nvlist_t **);
int lzc_bookmark(nvlist_t *, nvlist_t **);
int lzc_get_bookmarks(const char *, nvlist_t *, nvlist_t **);
int lzc_get_bookmark_props(const char *, nvlist_t **);
int lzc_destroy_bookmarks(nvlist_t *, nvlist_t **);
int lzc_load_key(const char *, boolean_t, uint8_t *, uint_t);
int lzc_unload_key(const char *);
@ -66,6 +67,7 @@ int lzc_initialize(const char *, pool_initialize_func_t, nvlist_t *,
nvlist_t **);
int lzc_trim(const char *, pool_trim_func_t, uint64_t, boolean_t,
nvlist_t *, nvlist_t **);
int lzc_redact(const char *, const char *, nvlist_t *);
int lzc_snaprange_space(const char *, const char *, uint64_t *);
@ -87,6 +89,10 @@ int lzc_send_space(const char *, const char *, enum lzc_send_flags, uint64_t *);
struct dmu_replay_record;
int lzc_send_redacted(const char *, const char *, int, enum lzc_send_flags,
const char *);
int lzc_send_resume_redacted(const char *, const char *, int,
enum lzc_send_flags, uint64_t, uint64_t, const char *);
int lzc_receive(const char *, nvlist_t *, const char *, boolean_t, boolean_t,
int);
int lzc_receive_resumable(const char *, nvlist_t *, const char *, boolean_t,
@ -100,6 +106,11 @@ int lzc_receive_with_cmdprops(const char *, nvlist_t *, nvlist_t *,
uint8_t *, uint_t, const char *, boolean_t, boolean_t, boolean_t, int,
const struct dmu_replay_record *, int, uint64_t *, uint64_t *,
uint64_t *, nvlist_t **);
int lzc_send_space(const char *, const char *, enum lzc_send_flags, uint64_t *);
int lzc_send_space_resume_redacted(const char *, const char *,
enum lzc_send_flags, uint64_t, uint64_t, uint64_t, const char *,
int, uint64_t *);
uint64_t lzc_send_progress(int);
boolean_t lzc_exists(const char *);

View File

@ -21,6 +21,7 @@ COMMON_H = \
$(top_srcdir)/include/sys/dmu_impl.h \
$(top_srcdir)/include/sys/dmu_objset.h \
$(top_srcdir)/include/sys/dmu_recv.h \
$(top_srcdir)/include/sys/dmu_redact.h \
$(top_srcdir)/include/sys/dmu_send.h \
$(top_srcdir)/include/sys/dmu_traverse.h \
$(top_srcdir)/include/sys/dmu_tx.h \
@ -50,6 +51,7 @@ COMMON_H = \
$(top_srcdir)/include/sys/note.h \
$(top_srcdir)/include/sys/nvpair.h \
$(top_srcdir)/include/sys/nvpair_impl.h \
$(top_srcdir)/include/sys/objlist.h \
$(top_srcdir)/include/sys/pathname.h \
$(top_srcdir)/include/sys/policy.h \
$(top_srcdir)/include/sys/range_tree.h \

View File

@ -13,7 +13,7 @@
* CDDL HEADER END
*/
/*
* Copyright (c) 2014 by Delphix. All rights reserved.
* Copyright (c) 2014, 2018 by Delphix. All rights reserved.
*/
#ifndef _BQUEUE_H
@ -32,6 +32,7 @@ typedef struct bqueue {
kcondvar_t bq_pop_cv;
uint64_t bq_size;
uint64_t bq_maxsize;
uint64_t bq_fill_fraction;
size_t bq_node_offset;
} bqueue_t;
@ -41,9 +42,10 @@ typedef struct bqueue_node {
} bqueue_node_t;
int bqueue_init(bqueue_t *, uint64_t, size_t);
int bqueue_init(bqueue_t *, uint64_t, uint64_t, size_t);
void bqueue_destroy(bqueue_t *);
void bqueue_enqueue(bqueue_t *, void *, uint64_t);
void bqueue_enqueue_flush(bqueue_t *, void *, uint64_t);
void *bqueue_dequeue(bqueue_t *);
boolean_t bqueue_empty(bqueue_t *);

View File

@ -329,6 +329,7 @@ void dmu_buf_write_embedded(dmu_buf_t *dbuf, void *data,
bp_embedded_type_t etype, enum zio_compress comp,
int uncompressed_size, int compressed_size, int byteorder, dmu_tx_t *tx);
void dmu_buf_redact(dmu_buf_t *dbuf, dmu_tx_t *tx);
void dbuf_destroy(dmu_buf_impl_t *db);
void dbuf_unoverride(dbuf_dirty_record_t *dr);
@ -345,6 +346,9 @@ void dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx);
void dbuf_stats_init(dbuf_hash_table_t *hash);
void dbuf_stats_destroy(void);
int dbuf_dnode_findbp(dnode_t *dn, uint64_t level, uint64_t blkid,
blkptr_t *bp, uint16_t *datablkszsec, uint8_t *indblkshift);
#define DB_DNODE(_db) ((_db)->db_dnode_handle->dnh_dnode)
#define DB_DNODE_LOCK(_db) ((_db)->db_dnode_handle->dnh_zrlock)
#define DB_DNODE_ENTER(_db) (zrl_add(&DB_DNODE_LOCK(_db)))

View File

@ -504,6 +504,8 @@ int dmu_object_remap_indirects(objset_t *os, uint64_t object, uint64_t txg);
void dmu_write_embedded(objset_t *os, uint64_t object, uint64_t offset,
void *data, uint8_t etype, uint8_t comp, int uncompressed_size,
int compressed_size, int byteorder, dmu_tx_t *tx);
void dmu_redact(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
dmu_tx_t *tx);
/*
* Decide how to write a block: checksum, compression, number of copies, etc.
@ -951,6 +953,7 @@ typedef struct dmu_objset_stats {
dmu_objset_type_t dds_type;
uint8_t dds_is_snapshot;
uint8_t dds_inconsistent;
uint8_t dds_redacted;
char dds_origin[ZFS_MAX_DATASET_NAME_LEN];
} dmu_objset_stats_t;

View File

@ -24,7 +24,7 @@
*/
/*
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright (c) 2013, 2015 by Delphix. All rights reserved.
* Copyright (c) 2013, 2018 by Delphix. All rights reserved.
*/
#ifndef _SYS_DMU_IMPL_H
@ -243,39 +243,13 @@ typedef struct dmu_xuio {
iovec_t *iovp;
} dmu_xuio_t;
/*
* The list of data whose inclusion in a send stream can be pending from
* one call to backup_cb to another. Multiple calls to dump_free() and
* dump_freeobjects() can be aggregated into a single DRR_FREE or
* DRR_FREEOBJECTS replay record.
*/
typedef enum {
PENDING_NONE,
PENDING_FREE,
PENDING_FREEOBJECTS
} dmu_pendop_t;
typedef struct dmu_sendarg {
list_node_t dsa_link;
dmu_replay_record_t *dsa_drr;
vnode_t *dsa_vp;
int dsa_outfd;
proc_t *dsa_proc;
offset_t *dsa_off;
objset_t *dsa_os;
zio_cksum_t dsa_zc;
uint64_t dsa_toguid;
uint64_t dsa_fromtxg;
int dsa_err;
dmu_pendop_t dsa_pending_op;
uint64_t dsa_featureflags;
uint64_t dsa_last_data_object;
uint64_t dsa_last_data_offset;
uint64_t dsa_resume_object;
uint64_t dsa_resume_offset;
boolean_t dsa_sent_begin;
boolean_t dsa_sent_end;
} dmu_sendarg_t;
typedef struct dmu_sendstatus {
list_node_t dss_link;
int dss_outfd;
proc_t *dss_proc;
offset_t *dss_off;
uint64_t dss_blocks; /* blocks visited during the sending process */
} dmu_sendstatus_t;
void dmu_object_zapify(objset_t *, uint64_t, dmu_object_type_t, dmu_tx_t *);
void dmu_object_free_zapified(objset_t *, uint64_t, dmu_tx_t *);

View File

@ -33,6 +33,8 @@
#include <sys/dsl_bookmark.h>
#include <sys/dsl_dataset.h>
#include <sys/spa.h>
#include <sys/objlist.h>
#include <sys/dsl_bookmark.h>
extern const char *recv_clone_name;
@ -44,6 +46,7 @@ typedef struct dmu_recv_cookie {
const char *drc_tosnap;
boolean_t drc_newfs;
boolean_t drc_byteswap;
uint64_t drc_featureflags;
boolean_t drc_force;
boolean_t drc_resumable;
boolean_t drc_raw;
@ -51,20 +54,37 @@ typedef struct dmu_recv_cookie {
boolean_t drc_spill;
struct avl_tree *drc_guid_to_ds_map;
nvlist_t *drc_keynvl;
zio_cksum_t drc_cksum;
uint64_t drc_fromsnapobj;
uint64_t drc_newsnapobj;
uint64_t drc_ivset_guid;
void *drc_owner;
cred_t *drc_cred;
nvlist_t *drc_begin_nvl;
objset_t *drc_os;
vnode_t *drc_vp; /* The vnode to read the stream from */
uint64_t drc_voff; /* The current offset in the stream */
uint64_t drc_bytes_read;
/*
* A record that has had its payload read in, but hasn't yet been handed
* off to the worker thread.
*/
struct receive_record_arg *drc_rrd;
/* A record that has had its header read in, but not its payload. */
struct receive_record_arg *drc_next_rrd;
zio_cksum_t drc_cksum;
zio_cksum_t drc_prev_cksum;
int drc_err;
/* Sorted list of objects not to issue prefetches for. */
objlist_t *drc_ignore_objlist;
} dmu_recv_cookie_t;
int dmu_recv_begin(char *tofs, char *tosnap,
struct dmu_replay_record *drr_begin, boolean_t force, boolean_t resumable,
nvlist_t *localprops, nvlist_t *hidden_args, char *origin,
dmu_recv_cookie_t *drc);
int dmu_recv_stream(dmu_recv_cookie_t *drc, struct vnode *vp, offset_t *voffp,
int cleanup_fd, uint64_t *action_handlep);
int dmu_recv_begin(char *tofs, char *tosnap, dmu_replay_record_t *drr_begin,
boolean_t force, boolean_t resumable, nvlist_t *localprops,
nvlist_t *hidden_args, char *origin, dmu_recv_cookie_t *drc,
vnode_t *vp, offset_t *voffp);
int dmu_recv_stream(dmu_recv_cookie_t *drc, int cleanup_fd,
uint64_t *action_handlep, offset_t *voffp);
int dmu_recv_end(dmu_recv_cookie_t *drc, void *owner);
boolean_t dmu_objset_is_receiving(objset_t *os);

58
include/sys/dmu_redact.h Normal file
View File

@ -0,0 +1,58 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2018 by Delphix. All rights reserved.
*/
#ifndef _DMU_REDACT_H_
#define _DMU_REDACT_H_
#include <sys/spa.h>
#include <sys/dsl_bookmark.h>
#define REDACT_BLOCK_MAX_COUNT (1ULL << 48)
/*
 * Decode the block-size field of a redaction-list entry.
 *
 * The size lives in the 16-bit field at bits 48-63 of rbp_size_count; it is
 * stored without bias and scaled down by SPA_MINBLOCKSHIFT, so the raw field
 * is shifted back up here.  This is the inverse of redact_block_set_size().
 */
static inline uint64_t
redact_block_get_size(redact_block_phys_t *rbp)
{
	uint64_t size = BF64_GET_SB(rbp->rbp_size_count, 48, 16,
	    SPA_MINBLOCKSHIFT, 0);

	return (size);
}
/*
* Store a block size in the 16-bit field at bits 48-63 of rbp_size_count,
* leaving the rest of the word to the count field.
*
* BF64_SET_SB asserts that size is a multiple of (1 << SPA_MINBLOCKSHIFT)
* and stores size >> SPA_MINBLOCKSHIFT with no bias; read the value back
* with redact_block_get_size().
*/
static inline void
redact_block_set_size(redact_block_phys_t *rbp, uint64_t size)
{
BF64_SET_SB((rbp)->rbp_size_count, 48, 16, SPA_MINBLOCKSHIFT, 0, size);
}
/*
 * Decode the block-count field of a redaction-list entry.
 *
 * The count occupies the low 48 bits of rbp_size_count and is stored with a
 * bias of 1 (the field holds count - 1) and no shift, so 1 is added back
 * here.  This is the inverse of redact_block_set_count().
 */
static inline uint64_t
redact_block_get_count(redact_block_phys_t *rbp)
{
	uint64_t count = BF64_GET_SB(rbp->rbp_size_count, 0, 48, 0, 1);

	return (count);
}
/*
* Store a block count in the low 48 bits of rbp_size_count, leaving the
* size field in the upper bits alone.
*
* The field is written with a bias of 1 and no shift, i.e. it holds
* count - 1.  BF64_SET_SB asserts count >= 1, and the largest count that
* fits is REDACT_BLOCK_MAX_COUNT (1ULL << 48).
*/
static inline void
redact_block_set_count(redact_block_phys_t *rbp, uint64_t count)
{
BF64_SET_SB((rbp)->rbp_size_count, 0, 48, 0, 1, count);
}
int dmu_redact_snap(const char *, nvlist_t *, const char *);
#endif /* _DMU_REDACT_H_ */

View File

@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
*/
@ -31,23 +31,40 @@
#include <sys/inttypes.h>
#include <sys/dsl_crypt.h>
#include <sys/dsl_bookmark.h>
#include <sys/spa.h>
#include <sys/objlist.h>
#include <sys/dsl_bookmark.h>
#include <sys/dmu_redact.h>
#define BEGINNV_REDACT_SNAPS "redact_snaps"
#define BEGINNV_REDACT_FROM_SNAPS "redact_from_snaps"
#define BEGINNV_RESUME_OBJECT "resume_object"
#define BEGINNV_RESUME_OFFSET "resume_offset"
struct vnode;
struct dsl_dataset;
struct drr_begin;
struct avl_tree;
struct dmu_replay_record;
int dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
boolean_t large_block_ok, boolean_t compressok, boolean_t rawok, int outfd,
uint64_t resumeobj, uint64_t resumeoff, struct vnode *vp, offset_t *off);
int dmu_send_estimate(struct dsl_dataset *ds, struct dsl_dataset *fromds,
boolean_t stream_compressed, uint64_t *sizep);
int dmu_send_estimate_from_txg(struct dsl_dataset *ds, uint64_t fromtxg,
boolean_t stream_compressed, uint64_t *sizep);
struct dmu_send_outparams;
int
dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
boolean_t large_block_ok, boolean_t compressok, boolean_t rawok,
uint64_t resumeobj, uint64_t resumeoff, const char *redactbook, int outfd,
offset_t *off, struct dmu_send_outparams *dsop);
int dmu_send_estimate_fast(struct dsl_dataset *ds, struct dsl_dataset *fromds,
zfs_bookmark_phys_t *frombook, boolean_t stream_compressed,
uint64_t *sizep);
int dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
boolean_t embedok, boolean_t large_block_ok, boolean_t compressok,
boolean_t rawok, int outfd, struct vnode *vp, offset_t *off);
boolean_t rawok, int outfd, offset_t *off, struct dmu_send_outparams *dso);
typedef int (*dmu_send_outfunc_t)(objset_t *os, void *buf, int len, void *arg);
typedef struct dmu_send_outparams {
dmu_send_outfunc_t dso_outfunc;
void *dso_arg;
boolean_t dso_dryrun;
} dmu_send_outparams_t;
#endif /* _DMU_SEND_H */

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
*/
#ifndef _SYS_DMU_TRAVERSE_H
@ -71,6 +71,20 @@ int traverse_dataset_destroyed(spa_t *spa, blkptr_t *blkptr,
int traverse_pool(spa_t *spa,
uint64_t txg_start, int flags, blkptr_cb_t func, void *arg);
/*
 * Return 2^(level * (indblkshift - SPA_BLKPTRSHIFT)): the span, counted in
 * blocks, of a block pointer that sits `level` levels up the indirect tree
 * of an object whose indirect block shift is `indblkshift`.
 *
 * With the current maximum indirect block size (128k) the shift computed
 * here always stays below 64, so the result cannot overflow.  Should that
 * maximum ever be raised to 1M the shift can reach 64 or more, and explicit
 * overflow handling would have to be added to keep this correct.
 */
static inline uint64_t
bp_span_in_blocks(uint8_t indblkshift, uint64_t level)
{
	/* Bits of span contributed by each level of indirection. */
	int bits_per_level = indblkshift - SPA_BLKPTRSHIFT;
	unsigned int shift = level * bits_per_level;

	ASSERT3U(shift, <, 64);

	return (1ULL << shift);
}
#ifdef __cplusplus
}
#endif

View File

@ -13,22 +13,21 @@
* CDDL HEADER END
*/
/*
* Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2013, 2018 by Delphix. All rights reserved.
*/
#ifndef _SYS_DSL_BOOKMARK_H
#define _SYS_DSL_BOOKMARK_H
#include <sys/zfs_context.h>
#include <sys/refcount.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_pool.h>
#ifdef __cplusplus
extern "C" {
#endif
struct dsl_pool;
struct dsl_dataset;
/*
* On disk zap object.
*/
@ -55,12 +54,81 @@ typedef struct zfs_bookmark_phys {
#define BOOKMARK_PHYS_SIZE_V1 (3 * sizeof (uint64_t))
#define BOOKMARK_PHYS_SIZE_V2 (12 * sizeof (uint64_t))
typedef enum zbm_flags {
ZBM_FLAG_HAS_FBN = (1 << 0),
ZBM_FLAG_SNAPSHOT_EXISTS = (1 << 1),
} zbm_flags_t;
typedef struct redaction_list_phys {
uint64_t rlp_last_object;
uint64_t rlp_last_blkid;
uint64_t rlp_num_entries;
uint64_t rlp_num_snaps;
uint64_t rlp_snaps[]; /* variable length */
} redaction_list_phys_t;
typedef struct redaction_list {
dmu_buf_user_t rl_dbu;
redaction_list_phys_t *rl_phys;
dmu_buf_t *rl_dbuf;
uint64_t rl_object;
zfs_refcount_t rl_longholds;
objset_t *rl_mos;
} redaction_list_t;
/* node in ds_bookmarks */
typedef struct dsl_bookmark_node {
char *dbn_name; /* free with strfree() */
kmutex_t dbn_lock; /* protects dirty/phys in block_killed */
boolean_t dbn_dirty; /* in currently syncing txg */
zfs_bookmark_phys_t dbn_phys;
avl_node_t dbn_node;
} dsl_bookmark_node_t;
/*
* One redaction-list record: a run of consecutive blocks, identified by
* rbp_object/rbp_blkid (presumably the object number and the first block
* id of the run -- confirm against the code that builds the list), plus
* the packed size/count word described below.
*/
typedef struct redact_block_phys {
uint64_t rbp_object;
uint64_t rbp_blkid;
/*
* The top 16 bits of this field represent the block size in sectors of
* the blocks in question; the bottom 48 bits are used to store the
* number of consecutive blocks that are in the redaction list. They
* should be accessed using the redact_block_{get,set}_{size,count}()
* inline functions declared in sys/dmu_redact.h.
*/
uint64_t rbp_size_count;
uint64_t rbp_padding;
} redact_block_phys_t;
typedef int (*rl_traverse_callback_t)(redact_block_phys_t *, void *);
int dsl_bookmark_create(nvlist_t *, nvlist_t *);
int dsl_bookmark_create_redacted(const char *, const char *, uint64_t,
uint64_t *, void *, redaction_list_t **);
int dsl_get_bookmarks(const char *, nvlist_t *, nvlist_t *);
int dsl_get_bookmarks_impl(dsl_dataset_t *, nvlist_t *, nvlist_t *);
int dsl_get_bookmark_props(const char *, const char *, nvlist_t *);
int dsl_bookmark_destroy(nvlist_t *, nvlist_t *);
int dsl_bookmark_lookup(struct dsl_pool *, const char *,
struct dsl_dataset *, zfs_bookmark_phys_t *);
int dsl_bookmark_lookup_impl(dsl_dataset_t *, const char *,
zfs_bookmark_phys_t *);
int dsl_redaction_list_hold_obj(struct dsl_pool *, uint64_t, void *,
redaction_list_t **);
void dsl_redaction_list_rele(redaction_list_t *, void *);
void dsl_redaction_list_long_hold(struct dsl_pool *, redaction_list_t *,
void *);
void dsl_redaction_list_long_rele(redaction_list_t *, void *);
boolean_t dsl_redaction_list_long_held(redaction_list_t *);
int dsl_bookmark_init_ds(dsl_dataset_t *);
void dsl_bookmark_fini_ds(dsl_dataset_t *);
boolean_t dsl_bookmark_ds_destroyed(dsl_dataset_t *, dmu_tx_t *);
void dsl_bookmark_snapshotted(dsl_dataset_t *, dmu_tx_t *);
void dsl_bookmark_block_killed(dsl_dataset_t *, const blkptr_t *, dmu_tx_t *);
void dsl_bookmark_sync_done(dsl_dataset_t *, dmu_tx_t *);
void dsl_bookmark_node_add(dsl_dataset_t *, dsl_bookmark_node_t *, dmu_tx_t *);
uint64_t dsl_bookmark_latest_txg(dsl_dataset_t *);
int dsl_redaction_list_traverse(redaction_list_t *, zbookmark_phys_t *,
rl_traverse_callback_t, void *);
void dsl_bookmark_next_changed(dsl_dataset_t *, dsl_dataset_t *, dmu_tx_t *);
#ifdef __cplusplus
}

View File

@ -45,11 +45,13 @@
extern "C" {
#endif
extern int zfs_allow_redacted_dataset_mount;
struct dsl_dataset;
struct dsl_dir;
struct dsl_pool;
struct dsl_crypto_params;
struct dsl_key_mapping;
struct zfs_bookmark_phys;
#define DS_FLAG_INCONSISTENT (1ULL<<0)
#define DS_IS_INCONSISTENT(ds) \
@ -114,6 +116,13 @@ struct dsl_key_mapping;
*/
#define DS_FIELD_REMAP_DEADLIST "com.delphix:remap_deadlist"
/*
* We were receiving an incremental from a redaction bookmark, and these are the
* guids of its snapshots.
*/
#define DS_FIELD_RESUME_REDACT_BOOKMARK_SNAPS \
"com.delphix:resume_redact_book_snaps"
/*
* This field is set to the ivset guid for encrypted snapshots. This is used
* for validating raw receives.
@ -176,7 +185,8 @@ typedef struct dsl_dataset {
/* only used in syncing context, only valid for non-snapshots: */
struct dsl_dataset *ds_prev;
uint64_t ds_bookmarks; /* DMU_OTN_ZAP_METADATA */
uint64_t ds_bookmarks_obj; /* DMU_OTN_ZAP_METADATA */
avl_tree_t ds_bookmarks; /* dsl_bookmark_node_t */
/* has internal locking: */
dsl_deadlist_t ds_deadlist;
@ -314,23 +324,27 @@ int dsl_dataset_hold_flags(struct dsl_pool *dp, const char *name,
boolean_t dsl_dataset_try_add_ref(struct dsl_pool *dp, dsl_dataset_t *ds,
void *tag);
int dsl_dataset_create_key_mapping(dsl_dataset_t *ds);
int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj, void *tag,
dsl_dataset_t **);
int dsl_dataset_hold_obj_flags(struct dsl_pool *dp, uint64_t dsobj,
ds_hold_flags_t flags, void *tag, dsl_dataset_t **);
void dsl_dataset_remove_key_mapping(dsl_dataset_t *ds);
void dsl_dataset_rele(dsl_dataset_t *ds, void *tag);
int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj,
void *tag, dsl_dataset_t **);
void dsl_dataset_rele_flags(dsl_dataset_t *ds, ds_hold_flags_t flags,
void *tag);
void dsl_dataset_rele(dsl_dataset_t *ds, void *tag);
int dsl_dataset_own(struct dsl_pool *dp, const char *name,
ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp);
int dsl_dataset_own_force(struct dsl_pool *dp, const char *name,
ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp);
int dsl_dataset_own_obj(struct dsl_pool *dp, uint64_t dsobj,
ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp);
int dsl_dataset_own_obj_force(struct dsl_pool *dp, uint64_t dsobj,
ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp);
void dsl_dataset_disown(dsl_dataset_t *ds, ds_hold_flags_t flags, void *tag);
void dsl_dataset_name(dsl_dataset_t *ds, char *name);
boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, void *tag, boolean_t override);
int dsl_dataset_namelen(dsl_dataset_t *ds);
boolean_t dsl_dataset_has_owner(dsl_dataset_t *ds);
boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, void *tag);
uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname,
dsl_dataset_t *origin, uint64_t flags, cred_t *,
struct dsl_crypto_params *, dmu_tx_t *);
@ -387,9 +401,11 @@ uint64_t dsl_get_defer_destroy(dsl_dataset_t *ds);
uint64_t dsl_get_referenced(dsl_dataset_t *ds);
uint64_t dsl_get_numclones(dsl_dataset_t *ds);
uint64_t dsl_get_inconsistent(dsl_dataset_t *ds);
uint64_t dsl_get_redacted(dsl_dataset_t *ds);
uint64_t dsl_get_available(dsl_dataset_t *ds);
int dsl_get_written(dsl_dataset_t *ds, uint64_t *written);
int dsl_get_prev_snap(dsl_dataset_t *ds, char *snap);
void dsl_get_redact_snaps(dsl_dataset_t *ds, nvlist_t *propval);
int dsl_get_mountpoint(dsl_dataset_t *ds, const char *dsname, char *value,
char *source);
@ -403,6 +419,8 @@ void dsl_dataset_space(dsl_dataset_t *ds,
uint64_t dsl_dataset_fsid_guid(dsl_dataset_t *ds);
int dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new,
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
int dsl_dataset_space_written_bookmark(struct zfs_bookmark_phys *bmp,
dsl_dataset_t *new, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
int dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, dsl_dataset_t *last,
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
@ -463,6 +481,9 @@ boolean_t dsl_dataset_feature_is_active(dsl_dataset_t *ds, spa_feature_t f);
boolean_t dsl_dataset_get_uint64_array_feature(dsl_dataset_t *ds,
spa_feature_t f, uint64_t *outlength, uint64_t **outp);
void dsl_dataset_activate_redaction(dsl_dataset_t *ds, uint64_t *redact_snaps,
uint64_t num_redact_snaps, dmu_tx_t *tx);
#ifdef ZFS_DEBUG
#define dprintf_ds(ds, fmt, ...) do { \
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2016 by Delphix. All rights reserved.
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
@ -45,6 +45,7 @@ int dsl_destroy_inconsistent(const char *, void *);
int dsl_destroy_snapshot_check_impl(struct dsl_dataset *, boolean_t);
void dsl_destroy_snapshot_sync_impl(struct dsl_dataset *,
boolean_t, struct dmu_tx *);
void dsl_dir_remove_clones_key(dsl_dir_t *, uint64_t, dmu_tx_t *);
typedef struct dsl_destroy_snapshot_arg {
const char *ddsa_name;

View File

@ -118,11 +118,11 @@ typedef enum {
ZFS_PROP_PRIVATE, /* not exposed to user, temporary */
ZFS_PROP_ACLINHERIT,
ZFS_PROP_CREATETXG,
ZFS_PROP_NAME, /* not exposed to the user */
ZFS_PROP_NAME,
ZFS_PROP_CANMOUNT,
ZFS_PROP_ISCSIOPTIONS, /* not exposed to the user */
ZFS_PROP_ISCSIOPTIONS,
ZFS_PROP_XATTR,
ZFS_PROP_NUMCLONES, /* not exposed to the user */
ZFS_PROP_NUMCLONES,
ZFS_PROP_COPIES,
ZFS_PROP_VERSION,
ZFS_PROP_UTF8ONLY,
@ -140,12 +140,12 @@ typedef enum {
ZFS_PROP_USEDDS,
ZFS_PROP_USEDCHILD,
ZFS_PROP_USEDREFRESERV,
ZFS_PROP_USERACCOUNTING, /* not exposed to the user */
ZFS_PROP_STMF_SHAREINFO, /* not exposed to the user */
ZFS_PROP_USERACCOUNTING,
ZFS_PROP_STMF_SHAREINFO,
ZFS_PROP_DEFER_DESTROY,
ZFS_PROP_USERREFS,
ZFS_PROP_LOGBIAS,
ZFS_PROP_UNIQUE, /* not exposed to the user */
ZFS_PROP_UNIQUE,
ZFS_PROP_OBJSETID,
ZFS_PROP_DEDUP,
ZFS_PROP_MLSLABEL,
@ -156,7 +156,7 @@ typedef enum {
ZFS_PROP_CLONES,
ZFS_PROP_LOGICALUSED,
ZFS_PROP_LOGICALREFERENCED,
ZFS_PROP_INCONSISTENT, /* not exposed to the user */
ZFS_PROP_INCONSISTENT,
ZFS_PROP_VOLMODE,
ZFS_PROP_FILESYSTEM_LIMIT,
ZFS_PROP_SNAPSHOT_LIMIT,
@ -184,6 +184,8 @@ typedef enum {
ZFS_PROP_REMAPTXG, /* not exposed to the user */
ZFS_PROP_SPECIAL_SMALL_BLOCKS,
ZFS_PROP_IVSET_GUID, /* not exposed to the user */
ZFS_PROP_REDACTED,
ZFS_PROP_REDACT_SNAPS,
ZFS_NUM_PROPS
} zfs_prop_t;
@ -208,8 +210,7 @@ extern const char *zfs_userquota_prop_prefixes[ZFS_NUM_USERQUOTA_PROPS];
/*
* Pool properties are identified by these constants and must be added to the
* end of this list to ensure that external consumers are not affected
* by the change. If you make any changes to this list, be sure to update
* the property table in module/zcommon/zpool_prop.c.
* by the change. Properties must be registered in zfs_prop_init().
*/
typedef enum {
ZPOOL_PROP_INVAL = -1,
@ -1272,6 +1273,8 @@ typedef enum zfs_ioc {
ZFS_IOC_POOL_DISCARD_CHECKPOINT, /* 0x5a4e */
ZFS_IOC_POOL_INITIALIZE, /* 0x5a4f */
ZFS_IOC_POOL_TRIM, /* 0x5a50 */
ZFS_IOC_REDACT, /* 0x5a51 */
ZFS_IOC_GET_BOOKMARK_PROPS, /* 0x5a52 */
/*
* Linux - 3/64 numbers reserved.
@ -1318,6 +1321,7 @@ typedef enum {
ZFS_ERR_FROM_IVSET_GUID_MISSING,
ZFS_ERR_FROM_IVSET_GUID_MISMATCH,
ZFS_ERR_SPILL_BLOCK_FLAG_MISSING,
ZFS_ERR_UNKNOWN_SEND_STREAM_FEATURE,
} zfs_errno_t;
/*

51
include/sys/objlist.h Normal file
View File

@ -0,0 +1,51 @@
/*
* CDDL HEADER START
*
* This file and its contents are supplied under the terms of the
* Common Development and Distribution License ("CDDL"), version 1.0.
* You may only use this file in accordance with the terms of version
* 1.0 of the CDDL.
*
* A full copy of the text of the CDDL should have accompanied this
* source. A copy of the CDDL is also available via the Internet at
* http://www.illumos.org/license/CDDL.
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2018 by Delphix. All rights reserved.
*/
#ifndef _OBJLIST_H
#define _OBJLIST_H
#ifdef __cplusplus
extern "C" {
#endif
#include <sys/zfs_context.h>
typedef struct objlist_node {
list_node_t on_node;
uint64_t on_object;
} objlist_node_t;
/*
* A list of object numbers, kept as objlist_node_t entries on ol_list.
*/
typedef struct objlist {
list_t ol_list; /* List of objlist_node_t. */
/*
* Last object looked up. Used to assert that objects are being looked
* up in ascending order.
*/
uint64_t ol_last_lookup;
} objlist_t;
objlist_t *objlist_create(void);
void objlist_destroy(objlist_t *);
boolean_t objlist_exists(objlist_t *, uint64_t);
void objlist_insert(objlist_t *, uint64_t);
#ifdef __cplusplus
}
#endif
#endif /* _OBJLIST_H */

View File

@ -94,13 +94,19 @@ _NOTE(CONSTCOND) } while (0)
#define BF64_GET_SB(x, low, len, shift, bias) \
((BF64_GET(x, low, len) + (bias)) << (shift))
/*
* We use ASSERT3U instead of ASSERT in these macros to prevent a lint error in
* the case where val is a constant. We can't fix ASSERT because it's used as
* an expression in several places in the kernel; as a result, changing it to
* the do{} while() syntax to allow us to _NOTE the CONSTCOND is not an option.
*/
#define BF32_SET_SB(x, low, len, shift, bias, val) do { \
ASSERT(IS_P2ALIGNED(val, 1U << shift)); \
ASSERT3U(IS_P2ALIGNED(val, 1U << shift), !=, B_FALSE); \
ASSERT3S((val) >> (shift), >=, bias); \
BF32_SET(x, low, len, ((val) >> (shift)) - (bias)); \
_NOTE(CONSTCOND) } while (0)
#define BF64_SET_SB(x, low, len, shift, bias, val) do { \
ASSERT(IS_P2ALIGNED(val, 1ULL << shift)); \
ASSERT3U(IS_P2ALIGNED(val, 1ULL << shift), !=, B_FALSE); \
ASSERT3S((val) >> (shift), >=, bias); \
BF64_SET(x, low, len, ((val) >> (shift)) - (bias)); \
_NOTE(CONSTCOND) } while (0)
@ -402,6 +408,7 @@ _NOTE(CONSTCOND) } while (0)
/*
* Embedded-type codes stored in an embedded block pointer (see
* BPE_GET_ETYPE/BPE_SET_ETYPE).
*
* NOTE(review): NUM_BP_EMBEDDED_TYPES is still pinned to
* BP_EMBEDDED_TYPE_RESERVED (value 1), so it counts neither the reserved
* type nor BP_EMBEDDED_TYPE_REDACTED (value 2).  Any bounds check of the
* form "etype < NUM_BP_EMBEDDED_TYPES" therefore rejects redacted block
* pointers -- confirm that every user of this constant expects that before
* relying on it as a true count.
*/
typedef enum bp_embedded_type {
BP_EMBEDDED_TYPE_DATA,
BP_EMBEDDED_TYPE_RESERVED, /* Reserved for an unintegrated feature. */
BP_EMBEDDED_TYPE_REDACTED,
NUM_BP_EMBEDDED_TYPES = BP_EMBEDDED_TYPE_RESERVED
} bp_embedded_type_t;
@ -602,6 +609,14 @@ _NOTE(CONSTCOND) } while (0)
#define BP_IS_HOLE(bp) \
(!BP_IS_EMBEDDED(bp) && DVA_IS_EMPTY(BP_IDENTITY(bp)))
/*
* Mark bp as a redacted block pointer: flag it as embedded and set its
* embedded type to BP_EMBEDDED_TYPE_REDACTED, which is exactly the pair of
* conditions BP_IS_REDACTED() tests.
*
* This expands to a bare { } block, not do { } while (0), so
* "if (c) BP_SET_REDACTED(bp); else ..." does not compile; wrap the call
* in braces when it is the body of an if that has an else.
*/
#define BP_SET_REDACTED(bp) \
{ \
BP_SET_EMBEDDED(bp, B_TRUE); \
BPE_SET_ETYPE(bp, BP_EMBEDDED_TYPE_REDACTED); \
}
/*
* True when bp is an embedded block pointer whose embedded type is
* BP_EMBEDDED_TYPE_REDACTED.  The embedded test comes first, so the
* embedded type is only examined for block pointers that are embedded.
*/
#define BP_IS_REDACTED(bp) \
(BP_IS_EMBEDDED(bp) && BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_REDACTED)
/* BP_IS_RAIDZ(bp) assumes no block compression */
#define BP_IS_RAIDZ(bp) (DVA_GET_ASIZE(&(bp)->blk_dva[0]) > \
BP_GET_PSIZE(bp))
@ -678,6 +693,13 @@ _NOTE(CONSTCOND) } while (0)
(u_longlong_t)BPE_GET_LSIZE(bp), \
(u_longlong_t)BPE_GET_PSIZE(bp), \
(u_longlong_t)bp->blk_birth); \
} else if (BP_IS_REDACTED(bp)) { \
len += func(buf + len, size - len, \
"REDACTED [L%llu %s] size=%llxL birth=%lluL", \
(u_longlong_t)BP_GET_LEVEL(bp), \
type, \
(u_longlong_t)BP_GET_LSIZE(bp), \
(u_longlong_t)bp->blk_birth); \
} else { \
for (int d = 0; d < BP_GET_NDVAS(bp); d++) { \
const dva_t *dva = &bp->blk_dva[d]; \

View File

@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2012, 2016 by Delphix. All rights reserved.
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
@ -122,6 +122,7 @@
#define noinline __attribute__((noinline))
#define likely(x) __builtin_expect((x), 1)
#define unlikely(x) __builtin_expect((x), 0)
/*
* Debugging
@ -305,6 +306,7 @@ typedef pthread_cond_t kcondvar_t;
extern void cv_init(kcondvar_t *cv, char *name, int type, void *arg);
extern void cv_destroy(kcondvar_t *cv);
extern void cv_wait(kcondvar_t *cv, kmutex_t *mp);
extern int cv_wait_sig(kcondvar_t *cv, kmutex_t *mp);
extern clock_t cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime);
extern clock_t cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
hrtime_t res, int flag);

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2017 by Delphix. All rights reserved.
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
* Copyright 2016 RackTop Systems.
* Copyright (c) 2017, Intel Corporation.
*/
@ -101,7 +101,7 @@ typedef enum drr_headertype {
/* flag #18 is reserved for a Delphix feature */
#define DMU_BACKUP_FEATURE_LARGE_BLOCKS (1 << 19)
#define DMU_BACKUP_FEATURE_RESUMING (1 << 20)
/* flag #21 is reserved for the redacted send/receive feature */
#define DMU_BACKUP_FEATURE_REDACTED (1 << 21)
#define DMU_BACKUP_FEATURE_COMPRESSED (1 << 22)
#define DMU_BACKUP_FEATURE_LARGE_DNODE (1 << 23)
#define DMU_BACKUP_FEATURE_RAW (1 << 24)
@ -116,7 +116,8 @@ typedef enum drr_headertype {
DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_LZ4 | \
DMU_BACKUP_FEATURE_RESUMING | DMU_BACKUP_FEATURE_LARGE_BLOCKS | \
DMU_BACKUP_FEATURE_COMPRESSED | DMU_BACKUP_FEATURE_LARGE_DNODE | \
DMU_BACKUP_FEATURE_RAW | DMU_BACKUP_FEATURE_HOLDS)
DMU_BACKUP_FEATURE_RAW | DMU_BACKUP_FEATURE_HOLDS | \
DMU_BACKUP_FEATURE_REDACTED)
/* Are all features in the given flag word currently supported? */
#define DMU_STREAM_SUPPORTED(x) (!((x) & ~DMU_BACKUP_FEATURE_MASK))
@ -212,7 +213,7 @@ typedef struct dmu_replay_record {
enum {
DRR_BEGIN, DRR_OBJECT, DRR_FREEOBJECTS,
DRR_WRITE, DRR_FREE, DRR_END, DRR_WRITE_BYREF,
DRR_SPILL, DRR_WRITE_EMBEDDED, DRR_OBJECT_RANGE,
DRR_SPILL, DRR_WRITE_EMBEDDED, DRR_OBJECT_RANGE, DRR_REDACT,
DRR_NUMTYPES
} drr_type;
uint32_t drr_payloadlen;
@ -337,6 +338,12 @@ typedef struct dmu_replay_record {
uint8_t drr_flags;
uint8_t drr_pad[3];
} drr_object_range;
struct drr_redact {
uint64_t drr_object;
uint64_t drr_offset;
uint64_t drr_length;
uint64_t drr_toguid;
} drr_redact;
/*
* Note: drr_checksum is overlaid with all record types

View File

@ -36,6 +36,7 @@
#include <sys/rrwlock.h>
#include <sys/dsl_dataset.h>
#include <sys/zfs_ioctl.h>
#include <sys/objlist.h>
#ifdef __cplusplus
extern "C" {
@ -197,6 +198,7 @@ extern uint_t zfs_fsyncer_key;
extern int zfs_suspend_fs(zfsvfs_t *zfsvfs);
extern int zfs_resume_fs(zfsvfs_t *zfsvfs, struct dsl_dataset *ds);
extern int zfs_end_fs(zfsvfs_t *zfsvfs, struct dsl_dataset *ds);
extern int zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
const char *domain, uint64_t rid, uint64_t *valuep);
extern int zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
@ -214,6 +216,7 @@ extern int zfsvfs_create(const char *name, boolean_t readony, zfsvfs_t **zfvp);
extern int zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os);
extern void zfsvfs_free(zfsvfs_t *zfsvfs);
extern int zfs_check_global_label(const char *dsname, const char *hexsl);
extern objlist_t *zfs_get_deleteq(objset_t *os);
extern boolean_t zfs_is_readonly(zfsvfs_t *zfsvfs);
extern int zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent);

View File

@ -67,6 +67,9 @@ typedef enum spa_feature {
SPA_FEATURE_ALLOCATION_CLASSES,
SPA_FEATURE_RESILVER_DEFER,
SPA_FEATURE_BOOKMARK_V2,
SPA_FEATURE_REDACTION_BOOKMARKS,
SPA_FEATURE_REDACTED_DATASETS,
SPA_FEATURE_BOOKMARK_WRITTEN,
SPA_FEATURES
} spa_feature_t;

View File

@ -22,7 +22,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, Joyent, Inc. All rights reserved.
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright (c) 2011, 2018 by Delphix. All rights reserved.
* Copyright (c) 2012 DEY Storage Systems, Inc. All rights reserved.
* Copyright (c) 2012 Pawel Jakub Dawidek <pawel@dawidek.net>.
* Copyright (c) 2013 Martin Matuska. All rights reserved.
@ -597,7 +597,6 @@ zfs_bookmark_exists(const char *path)
int err;
boolean_t rv;
(void) strlcpy(fsname, path, sizeof (fsname));
pound = strchr(fsname, '#');
if (pound == NULL)
@ -2408,6 +2407,10 @@ get_numeric_property(zfs_handle_t *zhp, zfs_prop_t prop, zprop_source_t *src,
*val = zhp->zfs_dmustats.dds_inconsistent;
break;
case ZFS_PROP_REDACTED:
*val = zhp->zfs_dmustats.dds_redacted;
break;
default:
switch (zfs_prop_get_type(prop)) {
case PROP_TYPE_NUMBER:
@ -2620,6 +2623,37 @@ zfs_get_clones_nvl(zfs_handle_t *zhp)
return (value);
}
/*
 * Render the ZFS_PROP_REDACT_SNAPS property of zhp into propbuf.
 *
 * The property is looked up in zhp->zfs_props as an nvlist whose
 * ZPROP_VALUE entry is a uint64 array.  Each array element is printed in
 * decimal and the elements are joined with commas; an empty array is
 * reported as the string "none".  Output that does not fit in proplen
 * bytes is silently truncated, and a proplen of 0 leaves propbuf
 * untouched.
 *
 * Returns 0 on success, or -1 if either lookup fails.
 */
static int
get_rsnaps_string(zfs_handle_t *zhp, char *propbuf, size_t proplen)
{
	nvlist_t *value;
	uint64_t *snaps;
	uint_t nsnaps;

	if (nvlist_lookup_nvlist(zhp->zfs_props,
	    zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS), &value) != 0)
		return (-1);
	if (nvlist_lookup_uint64_array(value, ZPROP_VALUE, &snaps,
	    &nsnaps) != 0)
		return (-1);

	if (nsnaps == 0) {
		/* No redaction snapshots; pass back a special value. */
		(void) snprintf(propbuf, proplen, "none");
		return (0);
	}

	/* Only terminate the buffer if the caller gave us room for it. */
	if (proplen > 0)
		propbuf[0] = '\0';

	/* nsnaps is unsigned, so index with uint_t rather than int. */
	for (uint_t i = 0; i < nsnaps; i++) {
		char buf[128];

		/*
		 * Separate entries by position instead of peeking at
		 * propbuf[0], which is not ours to read when proplen is 0.
		 */
		if (i != 0)
			(void) strlcat(propbuf, ",", proplen);
		(void) snprintf(buf, sizeof (buf), "%llu",
		    (u_longlong_t)snaps[i]);
		(void) strlcat(propbuf, buf, proplen);
	}
	return (0);
}
/*
* Accepts a property and value and checks that the value
* matches the one found by the channel program. If they are
@ -2814,6 +2848,11 @@ zfs_prop_get(zfs_handle_t *zhp, zfs_prop_t prop, char *propbuf, size_t proplen,
zcp_check(zhp, prop, 0, str);
break;
case ZFS_PROP_REDACT_SNAPS:
if (get_rsnaps_string(zhp, propbuf, proplen) != 0)
return (-1);
break;
case ZFS_PROP_CLONES:
if (get_clones_string(zhp, propbuf, proplen) != 0)
return (-1);
@ -3333,6 +3372,9 @@ zfs_prop_get_userquota(zfs_handle_t *zhp, const char *propname,
return (0);
}
/*
* propname must start with "written@" or "written#".
*/
int
zfs_prop_get_written_int(zfs_handle_t *zhp, const char *propname,
uint64_t *propvalue)
@ -3343,8 +3385,10 @@ zfs_prop_get_written_int(zfs_handle_t *zhp, const char *propname,
(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
snapname = strchr(propname, '@') + 1;
if (strchr(snapname, '@')) {
assert(zfs_prop_written(propname));
snapname = propname + strlen("written@");
if (strchr(snapname, '@') != NULL || strchr(snapname, '#') != NULL) {
/* full snapshot or bookmark name specified */
(void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
} else {
/* snapname is the short name, append it to zhp's fsname */
@ -3355,8 +3399,7 @@ zfs_prop_get_written_int(zfs_handle_t *zhp, const char *propname,
cp = strchr(zc.zc_value, '@');
if (cp != NULL)
*cp = '\0';
(void) strlcat(zc.zc_value, "@", sizeof (zc.zc_value));
(void) strlcat(zc.zc_value, snapname, sizeof (zc.zc_value));
(void) strlcat(zc.zc_value, snapname - 1, sizeof (zc.zc_value));
}
err = ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_SPACE_WRITTEN, &zc);

View File

@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013, 2015 by Delphix. All rights reserved.
* Copyright (c) 2013, 2019 by Delphix. All rights reserved.
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2019 Datto Inc.
*/
@ -212,10 +212,12 @@ zfs_iter_bookmarks(zfs_handle_t *zhp, zfs_iter_f func, void *data)
/* Setup the requested properties nvlist. */
props = fnvlist_alloc();
fnvlist_add_boolean(props, zfs_prop_to_name(ZFS_PROP_GUID));
fnvlist_add_boolean(props, zfs_prop_to_name(ZFS_PROP_CREATETXG));
fnvlist_add_boolean(props, zfs_prop_to_name(ZFS_PROP_CREATION));
fnvlist_add_boolean(props, zfs_prop_to_name(ZFS_PROP_IVSET_GUID));
for (zfs_prop_t p = 0; p < ZFS_NUM_PROPS; p++) {
if (zfs_prop_valid_for_type(p, ZFS_TYPE_BOOKMARK, B_FALSE)) {
fnvlist_add_boolean(props, zfs_prop_to_name(p));
}
}
fnvlist_add_boolean(props, "redact_complete");
if ((err = lzc_get_bookmarks(zhp->zfs_name, props, &bmarks)) != 0)
goto out;

View File

@ -22,7 +22,7 @@
/*
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2017 by Delphix. All rights reserved.
* Copyright (c) 2014, 2019 by Delphix. All rights reserved.
* Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
* Copyright 2017 RackTop Systems.
* Copyright (c) 2018 Datto Inc.
@ -306,7 +306,7 @@ zfs_is_mounted(zfs_handle_t *zhp, char **where)
*/
static boolean_t
zfs_is_mountable(zfs_handle_t *zhp, char *buf, size_t buflen,
zprop_source_t *source)
zprop_source_t *source, int flags)
{
char sourceloc[MAXNAMELEN];
zprop_source_t sourcetype;
@ -329,6 +329,13 @@ zfs_is_mountable(zfs_handle_t *zhp, char *buf, size_t buflen,
getzoneid() == GLOBAL_ZONEID)
return (B_FALSE);
if (zfs_prop_get_int(zhp, ZFS_PROP_ZONED) &&
getzoneid() == GLOBAL_ZONEID)
return (B_FALSE);
if (zfs_prop_get_int(zhp, ZFS_PROP_REDACTED) && !(flags & MS_FORCE))
return (B_FALSE);
if (source)
*source = sourcetype;
@ -495,8 +502,10 @@ zfs_mount(zfs_handle_t *zhp, const char *options, int flags)
if (zpool_get_prop_int(zhp->zpool_hdl, ZPOOL_PROP_READONLY, NULL))
(void) strlcat(mntopts, "," MNTOPT_RO, sizeof (mntopts));
if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL))
if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL,
flags)) {
return (0);
}
/*
* Append default mount options which apply to the mount point.
@ -868,7 +877,7 @@ zfs_share_proto(zfs_handle_t *zhp, zfs_share_proto_t *proto)
zprop_source_t sourcetype;
int ret;
if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL))
if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), NULL, 0))
return (0);
for (curr_proto = proto; *curr_proto != PROTO_END; curr_proto++) {
@ -1135,8 +1144,7 @@ remove_mountpoint(zfs_handle_t *zhp)
char mountpoint[ZFS_MAXPROPLEN];
zprop_source_t source;
if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint),
&source))
if (!zfs_is_mountable(zhp, mountpoint, sizeof (mountpoint), &source, 0))
return;
if (source == ZPROP_SRC_DEFAULT ||

File diff suppressed because it is too large Load Diff

View File

@ -468,6 +468,7 @@ zfs_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
case EREMOTEIO:
zfs_verror(hdl, EZFS_ACTIVE_POOL, fmt, ap);
break;
case ZFS_ERR_UNKNOWN_SEND_STREAM_FEATURE:
case ZFS_ERR_IOC_CMD_UNAVAIL:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "the loaded zfs "
"module does not support this operation. A reboot may "

View File

@ -632,12 +632,42 @@ int
lzc_send(const char *snapname, const char *from, int fd,
enum lzc_send_flags flags)
{
return (lzc_send_resume(snapname, from, fd, flags, 0, 0));
return (lzc_send_resume_redacted(snapname, from, fd, flags, 0, 0,
NULL));
}
/*
 * Redacted send without resume state: forwards to
 * lzc_send_resume_redacted() with a resume object and resume offset of 0,
 * handing redactbook through unchanged.
 */
int
lzc_send_redacted(const char *snapname, const char *from, int fd,
    enum lzc_send_flags flags, const char *redactbook)
{
	const uint64_t no_resumeobj = 0;
	const uint64_t no_resumeoff = 0;

	return (lzc_send_resume_redacted(snapname, from, fd, flags,
	    no_resumeobj, no_resumeoff, redactbook));
}
/*
 * Resumable send without redaction: forwards to lzc_send_resume_redacted()
 * with the caller's resume object/offset and a NULL redaction bookmark.
 */
int
lzc_send_resume(const char *snapname, const char *from, int fd,
    enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff)
{
	const char *no_redactbook = NULL;

	return (lzc_send_resume_redacted(snapname, from, fd, flags,
	    resumeobj, resumeoff, no_redactbook));
}
/*
* snapname: The name of the "tosnap", or the snapshot whose contents we are
* sending.
* from: The name of the "fromsnap", or the incremental source.
* fd: File descriptor to write the stream to.
* flags: flags that determine features to be used by the stream.
* resumeobj: Object to resume from, for resuming send
* resumeoff: Offset to resume from, for resuming send.
* redactbook: Name of the redaction bookmark whose redaction list is used to
* filter the stream, or NULL if the send is not redacted.
*/
int
lzc_send_resume_redacted(const char *snapname, const char *from, int fd,
enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff,
const char *redactbook)
{
nvlist_t *args;
int err;
@ -658,6 +688,9 @@ lzc_send_resume(const char *snapname, const char *from, int fd,
fnvlist_add_uint64(args, "resume_object", resumeobj);
fnvlist_add_uint64(args, "resume_offset", resumeoff);
}
if (redactbook != NULL)
fnvlist_add_string(args, "redactbook", redactbook);
err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL);
nvlist_free(args);
return (err);
@ -676,11 +709,13 @@ lzc_send_resume(const char *snapname, const char *from, int fd,
* are traversed, looking for blocks with a birth time since the creation TXG of
* the snapshot this bookmark was created from. This will result in
* significantly more I/O and be less efficient than a send space estimation on
* an equivalent snapshot.
* an equivalent snapshot. This process is also used if redactbook is
* non-null.
*/
int
lzc_send_space(const char *snapname, const char *from,
enum lzc_send_flags flags, uint64_t *spacep)
lzc_send_space_resume_redacted(const char *snapname, const char *from,
enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff,
uint64_t resume_bytes, const char *redactbook, int fd, uint64_t *spacep)
{
nvlist_t *args;
nvlist_t *result;
@ -697,6 +732,16 @@ lzc_send_space(const char *snapname, const char *from,
fnvlist_add_boolean(args, "compressok");
if (flags & LZC_SEND_FLAG_RAW)
fnvlist_add_boolean(args, "rawok");
if (resumeobj != 0 || resumeoff != 0) {
fnvlist_add_uint64(args, "resume_object", resumeobj);
fnvlist_add_uint64(args, "resume_offset", resumeoff);
fnvlist_add_uint64(args, "bytes", resume_bytes);
}
if (redactbook != NULL)
fnvlist_add_string(args, "redactbook", redactbook);
if (fd != -1)
fnvlist_add_int32(args, "fd", fd);
err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result);
nvlist_free(args);
if (err == 0)
@ -705,6 +750,14 @@ lzc_send_space(const char *snapname, const char *from,
return (err);
}
/*
 * Plain send-size estimate: forwards to lzc_send_space_resume_redacted()
 * with zeroed resume object, resume offset and resume byte count, a NULL
 * redaction bookmark, and -1 for the file descriptor.  The estimate is
 * returned through spacep.
 */
int
lzc_send_space(const char *snapname, const char *from,
    enum lzc_send_flags flags, uint64_t *spacep)
{
	const uint64_t no_resumeobj = 0;
	const uint64_t no_resumeoff = 0;
	const uint64_t no_resume_bytes = 0;
	const char *no_redactbook = NULL;
	const int no_fd = -1;

	return (lzc_send_space_resume_redacted(snapname, from, flags,
	    no_resumeobj, no_resumeoff, no_resume_bytes, no_redactbook,
	    no_fd, spacep));
}
static int
recv_read(int fd, void *buf, int ilen)
{
@ -744,6 +797,7 @@ recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops,
char fsname[MAXPATHLEN];
char *atp;
int error;
boolean_t payload = B_FALSE;
ASSERT3S(g_refcount, >, 0);
VERIFY3S(g_fd, !=, -1);
@ -774,13 +828,13 @@ recv_impl(const char *snapname, nvlist_t *recvdprops, nvlist_t *localprops,
return (error);
} else {
drr = *begin_record;
payload = (begin_record->drr_payloadlen != 0);
}
/*
* Raw receives, resumable receives, and receives that include a
* wrapping key all use the new interface.
* All receives with a payload should use the new interface.
*/
if (resumable || raw || wkeydata != NULL) {
if (resumable || raw || wkeydata != NULL || payload) {
nvlist_t *outnvl = NULL;
nvlist_t *innvl = fnvlist_alloc();
@ -1118,19 +1172,33 @@ lzc_bookmark(nvlist_t *bookmarks, nvlist_t **errlist)
* parameter is an nvlist of property names (with no values) that will be
* returned for each bookmark.
*
* The following are valid properties on bookmarks, all of which are numbers
* (represented as uint64 in the nvlist)
* The following are valid properties on bookmarks, most of which are numbers
* (represented as uint64 in the nvlist), except redact_snaps, which is a
* uint64 array, and redact_complete, which is a boolean
*
* "guid" - globally unique identifier of the snapshot it refers to
* "createtxg" - txg when the snapshot it refers to was created
* "creation" - timestamp when the snapshot it refers to was created
* "ivsetguid" - IVset guid for identifying encrypted snapshots
* "redact_snaps" - list of guids of the redaction snapshots for the specified
* bookmark. If the bookmark is not a redaction bookmark, the nvlist will
* not contain an entry for this value. If it is redacted with respect to
* no snapshots, it will contain value -> NULL uint64 array
* "redact_complete" - boolean value; true if the redaction bookmark is
* complete, false otherwise.
*
* The format of the returned nvlist as follows:
* <short name of bookmark> -> {
* <name of property> -> {
* "value" -> uint64
* }
* ...
* "redact_snaps" -> {
* "value" -> uint64 array
* }
* "redact_complete" -> {
* "value" -> boolean value
* }
* }
*/
int
@ -1139,6 +1207,33 @@ lzc_get_bookmarks(const char *fsname, nvlist_t *props, nvlist_t **bmarks)
return (lzc_ioctl(ZFS_IOC_GET_BOOKMARKS, fsname, props, bmarks));
}
/*
 * Get bookmark properties.
 *
 * Given the full name of a bookmark, retrieve all of its properties into
 * *props.  Returns the result of the ZFS_IOC_GET_BOOKMARK_PROPS ioctl.
 *
 * The returned property list has the following layout:
 * {
 *     <name of property> -> {
 *         "value" -> uint64
 *     }
 *     ...
 *     "redact_snaps" -> {
 *         "value" -> uint64 array
 *     }
 * }
 */
int
lzc_get_bookmark_props(const char *bookmark, nvlist_t **props)
{
	/* An empty nvlist is passed as the ioctl's input. */
	nvlist_t *args = fnvlist_alloc();
	int err;

	err = lzc_ioctl(ZFS_IOC_GET_BOOKMARK_PROPS, bookmark, args, props);
	fnvlist_free(args);
	return (err);
}
/*
* Destroys bookmarks.
*
@ -1479,3 +1574,18 @@ lzc_trim(const char *poolname, pool_trim_func_t cmd_type, uint64_t rate,
return (error);
}
/*
 * Create a redaction bookmark called bookname by redacting snapshot with
 * respect to every snapshot listed in snapnv.
 *
 * The request is issued as a ZFS_IOC_REDACT ioctl on snapshot, carrying
 * "bookname" and "snapnv" as its arguments; the ioctl's result is returned.
 */
int
lzc_redact(const char *snapshot, const char *bookname, nvlist_t *snapnv)
{
	nvlist_t *innvl;
	int err;

	innvl = fnvlist_alloc();
	fnvlist_add_string(innvl, "bookname", bookname);
	fnvlist_add_nvlist(innvl, "snapnv", snapnv);

	err = lzc_ioctl(ZFS_IOC_REDACT, snapshot, innvl, NULL);

	/* The argument list is ours; release it regardless of the outcome. */
	fnvlist_free(innvl);
	return (err);
}

View File

@ -59,6 +59,7 @@ KERNEL_C = \
dmu_object.c \
dmu_objset.c \
dmu_recv.c \
dmu_redact.c \
dmu_send.c \
dmu_traverse.c \
dmu_tx.c \
@ -86,6 +87,7 @@ KERNEL_C = \
metaslab.c \
mmp.c \
multilist.c \
objlist.c \
pathname.c \
range_tree.c \
refcount.c \

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
*/

View File

@ -1850,6 +1850,18 @@ regardless of this setting.
Default value: \fB1,048,576\fR.
.RE
.sp
.ne 2
.na
\fBzfs_allow_redacted_dataset_mount\fR (int)
.ad
.RS 12n
Allow datasets received with redacted send/receive to be mounted. Normally
disabled because these datasets may be missing key data.
.sp
Default value: \fB0\fR.
.RE
.sp
.ne 2
.na
@ -2438,18 +2450,65 @@ incorrectly removed.
Use \fB1\fR for yes (default) and \fB0\fR for no.
.RE
.sp
.ne 2
.na
\fBzfs_send_no_prefetch_queue_ff\fR (int)
.ad
.RS 12n
The fill fraction of the \fBzfs send\fR internal queues. The fill fraction
controls the timing with which internal threads are woken up.
.sp
Default value: \fB20\fR.
.RE
.sp
.ne 2
.na
\fBzfs_send_no_prefetch_queue_length\fR (int)
.ad
.RS 12n
The maximum number of bytes allowed in \fBzfs send\fR's internal queues.
.sp
Default value: \fB1,048,576\fR.
.RE
.sp
.ne 2
.na
\fBzfs_send_queue_ff\fR (int)
.ad
.RS 12n
The fill fraction of the \fBzfs send\fR prefetch queue. The fill fraction
controls the timing with which internal threads are woken up.
.sp
Default value: \fB20\fR.
.RE
.sp
.ne 2
.na
\fBzfs_send_queue_length\fR (int)
.ad
.RS 12n
The maximum number of bytes allowed in the \fBzfs send\fR queue. This value
must be at least twice the maximum block size in use.
The maximum number of bytes allowed that will be prefetched by \fBzfs send\fR.
This value must be at least twice the maximum block size in use.
.sp
Default value: \fB16,777,216\fR.
.RE
.sp
.ne 2
.na
\fBzfs_recv_queue_ff\fR (int)
.ad
.RS 12n
The fill fraction of the \fBzfs receive\fR queue. The fill fraction
controls the timing with which internal threads are woken up.
.sp
Default value: \fB20\fR.
.RE
.sp
.ne 2
.na
@ -2462,6 +2521,21 @@ must be at least twice the maximum block size in use.
Default value: \fB16,777,216\fR.
.RE
.sp
.ne 2
.na
\fBzfs_override_estimate_recordsize\fR (ulong)
.ad
.RS 12n
Setting this variable overrides the default logic for estimating block
sizes when doing a zfs send. The default heuristic is that the average
block size will be the current recordsize. Override this value if most data
in your dataset is not of that size and you require accurate zfs send size
estimates.
.sp
Default value: \fB0\fR.
.RE
.sp
.ne 2
.na

View File

@ -1,5 +1,5 @@
'\" te
.\" Copyright (c) 2013, 2017 by Delphix. All rights reserved.
.\" Copyright (c) 2012, 2018 by Delphix. All rights reserved.
.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
.\" The contents of this file are subject to the terms of the Common Development
@ -234,6 +234,27 @@ This feature becomes \fBactive\fR when a v2 bookmark is created and will be
returned to the \fBenabled\fR state when all v2 bookmarks are destroyed.
.RE
.sp
.ne 2
.na
\fBbookmark_written\fR
.ad
.RS 4n
.TS
l l .
GUID com.delphix:bookmark_written
READ\-ONLY COMPATIBLE no
DEPENDENCIES bookmarks, extensible_dataset, bookmark_v2
.TE
This feature enables additional bookmark accounting fields, enabling the
written#<bookmark> property (space written since a bookmark) and estimates of
send stream sizes for incrementals from bookmarks.
This feature becomes \fBactive\fR when a bookmark is created and will be
returned to the \fBenabled\fR state when all bookmarks with these fields are destroyed.
.RE
.sp
.ne 2
.na
@ -645,6 +666,46 @@ The upgrade process runs in the background and may take a while to complete
for the filesystems containing a large number of files.
.RE
.sp
.ne 2
.na
\fB\fBredaction_bookmarks\fR\fR
.ad
.RS 4n
.TS
l l .
GUID com.delphix:redaction_bookmarks
READ\-ONLY COMPATIBLE no
DEPENDENCIES bookmarks, extensible_dataset
.TE
This feature enables the use of the redacted zfs send. Redacted \fBzfs send\fR
creates redaction bookmarks, which store the list of blocks redacted by the
send that created them. For more information about redacted send,
see \fBzfs\fR(8).
.RE
.sp
.ne 2
.na
\fB\fBredacted_datasets\fR\fR
.ad
.RS 4n
.TS
l l .
GUID com.delphix:redacted_datasets
READ\-ONLY COMPATIBLE no
DEPENDENCIES extensible_dataset
.TE
This feature enables the receiving of redacted zfs send streams. Redacted zfs
send streams create redacted datasets when received. These datasets are
missing some of their blocks, and so cannot be safely mounted, and their
contents cannot be safely read. For more information about redacted receive,
see \fBzfs\fR(8).
.RE
.sp
.ne 2
.na

View File

@ -177,7 +177,7 @@
.Cm mount
.Nm
.Cm mount
.Op Fl Olv
.Op Fl Oflv
.Op Fl o Ar options
.Fl a | Ar filesystem
.Nm
@ -200,11 +200,18 @@
.Ar snapshot
.Nm
.Cm send
.Op Fl LPcenvw
.Op Fl i Ar snapshot Ns | Ns Ar bookmark
.Op Fl DLPcenpvw
.Oo Fl i Ar snapshot Ns | Ns Ar bookmark
.Oc
.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot
.Nm
.Cm send
.Fl -redact Ar redaction_bookmark
.Op Fl DLPcenpv
.Op Fl i Ar snapshot Ns | Ns Ar bookmark
.Ar snapshot
.Nm
.Cm send
.Op Fl Penv
.Fl t Ar receive_resume_token
.Nm
@ -227,6 +234,10 @@
.Fl A
.Ar filesystem Ns | Ns Ar volume
.Nm
.Cm redact
.Ar snapshot redaction_bookmark
.Op Ar redaction_snapshot Ns ...
.Nm
.Cm allow
.Ar filesystem Ns | Ns Ar volume
.Nm
@ -740,6 +751,11 @@ this opaque token can be provided to
.Sy zfs send -t
to resume and complete the
.Sy zfs receive .
.It Sy redact_snaps
For bookmarks, this is the list of snapshot guids the bookmark contains a redaction
list for.
For snapshots, this is the list of snapshot guids the snapshot is redacted with
respect to.
.It Sy referenced
The amount of data that is accessible by this dataset, which may or may not be
shared with other datasets in the pool.
@ -2454,6 +2470,76 @@ would normally be. Since compression is applied before encryption datasets may
be vulnerable to a CRIME-like attack if applications accessing the data allow
for it. Deduplication with encryption will leak information about which blocks
are equivalent in a dataset and will incur an extra CPU cost per block written.
.Ss Redaction
ZFS has support for a limited version of data subsetting, in the form of
redaction. Using the
.Sy zfs redact
command, a
.Sy redaction bookmark
can be created that stores a list of blocks containing sensitive information. When
provided to
.Sy zfs
.Sy send ,
this causes a
.Sy redacted send
to occur. Redacted sends omit the blocks containing sensitive information,
replacing them with REDACT records. When these send streams are received, a
.Sy redacted dataset
is created. A redacted dataset cannot be mounted by default, since it is
incomplete. It can be used to receive other send streams. In this way datasets
can be used for data backup and replication, with all the benefits that zfs send
and receive have to offer, while protecting sensitive information from being
stored on less-trusted machines or services.
.Pp
For the purposes of redaction, there are two steps to the process. A redact
step, and a send/receive step. First, a redaction bookmark is created. This is
done by providing the
.Sy zfs redact
command with a parent snapshot, a bookmark to be created, and a number of
redaction snapshots. These redaction snapshots must be descendants of the
parent snapshot, and they should modify data that is considered sensitive in
some way. Any blocks of data modified by all of the redaction snapshots will
be listed in the redaction bookmark, because it represents the truly sensitive
information. When it comes to the send step, the send process will not send
the blocks listed in the redaction bookmark, instead replacing them with
REDACT records. When received on the target system, this will create a
redacted dataset, missing the data that corresponds to the blocks in the
redaction bookmark on the sending system. The incremental send streams from
the original parent to the redaction snapshots can then also be received on
the target system, and this will produce a complete snapshot that can be used
normally. Incrementals from one snapshot on the parent filesystem and another
can also be done by sending from the redaction bookmark, rather than the
snapshots themselves.
.Pp
In order to make the purpose of the feature more clear, an example is
provided. Consider a zfs filesystem containing four files. These files
represent information for an online shopping service. One file contains a list
of usernames and passwords, another contains purchase histories, a third
contains click tracking data, and a fourth contains user preferences. The
owner of this data wants to make it available for their development teams to
test against, and their market research teams to do analysis on. The
development teams need information about user preferences and the click
tracking data, while the market research teams need information about purchase
histories and user preferences. Neither needs access to the usernames and
passwords. However, because all of this data is stored in one ZFS filesystem,
it must all be sent and received together. In addition, the owner of the data
wants to take advantage of features like compression, checksumming, and
snapshots, so they do want to continue to use ZFS to store and transmit their
data. Redaction can help them do so. First, they would make two clones of a
snapshot of the data on the source. In one clone, they create the setup they
want their market research team to see; they delete the usernames and
passwords file, and overwrite the click tracking data with dummy
information. In another, they create the setup they want the development teams
to see, by replacing the passwords with fake information and replacing the
purchase histories with randomly generated ones. They would then create a
redaction bookmark on the parent snapshot, using snapshots on the two clones
as redaction snapshots. The parent can then be sent, redacted, to the target
server where the research and development teams have access. Finally,
incremental sends from the parent snapshot to each of the clones can be sent
to and received on the target server; these snapshots are identical to the
ones on the source, and are ready to be used, while the parent snapshot on the
target contains none of the username and password data present on the source,
because it was removed by the redacted send operation.
.Sh SUBCOMMANDS
All subcommands that modify state are logged persistently to the pool in their
original form.
@ -3329,7 +3415,7 @@ Displays all ZFS file systems currently mounted.
.It Xo
.Nm
.Cm mount
.Op Fl Olv
.Op Fl Oflv
.Op Fl o Ar options
.Fl a | Ar filesystem
.Xc
@ -3370,6 +3456,8 @@ of
this will cause the terminal to interactively block after asking for the key.
.It Fl v
Report mount progress.
.It Fl f
Attempt to force mounting of all filesystems, even those that couldn't normally be mounted (e.g. redacted datasets).
.El
.It Xo
.Nm
@ -3650,7 +3738,7 @@ You will be able to receive your streams on future versions of ZFS.
.It Xo
.Nm
.Cm send
.Op Fl LPcenvw
.Op Fl DLPRcenpvw
.Op Fl i Ar snapshot Ns | Ns Ar bookmark
.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot
.Xc
@ -3775,6 +3863,97 @@ This information includes a per-second report of how much data has been sent.
.It Xo
.Nm
.Cm send
.Fl -redact Ar redaction_bookmark
.Op Fl DLPcenpv
.br
.Op Fl i Ar snapshot Ns | Ns Ar bookmark
.Ar snapshot
.Xc
Generate a redacted send stream.
This send stream contains all blocks from the snapshot being sent that aren't
included in the redaction list contained in the bookmark specified by the
.Fl -redact
(or
.Fl -d
) flag.
The resulting send stream is said to be redacted with respect to the snapshots
the bookmark specified by the
.Fl -redact No flag was created with.
The bookmark must have been created by running
.Sy zfs redact
on the snapshot being sent.
.sp
This feature can be used to allow clones of a filesystem to be made available on
a remote system, in the case where their parent need not (or needs to not) be
usable.
For example, if a filesystem contains sensitive data, and it has clones where
that sensitive data has been secured or replaced with dummy data, redacted sends
can be used to replicate the secured data without replicating the original
sensitive data, while still sharing all possible blocks.
A snapshot that has been redacted with respect to a set of snapshots will
contain all blocks referenced by at least one snapshot in the set, but will
contain none of the blocks referenced by none of the snapshots in the set.
In other words, if all snapshots in the set have modified a given block in the
parent, that block will not be sent; but if one or more snapshots have not
modified a block in the parent, they will still reference the parent's block, so
that block will be sent.
Note that only user data will be redacted.
.sp
When the redacted send stream is received, we will generate a redacted
snapshot.
Due to the nature of redaction, a redacted dataset can only be used in the
following ways:
.sp
1. To receive, as a clone, an incremental send from the original snapshot to one
of the snapshots it was redacted with respect to.
In this case, the stream will produce a valid dataset when received because all
blocks that were redacted in the parent are guaranteed to be present in the
child's send stream.
This use case will produce a normal snapshot, which can be used just like other
snapshots.
.sp
2. To receive an incremental send from the original snapshot to something
redacted with respect to a subset of the set of snapshots the initial snapshot
was redacted with respect to.
In this case, each block that was redacted in the original is still redacted
(redacting with respect to additional snapshots causes less data to be redacted
(because the snapshots define what is permitted, and everything else is
redacted)).
This use case will produce a new redacted snapshot.
.sp
3. To receive an incremental send from a redaction bookmark of the original
snapshot (one that was created when redacting with respect to a subset of the
set of snapshots the initial snapshot was created with respect to) to
anything else.
A send stream from such a redaction bookmark will contain all of the blocks
necessary to fill in any redacted data, should it be needed, because the sending
system is aware of what blocks were originally redacted.
This will either produce a normal snapshot or a redacted one, depending on
whether the new send stream is redacted.
.sp
4. To receive an incremental send from a redacted version of the initial
snapshot that is redacted with respect to a subset of the set of snapshots the
initial snapshot was created with respect to.
A send stream from a compatible redacted dataset will contain all of the blocks
necessary to fill in any redacted data.
This will either produce a normal snapshot or a redacted one, depending on
whether the new send stream is redacted.
.sp
5. To receive a full send as a clone of the redacted snapshot.
Since the stream is a full send, it definitionally contains all the data needed
to create a new dataset.
This use case will either produce a normal snapshot or a redacted one, depending
on whether the full send stream was redacted.
.sp
These restrictions are detected and enforced by \fBzfs receive\fR; a
redacted send stream will contain the list of snapshots that the stream is
redacted with respect to.
These are stored with the redacted snapshot, and are used to detect and
correctly handle the cases above. Note that for technical reasons, raw sends
and redacted sends cannot be combined at this time.
.It Xo
.Nm
.Cm send
.Op Fl Penv
.Fl t
.Ar receive_resume_token
@ -4091,6 +4270,24 @@ Abort an interrupted
deleting its saved partially received state.
.It Xo
.Nm
.Cm redact
.Ar snapshot redaction_bookmark
.Op Ar redaction_snapshot Ns ...
.Xc
Generate a new redaction bookmark.
In addition to the typical bookmark information, a redaction bookmark contains
the list of redacted blocks and the list of redaction snapshots specified.
The redacted blocks are blocks in the snapshot which are not referenced by any
of the redaction snapshots.
These blocks are found by iterating over the metadata in each redaction snapshot
to determine what has been changed since the target snapshot.
Redaction is designed to support redacted zfs sends; see the entry for
.Sy zfs send
for more information on the purpose of this operation.
If a redact operation fails partway through (due to an error or a system
failure), the redaction can be resumed by rerunning the same command.
.It Xo
.Nm
.Cm allow
.Ar filesystem Ns | Ns Ar volume
.Xc

View File

@ -408,6 +408,47 @@ zpool_feature_init(void)
edonr_deps);
}
{
static const spa_feature_t redact_books_deps[] = {
SPA_FEATURE_BOOKMARK_V2,
SPA_FEATURE_EXTENSIBLE_DATASET,
SPA_FEATURE_BOOKMARKS,
SPA_FEATURE_NONE
};
zfeature_register(SPA_FEATURE_REDACTION_BOOKMARKS,
"com.delphix:redaction_bookmarks", "redaction_bookmarks",
"Support for bookmarks which store redaction lists for zfs "
"redacted send/recv.", 0, ZFEATURE_TYPE_BOOLEAN,
redact_books_deps);
}
{
static const spa_feature_t redact_datasets_deps[] = {
SPA_FEATURE_EXTENSIBLE_DATASET,
SPA_FEATURE_NONE
};
zfeature_register(SPA_FEATURE_REDACTED_DATASETS,
"com.delphix:redacted_datasets", "redacted_datasets", "Support for "
"redacted datasets, produced by receiving a redacted zfs send "
"stream.", ZFEATURE_FLAG_PER_DATASET, ZFEATURE_TYPE_UINT64_ARRAY,
redact_datasets_deps);
}
{
	/* Features bookmark_written depends on; SPA_FEATURE_NONE ends the list. */
	static const spa_feature_t bookmark_written_deps[] = {
		SPA_FEATURE_BOOKMARK_V2,
		SPA_FEATURE_EXTENSIBLE_DATASET,
		SPA_FEATURE_BOOKMARKS,
		SPA_FEATURE_NONE
	};
	/*
	 * The description is built from adjacent string literals, so every
	 * piece but the last must end in a space to keep words separated
	 * ("property (space written ...", not "property(space written ...").
	 */
	zfeature_register(SPA_FEATURE_BOOKMARK_WRITTEN,
	    "com.delphix:bookmark_written", "bookmark_written",
	    "Additional accounting, enabling the written#<bookmark> "
	    "property (space written since a bookmark), and estimates of "
	    "send stream sizes for incrementals from bookmarks.",
	    0, ZFEATURE_TYPE_BOOLEAN, bookmark_written_deps);
}
zfeature_register(SPA_FEATURE_DEVICE_REMOVAL,
"com.delphix:device_removal", "device_removal",
"Top-level vdevs can be removed, reducing logical pool size.",

View File

@ -418,6 +418,7 @@ pool_namecheck(const char *pool, namecheck_err_t *why, char *what)
}
#if defined(_KERNEL)
EXPORT_SYMBOL(entity_namecheck);
EXPORT_SYMBOL(pool_namecheck);
EXPORT_SYMBOL(dataset_namecheck);
EXPORT_SYMBOL(zfs_component_namecheck);

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2015 by Delphix. All rights reserved.
* Copyright (c) 2011, 2018 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright 2016, Joyent, Inc.
*/
@ -458,6 +458,10 @@ zfs_prop_init(void)
zprop_register_string(ZFS_PROP_KEYLOCATION, "keylocation",
"none", PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
"prompt | <file URI>", "KEYLOCATION");
zprop_register_string(ZFS_PROP_REDACT_SNAPS,
"redact_snaps", NULL, PROP_READONLY,
ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK, "<snapshot>[,...]",
"RSNAPS");
/* readonly number properties */
zprop_register_number(ZFS_PROP_USED, "used", 0, PROP_READONLY,
@ -465,9 +469,10 @@ zfs_prop_init(void)
zprop_register_number(ZFS_PROP_AVAILABLE, "available", 0, PROP_READONLY,
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>", "AVAIL");
zprop_register_number(ZFS_PROP_REFERENCED, "referenced", 0,
PROP_READONLY, ZFS_TYPE_DATASET, "<size>", "REFER");
PROP_READONLY, ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK, "<size>",
"REFER");
zprop_register_number(ZFS_PROP_COMPRESSRATIO, "compressratio", 0,
PROP_READONLY, ZFS_TYPE_DATASET,
PROP_READONLY, ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK,
"<1.00x or higher if compressed>", "RATIO");
zprop_register_number(ZFS_PROP_REFRATIO, "refcompressratio", 0,
PROP_READONLY, ZFS_TYPE_DATASET,
@ -495,7 +500,8 @@ zfs_prop_init(void)
PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>",
"LUSED");
zprop_register_number(ZFS_PROP_LOGICALREFERENCED, "logicalreferenced",
0, PROP_READONLY, ZFS_TYPE_DATASET, "<size>", "LREFER");
0, PROP_READONLY, ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK, "<size>",
"LREFER");
zprop_register_number(ZFS_PROP_FILESYSTEM_COUNT, "filesystem_count",
UINT64_MAX, PROP_READONLY, ZFS_TYPE_FILESYSTEM,
"<count>", "FSCOUNT");
@ -569,6 +575,8 @@ zfs_prop_init(void)
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "PBKDF2SALT");
zprop_register_hidden(ZFS_PROP_KEY_GUID, "keyguid", PROP_TYPE_NUMBER,
PROP_READONLY, ZFS_TYPE_DATASET, "KEYGUID");
zprop_register_hidden(ZFS_PROP_REDACTED, "redacted", PROP_TYPE_NUMBER,
PROP_READONLY, ZFS_TYPE_DATASET, "REDACTED");
/*
* Property to be removed once libbe is integrated
@ -668,8 +676,10 @@ zfs_prop_userquota(const char *name)
boolean_t
zfs_prop_written(const char *name)
{
	/*
	 * A "written" property names either a snapshot ("written@<snap>")
	 * or a bookmark ("written#<bookmark>"); accept both prefixes.
	 * Only the prefix is examined here; nothing after it is validated.
	 */
	static const char *prop_prefix = "written@";
	static const char *book_prefix = "written#";

	return (strncmp(name, prop_prefix, strlen(prop_prefix)) == 0 ||
	    strncmp(name, book_prefix, strlen(book_prefix)) == 0);
}
/*

View File

@ -35,6 +35,7 @@ $(MODULE)-objs += dmu_diff.o
$(MODULE)-objs += dmu_object.o
$(MODULE)-objs += dmu_objset.o
$(MODULE)-objs += dmu_recv.o
$(MODULE)-objs += dmu_redact.o
$(MODULE)-objs += dmu_send.o
$(MODULE)-objs += dmu_traverse.o
$(MODULE)-objs += dmu_tx.o
@ -60,6 +61,7 @@ $(MODULE)-objs += lz4.o
$(MODULE)-objs += metaslab.o
$(MODULE)-objs += mmp.o
$(MODULE)-objs += multilist.o
$(MODULE)-objs += objlist.o
$(MODULE)-objs += pathname.o
$(MODULE)-objs += policy.o
$(MODULE)-objs += range_tree.o

View File

@ -6170,6 +6170,8 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
ASSERT(!embedded_bp ||
BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA);
ASSERT(!BP_IS_HOLE(bp));
ASSERT(!BP_IS_REDACTED(bp));
top:
if (!embedded_bp) {

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2011, 2015 by Delphix. All rights reserved.
* Copyright (c) 2011, 2018 by Delphix. All rights reserved.
*/
#include <sys/arc.h>
@ -156,7 +156,8 @@ bptree_visit_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
int err;
struct bptree_args *ba = arg;
if (bp == NULL || BP_IS_HOLE(bp))
if (zb->zb_level == ZB_DNODE_LEVEL || BP_IS_HOLE(bp) ||
BP_IS_REDACTED(bp))
return (0);
err = ba->ba_func(ba->ba_arg, bp, ba->ba_tx);

View File

@ -13,7 +13,7 @@
* CDDL HEADER END
*/
/*
* Copyright (c) 2014 by Delphix. All rights reserved.
* Copyright (c) 2014, 2018 by Delphix. All rights reserved.
*/
#include <sys/bqueue.h>
@ -27,13 +27,27 @@ obj2node(bqueue_t *q, void *data)
/*
* Initialize a blocking queue The maximum capacity of the queue is set to
* size. Types that want to be stored in a bqueue must contain a bqueue_node_t,
* and offset should give its offset from the start of the struct. Return 0 on
* success, or -1 on failure.
* size. Types that are stored in a bqueue must contain a bqueue_node_t,
* and node_offset must be its offset from the start of the struct.
* fill_fraction is a performance tuning value; when the queue is full, any
* threads attempting to enqueue records will block. They will block until
* they're signaled, which will occur when the queue is at least 1/fill_fraction
* empty. Similar behavior occurs on dequeue; if the queue is empty, threads
* block. They will be signalled when the queue has 1/fill_fraction full, or
* when bqueue_flush is called. As a result, you must call bqueue_flush when
* you enqueue your final record on a thread, in case the dequeueing threads are
* currently blocked and that enqueue does not cause them to be awoken.
* Alternatively, this behavior can be disabled (causing signaling to happen
* immediately) by setting fill_fraction to any value larger than size.
* Return 0 on success, or -1 on failure.
*/
int
bqueue_init(bqueue_t *q, uint64_t size, size_t node_offset)
bqueue_init(bqueue_t *q, uint64_t fill_fraction, uint64_t size,
size_t node_offset)
{
if (fill_fraction == 0) {
return (-1);
}
list_create(&q->bq_list, node_offset + sizeof (bqueue_node_t),
node_offset + offsetof(bqueue_node_t, bqn_node));
cv_init(&q->bq_add_cv, NULL, CV_DEFAULT, NULL);
@ -42,6 +56,7 @@ bqueue_init(bqueue_t *q, uint64_t size, size_t node_offset)
q->bq_node_offset = node_offset;
q->bq_size = 0;
q->bq_maxsize = size;
q->bq_fill_fraction = fill_fraction;
return (0);
}
@ -53,20 +68,18 @@ bqueue_init(bqueue_t *q, uint64_t size, size_t node_offset)
void
bqueue_destroy(bqueue_t *q)
{
	/*
	 * Take the lock before tearing anything down so that we wait for
	 * any thread still inside the queue's critical section (for
	 * example an enqueuer that has just woken us) to leave it before
	 * the condvars and list go away.
	 */
	mutex_enter(&q->bq_lock);
	ASSERT0(q->bq_size);
	cv_destroy(&q->bq_add_cv);
	cv_destroy(&q->bq_pop_cv);
	list_destroy(&q->bq_list);
	mutex_exit(&q->bq_lock);

	/* The mutex is destroyed exactly once, and only after it is dropped. */
	mutex_destroy(&q->bq_lock);
}
/*
* Add data to q, consuming size units of capacity. If there is insufficient
* capacity to consume size units, block until capacity exists. Asserts size is
* > 0.
*/
void
bqueue_enqueue(bqueue_t *q, void *data, uint64_t item_size)
static void
bqueue_enqueue_impl(bqueue_t *q, void *data, uint64_t item_size,
boolean_t flush)
{
ASSERT3U(item_size, >, 0);
ASSERT3U(item_size, <=, q->bq_maxsize);
@ -77,9 +90,38 @@ bqueue_enqueue(bqueue_t *q, void *data, uint64_t item_size)
}
q->bq_size += item_size;
list_insert_tail(&q->bq_list, data);
cv_signal(&q->bq_pop_cv);
if (q->bq_size >= q->bq_maxsize / q->bq_fill_fraction)
cv_signal(&q->bq_pop_cv);
if (flush)
cv_broadcast(&q->bq_pop_cv);
mutex_exit(&q->bq_lock);
}
/*
 * Add data to q, consuming item_size units of capacity. If there is
 * insufficient capacity to consume item_size units, block until capacity
 * exists. Asserts item_size is > 0.
 *
 * This variant passes flush = B_FALSE to bqueue_enqueue_impl(), so
 * dequeueing threads are signalled only once the queue has reached
 * bq_maxsize / bq_fill_fraction; use bqueue_enqueue_flush() for the final
 * record.
 */
void
bqueue_enqueue(bqueue_t *q, void *data, uint64_t item_size)
{
bqueue_enqueue_impl(q, data, item_size, B_FALSE);
}
/*
 * Enqueue an entry, and then flush the queue. This forces the popping
 * threads to wake up, even if we're below the fill fraction.
 *
 * Enqueue and flush are a single call (rather than two separate calls)
 * to avoid a race between the enqueueing thread and the dequeueing
 * thread: with separate calls, the enqueue could wake the dequeueing
 * thread, which could then destroy the condvar before the enqueueing
 * thread had finished its flush. Here the broadcast is issued by
 * bqueue_enqueue_impl() (flush = B_TRUE) before it drops bq_lock.
 */
void
bqueue_enqueue_flush(bqueue_t *q, void *data, uint64_t item_size)
{
bqueue_enqueue_impl(q, data, item_size, B_TRUE);
}
/*
* Take the first element off of q. If there are no elements on the queue, wait
* until one is put there. Return the removed element.
@ -97,7 +139,8 @@ bqueue_dequeue(bqueue_t *q)
ASSERT3P(ret, !=, NULL);
item_size = obj2node(q, ret)->bqn_size;
q->bq_size -= item_size;
cv_signal(&q->bq_add_cv);
if (q->bq_size <= q->bq_maxsize - (q->bq_maxsize / q->bq_fill_fraction))
cv_signal(&q->bq_add_cv);
mutex_exit(&q->bq_lock);
return (ret);
}

View File

@ -1359,6 +1359,20 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
return (0);
}
/*
* Any attempt to read a redacted block should result in an error. This
* will never happen under normal conditions, but can be useful for
* debugging purposes.
*/
if (BP_IS_REDACTED(db->db_blkptr)) {
ASSERT(dsl_dataset_feature_is_active(
db->db_objset->os_dsl_dataset,
SPA_FEATURE_REDACTED_DATASETS));
DB_DNODE_EXIT(db);
mutex_exit(&db->db_mtx);
return (SET_ERROR(EIO));
}
SET_BOOKMARK(&zb, dmu_objset_id(db->db_objset),
db->db.db_object, db->db_level, db->db_blkid);
@ -2395,11 +2409,23 @@ dmu_buf_set_crypt_params(dmu_buf_t *db_fake, boolean_t byteorder,
bcopy(mac, dr->dt.dl.dr_mac, ZIO_DATA_MAC_LEN);
}
#pragma weak dmu_buf_fill_done = dbuf_fill_done
static void
dbuf_override_impl(dmu_buf_impl_t *db, const blkptr_t *bp, dmu_tx_t *tx)
{
	/* The most recent dirty record must belong to this transaction. */
	ASSERT3U(db->db_last_dirty->dr_txg, ==, tx->tx_txg);

	struct dirty_leaf *leaf = &db->db_last_dirty->dt.dl;

	/*
	 * Mark the record as overridden and install the caller's block
	 * pointer, stamping it with the dirty record's txg as its birth.
	 */
	leaf->dr_override_state = DR_OVERRIDDEN;
	leaf->dr_overridden_by = *bp;
	leaf->dr_overridden_by.blk_birth = db->db_last_dirty->dr_txg;
}
/* ARGSUSED */
void
dbuf_fill_done(dmu_buf_impl_t *db, dmu_tx_t *tx)
dmu_buf_fill_done(dmu_buf_t *dbuf, dmu_tx_t *tx)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbuf;
mutex_enter(&db->db_mtx);
DBUF_VERIFY(db);
@ -2454,6 +2480,31 @@ dmu_buf_write_embedded(dmu_buf_t *dbuf, void *data,
dl->dr_overridden_by.blk_birth = db->db_last_dirty->dr_txg;
}
/*
 * Replace the contents of a level-0 dbuf with a "redacted" block pointer
 * in transaction tx. The dataset must have SPA_FEATURE_REDACTED_DATASETS
 * active (asserted).
 */
void
dmu_buf_redact(dmu_buf_t *dbuf, dmu_tx_t *tx)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbuf;
dmu_object_type_t type;
ASSERT(dsl_dataset_feature_is_active(db->db_objset->os_dsl_dataset,
SPA_FEATURE_REDACTED_DATASETS));
/* Fetch the owning dnode's object type for the new block pointer. */
DB_DNODE_ENTER(db);
type = DB_DNODE(db)->dn_type;
DB_DNODE_EXIT(db);
/* Only data (level 0) blocks are redacted here. */
ASSERT0(db->db_level);
/*
 * NOTE(review): dbuf_override_impl() below asserts that the latest dirty
 * record is for tx's txg; presumably this call is what creates that
 * record -- confirm against dmu_buf_will_not_fill().
 */
dmu_buf_will_not_fill(dbuf, tx);
/*
 * Build the replacement bp from scratch: zeroed, then type, level 0,
 * birth txg, the redacted marker, and the dbuf's size as logical size.
 */
blkptr_t bp = { { { {0} } } };
BP_SET_TYPE(&bp, type);
BP_SET_LEVEL(&bp, 0);
BP_SET_BIRTH(&bp, tx->tx_txg, 0);
BP_SET_REDACTED(&bp);
BPE_SET_LSIZE(&bp, dbuf->db_size);
/* Install it as the override for this dbuf's dirty record. */
dbuf_override_impl(db, &bp, tx);
}
/*
* Directly assign a provided arc buf to a given dbuf if it's not referenced
* by anybody except our caller. Otherwise copy arcbuf's contents to dbuf.
@ -2820,6 +2871,36 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
return (db);
}
/*
 * Public wrapper around dbuf_findbp(): look up the block pointer for
 * (level, blkid) in the given dnode and copy it into *bp, without handing
 * the parent dbuf back to the caller. Optionally also reports the dnode's
 * data block size (in sectors) and indirect block shift when the
 * corresponding out-pointers are non-NULL.
 *
 * The dnode must be held, and dn_struct_rwlock must be held at least as
 * reader. Returns 0 on success, otherwise the error from dbuf_findbp(),
 * in which case none of the outputs are written.
 */
int
dbuf_dnode_findbp(dnode_t *dn, uint64_t level, uint64_t blkid,
    blkptr_t *bp, uint16_t *datablkszsec, uint8_t *indblkshift)
{
	dmu_buf_impl_t *parent = NULL;
	blkptr_t *found;
	int error;

	ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));

	error = dbuf_findbp(dn, level, blkid, B_FALSE, &parent, &found);
	if (error != 0)
		return (error);

	/* Copy the bp out before dropping the parent that may contain it. */
	*bp = *found;
	if (parent != NULL)
		dbuf_rele(parent, NULL);

	if (datablkszsec != NULL)
		*datablkszsec = dn->dn_phys->dn_datablkszsec;
	if (indblkshift != NULL)
		*indblkshift = dn->dn_phys->dn_indblkshift;

	return (0);
}
typedef struct dbuf_prefetch_arg {
spa_t *dpa_spa; /* The spa to issue the prefetch in. */
zbookmark_phys_t dpa_zb; /* The target block to prefetch. */
@ -2837,7 +2918,12 @@ typedef struct dbuf_prefetch_arg {
static void
dbuf_issue_final_prefetch(dbuf_prefetch_arg_t *dpa, blkptr_t *bp)
{
if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
ASSERT(!BP_IS_REDACTED(bp) ||
dsl_dataset_feature_is_active(
dpa->dpa_dnode->dn_objset->os_dsl_dataset,
SPA_FEATURE_REDACTED_DATASETS));
if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp) || BP_IS_REDACTED(bp))
return;
int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE;
@ -2921,7 +3007,11 @@ dbuf_prefetch_indirect_done(zio_t *zio, const zbookmark_phys_t *zb,
blkptr_t *bp = ((blkptr_t *)abuf->b_data) +
P2PHASE(nextblkid, 1ULL << dpa->dpa_epbs);
if (BP_IS_HOLE(bp)) {
ASSERT(!BP_IS_REDACTED(bp) ||
dsl_dataset_feature_is_active(
dpa->dpa_dnode->dn_objset->os_dsl_dataset,
SPA_FEATURE_REDACTED_DATASETS));
if (BP_IS_HOLE(bp) || BP_IS_REDACTED(bp)) {
kmem_free(dpa, sizeof (*dpa));
} else if (dpa->dpa_curlevel == dpa->dpa_zb.zb_level) {
ASSERT3U(nextblkid, ==, dpa->dpa_zb.zb_blkid);
@ -3025,7 +3115,10 @@ dbuf_prefetch(dnode_t *dn, int64_t level, uint64_t blkid, zio_priority_t prio,
ASSERT3U(curblkid, <, dn->dn_phys->dn_nblkptr);
bp = dn->dn_phys->dn_blkptr[curblkid];
}
if (BP_IS_HOLE(&bp))
ASSERT(!BP_IS_REDACTED(&bp) ||
dsl_dataset_feature_is_active(dn->dn_objset->os_dsl_dataset,
SPA_FEATURE_REDACTED_DATASETS));
if (BP_IS_HOLE(&bp) || BP_IS_REDACTED(&bp))
return;
ASSERT3U(curlevel, ==, BP_GET_LEVEL(&bp));

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright (c) 2011, 2018 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
* Copyright (c) 2016, Nexenta Systems, Inc. All rights reserved.
@ -1285,6 +1285,20 @@ dmu_write_embedded(objset_t *os, uint64_t object, uint64_t offset,
dmu_buf_rele(db, FTAG);
}
/*
 * Redact the byte range [offset, offset + size) of the given object in
 * transaction tx: hold every dbuf covering the range (without reading
 * them), redact each one, then release the holds. Failure to hold the
 * buffers is fatal (VERIFY0).
 */
void
dmu_redact(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
    dmu_tx_t *tx)
{
	dmu_buf_t **bufs;
	int nbufs;

	VERIFY0(dmu_buf_hold_array(os, object, offset, size, FALSE, FTAG,
	    &nbufs, &bufs));

	for (int idx = 0; idx < nbufs; idx++)
		dmu_buf_redact(bufs[idx], tx);

	dmu_buf_rele_array(bufs, nbufs, FTAG);
}
/*
* DMU support for xuio
*/

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
*/
#include <sys/dmu.h>
@ -115,7 +115,8 @@ diff_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
if (issig(JUSTLOOKING) && issig(FORREAL))
return (SET_ERROR(EINTR));
if (bp == NULL || zb->zb_object != DMU_META_DNODE_OBJECT)
if (zb->zb_level == ZB_DNODE_LEVEL ||
zb->zb_object != DMU_META_DNODE_OBJECT)
return (0);
if (BP_IS_HOLE(bp)) {

View File

@ -412,6 +412,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
int i, err;
ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock));
ASSERT(!BP_IS_REDACTED(bp));
/*
* The $ORIGIN dataset (if it exists) doesn't have an associated

File diff suppressed because it is too large Load Diff

1112
module/zfs/dmu_redact.c Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -67,8 +67,8 @@ typedef struct traverse_data {
boolean_t td_realloc_possible;
} traverse_data_t;
static int traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp,
uint64_t objset, uint64_t object);
static int traverse_dnode(traverse_data_t *td, const blkptr_t *bp,
const dnode_phys_t *dnp, uint64_t objset, uint64_t object);
static void prefetch_dnode_metadata(traverse_data_t *td, const dnode_phys_t *,
uint64_t objset, uint64_t object);
@ -194,6 +194,7 @@ traverse_prefetch_metadata(traverse_data_t *td,
return;
if (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE)
return;
ASSERT(!BP_IS_REDACTED(bp));
if ((td->td_flags & TRAVERSE_NO_DECRYPT) && BP_IS_PROTECTED(bp))
zio_flags |= ZIO_FLAG_RAW;
@ -207,7 +208,7 @@ prefetch_needed(prefetch_data_t *pfd, const blkptr_t *bp)
{
ASSERT(pfd->pd_flags & TRAVERSE_PREFETCH_DATA);
if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp) ||
BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG)
BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG || BP_IS_REDACTED(bp))
return (B_FALSE);
return (B_TRUE);
}
@ -274,7 +275,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
mutex_exit(&pd->pd_mtx);
}
if (BP_IS_HOLE(bp)) {
if (BP_IS_HOLE(bp) || BP_IS_REDACTED(bp)) {
err = td->td_func(td->td_spa, NULL, bp, zb, dnp, td->td_arg);
if (err != 0)
goto post;
@ -354,7 +355,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
/* recursively visitbp() blocks below this */
for (i = 0; i < epb; i += child_dnp[i].dn_extra_slots + 1) {
err = traverse_dnode(td, &child_dnp[i],
err = traverse_dnode(td, bp, &child_dnp[i],
zb->zb_objset, zb->zb_blkid * epb + i);
if (err != 0)
break;
@ -395,19 +396,19 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
zb->zb_objset, DMU_USERUSED_OBJECT);
}
err = traverse_dnode(td, &osp->os_meta_dnode, zb->zb_objset,
err = traverse_dnode(td, bp, &osp->os_meta_dnode, zb->zb_objset,
DMU_META_DNODE_OBJECT);
if (err == 0 && OBJSET_BUF_HAS_USERUSED(buf)) {
if (OBJSET_BUF_HAS_PROJECTUSED(buf))
err = traverse_dnode(td,
err = traverse_dnode(td, bp,
&osp->os_projectused_dnode, zb->zb_objset,
DMU_PROJECTUSED_OBJECT);
if (err == 0)
err = traverse_dnode(td,
err = traverse_dnode(td, bp,
&osp->os_groupused_dnode, zb->zb_objset,
DMU_GROUPUSED_OBJECT);
if (err == 0)
err = traverse_dnode(td,
err = traverse_dnode(td, bp,
&osp->os_userused_dnode, zb->zb_objset,
DMU_USERUSED_OBJECT);
}
@ -475,7 +476,7 @@ prefetch_dnode_metadata(traverse_data_t *td, const dnode_phys_t *dnp,
}
static int
traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp,
traverse_dnode(traverse_data_t *td, const blkptr_t *bp, const dnode_phys_t *dnp,
uint64_t objset, uint64_t object)
{
int j, err = 0;
@ -488,7 +489,7 @@ traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp,
if (td->td_flags & TRAVERSE_PRE) {
SET_BOOKMARK(&czb, objset, object, ZB_DNODE_LEVEL,
ZB_DNODE_BLKID);
err = td->td_func(td->td_spa, NULL, NULL, &czb, dnp,
err = td->td_func(td->td_spa, NULL, bp, &czb, dnp,
td->td_arg);
if (err == TRAVERSE_VISIT_NO_CHILDREN)
return (0);
@ -511,7 +512,7 @@ traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp,
if (err == 0 && (td->td_flags & TRAVERSE_POST)) {
SET_BOOKMARK(&czb, objset, object, ZB_DNODE_LEVEL,
ZB_DNODE_BLKID);
err = td->td_func(td->td_spa, NULL, NULL, &czb, dnp,
err = td->td_func(td->td_spa, NULL, bp, &czb, dnp,
td->td_arg);
if (err == TRAVERSE_VISIT_NO_CHILDREN)
return (0);
@ -532,7 +533,7 @@ traverse_prefetcher(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
ARC_FLAG_PRESCIENT_PREFETCH;
ASSERT(pfd->pd_bytes_fetched >= 0);
if (bp == NULL)
if (zb->zb_level == ZB_DNODE_LEVEL)
return (0);
if (pfd->pd_cancel)
return (SET_ERROR(EINTR));
@ -635,6 +636,7 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
uint32_t flags = ARC_FLAG_WAIT;
objset_phys_t *osp;
arc_buf_t *buf;
ASSERT(!BP_IS_REDACTED(rootbp));
if ((td->td_flags & TRAVERSE_NO_DECRYPT) &&
BP_IS_PROTECTED(rootbp))

File diff suppressed because it is too large Load Diff

View File

@ -57,6 +57,7 @@
#include <sys/dsl_userhold.h>
#include <sys/dsl_bookmark.h>
#include <sys/policy.h>
#include <sys/dmu_send.h>
#include <sys/dmu_recv.h>
#include <sys/zio_compress.h>
#include <zfs_fletcher.h>
@ -72,6 +73,7 @@
* of this setting.
*/
int zfs_max_recordsize = 1 * 1024 * 1024;
int zfs_allow_redacted_dataset_mount = 0;
#define SWITCH64(x, y) \
{ \
@ -131,7 +133,7 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
ASSERT(dmu_tx_is_syncing(tx));
/* It could have been compressed away to nothing */
if (BP_IS_HOLE(bp))
if (BP_IS_HOLE(bp) || BP_IS_REDACTED(bp))
return;
ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp)));
@ -220,7 +222,7 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
int compressed = BP_GET_PSIZE(bp);
int uncompressed = BP_GET_UCSIZE(bp);
if (BP_IS_HOLE(bp))
if (BP_IS_HOLE(bp) || BP_IS_REDACTED(bp))
return (0);
ASSERT(dmu_tx_is_syncing(tx));
@ -284,6 +286,9 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
DD_USED_HEAD, DD_USED_SNAP, tx);
}
}
dsl_bookmark_block_killed(ds, bp, tx);
mutex_enter(&ds->ds_lock);
ASSERT3U(dsl_dataset_phys(ds)->ds_referenced_bytes, >=, used);
dsl_dataset_phys(ds)->ds_referenced_bytes -= used;
@ -395,6 +400,8 @@ dsl_dataset_evict_async(void *dbu)
ds->ds_prev = NULL;
}
dsl_bookmark_fini_ds(ds);
bplist_destroy(&ds->ds_pending_deadlist);
if (dsl_deadlist_is_open(&ds->ds_deadlist))
dsl_deadlist_close(&ds->ds_deadlist);
@ -564,8 +571,8 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
bplist_create(&ds->ds_pending_deadlist);
list_create(&ds->ds_sendstreams, sizeof (dmu_sendarg_t),
offsetof(dmu_sendarg_t, dsa_link));
list_create(&ds->ds_sendstreams, sizeof (dmu_sendstatus_t),
offsetof(dmu_sendstatus_t, dss_link));
list_create(&ds->ds_prop_cbs, sizeof (dsl_prop_cb_record_t),
offsetof(dsl_prop_cb_record_t, cbr_ds_node));
@ -588,14 +595,7 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
dsl_dataset_phys(ds)->ds_prev_snap_obj,
ds, &ds->ds_prev);
}
if (doi.doi_type == DMU_OTN_ZAP_METADATA) {
int zaperr = zap_lookup(mos, ds->ds_object,
DS_FIELD_BOOKMARK_NAMES,
sizeof (ds->ds_bookmarks), 1,
&ds->ds_bookmarks);
if (zaperr != ENOENT)
VERIFY0(zaperr);
}
err = dsl_bookmark_init_ds(ds);
} else {
if (zfs_flags & ZFS_DEBUG_SNAPNAMES)
err = dsl_dataset_get_snapname(ds);
@ -647,9 +647,15 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
dsl_deadlist_close(&ds->ds_deadlist);
if (dsl_deadlist_is_open(&ds->ds_remap_deadlist))
dsl_deadlist_close(&ds->ds_remap_deadlist);
dsl_bookmark_fini_ds(ds);
if (ds->ds_prev)
dsl_dataset_rele(ds->ds_prev, ds);
dsl_dir_rele(ds->ds_dir, ds);
for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
if (dsl_dataset_feature_is_active(ds, f))
unload_zfeature(ds, f);
}
list_destroy(&ds->ds_prop_cbs);
list_destroy(&ds->ds_sendstreams);
mutex_destroy(&ds->ds_lock);
@ -784,14 +790,14 @@ dsl_dataset_hold(dsl_pool_t *dp, const char *name, void *tag,
return (dsl_dataset_hold_flags(dp, name, 0, tag, dsp));
}
int
dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, ds_hold_flags_t flags,
void *tag, dsl_dataset_t **dsp)
static int
dsl_dataset_own_obj_impl(dsl_pool_t *dp, uint64_t dsobj, ds_hold_flags_t flags,
void *tag, boolean_t override, dsl_dataset_t **dsp)
{
int err = dsl_dataset_hold_obj_flags(dp, dsobj, flags, tag, dsp);
if (err != 0)
return (err);
if (!dsl_dataset_tryown(*dsp, tag)) {
if (!dsl_dataset_tryown(*dsp, tag, override)) {
dsl_dataset_rele_flags(*dsp, flags, tag);
*dsp = NULL;
return (SET_ERROR(EBUSY));
@ -799,20 +805,49 @@ dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, ds_hold_flags_t flags,
return (0);
}
int
dsl_dataset_own(dsl_pool_t *dp, const char *name, ds_hold_flags_t flags,
dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, ds_hold_flags_t flags,
void *tag, dsl_dataset_t **dsp)
{
return (dsl_dataset_own_obj_impl(dp, dsobj, flags, tag, B_FALSE, dsp));
}
/*
 * Like dsl_dataset_own_obj(), but passes override = B_TRUE down to
 * dsl_dataset_tryown(), so ownership is granted whenever the dataset has
 * no current owner, even if it is inconsistent or redacted.
 */
int
dsl_dataset_own_obj_force(dsl_pool_t *dp, uint64_t dsobj,
ds_hold_flags_t flags, void *tag, dsl_dataset_t **dsp)
{
return (dsl_dataset_own_obj_impl(dp, dsobj, flags, tag, B_TRUE, dsp));
}
static int
dsl_dataset_own_impl(dsl_pool_t *dp, const char *name, ds_hold_flags_t flags,
void *tag, boolean_t override, dsl_dataset_t **dsp)
{
int err = dsl_dataset_hold_flags(dp, name, flags, tag, dsp);
if (err != 0)
return (err);
if (!dsl_dataset_tryown(*dsp, tag)) {
if (!dsl_dataset_tryown(*dsp, tag, override)) {
dsl_dataset_rele_flags(*dsp, flags, tag);
return (SET_ERROR(EBUSY));
}
return (0);
}
/*
 * Like dsl_dataset_own(), but passes override = B_TRUE down to
 * dsl_dataset_tryown(), so ownership is granted whenever the dataset has
 * no current owner, even if it is inconsistent or redacted.
 */
int
dsl_dataset_own_force(dsl_pool_t *dp, const char *name, ds_hold_flags_t flags,
void *tag, dsl_dataset_t **dsp)
{
return (dsl_dataset_own_impl(dp, name, flags, tag, B_TRUE, dsp));
}
/*
 * Hold the dataset called name and try to become its owner (tag is both
 * the hold tag and the owner). Uses override = B_FALSE, so
 * dsl_dataset_tryown() refuses datasets that are inconsistent, or that are
 * redacted while zfs_allow_redacted_dataset_mount is 0. Returns 0 on
 * success, otherwise the error from dsl_dataset_own_impl().
 */
int
dsl_dataset_own(dsl_pool_t *dp, const char *name, ds_hold_flags_t flags,
void *tag, dsl_dataset_t **dsp)
{
return (dsl_dataset_own_impl(dp, name, flags, tag, B_FALSE, dsp));
}
/*
* See the comment above dsl_pool_hold() for details. In summary, a long
* hold is used to prevent destruction of a dataset while the pool hold
@ -927,13 +962,16 @@ dsl_dataset_disown(dsl_dataset_t *ds, ds_hold_flags_t flags, void *tag)
}
boolean_t
dsl_dataset_tryown(dsl_dataset_t *ds, void *tag)
dsl_dataset_tryown(dsl_dataset_t *ds, void *tag, boolean_t override)
{
boolean_t gotit = FALSE;
ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));
mutex_enter(&ds->ds_lock);
if (ds->ds_owner == NULL && !DS_IS_INCONSISTENT(ds)) {
if (ds->ds_owner == NULL && (override || !(DS_IS_INCONSISTENT(ds) ||
(dsl_dataset_feature_is_active(ds,
SPA_FEATURE_REDACTED_DATASETS) &&
!zfs_allow_redacted_dataset_mount)))) {
ds->ds_owner = tag;
dsl_dataset_long_hold(ds, tag);
gotit = TRUE;
@ -1696,6 +1734,7 @@ dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname,
dsl_dataset_phys(ds)->ds_deadlist_obj);
dsl_deadlist_add_key(&ds->ds_deadlist,
dsl_dataset_phys(ds)->ds_prev_snap_txg, tx);
dsl_bookmark_snapshotted(ds, tx);
if (dsl_dataset_remap_deadlist_exists(ds)) {
uint64_t remap_deadlist_obj =
@ -2013,6 +2052,8 @@ dsl_dataset_sync_done(dsl_dataset_t *ds, dmu_tx_t *tx)
bplist_iterate(&ds->ds_pending_deadlist,
deadlist_enqueue_cb, &ds->ds_deadlist, tx);
dsl_bookmark_sync_done(ds, tx);
if (os->os_synced_dnodes != NULL) {
multilist_destroy(os->os_synced_dnodes);
os->os_synced_dnodes = NULL;
@ -2151,6 +2192,34 @@ get_receive_resume_stats_impl(dsl_dataset_t *ds)
DS_FIELD_RESUME_RAWOK) == 0) {
fnvlist_add_boolean(token_nv, "rawok");
}
if (dsl_dataset_feature_is_active(ds,
SPA_FEATURE_REDACTED_DATASETS)) {
uint64_t num_redact_snaps;
uint64_t *redact_snaps;
VERIFY(dsl_dataset_get_uint64_array_feature(ds,
SPA_FEATURE_REDACTED_DATASETS, &num_redact_snaps,
&redact_snaps));
fnvlist_add_uint64_array(token_nv, "redact_snaps",
redact_snaps, num_redact_snaps);
}
if (zap_contains(dp->dp_meta_objset, ds->ds_object,
DS_FIELD_RESUME_REDACT_BOOKMARK_SNAPS) == 0) {
uint64_t num_redact_snaps, int_size;
uint64_t *redact_snaps;
VERIFY0(zap_length(dp->dp_meta_objset, ds->ds_object,
DS_FIELD_RESUME_REDACT_BOOKMARK_SNAPS, &int_size,
&num_redact_snaps));
ASSERT3U(int_size, ==, sizeof (uint64_t));
redact_snaps = kmem_alloc(int_size * num_redact_snaps,
KM_SLEEP);
VERIFY0(zap_lookup(dp->dp_meta_objset, ds->ds_object,
DS_FIELD_RESUME_REDACT_BOOKMARK_SNAPS, int_size,
num_redact_snaps, redact_snaps));
fnvlist_add_uint64_array(token_nv, "book_redact_snaps",
redact_snaps, num_redact_snaps);
kmem_free(redact_snaps, int_size * num_redact_snaps);
}
packed = fnvlist_pack(token_nv, &packed_size);
fnvlist_free(token_nv);
compressed = kmem_alloc(packed_size, KM_SLEEP);
@ -2336,6 +2405,13 @@ dsl_get_inconsistent(dsl_dataset_t *ds)
1 : 0);
}
/*
 * Report whether the dataset is redacted: returns the result of
 * dsl_dataset_feature_is_active() for SPA_FEATURE_REDACTED_DATASETS,
 * widened to uint64_t (nonzero when the feature is active on ds).
 */
uint64_t
dsl_get_redacted(dsl_dataset_t *ds)
{
return (dsl_dataset_feature_is_active(ds,
SPA_FEATURE_REDACTED_DATASETS));
}
uint64_t
dsl_get_available(dsl_dataset_t *ds)
{
@ -2391,6 +2467,18 @@ dsl_get_prev_snap(dsl_dataset_t *ds, char *snap)
}
}
/*
 * If the dataset carries the SPA_FEATURE_REDACTED_DATASETS uint64 array,
 * add that array to propval under ZPROP_VALUE. When the feature array is
 * not available, propval is left untouched.
 */
void
dsl_get_redact_snaps(dsl_dataset_t *ds, nvlist_t *propval)
{
	uint64_t *redact_snaps;
	uint64_t num_redact_snaps;

	if (!dsl_dataset_get_uint64_array_feature(ds,
	    SPA_FEATURE_REDACTED_DATASETS, &num_redact_snaps, &redact_snaps))
		return;

	fnvlist_add_uint64_array(propval, ZPROP_VALUE, redact_snaps,
	    num_redact_snaps);
}
/*
* Returns the mountpoint property and source for the given dataset in the value
* and source buffers. The value buffer must be at least as large as MAXPATHLEN
@ -2496,6 +2584,12 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
dsl_dir_stats(ds->ds_dir, nv);
}
nvlist_t *propval = fnvlist_alloc();
dsl_get_redact_snaps(ds, propval);
fnvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_REDACT_SNAPS),
propval);
nvlist_free(propval);
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE,
dsl_get_available(ds));
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED,
@ -2564,6 +2658,7 @@ dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
stat->dds_creation_txg = dsl_get_creationtxg(ds);
stat->dds_inconsistent = dsl_get_inconsistent(ds);
stat->dds_guid = dsl_get_guid(ds);
stat->dds_redacted = dsl_get_redacted(ds);
stat->dds_origin[0] = '\0';
if (ds->ds_is_snapshot) {
stat->dds_is_snapshot = B_TRUE;
@ -2891,28 +2986,11 @@ dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx)
}
/* must not have any bookmarks after the most recent snapshot */
nvlist_t *proprequest = fnvlist_alloc();
fnvlist_add_boolean(proprequest, zfs_prop_to_name(ZFS_PROP_CREATETXG));
nvlist_t *bookmarks = fnvlist_alloc();
error = dsl_get_bookmarks_impl(ds, proprequest, bookmarks);
fnvlist_free(proprequest);
if (error != 0) {
if (dsl_bookmark_latest_txg(ds) >
dsl_dataset_phys(ds)->ds_prev_snap_txg) {
dsl_dataset_rele(ds, FTAG);
return (error);
return (SET_ERROR(EEXIST));
}
for (nvpair_t *pair = nvlist_next_nvpair(bookmarks, NULL);
pair != NULL; pair = nvlist_next_nvpair(bookmarks, pair)) {
nvlist_t *valuenv =
fnvlist_lookup_nvlist(fnvpair_value_nvlist(pair),
zfs_prop_to_name(ZFS_PROP_CREATETXG));
uint64_t createtxg = fnvlist_lookup_uint64(valuenv, "value");
if (createtxg > dsl_dataset_phys(ds)->ds_prev_snap_txg) {
fnvlist_free(bookmarks);
dsl_dataset_rele(ds, FTAG);
return (SET_ERROR(EEXIST));
}
}
fnvlist_free(bookmarks);
error = dsl_dataset_handoff_check(ds, ddra->ddra_owner, tx);
if (error != 0) {
@ -3025,7 +3103,7 @@ dsl_dataset_promote_check(void *arg, dmu_tx_t *tx)
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *hds;
struct promotenode *snap;
dsl_dataset_t *origin_ds;
dsl_dataset_t *origin_ds, *origin_head;
int err;
uint64_t unused;
uint64_t ss_mv_cnt;
@ -3045,6 +3123,7 @@ dsl_dataset_promote_check(void *arg, dmu_tx_t *tx)
}
snap = list_head(&ddpa->shared_snaps);
origin_head = snap->ds;
if (snap == NULL) {
err = SET_ERROR(ENOENT);
goto out;
@ -3141,6 +3220,32 @@ dsl_dataset_promote_check(void *arg, dmu_tx_t *tx)
ddpa->uncomp += dluncomp;
}
/*
* Check that bookmarks that are being transferred don't have
* name conflicts.
*/
for (dsl_bookmark_node_t *dbn = avl_first(&origin_head->ds_bookmarks);
dbn != NULL && dbn->dbn_phys.zbm_creation_txg <=
dsl_dataset_phys(origin_ds)->ds_creation_txg;
dbn = AVL_NEXT(&origin_head->ds_bookmarks, dbn)) {
if (strlen(dbn->dbn_name) >= max_snap_len) {
err = SET_ERROR(ENAMETOOLONG);
goto out;
}
zfs_bookmark_phys_t bm;
err = dsl_bookmark_lookup_impl(ddpa->ddpa_clone,
dbn->dbn_name, &bm);
if (err == 0) {
fnvlist_add_boolean(ddpa->err_ds, dbn->dbn_name);
conflicting_snaps = B_TRUE;
} else if (err == ESRCH) {
err = 0;
} else if (err != 0) {
goto out;
}
}
/*
* In order to return the full list of conflicting snapshots, we check
* whether there was a conflict after traversing all of them.
@ -3298,6 +3403,25 @@ dsl_dataset_promote_sync(void *arg, dmu_tx_t *tx)
dsl_dir_phys(dd)->dd_clones, origin_head->ds_object, tx));
}
/*
* Move bookmarks to this dir.
*/
dsl_bookmark_node_t *dbn_next;
for (dsl_bookmark_node_t *dbn = avl_first(&origin_head->ds_bookmarks);
dbn != NULL && dbn->dbn_phys.zbm_creation_txg <=
dsl_dataset_phys(origin_ds)->ds_creation_txg;
dbn = dbn_next) {
dbn_next = AVL_NEXT(&origin_head->ds_bookmarks, dbn);
avl_remove(&origin_head->ds_bookmarks, dbn);
VERIFY0(zap_remove(dp->dp_meta_objset,
origin_head->ds_bookmarks_obj, dbn->dbn_name, tx));
dsl_bookmark_node_add(hds, dbn, tx);
}
dsl_bookmark_next_changed(hds, origin_ds, tx);
/* move snapshots to this dir */
for (snap = list_head(&ddpa->shared_snaps); snap;
snap = list_next(&ddpa->shared_snaps, snap)) {
@ -3758,9 +3882,9 @@ dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone,
dsl_dataset_phys(clone)->ds_unique_bytes);
/*
* Reset origin's unique bytes, if it exists.
* Reset origin's unique bytes.
*/
if (clone->ds_prev) {
{
dsl_dataset_t *origin = clone->ds_prev;
uint64_t comp, uncomp;
@ -3858,6 +3982,12 @@ dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone,
dsl_dataset_phys(origin_head)->ds_deadlist_obj);
dsl_dataset_swap_remap_deadlists(clone, origin_head, tx);
/*
* If there is a bookmark at the origin, its "next dataset" is
* changing, so we need to reset its FBN.
*/
dsl_bookmark_next_changed(origin_head, origin_head->ds_prev, tx);
dsl_scan_ds_clone_swapped(origin_head, clone, tx);
spa_history_log_internal_ds(clone, "clone swap", tx,
@ -4148,93 +4278,143 @@ dsl_dataset_set_refreservation(const char *dsname, zprop_source_t source,
}
/*
* Return (in *usedp) the amount of space written in new that is not
* present in oldsnap. New may be a snapshot or the head. Old must be
* a snapshot before new, in new's filesystem (or its origin). If not then
* fail and return EINVAL.
* Return (in *usedp) the amount of space referenced by "new" that was not
* referenced at the time the bookmark corresponds to. "New" may be a
* snapshot or a head. The bookmark must be before new, in
* new's filesystem (or its origin) -- caller verifies this.
*
* The written space is calculated by considering two components: First, we
* ignore any freed space, and calculate the written as new's used space
* minus old's used space. Next, we add in the amount of space that was freed
* between the two snapshots, thus reducing new's used space relative to old's.
* Specifically, this is the space that was born before old->ds_creation_txg,
* and freed before new (ie. on new's deadlist or a previous deadlist).
* between the two time points, thus reducing new's used space relative to
* old's. Specifically, this is the space that was born before
* zbm_creation_txg, and freed before new (ie. on new's deadlist or a
* previous deadlist).
*
* space freed [---------------------]
* snapshots ---O-------O--------O-------O------
* oldsnap new
* bookmark new
*
* Note, the bookmark's zbm_*_bytes_refd must be valid, but if the HAS_FBN
* flag is not set, we will calculate the freed_before_next based on the
* next snapshot's deadlist, rather than using zbm_*_freed_before_next_snap.
*/
static int
dsl_dataset_space_written_impl(zfs_bookmark_phys_t *bmp,
    dsl_dataset_t *new, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
{
	dsl_pool_t *dp = new->ds_dir->dd_pool;

	ASSERT(dsl_pool_config_held(dp));
	if (dsl_dataset_is_snapshot(new)) {
		ASSERT3U(bmp->zbm_creation_txg, <,
		    dsl_dataset_phys(new)->ds_creation_txg);
	}

	/*
	 * Start with what "new" references minus what was referenced at
	 * the bookmark. The arithmetic is unsigned and may transiently
	 * wrap; the freed space added below brings it back.
	 */
	*usedp = dsl_dataset_phys(new)->ds_referenced_bytes -
	    bmp->zbm_referenced_bytes_refd;
	*compp = dsl_dataset_phys(new)->ds_compressed_bytes -
	    bmp->zbm_compressed_bytes_refd;
	*uncompp = dsl_dataset_phys(new)->ds_uncompressed_bytes -
	    bmp->zbm_uncompressed_bytes_refd;

	/*
	 * Walk backwards from "new" through every snapshot whose previous
	 * snapshot is still after the bookmark, adding the space on each
	 * deadlist that was born at or before the bookmark's creation txg.
	 */
	dsl_dataset_t *snap = new;
	while (dsl_dataset_phys(snap)->ds_prev_snap_txg >
	    bmp->zbm_creation_txg) {
		uint64_t used, comp, uncomp;

		dsl_deadlist_space_range(&snap->ds_deadlist,
		    0, bmp->zbm_creation_txg,
		    &used, &comp, &uncomp);
		*usedp += used;
		*compp += comp;
		*uncompp += uncomp;

		uint64_t snapobj = dsl_dataset_phys(snap)->ds_prev_snap_obj;
		if (snap != new)
			dsl_dataset_rele(snap, FTAG);

		int err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap);
		if (err != 0) {
			/*
			 * The previous hold (if any) has already been
			 * dropped and no new one was obtained, so snap
			 * must not be dereferenced or released again.
			 * The output values are incomplete on this path.
			 */
			return (err);
		}
	}

	/*
	 * We might not have the FBN if we are calculating written from
	 * a snapshot (because we didn't know the correct "next" snapshot
	 * until now).
	 */
	if (bmp->zbm_flags & ZBM_FLAG_HAS_FBN) {
		*usedp += bmp->zbm_referenced_freed_before_next_snap;
		*compp += bmp->zbm_compressed_freed_before_next_snap;
		*uncompp += bmp->zbm_uncompressed_freed_before_next_snap;
	} else {
		uint64_t used, comp, uncomp;

		/* snap is now the dataset immediately after the bookmark. */
		ASSERT3U(dsl_dataset_phys(snap)->ds_prev_snap_txg, ==,
		    bmp->zbm_creation_txg);
		dsl_deadlist_space(&snap->ds_deadlist, &used, &comp, &uncomp);
		*usedp += used;
		*compp += comp;
		*uncompp += uncomp;
	}

	if (snap != new)
		dsl_dataset_rele(snap, FTAG);
	return (0);
}
/*
 * Return (in *usedp) the amount of space written in new that was not
 * present at the time the bookmark corresponds to. New may be a
 * snapshot or the head. The bookmark (bmp) must be before new, in
 * new's filesystem (or its origin) -- caller verifies this.
 * Fails with ENOTSUP if the bookmark does not have ZBM_FLAG_HAS_FBN set.
 */
/*
 * Public wrapper around dsl_dataset_space_written_impl() for bookmark
 * records. Only bookmarks carrying ZBM_FLAG_HAS_FBN are accepted; any
 * other bookmark yields ENOTSUP without touching the out parameters.
 */
int
dsl_dataset_space_written_bookmark(zfs_bookmark_phys_t *bmp,
    dsl_dataset_t *new, uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
{
	if ((bmp->zbm_flags & ZBM_FLAG_HAS_FBN) != 0) {
		return (dsl_dataset_space_written_impl(bmp, new,
		    usedp, compp, uncompp));
	}

	return (SET_ERROR(ENOTSUP));
}
/*
* Return (in *usedp) the amount of space written in new that is not
* present in oldsnap. New may be a snapshot or the head. Old must be
* a snapshot before new, in new's filesystem (or its origin). If not then
* fail and return EINVAL.
*/
int
dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new,
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
{
int err = 0;
uint64_t snapobj;
dsl_pool_t *dp = new->ds_dir->dd_pool;
/* Reject the request unless dsl_dataset_is_before() accepts the pair. */
if (!dsl_dataset_is_before(new, oldsnap, 0))
return (SET_ERROR(EINVAL));
ASSERT(dsl_pool_config_held(dp));
/*
 * Describe oldsnap as a temporary, in-memory bookmark record: its guid,
 * creation txg/time and referenced/compressed/uncompressed byte counts
 * are copied from oldsnap's phys. zbm_flags stays zero from the
 * initializer, so ZBM_FLAG_HAS_FBN is not set.
 */
zfs_bookmark_phys_t zbm = { 0 };
dsl_dataset_phys_t *dsp = dsl_dataset_phys(oldsnap);
zbm.zbm_guid = dsp->ds_guid;
zbm.zbm_creation_txg = dsp->ds_creation_txg;
zbm.zbm_creation_time = dsp->ds_creation_time;
zbm.zbm_referenced_bytes_refd = dsp->ds_referenced_bytes;
zbm.zbm_compressed_bytes_refd = dsp->ds_compressed_bytes;
zbm.zbm_uncompressed_bytes_refd = dsp->ds_uncompressed_bytes;
/*
 * NOTE(review): from here down to "return (err);" the text looks like an
 * open-coded walk over the snapshot chain that duplicates what the
 * bookmark-based call at the end of the function does. The two variants
 * appear side by side in this view -- confirm which lines belong to the
 * current revision before relying on either.
 */
*usedp = 0;
*usedp += dsl_dataset_phys(new)->ds_referenced_bytes;
*usedp -= dsl_dataset_phys(oldsnap)->ds_referenced_bytes;
*compp = 0;
*compp += dsl_dataset_phys(new)->ds_compressed_bytes;
*compp -= dsl_dataset_phys(oldsnap)->ds_compressed_bytes;
*uncompp = 0;
*uncompp += dsl_dataset_phys(new)->ds_uncompressed_bytes;
*uncompp -= dsl_dataset_phys(oldsnap)->ds_uncompressed_bytes;
snapobj = new->ds_object;
while (snapobj != oldsnap->ds_object) {
dsl_dataset_t *snap;
uint64_t used, comp, uncomp;
/* "new" is already held by the caller; only older snapshots are held here. */
if (snapobj == new->ds_object) {
snap = new;
} else {
err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap);
if (err != 0)
break;
}
if (dsl_dataset_phys(snap)->ds_prev_snap_txg ==
dsl_dataset_phys(oldsnap)->ds_creation_txg) {
/*
* The blocks in the deadlist can not be born after
* ds_prev_snap_txg, so get the whole deadlist space,
* which is more efficient (especially for old-format
* deadlists). Unfortunately the deadlist code
* doesn't have enough information to make this
* optimization itself.
*/
dsl_deadlist_space(&snap->ds_deadlist,
&used, &comp, &uncomp);
} else {
dsl_deadlist_space_range(&snap->ds_deadlist,
0, dsl_dataset_phys(oldsnap)->ds_creation_txg,
&used, &comp, &uncomp);
}
*usedp += used;
*compp += comp;
*uncompp += uncomp;
/*
* If we get to the beginning of the chain of snapshots
* (ds_prev_snap_obj == 0) before oldsnap, then oldsnap
* was not a snapshot of/before new.
*/
snapobj = dsl_dataset_phys(snap)->ds_prev_snap_obj;
if (snap != new)
dsl_dataset_rele(snap, FTAG);
if (snapobj == 0) {
err = SET_ERROR(EINVAL);
break;
}
}
return (err);
/*
* If oldsnap is the origin (or origin's origin, ...) of new,
* we can't easily calculate the effective FBN. Therefore,
* we do not set ZBM_FLAG_HAS_FBN, so that the _impl will calculate
* it relative to the correct "next": the next snapshot towards "new",
* rather than the next snapshot in oldsnap's dsl_dir.
*/
return (dsl_dataset_space_written_impl(&zbm, new,
usedp, compp, uncompp));
}
/*
@ -4327,16 +4507,26 @@ dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier,
if (later->ds_dir == earlier->ds_dir)
return (B_TRUE);
if (!dsl_dir_is_clone(later->ds_dir))
/*
* We check dd_origin_obj explicitly here rather than using
* dsl_dir_is_clone() so that we will return TRUE if "earlier"
* is $ORIGIN@$ORIGIN. dsl_dataset_space_written() depends on
* this behavior.
*/
if (dsl_dir_phys(later->ds_dir)->dd_origin_obj == 0)
return (B_FALSE);
if (dsl_dir_phys(later->ds_dir)->dd_origin_obj == earlier->ds_object)
return (B_TRUE);
dsl_dataset_t *origin;
error = dsl_dataset_hold_obj(dp,
dsl_dir_phys(later->ds_dir)->dd_origin_obj, FTAG, &origin);
if (error != 0)
return (B_FALSE);
if (dsl_dataset_phys(origin)->ds_creation_txg == earlier_txg &&
origin->ds_dir == earlier->ds_dir) {
dsl_dataset_rele(origin, FTAG);
return (B_TRUE);
}
ret = dsl_dataset_is_before(origin, earlier, earlier_txg);
dsl_dataset_rele(origin, FTAG);
return (ret);
@ -4453,6 +4643,26 @@ dsl_dataset_create_remap_deadlist(dsl_dataset_t *ds, dmu_tx_t *tx)
spa_feature_incr(spa, SPA_FEATURE_OBSOLETE_COUNTS, tx);
}
/*
 * Activate SPA_FEATURE_REDACTED_DATASETS on "ds".
 *
 * A feature argument is allocated holding a private copy of the
 * "num_redact_snaps" entries in "redact_snaps" (no array is allocated when
 * the count is zero). The argument is passed to
 * dsl_dataset_activate_feature() and also stored in ds->ds_feature[].
 */
void
dsl_dataset_activate_redaction(dsl_dataset_t *ds, uint64_t *redact_snaps,
    uint64_t num_redact_snaps, dmu_tx_t *tx)
{
	struct feature_type_uint64_array_arg *arg;

	arg = kmem_zalloc(sizeof (*arg), KM_SLEEP);
	arg->length = (int64_t)num_redact_snaps;

	if (num_redact_snaps != 0) {
		size_t nbytes = num_redact_snaps * sizeof (uint64_t);

		arg->array = kmem_alloc(nbytes, KM_SLEEP);
		bcopy(redact_snaps, arg->array, nbytes);
	}

	dsl_dataset_activate_feature(ds->ds_object,
	    SPA_FEATURE_REDACTED_DATASETS, arg, tx);
	ds->ds_feature[SPA_FEATURE_REDACTED_DATASETS] = arg;
}
#if defined(_KERNEL)
#if defined(_LP64)
module_param(zfs_max_recordsize, int, 0644);
@ -4463,6 +4673,10 @@ module_param(zfs_max_recordsize, int, 0444);
MODULE_PARM_DESC(zfs_max_recordsize, "Max allowed record size");
#endif
module_param(zfs_allow_redacted_dataset_mount, int, 0644);
MODULE_PARM_DESC(zfs_allow_redacted_dataset_mount,
"Allow mounting of redacted datasets");
EXPORT_SYMBOL(dsl_dataset_hold);
EXPORT_SYMBOL(dsl_dataset_hold_flags);
EXPORT_SYMBOL(dsl_dataset_hold_obj);

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2016 by Delphix. All rights reserved.
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
@ -80,7 +80,7 @@ dsl_deadlist_load_tree(dsl_deadlist_t *dl)
zap_cursor_advance(&zc)) {
dsl_deadlist_entry_t *dle = kmem_alloc(sizeof (*dle), KM_SLEEP);
dle->dle_mintxg = zfs_strtonum(za.za_name, NULL);
VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, dl->dl_os,
VERIFY0(bpobj_open(&dle->dle_bpobj, dl->dl_os,
za.za_first_integer));
avl_add(&dl->dl_tree, dle);
}
@ -98,13 +98,13 @@ dsl_deadlist_open(dsl_deadlist_t *dl, objset_t *os, uint64_t object)
mutex_init(&dl->dl_lock, NULL, MUTEX_DEFAULT, NULL);
dl->dl_os = os;
dl->dl_object = object;
VERIFY3U(0, ==, dmu_bonus_hold(os, object, dl, &dl->dl_dbuf));
VERIFY0(dmu_bonus_hold(os, object, dl, &dl->dl_dbuf));
dmu_object_info_from_db(dl->dl_dbuf, &doi);
if (doi.doi_type == DMU_OT_BPOBJ) {
dmu_buf_rele(dl->dl_dbuf, dl);
dl->dl_dbuf = NULL;
dl->dl_oldfmt = B_TRUE;
VERIFY3U(0, ==, bpobj_open(&dl->dl_bpobj, os, object));
VERIFY0(bpobj_open(&dl->dl_bpobj, os, object));
return;
}
@ -167,7 +167,7 @@ dsl_deadlist_free(objset_t *os, uint64_t dlobj, dmu_tx_t *tx)
zap_cursor_t zc;
zap_attribute_t za;
VERIFY3U(0, ==, dmu_object_info(os, dlobj, &doi));
VERIFY0(dmu_object_info(os, dlobj, &doi));
if (doi.doi_type == DMU_OT_BPOBJ) {
bpobj_free(os, dlobj, tx);
return;
@ -183,7 +183,7 @@ dsl_deadlist_free(objset_t *os, uint64_t dlobj, dmu_tx_t *tx)
bpobj_free(os, obj, tx);
}
zap_cursor_fini(&zc);
VERIFY3U(0, ==, dmu_object_free(os, dlobj, tx));
VERIFY0(dmu_object_free(os, dlobj, tx));
}
static void
@ -196,8 +196,8 @@ dle_enqueue(dsl_deadlist_t *dl, dsl_deadlist_entry_t *dle,
uint64_t obj = bpobj_alloc(dl->dl_os, SPA_OLD_MAXBLOCKSIZE, tx);
bpobj_close(&dle->dle_bpobj);
bpobj_decr_empty(dl->dl_os, tx);
VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, dl->dl_os, obj));
VERIFY3U(0, ==, zap_update_int_key(dl->dl_os, dl->dl_object,
VERIFY0(bpobj_open(&dle->dle_bpobj, dl->dl_os, obj));
VERIFY0(zap_update_int_key(dl->dl_os, dl->dl_object,
dle->dle_mintxg, obj, tx));
}
bpobj_enqueue(&dle->dle_bpobj, bp, tx);
@ -214,8 +214,8 @@ dle_enqueue_subobj(dsl_deadlist_t *dl, dsl_deadlist_entry_t *dle,
} else {
bpobj_close(&dle->dle_bpobj);
bpobj_decr_empty(dl->dl_os, tx);
VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, dl->dl_os, obj));
VERIFY3U(0, ==, zap_update_int_key(dl->dl_os, dl->dl_object,
VERIFY0(bpobj_open(&dle->dle_bpobj, dl->dl_os, obj));
VERIFY0(zap_update_int_key(dl->dl_os, dl->dl_object,
dle->dle_mintxg, obj, tx));
}
}
@ -279,10 +279,10 @@ dsl_deadlist_add_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx)
dsl_deadlist_load_tree(dl);
obj = bpobj_alloc_empty(dl->dl_os, SPA_OLD_MAXBLOCKSIZE, tx);
VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, dl->dl_os, obj));
VERIFY0(bpobj_open(&dle->dle_bpobj, dl->dl_os, obj));
avl_add(&dl->dl_tree, dle);
VERIFY3U(0, ==, zap_add_int_key(dl->dl_os, dl->dl_object,
VERIFY0(zap_add_int_key(dl->dl_os, dl->dl_object,
mintxg, obj, tx));
mutex_exit(&dl->dl_lock);
}
@ -298,12 +298,12 @@ dsl_deadlist_remove_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx)
if (dl->dl_oldfmt)
return;
mutex_enter(&dl->dl_lock);
dsl_deadlist_load_tree(dl);
dle_tofind.dle_mintxg = mintxg;
dle = avl_find(&dl->dl_tree, &dle_tofind, NULL);
ASSERT3P(dle, !=, NULL);
dle_prev = AVL_PREV(&dl->dl_tree, dle);
dle_enqueue_subobj(dl, dle_prev, dle->dle_bpobj.bpo_object, tx);
@ -312,7 +312,7 @@ dsl_deadlist_remove_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx)
bpobj_close(&dle->dle_bpobj);
kmem_free(dle, sizeof (*dle));
VERIFY3U(0, ==, zap_remove_int(dl->dl_os, dl->dl_object, mintxg, tx));
VERIFY0(zap_remove_int(dl->dl_os, dl->dl_object, mintxg, tx));
mutex_exit(&dl->dl_lock);
}
@ -334,7 +334,7 @@ dsl_deadlist_regenerate(objset_t *os, uint64_t dlobj,
while (mrs_obj != 0) {
dsl_dataset_t *ds;
VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, mrs_obj, FTAG, &ds));
VERIFY0(dsl_dataset_hold_obj(dp, mrs_obj, FTAG, &ds));
dsl_deadlist_add_key(&dl,
dsl_dataset_phys(ds)->ds_prev_snap_txg, tx);
mrs_obj = dsl_dataset_phys(ds)->ds_prev_snap_obj;
@ -368,7 +368,7 @@ dsl_deadlist_clone(dsl_deadlist_t *dl, uint64_t maxtxg,
break;
obj = bpobj_alloc_empty(dl->dl_os, SPA_OLD_MAXBLOCKSIZE, tx);
VERIFY3U(0, ==, zap_add_int_key(dl->dl_os, newobj,
VERIFY0(zap_add_int_key(dl->dl_os, newobj,
dle->dle_mintxg, obj, tx));
}
mutex_exit(&dl->dl_lock);
@ -381,7 +381,7 @@ dsl_deadlist_space(dsl_deadlist_t *dl,
{
ASSERT(dsl_deadlist_is_open(dl));
if (dl->dl_oldfmt) {
VERIFY3U(0, ==, bpobj_space(&dl->dl_bpobj,
VERIFY0(bpobj_space(&dl->dl_bpobj,
usedp, compp, uncompp));
return;
}
@ -397,7 +397,7 @@ dsl_deadlist_space(dsl_deadlist_t *dl,
* return space used in the range (mintxg, maxtxg].
* Includes maxtxg, does not include mintxg.
* mintxg and maxtxg must both be keys in the deadlist (unless maxtxg is
* larger than any bp in the deadlist (eg. UINT64_MAX)).
* UINT64_MAX).
*/
void
dsl_deadlist_space_range(dsl_deadlist_t *dl, uint64_t mintxg, uint64_t maxtxg,
@ -408,7 +408,7 @@ dsl_deadlist_space_range(dsl_deadlist_t *dl, uint64_t mintxg, uint64_t maxtxg,
avl_index_t where;
if (dl->dl_oldfmt) {
VERIFY3U(0, ==, bpobj_space_range(&dl->dl_bpobj,
VERIFY0(bpobj_space_range(&dl->dl_bpobj,
mintxg, maxtxg, usedp, compp, uncompp));
return;
}
@ -430,13 +430,20 @@ dsl_deadlist_space_range(dsl_deadlist_t *dl, uint64_t mintxg, uint64_t maxtxg,
dle = AVL_NEXT(&dl->dl_tree, dle)) {
uint64_t used, comp, uncomp;
VERIFY3U(0, ==, bpobj_space(&dle->dle_bpobj,
VERIFY0(bpobj_space(&dle->dle_bpobj,
&used, &comp, &uncomp));
*usedp += used;
*compp += comp;
*uncompp += uncomp;
}
/*
* This assertion ensures that the maxtxg is a key in the deadlist
* (unless it's UINT64_MAX).
*/
ASSERT(maxtxg == UINT64_MAX ||
(dle != NULL && dle->dle_mintxg == maxtxg));
mutex_exit(&dl->dl_lock);
}
@ -452,8 +459,8 @@ dsl_deadlist_insert_bpobj(dsl_deadlist_t *dl, uint64_t obj, uint64_t birth,
ASSERT(MUTEX_HELD(&dl->dl_lock));
VERIFY3U(0, ==, bpobj_open(&bpo, dl->dl_os, obj));
VERIFY3U(0, ==, bpobj_space(&bpo, &used, &comp, &uncomp));
VERIFY0(bpobj_open(&bpo, dl->dl_os, obj));
VERIFY0(bpobj_space(&bpo, &used, &comp, &uncomp));
bpobj_close(&bpo);
dsl_deadlist_load_tree(dl);
@ -491,12 +498,11 @@ dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx)
dsl_deadlist_phys_t *dlp;
dmu_object_info_t doi;
VERIFY3U(0, ==, dmu_object_info(dl->dl_os, obj, &doi));
VERIFY0(dmu_object_info(dl->dl_os, obj, &doi));
if (doi.doi_type == DMU_OT_BPOBJ) {
bpobj_t bpo;
VERIFY3U(0, ==, bpobj_open(&bpo, dl->dl_os, obj));
VERIFY3U(0, ==, bpobj_iterate(&bpo,
dsl_deadlist_insert_cb, dl, tx));
VERIFY0(bpobj_open(&bpo, dl->dl_os, obj));
VERIFY0(bpobj_iterate(&bpo, dsl_deadlist_insert_cb, dl, tx));
bpobj_close(&bpo);
return;
}
@ -507,11 +513,11 @@ dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx)
zap_cursor_advance(&zc)) {
uint64_t mintxg = zfs_strtonum(za.za_name, NULL);
dsl_deadlist_insert_bpobj(dl, za.za_first_integer, mintxg, tx);
VERIFY3U(0, ==, zap_remove_int(dl->dl_os, obj, mintxg, tx));
VERIFY0(zap_remove_int(dl->dl_os, obj, mintxg, tx));
}
zap_cursor_fini(&zc);
VERIFY3U(0, ==, dmu_bonus_hold(dl->dl_os, obj, FTAG, &bonus));
VERIFY0(dmu_bonus_hold(dl->dl_os, obj, FTAG, &bonus));
dlp = bonus->db_data;
dmu_buf_will_dirty(bonus, tx);
bzero(dlp, sizeof (*dlp));
@ -520,7 +526,7 @@ dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx)
}
/*
* Remove entries on dl that are >= mintxg, and put them on the bpobj.
* Remove entries on dl that are born > mintxg, and put them on the bpobj.
*/
void
dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg,
@ -546,7 +552,7 @@ dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg,
bpobj_enqueue_subobj(bpo, dle->dle_bpobj.bpo_object, tx);
VERIFY3U(0, ==, bpobj_space(&dle->dle_bpobj,
VERIFY0(bpobj_space(&dle->dle_bpobj,
&used, &comp, &uncomp));
ASSERT3U(dl->dl_phys->dl_used, >=, used);
ASSERT3U(dl->dl_phys->dl_comp, >=, comp);
@ -555,7 +561,7 @@ dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg,
dl->dl_phys->dl_comp -= comp;
dl->dl_phys->dl_uncomp -= uncomp;
VERIFY3U(0, ==, zap_remove_int(dl->dl_os, dl->dl_object,
VERIFY0(zap_remove_int(dl->dl_os, dl->dl_object,
dle->dle_mintxg, tx));
dle_next = AVL_NEXT(&dl->dl_tree, dle);

View File

@ -31,6 +31,7 @@
#include <sys/dsl_dataset.h>
#include <sys/dsl_synctask.h>
#include <sys/dsl_destroy.h>
#include <sys/dsl_bookmark.h>
#include <sys/dmu_tx.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_dir.h>
@ -181,70 +182,86 @@ process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev,
dsl_dataset_phys(ds_next)->ds_deadlist_obj);
}
struct removeclonesnode {
list_node_t link;
dsl_dataset_t *ds;
};
/*
 * Work-list entry used while removing a deadlist key from clones: one
 * entry per clone dataset whose own dsl_dir still has to be visited.
 */
typedef struct remaining_clones_key {
/* Clone dataset; filled in by rck_alloc(). */
dsl_dataset_t *rck_clone;
/* Linkage for the list_t the entries are kept on. */
list_node_t rck_node;
} remaining_clones_key_t;
/*
 * Allocate a work-list entry that refers to "clone". Only rck_clone is
 * initialized here; the list linkage is left for the list code to set up.
 */
static remaining_clones_key_t *
rck_alloc(dsl_dataset_t *clone)
{
	remaining_clones_key_t *key;

	key = kmem_alloc(sizeof (remaining_clones_key_t), KM_SLEEP);
	key->rck_clone = clone;

	return (key);
}
/*
 * NOTE(review): this span shows two variants side by side -- an older
 * dsl_dataset_remove_clones_key() that keeps its own "clones" list of
 * removeclonesnode entries, and dsl_dir_remove_clones_key_impl(), which
 * visits the clones of a single dsl_dir and pushes the ones that need
 * further processing onto a caller-supplied stack. Confirm which lines
 * belong to the current revision before editing.
 */
static void
dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx)
dsl_dir_remove_clones_key_impl(dsl_dir_t *dd, uint64_t mintxg, dmu_tx_t *tx,
list_t *stack, void *tag)
{
objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
list_t clones;
struct removeclonesnode *rcn;
objset_t *mos = dd->dd_pool->dp_meta_objset;
list_create(&clones, sizeof (struct removeclonesnode),
offsetof(struct removeclonesnode, link));
/*
* If it is the old version, dd_clones doesn't exist so we can't
* find the clones, but dsl_deadlist_remove_key() is a no-op so it
* doesn't matter.
*/
if (dsl_dir_phys(dd)->dd_clones == 0)
return;
rcn = kmem_zalloc(sizeof (struct removeclonesnode), KM_SLEEP);
rcn->ds = ds;
list_insert_head(&clones, rcn);
/* Cursor and attribute are heap-allocated here and freed at the end. */
zap_cursor_t *zc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP);
zap_attribute_t *za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
for (; rcn != NULL; rcn = list_next(&clones, rcn)) {
zap_cursor_t zc;
zap_attribute_t za;
/*
* If it is the old version, dd_clones doesn't exist so we can't
* find the clones, but dsl_deadlist_remove_key() is a no-op so
* it doesn't matter.
*/
if (dsl_dir_phys(rcn->ds->ds_dir)->dd_clones == 0)
continue;
for (zap_cursor_init(zc, mos, dsl_dir_phys(dd)->dd_clones);
zap_cursor_retrieve(zc, za) == 0;
zap_cursor_advance(zc)) {
dsl_dataset_t *clone;
for (zap_cursor_init(&zc, mos,
dsl_dir_phys(rcn->ds->ds_dir)->dd_clones);
zap_cursor_retrieve(&zc, &za) == 0;
zap_cursor_advance(&zc)) {
dsl_dataset_t *clone;
VERIFY0(dsl_dataset_hold_obj(dd->dd_pool,
za->za_first_integer, tag, &clone));
VERIFY0(dsl_dataset_hold_obj(rcn->ds->ds_dir->dd_pool,
za.za_first_integer, FTAG, &clone));
if (clone->ds_dir->dd_origin_txg > mintxg) {
dsl_deadlist_remove_key(&clone->ds_deadlist,
mintxg, tx);
if (dsl_dataset_remap_deadlist_exists(clone)) {
dsl_deadlist_remove_key(
&clone->ds_remap_deadlist, mintxg,
tx);
}
rcn = kmem_zalloc(
sizeof (struct removeclonesnode), KM_SLEEP);
rcn->ds = clone;
list_insert_tail(&clones, rcn);
} else {
dsl_dataset_rele(clone, FTAG);
if (clone->ds_dir->dd_origin_txg > mintxg) {
dsl_deadlist_remove_key(&clone->ds_deadlist,
mintxg, tx);
if (dsl_dataset_remap_deadlist_exists(clone)) {
dsl_deadlist_remove_key(
&clone->ds_remap_deadlist, mintxg, tx);
}
/*
 * The hold on this clone is kept (no rele in this branch); the
 * clone is pushed on the caller's stack instead.
 */
list_insert_head(stack, rck_alloc(clone));
} else {
dsl_dataset_rele(clone, tag);
}
zap_cursor_fini(&zc);
}
zap_cursor_fini(zc);
kmem_free(za, sizeof (zap_attribute_t));
kmem_free(zc, sizeof (zap_cursor_t));
}
/*
 * Remove the deadlist key "mintxg" from the clones reachable from top_dd.
 *
 * The helper is first run on top_dd itself. Each clone it pushed on the
 * stack is then popped, the helper is run on that clone's dsl_dir (which
 * may push more entries), and the clone is released. The loop ends when
 * the stack is empty.
 *
 * NOTE(review): the lines after the for loop that reference "rcn" and
 * "clones" use names not declared in this function; they look like the
 * tail of an older implementation shown alongside -- confirm.
 */
void
dsl_dir_remove_clones_key(dsl_dir_t *top_dd, uint64_t mintxg, dmu_tx_t *tx)
{
list_t stack;
list_create(&stack, sizeof (remaining_clones_key_t),
offsetof(remaining_clones_key_t, rck_node));
dsl_dir_remove_clones_key_impl(top_dd, mintxg, tx, &stack, FTAG);
for (remaining_clones_key_t *rck = list_remove_head(&stack);
rck != NULL; rck = list_remove_head(&stack)) {
dsl_dataset_t *clone = rck->rck_clone;
dsl_dir_t *clone_dir = clone->ds_dir;
/* The entry is no longer needed once its fields have been read. */
kmem_free(rck, sizeof (*rck));
dsl_dir_remove_clones_key_impl(clone_dir, mintxg, tx,
&stack, FTAG);
dsl_dataset_rele(clone, FTAG);
}
rcn = list_remove_head(&clones);
kmem_free(rcn, sizeof (struct removeclonesnode));
while ((rcn = list_remove_head(&clones)) != NULL) {
dsl_dataset_rele(rcn->ds, FTAG);
kmem_free(rcn, sizeof (struct removeclonesnode));
}
list_destroy(&clones);
list_destroy(&stack);
}
static void
@ -314,6 +331,8 @@ dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
obj = ds->ds_object;
boolean_t book_exists = dsl_bookmark_ds_destroyed(ds, tx);
for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
if (dsl_dataset_feature_is_active(ds, f))
dsl_dataset_deactivate_feature(ds, f, tx);
@ -400,9 +419,11 @@ dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
dsl_destroy_snapshot_handle_remaps(ds, ds_next, tx);
/* Collapse range in clone heads */
dsl_dataset_remove_clones_key(ds,
dsl_dataset_phys(ds)->ds_creation_txg, tx);
if (!book_exists) {
/* Collapse range in clone heads */
dsl_dir_remove_clones_key(ds->ds_dir,
dsl_dataset_phys(ds)->ds_creation_txg, tx);
}
if (ds_next->ds_is_snapshot) {
dsl_dataset_t *ds_nextnext;
@ -430,9 +451,13 @@ dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
/* Collapse range in this head. */
dsl_dataset_t *hds;
VERIFY0(dsl_dataset_hold_obj(dp,
dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj, FTAG, &hds));
dsl_deadlist_remove_key(&hds->ds_deadlist,
dsl_dataset_phys(ds)->ds_creation_txg, tx);
dsl_dir_phys(ds->ds_dir)->dd_head_dataset_obj,
FTAG, &hds));
if (!book_exists) {
/* Collapse range in this head. */
dsl_deadlist_remove_key(&hds->ds_deadlist,
dsl_dataset_phys(ds)->ds_creation_txg, tx);
}
if (dsl_dataset_remap_deadlist_exists(hds)) {
dsl_deadlist_remove_key(&hds->ds_remap_deadlist,
dsl_dataset_phys(ds)->ds_creation_txg, tx);
@ -675,7 +700,8 @@ kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
struct killarg *ka = arg;
dmu_tx_t *tx = ka->tx;
if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
if (zb->zb_level == ZB_DNODE_LEVEL || BP_IS_HOLE(bp) ||
BP_IS_EMBEDDED(bp))
return (0);
if (zb->zb_level == ZB_ZIL_LEVEL) {
@ -973,8 +999,28 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
VERIFY0(zap_destroy(mos,
dsl_dataset_phys(ds)->ds_snapnames_zapobj, tx));
if (ds->ds_bookmarks != 0) {
VERIFY0(zap_destroy(mos, ds->ds_bookmarks, tx));
if (ds->ds_bookmarks_obj != 0) {
void *cookie = NULL;
dsl_bookmark_node_t *dbn;
while ((dbn = avl_destroy_nodes(&ds->ds_bookmarks, &cookie)) !=
NULL) {
if (dbn->dbn_phys.zbm_redaction_obj != 0) {
VERIFY0(dmu_object_free(mos,
dbn->dbn_phys.zbm_redaction_obj, tx));
spa_feature_decr(dmu_objset_spa(mos),
SPA_FEATURE_REDACTION_BOOKMARKS, tx);
}
if (dbn->dbn_phys.zbm_flags & ZBM_FLAG_HAS_FBN) {
spa_feature_decr(dmu_objset_spa(mos),
SPA_FEATURE_BOOKMARK_WRITTEN, tx);
}
spa_strfree(dbn->dbn_name);
mutex_destroy(&dbn->dbn_lock);
kmem_free(dbn, sizeof (*dbn));
}
avl_destroy(&ds->ds_bookmarks);
VERIFY0(zap_destroy(mos, ds->ds_bookmarks_obj, tx));
spa_feature_decr(dp->dp_spa, SPA_FEATURE_BOOKMARKS, tx);
}

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright (c) 2011, 2018 by Delphix. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright 2016 Nexenta Systems, Inc. All rights reserved.
@ -42,7 +42,6 @@
#include <sys/fs/zfs.h>
#include <sys/zfs_znode.h>
#include <sys/spa_impl.h>
#include <sys/dsl_deadlist.h>
#include <sys/vdev_impl.h>
#include <sys/metaslab_impl.h>
#include <sys/bptree.h>

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright (c) 2011, 2018 by Delphix. All rights reserved.
* Copyright 2016 Gary Mills
* Copyright (c) 2017 Datto Inc.
* Copyright 2017 Joyent, Inc.
@ -1343,6 +1343,7 @@ dsl_scan_zil_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
zil_header_t *zh = zsa->zsa_zh;
zbookmark_phys_t zb;
ASSERT(!BP_IS_REDACTED(bp));
if (BP_IS_HOLE(bp) || bp->blk_birth <= scn->scn_phys.scn_cur_min_txg)
return (0);
@ -1375,6 +1376,7 @@ dsl_scan_zil_record(zilog_t *zilog, lr_t *lrc, void *arg, uint64_t claim_txg)
blkptr_t *bp = &lr->lr_blkptr;
zbookmark_phys_t zb;
ASSERT(!BP_IS_REDACTED(bp));
if (BP_IS_HOLE(bp) ||
bp->blk_birth <= scn->scn_phys.scn_cur_min_txg)
return (0);
@ -1519,7 +1521,7 @@ dsl_scan_prefetch(scan_prefetch_ctx_t *spc, blkptr_t *bp, zbookmark_phys_t *zb)
spa_t *spa = scn->scn_dp->dp_spa;
scan_prefetch_issue_ctx_t *spic;
if (zfs_no_scrub_prefetch)
if (zfs_no_scrub_prefetch || BP_IS_REDACTED(bp))
return;
if (BP_IS_HOLE(bp) || bp->blk_birth <= scn->scn_phys.scn_cur_min_txg ||
@ -1771,6 +1773,8 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCAN_THREAD;
int err;
ASSERT(!BP_IS_REDACTED(bp));
if (BP_GET_LEVEL(bp) > 0) {
arc_flags_t flags = ARC_FLAG_WAIT;
int i;
@ -1924,6 +1928,12 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb,
return;
}
if (BP_IS_REDACTED(bp)) {
ASSERT(dsl_dataset_feature_is_active(ds,
SPA_FEATURE_REDACTED_DATASETS));
return;
}
if (bp->blk_birth <= scn->scn_phys.scn_cur_min_txg) {
scn->scn_lt_min_this_txg++;
return;

84
module/zfs/objlist.c Normal file
View File

@ -0,0 +1,84 @@
/*
* CDDL HEADER START
*
* This file and its contents are supplied under the terms of the
* Common Development and Distribution License ("CDDL"), version 1.0.
* You may only use this file in accordance with the terms of version
* 1.0 of the CDDL.
*
* A full copy of the text of the CDDL should have accompanied this
* source. A copy of the CDDL is also available via the Internet at
* http://www.illumos.org/license/CDDL.
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2018 by Delphix. All rights reserved.
*/
#include <sys/objlist.h>
#include <sys/zfs_context.h>
objlist_t *
objlist_create(void)
{
objlist_t *list = kmem_alloc(sizeof (*list), KM_SLEEP);
list_create(&list->ol_list, sizeof (objlist_node_t),
offsetof(objlist_node_t, on_node));
list->ol_last_lookup = 0;
return (list);
}
/*
 * Free every node still on the objlist, then tear down the embedded list
 * and free the objlist itself.
 */
void
objlist_destroy(objlist_t *list)
{
	objlist_node_t *node;

	while ((node = list_remove_head(&list->ol_list)) != NULL)
		kmem_free(node, sizeof (*node));

	list_destroy(&list->ol_list);
	kmem_free(list, sizeof (*list));
}
/*
* This function looks through the objlist to see if the specified object number
* is contained in the objlist. In the process, it will remove all object
* numbers in the list that are smaller than the specified object number. Thus,
* any lookup of an object number smaller than a previously looked up object
* number will always return false; therefore, all lookups should be done in
* ascending order.
*/
/*
 * Report whether "object" is on the objlist. Every node at the head of the
 * list with a smaller object number is removed and freed first, and
 * "object" is recorded as the last lookup. Lookups are asserted to arrive
 * in non-decreasing order.
 */
boolean_t
objlist_exists(objlist_t *list, uint64_t object)
{
	objlist_node_t *head;

	ASSERT3U(object, >=, list->ol_last_lookup);
	list->ol_last_lookup = object;

	/* Discard leading entries that are below the requested object. */
	for (head = list_head(&list->ol_list);
	    head != NULL && head->on_object < object;
	    head = list_head(&list->ol_list)) {
		VERIFY3P(head, ==, list_remove_head(&list->ol_list));
		kmem_free(head, sizeof (*head));
	}

	if (head == NULL)
		return (B_FALSE);
	return (head->on_object == object ? B_TRUE : B_FALSE);
}
/*
* The objlist is a list of object numbers stored in ascending order. However,
* the insertion of new object numbers does not seek out the correct location to
* store a new object number; instead, it appends it to the list for simplicity.
* Thus, any users must take care to only insert new object numbers in ascending
* order.
*/
/*
 * Append "object" to the tail of the objlist. With ZFS_DEBUG defined, the
 * new object number is asserted to be greater than the current tail's
 * (or greater than zero when the list is empty).
 */
void
objlist_insert(objlist_t *list, uint64_t object)
{
	objlist_node_t *entry;

	entry = kmem_zalloc(sizeof (objlist_node_t), KM_SLEEP);
	entry->on_object = object;
#ifdef ZFS_DEBUG
	objlist_node_t *tail = list_tail(&list->ol_list);
	uint64_t prev_objnum = 0;

	if (tail != NULL)
		prev_objnum = tail->on_object;
	ASSERT3U(entry->on_object, >, prev_objnum);
#endif
	list_insert_tail(&list->ol_list, entry);
}

View File

@ -86,7 +86,7 @@ zfs_refcount_destroy_many(zfs_refcount_t *rc, uint64_t number)
{
reference_t *ref;
ASSERT(rc->rc_count == number);
ASSERT3U(rc->rc_count, ==, number);
while ((ref = list_head(&rc->rc_list))) {
list_remove(&rc->rc_list, ref);
kmem_cache_free(reference_cache, ref);
@ -132,7 +132,7 @@ zfs_refcount_add_many(zfs_refcount_t *rc, uint64_t number, void *holder)
ref->ref_number = number;
}
mutex_enter(&rc->rc_mtx);
ASSERT(rc->rc_count >= 0);
ASSERT3U(rc->rc_count, >=, 0);
if (rc->rc_tracked)
list_insert_head(&rc->rc_list, ref);
rc->rc_count += number;
@ -155,7 +155,7 @@ zfs_refcount_remove_many(zfs_refcount_t *rc, uint64_t number, void *holder)
int64_t count;
mutex_enter(&rc->rc_mtx);
ASSERT(rc->rc_count >= number);
ASSERT3U(rc->rc_count, >=, number);
if (!rc->rc_tracked) {
rc->rc_count -= number;

View File

@ -2119,7 +2119,8 @@ static int
spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
{
if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
if (zb->zb_level == ZB_DNODE_LEVEL || BP_IS_HOLE(bp) ||
BP_IS_EMBEDDED(bp) || BP_IS_REDACTED(bp))
return (0);
/*
* Note: normally this routine will not be called if

View File

@ -692,7 +692,7 @@ txg_wait_synced(dsl_pool_t *dp, uint64_t txg)
txg, tx->tx_quiesce_txg_waiting, tx->tx_sync_txg_waiting);
while (tx->tx_synced_txg < txg) {
dprintf("broadcasting sync more "
"tx_synced=%llu waiting=%llu dp=%p\n",
"tx_synced=%llu waiting=%llu dp=%px\n",
tx->tx_synced_txg, tx->tx_sync_txg_waiting, dp);
cv_broadcast(&tx->tx_sync_more_cv);
cv_wait_io(&tx->tx_sync_done_cv, &tx->tx_sync_lock);

View File

@ -176,6 +176,7 @@
#include <sys/dsl_deleg.h>
#include <sys/dmu_objset.h>
#include <sys/dmu_impl.h>
#include <sys/dmu_redact.h>
#include <sys/dmu_tx.h>
#include <sys/sunddi.h>
#include <sys/policy.h>
@ -194,6 +195,7 @@
#include <sys/dmu_recv.h>
#include <sys/dmu_send.h>
#include <sys/dmu_recv.h>
#include <sys/dsl_destroy.h>
#include <sys/dsl_bookmark.h>
#include <sys/dsl_userhold.h>
@ -271,7 +273,8 @@ typedef struct zfs_ioc_key {
typedef enum {
NO_NAME,
POOL_NAME,
DATASET_NAME
DATASET_NAME,
ENTITY_NAME
} zfs_ioc_namecheck_t;
typedef enum {
@ -3708,6 +3711,37 @@ zfs_ioc_get_bookmarks(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
return (dsl_get_bookmarks(fsname, innvl, outnvl));
}
/*
* innvl is not used.
*
* outnvl: {
* property 1, property 2, ...
* }
*
*/
static const zfs_ioc_key_t zfs_keys_get_bookmark_props[] = {
/* no nvl keys */
};
/* ARGSUSED */
/*
 * Look up the properties of a single bookmark.
 *
 * bookmark - full name of the form "<filesystem>#<bookmark>".
 * innvl    - unused.
 * outnvl   - receives the properties from dsl_get_bookmark_props().
 *
 * Returns EINVAL if the name contains no '#', ENAMETOOLONG if the
 * filesystem part does not fit in ZFS_MAX_DATASET_NAME_LEN, otherwise the
 * result of dsl_get_bookmark_props().
 */
static int
zfs_ioc_get_bookmark_props(const char *bookmark, nvlist_t *innvl,
    nvlist_t *outnvl)
{
	char fsname[ZFS_MAX_DATASET_NAME_LEN];
	char *bmname;
	size_t fslen;

	bmname = strchr(bookmark, '#');
	if (bmname == NULL)
		return (SET_ERROR(EINVAL));

	/*
	 * Split at the '#' found in the caller's string rather than
	 * searching the copy again: if the copy were truncated before the
	 * '#', a second strchr() would return NULL and be dereferenced.
	 */
	fslen = (size_t)(bmname - bookmark);
	if (fslen >= sizeof (fsname))
		return (SET_ERROR(ENAMETOOLONG));

	/* Copies exactly the fslen bytes before '#' and NUL-terminates. */
	(void) strlcpy(fsname, bookmark, fslen + 1);
	bmname++;

	return (dsl_get_bookmark_props(fsname, bmname, outnvl));
}
/*
* innvl: {
* bookmark name 1, bookmark name 2
@ -4111,6 +4145,40 @@ recursive_unmount(const char *fsname, void *arg)
return (0);
}
/*
*
* snapname is the snapshot to redact.
* innvl: {
* "bookname" -> (string)
* name of the redaction bookmark to generate
* "snapnv" -> (nvlist, values ignored)
* snapshots to redact snapname with respect to
* }
*
* outnvl is unused
*/
/* ARGSUSED */
static const zfs_ioc_key_t zfs_keys_redact[] = {
{"bookname", DATA_TYPE_STRING, 0},
{"snapnv", DATA_TYPE_NVLIST, 0},
};
/*
 * Handler for the redact ioctl: create a redaction bookmark for
 * "snapname".
 *
 * innvl must carry "snapnv" (an nvlist, which must not be empty) and
 * "bookname" (a string). A missing key yields EINVAL; an empty "snapnv"
 * yields ENXIO. outnvl is unused.
 */
static int
zfs_ioc_redact(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
{
	nvlist_t *snaps = NULL;
	char *bookname = NULL;
	int rc;

	rc = nvlist_lookup_nvlist(innvl, "snapnv", &snaps);
	if (rc != 0)
		return (SET_ERROR(EINVAL));

	if (fnvlist_num_pairs(snaps) == 0)
		return (SET_ERROR(ENXIO));

	rc = nvlist_lookup_string(innvl, "bookname", &bookname);
	if (rc != 0)
		return (SET_ERROR(EINVAL));

	return (dmu_redact_snap(snapname, snaps, bookname));
}
/*
* inputs:
* zc_name old name of dataset
@ -4626,6 +4694,7 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
nvlist_t *origprops = NULL; /* existing properties */
nvlist_t *origrecvd = NULL; /* existing received properties */
boolean_t first_recvd_props = B_FALSE;
boolean_t tofs_was_redacted;
file_t *input_fp;
*read_bytes = 0;
@ -4636,10 +4705,13 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
if (input_fp == NULL)
return (SET_ERROR(EBADF));
off = input_fp->f_offset;
error = dmu_recv_begin(tofs, tosnap, begin_record, force,
resumable, localprops, hidden_args, origin, &drc);
resumable, localprops, hidden_args, origin, &drc, input_fp->f_vnode,
&off);
if (error != 0)
goto out;
tofs_was_redacted = dsl_get_redacted(drc.drc_ds);
/*
* Set properties before we receive the stream so that they are applied
@ -4740,9 +4812,7 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
nvlist_free(xprops);
}
off = input_fp->f_offset;
error = dmu_recv_stream(&drc, input_fp->f_vnode, &off, cleanup_fd,
action_handle);
error = dmu_recv_stream(&drc, cleanup_fd, action_handle, &off);
if (error == 0) {
zfsvfs_t *zfsvfs = NULL;
@ -4752,6 +4822,9 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
/* online recv */
dsl_dataset_t *ds;
int end_err;
boolean_t stream_is_redacted = DMU_GET_FEATUREFLAGS(
begin_record->drr_u.drr_begin.
drr_versioninfo) & DMU_BACKUP_FEATURE_REDACTED;
ds = dmu_objset_ds(zfsvfs->z_os);
error = zfs_suspend_fs(zfsvfs);
@ -4760,8 +4833,17 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
* likely also fail, and clean up after itself.
*/
end_err = dmu_recv_end(&drc, zfsvfs);
if (error == 0)
/*
* If the dataset was not redacted, but we received a
* redacted stream onto it, we need to unmount the
* dataset. Otherwise, resume the filesystem.
*/
if (error == 0 && !drc.drc_newfs &&
stream_is_redacted && !tofs_was_redacted) {
error = zfs_end_fs(zfsvfs, ds);
} else if (error == 0) {
error = zfs_resume_fs(zfsvfs, ds);
}
error = error ? error : end_err;
deactivate_super(zfsvfs->z_sb);
} else if ((zv = zvol_suspend(tofs)) != NULL) {
@ -5118,6 +5200,49 @@ zfs_ioc_recv_new(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
return (error);
}
/*
 * Argument bundle handed from dump_bytes() to dump_bytes_cb(), which may
 * run on a taskq thread.
 */
typedef struct dump_bytes_io {
vnode_t *dbi_vp;	/* vnode the buffer is written to */
void *dbi_buf;	/* data to write */
int dbi_len;	/* number of bytes in dbi_buf */
int dbi_err;	/* result of vn_rdwr(), filled in by dump_bytes_cb() */
} dump_bytes_io_t;
static void
dump_bytes_cb(void *arg)
{
dump_bytes_io_t *dbi = (dump_bytes_io_t *)arg;
ssize_t resid; /* have to get resid to get detailed errno */
dbi->dbi_err = vn_rdwr(UIO_WRITE, dbi->dbi_vp,
(caddr_t)dbi->dbi_buf, dbi->dbi_len,
0, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid);
}
/*
 * Send-stream output function: write len bytes from buf to the vnode
 * passed in arg and return the error reported by the write.
 */
static int
dump_bytes(objset_t *os, void *buf, int len, void *arg)
{
	dump_bytes_io_t io = {
		.dbi_vp = arg,
		.dbi_buf = buf,
		.dbi_len = len,
	};

#if defined(HAVE_LARGE_STACKS)
	dump_bytes_cb(&io);
#else
	/*
	 * Run the vn_rdwr() call from a taskq so that there is always
	 * enough stack space to write safely to the target filesystem.
	 * The ZIO_TYPE_FREE threads are chosen because there can be many
	 * of them and vdev_file.c uses them for a similar purpose.
	 */
	spa_taskq_dispatch_sync(dmu_objset_spa(os), ZIO_TYPE_FREE,
	    ZIO_TASKQ_ISSUE, dump_bytes_cb, &io, TQ_SLEEP);
#endif /* HAVE_LARGE_STACKS */

	return (io.dbi_err);
}
/*
* inputs:
* zc_name name of snapshot to send
@ -5193,8 +5318,8 @@ zfs_ioc_send(zfs_cmd_t *zc)
}
}
error = dmu_send_estimate(tosnap, fromsnap, compressok || rawok,
&zc->zc_objset_type);
error = dmu_send_estimate_fast(tosnap, fromsnap, NULL,
compressok || rawok, &zc->zc_objset_type);
if (fromsnap != NULL)
dsl_dataset_rele(fromsnap, FTAG);
@ -5206,9 +5331,13 @@ zfs_ioc_send(zfs_cmd_t *zc)
return (SET_ERROR(EBADF));
off = fp->f_offset;
dmu_send_outparams_t out = {0};
out.dso_outfunc = dump_bytes;
out.dso_arg = fp->f_vnode;
out.dso_dryrun = B_FALSE;
error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
zc->zc_fromobj, embedok, large_block_ok, compressok, rawok,
zc->zc_cookie, fp->f_vnode, &off);
zc->zc_cookie, &off, &out);
if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
fp->f_offset = off;
@ -5219,18 +5348,19 @@ zfs_ioc_send(zfs_cmd_t *zc)
/*
* inputs:
* zc_name name of snapshot on which to report progress
* zc_cookie file descriptor of send stream
* zc_name name of snapshot on which to report progress
* zc_cookie file descriptor of send stream
*
* outputs:
* zc_cookie number of bytes written in send stream thus far
* zc_cookie number of bytes written in send stream thus far
* zc_objset_type logical size of data traversed by send thus far
*/
static int
zfs_ioc_send_progress(zfs_cmd_t *zc)
{
dsl_pool_t *dp;
dsl_dataset_t *ds;
dmu_sendarg_t *dsp = NULL;
dmu_sendstatus_t *dsp = NULL;
int error;
error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
@ -5254,15 +5384,19 @@ zfs_ioc_send_progress(zfs_cmd_t *zc)
for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL;
dsp = list_next(&ds->ds_sendstreams, dsp)) {
if (dsp->dsa_outfd == zc->zc_cookie &&
dsp->dsa_proc->group_leader == curproc->group_leader)
if (dsp->dss_outfd == zc->zc_cookie &&
dsp->dss_proc == curproc)
break;
}
if (dsp != NULL)
zc->zc_cookie = *(dsp->dsa_off);
else
if (dsp != NULL) {
zc->zc_cookie = atomic_cas_64((volatile uint64_t *)dsp->dss_off,
0, 0);
/* This is the closest thing we have to atomic_read_64. */
zc->zc_objset_type = atomic_cas_64(&dsp->dss_blocks, 0, 0);
} else {
error = SET_ERROR(ENOENT);
}
mutex_exit(&ds->ds_sendstream_lock);
dsl_dataset_rele(ds, FTAG);
@ -5973,8 +6107,8 @@ zfs_ioc_events_seek(zfs_cmd_t *zc)
/*
* inputs:
* zc_name name of new filesystem or snapshot
* zc_value full name of old snapshot
* zc_name name of later filesystem or snapshot
* zc_value full name of old snapshot or bookmark
*
* outputs:
* zc_cookie space in bytes
@ -5986,7 +6120,7 @@ zfs_ioc_space_written(zfs_cmd_t *zc)
{
int error;
dsl_pool_t *dp;
dsl_dataset_t *new, *old;
dsl_dataset_t *new;
error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
if (error != 0)
@ -5996,16 +6130,26 @@ zfs_ioc_space_written(zfs_cmd_t *zc)
dsl_pool_rele(dp, FTAG);
return (error);
}
error = dsl_dataset_hold(dp, zc->zc_value, FTAG, &old);
if (error != 0) {
dsl_dataset_rele(new, FTAG);
dsl_pool_rele(dp, FTAG);
return (error);
}
if (strchr(zc->zc_value, '#') != NULL) {
zfs_bookmark_phys_t bmp;
error = dsl_bookmark_lookup(dp, zc->zc_value,
new, &bmp);
if (error == 0) {
error = dsl_dataset_space_written_bookmark(&bmp, new,
&zc->zc_cookie,
&zc->zc_objset_type, &zc->zc_perm_action);
}
} else {
dsl_dataset_t *old;
error = dsl_dataset_hold(dp, zc->zc_value, FTAG, &old);
error = dsl_dataset_space_written(old, new, &zc->zc_cookie,
&zc->zc_objset_type, &zc->zc_perm_action);
dsl_dataset_rele(old, FTAG);
if (error == 0) {
error = dsl_dataset_space_written(old, new,
&zc->zc_cookie,
&zc->zc_objset_type, &zc->zc_perm_action);
dsl_dataset_rele(old, FTAG);
}
}
dsl_dataset_rele(new, FTAG);
dsl_pool_rele(dp, FTAG);
return (error);
@ -6085,6 +6229,9 @@ zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
* presence indicates raw encrypted records should be used.
* (optional) "resume_object" and "resume_offset" -> (uint64)
* if present, resume send stream from specified object and offset.
* (optional) "redactbook" -> (string)
* if present, use this bookmark's redaction list to generate a redacted
* send stream
* }
*
* outnvl is unused
@ -6098,6 +6245,7 @@ static const zfs_ioc_key_t zfs_keys_send_new[] = {
{"rawok", DATA_TYPE_BOOLEAN, ZK_OPTIONAL},
{"resume_object", DATA_TYPE_UINT64, ZK_OPTIONAL},
{"resume_offset", DATA_TYPE_UINT64, ZK_OPTIONAL},
{"redactbook", DATA_TYPE_STRING, ZK_OPTIONAL},
};
/* ARGSUSED */
@ -6115,6 +6263,7 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
boolean_t rawok;
uint64_t resumeobj = 0;
uint64_t resumeoff = 0;
char *redactbook = NULL;
fd = fnvlist_lookup_int32(innvl, "fd");
@ -6128,12 +6277,18 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
(void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
(void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
(void) nvlist_lookup_string(innvl, "redactbook", &redactbook);
if ((fp = getf(fd)) == NULL)
return (SET_ERROR(EBADF));
off = fp->f_offset;
dmu_send_outparams_t out = {0};
out.dso_outfunc = dump_bytes;
out.dso_arg = fp->f_vnode;
out.dso_dryrun = B_FALSE;
error = dmu_send(snapname, fromname, embedok, largeblockok, compressok,
rawok, fd, resumeobj, resumeoff, fp->f_vnode, &off);
rawok, resumeobj, resumeoff, redactbook, fd, &off, &out);
if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
fp->f_offset = off;
@ -6142,6 +6297,15 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
return (error);
}
/*
 * Output function for size estimation: instead of writing the record
 * anywhere, add its length to the uint64_t counter that arg points to.
 * Always returns 0.
 */
/* ARGSUSED */
int
send_space_sum(objset_t *os, void *buf, int len, void *arg)
{
	uint64_t *total = (uint64_t *)arg;

	/* os and buf are not examined; only the length is accumulated */
	*total = *total + len;
	return (0);
}
/*
* Determine approximately how large a zfs send stream will be -- the number
* of bytes that will be written to the fd supplied to zfs_ioc_send_new().
@ -6157,6 +6321,8 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
* presence indicates compressed DRR_WRITE records are permitted
* (optional) "rawok" -> (value ignored)
* presence indicates raw encrypted records should be used.
* (optional) "fd" -> file descriptor to use as a cookie for progress
* tracking (int32)
* }
*
* outnvl: {
@ -6170,6 +6336,11 @@ static const zfs_ioc_key_t zfs_keys_send_space[] = {
{"embedok", DATA_TYPE_BOOLEAN, ZK_OPTIONAL},
{"compressok", DATA_TYPE_BOOLEAN, ZK_OPTIONAL},
{"rawok", DATA_TYPE_BOOLEAN, ZK_OPTIONAL},
{"fd", DATA_TYPE_INT32, ZK_OPTIONAL},
{"redactbook", DATA_TYPE_STRING, ZK_OPTIONAL},
{"resumeobj", DATA_TYPE_UINT64, ZK_OPTIONAL},
{"resumeoff", DATA_TYPE_UINT64, ZK_OPTIONAL},
{"bytes", DATA_TYPE_UINT64, ZK_OPTIONAL},
};
static int
@ -6177,11 +6348,21 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
{
dsl_pool_t *dp;
dsl_dataset_t *tosnap;
dsl_dataset_t *fromsnap = NULL;
int error;
char *fromname;
char *fromname = NULL;
char *redactlist_book = NULL;
boolean_t largeblockok;
boolean_t embedok;
boolean_t compressok;
boolean_t rawok;
uint64_t space;
uint64_t space = 0;
boolean_t full_estimate = B_FALSE;
uint64_t resumeobj = 0;
uint64_t resumeoff = 0;
uint64_t resume_bytes = 0;
int32_t fd = -1;
zfs_bookmark_phys_t zbm = {0};
error = dsl_pool_hold(snapname, FTAG, &dp);
if (error != 0)
@ -6192,61 +6373,101 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
dsl_pool_rele(dp, FTAG);
return (error);
}
(void) nvlist_lookup_int32(innvl, "fd", &fd);
largeblockok = nvlist_exists(innvl, "largeblockok");
embedok = nvlist_exists(innvl, "embedok");
compressok = nvlist_exists(innvl, "compressok");
rawok = nvlist_exists(innvl, "rawok");
boolean_t from = (nvlist_lookup_string(innvl, "from", &fromname) == 0);
boolean_t altbook = (nvlist_lookup_string(innvl, "redactbook",
&redactlist_book) == 0);
(void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
(void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
(void) nvlist_lookup_uint64(innvl, "bytes", &resume_bytes);
if (altbook) {
full_estimate = B_TRUE;
} else if (from) {
if (strchr(fromname, '#')) {
error = dsl_bookmark_lookup(dp, fromname, tosnap, &zbm);
error = nvlist_lookup_string(innvl, "from", &fromname);
if (error == 0) {
if (strchr(fromname, '@') != NULL) {
/*
* If from is a snapshot, hold it and use the more
* efficient dmu_send_estimate to estimate send space
* size using deadlists.
* dsl_bookmark_lookup() will fail with EXDEV if
* the from-bookmark and tosnap are at the same txg.
* However, it's valid to do a send (and therefore,
* a send estimate) from and to the same time point,
* if the bookmark is redacted (the incremental send
* can change what's redacted on the target). In
* this case, dsl_bookmark_lookup() fills in zbm
* but returns EXDEV. Ignore this error.
*/
dsl_dataset_t *fromsnap;
if (error == EXDEV && zbm.zbm_redaction_obj != 0 &&
zbm.zbm_guid ==
dsl_dataset_phys(tosnap)->ds_guid)
error = 0;
if (error != 0) {
dsl_dataset_rele(tosnap, FTAG);
dsl_pool_rele(dp, FTAG);
return (error);
}
if (zbm.zbm_redaction_obj != 0 || !(zbm.zbm_flags &
ZBM_FLAG_HAS_FBN)) {
full_estimate = B_TRUE;
}
} else if (strchr(fromname, '@')) {
error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
if (error != 0)
goto out;
error = dmu_send_estimate(tosnap, fromsnap,
compressok || rawok, &space);
dsl_dataset_rele(fromsnap, FTAG);
} else if (strchr(fromname, '#') != NULL) {
/*
* If from is a bookmark, fetch the creation TXG of the
* snapshot it was created from and use that to find
* blocks that were born after it.
*/
zfs_bookmark_phys_t frombm;
if (error != 0) {
dsl_dataset_rele(tosnap, FTAG);
dsl_pool_rele(dp, FTAG);
return (error);
}
error = dsl_bookmark_lookup(dp, fromname, tosnap,
&frombm);
if (error != 0)
goto out;
error = dmu_send_estimate_from_txg(tosnap,
frombm.zbm_creation_txg, compressok || rawok,
&space);
if (!dsl_dataset_is_before(tosnap, fromsnap, 0)) {
full_estimate = B_TRUE;
dsl_dataset_rele(fromsnap, FTAG);
}
} else {
/*
* from is not properly formatted as a snapshot or
* bookmark
*/
error = SET_ERROR(EINVAL);
goto out;
dsl_dataset_rele(tosnap, FTAG);
dsl_pool_rele(dp, FTAG);
return (SET_ERROR(EINVAL));
}
} else {
}
if (full_estimate) {
dmu_send_outparams_t out = {0};
offset_t off = 0;
out.dso_outfunc = send_space_sum;
out.dso_arg = &space;
out.dso_dryrun = B_TRUE;
/*
* If estimating the size of a full send, use dmu_send_estimate.
* We have to release these holds so dmu_send can take them. It
* will do all the error checking we need.
*/
error = dmu_send_estimate(tosnap, NULL, compressok || rawok,
&space);
dsl_dataset_rele(tosnap, FTAG);
dsl_pool_rele(dp, FTAG);
error = dmu_send(snapname, fromname, embedok, largeblockok,
compressok, rawok, resumeobj, resumeoff, redactlist_book,
fd, &off, &out);
} else {
error = dmu_send_estimate_fast(tosnap, fromsnap,
(from && strchr(fromname, '#') != NULL ? &zbm : NULL),
compressok || rawok, &space);
space -= resume_bytes;
if (fromsnap != NULL)
dsl_dataset_rele(fromsnap, FTAG);
dsl_dataset_rele(tosnap, FTAG);
dsl_pool_rele(dp, FTAG);
}
fnvlist_add_uint64(outnvl, "space", space);
out:
dsl_dataset_rele(tosnap, FTAG);
dsl_pool_rele(dp, FTAG);
return (error);
}
@ -6607,6 +6828,11 @@ zfs_ioctl_init(void)
POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
zfs_keys_get_bookmarks, ARRAY_SIZE(zfs_keys_get_bookmarks));
zfs_ioctl_register("get_bookmark_props", ZFS_IOC_GET_BOOKMARK_PROPS,
zfs_ioc_get_bookmark_props, zfs_secpolicy_read, ENTITY_NAME,
POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE, zfs_keys_get_bookmark_props,
ARRAY_SIZE(zfs_keys_get_bookmark_props));
zfs_ioctl_register("destroy_bookmarks", ZFS_IOC_DESTROY_BOOKMARKS,
zfs_ioc_destroy_bookmarks, zfs_secpolicy_destroy_bookmarks,
POOL_NAME,
@ -6646,6 +6872,11 @@ zfs_ioctl_init(void)
B_TRUE, zfs_keys_channel_program,
ARRAY_SIZE(zfs_keys_channel_program));
zfs_ioctl_register("redact", ZFS_IOC_REDACT,
zfs_ioc_redact, zfs_secpolicy_config, DATASET_NAME,
POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
zfs_keys_redact, ARRAY_SIZE(zfs_keys_redact));
zfs_ioctl_register("zpool_checkpoint", ZFS_IOC_POOL_CHECKPOINT,
zfs_ioc_pool_checkpoint, zfs_secpolicy_config, POOL_NAME,
POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
@ -6891,7 +7122,8 @@ pool_status_check(const char *name, zfs_ioc_namecheck_t type,
spa_t *spa;
int error;
ASSERT(type == POOL_NAME || type == DATASET_NAME);
ASSERT(type == POOL_NAME || type == DATASET_NAME ||
type == ENTITY_NAME);
if (check & POOL_CHECK_NONE)
return (0);
@ -7162,10 +7394,18 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
vec->zvec_namecheck, vec->zvec_pool_check);
break;
case ENTITY_NAME:
if (entity_namecheck(zc->zc_name, NULL, NULL) != 0) {
error = SET_ERROR(EINVAL);
} else {
error = pool_status_check(zc->zc_name,
vec->zvec_namecheck, vec->zvec_pool_check);
}
break;
case NO_NAME:
break;
}
/*
* Ensure that all input pairs are valid before we pass them down
* to the lower layers.

View File

@ -55,6 +55,7 @@
#include <sys/sunddi.h>
#include <sys/dmu_objset.h>
#include <sys/spa_boot.h>
#include <sys/objlist.h>
#include <sys/zpl.h>
#include <linux/vfs_compat.h>
#include "zfs_comutil.h"
@ -2205,11 +2206,14 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
}
bail:
if (err != 0)
zfsvfs->z_unmounted = B_TRUE;
/* release the VFS ops */
rw_exit(&zfsvfs->z_teardown_inactive_lock);
rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
if (err) {
if (err != 0) {
/*
* Since we couldn't setup the sa framework, try to force
* unmount this file system.
@ -2220,6 +2224,37 @@ zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
return (err);
}
/*
 * Release VOPs and unmount a suspended filesystem.
 *
 * The caller must hold both z_teardown_lock and z_teardown_inactive_lock
 * as writer (asserted below); both are dropped here before the unmount is
 * attempted.  ds must be owned by zfsvfs and long-held.  The result of
 * zfs_umount() is deliberately ignored and the function always returns 0.
 */
int
zfs_end_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
{
ASSERT(RRM_WRITE_HELD(&zfsvfs->z_teardown_lock));
ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));
/*
 * We already own this dataset, so just re-fetch its objset_t and
 * store it in the zfsvfs, as the one we had before may have been
 * evicted.
 */
objset_t *os;
VERIFY3P(ds->ds_owner, ==, zfsvfs);
VERIFY(dsl_dataset_long_held(ds));
VERIFY0(dmu_objset_from_ds(ds, &os));
zfsvfs->z_os = os;
/* release the VOPs */
rw_exit(&zfsvfs->z_teardown_inactive_lock);
rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
/*
 * Try to force unmount this file system.  This is best effort: the
 * filesystem is marked unmounted whether or not zfs_umount() succeeds.
 */
(void) zfs_umount(zfsvfs->z_sb);
zfsvfs->z_unmounted = B_TRUE;
return (0);
}
int
zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
{
@ -2394,6 +2429,71 @@ zfs_get_vfs_flag_unmounted(objset_t *os)
return (unmounted);
}
/*
 * AVL tree element holding one object number; zfs_get_deleteq() uses a
 * tree of these to sort the entries it reads from the unlinked set.
 */
struct objnode {
avl_node_t node;	/* AVL linkage */
uint64_t obj;	/* object number, the sort key */
};
/*
 * AVL comparator for struct objnode: orders nodes by ascending object
 * number and returns -1, 0 or 1.
 */
static int
objnode_compare(const void *o1, const void *o2)
{
	const struct objnode *lhs = o1;
	const struct objnode *rhs = o2;

	if (lhs->obj == rhs->obj)
		return (0);
	return (lhs->obj < rhs->obj ? -1 : 1);
}
/*
 * Build and return an objlist containing every object number recorded in
 * the ZFS_UNLINKED_SET ZAP of the given ZPL objset, in ascending order.
 */
objlist_t *
zfs_get_deleteq(objset_t *os)
{
	objlist_t *result = objlist_create();
	uint64_t unlinked_obj;
	zap_cursor_t cursor;
	zap_attribute_t attr;
	dmu_object_info_t info;

	ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS);
	VERIFY0(dmu_object_info(os, MASTER_NODE_OBJ, &info));
	ASSERT3U(info.doi_type, ==, DMU_OT_MASTER_NODE);

	VERIFY0(zap_lookup(os, MASTER_NODE_OBJ,
	    ZFS_UNLINKED_SET, sizeof (uint64_t), 1, &unlinked_obj));

	/*
	 * Objects must be inserted into the objlist in sorted order, but
	 * the ZAP gives no ordering guarantee.  Stage them in an AVL tree
	 * first and then walk the tree in order.
	 */
	avl_tree_t sorted;
	avl_create(&sorted, objnode_compare, sizeof (struct objnode),
	    offsetof(struct objnode, node));

	zap_cursor_init(&cursor, os, unlinked_obj);
	while (zap_cursor_retrieve(&cursor, &attr) == 0) {
		struct objnode *entry =
		    kmem_zalloc(sizeof (struct objnode), KM_SLEEP);

		entry->obj = attr.za_first_integer;
		avl_add(&sorted, entry);
		zap_cursor_advance(&cursor);
	}
	zap_cursor_fini(&cursor);

	struct objnode *cur;
	for (cur = avl_first(&sorted); cur != NULL;
	    cur = AVL_NEXT(&sorted, cur)) {
		objlist_insert(result, cur->obj);
	}

	/* Tear the staging tree down, freeing every node. */
	void *cookie = NULL;
	while ((cur = avl_destroy_nodes(&sorted, &cookie)) != NULL)
		kmem_free(cur, sizeof (*cur));
	avl_destroy(&sorted);

	return (result);
}
void
zfs_init(void)
{

View File

@ -4799,6 +4799,9 @@ zbookmark_compare(uint16_t dbss1, uint8_t ibs1, uint16_t dbss2, uint8_t ibs2,
zb1->zb_blkid == zb2->zb_blkid)
return (0);
IMPLY(zb1->zb_level > 0, ibs1 >= SPA_MINBLOCKSHIFT);
IMPLY(zb2->zb_level > 0, ibs2 >= SPA_MINBLOCKSHIFT);
/*
* BP_SPANB calculates the span in blocks.
*/

View File

@ -735,6 +735,15 @@ tests = ['quota_001_pos', 'quota_002_pos', 'quota_003_pos',
'quota_004_pos', 'quota_005_pos', 'quota_006_neg']
tags = ['functional', 'quota']
[tests/functional/redacted_send]
tests = ['redacted_compressed', 'redacted_contents', 'redacted_deleted',
'redacted_disabled_feature', 'redacted_embedded', 'redacted_holes',
'redacted_incrementals', 'redacted_largeblocks', 'redacted_many_clones',
'redacted_mixed_recsize', 'redacted_mounts', 'redacted_negative',
'redacted_origin', 'redacted_props', 'redacted_resume', 'redacted_size',
'redacted_volume']
tags = ['functional', 'redacted_send']
[tests/functional/raidz]
tests = ['raidz_001_neg', 'raidz_002_pos']
tags = ['functional', 'raidz']

View File

@ -8,6 +8,7 @@ SUBDIRS = \
file_check \
file_trunc \
file_write \
get_diff \
largest_file \
libzfs_input_check \
mkbusy \
@ -24,4 +25,5 @@ SUBDIRS = \
rename_dir \
rm_lnkcnt_zero_file \
threadsappend \
xattrtest
xattrtest \
stride_dd

View File

@ -0,0 +1,6 @@
include $(top_srcdir)/config/Rules.am
pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/bin
pkgexec_PROGRAMS = get_diff
get_diff_SOURCES = get_diff.c

View File

@ -0,0 +1,109 @@
/*
* This file and its contents are supplied under the terms of the
* Common Development and Distribution License ("CDDL"), version 1.0.
* You may only use this file in accordance with the terms of version
* 1.0 of the CDDL.
*
* A full copy of the text of the CDDL should have accompanied this
* source. A copy of the CDDL is also available via the Internet at
* http://www.illumos.org/license/CDDL.
*/
/*
* Copyright (c) 2018 by Delphix. All rights reserved.
*/
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
/*
 * Print the command synopsis followed by msg to stderr, then terminate
 * the process with exit_value.  Does not return.
 */
static void
usage(char *msg, int exit_value)
{
	fputs("get_diff file redacted_file\n", stderr);
	fprintf(stderr, "%s\n", msg);
	exit(exit_value);
}
/*
 * This utility compares two files, an original and its redacted counterpart
 * (in that order). It compares the files DEV_BSIZE bytes at a time, printing
 * out any ranges (as "offset,length") where the redacted file does not match
 * the original. This output is used to verify that the expected ranges of
 * a redacted file do not contain the original data.
 *
 * A chunk counts as different when its bytes differ, when reading it from
 * the redacted file fails with EIO, or when the redacted file supplies fewer
 * bytes than the original for that chunk.
 *
 * Exit status: 0 on success, 1 on a usage error or on any open/read failure
 * other than EIO on the redacted file.
 */
int
main(int argc, char *argv[])
{
	off_t diff_off = 0, diff_len = 0, off = 0;
	int fd1, fd2;
	char *fname1, *fname2;
	char buf1[DEV_BSIZE], buf2[DEV_BSIZE];
	ssize_t bytes;

	if (argc != 3)
		usage("Incorrect number of arguments.", 1);

	if ((fname1 = argv[1]) == NULL)
		usage("Filename missing.", 1);
	if ((fd1 = open(fname1, O_LARGEFILE | O_RDONLY)) < 0) {
		perror("open1 failed");
		exit(1);
	}

	if ((fname2 = argv[2]) == NULL)
		usage("Redacted filename missing.", 1);
	if ((fd2 = open(fname2, O_LARGEFILE | O_RDONLY)) < 0) {
		perror("open2 failed");
		exit(1);
	}

	while ((bytes = pread(fd1, buf1, DEV_BSIZE, off)) > 0) {
		int differs = 0;
		ssize_t bytes2 = pread(fd2, buf2, DEV_BSIZE, off);

		if (bytes2 < 0) {
			/*
			 * A read in a redacted section of a file will
			 * fail with EIO; treat that as a redacted
			 * (differing) block. Anything else is fatal.
			 */
			if (errno != EIO) {
				perror("pread failed");
				exit(1);
			}
			differs = 1;
		} else if (bytes2 < bytes) {
			/*
			 * The redacted file supplied less data than the
			 * original. The tail of buf2 would be stale, so
			 * do not compare it; the chunk cannot match.
			 */
			differs = 1;
		} else if (memcmp(buf1, buf2, bytes) != 0) {
			differs = 1;
		}

		if (!differs) {
			/* A matching chunk closes any open diff range. */
			if (diff_len != 0) {
				(void) fprintf(stdout, "%lld,%lld\n",
				    (long long)diff_off, (long long)diff_len);
				assert(off == diff_off + diff_len);
				diff_len = 0;
			}
			diff_off = 0;
		} else {
			/* Start a new range or extend the current one. */
			if (diff_len == 0)
				diff_off = off;
			assert(off == diff_off + diff_len);
			diff_len += bytes;
		}
		off += bytes;
	}

	/* Distinguish a read error on the original from a clean EOF. */
	if (bytes < 0) {
		perror("pread failed");
		exit(1);
	}

	/* Flush a diff range that runs to the end of the file. */
	if (diff_len != 0) {
		(void) fprintf(stdout, "%lld,%lld\n", (long long)diff_off,
		    (long long)diff_len);
	}

	(void) close(fd1);
	(void) close(fd2);

	return (0);
}

View File

@ -690,6 +690,34 @@ zfs_destroy(const char *dataset)
return (err == 0 ? 0 : errno);
}
/*
 * Exercise ZFS_IOC_REDACT input validation: redact snapshot1 with respect
 * to snapshot2 into a bookmark named "testbookmark", then destroy the
 * resulting "<fs>#testbookmark" bookmark.
 *
 * snapshot1 is expected to be a full snapshot name ("fs@snap"). If it
 * contains no '@', or the bookmark name would not fit in the local buffer,
 * the cleanup step is skipped rather than operating on a malformed name.
 */
static void
test_redact(const char *snapshot1, const char *snapshot2)
{
	nvlist_t *required = fnvlist_alloc();
	nvlist_t *snapnv = fnvlist_alloc();
	char bookmark[MAXNAMELEN + 32];
	const char *at;
	int len;

	fnvlist_add_string(required, "bookname", "testbookmark");
	fnvlist_add_boolean(snapnv, snapshot2);
	fnvlist_add_nvlist(required, "snapnv", snapnv);

	IOC_INPUT_TEST(ZFS_IOC_REDACT, snapshot1, required, NULL, 0);

	nvlist_free(snapnv);
	nvlist_free(required);

	/*
	 * Build "<fs>#testbookmark" from "<fs>@<snap>". snprintf() always
	 * NUL-terminates and reports truncation, unlike the strncpy() and
	 * strncat() pair it replaces.
	 */
	at = strchr(snapshot1, '@');
	if (at == NULL)
		return;
	len = snprintf(bookmark, sizeof (bookmark), "%.*s#testbookmark",
	    (int)(at - snapshot1), snapshot1);
	if (len < 0 || (size_t)len >= sizeof (bookmark))
		return;

	zfs_destroy(bookmark);
}
/*
 * Exercise ZFS_IOC_GET_BOOKMARK_PROPS against the named bookmark with no
 * input nvlists.
 * NOTE(review): the NULL, NULL, 0 arguments are presumably the required
 * nvlist, the optional nvlist and the expected error; confirm against the
 * IOC_INPUT_TEST definition.
 */
static void
test_get_bookmark_props(const char *bookmark)
{
IOC_INPUT_TEST(ZFS_IOC_GET_BOOKMARK_PROPS, bookmark, NULL, NULL, 0);
}
static void
zfs_ioc_input_tests(const char *pool)
{
@ -700,6 +728,7 @@ zfs_ioc_input_tests(const char *pool)
char bookmark[ZFS_MAX_DATASET_NAME_LEN + 32];
char backup[ZFS_MAX_DATASET_NAME_LEN];
char clone[ZFS_MAX_DATASET_NAME_LEN];
char clonesnap[ZFS_MAX_DATASET_NAME_LEN + 32];
int tmpfd, err;
/*
@ -710,6 +739,7 @@ zfs_ioc_input_tests(const char *pool)
(void) snprintf(snapshot, sizeof (snapshot), "%s@snapshot", dataset);
(void) snprintf(bookmark, sizeof (bookmark), "%s#bookmark", dataset);
(void) snprintf(clone, sizeof (clone), "%s/test-fs-clone", pool);
(void) snprintf(clonesnap, sizeof (clonesnap), "%s@snap", clone);
(void) snprintf(backup, sizeof (backup), "%s/backup", pool);
err = lzc_create(dataset, DMU_OST_ZFS, NULL, NULL, 0);
@ -747,6 +777,7 @@ zfs_ioc_input_tests(const char *pool)
test_bookmark(pool, snapshot, bookmark);
test_get_bookmarks(dataset);
test_get_bookmark_props(bookmark);
test_destroy_bookmarks(pool, bookmark);
test_hold(pool, snapshot);
@ -754,6 +785,9 @@ zfs_ioc_input_tests(const char *pool)
test_release(pool, snapshot);
test_clone(snapshot, clone);
test_snapshot(pool, clonesnap);
test_redact(snapshot, clonesnap);
zfs_destroy(clonesnap);
zfs_destroy(clone);
test_rollback(dataset, snapshot);
@ -909,6 +943,8 @@ validate_ioc_values(void)
ZFS_IOC_BASE + 78 == ZFS_IOC_POOL_DISCARD_CHECKPOINT &&
ZFS_IOC_BASE + 79 == ZFS_IOC_POOL_INITIALIZE &&
ZFS_IOC_BASE + 80 == ZFS_IOC_POOL_TRIM &&
ZFS_IOC_BASE + 81 == ZFS_IOC_REDACT &&
ZFS_IOC_BASE + 82 == ZFS_IOC_GET_BOOKMARK_PROPS &&
LINUX_IOC_BASE + 1 == ZFS_IOC_EVENTS_NEXT &&
LINUX_IOC_BASE + 2 == ZFS_IOC_EVENTS_CLEAR &&
LINUX_IOC_BASE + 3 == ZFS_IOC_EVENTS_SEEK);

View File

@ -0,0 +1 @@
/stride_dd

View File

@ -0,0 +1,7 @@
include $(top_srcdir)/config/Rules.am
pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/bin
pkgexec_PROGRAMS = stride_dd
stride_dd_SOURCES = stride_dd.c
stride_dd_LDADD = -lrt

View File

@ -0,0 +1,214 @@
/*
* This file and its contents are supplied under the terms of the
* Common Development and Distribution License ("CDDL"), version 1.0.
* You may only use this file in accordance with the terms of version
* 1.0 of the CDDL.
*
* A full copy of the text of the CDDL should have accompanied this
* source. A copy of the CDDL is also available via the Internet at
* http://www.illumos.org/license/CDDL.
*/
/*
* Copyright (c) 2018 by Delphix. All rights reserved.
*/
#include <sys/types.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
static int bsize = 0;
static int count = 0;
static char *ifile = NULL;
static char *ofile = NULL;
static int stride = 0;
static int seek = 0;
static char *execname = "stride_dd";
static void usage(void);
static void parse_options(int argc, char *argv[]);
/*
 * Print the usage message, with execname substituted for the program
 * name, to stderr and exit with status 1.  Does not return.
 */
static void
usage(void)
{
(void) fprintf(stderr,
"usage: %s -i inputfile -o outputfile -b blocksize -c count \n"
" -s stride [ -k seekblocks]\n"
"\n"
"Simplified version of dd that supports the stride option.\n"
"A stride of n means that for each block written, n - 1 blocks\n"
"are skipped in both the input and output file. A stride of 1\n"
"means that blocks are read and written consecutively.\n"
"All numeric parameters must be integers.\n"
"\n"
" inputfile: File to read from\n"
" outputfile: File to write to\n"
" blocksize: Size of each block to read/write\n"
" count: Number of blocks to read/write\n"
" stride: Read/write a block then skip (stride - 1) blocks\n"
" seekblocks: Number of blocks to skip at start of output\n",
execname);
(void) exit(1);
}
/*
 * Parse the command line into the file-scope option variables (bsize,
 * count, ifile, ofile, stride, seek) and set execname to argv[0].
 * Calls usage() on an unrecognized option, a missing operand, or when
 * a required parameter is absent or out of range.
 */
static void
parse_options(int argc, char *argv[])
{
	extern char *optarg;
	extern int optind, optopt;
	int opt;

	execname = argv[0];

	while ((opt = getopt(argc, argv, ":b:c:i:o:s:k:")) != -1) {
		int bad_option = 0;

		switch (opt) {
		case 'b':
			bsize = atoi(optarg);
			break;
		case 'c':
			count = atoi(optarg);
			break;
		case 'i':
			ifile = optarg;
			break;
		case 'o':
			ofile = optarg;
			break;
		case 's':
			stride = atoi(optarg);
			break;
		case 'k':
			seek = atoi(optarg);
			break;
		case ':':
			/* leading ':' in optstring: option lacks its operand */
			(void) fprintf(stderr,
			    "Option -%c requires an operand\n", optopt);
			bad_option = 1;
			break;
		case '?':
		default:
			(void) fprintf(stderr,
			    "Unrecognized option: -%c\n", optopt);
			bad_option = 1;
			break;
		}

		if (bad_option)
			usage();
	}

	if (ifile == NULL || ofile == NULL || bsize <= 0 || count <= 0 ||
	    stride <= 0 || seek < 0) {
		(void) fprintf(stderr,
		    "Required parameter(s) missing or invalid.\n");
		usage();
	}
}
/*
 * Copy `count` blocks of `bsize` bytes from ifile to ofile, advancing both
 * files by `stride` blocks per block copied, after first skipping `seek`
 * blocks in the output file.
 *
 * Exit status: 0 on success, 2 on any open, allocation, seek, read or
 * write failure (including short reads and short writes). Usage errors
 * exit from parse_options().
 */
int
main(int argc, char *argv[])
{
	int i;
	int ifd;
	int ofd;
	void *buf;
	int c;

	parse_options(argc, argv);

	ifd = open(ifile, O_RDONLY);
	if (ifd == -1) {
		(void) fprintf(stderr, "%s: %s: ", execname, ifile);
		perror("open");
		exit(2);
	}

	ofd = open(ofile, O_WRONLY | O_CREAT, 0666);
	if (ofd == -1) {
		(void) fprintf(stderr, "%s: %s: ", execname, ofile);
		perror("open");
		exit(2);
	}

	/*
	 * Allocate a 4096-byte-aligned buffer because some character block
	 * devices expect a page-aligned buffer.
	 */
	int err = posix_memalign(&buf, 4096, bsize);
	if (err != 0) {
		(void) fprintf(stderr,
		    "%s: %s\n", execname, strerror(err));
		exit(2);
	}

	if (seek > 0) {
		/* Widen before multiplying so large offsets don't overflow. */
		if (lseek(ofd, (off_t)seek * bsize, SEEK_CUR) == -1) {
			perror("output lseek");
			exit(2);
		}
	}

	for (i = 0; i < count; i++) {
		c = read(ifd, buf, bsize);
		if (c != bsize) {
			/*
			 * Only consult errno on a real failure; a short
			 * read leaves errno meaningless, so report the
			 * byte counts instead.
			 */
			if (c < 0) {
				perror("read");
			} else {
				(void) fprintf(stderr,
				    "%s: unexpected short read, read %d "
				    "bytes, expected %d\n", execname,
				    c, bsize);
			}
			exit(2);
		}

		c = write(ofd, buf, bsize);
		if (c != bsize) {
			if (c < 0) {
				perror("write");
			} else {
				(void) fprintf(stderr,
				    "%s: unexpected short write, wrote %d "
				    "bytes, expected %d\n", execname,
				    c, bsize);
			}
			exit(2);
		}

		if (stride > 1) {
			/* Skip (stride - 1) blocks in both files. */
			off_t skip = (off_t)(stride - 1) * bsize;

			if (lseek(ifd, skip, SEEK_CUR) == -1) {
				perror("input lseek");
				exit(2);
			}
			if (lseek(ofd, skip, SEEK_CUR) == -1) {
				perror("output lseek");
				exit(2);
			}
		}
	}
	free(buf);

	(void) close(ofd);
	(void) close(ifd);

	return (0);
}

View File

@ -1,4 +1,5 @@
#
# Copyright (c) 2016, 2018 by Delphix. All rights reserved.
# These variables are used by zfs-tests.sh to constrain which utilities
# may be used by the suite. The suite will create a directory which is
# the only element of $PATH and create symlinks from that dir to the
@ -163,6 +164,7 @@ export ZFSTEST_FILES='chg_usr_exec
file_check
file_trunc
file_write
get_diff
largest_file
libzfs_input_check
mkbusy
@ -180,4 +182,5 @@ export ZFSTEST_FILES='chg_usr_exec
rm_lnkcnt_zero_file
threadsappend
user_ns_exec
xattrtest'
xattrtest
stride_dd'

View File

@ -405,7 +405,8 @@ function create_recv_clone
log_must eval "zfs send $snap | zfs recv -u $recvfs"
log_must mkfile 1m "$mountpoint/data"
log_must zfs snapshot $incr
log_must eval "zfs send -i $snap $incr | dd bs=10K count=1 > $sendfile"
log_must eval "zfs send -i $snap $incr | dd bs=10K count=1 \
iflag=fullblock > $sendfile"
log_mustnot eval "zfs recv -su $recvfs < $sendfile"
destroy_dataset "$sendfs" "-r"
log_must rm -f "$sendfile"

View File

@ -52,6 +52,7 @@ SUBDIRS = \
projectquota \
quota \
raidz \
redacted_send \
redundancy \
refquota \
refreserv \

View File

@ -0,0 +1,77 @@
#!/bin/ksh -p
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2017 by Delphix. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
#
# DESCRIPTION:
# When a snapshot is destroyed, we used to recurse all clones
# that are downstream of the destroyed snapshot (e.g. to remove
# its key and merge its deadlist entries to the previous one).
# This recursion would break the stack on deeply nested clone
# hierarchies. To avoid this problem today, we keep heap-allocated
# records of all the clones as we traverse their hierarchy.
#
# This test ensures and showcases that our new method works with
# deeply nested clone hierarchies.
#
# STRATEGY:
# 1. Create an fs and take a snapshot of it (snapshot foo)
# 2. Take a second snapshot of the same fs (snapshot bar) on
# top of snapshot foo
# 3. Create a clone of snapshot bar and then take a snapshot
# of it.
# 4. Create a clone of the newly-created snapshot and then
# take a snapshot of it.
# 5. Repeat step [4] many times to create a deeply nested hierarchy.
# 6. Destroy snapshot foo.
#
verify_runnable "both"
typeset FS0=$TESTPOOL/0
typeset FOO=foo
typeset BAR=BAR
typeset FS0SNAPFOO=$FS0@$FOO
typeset FS0SNAPBAR=$FS0@$BAR
typeset -i numds=300
log_must zfs create $FS0
# Exit handler (registered with log_onexit): recursively destroy $FS0 along
# with its snapshots and the dependent clone chain built by this test.
function test_cleanup
{
log_must zfs destroy -Rf $FS0
return 0
}
log_must zfs snapshot $FS0SNAPFOO
log_must zfs snapshot $FS0SNAPBAR
log_onexit test_cleanup
for (( i=1; i<numds; i++ )); do
log_must zfs clone $TESTPOOL/$((i-1))@$BAR $TESTPOOL/$i
log_must zfs snapshot $TESTPOOL/$i@$BAR
done
log_must zfs destroy $FS0SNAPFOO
log_pass "Snapshot deletion doesn't break the stack in deeply nested " \
"clone hierarchies."

View File

@ -65,7 +65,6 @@ done
for opt in ${opts[@]}; do
log_mustnot eval "zfs send -b$opt $SENDFS > /dev/null"
log_mustnot eval "zfs send -b$opt $SENDFS#bm > /dev/null"
log_mustnot eval "zfs send -b$opt -i $SENDFS#bm $SENDFS@s2 > /dev/null"
done
# Do 3..6 in a loop to verify various combination of "zfs send" options

View File

@ -15,7 +15,7 @@
#
#
# Copyright (c) 2012, 2016 by Delphix. All rights reserved.
# Copyright (c) 2012, 2018 by Delphix. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
@ -36,6 +36,7 @@ verify_runnable "both"
function cleanup
{
log_must set_tunable32 zfs_override_estimate_recordsize 8192
for ds in $datasets; do
destroy_dataset $ds "-rf"
done
@ -90,6 +91,7 @@ function verify_size_estimates
log_assert "Verify 'zfs send -nvP' generates valid stream estimates"
log_onexit cleanup
log_must set_tunable32 zfs_override_estimate_recordsize 0
typeset -l block_count=0
typeset -l block_size
typeset -i PERCENT=1

View File

@ -77,6 +77,9 @@ typeset -a properties=(
"feature@obsolete_counts"
"feature@zpool_checkpoint"
"feature@spacemap_v2"
"feature@redaction_bookmarks"
"feature@redacted_datasets"
"feature@bookmark_written"
)
# Additional properties added for Linux.

View File

@ -0,0 +1,25 @@
# Install location and file list for the redacted_send functional tests.
pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/redacted_send
dist_pkgdata_SCRIPTS = \
setup.ksh \
cleanup.ksh \
redacted_compressed.ksh \
redacted_contents.ksh \
redacted_deleted.ksh \
redacted_disabled_feature.ksh \
redacted_embedded.ksh \
redacted_holes.ksh \
redacted_incrementals.ksh \
redacted_largeblocks.ksh \
redacted_many_clones.ksh \
redacted_mixed_recsize.ksh \
redacted_mounts.ksh \
redacted_negative.ksh \
redacted_origin.ksh \
redacted_props.ksh \
redacted_resume.ksh \
redacted_size.ksh \
redacted_volume.ksh
# Shared configuration and helper library sourced by the test scripts.
dist_pkgdata_DATA = \
redacted.cfg \
redacted.kshlib

View File

@ -0,0 +1,33 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2018 by Delphix. All rights reserved.
#
. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
# Tear down both pools used by the redacted_send tests.
destroy_pool $POOL
destroy_pool $POOL2
# Make sure the redacted-mount tunable is switched off when the group ends.
log_must set_tunable32 zfs_allow_redacted_dataset_mount 0
log_pass

View File

@ -0,0 +1,86 @@
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2018 by Delphix. All rights reserved.
#
# First and second fields of the whitespace-separated $DISKS list.
export DISK1=$(echo $DISKS | awk '{print $1}')
export DISK2=$(echo $DISKS | awk '{print $2}')
# Short aliases for the suite-wide pool and filesystem names.
export POOL=$TESTPOOL
export POOL2=$TESTPOOL2
export FS=$TESTFS
export FS2=$TESTFS2
#
# These are the byte ranges that differ between files and their redacted
# counterparts. See compare_files() for more detail.
#
typeset RANGE0="0,2097152"
typeset RANGE1="0,131072"
typeset RANGE2="1048576,2097152"
typeset RANGE3="0,131072
1966080,131072
3932160,131072"
typeset RANGE4="0,131072
262144,131072
524288,131072
786432,131072"
typeset RANGE5="0,1048576
7340032,1048576"
typeset RANGE6="393216,131072
655360,131072
917504,131072
1179648,131072
1441792,393216
1966080,393216
2621440,262144
3145728,262144
3670016,262144
4194304,262144
4718592,262144
5242880,262144"
typeset RANGE7="1048576,6291456"
typeset RANGE8="4063232,131072"
typeset RANGE9="0,131072
262144,131072
524288,131072
786432,131072
1048576,131072
1310720,131072
1572864,131072
1835008,131072
2097152,131072
2359296,131072
2621440,131072
2883584,131072
3145728,131072
3407872,131072
3670016,131072
3932160,131072"
typeset RANGE10="0,393216"
typeset RANGE11="0,1048576"
typeset RANGE12="0,2097152"
typeset RANGE13="0,16384"
typeset RANGE14=""
typeset RANGE15="0,4194304"
typeset RANGE16="0,6291456"

View File

@ -0,0 +1,270 @@
#!/bin/ksh
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2016, 2018 by Delphix. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/rsend/rsend.kshlib
. $STF_SUITE/tests/functional/redacted_send/redacted.cfg
#
# Create $POOL/<name>, populate it and snapshot it as @snap, then create
# $POOL/<name>_clone from that snapshot and snapshot the clone as @snap too.
#
# $1 - dataset name, relative to $POOL
# $2 - options passed to both "zfs create" and "zfs clone" (may be empty)
# $3 - function that fills the new filesystem; defaults to setup_common
#
function setup_dataset
{
	typeset name=$1
	typeset zfs_opts=$2
	typeset populate=${3:-setup_common}
	typeset origin="$POOL/$name"
	typeset clonefs="$POOL/${name}_clone"

	# $zfs_opts is intentionally unquoted so "-o prop=val" splits in two.
	log_must zfs create $zfs_opts $origin
	$populate $origin

	log_must zfs snapshot $origin@snap
	log_must zfs clone $zfs_opts $origin@snap $clonefs
	log_must zfs snapshot $clonefs@snap
}
#
# Default populate function for setup_dataset: fill the filesystem with two
# files of random data, f1 (16 records) and f2 (32 records), written with a
# block size equal to the dataset's recsize.
#
# $1 - filesystem to populate
#
function setup_common
{
	typeset fs=$1
	typeset dir=$(get_prop mountpoint $fs)
	typeset recsize=$(get_prop recsize $fs)
	typeset spec

	# Each entry is <file name>:<record count>.
	for spec in f1:16 f2:32; do
		log_must dd if=/dev/urandom of=$dir/${spec%%:*} bs=$recsize count=${spec##*:}
	done
}
#
# Populate function for the embedded-block tests: for each size from 512
# bytes to 16k, create a file named after the size whose only data is the
# last 8 bytes of that size.
#
# $1 - filesystem to populate
#
function setup_embedded
{
	typeset fs=$1
	typeset dir=$(get_prop mountpoint $fs)
	typeset size

	for size in 512 1024 2048 4096 8192 16384; do
		if ! is_linux; then
			log_must mkholes -d $((size - 8)):8 $dir/$size
			continue
		fi
		# No mkholes on Linux: seek to the final 8-byte slot with dd.
		log_must dd if=/dev/urandom of=$dir/$size bs=8 \
		    count=1 seek=$(((size / 8) - 1))
	done
}
#
# Populate function for the hole tests. Creates four 8M files:
#   f1 - data throughout
#   f2 - data in the first and last 1M only
#   f3 - data from 1M to 7M
#   f4 - no data at all
# and a child filesystem "manyrm" holding 256 small files, together with a
# snapshot, a clone (manyrm_clone) and a snapshot of that clone.
#
# $1 - filesystem to populate
#
function setup_holes
{
	typeset fs=$1
	typeset dir=$(get_prop mountpoint $fs)
	typeset mb=$((1024 * 1024))

	if ! is_linux; then
		log_must mkholes -d 0:$((8 * mb)) $dir/f1
		log_must mkholes -d 0:$mb -d $((7 * mb)):$mb $dir/f2
		log_must mkholes -d $mb:$((6 * mb)) -h $((7 * mb)):$mb $dir/f3
		log_must mkholes -h 0:$((8 * mb)) $dir/f4
	else
		# Linux has no mkholes; build the same layouts with dd/truncate.
		log_must dd if=/dev/urandom of=$dir/f1 bs=8M count=1

		log_must dd if=/dev/urandom of=$dir/f2 bs=1M count=1
		log_must dd if=/dev/urandom of=$dir/f2 bs=1M count=1 seek=7 \
		    conv=notrunc

		log_must dd if=/dev/urandom of=$dir/f3 bs=1M count=6 seek=1
		log_must truncate $dir/f3 --size=$((8 * mb))

		log_must truncate $dir/f4 --size=$((8 * mb))
	fi

	log_must zfs create $fs/manyrm
	for i in {1..256}; do
		log_must stride_dd -i /dev/urandom -o $dir/manyrm/f$i -b 512 \
		    -c $(random 100) -s $(random 4)
	done

	log_must zfs snapshot $fs/manyrm@snap
	log_must zfs clone $fs/manyrm@snap $fs/manyrm_clone
	log_must zfs snapshot $fs/manyrm_clone@snap
}
#
# Populate function for the incremental tests. Fills $1 with f1, f2 and
# d1/f1, snapshots it as @snap0, and then builds a family of clones under
# $POOL (hole, stride3, stride5, int, rm, write), each ending in a @snap
# snapshot that differs from @snap0 in a specific way.
#
# $1 - filesystem to populate
#
function setup_incrementals
{
typeset sendfs=$1
typeset mntpnt=$(get_prop mountpoint $sendfs)
typeset bs=$(get_prop recsize $sendfs)
# Base contents: f1 (16 records), f2 (32 records), d1/f1 = f1 + f2.
log_must dd if=/dev/urandom of=$mntpnt/f1 bs=$bs count=16
log_must dd if=/dev/urandom of=$mntpnt/f2 bs=$bs count=32
log_must mkdir $mntpnt/d1
log_must eval "cat $mntpnt/f1 $mntpnt/f2 >$mntpnt/d1/f1"
log_must zfs snapshot $sendfs@snap0
# "hole": the first 16 records of f2 are overwritten with zeros.
log_must zfs clone $sendfs@snap0 $POOL/hole
mntpnt=$(get_prop mountpoint $POOL/hole)
log_must dd if=/dev/zero of=$mntpnt/f2 bs=$bs count=16 conv=notrunc
log_must zfs snapshot $POOL/hole@snap
# "stride3" / "stride5": f2 is rewritten by stride_dd with -s 3 and -s 5
# respectively (presumably every 3rd / 5th record -- see stride_dd).
log_must zfs clone $sendfs@snap0 $POOL/stride3
mntpnt=$(get_prop mountpoint $POOL/stride3)
log_must stride_dd -i /dev/urandom -o $mntpnt/f2 -b $bs -c 11 -s 3
log_must zfs snapshot $POOL/stride3@snap
log_must zfs clone $sendfs@snap0 $POOL/stride5
mntpnt=$(get_prop mountpoint $POOL/stride5)
log_must stride_dd -i /dev/urandom -o $mntpnt/f2 -b $bs -c 7 -s 5
log_must zfs snapshot $POOL/stride5@snap
# "int": an unmodified intermediate clone; "rm" and "write" are cloned
# from its snapshot rather than directly from $sendfs@snap0.
log_must zfs clone $sendfs@snap0 $POOL/int
log_must zfs snapshot $POOL/int@snap
# "rm": d1, f1 and f2 are all removed.
log_must zfs clone $POOL/int@snap $POOL/rm
mntpnt=$(get_prop mountpoint $POOL/rm)
log_must rm -rf $mntpnt/[df][12]
log_must zfs snapshot $POOL/rm@snap
# "write": the first 16 512-byte sectors of f1 and sectors 16-31 of d1/f1
# are overwritten in place.
log_must zfs clone $POOL/int@snap $POOL/write
mntpnt=$(get_prop mountpoint $POOL/write)
log_must dd if=/dev/urandom of=$mntpnt/f1 bs=512 count=16 conv=notrunc
log_must dd if=/dev/urandom of=$mntpnt/d1/f1 bs=512 count=16 seek=16 \
conv=notrunc
log_must zfs snapshot $POOL/write@snap
}
#
# Populate function for the mount tests: build a small tree of empty and
# 1k files two directories deep, plus a sparse volume "vol" with a
# snapshot, a clone (vol_clone) and a snapshot of that clone.
#
# $1 - filesystem to populate
#
function setup_mounts
{
	typeset fs=$1
	typeset top=$(get_prop mountpoint $fs)
	typeset d1=$top/dir1
	typeset d2=$d1/dir2

	# Top level.
	log_must touch $top/empty
	log_must dd if=/dev/urandom of=$top/contents1 bs=512 count=2
	log_must dd if=/dev/urandom of=$top/contents2 bs=512 count=2

	# First directory level.
	log_must mkdir $d1
	log_must touch $d1/empty
	log_must dd if=/dev/urandom of=$d1/contents1 bs=512 count=2
	log_must dd if=/dev/urandom of=$d1/contents2 bs=512 count=2

	# Second directory level.
	log_must mkdir $d2
	log_must touch $d2/empty
	log_must dd if=/dev/urandom of=$d2/file bs=512 count=2

	# Sparse (-s) volume and its clone.
	log_must zfs create -s -V 16p $fs/vol
	log_must zfs snapshot $fs/vol@snap
	log_must zfs clone $fs/vol@snap $fs/vol_clone
	log_must zfs snapshot $fs/vol_clone@snap
}
#
# Mount a redacted dataset read-only. Mounting redacted datasets is only
# possible while the zfs_allow_redacted_dataset_mount tunable is set, so
# the tunable is enabled around the mount and always switched back off
# afterwards, whether or not the mount succeeded.
#
# Usage: mount_redacted [-f] <dataset>
#   -f  pass -f through to "zfs mount"
#
# Returns 0 if the mount succeeded, 1 otherwise.
#
function mount_redacted
{
	typeset flag=''
	typeset opt
	while getopts "f" opt; do
		case $opt in
		f)
			flag='-f'
			;;
		esac
	done
	shift $(($OPTIND - 1))

	typeset ds=$1
	typeset -i rc=0

	log_must set_tunable32 zfs_allow_redacted_dataset_mount 1
	zfs mount $flag -oro $ds || rc=1
	# Restore the tunable even on failure so that a failed mount cannot
	# leave redacted mounts enabled for whatever runs next.
	log_must set_tunable32 zfs_allow_redacted_dataset_mount 0
	return $rc
}
#
# Unmount a dataset previously mounted with mount_redacted.
#
# $1 - dataset to unmount
#
# Returns the exit status of "zfs unmount".
#
function unmount_redacted
{
	typeset target=$1

	zfs unmount $target
}
#
# This function calls a utility that prints out the ranges where a file
# and its redacted counterpart differ, each range on a new line like this:
#
# 0,131072
# 1966080,131072
# 3932160,131072
#
# The output is then checked against a variable containing the expected
# output to verify the redacted ranges are the ones expected.
#
function compare_files
{
	# $1 - sending filesystem, $2 - (redacted) receiving filesystem,
	# $3 - file name relative to both mountpoints,
	# $4 - expected get_diff output.
	# Relies on the caller having set $tmpdir.
	typeset src_fs=$1
	typeset dst_fs=$2
	typeset name=$3
	typeset want="$4"
	typeset outfile="$tmpdir/get_file.out"
	typeset src_path dst_path got

	log_must mount_redacted -f $dst_fs

	src_path="$(get_prop mountpoint $src_fs)/$name"
	dst_path="$(get_prop mountpoint $dst_fs)/$name"
	log_note "Comparing $src_path and $dst_path"
	if [[ ! -f $src_path ]]; then
		log_fail "File $src_path does not exist."
	fi
	if [[ ! -f $dst_path ]]; then
		log_fail "File $dst_path does not exist."
	fi

	# Capture the differing ranges before unmounting the receive side.
	log_must eval "get_diff $src_path $dst_path >$outfile"
	got="$(cat $outfile)"
	log_must unmount_redacted $dst_fs

	if [[ "$want" != "$got" ]]; then
		log_fail "Unexpected range: $got"
	fi
}
#
# Common exit handler for the redacted_send tests.
#
# $@ - datasets to destroy (with their dependents) if they still exist
#
# Besides destroying the datasets, this export/imports $POOL2, switches
# the redacted-mount tunable off and empties $POOL's tmp directory.
#
function redacted_cleanup
{
	typeset targets=$@
	typeset target

	# Verify the receiving pool can still be exported and imported.
	log_must zpool export $POOL2
	log_must zpool import $POOL2

	for target in $targets; do
		if datasetexists $target; then
			log_must zfs destroy -R $target
		fi
	done

	log_must set_tunable32 zfs_allow_redacted_dataset_mount 0
	rm -f $(get_prop mountpoint $POOL)/tmp/*
}
#
# Retrieve the redaction list of a bookmark or snapshot, using
# the property or zdb output, as requested, and write it to a file with
# one guid per line, sorted.
#
# $1 - output file
# $2 - bookmark or snapshot to inspect
# $3 - "true" to parse "zdb -vvvv" output, "false" (the default) to read
#      the redact_snaps property. The value is run as a command, so pass
#      exactly "true" or "false".
#
function get_guid_list
{
	typeset filename=$1
	typeset dataset=$2
	typeset use_zdb=${3:-false}
	# Keep the scratch variables local so they do not clobber globals
	# of the same name in the calling test.
	typeset guid_list guid

	if $use_zdb; then
		guid_list=$(zdb -vvvv $dataset | sed -e 's/,//g' \
		    -ne 's/^.*Snapshots: \[\(.*\)\]/\1/p')
	else
		guid_list=$(get_prop redact_snaps $dataset)
	fi

	for guid in $(echo $guid_list | tr ',' ' '); do
		echo $guid
	done | sort >$filename
}

View File

@ -0,0 +1,71 @@
#!/bin/ksh
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2018 by Delphix. All rights reserved.
#
. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
#
# Description:
# Verify that compressed send streams are redacted correctly.
#
# Strategy:
# 1. Receive a redacted compressed send stream, verifying compression and
# redaction.
# 2. Receive an incremental on the full receive, verifying compression and
# redaction.
#
typeset ds_name="compressed"
typeset sendfs="$POOL/$ds_name"
typeset recvfs="$POOL2/$ds_name"
typeset clone="$POOL/${ds_name}_clone"
typeset tmpdir="$(get_prop mountpoint $POOL)/tmp"
typeset stream=$(mktemp $tmpdir/stream.XXXX)
setup_dataset $ds_name "-o compress=lz4"
typeset send_mnt="$(get_prop mountpoint $sendfs)"
typeset clone_mnt="$(get_prop mountpoint $clone)"

log_onexit redacted_cleanup $sendfs $recvfs

# Two redaction snapshots on the clone: snap1 rewrites parts of f1, and
# snap2 additionally removes f2.
log_must stride_dd -i /dev/urandom -o $clone_mnt/f1 -b $((128 * 1024)) -c 4 -s 2
log_must zfs snapshot $clone@snap1
log_must rm $clone_mnt/f2
log_must zfs snapshot $clone@snap2

# Full compressed send, redacted with respect to both snapshots.
log_must zfs redact $sendfs@snap book1 $clone@snap1 $clone@snap2
log_must eval "zfs send -c --redact book1 $sendfs@snap >$stream"
log_must eval "zfs recv $recvfs <$stream"
log_must stream_has_features $stream compressed lz4 redacted
compare_files $sendfs $recvfs "f1" "$RANGE4"
verify_stream_size $stream $sendfs
log_must mount_redacted -f $recvfs
verify_stream_size $stream $recvfs
log_must unmount_redacted $recvfs

# Incremental from the redacted snapshot to the first redaction snapshot.
log_must eval "zfs send -c -i $sendfs@snap $clone@snap1 >$stream"
log_must eval "zfs recv $POOL2/inc1 <$stream"
log_must stream_has_features $stream compressed lz4
typeset mntpnt=$(get_prop mountpoint $POOL2)
log_must diff $clone_mnt/f1 $mntpnt/inc1/f1
log_must diff $send_mnt/f2 $mntpnt/inc1/f2

# Incremental from the redacted snapshot to the second redaction snapshot.
log_must eval "zfs send -c -i $sendfs@snap $clone@snap2 >$stream"
log_must eval "zfs recv $POOL2/inc2 <$stream"
log_must stream_has_features $stream compressed lz4
# Check the dataset that was just received (inc2), not inc1 again.
log_must diff $clone_mnt/f1 $mntpnt/inc2/f1
[[ -f $mntpnt/inc2/f2 ]] && log_fail "File f2 should not exist."

log_pass "Compressed send streams are redacted correctly."

View File

@ -0,0 +1,162 @@
#!/bin/ksh
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2018 by Delphix. All rights reserved.
#
. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
#
# Description:
# Verify redaction works as expected for various scenarios.
#
# Strategy:
# 1. An unmodified file does not get redacted at all.
# 2. Empty redaction list redacts everything.
# 3. A file removed in the clone redacts the whole file.
# 4. A file moved in the clone does not redact the file.
# 5. A copied, then removed file in the clone redacts the whole file.
# 6. Overwriting a file with identical contents redacts the file.
# 7. A partially modified block redacts the entire block.
# 8. Only overlapping areas of modified ranges are redacted.
# 9. Sending from the root dataset of a pool works correctly.
#
# Names of the sending filesystem, its clone (both created by setup_dataset
# below) and the filesystem the redacted streams are received into.
typeset ds_name="contents"
typeset sendfs="$POOL/$ds_name"
typeset recvfs="$POOL2/$ds_name"
typeset clone="$POOL/${ds_name}_clone"
# Scratch directory and the file every send stream is written to.
typeset tmpdir="$(get_prop mountpoint $POOL)/tmp"
typeset stream=$(mktemp $tmpdir/stream.XXXX)
setup_dataset $ds_name ''
typeset clone_mnt="$(get_prop mountpoint $clone)"
typeset send_mnt="$(get_prop mountpoint $sendfs)"
# $recvfs does not exist yet, so its mountpoint is spelled out by hand
# (assumes $POOL2 is mounted at /$POOL2).
typeset recv_mnt="/$POOL2/$ds_name"
log_onexit redacted_cleanup $sendfs $recvfs
# An unmodified file does not get redacted at all.
log_must zfs snapshot $clone@snap1
log_must zfs redact $sendfs@snap book1 $clone@snap1
log_must eval "zfs send --redact book1 $sendfs@snap >$stream"
log_must eval "zfs recv $recvfs <$stream"
log_must mount_redacted -f $recvfs
log_must diff $send_mnt/f1 $recv_mnt/f1
log_must diff $send_mnt/f2 $recv_mnt/f2
log_must zfs rollback -R $clone@snap
log_must zfs destroy -R $recvfs
# Removing a file in the clone redacts the entire file.
log_must rm "$clone_mnt/f1"
log_must zfs snapshot $clone@snap1
log_must zfs redact $sendfs@snap book3 $clone@snap1
log_must eval "zfs send --redact book3 $sendfs@snap >$stream"
log_must eval "zfs recv $recvfs <$stream"
compare_files $sendfs $recvfs "f1" "$RANGE0"
log_must zfs rollback -R $clone@snap
log_must zfs destroy -R $recvfs
# Moving a file in the clone does not redact the file.
log_must mv "$clone_mnt/f1" "$clone_mnt/f1.moved"
log_must zfs snapshot $clone@snap1
log_must zfs redact $sendfs@snap book4 $clone@snap1
log_must eval "zfs send --redact book4 $sendfs@snap >$stream"
log_must eval "zfs recv $recvfs <$stream"
log_must mount_redacted -f $recvfs
[[ -f $recv_mnt/f1.moved ]] && log_fail "Found moved file in redacted receive."
log_must diff $send_mnt/f1 $recv_mnt/f1
log_must zfs rollback -R $clone@snap
log_must zfs destroy -R $recvfs
# Copying, then removing a file in the clone does redact the file.
log_must cp "$clone_mnt/f1" "$clone_mnt/f1.copied"
log_must rm "$clone_mnt/f1"
log_must zfs snapshot $clone@snap1
log_must zfs redact $sendfs@snap book5 $clone@snap1
log_must eval "zfs send --redact book5 $sendfs@snap >$stream"
log_must eval "zfs recv $recvfs <$stream"
compare_files $sendfs $recvfs "f1" "$RANGE0"
log_must mount_redacted -f $recvfs
# The copy exists only in the clone, so it must never show up on the
# receiving side.
[[ -f $recv_mnt/f1.copied ]] && log_fail "Found copied file in redacted receive."
# Reset the clone and discard the received dataset for the next scenario.
log_must zfs rollback -R $clone@snap
log_must zfs destroy -R $recvfs
# Overwriting the contents of a block with identical contents redacts the file.
log_must cp "$clone_mnt/f1" "$clone_mnt/f1.copied"
log_must cp "$clone_mnt/f1.copied" "$clone_mnt/f1"
log_must zfs snapshot $clone@snap1
log_must zfs redact $sendfs@snap book6 $clone@snap1
log_must eval "zfs send --redact book6 $sendfs@snap >$stream"
log_must eval "zfs recv $recvfs <$stream"
compare_files $sendfs $recvfs "f1" "$RANGE0"
log_must mount_redacted -f $recvfs
# The copy exists only in the clone, so it must never show up on the
# receiving side.
[[ -f $recv_mnt/f1.copied ]] && log_fail "Found copied file in redacted receive."
# Reset the clone and discard the received dataset for the next scenario.
log_must zfs rollback -R $clone@snap
log_must zfs destroy -R $recvfs
# Modifying some of a block redacts the whole block.
log_must dd if=/dev/urandom of=$clone_mnt/f1 conv=notrunc seek=2 count=1 bs=32k
log_must zfs snapshot $clone@snap1
log_must zfs redact $sendfs@snap book7 $clone@snap1
log_must eval "zfs send --redact book7 $sendfs@snap >$stream"
log_must eval "zfs recv $recvfs <$stream"
compare_files $sendfs $recvfs "f1" "$RANGE1"
log_must zfs rollback -R $clone@snap
log_must zfs destroy -R $recvfs
# Only overlapping areas of modified ranges are redacted.
log_must dd if=/dev/urandom of=$clone_mnt/f2 bs=1024k count=3 conv=notrunc
log_must zfs snapshot $clone@snap1
log_must zfs clone $sendfs@snap $clone/new
typeset mntpnt="$(get_prop mountpoint $clone/new)"
log_must dd if=/dev/urandom of=$mntpnt/f2 bs=1024k seek=1 count=3 \
conv=notrunc
log_must zfs snapshot $clone/new@snap
log_must zfs redact $sendfs@snap book8 $clone@snap1 $clone/new@snap
log_must eval "zfs send --redact book8 $sendfs@snap >$stream"
log_must eval "zfs recv $recvfs <$stream"
compare_files $sendfs $recvfs "f2" "$RANGE2"
log_must zfs destroy -R $clone/new
log_must zfs rollback -R $clone@snap
log_must zfs destroy -R $recvfs
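# FizzBuzz version: redact against two clones that rewrite f2 at strides of
# 3 and 5 blocks; only the blocks modified in both clones are redacted.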
log_must zfs clone $sendfs@snap $POOL/stride3
mntpnt="$(get_prop mountpoint $POOL/stride3)"
log_must stride_dd -i /dev/urandom -o $mntpnt/f2 -b $((128 * 1024)) -c 11 -s 3
log_must zfs snapshot $POOL/stride3@snap
log_must zfs clone $sendfs@snap $POOL/stride5
mntpnt="$(get_prop mountpoint $POOL/stride5)"
log_must stride_dd -i /dev/urandom -o $mntpnt/f2 -b $((128 * 1024)) -c 7 -s 5
log_must zfs snapshot $POOL/stride5@snap
log_must zfs redact $sendfs@snap book8a $POOL/stride3@snap $POOL/stride5@snap
log_must eval "zfs send --redact book8a $sendfs@snap >$stream"
log_must eval "zfs recv $recvfs <$stream"
compare_files $sendfs $recvfs "f2" "$RANGE3"
log_must zfs rollback -R $clone@snap
log_must zfs destroy -R $recvfs
# Sending from the root dataset of a pool works correctly.
log_must dd if=/dev/urandom of=/$POOL/f1 bs=128k count=4
log_must zfs snapshot $POOL@snap
log_must zfs clone $POOL@snap $POOL/clone
log_must dd if=/dev/urandom of=/$POOL/clone/f1 bs=128k count=1 conv=notrunc
log_must zfs snapshot $POOL/clone@snap
log_must zfs redact $POOL@snap book9 $POOL/clone@snap
log_must eval "zfs send --redact book9 $POOL@snap >$stream"
log_must eval "zfs recv $recvfs <$stream"
compare_files $POOL $recvfs "f1" "$RANGE1"
log_must zfs destroy -R $POOL@snap
log_pass "Redaction works as expected for various scenarios."

View File

@ -0,0 +1,103 @@
#!/bin/ksh
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2017, 2018 by Delphix. All rights reserved.
#
. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
#
# Description:
# Verify redaction works as expected with respect to deleted files
#
# Strategy:
# 1. A file on the delete queue counts as deleted when using it to calculate
# redaction.
# 2. A file that is removed in the tosnap of an incremental, where the fromsnap
# is a redaction bookmark that contains references to that file, does not
# result in records for that file.
#
typeset ds_name="deleted"
typeset sendfs="$POOL/$ds_name"
typeset recvfs="$POOL2/$ds_name"
typeset clone="$POOL/${ds_name}_clone"
typeset clone2="$POOL/${ds_name}_clone2"
typeset tmpdir="$(get_prop mountpoint $POOL)/tmp"
typeset stream=$(mktemp $tmpdir/stream.XXXX)
setup_dataset $ds_name ''
typeset clone_mnt="$(get_prop mountpoint $clone)"
typeset send_mnt="$(get_prop mountpoint $sendfs)"
typeset recv_mnt="/$POOL2/$ds_name"
log_onexit redacted_cleanup $sendfs $recvfs
#
# A file on the delete queue counts as deleted when using it to calculate
# redaction.
#
#
# Open file descriptor 5 for appending to $clone_mnt/f1 so that it will go on
# the delete queue when we rm it.
#
exec 5>>$clone_mnt/f1
log_must dd if=/dev/urandom of=$clone_mnt/f1 bs=512 count=1 conv=notrunc
log_must rm $clone_mnt/f1
log_must zfs snapshot $clone@snap1
# Close file descriptor 5
exec 5>&-
log_must zfs redact $sendfs@snap book1 $clone@snap1
log_must eval "zfs send --redact book1 $sendfs@snap >$stream"
log_must eval "zfs recv $recvfs <$stream"
log_must mount_redacted -f $recvfs
#
# We have temporarily disabled redaction blkptrs, so this will not
# fail as was originally intended. We should uncomment this line
# when we reenable redaction blkptrs.
#
#log_mustnot dd if=$recv_mnt/f1 of=/dev/null bs=512 count=1
log_must diff $send_mnt/f2 $recv_mnt/f2
log_must zfs rollback -R $clone@snap
log_must zfs destroy -R $recvfs
#
# A file that is removed in the tosnap of an incremental, where the fromsnap
# is a redaction bookmark that contains references to that file, does not
# result in records for that file.
#
# book2: redaction bookmark on @snap built from a clone in which every file
# was removed. The clone itself is not needed once the bookmark exists.
log_must zfs clone $sendfs@snap $clone2
typeset clone2_mnt="$(get_prop mountpoint $clone2)"
log_must rm -rf $clone2_mnt/*
log_must zfs snapshot $clone2@snap
log_must zfs redact $sendfs@snap book2 $clone2@snap
log_must zfs destroy -R $clone2
# Full redacted send of @snap using book2.
log_must eval "zfs send --redact book2 $sendfs@snap >$stream"
log_must eval "zfs recv $recvfs <$stream"
# Remove f1 on the sending side and take the incremental target, @snap2.
log_must rm $send_mnt/f1
log_must zfs snapshot $sendfs@snap2
# book3: same construction as book2, but on @snap2.
log_must zfs clone $sendfs@snap2 $clone2
typeset clone2_mnt="$(get_prop mountpoint $clone2)"
log_must rm $clone2_mnt/*
log_must zfs snapshot $clone2@snap
log_must zfs redact $sendfs@snap2 book3 $clone2@snap
log_must zfs destroy -R $clone2
# Incremental from the redaction bookmark book2 to @snap2, redacted by book3.
log_must eval "zfs send -i $sendfs#book2 --redact book3 $sendfs@snap2 >$stream"
log_must eval "zfs recv $recvfs <$stream"
log_must mount_redacted -f $recvfs
# Both sides must list the same files, i.e. f1 is gone on the receiver too.
log_must diff <(ls $send_mnt) <(ls $recv_mnt)
log_must zfs destroy -R $recvfs
# Undo the removal of f1 (and @snap2) on the sending side.
log_must zfs rollback -R $sendfs@snap
log_pass "Verify Redaction works as expected with respect to deleted files."

View File

@ -0,0 +1,71 @@
#!/bin/ksh
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2018 by Delphix. All rights reserved.
#
. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
#
# Description:
# Verify the functionality of the redaction_bookmarks and redacted_datasets
# features.
#
# Strategy:
# 1. Create a pool with all features disabled.
# 2. Verify redacted send fails.
# 3. Enable redaction_bookmarks and verify redacted send works.
# 4. Verify receipt of a redacted stream fails.
# 5. Enable redacted_datasets and verify zfs receive works.
#
typeset ds_name="disabled"
typeset sendfs="$POOL/$ds_name"
typeset sendfs1="$POOL2/${ds_name}1"
typeset recvfs="$POOL2/$ds_name"
typeset clone="$POOL/${ds_name}_clone"
typeset clone1="$POOL2/${ds_name}_clone1"
typeset tmpdir="$(get_prop mountpoint $POOL)/tmp"
typeset stream=$(mktemp $tmpdir/stream.XXXX)
setup_dataset $ds_name ''
# Exit handler. This test replaces $POOL2 with a pool created with "-d", so
# the pool is destroyed and recreated through create_pool before the common
# redacted_cleanup runs.
function cleanup
{
destroy_pool $POOL2
create_pool $POOL2 $DISK2
# NOTE(review): presumably mirrors a snapshot taken by the group's
# setup.ksh -- confirm against that script.
log_must zfs snapshot $POOL2@init
redacted_cleanup $sendfs $recvfs
}
log_onexit cleanup
# Strategy step 1: recreate $POOL2 with all features disabled (-d).
destroy_pool $POOL2
log_must zpool create -d $POOL2 $DISK2
log_must zfs create $sendfs1
log_must zfs snapshot $sendfs1@snap
log_must zfs clone $sendfs1@snap $clone1
log_must zfs snapshot $clone1@snap
# Steps 2-3: "zfs redact" fails on $POOL2 until redaction_bookmarks is
# enabled there.
log_mustnot zfs redact $sendfs1@snap book1 $clone1@snap
log_must zpool set feature@redaction_bookmarks=enabled $POOL2
log_must zfs redact $sendfs1@snap book1 $clone1@snap
# Steps 4-5: a redacted stream generated on $POOL cannot be received into
# $POOL2 until redacted_datasets is enabled there.
log_must zfs redact $sendfs@snap book1 $clone@snap
log_must eval "zfs send --redact book1 $sendfs@snap >$stream"
log_mustnot eval "zfs recv $recvfs <$stream"
log_must zpool set feature@redacted_datasets=enabled $POOL2
log_must eval "zfs recv $recvfs <$stream"
log_pass "The redacted send/recv features work correctly."

View File

@ -0,0 +1,103 @@
#!/bin/ksh
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2018 by Delphix. All rights reserved.
#
. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
#
# Description:
# Verify embedded blocks and redacted send work correctly together.
#
# Strategy:
# 1. Create recsize sized files with embedded blocks from size 512b to 16k.
# 2. Receive a redacted send stream with nothing redacted.
# 3. Verify the received files match the source, contain embedded blocks, and
# that the stream has the redacted and embedded data features.
# 4. Receive a redacted send stream with files 512, 2048 and 8192 redacted.
# 5. Verify that the redacted files no longer match, but the others still
# contain embedded blocks and the stream has the redacted and embedded
# data features.
#
typeset ds_name="embedded"
typeset sendfs="$POOL/$ds_name"
typeset recvfs="$POOL2/$ds_name"
typeset clone="$POOL/${ds_name}_clone"
typeset tmpdir="$(get_prop mountpoint $POOL)/tmp"
typeset stream=$(mktemp $tmpdir/stream.XXXX)
setup_dataset $ds_name '-o compress=lz4' setup_embedded
typeset clone_mnt="$(get_prop mountpoint $clone)"
typeset send_mnt="$(get_prop mountpoint $sendfs)"
typeset recv_mnt="/$POOL2/$ds_name"
typeset recsize

log_onexit redacted_cleanup $sendfs $recvfs

#
# Verify that file $1 (named after its record size) is identical on the
# sending and receiving side, that zdb reports an EMBEDDED block for it in
# both datasets, and that $stream carries a WRITE_EMBEDDED record for it.
# Uses the script-level $send_mnt, $recv_mnt, $sendfs, $recvfs, $tmpdir
# and $stream.
#
function verify_embedded
{
	typeset name=$1
	typeset send_obj=$(get_objnum $send_mnt/$name)
	typeset recv_obj=$(get_objnum $recv_mnt/$name)

	log_must diff $send_mnt/$name $recv_mnt/$name
	log_must eval "zdb -ddddd $sendfs $send_obj >$tmpdir/send.zdb"
	log_must eval "zdb -ddddd $recvfs $recv_obj >$tmpdir/recv.zdb"

	grep -q "EMBEDDED" $tmpdir/send.zdb || \
	    log_fail "Obj $send_obj not embedded in $sendfs"
	grep -q "EMBEDDED" $tmpdir/recv.zdb || \
	    log_fail "Obj $recv_obj not embedded in $recvfs"

	cat $stream | zstreamdump -v | log_must grep -q \
	    "WRITE_EMBEDDED object = $send_obj offset = 0"
}

# Redacted send with nothing redacted: every file must arrive intact and
# embedded.
log_must zfs redact $sendfs@snap book1 $clone@snap
log_must eval "zfs send -e --redact book1 $sendfs@snap >$stream"
log_must eval "zfs recv $recvfs <$stream"
log_must stream_has_features $stream redacted embed_data

log_must mount_redacted -f $recvfs
for recsize in 512 1024 2048 4096 8192 16384; do
	verify_embedded $recsize
done

log_must zfs destroy -R $recvfs

# Rewrite files 512, 2048 and 8192 in the clone so that they get redacted.
for recsize in 512 2048 8192; do
	log_must dd if=/dev/urandom of=$clone_mnt/$recsize bs=$recsize count=1
done
log_must zfs snapshot $clone@snap1
log_must zfs redact $sendfs@snap book2 $clone@snap1
log_must eval "zfs send -e --redact book2 $sendfs@snap >$stream"
log_must eval "zfs recv $recvfs <$stream"
log_must stream_has_features $stream redacted embed_data

log_must mount_redacted -f $recvfs
# The redacted files must no longer match the source ...
for recsize in 512 2048 8192; do
	log_mustnot diff $send_mnt/$recsize $recv_mnt/$recsize
done
# ... while the untouched ones are still intact and embedded.
for recsize in 1024 4096 16384; do
	verify_embedded $recsize
done

log_pass "Embedded blocks and redacted send work correctly together."

View File

@ -0,0 +1,120 @@
#!/bin/ksh
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2018 by Delphix. All rights reserved.
#
. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
#
# Description:
# Verify redacted send streams reliably handle holes.
#
# Strategy:
# 1. Holes written at the beginning and end of a non-sparse file in the
# redacted list are correctly redacted.
# 2. Holes written throughout a non-sparse file in the redacted list are
# correctly redacted.
# 3. Data written into a hole in a sparse file in the redacted list are
# correctly redacted.
# 4. Holes in metadata blocks.
#
# Test body: each case modifies the clone, snapshots it, builds a redaction
# bookmark on $sendfs@snap against that snapshot, sends the redacted stream to
# $recvfs, and compares the result with compare_files.
typeset ds_name="holes"
typeset sendfs="$POOL/$ds_name"
typeset recvfs="$POOL2/$ds_name"
typeset clone="$POOL/${ds_name}_clone"
typeset tmpdir="$(get_prop mountpoint $POOL)/tmp"
typeset stream=$(mktemp $tmpdir/stream.XXXX)
# NOTE(review): setup_dataset and setup_holes are defined outside this script
# (sourced from redacted.kshlib); presumably they create $sendfs@snap,
# $clone@snap and the files f1..f3 / manyrm used below -- confirm.
setup_dataset $ds_name '' setup_holes
typeset clone_mnt="$(get_prop mountpoint $clone)"
typeset send_mnt="$(get_prop mountpoint $sendfs)"
typeset recv_mnt="/$POOL2/$ds_name"
typeset M=$((1024 * 1024))
log_onexit redacted_cleanup $sendfs $recvfs
# Write holes at the start and end of a non-sparse file.
# On Linux this zeroes the 1M regions at offsets 0 and 7M with dd; other
# platforms use mkholes for the same two ranges.
if is_linux; then
log_must dd if=/dev/zero of=$clone_mnt/f1 bs=1M count=1 conv=notrunc
log_must dd if=/dev/zero of=$clone_mnt/f1 bs=1M count=1 conv=notrunc seek=7
else
log_must mkholes -h 0:$M -h $((7 * M)):$M $clone_mnt/f1
fi
log_must zfs snapshot $clone@snap1
log_must zfs redact $sendfs@snap book1 $clone@snap1
log_must eval "zfs send --redact book1 $sendfs@snap >$stream"
log_must eval "zfs recv $recvfs <$stream"
# $RANGE5 is defined outside this script; presumably the expected redacted
# ranges of f1 for this case.
compare_files $sendfs $recvfs "f1" "$RANGE5"
# Reset state for the next case: roll the clone back to @snap (which also
# removes @snap1) and destroy the received dataset.
log_must zfs rollback -R $clone@snap
log_must zfs destroy -R $recvfs
# Write two overlapping sets of holes into the same non-sparse file.
log_must stride_dd -i /dev/zero -o $clone_mnt/f1 -b $((128 * 1024)) -c 8 -s 2 -k 3
log_must stride_dd -i /dev/zero -o $clone_mnt/f1 -b $((256 * 1024)) -c 8 -s 2 -k 6
log_must zfs snapshot $clone@snap1
log_must zfs redact $sendfs@snap book2 $clone@snap1
log_must eval "zfs send --redact book2 $sendfs@snap >$stream"
log_must eval "zfs recv $recvfs <$stream"
compare_files $sendfs $recvfs "f1" "$RANGE6"
log_must zfs rollback -R $clone@snap
log_must zfs destroy -R $recvfs
# Write data into the middle of a hole.
# Both branches write 2M of data starting at offset 3M of f2.
if is_linux; then
log_must dd if=/dev/urandom of=$clone_mnt/f2 bs=1M count=2 seek=3 \
conv=notrunc
else
log_must mkholes -d $((3 * M)):$((2 * M)) $clone_mnt/f2
fi
log_must zfs snapshot $clone@snap1
log_must zfs redact $sendfs@snap book3 $clone@snap1
log_must eval "zfs send --redact book3 $sendfs@snap >$stream"
log_must eval "zfs recv $recvfs <$stream"
compare_files $sendfs $recvfs "f2" "$RANGE14"
log_must zfs rollback -R $clone@snap
log_must zfs destroy -R $recvfs
# Remove a file with holes.
log_must rm $clone_mnt/f3
log_must zfs snapshot $clone@snap1
log_must zfs redact $sendfs@snap book4 $clone@snap1
log_must eval "zfs send --redact book4 $sendfs@snap >$stream"
log_must eval "zfs recv $recvfs <$stream"
compare_files $sendfs $recvfs "f3" "$RANGE7"
log_must zfs rollback -R $clone@snap
log_must zfs destroy -R $recvfs
# Create a hole in a L0 metadata block by removing files.
# This case uses the separate $sendfs/manyrm dataset and its manyrm_clone
# rather than $sendfs/$clone.
log_must rm $send_mnt/manyrm_clone/f{32..96}
log_must zfs snapshot $sendfs/manyrm_clone@snap1
log_must zfs redact $sendfs/manyrm@snap book6 $sendfs/manyrm_clone@snap1
log_must eval "zfs send --redact book6 $sendfs/manyrm@snap >$stream"
log_must eval "zfs recv $recvfs <$stream"
log_must mount_redacted -f $recvfs
# Files that were not removed in the clone must be identical on both sides.
for i in {1..31} {97..256}; do
diff $send_mnt/manyrm/f$i $recv_mnt/f$i || log_fail \
"File f$i did not match in the send and recv datasets."
done
# For the files removed in the clone only the sizes are compared, not the
# contents.
for i in {32..96}; do
file_size=$(stat -c %s $send_mnt/manyrm/f$i)
redacted_size=$(stat -c %s $recv_mnt/f$i)
[[ $file_size -eq $redacted_size ]] || log_fail \
"File f$i has size $file_size and redacted size $redacted_size"
done
log_must zfs rollback -R $clone@snap
log_must zfs destroy -R $recvfs
log_pass "Redacted send streams reliably handle holes."

View File

@ -0,0 +1,152 @@
#!/bin/ksh
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2018 by Delphix. All rights reserved.
#
. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
#
# Description:
# Verify that incrementals (redacted and normal) work with redacted datasets.
#
# Strategy:
# 1. Test normal incrementals from the original snap to a subset of the
# redaction list.
# 2. Test receipt of intermediate clones, and their children.
# 3. Test receipt with origin snap specified by '-o origin='.
# 4. Test incrementals from redaction bookmarks.
#
# Test body: exercises which incremental streams can and cannot be received
# on top of redacted datasets, first sending from snapshots and then from
# redaction bookmarks.
typeset ds_name="incrementals"
typeset sendfs="$POOL/$ds_name"
typeset recvfs="$POOL2/$ds_name"
typeset clone="$POOL/${ds_name}_clone"
typeset tmpdir="$(get_prop mountpoint $POOL)/tmp"
typeset stream=$(mktemp $tmpdir/stream.XXXX)
# NOTE(review): setup_incrementals is defined outside this script; presumably
# it creates $sendfs@snap0 and the $POOL/{rm,stride3,stride5,hole,int,write}
# clones referenced below -- confirm against redacted.kshlib.
setup_dataset $ds_name '' setup_incrementals
typeset clone_mnt="$(get_prop mountpoint $clone)"
typeset send_mnt="$(get_prop mountpoint $sendfs)"
typeset recv_mnt="/$POOL2/$ds_name"
log_onexit redacted_cleanup $sendfs $recvfs $POOL2/rfs
# Setup a redacted send using a redaction list at varying depth.
log_must zfs redact $sendfs@snap0 book1 $POOL/rm@snap $POOL/stride3@snap \
$POOL/stride5@snap
log_must eval "zfs send --redact book1 $sendfs@snap0 >$stream"
log_must eval "zfs receive $POOL2/rfs <$stream"
# Verify receipt of normal incrementals to redaction list members.
log_must eval "zfs send -i $sendfs@snap0 $POOL/stride3@snap >$stream"
log_must eval "zfs recv $POOL2/rstride3 <$stream"
log_must diff -r /$POOL/stride3 /$POOL2/rstride3
log_must eval "zfs send -i $sendfs@snap0 $POOL/stride5@snap >$stream"
log_must eval "zfs recv $POOL2/rstride5 <$stream"
log_must diff -r /$POOL/stride5 /$POOL2/rstride5
# But not a normal child that we weren't redacted with respect to.
# The send itself must succeed; only the receive is expected to fail.
log_must eval "zfs send -i $sendfs@snap0 $POOL/hole@snap >$stream"
log_mustnot eval "zfs recv $POOL2/rhole@snap <$stream"
# Verify we can receive an intermediate clone redacted with respect to a
# subset of the original redaction list.
log_must zfs redact $POOL/int@snap book2 $POOL/rm@snap
log_must eval "zfs send -i $sendfs@snap0 --redact book2 $POOL/int@snap >$stream"
log_must eval "zfs recv $POOL2/rint <$stream"
compare_files $POOL/int $POOL2/rint "f1" "$RANGE0"
compare_files $POOL/int $POOL2/rint "f2" "$RANGE15"
compare_files $POOL/int $POOL2/rint "d1/f1" "$RANGE16"
log_must mount_redacted -f $POOL2/rint
# Verify we can receive grandchildren on the child.
log_must eval "zfs send -i $POOL/int@snap $POOL/rm@snap >$stream"
log_must eval "zfs receive $POOL2/rrm <$stream"
log_must diff -r /$POOL/rm /$POOL2/rrm
# But not a grandchild that the received child wasn't redacted with respect to.
log_must eval "zfs send -i $POOL/int@snap $POOL/write@snap >$stream"
log_mustnot eval "zfs recv $POOL2/rwrite<$stream"
# Verify we cannot receive an intermediate clone that isn't redacted with
# respect to a subset of the original redaction list.
# book4 adds write@snap to the list and book5 uses only write@snap; both
# streams can be generated but must be rejected on receive.
log_must zfs redact $POOL/int@snap book4 $POOL/rm@snap $POOL/write@snap
log_must eval "zfs send -i $sendfs@snap0 --redact book4 $POOL/int@snap >$stream"
log_mustnot eval "zfs recv $POOL2/rint <$stream"
log_must zfs redact $POOL/int@snap book5 $POOL/write@snap
log_must eval "zfs send -i $sendfs@snap0 --redact book5 $POOL/int@snap >$stream"
log_mustnot eval "zfs recv $POOL2/rint <$stream"
# Creating the bookmark itself is expected to fail for hole@snap.
log_mustnot zfs redact $POOL/int@snap book6 $POOL/hole@snap
# Verify we can receive a full clone of the grandchild on the child.
log_must eval "zfs send $POOL/write@snap >$stream"
log_must eval "zfs recv -o origin=$POOL2/rint@snap $POOL2/rwrite <$stream"
log_must diff -r /$POOL/write /$POOL2/rwrite
# Along with other origins.
# The same full stream is received again onto two further origins.
log_must eval "zfs recv -o origin=$POOL2/rfs@snap0 $POOL2/rwrite1 <$stream"
log_must diff -r /$POOL/write /$POOL2/rwrite1
log_must eval "zfs recv -o origin=$POOL2@init $POOL2/rwrite2 <$stream"
log_must diff -r /$POOL/write /$POOL2/rwrite2
# Tear down the received datasets so $POOL2/rfs can be re-created below.
log_must zfs destroy -R $POOL2/rwrite2
log_must zfs destroy -R $POOL2/rfs
# Write some data for tests of incremental sends from bookmarks
# hole1 zeroes and write1 randomizes the first 16 128k blocks of f2 in clones
# of $sendfs@snap1; write2 does the same random write in a clone of int@snap.
log_must zfs snapshot $sendfs@snap1
log_must zfs clone $sendfs@snap1 $POOL/hole1
typeset mntpnt=$(get_prop mountpoint $POOL/hole1)
log_must dd if=/dev/zero of=$mntpnt/f2 bs=128k count=16 conv=notrunc
log_must zfs snapshot $POOL/hole1@snap
log_must zfs clone $sendfs@snap1 $POOL/write1
mntpnt=$(get_prop mountpoint $POOL/write1)
log_must dd if=/dev/urandom of=$mntpnt/f2 bs=128k count=16 conv=notrunc
log_must zfs snapshot $POOL/write1@snap
log_must zfs clone $POOL/int@snap $POOL/write2
mntpnt=$(get_prop mountpoint $POOL/write2)
log_must dd if=/dev/urandom of=$mntpnt/f2 bs=128k count=16 conv=notrunc
log_must zfs snapshot $POOL/write2@snap
# Setup a redacted send using a redaction list at varying depth.
log_must zfs redact $sendfs@snap0 book7 $POOL/rm@snap $POOL/stride3@snap \
$POOL/stride5@snap
log_must eval "zfs send --redact book7 $sendfs@snap0 >$stream"
log_must eval "zfs receive $POOL2/rfs <$stream"
# Verify we can receive a redacted incremental sending from the bookmark.
log_must zfs redact $sendfs@snap1 book8 $POOL/write1@snap
log_must eval "zfs send -i $sendfs#book7 --redact book8 $sendfs@snap1 >$stream"
log_must eval "zfs receive $POOL2/rfs <$stream"
# The stride3 and stride5 snaps redact 3 128k blocks at block offsets 0 15 and
# 30 of f2. The write1 snap only covers the first two of those three blocks.
compare_files $sendfs $POOL2/rfs "f2" "$RANGE12"
log_must mount_redacted -f $POOL2/rfs
log_must diff $send_mnt/f1 /$POOL2/rfs/f1
log_must diff $send_mnt/d1/f1 /$POOL2/rfs/d1/f1
unmount_redacted $POOL2/rfs
# Verify we can receive a normal child we weren't redacted with respect to by
# sending from the bookmark.
log_must eval "zfs send -i $sendfs#book7 $POOL/hole1@snap >$stream"
log_must eval "zfs recv $POOL2/rhole1 <$stream"
log_must diff -r /$POOL/hole1 /$POOL2/rhole1
# Verify we can receive an intermediate clone redacted with respect to a
# non-subset if we send from the bookmark.
log_must zfs redact $POOL/int@snap book9 $POOL/write2@snap
log_must eval "zfs send -i $sendfs#book7 --redact book9 $POOL/int@snap >$stream"
log_must eval "zfs receive $POOL2/rint <$stream"
compare_files $sendfs $POOL2/rint "f2" "$RANGE12"
log_pass "Incrementals (redacted and normal) work with redacted datasets."

View File

@ -0,0 +1,63 @@
#!/bin/ksh
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2018 by Delphix. All rights reserved.
#
. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
#
# Description:
# Verify large blocks and redacted send work correctly together.
#
# Strategy:
# 1. Create a dataset and clone with a 1m recordsize, modifying a few k
# within the first 1m of a 16m file.
# 2. Verify that the whole first 1m of the file is redacted.
# 3. Receive an incremental stream from the original snap to the snap it
# was redacted with respect to.
# 4. Verify that the received dataset matches the clone
#
# Test body: redact a 1m-recordsize dataset against a clone with a small
# overwrite, receive it, then receive the clone incrementally on top.
typeset ds_name="largeblocks"
typeset sendfs="$POOL/$ds_name"
typeset recvfs="$POOL2/$ds_name"
typeset clone="$POOL/${ds_name}_clone"
typeset tmpdir="$(get_prop mountpoint $POOL)/tmp"
typeset stream=$(mktemp $tmpdir/stream.XXXX)
# No file-creation function is passed here, so setup_dataset uses whatever its
# default is (defined outside this script).
setup_dataset $ds_name '-o recsize=1m'
typeset clone_mnt="$(get_prop mountpoint $clone)"
typeset send_mnt="$(get_prop mountpoint $sendfs)"
typeset recv_mnt="/$POOL2/$ds_name"
log_onexit redacted_cleanup $sendfs $recvfs
# Overwrite 96k (3 x 32k) starting at offset 256k of f1 in the clone, without
# truncating the file.
log_must dd if=/dev/urandom of=$clone_mnt/f1 bs=32k count=3 seek=8 conv=notrunc
log_must zfs snapshot $clone@snap1
log_must zfs redact $sendfs@snap book1 $clone@snap1
# -L is passed so the stream carries the large_blocks feature checked below.
log_must eval "zfs send -L --redact book1 $sendfs@snap >$stream"
log_must stream_has_features $stream redacted large_blocks
log_must eval "zfs recv $recvfs <$stream"
compare_files $sendfs $recvfs "f1" "$RANGE11"
# f2 was not touched in the clone, so it must be identical on both sides.
log_must mount_redacted -f $recvfs
log_must diff $send_mnt/f2 $recv_mnt/f2
unmount_redacted $recvfs
# Receive the clone as an incremental child of the redacted dataset and check
# it matches the clone's contents.
log_must eval "zfs send -L -i $sendfs@snap $clone@snap1 >$stream"
log_must stream_has_features $stream large_blocks
log_must eval "zfs recv $recvfs/new <$stream"
log_must diff -r $clone_mnt $recv_mnt/new
log_pass "Large blocks and redacted send work correctly together."

View File

@ -0,0 +1,68 @@
#!/bin/ksh
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2018 by Delphix. All rights reserved.
#
. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
#
# Description:
# Verify redacted send can deal with a large redaction list.
#
# Strategy:
# 1. Create 64 clones of sendfs each of which modifies two blocks in a file.
# The first modification is at an offset unique to each clone, and the
# second (the last block in the file) is common to them all.
# 2. Verify a redacted stream with a reasonable redaction list length can
# be correctly processed.
# 3. Verify that if the list is too long, the send fails gracefully.
#
# Test body: build 64 modified clones, redact against the first 32 and verify
# the stream, then check that redacting against all 64 is rejected.
typeset ds_name="many_clones"
typeset sendfs="$POOL/$ds_name"
typeset recvfs="$POOL2/$ds_name"
typeset clone="$POOL/${ds_name}_clone"
typeset tmpdir="$(get_prop mountpoint $POOL)/tmp"
typeset stream=$(mktemp $tmpdir/stream.XXXX)
setup_dataset $ds_name ''
typeset clone_mnt="$(get_prop mountpoint $clone)"
typeset send_mnt="$(get_prop mountpoint $sendfs)"
typeset recv_mnt="/$POOL2/$ds_name"
typeset redaction_list=''
typeset mntpnt
log_onexit redacted_cleanup $sendfs $recvfs
# Fill in both the last block, and a different block in every clone.
# Clone $i is named ${clone}$i; it overwrites the 64k block at index $i and
# the 64k block at index 63 of f2.
for i in {1..64}; do
log_must zfs clone $sendfs@snap ${clone}$i
mntpnt=$(get_prop mountpoint ${clone}$i)
log_must dd if=/dev/urandom of=$mntpnt/f2 bs=64k count=1 seek=$i \
conv=notrunc
log_must dd if=/dev/urandom of=$mntpnt/f2 bs=64k count=1 seek=63 \
conv=notrunc
log_must zfs snapshot ${clone}$i@snap
done
# The limit isn't necessarily 32 snapshots. The maximum number of snapshots in
# the redacted list is determined in dsl_bookmark_create_redacted_check().
log_must zfs redact $sendfs@snap book1 $clone{1..32}@snap
log_must eval "zfs send --redact book1 $sendfs@snap >$stream"
log_must eval "zfs recv $recvfs <$stream"
compare_files $sendfs $recvfs "f2" "$RANGE8"
# A 64-entry redaction list is expected to be refused.
log_mustnot zfs redact $sendfs@snap book2 $clone{1..64}@snap
log_pass "Redacted send can deal with a large redaction list."

View File

@ -0,0 +1,77 @@
#!/bin/ksh
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2018 by Delphix. All rights reserved.
#
. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
#
# Description:
# Verify redacted send works with datasets of different recordsizes.
#
# Strategy:
# 1. Create two datasets, one with recsize 512 and one with recsize 1m, and
# create a 2m file in each.
# 2. For each dataset, create clones of both 512 and 1m recsize and modify
# the first 16k of the file.
# 3. Send each original dataset, redacted with respect to each of the clones
# into both a dataset inheriting a 512 recsize and a 1m one.
# 4. Verify that the smallest unit of redaction is that of the origin fs.
#
# Test body: this script does not use setup_dataset; it creates its own
# source datasets and clones with explicit recordsizes.
typeset tmpdir="$(get_prop mountpoint $POOL)/tmp"
typeset stream=$(mktemp $tmpdir/stream.XXXX)
typeset mntpnt
log_onexit redacted_cleanup $POOL/512 $POOL/1m $POOL2/512 $POOL2/1m
# Set up the datasets we'll send and redact from.
# $POOL/512 (recsize 512) gets a 2m random file f1 and a 1m-recsize clone
# whose first 16k (32 x 512 bytes) of f1 is overwritten.
log_must zfs create -o recsize=512 $POOL/512
mntpnt=$(get_prop mountpoint $POOL/512)
log_must dd if=/dev/urandom of=$mntpnt/f1 bs=1024k count=2
log_must zfs snapshot $POOL/512@snap
log_must zfs clone -o recsize=1m $POOL/512@snap $POOL/1mclone
mntpnt=$(get_prop mountpoint $POOL/1mclone)
log_must dd if=/dev/urandom of=$mntpnt/f1 bs=512 count=32 conv=notrunc
log_must zfs snapshot $POOL/1mclone@snap
# The mirror image: $POOL/1m (recsize 1m) with a 512-recsize clone that gets
# the same 16k overwrite.
log_must zfs create -o recsize=1m $POOL/1m
mntpnt=$(get_prop mountpoint $POOL/1m)
log_must dd if=/dev/urandom of=$mntpnt/f1 bs=1024k count=2
log_must zfs snapshot $POOL/1m@snap
log_must zfs clone -o recsize=512 $POOL/1m@snap $POOL/512clone
mntpnt=$(get_prop mountpoint $POOL/512clone)
log_must dd if=/dev/urandom of=$mntpnt/f1 bs=512 count=32 conv=notrunc
log_must zfs snapshot $POOL/512clone@snap
# Create datasets that allow received datasets to inherit recordsize.
log_must zfs create -o recsize=512 $POOL2/512
log_must zfs create -o recsize=1m $POOL2/1m
# Do the sends and verify the contents.
# Each redacted stream is received twice, once under each parent, and both
# receives are checked against the same expected range.
log_must zfs redact $POOL/512@snap book1 $POOL/1mclone@snap
log_must eval "zfs send --redact book1 $POOL/512@snap>$stream"
log_must eval "zfs recv $POOL2/512/recva <$stream"
compare_files $POOL/512 $POOL2/512/recva "f1" "$RANGE13"
log_must eval "zfs recv $POOL2/1m/recvb <$stream"
compare_files $POOL/512 $POOL2/1m/recvb "f1" "$RANGE13"
log_must zfs redact $POOL/1m@snap book2 $POOL/512clone@snap
log_must eval "zfs send --redact book2 $POOL/1m@snap >$stream"
log_must eval "zfs recv $POOL2/512/recvc <$stream"
compare_files $POOL/1m $POOL2/512/recvc "f1" "$RANGE11"
log_must eval "zfs recv $POOL2/1m/recvd <$stream"
compare_files $POOL/1m $POOL2/1m/recvd "f1" "$RANGE11"
log_pass "Redaction works correctly with different recordsizes."

View File

@ -0,0 +1,109 @@
#!/bin/ksh
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2018 by Delphix. All rights reserved.
#
. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
#
# Description:
# Verify that received redacted datasets are not mounted by default, but
# can still be mounted after setting zfs_allow_redacted_dataset_mount.
#
# Strategy:
# 1. Verify a received redacted stream isn't mounted by default.
# 2. Set zfs_allow_redacted_dataset_mount and verify it can't be mounted
# without the -f flag, but can with -f.
# 3. Receive a redacted volume.
# 4. Verify the device file isn't present until the kernel variable is set.
# 5. Verify the files in the send fs are also present in the recv fs.
#
# Test body: checks mount behaviour of a received redacted filesystem and
# device-node visibility of a received redacted volume.
typeset ds_name="mounts"
typeset sendfs="$POOL/$ds_name"
typeset sendvol="$sendfs/vol"
typeset recvfs="$POOL2/$ds_name"
typeset recvvol="$POOL2/vol"
typeset clone="$POOL/${ds_name}_clone"
typeset clonevol="${sendvol}_clone"
typeset tmpdir="$(get_prop mountpoint $POOL)/tmp"
typeset stream=$(mktemp $tmpdir/stream.XXXX)
# NOTE(review): setup_mounts is defined outside this script; presumably it
# creates the files/directories and the $sendvol/$clonevol volumes used
# below -- confirm against redacted.kshlib.
setup_dataset $ds_name '' setup_mounts
typeset clone_mnt="$(get_prop mountpoint $clone)"
typeset send_mnt="$(get_prop mountpoint $sendfs)"
typeset recv_mnt="/$POOL2/$ds_name"
typeset recv_vol_file="/dev/zvol/$recvvol"
log_onexit redacted_cleanup $sendfs $recvfs $recvvol
# Modify the clone: remove some files and a directory, overwrite the first
# 512 bytes of others, and write data into dir1/empty.
log_must rm $clone_mnt/empty $clone_mnt/contents1
log_must dd if=/dev/urandom of=$clone_mnt/contents2 bs=512 count=1 conv=notrunc
log_must rm $clone_mnt/dir1/contents1
log_must rm -rf $clone_mnt/dir1/dir2
log_must dd if=/dev/urandom of=$clone_mnt/dir1/contents2 bs=512 count=1 \
conv=notrunc
log_must dd if=/dev/urandom of=$clone_mnt/dir1/empty bs=512 count=1
log_must zfs snapshot $clone@snap1
# NOTE(review): the bookmark below is created with respect to $clone@snap,
# not the $clone@snap1 snapshot taken just above -- confirm this is intended.
log_must zfs redact $sendfs@snap book1 $clone@snap
log_must eval "zfs send --redact book1 $sendfs@snap >$stream"
log_must eval "zfs receive $recvfs <$stream"
# The received dataset must not be mounted, a plain mount_redacted must fail,
# and only mount_redacted -f may succeed.
log_mustnot ismounted $recvfs
log_mustnot mount_redacted $recvfs
log_mustnot ismounted $recvfs
log_must mount_redacted -f $recvfs
log_must ismounted $recvfs
# Verify that the send and recv fs both have the same files under their
# mountpoints by comparing find output with the name of the mountpoint
# deleted.
contents=$(log_must find $recv_mnt)
contents_orig=$(log_must find $send_mnt)
log_must diff <(echo ${contents//$recv_mnt/}) \
<(echo ${contents_orig//$send_mnt/})
# Repeat the redacted send/receive for the volume.
log_must zfs redact $sendvol@snap book2 $clonevol@snap
log_must eval "zfs send --redact book2 $sendvol@snap >$stream"
log_must eval "zfs receive $recvvol <$stream"
# Before the tunable is set, no block device may exist for the volume.
[[ -b $recv_vol_file ]] && log_fail "Volume device file should not exist."
# Enable the tunable and re-import the pool; the device node is then expected
# to appear.
log_must set_tunable32 zfs_allow_redacted_dataset_mount 1
log_must zpool export $POOL2
log_must zpool import $POOL2
# NOTE(review): udevadm is invoked unconditionally here -- confirm this test
# is only run on platforms that provide it.
udevadm settle
# The device file isn't guaranteed to show up right away.
# Poll with decreasing sleeps (10, 5, 3, 2, 1 seconds) until it appears.
if [[ ! -b $recv_vol_file ]]; then
udevadm settle
for t in 10 5 3 2 1; do
log_note "Polling $t seconds for device file."
udevadm settle
sleep $t
[[ -b $recv_vol_file ]] && break
done
fi
[[ -b $recv_vol_file ]] || log_fail "Volume device file should exist."
# Change the source filesystem, send an incremental from the redaction
# bookmark book1, and repeat the file-listing comparison.
log_must dd if=/dev/urandom of=$send_mnt/dir1/contents1 bs=512 count=2
log_must rm $send_mnt/dir1/dir2/empty
log_must zfs snapshot $sendfs@snap2
log_must eval "zfs send -i $sendfs#book1 $sendfs@snap2 >$stream"
log_must eval "zfs receive $recvfs <$stream"
log_must mount_redacted -f $recvfs
log_must ismounted $recvfs
contents=$(log_must find $recv_mnt)
contents_orig=$(log_must find $send_mnt)
log_must diff <(echo ${contents//$recv_mnt/}) \
<(echo ${contents_orig//$send_mnt/})
log_pass "Received redacted streams can be mounted."

View File

@ -0,0 +1,80 @@
#!/bin/ksh
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2018 by Delphix. All rights reserved.
#
. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
#
# Description:
# Test that redacted send correctly detects invalid arguments.
#
# Test body: every dataset used here lives in $POOL2. Negative cases use
# log_mustnot, so each one should fail for the reason named in its section
# comment and not because of an unrelated argument error.
typeset sendfs="$POOL2/sendfs"
typeset recvfs="$POOL2/recvfs"
typeset clone1="$POOL2/clone1"
typeset clone2="$POOL2/clone2"
# clone3 was previously declared twice, the second time as ".../clone4",
# which silently overrode the first value; it is now declared once.
typeset clone3="$POOL2/clone3"
typeset tmpdir="$(get_prop mountpoint $POOL)/tmp"
typeset stream=$(mktemp $tmpdir/stream.XXXX)
log_onexit redacted_cleanup $sendfs $recvfs $clone3
# Three snapshots of $sendfs; clone1 and clone2 both branch off @snap2.
log_must zfs create $sendfs
log_must zfs snapshot $sendfs@snap1
log_must zfs snapshot $sendfs@snap2
log_must zfs snapshot $sendfs@snap3
log_must zfs clone $sendfs@snap2 $clone1
log_must zfs snapshot $clone1@snap
log_must zfs bookmark $clone1@snap $clone1#book
log_must zfs clone $sendfs@snap2 $clone2
log_must zfs snapshot $clone2@snap
# Incompatible flags
log_must zfs redact $sendfs@snap2 book $clone1@snap
log_mustnot eval "zfs send -R --redact book $sendfs@snap2 >/dev/null"
# A redacted send must target a snapshot, not a filesystem or a bookmark.
typeset arg
for arg in "$sendfs" "$clone1#book"; do
log_mustnot eval "zfs send --redact book $arg >/dev/null"
done
# Bad redaction list arguments
log_mustnot zfs redact $sendfs@snap1
log_mustnot zfs redact $sendfs@snap1 book
log_mustnot zfs redact $sendfs#book1 book4 $clone1
log_mustnot eval "zfs send --redact $sendfs#book $sendfs@snap >/dev/null"
# Redaction snapshots not a descendant of tosnap
log_mustnot zfs redact $sendfs@snap2 book $sendfs@snap2
log_must zfs redact $sendfs@snap2 book2 $clone1@snap $clone2@snap
log_must eval "zfs send --redact book2 $sendfs@snap2 >$stream"
log_must zfs redact $sendfs@snap2 book3 $clone1@snap $clone2@snap
log_must eval "zfs send -i $sendfs@snap1 --redact book3 $sendfs@snap2 \
>/dev/null"
# Use a plain, unused bookmark name here. Passing "$sendfs@snap3" as the
# bookmark argument could make the command fail on the name alone, without
# ever exercising the check that $clone1@snap (cloned from @snap2) is not a
# descendant of @snap3.
log_mustnot zfs redact $sendfs@snap3 book6 $clone1@snap
# Full redacted sends of redacted datasets are not allowed.
log_must eval "zfs recv $recvfs <$stream"
log_must zfs snapshot $recvfs@snap
log_must zfs clone $recvfs@snap $clone3
log_must zfs snapshot $clone3@snap
log_mustnot zfs redact $recvfs@snap book5 $clone3@snap
# Nor may a redacted dataset appear in the redaction list.
# Use $recvfs rather than a hard-coded pool name so the check still targets
# the received dataset when $POOL2 is not "testpool2".
log_mustnot zfs redact $recvfs@snap2 book7 $recvfs@snap
log_pass "Verify that redacted send correctly detects invalid arguments."

View File

@ -0,0 +1,87 @@
#!/bin/ksh
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2018 by Delphix. All rights reserved.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
#
# Description:
# Test that receiving sends from redaction bookmarks and redacted datasets
# works correctly in certain edge cases.
# 1. Send A(B,C,D) to pool2.
# 2. Verify send from A(B, C, D) can be received onto it.
# 3. Verify send from A(B, C) can be received onto it.
# 4. Verify send from A() can be received onto it.
# 5. Verify send from A(E) cannot be received onto it.
# 6. Verify send from redaction bookmark for A(B, C) can be received onto it.
# 7. Verify send from redaction bookmark for A() can be received onto it.
# 8. Verify send from redaction bookmark for A(E) cannot be received onto it.
#
# Test body: A is $sendfs@snap0 and B..F are snapshots of its clones. Streams
# based on differently-redacted copies of A are received with
# '-o origin=$targ', where $targ was received from the A(B,C,D) stream.
typeset ds_name="origin"
typeset sendfs="$POOL/$ds_name"
typeset tmpdir="$(get_prop mountpoint $POOL)/tmp"
typeset stream=$(mktemp $tmpdir/stream.XXXX)
setup_dataset $ds_name '' setup_incrementals
typeset dsA=$sendfs@snap0
typeset dsB=$POOL/hole@snap
typeset dsC=$POOL/rm@snap
typeset dsD=$POOL/write@snap
typeset dsE=$POOL/stride3@snap
typeset dsF=$POOL/stride5@snap
typeset targ=$POOL2/targfs@snap
log_onexit redacted_cleanup $sendfs $POOL2/rBCD $POOL2/targfs \
$POOL2/rBC $POOL2/rE
# Set up all the filesystems and clones.
# The A(B,C,D) stream is received twice: as rBCD and as the target $targ.
log_must zfs redact $dsA BCD $dsB $dsC $dsD
log_must eval "zfs send --redact BCD $dsA >$stream"
log_must eval "zfs receive $POOL2/rBCD <$stream"
log_must eval "zfs receive $targ <$stream"
log_must zfs redact $dsA BC $dsB $dsC
log_must eval "zfs send --redact BC $dsA >$stream"
log_must eval "zfs receive $POOL2/rBC <$stream"
log_must zfs redact $dsA E $dsE
log_must eval "zfs send --redact E $dsA >$stream"
log_must eval "zfs receive $POOL2/rE <$stream"
# Receive a full stream of F as a clone of each redacted copy of A.
log_must eval "zfs send $dsF >$stream"
log_must eval "zfs receive -o origin=$POOL2/rBCD@snap0 $POOL2/BCDrF <$stream"
log_must eval "zfs receive -o origin=$POOL2/rBC@snap0 $POOL2/BCrF <$stream"
log_must eval "zfs receive -o origin=$POOL2/rE@snap0 $POOL2/ErF <$stream"
# Run tests from redacted datasets.
# Incrementals from rBCD and rBC must be receivable onto $targ; the one from
# rE must be rejected.
log_must eval "zfs send -i $POOL2/rBCD@snap0 $POOL2/BCDrF@snap >$stream"
log_must eval "zfs receive -o origin=$targ $POOL2/tdBCD <$stream"
log_must eval "zfs send -i $POOL2/rBC@snap0 $POOL2/BCrF@snap >$stream"
log_must eval "zfs receive -o origin=$targ $POOL2/tdBC <$stream"
log_must eval "zfs send -i $POOL2/rE@snap0 $POOL2/ErF@snap >$stream"
log_mustnot eval "zfs receive -o origin=$targ $POOL2/tdE <$stream"
# Run tests from redaction bookmarks.
# Same expectation when sending from the bookmarks: #BC is accepted, #E is
# rejected.
log_must eval "zfs send -i $sendfs#BC $dsF >$stream"
log_must eval "zfs receive -o origin=$targ $POOL2/tbBC <$stream"
log_must eval "zfs send -i $sendfs#E $dsF >$stream"
log_mustnot eval "zfs receive -o origin=$targ $POOL2/tbE <$stream"
log_pass "Verify sends from redacted datasets and bookmarks work correctly."

View File

@ -0,0 +1,77 @@
#!/bin/ksh
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2018 by Delphix. All rights reserved.
#
. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
#
# Description:
# Verify the list of redacted snapshot guids as properties.
#
# Strategy:
# 1. Create a redacted dataset and receive it into another pool.
# 2. Verify that the redaction list in the bookmark (according to zdb)
# matches the list shown in the redact_snaps property.
# 3. Verify that the received snapshot has a matching redaction list.
#
# Test body: compares the redact_snaps guid list as reported for the
# bookmark, by zdb, and for the received snapshot.
typeset ds_name="props"
typeset sendfs="$POOL/$ds_name"
typeset recvfs="$POOL2/$ds_name"
typeset clone="$POOL/${ds_name}_clone"
typeset tmpdir="$(get_prop mountpoint $POOL)/tmp"
typeset stream=$(mktemp $tmpdir/stream.XXXX)
setup_dataset $ds_name ''
typeset mntpnt
log_onexit redacted_cleanup $sendfs $recvfs
# Verify a plain dataset, snapshot or bookmark has an empty list.
# An empty list is reported as "-" by get_prop.
log_must zfs snapshot $sendfs@empty_snapshot
log_must zfs bookmark $sendfs@empty_snapshot $sendfs#empty_bookmark
found_list=$(get_prop redact_snaps $sendfs)
[[ $found_list = "-" ]] || log_fail "Unexpected dataset list: $found_list"
found_list=$(get_prop redact_snaps $sendfs@empty_snapshot)
[[ $found_list = "-" ]] || log_fail "Unexpected snapshot list: $found_list"
found_list=$(get_prop redact_snaps $sendfs#empty_bookmark)
[[ $found_list = "-" ]] || log_fail "Unexpected bookmark list: $found_list"
# Fill in a different block in every clone.
# Clone $i overwrites the 64k block at index $i of f2.
for i in {1..16}; do
log_must zfs clone $sendfs@snap ${clone}$i
mntpnt=$(get_prop mountpoint ${clone}$i)
log_must dd if=/dev/urandom of=$mntpnt/f2 bs=64k count=1 seek=$i \
conv=notrunc
log_must zfs snapshot ${clone}$i@snap
done
log_must zfs redact $sendfs@snap book1 $clone{1..16}@snap
log_must eval "zfs send --redact book1 $sendfs@snap >$stream"
log_must eval "zfs recv $recvfs <$stream"
# NOTE(review): get_guid_list is defined outside this script; presumably it
# writes one guid per line to the given file, and the trailing "true" selects
# zdb as the source -- confirm against redacted.kshlib.
get_guid_list $tmpdir/prop_list $sendfs#book1
get_guid_list $tmpdir/zdb_list $sendfs#book1 true
get_guid_list $tmpdir/recvd_prop_list $recvfs@snap
# The list must contain one line per redaction snapshot (16).
count=$(wc -l $tmpdir/prop_list | awk '{print $1}')
[[ $count -eq 16 ]] || log_fail "Found incorrect number of redaction snapshots."
diff $tmpdir/prop_list $tmpdir/zdb_list || \
log_fail "Property list differed from zdb output"
diff $tmpdir/prop_list $tmpdir/recvd_prop_list || \
log_fail "Received property list differed from sent"
log_pass "The redaction list is consistent between sent and received datasets."

View File

@ -0,0 +1,87 @@
#!/bin/ksh
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2018 by Delphix. All rights reserved.
#
. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
#
# Description:
# Verify that resumable send works correctly with redacted streams.
#
# Strategy:
# 1. Do a full redacted resumable send.
# 2. Verify the received contents are correct.
# 3. Do an incremental redacted resumable send.
# 4. Verify the received contents are correct.
# 5. Verify that recv -A removes a partially received dataset.
#
typeset ds_name="resume"
typeset sendfs="$POOL/$ds_name"
typeset recvfs="$POOL2/$ds_name"
typeset clone="$POOL/${ds_name}_clone"
typeset clone1="$POOL/${ds_name}_clone1"
typeset tmpdir="$(get_prop mountpoint $POOL)/tmp"
typeset stream=$(mktemp $tmpdir/stream.XXXX)

# NOTE(review): setup_dataset lives in redacted.kshlib; everything below
# relies on it having produced $sendfs@snap and $clone -- confirm there.
setup_dataset $ds_name ''

typeset clone_mnt="$(get_prop mountpoint $clone)"
typeset send_mnt="$(get_prop mountpoint $sendfs)"
typeset recv_mnt="/$POOL2/$ds_name"

log_onexit redacted_cleanup $sendfs $recvfs

#
# Run get_diff on a sent/received file pair and fail the test unless its
# output equals the expected range string.
#	$1	expected get_diff output
#	$2	file on the sending side
#	$3	file on the receiving side
#
function expect_diff_range
{
	typeset expected=$1
	typeset sent_file=$2
	typeset recvd_file=$3
	typeset range

	log_must eval "get_diff $sent_file $recvd_file >$tmpdir/get_diff.out"
	range=$(<$tmpdir/get_diff.out)
	if [[ "$expected" != "$range" ]]; then
		log_fail "Unexpected range: $range"
	fi
}

# Overwrite part of f2 in the clone, then take the redaction snapshot.
log_must stride_dd -i /dev/urandom -o $clone_mnt/f2 -b 512 -c 64 -s 512
log_must zfs snapshot $clone@snap1

# Do the full resumable send
log_must zfs redact $sendfs@snap book1 $clone@snap1
resume_test "zfs send --redact book1 $sendfs@snap" $tmpdir $recvfs
log_must mount_redacted -f $recvfs
log_must set_tunable32 zfs_allow_redacted_dataset_mount 1

# f1 was not touched in the clone, so it must match; f2 must differ only
# in the expected range.
log_must diff $send_mnt/f1 $recv_mnt/f1
expect_diff_range "$RANGE9" $send_mnt/f2 $recv_mnt/f2

# Add f3 on the sending side, then overwrite part of it in a second clone
# whose snapshot serves as the redaction snapshot for the incremental.
log_must dd if=/dev/urandom of=$send_mnt/f3 bs=1024k count=3
log_must zfs snapshot $sendfs@snap2
log_must zfs clone $sendfs@snap2 $clone1
typeset clone1_mnt="$(get_prop mountpoint $clone1)"
log_must dd if=/dev/urandom of=$clone1_mnt/f3 bs=128k count=3 conv=notrunc
log_must zfs snapshot $clone1@snap

# Do the incremental resumable send
log_must zfs redact $sendfs@snap2 book2 $clone1@snap
resume_test "zfs send --redact book2 -i $sendfs#book1 $sendfs@snap2" $tmpdir $recvfs
log_must diff $send_mnt/f1 $recv_mnt/f1
log_must diff $send_mnt/f2 $recv_mnt/f2
expect_diff_range "$RANGE10" $send_mnt/f3 $recv_mnt/f3

# Test recv -A works properly: it is expected to fail both on the fully
# received dataset and once that dataset has been destroyed.
log_mustnot zfs recv -A $recvfs
log_must zfs destroy -R $recvfs
log_mustnot zfs recv -A $recvfs

# Feed only the first 64k of a full stream to a resumable receive so that
# a partially received dataset is left behind.
log_must eval "zfs send --redact book1 $sendfs@snap >$stream"
dd if=$stream bs=64k count=1 | log_mustnot zfs receive -s $recvfs
if [[ "-" = $(get_prop receive_resume_token $recvfs) ]]; then
	log_fail "Receive token not found."
fi

# Aborting the partial receive must remove the dataset.
log_must zfs recv -A $recvfs
log_must datasetnonexists $recvfs

log_pass "Resumable send works correctly with redacted streams."

View File

@ -0,0 +1,64 @@
#!/bin/ksh
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2018 by Delphix. All rights reserved.
#
. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
#
# Description:
# Verify that send size estimates of redacted sends work correctly
#
# Strategy:
# 1. Perform a redacted send with -nv and without, and verify the
# size estimate is the same as the size of the actual send.
# 2. Perform an incremental send from the redaction bookmark with
# -nv and without, and verify the size estimate is the same as
# the size of the actual send.
#
typeset ds_name="sizes"
typeset sendfs="$POOL/$ds_name"
typeset clone="$POOL/${ds_name}_clone2"
setup_dataset $ds_name "-o compress=lz4"
typeset tmpdir="$(get_prop mountpoint $POOL)/tmp"
# $size receives the estimate printed by "zfs send -nvP"; $size2 receives
# the byte count of the stream that is actually generated.
typeset size=$(mktemp $tmpdir/size.XXXX)
typeset size2=$(mktemp $tmpdir/size.XXXX)

log_onexit redacted_cleanup $sendfs $clone

#
# Compare the estimate stored in $size with the byte count stored in
# $size2 and fail the test if they disagree.
#	$1	label used in failure messages ("Full", "Incremental")
#
# The sends run inside pipelines, so a failing "zfs send" does not fail
# log_must. An empty estimate would then compare equal to a zero byte
# count (an empty string evaluates to 0 under -eq), so both values are
# validated before they are compared.
#
function check_sizes
{
	typeset label=$1
	typeset estimate=$(tr -d '[:space:]' <$size)
	typeset actual=$(tr -d '[:space:]' <$size2)

	[[ "$estimate" == +([0-9]) ]] || \
	    log_fail "$label send produced no usable size estimate: '$estimate'"
	[[ "$actual" == +([0-9]) && "$actual" -gt 0 ]] || \
	    log_fail "$label send produced no stream data: '$actual'"
	[[ "$estimate" -eq "$actual" ]] || \
	    log_fail "$label sizes differ: estimate $estimate and actual $actual"
}

# Build the redaction snapshot: a clone of the sent snapshot with every
# file removed.
log_must zfs clone $sendfs@snap $clone
typeset clone_mnt="$(get_prop mountpoint $clone)"
# Never let the glob below expand to "/*" if the lookup came back empty.
[[ -d "$clone_mnt" ]] || log_fail "Clone mountpoint not found: '$clone_mnt'"
log_must rm -rf $clone_mnt/*
log_must zfs snapshot $clone@snap
log_must zfs redact $sendfs@snap book $clone@snap

# Full redacted send: estimate versus actual stream size.
log_must eval "zfs send -nvP --redact book $sendfs@snap | \
    awk '/^size/ {print \$2}' >$size"
log_must eval "zfs send --redact book $sendfs@snap | wc -c >$size2"
check_sizes "Full"

# Incremental send from the redaction bookmark: estimate versus actual.
log_must zfs snapshot $sendfs@snap2
log_must eval "zfs send -nvP -i $sendfs#book $sendfs@snap2 | \
    awk '/^size/ {print \$2}' >$size"
log_must eval "zfs send -i $sendfs#book $sendfs@snap2 | wc -c >$size2"
check_sizes "Incremental"

log_pass "Size estimates of redacted sends estimate accurately."

View File

@ -0,0 +1,105 @@
#!/bin/ksh
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2018 by Delphix. All rights reserved.
#
. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
#
# Description:
# Verify that redacted send works on volumes.
#
# Strategy:
# 1. Write to a volume, then make a clone of that volume.
# 2. Receive a redacted stream that sends all blocks.
# 3. Receive a redacted stream that redacts the first half of the written area.
#
typeset ds_name="volume"
typeset sendvol="$POOL/$ds_name"
typeset recvvol="$POOL2/$ds_name"
typeset clone="$POOL/${ds_name}_clone"
typeset tmpdir="$(get_prop mountpoint $POOL)/tmp"
typeset stream=$(mktemp $tmpdir/stream.XXXX)
typeset send_file="/dev/zvol/$sendvol"
typeset recv_file="/dev/zvol/$recvvol"
typeset clone_file="/dev/zvol/$clone"

#
# Pause, export and re-import a pool, then poll until a device node shows
# up as a block device.
#	$1	pool to export and import
#	$2	device path to wait for
#
# Running out of polling attempts is not treated as an error here; the
# caller's next access to the device will fail instead.
#
function reimport_and_wait
{
	typeset pool=$1
	typeset dev=$2
	typeset t

	sleep 10
	log_must zpool export $pool
	log_must zpool import $pool
	udevadm settle
	if [[ -b $dev ]]; then
		return
	fi

	udevadm settle
	for t in 10 5 3 2 1; do
		log_note "Polling $t seconds for device file."
		udevadm settle
		sleep $t
		if [[ -b $dev ]]; then
			break
		fi
	done
}

log_onexit redacted_cleanup $sendvol $recvvol

# Create the source volume and fill its first 64 8k blocks.
log_must zfs create -b 8k -V 1g $sendvol
reimport_and_wait $POOL $send_file
log_must dd if=/dev/urandom of=$send_file bs=8k count=64
log_must zfs snapshot $sendvol@snap

# The first redaction snapshot comes from an unmodified clone.
log_must zfs clone $sendvol@snap $clone
log_must zfs snapshot $clone@snap

log_must set_tunable32 zfs_allow_redacted_dataset_mount 1

# Send with book1 and check that all 64 written blocks match.
log_must zfs redact $sendvol@snap book1 $clone@snap
log_must eval "zfs send --redact book1 $sendvol@snap >$stream"
log_must eval "zfs recv $recvvol <$stream"
reimport_and_wait $POOL2 $recv_file
log_must dd if=$send_file of=$tmpdir/send.dd bs=8k count=64
log_must dd if=$recv_file of=$tmpdir/recv.dd bs=8k count=64
log_must diff $tmpdir/send.dd $tmpdir/recv.dd
log_must zfs destroy -R $recvvol

# Overwrite the first 32 blocks in the clone and build a second bookmark
# from the resulting snapshot.
log_must dd if=/dev/urandom of=$clone_file bs=8k count=32
log_must zfs snapshot $clone@snap1
log_must zfs redact $sendvol@snap book2 $clone@snap1
log_must eval "zfs send --redact book2 $sendvol@snap >$stream"
log_must eval "zfs recv $recvvol <$stream"
reimport_and_wait $POOL2 $recv_file

# Only blocks 32-63, which the clone left alone, are compared.
log_must dd if=$send_file of=$tmpdir/send.dd bs=8k count=32 skip=32
log_must dd if=$recv_file of=$tmpdir/recv.dd bs=8k count=32 skip=32
log_must diff $tmpdir/send.dd $tmpdir/recv.dd

log_pass "Redacted send works correctly with volumes."

Some files were not shown because too many files have changed in this diff Show More