4757 ZFS embedded-data block pointers ("zero block compression")

4913 zfs release should not be subject to space checks

Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Max Grossman <max.grossman@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Approved by: Dan McDonald <danmcd@omniti.com>

illumos/illumos-dist@5d7b4d438c
This commit is contained in:
delphij 2014-06-17 07:35:54 +00:00
parent 3f19f79ed7
commit c2a66ac6e9
10 changed files with 267 additions and 56 deletions

View File

@ -1032,8 +1032,17 @@ snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp)
return;
}
blkbuf[0] = '\0';
if (BP_IS_EMBEDDED(bp)) {
(void) sprintf(blkbuf,
"EMBEDDED et=%u %llxL/%llxP B=%llu",
(int)BPE_GET_ETYPE(bp),
(u_longlong_t)BPE_GET_LSIZE(bp),
(u_longlong_t)BPE_GET_PSIZE(bp),
(u_longlong_t)bp->blk_birth);
return;
}
blkbuf[0] = '\0';
for (int i = 0; i < ndvas; i++)
(void) snprintf(blkbuf + strlen(blkbuf),
buflen - strlen(blkbuf), "%llu:%llx:%llx ",
@ -1051,7 +1060,7 @@ snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp)
"%llxL/%llxP F=%llu B=%llu/%llu",
(u_longlong_t)BP_GET_LSIZE(bp),
(u_longlong_t)BP_GET_PSIZE(bp),
(u_longlong_t)bp->blk_fill,
(u_longlong_t)BP_GET_FILL(bp),
(u_longlong_t)bp->blk_birth,
(u_longlong_t)BP_PHYSICAL_BIRTH(bp));
}
@ -1064,8 +1073,10 @@ print_indirect(blkptr_t *bp, const zbookmark_t *zb,
char blkbuf[BP_SPRINTF_LEN];
int l;
ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
if (!BP_IS_EMBEDDED(bp)) {
ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
}
(void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb));
@ -1119,10 +1130,10 @@ visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
err = visit_indirect(spa, dnp, cbp, &czb);
if (err)
break;
fill += cbp->blk_fill;
fill += BP_GET_FILL(cbp);
}
if (!err)
ASSERT3U(fill, ==, bp->blk_fill);
ASSERT3U(fill, ==, BP_GET_FILL(bp));
(void) arc_buf_remove_ref(buf, &buf);
}
@ -1789,14 +1800,14 @@ dump_dir(objset_t *os)
if (dds.dds_type == DMU_OST_META) {
dds.dds_creation_txg = TXG_INITIAL;
usedobjs = os->os_rootbp->blk_fill;
usedobjs = BP_GET_FILL(os->os_rootbp);
refdbytes = os->os_spa->spa_dsl_pool->
dp_mos_dir->dd_phys->dd_used_bytes;
} else {
dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch);
}
ASSERT3U(usedobjs, ==, os->os_rootbp->blk_fill);
ASSERT3U(usedobjs, ==, BP_GET_FILL(os->os_rootbp));
zdb_nicenum(refdbytes, numbuf);
@ -2107,6 +2118,9 @@ typedef struct zdb_cb {
zdb_blkstats_t zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
uint64_t zcb_dedup_asize;
uint64_t zcb_dedup_blocks;
uint64_t zcb_embedded_blocks[NUM_BP_EMBEDDED_TYPES];
uint64_t zcb_embedded_histogram[NUM_BP_EMBEDDED_TYPES]
[BPE_PAYLOAD_SIZE];
uint64_t zcb_start;
uint64_t zcb_lastprint;
uint64_t zcb_totalasize;
@ -2161,6 +2175,13 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
}
if (BP_IS_EMBEDDED(bp)) {
zcb->zcb_embedded_blocks[BPE_GET_ETYPE(bp)]++;
zcb->zcb_embedded_histogram[BPE_GET_ETYPE(bp)]
[BPE_GET_PSIZE(bp)]++;
return;
}
if (dump_opt['L'])
return;
@ -2258,7 +2279,8 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type));
if (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata)) {
if (!BP_IS_EMBEDDED(bp) &&
(dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata))) {
size_t size = BP_GET_PSIZE(bp);
void *data = zio_data_buf_alloc(size);
int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;
@ -2450,7 +2472,7 @@ dump_block_stats(spa_t *spa)
zdb_blkstats_t *zb, *tzb;
uint64_t norm_alloc, norm_space, total_alloc, total_found;
int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD;
int leaks = 0;
boolean_t leaks = B_FALSE;
(void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n",
(dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
@ -2538,7 +2560,7 @@ dump_block_stats(spa_t *spa)
(u_longlong_t)total_alloc,
(dump_opt['L']) ? "unreachable" : "leaked",
(longlong_t)(total_alloc - total_found));
leaks = 1;
leaks = B_TRUE;
}
if (tzb->zb_count == 0)
@ -2570,6 +2592,23 @@ dump_block_stats(spa_t *spa)
(void) printf("\tSPA allocated: %10llu used: %5.2f%%\n",
(u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space);
for (bp_embedded_type_t i = 0; i < NUM_BP_EMBEDDED_TYPES; i++) {
if (zcb.zcb_embedded_blocks[i] == 0)
continue;
(void) printf("\n");
(void) printf("\tadditional, non-pointer bps of type %u: "
"%10llu\n",
i, (u_longlong_t)zcb.zcb_embedded_blocks[i]);
if (dump_opt['b'] >= 3) {
(void) printf("\t number of (compressed) bytes: "
"number of bps\n");
dump_histogram(zcb.zcb_embedded_histogram[i],
sizeof (zcb.zcb_embedded_histogram[i]) /
sizeof (zcb.zcb_embedded_histogram[i][0]), 0);
}
}
if (tzb->zb_ditto_samevdev != 0) {
(void) printf("\tDittoed blocks on same vdev: %llu\n",
(longlong_t)tzb->zb_ditto_samevdev);
@ -2682,14 +2721,14 @@ zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
avl_index_t where;
zdb_ddt_entry_t *zdde, zdde_search;
if (BP_IS_HOLE(bp))
if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
return (0);
if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) {
(void) printf("traversing objset %llu, %llu objects, "
"%lu blocks so far\n",
(u_longlong_t)zb->zb_objset,
(u_longlong_t)bp->blk_fill,
(u_longlong_t)BP_GET_FILL(bp),
avl_numnodes(t));
}

View File

@ -256,9 +256,9 @@ get_usage(zfs_help_t idx)
case HELP_ROLLBACK:
return (gettext("\trollback [-rRf] <snapshot>\n"));
case HELP_SEND:
return (gettext("\tsend [-DnPpRv] [-[iI] snapshot] "
return (gettext("\tsend [-DnPpRve] [-[iI] snapshot] "
"<snapshot>\n"
"\tsend [-i snapshot|bookmark] "
"\tsend [-e] [-i snapshot|bookmark] "
"<filesystem|volume|snapshot>\n"));
case HELP_SET:
return (gettext("\tset <property=value> "
@ -573,6 +573,7 @@ finish_progress(char *done)
free(pt_header);
pt_header = NULL;
}
/*
* zfs clone [-p] [-o prop=value] ... <snap> <fs | vol>
*
@ -3299,6 +3300,7 @@ rollback_check_dependent(zfs_handle_t *zhp, void *data)
zfs_close(zhp);
return (0);
}
/*
* Report any snapshots more recent than the one specified. Used when '-r' is
* not specified. We reuse this same callback for the snapshot dependents - if
@ -3638,7 +3640,7 @@ zfs_do_send(int argc, char **argv)
boolean_t extraverbose = B_FALSE;
/* check options */
while ((c = getopt(argc, argv, ":i:I:RDpvnP")) != -1) {
while ((c = getopt(argc, argv, ":i:I:RDpvnPe")) != -1) {
switch (c) {
case 'i':
if (fromname)
@ -3673,6 +3675,9 @@ zfs_do_send(int argc, char **argv)
case 'n':
flags.dryrun = B_TRUE;
break;
case 'e':
flags.embed_data = B_TRUE;
break;
case ':':
(void) fprintf(stderr, gettext("missing argument for "
"'%c' option\n"), optopt);
@ -3711,6 +3716,7 @@ zfs_do_send(int argc, char **argv)
if (strchr(argv[0], '@') == NULL ||
(fromname && strchr(fromname, '#') != NULL)) {
char frombuf[ZFS_MAXNAMELEN];
enum lzc_send_flags lzc_flags = 0;
if (flags.replicate || flags.doall || flags.props ||
flags.dedup || flags.dryrun || flags.verbose ||
@ -3725,6 +3731,9 @@ zfs_do_send(int argc, char **argv)
if (zhp == NULL)
return (1);
if (flags.embed_data)
lzc_flags |= LZC_SEND_FLAG_EMBED_DATA;
if (fromname != NULL &&
(fromname[0] == '#' || fromname[0] == '@')) {
/*
@ -3738,7 +3747,7 @@ zfs_do_send(int argc, char **argv)
(void) strlcat(frombuf, fromname, sizeof (frombuf));
fromname = frombuf;
}
err = zfs_send_one(zhp, fromname, STDOUT_FILENO);
err = zfs_send_one(zhp, fromname, STDOUT_FILENO, lzc_flags);
zfs_close(zhp);
return (err != 0);
}

View File

@ -49,7 +49,6 @@
*/
#define DUMP_GROUPING 4
uint64_t drr_record_count[DRR_NUMTYPES];
uint64_t total_write_size = 0;
uint64_t total_stream_len = 0;
FILE *send_stream = 0;
@ -123,7 +122,7 @@ print_block(char *buf, int length)
* Start printing ASCII characters at a constant offset, after
* the hex prints. Leave 3 characters per byte on a line (2 digit
* hex number plus 1 space) plus spaces between characters and
* groupings
* groupings.
*/
int ascii_start = BYTES_PER_LINE * 3 +
BYTES_PER_LINE / DUMP_GROUPING + 2;
@ -160,6 +159,8 @@ int
main(int argc, char *argv[])
{
char *buf = malloc(INITIAL_BUFLEN);
uint64_t drr_record_count[DRR_NUMTYPES] = { 0 };
uint64_t total_records = 0;
dmu_replay_record_t thedrr;
dmu_replay_record_t *drr = &thedrr;
struct drr_begin *drrb = &thedrr.drr_u.drr_begin;
@ -170,6 +171,7 @@ main(int argc, char *argv[])
struct drr_write_byref *drrwbr = &thedrr.drr_u.drr_write_byref;
struct drr_free *drrf = &thedrr.drr_u.drr_free;
struct drr_spill *drrs = &thedrr.drr_u.drr_spill;
struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded;
char c;
boolean_t verbose = B_FALSE;
boolean_t first = B_TRUE;
@ -264,6 +266,7 @@ main(int argc, char *argv[])
}
drr_record_count[drr->drr_type]++;
total_records++;
switch (drr->drr_type) {
case DRR_BEGIN:
@ -376,8 +379,8 @@ main(int argc, char *argv[])
drro->drr_bonuslen);
}
if (drro->drr_bonuslen > 0) {
(void) ssread(buf, P2ROUNDUP(drro->drr_bonuslen,
8), &zc);
(void) ssread(buf,
P2ROUNDUP(drro->drr_bonuslen, 8), &zc);
if (dump) {
print_block(buf,
P2ROUNDUP(drro->drr_bonuslen, 8));
@ -506,6 +509,38 @@ main(int argc, char *argv[])
print_block(buf, drrs->drr_length);
}
break;
case DRR_WRITE_EMBEDDED:
if (do_byteswap) {
drrwe->drr_object =
BSWAP_64(drrwe->drr_object);
drrwe->drr_offset =
BSWAP_64(drrwe->drr_offset);
drrwe->drr_length =
BSWAP_64(drrwe->drr_length);
drrwe->drr_toguid =
BSWAP_64(drrwe->drr_toguid);
drrwe->drr_lsize =
BSWAP_32(drrwe->drr_lsize);
drrwe->drr_psize =
BSWAP_32(drrwe->drr_psize);
}
if (verbose) {
(void) printf("WRITE_EMBEDDED object = %llu "
"offset = %llu length = %llu\n"
"toguid = %llx comp = %u etype = %u "
"lsize = %u psize = %u\n",
(u_longlong_t)drrwe->drr_object,
(u_longlong_t)drrwe->drr_offset,
(u_longlong_t)drrwe->drr_length,
(u_longlong_t)drrwe->drr_toguid,
drrwe->drr_compression,
drrwe->drr_etype,
drrwe->drr_lsize,
drrwe->drr_psize);
}
(void) ssread(buf,
P2ROUNDUP(drrwe->drr_psize, 8), &zc);
break;
}
pcksum = zc;
}
@ -524,18 +559,16 @@ main(int argc, char *argv[])
(u_longlong_t)drr_record_count[DRR_FREEOBJECTS]);
(void) printf("\tTotal DRR_WRITE records = %lld\n",
(u_longlong_t)drr_record_count[DRR_WRITE]);
(void) printf("\tTotal DRR_WRITE_BYREF records = %lld\n",
(u_longlong_t)drr_record_count[DRR_WRITE_BYREF]);
(void) printf("\tTotal DRR_WRITE_EMBEDDED records = %lld\n",
(u_longlong_t)drr_record_count[DRR_WRITE_EMBEDDED]);
(void) printf("\tTotal DRR_FREE records = %lld\n",
(u_longlong_t)drr_record_count[DRR_FREE]);
(void) printf("\tTotal DRR_SPILL records = %lld\n",
(u_longlong_t)drr_record_count[DRR_SPILL]);
(void) printf("\tTotal records = %lld\n",
(u_longlong_t)(drr_record_count[DRR_BEGIN] +
drr_record_count[DRR_OBJECT] +
drr_record_count[DRR_FREEOBJECTS] +
drr_record_count[DRR_WRITE] +
drr_record_count[DRR_FREE] +
drr_record_count[DRR_SPILL] +
drr_record_count[DRR_END]));
(u_longlong_t)total_records);
(void) printf("\tTotal write size = %lld (0x%llx)\n",
(u_longlong_t)total_write_size, (u_longlong_t)total_write_size);
(void) printf("\tTotal stream length = %lld (0x%llx)\n",

View File

@ -52,7 +52,7 @@
* At random times, the child self-immolates with a SIGKILL.
* This is the software equivalent of pulling the power cord.
* The parent then runs the test again, using the existing
* storage pool, as many times as desired. If backwards compatability
* storage pool, as many times as desired. If backwards compatibility
* testing is enabled ztest will sometimes run the "older" version
* of ztest after a SIGKILL.
*
@ -1265,13 +1265,13 @@ static void
ztest_bt_verify(ztest_block_tag_t *bt, objset_t *os, uint64_t object,
uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg)
{
ASSERT(bt->bt_magic == BT_MAGIC);
ASSERT(bt->bt_objset == dmu_objset_id(os));
ASSERT(bt->bt_object == object);
ASSERT(bt->bt_offset == offset);
ASSERT(bt->bt_gen <= gen);
ASSERT(bt->bt_txg <= txg);
ASSERT(bt->bt_crtxg == crtxg);
ASSERT3U(bt->bt_magic, ==, BT_MAGIC);
ASSERT3U(bt->bt_objset, ==, dmu_objset_id(os));
ASSERT3U(bt->bt_object, ==, object);
ASSERT3U(bt->bt_offset, ==, offset);
ASSERT3U(bt->bt_gen, <=, gen);
ASSERT3U(bt->bt_txg, <=, txg);
ASSERT3U(bt->bt_crtxg, ==, crtxg);
}
static ztest_block_tag_t *
@ -3470,6 +3470,11 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id)
if (error)
fatal(0, "dmu_objset_own(%s) = %d", snap2name, error);
error = dsl_dataset_promote(clone2name, NULL);
if (error == ENOSPC) {
dmu_objset_disown(os, FTAG);
ztest_record_enospc(FTAG);
goto out;
}
if (error != EBUSY)
fatal(0, "dsl_dataset_promote(%s), %d, not EBUSY", clone2name,
error);
@ -3625,11 +3630,19 @@ ztest_dmu_read_write(ztest_ds_t *zd, uint64_t id)
return;
}
dmu_object_set_checksum(os, bigobj,
(enum zio_checksum)ztest_random_dsl_prop(ZFS_PROP_CHECKSUM), tx);
enum zio_checksum cksum;
do {
cksum = (enum zio_checksum)
ztest_random_dsl_prop(ZFS_PROP_CHECKSUM);
} while (cksum >= ZIO_CHECKSUM_LEGACY_FUNCTIONS);
dmu_object_set_checksum(os, bigobj, cksum, tx);
dmu_object_set_compress(os, bigobj,
(enum zio_compress)ztest_random_dsl_prop(ZFS_PROP_COMPRESSION), tx);
enum zio_compress comp;
do {
comp = (enum zio_compress)
ztest_random_dsl_prop(ZFS_PROP_COMPRESSION);
} while (comp >= ZIO_COMPRESS_LEGACY_FUNCTIONS);
dmu_object_set_compress(os, bigobj, comp, tx);
/*
* For each index from n to n + s, verify that the existing bufwad
@ -4709,8 +4722,13 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id)
error = dsl_dataset_user_hold(holds, 0, NULL);
fnvlist_free(holds);
if (error)
fatal(0, "dsl_dataset_user_hold(%s)", fullname, tag);
if (error == ENOSPC) {
ztest_record_enospc("dsl_dataset_user_hold");
goto out;
} else if (error) {
fatal(0, "dsl_dataset_user_hold(%s, %s) = %u",
fullname, tag, error);
}
error = dsl_destroy_snapshot(fullname, B_FALSE);
if (error != EBUSY) {
@ -5163,7 +5181,7 @@ ztest_run_zdb(char *pool)
isa = strdup(isa);
/* LINTED */
(void) sprintf(bin,
"/usr/sbin%.*s/zdb -bcc%s%s -U %s %s",
"/usr/sbin%.*s/zdb -bcc%s%s -d -U %s %s",
isalen,
isa,
ztest_opts.zo_verbose >= 3 ? "s" : "",

View File

@ -39,6 +39,7 @@
#include <sys/fs/zfs.h>
#include <sys/avl.h>
#include <ucred.h>
#include <libzfs_core.h>
#ifdef __cplusplus
extern "C" {
@ -589,13 +590,16 @@ typedef struct sendflags {
/* show progress (ie. -v) */
boolean_t progress;
/* WRITE_EMBEDDED records of type DATA are permitted */
boolean_t embed_data;
} sendflags_t;
typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *);
extern int zfs_send(zfs_handle_t *, const char *, const char *,
sendflags_t *, int, snapfilter_cb_t, void *, nvlist_t **);
extern int zfs_send_one(zfs_handle_t *, const char *, int);
extern int zfs_send_one(zfs_handle_t *, const char *, int, enum lzc_send_flags);
extern int zfs_promote(zfs_handle_t *);
extern int zfs_hold(zfs_handle_t *, const char *, const char *,

View File

@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
*/
@ -42,6 +42,7 @@
#include <time.h>
#include <libzfs.h>
#include <libzfs_core.h>
#include "zfs_namecheck.h"
#include "zfs_prop.h"
@ -213,6 +214,7 @@ cksummer(void *arg)
struct drr_object *drro = &thedrr.drr_u.drr_object;
struct drr_write *drrw = &thedrr.drr_u.drr_write;
struct drr_spill *drrs = &thedrr.drr_u.drr_spill;
struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded;
FILE *ofp;
int outfd;
dmu_replay_record_t wbr_drr = {0};
@ -409,6 +411,20 @@ cksummer(void *arg)
break;
}
case DRR_WRITE_EMBEDDED:
{
if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
&stream_cksum, outfd) == -1)
goto out;
(void) ssread(buf,
P2ROUNDUP((uint64_t)drrwe->drr_psize, 8), ofp);
if (cksum_and_write(buf,
P2ROUNDUP((uint64_t)drrwe->drr_psize, 8),
&stream_cksum, outfd) == -1)
goto out;
break;
}
case DRR_FREE:
{
if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
@ -790,7 +806,7 @@ typedef struct send_dump_data {
char prevsnap[ZFS_MAXNAMELEN];
uint64_t prevsnap_obj;
boolean_t seenfrom, seento, replicate, doall, fromorigin;
boolean_t verbose, dryrun, parsable, progress;
boolean_t verbose, dryrun, parsable, progress, embed_data;
int outfd;
boolean_t err;
nvlist_t *fss;
@ -870,7 +886,8 @@ estimate_ioctl(zfs_handle_t *zhp, uint64_t fromsnap_obj,
*/
static int
dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
boolean_t fromorigin, int outfd, nvlist_t *debugnv)
boolean_t fromorigin, int outfd, enum lzc_send_flags flags,
nvlist_t *debugnv)
{
zfs_cmd_t zc = { 0 };
libzfs_handle_t *hdl = zhp->zfs_hdl;
@ -884,6 +901,7 @@ dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
zc.zc_obj = fromorigin;
zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
zc.zc_fromobj = fromsnap_obj;
zc.zc_flags = flags;
VERIFY(0 == nvlist_alloc(&thisdbg, NV_UNIQUE_NAME, 0));
if (fromsnap && fromsnap[0] != '\0') {
@ -1134,8 +1152,12 @@ dump_snapshot(zfs_handle_t *zhp, void *arg)
}
}
enum lzc_send_flags flags = 0;
if (sdd->embed_data)
flags |= LZC_SEND_FLAG_EMBED_DATA;
err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj,
fromorigin, sdd->outfd, sdd->debugnv);
fromorigin, sdd->outfd, flags, sdd->debugnv);
if (sdd->progress) {
(void) pthread_cancel(tid);
@ -1479,6 +1501,7 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
sdd.parsable = flags->parsable;
sdd.progress = flags->progress;
sdd.dryrun = flags->dryrun;
sdd.embed_data = flags->embed_data;
sdd.filter_cb = filter_func;
sdd.filter_cb_arg = cb_arg;
if (debugnvp)
@ -1610,7 +1633,8 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
}
int
zfs_send_one(zfs_handle_t *zhp, const char *from, int fd)
zfs_send_one(zfs_handle_t *zhp, const char *from, int fd,
enum lzc_send_flags flags)
{
int err;
libzfs_handle_t *hdl = zhp->zfs_hdl;
@ -1619,7 +1643,7 @@ zfs_send_one(zfs_handle_t *zhp, const char *from, int fd)
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"warning: cannot send '%s'"), zhp->zfs_name);
err = lzc_send(zhp->zfs_name, from, fd);
err = lzc_send(zhp->zfs_name, from, fd, flags);
if (err != 0) {
switch (errno) {
case EXDEV:
@ -2537,6 +2561,16 @@ recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
(void) recv_read(hdl, fd, buf,
drr->drr_u.drr_spill.drr_length, B_FALSE, NULL);
break;
case DRR_WRITE_EMBEDDED:
if (byteswap) {
drr->drr_u.drr_write_embedded.drr_psize =
BSWAP_32(drr->drr_u.drr_write_embedded.
drr_psize);
}
(void) recv_read(hdl, fd, buf,
P2ROUNDUP(drr->drr_u.drr_write_embedded.drr_psize,
8), B_FALSE, NULL);
break;
case DRR_WRITE_BYREF:
case DRR_FREEOBJECTS:
case DRR_FREE:

View File

@ -439,6 +439,8 @@ lzc_get_holds(const char *snapname, nvlist_t **holdsp)
}
/*
* Generate a zfs send stream for the specified snapshot and write it to
* the specified file descriptor.
*
* "snapname" is the full name of the snapshot to send (e.g. "pool/fs@snap")
*
@ -452,9 +454,15 @@ lzc_get_holds(const char *snapname, nvlist_t **holdsp)
* snapshot in the origin, etc.
*
* "fd" is the file descriptor to write the send stream to.
*
* If "flags" contains LZC_SEND_FLAG_EMBED_DATA, the stream is permitted
* to contain DRR_WRITE_EMBEDDED records with drr_etype==BP_EMBEDDED_TYPE_DATA,
* which the receiving system must support (as indicated by support
* for the "embedded_data" feature).
*/
int
lzc_send(const char *snapname, const char *from, int fd)
lzc_send(const char *snapname, const char *from, int fd,
enum lzc_send_flags flags)
{
nvlist_t *args;
int err;
@ -463,6 +471,8 @@ lzc_send(const char *snapname, const char *from, int fd)
fnvlist_add_int32(args, "fd", fd);
if (from != NULL)
fnvlist_add_string(args, "fromsnap", from);
if (flags & LZC_SEND_FLAG_EMBED_DATA)
fnvlist_add_boolean(args, "embedok");
err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL);
nvlist_free(args);
return (err);

View File

@ -52,7 +52,11 @@ int lzc_hold(nvlist_t *, int, nvlist_t **);
int lzc_release(nvlist_t *, nvlist_t **);
int lzc_get_holds(const char *, nvlist_t **);
int lzc_send(const char *, const char *, int);
enum lzc_send_flags {
LZC_SEND_FLAG_EMBED_DATA = 1 << 0
};
int lzc_send(const char *, const char *, int, enum lzc_send_flags);
int lzc_receive(const char *, nvlist_t *, const char *, boolean_t, int);
int lzc_send_space(const char *, const char *, uint64_t *);

View File

@ -176,12 +176,12 @@ zfs \- configures ZFS file systems
.LP
.nf
\fBzfs\fR \fBsend\fR [\fB-DnPpRv\fR] [\fB-\fR[\fBiI\fR] \fIsnapshot\fR] \fIsnapshot\fR
\fBzfs\fR \fBsend\fR [\fB-DnPpRve\fR] [\fB-\fR[\fBiI\fR] \fIsnapshot\fR] \fIsnapshot\fR
.fi
.LP
.nf
\fBzfs\fR \fBsend\fR [\fB-i \fIsnapshot\fR|\fIbookmark\fR]\fR \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR
\fBzfs\fR \fBsend\fR [\fB-e\fR] [\fB-i \fIsnapshot\fR|\fIbookmark\fR]\fR \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR
.fi
.LP
@ -2923,7 +2923,7 @@ See \fBzpool-features\fR(5) for details on ZFS feature flags and the
.sp
.ne 2
.na
\fBzfs send\fR [\fB-DnPpRv\fR] [\fB-\fR[\fBiI\fR] \fIsnapshot\fR] \fIsnapshot\fR
\fBzfs send\fR [\fB-DnPpRve\fR] [\fB-\fR[\fBiI\fR] \fIsnapshot\fR] \fIsnapshot\fR
.ad
.sp .6
.RS 4n
@ -2996,6 +2996,23 @@ will be much better if the filesystem uses a dedup-capable checksum (eg.
\fBsha256\fR).
.RE
.sp
.ne 2
.na
\fB\fB-e\fR\fR
.ad
.sp .6
.RS 4n
Generate a more compact stream by using WRITE_EMBEDDED records for blocks
which are stored more compactly on disk by the \fBembedded_data\fR pool
feature. This flag has no effect if the \fBembedded_data\fR feature is
disabled. The receiving system must have the \fBembedded_data\fR feature
enabled. If the \fBlz4_compress\fR feature is active on the sending system,
then the receiving system must have that feature enabled as well. See
\fBzpool-features\fR(5) for details on ZFS feature flags and the
\fBembedded_data\fR feature.
.RE
.sp
.ne 2
.na
@ -3047,7 +3064,7 @@ on future versions of \fBZFS\fR.
.sp
.ne 2
.na
\fBzfs send\fR [\fB-i\fR \fIsnapshot\fR|\fIbookmark\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR
\fBzfs send\fR [\fB-e\fR] [\fB-i\fR \fIsnapshot\fR|\fIbookmark\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR
.ad
.sp .6
.RS 4n
@ -3075,6 +3092,22 @@ be the origin snapshot, or an earlier snapshot in the origin's filesystem,
or the origin's origin, etc.
.RE
.sp
.ne 2
.na
\fB\fB-e\fR\fR
.ad
.sp .6
.RS 4n
Generate a more compact stream by using WRITE_EMBEDDED records for blocks
which are stored more compactly on disk by the \fBembedded_data\fR pool
feature. This flag has no effect if the \fBembedded_data\fR feature is
disabled. The receiving system must have the \fBembedded_data\fR feature
enabled. If the \fBlz4_compress\fR feature is active on the sending system,
then the receiving system must have that feature enabled as well. See
\fBzpool-features\fR(5) for details on ZFS feature flags and the
\fBembedded_data\fR feature.
.RE
.RE
.sp

View File

@ -401,6 +401,33 @@ never return to being \fBenabled\fR.
.RE
.sp
.ne 2
.na
\fB\fBembedded_data\fR\fR
.ad
.RS 4n
.TS
l l .
GUID com.delphix:embedded_data
READ\-ONLY COMPATIBLE no
DEPENDENCIES none
.TE
This feature improves the performance and compression ratio of
highly-compressible blocks. Blocks whose contents can compress to 112 bytes
or smaller can take advantage of this feature.
When this feature is enabled, the contents of highly-compressible blocks are
stored in the block "pointer" itself (a misnomer in this case, as it contains
the compressed data, rather than a pointer to its location on disk).  Thus
the space of the block (one sector, typically 512 bytes or 4KB) is saved,
and no additional i/o is needed to read and write the data block.
This feature becomes \fBactive\fR as soon as it is enabled and will
never return to being \fBenabled\fR.
.RE
.SH "SEE ALSO"
\fBzpool\fR(1M)