7252 7628 compressed zfs send / receive

illumos/illumos-gate@5602294fda
5602294fda

https://www.illumos.org/issues/7252
  This feature includes code to allow a system with compressed ARC enabled to
  send data in its compressed form straight out of the ARC, and receive data in
  its compressed form directly into the ARC.

https://www.illumos.org/issues/7628
  We should have longer, more readable versions of the ZFS send / recv options.

7628 create long versions of ZFS send / receive options

Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: John Kennedy <john.kennedy@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Paul Dagnelie <pcd@delphix.com>
Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed by: Sebastien Roy <sebastien.roy@delphix.com>
Reviewed by: David Quigley <dpquigl@davequigley.com>
Reviewed by: Thomas Caputi <tcaputi@datto.com>
Approved by: Dan McDonald <danmcd@omniti.com>
Author: Dan Kimmel <dan.kimmel@delphix.com>
This commit is contained in:
Andriy Gapon 2017-04-14 18:07:43 +00:00
parent 9c03f5f793
commit b3264caf7b
24 changed files with 1293 additions and 659 deletions

View File

@ -34,6 +34,7 @@
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <getopt.h>
#include <libgen.h>
#include <libintl.h>
#include <libuutil.h>
@ -262,7 +263,7 @@ get_usage(zfs_help_t idx)
case HELP_ROLLBACK:
return (gettext("\trollback [-rRf] <snapshot>\n"));
case HELP_SEND:
return (gettext("\tsend [-DnPpRvLe] [-[iI] snapshot] "
return (gettext("\tsend [-DnPpRvLec] [-[iI] snapshot] "
"<snapshot>\n"
"\tsend [-Le] [-i snapshot|bookmark] "
"<filesystem|volume|snapshot>\n"
@ -3704,8 +3705,23 @@ zfs_do_send(int argc, char **argv)
nvlist_t *dbgnv = NULL;
boolean_t extraverbose = B_FALSE;
struct option long_options[] = {
{"replicate", no_argument, NULL, 'R'},
{"props", no_argument, NULL, 'p'},
{"parsable", no_argument, NULL, 'P'},
{"dedup", no_argument, NULL, 'D'},
{"verbose", no_argument, NULL, 'v'},
{"dryrun", no_argument, NULL, 'n'},
{"large-block", no_argument, NULL, 'L'},
{"embed", no_argument, NULL, 'e'},
{"resume", required_argument, NULL, 't'},
{"compressed", no_argument, NULL, 'c'},
{0, 0, 0, 0}
};
/* check options */
while ((c = getopt(argc, argv, ":i:I:RDpvnPLet:")) != -1) {
while ((c = getopt_long(argc, argv, ":i:I:RbDpvnPLet:c", long_options,
NULL)) != -1) {
switch (c) {
case 'i':
if (fromname)
@ -3749,12 +3765,17 @@ zfs_do_send(int argc, char **argv)
case 't':
resume_token = optarg;
break;
case 'c':
flags.compress = B_TRUE;
break;
case ':':
(void) fprintf(stderr, gettext("missing argument for "
"'%c' option\n"), optopt);
usage(B_FALSE);
break;
case '?':
/*FALLTHROUGH*/
default:
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
usage(B_FALSE);
@ -3825,6 +3846,8 @@ zfs_do_send(int argc, char **argv)
lzc_flags |= LZC_SEND_FLAG_LARGE_BLOCK;
if (flags.embed_data)
lzc_flags |= LZC_SEND_FLAG_EMBED_DATA;
if (flags.compress)
lzc_flags |= LZC_SEND_FLAG_COMPRESS;
if (fromname != NULL &&
(fromname[0] == '#' || fromname[0] == '@')) {

View File

@ -25,8 +25,8 @@
*/
/*
* Copyright (c) 2013, 2014 by Delphix. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
* Copyright (c) 2013, 2015 by Delphix. All rights reserved.
*/
#include <ctype.h>
@ -39,6 +39,7 @@
#include <sys/dmu.h>
#include <sys/zfs_ioctl.h>
#include <sys/zio.h>
#include <zfs_fletcher.h>
/*
@ -251,6 +252,7 @@ main(int argc, char *argv[])
(void) fprintf(stderr, "invalid option '%c'\n",
optopt);
usage();
break;
}
}
@ -453,38 +455,50 @@ main(int argc, char *argv[])
drrw->drr_object = BSWAP_64(drrw->drr_object);
drrw->drr_type = BSWAP_32(drrw->drr_type);
drrw->drr_offset = BSWAP_64(drrw->drr_offset);
drrw->drr_length = BSWAP_64(drrw->drr_length);
drrw->drr_logical_size =
BSWAP_64(drrw->drr_logical_size);
drrw->drr_toguid = BSWAP_64(drrw->drr_toguid);
drrw->drr_key.ddk_prop =
BSWAP_64(drrw->drr_key.ddk_prop);
drrw->drr_compressed_size =
BSWAP_64(drrw->drr_compressed_size);
}
uint64_t payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
/*
* If this is verbose and/or dump output,
* print info on the modified block
*/
if (verbose) {
(void) printf("WRITE object = %llu type = %u "
"checksum type = %u\n"
" offset = %llu length = %llu "
"checksum type = %u compression type = %u\n"
" offset = %llu logical_size = %llu "
"compressed_size = %llu "
"payload_size = %llu "
"props = %llx\n",
(u_longlong_t)drrw->drr_object,
drrw->drr_type,
drrw->drr_checksumtype,
drrw->drr_compressiontype,
(u_longlong_t)drrw->drr_offset,
(u_longlong_t)drrw->drr_length,
(u_longlong_t)drrw->drr_logical_size,
(u_longlong_t)drrw->drr_compressed_size,
(u_longlong_t)payload_size,
(u_longlong_t)drrw->drr_key.ddk_prop);
}
/*
* Read the contents of the block in from STDIN to buf
*/
(void) ssread(buf, drrw->drr_length, &zc);
(void) ssread(buf, payload_size, &zc);
/*
* If in dump mode
*/
if (dump) {
print_block(buf, drrw->drr_length);
print_block(buf, payload_size);
}
total_write_size += drrw->drr_length;
total_write_size += payload_size;
break;
case DRR_WRITE_BYREF:

View File

@ -600,6 +600,9 @@ typedef struct sendflags {
/* WRITE_EMBEDDED records of type DATA are permitted */
boolean_t embed_data;
/* compressed WRITE records are permitted */
boolean_t compress;
} sendflags_t;
typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *);

View File

@ -347,8 +347,10 @@ cksummer(void *arg)
{
struct drr_write *drrw = &drr->drr_u.drr_write;
dataref_t dataref;
uint64_t payload_size;
(void) ssread(buf, drrw->drr_length, ofp);
payload_size = DRR_WRITE_PAYLOAD_SIZE(drrw);
(void) ssread(buf, payload_size, ofp);
/*
* Use the existing checksum if it's dedup-capable,
@ -362,7 +364,7 @@ cksummer(void *arg)
zio_cksum_t tmpsha256;
SHA256Init(&ctx);
SHA256Update(&ctx, buf, drrw->drr_length);
SHA256Update(&ctx, buf, payload_size);
SHA256Final(&tmpsha256, &ctx);
drrw->drr_key.ddk_cksum.zc_word[0] =
BE_64(tmpsha256.zc_word[0]);
@ -392,7 +394,7 @@ cksummer(void *arg)
wbr_drrr->drr_object = drrw->drr_object;
wbr_drrr->drr_offset = drrw->drr_offset;
wbr_drrr->drr_length = drrw->drr_length;
wbr_drrr->drr_length = drrw->drr_logical_size;
wbr_drrr->drr_toguid = drrw->drr_toguid;
wbr_drrr->drr_refguid = dataref.ref_guid;
wbr_drrr->drr_refobject =
@ -414,7 +416,7 @@ cksummer(void *arg)
goto out;
} else {
/* block not previously seen */
if (dump_record(drr, buf, drrw->drr_length,
if (dump_record(drr, buf, payload_size,
&stream_cksum, outfd) != 0)
goto out;
}
@ -917,7 +919,7 @@ typedef struct send_dump_data {
uint64_t prevsnap_obj;
boolean_t seenfrom, seento, replicate, doall, fromorigin;
boolean_t verbose, dryrun, parsable, progress, embed_data, std_out;
boolean_t large_block;
boolean_t large_block, compress;
int outfd;
boolean_t err;
nvlist_t *fss;
@ -933,7 +935,7 @@ typedef struct send_dump_data {
static int
estimate_ioctl(zfs_handle_t *zhp, uint64_t fromsnap_obj,
boolean_t fromorigin, uint64_t *sizep)
boolean_t fromorigin, enum lzc_send_flags flags, uint64_t *sizep)
{
zfs_cmd_t zc = { 0 };
libzfs_handle_t *hdl = zhp->zfs_hdl;
@ -946,6 +948,7 @@ estimate_ioctl(zfs_handle_t *zhp, uint64_t fromsnap_obj,
zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
zc.zc_fromobj = fromsnap_obj;
zc.zc_guid = 1; /* estimate flag */
zc.zc_flags = flags;
if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) {
char errbuf[1024];
@ -1184,6 +1187,7 @@ dump_snapshot(zfs_handle_t *zhp, void *arg)
progress_arg_t pa = { 0 };
pthread_t tid;
char *thissnap;
enum lzc_send_flags flags = 0;
int err;
boolean_t isfromsnap, istosnap, fromorigin;
boolean_t exclude = B_FALSE;
@ -1212,6 +1216,13 @@ dump_snapshot(zfs_handle_t *zhp, void *arg)
if (istosnap)
sdd->seento = B_TRUE;
if (sdd->large_block)
flags |= LZC_SEND_FLAG_LARGE_BLOCK;
if (sdd->embed_data)
flags |= LZC_SEND_FLAG_EMBED_DATA;
if (sdd->compress)
flags |= LZC_SEND_FLAG_COMPRESS;
if (!sdd->doall && !isfromsnap && !istosnap) {
if (sdd->replicate) {
char *snapname;
@ -1258,7 +1269,7 @@ dump_snapshot(zfs_handle_t *zhp, void *arg)
if (sdd->verbose) {
uint64_t size = 0;
(void) estimate_ioctl(zhp, sdd->prevsnap_obj,
fromorigin, &size);
fromorigin, flags, &size);
send_print_verbose(fout, zhp->zfs_name,
sdd->prevsnap[0] ? sdd->prevsnap : NULL,
@ -1283,12 +1294,6 @@ dump_snapshot(zfs_handle_t *zhp, void *arg)
}
}
enum lzc_send_flags flags = 0;
if (sdd->large_block)
flags |= LZC_SEND_FLAG_LARGE_BLOCK;
if (sdd->embed_data)
flags |= LZC_SEND_FLAG_EMBED_DATA;
err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj,
fromorigin, sdd->outfd, flags, sdd->debugnv);
@ -1594,8 +1599,12 @@ zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd,
fromguid = 0;
(void) nvlist_lookup_uint64(resume_nvl, "fromguid", &fromguid);
if (flags->largeblock || nvlist_exists(resume_nvl, "largeblockok"))
lzc_flags |= LZC_SEND_FLAG_LARGE_BLOCK;
if (flags->embed_data || nvlist_exists(resume_nvl, "embedok"))
lzc_flags |= LZC_SEND_FLAG_EMBED_DATA;
if (flags->compress || nvlist_exists(resume_nvl, "compressok"))
lzc_flags |= LZC_SEND_FLAG_COMPRESS;
if (guid_to_name(hdl, toname, toguid, B_FALSE, name) != 0) {
if (zfs_dataset_exists(hdl, toname, ZFS_TYPE_DATASET)) {
@ -1628,7 +1637,8 @@ zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd,
if (flags->verbose) {
uint64_t size = 0;
error = lzc_send_space(zhp->zfs_name, fromname, &size);
error = lzc_send_space(zhp->zfs_name, fromname,
lzc_flags, &size);
if (error == 0)
size = MAX(0, (int64_t)(size - bytes));
send_print_verbose(stderr, zhp->zfs_name, fromname,
@ -1856,6 +1866,7 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
sdd.dryrun = flags->dryrun;
sdd.large_block = flags->largeblock;
sdd.embed_data = flags->embed_data;
sdd.compress = flags->compress;
sdd.filter_cb = filter_func;
sdd.filter_cb_arg = cb_arg;
if (debugnvp)
@ -2921,11 +2932,17 @@ recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
case DRR_WRITE:
if (byteswap) {
drr->drr_u.drr_write.drr_length =
BSWAP_64(drr->drr_u.drr_write.drr_length);
drr->drr_u.drr_write.drr_logical_size =
BSWAP_64(
drr->drr_u.drr_write.drr_logical_size);
drr->drr_u.drr_write.drr_compressed_size =
BSWAP_64(
drr->drr_u.drr_write.drr_compressed_size);
}
uint64_t payload_size =
DRR_WRITE_PAYLOAD_SIZE(&drr->drr_u.drr_write);
(void) recv_read(hdl, fd, buf,
drr->drr_u.drr_write.drr_length, B_FALSE, NULL);
payload_size, B_FALSE, NULL);
break;
case DRR_SPILL:
if (byteswap) {

View File

@ -487,6 +487,8 @@ lzc_send_resume(const char *snapname, const char *from, int fd,
fnvlist_add_boolean(args, "largeblockok");
if (flags & LZC_SEND_FLAG_EMBED_DATA)
fnvlist_add_boolean(args, "embedok");
if (flags & LZC_SEND_FLAG_COMPRESS)
fnvlist_add_boolean(args, "compressok");
if (resumeobj != 0 || resumeoff != 0) {
fnvlist_add_uint64(args, "resume_object", resumeobj);
fnvlist_add_uint64(args, "resume_offset", resumeoff);
@ -512,7 +514,8 @@ lzc_send_resume(const char *snapname, const char *from, int fd,
* an equivalent snapshot.
*/
int
lzc_send_space(const char *snapname, const char *from, uint64_t *spacep)
lzc_send_space(const char *snapname, const char *from,
enum lzc_send_flags flags, uint64_t *spacep)
{
nvlist_t *args;
nvlist_t *result;
@ -521,6 +524,12 @@ lzc_send_space(const char *snapname, const char *from, uint64_t *spacep)
args = fnvlist_alloc();
if (from != NULL)
fnvlist_add_string(args, "from", from);
if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
fnvlist_add_boolean(args, "largeblockok");
if (flags & LZC_SEND_FLAG_EMBED_DATA)
fnvlist_add_boolean(args, "embedok");
if (flags & LZC_SEND_FLAG_COMPRESS)
fnvlist_add_boolean(args, "compressok");
err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result);
nvlist_free(args);
if (err == 0)

View File

@ -62,13 +62,14 @@ int lzc_get_holds(const char *, nvlist_t **);
enum lzc_send_flags {
LZC_SEND_FLAG_EMBED_DATA = 1 << 0,
LZC_SEND_FLAG_LARGE_BLOCK = 1 << 1
LZC_SEND_FLAG_LARGE_BLOCK = 1 << 1,
LZC_SEND_FLAG_COMPRESS = 1 << 2
};
int lzc_send(const char *, const char *, int, enum lzc_send_flags);
int lzc_send_resume(const char *, const char *, int,
enum lzc_send_flags, uint64_t, uint64_t);
int lzc_send_space(const char *, const char *, uint64_t *);
int lzc_send_space(const char *, const char *, enum lzc_send_flags, uint64_t *);
struct dmu_replay_record;

View File

@ -165,12 +165,12 @@
.Ar snapshot bookmark
.Nm
.Cm send
.Op Fl DLPRenpv
.Op Fl DLPRcenpv
.Op Oo Fl I Ns | Ns Fl i Oc Ar snapshot
.Ar snapshot
.Nm
.Cm send
.Op Fl Le
.Op Fl Lce
.Op Fl i Ar snapshot Ns | Ns Ar bookmark
.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot
.Nm
@ -2450,7 +2450,7 @@ feature.
.It Xo
.Nm
.Cm send
.Op Fl DLPRenpv
.Op Fl DLPRcenpv
.Op Oo Fl I Ns | Ns Fl i Oc Ar snapshot
.Ar snapshot
.Xc
@ -2463,7 +2463,7 @@ to a different system
.Pc .
By default, a full stream is generated.
.Bl -tag -width "-D"
.It Fl D
.It Fl D, -dedup
Generate a deduplicated stream. Blocks which would have been sent multiple times
in the send stream will only be sent once. The receiving system must also
support this feature to receive a deduplicated stream. This flag can be used
@ -2483,7 +2483,7 @@ is similar to
The incremental source may be specified as with the
.Fl i
option.
.It Fl L
.It Fl L, -large-block
Generate a stream which may contain blocks larger than 128KB. This flag has no
effect if the
.Sy large_blocks
@ -2497,9 +2497,9 @@ pool feature enabled as well. See
for details on ZFS feature flags and the
.Sy large_blocks
feature.
.It Fl P
.It Fl P, -parsable
Print machine-parsable verbose information about the stream package generated.
.It Fl R
.It Fl R, -replicate
Generate a replication stream package, which will replicate the specified
file system, and all descendent file systems, up to the named snapshot. When
received, all properties, snapshots, descendent file systems, and clones are
@ -2517,7 +2517,7 @@ is received. If the
.Fl F
flag is specified when this stream is received, snapshots and file systems that
do not exist on the sending side are destroyed.
.It Fl e
.It Fl e, -embed
Generate a more compact stream by using
.Sy WRITE_EMBEDDED
records for blocks which are stored more compactly on disk by the
@ -2534,6 +2534,16 @@ that feature enabled as well. See
for details on ZFS feature flags and the
.Sy embedded_data
feature.
.It Fl c, -compressed
Generate a more compact stream by using compressed WRITE records for blocks
which are compressed on disk and in memory (see the
.Sy compression No property for details). If the Sy lz4_compress No feature
is active on the sending system, then the receiving system must have that
feature enabled as well. If the
.Sy large_blocks No feature is enabled on the sending system but the Fl L
option is not supplied in conjunction with
.Fl c, No then the data will be decompressed before sending so it can be split
into smaller block sizes.
.It Fl i Ar snapshot
Generate an incremental stream from the first
.Ar snapshot
@ -2556,7 +2566,7 @@ be fully specified
not just
.Em @origin
.Pc .
.It Fl n
.It Fl n, -dryrun
Do a dry-run
.Pq Qq No-op
send. Do not generate any actual send data. This is useful in conjunction with
@ -2569,11 +2579,11 @@ be written to standard output
.Po contrast with a non-dry-run, where the stream is written to standard output
and the verbose output goes to standard error
.Pc .
.It Fl p
.It Fl p, -props
Include the dataset's properties in the stream. This flag is implicit when
.Fl R
is specified. The receiving system must also support this feature.
.It Fl v
.It Fl v, -verbose
Print verbose information about the stream package generated. This information
includes a per-second report of how much data has been sent.
.Pp
@ -2583,7 +2593,7 @@ on future versions of ZFS .
.It Xo
.Nm
.Cm send
.Op Fl Le
.Op Fl Lce
.Op Fl i Ar snapshot Ns | Ns Ar bookmark
.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot
.Xc
@ -2593,7 +2603,7 @@ read-only, or the filesystem must not be mounted. When the stream generated from
a filesystem or volume is received, the default snapshot name will be
.Qq --head-- .
.Bl -tag -width "-L"
.It Fl L
.It Fl L, -large-block
Generate a stream which may contain blocks larger than 128KB. This flag has no
effect if the
.Sy large_blocks
@ -2607,7 +2617,17 @@ pool feature enabled as well. See
for details on ZFS feature flags and the
.Sy large_blocks
feature.
.It Fl e
.It Fl c, -compressed
Generate a more compact stream by using compressed WRITE records for blocks
which are compressed on disk and in memory (see the
.Sy compression No property for details). If the Sy lz4_compress No feature is
active on the sending system, then the receiving system must have that feature
enabled as well. If the
.Sy large_blocks No feature is enabled on the sending system but the Fl L
option is not supplied in conjunction with
.Fl c, No then the data will be decompressed before sending so it can be split
into smaller block sizes.
.It Fl e, -embed
Generate a more compact stream by using
.Sy WRITE_EMBEDDED
records for blocks which are stored more compactly on disk by the

File diff suppressed because it is too large Load Diff

View File

@ -850,7 +850,7 @@ dbuf_loan_arcbuf(dmu_buf_impl_t *db)
spa_t *spa = db->db_objset->os_spa;
mutex_exit(&db->db_mtx);
abuf = arc_loan_buf(spa, blksz);
abuf = arc_loan_buf(spa, B_FALSE, blksz);
bcopy(db->db.db_data, abuf->b_data, blksz);
} else {
abuf = db->db_buf;
@ -973,8 +973,8 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
BP_IS_HOLE(db->db_blkptr)))) {
arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
dbuf_set_data(db, arc_alloc_buf(db->db_objset->os_spa,
db->db.db_size, db, type));
dbuf_set_data(db, arc_alloc_buf(db->db_objset->os_spa, db, type,
db->db.db_size));
bzero(db->db.db_data, db->db.db_size);
if (db->db_blkptr != NULL && db->db_level > 0 &&
@ -1023,6 +1023,68 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
&aflags, &zb);
}
/*
* This is our just-in-time copy function. It makes a copy of buffers that
* have been modified in a previous transaction group before we access them in
* the current active group.
*
* This function is used in three places: when we are dirtying a buffer for the
* first time in a txg, when we are freeing a range in a dnode that includes
* this buffer, and when we are accessing a buffer which was received compressed
* and later referenced in a WRITE_BYREF record.
*
* Note that when we are called from dbuf_free_range() we do not put a hold on
* the buffer, we just traverse the active dbuf list for the dnode.
*
* Caller requirements (all asserted below): db->db_mtx is held, the dbuf has
* data attached, it is a level-0 dbuf, and it does not belong to the
* meta-dnode object.
*
* The txg argument is only used for a sanity assertion that the last dirty
* record is no more than two txgs older than txg.
*/
static void
dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg)
{
dbuf_dirty_record_t *dr = db->db_last_dirty;
ASSERT(MUTEX_HELD(&db->db_mtx));
ASSERT(db->db.db_data != NULL);
ASSERT(db->db_level == 0);
ASSERT(db->db.db_object != DMU_META_DNODE_OBJECT);
/*
* Nothing to do if there is no dirty record, or if the most recent dirty
* record does not share its data with the dbuf. Bonus buffers are
* compared against db_data; every other dbuf is compared against db_buf.
*/
if (dr == NULL ||
(dr->dt.dl.dr_data !=
((db->db_blkid == DMU_BONUS_BLKID) ? db->db.db_data : db->db_buf)))
return;
/*
* If the last dirty record for this dbuf has not yet synced
* and it's referencing the dbuf data, either:
* reset the reference to point to a new copy,
* or (if there are no active holders)
* just null out the current db_data pointer.
*/
ASSERT(dr->dr_txg >= txg - 2);
if (db->db_blkid == DMU_BONUS_BLKID) {
/* Note that the data bufs here are zio_bufs */
dr->dt.dl.dr_data = zio_buf_alloc(DN_MAX_BONUSLEN);
arc_space_consume(DN_MAX_BONUSLEN, ARC_SPACE_OTHER);
bcopy(db->db.db_data, dr->dt.dl.dr_data, DN_MAX_BONUSLEN);
} else if (refcount_count(&db->db_holds) > db->db_dirtycnt) {
/*
* More holds than dirty records: give the dirty record its own copy
* and leave the dbuf's current buffer in place.
* NOTE(review): arc_buf_size() presumably reports the stored (possibly
* compressed) size and arc_buf_lsize() the logical size -- confirm
* against arc.c.
*/
int size = arc_buf_size(db->db_buf);
arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
spa_t *spa = db->db_objset->os_spa;
enum zio_compress compress_type =
arc_get_compression(db->db_buf);
/*
* The copy keeps the compression state of the source buffer, so the
* bcopy() of "size" bytes below fills the new buffer in either case.
*/
if (compress_type == ZIO_COMPRESS_OFF) {
dr->dt.dl.dr_data = arc_alloc_buf(spa, db, type, size);
} else {
ASSERT3U(type, ==, ARC_BUFC_DATA);
dr->dt.dl.dr_data = arc_alloc_compressed_buf(spa, db,
size, arc_buf_lsize(db->db_buf), compress_type);
}
bcopy(db->db.db_data, dr->dt.dl.dr_data->b_data, size);
} else {
/*
* No copy is made here: the dbuf drops its buffer, which the dirty
* record still references through dr_data (checked above).
*/
db->db_buf = NULL;
dbuf_clear_data(db);
}
}
int
dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
{
@ -1051,6 +1113,18 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
mutex_enter(&db->db_mtx);
if (db->db_state == DB_CACHED) {
/*
* If the arc buf is compressed, we need to decompress it to
* read the data. This could happen during the "zfs receive" of
* a stream which is compressed and deduplicated.
*/
if (db->db_buf != NULL &&
arc_get_compression(db->db_buf) != ZIO_COMPRESS_OFF) {
dbuf_fix_old_data(db,
spa_syncing_txg(dmu_objset_spa(db->db_objset)));
err = arc_decompress(db->db_buf);
dbuf_set_data(db, db->db_buf);
}
mutex_exit(&db->db_mtx);
if (prefetch)
dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE);
@ -1126,7 +1200,7 @@ dbuf_noread(dmu_buf_impl_t *db)
ASSERT(db->db_buf == NULL);
ASSERT(db->db.db_data == NULL);
dbuf_set_data(db, arc_alloc_buf(spa, db->db.db_size, db, type));
dbuf_set_data(db, arc_alloc_buf(spa, db, type, db->db.db_size));
db->db_state = DB_FILL;
} else if (db->db_state == DB_NOFILL) {
dbuf_clear_data(db);
@ -1136,60 +1210,6 @@ dbuf_noread(dmu_buf_impl_t *db)
mutex_exit(&db->db_mtx);
}
/*
* This is our just-in-time copy function. It makes a copy of
* buffers, that have been modified in a previous transaction
* group, before we modify them in the current active group.
*
* This function is used in two places: when we are dirtying a
* buffer for the first time in a txg, and when we are freeing
* a range in a dnode that includes this buffer.
*
* Note that when we are called from dbuf_free_range() we do
* not put a hold on the buffer, we just traverse the active
* dbuf list for the dnode.
*
* Caller requirements (all asserted below): db->db_mtx is held,
* the dbuf has data attached, it is a level-0 dbuf, and it does
* not belong to the meta-dnode object. The txg argument is only
* used for a sanity assertion on the age of the dirty record.
*/
static void
dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg)
{
dbuf_dirty_record_t *dr = db->db_last_dirty;
ASSERT(MUTEX_HELD(&db->db_mtx));
ASSERT(db->db.db_data != NULL);
ASSERT(db->db_level == 0);
ASSERT(db->db.db_object != DMU_META_DNODE_OBJECT);
/*
* Nothing to do if there is no dirty record, or if the most recent
* dirty record does not share its data with the dbuf. Bonus buffers
* are compared against db_data; every other dbuf against db_buf.
*/
if (dr == NULL ||
(dr->dt.dl.dr_data !=
((db->db_blkid == DMU_BONUS_BLKID) ? db->db.db_data : db->db_buf)))
return;
/*
* If the last dirty record for this dbuf has not yet synced
* and it's referencing the dbuf data, either:
* reset the reference to point to a new copy,
* or (if there are no active holders)
* just null out the current db_data pointer.
*/
ASSERT(dr->dr_txg >= txg - 2);
if (db->db_blkid == DMU_BONUS_BLKID) {
/* Note that the data bufs here are zio_bufs */
dr->dt.dl.dr_data = zio_buf_alloc(DN_MAX_BONUSLEN);
arc_space_consume(DN_MAX_BONUSLEN, ARC_SPACE_OTHER);
bcopy(db->db.db_data, dr->dt.dl.dr_data, DN_MAX_BONUSLEN);
} else if (refcount_count(&db->db_holds) > db->db_dirtycnt) {
/*
* More holds than dirty records: give the dirty record its own
* db_size-byte copy and leave the dbuf's current buffer in place.
*/
int size = db->db.db_size;
arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
spa_t *spa = db->db_objset->os_spa;
dr->dt.dl.dr_data = arc_alloc_buf(spa, size, db, type);
bcopy(db->db.db_data, dr->dt.dl.dr_data->b_data, size);
} else {
/*
* No copy is made here: the dbuf drops its buffer, which the dirty
* record still references through dr_data (checked above).
*/
db->db_buf = NULL;
dbuf_clear_data(db);
}
}
void
dbuf_unoverride(dbuf_dirty_record_t *dr)
{
@ -1390,7 +1410,7 @@ dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx)
dmu_buf_will_dirty(&db->db, tx);
/* create the data buffer for the new block */
buf = arc_alloc_buf(dn->dn_objset->os_spa, size, db, type);
buf = arc_alloc_buf(dn->dn_objset->os_spa, db, type, size);
/* copy old block data to the new block */
obuf = db->db_buf;
@ -1984,9 +2004,9 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
ASSERT(!refcount_is_zero(&db->db_holds));
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
ASSERT(db->db_level == 0);
ASSERT(DBUF_GET_BUFC_TYPE(db) == ARC_BUFC_DATA);
ASSERT3U(dbuf_is_metadata(db), ==, arc_is_metadata(buf));
ASSERT(buf != NULL);
ASSERT(arc_buf_size(buf) == db->db.db_size);
ASSERT(arc_buf_lsize(buf) == db->db.db_size);
ASSERT(tx->tx_txg != 0);
arc_return_buf(buf, db);
@ -2583,8 +2603,8 @@ dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid,
arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
dbuf_set_data(db,
arc_alloc_buf(dn->dn_objset->os_spa,
db->db.db_size, db, type));
arc_alloc_buf(dn->dn_objset->os_spa, db, type,
db->db.db_size));
bcopy(dr->dt.dl.dr_data->b_data, db->db.db_data,
db->db.db_size);
}
@ -3129,10 +3149,19 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
* objects only modified in the syncing context (e.g.
* DNONE_DNODE blocks).
*/
int blksz = arc_buf_size(*datap);
int psize = arc_buf_size(*datap);
arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
*datap = arc_alloc_buf(os->os_spa, blksz, db, type);
bcopy(db->db.db_data, (*datap)->b_data, blksz);
enum zio_compress compress_type = arc_get_compression(*datap);
if (compress_type == ZIO_COMPRESS_OFF) {
*datap = arc_alloc_buf(os->os_spa, db, type, psize);
} else {
ASSERT3U(type, ==, ARC_BUFC_DATA);
int lsize = arc_buf_lsize(*datap);
*datap = arc_alloc_compressed_buf(os->os_spa, db,
psize, lsize, compress_type);
}
bcopy(db->db.db_data, (*datap)->b_data, psize);
}
db->db_data_pending = dr;
@ -3537,7 +3566,9 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
wp_flag = WP_SPILL;
wp_flag |= (db->db_state == DB_NOFILL) ? WP_NOFILL : 0;
dmu_write_policy(os, dn, db->db_level, wp_flag, &zp);
dmu_write_policy(os, dn, db->db_level, wp_flag,
(data != NULL && arc_get_compression(data) != ZIO_COMPRESS_OFF) ?
arc_get_compression(data) : ZIO_COMPRESS_INHERIT, &zp);
DB_DNODE_EXIT(db);
/*
@ -3556,8 +3587,8 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
*/
void *contents = (data != NULL) ? data->b_data : NULL;
dr->dr_zio = zio_write(zio, os->os_spa, txg,
&dr->dr_bp_copy, contents, db->db.db_size, &zp,
dr->dr_zio = zio_write(zio, os->os_spa, txg, &dr->dr_bp_copy,
contents, db->db.db_size, db->db.db_size, &zp,
dbuf_write_override_ready, NULL, NULL,
dbuf_write_override_done,
dr, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
@ -3570,7 +3601,7 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
ASSERT(zp.zp_checksum == ZIO_CHECKSUM_OFF ||
zp.zp_checksum == ZIO_CHECKSUM_NOPARITY);
dr->dr_zio = zio_write(zio, os->os_spa, txg,
&dr->dr_bp_copy, NULL, db->db.db_size, &zp,
&dr->dr_bp_copy, NULL, db->db.db_size, db->db.db_size, &zp,
dbuf_write_nofill_ready, NULL, NULL,
dbuf_write_nofill_done, db,
ZIO_PRIORITY_ASYNC_WRITE,

View File

@ -1069,7 +1069,7 @@ dmu_xuio_add(xuio_t *xuio, arc_buf_t *abuf, offset_t off, size_t n)
int i = priv->next++;
ASSERT(i < priv->cnt);
ASSERT(off + n <= arc_buf_size(abuf));
ASSERT(off + n <= arc_buf_lsize(abuf));
iov = uio->uio_iov + i;
iov->iov_base = (char *)abuf->b_data + off;
iov->iov_len = n;
@ -1415,7 +1415,7 @@ dmu_request_arcbuf(dmu_buf_t *handle, int size)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)handle;
return (arc_loan_buf(db->db_objset->os_spa, size));
return (arc_loan_buf(db->db_objset->os_spa, B_FALSE, size));
}
/*
@ -1440,7 +1440,7 @@ dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf,
dmu_buf_impl_t *dbuf = (dmu_buf_impl_t *)handle;
dnode_t *dn;
dmu_buf_impl_t *db;
uint32_t blksz = (uint32_t)arc_buf_size(buf);
uint32_t blksz = (uint32_t)arc_buf_lsize(buf);
uint64_t blkid;
DB_DNODE_ENTER(dbuf);
@ -1453,18 +1453,19 @@ dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf,
/*
* We can only assign if the offset is aligned, the arc buf is the
* same size as the dbuf, and the dbuf is not metadata. It
* can't be metadata because the loaned arc buf comes from the
* user-data kmem arena.
* same size as the dbuf, and the dbuf is not metadata.
*/
if (offset == db->db.db_offset && blksz == db->db.db_size &&
DBUF_GET_BUFC_TYPE(db) == ARC_BUFC_DATA) {
if (offset == db->db.db_offset && blksz == db->db.db_size) {
dbuf_assign_arcbuf(db, buf, tx);
dbuf_rele(db, FTAG);
} else {
objset_t *os;
uint64_t object;
/* compressed bufs must always be assignable to their dbuf */
ASSERT3U(arc_get_compression(buf), ==, ZIO_COMPRESS_OFF);
ASSERT(!(buf->b_flags & ARC_BUF_FLAG_COMPRESSED));
DB_DNODE_ENTER(dbuf);
dn = DB_DNODE(dbuf);
os = dn->dn_objset;
@ -1614,8 +1615,8 @@ dmu_sync_late_arrival(zio_t *pio, objset_t *os, dmu_sync_cb_t *done, zgd_t *zgd,
dsa->dsa_zgd = zgd;
dsa->dsa_tx = tx;
zio_nowait(zio_write(pio, os->os_spa, dmu_tx_get_txg(tx),
zgd->zgd_bp, zgd->zgd_db->db_data, zgd->zgd_db->db_size,
zio_nowait(zio_write(pio, os->os_spa, dmu_tx_get_txg(tx), zgd->zgd_bp,
zgd->zgd_db->db_data, zgd->zgd_db->db_size, zgd->zgd_db->db_size,
zp, dmu_sync_late_arrival_ready, NULL,
NULL, dmu_sync_late_arrival_done, dsa, ZIO_PRIORITY_SYNC_WRITE,
ZIO_FLAG_CANFAIL, zb));
@ -1669,7 +1670,8 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd)
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
dmu_write_policy(os, dn, db->db_level, WP_DMU_SYNC, &zp);
dmu_write_policy(os, dn, db->db_level, WP_DMU_SYNC,
ZIO_COMPRESS_INHERIT, &zp);
DB_DNODE_EXIT(db);
/*
@ -1839,7 +1841,8 @@ int zfs_mdcomp_disable = 0;
int zfs_redundant_metadata_most_ditto_level = 2;
void
dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp,
enum zio_compress override_compress, zio_prop_t *zp)
{
dmu_object_type_t type = dn ? dn->dn_type : DMU_OT_OBJSET;
boolean_t ismd = (level > 0 || DMU_OT_IS_METADATA(type) ||
@ -1851,6 +1854,10 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
boolean_t nopwrite = B_FALSE;
boolean_t dedup_verify = os->os_dedup_verify;
int copies = os->os_copies;
boolean_t lz4_ac = spa_feature_is_active(os->os_spa,
SPA_FEATURE_LZ4_COMPRESS);
IMPLY(override_compress == ZIO_COMPRESS_LZ4, lz4_ac);
/*
* We maintain different write policies for each of the following
@ -1938,7 +1945,16 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
}
zp->zp_checksum = checksum;
zp->zp_compress = compress;
/*
* If we're writing a pre-compressed buffer, the compression type we use
* must match the data. If it hasn't been compressed yet, then we should
* use the value dictated by the policies above.
*/
zp->zp_compress = override_compress != ZIO_COMPRESS_INHERIT
? override_compress : compress;
ASSERT3U(zp->zp_compress, !=, ZIO_COMPRESS_INHERIT);
zp->zp_type = (wp & WP_SPILL) ? dn->dn_bonustype : type;
zp->zp_level = level;
zp->zp_copies = MIN(copies, spa_max_replication(os->os_spa));

View File

@ -339,9 +339,8 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
/* Increase the blocksize if we are permitted. */
if (spa_version(spa) >= SPA_VERSION_USERSPACE &&
arc_buf_size(os->os_phys_buf) < sizeof (objset_phys_t)) {
arc_buf_t *buf = arc_alloc_buf(spa,
sizeof (objset_phys_t), &os->os_phys_buf,
ARC_BUFC_METADATA);
arc_buf_t *buf = arc_alloc_buf(spa, &os->os_phys_buf,
ARC_BUFC_METADATA, sizeof (objset_phys_t));
bzero(buf->b_data, sizeof (objset_phys_t));
bcopy(os->os_phys_buf->b_data, buf->b_data,
arc_buf_size(os->os_phys_buf));
@ -354,8 +353,8 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
} else {
int size = spa_version(spa) >= SPA_VERSION_USERSPACE ?
sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE;
os->os_phys_buf = arc_alloc_buf(spa, size,
&os->os_phys_buf, ARC_BUFC_METADATA);
os->os_phys_buf = arc_alloc_buf(spa, &os->os_phys_buf,
ARC_BUFC_METADATA, size);
os->os_phys = os->os_phys_buf->b_data;
bzero(os->os_phys, size);
}
@ -1138,7 +1137,7 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
arc_release(os->os_phys_buf, &os->os_phys_buf);
dmu_write_policy(os, NULL, 0, 0, &zp);
dmu_write_policy(os, NULL, 0, 0, ZIO_COMPRESS_INHERIT, &zp);
zio = arc_write(pio, os->os_spa, tx->tx_txg,
blkptr_copy, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os),

View File

@ -249,8 +249,10 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
static int
dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type,
uint64_t object, uint64_t offset, int blksz, const blkptr_t *bp, void *data)
uint64_t object, uint64_t offset, int lsize, int psize, const blkptr_t *bp,
void *data)
{
uint64_t payload_size;
struct drr_write *drrw = &(dsp->dsa_drr->drr_u.drr_write);
/*
@ -261,7 +263,7 @@ dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type,
(object == dsp->dsa_last_data_object &&
offset > dsp->dsa_last_data_offset));
dsp->dsa_last_data_object = object;
dsp->dsa_last_data_offset = offset + blksz - 1;
dsp->dsa_last_data_offset = offset + lsize - 1;
/*
* If there is any kind of pending aggregation (currently either
@ -280,8 +282,26 @@ dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type,
drrw->drr_object = object;
drrw->drr_type = type;
drrw->drr_offset = offset;
drrw->drr_length = blksz;
drrw->drr_toguid = dsp->dsa_toguid;
drrw->drr_logical_size = lsize;
/* only set the compression fields if the buf is compressed */
if (lsize != psize) {
ASSERT(dsp->dsa_featureflags & DMU_BACKUP_FEATURE_COMPRESSED);
ASSERT(!BP_IS_EMBEDDED(bp));
ASSERT(!BP_SHOULD_BYTESWAP(bp));
ASSERT(!DMU_OT_IS_METADATA(BP_GET_TYPE(bp)));
ASSERT3U(BP_GET_COMPRESS(bp), !=, ZIO_COMPRESS_OFF);
ASSERT3S(psize, >, 0);
ASSERT3S(lsize, >=, psize);
drrw->drr_compressiontype = BP_GET_COMPRESS(bp);
drrw->drr_compressed_size = psize;
payload_size = drrw->drr_compressed_size;
} else {
payload_size = drrw->drr_logical_size;
}
if (bp == NULL || BP_IS_EMBEDDED(bp)) {
/*
* There's no pre-computed checksum for partial-block
@ -301,7 +321,7 @@ dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type,
drrw->drr_key.ddk_cksum = bp->blk_cksum;
}
if (dump_record(dsp, data, blksz) != 0)
if (dump_record(dsp, data, payload_size) != 0)
return (SET_ERROR(EINTR));
return (0);
}
@ -476,7 +496,7 @@ backup_do_embed(dmu_sendarg_t *dsp, const blkptr_t *bp)
* Compression function must be legacy, or explicitly enabled.
*/
if ((BP_GET_COMPRESS(bp) >= ZIO_COMPRESS_LEGACY_FUNCTIONS &&
!(dsp->dsa_featureflags & DMU_BACKUP_FEATURE_EMBED_DATA_LZ4)))
!(dsp->dsa_featureflags & DMU_BACKUP_FEATURE_LZ4)))
return (B_FALSE);
/*
@ -639,18 +659,49 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data)
int blksz = dblkszsec << SPA_MINBLOCKSHIFT;
uint64_t offset;
/*
* If we have large blocks stored on disk but the send flags
* don't allow us to send large blocks, we split the data from
* the arc buf into chunks.
*/
boolean_t split_large_blocks = blksz > SPA_OLD_MAXBLOCKSIZE &&
!(dsa->dsa_featureflags & DMU_BACKUP_FEATURE_LARGE_BLOCKS);
/*
* We should only request compressed data from the ARC if all
* the following are true:
* - stream compression was requested
* - we aren't splitting large blocks into smaller chunks
* - the data won't need to be byteswapped before sending
* - this isn't an embedded block
* - this isn't metadata (if receiving on a different endian
* system it can be byteswapped more easily)
*/
boolean_t request_compressed =
(dsa->dsa_featureflags & DMU_BACKUP_FEATURE_COMPRESSED) &&
!split_large_blocks && !BP_SHOULD_BYTESWAP(bp) &&
!BP_IS_EMBEDDED(bp) && !DMU_OT_IS_METADATA(BP_GET_TYPE(bp));
ASSERT0(zb->zb_level);
ASSERT(zb->zb_object > dsa->dsa_resume_object ||
(zb->zb_object == dsa->dsa_resume_object &&
zb->zb_blkid * blksz >= dsa->dsa_resume_offset));
ASSERT0(zb->zb_level);
ASSERT(zb->zb_object > dsa->dsa_resume_object ||
(zb->zb_object == dsa->dsa_resume_object &&
zb->zb_blkid * blksz >= dsa->dsa_resume_offset));
ASSERT3U(blksz, ==, BP_GET_LSIZE(bp));
enum zio_flag zioflags = ZIO_FLAG_CANFAIL;
if (request_compressed)
zioflags |= ZIO_FLAG_RAW;
if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
&aflags, zb) != 0) {
ZIO_PRIORITY_ASYNC_READ, zioflags, &aflags, zb) != 0) {
if (zfs_send_corrupt_data) {
/* Send a block filled with 0x"zfs badd bloc" */
abuf = arc_alloc_buf(spa, blksz, &abuf,
ARC_BUFC_DATA);
abuf = arc_alloc_buf(spa, &abuf, ARC_BUFC_DATA,
blksz);
uint64_t *ptr;
for (ptr = abuf->b_data;
(char *)ptr < (char *)abuf->b_data + blksz;
@ -663,21 +714,21 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data)
offset = zb->zb_blkid * blksz;
if (!(dsa->dsa_featureflags &
DMU_BACKUP_FEATURE_LARGE_BLOCKS) &&
blksz > SPA_OLD_MAXBLOCKSIZE) {
if (split_large_blocks) {
ASSERT3U(arc_get_compression(abuf), ==,
ZIO_COMPRESS_OFF);
char *buf = abuf->b_data;
while (blksz > 0 && err == 0) {
int n = MIN(blksz, SPA_OLD_MAXBLOCKSIZE);
err = dump_write(dsa, type, zb->zb_object,
offset, n, NULL, buf);
offset, n, n, NULL, buf);
offset += n;
buf += n;
blksz -= n;
}
} else {
err = dump_write(dsa, type, zb->zb_object,
offset, blksz, bp, abuf->b_data);
err = dump_write(dsa, type, zb->zb_object, offset,
blksz, arc_buf_size(abuf), bp, abuf->b_data);
}
arc_buf_destroy(abuf, &abuf);
}
@ -704,9 +755,9 @@ get_next_record(bqueue_t *bq, struct send_block_record *data)
*/
static int
dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
zfs_bookmark_phys_t *ancestor_zb,
boolean_t is_clone, boolean_t embedok, boolean_t large_block_ok, int outfd,
uint64_t resumeobj, uint64_t resumeoff,
zfs_bookmark_phys_t *ancestor_zb, boolean_t is_clone,
boolean_t embedok, boolean_t large_block_ok, boolean_t compressok,
int outfd, uint64_t resumeobj, uint64_t resumeoff,
vnode_t *vp, offset_t *off)
{
objset_t *os;
@ -749,7 +800,15 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
spa_feature_is_active(dp->dp_spa, SPA_FEATURE_EMBEDDED_DATA)) {
featureflags |= DMU_BACKUP_FEATURE_EMBED_DATA;
if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS))
featureflags |= DMU_BACKUP_FEATURE_EMBED_DATA_LZ4;
featureflags |= DMU_BACKUP_FEATURE_LZ4;
}
if (compressok) {
featureflags |= DMU_BACKUP_FEATURE_COMPRESSED;
}
if ((featureflags &
(DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_COMPRESSED)) !=
0 && spa_feature_is_active(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS)) {
featureflags |= DMU_BACKUP_FEATURE_LZ4;
}
if (resumeobj != 0 || resumeoff != 0) {
@ -898,7 +957,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
int
dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
boolean_t embedok, boolean_t large_block_ok,
boolean_t embedok, boolean_t large_block_ok, boolean_t compressok,
int outfd, vnode_t *vp, offset_t *off)
{
dsl_pool_t *dp;
@ -935,10 +994,10 @@ dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
is_clone = (fromds->ds_dir != ds->ds_dir);
dsl_dataset_rele(fromds, FTAG);
err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone,
embedok, large_block_ok, outfd, 0, 0, vp, off);
embedok, large_block_ok, compressok, outfd, 0, 0, vp, off);
} else {
err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE,
embedok, large_block_ok, outfd, 0, 0, vp, off);
embedok, large_block_ok, compressok, outfd, 0, 0, vp, off);
}
dsl_dataset_rele(ds, FTAG);
return (err);
@ -946,7 +1005,8 @@ dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
int
dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
boolean_t large_block_ok, int outfd, uint64_t resumeobj, uint64_t resumeoff,
boolean_t large_block_ok, boolean_t compressok, int outfd,
uint64_t resumeobj, uint64_t resumeoff,
vnode_t *vp, offset_t *off)
{
dsl_pool_t *dp;
@ -1014,11 +1074,11 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
return (err);
}
err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone,
embedok, large_block_ok,
embedok, large_block_ok, compressok,
outfd, resumeobj, resumeoff, vp, off);
} else {
err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE,
embedok, large_block_ok,
embedok, large_block_ok, compressok,
outfd, resumeobj, resumeoff, vp, off);
}
if (owned)
@ -1029,33 +1089,45 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
}
static int
dmu_adjust_send_estimate_for_indirects(dsl_dataset_t *ds, uint64_t size,
uint64_t *sizep)
dmu_adjust_send_estimate_for_indirects(dsl_dataset_t *ds, uint64_t uncompressed,
uint64_t compressed, boolean_t stream_compressed, uint64_t *sizep)
{
int err;
uint64_t size;
/*
* Assume that space (both on-disk and in-stream) is dominated by
* data. We will adjust for indirect blocks and the copies property,
* but ignore per-object space used (eg, dnodes and DRR_OBJECT records).
*/
uint64_t recordsize;
uint64_t record_count;
/* Assume all (uncompressed) blocks are recordsize. */
err = dsl_prop_get_int_ds(ds, zfs_prop_to_name(ZFS_PROP_RECORDSIZE),
&recordsize);
if (err != 0)
return (err);
record_count = uncompressed / recordsize;
/*
* If we're estimating a send size for a compressed stream, use the
* compressed data size to estimate the stream size. Otherwise, use the
* uncompressed data size.
*/
size = stream_compressed ? compressed : uncompressed;
/*
* Subtract out approximate space used by indirect blocks.
* Assume most space is used by data blocks (non-indirect, non-dnode).
* Assume all blocks are recordsize. Assume ditto blocks and
* internal fragmentation counter out compression.
* Assume no ditto blocks or internal fragmentation.
*
* Therefore, space used by indirect blocks is sizeof(blkptr_t) per
* block, which we observe in practice.
* block.
*/
uint64_t recordsize;
err = dsl_prop_get_int_ds(ds, "recordsize", &recordsize);
if (err != 0)
return (err);
size -= size / recordsize * sizeof (blkptr_t);
size -= record_count * sizeof (blkptr_t);
/* Add in the space for the record associated with each block. */
size += size / recordsize * sizeof (dmu_replay_record_t);
size += record_count * sizeof (dmu_replay_record_t);
*sizep = size;
@ -1063,11 +1135,12 @@ dmu_adjust_send_estimate_for_indirects(dsl_dataset_t *ds, uint64_t size,
}
int
dmu_send_estimate(dsl_dataset_t *ds, dsl_dataset_t *fromds, uint64_t *sizep)
dmu_send_estimate(dsl_dataset_t *ds, dsl_dataset_t *fromds,
boolean_t stream_compressed, uint64_t *sizep)
{
dsl_pool_t *dp = ds->ds_dir->dd_pool;
int err;
uint64_t size;
uint64_t uncomp, comp;
ASSERT(dsl_pool_config_held(dp));
@ -1086,33 +1159,41 @@ dmu_send_estimate(dsl_dataset_t *ds, dsl_dataset_t *fromds, uint64_t *sizep)
if (fromds != NULL && !dsl_dataset_is_before(ds, fromds, 0))
return (SET_ERROR(EXDEV));
/* Get uncompressed size estimate of changed data. */
/* Get compressed and uncompressed size estimates of changed data. */
if (fromds == NULL) {
size = dsl_dataset_phys(ds)->ds_uncompressed_bytes;
uncomp = dsl_dataset_phys(ds)->ds_uncompressed_bytes;
comp = dsl_dataset_phys(ds)->ds_compressed_bytes;
} else {
uint64_t used, comp;
uint64_t used;
err = dsl_dataset_space_written(fromds, ds,
&used, &comp, &size);
&used, &comp, &uncomp);
if (err != 0)
return (err);
}
err = dmu_adjust_send_estimate_for_indirects(ds, size, sizep);
err = dmu_adjust_send_estimate_for_indirects(ds, uncomp, comp,
stream_compressed, sizep);
return (err);
}
/*
 * Accumulator that dmu_calculate_send_traversal() receives through its
 * void *arg.  The two totals are later handed to
 * dmu_adjust_send_estimate_for_indirects() to produce a stream size estimate.
 */
struct calculate_send_arg {
/* running sum of BP_GET_UCSIZE() over the non-hole blocks visited */
uint64_t uncompressed;
/* running sum of BP_GET_PSIZE() over the same blocks */
uint64_t compressed;
};
/*
* Simple callback used to traverse the blocks of a snapshot and sum their
* uncompressed size
* uncompressed and compressed sizes.
*/
/* ARGSUSED */
static int
dmu_calculate_send_traversal(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
{
/*
 * NOTE(review): two accumulators are bound to the same arg below, a bare
 * uint64_t counter and a struct calculate_send_arg.  Because "uncompressed"
 * is the first member of that struct, both writes of BP_GET_UCSIZE() would
 * land in the same storage.  This looks like the before and after sides of
 * one change shown together -- confirm which form callers really pass.
 */
uint64_t *spaceptr = arg;
struct calculate_send_arg *space = arg;
/* NULL block pointers and holes contribute nothing to the totals. */
if (bp != NULL && !BP_IS_HOLE(bp)) {
*spaceptr += BP_GET_UCSIZE(bp);
space->uncompressed += BP_GET_UCSIZE(bp);
space->compressed += BP_GET_PSIZE(bp);
}
/* This callback never reports an error. */
return (0);
}
@ -1124,16 +1205,16 @@ dmu_calculate_send_traversal(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
*/
int
dmu_send_estimate_from_txg(dsl_dataset_t *ds, uint64_t from_txg,
uint64_t *sizep)
boolean_t stream_compressed, uint64_t *sizep)
{
dsl_pool_t *dp = ds->ds_dir->dd_pool;
int err;
uint64_t size = 0;
struct calculate_send_arg size = { 0 };
ASSERT(dsl_pool_config_held(dp));
/* tosnap must be a snapshot */
if (!dsl_dataset_is_snapshot(ds))
if (!ds->ds_is_snapshot)
return (SET_ERROR(EINVAL));
/* verify that from_txg is before the provided snapshot was taken */
@ -1150,7 +1231,8 @@ dmu_send_estimate_from_txg(dsl_dataset_t *ds, uint64_t from_txg,
if (err)
return (err);
err = dmu_adjust_send_estimate_for_indirects(ds, size, sizep);
err = dmu_adjust_send_estimate_for_indirects(ds, size.uncompressed,
size.compressed, stream_compressed, sizep);
return (err);
}
@ -1281,14 +1363,14 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
/*
* The receiving code doesn't know how to translate a WRITE_EMBEDDED
* record to a plan WRITE record, so the pool must have the
* record to a plain WRITE record, so the pool must have the
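* EMBEDDED_DATA feature enabled if the stream has WRITE_EMBEDDED
* records. Likewise, a stream flagged DMU_BACKUP_FEATURE_LZ4 (set for
* embedded or compressed streams sent from a pool with LZ4 active)
* requires the LZ4_COMPRESS feature to be enabled on this pool.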
*/
if ((featureflags & DMU_BACKUP_FEATURE_EMBED_DATA) &&
!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_EMBEDDED_DATA))
return (SET_ERROR(ENOTSUP));
if ((featureflags & DMU_BACKUP_FEATURE_EMBED_DATA_LZ4) &&
if ((featureflags & DMU_BACKUP_FEATURE_LZ4) &&
!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS))
return (SET_ERROR(ENOTSUP));
@ -1457,11 +1539,21 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx)
8, 1, &zero, tx));
VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_BYTES,
8, 1, &zero, tx));
if (DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
DMU_BACKUP_FEATURE_LARGE_BLOCKS) {
VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_LARGEBLOCK,
8, 1, &one, tx));
}
if (DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
DMU_BACKUP_FEATURE_EMBED_DATA) {
VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_EMBEDOK,
8, 1, &one, tx));
}
if (DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
DMU_BACKUP_FEATURE_COMPRESSED) {
VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_COMPRESSOK,
8, 1, &one, tx));
}
}
dmu_buf_will_dirty(newds->ds_dbuf, tx);
@ -1517,7 +1609,7 @@ dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx)
if ((featureflags & DMU_BACKUP_FEATURE_EMBED_DATA) &&
!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_EMBEDDED_DATA))
return (SET_ERROR(ENOTSUP));
if ((featureflags & DMU_BACKUP_FEATURE_EMBED_DATA_LZ4) &&
if ((featureflags & DMU_BACKUP_FEATURE_LZ4) &&
!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS))
return (SET_ERROR(ENOTSUP));
@ -1852,10 +1944,11 @@ byteswap_record(dmu_replay_record_t *drr)
DO64(drr_write.drr_object);
DO32(drr_write.drr_type);
DO64(drr_write.drr_offset);
DO64(drr_write.drr_length);
DO64(drr_write.drr_logical_size);
DO64(drr_write.drr_toguid);
ZIO_CHECKSUM_BSWAP(&drr->drr_u.drr_write.drr_key.ddk_cksum);
DO64(drr_write.drr_key.ddk_prop);
DO64(drr_write.drr_compressed_size);
break;
case DRR_WRITE_BYREF:
DO64(drr_write_byref.drr_object);
@ -2085,7 +2178,7 @@ receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw,
dmu_tx_t *tx;
int err;
if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset ||
if (drrw->drr_offset + drrw->drr_logical_size < drrw->drr_offset ||
!DMU_OT_IS_VALID(drrw->drr_type))
return (SET_ERROR(EINVAL));
@ -2107,7 +2200,7 @@ receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw,
tx = dmu_tx_create(rwa->os);
dmu_tx_hold_write(tx, drrw->drr_object,
drrw->drr_offset, drrw->drr_length);
drrw->drr_offset, drrw->drr_logical_size);
err = dmu_tx_assign(tx, TXG_WAIT);
if (err != 0) {
dmu_tx_abort(tx);
@ -2117,9 +2210,10 @@ receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw,
dmu_object_byteswap_t byteswap =
DMU_OT_BYTESWAP(drrw->drr_type);
dmu_ot_byteswap[byteswap].ob_func(abuf->b_data,
drrw->drr_length);
DRR_WRITE_PAYLOAD_SIZE(drrw));
}
/* use the bonus buf to look up the dnode in dmu_assign_arcbuf */
dmu_buf_t *bonus;
if (dmu_bonus_hold(rwa->os, drrw->drr_object, FTAG, &bonus) != 0)
return (SET_ERROR(EINVAL));
@ -2532,18 +2626,31 @@ receive_read_record(struct receive_arg *ra)
case DRR_WRITE:
{
struct drr_write *drrw = &ra->rrd->header.drr_u.drr_write;
arc_buf_t *abuf = arc_loan_buf(dmu_objset_spa(ra->os),
drrw->drr_length);
arc_buf_t *abuf;
boolean_t is_meta = DMU_OT_IS_METADATA(drrw->drr_type);
if (DRR_WRITE_COMPRESSED(drrw)) {
ASSERT3U(drrw->drr_compressed_size, >, 0);
ASSERT3U(drrw->drr_logical_size, >=,
drrw->drr_compressed_size);
ASSERT(!is_meta);
abuf = arc_loan_compressed_buf(
dmu_objset_spa(ra->os),
drrw->drr_compressed_size, drrw->drr_logical_size,
drrw->drr_compressiontype);
} else {
abuf = arc_loan_buf(dmu_objset_spa(ra->os),
is_meta, drrw->drr_logical_size);
}
err = receive_read_payload_and_next_header(ra,
drrw->drr_length, abuf->b_data);
DRR_WRITE_PAYLOAD_SIZE(drrw), abuf->b_data);
if (err != 0) {
dmu_return_arcbuf(abuf);
return (err);
}
ra->rrd->write_buf = abuf;
receive_read_prefetch(ra, drrw->drr_object, drrw->drr_offset,
drrw->drr_length);
drrw->drr_logical_size);
return (err);
}
case DRR_WRITE_BYREF:

View File

@ -1809,10 +1809,18 @@ get_receive_resume_stats(dsl_dataset_t *ds, nvlist_t *nv)
DS_FIELD_RESUME_TONAME, 1, sizeof (buf), buf) == 0) {
fnvlist_add_string(token_nv, "toname", buf);
}
if (zap_contains(dp->dp_meta_objset, ds->ds_object,
DS_FIELD_RESUME_LARGEBLOCK) == 0) {
fnvlist_add_boolean(token_nv, "largeblockok");
}
if (zap_contains(dp->dp_meta_objset, ds->ds_object,
DS_FIELD_RESUME_EMBEDOK) == 0) {
fnvlist_add_boolean(token_nv, "embedok");
}
if (zap_contains(dp->dp_meta_objset, ds->ds_object,
DS_FIELD_RESUME_COMPRESSOK) == 0) {
fnvlist_add_boolean(token_nv, "compressok");
}
packed = fnvlist_pack(token_nv, &packed_size);
fnvlist_free(token_nv);
compressed = kmem_alloc(packed_size, KM_SLEEP);

View File

@ -85,7 +85,7 @@ lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
/*
* Returns 0 on success (decompression function returned non-negative)
* and non-zero on failure (decompression function returned negative.
* and non-zero on failure (decompression function returned negative).
*/
return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
d_start, bufsiz, d_len) < 0);

View File

@ -122,11 +122,17 @@ typedef enum arc_flags
} arc_flags_t;
/*
 * Per-buffer flag bits, stored in the b_flags member of struct arc_buf.
 * Each enumerator occupies its own bit.
 */
typedef enum arc_buf_flags {
/* presumably: the buf shares its data with its header -- TODO confirm */
ARC_BUF_FLAG_SHARED = 1 << 0,
/* presumably: b_data holds compressed bytes -- TODO confirm */
ARC_BUF_FLAG_COMPRESSED = 1 << 1
} arc_buf_flags_t;
/*
 * One ARC buffer as seen by consumers; callers in this change read and fill
 * its contents directly through b_data.
 */
struct arc_buf {
/* presumably the header this buf belongs to -- TODO confirm */
arc_buf_hdr_t *b_hdr;
/* presumably links bufs that hang off the same header -- TODO confirm */
arc_buf_t *b_next;
kmutex_t b_evict_lock;
/* start of the buffer's contents */
void *b_data;
/* ARC_BUF_FLAG_* bits */
arc_buf_flags_t b_flags;
};
typedef enum arc_buf_contents {
@ -150,13 +156,21 @@ typedef enum arc_space_type {
void arc_space_consume(uint64_t space, arc_space_type_t type);
void arc_space_return(uint64_t space, arc_space_type_t type);
arc_buf_t *arc_alloc_buf(spa_t *spa, int32_t size, void *tag,
arc_buf_contents_t type);
arc_buf_t *arc_loan_buf(spa_t *spa, int size);
boolean_t arc_is_metadata(arc_buf_t *buf);
enum zio_compress arc_get_compression(arc_buf_t *buf);
int arc_decompress(arc_buf_t *buf);
arc_buf_t *arc_alloc_buf(spa_t *spa, void *tag, arc_buf_contents_t type,
int32_t size);
arc_buf_t *arc_alloc_compressed_buf(spa_t *spa, void *tag,
uint64_t psize, uint64_t lsize, enum zio_compress compression_type);
arc_buf_t *arc_loan_buf(spa_t *spa, boolean_t is_metadata, int size);
arc_buf_t *arc_loan_compressed_buf(spa_t *spa, uint64_t psize, uint64_t lsize,
enum zio_compress compression_type);
void arc_return_buf(arc_buf_t *buf, void *tag);
void arc_loan_inuse_buf(arc_buf_t *buf, void *tag);
void arc_buf_destroy(arc_buf_t *buf, void *tag);
int arc_buf_size(arc_buf_t *buf);
int arc_buf_lsize(arc_buf_t *buf);
void arc_release(arc_buf_t *buf, void *tag);
int arc_released(arc_buf_t *buf);
void arc_buf_freeze(arc_buf_t *buf);

View File

@ -48,6 +48,7 @@
#include <sys/inttypes.h>
#include <sys/cred.h>
#include <sys/fs/zfs.h>
#include <sys/zio_compress.h>
#include <sys/zio_priority.h>
#ifdef __cplusplus
@ -419,7 +420,7 @@ dmu_write_embedded(objset_t *os, uint64_t object, uint64_t offset,
#define WP_SPILL 0x4
void dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp,
struct zio_prop *zp);
enum zio_compress compress_override, struct zio_prop *zp);
/*
* The bonus data is accessed more or less like a regular buffer.
* You must dmu_bonus_hold() to get the buffer, which will give you a

View File

@ -42,14 +42,15 @@ struct dmu_replay_record;
extern const char *recv_clone_name;
int dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
boolean_t large_block_ok, int outfd, uint64_t resumeobj, uint64_t resumeoff,
boolean_t large_block_ok, boolean_t compressok, int outfd,
uint64_t resumeobj, uint64_t resumeoff,
struct vnode *vp, offset_t *off);
int dmu_send_estimate(struct dsl_dataset *ds, struct dsl_dataset *fromds,
uint64_t *sizep);
boolean_t stream_compressed, uint64_t *sizep);
int dmu_send_estimate_from_txg(struct dsl_dataset *ds, uint64_t fromtxg,
uint64_t *sizep);
boolean_t stream_compressed, uint64_t *sizep);
int dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
boolean_t embedok, boolean_t large_block_ok,
boolean_t embedok, boolean_t large_block_ok, boolean_t compressok,
int outfd, struct vnode *vp, offset_t *off);
typedef struct dmu_recv_cookie {

View File

@ -96,7 +96,9 @@ struct dsl_pool;
#define DS_FIELD_RESUME_OBJECT "com.delphix:resume_object"
#define DS_FIELD_RESUME_OFFSET "com.delphix:resume_offset"
#define DS_FIELD_RESUME_BYTES "com.delphix:resume_bytes"
#define DS_FIELD_RESUME_LARGEBLOCK "com.delphix:resume_largeblockok"
#define DS_FIELD_RESUME_EMBEDOK "com.delphix:resume_embedok"
#define DS_FIELD_RESUME_COMPRESSOK "com.delphix:resume_compressok"
/*
* DS_FLAG_CI_DATASET is set if the dataset contains a file system whose

View File

@ -103,7 +103,7 @@ typedef struct refcount {
atomic_add_64(&(src)->rc_count, -__tmp); \
atomic_add_64(&(dst)->rc_count, __tmp); \
}
#define refcount_transfer_ownership(rc, current_holder, new_holder)
#define refcount_transfer_ownership(rc, current_holder, new_holder) (void)0
#define refcount_held(rc, holder) ((rc)->rc_count > 0)
#define refcount_not_held(rc, holder) (B_TRUE)

View File

@ -87,19 +87,22 @@ typedef enum drr_headertype {
#define DMU_BACKUP_FEATURE_SA_SPILL (1 << 2)
/* flags #3 - #15 are reserved for incompatible closed-source implementations */
#define DMU_BACKUP_FEATURE_EMBED_DATA (1 << 16)
#define DMU_BACKUP_FEATURE_EMBED_DATA_LZ4 (1 << 17)
#define DMU_BACKUP_FEATURE_LZ4 (1 << 17)
/* flag #18 is reserved for a Delphix feature */
#define DMU_BACKUP_FEATURE_LARGE_BLOCKS (1 << 19)
#define DMU_BACKUP_FEATURE_RESUMING (1 << 20)
/* flag #21 is reserved for a Delphix feature */
#define DMU_BACKUP_FEATURE_COMPRESSED (1 << 22)
/*
* Mask of all supported backup features
*/
#define DMU_BACKUP_FEATURE_MASK (DMU_BACKUP_FEATURE_DEDUP | \
DMU_BACKUP_FEATURE_DEDUPPROPS | DMU_BACKUP_FEATURE_SA_SPILL | \
DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_EMBED_DATA_LZ4 | \
DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_LZ4 | \
DMU_BACKUP_FEATURE_RESUMING | \
DMU_BACKUP_FEATURE_LARGE_BLOCKS)
DMU_BACKUP_FEATURE_LARGE_BLOCKS | \
DMU_BACKUP_FEATURE_COMPRESSED)
/* Are all features in the given flag word currently supported? */
#define DMU_STREAM_SUPPORTED(x) (!((x) & ~DMU_BACKUP_FEATURE_MASK))
@ -152,6 +155,12 @@ typedef enum dmu_send_resume_token_version {
#define DRR_IS_DEDUP_CAPABLE(flags) ((flags) & DRR_CHECKSUM_DEDUP)
/*
 * Helpers for drr_write replay records, whose payload may be compressed.
 *
 * DRR_WRITE_COMPRESSED(drrw) is true when drr_compressiontype is nonzero.
 * DRR_WRITE_PAYLOAD_SIZE(drrw) is the number of payload bytes that accompany
 * the record: drr_compressed_size for a compressed record, drr_logical_size
 * otherwise.
 *
 * DRR_WRITE_PAYLOAD_SIZE evaluates drrw more than once, so the argument must
 * not have side effects.
 */
#define DRR_WRITE_COMPRESSED(drrw) ((drrw)->drr_compressiontype != 0)
#define DRR_WRITE_PAYLOAD_SIZE(drrw) \
(DRR_WRITE_COMPRESSED(drrw) ? (drrw)->drr_compressed_size : \
(drrw)->drr_logical_size)
/*
* zfs ioctl command structure
*/
@ -199,12 +208,16 @@ typedef struct dmu_replay_record {
dmu_object_type_t drr_type;
uint32_t drr_pad;
uint64_t drr_offset;
uint64_t drr_length;
uint64_t drr_logical_size;
uint64_t drr_toguid;
uint8_t drr_checksumtype;
uint8_t drr_checksumflags;
uint8_t drr_pad2[6];
ddt_key_t drr_key; /* deduplication key */
uint8_t drr_compressiontype;
uint8_t drr_pad2[5];
/* deduplication key */
ddt_key_t drr_key;
/* only nonzero if drr_compressiontype is not 0 */
uint64_t drr_compressed_size;
/* content follows */
} drr_write;
struct drr_free {

View File

@ -104,26 +104,6 @@ enum zio_checksum {
#define ZIO_DEDUPCHECKSUM ZIO_CHECKSUM_SHA256
#define ZIO_DEDUPDITTO_MIN 100
enum zio_compress {
ZIO_COMPRESS_INHERIT = 0,
ZIO_COMPRESS_ON,
ZIO_COMPRESS_OFF,
ZIO_COMPRESS_LZJB,
ZIO_COMPRESS_EMPTY,
ZIO_COMPRESS_GZIP_1,
ZIO_COMPRESS_GZIP_2,
ZIO_COMPRESS_GZIP_3,
ZIO_COMPRESS_GZIP_4,
ZIO_COMPRESS_GZIP_5,
ZIO_COMPRESS_GZIP_6,
ZIO_COMPRESS_GZIP_7,
ZIO_COMPRESS_GZIP_8,
ZIO_COMPRESS_GZIP_9,
ZIO_COMPRESS_ZLE,
ZIO_COMPRESS_LZ4,
ZIO_COMPRESS_FUNCTIONS
};
/*
* The number of "legacy" compression functions which can be set on individual
* objects.
@ -428,6 +408,8 @@ struct zio {
void *io_orig_data;
uint64_t io_size;
uint64_t io_orig_size;
/* io_lsize != io_orig_size iff this is a raw write */
uint64_t io_lsize;
/* Stuff for the vdev stack */
vdev_t *io_vd;
@ -482,11 +464,11 @@ extern zio_t *zio_root(spa_t *spa,
zio_done_func_t *done, void *private, enum zio_flag flags);
extern zio_t *zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, void *data,
uint64_t size, zio_done_func_t *done, void *private,
uint64_t lsize, zio_done_func_t *done, void *private,
zio_priority_t priority, enum zio_flag flags, const zbookmark_phys_t *zb);
extern zio_t *zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
void *data, uint64_t size, const zio_prop_t *zp,
void *data, uint64_t size, uint64_t psize, const zio_prop_t *zp,
zio_done_func_t *ready, zio_done_func_t *children_ready,
zio_done_func_t *physdone, zio_done_func_t *done,
void *private, zio_priority_t priority, enum zio_flag flags,

View File

@ -25,17 +25,36 @@
*/
/*
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2015 by Delphix. All rights reserved.
*/
#ifndef _SYS_ZIO_COMPRESS_H
#define _SYS_ZIO_COMPRESS_H
#include <sys/zio.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Compression algorithm identifiers.
 *
 * ZIO_COMPRESS_INHERIT is explicitly 0 and every later enumerator takes the
 * next integer, ending with ZIO_COMPRESS_FUNCTIONS as the number of entries.
 * The send code copies one of these values into the uint8_t
 * drr_compressiontype field of a drr_write record.
 * NOTE(review): reordering or inserting entries would therefore presumably
 * change the stream format -- confirm before editing.
 */
enum zio_compress {
ZIO_COMPRESS_INHERIT = 0,
ZIO_COMPRESS_ON,
ZIO_COMPRESS_OFF,
ZIO_COMPRESS_LZJB,
ZIO_COMPRESS_EMPTY,
ZIO_COMPRESS_GZIP_1,
ZIO_COMPRESS_GZIP_2,
ZIO_COMPRESS_GZIP_3,
ZIO_COMPRESS_GZIP_4,
ZIO_COMPRESS_GZIP_5,
ZIO_COMPRESS_GZIP_6,
ZIO_COMPRESS_GZIP_7,
ZIO_COMPRESS_GZIP_8,
ZIO_COMPRESS_GZIP_9,
ZIO_COMPRESS_ZLE,
ZIO_COMPRESS_LZ4,
/* number of entries above; not a real algorithm */
ZIO_COMPRESS_FUNCTIONS
};
/* Common signature for all zio compress functions. */
typedef size_t zio_compress_func_t(void *src, void *dst,
size_t s_len, size_t d_len, int);

View File

@ -4412,6 +4412,7 @@ zfs_ioc_send(zfs_cmd_t *zc)
boolean_t estimate = (zc->zc_guid != 0);
boolean_t embedok = (zc->zc_flags & 0x1);
boolean_t large_block_ok = (zc->zc_flags & 0x2);
boolean_t compressok = (zc->zc_flags & 0x4);
if (zc->zc_obj != 0) {
dsl_pool_t *dp;
@ -4459,7 +4460,7 @@ zfs_ioc_send(zfs_cmd_t *zc)
}
}
error = dmu_send_estimate(tosnap, fromsnap,
error = dmu_send_estimate(tosnap, fromsnap, compressok,
&zc->zc_objset_type);
if (fromsnap != NULL)
@ -4473,7 +4474,7 @@ zfs_ioc_send(zfs_cmd_t *zc)
off = fp->f_offset;
error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
zc->zc_fromobj, embedok, large_block_ok,
zc->zc_fromobj, embedok, large_block_ok, compressok,
zc->zc_cookie, fp->f_vnode, &off);
if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
@ -5406,6 +5407,8 @@ zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
* indicates that blocks > 128KB are permitted
* (optional) "embedok" -> (value ignored)
* presence indicates DRR_WRITE_EMBEDDED records are permitted
* (optional) "compressok" -> (value ignored)
* presence indicates compressed DRR_WRITE records are permitted
* (optional) "resume_object" and "resume_offset" -> (uint64)
* if present, resume send stream from specified object and offset.
* }
@ -5422,6 +5425,7 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
int fd;
boolean_t largeblockok;
boolean_t embedok;
boolean_t compressok;
uint64_t resumeobj = 0;
uint64_t resumeoff = 0;
@ -5433,6 +5437,7 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
largeblockok = nvlist_exists(innvl, "largeblockok");
embedok = nvlist_exists(innvl, "embedok");
compressok = nvlist_exists(innvl, "compressok");
(void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
(void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
@ -5442,8 +5447,8 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
return (SET_ERROR(EBADF));
off = fp->f_offset;
error = dmu_send(snapname, fromname, embedok, largeblockok, fd,
resumeobj, resumeoff, fp->f_vnode, &off);
error = dmu_send(snapname, fromname, embedok, largeblockok, compressok,
fd, resumeobj, resumeoff, fp->f_vnode, &off);
if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
fp->f_offset = off;
@ -5458,6 +5463,12 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
* innvl: {
* (optional) "from" -> full snap or bookmark name to send an incremental
* from
* (optional) "largeblockok" -> (value ignored)
* indicates that blocks > 128KB are permitted
* (optional) "embedok" -> (value ignored)
* presence indicates DRR_WRITE_EMBEDDED records are permitted
* (optional) "compressok" -> (value ignored)
* presence indicates compressed DRR_WRITE records are permitted
* }
*
* outnvl: {
@ -5471,6 +5482,11 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
dsl_dataset_t *tosnap;
int error;
char *fromname;
/* LINTED E_FUNC_SET_NOT_USED */
boolean_t largeblockok;
/* LINTED E_FUNC_SET_NOT_USED */
boolean_t embedok;
boolean_t compressok;
uint64_t space;
error = dsl_pool_hold(snapname, FTAG, &dp);
@ -5483,6 +5499,10 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
return (error);
}
largeblockok = nvlist_exists(innvl, "largeblockok");
embedok = nvlist_exists(innvl, "embedok");
compressok = nvlist_exists(innvl, "compressok");
error = nvlist_lookup_string(innvl, "from", &fromname);
if (error == 0) {
if (strchr(fromname, '@') != NULL) {
@ -5495,7 +5515,8 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
if (error != 0)
goto out;
error = dmu_send_estimate(tosnap, fromsnap, &space);
error = dmu_send_estimate(tosnap, fromsnap, compressok,
&space);
dsl_dataset_rele(fromsnap, FTAG);
} else if (strchr(fromname, '#') != NULL) {
/*
@ -5510,7 +5531,7 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
if (error != 0)
goto out;
error = dmu_send_estimate_from_txg(tosnap,
frombm.zbm_creation_txg, &space);
frombm.zbm_creation_txg, compressok, &space);
} else {
/*
* from is not properly formatted as a snapshot or
@ -5521,7 +5542,7 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
}
} else {
// If estimating the size of a full send, use dmu_send_estimate
error = dmu_send_estimate(tosnap, NULL, &space);
error = dmu_send_estimate(tosnap, NULL, compressok, &space);
}
fnvlist_add_uint64(outnvl, "space", space);

View File

@ -518,21 +518,23 @@ zio_timestamp_compare(const void *x1, const void *x2)
*/
static zio_t *
zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
void *data, uint64_t size, zio_done_func_t *done, void *private,
zio_type_t type, zio_priority_t priority, enum zio_flag flags,
vdev_t *vd, uint64_t offset, const zbookmark_phys_t *zb,
enum zio_stage stage, enum zio_stage pipeline)
void *data, uint64_t lsize, uint64_t psize, zio_done_func_t *done,
void *private, zio_type_t type, zio_priority_t priority,
enum zio_flag flags, vdev_t *vd, uint64_t offset,
const zbookmark_phys_t *zb, enum zio_stage stage, enum zio_stage pipeline)
{
zio_t *zio;
ASSERT3U(size, <=, SPA_MAXBLOCKSIZE);
ASSERT(P2PHASE(size, SPA_MINBLOCKSIZE) == 0);
ASSERT3U(psize, <=, SPA_MAXBLOCKSIZE);
ASSERT(P2PHASE(psize, SPA_MINBLOCKSIZE) == 0);
ASSERT(P2PHASE(offset, SPA_MINBLOCKSIZE) == 0);
ASSERT(!vd || spa_config_held(spa, SCL_STATE_ALL, RW_READER));
ASSERT(!bp || !(flags & ZIO_FLAG_CONFIG_WRITER));
ASSERT(vd || stage == ZIO_STAGE_OPEN);
IMPLY(lsize != psize, (flags & ZIO_FLAG_RAW) != 0);
zio = kmem_cache_alloc(zio_cache, KM_SLEEP);
bzero(zio, sizeof (zio_t));
@ -576,7 +578,8 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
zio->io_vd = vd;
zio->io_offset = offset;
zio->io_orig_data = zio->io_data = data;
zio->io_orig_size = zio->io_size = size;
zio->io_orig_size = zio->io_size = psize;
zio->io_lsize = lsize;
zio->io_orig_flags = zio->io_flags = flags;
zio->io_orig_stage = zio->io_stage = stage;
zio->io_orig_pipeline = zio->io_pipeline = pipeline;
@ -616,7 +619,7 @@ zio_null(zio_t *pio, spa_t *spa, vdev_t *vd, zio_done_func_t *done,
{
zio_t *zio;
zio = zio_create(pio, spa, 0, NULL, NULL, 0, done, private,
zio = zio_create(pio, spa, 0, NULL, NULL, 0, 0, done, private,
ZIO_TYPE_NULL, ZIO_PRIORITY_NOW, flags, vd, 0, NULL,
ZIO_STAGE_OPEN, ZIO_INTERLOCK_PIPELINE);
@ -721,7 +724,7 @@ zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
zfs_blkptr_verify(spa, bp);
zio = zio_create(pio, spa, BP_PHYSICAL_BIRTH(bp), bp,
data, size, done, private,
data, size, size, done, private,
ZIO_TYPE_READ, priority, flags, NULL, 0, zb,
ZIO_STAGE_OPEN, (flags & ZIO_FLAG_DDT_CHILD) ?
ZIO_DDT_CHILD_READ_PIPELINE : ZIO_READ_PIPELINE);
@ -731,7 +734,7 @@ zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
zio_t *
zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
void *data, uint64_t size, const zio_prop_t *zp,
void *data, uint64_t lsize, uint64_t psize, const zio_prop_t *zp,
zio_done_func_t *ready, zio_done_func_t *children_ready,
zio_done_func_t *physdone, zio_done_func_t *done,
void *private, zio_priority_t priority, enum zio_flag flags,
@ -748,7 +751,7 @@ zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
zp->zp_copies > 0 &&
zp->zp_copies <= spa_max_replication(spa));
zio = zio_create(pio, spa, txg, bp, data, size, done, private,
zio = zio_create(pio, spa, txg, bp, data, lsize, psize, done, private,
ZIO_TYPE_WRITE, priority, flags, NULL, 0, zb,
ZIO_STAGE_OPEN, (flags & ZIO_FLAG_DDT_CHILD) ?
ZIO_DDT_CHILD_WRITE_PIPELINE : ZIO_WRITE_PIPELINE);
@ -778,7 +781,7 @@ zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, void *data,
{
zio_t *zio;
zio = zio_create(pio, spa, txg, bp, data, size, done, private,
zio = zio_create(pio, spa, txg, bp, data, size, size, done, private,
ZIO_TYPE_WRITE, priority, flags | ZIO_FLAG_IO_REWRITE, NULL, 0, zb,
ZIO_STAGE_OPEN, ZIO_REWRITE_PIPELINE);
@ -858,8 +861,8 @@ zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
stage |= ZIO_STAGE_ISSUE_ASYNC;
zio = zio_create(pio, spa, txg, bp, NULL, BP_GET_PSIZE(bp),
NULL, NULL, ZIO_TYPE_FREE, ZIO_PRIORITY_NOW, flags,
NULL, 0, NULL, ZIO_STAGE_OPEN, stage);
BP_GET_PSIZE(bp), NULL, NULL, ZIO_TYPE_FREE, ZIO_PRIORITY_NOW,
flags, NULL, 0, NULL, ZIO_STAGE_OPEN, stage);
return (zio);
}
@ -892,8 +895,8 @@ zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
ASSERT(!BP_GET_DEDUP(bp) || !spa_writeable(spa)); /* zdb(1M) */
zio = zio_create(pio, spa, txg, bp, NULL, BP_GET_PSIZE(bp),
done, private, ZIO_TYPE_CLAIM, ZIO_PRIORITY_NOW, flags,
NULL, 0, NULL, ZIO_STAGE_OPEN, ZIO_CLAIM_PIPELINE);
BP_GET_PSIZE(bp), done, private, ZIO_TYPE_CLAIM, ZIO_PRIORITY_NOW,
flags, NULL, 0, NULL, ZIO_STAGE_OPEN, ZIO_CLAIM_PIPELINE);
ASSERT0(zio->io_queued_timestamp);
return (zio);
@ -907,7 +910,7 @@ zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd,
int c;
if (vd->vdev_children == 0) {
zio = zio_create(pio, spa, 0, NULL, NULL, 0, done, private,
zio = zio_create(pio, spa, 0, NULL, NULL, 0, 0, done, private,
ZIO_TYPE_IOCTL, ZIO_PRIORITY_NOW, flags, vd, 0, NULL,
ZIO_STAGE_OPEN, ZIO_IOCTL_PIPELINE);
@ -935,9 +938,9 @@ zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
offset >= vd->vdev_psize - VDEV_LABEL_END_SIZE);
ASSERT3U(offset + size, <=, vd->vdev_psize);
zio = zio_create(pio, vd->vdev_spa, 0, NULL, data, size, done, private,
ZIO_TYPE_READ, priority, flags | ZIO_FLAG_PHYSICAL, vd, offset,
NULL, ZIO_STAGE_OPEN, ZIO_READ_PHYS_PIPELINE);
zio = zio_create(pio, vd->vdev_spa, 0, NULL, data, size, size, done,
private, ZIO_TYPE_READ, priority, flags | ZIO_FLAG_PHYSICAL, vd,
offset, NULL, ZIO_STAGE_OPEN, ZIO_READ_PHYS_PIPELINE);
zio->io_prop.zp_checksum = checksum;
@ -956,9 +959,9 @@ zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
offset >= vd->vdev_psize - VDEV_LABEL_END_SIZE);
ASSERT3U(offset + size, <=, vd->vdev_psize);
zio = zio_create(pio, vd->vdev_spa, 0, NULL, data, size, done, private,
ZIO_TYPE_WRITE, priority, flags | ZIO_FLAG_PHYSICAL, vd, offset,
NULL, ZIO_STAGE_OPEN, ZIO_WRITE_PHYS_PIPELINE);
zio = zio_create(pio, vd->vdev_spa, 0, NULL, data, size, size, done,
private, ZIO_TYPE_WRITE, priority, flags | ZIO_FLAG_PHYSICAL, vd,
offset, NULL, ZIO_STAGE_OPEN, ZIO_WRITE_PHYS_PIPELINE);
zio->io_prop.zp_checksum = checksum;
@ -1034,7 +1037,7 @@ zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset,
flags &= ~ZIO_FLAG_IO_ALLOCATING;
}
zio = zio_create(pio, pio->io_spa, pio->io_txg, bp, data, size,
zio = zio_create(pio, pio->io_spa, pio->io_txg, bp, data, size, size,
done, private, type, priority, flags, vd, offset, &pio->io_bookmark,
ZIO_STAGE_VDEV_IO_START >> 1, pipeline);
ASSERT3U(zio->io_child_type, ==, ZIO_CHILD_VDEV);
@ -1056,7 +1059,7 @@ zio_vdev_delegated_io(vdev_t *vd, uint64_t offset, void *data, uint64_t size,
ASSERT(vd->vdev_ops->vdev_op_leaf);
zio = zio_create(NULL, vd->vdev_spa, 0, NULL,
data, size, done, private, type, priority,
data, size, size, done, private, type, priority,
flags | ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_RETRY | ZIO_FLAG_DELEGATED,
vd, offset, NULL,
ZIO_STAGE_VDEV_IO_START >> 1, ZIO_VDEV_CHILD_PIPELINE);
@ -1085,8 +1088,11 @@ zio_shrink(zio_t *zio, uint64_t size)
* Note, BP_IS_RAIDZ() assumes no compression.
*/
ASSERT(BP_GET_COMPRESS(zio->io_bp) == ZIO_COMPRESS_OFF);
if (!BP_IS_RAIDZ(zio->io_bp))
zio->io_orig_size = zio->io_size = size;
if (!BP_IS_RAIDZ(zio->io_bp)) {
/* we are not doing a raw write */
ASSERT3U(zio->io_size, ==, zio->io_lsize);
zio->io_orig_size = zio->io_size = zio->io_lsize = size;
}
}
/*
@ -1195,10 +1201,12 @@ zio_write_compress(zio_t *zio)
zio_prop_t *zp = &zio->io_prop;
enum zio_compress compress = zp->zp_compress;
blkptr_t *bp = zio->io_bp;
uint64_t lsize = zio->io_size;
uint64_t psize = lsize;
uint64_t lsize = zio->io_lsize;
uint64_t psize = zio->io_size;
int pass = 1;
EQUIV(lsize != psize, (zio->io_flags & ZIO_FLAG_RAW) != 0);
/*
* If our children haven't all reached the ready stage,
* wait for them and then repeat this pipeline stage.
@ -1247,7 +1255,8 @@ zio_write_compress(zio_t *zio)
spa_max_replication(spa)) == BP_GET_NDVAS(bp));
}
if (compress != ZIO_COMPRESS_OFF) {
/* If it's a compressed write that is not raw, compress the buffer. */
if (compress != ZIO_COMPRESS_OFF && psize == lsize) {
void *cbuf = zio_buf_alloc(lsize);
psize = zio_compress_data(compress, zio->io_data, cbuf, lsize);
if (psize == 0 || psize == lsize) {
@ -1298,6 +1307,8 @@ zio_write_compress(zio_t *zio)
zio->io_bp_override = NULL;
*bp = zio->io_bp_orig;
zio->io_pipeline = zio->io_orig_pipeline;
} else {
ASSERT3U(psize, !=, 0);
}
/*
@ -2157,8 +2168,8 @@ zio_write_gang_block(zio_t *pio)
zp.zp_nopwrite = B_FALSE;
zio_t *cio = zio_write(zio, spa, txg, &gbh->zg_blkptr[g],
(char *)pio->io_data + (pio->io_size - resid), lsize, &zp,
zio_write_gang_member_ready, NULL, NULL, NULL,
(char *)pio->io_data + (pio->io_size - resid), lsize, lsize,
&zp, zio_write_gang_member_ready, NULL, NULL, NULL,
&gn->gn_child[g], pio->io_priority,
ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark);
@ -2363,6 +2374,10 @@ static boolean_t
zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde)
{
spa_t *spa = zio->io_spa;
boolean_t do_raw = (zio->io_flags & ZIO_FLAG_RAW);
/* We should never get a zio that is both raw and an override */
ASSERT(!(zio->io_bp_override && do_raw));
/*
* Note: we compare the original data, not the transformed data,
@ -2386,6 +2401,7 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde)
if (ddp->ddp_phys_birth != 0) {
arc_buf_t *abuf = NULL;
arc_flags_t aflags = ARC_FLAG_WAIT;
int zio_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE;
blkptr_t blk = *zio->io_bp;
int error;
@ -2393,10 +2409,26 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde)
ddt_exit(ddt);
/*
* Intuitively, it would make more sense to compare
* io_data rather than io_orig_data in the raw case,
* since you don't want to look at any transformations
* that have happened to the data. However, for raw
* I/Os the data is actually the same in io_data and
* io_orig_data, so all we have to do is issue this as
* a raw ARC read.
*/
if (do_raw) {
zio_flags |= ZIO_FLAG_RAW;
ASSERT3U(zio->io_size, ==, zio->io_orig_size);
ASSERT0(bcmp(zio->io_data, zio->io_orig_data,
zio->io_size));
ASSERT3P(zio->io_transform_stack, ==, NULL);
}
error = arc_read(NULL, spa, &blk,
arc_getbuf_func, &abuf, ZIO_PRIORITY_SYNC_READ,
ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
&aflags, &zio->io_bookmark);
zio_flags, &aflags, &zio->io_bookmark);
if (error == 0) {
if (arc_buf_size(abuf) != zio->io_orig_size ||
@ -2511,6 +2543,7 @@ zio_ddt_write(zio_t *zio)
ASSERT(BP_GET_DEDUP(bp));
ASSERT(BP_GET_CHECKSUM(bp) == zp->zp_checksum);
ASSERT(BP_IS_HOLE(bp) || zio->io_bp_override);
ASSERT(!(zio->io_bp_override && (zio->io_flags & ZIO_FLAG_RAW)));
ddt_enter(ddt);
dde = ddt_lookup(ddt, bp, B_TRUE);
@ -2531,7 +2564,9 @@ zio_ddt_write(zio_t *zio)
BP_ZERO(bp);
} else {
zp->zp_dedup = B_FALSE;
BP_SET_DEDUP(bp, B_FALSE);
}
ASSERT(!BP_GET_DEDUP(bp));
zio->io_pipeline = ZIO_WRITE_PIPELINE;
ddt_exit(ddt);
return (ZIO_PIPELINE_CONTINUE);
@ -2564,7 +2599,7 @@ zio_ddt_write(zio_t *zio)
}
dio = zio_write(zio, spa, txg, bp, zio->io_orig_data,
zio->io_orig_size, &czp, NULL, NULL,
zio->io_orig_size, zio->io_orig_size, &czp, NULL, NULL,
NULL, zio_ddt_ditto_write_done, dde, zio->io_priority,
ZIO_DDT_CHILD_FLAGS(zio), &zio->io_bookmark);
@ -2586,7 +2621,7 @@ zio_ddt_write(zio_t *zio)
ddt_phys_addref(ddp);
} else {
cio = zio_write(zio, spa, txg, bp, zio->io_orig_data,
zio->io_orig_size, zp,
zio->io_orig_size, zio->io_orig_size, zp,
zio_ddt_child_write_ready, NULL, NULL,
zio_ddt_child_write_done, dde, zio->io_priority,
ZIO_DDT_CHILD_FLAGS(zio), &zio->io_bookmark);