4757 ZFS embedded-data block pointers ("zero block compression")
4913 zfs release should not be subject to space checks Reviewed by: Adam Leventhal <ahl@delphix.com> Reviewed by: Max Grossman <max.grossman@delphix.com> Reviewed by: George Wilson <george.wilson@delphix.com> Reviewed by: Christopher Siden <christopher.siden@delphix.com> Reviewed by: Dan McDonald <danmcd@omniti.com> Approved by: Dan McDonald <danmcd@omniti.com> illumos/illumos-dist@5d7b4d438c
This commit is contained in:
parent
899505ad7d
commit
ee3ed92a53
@ -1032,8 +1032,17 @@ snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp)
|
||||
return;
|
||||
}
|
||||
|
||||
blkbuf[0] = '\0';
|
||||
if (BP_IS_EMBEDDED(bp)) {
|
||||
(void) sprintf(blkbuf,
|
||||
"EMBEDDED et=%u %llxL/%llxP B=%llu",
|
||||
(int)BPE_GET_ETYPE(bp),
|
||||
(u_longlong_t)BPE_GET_LSIZE(bp),
|
||||
(u_longlong_t)BPE_GET_PSIZE(bp),
|
||||
(u_longlong_t)bp->blk_birth);
|
||||
return;
|
||||
}
|
||||
|
||||
blkbuf[0] = '\0';
|
||||
for (int i = 0; i < ndvas; i++)
|
||||
(void) snprintf(blkbuf + strlen(blkbuf),
|
||||
buflen - strlen(blkbuf), "%llu:%llx:%llx ",
|
||||
@ -1051,7 +1060,7 @@ snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp)
|
||||
"%llxL/%llxP F=%llu B=%llu/%llu",
|
||||
(u_longlong_t)BP_GET_LSIZE(bp),
|
||||
(u_longlong_t)BP_GET_PSIZE(bp),
|
||||
(u_longlong_t)bp->blk_fill,
|
||||
(u_longlong_t)BP_GET_FILL(bp),
|
||||
(u_longlong_t)bp->blk_birth,
|
||||
(u_longlong_t)BP_PHYSICAL_BIRTH(bp));
|
||||
}
|
||||
@ -1064,8 +1073,10 @@ print_indirect(blkptr_t *bp, const zbookmark_t *zb,
|
||||
char blkbuf[BP_SPRINTF_LEN];
|
||||
int l;
|
||||
|
||||
ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
|
||||
ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
|
||||
if (!BP_IS_EMBEDDED(bp)) {
|
||||
ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
|
||||
ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
|
||||
}
|
||||
|
||||
(void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb));
|
||||
|
||||
@ -1119,10 +1130,10 @@ visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
|
||||
err = visit_indirect(spa, dnp, cbp, &czb);
|
||||
if (err)
|
||||
break;
|
||||
fill += cbp->blk_fill;
|
||||
fill += BP_GET_FILL(cbp);
|
||||
}
|
||||
if (!err)
|
||||
ASSERT3U(fill, ==, bp->blk_fill);
|
||||
ASSERT3U(fill, ==, BP_GET_FILL(bp));
|
||||
(void) arc_buf_remove_ref(buf, &buf);
|
||||
}
|
||||
|
||||
@ -1789,14 +1800,14 @@ dump_dir(objset_t *os)
|
||||
|
||||
if (dds.dds_type == DMU_OST_META) {
|
||||
dds.dds_creation_txg = TXG_INITIAL;
|
||||
usedobjs = os->os_rootbp->blk_fill;
|
||||
usedobjs = BP_GET_FILL(os->os_rootbp);
|
||||
refdbytes = os->os_spa->spa_dsl_pool->
|
||||
dp_mos_dir->dd_phys->dd_used_bytes;
|
||||
} else {
|
||||
dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch);
|
||||
}
|
||||
|
||||
ASSERT3U(usedobjs, ==, os->os_rootbp->blk_fill);
|
||||
ASSERT3U(usedobjs, ==, BP_GET_FILL(os->os_rootbp));
|
||||
|
||||
zdb_nicenum(refdbytes, numbuf);
|
||||
|
||||
@ -2107,6 +2118,9 @@ typedef struct zdb_cb {
|
||||
zdb_blkstats_t zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
|
||||
uint64_t zcb_dedup_asize;
|
||||
uint64_t zcb_dedup_blocks;
|
||||
uint64_t zcb_embedded_blocks[NUM_BP_EMBEDDED_TYPES];
|
||||
uint64_t zcb_embedded_histogram[NUM_BP_EMBEDDED_TYPES]
|
||||
[BPE_PAYLOAD_SIZE];
|
||||
uint64_t zcb_start;
|
||||
uint64_t zcb_lastprint;
|
||||
uint64_t zcb_totalasize;
|
||||
@ -2161,6 +2175,13 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
|
||||
|
||||
}
|
||||
|
||||
if (BP_IS_EMBEDDED(bp)) {
|
||||
zcb->zcb_embedded_blocks[BPE_GET_ETYPE(bp)]++;
|
||||
zcb->zcb_embedded_histogram[BPE_GET_ETYPE(bp)]
|
||||
[BPE_GET_PSIZE(bp)]++;
|
||||
return;
|
||||
}
|
||||
|
||||
if (dump_opt['L'])
|
||||
return;
|
||||
|
||||
@ -2258,7 +2279,8 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
||||
|
||||
is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type));
|
||||
|
||||
if (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata)) {
|
||||
if (!BP_IS_EMBEDDED(bp) &&
|
||||
(dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata))) {
|
||||
size_t size = BP_GET_PSIZE(bp);
|
||||
void *data = zio_data_buf_alloc(size);
|
||||
int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;
|
||||
@ -2450,7 +2472,7 @@ dump_block_stats(spa_t *spa)
|
||||
zdb_blkstats_t *zb, *tzb;
|
||||
uint64_t norm_alloc, norm_space, total_alloc, total_found;
|
||||
int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD;
|
||||
int leaks = 0;
|
||||
boolean_t leaks = B_FALSE;
|
||||
|
||||
(void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n",
|
||||
(dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
|
||||
@ -2538,7 +2560,7 @@ dump_block_stats(spa_t *spa)
|
||||
(u_longlong_t)total_alloc,
|
||||
(dump_opt['L']) ? "unreachable" : "leaked",
|
||||
(longlong_t)(total_alloc - total_found));
|
||||
leaks = 1;
|
||||
leaks = B_TRUE;
|
||||
}
|
||||
|
||||
if (tzb->zb_count == 0)
|
||||
@ -2570,6 +2592,23 @@ dump_block_stats(spa_t *spa)
|
||||
(void) printf("\tSPA allocated: %10llu used: %5.2f%%\n",
|
||||
(u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space);
|
||||
|
||||
for (bp_embedded_type_t i = 0; i < NUM_BP_EMBEDDED_TYPES; i++) {
|
||||
if (zcb.zcb_embedded_blocks[i] == 0)
|
||||
continue;
|
||||
(void) printf("\n");
|
||||
(void) printf("\tadditional, non-pointer bps of type %u: "
|
||||
"%10llu\n",
|
||||
i, (u_longlong_t)zcb.zcb_embedded_blocks[i]);
|
||||
|
||||
if (dump_opt['b'] >= 3) {
|
||||
(void) printf("\t number of (compressed) bytes: "
|
||||
"number of bps\n");
|
||||
dump_histogram(zcb.zcb_embedded_histogram[i],
|
||||
sizeof (zcb.zcb_embedded_histogram[i]) /
|
||||
sizeof (zcb.zcb_embedded_histogram[i][0]), 0);
|
||||
}
|
||||
}
|
||||
|
||||
if (tzb->zb_ditto_samevdev != 0) {
|
||||
(void) printf("\tDittoed blocks on same vdev: %llu\n",
|
||||
(longlong_t)tzb->zb_ditto_samevdev);
|
||||
@ -2682,14 +2721,14 @@ zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
||||
avl_index_t where;
|
||||
zdb_ddt_entry_t *zdde, zdde_search;
|
||||
|
||||
if (BP_IS_HOLE(bp))
|
||||
if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
|
||||
return (0);
|
||||
|
||||
if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) {
|
||||
(void) printf("traversing objset %llu, %llu objects, "
|
||||
"%lu blocks so far\n",
|
||||
(u_longlong_t)zb->zb_objset,
|
||||
(u_longlong_t)bp->blk_fill,
|
||||
(u_longlong_t)BP_GET_FILL(bp),
|
||||
avl_numnodes(t));
|
||||
}
|
||||
|
||||
|
@ -256,9 +256,9 @@ get_usage(zfs_help_t idx)
|
||||
case HELP_ROLLBACK:
|
||||
return (gettext("\trollback [-rRf] <snapshot>\n"));
|
||||
case HELP_SEND:
|
||||
return (gettext("\tsend [-DnPpRv] [-[iI] snapshot] "
|
||||
return (gettext("\tsend [-DnPpRve] [-[iI] snapshot] "
|
||||
"<snapshot>\n"
|
||||
"\tsend [-i snapshot|bookmark] "
|
||||
"\tsend [-e] [-i snapshot|bookmark] "
|
||||
"<filesystem|volume|snapshot>\n"));
|
||||
case HELP_SET:
|
||||
return (gettext("\tset <property=value> "
|
||||
@ -573,6 +573,7 @@ finish_progress(char *done)
|
||||
free(pt_header);
|
||||
pt_header = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* zfs clone [-p] [-o prop=value] ... <snap> <fs | vol>
|
||||
*
|
||||
@ -3299,6 +3300,7 @@ rollback_check_dependent(zfs_handle_t *zhp, void *data)
|
||||
zfs_close(zhp);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Report any snapshots more recent than the one specified. Used when '-r' is
|
||||
* not specified. We reuse this same callback for the snapshot dependents - if
|
||||
@ -3638,7 +3640,7 @@ zfs_do_send(int argc, char **argv)
|
||||
boolean_t extraverbose = B_FALSE;
|
||||
|
||||
/* check options */
|
||||
while ((c = getopt(argc, argv, ":i:I:RDpvnP")) != -1) {
|
||||
while ((c = getopt(argc, argv, ":i:I:RDpvnPe")) != -1) {
|
||||
switch (c) {
|
||||
case 'i':
|
||||
if (fromname)
|
||||
@ -3673,6 +3675,9 @@ zfs_do_send(int argc, char **argv)
|
||||
case 'n':
|
||||
flags.dryrun = B_TRUE;
|
||||
break;
|
||||
case 'e':
|
||||
flags.embed_data = B_TRUE;
|
||||
break;
|
||||
case ':':
|
||||
(void) fprintf(stderr, gettext("missing argument for "
|
||||
"'%c' option\n"), optopt);
|
||||
@ -3711,6 +3716,7 @@ zfs_do_send(int argc, char **argv)
|
||||
if (strchr(argv[0], '@') == NULL ||
|
||||
(fromname && strchr(fromname, '#') != NULL)) {
|
||||
char frombuf[ZFS_MAXNAMELEN];
|
||||
enum lzc_send_flags lzc_flags = 0;
|
||||
|
||||
if (flags.replicate || flags.doall || flags.props ||
|
||||
flags.dedup || flags.dryrun || flags.verbose ||
|
||||
@ -3725,6 +3731,9 @@ zfs_do_send(int argc, char **argv)
|
||||
if (zhp == NULL)
|
||||
return (1);
|
||||
|
||||
if (flags.embed_data)
|
||||
lzc_flags |= LZC_SEND_FLAG_EMBED_DATA;
|
||||
|
||||
if (fromname != NULL &&
|
||||
(fromname[0] == '#' || fromname[0] == '@')) {
|
||||
/*
|
||||
@ -3738,7 +3747,7 @@ zfs_do_send(int argc, char **argv)
|
||||
(void) strlcat(frombuf, fromname, sizeof (frombuf));
|
||||
fromname = frombuf;
|
||||
}
|
||||
err = zfs_send_one(zhp, fromname, STDOUT_FILENO);
|
||||
err = zfs_send_one(zhp, fromname, STDOUT_FILENO, lzc_flags);
|
||||
zfs_close(zhp);
|
||||
return (err != 0);
|
||||
}
|
||||
|
@ -49,7 +49,6 @@
|
||||
*/
|
||||
#define DUMP_GROUPING 4
|
||||
|
||||
uint64_t drr_record_count[DRR_NUMTYPES];
|
||||
uint64_t total_write_size = 0;
|
||||
uint64_t total_stream_len = 0;
|
||||
FILE *send_stream = 0;
|
||||
@ -123,7 +122,7 @@ print_block(char *buf, int length)
|
||||
* Start printing ASCII characters at a constant offset, after
|
||||
* the hex prints. Leave 3 characters per byte on a line (2 digit
|
||||
* hex number plus 1 space) plus spaces between characters and
|
||||
* groupings
|
||||
* groupings.
|
||||
*/
|
||||
int ascii_start = BYTES_PER_LINE * 3 +
|
||||
BYTES_PER_LINE / DUMP_GROUPING + 2;
|
||||
@ -160,6 +159,8 @@ int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
char *buf = malloc(INITIAL_BUFLEN);
|
||||
uint64_t drr_record_count[DRR_NUMTYPES] = { 0 };
|
||||
uint64_t total_records = 0;
|
||||
dmu_replay_record_t thedrr;
|
||||
dmu_replay_record_t *drr = &thedrr;
|
||||
struct drr_begin *drrb = &thedrr.drr_u.drr_begin;
|
||||
@ -170,6 +171,7 @@ main(int argc, char *argv[])
|
||||
struct drr_write_byref *drrwbr = &thedrr.drr_u.drr_write_byref;
|
||||
struct drr_free *drrf = &thedrr.drr_u.drr_free;
|
||||
struct drr_spill *drrs = &thedrr.drr_u.drr_spill;
|
||||
struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded;
|
||||
char c;
|
||||
boolean_t verbose = B_FALSE;
|
||||
boolean_t first = B_TRUE;
|
||||
@ -264,6 +266,7 @@ main(int argc, char *argv[])
|
||||
}
|
||||
|
||||
drr_record_count[drr->drr_type]++;
|
||||
total_records++;
|
||||
|
||||
switch (drr->drr_type) {
|
||||
case DRR_BEGIN:
|
||||
@ -376,8 +379,8 @@ main(int argc, char *argv[])
|
||||
drro->drr_bonuslen);
|
||||
}
|
||||
if (drro->drr_bonuslen > 0) {
|
||||
(void) ssread(buf, P2ROUNDUP(drro->drr_bonuslen,
|
||||
8), &zc);
|
||||
(void) ssread(buf,
|
||||
P2ROUNDUP(drro->drr_bonuslen, 8), &zc);
|
||||
if (dump) {
|
||||
print_block(buf,
|
||||
P2ROUNDUP(drro->drr_bonuslen, 8));
|
||||
@ -506,6 +509,38 @@ main(int argc, char *argv[])
|
||||
print_block(buf, drrs->drr_length);
|
||||
}
|
||||
break;
|
||||
case DRR_WRITE_EMBEDDED:
|
||||
if (do_byteswap) {
|
||||
drrwe->drr_object =
|
||||
BSWAP_64(drrwe->drr_object);
|
||||
drrwe->drr_offset =
|
||||
BSWAP_64(drrwe->drr_offset);
|
||||
drrwe->drr_length =
|
||||
BSWAP_64(drrwe->drr_length);
|
||||
drrwe->drr_toguid =
|
||||
BSWAP_64(drrwe->drr_toguid);
|
||||
drrwe->drr_lsize =
|
||||
BSWAP_32(drrwe->drr_lsize);
|
||||
drrwe->drr_psize =
|
||||
BSWAP_32(drrwe->drr_psize);
|
||||
}
|
||||
if (verbose) {
|
||||
(void) printf("WRITE_EMBEDDED object = %llu "
|
||||
"offset = %llu length = %llu\n"
|
||||
"toguid = %llx comp = %u etype = %u "
|
||||
"lsize = %u psize = %u\n",
|
||||
(u_longlong_t)drrwe->drr_object,
|
||||
(u_longlong_t)drrwe->drr_offset,
|
||||
(u_longlong_t)drrwe->drr_length,
|
||||
(u_longlong_t)drrwe->drr_toguid,
|
||||
drrwe->drr_compression,
|
||||
drrwe->drr_etype,
|
||||
drrwe->drr_lsize,
|
||||
drrwe->drr_psize);
|
||||
}
|
||||
(void) ssread(buf,
|
||||
P2ROUNDUP(drrwe->drr_psize, 8), &zc);
|
||||
break;
|
||||
}
|
||||
pcksum = zc;
|
||||
}
|
||||
@ -524,18 +559,16 @@ main(int argc, char *argv[])
|
||||
(u_longlong_t)drr_record_count[DRR_FREEOBJECTS]);
|
||||
(void) printf("\tTotal DRR_WRITE records = %lld\n",
|
||||
(u_longlong_t)drr_record_count[DRR_WRITE]);
|
||||
(void) printf("\tTotal DRR_WRITE_BYREF records = %lld\n",
|
||||
(u_longlong_t)drr_record_count[DRR_WRITE_BYREF]);
|
||||
(void) printf("\tTotal DRR_WRITE_EMBEDDED records = %lld\n",
|
||||
(u_longlong_t)drr_record_count[DRR_WRITE_EMBEDDED]);
|
||||
(void) printf("\tTotal DRR_FREE records = %lld\n",
|
||||
(u_longlong_t)drr_record_count[DRR_FREE]);
|
||||
(void) printf("\tTotal DRR_SPILL records = %lld\n",
|
||||
(u_longlong_t)drr_record_count[DRR_SPILL]);
|
||||
(void) printf("\tTotal records = %lld\n",
|
||||
(u_longlong_t)(drr_record_count[DRR_BEGIN] +
|
||||
drr_record_count[DRR_OBJECT] +
|
||||
drr_record_count[DRR_FREEOBJECTS] +
|
||||
drr_record_count[DRR_WRITE] +
|
||||
drr_record_count[DRR_FREE] +
|
||||
drr_record_count[DRR_SPILL] +
|
||||
drr_record_count[DRR_END]));
|
||||
(u_longlong_t)total_records);
|
||||
(void) printf("\tTotal write size = %lld (0x%llx)\n",
|
||||
(u_longlong_t)total_write_size, (u_longlong_t)total_write_size);
|
||||
(void) printf("\tTotal stream length = %lld (0x%llx)\n",
|
||||
|
@ -52,7 +52,7 @@
|
||||
* At random times, the child self-immolates with a SIGKILL.
|
||||
* This is the software equivalent of pulling the power cord.
|
||||
* The parent then runs the test again, using the existing
|
||||
* storage pool, as many times as desired. If backwards compatability
|
||||
* storage pool, as many times as desired. If backwards compatibility
|
||||
* testing is enabled ztest will sometimes run the "older" version
|
||||
* of ztest after a SIGKILL.
|
||||
*
|
||||
@ -1265,13 +1265,13 @@ static void
|
||||
ztest_bt_verify(ztest_block_tag_t *bt, objset_t *os, uint64_t object,
|
||||
uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg)
|
||||
{
|
||||
ASSERT(bt->bt_magic == BT_MAGIC);
|
||||
ASSERT(bt->bt_objset == dmu_objset_id(os));
|
||||
ASSERT(bt->bt_object == object);
|
||||
ASSERT(bt->bt_offset == offset);
|
||||
ASSERT(bt->bt_gen <= gen);
|
||||
ASSERT(bt->bt_txg <= txg);
|
||||
ASSERT(bt->bt_crtxg == crtxg);
|
||||
ASSERT3U(bt->bt_magic, ==, BT_MAGIC);
|
||||
ASSERT3U(bt->bt_objset, ==, dmu_objset_id(os));
|
||||
ASSERT3U(bt->bt_object, ==, object);
|
||||
ASSERT3U(bt->bt_offset, ==, offset);
|
||||
ASSERT3U(bt->bt_gen, <=, gen);
|
||||
ASSERT3U(bt->bt_txg, <=, txg);
|
||||
ASSERT3U(bt->bt_crtxg, ==, crtxg);
|
||||
}
|
||||
|
||||
static ztest_block_tag_t *
|
||||
@ -3470,6 +3470,11 @@ ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id)
|
||||
if (error)
|
||||
fatal(0, "dmu_objset_own(%s) = %d", snap2name, error);
|
||||
error = dsl_dataset_promote(clone2name, NULL);
|
||||
if (error == ENOSPC) {
|
||||
dmu_objset_disown(os, FTAG);
|
||||
ztest_record_enospc(FTAG);
|
||||
goto out;
|
||||
}
|
||||
if (error != EBUSY)
|
||||
fatal(0, "dsl_dataset_promote(%s), %d, not EBUSY", clone2name,
|
||||
error);
|
||||
@ -3625,11 +3630,19 @@ ztest_dmu_read_write(ztest_ds_t *zd, uint64_t id)
|
||||
return;
|
||||
}
|
||||
|
||||
dmu_object_set_checksum(os, bigobj,
|
||||
(enum zio_checksum)ztest_random_dsl_prop(ZFS_PROP_CHECKSUM), tx);
|
||||
enum zio_checksum cksum;
|
||||
do {
|
||||
cksum = (enum zio_checksum)
|
||||
ztest_random_dsl_prop(ZFS_PROP_CHECKSUM);
|
||||
} while (cksum >= ZIO_CHECKSUM_LEGACY_FUNCTIONS);
|
||||
dmu_object_set_checksum(os, bigobj, cksum, tx);
|
||||
|
||||
dmu_object_set_compress(os, bigobj,
|
||||
(enum zio_compress)ztest_random_dsl_prop(ZFS_PROP_COMPRESSION), tx);
|
||||
enum zio_compress comp;
|
||||
do {
|
||||
comp = (enum zio_compress)
|
||||
ztest_random_dsl_prop(ZFS_PROP_COMPRESSION);
|
||||
} while (comp >= ZIO_COMPRESS_LEGACY_FUNCTIONS);
|
||||
dmu_object_set_compress(os, bigobj, comp, tx);
|
||||
|
||||
/*
|
||||
* For each index from n to n + s, verify that the existing bufwad
|
||||
@ -4709,8 +4722,13 @@ ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id)
|
||||
error = dsl_dataset_user_hold(holds, 0, NULL);
|
||||
fnvlist_free(holds);
|
||||
|
||||
if (error)
|
||||
fatal(0, "dsl_dataset_user_hold(%s)", fullname, tag);
|
||||
if (error == ENOSPC) {
|
||||
ztest_record_enospc("dsl_dataset_user_hold");
|
||||
goto out;
|
||||
} else if (error) {
|
||||
fatal(0, "dsl_dataset_user_hold(%s, %s) = %u",
|
||||
fullname, tag, error);
|
||||
}
|
||||
|
||||
error = dsl_destroy_snapshot(fullname, B_FALSE);
|
||||
if (error != EBUSY) {
|
||||
@ -5163,7 +5181,7 @@ ztest_run_zdb(char *pool)
|
||||
isa = strdup(isa);
|
||||
/* LINTED */
|
||||
(void) sprintf(bin,
|
||||
"/usr/sbin%.*s/zdb -bcc%s%s -U %s %s",
|
||||
"/usr/sbin%.*s/zdb -bcc%s%s -d -U %s %s",
|
||||
isalen,
|
||||
isa,
|
||||
ztest_opts.zo_verbose >= 3 ? "s" : "",
|
||||
|
@ -215,4 +215,9 @@ zpool_feature_init(void)
|
||||
"com.joyent:filesystem_limits", "filesystem_limits",
|
||||
"Filesystem and snapshot limits.", B_TRUE, B_FALSE, B_FALSE,
|
||||
filesystem_limits_deps);
|
||||
|
||||
zfeature_register(SPA_FEATURE_EMBEDDED_DATA,
|
||||
"com.delphix:embedded_data", "embedded_data",
|
||||
"Blocks which compress very well use even less space.",
|
||||
B_FALSE, B_TRUE, B_TRUE, NULL);
|
||||
}
|
||||
|
@ -48,6 +48,7 @@ typedef enum spa_feature {
|
||||
SPA_FEATURE_ENABLED_TXG,
|
||||
SPA_FEATURE_HOLE_BIRTH,
|
||||
SPA_FEATURE_EXTENSIBLE_DATASET,
|
||||
SPA_FEATURE_EMBEDDED_DATA,
|
||||
SPA_FEATURE_BOOKMARKS,
|
||||
SPA_FEATURE_FS_SS_LIMIT,
|
||||
SPA_FEATURES
|
||||
@ -68,7 +69,7 @@ typedef struct zfeature_info {
|
||||
const spa_feature_t *fi_depends;
|
||||
} zfeature_info_t;
|
||||
|
||||
typedef int (zfeature_func_t)(zfeature_info_t *fi, void *arg);
|
||||
typedef int (zfeature_func_t)(zfeature_info_t *, void *);
|
||||
|
||||
#define ZFS_FEATURE_DEBUG
|
||||
|
||||
@ -77,8 +78,8 @@ extern zfeature_info_t spa_feature_table[SPA_FEATURES];
|
||||
extern boolean_t zfeature_is_valid_guid(const char *);
|
||||
|
||||
extern boolean_t zfeature_is_supported(const char *);
|
||||
extern int zfeature_lookup_name(const char *name, spa_feature_t *res);
|
||||
extern boolean_t zfeature_depends_on(spa_feature_t fid, spa_feature_t check);
|
||||
extern int zfeature_lookup_name(const char *, spa_feature_t *);
|
||||
extern boolean_t zfeature_depends_on(spa_feature_t, spa_feature_t);
|
||||
|
||||
extern void zpool_feature_init(void);
|
||||
|
||||
|
@ -39,6 +39,7 @@
|
||||
#include <sys/fs/zfs.h>
|
||||
#include <sys/avl.h>
|
||||
#include <ucred.h>
|
||||
#include <libzfs_core.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@ -589,13 +590,16 @@ typedef struct sendflags {
|
||||
|
||||
/* show progress (ie. -v) */
|
||||
boolean_t progress;
|
||||
|
||||
/* WRITE_EMBEDDED records of type DATA are permitted */
|
||||
boolean_t embed_data;
|
||||
} sendflags_t;
|
||||
|
||||
typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *);
|
||||
|
||||
extern int zfs_send(zfs_handle_t *, const char *, const char *,
|
||||
sendflags_t *, int, snapfilter_cb_t, void *, nvlist_t **);
|
||||
extern int zfs_send_one(zfs_handle_t *, const char *, int);
|
||||
extern int zfs_send_one(zfs_handle_t *, const char *, int, enum lzc_send_flags);
|
||||
|
||||
extern int zfs_promote(zfs_handle_t *);
|
||||
extern int zfs_hold(zfs_handle_t *, const char *, const char *,
|
||||
|
@ -21,7 +21,7 @@
|
||||
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2013 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
|
||||
* Copyright (c) 2013 Steven Hartland. All rights reserved.
|
||||
*/
|
||||
@ -42,6 +42,7 @@
|
||||
#include <time.h>
|
||||
|
||||
#include <libzfs.h>
|
||||
#include <libzfs_core.h>
|
||||
|
||||
#include "zfs_namecheck.h"
|
||||
#include "zfs_prop.h"
|
||||
@ -213,6 +214,7 @@ cksummer(void *arg)
|
||||
struct drr_object *drro = &thedrr.drr_u.drr_object;
|
||||
struct drr_write *drrw = &thedrr.drr_u.drr_write;
|
||||
struct drr_spill *drrs = &thedrr.drr_u.drr_spill;
|
||||
struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded;
|
||||
FILE *ofp;
|
||||
int outfd;
|
||||
dmu_replay_record_t wbr_drr = {0};
|
||||
@ -409,6 +411,20 @@ cksummer(void *arg)
|
||||
break;
|
||||
}
|
||||
|
||||
case DRR_WRITE_EMBEDDED:
|
||||
{
|
||||
if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
|
||||
&stream_cksum, outfd) == -1)
|
||||
goto out;
|
||||
(void) ssread(buf,
|
||||
P2ROUNDUP((uint64_t)drrwe->drr_psize, 8), ofp);
|
||||
if (cksum_and_write(buf,
|
||||
P2ROUNDUP((uint64_t)drrwe->drr_psize, 8),
|
||||
&stream_cksum, outfd) == -1)
|
||||
goto out;
|
||||
break;
|
||||
}
|
||||
|
||||
case DRR_FREE:
|
||||
{
|
||||
if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
|
||||
@ -790,7 +806,7 @@ typedef struct send_dump_data {
|
||||
char prevsnap[ZFS_MAXNAMELEN];
|
||||
uint64_t prevsnap_obj;
|
||||
boolean_t seenfrom, seento, replicate, doall, fromorigin;
|
||||
boolean_t verbose, dryrun, parsable, progress;
|
||||
boolean_t verbose, dryrun, parsable, progress, embed_data;
|
||||
int outfd;
|
||||
boolean_t err;
|
||||
nvlist_t *fss;
|
||||
@ -870,7 +886,8 @@ estimate_ioctl(zfs_handle_t *zhp, uint64_t fromsnap_obj,
|
||||
*/
|
||||
static int
|
||||
dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
|
||||
boolean_t fromorigin, int outfd, nvlist_t *debugnv)
|
||||
boolean_t fromorigin, int outfd, enum lzc_send_flags flags,
|
||||
nvlist_t *debugnv)
|
||||
{
|
||||
zfs_cmd_t zc = { 0 };
|
||||
libzfs_handle_t *hdl = zhp->zfs_hdl;
|
||||
@ -884,6 +901,7 @@ dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
|
||||
zc.zc_obj = fromorigin;
|
||||
zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
|
||||
zc.zc_fromobj = fromsnap_obj;
|
||||
zc.zc_flags = flags;
|
||||
|
||||
VERIFY(0 == nvlist_alloc(&thisdbg, NV_UNIQUE_NAME, 0));
|
||||
if (fromsnap && fromsnap[0] != '\0') {
|
||||
@ -1134,8 +1152,12 @@ dump_snapshot(zfs_handle_t *zhp, void *arg)
|
||||
}
|
||||
}
|
||||
|
||||
enum lzc_send_flags flags = 0;
|
||||
if (sdd->embed_data)
|
||||
flags |= LZC_SEND_FLAG_EMBED_DATA;
|
||||
|
||||
err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj,
|
||||
fromorigin, sdd->outfd, sdd->debugnv);
|
||||
fromorigin, sdd->outfd, flags, sdd->debugnv);
|
||||
|
||||
if (sdd->progress) {
|
||||
(void) pthread_cancel(tid);
|
||||
@ -1479,6 +1501,7 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
|
||||
sdd.parsable = flags->parsable;
|
||||
sdd.progress = flags->progress;
|
||||
sdd.dryrun = flags->dryrun;
|
||||
sdd.embed_data = flags->embed_data;
|
||||
sdd.filter_cb = filter_func;
|
||||
sdd.filter_cb_arg = cb_arg;
|
||||
if (debugnvp)
|
||||
@ -1610,7 +1633,8 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
|
||||
}
|
||||
|
||||
int
|
||||
zfs_send_one(zfs_handle_t *zhp, const char *from, int fd)
|
||||
zfs_send_one(zfs_handle_t *zhp, const char *from, int fd,
|
||||
enum lzc_send_flags flags)
|
||||
{
|
||||
int err;
|
||||
libzfs_handle_t *hdl = zhp->zfs_hdl;
|
||||
@ -1619,7 +1643,7 @@ zfs_send_one(zfs_handle_t *zhp, const char *from, int fd)
|
||||
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
|
||||
"warning: cannot send '%s'"), zhp->zfs_name);
|
||||
|
||||
err = lzc_send(zhp->zfs_name, from, fd);
|
||||
err = lzc_send(zhp->zfs_name, from, fd, flags);
|
||||
if (err != 0) {
|
||||
switch (errno) {
|
||||
case EXDEV:
|
||||
@ -2537,6 +2561,16 @@ recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
|
||||
(void) recv_read(hdl, fd, buf,
|
||||
drr->drr_u.drr_spill.drr_length, B_FALSE, NULL);
|
||||
break;
|
||||
case DRR_WRITE_EMBEDDED:
|
||||
if (byteswap) {
|
||||
drr->drr_u.drr_write_embedded.drr_psize =
|
||||
BSWAP_32(drr->drr_u.drr_write_embedded.
|
||||
drr_psize);
|
||||
}
|
||||
(void) recv_read(hdl, fd, buf,
|
||||
P2ROUNDUP(drr->drr_u.drr_write_embedded.drr_psize,
|
||||
8), B_FALSE, NULL);
|
||||
break;
|
||||
case DRR_WRITE_BYREF:
|
||||
case DRR_FREEOBJECTS:
|
||||
case DRR_FREE:
|
||||
|
@ -439,6 +439,8 @@ lzc_get_holds(const char *snapname, nvlist_t **holdsp)
|
||||
}
|
||||
|
||||
/*
|
||||
* Generate a zfs send stream for the specified snapshot and write it to
|
||||
* the specified file descriptor.
|
||||
*
|
||||
* "snapname" is the full name of the snapshot to send (e.g. "pool/fs@snap")
|
||||
*
|
||||
@ -452,9 +454,15 @@ lzc_get_holds(const char *snapname, nvlist_t **holdsp)
|
||||
* snapshot in the origin, etc.
|
||||
*
|
||||
* "fd" is the file descriptor to write the send stream to.
|
||||
*
|
||||
* If "flags" contains LZC_SEND_FLAG_EMBED_DATA, the stream is permitted
|
||||
* to contain DRR_WRITE_EMBEDDED records with drr_etype==BP_EMBEDDED_TYPE_DATA,
|
||||
* which the receiving system must support (as indicated by support
|
||||
* for the "embedded_data" feature).
|
||||
*/
|
||||
int
|
||||
lzc_send(const char *snapname, const char *from, int fd)
|
||||
lzc_send(const char *snapname, const char *from, int fd,
|
||||
enum lzc_send_flags flags)
|
||||
{
|
||||
nvlist_t *args;
|
||||
int err;
|
||||
@ -463,6 +471,8 @@ lzc_send(const char *snapname, const char *from, int fd)
|
||||
fnvlist_add_int32(args, "fd", fd);
|
||||
if (from != NULL)
|
||||
fnvlist_add_string(args, "fromsnap", from);
|
||||
if (flags & LZC_SEND_FLAG_EMBED_DATA)
|
||||
fnvlist_add_boolean(args, "embedok");
|
||||
err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL);
|
||||
nvlist_free(args);
|
||||
return (err);
|
||||
|
@ -52,7 +52,11 @@ int lzc_hold(nvlist_t *, int, nvlist_t **);
|
||||
int lzc_release(nvlist_t *, nvlist_t **);
|
||||
int lzc_get_holds(const char *, nvlist_t **);
|
||||
|
||||
int lzc_send(const char *, const char *, int);
|
||||
enum lzc_send_flags {
|
||||
LZC_SEND_FLAG_EMBED_DATA = 1 << 0
|
||||
};
|
||||
|
||||
int lzc_send(const char *, const char *, int, enum lzc_send_flags);
|
||||
int lzc_receive(const char *, nvlist_t *, const char *, boolean_t, int);
|
||||
int lzc_send_space(const char *, const char *, uint64_t *);
|
||||
|
||||
|
@ -176,12 +176,12 @@ zfs \- configures ZFS file systems
|
||||
|
||||
.LP
|
||||
.nf
|
||||
\fBzfs\fR \fBsend\fR [\fB-DnPpRv\fR] [\fB-\fR[\fBiI\fR] \fIsnapshot\fR] \fIsnapshot\fR
|
||||
\fBzfs\fR \fBsend\fR [\fB-DnPpRve\fR] [\fB-\fR[\fBiI\fR] \fIsnapshot\fR] \fIsnapshot\fR
|
||||
.fi
|
||||
|
||||
.LP
|
||||
.nf
|
||||
\fBzfs\fR \fBsend\fR [\fB-i \fIsnapshot\fR|\fIbookmark\fR]\fR \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR
|
||||
\fBzfs\fR \fBsend\fR [\fB-e\fR] [\fB-i \fIsnapshot\fR|\fIbookmark\fR]\fR \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR
|
||||
.fi
|
||||
|
||||
.LP
|
||||
@ -2923,7 +2923,7 @@ See \fBzpool-features\fR(5) for details on ZFS feature flags and the
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
\fBzfs send\fR [\fB-DnPpRv\fR] [\fB-\fR[\fBiI\fR] \fIsnapshot\fR] \fIsnapshot\fR
|
||||
\fBzfs send\fR [\fB-DnPpRve\fR] [\fB-\fR[\fBiI\fR] \fIsnapshot\fR] \fIsnapshot\fR
|
||||
.ad
|
||||
.sp .6
|
||||
.RS 4n
|
||||
@ -2996,6 +2996,23 @@ will be much better if the filesystem uses a dedup-capable checksum (eg.
|
||||
\fBsha256\fR).
|
||||
.RE
|
||||
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
\fB\fB-e\fR\fR
|
||||
.ad
|
||||
.sp .6
|
||||
.RS 4n
|
||||
Generate a more compact stream by using WRITE_EMBEDDED records for blocks
|
||||
which are stored more compactly on disk by the \fBembedded_data\fR pool
|
||||
feature. This flag has no effect if the \fBembedded_data\fR feature is
|
||||
disabled. The receiving system must have the \fBembedded_data\fR feature
|
||||
enabled. If the \fBlz4_compress\fR feature is active on the sending system,
|
||||
then the receiving system must have that feature enabled as well. See
|
||||
\fBzpool-features\fR(5) for details on ZFS feature flags and the
|
||||
\fBembedded_data\fR feature.
|
||||
.RE
|
||||
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
@ -3047,7 +3064,7 @@ on future versions of \fBZFS\fR.
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
\fBzfs send\fR [\fB-i\fR \fIsnapshot\fR|\fIbookmark\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR
|
||||
\fBzfs send\fR [\fB-e\fR] [\fB-i\fR \fIsnapshot\fR|\fIbookmark\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR
|
||||
.ad
|
||||
.sp .6
|
||||
.RS 4n
|
||||
@ -3075,6 +3092,22 @@ be the origin snapshot, or an earlier snapshot in the origin's filesystem,
|
||||
or the origin's origin, etc.
|
||||
.RE
|
||||
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
\fB\fB-e\fR\fR
|
||||
.ad
|
||||
.sp .6
|
||||
.RS 4n
|
||||
Generate a more compact stream by using WRITE_EMBEDDED records for blocks
|
||||
which are stored more compactly on disk by the \fBembedded_data\fR pool
|
||||
feature. This flag has no effect if the \fBembedded_data\fR feature is
|
||||
disabled. The receiving system must have the \fBembedded_data\fR feature
|
||||
enabled. If the \fBlz4_compress\fR feature is active on the sending system,
|
||||
then the receiving system must have that feature enabled as well. See
|
||||
\fBzpool-features\fR(5) for details on ZFS feature flags and the
|
||||
\fBembedded_data\fR feature.
|
||||
.RE
|
||||
.RE
|
||||
|
||||
.sp
|
||||
|
@ -401,6 +401,33 @@ never return to being \fBenabled\fB.
|
||||
|
||||
.RE
|
||||
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
\fB\fBembedded_data\fR\fR
|
||||
.ad
|
||||
.RS 4n
|
||||
.TS
|
||||
l l .
|
||||
GUID com.delphix:embedded_data
|
||||
READ\-ONLY COMPATIBLE no
|
||||
DEPENDENCIES none
|
||||
.TE
|
||||
|
||||
This feature improves the performance and compression ratio of
|
||||
highly-compressible blocks. Blocks whose contents can compress to 112 bytes
|
||||
or smaller can take advantage of this feature.
|
||||
|
||||
When this feature is enabled, the contents of highly-compressible blocks are
|
||||
stored in the block "pointer" itself (a misnomer in this case, as it contains
|
||||
the compressed data, rather than a pointer to its location on disk).  Thus
|
||||
the space of the block (one sector, typically 512 bytes or 4KB) is saved,
|
||||
and no additional i/o is needed to read and write the data block.
|
||||
|
||||
This feature becomes \fBactive\fR as soon as it is enabled and will
|
||||
never return to being \fBenabled\fR.
|
||||
|
||||
.RE
|
||||
|
||||
.SH "SEE ALSO"
|
||||
\fBzpool\fR(1M)
|
||||
|
@ -744,8 +744,10 @@ buf_discard_identity(arc_buf_hdr_t *hdr)
|
||||
}
|
||||
|
||||
static arc_buf_hdr_t *
|
||||
buf_hash_find(uint64_t spa, const dva_t *dva, uint64_t birth, kmutex_t **lockp)
|
||||
buf_hash_find(uint64_t spa, const blkptr_t *bp, kmutex_t **lockp)
|
||||
{
|
||||
const dva_t *dva = BP_IDENTITY(bp);
|
||||
uint64_t birth = BP_PHYSICAL_BIRTH(bp);
|
||||
uint64_t idx = BUF_HASH_INDEX(spa, dva, birth);
|
||||
kmutex_t *hash_lock = BUF_HASH_LOCK(idx);
|
||||
arc_buf_hdr_t *buf;
|
||||
@ -777,6 +779,8 @@ buf_hash_insert(arc_buf_hdr_t *buf, kmutex_t **lockp)
|
||||
arc_buf_hdr_t *fbuf;
|
||||
uint32_t i;
|
||||
|
||||
ASSERT(!DVA_IS_EMPTY(&buf->b_dva));
|
||||
ASSERT(buf->b_birth != 0);
|
||||
ASSERT(!HDR_IN_HASH_TABLE(buf));
|
||||
*lockp = hash_lock;
|
||||
mutex_enter(hash_lock);
|
||||
@ -2738,10 +2742,10 @@ arc_getbuf_func(zio_t *zio, arc_buf_t *buf, void *arg)
|
||||
static void
|
||||
arc_read_done(zio_t *zio)
|
||||
{
|
||||
arc_buf_hdr_t *hdr, *found;
|
||||
arc_buf_hdr_t *hdr;
|
||||
arc_buf_t *buf;
|
||||
arc_buf_t *abuf; /* buffer we're assigning to callback */
|
||||
kmutex_t *hash_lock;
|
||||
kmutex_t *hash_lock = NULL;
|
||||
arc_callback_t *callback_list, *acb;
|
||||
int freeable = FALSE;
|
||||
|
||||
@ -2756,12 +2760,22 @@ arc_read_done(zio_t *zio)
|
||||
* reason for it not to be found is if we were freed during the
|
||||
* read.
|
||||
*/
|
||||
found = buf_hash_find(hdr->b_spa, &hdr->b_dva, hdr->b_birth,
|
||||
&hash_lock);
|
||||
if (HDR_IN_HASH_TABLE(hdr)) {
|
||||
ASSERT3U(hdr->b_birth, ==, BP_PHYSICAL_BIRTH(zio->io_bp));
|
||||
ASSERT3U(hdr->b_dva.dva_word[0], ==,
|
||||
BP_IDENTITY(zio->io_bp)->dva_word[0]);
|
||||
ASSERT3U(hdr->b_dva.dva_word[1], ==,
|
||||
BP_IDENTITY(zio->io_bp)->dva_word[1]);
|
||||
|
||||
ASSERT((found == NULL && HDR_FREED_IN_READ(hdr) && hash_lock == NULL) ||
|
||||
(found == hdr && DVA_EQUAL(&hdr->b_dva, BP_IDENTITY(zio->io_bp))) ||
|
||||
(found == hdr && HDR_L2_READING(hdr)));
|
||||
arc_buf_hdr_t *found = buf_hash_find(hdr->b_spa, zio->io_bp,
|
||||
&hash_lock);
|
||||
|
||||
ASSERT((found == NULL && HDR_FREED_IN_READ(hdr) &&
|
||||
hash_lock == NULL) ||
|
||||
(found == hdr &&
|
||||
DVA_EQUAL(&hdr->b_dva, BP_IDENTITY(zio->io_bp))) ||
|
||||
(found == hdr && HDR_L2_READING(hdr)));
|
||||
}
|
||||
|
||||
hdr->b_flags &= ~ARC_L2_EVICTED;
|
||||
if (l2arc_noprefetch && (hdr->b_flags & ARC_PREFETCH))
|
||||
@ -2885,16 +2899,25 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done,
|
||||
void *private, zio_priority_t priority, int zio_flags, uint32_t *arc_flags,
|
||||
const zbookmark_t *zb)
|
||||
{
|
||||
arc_buf_hdr_t *hdr;
|
||||
arc_buf_hdr_t *hdr = NULL;
|
||||
arc_buf_t *buf = NULL;
|
||||
kmutex_t *hash_lock;
|
||||
kmutex_t *hash_lock = NULL;
|
||||
zio_t *rzio;
|
||||
uint64_t guid = spa_load_guid(spa);
|
||||
|
||||
ASSERT(!BP_IS_EMBEDDED(bp) ||
|
||||
BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA);
|
||||
|
||||
top:
|
||||
hdr = buf_hash_find(guid, BP_IDENTITY(bp), BP_PHYSICAL_BIRTH(bp),
|
||||
&hash_lock);
|
||||
if (hdr && hdr->b_datacnt > 0) {
|
||||
if (!BP_IS_EMBEDDED(bp)) {
|
||||
/*
|
||||
* Embedded BP's have no DVA and require no I/O to "read".
|
||||
* Create an anonymous arc buf to back it.
|
||||
*/
|
||||
hdr = buf_hash_find(guid, bp, &hash_lock);
|
||||
}
|
||||
|
||||
if (hdr != NULL && hdr->b_datacnt > 0) {
|
||||
|
||||
*arc_flags |= ARC_CACHED;
|
||||
|
||||
@ -2968,7 +2991,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done,
|
||||
done(NULL, buf, private);
|
||||
} else {
|
||||
uint64_t size = BP_GET_LSIZE(bp);
|
||||
arc_callback_t *acb;
|
||||
arc_callback_t *acb;
|
||||
vdev_t *vd = NULL;
|
||||
uint64_t addr = 0;
|
||||
boolean_t devw = B_FALSE;
|
||||
@ -2977,15 +3000,17 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done,
|
||||
|
||||
if (hdr == NULL) {
|
||||
/* this block is not in the cache */
|
||||
arc_buf_hdr_t *exists;
|
||||
arc_buf_hdr_t *exists = NULL;
|
||||
arc_buf_contents_t type = BP_GET_BUFC_TYPE(bp);
|
||||
buf = arc_buf_alloc(spa, size, private, type);
|
||||
hdr = buf->b_hdr;
|
||||
hdr->b_dva = *BP_IDENTITY(bp);
|
||||
hdr->b_birth = BP_PHYSICAL_BIRTH(bp);
|
||||
hdr->b_cksum0 = bp->blk_cksum.zc_word[0];
|
||||
exists = buf_hash_insert(hdr, &hash_lock);
|
||||
if (exists) {
|
||||
if (!BP_IS_EMBEDDED(bp)) {
|
||||
hdr->b_dva = *BP_IDENTITY(bp);
|
||||
hdr->b_birth = BP_PHYSICAL_BIRTH(bp);
|
||||
hdr->b_cksum0 = bp->blk_cksum.zc_word[0];
|
||||
exists = buf_hash_insert(hdr, &hash_lock);
|
||||
}
|
||||
if (exists != NULL) {
|
||||
/* somebody beat us to the hash insert */
|
||||
mutex_exit(hash_lock);
|
||||
buf_discard_identity(hdr);
|
||||
@ -3057,7 +3082,8 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done,
|
||||
vd = NULL;
|
||||
}
|
||||
|
||||
mutex_exit(hash_lock);
|
||||
if (hash_lock != NULL)
|
||||
mutex_exit(hash_lock);
|
||||
|
||||
/*
|
||||
* At this point, we have a level 1 cache miss. Try again in
|
||||
@ -3192,8 +3218,9 @@ arc_freed(spa_t *spa, const blkptr_t *bp)
|
||||
kmutex_t *hash_lock;
|
||||
uint64_t guid = spa_load_guid(spa);
|
||||
|
||||
hdr = buf_hash_find(guid, BP_IDENTITY(bp), BP_PHYSICAL_BIRTH(bp),
|
||||
&hash_lock);
|
||||
ASSERT(!BP_IS_EMBEDDED(bp));
|
||||
|
||||
hdr = buf_hash_find(guid, bp, &hash_lock);
|
||||
if (hdr == NULL)
|
||||
return;
|
||||
if (HDR_BUF_AVAILABLE(hdr)) {
|
||||
@ -3509,7 +3536,7 @@ arc_write_done(zio_t *zio)
|
||||
ASSERT(hdr->b_acb == NULL);
|
||||
|
||||
if (zio->io_error == 0) {
|
||||
if (BP_IS_HOLE(zio->io_bp)) {
|
||||
if (BP_IS_HOLE(zio->io_bp) || BP_IS_EMBEDDED(zio->io_bp)) {
|
||||
buf_discard_identity(hdr);
|
||||
} else {
|
||||
hdr->b_dva = *BP_IDENTITY(zio->io_bp);
|
||||
@ -3521,10 +3548,10 @@ arc_write_done(zio_t *zio)
|
||||
}
|
||||
|
||||
/*
|
||||
* If the block to be written was all-zero, we may have
|
||||
* compressed it away. In this case no write was performed
|
||||
* so there will be no dva/birth/checksum. The buffer must
|
||||
* therefore remain anonymous (and uncached).
|
||||
* If the block to be written was all-zero or compressed enough to be
|
||||
* embedded in the BP, no write was performed so there will be no
|
||||
* dva/birth/checksum. The buffer must therefore remain anonymous
|
||||
* (and uncached).
|
||||
*/
|
||||
if (!BUF_EMPTY(hdr)) {
|
||||
arc_buf_hdr_t *exists;
|
||||
@ -4818,7 +4845,7 @@ static boolean_t
|
||||
l2arc_compress_buf(l2arc_buf_hdr_t *l2hdr)
|
||||
{
|
||||
void *cdata;
|
||||
size_t csize, len;
|
||||
size_t csize, len, rounded;
|
||||
|
||||
ASSERT(l2hdr->b_compress == ZIO_COMPRESS_OFF);
|
||||
ASSERT(l2hdr->b_tmp_cdata != NULL);
|
||||
@ -4828,6 +4855,12 @@ l2arc_compress_buf(l2arc_buf_hdr_t *l2hdr)
|
||||
csize = zio_compress_data(ZIO_COMPRESS_LZ4, l2hdr->b_tmp_cdata,
|
||||
cdata, l2hdr->b_asize);
|
||||
|
||||
rounded = P2ROUNDUP(csize, (size_t)SPA_MINBLOCKSIZE);
|
||||
if (rounded > csize) {
|
||||
bzero((char *)cdata + csize, rounded - csize);
|
||||
csize = rounded;
|
||||
}
|
||||
|
||||
if (csize == 0) {
|
||||
/* zero block, indicate that there's nothing to write */
|
||||
zio_data_buf_free(cdata, len);
|
||||
|
119
uts/common/fs/zfs/blkptr.c
Normal file
119
uts/common/fs/zfs/blkptr.c
Normal file
@ -0,0 +1,119 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* This file and its contents are supplied under the terms of the
|
||||
* Common Development and Distribution License ("CDDL"), version 1.0.
|
||||
* You may only use this file in accordance with the terms of version
|
||||
* 1.0 of the CDDL.
|
||||
*
|
||||
* A full copy of the text of the CDDL should have accompanied this
|
||||
* source. A copy of the CDDL is also available via the Internet at
|
||||
* http://www.illumos.org/license/CDDL.
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2013 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/zio_compress.h>
|
||||
|
||||
/*
|
||||
* Embedded-data Block Pointers
|
||||
*
|
||||
* Normally, block pointers point (via their DVAs) to a block which holds data.
|
||||
* If the data that we need to store is very small, this is an inefficient
|
||||
* use of space, because a block must be at minimum 1 sector (typically 512
|
||||
* bytes or 4KB). Additionally, reading these small blocks tends to generate
|
||||
* more random reads.
|
||||
*
|
||||
* Embedded-data Block Pointers allow small pieces of data (the "payload",
|
||||
* up to 112 bytes) to be stored in the block pointer itself, instead of
|
||||
* being pointed to. The "Pointer" part of this name is a bit of a
|
||||
* misnomer, as nothing is pointed to.
|
||||
*
|
||||
* BP_EMBEDDED_TYPE_DATA block pointers allow highly-compressible data to
|
||||
* be embedded in the block pointer. The logic for this is handled in
|
||||
* the SPA, by the zio pipeline. Therefore most code outside the zio
|
||||
* pipeline doesn't need special-cases to handle these block pointers.
|
||||
*
|
||||
* See spa.h for details on the exact layout of embedded block pointers.
|
||||
*/
|
||||
|
||||
void
|
||||
encode_embedded_bp_compressed(blkptr_t *bp, void *data,
|
||||
enum zio_compress comp, int uncompressed_size, int compressed_size)
|
||||
{
|
||||
uint64_t *bp64 = (uint64_t *)bp;
|
||||
uint64_t w = 0;
|
||||
uint8_t *data8 = data;
|
||||
|
||||
ASSERT3U(compressed_size, <=, BPE_PAYLOAD_SIZE);
|
||||
ASSERT(uncompressed_size == compressed_size ||
|
||||
comp != ZIO_COMPRESS_OFF);
|
||||
ASSERT3U(comp, >=, ZIO_COMPRESS_OFF);
|
||||
ASSERT3U(comp, <, ZIO_COMPRESS_FUNCTIONS);
|
||||
|
||||
bzero(bp, sizeof (*bp));
|
||||
BP_SET_EMBEDDED(bp, B_TRUE);
|
||||
BP_SET_COMPRESS(bp, comp);
|
||||
BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
|
||||
BPE_SET_LSIZE(bp, uncompressed_size);
|
||||
BPE_SET_PSIZE(bp, compressed_size);
|
||||
|
||||
/*
|
||||
* Encode the byte array into the words of the block pointer.
|
||||
* First byte goes into low bits of first word (little endian).
|
||||
*/
|
||||
for (int i = 0; i < compressed_size; i++) {
|
||||
BF64_SET(w, (i % sizeof (w)) * NBBY, NBBY, data8[i]);
|
||||
if (i % sizeof (w) == sizeof (w) - 1) {
|
||||
/* we've reached the end of a word */
|
||||
ASSERT3P(bp64, <, bp + 1);
|
||||
*bp64 = w;
|
||||
bp64++;
|
||||
if (!BPE_IS_PAYLOADWORD(bp, bp64))
|
||||
bp64++;
|
||||
w = 0;
|
||||
}
|
||||
}
|
||||
/* write last partial word */
|
||||
if (bp64 < (uint64_t *)(bp + 1))
|
||||
*bp64 = w;
|
||||
}
|
||||
|
||||
/*
|
||||
* buf must be at least BPE_GET_PSIZE(bp) bytes long (which will never be
|
||||
* more than BPE_PAYLOAD_SIZE bytes).
|
||||
*/
|
||||
void
|
||||
decode_embedded_bp_compressed(const blkptr_t *bp, void *buf)
|
||||
{
|
||||
int psize;
|
||||
uint8_t *buf8 = buf;
|
||||
uint64_t w = 0;
|
||||
const uint64_t *bp64 = (const uint64_t *)bp;
|
||||
|
||||
ASSERT(BP_IS_EMBEDDED(bp));
|
||||
|
||||
psize = BPE_GET_PSIZE(bp);
|
||||
|
||||
/*
|
||||
* Decode the words of the block pointer into the byte array.
|
||||
* Low bits of first word are the first byte (little endian).
|
||||
*/
|
||||
for (int i = 0; i < psize; i++) {
|
||||
if (i % sizeof (w) == 0) {
|
||||
/* beginning of a word */
|
||||
ASSERT3P(bp64, <, bp + 1);
|
||||
w = *bp64;
|
||||
bp64++;
|
||||
if (!BPE_IS_PAYLOADWORD(bp, bp64))
|
||||
bp64++;
|
||||
}
|
||||
buf8[i] = BF64_GET(w, (i % sizeof (w)) * NBBY, NBBY);
|
||||
}
|
||||
}
|
@ -192,6 +192,13 @@ bpobj_close(bpobj_t *bpo)
|
||||
mutex_destroy(&bpo->bpo_lock);
|
||||
}
|
||||
|
||||
static boolean_t
|
||||
bpobj_hasentries(bpobj_t *bpo)
|
||||
{
|
||||
return (bpo->bpo_phys->bpo_num_blkptrs != 0 ||
|
||||
(bpo->bpo_havesubobj && bpo->bpo_phys->bpo_num_subobjs != 0));
|
||||
}
|
||||
|
||||
static int
|
||||
bpobj_iterate_impl(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx,
|
||||
boolean_t free)
|
||||
@ -332,9 +339,11 @@ bpobj_iterate_impl(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx,
|
||||
|
||||
out:
|
||||
/* If there are no entries, there should be no bytes. */
|
||||
ASSERT(bpo->bpo_phys->bpo_num_blkptrs > 0 ||
|
||||
(bpo->bpo_havesubobj && bpo->bpo_phys->bpo_num_subobjs > 0) ||
|
||||
bpo->bpo_phys->bpo_bytes == 0);
|
||||
if (!bpobj_hasentries(bpo)) {
|
||||
ASSERT0(bpo->bpo_phys->bpo_bytes);
|
||||
ASSERT0(bpo->bpo_phys->bpo_comp);
|
||||
ASSERT0(bpo->bpo_phys->bpo_uncomp);
|
||||
}
|
||||
|
||||
mutex_exit(&bpo->bpo_lock);
|
||||
return (err);
|
||||
@ -377,7 +386,7 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx)
|
||||
VERIFY3U(0, ==, bpobj_open(&subbpo, bpo->bpo_os, subobj));
|
||||
VERIFY3U(0, ==, bpobj_space(&subbpo, &used, &comp, &uncomp));
|
||||
|
||||
if (used == 0) {
|
||||
if (!bpobj_hasentries(&subbpo)) {
|
||||
/* No point in having an empty subobj. */
|
||||
bpobj_close(&subbpo);
|
||||
bpobj_free(bpo->bpo_os, subobj, tx);
|
||||
@ -453,13 +462,29 @@ bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, dmu_tx_t *tx)
|
||||
ASSERT(!BP_IS_HOLE(bp));
|
||||
ASSERT(bpo->bpo_object != dmu_objset_pool(bpo->bpo_os)->dp_empty_bpobj);
|
||||
|
||||
if (BP_IS_EMBEDDED(bp)) {
|
||||
/*
|
||||
* The bpobj will compress better without the payload.
|
||||
*
|
||||
* Note that we store EMBEDDED bp's because they have an
|
||||
* uncompressed size, which must be accounted for. An
|
||||
* alternative would be to add their size to bpo_uncomp
|
||||
* without storing the bp, but that would create additional
|
||||
* complications: bpo_uncomp would be inconsistent with the
|
||||
* set of BP's stored, and bpobj_iterate() wouldn't visit
|
||||
* all the space accounted for in the bpobj.
|
||||
*/
|
||||
bzero(&stored_bp, sizeof (stored_bp));
|
||||
stored_bp.blk_prop = bp->blk_prop;
|
||||
stored_bp.blk_birth = bp->blk_birth;
|
||||
} else if (!BP_GET_DEDUP(bp)) {
|
||||
/* The bpobj will compress better without the checksum */
|
||||
bzero(&stored_bp.blk_cksum, sizeof (stored_bp.blk_cksum));
|
||||
}
|
||||
|
||||
/* We never need the fill count. */
|
||||
stored_bp.blk_fill = 0;
|
||||
|
||||
/* The bpobj will compress better if we can leave off the checksum */
|
||||
if (!BP_GET_DEDUP(bp))
|
||||
bzero(&stored_bp.blk_cksum, sizeof (stored_bp.blk_cksum));
|
||||
|
||||
mutex_enter(&bpo->bpo_lock);
|
||||
|
||||
offset = bpo->bpo_phys->bpo_num_blkptrs * sizeof (stored_bp);
|
||||
|
@ -40,6 +40,8 @@
|
||||
#include <sys/dmu_zfetch.h>
|
||||
#include <sys/sa.h>
|
||||
#include <sys/sa_impl.h>
|
||||
#include <sys/zfeature.h>
|
||||
#include <sys/blkptr.h>
|
||||
#include <sys/range_tree.h>
|
||||
|
||||
/*
|
||||
@ -1435,6 +1437,38 @@ dbuf_fill_done(dmu_buf_impl_t *db, dmu_tx_t *tx)
|
||||
mutex_exit(&db->db_mtx);
|
||||
}
|
||||
|
||||
void
|
||||
dmu_buf_write_embedded(dmu_buf_t *dbuf, void *data,
|
||||
bp_embedded_type_t etype, enum zio_compress comp,
|
||||
int uncompressed_size, int compressed_size, int byteorder,
|
||||
dmu_tx_t *tx)
|
||||
{
|
||||
dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbuf;
|
||||
struct dirty_leaf *dl;
|
||||
dmu_object_type_t type;
|
||||
|
||||
DB_DNODE_ENTER(db);
|
||||
type = DB_DNODE(db)->dn_type;
|
||||
DB_DNODE_EXIT(db);
|
||||
|
||||
ASSERT0(db->db_level);
|
||||
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
|
||||
|
||||
dmu_buf_will_not_fill(dbuf, tx);
|
||||
|
||||
ASSERT3U(db->db_last_dirty->dr_txg, ==, tx->tx_txg);
|
||||
dl = &db->db_last_dirty->dt.dl;
|
||||
encode_embedded_bp_compressed(&dl->dr_overridden_by,
|
||||
data, comp, uncompressed_size, compressed_size);
|
||||
BPE_SET_ETYPE(&dl->dr_overridden_by, etype);
|
||||
BP_SET_TYPE(&dl->dr_overridden_by, type);
|
||||
BP_SET_LEVEL(&dl->dr_overridden_by, 0);
|
||||
BP_SET_BYTEORDER(&dl->dr_overridden_by, byteorder);
|
||||
|
||||
dl->dr_override_state = DR_OVERRIDDEN;
|
||||
dl->dr_overridden_by.blk_birth = db->db_last_dirty->dr_txg;
|
||||
}
|
||||
|
||||
/*
|
||||
* Directly assign a provided arc buf to a given dbuf if it's not referenced
|
||||
* by anybody except our caller. Otherwise copy arcbuf's contents to dbuf.
|
||||
@ -1819,7 +1853,7 @@ dbuf_prefetch(dnode_t *dn, uint64_t blkid, zio_priority_t prio)
|
||||
}
|
||||
|
||||
if (dbuf_findbp(dn, 0, blkid, TRUE, &db, &bp) == 0) {
|
||||
if (bp && !BP_IS_HOLE(bp)) {
|
||||
if (bp && !BP_IS_HOLE(bp) && !BP_IS_EMBEDDED(bp)) {
|
||||
dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
|
||||
uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH;
|
||||
zbookmark_t zb;
|
||||
@ -2451,7 +2485,7 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
|
||||
uint64_t fill = 0;
|
||||
int i;
|
||||
|
||||
ASSERT(db->db_blkptr == bp);
|
||||
ASSERT3P(db->db_blkptr, ==, bp);
|
||||
|
||||
DB_DNODE_ENTER(db);
|
||||
dn = DB_DNODE(db);
|
||||
@ -2463,7 +2497,8 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
|
||||
ASSERT((db->db_blkid != DMU_SPILL_BLKID &&
|
||||
BP_GET_TYPE(bp) == dn->dn_type) ||
|
||||
(db->db_blkid == DMU_SPILL_BLKID &&
|
||||
BP_GET_TYPE(bp) == dn->dn_bonustype));
|
||||
BP_GET_TYPE(bp) == dn->dn_bonustype) ||
|
||||
BP_IS_EMBEDDED(bp));
|
||||
ASSERT(BP_GET_LEVEL(bp) == db->db_level);
|
||||
}
|
||||
|
||||
@ -2504,12 +2539,13 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
|
||||
for (i = db->db.db_size >> SPA_BLKPTRSHIFT; i > 0; i--, ibp++) {
|
||||
if (BP_IS_HOLE(ibp))
|
||||
continue;
|
||||
fill += ibp->blk_fill;
|
||||
fill += BP_GET_FILL(ibp);
|
||||
}
|
||||
}
|
||||
DB_DNODE_EXIT(db);
|
||||
|
||||
bp->blk_fill = fill;
|
||||
if (!BP_IS_EMBEDDED(bp))
|
||||
bp->blk_fill = fill;
|
||||
|
||||
mutex_exit(&db->db_mtx);
|
||||
}
|
||||
@ -2621,7 +2657,8 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
|
||||
dn->dn_phys->dn_maxblkid >> (db->db_level * epbs));
|
||||
ASSERT3U(BP_GET_LSIZE(db->db_blkptr), ==,
|
||||
db->db.db_size);
|
||||
arc_set_callback(db->db_buf, dbuf_do_evict, db);
|
||||
if (!arc_released(db->db_buf))
|
||||
arc_set_callback(db->db_buf, dbuf_do_evict, db);
|
||||
}
|
||||
DB_DNODE_EXIT(db);
|
||||
mutex_destroy(&dr->dt.di.dr_mtx);
|
||||
@ -2747,10 +2784,16 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
|
||||
dmu_write_policy(os, dn, db->db_level, wp_flag, &zp);
|
||||
DB_DNODE_EXIT(db);
|
||||
|
||||
if (db->db_level == 0 && dr->dt.dl.dr_override_state == DR_OVERRIDDEN) {
|
||||
ASSERT(db->db_state != DB_NOFILL);
|
||||
if (db->db_level == 0 &&
|
||||
dr->dt.dl.dr_override_state == DR_OVERRIDDEN) {
|
||||
/*
|
||||
* The BP for this block has been provided by open context
|
||||
* (by dmu_sync() or dmu_buf_write_embedded()).
|
||||
*/
|
||||
void *contents = (data != NULL) ? data->b_data : NULL;
|
||||
|
||||
dr->dr_zio = zio_write(zio, os->os_spa, txg,
|
||||
db->db_blkptr, data->b_data, arc_buf_size(data), &zp,
|
||||
db->db_blkptr, contents, db->db.db_size, &zp,
|
||||
dbuf_write_override_ready, NULL, dbuf_write_override_done,
|
||||
dr, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
|
||||
mutex_enter(&db->db_mtx);
|
||||
|
@ -125,17 +125,13 @@ const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS] = {
|
||||
};
|
||||
|
||||
int
|
||||
dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
|
||||
void *tag, dmu_buf_t **dbp, int flags)
|
||||
dmu_buf_hold_noread(objset_t *os, uint64_t object, uint64_t offset,
|
||||
void *tag, dmu_buf_t **dbp)
|
||||
{
|
||||
dnode_t *dn;
|
||||
uint64_t blkid;
|
||||
dmu_buf_impl_t *db;
|
||||
int err;
|
||||
int db_flags = DB_RF_CANFAIL;
|
||||
|
||||
if (flags & DMU_READ_NO_PREFETCH)
|
||||
db_flags |= DB_RF_NOPREFETCH;
|
||||
|
||||
err = dnode_hold(os, object, FTAG, &dn);
|
||||
if (err)
|
||||
@ -144,18 +140,37 @@ dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
|
||||
rw_enter(&dn->dn_struct_rwlock, RW_READER);
|
||||
db = dbuf_hold(dn, blkid, tag);
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
dnode_rele(dn, FTAG);
|
||||
|
||||
if (db == NULL) {
|
||||
err = SET_ERROR(EIO);
|
||||
} else {
|
||||
*dbp = NULL;
|
||||
return (SET_ERROR(EIO));
|
||||
}
|
||||
|
||||
*dbp = &db->db;
|
||||
return (err);
|
||||
}
|
||||
|
||||
int
|
||||
dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
|
||||
void *tag, dmu_buf_t **dbp, int flags)
|
||||
{
|
||||
int err;
|
||||
int db_flags = DB_RF_CANFAIL;
|
||||
|
||||
if (flags & DMU_READ_NO_PREFETCH)
|
||||
db_flags |= DB_RF_NOPREFETCH;
|
||||
|
||||
err = dmu_buf_hold_noread(os, object, offset, tag, dbp);
|
||||
if (err == 0) {
|
||||
dmu_buf_impl_t *db = (dmu_buf_impl_t *)(*dbp);
|
||||
err = dbuf_read(db, NULL, db_flags);
|
||||
if (err) {
|
||||
if (err != 0) {
|
||||
dbuf_rele(db, tag);
|
||||
db = NULL;
|
||||
*dbp = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
dnode_rele(dn, FTAG);
|
||||
*dbp = &db->db; /* NULL db plus first field offset is NULL */
|
||||
return (err);
|
||||
}
|
||||
|
||||
@ -848,6 +863,25 @@ dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
|
||||
dmu_buf_rele_array(dbp, numbufs, FTAG);
|
||||
}
|
||||
|
||||
void
|
||||
dmu_write_embedded(objset_t *os, uint64_t object, uint64_t offset,
|
||||
void *data, uint8_t etype, uint8_t comp, int uncompressed_size,
|
||||
int compressed_size, int byteorder, dmu_tx_t *tx)
|
||||
{
|
||||
dmu_buf_t *db;
|
||||
|
||||
ASSERT3U(etype, <, NUM_BP_EMBEDDED_TYPES);
|
||||
ASSERT3U(comp, <, ZIO_COMPRESS_FUNCTIONS);
|
||||
VERIFY0(dmu_buf_hold_noread(os, object, offset,
|
||||
FTAG, &db));
|
||||
|
||||
dmu_buf_write_embedded(db,
|
||||
data, (bp_embedded_type_t)etype, (enum zio_compress)comp,
|
||||
uncompressed_size, compressed_size, byteorder, tx);
|
||||
|
||||
dmu_buf_rele(db, FTAG);
|
||||
}
|
||||
|
||||
/*
|
||||
* DMU support for xuio
|
||||
*/
|
||||
@ -1263,7 +1297,7 @@ dmu_sync_ready(zio_t *zio, arc_buf_t *buf, void *varg)
|
||||
* block size still needs to be known for replay.
|
||||
*/
|
||||
BP_SET_LSIZE(bp, db->db_size);
|
||||
} else {
|
||||
} else if (!BP_IS_EMBEDDED(bp)) {
|
||||
ASSERT(BP_GET_LEVEL(bp) == 0);
|
||||
bp->blk_fill = 1;
|
||||
}
|
||||
@ -1534,9 +1568,15 @@ dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum,
|
||||
{
|
||||
dnode_t *dn;
|
||||
|
||||
/* XXX assumes dnode_hold will not get an i/o error */
|
||||
(void) dnode_hold(os, object, FTAG, &dn);
|
||||
ASSERT(checksum < ZIO_CHECKSUM_FUNCTIONS);
|
||||
/*
|
||||
* Send streams include each object's checksum function. This
|
||||
* check ensures that the receiving system can understand the
|
||||
* checksum function transmitted.
|
||||
*/
|
||||
ASSERT3U(checksum, <, ZIO_CHECKSUM_LEGACY_FUNCTIONS);
|
||||
|
||||
VERIFY0(dnode_hold(os, object, FTAG, &dn));
|
||||
ASSERT3U(checksum, <, ZIO_CHECKSUM_FUNCTIONS);
|
||||
dn->dn_checksum = checksum;
|
||||
dnode_setdirty(dn, tx);
|
||||
dnode_rele(dn, FTAG);
|
||||
@ -1548,9 +1588,14 @@ dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress,
|
||||
{
|
||||
dnode_t *dn;
|
||||
|
||||
/* XXX assumes dnode_hold will not get an i/o error */
|
||||
(void) dnode_hold(os, object, FTAG, &dn);
|
||||
ASSERT(compress < ZIO_COMPRESS_FUNCTIONS);
|
||||
/*
|
||||
* Send streams include each object's compression function. This
|
||||
* check ensures that the receiving system can understand the
|
||||
* compression function transmitted.
|
||||
*/
|
||||
ASSERT3U(compress, <, ZIO_COMPRESS_LEGACY_FUNCTIONS);
|
||||
|
||||
VERIFY0(dnode_hold(os, object, FTAG, &dn));
|
||||
dn->dn_compress = compress;
|
||||
dnode_setdirty(dn, tx);
|
||||
dnode_rele(dn, FTAG);
|
||||
@ -1717,7 +1762,7 @@ dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi)
|
||||
doi->doi_max_offset = (dn->dn_maxblkid + 1) * dn->dn_datablksz;
|
||||
doi->doi_fill_count = 0;
|
||||
for (int i = 0; i < dnp->dn_nblkptr; i++)
|
||||
doi->doi_fill_count += dnp->dn_blkptr[i].blk_fill;
|
||||
doi->doi_fill_count += BP_GET_FILL(&dnp->dn_blkptr[i]);
|
||||
|
||||
mutex_exit(&dn->dn_mtx);
|
||||
rw_exit(&dn->dn_struct_rwlock);
|
||||
|
@ -338,7 +338,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
|
||||
* default (fletcher2/off). Snapshots don't need to know about
|
||||
* checksum/compression/copies.
|
||||
*/
|
||||
if (ds) {
|
||||
if (ds != NULL) {
|
||||
err = dsl_prop_register(ds,
|
||||
zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
|
||||
primary_cache_changed_cb, os);
|
||||
@ -391,7 +391,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
|
||||
kmem_free(os, sizeof (objset_t));
|
||||
return (err);
|
||||
}
|
||||
} else if (ds == NULL) {
|
||||
} else {
|
||||
/* It's the meta-objset. */
|
||||
os->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
|
||||
os->os_compress = ZIO_COMPRESS_LZJB;
|
||||
@ -435,17 +435,6 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
|
||||
&os->os_groupused_dnode);
|
||||
}
|
||||
|
||||
/*
|
||||
* We should be the only thread trying to do this because we
|
||||
* have ds_opening_lock
|
||||
*/
|
||||
if (ds) {
|
||||
mutex_enter(&ds->ds_lock);
|
||||
ASSERT(ds->ds_objset == NULL);
|
||||
ds->ds_objset = os;
|
||||
mutex_exit(&ds->ds_lock);
|
||||
}
|
||||
|
||||
*osp = os;
|
||||
return (0);
|
||||
}
|
||||
@ -456,11 +445,19 @@ dmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp)
|
||||
int err = 0;
|
||||
|
||||
mutex_enter(&ds->ds_opening_lock);
|
||||
*osp = ds->ds_objset;
|
||||
if (*osp == NULL) {
|
||||
if (ds->ds_objset == NULL) {
|
||||
objset_t *os;
|
||||
err = dmu_objset_open_impl(dsl_dataset_get_spa(ds),
|
||||
ds, dsl_dataset_get_blkptr(ds), osp);
|
||||
ds, dsl_dataset_get_blkptr(ds), &os);
|
||||
|
||||
if (err == 0) {
|
||||
mutex_enter(&ds->ds_lock);
|
||||
ASSERT(ds->ds_objset == NULL);
|
||||
ds->ds_objset = os;
|
||||
mutex_exit(&ds->ds_lock);
|
||||
}
|
||||
}
|
||||
*osp = ds->ds_objset;
|
||||
mutex_exit(&ds->ds_opening_lock);
|
||||
return (err);
|
||||
}
|
||||
@ -986,6 +983,7 @@ dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg)
|
||||
objset_t *os = arg;
|
||||
dnode_phys_t *dnp = &os->os_phys->os_meta_dnode;
|
||||
|
||||
ASSERT(!BP_IS_EMBEDDED(bp));
|
||||
ASSERT3P(bp, ==, os->os_rootbp);
|
||||
ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET);
|
||||
ASSERT0(BP_GET_LEVEL(bp));
|
||||
@ -998,7 +996,7 @@ dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg)
|
||||
*/
|
||||
bp->blk_fill = 0;
|
||||
for (int i = 0; i < dnp->dn_nblkptr; i++)
|
||||
bp->blk_fill += dnp->dn_blkptr[i].blk_fill;
|
||||
bp->blk_fill += BP_GET_FILL(&dnp->dn_blkptr[i]);
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
|
@ -48,7 +48,9 @@
|
||||
#include <sys/zfs_onexit.h>
|
||||
#include <sys/dmu_send.h>
|
||||
#include <sys/dsl_destroy.h>
|
||||
#include <sys/blkptr.h>
|
||||
#include <sys/dsl_bookmark.h>
|
||||
#include <sys/zfeature.h>
|
||||
|
||||
/* Set this tunable to TRUE to replace corrupt data with 0x2f5baddb10c */
|
||||
int zfs_send_corrupt_data = B_FALSE;
|
||||
@ -168,7 +170,7 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
|
||||
}
|
||||
|
||||
static int
|
||||
dump_data(dmu_sendarg_t *dsp, dmu_object_type_t type,
|
||||
dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type,
|
||||
uint64_t object, uint64_t offset, int blksz, const blkptr_t *bp, void *data)
|
||||
{
|
||||
struct drr_write *drrw = &(dsp->dsa_drr->drr_u.drr_write);
|
||||
@ -203,13 +205,22 @@ dump_data(dmu_sendarg_t *dsp, dmu_object_type_t type,
|
||||
drrw->drr_offset = offset;
|
||||
drrw->drr_length = blksz;
|
||||
drrw->drr_toguid = dsp->dsa_toguid;
|
||||
drrw->drr_checksumtype = BP_GET_CHECKSUM(bp);
|
||||
if (zio_checksum_table[drrw->drr_checksumtype].ci_dedup)
|
||||
drrw->drr_checksumflags |= DRR_CHECKSUM_DEDUP;
|
||||
DDK_SET_LSIZE(&drrw->drr_key, BP_GET_LSIZE(bp));
|
||||
DDK_SET_PSIZE(&drrw->drr_key, BP_GET_PSIZE(bp));
|
||||
DDK_SET_COMPRESS(&drrw->drr_key, BP_GET_COMPRESS(bp));
|
||||
drrw->drr_key.ddk_cksum = bp->blk_cksum;
|
||||
if (BP_IS_EMBEDDED(bp)) {
|
||||
/*
|
||||
* There's no pre-computed checksum of embedded BP's, so
|
||||
* (like fletcher4-checksummed blocks) userland will have
|
||||
* to compute a dedup-capable checksum itself.
|
||||
*/
|
||||
drrw->drr_checksumtype = ZIO_CHECKSUM_OFF;
|
||||
} else {
|
||||
drrw->drr_checksumtype = BP_GET_CHECKSUM(bp);
|
||||
if (zio_checksum_table[drrw->drr_checksumtype].ci_dedup)
|
||||
drrw->drr_checksumflags |= DRR_CHECKSUM_DEDUP;
|
||||
DDK_SET_LSIZE(&drrw->drr_key, BP_GET_LSIZE(bp));
|
||||
DDK_SET_PSIZE(&drrw->drr_key, BP_GET_PSIZE(bp));
|
||||
DDK_SET_COMPRESS(&drrw->drr_key, BP_GET_COMPRESS(bp));
|
||||
drrw->drr_key.ddk_cksum = bp->blk_cksum;
|
||||
}
|
||||
|
||||
if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
|
||||
return (SET_ERROR(EINTR));
|
||||
@ -218,6 +229,43 @@ dump_data(dmu_sendarg_t *dsp, dmu_object_type_t type,
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
dump_write_embedded(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
|
||||
int blksz, const blkptr_t *bp)
|
||||
{
|
||||
char buf[BPE_PAYLOAD_SIZE];
|
||||
struct drr_write_embedded *drrw =
|
||||
&(dsp->dsa_drr->drr_u.drr_write_embedded);
|
||||
|
||||
if (dsp->dsa_pending_op != PENDING_NONE) {
|
||||
if (dump_bytes(dsp, dsp->dsa_drr,
|
||||
sizeof (dmu_replay_record_t)) != 0)
|
||||
return (EINTR);
|
||||
dsp->dsa_pending_op = PENDING_NONE;
|
||||
}
|
||||
|
||||
ASSERT(BP_IS_EMBEDDED(bp));
|
||||
|
||||
bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
|
||||
dsp->dsa_drr->drr_type = DRR_WRITE_EMBEDDED;
|
||||
drrw->drr_object = object;
|
||||
drrw->drr_offset = offset;
|
||||
drrw->drr_length = blksz;
|
||||
drrw->drr_toguid = dsp->dsa_toguid;
|
||||
drrw->drr_compression = BP_GET_COMPRESS(bp);
|
||||
drrw->drr_etype = BPE_GET_ETYPE(bp);
|
||||
drrw->drr_lsize = BPE_GET_LSIZE(bp);
|
||||
drrw->drr_psize = BPE_GET_PSIZE(bp);
|
||||
|
||||
decode_embedded_bp_compressed(bp, buf);
|
||||
|
||||
if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
|
||||
return (EINTR);
|
||||
if (dump_bytes(dsp, buf, P2ROUNDUP(drrw->drr_psize, 8)) != 0)
|
||||
return (EINTR);
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
dump_spill(dmu_sendarg_t *dsp, uint64_t object, int blksz, void *data)
|
||||
{
|
||||
@ -338,6 +386,33 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp)
|
||||
return (0);
|
||||
}
|
||||
|
||||
static boolean_t
|
||||
backup_do_embed(dmu_sendarg_t *dsp, const blkptr_t *bp)
|
||||
{
|
||||
if (!BP_IS_EMBEDDED(bp))
|
||||
return (B_FALSE);
|
||||
|
||||
/*
|
||||
* Compression function must be legacy, or explicitly enabled.
|
||||
*/
|
||||
if ((BP_GET_COMPRESS(bp) >= ZIO_COMPRESS_LEGACY_FUNCTIONS &&
|
||||
!(dsp->dsa_featureflags & DMU_BACKUP_FEATURE_EMBED_DATA_LZ4)))
|
||||
return (B_FALSE);
|
||||
|
||||
/*
|
||||
* Embed type must be explicitly enabled.
|
||||
*/
|
||||
switch (BPE_GET_ETYPE(bp)) {
|
||||
case BP_EMBEDDED_TYPE_DATA:
|
||||
if (dsp->dsa_featureflags & DMU_BACKUP_FEATURE_EMBED_DATA)
|
||||
return (B_TRUE);
|
||||
break;
|
||||
default:
|
||||
return (B_FALSE);
|
||||
}
|
||||
return (B_FALSE);
|
||||
}
|
||||
|
||||
#define BP_SPAN(dnp, level) \
|
||||
(((uint64_t)dnp->dn_datablkszsec) << (SPA_MINBLOCKSHIFT + \
|
||||
(level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT)))
|
||||
@ -406,11 +481,17 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
||||
|
||||
err = dump_spill(dsp, zb->zb_object, blksz, abuf->b_data);
|
||||
(void) arc_buf_remove_ref(abuf, &abuf);
|
||||
} else if (backup_do_embed(dsp, bp)) {
|
||||
/* it's an embedded level-0 block of a regular object */
|
||||
int blksz = dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT;
|
||||
err = dump_write_embedded(dsp, zb->zb_object,
|
||||
zb->zb_blkid * blksz, blksz, bp);
|
||||
} else { /* it's a level-0 block of a regular object */
|
||||
uint32_t aflags = ARC_WAIT;
|
||||
arc_buf_t *abuf;
|
||||
int blksz = BP_GET_LSIZE(bp);
|
||||
|
||||
ASSERT3U(blksz, ==, dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
|
||||
ASSERT0(zb->zb_level);
|
||||
if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
|
||||
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
|
||||
@ -429,7 +510,7 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
||||
}
|
||||
}
|
||||
|
||||
err = dump_data(dsp, type, zb->zb_object, zb->zb_blkid * blksz,
|
||||
err = dump_write(dsp, type, zb->zb_object, zb->zb_blkid * blksz,
|
||||
blksz, bp, abuf->b_data);
|
||||
(void) arc_buf_remove_ref(abuf, &abuf);
|
||||
}
|
||||
@ -443,14 +524,15 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
||||
*/
|
||||
static int
|
||||
dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds,
|
||||
zfs_bookmark_phys_t *fromzb, boolean_t is_clone, int outfd,
|
||||
vnode_t *vp, offset_t *off)
|
||||
zfs_bookmark_phys_t *fromzb, boolean_t is_clone, boolean_t embedok,
|
||||
int outfd, vnode_t *vp, offset_t *off)
|
||||
{
|
||||
objset_t *os;
|
||||
dmu_replay_record_t *drr;
|
||||
dmu_sendarg_t *dsp;
|
||||
int err;
|
||||
uint64_t fromtxg = 0;
|
||||
uint64_t featureflags = 0;
|
||||
|
||||
err = dmu_objset_from_ds(ds, &os);
|
||||
if (err != 0) {
|
||||
@ -473,13 +555,23 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds,
|
||||
return (SET_ERROR(EINVAL));
|
||||
}
|
||||
if (version >= ZPL_VERSION_SA) {
|
||||
DMU_SET_FEATUREFLAGS(
|
||||
drr->drr_u.drr_begin.drr_versioninfo,
|
||||
DMU_BACKUP_FEATURE_SA_SPILL);
|
||||
featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (embedok &&
|
||||
spa_feature_is_active(dp->dp_spa, SPA_FEATURE_EMBEDDED_DATA)) {
|
||||
featureflags |= DMU_BACKUP_FEATURE_EMBED_DATA;
|
||||
if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS))
|
||||
featureflags |= DMU_BACKUP_FEATURE_EMBED_DATA_LZ4;
|
||||
} else {
|
||||
embedok = B_FALSE;
|
||||
}
|
||||
|
||||
DMU_SET_FEATUREFLAGS(drr->drr_u.drr_begin.drr_versioninfo,
|
||||
featureflags);
|
||||
|
||||
drr->drr_u.drr_begin.drr_creation_time =
|
||||
ds->ds_phys->ds_creation_time;
|
||||
drr->drr_u.drr_begin.drr_type = dmu_objset_type(os);
|
||||
@ -511,6 +603,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds,
|
||||
ZIO_SET_CHECKSUM(&dsp->dsa_zc, 0, 0, 0, 0);
|
||||
dsp->dsa_pending_op = PENDING_NONE;
|
||||
dsp->dsa_incremental = (fromzb != NULL);
|
||||
dsp->dsa_featureflags = featureflags;
|
||||
|
||||
mutex_enter(&ds->ds_sendstream_lock);
|
||||
list_insert_head(&ds->ds_sendstreams, dsp);
|
||||
@ -562,7 +655,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds,
|
||||
|
||||
int
|
||||
dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
|
||||
int outfd, vnode_t *vp, offset_t *off)
|
||||
boolean_t embedok, int outfd, vnode_t *vp, offset_t *off)
|
||||
{
|
||||
dsl_pool_t *dp;
|
||||
dsl_dataset_t *ds;
|
||||
@ -596,10 +689,10 @@ dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
|
||||
zb.zbm_guid = fromds->ds_phys->ds_guid;
|
||||
is_clone = (fromds->ds_dir != ds->ds_dir);
|
||||
dsl_dataset_rele(fromds, FTAG);
|
||||
err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone,
|
||||
err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone, embedok,
|
||||
outfd, vp, off);
|
||||
} else {
|
||||
err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE,
|
||||
err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE, embedok,
|
||||
outfd, vp, off);
|
||||
}
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
@ -607,7 +700,7 @@ dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
|
||||
}
|
||||
|
||||
int
|
||||
dmu_send(const char *tosnap, const char *fromsnap,
|
||||
dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
|
||||
int outfd, vnode_t *vp, offset_t *off)
|
||||
{
|
||||
dsl_pool_t *dp;
|
||||
@ -674,10 +767,10 @@ dmu_send(const char *tosnap, const char *fromsnap,
|
||||
dsl_pool_rele(dp, FTAG);
|
||||
return (err);
|
||||
}
|
||||
err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone,
|
||||
err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone, embedok,
|
||||
outfd, vp, off);
|
||||
} else {
|
||||
err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE,
|
||||
err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE, embedok,
|
||||
outfd, vp, off);
|
||||
}
|
||||
if (owned)
|
||||
@ -847,6 +940,7 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
|
||||
uint64_t fromguid = drrb->drr_fromguid;
|
||||
int flags = drrb->drr_flags;
|
||||
int error;
|
||||
uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
|
||||
dsl_dataset_t *ds;
|
||||
const char *tofs = drba->drba_cookie->drc_tofs;
|
||||
|
||||
@ -860,11 +954,22 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
|
||||
return (SET_ERROR(EINVAL));
|
||||
|
||||
/* Verify pool version supports SA if SA_SPILL feature set */
|
||||
if ((DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
|
||||
DMU_BACKUP_FEATURE_SA_SPILL) &&
|
||||
spa_version(dp->dp_spa) < SPA_VERSION_SA) {
|
||||
if ((featureflags & DMU_BACKUP_FEATURE_SA_SPILL) &&
|
||||
spa_version(dp->dp_spa) < SPA_VERSION_SA)
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
|
||||
/*
|
||||
* The receiving code doesn't know how to translate a WRITE_EMBEDDED
|
||||
* record to a plain WRITE record, so the pool must have the
|
||||
* EMBEDDED_DATA feature enabled if the stream has WRITE_EMBEDDED
|
||||
* records. Same with WRITE_EMBEDDED records that use LZ4 compression.
|
||||
*/
|
||||
if ((featureflags & DMU_BACKUP_FEATURE_EMBED_DATA) &&
|
||||
!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_EMBEDDED_DATA))
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
if ((featureflags & DMU_BACKUP_FEATURE_EMBED_DATA_LZ4) &&
|
||||
!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS))
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
}
|
||||
|
||||
error = dsl_dataset_hold(dp, tofs, FTAG, &ds);
|
||||
if (error == 0) {
|
||||
@ -1159,7 +1264,6 @@ backup_byteswap(dmu_replay_record_t *drr)
|
||||
break;
|
||||
case DRR_OBJECT:
|
||||
DO64(drr_object.drr_object);
|
||||
/* DO64(drr_object.drr_allocation_txg); */
|
||||
DO32(drr_object.drr_type);
|
||||
DO32(drr_object.drr_bonustype);
|
||||
DO32(drr_object.drr_blksz);
|
||||
@ -1197,6 +1301,14 @@ backup_byteswap(dmu_replay_record_t *drr)
|
||||
DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[3]);
|
||||
DO64(drr_write_byref.drr_key.ddk_prop);
|
||||
break;
|
||||
case DRR_WRITE_EMBEDDED:
|
||||
DO64(drr_write_embedded.drr_object);
|
||||
DO64(drr_write_embedded.drr_offset);
|
||||
DO64(drr_write_embedded.drr_length);
|
||||
DO64(drr_write_embedded.drr_toguid);
|
||||
DO32(drr_write_embedded.drr_lsize);
|
||||
DO32(drr_write_embedded.drr_psize);
|
||||
break;
|
||||
case DRR_FREE:
|
||||
DO64(drr_free.drr_object);
|
||||
DO64(drr_free.drr_offset);
|
||||
@ -1384,7 +1496,7 @@ restore_write_byref(struct restorearg *ra, objset_t *os,
|
||||
int err;
|
||||
guid_map_entry_t gmesrch;
|
||||
guid_map_entry_t *gmep;
|
||||
avl_index_t where;
|
||||
avl_index_t where;
|
||||
objset_t *ref_os = NULL;
|
||||
dmu_buf_t *dbp;
|
||||
|
||||
@ -1407,8 +1519,9 @@ restore_write_byref(struct restorearg *ra, objset_t *os,
|
||||
ref_os = os;
|
||||
}
|
||||
|
||||
if (err = dmu_buf_hold(ref_os, drrwbr->drr_refobject,
|
||||
drrwbr->drr_refoffset, FTAG, &dbp, DMU_READ_PREFETCH))
|
||||
err = dmu_buf_hold(ref_os, drrwbr->drr_refobject,
|
||||
drrwbr->drr_refoffset, FTAG, &dbp, DMU_READ_PREFETCH);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
|
||||
tx = dmu_tx_create(os);
|
||||
@ -1427,6 +1540,48 @@ restore_write_byref(struct restorearg *ra, objset_t *os,
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
restore_write_embedded(struct restorearg *ra, objset_t *os,
|
||||
struct drr_write_embedded *drrwnp)
|
||||
{
|
||||
dmu_tx_t *tx;
|
||||
int err;
|
||||
void *data;
|
||||
|
||||
if (drrwnp->drr_offset + drrwnp->drr_length < drrwnp->drr_offset)
|
||||
return (EINVAL);
|
||||
|
||||
if (drrwnp->drr_psize > BPE_PAYLOAD_SIZE)
|
||||
return (EINVAL);
|
||||
|
||||
if (drrwnp->drr_etype >= NUM_BP_EMBEDDED_TYPES)
|
||||
return (EINVAL);
|
||||
if (drrwnp->drr_compression >= ZIO_COMPRESS_FUNCTIONS)
|
||||
return (EINVAL);
|
||||
|
||||
data = restore_read(ra, P2ROUNDUP(drrwnp->drr_psize, 8));
|
||||
if (data == NULL)
|
||||
return (ra->err);
|
||||
|
||||
tx = dmu_tx_create(os);
|
||||
|
||||
dmu_tx_hold_write(tx, drrwnp->drr_object,
|
||||
drrwnp->drr_offset, drrwnp->drr_length);
|
||||
err = dmu_tx_assign(tx, TXG_WAIT);
|
||||
if (err != 0) {
|
||||
dmu_tx_abort(tx);
|
||||
return (err);
|
||||
}
|
||||
|
||||
dmu_write_embedded(os, drrwnp->drr_object,
|
||||
drrwnp->drr_offset, data, drrwnp->drr_etype,
|
||||
drrwnp->drr_compression, drrwnp->drr_lsize, drrwnp->drr_psize,
|
||||
ra->byteswap ^ ZFS_HOST_BYTEORDER, tx);
|
||||
|
||||
dmu_tx_commit(tx);
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
restore_spill(struct restorearg *ra, objset_t *os, struct drr_spill *drrs)
|
||||
{
|
||||
@ -1621,6 +1776,13 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
|
||||
ra.err = restore_write_byref(&ra, os, &drrwbr);
|
||||
break;
|
||||
}
|
||||
case DRR_WRITE_EMBEDDED:
|
||||
{
|
||||
struct drr_write_embedded drrwe =
|
||||
drr->drr_u.drr_write_embedded;
|
||||
ra.err = restore_write_embedded(&ra, os, &drrwe);
|
||||
break;
|
||||
}
|
||||
case DRR_FREE:
|
||||
{
|
||||
struct drr_free drrf = drr->drr_u.drr_free;
|
||||
|
@ -458,7 +458,7 @@ traverse_prefetcher(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
||||
if (pfd->pd_cancel)
|
||||
return (SET_ERROR(EINTR));
|
||||
|
||||
if (BP_IS_HOLE(bp) ||
|
||||
if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp) ||
|
||||
!((pfd->pd_flags & TRAVERSE_PREFETCH_DATA) ||
|
||||
BP_GET_TYPE(bp) == DMU_OT_DNODE || BP_GET_LEVEL(bp) > 0) ||
|
||||
BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG)
|
||||
|
@ -1811,8 +1811,8 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
|
||||
*offset = *offset >> span;
|
||||
for (i = BF64_GET(*offset, 0, epbs);
|
||||
i >= 0 && i < epb; i += inc) {
|
||||
if (bp[i].blk_fill >= minfill &&
|
||||
bp[i].blk_fill <= maxfill &&
|
||||
if (BP_GET_FILL(&bp[i]) >= minfill &&
|
||||
BP_GET_FILL(&bp[i]) <= maxfill &&
|
||||
(hole || bp[i].blk_birth > txg))
|
||||
break;
|
||||
if (inc > 0 || *offset > 0)
|
||||
|
@ -233,8 +233,6 @@ free_verify(dmu_buf_impl_t *db, uint64_t start, uint64_t end, dmu_tx_t *tx)
|
||||
}
|
||||
#endif
|
||||
|
||||
#define ALL -1
|
||||
|
||||
static void
|
||||
free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks,
|
||||
dmu_tx_t *tx)
|
||||
@ -362,7 +360,6 @@ dnode_sync_free_range_impl(dnode_t *dn, uint64_t blkid, uint64_t nblks,
|
||||
|
||||
free_children(db, blkid, nblks, tx);
|
||||
dbuf_rele(db, FTAG);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@ -591,11 +588,14 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
|
||||
dnp->dn_bonustype = dn->dn_bonustype;
|
||||
dnp->dn_bonuslen = dn->dn_bonuslen;
|
||||
}
|
||||
|
||||
ASSERT(dnp->dn_nlevels > 1 ||
|
||||
BP_IS_HOLE(&dnp->dn_blkptr[0]) ||
|
||||
BP_IS_EMBEDDED(&dnp->dn_blkptr[0]) ||
|
||||
BP_GET_LSIZE(&dnp->dn_blkptr[0]) ==
|
||||
dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
|
||||
ASSERT(dnp->dn_nlevels < 2 ||
|
||||
BP_IS_HOLE(&dnp->dn_blkptr[0]) ||
|
||||
BP_GET_LSIZE(&dnp->dn_blkptr[0]) == 1 << dnp->dn_indblkshift);
|
||||
|
||||
if (dn->dn_next_type[txgoff] != 0) {
|
||||
dnp->dn_type = dn->dn_type;
|
||||
|
@ -1641,7 +1641,7 @@ dsl_dataset_space(dsl_dataset_t *ds,
|
||||
else
|
||||
*availbytesp = 0;
|
||||
}
|
||||
*usedobjsp = ds->ds_phys->ds_bp.blk_fill;
|
||||
*usedobjsp = BP_GET_FILL(&ds->ds_phys->ds_bp);
|
||||
*availobjsp = DN_MAX_OBJECT - *usedobjsp;
|
||||
}
|
||||
|
||||
|
@ -539,7 +539,7 @@ kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
||||
struct killarg *ka = arg;
|
||||
dmu_tx_t *tx = ka->tx;
|
||||
|
||||
if (BP_IS_HOLE(bp))
|
||||
if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
|
||||
return (0);
|
||||
|
||||
if (zb->zb_level == ZB_ZIL_LEVEL) {
|
||||
@ -589,6 +589,7 @@ dsl_destroy_head_check_impl(dsl_dataset_t *ds, int expected_holds)
|
||||
uint64_t count;
|
||||
objset_t *mos;
|
||||
|
||||
ASSERT(!dsl_dataset_is_snapshot(ds));
|
||||
if (dsl_dataset_is_snapshot(ds))
|
||||
return (SET_ERROR(EINVAL));
|
||||
|
||||
@ -711,7 +712,7 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
|
||||
ds->ds_prev->ds_phys->ds_num_children == 2 &&
|
||||
ds->ds_prev->ds_userrefs == 0);
|
||||
|
||||
/* Remove our reservation */
|
||||
/* Remove our reservation. */
|
||||
if (ds->ds_reserved != 0) {
|
||||
dsl_dataset_set_refreservation_sync_impl(ds,
|
||||
(ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED),
|
||||
|
@ -1456,6 +1456,10 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
|
||||
}
|
||||
if (err == ERESTART)
|
||||
return;
|
||||
/* finished; verify that space accounting went to zero */
|
||||
ASSERT0(dp->dp_free_dir->dd_phys->dd_used_bytes);
|
||||
ASSERT0(dp->dp_free_dir->dd_phys->dd_compressed_bytes);
|
||||
ASSERT0(dp->dp_free_dir->dd_phys->dd_uncompressed_bytes);
|
||||
}
|
||||
|
||||
if (scn->scn_phys.scn_state != DSS_SCANNING)
|
||||
@ -1638,6 +1642,9 @@ dsl_scan_scrub_cb(dsl_pool_t *dp,
|
||||
|
||||
count_block(dp->dp_blkstats, bp);
|
||||
|
||||
if (BP_IS_EMBEDDED(bp))
|
||||
return (0);
|
||||
|
||||
ASSERT(DSL_SCAN_IS_SCRUB_RESILVER(scn));
|
||||
if (scn->scn_phys.scn_func == POOL_SCAN_SCRUB) {
|
||||
zio_flags |= ZIO_FLAG_SCRUB;
|
||||
|
@ -600,8 +600,7 @@ dsl_dataset_user_release_impl(nvlist_t *holds, nvlist_t *errlist,
|
||||
ddura.ddura_chkholds = fnvlist_alloc();
|
||||
|
||||
error = dsl_sync_task(pool, dsl_dataset_user_release_check,
|
||||
dsl_dataset_user_release_sync, &ddura,
|
||||
fnvlist_num_pairs(holds));
|
||||
dsl_dataset_user_release_sync, &ddura, 0);
|
||||
fnvlist_free(ddura.ddura_todelete);
|
||||
fnvlist_free(ddura.ddura_chkholds);
|
||||
|
||||
|
@ -1827,7 +1827,7 @@ static int
|
||||
spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
||||
const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
|
||||
{
|
||||
if (!BP_IS_HOLE(bp)) {
|
||||
if (!BP_IS_HOLE(bp) && !BP_IS_EMBEDDED(bp)) {
|
||||
zio_t *rio = arg;
|
||||
size_t size = BP_GET_PSIZE(bp);
|
||||
void *data = zio_data_buf_alloc(size);
|
||||
@ -2375,9 +2375,8 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
|
||||
|
||||
if (spa_feature_is_active(spa, SPA_FEATURE_ENABLED_TXG)) {
|
||||
if (spa_dir_prop(spa, DMU_POOL_FEATURE_ENABLED_TXG,
|
||||
&spa->spa_feat_enabled_txg_obj) != 0) {
|
||||
&spa->spa_feat_enabled_txg_obj) != 0)
|
||||
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
|
||||
}
|
||||
}
|
||||
|
||||
spa->spa_is_initializing = B_TRUE;
|
||||
@ -5260,11 +5259,6 @@ spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
|
||||
ASSERT(!locked);
|
||||
ASSERT(vd == vd->vdev_top);
|
||||
|
||||
/*
|
||||
* XXX - Once we have bp-rewrite this should
|
||||
* become the common case.
|
||||
*/
|
||||
|
||||
mg = vd->vdev_mg;
|
||||
|
||||
/*
|
||||
@ -6438,7 +6432,7 @@ spa_upgrade(spa_t *spa, uint64_t version)
|
||||
* possible.
|
||||
*/
|
||||
ASSERT(SPA_VERSION_IS_SUPPORTED(spa->spa_uberblock.ub_version));
|
||||
ASSERT(version >= spa->spa_uberblock.ub_version);
|
||||
ASSERT3U(version, >=, spa->spa_uberblock.ub_version);
|
||||
|
||||
spa->spa_uberblock.ub_version = version;
|
||||
vdev_config_dirty(spa->spa_root_vdev);
|
||||
|
@ -1341,7 +1341,10 @@ snprintf_blkptr(char *buf, size_t buflen, const blkptr_t *bp)
|
||||
(void) strlcpy(type, dmu_ot[BP_GET_TYPE(bp)].ot_name,
|
||||
sizeof (type));
|
||||
}
|
||||
checksum = zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name;
|
||||
if (!BP_IS_EMBEDDED(bp)) {
|
||||
checksum =
|
||||
zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name;
|
||||
}
|
||||
compress = zio_compress_table[BP_GET_COMPRESS(bp)].ci_name;
|
||||
}
|
||||
|
||||
@ -1643,7 +1646,7 @@ bp_get_dsize_sync(spa_t *spa, const blkptr_t *bp)
|
||||
{
|
||||
uint64_t dsize = 0;
|
||||
|
||||
for (int d = 0; d < SPA_DVAS_PER_BP; d++)
|
||||
for (int d = 0; d < BP_GET_NDVAS(bp); d++)
|
||||
dsize += dva_get_dsize_sync(spa, &bp->blk_dva[d]);
|
||||
|
||||
return (dsize);
|
||||
@ -1656,7 +1659,7 @@ bp_get_dsize(spa_t *spa, const blkptr_t *bp)
|
||||
|
||||
spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
|
||||
|
||||
for (int d = 0; d < SPA_DVAS_PER_BP; d++)
|
||||
for (int d = 0; d < BP_GET_NDVAS(bp); d++)
|
||||
dsize += dva_get_dsize_sync(spa, &bp->blk_dva[d]);
|
||||
|
||||
spa_config_exit(spa, SCL_VDEV, FTAG);
|
||||
|
38
uts/common/fs/zfs/sys/blkptr.h
Normal file
38
uts/common/fs/zfs/sys/blkptr.h
Normal file
@ -0,0 +1,38 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* This file and its contents are supplied under the terms of the
|
||||
* Common Development and Distribution License ("CDDL"), version 1.0.
|
||||
* You may only use this file in accordance with the terms of version
|
||||
* 1.0 of the CDDL.
|
||||
*
|
||||
* A full copy of the text of the CDDL should have accompanied this
|
||||
* source. A copy of the CDDL is also available via the Internet at
|
||||
* http://www.illumos.org/license/CDDL.
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2013 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_BLKPTR_H
|
||||
#define _SYS_BLKPTR_H
|
||||
|
||||
#include <sys/spa.h>
|
||||
#include <sys/zio.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void encode_embedded_bp_compressed(blkptr_t *, void *,
|
||||
enum zio_compress, int, int);
|
||||
void decode_embedded_bp_compressed(const blkptr_t *, void *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_BLKPTR_H */
|
@ -274,6 +274,9 @@ void dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx);
|
||||
void dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx);
|
||||
dbuf_dirty_record_t *dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
|
||||
arc_buf_t *dbuf_loan_arcbuf(dmu_buf_impl_t *db);
|
||||
void dmu_buf_write_embedded(dmu_buf_t *dbuf, void *data,
|
||||
bp_embedded_type_t etype, enum zio_compress comp,
|
||||
int uncompressed_size, int compressed_size, int byteorder, dmu_tx_t *tx);
|
||||
|
||||
void dbuf_clear(dmu_buf_impl_t *db);
|
||||
void dbuf_evict(dmu_buf_impl_t *db);
|
||||
|
@ -119,6 +119,14 @@ typedef enum dmu_object_byteswap {
|
||||
((ot) & DMU_OT_METADATA) : \
|
||||
dmu_ot[(ot)].ot_metadata)
|
||||
|
||||
/*
|
||||
* These object types use bp_fill != 1 for their L0 bp's. Therefore they can't
|
||||
* have their data embedded (i.e. use a BP_IS_EMBEDDED() bp), because bp_fill
|
||||
* is repurposed for embedded BPs.
|
||||
*/
|
||||
#define DMU_OT_HAS_FILL(ot) \
|
||||
((ot) == DMU_OT_DNODE || (ot) == DMU_OT_OBJSET)
|
||||
|
||||
#define DMU_OT_BYTESWAP(ot) (((ot) & DMU_OT_NEWTYPE) ? \
|
||||
((ot) & DMU_OT_BYTESWAP_MASK) : \
|
||||
dmu_ot[(ot)].ot_byteswap)
|
||||
@ -394,6 +402,11 @@ void dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum,
|
||||
void dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress,
|
||||
dmu_tx_t *tx);
|
||||
|
||||
void
|
||||
dmu_write_embedded(objset_t *os, uint64_t object, uint64_t offset,
|
||||
void *data, uint8_t etype, uint8_t comp, int uncompressed_size,
|
||||
int compressed_size, int byteorder, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Decide how to write a block: checksum, compression, number of copies, etc.
|
||||
*/
|
||||
|
@ -294,12 +294,15 @@ typedef struct dmu_sendarg {
|
||||
int dsa_err;
|
||||
dmu_pendop_t dsa_pending_op;
|
||||
boolean_t dsa_incremental;
|
||||
uint64_t dsa_featureflags;
|
||||
uint64_t dsa_last_data_object;
|
||||
uint64_t dsa_last_data_offset;
|
||||
} dmu_sendarg_t;
|
||||
|
||||
void dmu_object_zapify(objset_t *, uint64_t, dmu_object_type_t, dmu_tx_t *);
|
||||
void dmu_object_free_zapified(objset_t *, uint64_t, dmu_tx_t *);
|
||||
int dmu_buf_hold_noread(objset_t *, uint64_t, uint64_t,
|
||||
void *, dmu_buf_t **);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
@ -37,12 +37,12 @@ struct dsl_dataset;
|
||||
struct drr_begin;
|
||||
struct avl_tree;
|
||||
|
||||
int dmu_send(const char *tosnap, const char *fromsnap, int outfd,
|
||||
struct vnode *vp, offset_t *off);
|
||||
int dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
|
||||
int outfd, struct vnode *vp, offset_t *off);
|
||||
int dmu_send_estimate(struct dsl_dataset *ds, struct dsl_dataset *fromds,
|
||||
uint64_t *sizep);
|
||||
int dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
|
||||
int outfd, struct vnode *vp, offset_t *off);
|
||||
boolean_t embedok, int outfd, vnode_t *vp, offset_t *off);
|
||||
|
||||
typedef struct dmu_recv_cookie {
|
||||
struct dsl_dataset *drc_ds;
|
||||
|
@ -154,7 +154,7 @@ typedef struct zio_cksum {
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* 5 |G| offset3 |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* 6 |BDX|lvl| type | cksum | comp | PSIZE | LSIZE |
|
||||
* 6 |BDX|lvl| type | cksum |E| comp| PSIZE | LSIZE |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* 7 | padding |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
@ -188,7 +188,8 @@ typedef struct zio_cksum {
|
||||
* G gang block indicator
|
||||
* B byteorder (endianness)
|
||||
* D dedup
|
||||
* X unused
|
||||
* X encryption (on version 30, which is not supported)
|
||||
* E blkptr_t contains embedded data (see below)
|
||||
* lvl level of indirection
|
||||
* type DMU object type
|
||||
* phys birth txg of block allocation; zero if same as logical birth txg
|
||||
@ -196,6 +197,100 @@ typedef struct zio_cksum {
|
||||
* fill count number of non-zero blocks under this bp
|
||||
* checksum[4] 256-bit checksum of the data this bp describes
|
||||
*/
|
||||
|
||||
/*
|
||||
* "Embedded" blkptr_t's don't actually point to a block, instead they
|
||||
* have a data payload embedded in the blkptr_t itself. See the comment
|
||||
* in blkptr.c for more details.
|
||||
*
|
||||
* The blkptr_t is laid out as follows:
|
||||
*
|
||||
* 64 56 48 40 32 24 16 8 0
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* 0 | payload |
|
||||
* 1 | payload |
|
||||
* 2 | payload |
|
||||
* 3 | payload |
|
||||
* 4 | payload |
|
||||
* 5 | payload |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* 6 |BDX|lvl| type | etype |E| comp| PSIZE| LSIZE |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* 7 | payload |
|
||||
* 8 | payload |
|
||||
* 9 | payload |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* a | logical birth txg |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
* b | payload |
|
||||
* c | payload |
|
||||
* d | payload |
|
||||
* e | payload |
|
||||
* f | payload |
|
||||
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||
*
|
||||
* Legend:
|
||||
*
|
||||
* payload contains the embedded data
|
||||
* B (byteorder) byteorder (endianness)
|
||||
* D (dedup) padding (set to zero)
|
||||
* X encryption (set to zero; see above)
|
||||
* E (embedded) set to one
|
||||
* lvl indirection level
|
||||
* type DMU object type
|
||||
* etype how to interpret embedded data (BP_EMBEDDED_TYPE_*)
|
||||
* comp compression function of payload
|
||||
* PSIZE size of payload after compression, in bytes
|
||||
* LSIZE logical size of payload, in bytes
|
||||
* note that 25 bits is enough to store the largest
|
||||
* "normal" BP's LSIZE (2^16 * 2^9) in bytes
|
||||
* log. birth transaction group in which the block was logically born
|
||||
*
|
||||
* Note that LSIZE and PSIZE are stored in bytes, whereas for non-embedded
|
||||
* bp's they are stored in units of SPA_MINBLOCKSHIFT.
|
||||
* Generally, the generic BP_GET_*() macros can be used on embedded BP's.
|
||||
* The B, D, X, lvl, type, and comp fields are stored the same as with normal
|
||||
* BP's so the BP_SET_* macros can be used with them. etype, PSIZE, LSIZE must
|
||||
* be set with the BPE_SET_* macros. BP_SET_EMBEDDED() should be called before
|
||||
* other macros, as they assert that they are only used on BP's of the correct
|
||||
* "embedded-ness".
|
||||
*/
|
||||
|
||||
#define BPE_GET_ETYPE(bp) \
|
||||
(ASSERT(BP_IS_EMBEDDED(bp)), \
|
||||
BF64_GET((bp)->blk_prop, 40, 8))
|
||||
#define BPE_SET_ETYPE(bp, t) do { \
|
||||
ASSERT(BP_IS_EMBEDDED(bp)); \
|
||||
BF64_SET((bp)->blk_prop, 40, 8, t); \
|
||||
_NOTE(CONSTCOND) } while (0)
|
||||
|
||||
#define BPE_GET_LSIZE(bp) \
|
||||
(ASSERT(BP_IS_EMBEDDED(bp)), \
|
||||
BF64_GET_SB((bp)->blk_prop, 0, 25, 0, 1))
|
||||
#define BPE_SET_LSIZE(bp, x) do { \
|
||||
ASSERT(BP_IS_EMBEDDED(bp)); \
|
||||
BF64_SET_SB((bp)->blk_prop, 0, 25, 0, 1, x); \
|
||||
_NOTE(CONSTCOND) } while (0)
|
||||
|
||||
#define BPE_GET_PSIZE(bp) \
|
||||
(ASSERT(BP_IS_EMBEDDED(bp)), \
|
||||
BF64_GET_SB((bp)->blk_prop, 25, 7, 0, 1))
|
||||
#define BPE_SET_PSIZE(bp, x) do { \
|
||||
ASSERT(BP_IS_EMBEDDED(bp)); \
|
||||
BF64_SET_SB((bp)->blk_prop, 25, 7, 0, 1, x); \
|
||||
_NOTE(CONSTCOND) } while (0)
|
||||
|
||||
typedef enum bp_embedded_type {
|
||||
BP_EMBEDDED_TYPE_DATA,
|
||||
BP_EMBEDDED_TYPE_RESERVED, /* Reserved for an unintegrated feature. */
|
||||
NUM_BP_EMBEDDED_TYPES = BP_EMBEDDED_TYPE_RESERVED
|
||||
} bp_embedded_type_t;
|
||||
|
||||
#define BPE_NUM_WORDS 14
|
||||
#define BPE_PAYLOAD_SIZE (BPE_NUM_WORDS * sizeof (uint64_t))
|
||||
#define BPE_IS_PAYLOADWORD(bp, wp) \
|
||||
((wp) != &(bp)->blk_prop && (wp) != &(bp)->blk_birth)
|
||||
|
||||
#define SPA_BLKPTRSHIFT 7 /* blkptr_t is 128 bytes */
|
||||
#define SPA_DVAS_PER_BP 3 /* Number of DVAs in a bp */
|
||||
|
||||
@ -242,20 +337,37 @@ typedef struct blkptr {
|
||||
#define DVA_SET_GANG(dva, x) BF64_SET((dva)->dva_word[1], 63, 1, x)
|
||||
|
||||
#define BP_GET_LSIZE(bp) \
|
||||
BF64_GET_SB((bp)->blk_prop, 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1)
|
||||
#define BP_SET_LSIZE(bp, x) \
|
||||
BF64_SET_SB((bp)->blk_prop, 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1, x)
|
||||
(BP_IS_EMBEDDED(bp) ? \
|
||||
(BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA ? BPE_GET_LSIZE(bp) : 0): \
|
||||
BF64_GET_SB((bp)->blk_prop, 0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1))
|
||||
#define BP_SET_LSIZE(bp, x) do { \
|
||||
ASSERT(!BP_IS_EMBEDDED(bp)); \
|
||||
BF64_SET_SB((bp)->blk_prop, \
|
||||
0, SPA_LSIZEBITS, SPA_MINBLOCKSHIFT, 1, x); \
|
||||
_NOTE(CONSTCOND) } while (0)
|
||||
|
||||
#define BP_GET_PSIZE(bp) \
|
||||
BF64_GET_SB((bp)->blk_prop, 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1)
|
||||
#define BP_SET_PSIZE(bp, x) \
|
||||
BF64_SET_SB((bp)->blk_prop, 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1, x)
|
||||
(BP_IS_EMBEDDED(bp) ? 0 : \
|
||||
BF64_GET_SB((bp)->blk_prop, 16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1))
|
||||
#define BP_SET_PSIZE(bp, x) do { \
|
||||
ASSERT(!BP_IS_EMBEDDED(bp)); \
|
||||
BF64_SET_SB((bp)->blk_prop, \
|
||||
16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1, x); \
|
||||
_NOTE(CONSTCOND) } while (0)
|
||||
|
||||
#define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop, 32, 8)
|
||||
#define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 8, x)
|
||||
#define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop, 32, 7)
|
||||
#define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 7, x)
|
||||
|
||||
#define BP_GET_CHECKSUM(bp) BF64_GET((bp)->blk_prop, 40, 8)
|
||||
#define BP_SET_CHECKSUM(bp, x) BF64_SET((bp)->blk_prop, 40, 8, x)
|
||||
#define BP_IS_EMBEDDED(bp) BF64_GET((bp)->blk_prop, 39, 1)
|
||||
#define BP_SET_EMBEDDED(bp, x) BF64_SET((bp)->blk_prop, 39, 1, x)
|
||||
|
||||
#define BP_GET_CHECKSUM(bp) \
|
||||
(BP_IS_EMBEDDED(bp) ? ZIO_CHECKSUM_OFF : \
|
||||
BF64_GET((bp)->blk_prop, 40, 8))
|
||||
#define BP_SET_CHECKSUM(bp, x) do { \
|
||||
ASSERT(!BP_IS_EMBEDDED(bp)); \
|
||||
BF64_SET((bp)->blk_prop, 40, 8, x); \
|
||||
_NOTE(CONSTCOND) } while (0)
|
||||
|
||||
#define BP_GET_TYPE(bp) BF64_GET((bp)->blk_prop, 48, 8)
|
||||
#define BP_SET_TYPE(bp, x) BF64_SET((bp)->blk_prop, 48, 8, x)
|
||||
@ -263,9 +375,6 @@ typedef struct blkptr {
|
||||
#define BP_GET_LEVEL(bp) BF64_GET((bp)->blk_prop, 56, 5)
|
||||
#define BP_SET_LEVEL(bp, x) BF64_SET((bp)->blk_prop, 56, 5, x)
|
||||
|
||||
#define BP_GET_PROP_BIT_61(bp) BF64_GET((bp)->blk_prop, 61, 1)
|
||||
#define BP_SET_PROP_BIT_61(bp, x) BF64_SET((bp)->blk_prop, 61, 1, x)
|
||||
|
||||
#define BP_GET_DEDUP(bp) BF64_GET((bp)->blk_prop, 62, 1)
|
||||
#define BP_SET_DEDUP(bp, x) BF64_SET((bp)->blk_prop, 62, 1, x)
|
||||
|
||||
@ -273,31 +382,39 @@ typedef struct blkptr {
|
||||
#define BP_SET_BYTEORDER(bp, x) BF64_SET((bp)->blk_prop, 63, 1, x)
|
||||
|
||||
#define BP_PHYSICAL_BIRTH(bp) \
|
||||
((bp)->blk_phys_birth ? (bp)->blk_phys_birth : (bp)->blk_birth)
|
||||
(BP_IS_EMBEDDED(bp) ? 0 : \
|
||||
(bp)->blk_phys_birth ? (bp)->blk_phys_birth : (bp)->blk_birth)
|
||||
|
||||
#define BP_SET_BIRTH(bp, logical, physical) \
|
||||
{ \
|
||||
ASSERT(!BP_IS_EMBEDDED(bp)); \
|
||||
(bp)->blk_birth = (logical); \
|
||||
(bp)->blk_phys_birth = ((logical) == (physical) ? 0 : (physical)); \
|
||||
}
|
||||
|
||||
#define BP_GET_FILL(bp) (BP_IS_EMBEDDED(bp) ? 1 : (bp)->blk_fill)
|
||||
|
||||
#define BP_GET_ASIZE(bp) \
|
||||
(DVA_GET_ASIZE(&(bp)->blk_dva[0]) + DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
|
||||
DVA_GET_ASIZE(&(bp)->blk_dva[2]))
|
||||
(BP_IS_EMBEDDED(bp) ? 0 : \
|
||||
DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \
|
||||
DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
|
||||
DVA_GET_ASIZE(&(bp)->blk_dva[2]))
|
||||
|
||||
#define BP_GET_UCSIZE(bp) \
|
||||
((BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp))) ? \
|
||||
BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp))
|
||||
|
||||
#define BP_GET_NDVAS(bp) \
|
||||
(!!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \
|
||||
(BP_IS_EMBEDDED(bp) ? 0 : \
|
||||
!!DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \
|
||||
!!DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
|
||||
!!DVA_GET_ASIZE(&(bp)->blk_dva[2]))
|
||||
|
||||
#define BP_COUNT_GANG(bp) \
|
||||
(BP_IS_EMBEDDED(bp) ? 0 : \
|
||||
(DVA_GET_GANG(&(bp)->blk_dva[0]) + \
|
||||
DVA_GET_GANG(&(bp)->blk_dva[1]) + \
|
||||
DVA_GET_GANG(&(bp)->blk_dva[2]))
|
||||
DVA_GET_GANG(&(bp)->blk_dva[2])))
|
||||
|
||||
#define DVA_EQUAL(dva1, dva2) \
|
||||
((dva1)->dva_word[1] == (dva2)->dva_word[1] && \
|
||||
@ -305,6 +422,7 @@ typedef struct blkptr {
|
||||
|
||||
#define BP_EQUAL(bp1, bp2) \
|
||||
(BP_PHYSICAL_BIRTH(bp1) == BP_PHYSICAL_BIRTH(bp2) && \
|
||||
(bp1)->blk_birth == (bp2)->blk_birth && \
|
||||
DVA_EQUAL(&(bp1)->blk_dva[0], &(bp2)->blk_dva[0]) && \
|
||||
DVA_EQUAL(&(bp1)->blk_dva[1], &(bp2)->blk_dva[1]) && \
|
||||
DVA_EQUAL(&(bp1)->blk_dva[2], &(bp2)->blk_dva[2]))
|
||||
@ -325,11 +443,13 @@ typedef struct blkptr {
|
||||
(zcp)->zc_word[3] = w3; \
|
||||
}
|
||||
|
||||
#define BP_IDENTITY(bp) (&(bp)->blk_dva[0])
|
||||
#define BP_IS_GANG(bp) DVA_GET_GANG(BP_IDENTITY(bp))
|
||||
#define BP_IDENTITY(bp) (ASSERT(!BP_IS_EMBEDDED(bp)), &(bp)->blk_dva[0])
|
||||
#define BP_IS_GANG(bp) \
|
||||
(BP_IS_EMBEDDED(bp) ? B_FALSE : DVA_GET_GANG(BP_IDENTITY(bp)))
|
||||
#define DVA_IS_EMPTY(dva) ((dva)->dva_word[0] == 0ULL && \
|
||||
(dva)->dva_word[1] == 0ULL)
|
||||
#define BP_IS_HOLE(bp) DVA_IS_EMPTY(BP_IDENTITY(bp))
|
||||
#define BP_IS_HOLE(bp) \
|
||||
(!BP_IS_EMBEDDED(bp) && DVA_IS_EMPTY(BP_IDENTITY(bp)))
|
||||
|
||||
/* BP_IS_RAIDZ(bp) assumes no block compression */
|
||||
#define BP_IS_RAIDZ(bp) (DVA_GET_ASIZE(&(bp)->blk_dva[0]) > \
|
||||
@ -383,6 +503,17 @@ typedef struct blkptr {
|
||||
" birth=%lluL", \
|
||||
(u_longlong_t)bp->blk_birth); \
|
||||
} \
|
||||
} else if (BP_IS_EMBEDDED(bp)) { \
|
||||
len = func(buf + len, size - len, \
|
||||
"EMBEDDED [L%llu %s] et=%u %s " \
|
||||
"size=%llxL/%llxP birth=%lluL", \
|
||||
(u_longlong_t)BP_GET_LEVEL(bp), \
|
||||
type, \
|
||||
(int)BPE_GET_ETYPE(bp), \
|
||||
compress, \
|
||||
(u_longlong_t)BPE_GET_LSIZE(bp), \
|
||||
(u_longlong_t)BPE_GET_PSIZE(bp), \
|
||||
(u_longlong_t)bp->blk_birth); \
|
||||
} else { \
|
||||
for (int d = 0; d < BP_GET_NDVAS(bp); d++) { \
|
||||
const dva_t *dva = &bp->blk_dva[d]; \
|
||||
@ -416,7 +547,7 @@ typedef struct blkptr {
|
||||
(u_longlong_t)BP_GET_PSIZE(bp), \
|
||||
(u_longlong_t)bp->blk_birth, \
|
||||
(u_longlong_t)BP_PHYSICAL_BIRTH(bp), \
|
||||
(u_longlong_t)bp->blk_fill, \
|
||||
(u_longlong_t)BP_GET_FILL(bp), \
|
||||
ws, \
|
||||
(u_longlong_t)bp->blk_cksum.zc_word[0], \
|
||||
(u_longlong_t)bp->blk_cksum.zc_word[1], \
|
||||
|
@ -38,6 +38,7 @@
|
||||
#include <sys/refcount.h>
|
||||
#include <sys/bplist.h>
|
||||
#include <sys/bpobj.h>
|
||||
#include <sys/zfeature.h>
|
||||
#include <zfeature_common.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@ -20,7 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2013 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZFS_IOCTL_H
|
||||
@ -79,15 +79,19 @@ typedef enum drr_headertype {
|
||||
* Feature flags for zfs send streams (flags in drr_versioninfo)
|
||||
*/
|
||||
|
||||
#define DMU_BACKUP_FEATURE_DEDUP (0x1)
|
||||
#define DMU_BACKUP_FEATURE_DEDUPPROPS (0x2)
|
||||
#define DMU_BACKUP_FEATURE_SA_SPILL (0x4)
|
||||
#define DMU_BACKUP_FEATURE_DEDUP (1<<0)
|
||||
#define DMU_BACKUP_FEATURE_DEDUPPROPS (1<<1)
|
||||
#define DMU_BACKUP_FEATURE_SA_SPILL (1<<2)
|
||||
/* flags #3 - #15 are reserved for incompatible closed-source implementations */
|
||||
#define DMU_BACKUP_FEATURE_EMBED_DATA (1<<16)
|
||||
#define DMU_BACKUP_FEATURE_EMBED_DATA_LZ4 (1<<17)
|
||||
|
||||
/*
|
||||
* Mask of all supported backup features
|
||||
*/
|
||||
#define DMU_BACKUP_FEATURE_MASK (DMU_BACKUP_FEATURE_DEDUP | \
|
||||
DMU_BACKUP_FEATURE_DEDUPPROPS | DMU_BACKUP_FEATURE_SA_SPILL)
|
||||
DMU_BACKUP_FEATURE_DEDUPPROPS | DMU_BACKUP_FEATURE_SA_SPILL | \
|
||||
DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_EMBED_DATA_LZ4)
|
||||
|
||||
/* Are all features in the given flag word currently supported? */
|
||||
#define DMU_STREAM_SUPPORTED(x) (!((x) & ~DMU_BACKUP_FEATURE_MASK))
|
||||
@ -129,7 +133,7 @@ typedef struct dmu_replay_record {
|
||||
enum {
|
||||
DRR_BEGIN, DRR_OBJECT, DRR_FREEOBJECTS,
|
||||
DRR_WRITE, DRR_FREE, DRR_END, DRR_WRITE_BYREF,
|
||||
DRR_SPILL, DRR_NUMTYPES
|
||||
DRR_SPILL, DRR_WRITE_EMBEDDED, DRR_NUMTYPES
|
||||
} drr_type;
|
||||
uint32_t drr_payloadlen;
|
||||
union {
|
||||
@ -206,6 +210,19 @@ typedef struct dmu_replay_record {
|
||||
uint64_t drr_pad[4]; /* needed for crypto */
|
||||
/* spill data follows */
|
||||
} drr_spill;
|
||||
struct drr_write_embedded {
|
||||
uint64_t drr_object;
|
||||
uint64_t drr_offset;
|
||||
/* logical length, should equal blocksize */
|
||||
uint64_t drr_length;
|
||||
uint64_t drr_toguid;
|
||||
uint8_t drr_compression;
|
||||
uint8_t drr_etype;
|
||||
uint8_t drr_pad[6];
|
||||
uint32_t drr_lsize; /* uncompressed size of payload */
|
||||
uint32_t drr_psize; /* compr. (real) size of payload */
|
||||
/* (possibly compressed) content follows */
|
||||
} drr_write_embedded;
|
||||
} drr_u;
|
||||
} dmu_replay_record_t;
|
||||
|
||||
@ -307,8 +324,8 @@ typedef struct zfs_cmd {
|
||||
dmu_objset_stats_t zc_objset_stats;
|
||||
struct drr_begin zc_begin_record;
|
||||
zinject_record_t zc_inject_record;
|
||||
boolean_t zc_defer_destroy;
|
||||
boolean_t zc_temphold;
|
||||
uint32_t zc_defer_destroy;
|
||||
uint32_t zc_flags;
|
||||
uint64_t zc_action_handle;
|
||||
int zc_cleanup_fd;
|
||||
uint8_t zc_pad[4]; /* alignment */
|
||||
|
@ -84,6 +84,12 @@ enum zio_checksum {
|
||||
ZIO_CHECKSUM_FUNCTIONS
|
||||
};
|
||||
|
||||
/*
|
||||
* The number of "legacy" checksum functions which can be set on individual
|
||||
* objects.
|
||||
*/
|
||||
#define ZIO_CHECKSUM_LEGACY_FUNCTIONS ZIO_CHECKSUM_ZILOG2
|
||||
|
||||
#define ZIO_CHECKSUM_ON_VALUE ZIO_CHECKSUM_FLETCHER_4
|
||||
#define ZIO_CHECKSUM_DEFAULT ZIO_CHECKSUM_ON
|
||||
|
||||
@ -113,6 +119,12 @@ enum zio_compress {
|
||||
ZIO_COMPRESS_FUNCTIONS
|
||||
};
|
||||
|
||||
/*
|
||||
* The number of "legacy" compression functions which can be set on individual
|
||||
* objects.
|
||||
*/
|
||||
#define ZIO_COMPRESS_LEGACY_FUNCTIONS ZIO_COMPRESS_LZ4
|
||||
|
||||
/* N.B. when altering this value, also change BOOTFS_COMPRESS_VALID below */
|
||||
#define ZIO_COMPRESS_ON_VALUE ZIO_COMPRESS_LZJB
|
||||
#define ZIO_COMPRESS_DEFAULT ZIO_COMPRESS_OFF
|
||||
|
@ -4275,6 +4275,7 @@ zfs_ioc_recv(zfs_cmd_t *zc)
|
||||
* zc_fromobj objsetid of incremental fromsnap (may be zero)
|
||||
* zc_guid if set, estimate size of stream only. zc_cookie is ignored.
|
||||
* output size in zc_objset_type.
|
||||
* zc_flags if =1, WRITE_EMBEDDED records are permitted
|
||||
*
|
||||
* outputs:
|
||||
* zc_objset_type estimated size, if zc_guid is set
|
||||
@ -4285,6 +4286,7 @@ zfs_ioc_send(zfs_cmd_t *zc)
|
||||
int error;
|
||||
offset_t off;
|
||||
boolean_t estimate = (zc->zc_guid != 0);
|
||||
boolean_t embedok = (zc->zc_flags & 0x1);
|
||||
|
||||
if (zc->zc_obj != 0) {
|
||||
dsl_pool_t *dp;
|
||||
@ -4345,7 +4347,7 @@ zfs_ioc_send(zfs_cmd_t *zc)
|
||||
|
||||
off = fp->f_offset;
|
||||
error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
|
||||
zc->zc_fromobj, zc->zc_cookie, fp->f_vnode, &off);
|
||||
zc->zc_fromobj, embedok, zc->zc_cookie, fp->f_vnode, &off);
|
||||
|
||||
if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
|
||||
fp->f_offset = off;
|
||||
@ -5247,6 +5249,8 @@ zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
* innvl: {
|
||||
* "fd" -> file descriptor to write stream to (int32)
|
||||
* (optional) "fromsnap" -> full snap name to send an incremental from
|
||||
* (optional) "embedok" -> (value ignored)
|
||||
* presence indicates DRR_WRITE_EMBEDDED records are permitted
|
||||
* }
|
||||
*
|
||||
* outnvl is unused
|
||||
@ -5259,6 +5263,7 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
offset_t off;
|
||||
char *fromname = NULL;
|
||||
int fd;
|
||||
boolean_t embedok;
|
||||
|
||||
error = nvlist_lookup_int32(innvl, "fd", &fd);
|
||||
if (error != 0)
|
||||
@ -5266,12 +5271,14 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
|
||||
(void) nvlist_lookup_string(innvl, "fromsnap", &fromname);
|
||||
|
||||
embedok = nvlist_exists(innvl, "embedok");
|
||||
|
||||
file_t *fp = getf(fd);
|
||||
if (fp == NULL)
|
||||
return (SET_ERROR(EBADF));
|
||||
|
||||
off = fp->f_offset;
|
||||
error = dmu_send(snapname, fromname, fd, fp->f_vnode, &off);
|
||||
error = dmu_send(snapname, fromname, embedok, fd, fp->f_vnode, &off);
|
||||
|
||||
if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
|
||||
fp->f_offset = off;
|
||||
|
@ -137,10 +137,15 @@ int
|
||||
zil_bp_tree_add(zilog_t *zilog, const blkptr_t *bp)
|
||||
{
|
||||
avl_tree_t *t = &zilog->zl_bp_tree;
|
||||
const dva_t *dva = BP_IDENTITY(bp);
|
||||
const dva_t *dva;
|
||||
zil_bp_node_t *zn;
|
||||
avl_index_t where;
|
||||
|
||||
if (BP_IS_EMBEDDED(bp))
|
||||
return (0);
|
||||
|
||||
dva = BP_IDENTITY(bp);
|
||||
|
||||
if (avl_find(t, dva, &where) != NULL)
|
||||
return (SET_ERROR(EEXIST));
|
||||
|
||||
@ -831,7 +836,7 @@ zil_lwb_write_done(zio_t *zio)
|
||||
ASSERT(BP_GET_BYTEORDER(zio->io_bp) == ZFS_HOST_BYTEORDER);
|
||||
ASSERT(!BP_IS_GANG(zio->io_bp));
|
||||
ASSERT(!BP_IS_HOLE(zio->io_bp));
|
||||
ASSERT(zio->io_bp->blk_fill == 0);
|
||||
ASSERT(BP_GET_FILL(zio->io_bp) == 0);
|
||||
|
||||
/*
|
||||
* Ensure the lwb buffer pointer is cleared before releasing
|
||||
|
@ -36,6 +36,7 @@
|
||||
#include <sys/dmu_objset.h>
|
||||
#include <sys/arc.h>
|
||||
#include <sys/ddt.h>
|
||||
#include <sys/blkptr.h>
|
||||
#include <sys/zfeature.h>
|
||||
|
||||
/*
|
||||
@ -214,7 +215,7 @@ zio_buf_alloc(size_t size)
|
||||
{
|
||||
size_t c = (size - 1) >> SPA_MINBLOCKSHIFT;
|
||||
|
||||
ASSERT(c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);
|
||||
ASSERT3U(c, <, SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);
|
||||
|
||||
return (kmem_cache_alloc(zio_buf_cache[c], KM_PUSHPAGE));
|
||||
}
|
||||
@ -637,6 +638,16 @@ zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
|
||||
zio->io_physdone = physdone;
|
||||
zio->io_prop = *zp;
|
||||
|
||||
/*
|
||||
* Data can be NULL if we are going to call zio_write_override() to
|
||||
* provide the already-allocated BP. But we may need the data to
|
||||
* verify a dedup hit (if requested). In this case, don't try to
|
||||
* dedup (just take the already-allocated BP verbatim).
|
||||
*/
|
||||
if (data == NULL && zio->io_prop.zp_dedup_verify) {
|
||||
zio->io_prop.zp_dedup = zio->io_prop.zp_dedup_verify = B_FALSE;
|
||||
}
|
||||
|
||||
return (zio);
|
||||
}
|
||||
|
||||
@ -676,6 +687,14 @@ zio_write_override(zio_t *zio, blkptr_t *bp, int copies, boolean_t nopwrite)
|
||||
void
|
||||
zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp)
|
||||
{
|
||||
|
||||
/*
|
||||
* The check for EMBEDDED is a performance optimization. We
|
||||
* process the free here (by ignoring it) rather than
|
||||
* putting it on the list and then processing it in zio_free_sync().
|
||||
*/
|
||||
if (BP_IS_EMBEDDED(bp))
|
||||
return;
|
||||
metaslab_check_free(spa, bp);
|
||||
|
||||
/*
|
||||
@ -700,13 +719,13 @@ zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
|
||||
zio_t *zio;
|
||||
enum zio_stage stage = ZIO_FREE_PIPELINE;
|
||||
|
||||
dprintf_bp(bp, "freeing in txg %llu, pass %u",
|
||||
(longlong_t)txg, spa->spa_sync_pass);
|
||||
|
||||
ASSERT(!BP_IS_HOLE(bp));
|
||||
ASSERT(spa_syncing_txg(spa) == txg);
|
||||
ASSERT(spa_sync_pass(spa) < zfs_sync_pass_deferred_free);
|
||||
|
||||
if (BP_IS_EMBEDDED(bp))
|
||||
return (zio_null(pio, spa, NULL, NULL, NULL, 0));
|
||||
|
||||
metaslab_check_free(spa, bp);
|
||||
arc_freed(spa, bp);
|
||||
|
||||
@ -722,7 +741,6 @@ zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
|
||||
NULL, NULL, ZIO_TYPE_FREE, ZIO_PRIORITY_NOW, flags,
|
||||
NULL, 0, NULL, ZIO_STAGE_OPEN, stage);
|
||||
|
||||
|
||||
return (zio);
|
||||
}
|
||||
|
||||
@ -732,6 +750,11 @@ zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
|
||||
{
|
||||
zio_t *zio;
|
||||
|
||||
dprintf_bp(bp, "claiming in txg %llu", txg);
|
||||
|
||||
if (BP_IS_EMBEDDED(bp))
|
||||
return (zio_null(pio, spa, NULL, NULL, NULL, 0));
|
||||
|
||||
/*
|
||||
* A claim is an allocation of a specific block. Claims are needed
|
||||
* to support immediate writes in the intent log. The issue is that
|
||||
@ -938,12 +961,20 @@ zio_read_bp_init(zio_t *zio)
|
||||
if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF &&
|
||||
zio->io_child_type == ZIO_CHILD_LOGICAL &&
|
||||
!(zio->io_flags & ZIO_FLAG_RAW)) {
|
||||
uint64_t psize = BP_GET_PSIZE(bp);
|
||||
uint64_t psize =
|
||||
BP_IS_EMBEDDED(bp) ? BPE_GET_PSIZE(bp) : BP_GET_PSIZE(bp);
|
||||
void *cbuf = zio_buf_alloc(psize);
|
||||
|
||||
zio_push_transform(zio, cbuf, psize, psize, zio_decompress);
|
||||
}
|
||||
|
||||
if (BP_IS_EMBEDDED(bp) && BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA) {
|
||||
zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
|
||||
decode_embedded_bp_compressed(bp, zio->io_data);
|
||||
} else {
|
||||
ASSERT(!BP_IS_EMBEDDED(bp));
|
||||
}
|
||||
|
||||
if (!DMU_OT_IS_METADATA(BP_GET_TYPE(bp)) && BP_GET_LEVEL(bp) == 0)
|
||||
zio->io_flags |= ZIO_FLAG_DONT_CACHE;
|
||||
|
||||
@ -987,6 +1018,9 @@ zio_write_bp_init(zio_t *zio)
|
||||
*bp = *zio->io_bp_override;
|
||||
zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
|
||||
|
||||
if (BP_IS_EMBEDDED(bp))
|
||||
return (ZIO_PIPELINE_CONTINUE);
|
||||
|
||||
/*
|
||||
* If we've been overridden and nopwrite is set then
|
||||
* set the flag accordingly to indicate that a nopwrite
|
||||
@ -1035,7 +1069,7 @@ zio_write_bp_init(zio_t *zio)
|
||||
compress = ZIO_COMPRESS_OFF;
|
||||
|
||||
/* Make sure someone doesn't change their mind on overwrites */
|
||||
ASSERT(MIN(zp->zp_copies + BP_IS_GANG(bp),
|
||||
ASSERT(BP_IS_EMBEDDED(bp) || MIN(zp->zp_copies + BP_IS_GANG(bp),
|
||||
spa_max_replication(spa)) == BP_GET_NDVAS(bp));
|
||||
}
|
||||
|
||||
@ -1045,9 +1079,38 @@ zio_write_bp_init(zio_t *zio)
|
||||
if (psize == 0 || psize == lsize) {
|
||||
compress = ZIO_COMPRESS_OFF;
|
||||
zio_buf_free(cbuf, lsize);
|
||||
} else if (!zp->zp_dedup && psize <= BPE_PAYLOAD_SIZE &&
|
||||
zp->zp_level == 0 && !DMU_OT_HAS_FILL(zp->zp_type) &&
|
||||
spa_feature_is_enabled(spa, SPA_FEATURE_EMBEDDED_DATA)) {
|
||||
encode_embedded_bp_compressed(bp,
|
||||
cbuf, compress, lsize, psize);
|
||||
BPE_SET_ETYPE(bp, BP_EMBEDDED_TYPE_DATA);
|
||||
BP_SET_TYPE(bp, zio->io_prop.zp_type);
|
||||
BP_SET_LEVEL(bp, zio->io_prop.zp_level);
|
||||
zio_buf_free(cbuf, lsize);
|
||||
bp->blk_birth = zio->io_txg;
|
||||
zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
|
||||
ASSERT(spa_feature_is_active(spa,
|
||||
SPA_FEATURE_EMBEDDED_DATA));
|
||||
return (ZIO_PIPELINE_CONTINUE);
|
||||
} else {
|
||||
ASSERT(psize < lsize);
|
||||
zio_push_transform(zio, cbuf, psize, lsize, NULL);
|
||||
/*
|
||||
* Round up compressed size to MINBLOCKSIZE and
|
||||
* zero the tail.
|
||||
*/
|
||||
size_t rounded =
|
||||
P2ROUNDUP(psize, (size_t)SPA_MINBLOCKSIZE);
|
||||
if (rounded > psize) {
|
||||
bzero((char *)cbuf + psize, rounded - psize);
|
||||
psize = rounded;
|
||||
}
|
||||
if (psize == lsize) {
|
||||
compress = ZIO_COMPRESS_OFF;
|
||||
zio_buf_free(cbuf, lsize);
|
||||
} else {
|
||||
zio_push_transform(zio, cbuf,
|
||||
psize, lsize, NULL);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -2745,7 +2808,7 @@ zio_checksum_verified(zio_t *zio)
|
||||
/*
|
||||
* ==========================================================================
|
||||
* Error rank. Errors are ranked in the order 0, ENXIO, ECKSUM, EIO, other.
|
||||
* An error of 0 indictes success. ENXIO indicates whole-device failure,
|
||||
* An error of 0 indicates success. ENXIO indicates whole-device failure,
|
||||
* which may be transient (e.g. unplugged) or permanent. ECKSUM and EIO
|
||||
* indicate errors that are specific to one I/O, and most likely permanent.
|
||||
* Any other error is presumed to be worse because we weren't expecting it.
|
||||
@ -2855,7 +2918,7 @@ zio_done(zio_t *zio)
|
||||
for (int w = 0; w < ZIO_WAIT_TYPES; w++)
|
||||
ASSERT(zio->io_children[c][w] == 0);
|
||||
|
||||
if (bp != NULL) {
|
||||
if (bp != NULL && !BP_IS_EMBEDDED(bp)) {
|
||||
ASSERT(bp->blk_pad[0] == 0);
|
||||
ASSERT(bp->blk_pad[1] == 0);
|
||||
ASSERT(bcmp(bp, &zio->io_bp_copy, sizeof (blkptr_t)) == 0 ||
|
||||
|
@ -80,7 +80,7 @@ size_t
|
||||
zio_compress_data(enum zio_compress c, void *src, void *dst, size_t s_len)
|
||||
{
|
||||
uint64_t *word, *word_end;
|
||||
size_t c_len, d_len, r_len;
|
||||
size_t c_len, d_len;
|
||||
zio_compress_info_t *ci = &zio_compress_table[c];
|
||||
|
||||
ASSERT((uint_t)c < ZIO_COMPRESS_FUNCTIONS);
|
||||
@ -102,28 +102,13 @@ zio_compress_data(enum zio_compress c, void *src, void *dst, size_t s_len)
|
||||
return (s_len);
|
||||
|
||||
/* Compress at least 12.5% */
|
||||
d_len = P2ALIGN(s_len - (s_len >> 3), (size_t)SPA_MINBLOCKSIZE);
|
||||
if (d_len == 0)
|
||||
return (s_len);
|
||||
|
||||
d_len = s_len - (s_len >> 3);
|
||||
c_len = ci->ci_compress(src, dst, s_len, d_len, ci->ci_level);
|
||||
|
||||
if (c_len > d_len)
|
||||
return (s_len);
|
||||
|
||||
/*
|
||||
* Cool. We compressed at least as much as we were hoping to.
|
||||
* For both security and repeatability, pad out the last sector.
|
||||
*/
|
||||
r_len = P2ROUNDUP(c_len, (size_t)SPA_MINBLOCKSIZE);
|
||||
if (r_len > c_len) {
|
||||
bzero((char *)dst + c_len, r_len - c_len);
|
||||
c_len = r_len;
|
||||
}
|
||||
|
||||
ASSERT3U(c_len, <=, d_len);
|
||||
ASSERT(P2PHASE(c_len, (size_t)SPA_MINBLOCKSIZE) == 0);
|
||||
|
||||
return (c_len);
|
||||
}
|
||||
|
||||
|
@ -254,6 +254,8 @@ zvol_map_block(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
||||
zb->zb_object != ZVOL_OBJ || zb->zb_level != 0)
|
||||
return (0);
|
||||
|
||||
VERIFY(!BP_IS_EMBEDDED(bp));
|
||||
|
||||
VERIFY3U(ma->ma_blks, ==, zb->zb_blkid);
|
||||
ma->ma_blks++;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user