Import OpenSolaris revision 7837:001de5627df3

It includes the following changes:
- parallel reads in traversal code (Bug ID 6333409)
- faster traversal for zfs send (Bug ID 6418042)
- traversal code cleanup (Bug ID 6725675)
- fix for two scrub-related bugs (Bug IDs 6729696, 6730101)
- fix assertion in dbuf_verify (Bug ID 6752226)
- fix panic during zfs send with i/o errors (Bug ID 6577985)
- replace P2CROSS with P2BOUNDARY (Bug ID 6725680)

List of OpenSolaris Bug IDs:
6333409, 6418042, 6757112, 6725668, 6725675, 6725680,
6725698, 6729696, 6730101, 6752226, 6577985, 6755042

Approved by:	pjd, delphij (mentor)
Obtained from:	OpenSolaris (multiple Bug IDs)
MFC after:	1 week
This commit is contained in:
mm 2010-05-13 20:32:56 +00:00
parent cdb02238ee
commit 6f4ba1587b
23 changed files with 728 additions and 1596 deletions

View File

@ -50,6 +50,7 @@
#include <sys/zio_checksum.h>
#include <sys/zio_compress.h>
#include <sys/zfs_fuid.h>
#include <sys/arc.h>
#undef ZFS_MAXNAMELEN
#undef verify
#include <libzfs.h>
@ -62,8 +63,6 @@ typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);
extern void dump_intent_log(zilog_t *);
uint64_t *zopt_object = NULL;
int zopt_objects = 0;
int zdb_advance = ADVANCE_PRE;
zbookmark_t zdb_noread = { 0, 0, ZB_NO_LEVEL, 0 };
libzfs_handle_t *g_zfs;
boolean_t zdb_sig_user_data = B_TRUE;
int zdb_sig_cksumalg = ZIO_CHECKSUM_SHA256;
@ -88,8 +87,8 @@ static void
usage(void)
{
(void) fprintf(stderr,
"Usage: %s [-udibcsvL] [-U cachefile_path] [-O order] "
"[-B os:obj:level:blkid] [-S user:cksumalg] "
"Usage: %s [-udibcsv] [-U cachefile_path] "
"[-S user:cksumalg] "
"dataset [object...]\n"
" %s -C [pool]\n"
" %s -l dev\n"
@ -109,13 +108,8 @@ usage(void)
"dump blkptr signatures\n");
(void) fprintf(stderr, " -v verbose (applies to all others)\n");
(void) fprintf(stderr, " -l dump label contents\n");
(void) fprintf(stderr, " -L live pool (allows some errors)\n");
(void) fprintf(stderr, " -O [!]<pre|post|prune|data|holes> "
"visitation order\n");
(void) fprintf(stderr, " -U cachefile_path -- use alternate "
"cachefile\n");
(void) fprintf(stderr, " -B objset:object:level:blkid -- "
"simulate bad block\n");
(void) fprintf(stderr, " -R read and display block from a "
"device\n");
(void) fprintf(stderr, " -e Pool is exported/destroyed/"
@ -138,7 +132,7 @@ fatal(const char *fmt, ...)
va_end(ap);
(void) fprintf(stderr, "\n");
exit(1);
abort();
}
static void
@ -571,7 +565,7 @@ dump_dnode(objset_t *os, uint64_t object, void *data, size_t size)
}
static uint64_t
blkid2offset(dnode_phys_t *dnp, int level, uint64_t blkid)
blkid2offset(const dnode_phys_t *dnp, int level, uint64_t blkid)
{
if (level < 0)
return (blkid);
@ -602,115 +596,104 @@ sprintf_blkptr_compact(char *blkbuf, blkptr_t *bp, int alldvas)
(u_longlong_t)bp->blk_birth);
}
/* ARGSUSED */
static int
zdb_indirect_cb(traverse_blk_cache_t *bc, spa_t *spa, void *a)
static void
print_indirect(blkptr_t *bp, const zbookmark_t *zb,
const dnode_phys_t *dnp)
{
zbookmark_t *zb = &bc->bc_bookmark;
blkptr_t *bp = &bc->bc_blkptr;
void *data = bc->bc_data;
dnode_phys_t *dnp = bc->bc_dnode;
char blkbuf[BP_SPRINTF_LEN + 80];
char blkbuf[BP_SPRINTF_LEN];
int l;
if (bc->bc_errno) {
(void) sprintf(blkbuf,
"Error %d reading <%llu, %llu, %lld, %llu>: ",
bc->bc_errno,
(u_longlong_t)zb->zb_objset,
(u_longlong_t)zb->zb_object,
(u_longlong_t)zb->zb_level,
(u_longlong_t)zb->zb_blkid);
goto out;
}
ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
if (zb->zb_level == -1) {
ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET);
ASSERT3U(BP_GET_LEVEL(bp), ==, 0);
} else {
ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
}
if (zb->zb_level > 0) {
uint64_t fill = 0;
blkptr_t *bpx, *bpend;
for (bpx = data, bpend = bpx + BP_GET_LSIZE(bp) / sizeof (*bpx);
bpx < bpend; bpx++) {
if (bpx->blk_birth != 0) {
fill += bpx->blk_fill;
} else {
ASSERT(bpx->blk_fill == 0);
}
}
ASSERT3U(fill, ==, bp->blk_fill);
}
if (zb->zb_level == 0 && dnp->dn_type == DMU_OT_DNODE) {
uint64_t fill = 0;
dnode_phys_t *dnx, *dnend;
for (dnx = data, dnend = dnx + (BP_GET_LSIZE(bp)>>DNODE_SHIFT);
dnx < dnend; dnx++) {
if (dnx->dn_type != DMU_OT_NONE)
fill++;
}
ASSERT3U(fill, ==, bp->blk_fill);
}
(void) sprintf(blkbuf, "%16llx ",
(void) printf("%16llx ",
(u_longlong_t)blkid2offset(dnp, zb->zb_level, zb->zb_blkid));
ASSERT(zb->zb_level >= 0);
for (l = dnp->dn_nlevels - 1; l >= -1; l--) {
if (l == zb->zb_level) {
(void) sprintf(blkbuf + strlen(blkbuf), "L%llx",
(u_longlong_t)zb->zb_level);
(void) printf("L%llx", (u_longlong_t)zb->zb_level);
} else {
(void) sprintf(blkbuf + strlen(blkbuf), " ");
(void) printf(" ");
}
}
out:
if (bp->blk_birth == 0) {
(void) sprintf(blkbuf + strlen(blkbuf), "<hole>");
(void) printf("%s\n", blkbuf);
} else {
sprintf_blkptr_compact(blkbuf + strlen(blkbuf), bp,
dump_opt['d'] > 5 ? 1 : 0);
(void) printf("%s\n", blkbuf);
sprintf_blkptr_compact(blkbuf, bp, dump_opt['d'] > 5 ? 1 : 0);
(void) printf("%s\n", blkbuf);
}
/*
 * Fill in all four fields of the bookmark pointed to by 'zb'.
 *
 * The body is wrapped in do { } while (0) so that an invocation followed
 * by a semicolon is exactly one statement; a bare { } block would break
 * an unbraced "if (...) SET_BOOKMARK(...); else ...".  Every argument is
 * parenthesized so that arbitrary expressions may be passed safely.
 */
#define	SET_BOOKMARK(zb, objset, object, level, blkid)	\
do {							\
	(zb)->zb_objset = (objset);			\
	(zb)->zb_object = (object);			\
	(zb)->zb_level = (level);			\
	(zb)->zb_blkid = (blkid);			\
} while (0)
/*
 * Print the block pointer 'bp' (located by bookmark 'zb') belonging to the
 * object described by 'dnp'.  If 'bp' is an indirect block (level > 0),
 * read it and recursively visit each child block pointer it contains.
 *
 * Holes (blk_birth == 0) are skipped without printing anything.
 *
 * Returns 0 on success, otherwise the first non-zero value returned by
 * arc_read_nolock() or by a recursive visit.
 */
static int
visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
    blkptr_t *bp, const zbookmark_t *zb)
{
	/* Must start at 0: a level-0 block performs no read and no recursion. */
	int err = 0;

	if (bp->blk_birth == 0)
		return (0);

	print_indirect(bp, zb, dnp);

	if (BP_GET_LEVEL(bp) > 0) {
		uint32_t flags = ARC_WAIT;
		int i;
		blkptr_t *cbp;
		/* number of block pointers held by this indirect block */
		int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
		arc_buf_t *buf;
		uint64_t fill = 0;

		err = arc_read_nolock(NULL, spa, bp, arc_getbuf_func, &buf,
		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
		if (err)
			return (err);

		/* recursively visit blocks below this */
		cbp = buf->b_data;
		for (i = 0; i < epb; i++, cbp++) {
			zbookmark_t czb;

			SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
			    zb->zb_level - 1,
			    zb->zb_blkid * epb + i);
			err = visit_indirect(spa, dnp, cbp, &czb);
			if (err)
				break;
			fill += cbp->blk_fill;
		}

		/*
		 * 'fill' is only the complete sum when every child was
		 * visited; after an early break it is partial and must not
		 * be compared against the parent's fill count.
		 */
		if (err == 0) {
			ASSERT3U(fill, ==, bp->blk_fill);
		}

		/* Release the buffer on both the success and error paths. */
		(void) arc_buf_remove_ref(buf, &buf);
	}

	return (err);
}
/*ARGSUSED*/
static void
dump_indirect(objset_t *os, uint64_t object, void *data, size_t size)
dump_indirect(dnode_t *dn)
{
traverse_handle_t *th;
uint64_t objset = dmu_objset_id(os);
int advance = zdb_advance;
dnode_phys_t *dnp = dn->dn_phys;
int j;
zbookmark_t czb;
(void) printf("Indirect blocks:\n");
if (object == 0)
advance |= ADVANCE_DATA;
th = traverse_init(dmu_objset_spa(os), zdb_indirect_cb, NULL, advance,
ZIO_FLAG_CANFAIL);
th->th_noread = zdb_noread;
traverse_add_dnode(th, 0, -1ULL, objset, object);
while (traverse_more(th) == EAGAIN)
continue;
SET_BOOKMARK(&czb, dmu_objset_id(&dn->dn_objset->os),
dn->dn_object, dnp->dn_nlevels - 1, 0);
for (j = 0; j < dnp->dn_nblkptr; j++) {
czb.zb_blkid = j;
(void) visit_indirect(dmu_objset_spa(&dn->dn_objset->os), dnp,
&dnp->dn_blkptr[j], &czb);
}
(void) printf("\n");
traverse_fini(th);
}
/*ARGSUSED*/
@ -1093,7 +1076,7 @@ dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
}
if (verbosity >= 5)
dump_indirect(os, object, NULL, 0);
dump_indirect(dn);
if (verbosity >= 5) {
/*
@ -1458,18 +1441,17 @@ typedef struct zdb_blkstats {
#define DMU_OT_DEFERRED DMU_OT_NONE
#define DMU_OT_TOTAL DMU_OT_NUMTYPES
#define ZB_TOTAL ZB_MAXLEVEL
#define ZB_TOTAL DN_MAX_LEVELS
typedef struct zdb_cb {
zdb_blkstats_t zcb_type[ZB_TOTAL + 1][DMU_OT_TOTAL + 1];
uint64_t zcb_errors[256];
traverse_blk_cache_t *zcb_cache;
int zcb_readfails;
int zcb_haderrors;
} zdb_cb_t;
static void
zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, int type)
zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, dmu_object_type_t type)
{
for (int i = 0; i < 4; i++) {
int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
@ -1485,7 +1467,7 @@ zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, int type)
if (dump_opt['S']) {
boolean_t print_sig;
print_sig = !zdb_sig_user_data || (BP_GET_LEVEL(bp) == 0 &&
print_sig = !zdb_sig_user_data || (BP_GET_LEVEL(bp) == 0 &&
BP_GET_TYPE(bp) == DMU_OT_PLAIN_FILE_CONTENTS);
if (BP_GET_CHECKSUM(bp) < zdb_sig_cksumalg)
@ -1507,56 +1489,55 @@ zdb_count_block(spa_t *spa, zdb_cb_t *zcb, blkptr_t *bp, int type)
}
}
if (!dump_opt['L'])
VERIFY(zio_wait(zio_claim(NULL, spa, spa_first_txg(spa), bp,
NULL, NULL, ZIO_FLAG_MUSTSUCCEED)) == 0);
VERIFY(zio_wait(zio_claim(NULL, spa, spa_first_txg(spa), bp,
NULL, NULL, ZIO_FLAG_MUSTSUCCEED)) == 0);
}
static int
zdb_blkptr_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
zdb_blkptr_cb(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
const dnode_phys_t *dnp, void *arg)
{
zbookmark_t *zb = &bc->bc_bookmark;
zdb_cb_t *zcb = arg;
blkptr_t *bp = &bc->bc_blkptr;
dmu_object_type_t type = BP_GET_TYPE(bp);
char blkbuf[BP_SPRINTF_LEN];
int error = 0;
ASSERT(!BP_IS_HOLE(bp));
if (bp == NULL)
return (0);
zdb_count_block(spa, zcb, bp, type);
zdb_count_block(spa, zcb, bp, BP_GET_TYPE(bp));
if (bc->bc_errno) {
if (zcb->zcb_readfails++ < 10 && dump_opt['L']) {
uberblock_t ub;
vdev_uberblock_load(NULL, spa->spa_root_vdev, &ub);
if (ub.ub_txg != 0)
spa->spa_ubsync = ub;
error = EAGAIN;
} else {
if (dump_opt['c'] || dump_opt['S']) {
int ioerr, size;
void *data;
size = BP_GET_LSIZE(bp);
data = malloc(size);
ioerr = zio_wait(zio_read(NULL, spa, bp, data, size,
NULL, NULL, ZIO_PRIORITY_ASYNC_READ,
ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB, zb));
free(data);
/* We expect io errors on intent log */
if (ioerr && BP_GET_TYPE(bp) != DMU_OT_INTENT_LOG) {
zcb->zcb_haderrors = 1;
zcb->zcb_errors[bc->bc_errno]++;
error = ERESTART;
zcb->zcb_errors[ioerr]++;
if (dump_opt['b'] >= 2)
sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp);
else
blkbuf[0] = '\0';
if (!dump_opt['S']) {
(void) printf("zdb_blkptr_cb: "
"Got error %d reading "
"<%llu, %llu, %lld, %llx> %s -- skipping\n",
ioerr,
(u_longlong_t)zb->zb_objset,
(u_longlong_t)zb->zb_object,
(u_longlong_t)zb->zb_level,
(u_longlong_t)zb->zb_blkid,
blkbuf);
}
}
if (dump_opt['b'] >= 3 || (dump_opt['b'] >= 2 && bc->bc_errno))
sprintf_blkptr(blkbuf, BP_SPRINTF_LEN, bp);
else
blkbuf[0] = '\0';
if (!dump_opt['S']) {
(void) printf("zdb_blkptr_cb: Got error %d reading "
"<%llu, %llu, %lld, %llx> %s -- %s\n",
bc->bc_errno,
(u_longlong_t)zb->zb_objset,
(u_longlong_t)zb->zb_object,
(u_longlong_t)zb->zb_level,
(u_longlong_t)zb->zb_blkid,
blkbuf,
error == EAGAIN ? "retrying" : "skipping");
}
return (error);
}
zcb->zcb_readfails = 0;
@ -1566,8 +1547,8 @@ zdb_blkptr_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
(void) printf("objset %llu object %llu offset 0x%llx %s\n",
(u_longlong_t)zb->zb_objset,
(u_longlong_t)zb->zb_object,
(u_longlong_t)blkid2offset(bc->bc_dnode,
zb->zb_level, zb->zb_blkid), blkbuf);
(u_longlong_t)blkid2offset(dnp, zb->zb_level, zb->zb_blkid),
blkbuf);
}
return (0);
@ -1576,22 +1557,12 @@ zdb_blkptr_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
static int
dump_block_stats(spa_t *spa)
{
traverse_handle_t *th;
zdb_cb_t zcb = { 0 };
traverse_blk_cache_t dummy_cache = { 0 };
zdb_blkstats_t *zb, *tzb;
uint64_t alloc, space, logalloc;
vdev_t *rvd = spa->spa_root_vdev;
int leaks = 0;
int advance = zdb_advance;
int c, e, flags;
zcb.zcb_cache = &dummy_cache;
if (dump_opt['c'] || dump_opt['S'])
advance |= ADVANCE_DATA;
advance |= ADVANCE_PRUNE | ADVANCE_ZIL;
int c, e;
if (!dump_opt['S']) {
(void) printf("\nTraversing all blocks to %sverify"
@ -1607,8 +1578,7 @@ dump_block_stats(spa_t *spa)
* it's not part of any space map) is a double allocation,
* reference to a freed block, or an unclaimed log block.
*/
if (!dump_opt['L'])
zdb_leak_init(spa);
zdb_leak_init(spa);
/*
* If there's a deferred-free bplist, process that first.
@ -1634,22 +1604,7 @@ dump_block_stats(spa_t *spa)
bplist_close(bpl);
}
/*
* Now traverse the pool. If we're reading all data to verify
* checksums, do a scrubbing read so that we validate all copies.
*/
flags = ZIO_FLAG_CANFAIL;
if (advance & ADVANCE_DATA)
flags |= ZIO_FLAG_SCRUB;
th = traverse_init(spa, zdb_blkptr_cb, &zcb, advance, flags);
th->th_noread = zdb_noread;
traverse_add_pool(th, 0, spa_first_txg(spa) + TXG_CONCURRENT_STATES);
while (traverse_more(th) == EAGAIN)
continue;
traverse_fini(th);
zcb.zcb_haderrors |= traverse_pool(spa, zdb_blkptr_cb, &zcb);
if (zcb.zcb_haderrors && !dump_opt['S']) {
(void) printf("\nError counts:\n\n");
@ -1665,8 +1620,7 @@ dump_block_stats(spa_t *spa)
/*
* Report any leaked segments.
*/
if (!dump_opt['L'])
zdb_leak_fini(spa);
zdb_leak_fini(spa);
/*
* If we're interested in printing out the blkptr signatures,
@ -1676,10 +1630,6 @@ dump_block_stats(spa_t *spa)
if (dump_opt['S'])
return (zcb.zcb_haderrors ? 3 : 0);
if (dump_opt['L'])
(void) printf("\n\n *** Live pool traversal; "
"block counts are only approximate ***\n\n");
alloc = spa_get_alloc(spa);
space = spa_get_space(spa);
@ -2285,7 +2235,6 @@ main(int argc, char **argv)
int dump_all = 1;
int verbose = 0;
int error;
int flag, set;
int exported = 0;
char *vdev_dir = NULL;
@ -2294,7 +2243,7 @@ main(int argc, char **argv)
dprintf_setup(&argc, argv);
while ((c = getopt(argc, argv, "udibcsvCLO:B:S:U:lRep:")) != -1) {
while ((c = getopt(argc, argv, "udibcsvCS:U:lRep:")) != -1) {
switch (c) {
case 'u':
case 'd':
@ -2308,49 +2257,6 @@ main(int argc, char **argv)
dump_opt[c]++;
dump_all = 0;
break;
case 'L':
dump_opt[c]++;
break;
case 'O':
endstr = optarg;
if (endstr[0] == '!') {
endstr++;
set = 0;
} else {
set = 1;
}
if (strcmp(endstr, "post") == 0) {
flag = ADVANCE_PRE;
set = !set;
} else if (strcmp(endstr, "pre") == 0) {
flag = ADVANCE_PRE;
} else if (strcmp(endstr, "prune") == 0) {
flag = ADVANCE_PRUNE;
} else if (strcmp(endstr, "data") == 0) {
flag = ADVANCE_DATA;
} else if (strcmp(endstr, "holes") == 0) {
flag = ADVANCE_HOLES;
} else {
usage();
}
if (set)
zdb_advance |= flag;
else
zdb_advance &= ~flag;
break;
case 'B':
endstr = optarg - 1;
zdb_noread.zb_objset = strtoull(endstr + 1, &endstr, 0);
zdb_noread.zb_object = strtoull(endstr + 1, &endstr, 0);
zdb_noread.zb_level = strtol(endstr + 1, &endstr, 0);
zdb_noread.zb_blkid = strtoull(endstr + 1, &endstr, 16);
(void) printf("simulating bad block "
"<%llu, %llu, %lld, %llx>\n",
(u_longlong_t)zdb_noread.zb_objset,
(u_longlong_t)zdb_noread.zb_object,
(u_longlong_t)zdb_noread.zb_level,
(u_longlong_t)zdb_noread.zb_blkid);
break;
case 'v':
verbose++;
break;
@ -2387,21 +2293,17 @@ main(int argc, char **argv)
}
}
if (vdev_dir != NULL && exported == 0)
(void) fatal("-p option requires use of -e\n");
if (vdev_dir != NULL && exported == 0) {
(void) fprintf(stderr, "-p option requires use of -e\n");
usage();
}
kernel_init(FREAD);
g_zfs = libzfs_init();
ASSERT(g_zfs != NULL);
/*
* Disable vdev caching. If we don't do this, live pool traversal
* won't make progress because it will never see disk updates.
*/
zfs_vdev_cache_size = 0;
for (c = 0; c < 256; c++) {
if (dump_all && c != 'L' && c != 'l' && c != 'R')
if (dump_all && c != 'l' && c != 'R')
dump_opt[c] = 1;
if (dump_opt[c])
dump_opt[c] += verbose;

View File

@ -77,7 +77,6 @@
#include <sys/dmu.h>
#include <sys/txg.h>
#include <sys/zap.h>
#include <sys/dmu_traverse.h>
#include <sys/dmu_objset.h>
#include <sys/poll.h>
#include <sys/stat.h>
@ -151,7 +150,6 @@ typedef struct ztest_args {
hrtime_t za_start;
hrtime_t za_stop;
hrtime_t za_kill;
traverse_handle_t *za_th;
/*
* Thread-local variables can go here to aid debugging.
*/
@ -206,7 +204,6 @@ ztest_info_t ztest_info[] = {
{ ztest_dmu_object_alloc_free, 1, &zopt_always },
{ ztest_zap, 30, &zopt_always },
{ ztest_zap_parallel, 100, &zopt_always },
{ ztest_traverse, 1, &zopt_often },
{ ztest_dsl_prop_get_set, 1, &zopt_sometimes },
{ ztest_dmu_objset_create_destroy, 1, &zopt_sometimes },
{ ztest_dmu_snapshot_create_destroy, 1, &zopt_sometimes },
@ -1447,152 +1444,6 @@ ztest_dmu_snapshot_create_destroy(ztest_args_t *za)
(void) rw_unlock(&ztest_shared->zs_name_lock);
}
#define ZTEST_TRAVERSE_BLOCKS 1000
/*
 * Traversal callback used by ztest_traverse().  'arg' is the calling
 * thread's ztest_args_t.  For every block handed over in 'bc' it asserts
 * invariants about the bookmark, the block pointer and the cached data.
 *
 * Returns 0 to continue, ERESTART if the cache entry carries a read
 * error, or (occasionally, odd instances only) EINTR to abort.
 */
static int
ztest_blk_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
{
ztest_args_t *za = arg;
zbookmark_t *zb = &bc->bc_bookmark;
blkptr_t *bp = &bc->bc_blkptr;
dnode_phys_t *dnp = bc->bc_dnode;
traverse_handle_t *th = za->za_th;
uint64_t size = BP_GET_LSIZE(bp);
/*
* Level -1 indicates the objset_phys_t or something in its intent log.
*/
if (zb->zb_level == -1) {
if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
ASSERT3U(zb->zb_object, ==, 0);
ASSERT3U(zb->zb_blkid, ==, 0);
ASSERT3U(size, ==, sizeof (objset_phys_t));
/* New objset: restart the intent-log sequence tracking. */
za->za_zil_seq = 0;
} else if (BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG) {
ASSERT3U(zb->zb_object, ==, 0);
/* Intent-log block ids must be strictly increasing. */
ASSERT3U(zb->zb_blkid, >, za->za_zil_seq);
za->za_zil_seq = zb->zb_blkid;
} else {
ASSERT3U(zb->zb_object, !=, 0); /* lr_write_t */
}
return (0);
}
ASSERT(dnp != NULL);
if (bc->bc_errno)
return (ERESTART);
/*
* Once in a while, abort the traverse. We only do this to odd
* instance numbers to ensure that even ones can run to completion.
*/
if ((za->za_instance & 1) && ztest_random(10000) == 0)
return (EINTR);
/* A hole may only be reported when ADVANCE_HOLES was requested. */
if (bp->blk_birth == 0) {
ASSERT(th->th_advance & ADVANCE_HOLES);
return (0);
}
/*
* A level-0 block in the first ZB_DN_CACHE slot visited without
* ADVANCE_DATA is expected to carry no data.
*/
if (zb->zb_level == 0 && !(th->th_advance & ADVANCE_DATA) &&
bc == &th->th_cache[ZB_DN_CACHE][0]) {
ASSERT(bc->bc_data == NULL);
return (0);
}
ASSERT(bc->bc_data != NULL);
/*
* This is an expensive question, so don't ask it too often.
*/
if (((za->za_random ^ th->th_callbacks) & 0xff) == 0) {
void *xbuf = umem_alloc(size, UMEM_NOFAIL);
/* If arc_tryread() succeeds, its data must match the cached copy. */
if (arc_tryread(spa, bp, xbuf) == 0) {
ASSERT(bcmp(bc->bc_data, xbuf, size) == 0);
}
umem_free(xbuf, size);
}
/* Indirect blocks must be exactly one indirect-block in size. */
if (zb->zb_level > 0) {
ASSERT3U(size, ==, 1ULL << dnp->dn_indblkshift);
return (0);
}
/* Level-0 blocks must match the dnode's data block size. */
ASSERT(zb->zb_level == 0);
ASSERT3U(size, ==, dnp->dn_datablkszsec << DEV_BSHIFT);
return (0);
}
/*
* Verify that live pool traversal works.
*/
void
ztest_traverse(ztest_args_t *za)
{
spa_t *spa = za->za_spa;
traverse_handle_t *th = za->za_th;
int rc, advance;
uint64_t cbstart, cblimit;
/*
 * No traversal in progress for this thread: start one over the whole
 * pool with a random combination of ADVANCE_* flags.  The handle is
 * kept in za->za_th so later calls continue the same traversal.
 */
if (th == NULL) {
advance = 0;
if (ztest_random(2) == 0)
advance |= ADVANCE_PRE;
if (ztest_random(2) == 0)
advance |= ADVANCE_PRUNE;
if (ztest_random(2) == 0)
advance |= ADVANCE_DATA;
if (ztest_random(2) == 0)
advance |= ADVANCE_HOLES;
if (ztest_random(2) == 0)
advance |= ADVANCE_ZIL;
th = za->za_th = traverse_init(spa, ztest_blk_cb, za, advance,
ZIO_FLAG_CANFAIL);
traverse_add_pool(th, 0, -1ULL);
}
advance = th->th_advance;
cbstart = th->th_callbacks;
/* Per-call callback budget: 100 with ADVANCE_DATA, otherwise 1000. */
cblimit = cbstart + ((advance & ADVANCE_DATA) ? 100 : 1000);
/* Keep going while more work remains and the budget is not used up. */
while ((rc = traverse_more(th)) == EAGAIN && th->th_callbacks < cblimit)
continue;
if (zopt_verbose >= 5)
(void) printf("traverse %s%s%s%s %llu blocks to "
"<%llu, %llu, %lld, %llx>%s\n",
(advance & ADVANCE_PRE) ? "pre" : "post",
(advance & ADVANCE_PRUNE) ? "|prune" : "",
(advance & ADVANCE_DATA) ? "|data" : "",
(advance & ADVANCE_HOLES) ? "|holes" : "",
(u_longlong_t)(th->th_callbacks - cbstart),
(u_longlong_t)th->th_lastcb.zb_objset,
(u_longlong_t)th->th_lastcb.zb_object,
(u_longlong_t)th->th_lastcb.zb_level,
(u_longlong_t)th->th_lastcb.zb_blkid,
rc == 0 ? " [done]" :
rc == EINTR ? " [aborted]" :
rc == EAGAIN ? "" :
strerror(rc));
/*
 * Anything other than EAGAIN ends this traversal.  Only completion (0)
 * and a deliberate abort (EINTR) are acceptable; the handle is then
 * released so that the next call starts a fresh traversal.
 */
if (rc != EAGAIN) {
if (rc != 0 && rc != EINTR)
fatal(0, "traverse_more(%p) = %d", th, rc);
traverse_fini(th);
za->za_th = NULL;
}
}
/*
* Verify dsl_dataset_promote handles EBUSY
*/
@ -3067,12 +2918,12 @@ ztest_verify_blocks(char *pool)
isa = strdup(isa);
/* LINTED */
(void) sprintf(bin,
"/usr/sbin%.*s/zdb -bc%s%s -U /tmp/zpool.cache -O %s %s",
"/usr/sbin%.*s/zdb -bc%s%s -U /tmp/zpool.cache %s",
isalen,
isa,
zopt_verbose >= 3 ? "s" : "",
zopt_verbose >= 4 ? "v" : "",
ztest_random(2) == 0 ? "pre" : "post", pool);
pool);
free(isa);
if (zopt_verbose >= 5)
@ -3438,8 +3289,6 @@ ztest_run(char *pool)
while (--t >= 0) {
VERIFY(thr_join(za[t].za_thread, NULL, NULL) == 0);
if (za[t].za_th)
traverse_fini(za[t].za_th);
if (t < zopt_datasets) {
zil_close(za[t].za_zilog);
dmu_objset_close(za[t].za_os);

View File

@ -23,8 +23,6 @@
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <assert.h>
#include <fcntl.h>
#include <poll.h>
@ -842,6 +840,8 @@ kernel_init(int mode)
VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1);
VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1);
system_taskq_init();
spa_init(mode);
}

View File

@ -334,11 +334,14 @@ typedef void (task_func_t)(void *);
#define TQ_NOSLEEP KM_NOSLEEP /* cannot block for memory; may fail */
#define TQ_NOQUEUE 0x02 /* Do not enqueue if can't dispatch */
extern taskq_t *system_taskq;
extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t);
extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t);
extern void taskq_destroy(taskq_t *);
extern void taskq_wait(taskq_t *);
extern int taskq_member(taskq_t *, void *);
extern void system_taskq_init(void);
#define XVA_MAPSIZE 3
#define XVA_MAGIC 0x78766174

View File

@ -19,15 +19,14 @@
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/zfs_context.h>
int taskq_now;
taskq_t *system_taskq;
typedef struct task {
struct task *task_next;
@ -253,3 +252,10 @@ taskq_member(taskq_t *tq, void *t)
return (0);
}
/*
 * Create the global system_taskq.  The numeric arguments are handed to
 * taskq_create() unchanged; the queue is created with both the
 * TASKQ_DYNAMIC and TASKQ_PREPOPULATE flags set.
 */
void
system_taskq_init(void)
{
	const unsigned int tq_flags = TASKQ_DYNAMIC | TASKQ_PREPOPULATE;

	system_taskq = taskq_create("system_taskq", 64, minclsyspri, 4, 512,
	    tq_flags);
}

View File

@ -66,7 +66,7 @@
#define P2ROUNDUP(x, align) (-(-(x) & -(align)))
#define P2END(x, align) (-(~(x) & -(align)))
#define P2PHASEUP(x, align, phase) ((phase) - (((phase) - (x)) & -(align)))
#define P2CROSS(x, y, align) (((x) ^ (y)) > (align) - 1)
#define P2BOUNDARY(off, len, align) (((off) ^ ((off) + (len) - 1)) > (align) - 1)
/*
* General-purpose 32-bit and 64-bit bitfield encodings.

View File

@ -43,6 +43,10 @@ extern "C" {
#define ABS(a) ((a) < 0 ? -(a) : (a))
#endif
#ifndef SIGNOF
/*
 * Sign of 'a' as an int: -1 when negative, 1 when positive, 0 otherwise.
 * Note that 'a' is evaluated more than once when it is not negative.
 */
#define	SIGNOF(a)	((a) < 0 ? -1 : ((a) > 0 ? 1 : 0))
#endif
/*
* Macro for checking power of 2 address alignment.
*/
@ -63,7 +67,7 @@ extern "C" {
#define P2ROUNDUP(x, align) (-(-(x) & -(align)))
#define P2END(x, align) (-(~(x) & -(align)))
#define P2PHASEUP(x, align, phase) ((phase) - (((phase) - (x)) & -(align)))
#define P2CROSS(x, y, align) (((x) ^ (y)) > (align) - 1)
#define P2BOUNDARY(off, len, align) (((off) ^ ((off) + (len) - 1)) > (align) - 1)
/*
* Determine whether two numbers have the same high-order bit.
*/

View File

@ -308,20 +308,18 @@ dbuf_verify(dmu_buf_impl_t *db)
ASSERT3U(db->db.db_offset, ==, db->db_blkid * db->db.db_size);
}
if (db->db_level == 0) {
/* we can be momentarily larger in dnode_set_blksz() */
if (db->db_blkid != DB_BONUS_BLKID && dn) {
ASSERT3U(db->db.db_size, >=, dn->dn_datablksz);
}
if (db->db.db_object == DMU_META_DNODE_OBJECT) {
dbuf_dirty_record_t *dr = db->db_data_pending;
/*
* it should only be modified in syncing
* context, so make sure we only have
* one copy of the data.
*/
ASSERT(dr == NULL || dr->dt.dl.dr_data == db->db_buf);
}
/*
* We can't assert that db_size matches dn_datablksz because it
* can be momentarily different when another thread is doing
* dnode_set_blksz().
*/
if (db->db_level == 0 && db->db.db_object == DMU_META_DNODE_OBJECT) {
dbuf_dirty_record_t *dr = db->db_data_pending;
/*
* It should only be modified in syncing context, so
* make sure we only have one copy of the data.
*/
ASSERT(dr == NULL || dr->dt.dl.dr_data == db->db_buf);
}
/* verify db->db_blkptr */

View File

@ -23,8 +23,6 @@
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/dmu.h>
#include <sys/dmu_impl.h>
#include <sys/dmu_tx.h>
@ -172,66 +170,59 @@ dump_dnode(struct backuparg *ba, uint64_t object, dnode_phys_t *dnp)
(level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT)))
static int
backup_cb(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
backup_cb(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
const dnode_phys_t *dnp, void *arg)
{
struct backuparg *ba = arg;
uint64_t object = bc->bc_bookmark.zb_object;
int level = bc->bc_bookmark.zb_level;
uint64_t blkid = bc->bc_bookmark.zb_blkid;
blkptr_t *bp = bc->bc_blkptr.blk_birth ? &bc->bc_blkptr : NULL;
dmu_object_type_t type = bp ? BP_GET_TYPE(bp) : DMU_OT_NONE;
void *data = bc->bc_data;
int err = 0;
if (issig(JUSTLOOKING) && issig(FORREAL))
return (EINTR);
ASSERT(data || bp == NULL);
if (bp == NULL && object == 0) {
uint64_t span = BP_SPAN(bc->bc_dnode, level);
uint64_t dnobj = (blkid * span) >> DNODE_SHIFT;
if (bp == NULL && zb->zb_object == 0) {
uint64_t span = BP_SPAN(dnp, zb->zb_level);
uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT;
err = dump_freeobjects(ba, dnobj, span >> DNODE_SHIFT);
} else if (bp == NULL) {
uint64_t span = BP_SPAN(bc->bc_dnode, level);
err = dump_free(ba, object, blkid * span, span);
} else if (data && level == 0 && type == DMU_OT_DNODE) {
dnode_phys_t *blk = data;
uint64_t span = BP_SPAN(dnp, zb->zb_level);
err = dump_free(ba, zb->zb_object, zb->zb_blkid * span, span);
} else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) {
return (0);
} else if (type == DMU_OT_DNODE) {
dnode_phys_t *blk;
int i;
int blksz = BP_GET_LSIZE(bp);
uint32_t aflags = ARC_WAIT;
arc_buf_t *abuf;
if (arc_read_nolock(NULL, spa, bp,
arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ,
ZIO_FLAG_CANFAIL, &aflags, zb) != 0)
return (EIO);
blk = abuf->b_data;
for (i = 0; i < blksz >> DNODE_SHIFT; i++) {
uint64_t dnobj =
(blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i;
uint64_t dnobj = (zb->zb_blkid <<
(DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i;
err = dump_dnode(ba, dnobj, blk+i);
if (err)
break;
}
} else if (level == 0 &&
type != DMU_OT_DNODE && type != DMU_OT_OBJSET) {
(void) arc_buf_remove_ref(abuf, &abuf);
} else { /* it's a level-0 block of a regular object */
uint32_t aflags = ARC_WAIT;
arc_buf_t *abuf;
int blksz = BP_GET_LSIZE(bp);
if (data == NULL) {
uint32_t aflags = ARC_WAIT;
arc_buf_t *abuf;
zbookmark_t zb;
zb.zb_objset = ba->os->os->os_dsl_dataset->ds_object;
zb.zb_object = object;
zb.zb_level = level;
zb.zb_blkid = blkid;
(void) arc_read_nolock(NULL, spa, bp,
arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ,
ZIO_FLAG_MUSTSUCCEED, &aflags, &zb);
if (arc_read_nolock(NULL, spa, bp,
arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ,
ZIO_FLAG_CANFAIL, &aflags, zb) != 0)
return (EIO);
if (abuf) {
err = dump_data(ba, type, object, blkid * blksz,
blksz, abuf->b_data);
(void) arc_buf_remove_ref(abuf, &abuf);
}
} else {
err = dump_data(ba, type, object, blkid * blksz,
blksz, data);
}
err = dump_data(ba, type, zb->zb_object, zb->zb_blkid * blksz,
blksz, abuf->b_data);
(void) arc_buf_remove_ref(abuf, &abuf);
}
ASSERT(err == 0 || err == EINTR);
@ -311,8 +302,7 @@ dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin,
return (ba.err);
}
err = traverse_dsl_dataset(ds, fromtxg,
ADVANCE_PRE | ADVANCE_HOLES | ADVANCE_DATA | ADVANCE_NOLOCK,
err = traverse_dataset(ds, fromtxg, TRAVERSE_PRE | TRAVERSE_PREFETCH,
backup_cb, &ba);
if (err) {

File diff suppressed because it is too large Load Diff

View File

@ -1163,12 +1163,13 @@ struct killarg {
/* ARGSUSED */
static int
kill_blkptr(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
kill_blkptr(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
const dnode_phys_t *dnp, void *arg)
{
struct killarg *ka = arg;
blkptr_t *bp = &bc->bc_blkptr;
ASSERT3U(bc->bc_errno, ==, 0);
if (bp == NULL)
return (0);
ASSERT3U(bp->blk_birth, >, ka->ds->ds_phys->ds_prev_snap_txg);
(void) dsl_dataset_block_kill(ka->ds, bp, ka->zio, ka->tx);
@ -1196,7 +1197,7 @@ dsl_dataset_rollback_check(void *arg1, void *arg2, dmu_tx_t *tx)
return (EINVAL);
/*
* If we made changes this txg, traverse_dsl_dataset won't find
* If we made changes this txg, traverse_dataset won't find
* them. Try again.
*/
if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
@ -1263,8 +1264,8 @@ dsl_dataset_rollback_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
ka.ds = ds;
ka.zio = zio;
ka.tx = tx;
(void) traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
ADVANCE_POST, kill_blkptr, &ka);
(void) traverse_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
TRAVERSE_POST, kill_blkptr, &ka);
(void) zio_wait(zio);
}
@ -1657,8 +1658,8 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
ka.ds = ds;
ka.zio = zio;
ka.tx = tx;
err = traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
ADVANCE_POST, kill_blkptr, &ka);
err = traverse_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
TRAVERSE_POST, kill_blkptr, &ka);
ASSERT3U(err, ==, 0);
ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
ds->ds_phys->ds_unique_bytes == 0);
@ -2850,6 +2851,8 @@ dsl_dataset_clone_swap_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
csa->cds->ds_phys->ds_deadlist_obj));
VERIFY(0 == bplist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset,
csa->ohds->ds_phys->ds_deadlist_obj));
dsl_pool_ds_clone_swapped(csa->ohds, csa->cds, tx);
}
/*

View File

@ -232,6 +232,8 @@ dsl_pool_close(dsl_pool_t *dp)
mutex_destroy(&dp->dp_lock);
mutex_destroy(&dp->dp_scrub_cancel_lock);
taskq_destroy(dp->dp_vnrele_taskq);
if (dp->dp_blkstats)
kmem_free(dp->dp_blkstats, sizeof (zfs_all_blkstats_t));
kmem_free(dp, sizeof (dsl_pool_t));
}

View File

@ -107,6 +107,12 @@ dsl_pool_scrub_setup_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
/* back to the generic stuff */
if (dp->dp_blkstats == NULL) {
dp->dp_blkstats =
kmem_alloc(sizeof (zfs_all_blkstats_t), KM_SLEEP);
}
bzero(dp->dp_blkstats, sizeof (zfs_all_blkstats_t));
if (spa_version(dp->dp_spa) < SPA_VERSION_DSL_SCRUB)
ot = DMU_OT_ZAP_OTHER;
@ -575,6 +581,37 @@ dsl_pool_ds_snapshotted(dsl_dataset_t *ds, dmu_tx_t *tx)
}
}
/*
 * Fix up in-progress scrub state after datasets 'ds1' and 'ds2' have been
 * clone-swapped.  Does nothing when no scrub function is active.
 *
 * - If the scrub bookmark names one of the two datasets, it is switched
 *   to name the other one.
 * - If exactly one of the two datasets has an entry in the scrub queue
 *   object, that entry is replaced by an entry for the other dataset.
 *   If both have entries, both entries are kept.
 */
void
dsl_pool_ds_clone_swapped(dsl_dataset_t *ds1, dsl_dataset_t *ds2, dmu_tx_t *tx)
{
	dsl_pool_t *dp = ds1->ds_dir->dd_pool;
	int err;

	if (dp->dp_scrub_func == SCRUB_FUNC_NONE)
		return;

	/* Retarget the bookmark if it points at either dataset. */
	if (dp->dp_scrub_bookmark.zb_objset == ds1->ds_object)
		dp->dp_scrub_bookmark.zb_objset = ds2->ds_object;
	else if (dp->dp_scrub_bookmark.zb_objset == ds2->ds_object)
		dp->dp_scrub_bookmark.zb_objset = ds1->ds_object;

	if (zap_remove_int(dp->dp_meta_objset, dp->dp_scrub_queue_obj,
	    ds1->ds_object, tx) != 0) {
		/*
		 * ds1 had no queue entry.  If ds2 has one, take it out and
		 * put ds1 in its place.
		 */
		if (zap_remove_int(dp->dp_meta_objset, dp->dp_scrub_queue_obj,
		    ds2->ds_object, tx) == 0) {
			VERIFY(0 == zap_add_int(dp->dp_meta_objset,
			    dp->dp_scrub_queue_obj, ds1->ds_object, tx));
		}
		return;
	}

	/* ds1's entry has been removed; put ds2 in its place. */
	err = zap_add_int(dp->dp_meta_objset,
	    dp->dp_scrub_queue_obj, ds2->ds_object, tx);
	VERIFY(err == 0 || err == EEXIST);
	if (err == EEXIST) {
		/* Both were there to begin with */
		VERIFY(0 == zap_add_int(dp->dp_meta_objset,
		    dp->dp_scrub_queue_obj, ds1->ds_object, tx));
	}
}
struct enqueue_clones_arg {
dmu_tx_t *tx;
uint64_t originobj;
@ -816,6 +853,52 @@ dsl_pool_scrub_restart(dsl_pool_t *dp)
* scrub consumers
*/
/*
 * Add the block pointer 'bp' to the statistics table 'zab'.
 *
 * Every block is accounted four times in zab_type[level][type]: under
 * its own level or the DN_MAX_LEVELS row, combined with its own type or
 * the DMU_OT_TOTAL column.  Besides the count and the size/gang totals,
 * the number of blocks whose ditto copies share a vdev is tracked.
 */
static void
count_block(zfs_all_blkstats_t *zab, const blkptr_t *bp)
{
	int pass;

	/*
	 * If we resume after a reboot, zab will be NULL; don't record
	 * incomplete stats in that case.
	 */
	if (zab == NULL)
		return;

	for (pass = 0; pass < 4; pass++) {
		int level = DN_MAX_LEVELS;
		int type = DMU_OT_TOTAL;
		int ndvas;
		int same;
		zfs_blkstat_t *stat;

		/* Passes 0 and 1 use the real level, passes 1 and 3 the real type. */
		if (pass < 2)
			level = BP_GET_LEVEL(bp);
		if (pass & 1)
			type = BP_GET_TYPE(bp);
		stat = &zab->zab_type[level][type];

		stat->zb_count++;
		stat->zb_asize += BP_GET_ASIZE(bp);
		stat->zb_lsize += BP_GET_LSIZE(bp);
		stat->zb_psize += BP_GET_PSIZE(bp);
		stat->zb_gangs += BP_COUNT_GANG(bp);

		ndvas = BP_GET_NDVAS(bp);
		if (ndvas == 2) {
			if (DVA_GET_VDEV(&bp->blk_dva[0]) ==
			    DVA_GET_VDEV(&bp->blk_dva[1]))
				stat->zb_ditto_2_of_2_samevdev++;
		} else if (ndvas == 3) {
			/* Number of DVA pairs that live on the same vdev. */
			same = 0;
			if (DVA_GET_VDEV(&bp->blk_dva[0]) ==
			    DVA_GET_VDEV(&bp->blk_dva[1]))
				same++;
			if (DVA_GET_VDEV(&bp->blk_dva[0]) ==
			    DVA_GET_VDEV(&bp->blk_dva[2]))
				same++;
			if (DVA_GET_VDEV(&bp->blk_dva[1]) ==
			    DVA_GET_VDEV(&bp->blk_dva[2]))
				same++;

			/* One matching pair: 2 of 3 share; three pairs: all share. */
			if (same == 1)
				stat->zb_ditto_2_of_3_samevdev++;
			else if (same == 3)
				stat->zb_ditto_3_of_3_samevdev++;
		}
	}
}
static void
dsl_pool_scrub_clean_done(zio_t *zio)
{
@ -844,6 +927,8 @@ dsl_pool_scrub_clean_cb(dsl_pool_t *dp,
int zio_flags = ZIO_FLAG_SCRUB_THREAD | ZIO_FLAG_CANFAIL;
int zio_priority;
count_block(dp->dp_blkstats, bp);
if (dp->dp_scrub_isresilver == 0) {
/* It's a scrub */
zio_flags |= ZIO_FLAG_SCRUB;

View File

@ -4075,11 +4075,7 @@ spa_sync(spa_t *spa, uint64_t txg)
spa->spa_config_syncing = NULL;
}
spa->spa_traverse_wanted = B_TRUE;
rw_enter(&spa->spa_traverse_lock, RW_WRITER);
spa->spa_traverse_wanted = B_FALSE;
spa->spa_ubsync = spa->spa_uberblock;
rw_exit(&spa->spa_traverse_lock);
/*
* Clean up the ZIL records for the synced txg.

View File

@ -428,8 +428,6 @@ spa_add(const char *name, const char *altroot)
spa = kmem_zalloc(sizeof (spa_t), KM_SLEEP);
rw_init(&spa->spa_traverse_lock, NULL, RW_DEFAULT, NULL);
mutex_init(&spa->spa_async_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa->spa_async_root_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa->spa_scrub_lock, NULL, MUTEX_DEFAULT, NULL);
@ -513,8 +511,6 @@ spa_remove(spa_t *spa)
spa_config_lock_destroy(spa);
rw_destroy(&spa->spa_traverse_lock);
cv_destroy(&spa->spa_async_cv);
cv_destroy(&spa->spa_async_root_cv);
cv_destroy(&spa->spa_scrub_io_cv);
@ -1127,16 +1123,10 @@ zfs_panic_recover(const char *fmt, ...)
* ==========================================================================
*/
/*
 * Accessor: return the address of this pool's spa_traverse_lock, the
 * rwlock spa_sync() takes as writer while it copies spa_uberblock into
 * spa_ubsync.
 */
krwlock_t *
spa_traverse_rwlock(spa_t *spa)
{
return (&spa->spa_traverse_lock);
}
boolean_t
spa_traverse_wanted(spa_t *spa)
spa_shutting_down(spa_t *spa)
{
return (spa->spa_traverse_wanted);
return (spa->spa_async_suspended);
}
dsl_pool_t *
@ -1205,7 +1195,7 @@ spa_first_txg(spa_t *spa)
return (spa->spa_first_txg);
}
int
pool_state_t
spa_state(spa_t *spa)
{
return (spa->spa_state);

View File

@ -26,93 +26,29 @@
#ifndef _SYS_DMU_TRAVERSE_H
#define _SYS_DMU_TRAVERSE_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/zio.h>
#include <sys/dmu.h>
#include <sys/dnode.h>
#include <sys/arc.h>
#ifdef __cplusplus
extern "C" {
#endif
#define ADVANCE_POST 0 /* post-order traversal */
#define ADVANCE_PRE 0x01 /* pre-order traversal */
#define ADVANCE_PRUNE 0x02 /* prune by prev snapshot birth time */
#define ADVANCE_DATA 0x04 /* read user data blocks */
#define ADVANCE_HOLES 0x08 /* visit holes */
#define ADVANCE_ZIL 0x10 /* visit intent log blocks */
#define ADVANCE_NOLOCK 0x20 /* Don't grab SPA sync lock */
struct dnode_phys;
struct dsl_dataset;
#define ZB_NO_LEVEL -2
#define ZB_MAXLEVEL 32 /* Next power of 2 >= DN_MAX_LEVELS */
#define ZB_MAXBLKID (1ULL << 62)
#define ZB_MAXOBJSET (1ULL << 62)
#define ZB_MAXOBJECT (1ULL << 62)
typedef int (blkptr_cb_t)(spa_t *spa, blkptr_t *bp,
const zbookmark_t *zb, const struct dnode_phys *dnp, void *arg);
#define ZB_MOS_CACHE 0
#define ZB_MDN_CACHE 1
#define ZB_DN_CACHE 2
#define ZB_DEPTH 3
#define TRAVERSE_PRE (1<<0)
#define TRAVERSE_POST (1<<1)
#define TRAVERSE_PREFETCH_METADATA (1<<2)
#define TRAVERSE_PREFETCH_DATA (1<<3)
#define TRAVERSE_PREFETCH (TRAVERSE_PREFETCH_METADATA | TRAVERSE_PREFETCH_DATA)
/*
 * One list element carrying a txg range and a pair of bookmarks.
 * NOTE(review): presumably an entry on traverse_handle's th_seglist
 * describing a span still to be traversed (seg_start..seg_end, limited to
 * blocks born in [seg_mintxg, seg_maxtxg]) -- the code that builds and
 * consumes these is not visible here; confirm.
 */
typedef struct zseg {
uint64_t seg_mintxg;
uint64_t seg_maxtxg;
zbookmark_t seg_start;
zbookmark_t seg_end;
/* list linkage */
list_node_t seg_node;
} zseg_t;
/*
 * Per-block record handed to a blkptr_cb_t callback (the
 * traverse_blk_cache_t form of the callback).
 */
typedef struct traverse_blk_cache {
/* bookmark (level, blkid, ...) identifying the block */
zbookmark_t bc_bookmark;
/* the block pointer itself */
blkptr_t bc_blkptr;
/*
 * The block's contents; zvol_map_block() walks it as an array of blkptr_t
 * for level > 0 blocks and as dnode_phys_t for level-0 DMU_OT_DNODE blocks.
 */
void *bc_data;
/* dnode the block belongs to (callbacks read dn_type from it) */
dnode_phys_t *bc_dnode;
/* nonzero when an error was encountered for this block */
int bc_errno;
/* NOTE(review): bc_pad1/bc_pad2 look like explicit padding -- confirm */
int bc_pad1;
uint64_t bc_pad2;
} traverse_blk_cache_t;
typedef int (blkptr_cb_t)(traverse_blk_cache_t *bc, spa_t *spa, void *arg);
/*
 * State carried by a traverse_handle_t, the handle type used by
 * traverse_init(), traverse_add_*(), traverse_more() and traverse_fini().
 * NOTE(review): the code that maintains these fields is not visible here;
 * the per-field notes are read off names and types only -- confirm before
 * relying on them.
 */
struct traverse_handle {
spa_t *th_spa;
/* callback and its opaque argument (cf. traverse_init() parameters) */
blkptr_cb_t *th_func;
void *th_arg;
/* presumably the ADVANCE_* flags passed to traverse_init() */
uint16_t th_advance;
uint16_t th_locked;
int th_zio_flags;
/* list head; presumably of zseg_t entries */
list_t th_seglist;
/* one cache slot per (ZB_*_CACHE kind, level) pair */
traverse_blk_cache_t th_cache[ZB_DEPTH][ZB_MAXLEVEL];
traverse_blk_cache_t th_zil_cache;
/* counters; presumably statistics only */
uint64_t th_hits;
uint64_t th_arc_hits;
uint64_t th_reads;
uint64_t th_callbacks;
uint64_t th_syncs;
uint64_t th_restarts;
zbookmark_t th_noread;
zbookmark_t th_lastcb;
};
int traverse_dsl_dataset(struct dsl_dataset *ds, uint64_t txg_start,
int advance, blkptr_cb_t func, void *arg);
int traverse_zvol(objset_t *os, int advance, blkptr_cb_t func, void *arg);
traverse_handle_t *traverse_init(spa_t *spa, blkptr_cb_t *func, void *arg,
int advance, int zio_flags);
void traverse_fini(traverse_handle_t *th);
void traverse_add_dnode(traverse_handle_t *th,
uint64_t mintxg, uint64_t maxtxg, uint64_t objset, uint64_t object);
void traverse_add_objset(traverse_handle_t *th,
uint64_t mintxg, uint64_t maxtxg, uint64_t objset);
void traverse_add_pool(traverse_handle_t *th, uint64_t mintxg, uint64_t maxtxg);
int traverse_more(traverse_handle_t *th);
int traverse_dataset(struct dsl_dataset *ds, uint64_t txg_start,
int flags, blkptr_cb_t func, void *arg);
int traverse_pool(spa_t *spa, blkptr_cb_t func, void *arg);
#ifdef __cplusplus
}

View File

@ -31,6 +31,7 @@
#include <sys/txg_impl.h>
#include <sys/zfs_context.h>
#include <sys/zio.h>
#include <sys/dnode.h>
#ifdef __cplusplus
extern "C" {
@ -48,6 +49,25 @@ enum scrub_func {
SCRUB_FUNC_NUMFUNCS
};
/* These macros are for indexing into the zfs_all_blkstats_t. */
#define DMU_OT_DEFERRED DMU_OT_NONE
#define DMU_OT_TOTAL DMU_OT_NUMTYPES
/*
 * Accumulated statistics for one (level, type) bucket of blocks, as
 * maintained by count_block().
 */
typedef struct zfs_blkstat {
/* number of blocks counted */
uint64_t zb_count;
/* sum of BP_GET_ASIZE() over the counted blocks */
uint64_t zb_asize;
/* sum of BP_GET_LSIZE() over the counted blocks */
uint64_t zb_lsize;
/* sum of BP_GET_PSIZE() over the counted blocks */
uint64_t zb_psize;
/* sum of BP_COUNT_GANG() over the counted blocks */
uint64_t zb_gangs;
/* blocks with 2 DVAs where both DVAs are on the same vdev */
uint64_t zb_ditto_2_of_2_samevdev;
/* blocks with 3 DVAs where exactly two of them share a vdev */
uint64_t zb_ditto_2_of_3_samevdev;
/* blocks with 3 DVAs where all three are on the same vdev */
uint64_t zb_ditto_3_of_3_samevdev;
} zfs_blkstat_t;
/*
 * Full table of block statistics, indexed as zab_type[level][type].
 * Row DN_MAX_LEVELS aggregates all levels and column DMU_OT_TOTAL
 * aggregates all object types, hence the "+ 1" in each dimension.
 */
typedef struct zfs_all_blkstats {
zfs_blkstat_t zab_type[DN_MAX_LEVELS + 1][DMU_OT_TOTAL + 1];
} zfs_all_blkstats_t;
typedef struct dsl_pool {
/* Immutable */
@ -95,6 +115,8 @@ typedef struct dsl_pool {
* nobody else could possibly have it for write.
*/
krwlock_t dp_config_rwlock;
zfs_all_blkstats_t *dp_blkstats;
} dsl_pool_t;
int dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp);
@ -112,6 +134,8 @@ int dsl_free(zio_t *pio, dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp,
zio_done_func_t *done, void *private, uint32_t arc_flags);
void dsl_pool_ds_destroyed(struct dsl_dataset *ds, struct dmu_tx *tx);
void dsl_pool_ds_snapshotted(struct dsl_dataset *ds, struct dmu_tx *tx);
void dsl_pool_ds_clone_swapped(struct dsl_dataset *ds1, struct dsl_dataset *ds2,
struct dmu_tx *tx);
void dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx);
void dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx);

View File

@ -44,7 +44,6 @@ typedef struct spa spa_t;
typedef struct vdev vdev_t;
typedef struct metaslab metaslab_t;
typedef struct zilog zilog_t;
typedef struct traverse_handle traverse_handle_t;
typedef struct spa_aux_vdev spa_aux_vdev_t;
struct dsl_pool;
@ -438,8 +437,7 @@ extern void spa_vdev_state_enter(spa_t *spa);
extern int spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error);
/* Accessor functions */
extern krwlock_t *spa_traverse_rwlock(spa_t *spa);
extern boolean_t spa_traverse_wanted(spa_t *spa);
extern boolean_t spa_shutting_down(spa_t *spa);
extern struct dsl_pool *spa_get_dsl(spa_t *spa);
extern blkptr_t *spa_get_rootblkptr(spa_t *spa);
extern void spa_set_rootblkptr(spa_t *spa, const blkptr_t *bp);
@ -450,7 +448,7 @@ extern uint64_t spa_guid(spa_t *spa);
extern uint64_t spa_last_synced_txg(spa_t *spa);
extern uint64_t spa_first_txg(spa_t *spa);
extern uint64_t spa_version(spa_t *spa);
extern int spa_state(spa_t *spa);
extern pool_state_t spa_state(spa_t *spa);
extern uint64_t spa_freeze_txg(spa_t *spa);
extern uint64_t spa_get_alloc(spa_t *spa);
extern uint64_t spa_get_space(spa_t *spa);

View File

@ -101,9 +101,8 @@ struct spa {
nvlist_t *spa_config_syncing; /* currently syncing config */
uint64_t spa_config_txg; /* txg of last config change */
int spa_sync_pass; /* iterate-to-convergence */
int spa_state; /* pool state */
pool_state_t spa_state; /* pool state */
int spa_inject_ref; /* injection references */
uint8_t spa_traverse_wanted; /* traverse lock wanted */
uint8_t spa_sync_on; /* sync threads are running */
spa_load_state_t spa_load_state; /* current load operation */
taskq_t *spa_zio_taskq[ZIO_TYPES][ZIO_TASKQ_TYPES];
@ -125,7 +124,6 @@ struct spa {
uint64_t spa_syncing_txg; /* txg currently syncing */
uint64_t spa_sync_bplist_obj; /* object for deferred frees */
bplist_t spa_sync_bplist; /* deferred-free bplist */
krwlock_t spa_traverse_lock; /* traverse vs. spa_sync() */
uberblock_t spa_ubsync; /* last synced uberblock */
uberblock_t spa_uberblock; /* current uberblock */
kmutex_t spa_scrub_lock; /* resilver/scrub lock */

View File

@ -26,8 +26,6 @@
#ifndef _SYS_TXG_IMPL_H
#define _SYS_TXG_IMPL_H
#pragma ident "%Z%%M% %I% %E% SMI"
#include <sys/spa.h>
#include <sys/txg.h>
@ -66,7 +64,6 @@ typedef struct tx_state {
kthread_t *tx_sync_thread;
kthread_t *tx_quiesce_thread;
kthread_t *tx_timelimit_thread;
} tx_state_t;
#ifdef __cplusplus

View File

@ -309,12 +309,14 @@ txg_sync_thread(void *arg)
uint64_t txg;
/*
* We sync when there's someone waiting on us, or the
* quiesce thread has handed off a txg to us, or we have
* reached our timeout.
* We sync when we're scrubbing, there's someone waiting
* on us, or the quiesce thread has handed off a txg to
* us, or we have reached our timeout.
*/
timer = (delta >= timeout ? 0 : timeout - delta);
while (!tx->tx_exiting && timer > 0 &&
while ((dp->dp_scrub_func == SCRUB_FUNC_NONE ||
spa_shutting_down(dp->dp_spa)) &&
!tx->tx_exiting && timer > 0 &&
tx->tx_synced_txg >= tx->tx_sync_txg_waiting &&
tx->tx_quiesced_txg == 0) {
dprintf("waiting; tx_synced=%llu waiting=%llu dp=%p\n",

View File

@ -279,7 +279,7 @@ vdev_cache_read(zio_t *zio)
/*
* If the I/O straddles two or more cache blocks, don't cache it.
*/
if (P2CROSS(zio->io_offset, zio->io_offset + zio->io_size - 1, VCBS))
if (P2BOUNDARY(zio->io_offset, zio->io_size, VCBS))
return (EXDEV);
ASSERT(cache_phase + zio->io_size <= VCBS);

View File

@ -94,22 +94,12 @@ DECLARE_GEOM_CLASS(zfs_zvol_class, zfs_zvol);
static kmutex_t zvol_state_lock;
static uint32_t zvol_minors;
#define NUM_EXTENTS ((SPA_MAXBLOCKSIZE) / sizeof (zvol_extent_t))
typedef struct zvol_extent {
list_node_t ze_node;
dva_t ze_dva; /* dva associated with this extent */
uint64_t ze_stride; /* extent stride */
uint64_t ze_size; /* number of blocks in extent */
uint64_t ze_nblks; /* number of blocks in extent */
} zvol_extent_t;
/*
* The list of extents associated with the dump device
*/
typedef struct zvol_ext_list {
zvol_extent_t zl_extents[NUM_EXTENTS];
struct zvol_ext_list *zl_next;
} zvol_ext_list_t;
/*
* The in-core state of each volume.
*/
@ -124,7 +114,7 @@ typedef struct zvol_state {
uint32_t zv_mode; /* DS_MODE_* flags at open time */
uint32_t zv_total_opens; /* total open count */
zilog_t *zv_zilog; /* ZIL handle */
zvol_ext_list_t *zv_list; /* List of extents for dump */
list_t zv_extents; /* List of extents for dump */
uint64_t zv_txg_assign; /* txg to assign during ZIL replay */
znode_t zv_znode; /* for range locking */
int zv_state;
@ -350,12 +340,12 @@ static void
zvol_serve_one(zvol_state_t *zv, struct bio *bp)
{
uint64_t off, volsize;
size_t size, resid;
size_t resid;
char *addr;
objset_t *os;
rl_t *rl;
int error = 0;
boolean_t reading;
boolean_t doread = (bp->bio_cmd == BIO_READ);
off = bp->bio_offset;
volsize = zv->zv_volsize;
@ -373,18 +363,16 @@ zvol_serve_one(zvol_state_t *zv, struct bio *bp)
* we can't change the data whilst calculating the checksum.
* A better approach than a per zvol rwlock would be to lock ranges.
*/
reading = (bp->bio_cmd == BIO_READ);
rl = zfs_range_lock(&zv->zv_znode, off, resid,
reading ? RL_READER : RL_WRITER);
doread ? RL_READER : RL_WRITER);
while (resid != 0 && off < volsize) {
size = MIN(resid, zvol_maxphys); /* zvol_maxphys per tx */
size_t size = MIN(resid, zvol_maxphys); /* zvol_maxphys per tx */
if (size > volsize - off) /* don't write past the end */
size = volsize - off;
if (reading) {
if (doread) {
error = dmu_read(os, ZVOL_OBJ, off, size, addr);
} else {
dmu_tx_t *tx = dmu_tx_create(os);
@ -457,128 +445,81 @@ zvol_worker(void *arg)
}
}
/*
 * Begin a new extent at the first DVA of bp: one block long, with no
 * stride established yet.
 */
void
zvol_init_extent(zvol_extent_t *ze, blkptr_t *bp)
{
	ze->ze_size = 1;
	ze->ze_stride = 0;
	ze->ze_dva = bp->blk_dva[0];	/* struct copy */
}
/* extent mapping arg */
struct maparg {
zvol_ext_list_t *ma_list;
zvol_extent_t *ma_extent;
int ma_gang;
zvol_state_t *ma_zv;
uint64_t ma_blks;
};
/*ARGSUSED*/
static int
zvol_map_block(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
zvol_map_block(spa_t *spa, blkptr_t *bp, const zbookmark_t *zb,
const dnode_phys_t *dnp, void *arg)
{
zbookmark_t *zb = &bc->bc_bookmark;
blkptr_t *bp = &bc->bc_blkptr;
void *data = bc->bc_data;
dnode_phys_t *dnp = bc->bc_dnode;
struct maparg *ma = (struct maparg *)arg;
uint64_t stride;
struct maparg *ma = arg;
zvol_extent_t *ze;
int bs = ma->ma_zv->zv_volblocksize;
/* If there is an error, then keep trying to make progress */
if (bc->bc_errno)
return (ERESTART);
#ifdef ZFS_DEBUG
if (zb->zb_level == -1) {
ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET);
ASSERT3U(BP_GET_LEVEL(bp), ==, 0);
} else {
ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
}
if (zb->zb_level > 0) {
uint64_t fill = 0;
blkptr_t *bpx, *bpend;
for (bpx = data, bpend = bpx + BP_GET_LSIZE(bp) / sizeof (*bpx);
bpx < bpend; bpx++) {
if (bpx->blk_birth != 0) {
fill += bpx->blk_fill;
} else {
ASSERT(bpx->blk_fill == 0);
}
}
ASSERT3U(fill, ==, bp->blk_fill);
}
if (zb->zb_level == 0 && dnp->dn_type == DMU_OT_DNODE) {
uint64_t fill = 0;
dnode_phys_t *dnx, *dnend;
for (dnx = data, dnend = dnx + (BP_GET_LSIZE(bp)>>DNODE_SHIFT);
dnx < dnend; dnx++) {
if (dnx->dn_type != DMU_OT_NONE)
fill++;
}
ASSERT3U(fill, ==, bp->blk_fill);
}
#endif
if (zb->zb_level || dnp->dn_type == DMU_OT_DNODE)
if (bp == NULL || zb->zb_object != ZVOL_OBJ || zb->zb_level != 0)
return (0);
VERIFY3U(ma->ma_blks, ==, zb->zb_blkid);
ma->ma_blks++;
/* Abort immediately if we have encountered gang blocks */
if (BP_IS_GANG(bp)) {
ma->ma_gang++;
return (EINTR);
}
if (BP_IS_GANG(bp))
return (EFRAGS);
/* first time? */
if (ma->ma_extent->ze_size == 0) {
zvol_init_extent(ma->ma_extent, bp);
/*
* See if the block is at the end of the previous extent.
*/
ze = list_tail(&ma->ma_zv->zv_extents);
if (ze &&
DVA_GET_VDEV(BP_IDENTITY(bp)) == DVA_GET_VDEV(&ze->ze_dva) &&
DVA_GET_OFFSET(BP_IDENTITY(bp)) ==
DVA_GET_OFFSET(&ze->ze_dva) + ze->ze_nblks * bs) {
ze->ze_nblks++;
return (0);
}
stride = (DVA_GET_OFFSET(&bp->blk_dva[0])) -
((DVA_GET_OFFSET(&ma->ma_extent->ze_dva)) +
(ma->ma_extent->ze_size - 1) * (ma->ma_extent->ze_stride));
if (DVA_GET_VDEV(BP_IDENTITY(bp)) ==
DVA_GET_VDEV(&ma->ma_extent->ze_dva)) {
if (ma->ma_extent->ze_stride == 0) {
/* second block in this extent */
ma->ma_extent->ze_stride = stride;
ma->ma_extent->ze_size++;
return (0);
} else if (ma->ma_extent->ze_stride == stride) {
/*
* the block we allocated has the same
* stride
*/
ma->ma_extent->ze_size++;
return (0);
}
dprintf_bp(bp, "%s", "next blkptr:");
/* start a new extent */
ze = kmem_zalloc(sizeof (zvol_extent_t), KM_SLEEP);
ze->ze_dva = bp->blk_dva[0]; /* structure assignment */
ze->ze_nblks = 1;
list_insert_tail(&ma->ma_zv->zv_extents, ze);
return (0);
}
/*
 * Release every extent on zv->zv_extents, leaving the list empty.
 */
static void
zvol_free_extents(zvol_state_t *zv)
{
	list_t *extents = &zv->zv_extents;
	zvol_extent_t *ze;

	/* always pop from the head until nothing is left */
	for (ze = list_head(extents); ze != NULL; ze = list_head(extents)) {
		list_remove(extents, ze);
		kmem_free(ze, sizeof (zvol_extent_t));
	}
}
static int
zvol_get_lbas(zvol_state_t *zv)
{
struct maparg ma;
int err;
ma.ma_zv = zv;
ma.ma_blks = 0;
zvol_free_extents(zv);
err = traverse_dataset(dmu_objset_ds(zv->zv_objset), 0,
TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, zvol_map_block, &ma);
if (err || ma.ma_blks != (zv->zv_volsize / zv->zv_volblocksize)) {
zvol_free_extents(zv);
return (err ? err : EIO);
}
/*
* dtrace -n 'zfs-dprintf
* /stringof(arg0) == "zvol.c"/
* {
* printf("%s: %s", stringof(arg1), stringof(arg3))
* } '
*/
dprintf("ma_extent 0x%lx mrstride 0x%lx stride %lx\n",
ma->ma_extent->ze_size, ma->ma_extent->ze_stride, stride);
dprintf_bp(bp, "%s", "next blkptr:");
/* start a new extent */
if (ma->ma_extent == &ma->ma_list->zl_extents[NUM_EXTENTS - 1]) {
ma->ma_list->zl_next = kmem_zalloc(sizeof (zvol_ext_list_t),
KM_SLEEP);
ma->ma_list = ma->ma_list->zl_next;
ma->ma_extent = &ma->ma_list->zl_extents[0];
} else {
ma->ma_extent++;
}
zvol_init_extent(ma->ma_extent, bp);
return (0);
}
@ -675,106 +616,6 @@ zil_replay_func_t *zvol_replay_vector[TX_MAX_TYPE] = {
zvol_replay_err, /* TX_ACL */
};
/*
 * Translate a byte offset within the volume into a DVA, using the extent
 * list in zv->zv_list.
 *
 * Walks the extents in order, subtracting each extent's byte length
 * (ze_size * zv_volblocksize) from offset until the remainder falls inside
 * one.  The result takes that extent's vdev, and its starting DVA offset
 * advanced by ze_stride for every whole block of remainder.
 *
 * Returns 0 on success, EIO when there is no extent list, and -1 when the
 * offset lies beyond the last extent chunk.
 */
int
zvol_get_dva(zvol_state_t *zv, uint64_t offset, dva_t *dva)
{
	zvol_ext_list_t *chunk = zv->zv_list;
	zvol_extent_t *ext;
	uint64_t ext_bytes, dva_off;
	int slot = 0;

	if (chunk == NULL)
		return (EIO);

	ext = &chunk->zl_extents[0];
	for (;;) {
		ext_bytes = ext->ze_size * zv->zv_volblocksize;
		if (offset < ext_bytes)
			break;		/* offset falls inside this extent */
		offset -= ext_bytes;

		if (slot != NUM_EXTENTS - 1) {
			/* next slot in the same chunk */
			ext++;
			slot++;
			continue;
		}

		/* we've reached the end of this array */
		ASSERT(chunk->zl_next != NULL);
		if (chunk->zl_next == NULL)
			return (-1);
		chunk = chunk->zl_next;
		ext = &chunk->zl_extents[0];
		slot = 0;
	}

	DVA_SET_VDEV(dva, DVA_GET_VDEV(&ext->ze_dva));
	dva_off = DVA_GET_OFFSET((&ext->ze_dva));
	dva_off += (ext->ze_stride * (offset / zv->zv_volblocksize));
	DVA_SET_OFFSET(dva, dva_off);
	return (0);
}
/*
 * Free the whole chain of extent chunks hanging off zv->zv_list and reset
 * the head pointer to NULL.  A NULL list is left untouched.
 */
static void
zvol_free_extents(zvol_state_t *zv)
{
	zvol_ext_list_t *chunk = zv->zv_list;

	if (chunk == NULL)
		return;

	do {
		/* grab the successor before the chunk goes away */
		zvol_ext_list_t *next = chunk->zl_next;

		kmem_free(chunk, sizeof (zvol_ext_list_t));
		chunk = next;
	} while (chunk != NULL);

	zv->zv_list = NULL;
}
/*
 * Build the extent list for the volume by traversing its blocks with
 * zvol_map_block(), then verify that the extents account for exactly
 * zv_volsize / zv_volblocksize blocks.
 *
 * Returns 0 on success with the list left in zv->zv_list.  On any failure
 * the list is freed and an errno is returned:
 *   EFRAGS  the traversal was interrupted because gang blocks were found
 *   EIO     the extents do not cover the expected number of blocks
 *   other   the error reported by traverse_zvol()
 */
int
zvol_get_lbas(zvol_state_t *zv)
{
	struct maparg ma;
	zvol_ext_list_t *zl;
	uint64_t blocks = 0;
	size_t i;
	int err;

	ma.ma_list = kmem_zalloc(sizeof (zvol_ext_list_t), KM_SLEEP);
	ma.ma_extent = &ma.ma_list->zl_extents[0];
	ma.ma_gang = 0;
	zv->zv_list = ma.ma_list;

	err = traverse_zvol(zv->zv_objset, ADVANCE_PRE, zvol_map_block, &ma);
	if (err == EINTR && ma.ma_gang) {
		/*
		 * We currently don't support dump devices when the pool
		 * is so fragmented that our allocation has resulted in
		 * gang blocks.
		 */
		zvol_free_extents(zv);
		return (EFRAGS);
	}
	if (err != 0) {
		/*
		 * Previously this was only ASSERTed, so non-debug builds
		 * silently carried on with a partial extent list.
		 */
		zvol_free_extents(zv);
		return (err);
	}

	/*
	 * Sum the block counts of every slot in every chunk.  Unused slots
	 * are zero-filled (kmem_zalloc) and contribute nothing.  Walking
	 * the chunk pointers directly avoids forming an element address
	 * from a NULL zl_next to detect the end of the chain.
	 */
	for (zl = zv->zv_list; zl != NULL; zl = zl->zl_next) {
		for (i = 0; i < NUM_EXTENTS; i++)
			blocks += zl->zl_extents[i].ze_size;
	}

	if (blocks != (zv->zv_volsize / zv->zv_volblocksize)) {
		zvol_free_extents(zv);
		return (EIO);
	}

	return (0);
}
/*
* Create a minor node (plus a whole lot more) for the specified volume.
*/
@ -830,6 +671,8 @@ zvol_create_minor(const char *name, major_t maj)
mutex_init(&zv->zv_znode.z_range_lock, NULL, MUTEX_DEFAULT, NULL);
avl_create(&zv->zv_znode.z_range_avl, zfs_range_compare,
sizeof (rl_t), offsetof(rl_t, r_node));
list_create(&zv->zv_extents, sizeof (zvol_extent_t),
offsetof(zvol_extent_t, ze_node));
/* get and cache the blocksize */
error = dmu_object_info(os, ZVOL_OBJ, &doi);
ASSERT(error == 0);
@ -1091,6 +934,8 @@ zvol_set_volblocksize(const char *name, uint64_t volblocksize)
if (error == ENOTSUP)
error = EBUSY;
dmu_tx_commit(tx);
if (error == 0)
zv->zv_volblocksize = volblocksize;
}
end:
mutex_exit(&zvol_state_lock);
@ -1225,7 +1070,6 @@ zvol_dump_init(zvol_state_t *zv, boolean_t resize)
int error = 0;
objset_t *os = zv->zv_objset;
nvlist_t *nv = NULL;
uint64_t checksum, compress, refresrv;
ASSERT(MUTEX_HELD(&zvol_state_lock));
@ -1248,12 +1092,16 @@ zvol_dump_init(zvol_state_t *zv, boolean_t resize)
zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1,
&zv->zv_volsize, tx);
} else {
uint64_t checksum, compress, refresrv, vbs;
error = dsl_prop_get_integer(zv->zv_name,
zfs_prop_to_name(ZFS_PROP_COMPRESSION), &compress, NULL);
error = error ? error : dsl_prop_get_integer(zv->zv_name,
zfs_prop_to_name(ZFS_PROP_CHECKSUM), &checksum, NULL);
error = error ? error : dsl_prop_get_integer(zv->zv_name,
zfs_prop_to_name(ZFS_PROP_REFRESERVATION), &refresrv, NULL);
error = error ? error : dsl_prop_get_integer(zv->zv_name,
zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &vbs, NULL);
error = error ? error : zap_update(os, ZVOL_ZAP_OBJ,
zfs_prop_to_name(ZFS_PROP_COMPRESSION), 8, 1,
@ -1263,6 +1111,9 @@ zvol_dump_init(zvol_state_t *zv, boolean_t resize)
error = error ? error : zap_update(os, ZVOL_ZAP_OBJ,
zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1,
&refresrv, tx);
error = error ? error : zap_update(os, ZVOL_ZAP_OBJ,
zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), 8, 1,
&vbs, tx);
}
dmu_tx_commit(tx);
@ -1288,6 +1139,9 @@ zvol_dump_init(zvol_state_t *zv, boolean_t resize)
VERIFY(nvlist_add_uint64(nv,
zfs_prop_to_name(ZFS_PROP_CHECKSUM),
ZIO_CHECKSUM_OFF) == 0);
VERIFY(nvlist_add_uint64(nv,
zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
SPA_MAXBLOCKSIZE) == 0);
error = zfs_set_prop_nvlist(zv->zv_name, nv);
nvlist_free(nv);
@ -1367,7 +1221,7 @@ zvol_dump_fini(zvol_state_t *zv)
objset_t *os = zv->zv_objset;
nvlist_t *nv;
int error = 0;
uint64_t checksum, compress, refresrv;
uint64_t checksum, compress, refresrv, vbs;
/*
* Attempt to restore the zvol back to its pre-dumpified state.
@ -1392,6 +1246,8 @@ zvol_dump_fini(zvol_state_t *zv)
zfs_prop_to_name(ZFS_PROP_COMPRESSION), 8, 1, &compress);
(void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ,
zfs_prop_to_name(ZFS_PROP_REFRESERVATION), 8, 1, &refresrv);
(void) zap_lookup(zv->zv_objset, ZVOL_ZAP_OBJ,
zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), 8, 1, &vbs);
VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
(void) nvlist_add_uint64(nv,
@ -1400,6 +1256,8 @@ zvol_dump_fini(zvol_state_t *zv)
zfs_prop_to_name(ZFS_PROP_COMPRESSION), compress);
(void) nvlist_add_uint64(nv,
zfs_prop_to_name(ZFS_PROP_REFRESERVATION), refresrv);
(void) nvlist_add_uint64(nv,
zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), vbs);
(void) zfs_set_prop_nvlist(zv->zv_name, nv);
nvlist_free(nv);