MFV r242733:
3306 zdb should be able to issue reads in parallel 3321 'zpool reopen' command should be documented in the man page and help message illumos/illumos-gate@31d7e8fa33 FreeBSD porting notes: the kernel part of this changeset depends on Solaris buf(9S) interfaces and is not really applicable to our use. vdev_disk.c is patched as-is to reduce divergence from upstream, but vdev_file.c is left intact. MFC after: 2 weeks
This commit is contained in:
commit
db2aff5f8b
@ -14,11 +14,12 @@
|
||||
.\"
|
||||
.\" Copyright 2012, Richard Lowe.
|
||||
.\" Copyright (c) 2012, Marcelo Araujo <araujo@FreeBSD.org>.
|
||||
.\" Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
.\" All Rights Reserved.
|
||||
.\"
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd May 10, 2012
|
||||
.Dd December 31, 2013
|
||||
.Dt ZDB 8
|
||||
.Os
|
||||
.Sh NAME
|
||||
@ -29,27 +30,35 @@
|
||||
.Op Fl CumdibcsDvhLXFPA
|
||||
.Op Fl e Op Fl p Ar path...
|
||||
.Op Fl t Ar txg
|
||||
.Op Fl U Ar cache
|
||||
.Op Fl M Ar inflight I/Os
|
||||
.Ar poolname
|
||||
.Op Ar object ...
|
||||
.Nm
|
||||
.Op Fl divPA
|
||||
.Op Fl e Op Fl p Ar path...
|
||||
.Op Fl U Ar cache
|
||||
.Ar dataset
|
||||
.Op Ar object ...
|
||||
.Nm
|
||||
.Fl m Op Fl LXFPA
|
||||
.Op Fl t Ar txg
|
||||
.Op Fl e Op Fl p Ar path...
|
||||
.Op Fl U Ar cache
|
||||
.Ar poolname
|
||||
.Nm
|
||||
.Fl R Op Fl A
|
||||
.Op Fl e Op Fl p Ar path...
|
||||
.Op Fl U Ar cache
|
||||
.Ar poolname
|
||||
.Ar poolname
|
||||
.Ar vdev Ns : Ns Ar offset Ns : Ns Ar size Ns Op Ns : Ns Ar flags
|
||||
.Nm
|
||||
.Fl S
|
||||
.Op Fl AP
|
||||
.Op Fl e Op Fl p Ar path...
|
||||
.Op Fl U Ar cache
|
||||
.Ar poolname
|
||||
.Ar poolname
|
||||
.Nm
|
||||
.Fl l
|
||||
@ -205,6 +214,11 @@ flag specifies the path under which devices are to be searched.
|
||||
.It Fl F
|
||||
Attempt to make an unreadable pool readable by trying progressively older
|
||||
transactions.
|
||||
.It Fl M Ar inflight I/Os
|
||||
Limit the number of outstanding checksum I/Os to the specified value.
|
||||
The default value is 200. This option affects the performance of the
|
||||
.Fl c
|
||||
option.
|
||||
.It Fl P
|
||||
Print numbers in an unscaled form more amenable to parsing, e.g. 1000000 rather
|
||||
than 1M.
|
||||
@ -218,9 +232,7 @@ options for a means to see the available uberblocks and their associated
|
||||
transaction numbers.
|
||||
.It Fl U Ar cachefile
|
||||
Use a cache file other than
|
||||
.Pa /etc/zfs/zpool.cache .
|
||||
This option is only valid with
|
||||
.Fl C
|
||||
.Pa /boot/zfs/zpool.cache .
|
||||
.It Fl v
|
||||
Enable verbosity.
|
||||
Specify multiple times for increased verbosity.
|
||||
|
@ -89,6 +89,7 @@ extern void dump_intent_log(zilog_t *);
|
||||
uint64_t *zopt_object = NULL;
|
||||
int zopt_objects = 0;
|
||||
libzfs_handle_t *g_zfs;
|
||||
uint64_t max_inflight = 200;
|
||||
|
||||
/*
|
||||
* These libumem hooks provide a reasonable set of defaults for the allocator's
|
||||
@ -110,16 +111,17 @@ static void
|
||||
usage(void)
|
||||
{
|
||||
(void) fprintf(stderr,
|
||||
"Usage: %s [-CumdibcsDvhLXFPA] [-t txg] [-e [-p path...]]"
|
||||
"poolname [object...]\n"
|
||||
" %s [-divPA] [-e -p path...] dataset [object...]\n"
|
||||
" %s -m [-LXFPA] [-t txg] [-e [-p path...]]"
|
||||
"poolname [vdev [metaslab...]]\n"
|
||||
" %s -R [-A] [-e [-p path...]] poolname "
|
||||
"vdev:offset:size[:flags]\n"
|
||||
" %s -S [-PA] [-e [-p path...]] poolname\n"
|
||||
" %s -l [-uA] device\n"
|
||||
" %s -C [-A] [-U config]\n\n",
|
||||
"Usage: %s [-CumdibcsDvhLXFPA] [-t txg] [-e [-p path...]] "
|
||||
"[-U config] [-M inflight I/Os] poolname [object...]\n"
|
||||
" %s [-divPA] [-e -p path...] [-U config] dataset "
|
||||
"[object...]\n"
|
||||
" %s -m [-LXFPA] [-t txg] [-e [-p path...]] [-U config] "
|
||||
"poolname [vdev [metaslab...]]\n"
|
||||
" %s -R [-A] [-e [-p path...]] poolname "
|
||||
"vdev:offset:size[:flags]\n"
|
||||
" %s -S [-PA] [-e [-p path...]] [-U config] poolname\n"
|
||||
" %s -l [-uA] device\n"
|
||||
" %s -C [-A] [-U config]\n\n",
|
||||
cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname);
|
||||
|
||||
(void) fprintf(stderr, " Dataset name must include at least one "
|
||||
@ -164,6 +166,8 @@ usage(void)
|
||||
(void) fprintf(stderr, " -P print numbers in parseable form\n");
|
||||
(void) fprintf(stderr, " -t <txg> -- highest txg to use when "
|
||||
"searching for uberblocks\n");
|
||||
(void) fprintf(stderr, " -M <number of inflight I/Os> -- "
|
||||
"specify the maximum number of checksumming I/Os [default is 200]");
|
||||
(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
|
||||
"to make only that option verbose\n");
|
||||
(void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
|
||||
@ -2154,6 +2158,47 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
|
||||
bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0);
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
static void
|
||||
zdb_blkptr_done(zio_t *zio)
|
||||
{
|
||||
spa_t *spa = zio->io_spa;
|
||||
blkptr_t *bp = zio->io_bp;
|
||||
int ioerr = zio->io_error;
|
||||
zdb_cb_t *zcb = zio->io_private;
|
||||
zbookmark_t *zb = &zio->io_bookmark;
|
||||
|
||||
zio_data_buf_free(zio->io_data, zio->io_size);
|
||||
|
||||
mutex_enter(&spa->spa_scrub_lock);
|
||||
spa->spa_scrub_inflight--;
|
||||
cv_broadcast(&spa->spa_scrub_io_cv);
|
||||
|
||||
if (ioerr && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
|
||||
char blkbuf[BP_SPRINTF_LEN];
|
||||
|
||||
zcb->zcb_haderrors = 1;
|
||||
zcb->zcb_errors[ioerr]++;
|
||||
|
||||
if (dump_opt['b'] >= 2)
|
||||
sprintf_blkptr(blkbuf, bp);
|
||||
else
|
||||
blkbuf[0] = '\0';
|
||||
|
||||
(void) printf("zdb_blkptr_cb: "
|
||||
"Got error %d reading "
|
||||
"<%llu, %llu, %lld, %llx> %s -- skipping\n",
|
||||
ioerr,
|
||||
(u_longlong_t)zb->zb_objset,
|
||||
(u_longlong_t)zb->zb_object,
|
||||
(u_longlong_t)zb->zb_level,
|
||||
(u_longlong_t)zb->zb_blkid,
|
||||
blkbuf);
|
||||
}
|
||||
mutex_exit(&spa->spa_scrub_lock);
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
static int
|
||||
zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
||||
const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
|
||||
@ -2174,38 +2219,22 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
||||
is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type));
|
||||
|
||||
if (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata)) {
|
||||
int ioerr;
|
||||
size_t size = BP_GET_PSIZE(bp);
|
||||
void *data = malloc(size);
|
||||
void *data = zio_data_buf_alloc(size);
|
||||
int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;
|
||||
|
||||
/* If it's an intent log block, failure is expected. */
|
||||
if (zb->zb_level == ZB_ZIL_LEVEL)
|
||||
flags |= ZIO_FLAG_SPECULATIVE;
|
||||
|
||||
ioerr = zio_wait(zio_read(NULL, spa, bp, data, size,
|
||||
NULL, NULL, ZIO_PRIORITY_ASYNC_READ, flags, zb));
|
||||
mutex_enter(&spa->spa_scrub_lock);
|
||||
while (spa->spa_scrub_inflight > max_inflight)
|
||||
cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
|
||||
spa->spa_scrub_inflight++;
|
||||
mutex_exit(&spa->spa_scrub_lock);
|
||||
|
||||
free(data);
|
||||
if (ioerr && !(flags & ZIO_FLAG_SPECULATIVE)) {
|
||||
zcb->zcb_haderrors = 1;
|
||||
zcb->zcb_errors[ioerr]++;
|
||||
|
||||
if (dump_opt['b'] >= 2)
|
||||
sprintf_blkptr(blkbuf, bp);
|
||||
else
|
||||
blkbuf[0] = '\0';
|
||||
|
||||
(void) printf("zdb_blkptr_cb: "
|
||||
"Got error %d reading "
|
||||
"<%llu, %llu, %lld, %llx> %s -- skipping\n",
|
||||
ioerr,
|
||||
(u_longlong_t)zb->zb_objset,
|
||||
(u_longlong_t)zb->zb_object,
|
||||
(u_longlong_t)zb->zb_level,
|
||||
(u_longlong_t)zb->zb_blkid,
|
||||
blkbuf);
|
||||
}
|
||||
zio_nowait(zio_read(NULL, spa, bp, data, size,
|
||||
zdb_blkptr_done, zcb, ZIO_PRIORITY_ASYNC_READ, flags, zb));
|
||||
}
|
||||
|
||||
zcb->zcb_readfails = 0;
|
||||
@ -2433,6 +2462,18 @@ dump_block_stats(spa_t *spa)
|
||||
zcb.zcb_start = zcb.zcb_lastprint = gethrtime();
|
||||
zcb.zcb_haderrors |= traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb);
|
||||
|
||||
/*
|
||||
* If we've traversed the data blocks then we need to wait for those
|
||||
* I/Os to complete. We leverage "The Godfather" zio to wait on
|
||||
* all async I/Os to complete.
|
||||
*/
|
||||
if (dump_opt['c']) {
|
||||
(void) zio_wait(spa->spa_async_zio_root);
|
||||
spa->spa_async_zio_root = zio_root(spa, NULL, NULL,
|
||||
ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE |
|
||||
ZIO_FLAG_GODFATHER);
|
||||
}
|
||||
|
||||
if (zcb.zcb_haderrors) {
|
||||
(void) printf("\nError counts:\n\n");
|
||||
(void) printf("\t%5s %s\n", "errno", "count");
|
||||
@ -3202,7 +3243,7 @@ main(int argc, char **argv)
|
||||
|
||||
dprintf_setup(&argc, argv);
|
||||
|
||||
while ((c = getopt(argc, argv, "bcdhilmsuCDRSAFLXevp:t:U:P")) != -1) {
|
||||
while ((c = getopt(argc, argv, "bcdhilmM:suCDRSAFLXevp:t:U:P")) != -1) {
|
||||
switch (c) {
|
||||
case 'b':
|
||||
case 'c':
|
||||
@ -3231,6 +3272,15 @@ main(int argc, char **argv)
|
||||
case 'v':
|
||||
verbose++;
|
||||
break;
|
||||
case 'M':
|
||||
max_inflight = strtoull(optarg, NULL, 0);
|
||||
if (max_inflight == 0) {
|
||||
(void) fprintf(stderr, "maximum number "
|
||||
"of inflight I/Os must be greater "
|
||||
"than 0\n");
|
||||
usage();
|
||||
}
|
||||
break;
|
||||
case 'p':
|
||||
if (searchdirs == NULL) {
|
||||
searchdirs = umem_alloc(sizeof (char *),
|
||||
|
@ -25,7 +25,7 @@
|
||||
.\"
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd March 14, 2013
|
||||
.Dd December 31, 2013
|
||||
.Dt ZPOOL 8
|
||||
.Os
|
||||
.Sh NAME
|
||||
@ -141,6 +141,9 @@
|
||||
.Cm remove
|
||||
.Ar pool device ...
|
||||
.Nm
|
||||
.Cm reopen
|
||||
.Ar pool
|
||||
.Nm
|
||||
.Cm replace
|
||||
.Op Fl f
|
||||
.Ar pool device
|
||||
@ -1431,6 +1434,13 @@ command. Non-redundant and
|
||||
devices cannot be removed from a pool.
|
||||
.It Xo
|
||||
.Nm
|
||||
.Cm reopen
|
||||
.Ar pool
|
||||
.Xc
|
||||
.Pp
|
||||
Reopen all the vdevs associated with the pool.
|
||||
.It Xo
|
||||
.Nm
|
||||
.Cm replace
|
||||
.Op Fl f
|
||||
.Ar pool device
|
||||
|
@ -248,7 +248,7 @@ get_usage(zpool_help_t idx) {
|
||||
case HELP_REMOVE:
|
||||
return (gettext("\tremove <pool> <device> ...\n"));
|
||||
case HELP_REOPEN:
|
||||
return (""); /* Undocumented command */
|
||||
return (gettext("\treopen <pool>\n"));
|
||||
case HELP_SCRUB:
|
||||
return (gettext("\tscrub [-s] <pool> ...\n"));
|
||||
case HELP_STATUS:
|
||||
@ -3720,22 +3720,37 @@ zpool_do_reguid(int argc, char **argv)
|
||||
* zpool reopen <pool>
|
||||
*
|
||||
* Reopen the pool so that the kernel can update the sizes of all vdevs.
|
||||
*
|
||||
* NOTE: This command is currently undocumented. If the command is ever
|
||||
* exposed then the appropriate usage() messages will need to be made.
|
||||
*/
|
||||
int
|
||||
zpool_do_reopen(int argc, char **argv)
|
||||
{
|
||||
int c;
|
||||
int ret = 0;
|
||||
zpool_handle_t *zhp;
|
||||
char *pool;
|
||||
|
||||
/* check options */
|
||||
while ((c = getopt(argc, argv, "")) != -1) {
|
||||
switch (c) {
|
||||
case '?':
|
||||
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
|
||||
optopt);
|
||||
usage(B_FALSE);
|
||||
}
|
||||
}
|
||||
|
||||
argc--;
|
||||
argv++;
|
||||
|
||||
if (argc != 1)
|
||||
return (2);
|
||||
if (argc < 1) {
|
||||
(void) fprintf(stderr, gettext("missing pool name\n"));
|
||||
usage(B_FALSE);
|
||||
}
|
||||
|
||||
if (argc > 1) {
|
||||
(void) fprintf(stderr, gettext("too many arguments\n"));
|
||||
usage(B_FALSE);
|
||||
}
|
||||
|
||||
pool = argv[0];
|
||||
if ((zhp = zpool_open_canfail(g_zfs, pool)) == NULL)
|
||||
|
@ -661,7 +661,7 @@ __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
|
||||
if (dprintf_find_string("pid"))
|
||||
(void) printf("%d ", getpid());
|
||||
if (dprintf_find_string("tid"))
|
||||
(void) printf("%u ", thr_self());
|
||||
(void) printf("%ul ", thr_self());
|
||||
#if 0
|
||||
if (dprintf_find_string("cpu"))
|
||||
(void) printf("%u ", getcpuid());
|
||||
@ -1125,3 +1125,50 @@ zvol_create_minors(const char *name)
|
||||
return (0);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef illumos
|
||||
void
|
||||
bioinit(buf_t *bp)
|
||||
{
|
||||
bzero(bp, sizeof (buf_t));
|
||||
}
|
||||
|
||||
void
|
||||
biodone(buf_t *bp)
|
||||
{
|
||||
if (bp->b_iodone != NULL) {
|
||||
(*(bp->b_iodone))(bp);
|
||||
return;
|
||||
}
|
||||
ASSERT((bp->b_flags & B_DONE) == 0);
|
||||
bp->b_flags |= B_DONE;
|
||||
}
|
||||
|
||||
void
|
||||
bioerror(buf_t *bp, int error)
|
||||
{
|
||||
ASSERT(bp != NULL);
|
||||
ASSERT(error >= 0);
|
||||
|
||||
if (error != 0) {
|
||||
bp->b_flags |= B_ERROR;
|
||||
} else {
|
||||
bp->b_flags &= ~B_ERROR;
|
||||
}
|
||||
bp->b_error = error;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
geterror(struct buf *bp)
|
||||
{
|
||||
int error = 0;
|
||||
|
||||
if (bp->b_flags & B_ERROR) {
|
||||
error = bp->b_error;
|
||||
if (!error)
|
||||
error = EIO;
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
#endif
|
||||
|
@ -778,6 +778,38 @@ extern void cyclic_remove(cyclic_id_t);
|
||||
extern int cyclic_reprogram(cyclic_id_t, hrtime_t);
|
||||
#endif /* illumos */
|
||||
|
||||
#ifdef illumos
|
||||
/*
|
||||
* Buf structure
|
||||
*/
|
||||
#define B_BUSY 0x0001
|
||||
#define B_DONE 0x0002
|
||||
#define B_ERROR 0x0004
|
||||
#define B_READ 0x0040 /* read when I/O occurs */
|
||||
#define B_WRITE 0x0100 /* non-read pseudo-flag */
|
||||
|
||||
typedef struct buf {
|
||||
int b_flags;
|
||||
size_t b_bcount;
|
||||
union {
|
||||
caddr_t b_addr;
|
||||
} b_un;
|
||||
|
||||
lldaddr_t _b_blkno;
|
||||
#define b_lblkno _b_blkno._f
|
||||
size_t b_resid;
|
||||
size_t b_bufsize;
|
||||
int (*b_iodone)(struct buf *);
|
||||
int b_error;
|
||||
void *b_private;
|
||||
} buf_t;
|
||||
|
||||
extern void bioinit(buf_t *);
|
||||
extern void biodone(buf_t *);
|
||||
extern void bioerror(buf_t *, int);
|
||||
extern int geterror(buf_t *);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@ -363,6 +363,16 @@ extern void vdev_set_min_asize(vdev_t *vd);
|
||||
/* zdb uses this tunable, so it must be declared here to make lint happy. */
|
||||
extern int zfs_vdev_cache_size;
|
||||
|
||||
#ifdef illumos
|
||||
/*
|
||||
* The vdev_buf_t is used to translate between zio_t and buf_t, and back again.
|
||||
*/
|
||||
typedef struct vdev_buf {
|
||||
buf_t vb_buf; /* buffer that describes the io */
|
||||
zio_t *vb_io; /* pointer back to the original zio_t */
|
||||
} vdev_buf_t;
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@ -42,11 +42,6 @@
|
||||
|
||||
extern ldi_ident_t zfs_li;
|
||||
|
||||
typedef struct vdev_disk_buf {
|
||||
buf_t vdb_buf;
|
||||
zio_t *vdb_io;
|
||||
} vdev_disk_buf_t;
|
||||
|
||||
static void
|
||||
vdev_disk_hold(vdev_t *vd)
|
||||
{
|
||||
@ -483,8 +478,8 @@ vdev_disk_ldi_physio(ldi_handle_t vd_lh, caddr_t data,
|
||||
static void
|
||||
vdev_disk_io_intr(buf_t *bp)
|
||||
{
|
||||
vdev_disk_buf_t *vdb = (vdev_disk_buf_t *)bp;
|
||||
zio_t *zio = vdb->vdb_io;
|
||||
vdev_buf_t *vb = (vdev_buf_t *)bp;
|
||||
zio_t *zio = vb->vb_io;
|
||||
|
||||
/*
|
||||
* The rest of the zio stack only deals with EIO, ECKSUM, and ENXIO.
|
||||
@ -496,7 +491,7 @@ vdev_disk_io_intr(buf_t *bp)
|
||||
if (zio->io_error == 0 && bp->b_resid != 0)
|
||||
zio->io_error = SET_ERROR(EIO);
|
||||
|
||||
kmem_free(vdb, sizeof (vdev_disk_buf_t));
|
||||
kmem_free(vb, sizeof (vdev_buf_t));
|
||||
|
||||
zio_interrupt(zio);
|
||||
}
|
||||
@ -527,7 +522,7 @@ vdev_disk_io_start(zio_t *zio)
|
||||
{
|
||||
vdev_t *vd = zio->io_vd;
|
||||
vdev_disk_t *dvd = vd->vdev_tsd;
|
||||
vdev_disk_buf_t *vdb;
|
||||
vdev_buf_t *vb;
|
||||
struct dk_callback *dkc;
|
||||
buf_t *bp;
|
||||
int error;
|
||||
@ -591,10 +586,10 @@ vdev_disk_io_start(zio_t *zio)
|
||||
return (ZIO_PIPELINE_CONTINUE);
|
||||
}
|
||||
|
||||
vdb = kmem_alloc(sizeof (vdev_disk_buf_t), KM_SLEEP);
|
||||
vb = kmem_alloc(sizeof (vdev_buf_t), KM_SLEEP);
|
||||
|
||||
vdb->vdb_io = zio;
|
||||
bp = &vdb->vdb_buf;
|
||||
vb->vb_io = zio;
|
||||
bp = &vb->vb_buf;
|
||||
|
||||
bioinit(bp);
|
||||
bp->b_flags = B_BUSY | B_NOCACHE |
|
||||
|
Loading…
Reference in New Issue
Block a user