Update vendor/illumos/dist and vendor/illumos-sys/dist

to illumos-gate 13871:a9c12c2c1647
(zfs changes, illumos issues #3306, #3321)
This commit is contained in:
Martin Matuska 2012-11-08 01:38:30 +00:00
parent 49bb29ae1c
commit fb6c5b06d9
10 changed files with 287 additions and 66 deletions

View File

@ -86,6 +86,7 @@ extern void dump_intent_log(zilog_t *);
uint64_t *zopt_object = NULL;
int zopt_objects = 0;
libzfs_handle_t *g_zfs;
uint64_t max_inflight = 200;
/*
* These libumem hooks provide a reasonable set of defaults for the allocator's
@ -108,13 +109,14 @@ usage(void)
{
(void) fprintf(stderr,
"Usage: %s [-CumdibcsDvhLXFPA] [-t txg] [-e [-p path...]] "
"poolname [object...]\n"
" %s [-divPA] [-e -p path...] dataset [object...]\n"
" %s -m [-LXFPA] [-t txg] [-e [-p path...]] "
"[-U config] [-M inflight I/Os] poolname [object...]\n"
" %s [-divPA] [-e -p path...] [-U config] dataset "
"[object...]\n"
" %s -m [-LXFPA] [-t txg] [-e [-p path...]] [-U config] "
"poolname [vdev [metaslab...]]\n"
" %s -R [-A] [-e [-p path...]] poolname "
"vdev:offset:size[:flags]\n"
" %s -S [-PA] [-e [-p path...]] poolname\n"
" %s -S [-PA] [-e [-p path...]] [-U config] poolname\n"
" %s -l [-uA] device\n"
" %s -C [-A] [-U config]\n\n",
cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname);
@ -161,6 +163,8 @@ usage(void)
(void) fprintf(stderr, " -P print numbers in parseable form\n");
(void) fprintf(stderr, " -t <txg> -- highest txg to use when "
"searching for uberblocks\n");
/*
 * Help text for -M.  Like every other option line it must end in a
 * newline; without it the "Specify an option more than once" sentence
 * that follows is glued onto the end of this line.
 */
(void) fprintf(stderr, " -M <number of inflight I/Os> -- "
    "specify the maximum number of checksumming I/Os [default is 200]\n");
(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
"to make only that option verbose\n");
(void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
@ -2028,6 +2032,45 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0);
}
/*
 * Completion callback for the asynchronous reads issued by zdb_blkptr_cb().
 *
 * Frees the data buffer attached to the zio, drops the pool's
 * spa_scrub_inflight count (waking anyone waiting on spa_scrub_io_cv) and,
 * for a failed non-speculative read, records the error in the zdb_cb_t
 * passed as io_private and prints a diagnostic.  All bookkeeping and the
 * diagnostic happen with spa_scrub_lock held.
 */
static void
zdb_blkptr_done(zio_t *zio)
{
	zdb_cb_t *cb = zio->io_private;
	spa_t *pool = zio->io_spa;
	zbookmark_t *bm = &zio->io_bookmark;
	blkptr_t *blk = zio->io_bp;
	int err = zio->io_error;

	zio_data_buf_free(zio->io_data, zio->io_size);

	mutex_enter(&pool->spa_scrub_lock);
	pool->spa_scrub_inflight--;
	cv_broadcast(&pool->spa_scrub_io_cv);

	/* Nothing to report on success or for speculative reads. */
	if (err == 0 || (zio->io_flags & ZIO_FLAG_SPECULATIVE) != 0) {
		mutex_exit(&pool->spa_scrub_lock);
		return;
	}

	char blkbuf[BP_SPRINTF_LEN];

	cb->zcb_haderrors = 1;
	cb->zcb_errors[err]++;

	/* The block pointer is only decoded at -bb verbosity and above. */
	if (dump_opt['b'] < 2)
		blkbuf[0] = '\0';
	else
		sprintf_blkptr(blkbuf, blk);

	(void) printf("zdb_blkptr_cb: "
	    "Got error %d reading "
	    "<%llu, %llu, %lld, %llx> %s -- skipping\n",
	    err,
	    (u_longlong_t)bm->zb_objset,
	    (u_longlong_t)bm->zb_object,
	    (u_longlong_t)bm->zb_level,
	    (u_longlong_t)bm->zb_blkid,
	    blkbuf);

	mutex_exit(&pool->spa_scrub_lock);
}
/* ARGSUSED */
static int
zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
@ -2049,39 +2092,23 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type));
if (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata)) {
int ioerr;
size_t size = BP_GET_PSIZE(bp);
void *data = malloc(size);
void *data = zio_data_buf_alloc(size);
int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;
/* If it's an intent log block, failure is expected. */
if (zb->zb_level == ZB_ZIL_LEVEL)
flags |= ZIO_FLAG_SPECULATIVE;
ioerr = zio_wait(zio_read(NULL, spa, bp, data, size,
NULL, NULL, ZIO_PRIORITY_ASYNC_READ, flags, zb));
mutex_enter(&spa->spa_scrub_lock);
while (spa->spa_scrub_inflight > max_inflight)
cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
spa->spa_scrub_inflight++;
mutex_exit(&spa->spa_scrub_lock);
free(data);
zio_nowait(zio_read(NULL, spa, bp, data, size,
zdb_blkptr_done, zcb, ZIO_PRIORITY_ASYNC_READ, flags, zb));
if (ioerr && !(flags & ZIO_FLAG_SPECULATIVE)) {
zcb->zcb_haderrors = 1;
zcb->zcb_errors[ioerr]++;
if (dump_opt['b'] >= 2)
sprintf_blkptr(blkbuf, bp);
else
blkbuf[0] = '\0';
(void) printf("zdb_blkptr_cb: "
"Got error %d reading "
"<%llu, %llu, %lld, %llx> %s -- skipping\n",
ioerr,
(u_longlong_t)zb->zb_objset,
(u_longlong_t)zb->zb_object,
(u_longlong_t)zb->zb_level,
(u_longlong_t)zb->zb_blkid,
blkbuf);
}
}
zcb->zcb_readfails = 0;
@ -2283,6 +2310,18 @@ dump_block_stats(spa_t *spa)
zcb.zcb_haderrors |= traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb);
/*
* If we've traversed the data blocks then we need to wait for those
* I/Os to complete. We leverage "The Godfather" zio to wait on
* all async I/Os to complete.
*/
if (dump_opt['c']) {
(void) zio_wait(spa->spa_async_zio_root);
spa->spa_async_zio_root = zio_root(spa, NULL, NULL,
ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE |
ZIO_FLAG_GODFATHER);
}
if (zcb.zcb_haderrors) {
(void) printf("\nError counts:\n\n");
(void) printf("\t%5s %s\n", "errno", "count");
@ -3040,7 +3079,7 @@ main(int argc, char **argv)
dprintf_setup(&argc, argv);
while ((c = getopt(argc, argv, "bcdhilmsuCDRSAFLXevp:t:U:P")) != -1) {
while ((c = getopt(argc, argv, "bcdhilmM:suCDRSAFLXevp:t:U:P")) != -1) {
switch (c) {
case 'b':
case 'c':
@ -3069,6 +3108,15 @@ main(int argc, char **argv)
case 'v':
verbose++;
break;
case 'M':
max_inflight = strtoull(optarg, NULL, 0);
if (max_inflight == 0) {
(void) fprintf(stderr, "maximum number "
"of inflight I/Os must be greater "
"than 0\n");
usage();
}
break;
case 'p':
if (searchdirs == NULL) {
searchdirs = umem_alloc(sizeof (char *),

View File

@ -238,7 +238,7 @@ get_usage(zpool_help_t idx) {
case HELP_REMOVE:
return (gettext("\tremove <pool> <device> ...\n"));
case HELP_REOPEN:
return (""); /* Undocumented command */
return (gettext("\treopen <pool>\n"));
case HELP_SCRUB:
return (gettext("\tscrub [-s] <pool> ...\n"));
case HELP_STATUS:
@ -3550,22 +3550,37 @@ zpool_do_reguid(int argc, char **argv)
* zpool reopen <pool>
*
* Reopen the pool so that the kernel can update the sizes of all vdevs.
*
* NOTE: This command is currently undocumented. If the command is ever
* exposed then the appropriate usage() messages will need to be made.
*/
int
zpool_do_reopen(int argc, char **argv)
{
int c;
int ret = 0;
zpool_handle_t *zhp;
char *pool;
/* check options */
while ((c = getopt(argc, argv, "")) != -1) {
switch (c) {
case '?':
(void) fprintf(stderr, gettext("invalid option '%c'\n"),
optopt);
usage(B_FALSE);
}
}
argc--;
argv++;
if (argc != 1)
return (2);
if (argc < 1) {
(void) fprintf(stderr, gettext("missing pool name\n"));
usage(B_FALSE);
}
if (argc > 1) {
(void) fprintf(stderr, gettext("too many arguments\n"));
usage(B_FALSE);
}
pool = argv[0];
if ((zhp = zpool_open_canfail(g_zfs, pool)) == NULL)

View File

@ -1012,3 +1012,48 @@ zfs_onexit_cb_data(minor_t minor, uint64_t action_handle, void **data)
{
return (0);
}
/*
 * Initialize a buf_t by clearing every byte of it.
 */
void
bioinit(buf_t *bp)
{
	bzero(bp, sizeof (*bp));
}
/*
 * Signal completion of the I/O described by bp.
 *
 * When a b_iodone callback is registered it is invoked and takes over
 * completely; B_DONE is not touched in that case.  Otherwise the buffer is
 * marked B_DONE, asserting that it was not already marked.
 */
void
biodone(buf_t *bp)
{
	if (bp->b_iodone == NULL) {
		ASSERT((bp->b_flags & B_DONE) == 0);
		bp->b_flags |= B_DONE;
		return;
	}

	(void) (*bp->b_iodone)(bp);
}
/*
 * Record an error code on a buffer.
 *
 * A non-zero error sets B_ERROR, zero clears it; in both cases the value
 * is stored in b_error.  Negative error values are not allowed.
 */
void
bioerror(buf_t *bp, int error)
{
	ASSERT(bp != NULL);
	ASSERT(error >= 0);

	if (error == 0)
		bp->b_flags &= ~B_ERROR;
	else
		bp->b_flags |= B_ERROR;

	bp->b_error = error;
}
/*
 * Return the error associated with a buffer.
 *
 * Yields 0 when B_ERROR is clear.  When B_ERROR is set the stored b_error
 * is returned, falling back to EIO if no specific code was recorded.
 */
int
geterror(struct buf *bp)
{
	if ((bp->b_flags & B_ERROR) == 0)
		return (0);

	return (bp->b_error != 0 ? bp->b_error : EIO);
}

View File

@ -603,6 +603,36 @@ extern cyclic_id_t cyclic_add(cyc_handler_t *, cyc_time_t *);
extern void cyclic_remove(cyclic_id_t);
extern int cyclic_reprogram(cyclic_id_t, hrtime_t);
/*
 * Buf structure
 *
 * Flag bits stored in buf_t.b_flags.
 */
#define B_BUSY 0x0001
/* set by biodone() when the buffer has no b_iodone callback */
#define B_DONE 0x0002
/* set or cleared by bioerror(); tested by geterror() */
#define B_ERROR 0x0004
#define B_READ 0x0040 /* read when I/O occurs */
#define B_WRITE 0x0100 /* non-read pseudo-flag */
/*
 * Userland stand-in for the buf_t used to describe a single I/O request.
 * The file vdev code fills one in per zio and completes it via biodone().
 */
typedef struct buf {
/* bitmask of the B_* flags */
int b_flags;
/* number of bytes to transfer */
size_t b_bcount;
union {
/* address of the data buffer for the transfer */
caddr_t b_addr;
} b_un;
/* starting block number; accessed through the b_lblkno alias below */
lldaddr_t _b_blkno;
#define b_lblkno _b_blkno._f
/* bytes left untransferred when the I/O finished */
size_t b_resid;
size_t b_bufsize;
/* optional completion callback; biodone() calls it when non-NULL */
int (*b_iodone)(struct buf *);
/* error code stored by bioerror() and reported by geterror() */
int b_error;
/* opaque pointer for the issuer; the file vdev keeps its vnode here */
void *b_private;
} buf_t;
/* Zero-fill a buf_t before use. */
extern void bioinit(buf_t *);
/* Complete an I/O: run b_iodone if set, otherwise mark the buf B_DONE. */
extern void biodone(buf_t *);
/* Store an error code in the buf and set/clear B_ERROR accordingly. */
extern void bioerror(buf_t *, int);
/* Return the buf's error (EIO if B_ERROR is set without a code), else 0. */
extern int geterror(buf_t *);
#ifdef __cplusplus
}
#endif

View File

@ -11,6 +11,7 @@
.\"
.\"
.\" Copyright 2012, Richard Lowe.
.\" Copyright (c) 2012 by Delphix. All rights reserved.
.\"
.TH "ZDB" "1M" "February 15, 2012" "" ""
@ -19,21 +20,23 @@
.SH "SYNOPSIS"
\fBzdb\fR [-CumdibcsDvhLXFPA] [-e [-p \fIpath\fR...]] [-t \fItxg\fR]
\fIpoolname\fR [\fIobject\fR ...]
[-U \fIcache\fR] [-M \fIinflight I/Os\fR] [\fIpoolname\fR
[\fIobject\fR ...]]
.P
\fBzdb\fR [-divPA] [-e [-p \fIpath\fR...]] \fIdataset\fR [\fIobject\fR ...]
\fBzdb\fR [-divPA] [-e [-p \fIpath\fR...]] [-U \fIcache\fR]
\fIdataset\fR [\fIobject\fR ...]
.P
\fBzdb\fR -m [-LXFPA] [-t \fItxg\fR] [-e [-p \fIpath\fR...]] \fIpoolname\fR
[\fIvdev\fR [\fImetaslab\fR ...]]
\fBzdb\fR -m [-LXFPA] [-t \fItxg\fR] [-e [-p \fIpath\fR...]] [-U \fIcache\fR]
\fIpoolname\fR [\fIvdev\fR [\fImetaslab\fR ...]]
.P
\fBzdb\fR -R [-A] [-e [-p \fIpath\fR...]] \fIpoolname\fR
\fBzdb\fR -R [-A] [-e [-p \fIpath\fR...]] [-U \fIcache\fR] \fIpoolname\fR
\fIvdev\fR:\fIoffset\fR:\fIsize\fR[:\fIflags\fR]
.P
\fBzdb\fR -S [-AP] [-e [-p \fIpath\fR...]] \fIpoolname\fR
\fBzdb\fR -S [-AP] [-e [-p \fIpath\fR...]] [-U \fIcache\fR] \fIpoolname\fR
.P
\fBzdb\fR -l [-uA] \fIdevice\fR
@ -354,6 +357,18 @@ Attempt to make an unreadable pool readable by trying progressively older
transactions.
.RE
.sp
.ne 2
.na
\fB-M \fIinflight I/Os\fR \fR
.ad
.sp .6
.RS 4n
Limit the number of outstanding checksum I/Os to the specified value. The
default value is 200. This option affects the performance of the \fB-c\fR
option.
.RE
.sp
.ne 2
.na
@ -384,8 +399,7 @@ and their associated transaction numbers.
.ad
.sp .6
.RS 4n
Use a cache file other than \fB/etc/zfs/zpool.cache\fR. This option is only
valid with \fB-C\fR
Use a cache file other than \fB/etc/zfs/zpool.cache\fR.
.RE
.sp

View File

@ -111,6 +111,11 @@ zpool \- configures ZFS storage pools
\fBzpool reguid\fR \fIpool\fR
.fi
.LP
.nf
\fBzpool reopen\fR \fIpool\fR
.fi
.LP
.nf
\fBzpool remove\fR \fIpool\fR \fIdevice\fR ...
@ -1550,8 +1555,18 @@ become available to the pool.
.ad
.sp .6
.RS 4n
Generates a new unique identifier for the pool. You must ensure that all devices in this pool are online and
healthy before performing this action.
Generates a new unique identifier for the pool. You must ensure that all
devices in this pool are online and healthy before performing this action.
.RE
.sp
.ne 2
.na
\fB\fBzpool reopen\fR \fIpool\fR
.ad
.sp .6
.RS 4n
Reopen all the vdevs associated with the pool.
.RE
.sp

View File

@ -322,6 +322,14 @@ extern void vdev_set_min_asize(vdev_t *vd);
*/
extern int zfs_vdev_cache_size;
/*
 * The vdev_buf_t is used to translate between zio_t and buf_t, and back again.
 *
 * vb_buf must remain the first member: the I/O completion handlers receive
 * only the buf_t pointer and cast it directly back to a vdev_buf_t pointer
 * to recover vb_io.
 */
typedef struct vdev_buf {
buf_t vb_buf; /* buffer that describes the io */
zio_t *vb_io; /* pointer back to the original zio_t */
} vdev_buf_t;
#ifdef __cplusplus
}
#endif

View File

@ -40,11 +40,6 @@
extern ldi_ident_t zfs_li;
typedef struct vdev_disk_buf {
buf_t vdb_buf;
zio_t *vdb_io;
} vdev_disk_buf_t;
static void
vdev_disk_hold(vdev_t *vd)
{
@ -397,8 +392,8 @@ vdev_disk_physio(ldi_handle_t vd_lh, caddr_t data, size_t size,
static void
vdev_disk_io_intr(buf_t *bp)
{
vdev_disk_buf_t *vdb = (vdev_disk_buf_t *)bp;
zio_t *zio = vdb->vdb_io;
vdev_buf_t *vb = (vdev_buf_t *)bp;
zio_t *zio = vb->vb_io;
/*
* The rest of the zio stack only deals with EIO, ECKSUM, and ENXIO.
@ -410,7 +405,7 @@ vdev_disk_io_intr(buf_t *bp)
if (zio->io_error == 0 && bp->b_resid != 0)
zio->io_error = EIO;
kmem_free(vdb, sizeof (vdev_disk_buf_t));
kmem_free(vb, sizeof (vdev_buf_t));
zio_interrupt(zio);
}
@ -441,7 +436,7 @@ vdev_disk_io_start(zio_t *zio)
{
vdev_t *vd = zio->io_vd;
vdev_disk_t *dvd = vd->vdev_tsd;
vdev_disk_buf_t *vdb;
vdev_buf_t *vb;
struct dk_callback *dkc;
buf_t *bp;
int error;
@ -505,10 +500,10 @@ vdev_disk_io_start(zio_t *zio)
return (ZIO_PIPELINE_CONTINUE);
}
vdb = kmem_alloc(sizeof (vdev_disk_buf_t), KM_SLEEP);
vb = kmem_alloc(sizeof (vdev_buf_t), KM_SLEEP);
vdb->vdb_io = zio;
bp = &vdb->vdb_buf;
vb->vb_io = zio;
bp = &vb->vb_buf;
bioinit(bp);
bp->b_flags = B_BUSY | B_NOCACHE |

View File

@ -25,6 +25,7 @@
#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/vdev_file.h>
#include <sys/vdev_impl.h>
#include <sys/zio.h>
@ -140,12 +141,55 @@ vdev_file_close(vdev_t *vd)
vd->vdev_tsd = NULL;
}
/*
 * Completion ("interrupt") side of file vdev I/O.  Invoked with the buf_t
 * embedded in a vdev_buf_t once the I/O has finished: it translates the
 * buffer's outcome into zio->io_error, releases the vdev_buf_t and hands the
 * zio on to zio_interrupt().  The structure intentionally mirrors the disk
 * vdev implementation.
 *
 * Any buffer error is reported as EIO; a transfer that succeeded but left a
 * residual is reported as ENOSPC.
 */
static void
vdev_file_io_intr(buf_t *bp)
{
	vdev_buf_t *vbuf = (vdev_buf_t *)bp;
	zio_t *zio = vbuf->vb_io;

	if (geterror(bp) != 0)
		zio->io_error = EIO;
	else if (bp->b_resid != 0)
		zio->io_error = ENOSPC;
	else
		zio->io_error = 0;

	kmem_free(vbuf, sizeof (vdev_buf_t));
	zio_interrupt(zio);
}
/*
 * Perform the file I/O described by a buf_t (passed as an opaque argument).
 *
 * The vnode is taken from b_private and the direction from B_READ in
 * b_flags.  On success the residual count is saved in b_resid; on failure
 * the error is recorded with bioerror().  Either way the buffer is then
 * completed with biodone().
 */
static void
vdev_file_io_strategy(void *arg)
{
	buf_t *b = arg;
	vnode_t *vnode = b->b_private;
	ssize_t left;
	int rc;

	rc = vn_rdwr((b->b_flags & B_READ) ? UIO_READ : UIO_WRITE,
	    vnode, b->b_un.b_addr, b->b_bcount, ldbtob(b->b_lblkno),
	    UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &left);

	if (rc != 0)
		bioerror(b, rc);
	else
		b->b_resid = left;

	biodone(b);
}
static int
vdev_file_io_start(zio_t *zio)
{
spa_t *spa = zio->io_spa;
vdev_t *vd = zio->io_vd;
vdev_file_t *vf = vd->vdev_tsd;
ssize_t resid;
vdev_buf_t *vb;
buf_t *bp;
if (zio->io_type == ZIO_TYPE_IOCTL) {
/* XXPOLICY */
@ -166,15 +210,22 @@ vdev_file_io_start(zio_t *zio)
return (ZIO_PIPELINE_CONTINUE);
}
zio->io_error = vn_rdwr(zio->io_type == ZIO_TYPE_READ ?
UIO_READ : UIO_WRITE, vf->vf_vnode, zio->io_data,
zio->io_size, zio->io_offset, UIO_SYSSPACE,
0, RLIM64_INFINITY, kcred, &resid);
vb = kmem_alloc(sizeof (vdev_buf_t), KM_SLEEP);
if (resid != 0 && zio->io_error == 0)
zio->io_error = ENOSPC;
vb->vb_io = zio;
bp = &vb->vb_buf;
zio_interrupt(zio);
bioinit(bp);
bp->b_flags = (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE);
bp->b_bcount = zio->io_size;
bp->b_un.b_addr = zio->io_data;
bp->b_lblkno = lbtodb(zio->io_offset);
bp->b_bufsize = zio->io_size;
bp->b_private = vf->vf_vnode;
bp->b_iodone = (int (*)())vdev_file_io_intr;
taskq_dispatch_ent(spa->spa_zio_taskq[ZIO_TYPE_FREE][ZIO_TASKQ_ISSUE],
vdev_file_io_strategy, bp, 0, &zio->io_tqent);
return (ZIO_PIPELINE_STOP);
}

View File

@ -2917,7 +2917,7 @@ zio_done(zio_t *zio)
* Hand it off to the otherwise-unused claim taskq.
*/
ASSERT(zio->io_tqent.tqent_next == NULL);
(void) taskq_dispatch_ent(
taskq_dispatch_ent(
spa->spa_zio_taskq[ZIO_TYPE_CLAIM][ZIO_TASKQ_ISSUE],
(task_func_t *)zio_reexecute, zio, 0,
&zio->io_tqent);