From 6c426f3bd473343f9e556a900758e03b8269d0f9 Mon Sep 17 00:00:00 2001 From: Xin LI Date: Sat, 8 Nov 2014 06:34:37 +0000 Subject: [PATCH] 5244 zio pipeline callers should explicitly invoke next stage Reviewed by: Adam Leventhal Reviewed by: Alex Reece Reviewed by: Christopher Siden Reviewed by: Matthew Ahrens Reviewed by: Richard Elling Reviewed by: Dan McDonald Reviewed by: Steven Hartland Approved by: Gordon Ross Author: George Wilson illumos/illumos-gate@738f37bc3dcd61e8a893af0f2d466d76690b70ec --- lib/libzpool/common/taskq.c | 4 ++++ uts/common/fs/zfs/sys/vdev_impl.h | 2 +- uts/common/fs/zfs/sys/zio.h | 3 --- uts/common/fs/zfs/vdev_disk.c | 15 ++++++++------- uts/common/fs/zfs/vdev_file.c | 12 ++++++------ uts/common/fs/zfs/vdev_mirror.c | 9 +++++---- uts/common/fs/zfs/vdev_missing.c | 6 +++--- uts/common/fs/zfs/vdev_raidz.c | 9 +++++---- uts/common/fs/zfs/zio.c | 23 ++++++++++++++++++++--- 9 files changed, 52 insertions(+), 31 deletions(-) diff --git a/lib/libzpool/common/taskq.c b/lib/libzpool/common/taskq.c index 2c5dfd86dcc0..a4ab58963d75 100644 --- a/lib/libzpool/common/taskq.c +++ b/lib/libzpool/common/taskq.c @@ -25,6 +25,7 @@ /* * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright 2012 Garrett D'Amore . All rights reserved. + * Copyright (c) 2014 by Delphix. All rights reserved. */ #include @@ -33,8 +34,10 @@ int taskq_now; taskq_t *system_taskq; #define TASKQ_ACTIVE 0x00010000 +#define TASKQ_NAMELEN 31 struct taskq { + char tq_name[TASKQ_NAMELEN + 1]; kmutex_t tq_lock; krwlock_t tq_threadlock; kcondvar_t tq_dispatch_cv; @@ -247,6 +250,7 @@ taskq_create(const char *name, int nthreads, pri_t pri, cv_init(&tq->tq_dispatch_cv, NULL, CV_DEFAULT, NULL); cv_init(&tq->tq_wait_cv, NULL, CV_DEFAULT, NULL); cv_init(&tq->tq_maxalloc_cv, NULL, CV_DEFAULT, NULL); + (void) strncpy(tq->tq_name, name, TASKQ_NAMELEN + 1); tq->tq_flags = flags | TASKQ_ACTIVE; tq->tq_active = nthreads; tq->tq_nthreads = nthreads; diff --git a/uts/common/fs/zfs/sys/vdev_impl.h b/uts/common/fs/zfs/sys/vdev_impl.h index 9c22384f733d..6d9bcb17d00f 100644 --- a/uts/common/fs/zfs/sys/vdev_impl.h +++ b/uts/common/fs/zfs/sys/vdev_impl.h @@ -60,7 +60,7 @@ typedef int vdev_open_func_t(vdev_t *vd, uint64_t *size, uint64_t *max_size, uint64_t *ashift); typedef void vdev_close_func_t(vdev_t *vd); typedef uint64_t vdev_asize_func_t(vdev_t *vd, uint64_t psize); -typedef int vdev_io_start_func_t(zio_t *zio); +typedef void vdev_io_start_func_t(zio_t *zio); typedef void vdev_io_done_func_t(zio_t *zio); typedef void vdev_state_change_func_t(vdev_t *vd, int, int); typedef void vdev_hold_func_t(vdev_t *vd); diff --git a/uts/common/fs/zfs/sys/zio.h b/uts/common/fs/zfs/sys/zio.h index f158042eb40c..f6cf259bf71a 100644 --- a/uts/common/fs/zfs/sys/zio.h +++ b/uts/common/fs/zfs/sys/zio.h @@ -151,9 +151,6 @@ typedef enum zio_priority { ZIO_PRIORITY_NOW /* non-queued i/os (e.g. free) */ } zio_priority_t; -#define ZIO_PIPELINE_CONTINUE 0x100 -#define ZIO_PIPELINE_STOP 0x101 - enum zio_flag { /* * Flags inherited by gang, ddt, and vdev children, diff --git a/uts/common/fs/zfs/vdev_disk.c b/uts/common/fs/zfs/vdev_disk.c index 3a4bc20f5be4..ed4a8b773bf4 100644 --- a/uts/common/fs/zfs/vdev_disk.c +++ b/uts/common/fs/zfs/vdev_disk.c @@ -715,7 +715,7 @@ vdev_disk_ioctl_done(void *zio_arg, int error) zio_interrupt(zio); } -static int +static void vdev_disk_io_start(zio_t *zio) { vdev_t *vd = zio->io_vd; @@ -731,14 +731,16 @@ vdev_disk_io_start(zio_t *zio) */ if (dvd == NULL || (dvd->vd_ldi_offline && dvd->vd_lh == NULL)) { zio->io_error = ENXIO; - return (ZIO_PIPELINE_CONTINUE); + zio_interrupt(zio); + return; } if (zio->io_type == ZIO_TYPE_IOCTL) { /* XXPOLICY */ if (!vdev_readable(vd)) { zio->io_error = SET_ERROR(ENXIO); - return (ZIO_PIPELINE_CONTINUE); + zio_interrupt(zio); + return; } switch (zio->io_cmd) { @@ -769,7 +771,7 @@ vdev_disk_io_start(zio_t *zio) * and will call vdev_disk_ioctl_done() * upon completion. */ - return (ZIO_PIPELINE_STOP); + return; } if (error == ENOTSUP || error == ENOTTY) { @@ -790,7 +792,8 @@ vdev_disk_io_start(zio_t *zio) zio->io_error = SET_ERROR(ENOTSUP); } - return (ZIO_PIPELINE_CONTINUE); + zio_execute(zio); + return; } vb = kmem_alloc(sizeof (vdev_buf_t), KM_SLEEP); @@ -811,8 +814,6 @@ vdev_disk_io_start(zio_t *zio) /* ldi_strategy() will return non-zero only on programming errors */ VERIFY(ldi_strategy(dvd->vd_lh, bp) == 0); - - return (ZIO_PIPELINE_STOP); } static void diff --git a/uts/common/fs/zfs/vdev_file.c b/uts/common/fs/zfs/vdev_file.c index a05abeb9d910..5dfc331d20a4 100644 --- a/uts/common/fs/zfs/vdev_file.c +++ b/uts/common/fs/zfs/vdev_file.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2011, 2014 by Delphix. All rights reserved. */ #include @@ -182,7 +182,7 @@ vdev_file_io_strategy(void *arg) } } -static int +static void vdev_file_io_start(zio_t *zio) { vdev_t *vd = zio->io_vd; @@ -194,7 +194,8 @@ vdev_file_io_start(zio_t *zio) /* XXPOLICY */ if (!vdev_readable(vd)) { zio->io_error = SET_ERROR(ENXIO); - return (ZIO_PIPELINE_CONTINUE); + zio_interrupt(zio); + return; } switch (zio->io_cmd) { @@ -206,7 +207,8 @@ vdev_file_io_start(zio_t *zio) zio->io_error = SET_ERROR(ENOTSUP); } - return (ZIO_PIPELINE_CONTINUE); + zio_execute(zio); + return; } vb = kmem_alloc(sizeof (vdev_buf_t), KM_SLEEP); @@ -225,8 +227,6 @@ vdev_file_io_start(zio_t *zio) VERIFY3U(taskq_dispatch(system_taskq, vdev_file_io_strategy, bp, TQ_SLEEP), !=, 0); - - return (ZIO_PIPELINE_STOP); } /* ARGSUSED */ diff --git a/uts/common/fs/zfs/vdev_mirror.c b/uts/common/fs/zfs/vdev_mirror.c index f62c1e3617d8..8749e539f466 100644 --- a/uts/common/fs/zfs/vdev_mirror.c +++ b/uts/common/fs/zfs/vdev_mirror.c @@ -24,7 +24,7 @@ */ /* - * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2012, 2014 by Delphix. All rights reserved. */ #include @@ -260,7 +260,7 @@ vdev_mirror_child_select(zio_t *zio) return (-1); } -static int +static void vdev_mirror_io_start(zio_t *zio) { mirror_map_t *mm; @@ -285,7 +285,8 @@ vdev_mirror_io_start(zio_t *zio) zio->io_type, zio->io_priority, 0, vdev_mirror_scrub_done, mc)); } - return (ZIO_PIPELINE_CONTINUE); + zio_execute(zio); + return; } /* * For normal reads just pick one child. @@ -311,7 +312,7 @@ vdev_mirror_io_start(zio_t *zio) c++; } - return (ZIO_PIPELINE_CONTINUE); + zio_execute(zio); } static int diff --git a/uts/common/fs/zfs/vdev_missing.c b/uts/common/fs/zfs/vdev_missing.c index b9eb99d18005..228757334234 100644 --- a/uts/common/fs/zfs/vdev_missing.c +++ b/uts/common/fs/zfs/vdev_missing.c @@ -24,7 +24,7 @@ */ /* - * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2012, 2014 by Delphix. All rights reserved. */ /* @@ -66,11 +66,11 @@ vdev_missing_close(vdev_t *vd) } /* ARGSUSED */ -static int +static void vdev_missing_io_start(zio_t *zio) { zio->io_error = SET_ERROR(ENOTSUP); - return (ZIO_PIPELINE_CONTINUE); + zio_execute(zio); } /* ARGSUSED */ diff --git a/uts/common/fs/zfs/vdev_raidz.c b/uts/common/fs/zfs/vdev_raidz.c index 480141dc6379..f686d56e9e5f 100644 --- a/uts/common/fs/zfs/vdev_raidz.c +++ b/uts/common/fs/zfs/vdev_raidz.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013 by Delphix. All rights reserved. + * Copyright (c) 2012, 2014 by Delphix. All rights reserved. * Copyright (c) 2013, Joyent, Inc. All rights reserved. */ @@ -1711,7 +1711,7 @@ vdev_raidz_child_done(zio_t *zio) * vdevs have had errors, then create zio read operations to the parity * columns' VDevs as well. */ -static int +static void vdev_raidz_io_start(zio_t *zio) { vdev_t *vd = zio->io_vd; @@ -1759,7 +1759,8 @@ vdev_raidz_io_start(zio_t *zio) ZIO_FLAG_NODATA | ZIO_FLAG_OPTIONAL, NULL, NULL)); } - return (ZIO_PIPELINE_CONTINUE); + zio_execute(zio); + return; } ASSERT(zio->io_type == ZIO_TYPE_READ); @@ -1799,7 +1800,7 @@ vdev_raidz_io_start(zio_t *zio) } } - return (ZIO_PIPELINE_CONTINUE); + zio_execute(zio); } diff --git a/uts/common/fs/zfs/zio.c b/uts/common/fs/zfs/zio.c index b154cb639d5d..302d692c64a6 100644 --- a/uts/common/fs/zfs/zio.c +++ b/uts/common/fs/zfs/zio.c @@ -63,6 +63,9 @@ kmem_cache_t *zio_data_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT]; extern vmem_t *zio_alloc_arena; #endif +#define ZIO_PIPELINE_CONTINUE 0x100 +#define ZIO_PIPELINE_STOP 0x101 + /* * The following actions directly effect the spa's sync-to-convergence logic. * The values below define the sync pass when we start performing the action. @@ -2466,6 +2469,18 @@ zio_free_zil(spa_t *spa, uint64_t txg, blkptr_t *bp) * Read and write to physical devices * ========================================================================== */ + + +/* + * Issue an I/O to the underlying vdev. Typically the issue pipeline + * stops after this stage and will resume upon I/O completion. + * However, there are instances where the vdev layer may need to + * continue the pipeline when an I/O was not issued. Since the I/O + * that was sent to the vdev layer might be different than the one + * currently active in the pipeline (see vdev_queue_io()), we explicitly + * force the underlying vdev layers to call either zio_execute() or + * zio_interrupt() to ensure that the pipeline continues with the correct I/O. + */ static int zio_vdev_io_start(zio_t *zio) { @@ -2483,7 +2498,8 @@ zio_vdev_io_start(zio_t *zio) /* * The mirror_ops handle multiple DVAs in a single BP. */ - return (vdev_mirror_ops.vdev_op_io_start(zio)); + vdev_mirror_ops.vdev_op_io_start(zio); + return (ZIO_PIPELINE_STOP); } /* @@ -2491,7 +2507,7 @@ zio_vdev_io_start(zio_t *zio) * can quickly react to certain workloads. In particular, we care * about non-scrubbing, top-level reads and writes with the following * characteristics: - * - synchronous writes of user data to non-slog devices + * - synchronous writes of user data to non-slog devices * - any reads of user data * When these conditions are met, adjust the timestamp of spa_last_io * which allows the scan thread to adjust its workload accordingly. @@ -2577,7 +2593,8 @@ zio_vdev_io_start(zio_t *zio) } } - return (vd->vdev_ops->vdev_op_io_start(zio)); + vd->vdev_ops->vdev_op_io_start(zio); + return (ZIO_PIPELINE_STOP); } static int