From 72f53c5694bfd57b56e79fc3b7c4390bce4072d2 Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Fri, 21 Dec 2012 10:15:34 +0800 Subject: [PATCH 1/2] Revert part of "Log I/Os longer than zio_delay_max (30s default)" This reverts commit 9dcb97198338ba2d8764dd5604b278118612f74 which was originally introduced to debug occasional slow I/Os. These I/Os would complete eventually but were observed to take several hundred seconds. The root cause of this issue was the CFQ scheduler which can, under certain conditions, excessively delay an I/O from being issued to the device. This issue was mitigated somewhat by commit 84daaddedbfc9cf4bd1490d8a6f4b2967051e308 which ensures the I/O elevator gets changed even for DM style devices. This change isn't in any way harmful but it does conflict with a required change to properly account for I/O wait time. Because Linux does not export the io_schedule_timeout() function we must instead rely on io_schedule() via cv_wait_io(). The additional debugging information which was added to the delay event has been intentionally left in place. Signed-off-by: Brian Behlendorf --- module/zfs/zio.c | 32 ++++++++++---------------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/module/zfs/zio.c b/module/zfs/zio.c index bcfc081d1a01..ece3329d061a 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -1305,34 +1305,18 @@ __zio_execute(zio_t *zio) int zio_wait(zio_t *zio) { - uint64_t timeout; int error; ASSERT(zio->io_stage == ZIO_STAGE_OPEN); ASSERT(zio->io_executor == NULL); zio->io_waiter = curthread; - timeout = ddi_get_lbolt() + (zio_delay_max / MILLISEC * hz); __zio_execute(zio); mutex_enter(&zio->io_lock); - while (zio->io_executor != NULL) { - /* - * Wake up periodically to prevent the kernel from complaining - * about a blocked task. However, check zio_delay_max to see - * if the I/O has exceeded the timeout and post an ereport. 
- */ - cv_timedwait_interruptible(&zio->io_cv, &zio->io_lock, - ddi_get_lbolt() + hz); - - if (timeout && (ddi_get_lbolt() > timeout)) { - zio->io_delay = zio_delay_max; - zfs_ereport_post(FM_EREPORT_ZFS_DELAY, - zio->io_spa, zio->io_vd, zio, 0, 0); - timeout = 0; - } - } + while (zio->io_executor != NULL) + cv_wait(&zio->io_cv, &zio->io_lock); mutex_exit(&zio->io_lock); error = zio->io_error; @@ -2905,11 +2889,15 @@ zio_done(zio_t *zio) vdev_stat_update(zio, zio->io_size); /* - * When an I/O completes but was slow post an ereport. + * If this I/O is attached to a particular vdev is slow, exeeding + * 30 seconds to complete, post an error described the I/O delay. + * We ignore these errors if the device is currently unavailable. */ - if (zio->io_delay >= zio_delay_max) - zfs_ereport_post(FM_EREPORT_ZFS_DELAY, zio->io_spa, - zio->io_vd, zio, 0, 0); + if (zio->io_delay >= zio_delay_max) { + if (zio->io_vd != NULL && !vdev_is_dead(zio->io_vd)) + zfs_ereport_post(FM_EREPORT_ZFS_DELAY, zio->io_spa, + zio->io_vd, zio, 0, 0); + } if (zio->io_error) { /* From 72938d6905d9dcd60f7a5ff68ae469e74c248228 Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Fri, 21 Dec 2012 10:40:20 +0800 Subject: [PATCH 2/2] Use cv_wait_io() which will account for iowait Update zio_wait() to use cv_wait_io() to ensure the iowait time is properly accounted for. 
Signed-off-by: Brian Behlendorf --- include/sys/zfs_context.h | 1 + module/zfs/zio.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h index ad282c43c486..6b00a5d5db80 100644 --- a/include/sys/zfs_context.h +++ b/include/sys/zfs_context.h @@ -322,6 +322,7 @@ extern void cv_signal(kcondvar_t *cv); extern void cv_broadcast(kcondvar_t *cv); #define cv_timedwait_interruptible(cv, mp, at) cv_timedwait(cv, mp, at) #define cv_wait_interruptible(cv, mp) cv_wait(cv, mp) +#define cv_wait_io(cv, mp) cv_wait(cv, mp) /* * kstat creation, installation and deletion diff --git a/module/zfs/zio.c b/module/zfs/zio.c index ece3329d061a..bfb817b7860b 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -1316,7 +1316,7 @@ zio_wait(zio_t *zio) mutex_enter(&zio->io_lock); while (zio->io_executor != NULL) - cv_wait(&zio->io_cv, &zio->io_lock); + cv_wait_io(&zio->io_cv, &zio->io_lock); mutex_exit(&zio->io_lock); error = zio->io_error;