Fix synchronous behavior in __vdev_disk_physio()

Commit b39c22b set the READ_SYNC and WRITE_SYNC flags for a bio
based on the ZIO_PRIORITY_* flag passed in.  This had the unnoticed
side-effect of making vdev_disk_io_start() synchronous for
certain I/Os.

This in turn resulted in vdev_disk_io_start() being able to
re-dispatch zios, which would result in RCU stalls when a disk
was removed from the system.  Additionally, this could negatively
impact performance and explains the performance regressions reported
in both #3829 and #3780.

This patch resolves the issue by making the blocking behavior
dependent on a 'wait' flag being passed rather than overloading
the passed bio flags.

Finally, the WRITE_SYNC and READ_SYNC behavior is restricted to
non-rotational devices where there is no benefit to queuing to
aggregate the I/O.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Issue #3652
Issue #3780
Issue #3785
Issue #3817
Issue #3821
Issue #3829
Issue #3832
Issue #3870
This commit is contained in:
Brian Behlendorf 2015-09-24 16:32:25 -07:00
parent ef5b2e1048
commit 5592404784
3 changed files with 8 additions and 81 deletions

View File

@ -1,50 +0,0 @@
dnl #
dnl # Preferred interface for flagging a synchronous bio:
dnl # 2.6.12-2.6.29: BIO_RW_SYNC
dnl # 2.6.30-2.6.35: BIO_RW_SYNCIO
dnl # 2.6.36-2.6.xx: REQ_SYNC
dnl #
dnl # ZFS_AC_KERNEL_BIO_RW_SYNC probes for the oldest of these names.
dnl # The test body includes <linux/bio.h> and assigns BIO_RW_SYNC to an
dnl # int (marked unused so the assignment alone does not trigger a
dnl # warning).  When the probe reports "yes", HAVE_BIO_RW_SYNC is defined
dnl # to 1; when it reports "no", nothing is defined.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_BIO_RW_SYNC], [
AC_MSG_CHECKING([whether BIO_RW_SYNC is defined])
ZFS_LINUX_TRY_COMPILE([
#include <linux/bio.h>
],[
int flags __attribute__ ((unused));
flags = BIO_RW_SYNC;
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_BIO_RW_SYNC, 1, [BIO_RW_SYNC is defined])
],[
AC_MSG_RESULT(no)
])
])
dnl #
dnl # Probe for the BIO_RW_SYNCIO name.  The test body includes
dnl # <linux/bio.h> and assigns BIO_RW_SYNCIO to an int (marked unused).
dnl # When the probe reports "yes", HAVE_BIO_RW_SYNCIO is defined to 1;
dnl # when it reports "no", nothing is defined.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_BIO_RW_SYNCIO], [
AC_MSG_CHECKING([whether BIO_RW_SYNCIO is defined])
ZFS_LINUX_TRY_COMPILE([
#include <linux/bio.h>
],[
int flags __attribute__ ((unused));
flags = BIO_RW_SYNCIO;
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_BIO_RW_SYNCIO, 1, [BIO_RW_SYNCIO is defined])
],[
AC_MSG_RESULT(no)
])
])
dnl #
dnl # Probe for the REQ_SYNC name.  The test body includes <linux/bio.h>
dnl # and assigns REQ_SYNC to an int (marked unused).  When the probe
dnl # reports "yes", HAVE_REQ_SYNC is defined to 1; when it reports "no",
dnl # nothing is defined.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_REQ_SYNC], [
AC_MSG_CHECKING([whether REQ_SYNC is defined])
ZFS_LINUX_TRY_COMPILE([
#include <linux/bio.h>
],[
int flags __attribute__ ((unused));
flags = REQ_SYNC;
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_REQ_SYNC, 1, [REQ_SYNC is defined])
],[
AC_MSG_RESULT(no)
])
])

View File

@ -25,9 +25,6 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
ZFS_AC_KERNEL_BIO_END_IO_T_ARGS
ZFS_AC_KERNEL_BIO_RW_BARRIER
ZFS_AC_KERNEL_BIO_RW_DISCARD
ZFS_AC_KERNEL_BIO_RW_SYNC
ZFS_AC_KERNEL_BIO_RW_SYNCIO
ZFS_AC_KERNEL_REQ_SYNC
ZFS_AC_KERNEL_BLK_QUEUE_FLUSH
ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS
ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS

View File

@ -369,27 +369,6 @@ vdev_disk_dio_free(dio_request_t *dr)
sizeof (struct bio *) * dr->dr_bio_count);
}
/*
 * Test whether the request's dr_rw flags carry the "synchronous" bit.
 *
 * The name of that bit depends on which interface configure detected;
 * the first available of BIO_RW_SYNC, BIO_RW_SYNCIO and REQ_SYNC is
 * used, in that order.  The build fails if none of them was found.
 *
 * Returns the masked flag value: non-zero when the bit is set, 0 when
 * it is clear.
 */
static int
vdev_disk_dio_is_sync(dio_request_t *dr)
{
#if defined(HAVE_BIO_RW_SYNC)
	/* 2.6.12-2.6.29: BIO_RW_SYNC is used as a bit position */
	return (dr->dr_rw & (1 << BIO_RW_SYNC));
#elif defined(HAVE_BIO_RW_SYNCIO)
	/* 2.6.30-2.6.35: BIO_RW_SYNCIO is used as a bit position */
	return (dr->dr_rw & (1 << BIO_RW_SYNCIO));
#elif defined(HAVE_REQ_SYNC)
	/* 2.6.36-2.6.xx: REQ_SYNC is applied directly as a mask */
	return (dr->dr_rw & REQ_SYNC);
#else
#error "Unable to determine bio sync flag"
#endif
}
static void
vdev_disk_dio_get(dio_request_t *dr)
{
@ -444,7 +423,7 @@ BIO_END_IO_PROTO(vdev_disk_physio_completion, bio, error)
rc = vdev_disk_dio_put(dr);
/* Wake up the synchronous waiter when this is the last outstanding bio */
if ((rc == 1) && vdev_disk_dio_is_sync(dr))
if (rc == 1)
complete(&dr->dr_comp);
}
@ -512,7 +491,7 @@ vdev_submit_bio(int rw, struct bio *bio)
static int
__vdev_disk_physio(struct block_device *bdev, zio_t *zio, caddr_t kbuf_ptr,
size_t kbuf_size, uint64_t kbuf_offset, int flags)
size_t kbuf_size, uint64_t kbuf_offset, int flags, int wait)
{
dio_request_t *dr;
caddr_t bio_ptr;
@ -603,7 +582,7 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio, caddr_t kbuf_ptr,
* only synchronous consumer is vdev_disk_read_rootlabel(); all other
* IO originating from vdev_disk_io_start() is asynchronous.
*/
if (vdev_disk_dio_is_sync(dr)) {
if (wait) {
wait_for_completion(&dr->dr_comp);
error = dr->dr_error;
ASSERT3S(atomic_read(&dr->dr_ref), ==, 1);
@ -619,7 +598,7 @@ vdev_disk_physio(struct block_device *bdev, caddr_t kbuf,
size_t size, uint64_t offset, int flags)
{
bio_set_flags_failfast(bdev, &flags);
return (__vdev_disk_physio(bdev, NULL, kbuf, size, offset, flags));
return (__vdev_disk_physio(bdev, NULL, kbuf, size, offset, flags, 1));
}
BIO_END_IO_PROTO(vdev_disk_io_flush_completion, bio, rc)
@ -671,6 +650,7 @@ vdev_disk_io_start(zio_t *zio)
{
vdev_t *v = zio->io_vd;
vdev_disk_t *vd = v->vdev_tsd;
zio_priority_t pri = zio->io_priority;
int flags, error;
switch (zio->io_type) {
@ -710,14 +690,14 @@ vdev_disk_io_start(zio_t *zio)
zio_execute(zio);
return;
case ZIO_TYPE_WRITE:
if (zio->io_priority == ZIO_PRIORITY_SYNC_WRITE)
if ((pri == ZIO_PRIORITY_SYNC_WRITE) && (v->vdev_nonrot))
flags = WRITE_SYNC;
else
flags = WRITE;
break;
case ZIO_TYPE_READ:
if (zio->io_priority == ZIO_PRIORITY_SYNC_READ)
if ((pri == ZIO_PRIORITY_SYNC_READ) && (v->vdev_nonrot))
flags = READ_SYNC;
else
flags = READ;
@ -730,7 +710,7 @@ vdev_disk_io_start(zio_t *zio)
}
error = __vdev_disk_physio(vd->vd_bdev, zio, zio->io_data,
zio->io_size, zio->io_offset, flags);
zio->io_size, zio->io_offset, flags, 0);
if (error) {
zio->io_error = error;
zio_interrupt(zio);