Fix synchronous behavior in __vdev_disk_physio()
Commit b39c22b
set the READ_SYNC and WRITE_SYNC flags for a bio
based on the ZIO_PRIORITY_* flag passed in. This had the unnoticed
side-effect of making vdev_disk_io_start() synchronous for
certain I/Os.
This in turn resulted in vdev_disk_io_start() being able to
re-dispatch zios, which would result in RCU stalls when a disk
was removed from the system. Additionally, this could negatively
impact performance and explains the performance regressions reported
in both #3829 and #3780.
This patch resolves the issue by making the blocking behavior
dependent on a 'wait' flag being passed rather than overloading
the passed bio flags.
Finally, the WRITE_SYNC and READ_SYNC behavior is restricted to
non-rotational devices, where there is no benefit to queuing to
aggregate the I/O.
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Issue #3652
Issue #3780
Issue #3785
Issue #3817
Issue #3821
Issue #3829
Issue #3832
Issue #3870
This commit is contained in:
parent
ef5b2e1048
commit
5592404784
@ -1,50 +0,0 @@
|
||||
dnl #
|
||||
dnl # Preferred interface for flagging a synchronous bio:
|
||||
dnl # 2.6.12-2.6.29: BIO_RW_SYNC
|
||||
dnl # 2.6.30-2.6.35: BIO_RW_SYNCIO
|
||||
dnl # 2.6.36-2.6.xx: REQ_SYNC
|
||||
dnl #
|
||||
AC_DEFUN([ZFS_AC_KERNEL_BIO_RW_SYNC], [
|
||||
AC_MSG_CHECKING([whether BIO_RW_SYNC is defined])
|
||||
ZFS_LINUX_TRY_COMPILE([
|
||||
#include <linux/bio.h>
|
||||
],[
|
||||
int flags __attribute__ ((unused));
|
||||
flags = BIO_RW_SYNC;
|
||||
],[
|
||||
AC_MSG_RESULT(yes)
|
||||
AC_DEFINE(HAVE_BIO_RW_SYNC, 1, [BIO_RW_SYNC is defined])
|
||||
],[
|
||||
AC_MSG_RESULT(no)
|
||||
])
|
||||
])
|
||||
|
||||
AC_DEFUN([ZFS_AC_KERNEL_BIO_RW_SYNCIO], [
|
||||
AC_MSG_CHECKING([whether BIO_RW_SYNCIO is defined])
|
||||
ZFS_LINUX_TRY_COMPILE([
|
||||
#include <linux/bio.h>
|
||||
],[
|
||||
int flags __attribute__ ((unused));
|
||||
flags = BIO_RW_SYNCIO;
|
||||
],[
|
||||
AC_MSG_RESULT(yes)
|
||||
AC_DEFINE(HAVE_BIO_RW_SYNCIO, 1, [BIO_RW_SYNCIO is defined])
|
||||
],[
|
||||
AC_MSG_RESULT(no)
|
||||
])
|
||||
])
|
||||
|
||||
AC_DEFUN([ZFS_AC_KERNEL_REQ_SYNC], [
|
||||
AC_MSG_CHECKING([whether REQ_SYNC is defined])
|
||||
ZFS_LINUX_TRY_COMPILE([
|
||||
#include <linux/bio.h>
|
||||
],[
|
||||
int flags __attribute__ ((unused));
|
||||
flags = REQ_SYNC;
|
||||
],[
|
||||
AC_MSG_RESULT(yes)
|
||||
AC_DEFINE(HAVE_REQ_SYNC, 1, [REQ_SYNC is defined])
|
||||
],[
|
||||
AC_MSG_RESULT(no)
|
||||
])
|
||||
])
|
@ -25,9 +25,6 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
|
||||
ZFS_AC_KERNEL_BIO_END_IO_T_ARGS
|
||||
ZFS_AC_KERNEL_BIO_RW_BARRIER
|
||||
ZFS_AC_KERNEL_BIO_RW_DISCARD
|
||||
ZFS_AC_KERNEL_BIO_RW_SYNC
|
||||
ZFS_AC_KERNEL_BIO_RW_SYNCIO
|
||||
ZFS_AC_KERNEL_REQ_SYNC
|
||||
ZFS_AC_KERNEL_BLK_QUEUE_FLUSH
|
||||
ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS
|
||||
ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS
|
||||
|
@ -369,27 +369,6 @@ vdev_disk_dio_free(dio_request_t *dr)
|
||||
sizeof (struct bio *) * dr->dr_bio_count);
|
||||
}
|
||||
|
||||
static int
|
||||
vdev_disk_dio_is_sync(dio_request_t *dr)
|
||||
{
|
||||
#ifdef HAVE_BIO_RW_SYNC
|
||||
/* BIO_RW_SYNC preferred interface from 2.6.12-2.6.29 */
|
||||
return (dr->dr_rw & (1 << BIO_RW_SYNC));
|
||||
#else
|
||||
#ifdef HAVE_BIO_RW_SYNCIO
|
||||
/* BIO_RW_SYNCIO preferred interface from 2.6.30-2.6.35 */
|
||||
return (dr->dr_rw & (1 << BIO_RW_SYNCIO));
|
||||
#else
|
||||
#ifdef HAVE_REQ_SYNC
|
||||
/* REQ_SYNC preferred interface from 2.6.36-2.6.xx */
|
||||
return (dr->dr_rw & REQ_SYNC);
|
||||
#else
|
||||
#error "Unable to determine bio sync flag"
|
||||
#endif /* HAVE_REQ_SYNC */
|
||||
#endif /* HAVE_BIO_RW_SYNC */
|
||||
#endif /* HAVE_BIO_RW_SYNCIO */
|
||||
}
|
||||
|
||||
static void
|
||||
vdev_disk_dio_get(dio_request_t *dr)
|
||||
{
|
||||
@ -444,7 +423,7 @@ BIO_END_IO_PROTO(vdev_disk_physio_completion, bio, error)
|
||||
rc = vdev_disk_dio_put(dr);
|
||||
|
||||
/* Wake up synchronous waiter this is the last outstanding bio */
|
||||
if ((rc == 1) && vdev_disk_dio_is_sync(dr))
|
||||
if (rc == 1)
|
||||
complete(&dr->dr_comp);
|
||||
}
|
||||
|
||||
@ -512,7 +491,7 @@ vdev_submit_bio(int rw, struct bio *bio)
|
||||
|
||||
static int
|
||||
__vdev_disk_physio(struct block_device *bdev, zio_t *zio, caddr_t kbuf_ptr,
|
||||
size_t kbuf_size, uint64_t kbuf_offset, int flags)
|
||||
size_t kbuf_size, uint64_t kbuf_offset, int flags, int wait)
|
||||
{
|
||||
dio_request_t *dr;
|
||||
caddr_t bio_ptr;
|
||||
@ -603,7 +582,7 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio, caddr_t kbuf_ptr,
|
||||
* only synchronous consumer is vdev_disk_read_rootlabel() all other
|
||||
* IO originating from vdev_disk_io_start() is asynchronous.
|
||||
*/
|
||||
if (vdev_disk_dio_is_sync(dr)) {
|
||||
if (wait) {
|
||||
wait_for_completion(&dr->dr_comp);
|
||||
error = dr->dr_error;
|
||||
ASSERT3S(atomic_read(&dr->dr_ref), ==, 1);
|
||||
@ -619,7 +598,7 @@ vdev_disk_physio(struct block_device *bdev, caddr_t kbuf,
|
||||
size_t size, uint64_t offset, int flags)
|
||||
{
|
||||
bio_set_flags_failfast(bdev, &flags);
|
||||
return (__vdev_disk_physio(bdev, NULL, kbuf, size, offset, flags));
|
||||
return (__vdev_disk_physio(bdev, NULL, kbuf, size, offset, flags, 1));
|
||||
}
|
||||
|
||||
BIO_END_IO_PROTO(vdev_disk_io_flush_completion, bio, rc)
|
||||
@ -671,6 +650,7 @@ vdev_disk_io_start(zio_t *zio)
|
||||
{
|
||||
vdev_t *v = zio->io_vd;
|
||||
vdev_disk_t *vd = v->vdev_tsd;
|
||||
zio_priority_t pri = zio->io_priority;
|
||||
int flags, error;
|
||||
|
||||
switch (zio->io_type) {
|
||||
@ -710,14 +690,14 @@ vdev_disk_io_start(zio_t *zio)
|
||||
zio_execute(zio);
|
||||
return;
|
||||
case ZIO_TYPE_WRITE:
|
||||
if (zio->io_priority == ZIO_PRIORITY_SYNC_WRITE)
|
||||
if ((pri == ZIO_PRIORITY_SYNC_WRITE) && (v->vdev_nonrot))
|
||||
flags = WRITE_SYNC;
|
||||
else
|
||||
flags = WRITE;
|
||||
break;
|
||||
|
||||
case ZIO_TYPE_READ:
|
||||
if (zio->io_priority == ZIO_PRIORITY_SYNC_READ)
|
||||
if ((pri == ZIO_PRIORITY_SYNC_READ) && (v->vdev_nonrot))
|
||||
flags = READ_SYNC;
|
||||
else
|
||||
flags = READ;
|
||||
@ -730,7 +710,7 @@ vdev_disk_io_start(zio_t *zio)
|
||||
}
|
||||
|
||||
error = __vdev_disk_physio(vd->vd_bdev, zio, zio->io_data,
|
||||
zio->io_size, zio->io_offset, flags);
|
||||
zio->io_size, zio->io_offset, flags, 0);
|
||||
if (error) {
|
||||
zio->io_error = error;
|
||||
zio_interrupt(zio);
|
||||
|
Loading…
Reference in New Issue
Block a user