Implement IO_NOWDRAIN and B_NOWDRAIN - prevents the buffer cache from blocking
in wdrain during a write. This flag needs to be used in devices whose strategy routines turn around and issue another high-level I/O, such as when MD turns around and issues a VOP_WRITE to the vnode backing store, in order to avoid deadlocking the dirty buffer draining code. Remove a vprint() warning from MD when the backing vnode is found to be in-use. The syncer or buf_daemon could be flushing the backing vnode at the time of an MD operation so the warning is not correct. MFC after: 1 week
This commit is contained in:
parent
93b2ae9fcf
commit
7e76bb562e
@ -386,15 +386,18 @@ mdstart_vnode(struct md_s *sc)
|
||||
auio.uio_rw = UIO_WRITE;
|
||||
auio.uio_resid = bp->bio_bcount;
|
||||
auio.uio_td = curthread;
|
||||
if (VOP_ISLOCKED(sc->vnode, NULL))
|
||||
vprint("unexpected md driver lock", sc->vnode);
|
||||
/*
|
||||
* When reading set IO_DIRECT to try to avoid double-caching
|
||||
* the data. When writing IO_DIRECT is not optimal, but we
|
||||
* must set IO_NOWDRAIN to avoid a wdrain deadlock.
|
||||
*/
|
||||
if (bp->bio_cmd == BIO_READ) {
|
||||
vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY, curthread);
|
||||
error = VOP_READ(sc->vnode, &auio, 0, sc->cred);
|
||||
error = VOP_READ(sc->vnode, &auio, IO_DIRECT, sc->cred);
|
||||
} else {
|
||||
(void) vn_start_write(sc->vnode, &mp, V_WAIT);
|
||||
vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY, curthread);
|
||||
error = VOP_WRITE(sc->vnode, &auio, 0, sc->cred);
|
||||
error = VOP_WRITE(sc->vnode, &auio, IO_NOWDRAIN, sc->cred);
|
||||
vn_finished_write(mp);
|
||||
}
|
||||
VOP_UNLOCK(sc->vnode, 0, curthread);
|
||||
|
@ -758,11 +758,15 @@ bwrite(struct buf * bp)
|
||||
int rtval = bufwait(bp);
|
||||
brelse(bp);
|
||||
return (rtval);
|
||||
} else {
|
||||
} else if ((oldflags & B_NOWDRAIN) == 0) {
|
||||
/*
|
||||
* don't allow the async write to saturate the I/O
|
||||
* system. There is no chance of deadlock here because
|
||||
* we are blocking on I/O that is already in-progress.
|
||||
* system. Deadlocks can occur only if a device strategy
|
||||
* routine (like in MD) turns around and issues another
|
||||
* high-level write, in which case B_NOWDRAIN is expected
|
||||
* to be set. Otherwise we will not deadlock here because
|
||||
* we are blocking waiting for I/O that is already in-progress
|
||||
* to complete.
|
||||
*/
|
||||
waitrunningbufspace();
|
||||
}
|
||||
@ -1286,7 +1290,8 @@ brelse(struct buf * bp)
|
||||
|
||||
/* unlock */
|
||||
BUF_UNLOCK(bp);
|
||||
bp->b_flags &= ~(B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF | B_DIRECT);
|
||||
bp->b_flags &= ~(B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF |
|
||||
B_DIRECT | B_NOWDRAIN);
|
||||
bp->b_ioflags &= ~BIO_ORDERED;
|
||||
if ((bp->b_flags & B_DELWRI) == 0 && (bp->b_xflags & BX_VNDIRTY))
|
||||
panic("brelse: not dirty");
|
||||
|
@ -836,7 +836,7 @@ cluster_wbuild(vp, size, start_lbn, len)
|
||||
bp->b_data = (char *)((vm_offset_t)bp->b_data |
|
||||
((vm_offset_t)tbp->b_data & PAGE_MASK));
|
||||
bp->b_flags |= B_CLUSTER |
|
||||
(tbp->b_flags & (B_VMIO | B_NEEDCOMMIT));
|
||||
(tbp->b_flags & (B_VMIO | B_NEEDCOMMIT | B_NOWDRAIN));
|
||||
bp->b_iodone = cluster_callback;
|
||||
pbgetvp(vp, bp);
|
||||
/*
|
||||
|
@ -961,6 +961,12 @@ again:
|
||||
}
|
||||
vfs_bio_set_validclean(bp, on, n);
|
||||
}
|
||||
/*
|
||||
* If IO_NOWDRAIN then set B_NOWDRAIN (nfs-backed MD
|
||||
* filesystem)
|
||||
*/
|
||||
if (ioflag & IO_NOWDRAIN)
|
||||
bp->b_flags |= B_NOWDRAIN;
|
||||
|
||||
/*
|
||||
* If IO_SYNC do bwrite().
|
||||
|
@ -192,6 +192,11 @@ struct buf {
|
||||
* the pages underlying the buffer. B_DIRECT is
|
||||
* sticky until the buffer is released and typically
|
||||
* only has an effect when B_RELBUF is also set.
|
||||
*
|
||||
* B_NOWDRAIN This flag should be set when a device (like MD)
|
||||
* does a turn-around VOP_WRITE from its strategy
|
||||
* routine. This flag prevents bwrite() from blocking
|
||||
* in wdrain, avoiding a deadlock situation.
|
||||
*/
|
||||
|
||||
#define B_AGE 0x00000001 /* Move to age queue when I/O done. */
|
||||
@ -204,7 +209,7 @@ struct buf {
|
||||
#define B_DELWRI 0x00000080 /* Delay I/O until buffer reused. */
|
||||
#define B_DONE 0x00000200 /* I/O completed. */
|
||||
#define B_EINTR 0x00000400 /* I/O was interrupted */
|
||||
#define B_00000800 0x00000800 /* Available flag. */
|
||||
#define B_NOWDRAIN 0x00000800 /* Avoid wdrain deadlock */
|
||||
#define B_SCANNED 0x00001000 /* VOP_FSYNC funcs mark written bufs */
|
||||
#define B_INVAL 0x00002000 /* Does not contain valid info. */
|
||||
#define B_LOCKED 0x00004000 /* Locked in core (not reusable). */
|
||||
|
@ -222,6 +222,7 @@ struct vattr {
|
||||
#define IO_INVAL 0x40 /* invalidate after I/O */
|
||||
#define IO_ASYNC 0x80 /* bawrite rather then bdwrite */
|
||||
#define IO_DIRECT 0x100 /* attempt to bypass buffer cache */
|
||||
#define IO_NOWDRAIN 0x200 /* do not block on wdrain */
|
||||
|
||||
/*
|
||||
* Modes. Some values same as Ixxx entries from inode.h for now.
|
||||
|
@ -511,6 +511,8 @@ WRITE(ap)
|
||||
break;
|
||||
if (ioflag & IO_DIRECT)
|
||||
bp->b_flags |= B_DIRECT;
|
||||
if (ioflag & IO_NOWDRAIN)
|
||||
bp->b_flags |= B_NOWDRAIN;
|
||||
|
||||
if (uio->uio_offset + xfersize > ip->i_size) {
|
||||
ip->i_size = uio->uio_offset + xfersize;
|
||||
|
Loading…
x
Reference in New Issue
Block a user