diff --git a/sys/geom/geom_vfs.c b/sys/geom/geom_vfs.c index afc65499c56e..bbed550df062 100644 --- a/sys/geom/geom_vfs.c +++ b/sys/geom/geom_vfs.c @@ -192,6 +192,10 @@ g_vfs_strategy(struct bufobj *bo, struct buf *bp) bip->bio_done = g_vfs_done; bip->bio_caller2 = bp; bip->bio_length = bp->b_bcount; + if (bp->b_flags & B_BARRIER) { + bip->bio_flags |= BIO_ORDERED; + bp->b_flags &= ~B_BARRIER; + } g_io_request(bip, cp); } diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 96c844252a3b..63933991e180 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -206,6 +206,9 @@ SYSCTL_INT(_vfs, OID_AUTO, flushbufqtarget, CTLFLAG_RW, &flushbufqtarget, 0, static long notbufdflashes; SYSCTL_LONG(_vfs, OID_AUTO, notbufdflashes, CTLFLAG_RD, ¬bufdflashes, 0, "Number of dirty buffer flushes done by the bufdaemon helpers"); +static long barrierwrites; +SYSCTL_LONG(_vfs, OID_AUTO, barrierwrites, CTLFLAG_RW, &barrierwrites, 0, + "Number of barrier writes"); /* * Wakeup point for bufdaemon, as well as indicator of whether it is already @@ -888,6 +891,9 @@ bufwrite(struct buf *bp) return (0); } + if (bp->b_flags & B_BARRIER) + barrierwrites++; + oldflags = bp->b_flags; BUF_ASSERT_HELD(bp); @@ -1007,6 +1013,8 @@ bdwrite(struct buf *bp) CTR3(KTR_BUF, "bdwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); KASSERT(bp->b_bufobj != NULL, ("No b_bufobj %p", bp)); + KASSERT((bp->b_flags & B_BARRIER) == 0, + ("Barrier request in delayed write %p", bp)); BUF_ASSERT_HELD(bp); if (bp->b_flags & B_INVAL) { @@ -1166,6 +1174,40 @@ bawrite(struct buf *bp) (void) bwrite(bp); } +/* + * babarrierwrite: + * + * Asynchronous barrier write. Start output on a buffer, but do not + * wait for it to complete. Place a write barrier after this write so + * that this buffer and all buffers written before it are committed to + * the disk before any buffers written after this write are committed + * to the disk. The buffer is released when the output completes. + */ +void +babarrierwrite(struct buf *bp) +{ + + bp->b_flags |= B_ASYNC | B_BARRIER; + (void) bwrite(bp); +} + +/* + * bbarrierwrite: + * + * Synchronous barrier write. Start output on a buffer and wait for + * it to complete. Place a write barrier after this write so that + * this buffer and all buffers written before it are committed to + * the disk before any buffers written after this write are committed + * to the disk. The buffer is released when the output completes. + */ +int +bbarrierwrite(struct buf *bp) +{ + + bp->b_flags |= B_BARRIER; + return (bwrite(bp)); +} + /* * bwillwrite: * diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c index 19ee05f9d702..663b66f3e639 100644 --- a/sys/kern/vfs_cluster.c +++ b/sys/kern/vfs_cluster.c @@ -944,11 +944,17 @@ cluster_wbuild(vp, size, start_lbn, len) } bp->b_bcount += size; bp->b_bufsize += size; - bundirty(tbp); - tbp->b_flags &= ~B_DONE; - tbp->b_ioflags &= ~BIO_ERROR; + /* + * If any of the clustered buffers have their + * B_BARRIER flag set, transfer that request to + * the cluster. + */ + bp->b_flags |= (tbp->b_flags & B_BARRIER); + tbp->b_flags &= ~(B_DONE | B_BARRIER); tbp->b_flags |= B_ASYNC; + tbp->b_ioflags &= ~BIO_ERROR; tbp->b_iocmd = BIO_WRITE; + bundirty(tbp); reassignbuf(tbp); /* put on clean list */ bufobj_wref(tbp->b_bufobj); BUF_KERNPROC(tbp); diff --git a/sys/sys/buf.h b/sys/sys/buf.h index 418d6c5c3561..672ef5a10038 100644 --- a/sys/sys/buf.h +++ b/sys/sys/buf.h @@ -205,7 +205,7 @@ struct buf { #define B_00000800 0x00000800 /* Available flag. */ #define B_00001000 0x00001000 /* Available flag. */ #define B_INVAL 0x00002000 /* Does not contain valid info. */ -#define B_00004000 0x00004000 /* Available flag. */ +#define B_BARRIER 0x00004000 /* Write this and all preceeding first. */ #define B_NOCACHE 0x00008000 /* Do not cache block after use. */ #define B_MALLOC 0x00010000 /* malloced b_data */ #define B_CLUSTEROK 0x00020000 /* Pagein op, so swap() can count it. */ @@ -488,6 +488,8 @@ int breadn_flags(struct vnode *, daddr_t, int, daddr_t *, int *, int, void breada(struct vnode *, daddr_t *, int *, int, struct ucred *); void bdwrite(struct buf *); void bawrite(struct buf *); +void babarrierwrite(struct buf *); +int bbarrierwrite(struct buf *); void bdirty(struct buf *); void bundirty(struct buf *); void bufstrategy(struct bufobj *, struct buf *);