Handle errors from background write of the cylinder group blocks.

First, on the write error, bufdone() call from ffs_backgroundwrite()
panics because pbrelvp() cleared bp->b_bufobj, while brelse() would
try to re-dirty the copy of the cg buffer.  Handle this by setting
B_INVAL for the case of BIO_ERROR.

Second, we must re-dirty the real buffer containing the cylinder group
block data when background write failed.  Real cg buffer was already
marked clean in ffs_bufwrite(). After the BV_BKGRDINPROG flag is
cleared on the real cg buffer in ffs_backgroundwrite(), buffer scan
may reuse the buffer at any moment. The result is lost write, and if
the write error was only transient, we get corrupted bitmaps.

We cannot re-dirty the original cg buffer in the
ffs_backgroundwritedone(), since the context is not sleepable,
preventing us from sleeping for origbp' lock.  Add BV_BKGDERR flag
(protected by the buffer object lock), which is converted into delayed
write by brelse(), bqrelse() and buffer scan.

In collaboration with:	Conrad Meyer <cse.cem@gmail.com>
Reviewed by:	mckusick
Sponsored by:	The FreeBSD Foundation (kib),
	  EMC/Isilon storage division (Conrad)
MFC after:	2 weeks
This commit is contained in:
Konstantin Belousov 2015-06-27 09:44:14 +00:00
parent 5d62c861b5
commit b2c3df842b
3 changed files with 42 additions and 3 deletions

View File

@ -1597,6 +1597,12 @@ brelse(struct buf *bp)
return;
}
if ((bp->b_vflags & (BV_BKGRDINPROG | BV_BKGRDERR)) == BV_BKGRDERR) {
BO_LOCK(bp->b_bufobj);
bp->b_vflags &= ~BV_BKGRDERR;
BO_UNLOCK(bp->b_bufobj);
bdirty(bp);
}
if (bp->b_iocmd == BIO_WRITE && (bp->b_ioflags & BIO_ERROR) &&
bp->b_error == EIO && !(bp->b_flags & B_INVAL)) {
/*
@ -1853,7 +1859,11 @@ bqrelse(struct buf *bp)
}
/* buffers with stale but valid contents */
if (bp->b_flags & B_DELWRI) {
if ((bp->b_flags & B_DELWRI) != 0 || (bp->b_vflags & (BV_BKGRDINPROG |
BV_BKGRDERR)) == BV_BKGRDERR) {
BO_LOCK(bp->b_bufobj);
bp->b_vflags &= ~BV_BKGRDERR;
BO_UNLOCK(bp->b_bufobj);
qindex = QUEUE_DIRTY;
} else {
if ((bp->b_flags & B_DELWRI) == 0 &&
@ -2372,6 +2382,16 @@ getnewbuf_scan(int maxsize, int defrag, int unmapped, int metadata)
continue;
}
/*
* Requeue the background write buffer with error.
*/
if ((bp->b_vflags & BV_BKGRDERR) != 0) {
bremfreel(bp);
mtx_unlock(&bqclean);
bqrelse(bp);
continue;
}
KASSERT(bp->b_qindex == qindex,
("getnewbuf: inconsistent queue %d bp %p", qindex, bp));

View File

@ -253,8 +253,9 @@ struct buf {
#define BV_SCANNED 0x00000001 /* VOP_FSYNC funcs mark written bufs */
#define BV_BKGRDINPROG 0x00000002 /* Background write in progress */
#define BV_BKGRDWAIT 0x00000004 /* Background write waiting */
#define BV_BKGRDERR 0x00000008 /* Error from background write */
#define PRINT_BUF_VFLAGS "\20\3bkgrdwait\2bkgrdinprog\1scanned"
#define PRINT_BUF_VFLAGS "\20\4bkgrderr\3bkgrdwait\2bkgrdinprog\1scanned"
#ifdef _KERNEL
/*

View File

@ -1978,12 +1978,19 @@ ffs_backgroundwritedone(struct buf *bp)
BO_LOCK(bufobj);
if ((origbp = gbincore(bp->b_bufobj, bp->b_lblkno)) == NULL)
panic("backgroundwritedone: lost buffer");
/*
* We should mark the cylinder group buffer origbp as
* dirty, to not loose the failed write.
*/
if ((bp->b_ioflags & BIO_ERROR) != 0)
origbp->b_vflags |= BV_BKGRDERR;
BO_UNLOCK(bufobj);
/*
* Process dependencies then return any unfinished ones.
*/
pbrelvp(bp);
if (!LIST_EMPTY(&bp->b_dep))
if (!LIST_EMPTY(&bp->b_dep) && (bp->b_ioflags & BIO_ERROR) == 0)
buf_complete(bp);
#ifdef SOFTUPDATES
if (!LIST_EMPTY(&bp->b_dep))
@ -1995,6 +2002,15 @@ ffs_backgroundwritedone(struct buf *bp)
*/
bp->b_flags |= B_NOCACHE;
bp->b_flags &= ~B_CACHE;
/*
* Prevent brelse() from trying to keep and re-dirtying bp on
* errors. It causes b_bufobj dereference in
* bdirty()/reassignbuf(), and b_bufobj was cleared in
* pbrelvp() above.
*/
if ((bp->b_ioflags & BIO_ERROR) != 0)
bp->b_flags |= B_INVAL;
bufdone(bp);
BO_LOCK(bufobj);
/*
@ -2056,6 +2072,8 @@ ffs_bufwrite(struct buf *bp)
if (bp->b_vflags & BV_BKGRDINPROG)
panic("bufwrite: still writing");
}
if ((bp->b_vflags & BV_BKGRDERR) != 0)
bp->b_vflags &= ~BV_BKGRDERR;
BO_UNLOCK(bp->b_bufobj);
/*