- Eliminate the acquisition and release of the bqlock in bremfree() by
setting the B_REMFREE flag in the buf. This is done to prevent lock order reversals with code that must call bremfree() with a local lock held. This also reduces overhead by removing two lock operations per buf for fsync() and similar. - Check for the B_REMFREE flag in brelse() and bqrelse() after the bqlock has been acquired so that we may remove ourselves from the free list. - Provide a bremfreef() function to immediately remove a buf from a free list, for use only by NFS. This is done because the nfsclient code overloads the b_freelist queue for its own async I/O queue. - Simplify the numfreebuffers accounting by removing a switch statement that executed the same code in every possible case. - getnewbuf() can encounter locked bufs on free lists once Giant is removed. Remove a panic associated with this condition and delay asserts that inspect the buf until after it is locked. Reviewed by: phk Sponsored by: Isilon Systems, Inc.
This commit is contained in:
parent
c31e6a8dc8
commit
b646893f0f
@ -645,24 +645,49 @@ bfreekva(struct buf *bp)
|
||||
/*
|
||||
* bremfree:
|
||||
*
|
||||
* Remove the buffer from the appropriate free list.
|
||||
* Mark the buffer for removal from the appropriate free list in brelse.
|
||||
*
|
||||
*/
|
||||
void
|
||||
bremfree(struct buf *bp)
|
||||
{
|
||||
|
||||
KASSERT(BUF_REFCNT(bp), ("bremfree: buf must be locked."));
|
||||
KASSERT((bp->b_flags & B_REMFREE) == 0 && bp->b_qindex != QUEUE_NONE,
|
||||
("bremfree: buffer not on a queue."));
|
||||
|
||||
bp->b_flags |= B_REMFREE;
|
||||
/* Fixup numfreebuffers count. */
|
||||
if ((bp->b_flags & B_INVAL) || (bp->b_flags & B_DELWRI) == 0)
|
||||
atomic_subtract_int(&numfreebuffers, 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* bremfreef:
|
||||
*
|
||||
* Force an immediate removal from a free list. Used only in nfs when
|
||||
* it abuses the b_freelist pointer.
|
||||
*/
|
||||
void
|
||||
bremfreef(struct buf *bp)
|
||||
{
|
||||
mtx_lock(&bqlock);
|
||||
bremfreel(bp);
|
||||
mtx_unlock(&bqlock);
|
||||
}
|
||||
|
||||
/*
|
||||
* bremfreel:
|
||||
*
|
||||
* Removes a buffer from the free list, must be called with the
|
||||
* bqlock held.
|
||||
*/
|
||||
void
|
||||
bremfreel(struct buf *bp)
|
||||
{
|
||||
int s = splbio();
|
||||
int old_qindex = bp->b_qindex;
|
||||
|
||||
GIANT_REQUIRED;
|
||||
mtx_assert(&bqlock, MA_OWNED);
|
||||
|
||||
if (bp->b_qindex != QUEUE_NONE) {
|
||||
KASSERT(BUF_REFCNT(bp) == 1, ("bremfree: bp %p not locked",bp));
|
||||
@ -672,24 +697,22 @@ bremfreel(struct buf *bp)
|
||||
if (BUF_REFCNT(bp) <= 1)
|
||||
panic("bremfree: removing a buffer not on a queue");
|
||||
}
|
||||
|
||||
/*
|
||||
* If this was a delayed bremfree() we only need to remove the buffer
|
||||
* from the queue and return the stats are already done.
|
||||
*/
|
||||
if (bp->b_flags & B_REMFREE) {
|
||||
bp->b_flags &= ~B_REMFREE;
|
||||
splx(s);
|
||||
return;
|
||||
}
|
||||
/*
|
||||
* Fixup numfreebuffers count. If the buffer is invalid or not
|
||||
* delayed-write, and it was on the EMPTY, LRU, or AGE queues,
|
||||
* the buffer was free and we must decrement numfreebuffers.
|
||||
* delayed-write, the buffer was free and we must decrement
|
||||
* numfreebuffers.
|
||||
*/
|
||||
if ((bp->b_flags & B_INVAL) || (bp->b_flags & B_DELWRI) == 0) {
|
||||
switch(old_qindex) {
|
||||
case QUEUE_DIRTY:
|
||||
case QUEUE_CLEAN:
|
||||
case QUEUE_EMPTY:
|
||||
case QUEUE_EMPTYKVA:
|
||||
atomic_subtract_int(&numfreebuffers, 1);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
if ((bp->b_flags & B_INVAL) || (bp->b_flags & B_DELWRI) == 0)
|
||||
atomic_subtract_int(&numfreebuffers, 1);
|
||||
splx(s);
|
||||
}
|
||||
|
||||
@ -1105,7 +1128,7 @@ bdirty(struct buf *bp)
|
||||
{
|
||||
|
||||
KASSERT(bp->b_bufobj != NULL, ("No b_bufobj %p", bp));
|
||||
KASSERT(bp->b_qindex == QUEUE_NONE,
|
||||
KASSERT(bp->b_flags & B_REMFREE || bp->b_qindex == QUEUE_NONE,
|
||||
("bdirty: buffer %p still on queue %d", bp, bp->b_qindex));
|
||||
bp->b_flags &= ~(B_RELBUF);
|
||||
bp->b_iocmd = BIO_WRITE;
|
||||
@ -1135,7 +1158,7 @@ bundirty(struct buf *bp)
|
||||
{
|
||||
|
||||
KASSERT(bp->b_bufobj != NULL, ("No b_bufobj %p", bp));
|
||||
KASSERT(bp->b_qindex == QUEUE_NONE,
|
||||
KASSERT(bp->b_flags & B_REMFREE || bp->b_qindex == QUEUE_NONE,
|
||||
("bundirty: buffer %p still on queue %d", bp, bp->b_qindex));
|
||||
|
||||
if (bp->b_flags & B_DELWRI) {
|
||||
@ -1398,8 +1421,6 @@ brelse(struct buf *bp)
|
||||
|
||||
}
|
||||
|
||||
if (bp->b_qindex != QUEUE_NONE)
|
||||
panic("brelse: free buffer onto another queue???");
|
||||
if (BUF_REFCNT(bp) > 1) {
|
||||
/* do not release to free list */
|
||||
BUF_UNLOCK(bp);
|
||||
@ -1409,6 +1430,11 @@ brelse(struct buf *bp)
|
||||
|
||||
/* enqueue */
|
||||
mtx_lock(&bqlock);
|
||||
/* Handle delayed bremfree() processing. */
|
||||
if (bp->b_flags & B_REMFREE)
|
||||
bremfreel(bp);
|
||||
if (bp->b_qindex != QUEUE_NONE)
|
||||
panic("brelse: free buffer onto another queue???");
|
||||
|
||||
/* buffers with no memory */
|
||||
if (bp->b_bufsize == 0) {
|
||||
@ -1502,8 +1528,6 @@ bqrelse(struct buf *bp)
|
||||
KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)),
|
||||
("bqrelse: inappropriate B_PAGING or B_CLUSTER bp %p", bp));
|
||||
|
||||
if (bp->b_qindex != QUEUE_NONE)
|
||||
panic("bqrelse: free buffer onto another queue???");
|
||||
if (BUF_REFCNT(bp) > 1) {
|
||||
/* do not release to free list */
|
||||
BUF_UNLOCK(bp);
|
||||
@ -1511,6 +1535,11 @@ bqrelse(struct buf *bp)
|
||||
return;
|
||||
}
|
||||
mtx_lock(&bqlock);
|
||||
/* Handle delayed bremfree() processing. */
|
||||
if (bp->b_flags & B_REMFREE)
|
||||
bremfreel(bp);
|
||||
if (bp->b_qindex != QUEUE_NONE)
|
||||
panic("bqrelse: free buffer onto another queue???");
|
||||
/* buffers with stale but valid contents */
|
||||
if (bp->b_flags & B_DELWRI) {
|
||||
bp->b_qindex = QUEUE_DIRTY;
|
||||
@ -1853,18 +1882,6 @@ getnewbuf(int slpflag, int slptimeo, int size, int maxsize)
|
||||
BO_UNLOCK(bp->b_bufobj);
|
||||
}
|
||||
|
||||
/*
|
||||
* Sanity Checks
|
||||
*/
|
||||
KASSERT(bp->b_qindex == qindex, ("getnewbuf: inconsistant queue %d bp %p", qindex, bp));
|
||||
|
||||
/*
|
||||
* Note: we no longer distinguish between VMIO and non-VMIO
|
||||
* buffers.
|
||||
*/
|
||||
|
||||
KASSERT((bp->b_flags & B_DELWRI) == 0, ("delwri buffer %p found in queue %d", bp, qindex));
|
||||
|
||||
/*
|
||||
* If we are defragging then we need a buffer with
|
||||
* b_kvasize != 0. XXX this situation should no longer
|
||||
@ -1880,9 +1897,20 @@ getnewbuf(int slpflag, int slptimeo, int size, int maxsize)
|
||||
* Start freeing the bp. This is somewhat involved. nbp
|
||||
* remains valid only for QUEUE_EMPTY[KVA] bp's.
|
||||
*/
|
||||
|
||||
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0)
|
||||
panic("getnewbuf: locked buf");
|
||||
continue;
|
||||
/*
|
||||
* Sanity Checks
|
||||
*/
|
||||
KASSERT(bp->b_qindex == qindex, ("getnewbuf: inconsistant queue %d bp %p", qindex, bp));
|
||||
|
||||
/*
|
||||
* Note: we no longer distinguish between VMIO and non-VMIO
|
||||
* buffers.
|
||||
*/
|
||||
|
||||
KASSERT((bp->b_flags & B_DELWRI) == 0, ("delwri buffer %p found in queue %d", bp, qindex));
|
||||
|
||||
bremfreel(bp);
|
||||
mtx_unlock(&bqlock);
|
||||
|
||||
|
@ -1241,6 +1241,8 @@ nfs_asyncio(struct nfsmount *nmp, struct buf *bp, struct ucred *cred, struct thr
|
||||
bp->b_wcred = crhold(cred);
|
||||
}
|
||||
|
||||
if (bp->b_flags & B_REMFREE)
|
||||
bremfreef(bp);
|
||||
BUF_KERNPROC(bp);
|
||||
TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist);
|
||||
nmp->nm_bufqlen++;
|
||||
|
@ -219,7 +219,7 @@ struct buf {
|
||||
#define B_RAM 0x10000000 /* Read ahead mark (flag) */
|
||||
#define B_VMIO 0x20000000 /* VMIO flag */
|
||||
#define B_CLUSTER 0x40000000 /* pagein op, so swap() can count it */
|
||||
#define B_80000000 0x80000000 /* Available flag. */
|
||||
#define B_REMFREE 0x80000000 /* Delayed bremfree */
|
||||
|
||||
#define PRINT_BUF_FLAGS "\20\40b31\37cluster\36vmio\35ram\34b27" \
|
||||
"\33paging\32b25\31b24\30b23\27relbuf\26dirty\25b20" \
|
||||
@ -485,6 +485,7 @@ void bufinit(void);
|
||||
void bwillwrite(void);
|
||||
int buf_dirty_count_severe(void);
|
||||
void bremfree(struct buf *);
|
||||
void bremfreef(struct buf *); /* XXX Force bremfree, only for nfs. */
|
||||
int bread(struct vnode *, daddr_t, int, struct ucred *, struct buf **);
|
||||
int breadn(struct vnode *, daddr_t, int, daddr_t *, int *, int,
|
||||
struct ucred *, struct buf **);
|
||||
|
@ -208,14 +208,15 @@ ffs_fsync(ap)
|
||||
continue;
|
||||
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
|
||||
continue;
|
||||
VI_UNLOCK(vp);
|
||||
if (!wait && LIST_FIRST(&bp->b_dep) != NULL &&
|
||||
(bp->b_flags & B_DEFERRED) == 0 &&
|
||||
buf_countdeps(bp, 0)) {
|
||||
bp->b_flags |= B_DEFERRED;
|
||||
BUF_UNLOCK(bp);
|
||||
VI_LOCK(vp);
|
||||
continue;
|
||||
}
|
||||
VI_UNLOCK(vp);
|
||||
if ((bp->b_flags & B_DELWRI) == 0)
|
||||
panic("ffs_fsync: not dirty");
|
||||
/*
|
||||
|
Loading…
Reference in New Issue
Block a user