- Eliminate the acquisition and release of the bqlock in bremfree() by

setting the B_REMFREE flag in the buf.  This is done to prevent lock order
   reversals with code that must call bremfree() with a local lock held.
   This also reduces overhead by removing two lock operations per buf for
   fsync() and similar.
 - Check for the B_REMFREE flag in brelse() and bqrelse() after the bqlock
   has been acquired so that we may remove ourself from the free-list.
 - Provide a bremfreef() function to immediately remove a buf from a
   free-list for use only by NFS.  This is done because the nfsclient code
   overloads the b_freelist queue for its own async. io queue.
 - Simplify the numfreebuffers accounting by removing a switch statement
   that executed the same code in every possible case.
 - getnewbuf() can encounter locked bufs on free-lists once Giant is removed.
   Remove a panic associated with this condition and delay asserts that
   inspect the buf until after it is locked.

Reviewed by:	phk
Sponsored by:	Isilon Systems, Inc.
This commit is contained in:
Jeff Roberson 2004-11-18 08:44:09 +00:00
parent c31e6a8dc8
commit b646893f0f
4 changed files with 72 additions and 40 deletions

View File

@ -645,24 +645,49 @@ bfreekva(struct buf *bp)
/*
* bremfree:
*
* Remove the buffer from the appropriate free list.
* Mark the buffer for removal from the appropriate free list in brelse.
*
*/
void
bremfree(struct buf *bp)
{
KASSERT(BUF_REFCNT(bp), ("bremfree: buf must be locked."));
KASSERT((bp->b_flags & B_REMFREE) == 0 && bp->b_qindex != QUEUE_NONE,
("bremfree: buffer not on a queue."));
bp->b_flags |= B_REMFREE;
/* Fixup numfreebuffers count. */
if ((bp->b_flags & B_INVAL) || (bp->b_flags & B_DELWRI) == 0)
atomic_subtract_int(&numfreebuffers, 1);
}
/*
* bremfreef:
*
* Force an immediate removal from a free list. Used only in nfs when
* it abuses the b_freelist pointer.
*/
void
bremfreef(struct buf *bp)
{
mtx_lock(&bqlock);
bremfreel(bp);
mtx_unlock(&bqlock);
}
/*
* bremfreel:
*
* Removes a buffer from the free list, must be called with the
* bqlock held.
*/
void
bremfreel(struct buf *bp)
{
int s = splbio();
int old_qindex = bp->b_qindex;
GIANT_REQUIRED;
mtx_assert(&bqlock, MA_OWNED);
if (bp->b_qindex != QUEUE_NONE) {
KASSERT(BUF_REFCNT(bp) == 1, ("bremfree: bp %p not locked",bp));
@ -672,24 +697,22 @@ bremfreel(struct buf *bp)
if (BUF_REFCNT(bp) <= 1)
panic("bremfree: removing a buffer not on a queue");
}
/*
* If this was a delayed bremfree() we only need to remove the buffer
* from the queue and return the stats are already done.
*/
if (bp->b_flags & B_REMFREE) {
bp->b_flags &= ~B_REMFREE;
splx(s);
return;
}
/*
* Fixup numfreebuffers count. If the buffer is invalid or not
* delayed-write, and it was on the EMPTY, LRU, or AGE queues,
* the buffer was free and we must decrement numfreebuffers.
* delayed-write, the buffer was free and we must decrement
* numfreebuffers.
*/
if ((bp->b_flags & B_INVAL) || (bp->b_flags & B_DELWRI) == 0) {
switch(old_qindex) {
case QUEUE_DIRTY:
case QUEUE_CLEAN:
case QUEUE_EMPTY:
case QUEUE_EMPTYKVA:
atomic_subtract_int(&numfreebuffers, 1);
break;
default:
break;
}
}
if ((bp->b_flags & B_INVAL) || (bp->b_flags & B_DELWRI) == 0)
atomic_subtract_int(&numfreebuffers, 1);
splx(s);
}
@ -1105,7 +1128,7 @@ bdirty(struct buf *bp)
{
KASSERT(bp->b_bufobj != NULL, ("No b_bufobj %p", bp));
KASSERT(bp->b_qindex == QUEUE_NONE,
KASSERT(bp->b_flags & B_REMFREE || bp->b_qindex == QUEUE_NONE,
("bdirty: buffer %p still on queue %d", bp, bp->b_qindex));
bp->b_flags &= ~(B_RELBUF);
bp->b_iocmd = BIO_WRITE;
@ -1135,7 +1158,7 @@ bundirty(struct buf *bp)
{
KASSERT(bp->b_bufobj != NULL, ("No b_bufobj %p", bp));
KASSERT(bp->b_qindex == QUEUE_NONE,
KASSERT(bp->b_flags & B_REMFREE || bp->b_qindex == QUEUE_NONE,
("bundirty: buffer %p still on queue %d", bp, bp->b_qindex));
if (bp->b_flags & B_DELWRI) {
@ -1398,8 +1421,6 @@ brelse(struct buf *bp)
}
if (bp->b_qindex != QUEUE_NONE)
panic("brelse: free buffer onto another queue???");
if (BUF_REFCNT(bp) > 1) {
/* do not release to free list */
BUF_UNLOCK(bp);
@ -1409,6 +1430,11 @@ brelse(struct buf *bp)
/* enqueue */
mtx_lock(&bqlock);
/* Handle delayed bremfree() processing. */
if (bp->b_flags & B_REMFREE)
bremfreel(bp);
if (bp->b_qindex != QUEUE_NONE)
panic("brelse: free buffer onto another queue???");
/* buffers with no memory */
if (bp->b_bufsize == 0) {
@ -1502,8 +1528,6 @@ bqrelse(struct buf *bp)
KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)),
("bqrelse: inappropriate B_PAGING or B_CLUSTER bp %p", bp));
if (bp->b_qindex != QUEUE_NONE)
panic("bqrelse: free buffer onto another queue???");
if (BUF_REFCNT(bp) > 1) {
/* do not release to free list */
BUF_UNLOCK(bp);
@ -1511,6 +1535,11 @@ bqrelse(struct buf *bp)
return;
}
mtx_lock(&bqlock);
/* Handle delayed bremfree() processing. */
if (bp->b_flags & B_REMFREE)
bremfreel(bp);
if (bp->b_qindex != QUEUE_NONE)
panic("bqrelse: free buffer onto another queue???");
/* buffers with stale but valid contents */
if (bp->b_flags & B_DELWRI) {
bp->b_qindex = QUEUE_DIRTY;
@ -1853,18 +1882,6 @@ getnewbuf(int slpflag, int slptimeo, int size, int maxsize)
BO_UNLOCK(bp->b_bufobj);
}
/*
* Sanity Checks
*/
KASSERT(bp->b_qindex == qindex, ("getnewbuf: inconsistant queue %d bp %p", qindex, bp));
/*
* Note: we no longer distinguish between VMIO and non-VMIO
* buffers.
*/
KASSERT((bp->b_flags & B_DELWRI) == 0, ("delwri buffer %p found in queue %d", bp, qindex));
/*
* If we are defragging then we need a buffer with
* b_kvasize != 0. XXX this situation should no longer
@ -1880,9 +1897,20 @@ getnewbuf(int slpflag, int slptimeo, int size, int maxsize)
* Start freeing the bp. This is somewhat involved. nbp
* remains valid only for QUEUE_EMPTY[KVA] bp's.
*/
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0)
panic("getnewbuf: locked buf");
continue;
/*
* Sanity Checks
*/
KASSERT(bp->b_qindex == qindex, ("getnewbuf: inconsistant queue %d bp %p", qindex, bp));
/*
* Note: we no longer distinguish between VMIO and non-VMIO
* buffers.
*/
KASSERT((bp->b_flags & B_DELWRI) == 0, ("delwri buffer %p found in queue %d", bp, qindex));
bremfreel(bp);
mtx_unlock(&bqlock);

View File

@ -1241,6 +1241,8 @@ nfs_asyncio(struct nfsmount *nmp, struct buf *bp, struct ucred *cred, struct thr
bp->b_wcred = crhold(cred);
}
if (bp->b_flags & B_REMFREE)
bremfreef(bp);
BUF_KERNPROC(bp);
TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist);
nmp->nm_bufqlen++;

View File

@ -219,7 +219,7 @@ struct buf {
#define B_RAM 0x10000000 /* Read ahead mark (flag) */
#define B_VMIO 0x20000000 /* VMIO flag */
#define B_CLUSTER 0x40000000 /* pagein op, so swap() can count it */
#define B_80000000 0x80000000 /* Available flag. */
#define B_REMFREE 0x80000000 /* Delayed bremfree */
#define PRINT_BUF_FLAGS "\20\40b31\37cluster\36vmio\35ram\34b27" \
"\33paging\32b25\31b24\30b23\27relbuf\26dirty\25b20" \
@ -485,6 +485,7 @@ void bufinit(void);
void bwillwrite(void);
int buf_dirty_count_severe(void);
void bremfree(struct buf *);
void bremfreef(struct buf *); /* XXX Force bremfree, only for nfs. */
int bread(struct vnode *, daddr_t, int, struct ucred *, struct buf **);
int breadn(struct vnode *, daddr_t, int, daddr_t *, int *, int,
struct ucred *, struct buf **);

View File

@ -208,14 +208,15 @@ ffs_fsync(ap)
continue;
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
continue;
VI_UNLOCK(vp);
if (!wait && LIST_FIRST(&bp->b_dep) != NULL &&
(bp->b_flags & B_DEFERRED) == 0 &&
buf_countdeps(bp, 0)) {
bp->b_flags |= B_DEFERRED;
BUF_UNLOCK(bp);
VI_LOCK(vp);
continue;
}
VI_UNLOCK(vp);
if ((bp->b_flags & B_DELWRI) == 0)
panic("ffs_fsync: not dirty");
/*