Prepare to replace the buf splay with a trie:

- Don't insert BKGRDMARKER bufs into the splay or dirty/clean buf lists.
   No consumers need to find them there and it complicates the tree.
   These flags are all FFS specific and could be moved out of the buf
   cache.
 - Use pbgetvp() and pbrelvp() to associate the background and journal
   bufs with the vp.  Not only is this much cheaper it makes more sense
   for these transient bufs.
 - Fix the assertions in pbget* and pbrel*.  It's not safe to check list
   pointers which were never initialized.  Use the BX flags instead.  We
   also check B_PAGING in reassignbuf() so this should cover all cases.

Discussed with:	kib, mckusick, attilio
Sponsored by:	EMC / Isilon Storage Division
This commit is contained in:
Jeff Roberson 2013-04-06 22:21:23 +00:00
parent e36a4a7ea0
commit 26089666b6
5 changed files with 31 additions and 68 deletions

View File

@ -794,8 +794,6 @@ ext2_clusteralloc(struct inode *ip, int cg, daddr_t bpref, int len)
goto fail_lock;
bbp = (char *)bp->b_data;
bp->b_xflags |= BX_BKGRDWRITE;
EXT2_LOCK(ump);
/*
* Check to see if a cluster of the needed size (or bigger) is

View File

@ -1312,8 +1312,7 @@ flushbuflist(struct bufv *bufv, int flags, struct bufobj *bo, int slpflag,
xflags = 0;
if (nbp != NULL) {
lblkno = nbp->b_lblkno;
xflags = nbp->b_xflags &
(BX_BKGRDMARKER | BX_VNDIRTY | BX_VNCLEAN);
xflags = nbp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN);
}
retval = EAGAIN;
error = BUF_TIMELOCK(bp,
@ -1357,8 +1356,7 @@ flushbuflist(struct bufv *bufv, int flags, struct bufobj *bo, int slpflag,
if (nbp != NULL &&
(nbp->b_bufobj != bo ||
nbp->b_lblkno != lblkno ||
(nbp->b_xflags &
(BX_BKGRDMARKER | BX_VNDIRTY | BX_VNCLEAN)) != xflags))
(nbp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) != xflags))
break; /* nbp invalid */
}
return (retval);
@ -1501,9 +1499,7 @@ buf_splay(daddr_t lblkno, b_xflags_t xflags, struct buf *root)
return (NULL);
lefttreemax = righttreemin = &dummy;
for (;;) {
if (lblkno < root->b_lblkno ||
(lblkno == root->b_lblkno &&
(xflags & BX_BKGRDMARKER) < (root->b_xflags & BX_BKGRDMARKER))) {
if (lblkno < root->b_lblkno) {
if ((y = root->b_left) == NULL)
break;
if (lblkno < y->b_lblkno) {
@ -1517,9 +1513,7 @@ buf_splay(daddr_t lblkno, b_xflags_t xflags, struct buf *root)
/* Link into the new root's right tree. */
righttreemin->b_left = root;
righttreemin = root;
} else if (lblkno > root->b_lblkno ||
(lblkno == root->b_lblkno &&
(xflags & BX_BKGRDMARKER) > (root->b_xflags & BX_BKGRDMARKER))) {
} else if (lblkno > root->b_lblkno) {
if ((y = root->b_right) == NULL)
break;
if (lblkno > y->b_lblkno) {
@ -1603,9 +1597,7 @@ buf_vlist_add(struct buf *bp, struct bufobj *bo, b_xflags_t xflags)
bp->b_left = NULL;
bp->b_right = NULL;
TAILQ_INSERT_TAIL(&bv->bv_hd, bp, b_bobufs);
} else if (bp->b_lblkno < root->b_lblkno ||
(bp->b_lblkno == root->b_lblkno &&
(bp->b_xflags & BX_BKGRDMARKER) < (root->b_xflags & BX_BKGRDMARKER))) {
} else if (bp->b_lblkno < root->b_lblkno) {
bp->b_left = root->b_left;
bp->b_right = root;
root->b_left = NULL;
@ -1638,20 +1630,18 @@ gbincore(struct bufobj *bo, daddr_t lblkno)
struct buf *bp;
ASSERT_BO_LOCKED(bo);
if ((bp = bo->bo_clean.bv_root) != NULL &&
bp->b_lblkno == lblkno && !(bp->b_xflags & BX_BKGRDMARKER))
if ((bp = bo->bo_clean.bv_root) != NULL && bp->b_lblkno == lblkno)
return (bp);
if ((bp = bo->bo_dirty.bv_root) != NULL &&
bp->b_lblkno == lblkno && !(bp->b_xflags & BX_BKGRDMARKER))
if ((bp = bo->bo_dirty.bv_root) != NULL && bp->b_lblkno == lblkno)
return (bp);
if ((bp = bo->bo_clean.bv_root) != NULL) {
bo->bo_clean.bv_root = bp = buf_splay(lblkno, 0, bp);
if (bp->b_lblkno == lblkno && !(bp->b_xflags & BX_BKGRDMARKER))
if (bp->b_lblkno == lblkno)
return (bp);
}
if ((bp = bo->bo_dirty.bv_root) != NULL) {
bo->bo_dirty.bv_root = bp = buf_splay(lblkno, 0, bp);
if (bp->b_lblkno == lblkno && !(bp->b_xflags & BX_BKGRDMARKER))
if (bp->b_lblkno == lblkno)
return (bp);
}
return (NULL);

View File

@ -3285,7 +3285,6 @@ softdep_process_journal(mp, needwk, flags)
bp->b_lblkno = bp->b_blkno;
bp->b_offset = bp->b_blkno * DEV_BSIZE;
bp->b_bcount = size;
bp->b_bufobj = &ump->um_devvp->v_bufobj;
bp->b_flags &= ~B_INVAL;
bp->b_flags |= B_VALIDSUSPWRT | B_NOCOPY;
/*
@ -3365,9 +3364,7 @@ softdep_process_journal(mp, needwk, flags)
jblocks->jb_needseg = 0;
WORKLIST_INSERT(&bp->b_dep, &jseg->js_list);
FREE_LOCK(&lk);
BO_LOCK(bp->b_bufobj);
bgetvp(ump->um_devvp, bp);
BO_UNLOCK(bp->b_bufobj);
pbgetvp(ump->um_devvp, bp);
/*
* We only do the blocking wait once we find the journal
* entry we're looking for.
@ -3522,6 +3519,7 @@ handle_written_jseg(jseg, bp)
* discarded.
*/
bp->b_flags |= B_INVAL | B_NOCACHE;
pbrelvp(bp);
complete_jsegs(jseg);
}
@ -11450,6 +11448,7 @@ handle_written_bmsafemap(bmsafemap, bp)
struct cg *cgp;
struct fs *fs;
ino_t ino;
int foreground;
int chgs;
if ((bmsafemap->sm_state & IOSTARTED) == 0)
@ -11457,6 +11456,7 @@ handle_written_bmsafemap(bmsafemap, bp)
ump = VFSTOUFS(bmsafemap->sm_list.wk_mp);
chgs = 0;
bmsafemap->sm_state &= ~IOSTARTED;
foreground = (bp->b_xflags & BX_BKGRDMARKER) == 0;
/*
* Release journal work that was waiting on the write.
*/
@ -11477,7 +11477,8 @@ handle_written_bmsafemap(bmsafemap, bp)
if (isset(inosused, ino))
panic("handle_written_bmsafemap: "
"re-allocated inode");
if ((bp->b_xflags & BX_BKGRDMARKER) == 0) {
/* Do the roll-forward only if it's a real copy. */
if (foreground) {
if ((jaddref->ja_mode & IFMT) == IFDIR)
cgp->cg_cs.cs_ndir++;
cgp->cg_cs.cs_nifree--;
@ -11500,7 +11501,8 @@ handle_written_bmsafemap(bmsafemap, bp)
jntmp) {
if ((jnewblk->jn_state & UNDONE) == 0)
continue;
if ((bp->b_xflags & BX_BKGRDMARKER) == 0 &&
/* Do the roll-forward only if it's a real copy. */
if (foreground &&
jnewblk_rollforward(jnewblk, fs, cgp, blksfree))
chgs = 1;
jnewblk->jn_state &= ~(UNDONE | NEWBLOCK);
@ -11540,6 +11542,7 @@ handle_written_bmsafemap(bmsafemap, bp)
return (0);
}
LIST_INSERT_HEAD(&ump->softdep_dirtycg, bmsafemap, sm_next);
if (foreground)
bdirty(bp);
return (1);
}

View File

@ -2000,12 +2000,11 @@ ffs_backgroundwritedone(struct buf *bp)
BO_LOCK(bufobj);
if ((origbp = gbincore(bp->b_bufobj, bp->b_lblkno)) == NULL)
panic("backgroundwritedone: lost buffer");
/* Grab an extra reference to be dropped by the bufdone() below. */
bufobj_wrefl(bufobj);
BO_UNLOCK(bufobj);
/*
* Process dependencies then return any unfinished ones.
*/
pbrelvp(bp);
if (!LIST_EMPTY(&bp->b_dep))
buf_complete(bp);
#ifdef SOFTUPDATES
@ -2051,8 +2050,8 @@ ffs_backgroundwritedone(struct buf *bp)
static int
ffs_bufwrite(struct buf *bp)
{
int oldflags, s;
struct buf *newbp;
int oldflags;
CTR3(KTR_BUF, "bufwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
if (bp->b_flags & B_INVAL) {
@ -2064,7 +2063,6 @@ ffs_bufwrite(struct buf *bp)
if (!BUF_ISLOCKED(bp))
panic("bufwrite: buffer is not busy???");
s = splbio();
/*
* If a background write is already in progress, delay
* writing this block if it is asynchronous. Otherwise
@ -2074,7 +2072,6 @@ ffs_bufwrite(struct buf *bp)
if (bp->b_vflags & BV_BKGRDINPROG) {
if (bp->b_flags & B_ASYNC) {
BO_UNLOCK(bp->b_bufobj);
splx(s);
bdwrite(bp);
return (0);
}
@ -2105,25 +2102,19 @@ ffs_bufwrite(struct buf *bp)
if (newbp == NULL)
goto normal_write;
/*
* set it to be identical to the old block. We have to
* set b_lblkno and BKGRDMARKER before calling bgetvp()
* to avoid confusing the splay tree and gbincore().
*/
KASSERT((bp->b_flags & B_UNMAPPED) == 0, ("Unmapped cg"));
memcpy(newbp->b_data, bp->b_data, bp->b_bufsize);
newbp->b_lblkno = bp->b_lblkno;
newbp->b_xflags |= BX_BKGRDMARKER;
BO_LOCK(bp->b_bufobj);
bp->b_vflags |= BV_BKGRDINPROG;
bgetvp(bp->b_vp, newbp);
BO_UNLOCK(bp->b_bufobj);
newbp->b_bufobj = &bp->b_vp->v_bufobj;
newbp->b_xflags |= BX_BKGRDMARKER;
newbp->b_lblkno = bp->b_lblkno;
newbp->b_blkno = bp->b_blkno;
newbp->b_offset = bp->b_offset;
newbp->b_iodone = ffs_backgroundwritedone;
newbp->b_flags |= B_ASYNC;
newbp->b_flags &= ~B_INVAL;
pbgetvp(bp->b_vp, newbp);
#ifdef SOFTUPDATES
/*
@ -2139,12 +2130,9 @@ ffs_bufwrite(struct buf *bp)
#endif
/*
* Initiate write on the copy, release the original to
* the B_LOCKED queue so that it cannot go away until
* the background write completes. If not locked it could go
* away and then be reconstituted while it was being written.
* If the reconstituted buffer were written, we could end up
* with two background copies being written at the same time.
* Initiate write on the copy, release the original. The
* BKGRDINPROG flag prevents it from going away until
* the background write completes.
*/
bqrelse(bp);
bp = newbp;

View File

@ -469,17 +469,9 @@ pbrelvp(struct buf *bp)
KASSERT(bp->b_vp != NULL, ("pbrelvp: NULL"));
KASSERT(bp->b_bufobj != NULL, ("pbrelvp: NULL bufobj"));
KASSERT((bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) == 0,
("pbrelvp: pager buf on vnode list."));
/* XXX REMOVE ME */
BO_LOCK(bp->b_bufobj);
if (TAILQ_NEXT(bp, b_bobufs) != NULL) {
panic(
"relpbuf(): b_vp was probably reassignbuf()d %p %x",
bp,
(int)bp->b_flags
);
}
BO_UNLOCK(bp->b_bufobj);
bp->b_vp = NULL;
bp->b_bufobj = NULL;
bp->b_flags &= ~B_PAGING;
@ -494,17 +486,9 @@ pbrelbo(struct buf *bp)
KASSERT(bp->b_vp == NULL, ("pbrelbo: vnode"));
KASSERT(bp->b_bufobj != NULL, ("pbrelbo: NULL bufobj"));
KASSERT((bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) == 0,
("pbrelbo: pager buf on vnode list."));
/* XXX REMOVE ME */
BO_LOCK(bp->b_bufobj);
if (TAILQ_NEXT(bp, b_bobufs) != NULL) {
panic(
"relpbuf(): b_vp was probably reassignbuf()d %p %x",
bp,
(int)bp->b_flags
);
}
BO_UNLOCK(bp->b_bufobj);
bp->b_bufobj = NULL;
bp->b_flags &= ~B_PAGING;
}