 - Complete part of the unfinished bufobj work by consistently using
   BO_LOCK/UNLOCK/MTX when manipulating the bufobj (a usage sketch
   follows this list).
 - Create a new lock in the bufobj to lock bufobj fields independently.
   This leaves the vnode interlock as an 'identity' lock while the bufobj
   is an io lock.  The bufobj lock is ordered before the vnode interlock
   and also before the mnt ilock.
 - Exploit this new lock order to simplify softdep_check_suspend().
 - A few sync-related functions are marked with a new XXX to note that
   we may not properly interlock against a non-zero bv_cnt when
   attempting to sync all vnodes on a mountlist.  I do not believe this
   race is important.  If I'm wrong, this will make these locations
   easier to find.
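
For illustration only (not part of the diff below), a minimal sketch of the
usage pattern the new lock establishes, modeled on drain_output() in the
softdep changes further down; the function name bo_drain_example() and the
exact include list are illustrative:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/priority.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/buf.h>
#include <sys/bufobj.h>
#include <sys/vnode.h>

/*
 * Wait for all writes on a vnode's bufobj to drain, taking the
 * per-bufobj mutex instead of the vnode interlock.  The bufobj lock
 * orders before the vnode interlock, so BO_LOCK(bo) may be followed
 * by VI_LOCK(vp) when both are needed (cf. ffs_rawread_sync() below).
 */
static void
bo_drain_example(struct vnode *vp)
{
	struct bufobj *bo;

	bo = &vp->v_bufobj;
	BO_LOCK(bo);			/* was VI_LOCK(vp) */
	while (bo->bo_numoutput > 0) {
		bo->bo_flag |= BO_WWAIT;
		/* msleep() drops and reacquires BO_MTX(bo). */
		msleep(&bo->bo_numoutput, BO_MTX(bo),
		    PRIBIO + 1, "drainvp", 0);
	}
	BO_UNLOCK(bo);			/* was VI_UNLOCK(vp) */
}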

Reviewed by:	kib (earlier diff)
Tested by:	kris, pho (earlier diff)
Jeff Roberson 2008-03-22 09:15:16 +00:00
parent 435cdf88ea
commit 698b1a6643
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=177493
18 changed files with 244 additions and 254 deletions

View File

@ -803,7 +803,10 @@ msdosfs_unmount(struct mount *mp, int mntflags, struct thread *td)
#ifdef MSDOSFS_DEBUG
{
struct vnode *vp = pmp->pm_devvp;
struct bufobj *bo;
bo = &vp->v_bufobj;
BO_LOCK(bo);
VI_LOCK(vp);
vn_printf(vp,
"msdosfs_umount(): just before calling VOP_CLOSE()\n");
@ -815,6 +818,7 @@ msdosfs_unmount(struct mount *mp, int mntflags, struct thread *td)
TAILQ_FIRST(&vp->v_bufobj.bo_dirty.bv_hd),
vp->v_bufobj.bo_numoutput, vp->v_type);
VI_UNLOCK(vp);
BO_UNLOCK(bo);
}
#endif
DROP_GIANT();

View File

@ -1608,6 +1608,7 @@ vfs_bio_clcheck(struct vnode *vp, int size, daddr_t lblkno, daddr_t blkno)
int
vfs_bio_awrite(struct buf *bp)
{
struct bufobj *bo;
int i;
int j;
daddr_t lblkno = bp->b_lblkno;
@ -1617,6 +1618,7 @@ vfs_bio_awrite(struct buf *bp)
int size;
int maxcl;
bo = &vp->v_bufobj;
/*
* right now we support clustered writing only to regular files. If
* we find a clusterable block we could be in the middle of a cluster
@ -1629,7 +1631,7 @@ vfs_bio_awrite(struct buf *bp)
size = vp->v_mount->mnt_stat.f_iosize;
maxcl = MAXPHYS / size;
VI_LOCK(vp);
BO_LOCK(bo);
for (i = 1; i < maxcl; i++)
if (vfs_bio_clcheck(vp, size, lblkno + i,
bp->b_blkno + ((i * size) >> DEV_BSHIFT)) == 0)
@ -1639,8 +1641,7 @@ vfs_bio_awrite(struct buf *bp)
if (vfs_bio_clcheck(vp, size, lblkno - j,
bp->b_blkno - ((j * size) >> DEV_BSHIFT)) == 0)
break;
VI_UNLOCK(vp);
BO_UNLOCK(bo);
--j;
ncl = i + j;
/*
@ -2454,7 +2455,7 @@ getblk(struct vnode * vp, daddr_t blkno, int size, int slpflag, int slptimeo,
lockflags |= LK_NOWAIT;
error = BUF_TIMELOCK(bp, lockflags,
VI_MTX(vp), "getblk", slpflag, slptimeo);
BO_MTX(bo), "getblk", slpflag, slptimeo);
/*
* If we slept and got the lock we have to restart in case

View File

@ -94,12 +94,14 @@ cluster_read(vp, filesize, lblkno, size, cred, totread, seqcount, bpp)
struct buf **bpp;
{
struct buf *bp, *rbp, *reqbp;
struct bufobj *bo;
daddr_t blkno, origblkno;
int maxra, racluster;
int error, ncontig;
int i;
error = 0;
bo = &vp->v_bufobj;
/*
* Try to limit the amount of read-ahead by a few
@ -130,7 +132,7 @@ cluster_read(vp, filesize, lblkno, size, cred, totread, seqcount, bpp)
return 0;
} else {
bp->b_flags &= ~B_RAM;
VI_LOCK(vp);
BO_LOCK(bo);
for (i = 1; i < maxra; i++) {
/*
* Stop if the buffer does not exist or it
@ -153,7 +155,7 @@ cluster_read(vp, filesize, lblkno, size, cred, totread, seqcount, bpp)
BUF_UNLOCK(rbp);
}
}
VI_UNLOCK(vp);
BO_UNLOCK(bo);
if (i >= maxra) {
return 0;
}
@ -305,6 +307,7 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
int run;
struct buf *fbp;
{
struct bufobj *bo;
struct buf *bp, *tbp;
daddr_t bn;
int i, inc, j;
@ -330,7 +333,6 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
tbp->b_flags |= B_ASYNC | B_RAM;
tbp->b_iocmd = BIO_READ;
}
tbp->b_blkno = blkno;
if( (tbp->b_flags & B_MALLOC) ||
((tbp->b_flags & B_VMIO) == 0) || (run <= 1) )
@ -364,6 +366,7 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
bp->b_npages = 0;
inc = btodb(size);
bo = &vp->v_bufobj;
for (bn = blkno, i = 0; i < run; ++i, bn += inc) {
if (i != 0) {
if ((bp->b_npages * PAGE_SIZE) +
@ -384,15 +387,15 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
* VMIO backed. The clustering code can only deal
* with VMIO-backed buffers.
*/
VI_LOCK(vp);
BO_LOCK(bo);
if ((tbp->b_vflags & BV_BKGRDINPROG) ||
(tbp->b_flags & B_CACHE) ||
(tbp->b_flags & B_VMIO) == 0) {
VI_UNLOCK(vp);
BO_UNLOCK(bo);
bqrelse(tbp);
break;
}
VI_UNLOCK(vp);
BO_UNLOCK(bo);
/*
* The buffer must be completely invalid in order to
@ -740,26 +743,28 @@ cluster_wbuild(vp, size, start_lbn, len)
int len;
{
struct buf *bp, *tbp;
struct bufobj *bo;
int i, j;
int totalwritten = 0;
int dbsize = btodb(size);
bo = &vp->v_bufobj;
while (len > 0) {
/*
* If the buffer is not delayed-write (i.e. dirty), or it
* is delayed-write but either locked or inval, it cannot
* partake in the clustered write.
*/
VI_LOCK(vp);
BO_LOCK(bo);
if ((tbp = gbincore(&vp->v_bufobj, start_lbn)) == NULL ||
(tbp->b_vflags & BV_BKGRDINPROG)) {
VI_UNLOCK(vp);
BO_UNLOCK(bo);
++start_lbn;
--len;
continue;
}
if (BUF_LOCK(tbp,
LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, VI_MTX(vp))) {
LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, BO_MTX(bo))) {
++start_lbn;
--len;
continue;
@ -838,10 +843,10 @@ cluster_wbuild(vp, size, start_lbn, len)
* If the adjacent data is not even in core it
* can't need to be written.
*/
VI_LOCK(vp);
if ((tbp = gbincore(&vp->v_bufobj, start_lbn)) == NULL ||
BO_LOCK(bo);
if ((tbp = gbincore(bo, start_lbn)) == NULL ||
(tbp->b_vflags & BV_BKGRDINPROG)) {
VI_UNLOCK(vp);
BO_UNLOCK(bo);
break;
}
@ -854,7 +859,7 @@ cluster_wbuild(vp, size, start_lbn, len)
*/
if (BUF_LOCK(tbp,
LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK,
VI_MTX(vp)))
BO_MTX(bo)))
break;
if ((tbp->b_flags & (B_VMIO | B_CLUSTEROK |

View File

@ -405,12 +405,13 @@ vop_stdfsync(ap)
int error = 0;
int maxretry = 1000; /* large, arbitrarily chosen */
VI_LOCK(vp);
bo = &vp->v_bufobj;
BO_LOCK(bo);
loop1:
/*
* MARK/SCAN initialization to avoid infinite loops.
*/
TAILQ_FOREACH(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs) {
TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) {
bp->b_vflags &= ~BV_SCANNED;
bp->b_error = 0;
}
@ -419,16 +420,16 @@ vop_stdfsync(ap)
* Flush all dirty buffers associated with a vnode.
*/
loop2:
TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
if ((bp->b_vflags & BV_SCANNED) != 0)
continue;
bp->b_vflags |= BV_SCANNED;
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
continue;
VI_UNLOCK(vp);
KASSERT(bp->b_bufobj == &vp->v_bufobj,
BO_UNLOCK(bo);
KASSERT(bp->b_bufobj == bo,
("bp %p wrong b_bufobj %p should be %p",
bp, bp->b_bufobj, &vp->v_bufobj));
bp, bp->b_bufobj, bo));
if ((bp->b_flags & B_DELWRI) == 0)
panic("fsync: not dirty");
if ((vp->v_object != NULL) && (bp->b_flags & B_CLUSTEROK)) {
@ -437,7 +438,7 @@ vop_stdfsync(ap)
bremfree(bp);
bawrite(bp);
}
VI_LOCK(vp);
BO_LOCK(bo);
goto loop2;
}
@ -448,7 +449,6 @@ vop_stdfsync(ap)
* retry if dirty blocks still exist.
*/
if (ap->a_waitfor == MNT_WAIT) {
bo = &vp->v_bufobj;
bufobj_wwait(bo, 0, 0);
if (bo->bo_dirty.bv_cnt > 0) {
/*
@ -464,7 +464,7 @@ vop_stdfsync(ap)
error = EAGAIN;
}
}
VI_UNLOCK(vp);
BO_UNLOCK(bo);
if (error == EAGAIN)
vprint("fsync: giving up on dirty", vp);
@ -571,14 +571,11 @@ vfs_stdsync(mp, waitfor, td)
MNT_ILOCK(mp);
loop:
MNT_VNODE_FOREACH(vp, mp, mvp) {
VI_LOCK(vp);
if (vp->v_bufobj.bo_dirty.bv_cnt == 0) {
VI_UNLOCK(vp);
/* bv_cnt is an acceptable race here. */
if (vp->v_bufobj.bo_dirty.bv_cnt == 0)
continue;
}
VI_LOCK(vp);
MNT_IUNLOCK(mp);
if ((error = vget(vp, lockreq, td)) != 0) {
MNT_ILOCK(mp);
if (error == ENOENT) {

View File

@ -936,7 +936,7 @@ getnewvnode(const char *tag, struct mount *mp, struct vop_vector *vops,
*/
bo = &vp->v_bufobj;
bo->__bo_vnode = vp;
bo->bo_mtx = &vp->v_interlock;
mtx_init(BO_MTX(bo), "bufobj interlock", NULL, MTX_DEF);
bo->bo_ops = &buf_ops_bio;
bo->bo_private = vp;
TAILQ_INIT(&bo->bo_clean.bv_hd);
@ -1236,8 +1236,8 @@ vtruncbuf(struct vnode *vp, struct ucred *cred, struct thread *td,
ASSERT_VOP_LOCKED(vp, "vtruncbuf");
restart:
VI_LOCK(vp);
bo = &vp->v_bufobj;
BO_LOCK(bo);
anyfreed = 1;
for (;anyfreed;) {
anyfreed = 0;
@ -1246,7 +1246,7 @@ vtruncbuf(struct vnode *vp, struct ucred *cred, struct thread *td,
continue;
if (BUF_LOCK(bp,
LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
VI_MTX(vp)) == ENOLCK)
BO_MTX(bo)) == ENOLCK)
goto restart;
bremfree(bp);
@ -1261,7 +1261,7 @@ vtruncbuf(struct vnode *vp, struct ucred *cred, struct thread *td,
(nbp->b_flags & B_DELWRI))) {
goto restart;
}
VI_LOCK(vp);
BO_LOCK(bo);
}
TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
@ -1269,7 +1269,7 @@ vtruncbuf(struct vnode *vp, struct ucred *cred, struct thread *td,
continue;
if (BUF_LOCK(bp,
LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
VI_MTX(vp)) == ENOLCK)
BO_MTX(bo)) == ENOLCK)
goto restart;
bremfree(bp);
bp->b_flags |= (B_INVAL | B_RELBUF);
@ -1282,7 +1282,7 @@ vtruncbuf(struct vnode *vp, struct ucred *cred, struct thread *td,
(nbp->b_flags & B_DELWRI) == 0)) {
goto restart;
}
VI_LOCK(vp);
BO_LOCK(bo);
}
}
@ -1305,13 +1305,13 @@ vtruncbuf(struct vnode *vp, struct ucred *cred, struct thread *td,
bremfree(bp);
bawrite(bp);
VI_LOCK(vp);
BO_LOCK(bo);
goto restartsync;
}
}
bufobj_wwait(bo, 0, 0);
VI_UNLOCK(vp);
BO_UNLOCK(bo);
vnode_pager_setsize(vp, length);
return (0);
@ -1503,24 +1503,25 @@ gbincore(struct bufobj *bo, daddr_t lblkno)
void
bgetvp(struct vnode *vp, struct buf *bp)
{
struct bufobj *bo;
bo = &vp->v_bufobj;
ASSERT_BO_LOCKED(bo);
VNASSERT(bp->b_vp == NULL, bp->b_vp, ("bgetvp: not free"));
CTR3(KTR_BUF, "bgetvp(%p) vp %p flags %X", bp, vp, bp->b_flags);
VNASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) == 0, vp,
("bgetvp: bp already attached! %p", bp));
ASSERT_VI_LOCKED(vp, "bgetvp");
vholdl(vp);
if (VFS_NEEDSGIANT(vp->v_mount) ||
vp->v_bufobj.bo_flag & BO_NEEDSGIANT)
vhold(vp);
if (VFS_NEEDSGIANT(vp->v_mount) || bo->bo_flag & BO_NEEDSGIANT)
bp->b_flags |= B_NEEDSGIANT;
bp->b_vp = vp;
bp->b_bufobj = &vp->v_bufobj;
bp->b_bufobj = bo;
/*
* Insert onto list for new vnode.
*/
buf_vlist_add(bp, &vp->v_bufobj, BX_VNCLEAN);
buf_vlist_add(bp, bo, BX_VNCLEAN);
}
/*
@ -1557,7 +1558,8 @@ brelvp(struct buf *bp)
bp->b_vp = NULL;
bp->b_bufobj = NULL;
waiters = bp->b_waiters;
vdropl(vp);
BO_UNLOCK(bo);
vdrop(vp);
return (waiters);
}
@ -1668,7 +1670,7 @@ sync_vnode(struct synclist *slp, struct bufobj **bo, struct thread *td)
(void) VOP_FSYNC(vp, MNT_LAZY, td);
VOP_UNLOCK(vp, 0);
vn_finished_write(mp);
VI_LOCK(vp);
BO_LOCK(*bo);
if (((*bo)->bo_flag & BO_ONWORKLST) != 0) {
/*
* Put us back on the worklist. The worklist
@ -1678,7 +1680,8 @@ sync_vnode(struct synclist *slp, struct bufobj **bo, struct thread *td)
*/
vn_syncer_add_to_worklist(*bo, syncdelay);
}
vdropl(vp);
BO_UNLOCK(*bo);
vdrop(vp);
VFS_UNLOCK_GIANT(vfslocked);
mtx_lock(&sync_mtx);
return (0);
@ -1886,7 +1889,7 @@ reassignbuf(struct buf *bp)
/*
* Delete from old vnode list, if on one.
*/
VI_LOCK(vp);
BO_LOCK(bo);
if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN))
buf_vlist_remove(bp);
else
@ -1937,7 +1940,7 @@ reassignbuf(struct buf *bp)
KASSERT(bp == NULL || bp->b_bufobj == bo,
("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo));
#endif
VI_UNLOCK(vp);
BO_UNLOCK(bo);
}
/*
@ -3127,6 +3130,7 @@ int
vfs_allocate_syncvnode(struct mount *mp)
{
struct vnode *vp;
struct bufobj *bo;
static long start, incr, next;
int error;
@ -3155,14 +3159,14 @@ vfs_allocate_syncvnode(struct mount *mp)
}
next = start;
}
VI_LOCK(vp);
vn_syncer_add_to_worklist(&vp->v_bufobj,
syncdelay > 0 ? next % syncdelay : 0);
bo = &vp->v_bufobj;
BO_LOCK(bo);
vn_syncer_add_to_worklist(bo, syncdelay > 0 ? next % syncdelay : 0);
/* XXX - vn_syncer_add_to_worklist() also grabs and drops sync_mtx. */
mtx_lock(&sync_mtx);
sync_vnode_count++;
mtx_unlock(&sync_mtx);
VI_UNLOCK(vp);
BO_UNLOCK(bo);
mp->mnt_syncer = vp;
return (0);
}
@ -3244,8 +3248,8 @@ sync_reclaim(struct vop_reclaim_args *ap)
struct vnode *vp = ap->a_vp;
struct bufobj *bo;
VI_LOCK(vp);
bo = &vp->v_bufobj;
BO_LOCK(bo);
vp->v_mount->mnt_syncer = NULL;
if (bo->bo_flag & BO_ONWORKLST) {
mtx_lock(&sync_mtx);
@ -3255,7 +3259,7 @@ sync_reclaim(struct vop_reclaim_args *ap)
mtx_unlock(&sync_mtx);
bo->bo_flag &= ~BO_ONWORKLST;
}
VI_UNLOCK(vp);
BO_UNLOCK(bo);
return (0);
}

View File

@ -749,6 +749,7 @@ nfs4_sync(struct mount *mp, int waitfor, struct thread *td)
MNT_VNODE_FOREACH(vp, mp, mvp) {
VI_LOCK(vp);
MNT_IUNLOCK(mp);
/* XXX racy bv_cnt check. */
if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
waitfor == MNT_LAZY) {
VI_UNLOCK(vp);

View File

@ -2486,11 +2486,12 @@ nfs4_flush(struct vnode *vp, int waitfor, struct thread *td,
int commit)
{
struct nfsnode *np = VTONFS(vp);
struct bufobj *bo;
struct buf *bp;
int i;
struct buf *nbp;
struct nfsmount *nmp = VFSTONFS(vp->v_mount);
int s, error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
int error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
int passone = 1;
u_quad_t off, endoff, toff;
struct ucred* wcred = NULL;
@ -2500,6 +2501,7 @@ nfs4_flush(struct vnode *vp, int waitfor, struct thread *td,
#endif
struct buf *bvec_on_stack[NFS_COMMITBVECSIZ];
int bvecsize = 0, bveccount;
bo = &vp->v_bufobj;
if (nmp->nm_flag & NFSMNT_INT)
slpflag = PCATCH;
@ -2517,15 +2519,14 @@ nfs4_flush(struct vnode *vp, int waitfor, struct thread *td,
endoff = 0;
bvecpos = 0;
if (NFS_ISV3(vp) && commit) {
s = splbio();
if (bvec != NULL && bvec != bvec_on_stack)
free(bvec, M_TEMP);
/*
* Count up how many buffers waiting for a commit.
*/
bveccount = 0;
VI_LOCK(vp);
TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
BO_LOCK(bo);
TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
if (!BUF_ISLOCKED(bp) &&
(bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
== (B_DELWRI | B_NEEDCOMMIT))
@ -2542,11 +2543,11 @@ nfs4_flush(struct vnode *vp, int waitfor, struct thread *td,
* Release the vnode interlock to avoid a lock
* order reversal.
*/
VI_UNLOCK(vp);
BO_UNLOCK(bo);
bvec = (struct buf **)
malloc(bveccount * sizeof(struct buf *),
M_TEMP, M_NOWAIT);
VI_LOCK(vp);
BO_LOCK(bo);
if (bvec == NULL) {
bvec = bvec_on_stack;
bvecsize = NFS_COMMITBVECSIZ;
@ -2556,7 +2557,7 @@ nfs4_flush(struct vnode *vp, int waitfor, struct thread *td,
bvec = bvec_on_stack;
bvecsize = NFS_COMMITBVECSIZ;
}
TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
if (bvecpos >= bvecsize)
break;
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
@ -2569,7 +2570,7 @@ nfs4_flush(struct vnode *vp, int waitfor, struct thread *td,
nbp = TAILQ_NEXT(bp, b_bobufs);
continue;
}
VI_UNLOCK(vp);
BO_UNLOCK(bo);
bremfree(bp);
/*
* Work out if all buffers are using the same cred
@ -2588,7 +2589,7 @@ nfs4_flush(struct vnode *vp, int waitfor, struct thread *td,
wcred = NOCRED;
vfs_busy_pages(bp, 1);
VI_LOCK(vp);
BO_LOCK(bo);
/*
* bp is protected by being locked, but nbp is not
* and vfs_busy_pages() may sleep. We have to
@ -2612,8 +2613,7 @@ nfs4_flush(struct vnode *vp, int waitfor, struct thread *td,
if (toff > endoff)
endoff = toff;
}
splx(s);
VI_UNLOCK(vp);
BO_UNLOCK(bo);
}
if (bvecpos > 0) {
/*
@ -2665,14 +2665,12 @@ nfs4_flush(struct vnode *vp, int waitfor, struct thread *td,
* specific. We should probably move that
* into bundirty(). XXX
*/
s = splbio();
bufobj_wref(&vp->v_bufobj);
bufobj_wref(bo);
bp->b_flags |= B_ASYNC;
bundirty(bp);
bp->b_flags &= ~B_DONE;
bp->b_ioflags &= ~BIO_ERROR;
bp->b_dirtyoff = bp->b_dirtyend = 0;
splx(s);
bufdone(bp);
}
}
@ -2682,17 +2680,15 @@ nfs4_flush(struct vnode *vp, int waitfor, struct thread *td,
* Start/do any write(s) that are required.
*/
loop:
s = splbio();
VI_LOCK(vp);
TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
BO_LOCK(bo);
TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
if (waitfor != MNT_WAIT || passone)
continue;
error = BUF_TIMELOCK(bp,
LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
VI_MTX(vp), "nfsfsync", slpflag, slptimeo);
splx(s);
BO_MTX(bo), "nfsfsync", slpflag, slptimeo);
if (error == 0)
panic("nfs4_fsync: inconsistent lock");
if (error == ENOLCK)
@ -2713,27 +2709,25 @@ nfs4_flush(struct vnode *vp, int waitfor, struct thread *td,
BUF_UNLOCK(bp);
continue;
}
VI_UNLOCK(vp);
BO_LOCK(bo);
bremfree(bp);
if (passone || !commit)
bp->b_flags |= B_ASYNC;
else
bp->b_flags |= B_ASYNC;
splx(s);
bwrite(bp);
goto loop;
}
splx(s);
if (passone) {
passone = 0;
VI_UNLOCK(vp);
BO_UNLOCK(bo);
goto again;
}
if (waitfor == MNT_WAIT) {
while (vp->v_bufobj.bo_numoutput) {
error = bufobj_wwait(&vp->v_bufobj, slpflag, slptimeo);
while (bo->bo_numoutput) {
error = bufobj_wwait(bo, slpflag, slptimeo);
if (error) {
VI_UNLOCK(vp);
BO_UNLOCK(bo);
if (nfs4_sigintr(nmp, NULL, td)) {
error = EINTR;
goto done;
@ -2742,15 +2736,15 @@ nfs4_flush(struct vnode *vp, int waitfor, struct thread *td,
slpflag = 0;
slptimeo = 2 * hz;
}
VI_LOCK(vp);
BO_LOCK(bo);
}
}
if (vp->v_bufobj.bo_dirty.bv_cnt > 0 && commit) {
VI_UNLOCK(vp);
BO_UNLOCK(bo);
goto loop;
}
}
VI_UNLOCK(vp);
BO_UNLOCK(bo);
if (np->n_flag & NWRITEERR) {
error = np->n_error;
np->n_flag &= ~NWRITEERR;

View File

@ -915,28 +915,31 @@ nfs_clearcommit(struct mount *mp)
{
struct vnode *vp, *nvp;
struct buf *bp, *nbp;
int s;
struct bufobj *bo;
s = splbio();
MNT_ILOCK(mp);
MNT_VNODE_FOREACH(vp, mp, nvp) {
bo = &vp->v_bufobj;
VI_LOCK(vp);
if (vp->v_iflag & VI_DOOMED) {
VI_UNLOCK(vp);
continue;
}
vholdl(vp);
VI_UNLOCK(vp);
MNT_IUNLOCK(mp);
TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
BO_LOCK(bo);
TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
if (!BUF_ISLOCKED(bp) &&
(bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
== (B_DELWRI | B_NEEDCOMMIT))
bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
}
VI_UNLOCK(vp);
BO_UNLOCK(bo);
vdrop(vp);
MNT_ILOCK(mp);
}
MNT_IUNLOCK(mp);
splx(s);
}
/*

View File

@ -1074,6 +1074,7 @@ nfs_sync(struct mount *mp, int waitfor, struct thread *td)
MNT_VNODE_FOREACH(vp, mp, mvp) {
VI_LOCK(vp);
MNT_IUNLOCK(mp);
/* XXX Racy bv_cnt check. */
if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
waitfor == MNT_LAZY) {
VI_UNLOCK(vp);

View File

@ -2736,11 +2736,12 @@ nfs_flush(struct vnode *vp, int waitfor, struct thread *td,
int i;
struct buf *nbp;
struct nfsmount *nmp = VFSTONFS(vp->v_mount);
int s, error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
int error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
int passone = 1;
u_quad_t off, endoff, toff;
struct ucred* wcred = NULL;
struct buf **bvec = NULL;
struct bufobj *bo;
#ifndef NFS_COMMITBVECSIZ
#define NFS_COMMITBVECSIZ 20
#endif
@ -2751,6 +2752,7 @@ nfs_flush(struct vnode *vp, int waitfor, struct thread *td,
slpflag = PCATCH;
if (!commit)
passone = 0;
bo = &vp->v_bufobj;
/*
* A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the
* server, but has not been committed to stable storage on the server
@ -2763,15 +2765,14 @@ nfs_flush(struct vnode *vp, int waitfor, struct thread *td,
endoff = 0;
bvecpos = 0;
if (NFS_ISV3(vp) && commit) {
s = splbio();
if (bvec != NULL && bvec != bvec_on_stack)
free(bvec, M_TEMP);
/*
* Count up how many buffers waiting for a commit.
*/
bveccount = 0;
VI_LOCK(vp);
TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
BO_LOCK(bo);
TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
if (!BUF_ISLOCKED(bp) &&
(bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
== (B_DELWRI | B_NEEDCOMMIT))
@ -2788,11 +2789,11 @@ nfs_flush(struct vnode *vp, int waitfor, struct thread *td,
* Release the vnode interlock to avoid a lock
* order reversal.
*/
VI_UNLOCK(vp);
BO_UNLOCK(bo);
bvec = (struct buf **)
malloc(bveccount * sizeof(struct buf *),
M_TEMP, M_NOWAIT);
VI_LOCK(vp);
BO_LOCK(bo);
if (bvec == NULL) {
bvec = bvec_on_stack;
bvecsize = NFS_COMMITBVECSIZ;
@ -2802,7 +2803,7 @@ nfs_flush(struct vnode *vp, int waitfor, struct thread *td,
bvec = bvec_on_stack;
bvecsize = NFS_COMMITBVECSIZ;
}
TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
if (bvecpos >= bvecsize)
break;
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
@ -2815,7 +2816,7 @@ nfs_flush(struct vnode *vp, int waitfor, struct thread *td,
nbp = TAILQ_NEXT(bp, b_bobufs);
continue;
}
VI_UNLOCK(vp);
BO_UNLOCK(bo);
bremfree(bp);
/*
* Work out if all buffers are using the same cred
@ -2834,7 +2835,7 @@ nfs_flush(struct vnode *vp, int waitfor, struct thread *td,
wcred = NOCRED;
vfs_busy_pages(bp, 1);
VI_LOCK(vp);
BO_LOCK(bo);
/*
* bp is protected by being locked, but nbp is not
* and vfs_busy_pages() may sleep. We have to
@ -2858,8 +2859,7 @@ nfs_flush(struct vnode *vp, int waitfor, struct thread *td,
if (toff > endoff)
endoff = toff;
}
splx(s);
VI_UNLOCK(vp);
BO_UNLOCK(bo);
}
if (bvecpos > 0) {
/*
@ -2911,14 +2911,12 @@ nfs_flush(struct vnode *vp, int waitfor, struct thread *td,
* specific. We should probably move that
* into bundirty(). XXX
*/
s = splbio();
bufobj_wref(&vp->v_bufobj);
bufobj_wref(bo);
bp->b_flags |= B_ASYNC;
bundirty(bp);
bp->b_flags &= ~B_DONE;
bp->b_ioflags &= ~BIO_ERROR;
bp->b_dirtyoff = bp->b_dirtyend = 0;
splx(s);
bufdone(bp);
}
}
@ -2928,17 +2926,15 @@ nfs_flush(struct vnode *vp, int waitfor, struct thread *td,
* Start/do any write(s) that are required.
*/
loop:
s = splbio();
VI_LOCK(vp);
TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
BO_LOCK(bo);
TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
if (waitfor != MNT_WAIT || passone)
continue;
error = BUF_TIMELOCK(bp,
LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
VI_MTX(vp), "nfsfsync", slpflag, slptimeo);
splx(s);
BO_MTX(bo), "nfsfsync", slpflag, slptimeo);
if (error == 0) {
BUF_UNLOCK(bp);
goto loop;
@ -2961,13 +2957,12 @@ nfs_flush(struct vnode *vp, int waitfor, struct thread *td,
BUF_UNLOCK(bp);
continue;
}
VI_UNLOCK(vp);
BO_UNLOCK(bo);
bremfree(bp);
if (passone || !commit)
bp->b_flags |= B_ASYNC;
else
bp->b_flags |= B_ASYNC;
splx(s);
bwrite(bp);
if (nfs_sigintr(nmp, NULL, td)) {
error = EINTR;
@ -2975,17 +2970,16 @@ nfs_flush(struct vnode *vp, int waitfor, struct thread *td,
}
goto loop;
}
splx(s);
if (passone) {
passone = 0;
VI_UNLOCK(vp);
BO_UNLOCK(bo);
goto again;
}
if (waitfor == MNT_WAIT) {
while (vp->v_bufobj.bo_numoutput) {
error = bufobj_wwait(&vp->v_bufobj, slpflag, slptimeo);
while (bo->bo_numoutput) {
error = bufobj_wwait(bo, slpflag, slptimeo);
if (error) {
VI_UNLOCK(vp);
BO_UNLOCK(bo);
error = nfs_sigintr(nmp, NULL, td);
if (error)
goto done;
@ -2993,17 +2987,17 @@ nfs_flush(struct vnode *vp, int waitfor, struct thread *td,
slpflag = 0;
slptimeo = 2 * hz;
}
VI_LOCK(vp);
BO_LOCK(bo);
}
}
if (vp->v_bufobj.bo_dirty.bv_cnt != 0 && commit) {
VI_UNLOCK(vp);
if (bo->bo_dirty.bv_cnt != 0 && commit) {
BO_UNLOCK(bo);
goto loop;
}
/*
* Wait for all the async IO requests to drain
*/
VI_UNLOCK(vp);
BO_UNLOCK(bo);
mtx_lock(&np->n_mtx);
while (np->n_directio_asyncwr > 0) {
np->n_flag |= NFSYNCWAIT;
@ -3020,14 +3014,14 @@ nfs_flush(struct vnode *vp, int waitfor, struct thread *td,
}
mtx_unlock(&np->n_mtx);
} else
VI_UNLOCK(vp);
BO_UNLOCK(bo);
mtx_lock(&np->n_mtx);
if (np->n_flag & NWRITEERR) {
error = np->n_error;
np->n_flag &= ~NWRITEERR;
}
if (commit && vp->v_bufobj.bo_dirty.bv_cnt == 0 &&
vp->v_bufobj.bo_numoutput == 0 && np->n_directio_asyncwr == 0)
if (commit && bo->bo_dirty.bv_cnt == 0 &&
bo->bo_numoutput == 0 && np->n_directio_asyncwr == 0)
np->n_flag &= ~NMODIFIED;
mtx_unlock(&np->n_mtx);
done:

View File

@ -3847,7 +3847,7 @@ nfsrv_commit(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
*/
int iosize = vp->v_mount->mnt_stat.f_iosize;
int iomask = iosize - 1;
int s;
struct bufobj *bo;
daddr_t lblkno;
/*
@ -3870,8 +3870,8 @@ nfsrv_commit(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
VM_OBJECT_UNLOCK(vp->v_object);
}
s = splbio();
VI_LOCK(vp);
bo = &vp->v_bufobj;
BO_LOCK(bo);
while (cnt > 0) {
struct buf *bp;
@ -3887,8 +3887,8 @@ nfsrv_commit(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
*/
if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) {
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL |
LK_INTERLOCK, VI_MTX(vp)) == ENOLCK) {
VI_LOCK(vp);
LK_INTERLOCK, BO_MTX(bo)) == ENOLCK) {
BO_LOCK(bo);
continue; /* retry */
}
if ((bp->b_flags & (B_DELWRI|B_INVAL)) ==
@ -3899,7 +3899,7 @@ nfsrv_commit(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
++nfs_commit_miss;
} else
BUF_UNLOCK(bp);
VI_LOCK(vp);
BO_LOCK(bo);
}
++nfs_commit_blks;
if (cnt < iosize)
@ -3907,8 +3907,7 @@ nfsrv_commit(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
cnt -= iosize;
++lblkno;
}
VI_UNLOCK(vp);
splx(s);
BO_UNLOCK(bo);
}
aft_ret = VOP_GETATTR(vp, &aft, cred, td);

View File

@ -88,7 +88,7 @@ typedef unsigned char b_xflags_t;
* completes, b_resid is usually 0 indicating 100% success.
*
* All fields are protected by the buffer lock except those marked:
* V - Protected by owning vnode lock
* V - Protected by owning bufobj lock
* Q - Protected by the buf queue lock
* D - Protected by an dependency implementation specific lock
*/

View File

@ -52,6 +52,8 @@
#if defined(_KERNEL) || defined(_KVM_VNODE)
#include <sys/queue.h>
#include <sys/_lock.h>
#include <sys/_mutex.h>
struct bufobj;
struct buf_ops;
@ -87,7 +89,7 @@ struct buf_ops {
#define BO_BDFLUSH(bo, bp) ((bo)->bo_ops->bop_bdflush((bo), (bp)))
struct bufobj {
struct mtx *bo_mtx; /* Mutex which protects "i" things */
struct mtx bo_mtx; /* Mutex which protects "i" things */
struct bufv bo_clean; /* i Clean buffers */
struct bufv bo_dirty; /* i Dirty buffers */
long bo_numoutput; /* i Writes in progress */
@ -112,21 +114,11 @@ struct bufobj {
#define BO_WWAIT (1 << 1) /* Wait for output to complete */
#define BO_NEEDSGIANT (1 << 2) /* Require giant for child buffers. */
#define BO_LOCK(bo) \
do { \
KASSERT((bo)->bo_mtx != NULL, ("No lock in bufobj")); \
mtx_lock((bo)->bo_mtx); \
} while (0)
#define BO_UNLOCK(bo) \
do { \
KASSERT((bo)->bo_mtx != NULL, ("No lock in bufobj")); \
mtx_unlock((bo)->bo_mtx); \
} while (0)
#define BO_MTX(bo) ((bo)->bo_mtx)
#define ASSERT_BO_LOCKED(bo) mtx_assert(bo->bo_mtx, MA_OWNED)
#define ASSERT_BO_UNLOCKED(bo) mtx_assert(bo->bo_mtx, MA_NOTOWNED)
#define BO_MTX(bo) (&(bo)->bo_mtx)
#define BO_LOCK(bo) mtx_lock(BO_MTX((bo)))
#define BO_UNLOCK(bo) mtx_unlock(BO_MTX((bo)))
#define ASSERT_BO_LOCKED(bo) mtx_assert(BO_MTX((bo)), MA_OWNED)
#define ASSERT_BO_UNLOCKED(bo) mtx_assert(BO_MTX((bo)), MA_NOTOWNED)
void bufobj_wdrop(struct bufobj *bo);
void bufobj_wref(struct bufobj *bo);

View File

@ -147,6 +147,7 @@ ffs_truncate(vp, length, flags, cred, td)
ufs2_daddr_t bn, lbn, lastblock, lastiblock[NIADDR], indir_lbn[NIADDR];
ufs2_daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR];
ufs2_daddr_t count, blocksreleased = 0, datablocks;
struct bufobj *bo;
struct fs *fs;
struct buf *bp;
struct ufsmount *ump;
@ -158,6 +159,7 @@ ffs_truncate(vp, length, flags, cred, td)
ip = VTOI(vp);
fs = ip->i_fs;
ump = ip->i_ump;
bo = &vp->v_bufobj;
ASSERT_VOP_LOCKED(vp, "ffs_truncate");
@ -486,13 +488,12 @@ ffs_truncate(vp, length, flags, cred, td)
for (i = 0; i < NDADDR; i++)
if (newblks[i] != DIP(ip, i_db[i]))
panic("ffs_truncate2");
VI_LOCK(vp);
BO_LOCK(bo);
if (length == 0 &&
(fs->fs_magic != FS_UFS2_MAGIC || ip->i_din2->di_extsize == 0) &&
(vp->v_bufobj.bo_dirty.bv_cnt > 0 ||
vp->v_bufobj.bo_clean.bv_cnt > 0))
(bo->bo_dirty.bv_cnt > 0 || bo->bo_clean.bv_cnt > 0))
panic("ffs_truncate3");
VI_UNLOCK(vp);
BO_UNLOCK(bo);
#endif /* INVARIANTS */
/*
* Put back the real size.

View File

@ -97,21 +97,20 @@ ffs_rawread_setup(void)
static int
ffs_rawread_sync(struct vnode *vp)
{
int spl;
int error;
int upgraded;
struct bufobj *bo;
struct mount *mp;
/* Check for dirty mmap, pending writes and dirty buffers */
spl = splbio();
VI_LOCK(vp);
bo = &vp->v_bufobj;
BO_LOCK(bo);
VI_LOCK(vp);
if (bo->bo_numoutput > 0 ||
bo->bo_dirty.bv_cnt > 0 ||
(vp->v_iflag & VI_OBJDIRTY) != 0) {
splx(spl);
VI_UNLOCK(vp);
BO_UNLOCK(bo);
if (vn_start_write(vp, &mp, V_NOWAIT) != 0) {
if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE)
@ -146,16 +145,15 @@ ffs_rawread_sync(struct vnode *vp)
vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
VM_OBJECT_UNLOCK(vp->v_object);
}
VI_LOCK(vp);
}
} else
VI_UNLOCK(vp);
/* Wait for pending writes to complete */
spl = splbio();
BO_LOCK(bo);
error = bufobj_wwait(&vp->v_bufobj, 0, 0);
if (error != 0) {
/* XXX: can't happen with a zero timeout ??? */
splx(spl);
VI_UNLOCK(vp);
BO_UNLOCK(bo);
if (upgraded != 0)
VOP_LOCK(vp, LK_DOWNGRADE);
vn_finished_write(mp);
@ -163,27 +161,24 @@ ffs_rawread_sync(struct vnode *vp)
}
/* Flush dirty buffers */
if (bo->bo_dirty.bv_cnt > 0) {
splx(spl);
VI_UNLOCK(vp);
BO_UNLOCK(bo);
if ((error = ffs_syncvnode(vp, MNT_WAIT)) != 0) {
if (upgraded != 0)
VOP_LOCK(vp, LK_DOWNGRADE);
vn_finished_write(mp);
return (error);
}
VI_LOCK(vp);
spl = splbio();
BO_LOCK(bo);
if (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)
panic("ffs_rawread_sync: dirty bufs");
}
splx(spl);
VI_UNLOCK(vp);
BO_UNLOCK(bo);
if (upgraded != 0)
VOP_LOCK(vp, LK_DOWNGRADE);
vn_finished_write(mp);
} else {
splx(spl);
VI_UNLOCK(vp);
BO_UNLOCK(bo);
}
return 0;
}

View File

@ -357,26 +357,16 @@ softdep_check_suspend(struct mount *mp,
(void) softdep_deps,
(void) softdep_accdeps;
ASSERT_VI_LOCKED(devvp, "softdep_check_suspend");
bo = &devvp->v_bufobj;
ASSERT_BO_LOCKED(bo);
for (;;) {
if (!MNT_ITRYLOCK(mp)) {
VI_UNLOCK(devvp);
MNT_ILOCK(mp);
MNT_IUNLOCK(mp);
VI_LOCK(devvp);
continue;
}
if (mp->mnt_secondary_writes != 0) {
VI_UNLOCK(devvp);
msleep(&mp->mnt_secondary_writes,
MNT_MTX(mp),
(PUSER - 1) | PDROP, "secwr", 0);
VI_LOCK(devvp);
continue;
}
break;
MNT_ILOCK(mp);
while (mp->mnt_secondary_writes != 0) {
BO_UNLOCK(bo);
msleep(&mp->mnt_secondary_writes, MNT_MTX(mp),
(PUSER - 1) | PDROP, "secwr", 0);
BO_LOCK(bo);
MNT_ILOCK(mp);
}
/*
@ -391,7 +381,7 @@ softdep_check_suspend(struct mount *mp,
mp->mnt_secondary_writes != 0 ||
secondary_accwrites != mp->mnt_secondary_accwrites)
error = EAGAIN;
VI_UNLOCK(devvp);
BO_UNLOCK(bo);
return (error);
}
@ -2189,6 +2179,7 @@ softdep_setup_freeblocks(ip, length, flags)
struct freeblks *freeblks;
struct inodedep *inodedep;
struct allocdirect *adp;
struct bufobj *bo;
struct vnode *vp;
struct buf *bp;
struct fs *fs;
@ -2314,27 +2305,28 @@ softdep_setup_freeblocks(ip, length, flags)
* any dependencies.
*/
vp = ITOV(ip);
VI_LOCK(vp);
bo = &vp->v_bufobj;
BO_LOCK(bo);
drain_output(vp);
restart:
TAILQ_FOREACH(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs) {
TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) {
if (((flags & IO_EXT) == 0 && (bp->b_xflags & BX_ALTDATA)) ||
((flags & IO_NORMAL) == 0 &&
(bp->b_xflags & BX_ALTDATA) == 0))
continue;
if ((bp = getdirtybuf(bp, VI_MTX(vp), MNT_WAIT)) == NULL)
if ((bp = getdirtybuf(bp, BO_MTX(bo), MNT_WAIT)) == NULL)
goto restart;
VI_UNLOCK(vp);
BO_UNLOCK(bo);
ACQUIRE_LOCK(&lk);
(void) inodedep_lookup(mp, ip->i_number, 0, &inodedep);
deallocate_dependencies(bp, inodedep);
FREE_LOCK(&lk);
bp->b_flags |= B_INVAL | B_NOCACHE;
brelse(bp);
VI_LOCK(vp);
BO_LOCK(bo);
goto restart;
}
VI_UNLOCK(vp);
BO_UNLOCK(bo);
ACQUIRE_LOCK(&lk);
if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) != 0)
(void) free_inodedep(inodedep);
@ -5159,13 +5151,15 @@ softdep_fsync_mountdev(vp)
{
struct buf *bp, *nbp;
struct worklist *wk;
struct bufobj *bo;
if (!vn_isdisk(vp, NULL))
panic("softdep_fsync_mountdev: vnode not a disk");
bo = &vp->v_bufobj;
restart:
BO_LOCK(bo);
ACQUIRE_LOCK(&lk);
VI_LOCK(vp);
TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
/*
* If it is already scheduled, skip to the next buffer.
*/
@ -5184,15 +5178,15 @@ softdep_fsync_mountdev(vp)
BUF_UNLOCK(bp);
continue;
}
VI_UNLOCK(vp);
FREE_LOCK(&lk);
BO_UNLOCK(bo);
bremfree(bp);
(void) bawrite(bp);
goto restart;
}
FREE_LOCK(&lk);
drain_output(vp);
VI_UNLOCK(vp);
BO_UNLOCK(bo);
}
/*
@ -5209,6 +5203,7 @@ softdep_sync_metadata(struct vnode *vp)
struct allocindir *aip;
struct buf *bp, *nbp;
struct worklist *wk;
struct bufobj *bo;
int i, error, waitfor;
if (!DOINGSOFTDEP(vp))
@ -5240,20 +5235,21 @@ softdep_sync_metadata(struct vnode *vp)
* resolved. Thus the second pass is expected to end quickly.
*/
waitfor = MNT_NOWAIT;
bo = &vp->v_bufobj;
top:
/*
* We must wait for any I/O in progress to finish so that
* all potential buffers on the dirty list will be visible.
*/
VI_LOCK(vp);
BO_LOCK(bo);
drain_output(vp);
while ((bp = TAILQ_FIRST(&vp->v_bufobj.bo_dirty.bv_hd)) != NULL) {
bp = getdirtybuf(bp, VI_MTX(vp), MNT_WAIT);
while ((bp = TAILQ_FIRST(&bo->bo_dirty.bv_hd)) != NULL) {
bp = getdirtybuf(bp, BO_MTX(bo), MNT_WAIT);
if (bp)
break;
}
VI_UNLOCK(vp);
BO_UNLOCK(bo);
if (bp == NULL)
return (0);
loop:
@ -5405,13 +5401,13 @@ softdep_sync_metadata(struct vnode *vp)
return (error);
}
FREE_LOCK(&lk);
VI_LOCK(vp);
BO_LOCK(bo);
while ((nbp = TAILQ_NEXT(bp, b_bobufs)) != NULL) {
nbp = getdirtybuf(nbp, VI_MTX(vp), MNT_WAIT);
nbp = getdirtybuf(nbp, BO_MTX(bo), MNT_WAIT);
if (nbp)
break;
}
VI_UNLOCK(vp);
BO_UNLOCK(bo);
BUF_NOREC(bp);
bawrite(bp);
if (nbp != NULL) {
@ -5435,9 +5431,9 @@ softdep_sync_metadata(struct vnode *vp)
* We must wait for any I/O in progress to finish so that
* all potential buffers on the dirty list will be visible.
*/
VI_LOCK(vp);
BO_LOCK(bo);
drain_output(vp);
VI_UNLOCK(vp);
BO_UNLOCK(bo);
return (0);
}
@ -5544,6 +5540,7 @@ flush_pagedep_deps(pvp, mp, diraddhdp)
struct ufsmount *ump;
struct diradd *dap;
struct vnode *vp;
struct bufobj *bo;
int error = 0;
struct buf *bp;
ino_t inum;
@ -5590,7 +5587,8 @@ flush_pagedep_deps(pvp, mp, diraddhdp)
vput(vp);
break;
}
VI_LOCK(vp);
bo = &vp->v_bufobj;
BO_LOCK(bo);
drain_output(vp);
/*
* If first block is still dirty with a D_MKDIR
@ -5598,15 +5596,15 @@ flush_pagedep_deps(pvp, mp, diraddhdp)
*/
for (;;) {
error = 0;
bp = gbincore(&vp->v_bufobj, 0);
bp = gbincore(bo, 0);
if (bp == NULL)
break; /* First block not present */
error = BUF_LOCK(bp,
LK_EXCLUSIVE |
LK_SLEEPFAIL |
LK_INTERLOCK,
VI_MTX(vp));
VI_LOCK(vp);
BO_MTX(bo));
BO_LOCK(bo);
if (error == ENOLCK)
continue; /* Slept, retry */
if (error != 0)
@ -5628,14 +5626,14 @@ flush_pagedep_deps(pvp, mp, diraddhdp)
* must write buffer to stable
* storage.
*/
VI_UNLOCK(vp);
BO_UNLOCK(bo);
bremfree(bp);
error = bwrite(bp);
VI_LOCK(vp);
BO_LOCK(bo);
}
break;
}
VI_UNLOCK(vp);
BO_UNLOCK(bo);
vput(vp);
if (error != 0)
break; /* Flushing of first block failed */
@ -5904,6 +5902,7 @@ clear_remove(td)
static int next = 0;
struct mount *mp;
struct vnode *vp;
struct bufobj *bo;
int error, cnt;
ino_t ino;
@ -5929,9 +5928,10 @@ clear_remove(td)
}
if ((error = ffs_syncvnode(vp, MNT_NOWAIT)))
softdep_error("clear_remove: fsync", error);
VI_LOCK(vp);
bo = &vp->v_bufobj;
BO_LOCK(bo);
drain_output(vp);
VI_UNLOCK(vp);
BO_UNLOCK(bo);
vput(vp);
vn_finished_write(mp);
ACQUIRE_LOCK(&lk);
@ -6004,9 +6004,9 @@ clear_inodedeps(td)
} else {
if ((error = ffs_syncvnode(vp, MNT_NOWAIT)))
softdep_error("clear_inodedeps: fsync2", error);
VI_LOCK(vp);
BO_LOCK(&vp->v_bufobj);
drain_output(vp);
VI_UNLOCK(vp);
BO_UNLOCK(&vp->v_bufobj);
}
vput(vp);
vn_finished_write(mp);
@ -6154,7 +6154,7 @@ getdirtybuf(bp, mtx, waitfor)
*/
#ifdef DEBUG_VFS_LOCKS
if (bp->b_vp->v_type != VCHR)
ASSERT_VI_LOCKED(bp->b_vp, "getdirtybuf");
ASSERT_BO_LOCKED(bp->b_bufobj);
#endif
bp->b_vflags |= BV_BKGRDWAIT;
msleep(&bp->b_xflags, mtx, PRIBIO, "getbuf", 0);
@ -6187,33 +6187,26 @@ softdep_check_suspend(struct mount *mp,
struct ufsmount *ump;
int error;
ASSERT_VI_LOCKED(devvp, "softdep_check_suspend");
ump = VFSTOUFS(mp);
bo = &devvp->v_bufobj;
ASSERT_BO_LOCKED(bo);
for (;;) {
if (!TRY_ACQUIRE_LOCK(&lk)) {
VI_UNLOCK(devvp);
BO_UNLOCK(bo);
ACQUIRE_LOCK(&lk);
FREE_LOCK(&lk);
VI_LOCK(devvp);
continue;
}
if (!MNT_ITRYLOCK(mp)) {
FREE_LOCK(&lk);
VI_UNLOCK(devvp);
MNT_ILOCK(mp);
MNT_IUNLOCK(mp);
VI_LOCK(devvp);
BO_LOCK(bo);
continue;
}
MNT_ILOCK(mp);
if (mp->mnt_secondary_writes != 0) {
FREE_LOCK(&lk);
VI_UNLOCK(devvp);
BO_UNLOCK(bo);
msleep(&mp->mnt_secondary_writes,
MNT_MTX(mp),
(PUSER - 1) | PDROP, "secwr", 0);
VI_LOCK(devvp);
BO_LOCK(bo);
continue;
}
break;
@ -6236,7 +6229,7 @@ softdep_check_suspend(struct mount *mp,
secondary_accwrites != mp->mnt_secondary_accwrites)
error = EAGAIN;
FREE_LOCK(&lk);
VI_UNLOCK(devvp);
BO_UNLOCK(bo);
return (error);
}
@ -6270,13 +6263,16 @@ static void
drain_output(vp)
struct vnode *vp;
{
ASSERT_VOP_LOCKED(vp, "drain_output");
ASSERT_VI_LOCKED(vp, "drain_output");
struct bufobj *bo;
while (vp->v_bufobj.bo_numoutput) {
vp->v_bufobj.bo_flag |= BO_WWAIT;
msleep((caddr_t)&vp->v_bufobj.bo_numoutput,
VI_MTX(vp), PRIBIO + 1, "drainvp", 0);
bo = &vp->v_bufobj;
ASSERT_VOP_LOCKED(vp, "drain_output");
ASSERT_BO_LOCKED(bo);
while (bo->bo_numoutput) {
bo->bo_flag |= BO_WWAIT;
msleep((caddr_t)&bo->bo_numoutput,
BO_MTX(bo), PRIBIO + 1, "drainvp", 0);
}
}

View File

@ -1267,11 +1267,12 @@ ffs_sync(mp, waitfor, td)
qsync(mp);
#endif
devvp = ump->um_devvp;
VI_LOCK(devvp);
bo = &devvp->v_bufobj;
BO_LOCK(bo);
if (waitfor != MNT_LAZY &&
(bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)) {
vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK);
BO_UNLOCK(bo);
vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
if ((error = VOP_FSYNC(devvp, waitfor, td)) != 0)
allerror = error;
VOP_UNLOCK(devvp, 0);
@ -1292,7 +1293,7 @@ ffs_sync(mp, waitfor, td)
MNT_IUNLOCK(mp);
suspended = 1;
} else
VI_UNLOCK(devvp);
BO_UNLOCK(bo);
/*
* Write back modified superblock.
*/

View File

@ -195,6 +195,7 @@ int
ffs_syncvnode(struct vnode *vp, int waitfor)
{
struct inode *ip = VTOI(vp);
struct bufobj *bo;
struct buf *bp;
struct buf *nbp;
int s, error, wait, passes, skipmeta;
@ -202,6 +203,7 @@ ffs_syncvnode(struct vnode *vp, int waitfor)
wait = (waitfor == MNT_WAIT);
lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1));
bo = &vp->v_bufobj;
/*
* Flush all dirty buffers associated with a vnode.
@ -211,11 +213,11 @@ ffs_syncvnode(struct vnode *vp, int waitfor)
if (wait)
skipmeta = 1;
s = splbio();
VI_LOCK(vp);
BO_LOCK(bo);
loop:
TAILQ_FOREACH(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs)
TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs)
bp->b_vflags &= ~BV_SCANNED;
TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
/*
* Reasons to skip this buffer: it has already been considered
* on this pass, this pass is the first time through on a
@ -231,13 +233,13 @@ ffs_syncvnode(struct vnode *vp, int waitfor)
continue;
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
continue;
VI_UNLOCK(vp);
BO_UNLOCK(bo);
if (!wait && !LIST_EMPTY(&bp->b_dep) &&
(bp->b_flags & B_DEFERRED) == 0 &&
buf_countdeps(bp, 0)) {
bp->b_flags |= B_DEFERRED;
BUF_UNLOCK(bp);
VI_LOCK(vp);
BO_LOCK(bo);
continue;
}
if ((bp->b_flags & B_DELWRI) == 0)
@ -286,8 +288,8 @@ ffs_syncvnode(struct vnode *vp, int waitfor)
* Since we may have slept during the I/O, we need
* to start from a known point.
*/
VI_LOCK(vp);
nbp = TAILQ_FIRST(&vp->v_bufobj.bo_dirty.bv_hd);
BO_LOCK(bo);
nbp = TAILQ_FIRST(&bo->bo_dirty.bv_hd);
}
/*
* If we were asked to do this synchronously, then go back for
@ -299,8 +301,8 @@ ffs_syncvnode(struct vnode *vp, int waitfor)
}
if (wait) {
bufobj_wwait(&vp->v_bufobj, 3, 0);
VI_UNLOCK(vp);
bufobj_wwait(bo, 3, 0);
BO_UNLOCK(bo);
/*
* Ensure that any filesystem metatdata associated
@ -311,8 +313,8 @@ ffs_syncvnode(struct vnode *vp, int waitfor)
return (error);
s = splbio();
VI_LOCK(vp);
if (vp->v_bufobj.bo_dirty.bv_cnt > 0) {
BO_LOCK(bo);
if (bo->bo_dirty.bv_cnt > 0) {
/*
* Block devices associated with filesystems may
* have new I/O requests posted for them even if
@ -331,7 +333,7 @@ ffs_syncvnode(struct vnode *vp, int waitfor)
#endif
}
}
VI_UNLOCK(vp);
BO_UNLOCK(bo);
splx(s);
return (ffs_update(vp, wait));
}