diff --git a/sys/fs/msdosfs/msdosfs_vfsops.c b/sys/fs/msdosfs/msdosfs_vfsops.c
index 9bba037ec8f0..7ed4d656db2e 100644
--- a/sys/fs/msdosfs/msdosfs_vfsops.c
+++ b/sys/fs/msdosfs/msdosfs_vfsops.c
@@ -803,7 +803,10 @@ msdosfs_unmount(struct mount *mp, int mntflags, struct thread *td)
 #ifdef MSDOSFS_DEBUG
 	{
 		struct vnode *vp = pmp->pm_devvp;
+		struct bufobj *bo;
 
+		bo = &vp->v_bufobj;
+		BO_LOCK(bo);
 		VI_LOCK(vp);
 		vn_printf(vp,
 		    "msdosfs_umount(): just before calling VOP_CLOSE()\n");
@@ -815,6 +818,7 @@ msdosfs_unmount(struct mount *mp, int mntflags, struct thread *td)
 		    TAILQ_FIRST(&vp->v_bufobj.bo_dirty.bv_hd),
 		    vp->v_bufobj.bo_numoutput, vp->v_type);
 		VI_UNLOCK(vp);
+		BO_UNLOCK(bo);
 	}
 #endif
 	DROP_GIANT();
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index 1f6c942d6f3e..cc04f3738767 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -1608,6 +1608,7 @@ vfs_bio_clcheck(struct vnode *vp, int size, daddr_t lblkno, daddr_t blkno)
 int
 vfs_bio_awrite(struct buf *bp)
 {
+	struct bufobj *bo;
 	int i;
 	int j;
 	daddr_t lblkno = bp->b_lblkno;
@@ -1617,6 +1618,7 @@ vfs_bio_awrite(struct buf *bp)
 	int size;
 	int maxcl;
 
+	bo = &vp->v_bufobj;
 	/*
 	 * right now we support clustered writing only to regular files.  If
 	 * we find a clusterable block we could be in the middle of a cluster
@@ -1629,7 +1631,7 @@ vfs_bio_awrite(struct buf *bp)
 		size = vp->v_mount->mnt_stat.f_iosize;
 		maxcl = MAXPHYS / size;
 
-		VI_LOCK(vp);
+		BO_LOCK(bo);
 		for (i = 1; i < maxcl; i++)
 			if (vfs_bio_clcheck(vp, size, lblkno + i,
 			    bp->b_blkno + ((i * size) >> DEV_BSHIFT)) == 0)
@@ -1639,8 +1641,7 @@ vfs_bio_awrite(struct buf *bp)
 			if (vfs_bio_clcheck(vp, size, lblkno - j,
 			    bp->b_blkno - ((j * size) >> DEV_BSHIFT)) == 0)
 				break;
-
-		VI_UNLOCK(vp);
+		BO_UNLOCK(bo);
 		--j;
 		ncl = i + j;
 		/*
@@ -2454,7 +2455,7 @@ getblk(struct vnode * vp, daddr_t blkno, int size, int slpflag, int slptimeo,
 			lockflags |= LK_NOWAIT;
 
 		error = BUF_TIMELOCK(bp, lockflags,
-		    VI_MTX(vp), "getblk", slpflag, slptimeo);
+		    BO_MTX(bo), "getblk", slpflag, slptimeo);
 
 		/*
 		 * If we slept and got the lock we have to restart in case
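Most of the hunks above and below follow a single idiom: find a buffer while holding the bufobj lock, then hand BO_MTX(bo) to the buffer-lock primitive as an interlock, so the lookup and the lock acquisition are atomic. A minimal sketch of that pattern, using only primitives that appear in this patch (my_find_buf itself is hypothetical, not part of the commit):

```c
/*
 * Hedged sketch of the interlock hand-off idiom used in getblk() and
 * cluster_wbuild() above.  Only gbincore(), BUF_LOCK() and the BO_*
 * macros come from the patched tree; the wrapper is illustrative.
 */
static struct buf *
my_find_buf(struct bufobj *bo, daddr_t lblkno)
{
	struct buf *bp;

	BO_LOCK(bo);
	bp = gbincore(bo, lblkno);	/* lookup requires the bufobj lock */
	if (bp == NULL) {
		BO_UNLOCK(bo);
		return (NULL);
	}
	/*
	 * LK_INTERLOCK makes BUF_LOCK() drop BO_MTX(bo) once it owns the
	 * buffer lock, so the buffer cannot be reclaimed between the
	 * lookup and the lock acquisition.
	 */
	if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK,
	    BO_MTX(bo)) != 0)
		return (NULL);		/* interlock already dropped */
	return (bp);
}
```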
diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c
index 7770bc43a01b..a74c27290340 100644
--- a/sys/kern/vfs_cluster.c
+++ b/sys/kern/vfs_cluster.c
@@ -94,12 +94,14 @@ cluster_read(vp, filesize, lblkno, size, cred, totread, seqcount, bpp)
 	struct buf **bpp;
 {
 	struct buf *bp, *rbp, *reqbp;
+	struct bufobj *bo;
 	daddr_t blkno, origblkno;
 	int maxra, racluster;
 	int error, ncontig;
 	int i;
 
 	error = 0;
+	bo = &vp->v_bufobj;
 
 	/*
 	 * Try to limit the amount of read-ahead by a few
@@ -130,7 +132,7 @@ cluster_read(vp, filesize, lblkno, size, cred, totread, seqcount, bpp)
 			return 0;
 		} else {
 			bp->b_flags &= ~B_RAM;
-			VI_LOCK(vp);
+			BO_LOCK(bo);
 			for (i = 1; i < maxra; i++) {
 				/*
 				 * Stop if the buffer does not exist or it
@@ -153,7 +155,7 @@ cluster_read(vp, filesize, lblkno, size, cred, totread, seqcount, bpp)
 					BUF_UNLOCK(rbp);
 				}
 			}
-			VI_UNLOCK(vp);
+			BO_UNLOCK(bo);
 			if (i >= maxra) {
 				return 0;
 			}
@@ -305,6 +307,7 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
 	int run;
 	struct buf *fbp;
 {
+	struct bufobj *bo;
 	struct buf *bp, *tbp;
 	daddr_t bn;
 	int i, inc, j;
@@ -330,7 +333,6 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
 		tbp->b_flags |= B_ASYNC | B_RAM;
 		tbp->b_iocmd = BIO_READ;
 	}
-
 	tbp->b_blkno = blkno;
 	if( (tbp->b_flags & B_MALLOC) ||
 	    ((tbp->b_flags & B_VMIO) == 0) || (run <= 1) )
@@ -364,6 +366,7 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
 	bp->b_npages = 0;
 
 	inc = btodb(size);
+	bo = &vp->v_bufobj;
 	for (bn = blkno, i = 0; i < run; ++i, bn += inc) {
 		if (i != 0) {
 			if ((bp->b_npages * PAGE_SIZE) +
@@ -384,15 +387,15 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
 			 * VMIO backed.  The clustering code can only deal
 			 * with VMIO-backed buffers.
 			 */
-			VI_LOCK(vp);
+			BO_LOCK(bo);
 			if ((tbp->b_vflags & BV_BKGRDINPROG) ||
 			    (tbp->b_flags & B_CACHE) ||
 			    (tbp->b_flags & B_VMIO) == 0) {
-				VI_UNLOCK(vp);
+				BO_UNLOCK(bo);
 				bqrelse(tbp);
 				break;
 			}
-			VI_UNLOCK(vp);
+			BO_UNLOCK(bo);
 
 			/*
 			 * The buffer must be completely invalid in order to
@@ -740,26 +743,28 @@ cluster_wbuild(vp, size, start_lbn, len)
 	int len;
 {
 	struct buf *bp, *tbp;
+	struct bufobj *bo;
 	int i, j;
 	int totalwritten = 0;
 	int dbsize = btodb(size);
 
+	bo = &vp->v_bufobj;
 	while (len > 0) {
 		/*
 		 * If the buffer is not delayed-write (i.e. dirty), or it
 		 * is delayed-write but either locked or inval, it cannot
 		 * partake in the clustered write.
 		 */
-		VI_LOCK(vp);
+		BO_LOCK(bo);
 		if ((tbp = gbincore(&vp->v_bufobj, start_lbn)) == NULL ||
 		    (tbp->b_vflags & BV_BKGRDINPROG)) {
-			VI_UNLOCK(vp);
+			BO_UNLOCK(bo);
 			++start_lbn;
 			--len;
 			continue;
 		}
 		if (BUF_LOCK(tbp,
-		    LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, VI_MTX(vp))) {
+		    LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, BO_MTX(bo))) {
 			++start_lbn;
 			--len;
 			continue;
@@ -838,10 +843,10 @@ cluster_wbuild(vp, size, start_lbn, len)
 			 * If the adjacent data is not even in core it
 			 * can't need to be written.
 			 */
-			VI_LOCK(vp);
-			if ((tbp = gbincore(&vp->v_bufobj, start_lbn)) == NULL ||
+			BO_LOCK(bo);
+			if ((tbp = gbincore(bo, start_lbn)) == NULL ||
 			    (tbp->b_vflags & BV_BKGRDINPROG)) {
-				VI_UNLOCK(vp);
+				BO_UNLOCK(bo);
 				break;
 			}
 
@@ -854,7 +859,7 @@ cluster_wbuild(vp, size, start_lbn, len)
 			 */
 			if (BUF_LOCK(tbp,
 			    LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK,
-			    VI_MTX(vp)))
+			    BO_MTX(bo)))
 				break;
 
 			if ((tbp->b_flags & (B_VMIO | B_CLUSTEROK |
diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c
index 542253038117..8b4170f9f021 100644
--- a/sys/kern/vfs_default.c
+++ b/sys/kern/vfs_default.c
@@ -405,12 +405,13 @@ vop_stdfsync(ap)
 	int error = 0;
 	int maxretry = 1000;     /* large, arbitrarily chosen */
 
-	VI_LOCK(vp);
+	bo = &vp->v_bufobj;
+	BO_LOCK(bo);
loop1:
 	/*
 	 * MARK/SCAN initialization to avoid infinite loops.
 	 */
-	TAILQ_FOREACH(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs) {
+	TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) {
 		bp->b_vflags &= ~BV_SCANNED;
 		bp->b_error = 0;
 	}
@@ -419,16 +420,16 @@ vop_stdfsync(ap)
 	 * Flush all dirty buffers associated with a vnode.
 	 */
loop2:
-	TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
+	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
 		if ((bp->b_vflags & BV_SCANNED) != 0)
 			continue;
 		bp->b_vflags |= BV_SCANNED;
 		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
 			continue;
-		VI_UNLOCK(vp);
-		KASSERT(bp->b_bufobj == &vp->v_bufobj,
+		BO_UNLOCK(bo);
+		KASSERT(bp->b_bufobj == bo,
 		    ("bp %p wrong b_bufobj %p should be %p",
-		    bp, bp->b_bufobj, &vp->v_bufobj));
+		    bp, bp->b_bufobj, bo));
 		if ((bp->b_flags & B_DELWRI) == 0)
 			panic("fsync: not dirty");
 		if ((vp->v_object != NULL) && (bp->b_flags & B_CLUSTEROK)) {
@@ -437,7 +438,7 @@ vop_stdfsync(ap)
 			bremfree(bp);
 			bawrite(bp);
 		}
-		VI_LOCK(vp);
+		BO_LOCK(bo);
 		goto loop2;
 	}
 
@@ -448,7 +449,6 @@ vop_stdfsync(ap)
 	 * retry if dirty blocks still exist.
 	 */
 	if (ap->a_waitfor == MNT_WAIT) {
-		bo = &vp->v_bufobj;
 		bufobj_wwait(bo, 0, 0);
 		if (bo->bo_dirty.bv_cnt > 0) {
 			/*
@@ -464,7 +464,7 @@ vop_stdfsync(ap)
 				error = EAGAIN;
 		}
 	}
-	VI_UNLOCK(vp);
+	BO_UNLOCK(bo);
 	if (error == EAGAIN)
 		vprint("fsync: giving up on dirty", vp);
 
@@ -571,14 +571,11 @@ vfs_stdsync(mp, waitfor, td)
 	MNT_ILOCK(mp);
loop:
 	MNT_VNODE_FOREACH(vp, mp, mvp) {
-
-		VI_LOCK(vp);
-		if (vp->v_bufobj.bo_dirty.bv_cnt == 0) {
-			VI_UNLOCK(vp);
+		/* bv_cnt is an acceptable race here. */
+		if (vp->v_bufobj.bo_dirty.bv_cnt == 0)
 			continue;
-		}
+		VI_LOCK(vp);
 		MNT_IUNLOCK(mp);
-
 		if ((error = vget(vp, lockreq, td)) != 0) {
 			MNT_ILOCK(mp);
 			if (error == ENOENT) {
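vop_stdfsync() above is the canonical MARK/SCAN consumer of the new lock: the bufobj lock is dropped around every operation that can sleep, and BV_SCANNED bounds the rescans. A condensed sketch of the same loop shape (my_flush_dirty is hypothetical; the primitives are the ones used in the hunk above, and the B_DELWRI and error checks are elided):

```c
/*
 * Hedged sketch of the MARK/SCAN idiom from vop_stdfsync().  Every
 * sleep point drops the bufobj lock, so the walk restarts, and
 * BV_SCANNED guarantees each buffer is visited only once per pass.
 */
static void
my_flush_dirty(struct bufobj *bo)
{
	struct buf *bp, *nbp;

	BO_LOCK(bo);
	TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs)
		bp->b_vflags &= ~BV_SCANNED;	/* MARK phase */
restart:
	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
		if (bp->b_vflags & BV_SCANNED)
			continue;
		bp->b_vflags |= BV_SCANNED;	/* SCAN each buf once */
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
			continue;
		BO_UNLOCK(bo);			/* bawrite() may sleep */
		bremfree(bp);
		bawrite(bp);
		BO_LOCK(bo);
		goto restart;			/* list may have changed */
	}
	BO_UNLOCK(bo);
}
```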
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index a0df9e66b1db..0fcff5f2b9da 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -936,7 +936,7 @@ getnewvnode(const char *tag, struct mount *mp, struct vop_vector *vops,
 	 */
 	bo = &vp->v_bufobj;
 	bo->__bo_vnode = vp;
-	bo->bo_mtx = &vp->v_interlock;
+	mtx_init(BO_MTX(bo), "bufobj interlock", NULL, MTX_DEF);
 	bo->bo_ops = &buf_ops_bio;
 	bo->bo_private = vp;
 	TAILQ_INIT(&bo->bo_clean.bv_hd);
@@ -1236,8 +1236,8 @@ vtruncbuf(struct vnode *vp, struct ucred *cred, struct thread *td,
 	ASSERT_VOP_LOCKED(vp, "vtruncbuf");
 
restart:
-	VI_LOCK(vp);
 	bo = &vp->v_bufobj;
+	BO_LOCK(bo);
 	anyfreed = 1;
 	for (;anyfreed;) {
 		anyfreed = 0;
@@ -1246,7 +1246,7 @@ vtruncbuf(struct vnode *vp, struct ucred *cred, struct thread *td,
 				continue;
 			if (BUF_LOCK(bp,
 			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
-			    VI_MTX(vp)) == ENOLCK)
+			    BO_MTX(bo)) == ENOLCK)
 				goto restart;
 
 			bremfree(bp);
@@ -1261,7 +1261,7 @@ vtruncbuf(struct vnode *vp, struct ucred *cred, struct thread *td,
 			    (nbp->b_flags & B_DELWRI))) {
 				goto restart;
 			}
-			VI_LOCK(vp);
+			BO_LOCK(bo);
 		}
 
 		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
@@ -1269,7 +1269,7 @@ vtruncbuf(struct vnode *vp, struct ucred *cred, struct thread *td,
 				continue;
 			if (BUF_LOCK(bp,
 			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
-			    VI_MTX(vp)) == ENOLCK)
+			    BO_MTX(bo)) == ENOLCK)
 				goto restart;
 			bremfree(bp);
 			bp->b_flags |= (B_INVAL | B_RELBUF);
@@ -1282,7 +1282,7 @@ vtruncbuf(struct vnode *vp, struct ucred *cred, struct thread *td,
 			    (nbp->b_flags & B_DELWRI) == 0)) {
 				goto restart;
 			}
-			VI_LOCK(vp);
+			BO_LOCK(bo);
 		}
 	}
 
@@ -1305,13 +1305,13 @@ vtruncbuf(struct vnode *vp, struct ucred *cred, struct thread *td,
 
 			bremfree(bp);
 			bawrite(bp);
-			VI_LOCK(vp);
+			BO_LOCK(bo);
 			goto restartsync;
 		}
 	}
 
 	bufobj_wwait(bo, 0, 0);
-	VI_UNLOCK(vp);
+	BO_UNLOCK(bo);
 	vnode_pager_setsize(vp, length);
 
 	return (0);
@@ -1503,24 +1503,25 @@ gbincore(struct bufobj *bo, daddr_t lblkno)
 void
 bgetvp(struct vnode *vp, struct buf *bp)
 {
+	struct bufobj *bo;
 
+	bo = &vp->v_bufobj;
+	ASSERT_BO_LOCKED(bo);
 	VNASSERT(bp->b_vp == NULL, vp, ("bgetvp: not free"));
 
 	CTR3(KTR_BUF, "bgetvp(%p) vp %p flags %X", bp, vp, bp->b_flags);
 	VNASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) == 0, vp,
 	    ("bgetvp: bp already attached! %p", bp));
 
-	ASSERT_VI_LOCKED(vp, "bgetvp");
-	vholdl(vp);
-	if (VFS_NEEDSGIANT(vp->v_mount) ||
-	    vp->v_bufobj.bo_flag & BO_NEEDSGIANT)
+	vhold(vp);
+	if (VFS_NEEDSGIANT(vp->v_mount) || bo->bo_flag & BO_NEEDSGIANT)
 		bp->b_flags |= B_NEEDSGIANT;
 	bp->b_vp = vp;
-	bp->b_bufobj = &vp->v_bufobj;
+	bp->b_bufobj = bo;
 	/*
 	 * Insert onto list for new vnode.
 	 */
-	buf_vlist_add(bp, &vp->v_bufobj, BX_VNCLEAN);
+	buf_vlist_add(bp, bo, BX_VNCLEAN);
 }
 
 /*
@@ -1557,7 +1558,8 @@ brelvp(struct buf *bp)
 	bp->b_vp = NULL;
 	bp->b_bufobj = NULL;
 	waiters = bp->b_waiters;
-	vdropl(vp);
+	BO_UNLOCK(bo);
+	vdrop(vp);
 
 	return (waiters);
 }
@@ -1668,7 +1670,7 @@ sync_vnode(struct synclist *slp, struct bufobj **bo, struct thread *td)
 	(void) VOP_FSYNC(vp, MNT_LAZY, td);
 	VOP_UNLOCK(vp, 0);
 	vn_finished_write(mp);
-	VI_LOCK(vp);
+	BO_LOCK(*bo);
 	if (((*bo)->bo_flag & BO_ONWORKLST) != 0) {
 		/*
 		 * Put us back on the worklist.  The worklist
@@ -1678,7 +1680,8 @@ sync_vnode(struct synclist *slp, struct bufobj **bo, struct thread *td)
 		 */
 		vn_syncer_add_to_worklist(*bo, syncdelay);
 	}
-	vdropl(vp);
+	BO_UNLOCK(*bo);
+	vdrop(vp);
 	VFS_UNLOCK_GIANT(vfslocked);
 	mtx_lock(&sync_mtx);
 	return (0);
@@ -1886,7 +1889,7 @@ reassignbuf(struct buf *bp)
 	/*
 	 * Delete from old vnode list, if on one.
 	 */
-	VI_LOCK(vp);
+	BO_LOCK(bo);
 	if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN))
 		buf_vlist_remove(bp);
 	else
@@ -1937,7 +1940,7 @@ reassignbuf(struct buf *bp)
 	KASSERT(bp == NULL || bp->b_bufobj == bo,
 	    ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo));
 #endif
-	VI_UNLOCK(vp);
+	BO_UNLOCK(bo);
 }
 
 /*
@@ -3127,6 +3130,7 @@ int
 vfs_allocate_syncvnode(struct mount *mp)
 {
 	struct vnode *vp;
+	struct bufobj *bo;
 	static long start, incr, next;
 	int error;
 
@@ -3155,14 +3159,14 @@ vfs_allocate_syncvnode(struct mount *mp)
 		}
 		next = start;
 	}
-	VI_LOCK(vp);
-	vn_syncer_add_to_worklist(&vp->v_bufobj,
-	    syncdelay > 0 ? next % syncdelay : 0);
+	bo = &vp->v_bufobj;
+	BO_LOCK(bo);
+	vn_syncer_add_to_worklist(bo, syncdelay > 0 ? next % syncdelay : 0);
 	/* XXX - vn_syncer_add_to_worklist() also grabs and drops sync_mtx. */
 	mtx_lock(&sync_mtx);
 	sync_vnode_count++;
 	mtx_unlock(&sync_mtx);
-	VI_UNLOCK(vp);
+	BO_UNLOCK(bo);
 	mp->mnt_syncer = vp;
 	return (0);
 }
@@ -3244,8 +3248,8 @@ sync_reclaim(struct vop_reclaim_args *ap)
 	struct vnode *vp = ap->a_vp;
 	struct bufobj *bo;
 
-	VI_LOCK(vp);
 	bo = &vp->v_bufobj;
+	BO_LOCK(bo);
 	vp->v_mount->mnt_syncer = NULL;
 	if (bo->bo_flag & BO_ONWORKLST) {
 		mtx_lock(&sync_mtx);
@@ -3255,7 +3259,7 @@ sync_reclaim(struct vop_reclaim_args *ap)
 		mtx_unlock(&sync_mtx);
 		bo->bo_flag &= ~BO_ONWORKLST;
 	}
-	VI_UNLOCK(vp);
+	BO_UNLOCK(bo);
 
 	return (0);
 }
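getnewvnode() above now mtx_init()s the mutex embedded in the bufobj instead of aliasing the vnode interlock. Every mtx_init() needs a matching mtx_destroy() when the vnode is freed; that teardown hunk is not part of this excerpt, so the fini half of the sketch below is an assumption about the rest of the commit (both helper names are hypothetical):

```c
/*
 * Lifecycle sketch for the embedded bufobj mutex.  The mtx_init() call
 * mirrors the getnewvnode() hunk above; the mtx_destroy() placement at
 * vnode teardown is assumed, not shown in this patch.
 */
static void
my_bufobj_init(struct bufobj *bo, struct vnode *vp)
{
	mtx_init(BO_MTX(bo), "bufobj interlock", NULL, MTX_DEF);
	bo->__bo_vnode = vp;
	bo->bo_ops = &buf_ops_bio;
	bo->bo_private = vp;
	TAILQ_INIT(&bo->bo_clean.bv_hd);
	TAILQ_INIT(&bo->bo_dirty.bv_hd);
}

static void
my_bufobj_fini(struct bufobj *bo)
{
	/* The lock must not be held or contended when destroyed. */
	ASSERT_BO_UNLOCKED(bo);
	mtx_destroy(BO_MTX(bo));
}
```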
diff --git a/sys/nfs4client/nfs4_vfsops.c b/sys/nfs4client/nfs4_vfsops.c
index e587d53b3e12..6de47a9a156c 100644
--- a/sys/nfs4client/nfs4_vfsops.c
+++ b/sys/nfs4client/nfs4_vfsops.c
@@ -749,6 +749,7 @@ nfs4_sync(struct mount *mp, int waitfor, struct thread *td)
 	MNT_VNODE_FOREACH(vp, mp, mvp) {
 		VI_LOCK(vp);
 		MNT_IUNLOCK(mp);
+		/* XXX racy bv_cnt check. */
 		if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
 		    waitfor == MNT_LAZY) {
 			VI_UNLOCK(vp);
diff --git a/sys/nfs4client/nfs4_vnops.c b/sys/nfs4client/nfs4_vnops.c
index 7a6db6628ece..0d6ad9ae5c4c 100644
--- a/sys/nfs4client/nfs4_vnops.c
+++ b/sys/nfs4client/nfs4_vnops.c
@@ -2486,11 +2486,12 @@ nfs4_flush(struct vnode *vp, int waitfor, struct thread *td,
     int commit)
 {
 	struct nfsnode *np = VTONFS(vp);
+	struct bufobj *bo;
 	struct buf *bp;
 	int i;
 	struct buf *nbp;
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
-	int s, error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
+	int error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
 	int passone = 1;
 	u_quad_t off, endoff, toff;
 	struct ucred* wcred = NULL;
@@ -2500,6 +2501,7 @@ nfs4_flush(struct vnode *vp, int waitfor, struct thread *td,
 #endif
 	struct buf *bvec_on_stack[NFS_COMMITBVECSIZ];
 	int bvecsize = 0, bveccount;
+	bo = &vp->v_bufobj;
 
 	if (nmp->nm_flag & NFSMNT_INT)
 		slpflag = PCATCH;
@@ -2517,15 +2519,14 @@ nfs4_flush(struct vnode *vp, int waitfor, struct thread *td,
 	endoff = 0;
 	bvecpos = 0;
 	if (NFS_ISV3(vp) && commit) {
-		s = splbio();
 		if (bvec != NULL && bvec != bvec_on_stack)
 			free(bvec, M_TEMP);
 		/*
 		 * Count up how many buffers waiting for a commit.
 		 */
 		bveccount = 0;
-		VI_LOCK(vp);
-		TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
+		BO_LOCK(bo);
+		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
 			if (!BUF_ISLOCKED(bp) &&
 			    (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
 				== (B_DELWRI | B_NEEDCOMMIT))
@@ -2542,11 +2543,11 @@ nfs4_flush(struct vnode *vp, int waitfor, struct thread *td,
 			 * Release the vnode interlock to avoid a lock
 			 * order reversal.
 			 */
-			VI_UNLOCK(vp);
+			BO_UNLOCK(bo);
 			bvec = (struct buf **)
 				malloc(bveccount * sizeof(struct buf *),
 				       M_TEMP, M_NOWAIT);
-			VI_LOCK(vp);
+			BO_LOCK(bo);
 			if (bvec == NULL) {
 				bvec = bvec_on_stack;
 				bvecsize = NFS_COMMITBVECSIZ;
@@ -2556,7 +2557,7 @@ nfs4_flush(struct vnode *vp, int waitfor, struct thread *td,
 			bvec = bvec_on_stack;
 			bvecsize = NFS_COMMITBVECSIZ;
 		}
-		TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
+		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
 			if (bvecpos >= bvecsize)
 				break;
 			if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
@@ -2569,7 +2570,7 @@ nfs4_flush(struct vnode *vp, int waitfor, struct thread *td,
 				nbp = TAILQ_NEXT(bp, b_bobufs);
 				continue;
 			}
-			VI_UNLOCK(vp);
+			BO_UNLOCK(bo);
 			bremfree(bp);
 			/*
 			 * Work out if all buffers are using the same cred
@@ -2588,7 +2589,7 @@ nfs4_flush(struct vnode *vp, int waitfor, struct thread *td,
 				wcred = NOCRED;
 			vfs_busy_pages(bp, 1);
 
-			VI_LOCK(vp);
+			BO_LOCK(bo);
 			/*
 			 * bp is protected by being locked, but nbp is not
 			 * and vfs_busy_pages() may sleep.  We have to
@@ -2612,8 +2613,7 @@ nfs4_flush(struct vnode *vp, int waitfor, struct thread *td,
 			if (toff > endoff)
 				endoff = toff;
 		}
-		splx(s);
-		VI_UNLOCK(vp);
+		BO_UNLOCK(bo);
 	}
 	if (bvecpos > 0) {
 		/*
@@ -2665,14 +2665,12 @@ nfs4_flush(struct vnode *vp, int waitfor, struct thread *td,
 				 * specific.  We should probably move that
 				 * into bundirty(). XXX
 				 */
-				s = splbio();
-				bufobj_wref(&vp->v_bufobj);
+				bufobj_wref(bo);
 				bp->b_flags |= B_ASYNC;
 				bundirty(bp);
 				bp->b_flags &= ~B_DONE;
 				bp->b_ioflags &= ~BIO_ERROR;
 				bp->b_dirtyoff = bp->b_dirtyend = 0;
-				splx(s);
 				bufdone(bp);
 			}
 		}
@@ -2682,17 +2680,15 @@ nfs4_flush(struct vnode *vp, int waitfor, struct thread *td,
 	 * Start/do any write(s) that are required.
 	 */
loop:
-	s = splbio();
-	VI_LOCK(vp);
-	TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
+	BO_LOCK(bo);
+	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
 		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
 			if (waitfor != MNT_WAIT || passone)
 				continue;
 			error = BUF_TIMELOCK(bp,
 			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
-			    VI_MTX(vp), "nfsfsync", slpflag, slptimeo);
-			splx(s);
+			    BO_MTX(bo), "nfsfsync", slpflag, slptimeo);
 			if (error == 0)
 				panic("nfs4_fsync: inconsistent lock");
 			if (error == ENOLCK)
@@ -2713,27 +2709,25 @@ nfs4_flush(struct vnode *vp, int waitfor, struct thread *td,
 			BUF_UNLOCK(bp);
 			continue;
 		}
-		VI_UNLOCK(vp);
+		BO_UNLOCK(bo);
 		bremfree(bp);
 		if (passone || !commit)
 			bp->b_flags |= B_ASYNC;
 		else
 			bp->b_flags |= B_ASYNC;
-		splx(s);
 		bwrite(bp);
 		goto loop;
 	}
-	splx(s);
 	if (passone) {
 		passone = 0;
-		VI_UNLOCK(vp);
+		BO_UNLOCK(bo);
 		goto again;
 	}
 	if (waitfor == MNT_WAIT) {
-		while (vp->v_bufobj.bo_numoutput) {
-			error = bufobj_wwait(&vp->v_bufobj, slpflag, slptimeo);
+		while (bo->bo_numoutput) {
+			error = bufobj_wwait(bo, slpflag, slptimeo);
 			if (error) {
-				VI_UNLOCK(vp);
+				BO_UNLOCK(bo);
 				if (nfs4_sigintr(nmp, NULL, td)) {
 					error = EINTR;
 					goto done;
@@ -2742,15 +2736,15 @@ nfs4_flush(struct vnode *vp, int waitfor, struct thread *td,
 					slpflag = 0;
 					slptimeo = 2 * hz;
 				}
-				VI_LOCK(vp);
+				BO_LOCK(bo);
 			}
 		}
 		if (vp->v_bufobj.bo_dirty.bv_cnt > 0 && commit) {
-			VI_UNLOCK(vp);
+			BO_UNLOCK(bo);
 			goto loop;
 		}
 	}
-	VI_UNLOCK(vp);
+	BO_UNLOCK(bo);
 	if (np->n_flag & NWRITEERR) {
 		error = np->n_error;
 		np->n_flag &= ~NWRITEERR;
diff --git a/sys/nfsclient/nfs_subs.c b/sys/nfsclient/nfs_subs.c
index 3bd498006111..6b5c63f0302c 100644
--- a/sys/nfsclient/nfs_subs.c
+++ b/sys/nfsclient/nfs_subs.c
@@ -915,28 +915,31 @@ nfs_clearcommit(struct mount *mp)
 {
 	struct vnode *vp, *nvp;
 	struct buf *bp, *nbp;
-	int s;
+	struct bufobj *bo;
 
-	s = splbio();
 	MNT_ILOCK(mp);
 	MNT_VNODE_FOREACH(vp, mp, nvp) {
+		bo = &vp->v_bufobj;
 		VI_LOCK(vp);
 		if (vp->v_iflag & VI_DOOMED) {
 			VI_UNLOCK(vp);
 			continue;
 		}
+		vholdl(vp);
+		VI_UNLOCK(vp);
 		MNT_IUNLOCK(mp);
-		TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
+		BO_LOCK(bo);
+		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
 			if (!BUF_ISLOCKED(bp) &&
 			    (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
 				== (B_DELWRI | B_NEEDCOMMIT))
 				bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
 		}
-		VI_UNLOCK(vp);
+		BO_UNLOCK(bo);
+		vdrop(vp);
 		MNT_ILOCK(mp);
 	}
 	MNT_IUNLOCK(mp);
-	splx(s);
 }
 
 /*
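nfs_clearcommit() above shows the ordering the new lock forces on mount-list walks: take a hold reference under the vnode interlock, drop the interlock and mount lock, and only then take the bufobj lock for the list walk. A sketch of that traversal shape, using the same primitives as the hunk (my_walk_dirty_bufs is hypothetical):

```c
/*
 * Hedged sketch of the hold-then-iterate pattern from
 * nfs_clearcommit().  The vhold reference pins the vnode (and its
 * embedded bufobj mutex) while both the mount and vnode locks are
 * dropped around the dirty-list walk.
 */
static void
my_walk_dirty_bufs(struct mount *mp)
{
	struct vnode *vp, *nvp;
	struct bufobj *bo;
	struct buf *bp, *nbp;

	MNT_ILOCK(mp);
	MNT_VNODE_FOREACH(vp, mp, nvp) {
		bo = &vp->v_bufobj;
		VI_LOCK(vp);
		if (vp->v_iflag & VI_DOOMED) {	/* being recycled */
			VI_UNLOCK(vp);
			continue;
		}
		vholdl(vp);		/* pin vp across the unlock */
		VI_UNLOCK(vp);
		MNT_IUNLOCK(mp);
		BO_LOCK(bo);
		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
			/* ... examine bp under the bufobj lock ... */
		}
		BO_UNLOCK(bo);
		vdrop(vp);		/* release the hold */
		MNT_ILOCK(mp);
	}
	MNT_IUNLOCK(mp);
}
```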
diff --git a/sys/nfsclient/nfs_vfsops.c b/sys/nfsclient/nfs_vfsops.c
index 57cb8c823d44..7185bb5f4237 100644
--- a/sys/nfsclient/nfs_vfsops.c
+++ b/sys/nfsclient/nfs_vfsops.c
@@ -1074,6 +1074,7 @@ nfs_sync(struct mount *mp, int waitfor, struct thread *td)
 	MNT_VNODE_FOREACH(vp, mp, mvp) {
 		VI_LOCK(vp);
 		MNT_IUNLOCK(mp);
+		/* XXX Racy bv_cnt check. */
 		if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
 		    waitfor == MNT_LAZY) {
 			VI_UNLOCK(vp);
diff --git a/sys/nfsclient/nfs_vnops.c b/sys/nfsclient/nfs_vnops.c
index 77713b175a3d..2b90abdecc72 100644
--- a/sys/nfsclient/nfs_vnops.c
+++ b/sys/nfsclient/nfs_vnops.c
@@ -2736,11 +2736,12 @@ nfs_flush(struct vnode *vp, int waitfor, struct thread *td,
 	int i;
 	struct buf *nbp;
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
-	int s, error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
+	int error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
 	int passone = 1;
 	u_quad_t off, endoff, toff;
 	struct ucred* wcred = NULL;
 	struct buf **bvec = NULL;
+	struct bufobj *bo;
 #ifndef NFS_COMMITBVECSIZ
 #define NFS_COMMITBVECSIZ	20
 #endif
@@ -2751,6 +2752,7 @@ nfs_flush(struct vnode *vp, int waitfor, struct thread *td,
 		slpflag = PCATCH;
 	if (!commit)
 		passone = 0;
+	bo = &vp->v_bufobj;
 	/*
 	 * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the
 	 * server, but has not been committed to stable storage on the server
@@ -2763,15 +2765,14 @@ nfs_flush(struct vnode *vp, int waitfor, struct thread *td,
 	endoff = 0;
 	bvecpos = 0;
 	if (NFS_ISV3(vp) && commit) {
-		s = splbio();
 		if (bvec != NULL && bvec != bvec_on_stack)
 			free(bvec, M_TEMP);
 		/*
 		 * Count up how many buffers waiting for a commit.
 		 */
 		bveccount = 0;
-		VI_LOCK(vp);
-		TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
+		BO_LOCK(bo);
+		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
 			if (!BUF_ISLOCKED(bp) &&
 			    (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
 				== (B_DELWRI | B_NEEDCOMMIT))
@@ -2788,11 +2789,11 @@ nfs_flush(struct vnode *vp, int waitfor, struct thread *td,
 			 * Release the vnode interlock to avoid a lock
 			 * order reversal.
 			 */
-			VI_UNLOCK(vp);
+			BO_UNLOCK(bo);
 			bvec = (struct buf **)
 				malloc(bveccount * sizeof(struct buf *),
 				       M_TEMP, M_NOWAIT);
-			VI_LOCK(vp);
+			BO_LOCK(bo);
 			if (bvec == NULL) {
 				bvec = bvec_on_stack;
 				bvecsize = NFS_COMMITBVECSIZ;
@@ -2802,7 +2803,7 @@ nfs_flush(struct vnode *vp, int waitfor, struct thread *td,
 			bvec = bvec_on_stack;
 			bvecsize = NFS_COMMITBVECSIZ;
 		}
-		TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
+		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
 			if (bvecpos >= bvecsize)
 				break;
 			if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
@@ -2815,7 +2816,7 @@ nfs_flush(struct vnode *vp, int waitfor, struct thread *td,
 				nbp = TAILQ_NEXT(bp, b_bobufs);
 				continue;
 			}
-			VI_UNLOCK(vp);
+			BO_UNLOCK(bo);
 			bremfree(bp);
 			/*
 			 * Work out if all buffers are using the same cred
@@ -2834,7 +2835,7 @@ nfs_flush(struct vnode *vp, int waitfor, struct thread *td,
 				wcred = NOCRED;
 			vfs_busy_pages(bp, 1);
 
-			VI_LOCK(vp);
+			BO_LOCK(bo);
 			/*
 			 * bp is protected by being locked, but nbp is not
 			 * and vfs_busy_pages() may sleep.  We have to
@@ -2858,8 +2859,7 @@ nfs_flush(struct vnode *vp, int waitfor, struct thread *td,
 			if (toff > endoff)
 				endoff = toff;
 		}
-		splx(s);
-		VI_UNLOCK(vp);
+		BO_UNLOCK(bo);
 	}
 	if (bvecpos > 0) {
 		/*
@@ -2911,14 +2911,12 @@ nfs_flush(struct vnode *vp, int waitfor, struct thread *td,
 				 * specific.  We should probably move that
 				 * into bundirty(). XXX
 				 */
-				s = splbio();
-				bufobj_wref(&vp->v_bufobj);
+				bufobj_wref(bo);
 				bp->b_flags |= B_ASYNC;
 				bundirty(bp);
 				bp->b_flags &= ~B_DONE;
 				bp->b_ioflags &= ~BIO_ERROR;
 				bp->b_dirtyoff = bp->b_dirtyend = 0;
-				splx(s);
 				bufdone(bp);
 			}
 		}
@@ -2928,17 +2926,15 @@ nfs_flush(struct vnode *vp, int waitfor, struct thread *td,
 	 * Start/do any write(s) that are required.
 	 */
loop:
-	s = splbio();
-	VI_LOCK(vp);
-	TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
+	BO_LOCK(bo);
+	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
 		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
 			if (waitfor != MNT_WAIT || passone)
 				continue;
 			error = BUF_TIMELOCK(bp,
 			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
-			    VI_MTX(vp), "nfsfsync", slpflag, slptimeo);
-			splx(s);
+			    BO_MTX(bo), "nfsfsync", slpflag, slptimeo);
 			if (error == 0) {
 				BUF_UNLOCK(bp);
 				goto loop;
@@ -2961,13 +2957,12 @@ nfs_flush(struct vnode *vp, int waitfor, struct thread *td,
 			BUF_UNLOCK(bp);
 			continue;
 		}
-		VI_UNLOCK(vp);
+		BO_UNLOCK(bo);
 		bremfree(bp);
 		if (passone || !commit)
 			bp->b_flags |= B_ASYNC;
 		else
 			bp->b_flags |= B_ASYNC;
-		splx(s);
 		bwrite(bp);
 		if (nfs_sigintr(nmp, NULL, td)) {
 			error = EINTR;
@@ -2975,17 +2970,16 @@ nfs_flush(struct vnode *vp, int waitfor, struct thread *td,
 		}
 		goto loop;
 	}
-	splx(s);
 	if (passone) {
 		passone = 0;
-		VI_UNLOCK(vp);
+		BO_UNLOCK(bo);
 		goto again;
 	}
 	if (waitfor == MNT_WAIT) {
-		while (vp->v_bufobj.bo_numoutput) {
-			error = bufobj_wwait(&vp->v_bufobj, slpflag, slptimeo);
+		while (bo->bo_numoutput) {
+			error = bufobj_wwait(bo, slpflag, slptimeo);
 			if (error) {
-				VI_UNLOCK(vp);
+				BO_UNLOCK(bo);
 				error = nfs_sigintr(nmp, NULL, td);
 				if (error)
 					goto done;
@@ -2993,17 +2987,17 @@ nfs_flush(struct vnode *vp, int waitfor, struct thread *td,
 				slpflag = 0;
 				slptimeo = 2 * hz;
 			}
-			VI_LOCK(vp);
+			BO_LOCK(bo);
 		}
 	}
-	if (vp->v_bufobj.bo_dirty.bv_cnt != 0 && commit) {
-		VI_UNLOCK(vp);
+	if (bo->bo_dirty.bv_cnt != 0 && commit) {
+		BO_UNLOCK(bo);
 		goto loop;
 	}
 	/*
 	 * Wait for all the async IO requests to drain
 	 */
-	VI_UNLOCK(vp);
+	BO_UNLOCK(bo);
 	mtx_lock(&np->n_mtx);
 	while (np->n_directio_asyncwr > 0) {
 		np->n_flag |= NFSYNCWAIT;
@@ -3020,14 +3014,14 @@ nfs_flush(struct vnode *vp, int waitfor, struct thread *td,
 		}
 		mtx_unlock(&np->n_mtx);
 	} else
-		VI_UNLOCK(vp);
+		BO_UNLOCK(bo);
 	mtx_lock(&np->n_mtx);
 	if (np->n_flag & NWRITEERR) {
 		error = np->n_error;
 		np->n_flag &= ~NWRITEERR;
 	}
-	if (commit && vp->v_bufobj.bo_dirty.bv_cnt == 0 &&
-	    vp->v_bufobj.bo_numoutput == 0 && np->n_directio_asyncwr == 0)
+	if (commit && bo->bo_dirty.bv_cnt == 0 &&
+	    bo->bo_numoutput == 0 && np->n_directio_asyncwr == 0)
 		np->n_flag &= ~NMODIFIED;
 	mtx_unlock(&np->n_mtx);
done:
diff --git a/sys/nfsserver/nfs_serv.c b/sys/nfsserver/nfs_serv.c
index 8f5b0d5a15de..67a0a7f8eb8a 100644
--- a/sys/nfsserver/nfs_serv.c
+++ b/sys/nfsserver/nfs_serv.c
@@ -3847,7 +3847,7 @@ nfsrv_commit(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 		 */
 		int iosize = vp->v_mount->mnt_stat.f_iosize;
 		int iomask = iosize - 1;
-		int s;
+		struct bufobj *bo;
 		daddr_t lblkno;
 
 		/*
@@ -3870,8 +3870,8 @@ nfsrv_commit(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 			VM_OBJECT_UNLOCK(vp->v_object);
 		}
 
-		s = splbio();
-		VI_LOCK(vp);
+		bo = &vp->v_bufobj;
+		BO_LOCK(bo);
 		while (cnt > 0) {
 			struct buf *bp;
 
@@ -3887,8 +3887,8 @@ nfsrv_commit(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 			 */
 			if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) {
 				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL |
-				    LK_INTERLOCK, VI_MTX(vp)) == ENOLCK) {
-					VI_LOCK(vp);
+				    LK_INTERLOCK, BO_MTX(bo)) == ENOLCK) {
+					BO_LOCK(bo);
 					continue; /* retry */
 				}
 				if ((bp->b_flags & (B_DELWRI|B_INVAL)) ==
@@ -3899,7 +3899,7 @@ nfsrv_commit(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 					++nfs_commit_miss;
 				} else
 					BUF_UNLOCK(bp);
-				VI_LOCK(vp);
+				BO_LOCK(bo);
 			}
 			++nfs_commit_blks;
 			if (cnt < iosize)
@@ -3907,8 +3907,7 @@ nfsrv_commit(struct nfsrv_descript *nfsd, struct nfssvc_sock *slp,
 			cnt -= iosize;
 			++lblkno;
 		}
-		VI_UNLOCK(vp);
-		splx(s);
+		BO_UNLOCK(bo);
 	}
 	aft_ret = VOP_GETATTR(vp, &aft, cred, td);
diff --git a/sys/sys/buf.h b/sys/sys/buf.h
index 5cb56565ca6a..20393d41e3fc 100644
--- a/sys/sys/buf.h
+++ b/sys/sys/buf.h
@@ -88,7 +88,7 @@ typedef unsigned char b_xflags_t;
  *	completes, b_resid is usually 0 indicating 100% success.
  *
  *	All fields are protected by the buffer lock except those marked:
- *		V - Protected by owning vnode lock
+ *		V - Protected by owning bufobj lock
  *		Q - Protected by the buf queue lock
  *		D - Protected by an dependency implementation specific lock
  */
diff --git a/sys/sys/bufobj.h b/sys/sys/bufobj.h
index 9ad572ab27bd..e491ce140936 100644
--- a/sys/sys/bufobj.h
+++ b/sys/sys/bufobj.h
@@ -52,6 +52,8 @@
 #if defined(_KERNEL) || defined(_KVM_VNODE)
 
 #include <sys/queue.h>
+#include <sys/_lock.h>
+#include <sys/_mutex.h>
 
 struct bufobj;
 struct buf_ops;
@@ -87,7 +89,7 @@ struct buf_ops {
 #define BO_BDFLUSH(bo, bp) ((bo)->bo_ops->bop_bdflush((bo), (bp)))
 
 struct bufobj {
-	struct mtx	*bo_mtx;	/* Mutex which protects "i" things */
+	struct mtx	bo_mtx;		/* Mutex which protects "i" things */
 	struct bufv	bo_clean;	/* i Clean buffers */
 	struct bufv	bo_dirty;	/* i Dirty buffers */
 	long		bo_numoutput;	/* i Writes in progress */
@@ -112,21 +114,11 @@ struct bufobj {
 #define	BO_WWAIT	(1 << 1)	/* Wait for output to complete */
 #define	BO_NEEDSGIANT	(1 << 2)	/* Require giant for child buffers. */
 
-#define	BO_LOCK(bo) \
-	do { \
-		KASSERT((bo)->bo_mtx != NULL, ("No lock in bufobj")); \
-		mtx_lock((bo)->bo_mtx); \
-	} while (0)
-
-#define	BO_UNLOCK(bo) \
-	do { \
-		KASSERT((bo)->bo_mtx != NULL, ("No lock in bufobj")); \
-		mtx_unlock((bo)->bo_mtx); \
-	} while (0)
-
-#define	BO_MTX(bo)	((bo)->bo_mtx)
-#define	ASSERT_BO_LOCKED(bo) mtx_assert(bo->bo_mtx, MA_OWNED)
-#define	ASSERT_BO_UNLOCKED(bo) mtx_assert(bo->bo_mtx, MA_NOTOWNED)
+#define	BO_MTX(bo)		(&(bo)->bo_mtx)
+#define	BO_LOCK(bo)		mtx_lock(BO_MTX((bo)))
+#define	BO_UNLOCK(bo)		mtx_unlock(BO_MTX((bo)))
+#define	ASSERT_BO_LOCKED(bo)	mtx_assert(BO_MTX((bo)), MA_OWNED)
+#define	ASSERT_BO_UNLOCKED(bo)	mtx_assert(BO_MTX((bo)), MA_NOTOWNED)
 
 void	bufobj_wdrop(struct bufobj *bo);
 void	bufobj_wref(struct bufobj *bo);
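With bo_mtx embedded, BO_MTX(bo) is always a valid mutex for the life of the bufobj, which is why the NULL-checking KASSERTs could be deleted and the macro result can be fed straight to msleep(). A sketch of the resulting wait loop, essentially what drain_output() in ffs_softdep.c below does (my_wait_output and the "bowait" wmesg are hypothetical):

```c
/*
 * Hedged sketch of the new macro surface.  The BO_* macros, BO_WWAIT
 * and the bo_numoutput/bo_flag fields come from the header above.
 */
static void
my_wait_output(struct bufobj *bo)
{
	BO_LOCK(bo);			/* mtx_lock(&bo->bo_mtx) */
	ASSERT_BO_LOCKED(bo);
	while (bo->bo_numoutput > 0) {
		bo->bo_flag |= BO_WWAIT;
		/* BO_MTX(bo) == &bo->bo_mtx, so msleep() can drop it. */
		msleep(&bo->bo_numoutput, BO_MTX(bo), PRIBIO + 1,
		    "bowait", 0);
	}
	BO_UNLOCK(bo);			/* mtx_unlock(&bo->bo_mtx) */
}
```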
diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c
index a6929f05c6ba..0a9bc9df6f1e 100644
--- a/sys/ufs/ffs/ffs_inode.c
+++ b/sys/ufs/ffs/ffs_inode.c
@@ -147,6 +147,7 @@ ffs_truncate(vp, length, flags, cred, td)
 	ufs2_daddr_t bn, lbn, lastblock, lastiblock[NIADDR], indir_lbn[NIADDR];
 	ufs2_daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR];
 	ufs2_daddr_t count, blocksreleased = 0, datablocks;
+	struct bufobj *bo;
 	struct fs *fs;
 	struct buf *bp;
 	struct ufsmount *ump;
@@ -158,6 +159,7 @@ ffs_truncate(vp, length, flags, cred, td)
 	ip = VTOI(vp);
 	fs = ip->i_fs;
 	ump = ip->i_ump;
+	bo = &vp->v_bufobj;
 
 	ASSERT_VOP_LOCKED(vp, "ffs_truncate");
 
@@ -486,13 +488,12 @@ ffs_truncate(vp, length, flags, cred, td)
 	for (i = 0; i < NDADDR; i++)
 		if (newblks[i] != DIP(ip, i_db[i]))
 			panic("ffs_truncate2");
-	VI_LOCK(vp);
+	BO_LOCK(bo);
 	if (length == 0 &&
 	    (fs->fs_magic != FS_UFS2_MAGIC || ip->i_din2->di_extsize == 0) &&
-	    (vp->v_bufobj.bo_dirty.bv_cnt > 0 ||
-	     vp->v_bufobj.bo_clean.bv_cnt > 0))
+	    (bo->bo_dirty.bv_cnt > 0 || bo->bo_clean.bv_cnt > 0))
 		panic("ffs_truncate3");
-	VI_UNLOCK(vp);
+	BO_UNLOCK(bo);
 #endif /* INVARIANTS */
 	/*
 	 * Put back the real size.
diff --git a/sys/ufs/ffs/ffs_rawread.c b/sys/ufs/ffs/ffs_rawread.c
index f10e432ae9f8..434c833f7c3e 100644
--- a/sys/ufs/ffs/ffs_rawread.c
+++ b/sys/ufs/ffs/ffs_rawread.c
@@ -97,21 +97,20 @@ ffs_rawread_setup(void)
 static int
 ffs_rawread_sync(struct vnode *vp)
 {
-	int spl;
 	int error;
 	int upgraded;
 	struct bufobj *bo;
 	struct mount *mp;
 
 	/* Check for dirty mmap, pending writes and dirty buffers */
-	spl = splbio();
-	VI_LOCK(vp);
 	bo = &vp->v_bufobj;
+	BO_LOCK(bo);
+	VI_LOCK(vp);
 	if (bo->bo_numoutput > 0 ||
 	    bo->bo_dirty.bv_cnt > 0 ||
 	    (vp->v_iflag & VI_OBJDIRTY) != 0) {
-		splx(spl);
 		VI_UNLOCK(vp);
+		BO_UNLOCK(bo);
 
 		if (vn_start_write(vp, &mp, V_NOWAIT) != 0) {
 			if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE)
@@ -146,16 +145,15 @@ ffs_rawread_sync(struct vnode *vp)
 				vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
 				VM_OBJECT_UNLOCK(vp->v_object);
 			}
-			VI_LOCK(vp);
-		}
+		} else
+			VI_UNLOCK(vp);
 
 		/* Wait for pending writes to complete */
-		spl = splbio();
+		BO_LOCK(bo);
 		error = bufobj_wwait(&vp->v_bufobj, 0, 0);
 		if (error != 0) {
 			/* XXX: can't happen with a zero timeout ??? */
-			splx(spl);
-			VI_UNLOCK(vp);
+			BO_UNLOCK(bo);
 			if (upgraded != 0)
 				VOP_LOCK(vp, LK_DOWNGRADE);
 			vn_finished_write(mp);
@@ -163,27 +161,24 @@ ffs_rawread_sync(struct vnode *vp)
 		}
 		/* Flush dirty buffers */
 		if (bo->bo_dirty.bv_cnt > 0) {
-			splx(spl);
-			VI_UNLOCK(vp);
+			BO_UNLOCK(bo);
 			if ((error = ffs_syncvnode(vp, MNT_WAIT)) != 0) {
 				if (upgraded != 0)
 					VOP_LOCK(vp, LK_DOWNGRADE);
 				vn_finished_write(mp);
 				return (error);
 			}
-			VI_LOCK(vp);
-			spl = splbio();
+			BO_LOCK(bo);
 			if (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)
 				panic("ffs_rawread_sync: dirty bufs");
 		}
-		splx(spl);
-		VI_UNLOCK(vp);
+		BO_UNLOCK(bo);
 		if (upgraded != 0)
 			VOP_LOCK(vp, LK_DOWNGRADE);
 		vn_finished_write(mp);
 	} else {
-		splx(spl);
 		VI_UNLOCK(vp);
+		BO_UNLOCK(bo);
 	}
 	return 0;
 }
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
index 41a6df88e2c2..7cb7b40089b6 100644
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -357,26 +357,16 @@ softdep_check_suspend(struct mount *mp,
 	(void) softdep_deps,
 	(void) softdep_accdeps;
 
-	ASSERT_VI_LOCKED(devvp, "softdep_check_suspend");
 	bo = &devvp->v_bufobj;
+	ASSERT_BO_LOCKED(bo);
 
-	for (;;) {
-		if (!MNT_ITRYLOCK(mp)) {
-			VI_UNLOCK(devvp);
-			MNT_ILOCK(mp);
-			MNT_IUNLOCK(mp);
-			VI_LOCK(devvp);
-			continue;
-		}
-		if (mp->mnt_secondary_writes != 0) {
-			VI_UNLOCK(devvp);
-			msleep(&mp->mnt_secondary_writes,
-			    MNT_MTX(mp),
-			    (PUSER - 1) | PDROP, "secwr", 0);
-			VI_LOCK(devvp);
-			continue;
-		}
-		break;
+	MNT_ILOCK(mp);
+	while (mp->mnt_secondary_writes != 0) {
+		BO_UNLOCK(bo);
+		msleep(&mp->mnt_secondary_writes, MNT_MTX(mp),
+		    (PUSER - 1) | PDROP, "secwr", 0);
+		BO_LOCK(bo);
+		MNT_ILOCK(mp);
 	}
 
 	/*
@@ -391,7 +381,7 @@ softdep_check_suspend(struct mount *mp,
 	    mp->mnt_secondary_writes != 0 ||
 	    secondary_accwrites != mp->mnt_secondary_accwrites)
 		error = EAGAIN;
-	VI_UNLOCK(devvp);
+	BO_UNLOCK(bo);
 	return (error);
 }
 
@@ -2189,6 +2179,7 @@ softdep_setup_freeblocks(ip, length, flags)
 	struct freeblks *freeblks;
 	struct inodedep *inodedep;
 	struct allocdirect *adp;
+	struct bufobj *bo;
 	struct vnode *vp;
 	struct buf *bp;
 	struct fs *fs;
@@ -2314,27 +2305,28 @@ softdep_setup_freeblocks(ip, length, flags)
 	 * any dependencies.
 	 */
 	vp = ITOV(ip);
-	VI_LOCK(vp);
+	bo = &vp->v_bufobj;
+	BO_LOCK(bo);
 	drain_output(vp);
restart:
-	TAILQ_FOREACH(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs) {
+	TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) {
 		if (((flags & IO_EXT) == 0 && (bp->b_xflags & BX_ALTDATA)) ||
 		    ((flags & IO_NORMAL) == 0 &&
 		    (bp->b_xflags & BX_ALTDATA) == 0))
 			continue;
-		if ((bp = getdirtybuf(bp, VI_MTX(vp), MNT_WAIT)) == NULL)
+		if ((bp = getdirtybuf(bp, BO_MTX(bo), MNT_WAIT)) == NULL)
 			goto restart;
-		VI_UNLOCK(vp);
+		BO_UNLOCK(bo);
 		ACQUIRE_LOCK(&lk);
 		(void) inodedep_lookup(mp, ip->i_number, 0, &inodedep);
 		deallocate_dependencies(bp, inodedep);
 		FREE_LOCK(&lk);
 		bp->b_flags |= B_INVAL | B_NOCACHE;
 		brelse(bp);
-		VI_LOCK(vp);
+		BO_LOCK(bo);
 		goto restart;
 	}
-	VI_UNLOCK(vp);
+	BO_UNLOCK(bo);
 	ACQUIRE_LOCK(&lk);
 	if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) != 0)
 		(void) free_inodedep(inodedep);
@@ -5159,13 +5151,15 @@ softdep_fsync_mountdev(vp)
 {
 	struct buf *bp, *nbp;
 	struct worklist *wk;
+	struct bufobj *bo;
 
 	if (!vn_isdisk(vp, NULL))
 		panic("softdep_fsync_mountdev: vnode not a disk");
+	bo = &vp->v_bufobj;
restart:
+	BO_LOCK(bo);
 	ACQUIRE_LOCK(&lk);
-	VI_LOCK(vp);
-	TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
+	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
 		/*
 		 * If it is already scheduled, skip to the next buffer.
 		 */
@@ -5184,15 +5178,15 @@ softdep_fsync_mountdev(vp)
 			BUF_UNLOCK(bp);
 			continue;
 		}
-		VI_UNLOCK(vp);
 		FREE_LOCK(&lk);
+		BO_UNLOCK(bo);
 		bremfree(bp);
 		(void) bawrite(bp);
 		goto restart;
 	}
 	FREE_LOCK(&lk);
 	drain_output(vp);
-	VI_UNLOCK(vp);
+	BO_UNLOCK(bo);
 }
 
 /*
@@ -5209,6 +5203,7 @@ softdep_sync_metadata(struct vnode *vp)
 	struct allocindir *aip;
 	struct buf *bp, *nbp;
 	struct worklist *wk;
+	struct bufobj *bo;
 	int i, error, waitfor;
 
 	if (!DOINGSOFTDEP(vp))
@@ -5240,20 +5235,21 @@ softdep_sync_metadata(struct vnode *vp)
 	 * resolved.  Thus the second pass is expected to end quickly.
 	 */
 	waitfor = MNT_NOWAIT;
+	bo = &vp->v_bufobj;
 
top:
 	/*
 	 * We must wait for any I/O in progress to finish so that
 	 * all potential buffers on the dirty list will be visible.
 	 */
-	VI_LOCK(vp);
+	BO_LOCK(bo);
 	drain_output(vp);
-	while ((bp = TAILQ_FIRST(&vp->v_bufobj.bo_dirty.bv_hd)) != NULL) {
-		bp = getdirtybuf(bp, VI_MTX(vp), MNT_WAIT);
+	while ((bp = TAILQ_FIRST(&bo->bo_dirty.bv_hd)) != NULL) {
+		bp = getdirtybuf(bp, BO_MTX(bo), MNT_WAIT);
 		if (bp)
 			break;
 	}
-	VI_UNLOCK(vp);
+	BO_UNLOCK(bo);
 	if (bp == NULL)
 		return (0);
loop:
@@ -5405,13 +5401,13 @@ softdep_sync_metadata(struct vnode *vp)
 				return (error);
 			}
 			FREE_LOCK(&lk);
-			VI_LOCK(vp);
+			BO_LOCK(bo);
 			while ((nbp = TAILQ_NEXT(bp, b_bobufs)) != NULL) {
-				nbp = getdirtybuf(nbp, VI_MTX(vp), MNT_WAIT);
+				nbp = getdirtybuf(nbp, BO_MTX(bo), MNT_WAIT);
 				if (nbp)
 					break;
 			}
-			VI_UNLOCK(vp);
+			BO_UNLOCK(bo);
 			BUF_NOREC(bp);
 			bawrite(bp);
 			if (nbp != NULL) {
@@ -5435,9 +5431,9 @@ softdep_sync_metadata(struct vnode *vp)
 	 * We must wait for any I/O in progress to finish so that
 	 * all potential buffers on the dirty list will be visible.
 	 */
-	VI_LOCK(vp);
+	BO_LOCK(bo);
 	drain_output(vp);
-	VI_UNLOCK(vp);
+	BO_UNLOCK(bo);
 	return (0);
 }
 
@@ -5544,6 +5540,7 @@ flush_pagedep_deps(pvp, mp, diraddhdp)
 	struct ufsmount *ump;
 	struct diradd *dap;
 	struct vnode *vp;
+	struct bufobj *bo;
 	int error = 0;
 	struct buf *bp;
 	ino_t inum;
@@ -5590,7 +5587,8 @@ flush_pagedep_deps(pvp, mp, diraddhdp)
 				vput(vp);
 				break;
 			}
-			VI_LOCK(vp);
+			bo = &vp->v_bufobj;
+			BO_LOCK(bo);
 			drain_output(vp);
 			/*
 			 * If first block is still dirty with a D_MKDIR
@@ -5598,15 +5596,15 @@ flush_pagedep_deps(pvp, mp, diraddhdp)
 			 */
 			for (;;) {
 				error = 0;
-				bp = gbincore(&vp->v_bufobj, 0);
+				bp = gbincore(bo, 0);
 				if (bp == NULL)
 					break; /* First block not present */
 				error = BUF_LOCK(bp,
 				    LK_EXCLUSIVE |
 				    LK_SLEEPFAIL |
 				    LK_INTERLOCK,
-				    VI_MTX(vp));
-				VI_LOCK(vp);
+				    BO_MTX(bo));
+				BO_LOCK(bo);
 				if (error == ENOLCK)
 					continue; /* Slept, retry */
 				if (error != 0)
@@ -5628,14 +5626,14 @@ flush_pagedep_deps(pvp, mp, diraddhdp)
 					 * must write buffer to stable
 					 * storage.
 					 */
-					VI_UNLOCK(vp);
+					BO_UNLOCK(bo);
 					bremfree(bp);
 					error = bwrite(bp);
-					VI_LOCK(vp);
+					BO_LOCK(bo);
 				}
 				break;
 			}
-			VI_UNLOCK(vp);
+			BO_UNLOCK(bo);
 			vput(vp);
 			if (error != 0)
 				break; /* Flushing of first block failed */
@@ -5904,6 +5902,7 @@ clear_remove(td)
 	static int next = 0;
 	struct mount *mp;
 	struct vnode *vp;
+	struct bufobj *bo;
 	int error, cnt;
 	ino_t ino;
 
@@ -5929,9 +5928,10 @@ clear_remove(td)
 			}
 			if ((error = ffs_syncvnode(vp, MNT_NOWAIT)))
 				softdep_error("clear_remove: fsync", error);
-			VI_LOCK(vp);
+			bo = &vp->v_bufobj;
+			BO_LOCK(bo);
 			drain_output(vp);
-			VI_UNLOCK(vp);
+			BO_UNLOCK(bo);
 			vput(vp);
 			vn_finished_write(mp);
 			ACQUIRE_LOCK(&lk);
@@ -6004,9 +6004,9 @@ clear_inodedeps(td)
 		} else {
 			if ((error = ffs_syncvnode(vp, MNT_NOWAIT)))
 				softdep_error("clear_inodedeps: fsync2", error);
-			VI_LOCK(vp);
+			BO_LOCK(&vp->v_bufobj);
 			drain_output(vp);
-			VI_UNLOCK(vp);
+			BO_UNLOCK(&vp->v_bufobj);
 		}
 		vput(vp);
 		vn_finished_write(mp);
@@ -6154,7 +6154,7 @@ getdirtybuf(bp, mtx, waitfor)
 		 */
#ifdef DEBUG_VFS_LOCKS
 		if (bp->b_vp->v_type != VCHR)
-			ASSERT_VI_LOCKED(bp->b_vp, "getdirtybuf");
+			ASSERT_BO_LOCKED(bp->b_bufobj);
#endif
 		bp->b_vflags |= BV_BKGRDWAIT;
 		msleep(&bp->b_xflags, mtx, PRIBIO, "getbuf", 0);
@@ -6187,33 +6187,26 @@ softdep_check_suspend(struct mount *mp,
 	struct ufsmount *ump;
 	int error;
 
-	ASSERT_VI_LOCKED(devvp, "softdep_check_suspend");
 	ump = VFSTOUFS(mp);
 	bo = &devvp->v_bufobj;
+	ASSERT_BO_LOCKED(bo);
 
 	for (;;) {
 		if (!TRY_ACQUIRE_LOCK(&lk)) {
-			VI_UNLOCK(devvp);
+			BO_UNLOCK(bo);
 			ACQUIRE_LOCK(&lk);
 			FREE_LOCK(&lk);
-			VI_LOCK(devvp);
-			continue;
-		}
-		if (!MNT_ITRYLOCK(mp)) {
-			FREE_LOCK(&lk);
-			VI_UNLOCK(devvp);
-			MNT_ILOCK(mp);
-			MNT_IUNLOCK(mp);
-			VI_LOCK(devvp);
+			BO_LOCK(bo);
 			continue;
 		}
+		MNT_ILOCK(mp);
 		if (mp->mnt_secondary_writes != 0) {
 			FREE_LOCK(&lk);
-			VI_UNLOCK(devvp);
+			BO_UNLOCK(bo);
 			msleep(&mp->mnt_secondary_writes,
 			    MNT_MTX(mp),
 			    (PUSER - 1) | PDROP, "secwr", 0);
-			VI_LOCK(devvp);
+			BO_LOCK(bo);
 			continue;
 		}
 		break;
@@ -6236,7 +6229,7 @@ softdep_check_suspend(struct mount *mp,
 	    secondary_accwrites != mp->mnt_secondary_accwrites)
 		error = EAGAIN;
 	FREE_LOCK(&lk);
-	VI_UNLOCK(devvp);
+	BO_UNLOCK(bo);
 	return (error);
 }
 
@@ -6270,13 +6263,16 @@ static void
 drain_output(vp)
 	struct vnode *vp;
 {
-	ASSERT_VOP_LOCKED(vp, "drain_output");
-	ASSERT_VI_LOCKED(vp, "drain_output");
+	struct bufobj *bo;
 
-	while (vp->v_bufobj.bo_numoutput) {
-		vp->v_bufobj.bo_flag |= BO_WWAIT;
-		msleep((caddr_t)&vp->v_bufobj.bo_numoutput,
-		    VI_MTX(vp), PRIBIO + 1, "drainvp", 0);
+	bo = &vp->v_bufobj;
+	ASSERT_VOP_LOCKED(vp, "drain_output");
+	ASSERT_BO_LOCKED(bo);
+
+	while (bo->bo_numoutput) {
+		bo->bo_flag |= BO_WWAIT;
+		msleep((caddr_t)&bo->bo_numoutput,
+		    BO_MTX(bo), PRIBIO + 1, "drainvp", 0);
 	}
 }
diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c
index 6a1dce31db29..cc13410445c8 100644
--- a/sys/ufs/ffs/ffs_vfsops.c
+++ b/sys/ufs/ffs/ffs_vfsops.c
@@ -1267,11 +1267,12 @@ ffs_sync(mp, waitfor, td)
 		qsync(mp);
#endif
 	devvp = ump->um_devvp;
-	VI_LOCK(devvp);
 	bo = &devvp->v_bufobj;
+	BO_LOCK(bo);
 	if (waitfor != MNT_LAZY &&
 	    (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)) {
-		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK);
+		BO_UNLOCK(bo);
+		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 		if ((error = VOP_FSYNC(devvp, waitfor, td)) != 0)
 			allerror = error;
 		VOP_UNLOCK(devvp, 0);
@@ -1292,7 +1293,7 @@ ffs_sync(mp, waitfor, td)
 		MNT_IUNLOCK(mp);
 		suspended = 1;
 	} else
-		VI_UNLOCK(devvp);
+		BO_UNLOCK(bo);
 	/*
 	 * Write back modified superblock.
 	 */
diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c
index 796866205264..dca262c14b07 100644
--- a/sys/ufs/ffs/ffs_vnops.c
+++ b/sys/ufs/ffs/ffs_vnops.c
@@ -195,6 +195,7 @@ int
 ffs_syncvnode(struct vnode *vp, int waitfor)
 {
 	struct inode *ip = VTOI(vp);
+	struct bufobj *bo;
 	struct buf *bp;
 	struct buf *nbp;
 	int s, error, wait, passes, skipmeta;
@@ -202,6 +203,7 @@ ffs_syncvnode(struct vnode *vp, int waitfor)
 
 	wait = (waitfor == MNT_WAIT);
 	lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1));
+	bo = &vp->v_bufobj;
 
 	/*
 	 * Flush all dirty buffers associated with a vnode.
@@ -211,11 +213,11 @@ ffs_syncvnode(struct vnode *vp, int waitfor)
 	if (wait)
 		skipmeta = 1;
 	s = splbio();
-	VI_LOCK(vp);
+	BO_LOCK(bo);
loop:
-	TAILQ_FOREACH(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs)
+	TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs)
 		bp->b_vflags &= ~BV_SCANNED;
-	TAILQ_FOREACH_SAFE(bp, &vp->v_bufobj.bo_dirty.bv_hd, b_bobufs, nbp) {
+	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
 		/*
 		 * Reasons to skip this buffer: it has already been considered
 		 * on this pass, this pass is the first time through on a
@@ -231,13 +233,13 @@ ffs_syncvnode(struct vnode *vp, int waitfor)
 			continue;
 		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
 			continue;
-		VI_UNLOCK(vp);
+		BO_UNLOCK(bo);
 		if (!wait && !LIST_EMPTY(&bp->b_dep) &&
 		    (bp->b_flags & B_DEFERRED) == 0 &&
 		    buf_countdeps(bp, 0)) {
 			bp->b_flags |= B_DEFERRED;
 			BUF_UNLOCK(bp);
-			VI_LOCK(vp);
+			BO_LOCK(bo);
 			continue;
 		}
 		if ((bp->b_flags & B_DELWRI) == 0)
@@ -286,8 +288,8 @@ ffs_syncvnode(struct vnode *vp, int waitfor)
 		 * Since we may have slept during the I/O, we need
 		 * to start from a known point.
 		 */
-		VI_LOCK(vp);
-		nbp = TAILQ_FIRST(&vp->v_bufobj.bo_dirty.bv_hd);
+		BO_LOCK(bo);
+		nbp = TAILQ_FIRST(&bo->bo_dirty.bv_hd);
 	}
 	/*
 	 * If we were asked to do this synchronously, then go back for
@@ -299,8 +301,8 @@ ffs_syncvnode(struct vnode *vp, int waitfor)
 	}
 
 	if (wait) {
-		bufobj_wwait(&vp->v_bufobj, 3, 0);
-		VI_UNLOCK(vp);
+		bufobj_wwait(bo, 3, 0);
+		BO_UNLOCK(bo);
 
 		/*
 		 * Ensure that any filesystem metatdata associated
@@ -311,8 +313,8 @@ ffs_syncvnode(struct vnode *vp, int waitfor)
 			return (error);
 		s = splbio();
-		VI_LOCK(vp);
-		if (vp->v_bufobj.bo_dirty.bv_cnt > 0) {
+		BO_LOCK(bo);
+		if (bo->bo_dirty.bv_cnt > 0) {
 			/*
 			 * Block devices associated with filesystems may
 			 * have new I/O requests posted for them even if
@@ -331,7 +333,7 @@ ffs_syncvnode(struct vnode *vp, int waitfor)
#endif
 		}
 	}
-	VI_UNLOCK(vp);
+	BO_UNLOCK(bo);
 	splx(s);
 	return (ffs_update(vp, wait));
 }
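Taken together, the ffs changes reduce the MNT_WAIT tail of an fsync implementation to a short critical section on the bufobj lock. A closing sketch modeled on vop_stdfsync() and ffs_syncvnode() above (my_fsync_wait is hypothetical; bufobj_wwait() and the EAGAIN convention come from the patched code):

```c
/*
 * Hedged sketch of the waitfor == MNT_WAIT tail under the new
 * locking: wait for writes in flight, then report whether dirty
 * buffers survived the flush, as vop_stdfsync() does above.
 */
static int
my_fsync_wait(struct vnode *vp)
{
	struct bufobj *bo;
	int error;

	bo = &vp->v_bufobj;
	BO_LOCK(bo);
	/* Sleeps on the bufobj lock until bo_numoutput drains. */
	error = bufobj_wwait(bo, 0, 0);
	if (error == 0 && bo->bo_dirty.bv_cnt > 0)
		error = EAGAIN;	/* dirty buffers survived the flush */
	BO_UNLOCK(bo);
	return (error);
}
```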