 - Clean up unlocked accesses to buf flags by introducing a new b_vflags member
   that is protected by the vnode lock.
 - Move B_SCANNED into b_vflags and call it BV_SCANNED.
 - Create a vop_stdfsync() modeled after spec's sync.
 - Replace spec_fsync, msdos_fsync, and hpfs_fsync with the stdfsync and some
   fs specific processing.  This gives all of these filesystems proper
   behavior wrt MNT_WAIT/NOWAIT and the use of the B_SCANNED flag.
 - Annotate the locking in buf.h
This commit is contained in:
Jeff Roberson 2003-02-09 11:28:35 +00:00
parent 15553af710
commit 767b9a529d
8 changed files with 125 additions and 171 deletions

View File

@ -87,48 +87,15 @@ hpfs_fsync(ap)
struct thread *a_td;
} */ *ap;
{
struct vnode *vp = ap->a_vp;
int s;
struct buf *bp, *nbp;
/*
* Flush all dirty buffers associated with a vnode.
* Flush our dirty buffers.
*/
loop:
VI_LOCK(vp);
s = splbio();
for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
nbp = TAILQ_NEXT(bp, b_vnbufs);
VI_UNLOCK(vp);
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
VI_LOCK(vp);
continue;
}
if ((bp->b_flags & B_DELWRI) == 0)
panic("hpfs_fsync: not dirty");
bremfree(bp);
splx(s);
(void) bwrite(bp);
goto loop;
}
while (vp->v_numoutput) {
vp->v_iflag |= VI_BWAIT;
msleep((caddr_t)&vp->v_numoutput, VI_MTX(vp), PRIBIO + 1,
"hpfsn", 0);
}
#ifdef DIAGNOSTIC
if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
vprint("hpfs_fsync: dirty", vp);
goto loop;
}
#endif
VI_UNLOCK(vp);
splx(s);
vop_stdfsync(ap);
/*
* Write out the on-disc version of the vnode.
*/
return hpfs_update(VTOHP(vp));
return hpfs_update(VTOHP(ap->a_vp));
}
static int

View File

@ -806,45 +806,12 @@ msdosfs_fsync(ap)
struct thread *a_td;
} */ *ap;
{
struct vnode *vp = ap->a_vp;
int s;
struct buf *bp, *nbp;
/*
* Flush all dirty buffers associated with a vnode.
* Flush our dirty buffers.
*/
loop:
s = splbio();
VI_LOCK(vp);
for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
nbp = TAILQ_NEXT(bp, b_vnbufs);
VI_UNLOCK(vp);
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
VI_LOCK(vp);
continue;
}
if ((bp->b_flags & B_DELWRI) == 0)
panic("msdosfs_fsync: not dirty");
bremfree(bp);
splx(s);
/* XXX Could do bawrite */
(void) bwrite(bp);
goto loop;
}
while (vp->v_numoutput) {
vp->v_iflag |= VI_BWAIT;
(void) msleep((caddr_t)&vp->v_numoutput, VI_MTX(vp),
PRIBIO + 1, "msdosfsn", 0);
}
#ifdef DIAGNOSTIC
if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
vprint("msdosfs_fsync: dirty", vp);
goto loop;
}
#endif
VI_UNLOCK(vp);
splx(s);
return (deupdat(VTODE(vp), ap->a_waitfor == MNT_WAIT));
vop_stdfsync(ap);
return (deupdat(VTODE(ap->a_vp), ap->a_waitfor == MNT_WAIT));
}
static int

View File

@ -416,89 +416,10 @@ spec_fsync(ap)
struct thread *a_td;
} */ *ap;
{
struct vnode *vp = ap->a_vp;
struct buf *bp;
struct buf *nbp;
int s, error = 0;
int maxretry = 100; /* large, arbitrarily chosen */
if (!vn_isdisk(vp, NULL))
if (!vn_isdisk(ap->a_vp, NULL))
return (0);
VI_LOCK(vp);
loop1:
/*
* MARK/SCAN initialization to avoid infinite loops.
*/
s = splbio();
TAILQ_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
bp->b_flags &= ~B_SCANNED;
bp->b_error = 0;
}
splx(s);
/*
* Flush all dirty buffers associated with a block device.
*/
loop2:
s = splbio();
for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp != NULL; bp = nbp) {
nbp = TAILQ_NEXT(bp, b_vnbufs);
if ((bp->b_flags & B_SCANNED) != 0)
continue;
VI_UNLOCK(vp);
bp->b_flags |= B_SCANNED;
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
VI_LOCK(vp);
continue;
}
if ((bp->b_flags & B_DELWRI) == 0)
panic("spec_fsync: not dirty");
if ((vp->v_vflag & VV_OBJBUF) && (bp->b_flags & B_CLUSTEROK)) {
BUF_UNLOCK(bp);
vfs_bio_awrite(bp);
splx(s);
} else {
bremfree(bp);
splx(s);
bawrite(bp);
}
VI_LOCK(vp);
goto loop2;
}
/*
* If synchronous the caller expects us to completely resolve all
* dirty buffers in the system. Wait for in-progress I/O to
* complete (which could include background bitmap writes), then
* retry if dirty blocks still exist.
*/
if (ap->a_waitfor == MNT_WAIT) {
while (vp->v_numoutput) {
vp->v_iflag |= VI_BWAIT;
msleep((caddr_t)&vp->v_numoutput, VI_MTX(vp),
PRIBIO + 1, "spfsyn", 0);
}
if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
/*
* If we are unable to write any of these buffers
* then we fail now rather than trying endlessly
* to write them out.
*/
TAILQ_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs)
if ((error = bp->b_error) == 0)
continue;
if (error == 0 && --maxretry >= 0) {
splx(s);
goto loop1;
}
vprint("spec_fsync: giving up on dirty", vp);
error = EAGAIN;
}
}
VI_UNLOCK(vp);
splx(s);
return (error);
return (vop_stdfsync(ap));
}
/*

View File

@ -542,6 +542,7 @@ bufinit(void)
bp->b_rcred = NOCRED;
bp->b_wcred = NOCRED;
bp->b_qindex = QUEUE_EMPTY;
bp->b_vflags = 0;
bp->b_xflags = 0;
LIST_INIT(&bp->b_dep);
BUF_LOCKINIT(bp);
@ -1909,6 +1910,7 @@ restart:
bp->b_flags = 0;
bp->b_ioflags = 0;
bp->b_xflags = 0;
bp->b_vflags = 0;
bp->b_dev = NODEV;
bp->b_vp = NULL;
bp->b_blkno = bp->b_lblkno = 0;
@ -3216,12 +3218,12 @@ bufdone(struct buf *bp)
(int) m->pindex, (int)(foff >> 32),
(int) foff & 0xffffffff, resid, i);
if (!vn_isdisk(vp, NULL))
printf(" iosize: %ld, lblkno: %jd, flags: 0x%lx, npages: %d\n",
printf(" iosize: %ld, lblkno: %jd, flags: 0x%x, npages: %d\n",
bp->b_vp->v_mount->mnt_stat.f_iosize,
(intmax_t) bp->b_lblkno,
bp->b_flags, bp->b_npages);
else
printf(" VDEV, lblkno: %jd, flags: 0x%lx, npages: %d\n",
printf(" VDEV, lblkno: %jd, flags: 0x%x, npages: %d\n",
(intmax_t) bp->b_lblkno,
bp->b_flags, bp->b_npages);
printf(" valid: 0x%x, dirty: 0x%x, wired: %d\n",

View File

@ -708,6 +708,95 @@ vop_stdbmap(ap)
return (0);
}
/*
 * Generic fsync implementation: push every buffer on the vnode's dirty
 * list (v_dirtyblkhd) out with an asynchronous write.
 *
 * ap->a_vp is the vnode to flush.  When ap->a_waitfor is MNT_WAIT the
 * routine also sleeps until v_numoutput drains and, if dirty buffers
 * remain, rescans the list (at most maxretry more times).
 *
 * Returns 0 on success, or EAGAIN when a MNT_WAIT flush still has dirty
 * buffers after a write error was recorded or the retries ran out.
 */
int
vop_stdfsync(ap)
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int a_waitfor;
		struct thread *a_td;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct buf *bp;
	struct buf *nbp;
	int s, error = 0;
	int maxretry = 100;	/* large, arbitrarily chosen */

	VI_LOCK(vp);
loop1:
	/*
	 * MARK/SCAN initialization to avoid infinite loops.
	 * BV_SCANNED lives in b_vflags and is only touched here while
	 * VI_LOCK is held.
	 */
	s = splbio();
	TAILQ_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
		bp->b_vflags &= ~BV_SCANNED;
		bp->b_error = 0;
	}
	splx(s);

	/*
	 * Flush all dirty buffers associated with the vnode.
	 */
loop2:
	s = splbio();
	for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp != NULL; bp = nbp) {
		nbp = TAILQ_NEXT(bp, b_vnbufs);
		if ((bp->b_vflags & BV_SCANNED) != 0)
			continue;
		/* Mark first so a buffer we cannot lock is not revisited. */
		bp->b_vflags |= BV_SCANNED;
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT))
			continue;
		VI_UNLOCK(vp);
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vop_stdfsync: not dirty");
		if ((vp->v_vflag & VV_OBJBUF) && (bp->b_flags & B_CLUSTEROK)) {
			BUF_UNLOCK(bp);
			vfs_bio_awrite(bp);
			splx(s);
		} else {
			bremfree(bp);
			splx(s);
			bawrite(bp);
		}
		/*
		 * The vnode lock was dropped for the write, so nbp can no
		 * longer be trusted; restart the scan from the list head.
		 */
		VI_LOCK(vp);
		goto loop2;
	}

	/*
	 * If synchronous the caller expects us to completely resolve all
	 * dirty buffers in the system.  Wait for in-progress I/O to
	 * complete (which could include background bitmap writes), then
	 * retry if dirty blocks still exist.
	 */
	if (ap->a_waitfor == MNT_WAIT) {
		while (vp->v_numoutput) {
			vp->v_iflag |= VI_BWAIT;
			msleep((caddr_t)&vp->v_numoutput, VI_MTX(vp),
			    PRIBIO + 1, "spfsyn", 0);
		}
		if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
			/*
			 * If we are unable to write any of these buffers
			 * then we fail now rather than trying endlessly
			 * to write them out.  Stop at the first buffer
			 * that recorded an error so that a later clean
			 * buffer cannot overwrite it.
			 */
			TAILQ_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
				if ((error = bp->b_error) != 0)
					break;
			}
			if (error == 0 && --maxretry >= 0) {
				splx(s);
				goto loop1;
			}
			vprint("fsync: giving up on dirty", vp);
			error = EAGAIN;
		}
	}
	VI_UNLOCK(vp);
	splx(s);

	return (error);
}
/* XXX Needs good comment and more info in the manpage (VOP_GETPAGES(9)). */
int
vop_stdgetpages(ap)

View File

@ -95,6 +95,11 @@ typedef unsigned char b_xflags_t;
*
* b_resid. Number of bytes remaining in I/O. After an I/O operation
* completes, b_resid is usually 0 indicating 100% success.
*
* All fields are protected by the buffer lock except those marked:
* V - Protected by owning vnode lock
* Q - Protected by the buf queue lock
* D - Protected by a dependency implementation specific lock
*/
struct buf {
/* XXX: b_io must be the first element of struct buf for now /phk */
@ -122,12 +127,13 @@ struct buf {
#ifdef USE_BUFHASH
LIST_ENTRY(buf) b_hash; /* Hash chain. */
#endif
TAILQ_ENTRY(buf) b_vnbufs; /* Buffer's associated vnode. */
struct buf *b_left; /* splay tree link (V) */
struct buf *b_right; /* splay tree link (V) */
TAILQ_ENTRY(buf) b_freelist; /* Free list position if not active. */
long b_flags; /* B_* flags. */
unsigned short b_qindex; /* buffer queue index */
TAILQ_ENTRY(buf) b_vnbufs; /* (V) Buffer's associated vnode. */
struct buf *b_left; /* (V) splay tree link */
struct buf *b_right; /* (V) splay tree link */
uint32_t b_vflags; /* (V) BV_* flags */
TAILQ_ENTRY(buf) b_freelist; /* (Q) Free list position inactive. */
unsigned short b_qindex; /* (Q) buffer queue index */
uint32_t b_flags; /* B_* flags. */
b_xflags_t b_xflags; /* extra flags */
struct lock b_lock; /* Buffer lock */
long b_bufsize; /* Allocated buffer size. */
@ -152,7 +158,7 @@ struct buf {
} b_cluster;
struct vm_page *b_pages[btoc(MAXPHYS)];
int b_npages;
struct workhead b_dep; /* List of filesystem dependencies. */
struct workhead b_dep; /* (D) List of filesystem dependencies. */
};
#define b_spc b_pager.pg_spc
@ -221,7 +227,7 @@ struct buf {
#define B_DONE 0x00000200 /* I/O completed. */
#define B_EINTR 0x00000400 /* I/O was interrupted */
#define B_NOWDRAIN 0x00000800 /* Avoid wdrain deadlock */
#define B_SCANNED 0x00001000 /* VOP_FSYNC funcs mark written bufs */
#define B_00001000 0x00001000 /* Available flag. */
#define B_INVAL 0x00002000 /* Does not contain valid info. */
#define B_LOCKED 0x00004000 /* Locked in core (not reusable). */
#define B_NOCACHE 0x00008000 /* Do not cache block after use. */
@ -261,6 +267,8 @@ struct buf {
#define NOOFFSET (-1LL) /* No buffer offset calculated yet */
#define BV_SCANNED 0x00001000 /* VOP_FSYNC funcs mark written bufs */
#ifdef _KERNEL
/*
* Buffer locking

View File

@ -688,6 +688,7 @@ void vfs_timestamp(struct timespec *);
void vfs_write_resume(struct mount *mp);
int vfs_write_suspend(struct mount *mp);
int vop_stdbmap(struct vop_bmap_args *);
int vop_stdfsync(struct vop_fsync_args *);
int vop_stdgetwritemount(struct vop_getwritemount_args *);
int vop_stdgetpages(struct vop_getpages_args *);
int vop_stdinactive(struct vop_inactive_args *);

View File

@ -184,7 +184,7 @@ ffs_fsync(ap)
VI_LOCK(vp);
loop:
TAILQ_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs)
bp->b_flags &= ~B_SCANNED;
bp->b_vflags &= ~BV_SCANNED;
for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
nbp = TAILQ_NEXT(bp, b_vnbufs);
/*
@ -195,22 +195,21 @@ loop:
* it to be redirtied and it has not already been deferred,
* or it is already being written.
*/
if ((bp->b_flags & B_SCANNED) != 0)
if ((bp->b_vflags & BV_SCANNED) != 0)
continue;
bp->b_flags |= B_SCANNED;
bp->b_vflags |= BV_SCANNED;
if ((skipmeta == 1 && bp->b_lblkno < 0))
continue;
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT))
continue;
if (!wait && LIST_FIRST(&bp->b_dep) != NULL &&
(bp->b_flags & B_DEFERRED) == 0 &&
buf_countdeps(bp, 0)) {
bp->b_flags |= B_DEFERRED;
BUF_UNLOCK(bp);
continue;
}
VI_UNLOCK(vp);
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
VI_LOCK(vp);
continue;
}
if ((bp->b_flags & B_DELWRI) == 0)
panic("ffs_fsync: not dirty");
if (vp != bp->b_vp)