- Clean up unlocked accesses to buf flags by introducing a new b_vflags member
  that is protected by the vnode lock.
- Move B_SCANNED into b_vflags and call it BV_SCANNED.
- Create a vop_stdfsync() modeled after spec's sync.
- Replace spec_fsync, msdosfs_fsync, and hpfs_fsync with the stdfsync and some
  fs specific processing. This gives all of these filesystems proper behavior
  wrt MNT_WAIT/NOWAIT and the use of the B_SCANNED flag.
- Annotate the locking in buf.h.
This commit is contained in:
parent
15553af710
commit
767b9a529d
@ -87,48 +87,15 @@ hpfs_fsync(ap)
|
||||
struct thread *a_td;
|
||||
} */ *ap;
|
||||
{
|
||||
struct vnode *vp = ap->a_vp;
|
||||
int s;
|
||||
struct buf *bp, *nbp;
|
||||
|
||||
/*
|
||||
* Flush all dirty buffers associated with a vnode.
|
||||
* Flush our dirty buffers.
|
||||
*/
|
||||
loop:
|
||||
VI_LOCK(vp);
|
||||
s = splbio();
|
||||
for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
|
||||
nbp = TAILQ_NEXT(bp, b_vnbufs);
|
||||
VI_UNLOCK(vp);
|
||||
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
|
||||
VI_LOCK(vp);
|
||||
continue;
|
||||
}
|
||||
if ((bp->b_flags & B_DELWRI) == 0)
|
||||
panic("hpfs_fsync: not dirty");
|
||||
bremfree(bp);
|
||||
splx(s);
|
||||
(void) bwrite(bp);
|
||||
goto loop;
|
||||
}
|
||||
while (vp->v_numoutput) {
|
||||
vp->v_iflag |= VI_BWAIT;
|
||||
msleep((caddr_t)&vp->v_numoutput, VI_MTX(vp), PRIBIO + 1,
|
||||
"hpfsn", 0);
|
||||
}
|
||||
#ifdef DIAGNOSTIC
|
||||
if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
|
||||
vprint("hpfs_fsync: dirty", vp);
|
||||
goto loop;
|
||||
}
|
||||
#endif
|
||||
VI_UNLOCK(vp);
|
||||
splx(s);
|
||||
vop_stdfsync(ap);
|
||||
|
||||
/*
|
||||
* Write out the on-disc version of the vnode.
|
||||
*/
|
||||
return hpfs_update(VTOHP(vp));
|
||||
return hpfs_update(VTOHP(ap->a_vp));
|
||||
}
|
||||
|
||||
static int
|
||||
|
@ -806,45 +806,12 @@ msdosfs_fsync(ap)
|
||||
struct thread *a_td;
|
||||
} */ *ap;
|
||||
{
|
||||
struct vnode *vp = ap->a_vp;
|
||||
int s;
|
||||
struct buf *bp, *nbp;
|
||||
|
||||
/*
|
||||
* Flush all dirty buffers associated with a vnode.
|
||||
* Flush our dirty buffers.
|
||||
*/
|
||||
loop:
|
||||
s = splbio();
|
||||
VI_LOCK(vp);
|
||||
for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
|
||||
nbp = TAILQ_NEXT(bp, b_vnbufs);
|
||||
VI_UNLOCK(vp);
|
||||
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
|
||||
VI_LOCK(vp);
|
||||
continue;
|
||||
}
|
||||
if ((bp->b_flags & B_DELWRI) == 0)
|
||||
panic("msdosfs_fsync: not dirty");
|
||||
bremfree(bp);
|
||||
splx(s);
|
||||
/* XXX Could do bawrite */
|
||||
(void) bwrite(bp);
|
||||
goto loop;
|
||||
}
|
||||
while (vp->v_numoutput) {
|
||||
vp->v_iflag |= VI_BWAIT;
|
||||
(void) msleep((caddr_t)&vp->v_numoutput, VI_MTX(vp),
|
||||
PRIBIO + 1, "msdosfsn", 0);
|
||||
}
|
||||
#ifdef DIAGNOSTIC
|
||||
if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
|
||||
vprint("msdosfs_fsync: dirty", vp);
|
||||
goto loop;
|
||||
}
|
||||
#endif
|
||||
VI_UNLOCK(vp);
|
||||
splx(s);
|
||||
return (deupdat(VTODE(vp), ap->a_waitfor == MNT_WAIT));
|
||||
vop_stdfsync(ap);
|
||||
|
||||
return (deupdat(VTODE(ap->a_vp), ap->a_waitfor == MNT_WAIT));
|
||||
}
|
||||
|
||||
static int
|
||||
|
@ -416,89 +416,10 @@ spec_fsync(ap)
|
||||
struct thread *a_td;
|
||||
} */ *ap;
|
||||
{
|
||||
struct vnode *vp = ap->a_vp;
|
||||
struct buf *bp;
|
||||
struct buf *nbp;
|
||||
int s, error = 0;
|
||||
int maxretry = 100; /* large, arbitrarily chosen */
|
||||
|
||||
if (!vn_isdisk(vp, NULL))
|
||||
if (!vn_isdisk(ap->a_vp, NULL))
|
||||
return (0);
|
||||
|
||||
VI_LOCK(vp);
|
||||
loop1:
|
||||
/*
|
||||
* MARK/SCAN initialization to avoid infinite loops.
|
||||
*/
|
||||
s = splbio();
|
||||
TAILQ_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
|
||||
bp->b_flags &= ~B_SCANNED;
|
||||
bp->b_error = 0;
|
||||
}
|
||||
splx(s);
|
||||
|
||||
/*
|
||||
* Flush all dirty buffers associated with a block device.
|
||||
*/
|
||||
loop2:
|
||||
s = splbio();
|
||||
for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp != NULL; bp = nbp) {
|
||||
nbp = TAILQ_NEXT(bp, b_vnbufs);
|
||||
if ((bp->b_flags & B_SCANNED) != 0)
|
||||
continue;
|
||||
VI_UNLOCK(vp);
|
||||
bp->b_flags |= B_SCANNED;
|
||||
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
|
||||
VI_LOCK(vp);
|
||||
continue;
|
||||
}
|
||||
if ((bp->b_flags & B_DELWRI) == 0)
|
||||
panic("spec_fsync: not dirty");
|
||||
if ((vp->v_vflag & VV_OBJBUF) && (bp->b_flags & B_CLUSTEROK)) {
|
||||
BUF_UNLOCK(bp);
|
||||
vfs_bio_awrite(bp);
|
||||
splx(s);
|
||||
} else {
|
||||
bremfree(bp);
|
||||
splx(s);
|
||||
bawrite(bp);
|
||||
}
|
||||
VI_LOCK(vp);
|
||||
goto loop2;
|
||||
}
|
||||
|
||||
/*
|
||||
* If synchronous the caller expects us to completely resolve all
|
||||
* dirty buffers in the system. Wait for in-progress I/O to
|
||||
* complete (which could include background bitmap writes), then
|
||||
* retry if dirty blocks still exist.
|
||||
*/
|
||||
if (ap->a_waitfor == MNT_WAIT) {
|
||||
while (vp->v_numoutput) {
|
||||
vp->v_iflag |= VI_BWAIT;
|
||||
msleep((caddr_t)&vp->v_numoutput, VI_MTX(vp),
|
||||
PRIBIO + 1, "spfsyn", 0);
|
||||
}
|
||||
if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
|
||||
/*
|
||||
* If we are unable to write any of these buffers
|
||||
* then we fail now rather than trying endlessly
|
||||
* to write them out.
|
||||
*/
|
||||
TAILQ_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs)
|
||||
if ((error = bp->b_error) == 0)
|
||||
continue;
|
||||
if (error == 0 && --maxretry >= 0) {
|
||||
splx(s);
|
||||
goto loop1;
|
||||
}
|
||||
vprint("spec_fsync: giving up on dirty", vp);
|
||||
error = EAGAIN;
|
||||
}
|
||||
}
|
||||
VI_UNLOCK(vp);
|
||||
splx(s);
|
||||
return (error);
|
||||
return (vop_stdfsync(ap));
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -542,6 +542,7 @@ bufinit(void)
|
||||
bp->b_rcred = NOCRED;
|
||||
bp->b_wcred = NOCRED;
|
||||
bp->b_qindex = QUEUE_EMPTY;
|
||||
bp->b_vflags = 0;
|
||||
bp->b_xflags = 0;
|
||||
LIST_INIT(&bp->b_dep);
|
||||
BUF_LOCKINIT(bp);
|
||||
@ -1909,6 +1910,7 @@ restart:
|
||||
bp->b_flags = 0;
|
||||
bp->b_ioflags = 0;
|
||||
bp->b_xflags = 0;
|
||||
bp->b_vflags = 0;
|
||||
bp->b_dev = NODEV;
|
||||
bp->b_vp = NULL;
|
||||
bp->b_blkno = bp->b_lblkno = 0;
|
||||
@ -3216,12 +3218,12 @@ bufdone(struct buf *bp)
|
||||
(int) m->pindex, (int)(foff >> 32),
|
||||
(int) foff & 0xffffffff, resid, i);
|
||||
if (!vn_isdisk(vp, NULL))
|
||||
printf(" iosize: %ld, lblkno: %jd, flags: 0x%lx, npages: %d\n",
|
||||
printf(" iosize: %ld, lblkno: %jd, flags: 0x%x, npages: %d\n",
|
||||
bp->b_vp->v_mount->mnt_stat.f_iosize,
|
||||
(intmax_t) bp->b_lblkno,
|
||||
bp->b_flags, bp->b_npages);
|
||||
else
|
||||
printf(" VDEV, lblkno: %jd, flags: 0x%lx, npages: %d\n",
|
||||
printf(" VDEV, lblkno: %jd, flags: 0x%x, npages: %d\n",
|
||||
(intmax_t) bp->b_lblkno,
|
||||
bp->b_flags, bp->b_npages);
|
||||
printf(" valid: 0x%x, dirty: 0x%x, wired: %d\n",
|
||||
|
@ -708,6 +708,95 @@ vop_stdbmap(ap)
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
vop_stdfsync(ap)
|
||||
struct vop_fsync_args /* {
|
||||
struct vnode *a_vp;
|
||||
struct ucred *a_cred;
|
||||
int a_waitfor;
|
||||
struct thread *a_td;
|
||||
} */ *ap;
|
||||
{
|
||||
struct vnode *vp = ap->a_vp;
|
||||
struct buf *bp;
|
||||
struct buf *nbp;
|
||||
int s, error = 0;
|
||||
int maxretry = 100; /* large, arbitrarily chosen */
|
||||
|
||||
VI_LOCK(vp);
|
||||
loop1:
|
||||
/*
|
||||
* MARK/SCAN initialization to avoid infinite loops.
|
||||
*/
|
||||
s = splbio();
|
||||
TAILQ_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
|
||||
bp->b_vflags &= ~BV_SCANNED;
|
||||
bp->b_error = 0;
|
||||
}
|
||||
splx(s);
|
||||
|
||||
/*
|
||||
* Flush all dirty buffers associated with a block device.
|
||||
*/
|
||||
loop2:
|
||||
s = splbio();
|
||||
for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp != NULL; bp = nbp) {
|
||||
nbp = TAILQ_NEXT(bp, b_vnbufs);
|
||||
if ((bp->b_vflags & BV_SCANNED) != 0)
|
||||
continue;
|
||||
bp->b_vflags |= BV_SCANNED;
|
||||
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT))
|
||||
continue;
|
||||
VI_UNLOCK(vp);
|
||||
if ((bp->b_flags & B_DELWRI) == 0)
|
||||
panic("spec_fsync: not dirty");
|
||||
if ((vp->v_vflag & VV_OBJBUF) && (bp->b_flags & B_CLUSTEROK)) {
|
||||
BUF_UNLOCK(bp);
|
||||
vfs_bio_awrite(bp);
|
||||
splx(s);
|
||||
} else {
|
||||
bremfree(bp);
|
||||
splx(s);
|
||||
bawrite(bp);
|
||||
}
|
||||
VI_LOCK(vp);
|
||||
goto loop2;
|
||||
}
|
||||
|
||||
/*
|
||||
* If synchronous the caller expects us to completely resolve all
|
||||
* dirty buffers in the system. Wait for in-progress I/O to
|
||||
* complete (which could include background bitmap writes), then
|
||||
* retry if dirty blocks still exist.
|
||||
*/
|
||||
if (ap->a_waitfor == MNT_WAIT) {
|
||||
while (vp->v_numoutput) {
|
||||
vp->v_iflag |= VI_BWAIT;
|
||||
msleep((caddr_t)&vp->v_numoutput, VI_MTX(vp),
|
||||
PRIBIO + 1, "spfsyn", 0);
|
||||
}
|
||||
if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
|
||||
/*
|
||||
* If we are unable to write any of these buffers
|
||||
* then we fail now rather than trying endlessly
|
||||
* to write them out.
|
||||
*/
|
||||
TAILQ_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs)
|
||||
if ((error = bp->b_error) == 0)
|
||||
continue;
|
||||
if (error == 0 && --maxretry >= 0) {
|
||||
splx(s);
|
||||
goto loop1;
|
||||
}
|
||||
vprint("fsync: giving up on dirty", vp);
|
||||
error = EAGAIN;
|
||||
}
|
||||
}
|
||||
VI_UNLOCK(vp);
|
||||
splx(s);
|
||||
|
||||
return (error);
|
||||
}
|
||||
/* XXX Needs good comment and more info in the manpage (VOP_GETPAGES(9)). */
|
||||
int
|
||||
vop_stdgetpages(ap)
|
||||
|
@ -95,6 +95,11 @@ typedef unsigned char b_xflags_t;
|
||||
*
|
||||
* b_resid. Number of bytes remaining in I/O. After an I/O operation
|
||||
* completes, b_resid is usually 0 indicating 100% success.
|
||||
*
|
||||
* All fields are protected by the buffer lock except those marked:
|
||||
* V - Protected by owning vnode lock
|
||||
* Q - Protected by the buf queue lock
|
||||
* D - Protected by a dependency implementation-specific lock
|
||||
*/
|
||||
struct buf {
|
||||
/* XXX: b_io must be the first element of struct buf for now /phk */
|
||||
@ -122,12 +127,13 @@ struct buf {
|
||||
#ifdef USE_BUFHASH
|
||||
LIST_ENTRY(buf) b_hash; /* Hash chain. */
|
||||
#endif
|
||||
TAILQ_ENTRY(buf) b_vnbufs; /* Buffer's associated vnode. */
|
||||
struct buf *b_left; /* splay tree link (V) */
|
||||
struct buf *b_right; /* splay tree link (V) */
|
||||
TAILQ_ENTRY(buf) b_freelist; /* Free list position if not active. */
|
||||
long b_flags; /* B_* flags. */
|
||||
unsigned short b_qindex; /* buffer queue index */
|
||||
TAILQ_ENTRY(buf) b_vnbufs; /* (V) Buffer's associated vnode. */
|
||||
struct buf *b_left; /* (V) splay tree link */
|
||||
struct buf *b_right; /* (V) splay tree link */
|
||||
uint32_t b_vflags; /* (V) BV_* flags */
|
||||
TAILQ_ENTRY(buf) b_freelist; /* (Q) Free list position inactive. */
|
||||
unsigned short b_qindex; /* (Q) buffer queue index */
|
||||
uint32_t b_flags; /* B_* flags. */
|
||||
b_xflags_t b_xflags; /* extra flags */
|
||||
struct lock b_lock; /* Buffer lock */
|
||||
long b_bufsize; /* Allocated buffer size. */
|
||||
@ -152,7 +158,7 @@ struct buf {
|
||||
} b_cluster;
|
||||
struct vm_page *b_pages[btoc(MAXPHYS)];
|
||||
int b_npages;
|
||||
struct workhead b_dep; /* List of filesystem dependencies. */
|
||||
struct workhead b_dep; /* (D) List of filesystem dependencies. */
|
||||
};
|
||||
|
||||
#define b_spc b_pager.pg_spc
|
||||
@ -221,7 +227,7 @@ struct buf {
|
||||
#define B_DONE 0x00000200 /* I/O completed. */
|
||||
#define B_EINTR 0x00000400 /* I/O was interrupted */
|
||||
#define B_NOWDRAIN 0x00000800 /* Avoid wdrain deadlock */
|
||||
#define B_SCANNED 0x00001000 /* VOP_FSYNC funcs mark written bufs */
|
||||
#define B_00001000 0x00001000 /* Available flag. */
|
||||
#define B_INVAL 0x00002000 /* Does not contain valid info. */
|
||||
#define B_LOCKED 0x00004000 /* Locked in core (not reusable). */
|
||||
#define B_NOCACHE 0x00008000 /* Do not cache block after use. */
|
||||
@ -261,6 +267,8 @@ struct buf {
|
||||
|
||||
#define NOOFFSET (-1LL) /* No buffer offset calculated yet */
|
||||
|
||||
#define BV_SCANNED 0x00001000 /* VOP_FSYNC funcs mark written bufs */
|
||||
|
||||
#ifdef _KERNEL
|
||||
/*
|
||||
* Buffer locking
|
||||
|
@ -688,6 +688,7 @@ void vfs_timestamp(struct timespec *);
|
||||
void vfs_write_resume(struct mount *mp);
|
||||
int vfs_write_suspend(struct mount *mp);
|
||||
int vop_stdbmap(struct vop_bmap_args *);
|
||||
int vop_stdfsync(struct vop_fsync_args *);
|
||||
int vop_stdgetwritemount(struct vop_getwritemount_args *);
|
||||
int vop_stdgetpages(struct vop_getpages_args *);
|
||||
int vop_stdinactive(struct vop_inactive_args *);
|
||||
|
@ -184,7 +184,7 @@ ffs_fsync(ap)
|
||||
VI_LOCK(vp);
|
||||
loop:
|
||||
TAILQ_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs)
|
||||
bp->b_flags &= ~B_SCANNED;
|
||||
bp->b_vflags &= ~BV_SCANNED;
|
||||
for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
|
||||
nbp = TAILQ_NEXT(bp, b_vnbufs);
|
||||
/*
|
||||
@ -195,22 +195,21 @@ loop:
|
||||
* it to be redirtied and it has not already been deferred,
|
||||
* or it is already being written.
|
||||
*/
|
||||
if ((bp->b_flags & B_SCANNED) != 0)
|
||||
if ((bp->b_vflags & BV_SCANNED) != 0)
|
||||
continue;
|
||||
bp->b_flags |= B_SCANNED;
|
||||
bp->b_vflags |= BV_SCANNED;
|
||||
if ((skipmeta == 1 && bp->b_lblkno < 0))
|
||||
continue;
|
||||
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT))
|
||||
continue;
|
||||
if (!wait && LIST_FIRST(&bp->b_dep) != NULL &&
|
||||
(bp->b_flags & B_DEFERRED) == 0 &&
|
||||
buf_countdeps(bp, 0)) {
|
||||
bp->b_flags |= B_DEFERRED;
|
||||
BUF_UNLOCK(bp);
|
||||
continue;
|
||||
}
|
||||
VI_UNLOCK(vp);
|
||||
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
|
||||
VI_LOCK(vp);
|
||||
continue;
|
||||
}
|
||||
if ((bp->b_flags & B_DELWRI) == 0)
|
||||
panic("ffs_fsync: not dirty");
|
||||
if (vp != bp->b_vp)
|
||||
|
Loading…
x
Reference in New Issue
Block a user