MFC:
    rev. 1.11  of src/sys/geom/geom_vfs.c
    rev. 1.516 of src/sys/kern/vfs_bio.c
    rev. 1.35  of src/sys/nfs4client/nfs4_vnops.c
    rev. 1.272 of src/sys/nfsclient/nfs_vnops.c
    rev. 1.195 of src/sys/sys/buf.h
    rev. 1.18  of src/sys/sys/bufobj.h
    rev. 1.73  of src/sys/ufs/ffs/ffs_extern.h
    rev. 1.133 of src/sys/ufs/ffs/ffs_snapshot.c
    rev. 1.324 of src/sys/ufs/ffs/ffs_vfsops.c

Avoid dealing with buffers in bdwrite() that are on the other side of the
snaplock divisor in the lock order than the buffer being written.

Add a new BOP, bop_bdflush(), to do dirty buffer flushing for the same vnode
in bdwrite(). The default implementation, bufbdflush(), refactors the code
out of bdwrite(); a specialized implementation is used for ffs device
buffers.

This commit changes the KPI/KBI, so out-of-tree kernel modules must be
recompiled.

Approved by:	re (kensmith)
parent f3d7053114
commit b75617bf6d
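For out-of-tree filesystems affected by the KPI change, the new hook looks
roughly like this. A minimal sketch, not part of the commit: myfs_bdflush and
myfs_bufops are hypothetical names; the b_bdflush_t typedef, the bop_bdflush
field, and the generic bufbdflush() are the ones added in the diff below.

	#include <sys/param.h>
	#include <sys/systm.h>
	#include <sys/buf.h>
	#include <sys/bufobj.h>

	/* Hypothetical filesystem hook matching the new b_bdflush_t typedef. */
	static void
	myfs_bdflush(struct bufobj *bo, struct buf *bp)
	{
		/*
		 * A filesystem with special lock-order constraints would do
		 * its own dirty-buffer throttling here; delegating to the
		 * generic implementation keeps the old bdwrite() behavior.
		 */
		bufbdflush(bo, bp);
	}

	static struct buf_ops myfs_bufops = {
		.bop_name	= "myfs_bufops",
		.bop_write	= bufwrite,
		.bop_strategy	= bufstrategy,
		.bop_sync	= bufsync,
		.bop_bdflush	= myfs_bdflush,	/* new field added by this commit */
	};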
src/sys/geom/geom_vfs.c
@@ -50,6 +50,7 @@ static struct buf_ops __g_vfs_bufops = {
 	.bop_write =	bufwrite,
 	.bop_strategy =	g_vfs_strategy,
 	.bop_sync =	bufsync,
+	.bop_bdflush =	bufbdflush
 };
 
 struct buf_ops *g_vfs_bufops = &__g_vfs_bufops;
src/sys/kern/vfs_bio.c
@@ -80,6 +80,7 @@ struct buf_ops buf_ops_bio = {
 	.bop_write =	bufwrite,
 	.bop_strategy =	bufstrategy,
 	.bop_sync =	bufsync,
+	.bop_bdflush =	bufbdflush,
 };
 
 /*
@@ -145,10 +146,13 @@ SYSCTL_INT(_vfs, OID_AUTO, lorunningspace, CTLFLAG_RW, &lorunningspace, 0,
 static int hirunningspace;
 SYSCTL_INT(_vfs, OID_AUTO, hirunningspace, CTLFLAG_RW, &hirunningspace, 0,
     "Maximum amount of space to use for in-progress I/O");
-static int dirtybufferflushes;
+int dirtybufferflushes;
 SYSCTL_INT(_vfs, OID_AUTO, dirtybufferflushes, CTLFLAG_RW, &dirtybufferflushes,
     0, "Number of bdwrite to bawrite conversions to limit dirty buffers");
-static int altbufferflushes;
+int bdwriteskip;
+SYSCTL_INT(_vfs, OID_AUTO, bdwriteskip, CTLFLAG_RW, &bdwriteskip,
+    0, "Number of buffers supplied to bdwrite with snapshot deadlock risk");
+int altbufferflushes;
 SYSCTL_INT(_vfs, OID_AUTO, altbufferflushes, CTLFLAG_RW, &altbufferflushes,
     0, "Number of fsync flushes to limit dirty buffers");
 static int recursiveflushes;
@@ -163,7 +167,7 @@ SYSCTL_INT(_vfs, OID_AUTO, lodirtybuffers, CTLFLAG_RW, &lodirtybuffers, 0,
 static int hidirtybuffers;
 SYSCTL_INT(_vfs, OID_AUTO, hidirtybuffers, CTLFLAG_RW, &hidirtybuffers, 0,
     "When the number of dirty buffers is considered severe");
-static int dirtybufthresh;
+int dirtybufthresh;
 SYSCTL_INT(_vfs, OID_AUTO, dirtybufthresh, CTLFLAG_RW, &dirtybufthresh,
     0, "Number of bdwrite to bawrite conversions to clear dirty buffers");
 static int numfreebuffers;
@@ -862,6 +866,47 @@ bufwrite(struct buf *bp)
 	return (0);
 }
 
+void
+bufbdflush(struct bufobj *bo, struct buf *bp)
+{
+	struct buf *nbp;
+
+	if (bo->bo_dirty.bv_cnt > dirtybufthresh + 10) {
+		(void) VOP_FSYNC(bp->b_vp, MNT_NOWAIT, curthread);
+		altbufferflushes++;
+	} else if (bo->bo_dirty.bv_cnt > dirtybufthresh) {
+		BO_LOCK(bo);
+		/*
+		 * Try to find a buffer to flush.
+		 */
+		TAILQ_FOREACH(nbp, &bo->bo_dirty.bv_hd, b_bobufs) {
+			if ((nbp->b_vflags & BV_BKGRDINPROG) ||
+			    BUF_LOCK(nbp,
+				     LK_EXCLUSIVE | LK_NOWAIT, NULL))
+				continue;
+			if (bp == nbp)
+				panic("bdwrite: found ourselves");
+			BO_UNLOCK(bo);
+			/* Don't countdeps with the bo lock held. */
+			if (buf_countdeps(nbp, 0)) {
+				BO_LOCK(bo);
+				BUF_UNLOCK(nbp);
+				continue;
+			}
+			if (nbp->b_flags & B_CLUSTEROK) {
+				vfs_bio_awrite(nbp);
+			} else {
+				bremfree(nbp);
+				bawrite(nbp);
+			}
+			dirtybufferflushes++;
+			break;
+		}
+		if (nbp == NULL)
+			BO_UNLOCK(bo);
+	}
+}
+
 /*
  * Delayed write. (Buffer is marked dirty). Do not bother writing
  * anything if the buffer is marked invalid.
@@ -876,7 +921,6 @@ bdwrite(struct buf *bp)
 {
 	struct thread *td = curthread;
 	struct vnode *vp;
-	struct buf *nbp;
 	struct bufobj *bo;
 
 	CTR3(KTR_BUF, "bdwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
@@ -897,44 +941,9 @@ bdwrite(struct buf *bp)
 	 */
 	vp = bp->b_vp;
 	bo = bp->b_bufobj;
-	if ((td->td_pflags & TDP_COWINPROGRESS) == 0) {
-		BO_LOCK(bo);
-		if (bo->bo_dirty.bv_cnt > dirtybufthresh + 10) {
-			BO_UNLOCK(bo);
-			(void) VOP_FSYNC(vp, MNT_NOWAIT, td);
-			altbufferflushes++;
-		} else if (bo->bo_dirty.bv_cnt > dirtybufthresh) {
-			/*
-			 * Try to find a buffer to flush.
-			 */
-			TAILQ_FOREACH(nbp, &bo->bo_dirty.bv_hd, b_bobufs) {
-				if ((nbp->b_vflags & BV_BKGRDINPROG) ||
-				    BUF_LOCK(nbp,
-					     LK_EXCLUSIVE | LK_NOWAIT, NULL))
-					continue;
-				if (bp == nbp)
-					panic("bdwrite: found ourselves");
-				BO_UNLOCK(bo);
-				/* Don't countdeps with the bo lock held. */
-				if (buf_countdeps(nbp, 0)) {
-					BO_LOCK(bo);
-					BUF_UNLOCK(nbp);
-					continue;
-				}
-				if (nbp->b_flags & B_CLUSTEROK) {
-					vfs_bio_awrite(nbp);
-				} else {
-					bremfree(nbp);
-					bawrite(nbp);
-				}
-				dirtybufferflushes++;
-				break;
-			}
-			if (nbp == NULL)
-				BO_UNLOCK(bo);
-		} else
-			BO_UNLOCK(bo);
-	} else
+	if ((td->td_pflags & TDP_COWINPROGRESS) == 0)
+		BO_BDFLUSH(bo, bp);
+	else
 		recursiveflushes++;
 
 	bdirty(bp);
src/sys/nfs4client/nfs4_vnops.c
@@ -2874,4 +2874,5 @@ struct buf_ops buf_ops_nfs4 = {
 	.bop_write =	nfs4_bwrite,
 	.bop_strategy =	bufstrategy,
 	.bop_sync =	bufsync,
+	.bop_bdflush =	bufbdflush,
 };
src/sys/nfsclient/nfs_vnops.c
@@ -3129,4 +3129,5 @@ struct buf_ops buf_ops_nfs = {
 	.bop_write =	nfs_bwrite,
 	.bop_strategy =	bufstrategy,
 	.bop_sync =	bufsync,
+	.bop_bdflush =	bufbdflush,
 };
src/sys/sys/buf.h
@@ -479,6 +479,10 @@ extern int	maxswzone;	/* Max KVA for swap structures */
 extern int	maxbcache;	/* Max KVA for buffer cache */
 extern int	runningbufspace;
 extern int	hibufspace;
+extern int	dirtybufthresh;
+extern int	bdwriteskip;
+extern int	dirtybufferflushes;
+extern int	altbufferflushes;
 extern int	buf_maxio;	/* nominal maximum I/O for buffer */
 extern struct	buf *buf;	/* The buffer headers. */
 extern char	*buffers;	/* The buffer contents. */
src/sys/sys/bufobj.h
@@ -70,17 +70,20 @@ struct bufv {
 typedef void b_strategy_t(struct bufobj *, struct buf *);
 typedef int b_write_t(struct buf *);
 typedef int b_sync_t(struct bufobj *, int waitfor, struct thread *td);
+typedef void b_bdflush_t(struct bufobj *, struct buf *);
 
 struct buf_ops {
 	char		*bop_name;
 	b_write_t	*bop_write;
 	b_strategy_t	*bop_strategy;
 	b_sync_t	*bop_sync;
+	b_bdflush_t	*bop_bdflush;
 };
 
 #define BO_STRATEGY(bo, bp)	((bo)->bo_ops->bop_strategy((bo), (bp)))
 #define BO_SYNC(bo, w, td)	((bo)->bo_ops->bop_sync((bo), (w), (td)))
 #define BO_WRITE(bo, bp)	((bo)->bo_ops->bop_write((bp)))
+#define BO_BDFLUSH(bo, bp)	((bo)->bo_ops->bop_bdflush((bo), (bp)))
 
 struct bufobj {
 	struct mtx	*bo_mtx;	/* Mutex which protects "i" things */
@@ -129,6 +132,7 @@ void bufobj_wrefl(struct bufobj *bo);
 int bufobj_invalbuf(struct bufobj *bo, int flags, struct thread *td, int slpflag, int slptimeo);
 int bufobj_wwait(struct bufobj *bo, int slpflag, int timeo);
 int bufsync(struct bufobj *bo, int waitfor, struct thread *td);
+void bufbdflush(struct bufobj *bo, struct buf *bp);
 
 #endif /* defined(_KERNEL) || defined(_KVM_VNODE) */
 #endif /* _SYS_BUFOBJ_H_ */
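The new macro dispatches the same way the existing BO_WRITE/BO_SYNC macros do,
which is why growing struct buf_ops breaks the KBI. A hedged sketch of the
dispatch path (vp and the bo_ops assignment site are illustrative, not part
of this diff; field names are as declared above):

	struct bufobj *bo = &vp->v_bufobj;	/* hypothetical vnode vp */

	bo->bo_ops = &buf_ops_bio;	/* generic ops; FFS installs ffs_ops */

	/* Later, from bdwrite(): */
	BO_BDFLUSH(bo, bp);	/* expands to (bo)->bo_ops->bop_bdflush((bo), (bp)),
				 * i.e. bufbdflush(bo, bp) for buf_ops_bio */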
src/sys/ufs/ffs/ffs_extern.h
@@ -61,6 +61,7 @@ ufs2_daddr_t ffs_blkpref_ufs1(struct inode *, ufs_lbn_t, int, ufs1_daddr_t *);
 ufs2_daddr_t ffs_blkpref_ufs2(struct inode *, ufs_lbn_t, int, ufs2_daddr_t *);
 int	ffs_checkfreefile(struct fs *, struct vnode *, ino_t);
 void	ffs_clrblock(struct fs *, u_char *, ufs1_daddr_t);
+void	ffs_bdflush(struct bufobj *, struct buf *);
 int	ffs_copyonwrite(struct vnode *, struct buf *);
 int	ffs_flushfiles(struct mount *, int, struct thread *);
 void	ffs_fragacct(struct fs *, int, int32_t [], int);
src/sys/ufs/ffs/ffs_snapshot.c
@@ -163,6 +163,7 @@ static int mapacct_ufs2(struct vnode *, ufs2_daddr_t *, ufs2_daddr_t *,
 static int readblock(struct vnode *vp, struct buf *, ufs2_daddr_t);
 static void process_deferred_inactive(struct mount *);
 static void try_free_snapdata(struct vnode *devvp, struct thread *td);
+static int ffs_bp_snapblk(struct vnode *, struct buf *);
 
 /*
  * To ensure the consistency of snapshots across crashes, we must
@@ -2065,6 +2066,119 @@ ffs_snapshot_unmount(mp)
 	ASSERT_VOP_LOCKED(devvp, "ffs_snapshot_unmount");
 }
 
+/*
+ * Check whether the buffer block belongs to a device buffer that must
+ * be locked after snaplk.  devvp must be locked on entry and is left
+ * locked on exit.
+ */
+static int
+ffs_bp_snapblk(devvp, bp)
+	struct vnode *devvp;
+	struct buf *bp;
+{
+	struct snapdata *sn;
+	struct fs *fs;
+	ufs2_daddr_t lbn, *snapblklist;
+	int lower, upper, mid;
+
+	ASSERT_VI_LOCKED(devvp, "ffs_bp_snapblk");
+	KASSERT(devvp->v_type == VCHR, ("Not a device %p", devvp));
+	sn = devvp->v_rdev->si_snapdata;
+	if (sn == NULL || TAILQ_FIRST(&sn->sn_head) == NULL)
+		return (0);
+	fs = TAILQ_FIRST(&sn->sn_head)->i_fs;
+	lbn = fragstoblks(fs, dbtofsb(fs, bp->b_blkno));
+	snapblklist = sn->sn_blklist;
+	upper = sn->sn_listsize - 1;
+	lower = 1;
+	while (lower <= upper) {
+		mid = (lower + upper) / 2;
+		if (snapblklist[mid] == lbn)
+			break;
+		if (snapblklist[mid] < lbn)
+			lower = mid + 1;
+		else
+			upper = mid - 1;
+	}
+	if (lower <= upper)
+		return (1);
+	return (0);
+}
+
+void
+ffs_bdflush(bo, bp)
+	struct bufobj *bo;
+	struct buf *bp;
+{
+	struct thread *td;
+	struct vnode *vp, *devvp;
+	struct buf *nbp;
+	int bp_bdskip;
+
+	if (bo->bo_dirty.bv_cnt <= dirtybufthresh)
+		return;
+
+	td = curthread;
+	vp = bp->b_vp;
+	devvp = bo->__bo_vnode;
+	KASSERT(vp == devvp, ("devvp != vp %p %p", bo, bp));
+
+	VI_LOCK(devvp);
+	bp_bdskip = ffs_bp_snapblk(devvp, bp);
+	if (bp_bdskip)
+		bdwriteskip++;
+	VI_UNLOCK(devvp);
+	if (bo->bo_dirty.bv_cnt > dirtybufthresh + 10 && !bp_bdskip) {
+		(void) VOP_FSYNC(vp, MNT_NOWAIT, td);
+		altbufferflushes++;
+	} else {
+		BO_LOCK(bo);
+		/*
+		 * Try to find a buffer to flush.
+		 */
+		TAILQ_FOREACH(nbp, &bo->bo_dirty.bv_hd, b_bobufs) {
+			if ((nbp->b_vflags & BV_BKGRDINPROG) ||
+			    BUF_LOCK(nbp,
+				     LK_EXCLUSIVE | LK_NOWAIT, NULL))
+				continue;
+			if (bp == nbp)
+				panic("bdwrite: found ourselves");
+			BO_UNLOCK(bo);
+			/*
+			 * Don't countdeps with the bo lock
+			 * held.
+			 */
+			if (buf_countdeps(nbp, 0)) {
+				BO_LOCK(bo);
+				BUF_UNLOCK(nbp);
+				continue;
+			}
+			if (bp_bdskip) {
+				VI_LOCK(devvp);
+				if (!ffs_bp_snapblk(vp, nbp)) {
+					if (BO_MTX(bo) != VI_MTX(vp)) {
+						VI_UNLOCK(devvp);
+						BO_LOCK(bo);
+					}
+					BUF_UNLOCK(nbp);
+					continue;
+				}
+				VI_UNLOCK(devvp);
+			}
+			if (nbp->b_flags & B_CLUSTEROK) {
+				vfs_bio_awrite(nbp);
+			} else {
+				bremfree(nbp);
+				bawrite(nbp);
+			}
+			dirtybufferflushes++;
+			break;
+		}
+		if (nbp == NULL)
+			BO_UNLOCK(bo);
+	}
+}
+
 /*
  * Check for need to copy block that is about to be written,
  * copying the block if necessary.
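The membership test in ffs_bp_snapblk() above is a plain binary search over
the sorted snapshot block list; the search starting at index 1 suggests entry
0 of sn_blklist is a metadata slot (in FFS it holds the list length). A
standalone userland sketch of the same lookup, with hypothetical names, for
illustration only:

	#include <stdio.h>

	typedef long long daddr_example_t;	/* stand-in for ufs2_daddr_t */

	/* Return 1 if lbn appears in the sorted list entries 1..listsize-1. */
	static int
	snapblk_member(const daddr_example_t *list, int listsize,
	    daddr_example_t lbn)
	{
		int lower = 1, upper = listsize - 1, mid;

		while (lower <= upper) {
			mid = (lower + upper) / 2;
			if (list[mid] == lbn)
				return (1);	/* block belongs to a snapshot */
			if (list[mid] < lbn)
				lower = mid + 1;
			else
				upper = mid - 1;
		}
		return (0);
	}

	int
	main(void)
	{
		/* list[0] is the length slot; entries 1..4 are sorted blocks */
		daddr_example_t list[] = { 5, 12, 40, 97, 344 };

		printf("%d %d\n", snapblk_member(list, 5, 97),	/* 1 */
		    snapblk_member(list, 5, 13));		/* 0 */
		return (0);
	}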
src/sys/ufs/ffs/ffs_vfsops.c
@@ -115,6 +115,11 @@ static struct buf_ops ffs_ops = {
 	.bop_write =	ffs_bufwrite,
 	.bop_strategy =	ffs_geom_strategy,
 	.bop_sync =	bufsync,
+#ifdef NO_FFS_SNAPSHOT
+	.bop_bdflush =	bufbdflush,
+#else
+	.bop_bdflush =	ffs_bdflush,
+#endif
 };
 
 static const char *ffs_opts[] = { "acls", "async", "atime", "clusterr",