Partially revert r255986: do not call VOP_FSYNC() when helping
bufdaemon in getnewbuf(), do use buf_flush().  The difference is that
bufdaemon uses TRYLOCK to get buffer locks, which allows calls to
getnewbuf() while another buffer is locked.

Reported and tested by:	pho
Sponsored by:	The FreeBSD Foundation
MFC after:	1 week
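
Why TRYLOCK matters is easy to demonstrate outside the kernel: a thread
that already owns one of the vnode's buffer locks would deadlock inside a
blocking flush such as VOP_FSYNC(), while a LK_NOWAIT-style sweep simply
skips any buffer it cannot lock and still makes progress on the rest.  A
minimal user-space sketch; struct buf, buf_trylock() and flush_dirty()
below are invented stand-ins, not the kernel interfaces:

	/*
	 * Illustrative only: not kernel code.  Models buffer locks as
	 * a boolean so the single-threaded example stays self-contained.
	 */
	#include <stdbool.h>
	#include <stdio.h>

	struct buf {
		bool	locked;		/* stand-in for the buffer lock */
		bool	dirty;
	};

	/* LK_NOWAIT-style acquire: fail instead of sleeping. */
	static bool
	buf_trylock(struct buf *bp)
	{

		if (bp->locked)
			return (false);
		bp->locked = true;
		return (true);
	}

	/* bufdaemon-style sweep: skip buffers whose lock cannot be taken. */
	static int
	flush_dirty(struct buf *bufs, int n)
	{
		int flushed, i;

		flushed = 0;
		for (i = 0; i < n; i++) {
			if (!bufs[i].dirty || !buf_trylock(&bufs[i]))
				continue;	/* held by us or by another thread */
			bufs[i].dirty = false;	/* "write" the buffer */
			bufs[i].locked = false;
			flushed++;
		}
		return (flushed);
	}

	int
	main(void)
	{
		struct buf bufs[3] = {
			{ .dirty = true }, { .dirty = true }, { .dirty = true },
		};

		buf_trylock(&bufs[1]);	/* caller owns one dirty buffer... */
		/* ...yet the trylock sweep still flushes the other two. */
		printf("flushed %d of 3\n", flush_dirty(bufs, 3));
		return (0);
	}

A blocking flush in the same situation would sleep forever on bufs[1];
that is the buffer lock recursion described in the comment this commit
adds to getnewbuf_bufd_help().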
Konstantin Belousov 2015-04-27 11:13:19 +00:00
parent f16f8610bb
commit 9a2c85350a

sys/kern/vfs_bio.c

@@ -113,8 +113,8 @@ static void vfs_setdirty_locked_object(struct buf *bp);
 static void vfs_vmio_release(struct buf *bp);
 static int vfs_bio_clcheck(struct vnode *vp, int size,
 		daddr_t lblkno, daddr_t blkno);
-static int buf_flush(int);
-static int flushbufqueues(int, int);
+static int buf_flush(struct vnode *vp, int);
+static int flushbufqueues(struct vnode *, int, int);
 static void buf_daemon(void);
 static void bremfreel(struct buf *bp);
 static __inline void bd_wakeup(void);
@@ -2097,7 +2097,7 @@ getnewbuf_bufd_help(struct vnode *vp, int gbflags, int slpflag, int slptimeo,
 {
 	struct thread *td;
 	char *waitmsg;
-	int cnt, error, flags, norunbuf, wait;
+	int error, fl, flags, norunbuf;
 
 	mtx_assert(&bqclean, MA_OWNED);
@ -2119,8 +2119,6 @@ getnewbuf_bufd_help(struct vnode *vp, int gbflags, int slpflag, int slptimeo,
return;
td = curthread;
cnt = 0;
wait = MNT_NOWAIT;
rw_wlock(&nblock);
while ((needsbuffer & flags) != 0) {
if (vp != NULL && vp->v_type != VCHR &&
@@ -2134,20 +2132,23 @@ getnewbuf_bufd_help(struct vnode *vp, int gbflags, int slpflag, int slptimeo,
 			 * cannot be achieved by the buf_daemon, that
 			 * cannot lock the vnode.
 			 */
-			if (cnt++ > 2)
-				wait = MNT_WAIT;
-			ASSERT_VOP_LOCKED(vp, "bufd_helper");
-			error = VOP_ISLOCKED(vp) == LK_EXCLUSIVE ? 0 :
-			    vn_lock(vp, LK_TRYUPGRADE);
-			if (error == 0) {
-				/* play bufdaemon */
-				norunbuf = curthread_pflags_set(TDP_BUFNEED |
-				    TDP_NORUNNINGBUF);
-				VOP_FSYNC(vp, wait, td);
-				atomic_add_long(&notbufdflushes, 1);
-				curthread_pflags_restore(norunbuf);
-			}
+			norunbuf = ~(TDP_BUFNEED | TDP_NORUNNINGBUF) |
+			    (td->td_pflags & TDP_NORUNNINGBUF);
+			/*
+			 * Play bufdaemon.  The getnewbuf() function
+			 * may be called while the thread owns lock
+			 * for another dirty buffer for the same
+			 * vnode, which makes it impossible to use
+			 * VOP_FSYNC() there, due to the buffer lock
+			 * recursion.
+			 */
+			td->td_pflags |= TDP_BUFNEED | TDP_NORUNNINGBUF;
+			fl = buf_flush(vp, flushbufqtarget);
+			td->td_pflags &= norunbuf;
 			rw_wlock(&nblock);
+			if (fl != 0)
+				continue;
 			if ((needsbuffer & flags) == 0)
 				break;
 		}
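
The norunbuf mask trick above is compact enough to deserve a standalone
check: it sets TDP_BUFNEED | TDP_NORUNNINGBUF around buf_flush(), then
clears TDP_BUFNEED unconditionally while restoring TDP_NORUNNINGBUF to
whatever it was before.  An illustrative user-space program (the flag
values here are arbitrary, not the kernel's):

	#include <assert.h>
	#include <stdio.h>

	#define	TDP_BUFNEED		0x01
	#define	TDP_NORUNNINGBUF	0x02

	int
	main(void)
	{
		int before, norunbuf, pflags;

		for (before = 0; before < 4; before++) {
			pflags = before;
			norunbuf = ~(TDP_BUFNEED | TDP_NORUNNINGBUF) |
			    (pflags & TDP_NORUNNINGBUF);
			pflags |= TDP_BUFNEED | TDP_NORUNNINGBUF;
			/* ... buf_flush() would run here ... */
			pflags &= norunbuf;
			assert((pflags & TDP_BUFNEED) == 0);
			assert((pflags & TDP_NORUNNINGBUF) ==
			    (before & TDP_NORUNNINGBUF));
			printf("before %#x, after %#x\n", before, pflags);
		}
		return (0);
	}
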
@@ -2566,18 +2567,20 @@ static struct kproc_desc buf_kp = {
 SYSINIT(bufdaemon, SI_SUB_KTHREAD_BUF, SI_ORDER_FIRST, kproc_start, &buf_kp);
 
 static int
-buf_flush(int target)
+buf_flush(struct vnode *vp, int target)
 {
 	int flushed;
 
-	flushed = flushbufqueues(target, 0);
+	flushed = flushbufqueues(vp, target, 0);
 	if (flushed == 0) {
 		/*
 		 * Could not find any buffers without rollback
 		 * dependencies, so just write the first one
 		 * in the hopes of eventually making progress.
 		 */
-		flushed = flushbufqueues(target, 1);
+		if (vp != NULL && target > 2)
+			target /= 2;
+		flushbufqueues(vp, target, 1);
 	}
 	return (flushed);
 }
@@ -2614,7 +2617,7 @@ buf_daemon()
 		 * the I/O system.
 		 */
 		while (numdirtybuffers > lodirty) {
-			if (buf_flush(numdirtybuffers - lodirty) == 0)
+			if (buf_flush(NULL, numdirtybuffers - lodirty) == 0)
 				break;
 			kern_yield(PRI_USER);
 		}
@@ -2669,7 +2672,7 @@ SYSCTL_INT(_vfs, OID_AUTO, flushwithdeps, CTLFLAG_RW, &flushwithdeps,
     0, "Number of buffers flushed with dependecies that require rollbacks");
 
 static int
-flushbufqueues(int target, int flushdeps)
+flushbufqueues(struct vnode *lvp, int target, int flushdeps)
 {
 	struct buf *sentinel;
 	struct vnode *vp;
@@ -2679,6 +2682,7 @@ flushbufqueues(int target, int flushdeps)
 	int flushed;
 	int queue;
 	int error;
+	bool unlock;
 
 	flushed = 0;
 	queue = QUEUE_DIRTY;
@ -2700,8 +2704,18 @@ flushbufqueues(int target, int flushdeps)
mtx_unlock(&bqdirty);
break;
}
KASSERT(bp->b_qindex != QUEUE_SENTINEL,
("parallel calls to flushbufqueues() bp %p", bp));
/*
* Skip sentinels inserted by other invocations of the
* flushbufqueues(), taking care to not reorder them.
*
* Only flush the buffers that belong to the
* vnode locked by the curthread.
*/
if (bp->b_qindex == QUEUE_SENTINEL || (lvp != NULL &&
bp->b_vp != lvp)) {
mtx_unlock(&bqdirty);
continue;
}
error = BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL);
mtx_unlock(&bqdirty);
if (error != 0)
@@ -2749,16 +2763,37 @@ flushbufqueues(int target, int flushdeps)
 			BUF_UNLOCK(bp);
 			continue;
 		}
-		error = vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT);
+		if (lvp == NULL) {
+			unlock = true;
+			error = vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT);
+		} else {
+			ASSERT_VOP_LOCKED(vp, "getbuf");
+			unlock = false;
+			error = VOP_ISLOCKED(vp) == LK_EXCLUSIVE ? 0 :
+			    vn_lock(vp, LK_TRYUPGRADE);
+		}
 		if (error == 0) {
 			CTR3(KTR_BUF, "flushbufqueue(%p) vp %p flags %X",
 			    bp, bp->b_vp, bp->b_flags);
-			vfs_bio_awrite(bp);
+			if (curproc == bufdaemonproc) {
+				vfs_bio_awrite(bp);
+			} else {
+				bremfree(bp);
+				bwrite(bp);
+				notbufdflushes++;
+			}
 			vn_finished_write(mp);
-			VOP_UNLOCK(vp, 0);
+			if (unlock)
+				VOP_UNLOCK(vp, 0);
 			flushwithdeps += hasdeps;
 			flushed++;
-			if (runningbufspace > hirunningspace)
+
+			/*
+			 * Sleeping on runningbufspace while holding
+			 * vnode lock leads to deadlock.
+			 */
+			if (curproc == bufdaemonproc &&
+			    runningbufspace > hirunningspace)
 				waitrunningbufspace();
 			continue;
 		}
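
For context on the sentinel checks added above: flushbufqueues() scans
QUEUE_DIRTY behind a per-invocation sentinel buffer, so concurrent scans
must skip each other's sentinels without reordering them, and the
lvp != NULL caller must also skip buffers that belong to other vnodes.
A hedged user-space sketch of that walk using <sys/queue.h>; struct xbuf,
scan_queue() and vnode_id are invented stand-ins for the kernel's buf and
vnode machinery:

	#include <sys/queue.h>
	#include <stdbool.h>
	#include <stdio.h>

	struct xbuf {
		TAILQ_ENTRY(xbuf)	link;
		bool			sentinel;
		int			vnode_id;	/* stand-in for bp->b_vp */
	};
	TAILQ_HEAD(xqueue, xbuf);

	/* Flush up to target buffers; want_vp >= 0 restricts the vnode. */
	static int
	scan_queue(struct xqueue *q, int target, int want_vp)
	{
		struct xbuf sentinel = { .sentinel = true }, *bp;
		int flushed = 0;

		TAILQ_INSERT_HEAD(q, &sentinel, link);
		while (flushed < target) {
			bp = TAILQ_NEXT(&sentinel, link);
			if (bp == NULL)
				break;
			/* Advance the sentinel past bp, keeping order. */
			TAILQ_REMOVE(q, &sentinel, link);
			TAILQ_INSERT_AFTER(q, bp, &sentinel, link);
			/* Skip other scans' sentinels and foreign vnodes. */
			if (bp->sentinel ||
			    (want_vp >= 0 && bp->vnode_id != want_vp))
				continue;
			TAILQ_REMOVE(q, bp, link);	/* "flush" it */
			flushed++;
		}
		TAILQ_REMOVE(q, &sentinel, link);
		return (flushed);
	}

	int
	main(void)
	{
		struct xqueue q = TAILQ_HEAD_INITIALIZER(q);
		struct xbuf bufs[4] = {
			{ .vnode_id = 1 }, { .vnode_id = 2 },
			{ .vnode_id = 1 }, { .vnode_id = 2 },
		};
		int i;

		for (i = 0; i < 4; i++)
			TAILQ_INSERT_TAIL(&q, &bufs[i], link);
		/* Flush only vnode 1's buffers, as the lvp path does. */
		printf("flushed %d\n", scan_queue(&q, 10, 1));
		return (0);
	}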