Make it possible to atomically resume writes on the mount and account for

the write start, by adding a variant of vfs_write_resume(9) that
accepts flags.

Use the new function to prevent a deadlock between parallel suspension
and snapshotting a UFS mount.  The ffs_snapshot() code performed
vfs_write_resume() followed by vn_start_write() while owning the
snaplock.  If a suspension intervened between the resume and
vn_start_write(), a deadlock occurred once the suspending thread
tried to lock the snaplock, most typically during the write in
ffs_copyonwrite().

Reported and tested by:	Andreas Longwitz <longwitz@incore.de>
Reviewed by:	mckusick
MFC after:	2 weeks
X-MFC-note:	make the vfs_write_resume(9) function a macro after the MFC,
	in HEAD
This commit is contained in:
Konstantin Belousov 2012-12-28 23:08:30 +00:00
parent 833966f999
commit 91e9474552
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=244795
3 changed files with 56 additions and 29 deletions

View File

@ -1434,6 +1434,40 @@ vn_closefile(fp, td)
* proceed. If a suspend request is in progress, we wait until the
* suspension is over, and then proceed.
*/
/*
 * Core of vn_start_write(): wait out any suspension of 'mp' and then
 * account the start of a write by incrementing mnt_writeopcount.
 *
 * Must be entered with MNT_MTX(mp) owned (asserted below).  Every path
 * out of the function goes through MNT_IUNLOCK(mp), so the caller must
 * not unlock again.
 *
 * flags:
 *   V_NOWAIT - fail with EWOULDBLOCK instead of sleeping while
 *              MNTK_SUSPEND is set.
 *   V_XSLEEP - only wait for the suspension to end; do not bump
 *              mnt_writeopcount.
 *   PCATCH   - passed through to msleep().
 *
 * Returns 0 on success, EWOULDBLOCK for V_NOWAIT during a suspension,
 * or the non-zero value returned by msleep().
 *
 * On error, and always for V_XSLEEP, MNT_REL(mp) is called --
 * presumably to drop the reference the caller took with MNT_REF()
 * before calling; confirm against callers.
 */
static int
vn_start_write_locked(struct mount *mp, int flags)
{
int error;
mtx_assert(MNT_MTX(mp), MA_OWNED);
error = 0;
/*
* Check on status of suspension.
*/
/*
 * The wait is skipped only when the current thread both has
 * TDP_IGNSUSP set and is recorded as mnt_susp_owner.
 */
if ((curthread->td_pflags & TDP_IGNSUSP) == 0 ||
mp->mnt_susp_owner != curthread) {
/* Re-check MNTK_SUSPEND after every wakeup. */
while ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) {
if (flags & V_NOWAIT) {
error = EWOULDBLOCK;
goto unlock;
}
/* Sleep on &mp->mnt_flag; only PCATCH is taken from flags. */
error = msleep(&mp->mnt_flag, MNT_MTX(mp),
(PUSER - 1) | (flags & PCATCH), "suspfs", 0);
if (error)
goto unlock;
}
}
/* V_XSLEEP callers only wanted the wait, not a write reference. */
if (flags & V_XSLEEP)
goto unlock;
mp->mnt_writeopcount++;
unlock:
/* MNT_REL() is skipped only when the write was actually accounted. */
if (error != 0 || (flags & V_XSLEEP) != 0)
MNT_REL(mp);
MNT_IUNLOCK(mp);
return (error);
}
int
vn_start_write(vp, mpp, flags)
struct vnode *vp;
@ -1470,30 +1504,7 @@ vn_start_write(vp, mpp, flags)
if (vp == NULL)
MNT_REF(mp);
/*
* Check on status of suspension.
*/
if ((curthread->td_pflags & TDP_IGNSUSP) == 0 ||
mp->mnt_susp_owner != curthread) {
while ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0) {
if (flags & V_NOWAIT) {
error = EWOULDBLOCK;
goto unlock;
}
error = msleep(&mp->mnt_flag, MNT_MTX(mp),
(PUSER - 1) | (flags & PCATCH), "suspfs", 0);
if (error)
goto unlock;
}
}
if (flags & V_XSLEEP)
goto unlock;
mp->mnt_writeopcount++;
unlock:
if (error != 0 || (flags & V_XSLEEP) != 0)
MNT_REL(mp);
MNT_IUNLOCK(mp);
return (error);
return (vn_start_write_locked(mp, flags));
}
/*
@ -1639,8 +1650,7 @@ vfs_write_suspend(mp)
* Request a filesystem to resume write operations.
*/
void
vfs_write_resume(mp)
struct mount *mp;
vfs_write_resume_flags(struct mount *mp, int flags)
{
MNT_ILOCK(mp);
@ -1652,10 +1662,25 @@ vfs_write_resume(mp)
wakeup(&mp->mnt_writeopcount);
wakeup(&mp->mnt_flag);
curthread->td_pflags &= ~TDP_IGNSUSP;
if ((flags & VR_START_WRITE) != 0) {
MNT_REF(mp);
mp->mnt_writeopcount++;
}
MNT_IUNLOCK(mp);
VFS_SUSP_CLEAN(mp);
} else
} else if ((flags & VR_START_WRITE) != 0) {
MNT_REF(mp);
vn_start_write_locked(mp, 0);
} else {
MNT_IUNLOCK(mp);
}
}
/*
 * Resume write operations on 'mp' without starting a new write.
 * Thin wrapper that calls vfs_write_resume_flags() with flags == 0.
 */
void
vfs_write_resume(struct mount *mp)
{
vfs_write_resume_flags(mp, 0);
}
/*

View File

@ -392,6 +392,8 @@ extern int vttoif_tab[];
#define V_NOWAIT 0x0002 /* vn_start_write: don't sleep for suspend */
#define V_XSLEEP 0x0004 /* vn_start_write: just return after sleep */
#define VR_START_WRITE 0x0001 /* vfs_write_resume: start write atomically */
#define VREF(vp) vref(vp)
#ifdef DIAGNOSTIC
@ -701,6 +703,7 @@ int vn_io_fault_uiomove(char *data, int xfersize, struct uio *uio);
int vfs_cache_lookup(struct vop_lookup_args *ap);
void vfs_timestamp(struct timespec *);
void vfs_write_resume(struct mount *mp);
void vfs_write_resume_flags(struct mount *mp, int flags);
int vfs_write_suspend(struct mount *mp);
int vop_stdbmap(struct vop_bmap_args *);
int vop_stdfsync(struct vop_fsync_args *);

View File

@ -687,8 +687,7 @@ ffs_snapshot(mp, snapfile)
/*
* Resume operation on filesystem.
*/
vfs_write_resume(vp->v_mount);
vn_start_write(NULL, &wrtmp, V_WAIT);
vfs_write_resume_flags(vp->v_mount, VR_START_WRITE);
if (collectsnapstats && starttime.tv_sec > 0) {
nanotime(&endtime);
timespecsub(&endtime, &starttime);