Fix the hand after the immediate reboot when the following command

sequence is performed on UFS SU+J rootfs:
cp -Rp /sbin/init /sbin/init.old
mv -f /sbin/init.old /sbin/init

Hang occurs on the rootfs unmount.  There are two issues:

1. Removed init binary, which is still mapped, creates a reference to
the removed vnode. The inodeblock for such vnode must have active
inodedep, which is (eventually) linked through the unlinked list. This
means that ffs_sync(MNT_SUSPEND) cannot succeed, because number of
softdep workitems for the mp is always > 0.  FFS is suspended during
unmount, so unmount just hangs.

2. As noted above, the inodedep is linked eventually.  It is not
linked until the superblock is written.  But at the vfs_unmountall()
time, when the rootfs is unmounted, the call is made to
ffs_unmount()->ffs_sync() before vflush(), and ffs_sync() only calls
ffs_sbupdate() after all workitems are flushed.  It is masked for
normal system operations, because syncer works in parallel and
eventually flushes superblock.  Syncer is stopped when rootfs
unmounted, so ffs_sync() must do sb update on its own.

Correct the issues listed above. For MNT_SUSPEND, count the number of
linked unlinked inodedeps (this is not a typo) and substract the count
of such workitems from the total. For the second issue, the
ffs_sbupdate() is called right after device sync in ffs_sync() loop.

There is third problem, occuring with both SU and SU+J. The
softdep_waitidle() loop, which waits for softdep_flush() thread to
clear the worklist, only waits 20ms max. It seems that the 1 tick,
specified for msleep(9), was a typo.

Add fsync(devvp, MNT_WAIT) call to softdep_waitidle(), which seems to
significantly help the softdep thread, and change the MNT_LAZY update
at the reboot time to MNT_WAIT for similar reasons.  Note that
userspace cannot create more work while devvp is flushed, since the
mount point is always suspended before the call to softdep_waitidle()
in unmount or remount path.

PR:	195458
In collaboration with:	gjb, pho
Reviewed by:	mckusick
Sponsored by:	The FreeBSD Foundation
MFC after:	2 weeks
This commit is contained in:
kib 2015-03-27 13:55:56 +00:00
parent d819d79785
commit 871e29c970
2 changed files with 67 additions and 24 deletions

View File

@ -1885,8 +1885,8 @@ softdep_flushworklist(oldmnt, countp, td)
struct thread *td;
{
struct vnode *devvp;
int count, error = 0;
struct ufsmount *ump;
int count, error;
/*
* Alternately flush the block device associated with the mount
@ -1895,6 +1895,7 @@ softdep_flushworklist(oldmnt, countp, td)
* are found.
*/
*countp = 0;
error = 0;
ump = VFSTOUFS(oldmnt);
devvp = ump->um_devvp;
while ((count = softdep_process_worklist(oldmnt, 1)) > 0) {
@ -1902,37 +1903,47 @@ softdep_flushworklist(oldmnt, countp, td)
vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
error = VOP_FSYNC(devvp, MNT_WAIT, td);
VOP_UNLOCK(devvp, 0);
if (error)
if (error != 0)
break;
}
return (error);
}
#define SU_WAITIDLE_RETRIES 20
static int
softdep_waitidle(struct mount *mp, int flags __unused)
{
struct ufsmount *ump;
int error;
int i;
struct vnode *devvp;
struct thread *td;
int error, i;
ump = VFSTOUFS(mp);
devvp = ump->um_devvp;
td = curthread;
error = 0;
ACQUIRE_LOCK(ump);
for (i = 0; i < 10 && ump->softdep_deps; i++) {
for (i = 0; i < SU_WAITIDLE_RETRIES && ump->softdep_deps != 0; i++) {
ump->softdep_req = 1;
KASSERT((flags & FORCECLOSE) == 0 ||
ump->softdep_on_worklist == 0,
("softdep_waitidle: work added after flush"));
msleep(&ump->softdep_deps, LOCK_PTR(ump), PVM, "softdeps", 1);
msleep(&ump->softdep_deps, LOCK_PTR(ump), PVM | PDROP,
"softdeps", 10 * hz);
vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
error = VOP_FSYNC(devvp, MNT_WAIT, td);
VOP_UNLOCK(devvp, 0);
if (error != 0)
break;
ACQUIRE_LOCK(ump);
}
ump->softdep_req = 0;
FREE_LOCK(ump);
error = 0;
if (i == 10) {
if (i == SU_WAITIDLE_RETRIES && error == 0 && ump->softdep_deps != 0) {
error = EBUSY;
printf("softdep_waitidle: Failed to flush worklist for %p\n",
mp);
}
FREE_LOCK(ump);
return (error);
}
@ -7637,17 +7648,13 @@ check_inode_unwritten(inodedep)
return (1);
}
/*
* Try to free an inodedep structure. Return 1 if it could be freed.
*/
static int
free_inodedep(inodedep)
check_inodedep_free(inodedep)
struct inodedep *inodedep;
{
LOCK_OWNED(VFSTOUFS(inodedep->id_list.wk_mp));
if ((inodedep->id_state & (ONWORKLIST | UNLINKED)) != 0 ||
(inodedep->id_state & ALLCOMPLETE) != ALLCOMPLETE ||
if ((inodedep->id_state & ALLCOMPLETE) != ALLCOMPLETE ||
!LIST_EMPTY(&inodedep->id_dirremhd) ||
!LIST_EMPTY(&inodedep->id_pendinghd) ||
!LIST_EMPTY(&inodedep->id_bufwait) ||
@ -7662,6 +7669,21 @@ free_inodedep(inodedep)
inodedep->id_nlinkdelta != 0 ||
inodedep->id_savedino1 != NULL)
return (0);
return (1);
}
/*
* Try to free an inodedep structure. Return 1 if it could be freed.
*/
static int
free_inodedep(inodedep)
struct inodedep *inodedep;
{
LOCK_OWNED(VFSTOUFS(inodedep->id_list.wk_mp));
if ((inodedep->id_state & (ONWORKLIST | UNLINKED)) != 0 ||
!check_inodedep_free(inodedep))
return (0);
if (inodedep->id_state & ONDEPLIST)
LIST_REMOVE(inodedep, id_deps);
LIST_REMOVE(inodedep, id_hash);
@ -13846,7 +13868,8 @@ softdep_check_suspend(struct mount *mp,
{
struct bufobj *bo;
struct ufsmount *ump;
int error;
struct inodedep *inodedep;
int error, unlinked;
bo = &devvp->v_bufobj;
ASSERT_BO_WLOCKED(bo);
@ -13907,6 +13930,20 @@ softdep_check_suspend(struct mount *mp,
break;
}
unlinked = 0;
if (MOUNTEDSUJ(mp)) {
for (inodedep = TAILQ_FIRST(&ump->softdep_unlinked);
inodedep != NULL;
inodedep = TAILQ_NEXT(inodedep, id_unlinked)) {
if ((inodedep->id_state & (UNLINKED | UNLINKLINKS |
UNLINKONLIST)) != (UNLINKED | UNLINKLINKS |
UNLINKONLIST) ||
!check_inodedep_free(inodedep))
continue;
unlinked++;
}
}
/*
* Reasons for needing more work before suspend:
* - Dirty buffers on devvp.
@ -13916,8 +13953,8 @@ softdep_check_suspend(struct mount *mp,
error = 0;
if (bo->bo_numoutput > 0 ||
bo->bo_dirty.bv_cnt > 0 ||
softdep_depcnt != 0 ||
ump->softdep_deps != 0 ||
softdep_depcnt != unlinked ||
ump->softdep_deps != unlinked ||
softdep_accdepcnt != ump->softdep_accdeps ||
secondary_writes != 0 ||
mp->mnt_secondary_writes != 0 ||

View File

@ -1502,8 +1502,11 @@ ffs_sync(mp, waitfor)
if (fs->fs_fmod != 0 && fs->fs_ronly != 0 && ump->um_fsckpid == 0)
panic("%s: ffs_sync: modification on read-only filesystem",
fs->fs_fsmnt);
if (waitfor == MNT_LAZY)
return (ffs_sync_lazy(mp));
if (waitfor == MNT_LAZY) {
if (!rebooting)
return (ffs_sync_lazy(mp));
waitfor = MNT_NOWAIT;
}
/*
* Write back each (modified) inode.
@ -1560,7 +1563,7 @@ loop:
/*
* Force stale filesystem control information to be flushed.
*/
if (waitfor == MNT_WAIT) {
if (waitfor == MNT_WAIT || rebooting) {
if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
allerror = error;
/* Flushed work items may create new vnodes to clean */
@ -1577,9 +1580,12 @@ loop:
if (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0) {
BO_UNLOCK(bo);
vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
if ((error = VOP_FSYNC(devvp, waitfor, td)) != 0)
allerror = error;
error = VOP_FSYNC(devvp, waitfor, td);
VOP_UNLOCK(devvp, 0);
if (MOUNTEDSOFTDEP(mp) && (error == 0 || error == EAGAIN))
error = ffs_sbupdate(ump, waitfor, 0);
if (error != 0)
allerror = error;
if (allerror == 0 && waitfor == MNT_WAIT)
goto loop;
} else if (suspend != 0) {