 - Don't immediately re-run softdepflush if we didn't make any progress
   on the last iteration.  This can lead to a deadlock when we have
   worklist items that cannot be immediately satisfied.

Reported by:	uqs, Dimitry Andric <dimitry@andric.com>

 - Remove some unnecessary debugging code and place some other under
   SUJ_DEBUG.
 - Examine the journal state in softdep_slowdown().
 - Re-format some comments so I may more easily add flag descriptions.
This commit is contained in:
Jeff Roberson 2010-05-19 06:18:01 +00:00
parent 94ee116706
commit f0268739c7
2 changed files with 72 additions and 51 deletions

View File

@ -51,7 +51,6 @@ __FBSDID("$FreeBSD$");
#ifndef DEBUG
#define DEBUG
#endif
#define SUJ_DEBUG
#include <sys/param.h>
#include <sys/kernel.h>
@ -1200,6 +1199,7 @@ softdep_flush(void)
struct ufsmount *ump;
struct thread *td;
int remaining;
int progress;
int vfslocked;
td = curthread;
@ -1224,7 +1224,7 @@ softdep_flush(void)
}
FREE_LOCK(&lk);
VFS_UNLOCK_GIANT(vfslocked);
remaining = 0;
remaining = progress = 0;
mtx_lock(&mountlist_mtx);
for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
nmp = TAILQ_NEXT(mp, mnt_list);
@ -1233,7 +1233,7 @@ softdep_flush(void)
if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK))
continue;
vfslocked = VFS_LOCK_GIANT(mp);
softdep_process_worklist(mp, 0);
progress += softdep_process_worklist(mp, 0);
ump = VFSTOUFS(mp);
remaining += ump->softdep_on_worklist -
ump->softdep_on_worklist_inprogress;
@ -1243,7 +1243,7 @@ softdep_flush(void)
vfs_unbusy(mp);
}
mtx_unlock(&mountlist_mtx);
if (remaining)
if (remaining && progress)
continue;
ACQUIRE_LOCK(&lk);
if (!req_pending)
@ -1449,7 +1449,7 @@ process_worklist_item(mp, flags)
struct mount *mp;
int flags;
{
struct worklist *wk, *wkXXX;
struct worklist *wk;
struct ufsmount *ump;
struct vnode *vp;
int matchcnt = 0;
@ -1472,11 +1472,8 @@ process_worklist_item(mp, flags)
vp = NULL;
ump = VFSTOUFS(mp);
LIST_FOREACH(wk, &ump->softdep_workitem_pending, wk_list) {
if (wk->wk_state & INPROGRESS) {
wkXXX = wk;
if (wk->wk_state & INPROGRESS)
continue;
}
wkXXX = wk; /* Record the last valid wk pointer. */
if ((flags & LK_NOWAIT) == 0 || wk->wk_type != D_DIRREM)
break;
wk->wk_state |= INPROGRESS;
@ -2364,7 +2361,7 @@ remove_from_journal(wk)
mtx_assert(&lk, MA_OWNED);
ump = VFSTOUFS(wk->wk_mp);
#ifdef DEBUG /* XXX Expensive, temporary. */
#ifdef SUJ_DEBUG
{
struct worklist *wkn;
@ -2401,16 +2398,15 @@ journal_space(ump, thresh)
struct jblocks *jblocks;
int avail;
jblocks = ump->softdep_jblocks;
if (jblocks == NULL)
return (1);
/*
* We use a tighter restriction here to prevent request_cleanup()
* running in threads from running into locks we currently hold.
*/
if (num_inodedep > (max_softdeps / 10) * 9)
return (0);
jblocks = ump->softdep_jblocks;
if (jblocks == NULL)
return (1);
if (thresh)
thresh = jblocks->jb_min;
else
@ -2727,7 +2723,7 @@ softdep_process_journal(mp, flags)
break;
printf("softdep: Out of journal space!\n");
softdep_speedup();
msleep(jblocks, &lk, PRIBIO, "jblocks", 1);
msleep(jblocks, &lk, PRIBIO, "jblocks", hz);
}
FREE_LOCK(&lk);
jseg = malloc(sizeof(*jseg), M_JSEG, M_SOFTDEP_FLAGS);
@ -10870,18 +10866,29 @@ int
softdep_slowdown(vp)
struct vnode *vp;
{
struct ufsmount *ump;
int jlow;
int max_softdeps_hard;
ACQUIRE_LOCK(&lk);
jlow = 0;
/*
* Check for journal space if needed.
*/
if (DOINGSUJ(vp)) {
ump = VFSTOUFS(vp->v_mount);
if (journal_space(ump, 0) == 0)
jlow = 1;
}
max_softdeps_hard = max_softdeps * 11 / 10;
if (num_dirrem < max_softdeps_hard / 2 &&
num_inodedep < max_softdeps_hard &&
VFSTOUFS(vp->v_mount)->um_numindirdeps < maxindirdeps &&
num_freeblkdep < max_softdeps_hard) {
num_freeblkdep < max_softdeps_hard && jlow == 0) {
FREE_LOCK(&lk);
return (0);
}
if (VFSTOUFS(vp->v_mount)->um_numindirdeps >= maxindirdeps)
if (VFSTOUFS(vp->v_mount)->um_numindirdeps >= maxindirdeps || jlow)
softdep_speedup();
stat_sync_limit_hit += 1;
FREE_LOCK(&lk);

View File

@ -46,51 +46,65 @@
* copy of the data. A particular data dependency is eliminated when
* it is ALLCOMPLETE: that is ATTACHED, DEPCOMPLETE, and COMPLETE.
*
* ATTACHED means that the data is not currently being written to
* disk. UNDONE means that the data has been rolled back to a safe
* The ATTACHED flag means that the data is not currently being written
* to disk.
*
* The UNDONE flag means that the data has been rolled back to a safe
* state for writing to the disk. When the I/O completes, the data is
* restored to its current form and the state reverts to ATTACHED.
* The data must be locked throughout the rollback, I/O, and roll
* forward so that the rolled back information is never visible to
* user processes. The COMPLETE flag indicates that the item has been
* written. For example, a dependency that requires that an inode be
* written will be marked COMPLETE after the inode has been written
* to disk. The DEPCOMPLETE flag indicates the completion of any other
* user processes.
*
* The COMPLETE flag indicates that the item has been written. For example,
* a dependency that requires that an inode be written will be marked
* COMPLETE after the inode has been written to disk.
*
* The DEPCOMPLETE flag indicates the completion of any other
* dependencies, such as the writing of a cylinder group map.
* A dependency structure may be freed only when both it
* and its dependencies have completed and any rollbacks that are in
* progress have finished as indicated by the set of ALLCOMPLETE flags
* all being set. The two MKDIR flags indicate additional dependencies
* that must be done when creating a new directory. MKDIR_BODY is
* cleared when the directory data block containing the "." and ".."
* entries has been written. MKDIR_PARENT is cleared when the parent
* inode with the increased link count for ".." has been written. When
* both MKDIR flags have been cleared, the DEPCOMPLETE flag is set to
* indicate that the directory dependencies have been completed. The
* writing of the directory inode itself sets the COMPLETE flag which
* then allows the directory entry for the new directory to be written
* to disk. The RMDIR flag marks a dirrem structure as representing
* the removal of a directory rather than a file. When the removal
* dependencies are completed, additional work needs to be done
* (truncation of the "." and ".." entries, an additional decrement
* of the associated inode, and a decrement of the parent inode). The
* DIRCHG flag marks a diradd structure as representing the changing
* all being set.
*
* The two MKDIR flags indicate additional dependencies that must be done
* when creating a new directory. MKDIR_BODY is cleared when the directory
* data block containing the "." and ".." entries has been written.
* MKDIR_PARENT is cleared when the parent inode with the increased link
* count for ".." has been written. When both MKDIR flags have been
* cleared, the DEPCOMPLETE flag is set to indicate that the directory
* dependencies have been completed. The writing of the directory inode
* itself sets the COMPLETE flag which then allows the directory entry for
* the new directory to be written to disk. The RMDIR flag marks a dirrem
* structure as representing the removal of a directory rather than a
* file. When the removal dependencies are completed, additional work needs
* to be done (an additional decrement of the associated inode, and a
* decrement of the parent inode).
*
* The DIRCHG flag marks a diradd structure as representing the changing
* of an existing entry rather than the addition of a new one. When
* the update is complete the dirrem associated with the inode for
* the old name must be added to the worklist to do the necessary
* reference count decrement. The GOINGAWAY flag indicates that the
* data structure is frozen from further change until its dependencies
* have been completed and its resources freed after which it will be
* discarded. The IOSTARTED flag prevents multiple calls to the I/O
* start routine from doing multiple rollbacks. The SPACECOUNTED flag
* says that the files space has been accounted to the pending free
* space count. The NEWBLOCK flag marks pagedep structures that have
* just been allocated, so must be claimed by the inode before all
* dependencies are complete. The INPROGRESS flag marks worklist
* structures that are still on the worklist, but are being considered
* for action by some process. The UFS1FMT flag indicates that the
* inode being processed is a ufs1 format. The EXTDATA flag indicates
* that the allocdirect describes an extended-attributes dependency.
* reference count decrement.
*
* The GOINGAWAY flag indicates that the data structure is frozen from
* further change until its dependencies have been completed and its
* resources freed after which it will be discarded.
*
* The IOSTARTED flag prevents multiple calls to the I/O start routine from
* doing multiple rollbacks.
*
* The NEWBLOCK flag marks pagedep structures that have just been allocated,
* so must be claimed by the inode before all dependencies are complete.
*
* The INPROGRESS flag marks worklist structures that are still on the
* worklist, but are being considered for action by some process.
*
* The UFS1FMT flag indicates that the inode being processed is in ufs1 format.
*
* The EXTDATA flag indicates that the allocdirect describes an
* extended-attributes dependency.
*
* The ONWORKLIST flag shows whether the structure is currently linked
* onto a worklist.
*/