- Don't immediately re-run softdepflush if we didn't make any progress
  on the last iteration. This can lead to a deadlock when we have
  worklist items that cannot be immediately satisfied.
  Reported by: uqs, Dimitry Andric <dimitry@andric.com>
- Remove some unnecessary debugging code and place some of the remaining
  debugging code under SUJ_DEBUG.
- Examine the journal state in softdep_slowdown().
- Re-format some comments so I may more easily add flag descriptions.
This commit is contained in:
parent
94ee116706
commit
f0268739c7
@ -51,7 +51,6 @@ __FBSDID("$FreeBSD$");
|
||||
#ifndef DEBUG
|
||||
#define DEBUG
|
||||
#endif
|
||||
#define SUJ_DEBUG
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/kernel.h>
|
||||
@ -1200,6 +1199,7 @@ softdep_flush(void)
|
||||
struct ufsmount *ump;
|
||||
struct thread *td;
|
||||
int remaining;
|
||||
int progress;
|
||||
int vfslocked;
|
||||
|
||||
td = curthread;
|
||||
@ -1224,7 +1224,7 @@ softdep_flush(void)
|
||||
}
|
||||
FREE_LOCK(&lk);
|
||||
VFS_UNLOCK_GIANT(vfslocked);
|
||||
remaining = 0;
|
||||
remaining = progress = 0;
|
||||
mtx_lock(&mountlist_mtx);
|
||||
for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
|
||||
nmp = TAILQ_NEXT(mp, mnt_list);
|
||||
@ -1233,7 +1233,7 @@ softdep_flush(void)
|
||||
if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK))
|
||||
continue;
|
||||
vfslocked = VFS_LOCK_GIANT(mp);
|
||||
softdep_process_worklist(mp, 0);
|
||||
progress += softdep_process_worklist(mp, 0);
|
||||
ump = VFSTOUFS(mp);
|
||||
remaining += ump->softdep_on_worklist -
|
||||
ump->softdep_on_worklist_inprogress;
|
||||
@ -1243,7 +1243,7 @@ softdep_flush(void)
|
||||
vfs_unbusy(mp);
|
||||
}
|
||||
mtx_unlock(&mountlist_mtx);
|
||||
if (remaining)
|
||||
if (remaining && progress)
|
||||
continue;
|
||||
ACQUIRE_LOCK(&lk);
|
||||
if (!req_pending)
|
||||
@ -1449,7 +1449,7 @@ process_worklist_item(mp, flags)
|
||||
struct mount *mp;
|
||||
int flags;
|
||||
{
|
||||
struct worklist *wk, *wkXXX;
|
||||
struct worklist *wk;
|
||||
struct ufsmount *ump;
|
||||
struct vnode *vp;
|
||||
int matchcnt = 0;
|
||||
@ -1472,11 +1472,8 @@ process_worklist_item(mp, flags)
|
||||
vp = NULL;
|
||||
ump = VFSTOUFS(mp);
|
||||
LIST_FOREACH(wk, &ump->softdep_workitem_pending, wk_list) {
|
||||
if (wk->wk_state & INPROGRESS) {
|
||||
wkXXX = wk;
|
||||
if (wk->wk_state & INPROGRESS)
|
||||
continue;
|
||||
}
|
||||
wkXXX = wk; /* Record the last valid wk pointer. */
|
||||
if ((flags & LK_NOWAIT) == 0 || wk->wk_type != D_DIRREM)
|
||||
break;
|
||||
wk->wk_state |= INPROGRESS;
|
||||
@ -2364,7 +2361,7 @@ remove_from_journal(wk)
|
||||
|
||||
mtx_assert(&lk, MA_OWNED);
|
||||
ump = VFSTOUFS(wk->wk_mp);
|
||||
#ifdef DEBUG /* XXX Expensive, temporary. */
|
||||
#ifdef SUJ_DEBUG
|
||||
{
|
||||
struct worklist *wkn;
|
||||
|
||||
@ -2401,16 +2398,15 @@ journal_space(ump, thresh)
|
||||
struct jblocks *jblocks;
|
||||
int avail;
|
||||
|
||||
jblocks = ump->softdep_jblocks;
|
||||
if (jblocks == NULL)
|
||||
return (1);
|
||||
/*
|
||||
* We use a tighter restriction here to prevent request_cleanup()
|
||||
* running in threads from running into locks we currently hold.
|
||||
*/
|
||||
if (num_inodedep > (max_softdeps / 10) * 9)
|
||||
return (0);
|
||||
|
||||
jblocks = ump->softdep_jblocks;
|
||||
if (jblocks == NULL)
|
||||
return (1);
|
||||
if (thresh)
|
||||
thresh = jblocks->jb_min;
|
||||
else
|
||||
@ -2727,7 +2723,7 @@ softdep_process_journal(mp, flags)
|
||||
break;
|
||||
printf("softdep: Out of journal space!\n");
|
||||
softdep_speedup();
|
||||
msleep(jblocks, &lk, PRIBIO, "jblocks", 1);
|
||||
msleep(jblocks, &lk, PRIBIO, "jblocks", hz);
|
||||
}
|
||||
FREE_LOCK(&lk);
|
||||
jseg = malloc(sizeof(*jseg), M_JSEG, M_SOFTDEP_FLAGS);
|
||||
@ -10870,18 +10866,29 @@ int
|
||||
softdep_slowdown(vp)
|
||||
struct vnode *vp;
|
||||
{
|
||||
struct ufsmount *ump;
|
||||
int jlow;
|
||||
int max_softdeps_hard;
|
||||
|
||||
ACQUIRE_LOCK(&lk);
|
||||
jlow = 0;
|
||||
/*
|
||||
* Check for journal space if needed.
|
||||
*/
|
||||
if (DOINGSUJ(vp)) {
|
||||
ump = VFSTOUFS(vp->v_mount);
|
||||
if (journal_space(ump, 0) == 0)
|
||||
jlow = 1;
|
||||
}
|
||||
max_softdeps_hard = max_softdeps * 11 / 10;
|
||||
if (num_dirrem < max_softdeps_hard / 2 &&
|
||||
num_inodedep < max_softdeps_hard &&
|
||||
VFSTOUFS(vp->v_mount)->um_numindirdeps < maxindirdeps &&
|
||||
num_freeblkdep < max_softdeps_hard) {
|
||||
num_freeblkdep < max_softdeps_hard && jlow == 0) {
|
||||
FREE_LOCK(&lk);
|
||||
return (0);
|
||||
}
|
||||
if (VFSTOUFS(vp->v_mount)->um_numindirdeps >= maxindirdeps)
|
||||
if (VFSTOUFS(vp->v_mount)->um_numindirdeps >= maxindirdeps || jlow)
|
||||
softdep_speedup();
|
||||
stat_sync_limit_hit += 1;
|
||||
FREE_LOCK(&lk);
|
||||
|
@ -46,51 +46,65 @@
|
||||
* copy of the data. A particular data dependency is eliminated when
|
||||
* it is ALLCOMPLETE: that is ATTACHED, DEPCOMPLETE, and COMPLETE.
|
||||
*
|
||||
* ATTACHED means that the data is not currently being written to
|
||||
* disk. UNDONE means that the data has been rolled back to a safe
|
||||
* The ATTACHED flag means that the data is not currently being written
|
||||
* to disk.
|
||||
*
|
||||
* The UNDONE flag means that the data has been rolled back to a safe
|
||||
* state for writing to the disk. When the I/O completes, the data is
|
||||
* restored to its current form and the state reverts to ATTACHED.
|
||||
* The data must be locked throughout the rollback, I/O, and roll
|
||||
* forward so that the rolled back information is never visible to
|
||||
* user processes. The COMPLETE flag indicates that the item has been
|
||||
* written. For example, a dependency that requires that an inode be
|
||||
* written will be marked COMPLETE after the inode has been written
|
||||
* to disk. The DEPCOMPLETE flag indicates the completion of any other
|
||||
* user processes.
|
||||
*
|
||||
* The COMPLETE flag indicates that the item has been written. For example,
|
||||
* a dependency that requires that an inode be written will be marked
|
||||
* COMPLETE after the inode has been written to disk.
|
||||
*
|
||||
* The DEPCOMPLETE flag indicates the completion of any other
|
||||
* dependencies, such as the writing of a cylinder group map.
|
||||
* A dependency structure may be freed only when both it
|
||||
* and its dependencies have completed and any rollbacks that are in
|
||||
* progress have finished as indicated by the set of ALLCOMPLETE flags
|
||||
* all being set. The two MKDIR flags indicate additional dependencies
|
||||
* that must be done when creating a new directory. MKDIR_BODY is
|
||||
* cleared when the directory data block containing the "." and ".."
|
||||
* entries has been written. MKDIR_PARENT is cleared when the parent
|
||||
* inode with the increased link count for ".." has been written. When
|
||||
* both MKDIR flags have been cleared, the DEPCOMPLETE flag is set to
|
||||
* indicate that the directory dependencies have been completed. The
|
||||
* writing of the directory inode itself sets the COMPLETE flag which
|
||||
* then allows the directory entry for the new directory to be written
|
||||
* to disk. The RMDIR flag marks a dirrem structure as representing
|
||||
* the removal of a directory rather than a file. When the removal
|
||||
* dependencies are completed, additional work needs to be done
|
||||
* (truncation of the "." and ".." entries, an additional decrement
|
||||
* of the associated inode, and a decrement of the parent inode). The
|
||||
* DIRCHG flag marks a diradd structure as representing the changing
|
||||
* all being set.
|
||||
*
|
||||
* The two MKDIR flags indicate additional dependencies that must be done
|
||||
* when creating a new directory. MKDIR_BODY is cleared when the directory
|
||||
* data block containing the "." and ".." entries has been written.
|
||||
* MKDIR_PARENT is cleared when the parent inode with the increased link
|
||||
* count for ".." has been written. When both MKDIR flags have been
|
||||
* cleared, the DEPCOMPLETE flag is set to indicate that the directory
|
||||
* dependencies have been completed. The writing of the directory inode
|
||||
* itself sets the COMPLETE flag which then allows the directory entry for
|
||||
* the new directory to be written to disk. The RMDIR flag marks a dirrem
|
||||
* structure as representing the removal of a directory rather than a
|
||||
* file. When the removal dependencies are completed, additional work needs
|
||||
* to be done (an additional decrement of the associated inode, and a
|
||||
* decrement of the parent inode).
|
||||
*
|
||||
* The DIRCHG flag marks a diradd structure as representing the changing
|
||||
* of an existing entry rather than the addition of a new one. When
|
||||
* the update is complete the dirrem associated with the inode for
|
||||
* the old name must be added to the worklist to do the necessary
|
||||
* reference count decrement. The GOINGAWAY flag indicates that the
|
||||
* data structure is frozen from further change until its dependencies
|
||||
* have been completed and its resources freed after which it will be
|
||||
* discarded. The IOSTARTED flag prevents multiple calls to the I/O
|
||||
* start routine from doing multiple rollbacks. The SPACECOUNTED flag
|
||||
* says that the files space has been accounted to the pending free
|
||||
* space count. The NEWBLOCK flag marks pagedep structures that have
|
||||
* just been allocated, so must be claimed by the inode before all
|
||||
* dependencies are complete. The INPROGRESS flag marks worklist
|
||||
* structures that are still on the worklist, but are being considered
|
||||
* for action by some process. The UFS1FMT flag indicates that the
|
||||
* inode being processed is a ufs1 format. The EXTDATA flag indicates
|
||||
* that the allocdirect describes an extended-attributes dependency.
|
||||
* reference count decrement.
|
||||
*
|
||||
* The GOINGAWAY flag indicates that the data structure is frozen from
|
||||
* further change until its dependencies have been completed and its
|
||||
* resources freed after which it will be discarded.
|
||||
*
|
||||
* The IOSTARTED flag prevents multiple calls to the I/O start routine from
|
||||
* doing multiple rollbacks.
|
||||
*
|
||||
* The NEWBLOCK flag marks pagedep structures that have just been allocated,
|
||||
* so must be claimed by the inode before all dependencies are complete.
|
||||
*
|
||||
* The INPROGRESS flag marks worklist structures that are still on the
|
||||
* worklist, but are being considered for action by some process.
|
||||
*
|
||||
* The UFS1FMT flag indicates that the inode being processed is in ufs1 format.
|
||||
*
|
||||
* The EXTDATA flag indicates that the allocdirect describes an
|
||||
* extended-attributes dependency.
|
||||
*
|
||||
* The ONWORKLIST flag shows whether the structure is currently linked
|
||||
* onto a worklist.
|
||||
*/
|
||||
|
Loading…
Reference in New Issue
Block a user