Several performance improvements for soft updates have been added:

1) Fastpath deletions. When a file is being deleted, check to see if it
   was so recently created that its inode has not yet been written to
   disk. If so, the delete can proceed to immediately free the inode.
2) Background writes: No file or block allocations can be done while the
   bitmap is being written to disk. To avoid these stalls, the bitmap is
   copied to another buffer which is written thus leaving the original
   available for further allocations.
3) Link count tracking. Constantly track the difference in i_effnlink and
   i_nlink so that inodes that have had no change other than i_effnlink
   need not be written.
4) Identify buffers with rollback dependencies so that the buffer flushing
   daemon can choose to skip over them.
This commit is contained in:
Kirk McKusick 2000-01-10 00:24:24 +00:00
parent bd5f5da94d
commit cf60e8e4bf
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=55697
12 changed files with 626 additions and 179 deletions

View File

@ -52,7 +52,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* from: @(#)ffs_softdep.c 9.45 (McKusick) 1/9/00
* from: @(#)ffs_softdep.c 9.46 (McKusick) 1/9/00
* $FreeBSD$
*/
@ -212,6 +212,8 @@ static void softdep_disk_write_complete __P((struct buf *));
static void softdep_deallocate_dependencies __P((struct buf *));
static int softdep_fsync __P((struct vnode *));
static int softdep_process_worklist __P((struct mount *));
static void softdep_move_dependencies __P((struct buf *, struct buf *));
static int softdep_count_dependencies __P((struct buf *bp, int));
struct bio_ops bioops = {
softdep_disk_io_initiation, /* io_start */
@ -219,6 +221,8 @@ struct bio_ops bioops = {
softdep_deallocate_dependencies, /* io_deallocate */
softdep_fsync, /* io_fsync */
softdep_process_worklist, /* io_sync */
softdep_move_dependencies, /* io_movedeps */
softdep_count_dependencies, /* io_countdeps */
};
/*
@ -472,7 +476,6 @@ static int stat_dir_entry; /* bufs redirtied as dir entry cannot write */
#ifdef DEBUG
#include <vm/vm.h>
#include <sys/sysctl.h>
#if defined(__FreeBSD__)
SYSCTL_INT(_debug, OID_AUTO, max_softdeps, CTLFLAG_RW, &max_softdeps, 0, "");
SYSCTL_INT(_debug, OID_AUTO, tickdelay, CTLFLAG_RW, &tickdelay, 0, "");
SYSCTL_INT(_debug, OID_AUTO, blk_limit_push, CTLFLAG_RW, &stat_blk_limit_push, 0,"");
@ -483,19 +486,6 @@ SYSCTL_INT(_debug, OID_AUTO, indir_blk_ptrs, CTLFLAG_RW, &stat_indir_blk_ptrs, 0
SYSCTL_INT(_debug, OID_AUTO, inode_bitmap, CTLFLAG_RW, &stat_inode_bitmap, 0, "");
SYSCTL_INT(_debug, OID_AUTO, direct_blk_ptrs, CTLFLAG_RW, &stat_direct_blk_ptrs, 0, "");
SYSCTL_INT(_debug, OID_AUTO, dir_entry, CTLFLAG_RW, &stat_dir_entry, 0, "");
#else /* !__FreeBSD__ */
struct ctldebug debug20 = { "max_softdeps", &max_softdeps };
struct ctldebug debug21 = { "tickdelay", &tickdelay };
struct ctldebug debug23 = { "blk_limit_push", &stat_blk_limit_push };
struct ctldebug debug24 = { "ino_limit_push", &stat_ino_limit_push };
struct ctldebug debug25 = { "blk_limit_hit", &stat_blk_limit_hit };
struct ctldebug debug26 = { "ino_limit_hit", &stat_ino_limit_hit };
struct ctldebug debug27 = { "indir_blk_ptrs", &stat_indir_blk_ptrs };
struct ctldebug debug28 = { "inode_bitmap", &stat_inode_bitmap };
struct ctldebug debug29 = { "direct_blk_ptrs", &stat_direct_blk_ptrs };
struct ctldebug debug30 = { "dir_entry", &stat_dir_entry };
#endif /* !__FreeBSD__ */
#endif /* DEBUG */
/*
@ -636,6 +626,31 @@ softdep_process_worklist(matchmnt)
return (matchcnt);
}
/*
* Move dependencies from one buffer to another.
*/
static void
softdep_move_dependencies(oldbp, newbp)
struct buf *oldbp;
struct buf *newbp;
{
struct worklist *wk, *wktail;
if (LIST_FIRST(&newbp->b_dep) != NULL)
panic("softdep_move_dependencies: need merge code");
wktail = 0;
ACQUIRE_LOCK(&lk);
while (wk = LIST_FIRST(&oldbp->b_dep)) {
LIST_REMOVE(wk, wk_list);
if (wktail == 0)
LIST_INSERT_HEAD(&newbp->b_dep, wk, wk_list);
else
LIST_INSERT_AFTER(wktail, wk, wk_list);
wktail = wk;
}
FREE_LOCK(&lk);
}
/*
* Purge the work list of all items associated with a particular mount point.
*/
@ -1632,11 +1647,6 @@ softdep_setup_freeblocks(ip, length)
(void) inodedep_lookup(fs, ip->i_number, DEPALLOC, &inodedep);
if ((inodedep->id_state & IOSTARTED) != 0)
panic("softdep_setup_freeblocks: inode busy");
/*
* Add the freeblks structure to the list of operations that
* must await the zero'ed inode being written to disk.
*/
WORKLIST_INSERT(&inodedep->id_bufwait, &freeblks->fb_list);
/*
* Because the file length has been truncated to zero, any
* pending block allocation dependency structures associated
@ -1647,6 +1657,16 @@ softdep_setup_freeblocks(ip, length)
merge_inode_lists(inodedep);
while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != 0)
free_allocdirect(&inodedep->id_inoupdt, adp, 1);
/*
* Add the freeblks structure to the list of operations that
* must await the zero'ed inode being written to disk. If we
* still have a bitmap dependency, then the inode has never been
* written to disk, so we can process the freeblks immediately.
*/
if ((inodedep->id_state & DEPCOMPLETE) == 0)
handle_workitem_freeblocks(freeblks);
else
WORKLIST_INSERT(&inodedep->id_bufwait, &freeblks->fb_list);
FREE_LOCK(&lk);
bdwrite(bp);
/*
@ -1841,36 +1861,35 @@ softdep_freefile(pvp, ino, mode)
*/
ACQUIRE_LOCK(&lk);
if (inodedep_lookup(ip->i_fs, ino, 0, &inodedep) == 0) {
add_to_worklist(&freefile->fx_list);
FREE_LOCK(&lk);
handle_workitem_freefile(freefile);
return;
}
/*
* If we still have a bitmap dependency, then the inode has never
* been written to disk. Drop the dependency as it is no longer
* necessary since the inode is being deallocated. We could process
* the freefile immediately, but then we would have to clear the
* id_inowait dependencies here and it is easier just to let the
* zero'ed inode be written and let them be cleaned up in the
* normal followup actions that follow the inode write.
* necessary since the inode is being deallocated. We set the
* ALLCOMPLETE flags since the bitmap now properly shows that the
* inode is not allocated. Even if the inode is actively being
* written, it has been rolled back to its zero'ed state, so we
* are ensured that a zero inode is what is on the disk. For short
* lived files, this change will usually result in removing all the
* dependencies from the inode so that it can be freed immediately.
*/
if ((inodedep->id_state & DEPCOMPLETE) == 0) {
inodedep->id_state |= DEPCOMPLETE;
if ((inodedep->id_state & DEPCOMPLETE) == 0) {
inodedep->id_state |= ALLCOMPLETE;
LIST_REMOVE(inodedep, id_deps);
inodedep->id_buf = NULL;
WORKLIST_REMOVE(&inodedep->id_list);
}
/*
* If the inodedep has no dependencies associated with it,
* then we must free it here and free the file immediately.
* This case arises when an early allocation fails (for
* example, the user is over their file quota).
*/
if (free_inodedep(inodedep) == 0)
if (free_inodedep(inodedep) == 0) {
WORKLIST_INSERT(&inodedep->id_inowait, &freefile->fx_list);
else
add_to_worklist(&freefile->fx_list);
FREE_LOCK(&lk);
FREE_LOCK(&lk);
} else {
FREE_LOCK(&lk);
handle_workitem_freefile(freefile);
}
}
/*
@ -2318,11 +2337,12 @@ softdep_setup_remove(bp, dp, ip, isrmdir)
if ((dirrem->dm_state & COMPLETE) == 0) {
LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd, dirrem,
dm_next);
FREE_LOCK(&lk);
} else {
dirrem->dm_dirinum = dirrem->dm_pagedep->pd_ino;
add_to_worklist(&dirrem->dm_list);
FREE_LOCK(&lk);
handle_workitem_remove(dirrem);
}
FREE_LOCK(&lk);
}
/*
@ -2515,19 +2535,22 @@ softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir)
}
/*
* Called whenever the link count on an inode is increased.
* Called whenever the link count on an inode is changed.
* It creates an inode dependency so that the new reference(s)
* to the inode cannot be committed to disk until the updated
* inode has been written.
*/
void
softdep_increase_linkcnt(ip)
softdep_change_linkcnt(ip)
struct inode *ip; /* the inode with the increased link count */
{
struct inodedep *inodedep;
ACQUIRE_LOCK(&lk);
(void) inodedep_lookup(ip->i_fs, ip->i_number, DEPALLOC, &inodedep);
if (ip->i_nlink < ip->i_effnlink)
panic("softdep_change_linkcnt: bad delta");
inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink;
FREE_LOCK(&lk);
}
@ -2550,14 +2573,19 @@ handle_workitem_remove(dirrem)
return;
}
ip = VTOI(vp);
ACQUIRE_LOCK(&lk);
if ((inodedep_lookup(ip->i_fs, dirrem->dm_oldinum, 0, &inodedep)) == 0)
panic("handle_workitem_remove: lost inodedep 1");
/*
* Normal file deletion.
*/
if ((dirrem->dm_state & RMDIR) == 0) {
ip->i_nlink--;
ip->i_flag |= IN_CHANGE;
if (ip->i_nlink < ip->i_effnlink)
panic("handle_workitem_remove: bad file delta");
ip->i_flag |= IN_CHANGE;
inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink;
FREE_LOCK(&lk);
vput(vp);
num_dirrem -= 1;
WORKITEM_FREE(dirrem, D_DIRREM);
@ -2571,9 +2599,11 @@ handle_workitem_remove(dirrem)
* the parent decremented to account for the loss of "..".
*/
ip->i_nlink -= 2;
ip->i_flag |= IN_CHANGE;
if (ip->i_nlink < ip->i_effnlink)
panic("handle_workitem_remove: bad dir delta");
ip->i_flag |= IN_CHANGE;
inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink;
FREE_LOCK(&lk);
if ((error = UFS_TRUNCATE(vp, (off_t)0, 0, p->p_ucred, p)) != 0)
softdep_error("handle_workitem_remove: truncate", error);
/*
@ -2587,14 +2617,37 @@ handle_workitem_remove(dirrem)
WORKITEM_FREE(dirrem, D_DIRREM);
return;
}
/*
* If we still have a bitmap dependency, then the inode has never
* been written to disk. Drop the dependency as it is no longer
* necessary since the inode is being deallocated. We set the
* ALLCOMPLETE flags since the bitmap now properly shows that the
* inode is not allocated. Even if the inode is actively being
* written, it has been rolled back to its zero'ed state, so we
* are ensured that a zero inode is what is on the disk. For short
* lived files, this change will usually result in removing all the
* dependencies from the inode so that it can be freed immediately.
*/
ACQUIRE_LOCK(&lk);
(void) inodedep_lookup(ip->i_fs, dirrem->dm_oldinum, DEPALLOC,
&inodedep);
if ((inodedep_lookup(ip->i_fs, dirrem->dm_oldinum, 0, &inodedep)) == 0)
panic("handle_workitem_remove: lost inodedep 2");
if ((inodedep->id_state & DEPCOMPLETE) == 0) {
inodedep->id_state |= ALLCOMPLETE;
LIST_REMOVE(inodedep, id_deps);
inodedep->id_buf = NULL;
WORKLIST_REMOVE(&inodedep->id_list);
}
dirrem->dm_state = 0;
dirrem->dm_oldinum = dirrem->dm_dirinum;
WORKLIST_INSERT(&inodedep->id_inowait, &dirrem->dm_list);
FREE_LOCK(&lk);
vput(vp);
if (free_inodedep(inodedep) == 0) {
WORKLIST_INSERT(&inodedep->id_inowait, &dirrem->dm_list);
FREE_LOCK(&lk);
vput(vp);
} else {
FREE_LOCK(&lk);
vput(vp);
handle_workitem_remove(dirrem);
}
}
/*
@ -3456,12 +3509,7 @@ softdep_load_inodeblock(ip)
FREE_LOCK(&lk);
return;
}
if (inodedep->id_nlinkdelta != 0) {
ip->i_effnlink -= inodedep->id_nlinkdelta;
ip->i_flag |= IN_MODIFIED;
inodedep->id_nlinkdelta = 0;
(void) free_inodedep(inodedep);
}
ip->i_effnlink -= inodedep->id_nlinkdelta;
FREE_LOCK(&lk);
}
@ -3500,9 +3548,8 @@ softdep_update_inodeblock(ip, bp, waitfor)
FREE_LOCK(&lk);
return;
}
if (ip->i_nlink < ip->i_effnlink)
if (inodedep->id_nlinkdelta != ip->i_nlink - ip->i_effnlink)
panic("softdep_update_inodeblock: bad delta");
inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink;
/*
* Changes have been initiated. Anything depending on these
* changes cannot occur until this inode has been written.
@ -4404,6 +4451,87 @@ clear_inodedeps(p)
FREE_LOCK(&lk);
}
/*
 * Determine whether the buffer has outstanding dependencies that will
 * cause a roll-back if the buffer is written.
 *
 * bp:        buffer whose dependency list (b_dep) is scanned.
 * wantcount: if non-zero, count every such dependency; if zero, stop
 *            at the first one found.
 *
 * Returns the number of roll-back dependencies found; when wantcount
 * is zero the result is just 0 (none) or 1 (at least one).  The lk
 * lock is acquired for the duration of the scan and released before
 * returning.  Panics if the list holds a work item of a type that is
 * not handled below.
 */
static int
softdep_count_dependencies(bp, wantcount)
	struct buf *bp;
	int wantcount;
{
	struct worklist *wk;
	struct inodedep *inodedep;
	struct indirdep *indirdep;
	struct allocindir *aip;
	struct pagedep *pagedep;
	struct diradd *dap;
	int i, retval;

	retval = 0;
	ACQUIRE_LOCK(&lk);
	for (wk = LIST_FIRST(&bp->b_dep); wk; wk = LIST_NEXT(wk, wk_list)) {
		/*
		 * Note: each "continue" below advances the enclosing
		 * for loop to the next work item.
		 */
		switch (wk->wk_type) {

		case D_INODEDEP:
			inodedep = WK_INODEDEP(wk);
			if ((inodedep->id_state & DEPCOMPLETE) == 0) {
				/* bitmap allocation dependency */
				retval += 1;
				if (!wantcount)
					goto out;
			}
			if (TAILQ_FIRST(&inodedep->id_inoupdt)) {
				/* direct block pointer dependency */
				retval += 1;
				if (!wantcount)
					goto out;
			}
			continue;

		case D_INDIRDEP:
			indirdep = WK_INDIRDEP(wk);
			for (aip = LIST_FIRST(&indirdep->ir_deplisthd);
			    aip; aip = LIST_NEXT(aip, ai_next)) {
				/* indirect block pointer dependency */
				retval += 1;
				if (!wantcount)
					goto out;
			}
			continue;

		case D_PAGEDEP:
			pagedep = WK_PAGEDEP(wk);
			for (i = 0; i < DAHASHSZ; i++) {
				for (dap = LIST_FIRST(&pagedep->pd_diraddhd[i]);
				    dap; dap = LIST_NEXT(dap, da_pdlist)) {
					/* directory entry dependency */
					retval += 1;
					if (!wantcount)
						goto out;
				}
			}
			continue;

		case D_BMSAFEMAP:
		case D_ALLOCDIRECT:
		case D_ALLOCINDIR:
		case D_MKDIR:
			/* never a dependency on these blocks */
			continue;

		default:
			/* Report under this function's real name. */
			panic("softdep_count_dependencies: Unexpected type %s",
			    TYPENAME(wk->wk_type));
			/* NOTREACHED */
		}
	}
out:
	FREE_LOCK(&lk);
	return (retval);
}
/*
* Acquire exclusive access to a buffer.
* Must be called with splbio blocked.

View File

@ -68,6 +68,7 @@ static void vfs_page_set_valid(struct buf *bp, vm_ooffset_t off,
static void vfs_clean_pages(struct buf * bp);
static void vfs_setdirty(struct buf *bp);
static void vfs_vmio_release(struct buf *bp);
static void vfs_backgroundwritedone(struct buf *bp);
static int flushbufqueues(void);
static int bd_request;
@ -349,7 +350,7 @@ bufinit(void)
* buffer cache operation.
*/
maxbufspace = (nbuf + 8) * DFLTBSIZE;
hibufspace = imax(3 * maxbufspace / 4, maxbufspace - MAXBSIZE * 5);
hibufspace = imax(3 * maxbufspace / 4, maxbufspace - MAXBSIZE * 10);
/*
* Limit the amount of malloc memory since it is wired permanently into
* the kernel space. Even though this is accounted for in the buffer
@ -593,6 +594,7 @@ int
bwrite(struct buf * bp)
{
int oldflags, s;
struct buf *newbp;
if (bp->b_flags & B_INVAL) {
brelse(bp);
@ -606,8 +608,66 @@ bwrite(struct buf * bp)
panic("bwrite: buffer is not busy???");
#endif
s = splbio();
/*
* If a background write is already in progress, delay
* writing this block if it is asynchronous. Otherwise
* wait for the background write to complete.
*/
if (bp->b_xflags & BX_BKGRDINPROG) {
if (bp->b_flags & B_ASYNC) {
splx(s);
bdwrite(bp);
return (0);
}
bp->b_xflags |= BX_BKGRDWAIT;
tsleep(&bp->b_xflags, PRIBIO, "biord", 0);
if (bp->b_xflags & BX_BKGRDINPROG)
panic("bwrite: still writing");
}
/* Mark the buffer clean */
bundirty(bp);
/*
* If this buffer is marked for background writing and we
* do not have to wait for it, make a copy and write the
* copy so as to leave this buffer ready for further use.
*/
if ((bp->b_xflags & BX_BKGRDWRITE) && (bp->b_flags & B_ASYNC)) {
if (bp->b_flags & B_CALL)
panic("bwrite: need chained iodone");
/* get a new block */
newbp = geteblk(bp->b_bufsize);
/* set it to be identical to the old block */
memcpy(newbp->b_data, bp->b_data, bp->b_bufsize);
bgetvp(bp->b_vp, newbp);
newbp->b_lblkno = bp->b_lblkno;
newbp->b_blkno = bp->b_blkno;
newbp->b_offset = bp->b_offset;
newbp->b_iodone = vfs_backgroundwritedone;
newbp->b_flags |= B_ASYNC | B_CALL;
newbp->b_flags &= ~B_INVAL;
/* move over the dependencies */
if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_movedeps)
(*bioops.io_movedeps)(bp, newbp);
/*
* Initiate write on the copy, release the original to
* the B_LOCKED queue so that it cannot go away until
* the background write completes. If not locked it could go
* away and then be reconstituted while it was being written.
* If the reconstituted buffer were written, we could end up
* with two background copies being written at the same time.
*/
bp->b_xflags |= BX_BKGRDINPROG;
bp->b_flags |= B_LOCKED;
bqrelse(bp);
bp = newbp;
}
bp->b_flags &= ~(B_READ | B_DONE | B_ERROR);
bp->b_flags |= B_WRITEINPROG | B_CACHE;
@ -629,6 +689,56 @@ bwrite(struct buf * bp)
return (0);
}
/*
* Complete a background write started from bwrite.
*
* bp is the buffer whose write has just finished. The original buffer
* that it shadows is looked up with gbincore() using bp's vnode and
* logical block number; failing to find it is fatal.
*
* The steps below are order dependent: dependencies are completed on
* bp first, any that remain are handed back to the original buffer,
* and only then is the original marked as no longer having a
* background write in progress and released from the locked state.
* Finally bp itself is passed on to biodone().
*/
static void
vfs_backgroundwritedone(bp)
struct buf *bp;
{
struct buf *origbp;
/*
* Find the original buffer that we are writing.
*/
if ((origbp = gbincore(bp->b_vp, bp->b_lblkno)) == NULL)
panic("backgroundwritedone: lost buffer");
/*
* Process dependencies then return any unfinished ones.
* Both hooks are optional and are only invoked when bp still
* has entries on its dependency list.
*/
if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_complete)
(*bioops.io_complete)(bp);
if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_movedeps)
(*bioops.io_movedeps)(bp, origbp);
/*
* Clear the BX_BKGRDINPROG flag in the original buffer
* and awaken it if it is waiting for the write to complete.
* The wakeup channel is the address of the original's b_xflags.
*/
origbp->b_xflags &= ~BX_BKGRDINPROG;
if (origbp->b_xflags & BX_BKGRDWAIT) {
origbp->b_xflags &= ~BX_BKGRDWAIT;
wakeup(&origbp->b_xflags);
}
/*
* Clear the B_LOCKED flag and remove it from the locked
* queue if it currently resides there.
* NOTE(review): a zero return from the non-blocking BUF_LOCK is
* presumably "lock obtained"; if the lock cannot be taken the
* buffer is left where it is.
*/
origbp->b_flags &= ~B_LOCKED;
if (BUF_LOCK(origbp, LK_EXCLUSIVE | LK_NOWAIT) == 0) {
bremfree(origbp);
bqrelse(origbp);
}
/*
* This buffer is marked B_NOCACHE, so when it is released
* by biodone, it will be tossed. We mark it with B_READ
* to avoid biodone doing a second vwakeup.
* B_CALL and b_iodone are cleared before calling biodone,
* presumably so that this handler is not invoked a second time.
*/
bp->b_flags |= B_NOCACHE | B_READ;
bp->b_flags &= ~(B_CACHE | B_CALL | B_DONE);
bp->b_iodone = 0;
biodone(bp);
}
/*
* Delayed write. (Buffer is marked dirty). Do not bother writing
* anything if the buffer is marked invalid.
@ -757,6 +867,10 @@ bundirty(bp)
--numdirtybuffers;
numdirtywakeup();
}
/*
* Since it is now being written, we can clear its deferred write flag.
*/
bp->b_flags &= ~B_DEFERRED;
}
/*
@ -895,12 +1009,16 @@ brelse(struct buf * bp)
*
* Normally we can do this whether a buffer is B_DELWRI or not. If
* the buffer is an NFS buffer, it is tracking piecemeal writes or
* the commit state and we cannot afford to lose the buffer.
* the commit state and we cannot afford to lose the buffer. If the
* buffer has a background write in progress, we need to keep it
* around to prevent it from being reconstituted and starting a second
* background write.
*/
if ((bp->b_flags & B_VMIO)
&& !(bp->b_vp->v_tag == VT_NFS &&
!vn_isdisk(bp->b_vp) &&
(bp->b_flags & B_DELWRI))
(bp->b_flags & B_DELWRI) &&
(bp->b_xflags & BX_BKGRDINPROG))
) {
int i, j, resid;
@ -997,6 +1115,9 @@ brelse(struct buf * bp)
/* buffers with no memory */
if (bp->b_bufsize == 0) {
bp->b_flags |= B_INVAL;
bp->b_xflags &= ~BX_BKGRDWRITE;
if (bp->b_xflags & BX_BKGRDINPROG)
panic("losing buffer 1");
if (bp->b_kvasize) {
bp->b_qindex = QUEUE_EMPTYKVA;
kvawakeup = 1;
@ -1011,6 +1132,9 @@ brelse(struct buf * bp)
/* buffers with junk contents */
} else if (bp->b_flags & (B_ERROR | B_INVAL | B_NOCACHE | B_RELBUF)) {
bp->b_flags |= B_INVAL;
bp->b_xflags &= ~BX_BKGRDWRITE;
if (bp->b_xflags & BX_BKGRDINPROG)
panic("losing buffer 2");
bp->b_qindex = QUEUE_CLEAN;
if (bp->b_kvasize)
kvawakeup = 1;
@ -1501,6 +1625,8 @@ getnewbuf(int slpflag, int slptimeo, int size, int maxsize)
}
if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_deallocate)
(*bioops.io_deallocate)(bp);
if (bp->b_xflags & BX_BKGRDINPROG)
panic("losing buffer 3");
LIST_REMOVE(bp, b_hash);
LIST_INSERT_HEAD(&invalhash, bp, b_hash);
@ -1508,6 +1634,7 @@ getnewbuf(int slpflag, int slptimeo, int size, int maxsize)
allocbuf(bp, 0);
bp->b_flags = 0;
bp->b_xflags = 0;
bp->b_dev = NODEV;
bp->b_vp = NULL;
bp->b_blkno = bp->b_lblkno = 0;
@ -1761,7 +1888,8 @@ flushbufqueues(void)
while (bp) {
KASSERT((bp->b_flags & B_DELWRI), ("unexpected clean buffer %p", bp));
if ((bp->b_flags & B_DELWRI) != 0) {
if ((bp->b_flags & B_DELWRI) != 0 &&
(bp->b_xflags & BX_BKGRDINPROG) == 0) {
if (bp->b_flags & B_INVAL) {
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT) != 0)
panic("flushbufqueues: locked buf");
@ -1770,13 +1898,24 @@ flushbufqueues(void)
++r;
break;
}
if (LIST_FIRST(&bp->b_dep) != NULL &&
bioops.io_countdeps &&
(bp->b_flags & B_DEFERRED) == 0 &&
(*bioops.io_countdeps)(bp, 0)) {
TAILQ_REMOVE(&bufqueues[QUEUE_DIRTY],
bp, b_freelist);
TAILQ_INSERT_TAIL(&bufqueues[QUEUE_DIRTY],
bp, b_freelist);
bp->b_flags |= B_DEFERRED;
continue;
}
vfs_bio_awrite(bp);
++r;
break;
}
bp = TAILQ_NEXT(bp, b_freelist);
}
return(r);
return (r);
}
/*

View File

@ -65,6 +65,8 @@ extern struct bio_ops {
void (*io_deallocate) __P((struct buf *));
int (*io_fsync) __P((struct vnode *));
int (*io_sync) __P((struct mount *));
void (*io_movedeps) __P((struct buf *, struct buf *));
int (*io_countdeps) __P((struct buf *, int));
} bioops;
struct iodone_chain {
@ -194,7 +196,7 @@ struct buf {
#define B_NEEDCOMMIT 0x00000002 /* Append-write in progress. */
#define B_ASYNC 0x00000004 /* Start I/O, do not wait. */
#define B_UNUSED0 0x00000008 /* Old B_BAD */
#define B_UNUSED1 0x00000010 /* Old B_BUSY */
#define B_DEFERRED 0x00000010 /* Skipped over for cleaning */
#define B_CACHE 0x00000020 /* Bread found us in the cache. */
#define B_CALL 0x00000040 /* Call b_iodone from biodone. */
#define B_DELWRI 0x00000080 /* Delay I/O until buffer reused. */
@ -235,6 +237,9 @@ struct buf {
*/
#define BX_VNDIRTY 0x00000001 /* On vnode dirty list */
#define BX_VNCLEAN 0x00000002 /* On vnode clean list */
#define BX_BKGRDWRITE 0x00000004 /* Do writes in background */
#define BX_BKGRDINPROG 0x00000008 /* Background write in progress */
#define BX_BKGRDWAIT 0x00000010 /* Background write waiting */
#define NOOFFSET (-1LL) /* No buffer offset calculated yet */

View File

@ -65,6 +65,8 @@ extern struct bio_ops {
void (*io_deallocate) __P((struct buf *));
int (*io_fsync) __P((struct vnode *));
int (*io_sync) __P((struct mount *));
void (*io_movedeps) __P((struct buf *, struct buf *));
int (*io_countdeps) __P((struct buf *, int));
} bioops;
struct iodone_chain {
@ -194,7 +196,7 @@ struct buf {
#define B_NEEDCOMMIT 0x00000002 /* Append-write in progress. */
#define B_ASYNC 0x00000004 /* Start I/O, do not wait. */
#define B_UNUSED0 0x00000008 /* Old B_BAD */
#define B_UNUSED1 0x00000010 /* Old B_BUSY */
#define B_DEFERRED 0x00000010 /* Skipped over for cleaning */
#define B_CACHE 0x00000020 /* Bread found us in the cache. */
#define B_CALL 0x00000040 /* Call b_iodone from biodone. */
#define B_DELWRI 0x00000080 /* Delay I/O until buffer reused. */
@ -235,6 +237,9 @@ struct buf {
*/
#define BX_VNDIRTY 0x00000001 /* On vnode dirty list */
#define BX_VNCLEAN 0x00000002 /* On vnode clean list */
#define BX_BKGRDWRITE 0x00000004 /* Do writes in background */
#define BX_BKGRDINPROG 0x00000008 /* Background write in progress */
#define BX_BKGRDWAIT 0x00000010 /* Background write waiting */
#define NOOFFSET (-1LL) /* No buffer offset calculated yet */

View File

@ -836,6 +836,7 @@ ffs_fragextend(ip, cg, bprev, osize, nsize)
brelse(bp);
return (0);
}
bp->b_xflags |= BX_BKGRDWRITE;
cgp->cg_time = time_second;
bno = dtogd(fs, bprev);
for (i = numfrags(fs, osize); i < frags; i++)
@ -903,6 +904,7 @@ ffs_alloccg(ip, cg, bpref, size)
brelse(bp);
return (0);
}
bp->b_xflags |= BX_BKGRDWRITE;
cgp->cg_time = time_second;
if (size == fs->fs_bsize) {
bno = ffs_alloccgblk(ip, bp, bpref);
@ -1113,6 +1115,7 @@ ffs_clusteralloc(ip, cg, bpref, len)
cgp = (struct cg *)bp->b_data;
if (!cg_chkmagic(cgp))
goto fail;
bp->b_xflags |= BX_BKGRDWRITE;
/*
* Check to see if a cluster of the needed size (or bigger) is
* available in this cylinder group.
@ -1227,6 +1230,7 @@ ffs_nodealloccg(ip, cg, ipref, mode)
brelse(bp);
return (0);
}
bp->b_xflags |= BX_BKGRDWRITE;
cgp->cg_time = time_second;
if (ipref) {
ipref %= fs->fs_ipg;
@ -1322,6 +1326,7 @@ ffs_blkfree(ip, bno, size)
brelse(bp);
return;
}
bp->b_xflags |= BX_BKGRDWRITE;
cgp->cg_time = time_second;
bno = dtogd(fs, bno);
if (size == fs->fs_bsize) {
@ -1419,6 +1424,7 @@ ffs_checkblk(ip, bno, size)
cgp = (struct cg *)bp->b_data;
if (!cg_chkmagic(cgp))
panic("ffs_checkblk: cg magic mismatch");
bp->b_xflags |= BX_BKGRDWRITE;
bno = dtogd(fs, bno);
if (size == fs->fs_bsize) {
free = ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno));
@ -1484,6 +1490,7 @@ ffs_vfree( pvp, ino, mode)
brelse(bp);
return (0);
}
bp->b_xflags |= BX_BKGRDWRITE;
cgp->cg_time = time_second;
ino %= fs->fs_ipg;
if (isclr(cg_inosused(cgp), ino)) {

View File

@ -52,7 +52,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* from: @(#)ffs_softdep.c 9.45 (McKusick) 1/9/00
* from: @(#)ffs_softdep.c 9.46 (McKusick) 1/9/00
* $FreeBSD$
*/
@ -212,6 +212,8 @@ static void softdep_disk_write_complete __P((struct buf *));
static void softdep_deallocate_dependencies __P((struct buf *));
static int softdep_fsync __P((struct vnode *));
static int softdep_process_worklist __P((struct mount *));
static void softdep_move_dependencies __P((struct buf *, struct buf *));
static int softdep_count_dependencies __P((struct buf *bp, int));
struct bio_ops bioops = {
softdep_disk_io_initiation, /* io_start */
@ -219,6 +221,8 @@ struct bio_ops bioops = {
softdep_deallocate_dependencies, /* io_deallocate */
softdep_fsync, /* io_fsync */
softdep_process_worklist, /* io_sync */
softdep_move_dependencies, /* io_movedeps */
softdep_count_dependencies, /* io_countdeps */
};
/*
@ -472,7 +476,6 @@ static int stat_dir_entry; /* bufs redirtied as dir entry cannot write */
#ifdef DEBUG
#include <vm/vm.h>
#include <sys/sysctl.h>
#if defined(__FreeBSD__)
SYSCTL_INT(_debug, OID_AUTO, max_softdeps, CTLFLAG_RW, &max_softdeps, 0, "");
SYSCTL_INT(_debug, OID_AUTO, tickdelay, CTLFLAG_RW, &tickdelay, 0, "");
SYSCTL_INT(_debug, OID_AUTO, blk_limit_push, CTLFLAG_RW, &stat_blk_limit_push, 0,"");
@ -483,19 +486,6 @@ SYSCTL_INT(_debug, OID_AUTO, indir_blk_ptrs, CTLFLAG_RW, &stat_indir_blk_ptrs, 0
SYSCTL_INT(_debug, OID_AUTO, inode_bitmap, CTLFLAG_RW, &stat_inode_bitmap, 0, "");
SYSCTL_INT(_debug, OID_AUTO, direct_blk_ptrs, CTLFLAG_RW, &stat_direct_blk_ptrs, 0, "");
SYSCTL_INT(_debug, OID_AUTO, dir_entry, CTLFLAG_RW, &stat_dir_entry, 0, "");
#else /* !__FreeBSD__ */
struct ctldebug debug20 = { "max_softdeps", &max_softdeps };
struct ctldebug debug21 = { "tickdelay", &tickdelay };
struct ctldebug debug23 = { "blk_limit_push", &stat_blk_limit_push };
struct ctldebug debug24 = { "ino_limit_push", &stat_ino_limit_push };
struct ctldebug debug25 = { "blk_limit_hit", &stat_blk_limit_hit };
struct ctldebug debug26 = { "ino_limit_hit", &stat_ino_limit_hit };
struct ctldebug debug27 = { "indir_blk_ptrs", &stat_indir_blk_ptrs };
struct ctldebug debug28 = { "inode_bitmap", &stat_inode_bitmap };
struct ctldebug debug29 = { "direct_blk_ptrs", &stat_direct_blk_ptrs };
struct ctldebug debug30 = { "dir_entry", &stat_dir_entry };
#endif /* !__FreeBSD__ */
#endif /* DEBUG */
/*
@ -636,6 +626,31 @@ softdep_process_worklist(matchmnt)
return (matchcnt);
}
/*
* Move dependencies from one buffer to another.
*/
static void
softdep_move_dependencies(oldbp, newbp)
struct buf *oldbp;
struct buf *newbp;
{
struct worklist *wk, *wktail;
if (LIST_FIRST(&newbp->b_dep) != NULL)
panic("softdep_move_dependencies: need merge code");
wktail = 0;
ACQUIRE_LOCK(&lk);
while (wk = LIST_FIRST(&oldbp->b_dep)) {
LIST_REMOVE(wk, wk_list);
if (wktail == 0)
LIST_INSERT_HEAD(&newbp->b_dep, wk, wk_list);
else
LIST_INSERT_AFTER(wktail, wk, wk_list);
wktail = wk;
}
FREE_LOCK(&lk);
}
/*
* Purge the work list of all items associated with a particular mount point.
*/
@ -1632,11 +1647,6 @@ softdep_setup_freeblocks(ip, length)
(void) inodedep_lookup(fs, ip->i_number, DEPALLOC, &inodedep);
if ((inodedep->id_state & IOSTARTED) != 0)
panic("softdep_setup_freeblocks: inode busy");
/*
* Add the freeblks structure to the list of operations that
* must await the zero'ed inode being written to disk.
*/
WORKLIST_INSERT(&inodedep->id_bufwait, &freeblks->fb_list);
/*
* Because the file length has been truncated to zero, any
* pending block allocation dependency structures associated
@ -1647,6 +1657,16 @@ softdep_setup_freeblocks(ip, length)
merge_inode_lists(inodedep);
while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != 0)
free_allocdirect(&inodedep->id_inoupdt, adp, 1);
/*
* Add the freeblks structure to the list of operations that
* must await the zero'ed inode being written to disk. If we
* still have a bitmap dependency, then the inode has never been
* written to disk, so we can process the freeblks immediately.
*/
if ((inodedep->id_state & DEPCOMPLETE) == 0)
handle_workitem_freeblocks(freeblks);
else
WORKLIST_INSERT(&inodedep->id_bufwait, &freeblks->fb_list);
FREE_LOCK(&lk);
bdwrite(bp);
/*
@ -1841,36 +1861,35 @@ softdep_freefile(pvp, ino, mode)
*/
ACQUIRE_LOCK(&lk);
if (inodedep_lookup(ip->i_fs, ino, 0, &inodedep) == 0) {
add_to_worklist(&freefile->fx_list);
FREE_LOCK(&lk);
handle_workitem_freefile(freefile);
return;
}
/*
* If we still have a bitmap dependency, then the inode has never
* been written to disk. Drop the dependency as it is no longer
* necessary since the inode is being deallocated. We could process
* the freefile immediately, but then we would have to clear the
* id_inowait dependencies here and it is easier just to let the
* zero'ed inode be written and let them be cleaned up in the
* normal followup actions that follow the inode write.
* necessary since the inode is being deallocated. We set the
* ALLCOMPLETE flags since the bitmap now properly shows that the
* inode is not allocated. Even if the inode is actively being
* written, it has been rolled back to its zero'ed state, so we
* are ensured that a zero inode is what is on the disk. For short
* lived files, this change will usually result in removing all the
* dependencies from the inode so that it can be freed immediately.
*/
if ((inodedep->id_state & DEPCOMPLETE) == 0) {
inodedep->id_state |= DEPCOMPLETE;
if ((inodedep->id_state & DEPCOMPLETE) == 0) {
inodedep->id_state |= ALLCOMPLETE;
LIST_REMOVE(inodedep, id_deps);
inodedep->id_buf = NULL;
WORKLIST_REMOVE(&inodedep->id_list);
}
/*
* If the inodedep has no dependencies associated with it,
* then we must free it here and free the file immediately.
* This case arises when an early allocation fails (for
* example, the user is over their file quota).
*/
if (free_inodedep(inodedep) == 0)
if (free_inodedep(inodedep) == 0) {
WORKLIST_INSERT(&inodedep->id_inowait, &freefile->fx_list);
else
add_to_worklist(&freefile->fx_list);
FREE_LOCK(&lk);
FREE_LOCK(&lk);
} else {
FREE_LOCK(&lk);
handle_workitem_freefile(freefile);
}
}
/*
@ -2318,11 +2337,12 @@ softdep_setup_remove(bp, dp, ip, isrmdir)
if ((dirrem->dm_state & COMPLETE) == 0) {
LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd, dirrem,
dm_next);
FREE_LOCK(&lk);
} else {
dirrem->dm_dirinum = dirrem->dm_pagedep->pd_ino;
add_to_worklist(&dirrem->dm_list);
FREE_LOCK(&lk);
handle_workitem_remove(dirrem);
}
FREE_LOCK(&lk);
}
/*
@ -2515,19 +2535,22 @@ softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir)
}
/*
* Called whenever the link count on an inode is increased.
* Called whenever the link count on an inode is changed.
* It creates an inode dependency so that the new reference(s)
* to the inode cannot be committed to disk until the updated
* inode has been written.
*/
void
softdep_increase_linkcnt(ip)
softdep_change_linkcnt(ip)
struct inode *ip; /* the inode with the increased link count */
{
struct inodedep *inodedep;
ACQUIRE_LOCK(&lk);
(void) inodedep_lookup(ip->i_fs, ip->i_number, DEPALLOC, &inodedep);
if (ip->i_nlink < ip->i_effnlink)
panic("softdep_change_linkcnt: bad delta");
inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink;
FREE_LOCK(&lk);
}
@ -2550,14 +2573,19 @@ handle_workitem_remove(dirrem)
return;
}
ip = VTOI(vp);
ACQUIRE_LOCK(&lk);
if ((inodedep_lookup(ip->i_fs, dirrem->dm_oldinum, 0, &inodedep)) == 0)
panic("handle_workitem_remove: lost inodedep 1");
/*
* Normal file deletion.
*/
if ((dirrem->dm_state & RMDIR) == 0) {
ip->i_nlink--;
ip->i_flag |= IN_CHANGE;
if (ip->i_nlink < ip->i_effnlink)
panic("handle_workitem_remove: bad file delta");
ip->i_flag |= IN_CHANGE;
inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink;
FREE_LOCK(&lk);
vput(vp);
num_dirrem -= 1;
WORKITEM_FREE(dirrem, D_DIRREM);
@ -2571,9 +2599,11 @@ handle_workitem_remove(dirrem)
* the parent decremented to account for the loss of "..".
*/
ip->i_nlink -= 2;
ip->i_flag |= IN_CHANGE;
if (ip->i_nlink < ip->i_effnlink)
panic("handle_workitem_remove: bad dir delta");
ip->i_flag |= IN_CHANGE;
inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink;
FREE_LOCK(&lk);
if ((error = UFS_TRUNCATE(vp, (off_t)0, 0, p->p_ucred, p)) != 0)
softdep_error("handle_workitem_remove: truncate", error);
/*
@ -2587,14 +2617,37 @@ handle_workitem_remove(dirrem)
WORKITEM_FREE(dirrem, D_DIRREM);
return;
}
/*
* If we still have a bitmap dependency, then the inode has never
* been written to disk. Drop the dependency as it is no longer
* necessary since the inode is being deallocated. We set the
* ALLCOMPLETE flags since the bitmap now properly shows that the
* inode is not allocated. Even if the inode is actively being
* written, it has been rolled back to its zero'ed state, so we
* are ensured that a zero inode is what is on the disk. For short
* lived files, this change will usually result in removing all the
* dependencies from the inode so that it can be freed immediately.
*/
ACQUIRE_LOCK(&lk);
(void) inodedep_lookup(ip->i_fs, dirrem->dm_oldinum, DEPALLOC,
&inodedep);
if ((inodedep_lookup(ip->i_fs, dirrem->dm_oldinum, 0, &inodedep)) == 0)
panic("handle_workitem_remove: lost inodedep 2");
if ((inodedep->id_state & DEPCOMPLETE) == 0) {
inodedep->id_state |= ALLCOMPLETE;
LIST_REMOVE(inodedep, id_deps);
inodedep->id_buf = NULL;
WORKLIST_REMOVE(&inodedep->id_list);
}
dirrem->dm_state = 0;
dirrem->dm_oldinum = dirrem->dm_dirinum;
WORKLIST_INSERT(&inodedep->id_inowait, &dirrem->dm_list);
FREE_LOCK(&lk);
vput(vp);
if (free_inodedep(inodedep) == 0) {
WORKLIST_INSERT(&inodedep->id_inowait, &dirrem->dm_list);
FREE_LOCK(&lk);
vput(vp);
} else {
FREE_LOCK(&lk);
vput(vp);
handle_workitem_remove(dirrem);
}
}
/*
@ -3456,12 +3509,7 @@ softdep_load_inodeblock(ip)
FREE_LOCK(&lk);
return;
}
if (inodedep->id_nlinkdelta != 0) {
ip->i_effnlink -= inodedep->id_nlinkdelta;
ip->i_flag |= IN_MODIFIED;
inodedep->id_nlinkdelta = 0;
(void) free_inodedep(inodedep);
}
ip->i_effnlink -= inodedep->id_nlinkdelta;
FREE_LOCK(&lk);
}
@ -3500,9 +3548,8 @@ softdep_update_inodeblock(ip, bp, waitfor)
FREE_LOCK(&lk);
return;
}
if (ip->i_nlink < ip->i_effnlink)
if (inodedep->id_nlinkdelta != ip->i_nlink - ip->i_effnlink)
panic("softdep_update_inodeblock: bad delta");
inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink;
/*
* Changes have been initiated. Anything depending on these
* changes cannot occur until this inode has been written.
@ -4404,6 +4451,87 @@ clear_inodedeps(p)
FREE_LOCK(&lk);
}
/*
 * Function to determine if the buffer has outstanding dependencies
 * that will cause a roll-back if the buffer is written. If wantcount
 * is set, return number of dependencies, otherwise just yes or no.
 *
 * The work items attached to bp->b_dep are scanned with the soft
 * updates lock (lk) held for the whole walk. When wantcount is zero
 * the scan stops at the first dependency found, so the result is
 * either 0 or 1. An unrecognized work item type is fatal.
 */
static int
softdep_count_dependencies(bp, wantcount)
	struct buf *bp;		/* buffer whose dependency list is examined */
	int wantcount;		/* non-zero: count all; zero: stop at first */
{
	struct worklist *wk;
	struct inodedep *inodedep;
	struct indirdep *indirdep;
	struct allocindir *aip;
	struct pagedep *pagedep;
	struct diradd *dap;
	int i, retval;

	retval = 0;
	ACQUIRE_LOCK(&lk);
	for (wk = LIST_FIRST(&bp->b_dep); wk; wk = LIST_NEXT(wk, wk_list)) {
		switch (wk->wk_type) {

		case D_INODEDEP:
			inodedep = WK_INODEDEP(wk);
			if ((inodedep->id_state & DEPCOMPLETE) == 0) {
				/* bitmap allocation dependency */
				retval += 1;
				if (!wantcount)
					goto out;
			}
			if (TAILQ_FIRST(&inodedep->id_inoupdt)) {
				/*
				 * direct block pointer dependency; counted
				 * once per inodedep, not once per entry
				 */
				retval += 1;
				if (!wantcount)
					goto out;
			}
			continue;

		case D_INDIRDEP:
			indirdep = WK_INDIRDEP(wk);
			for (aip = LIST_FIRST(&indirdep->ir_deplisthd);
			     aip; aip = LIST_NEXT(aip, ai_next)) {
				/* indirect block pointer dependency */
				retval += 1;
				if (!wantcount)
					goto out;
			}
			continue;

		case D_PAGEDEP:
			pagedep = WK_PAGEDEP(wk);
			/* every hash chain of pending additions is visited */
			for (i = 0; i < DAHASHSZ; i++) {
				for (dap = LIST_FIRST(&pagedep->pd_diraddhd[i]);
				     dap; dap = LIST_NEXT(dap, da_pdlist)) {
					/* directory entry dependency */
					retval += 1;
					if (!wantcount)
						goto out;
				}
			}
			continue;

		case D_BMSAFEMAP:
		case D_ALLOCDIRECT:
		case D_ALLOCINDIR:
		case D_MKDIR:
			/* never a dependency on these blocks */
			continue;

		default:
			/* Name this function so the panic is traceable. */
			panic("softdep_count_dependencies: Unexpected type %s",
			    TYPENAME(wk->wk_type));
			/* NOTREACHED */
		}
	}
out:
	/* Single exit point so the lock is dropped on every path. */
	FREE_LOCK(&lk);
	return retval;
}
/*
* Acquire exclusive access to a buffer.
* Must be called with splbio blocked.

View File

@ -210,11 +210,11 @@ softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir)
}
void
softdep_increase_linkcnt(ip)
softdep_change_linkcnt(ip)
struct inode *ip;
{
panic("softdep_increase_linkcnt called");
panic("softdep_change_linkcnt called");
}
void

View File

@ -671,10 +671,6 @@ ffs_mountfs(devvp, mp, p, malloctype)
bp = NULL;
fs = ump->um_fs;
fs->fs_ronly = ronly;
if (ronly == 0) {
fs->fs_fmod = 1;
fs->fs_clean = 0;
}
size = fs->fs_cssize;
blks = howmany(size, fs->fs_fsize);
if (fs->fs_contigsumsize > 0)
@ -747,6 +743,7 @@ ffs_mountfs(devvp, mp, p, malloctype)
free(base, M_UFSMNT);
goto out;
}
fs->fs_fmod = 1;
fs->fs_clean = 0;
(void) ffs_sbupdate(ump, MNT_WAIT);
}
@ -964,9 +961,9 @@ ffs_sync(mp, waitfor, cred, p)
simple_lock(&vp->v_interlock);
nvp = vp->v_mntvnodes.le_next;
ip = VTOI(vp);
if ((vp->v_type == VNON) || (((ip->i_flag &
(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) &&
(TAILQ_EMPTY(&vp->v_dirtyblkhd) || (waitfor == MNT_LAZY)))) {
if (vp->v_type == VNON || ((ip->i_flag &
(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
TAILQ_EMPTY(&vp->v_dirtyblkhd))) {
simple_unlock(&vp->v_interlock);
continue;
}
@ -1080,7 +1077,7 @@ ffs_vget(mp, ino, vpp)
return (error);
}
bzero((caddr_t)ip, sizeof(struct inode));
lockinit(&ip->i_lock, PINOD, "inode", 0, 0);
lockinit(&ip->i_lock, PINOD, "inode", 0, LK_CANRECURSE);
vp->v_data = ip;
ip->i_vnode = vp;
ip->i_fs = fs = ump->um_fs;

View File

@ -123,10 +123,11 @@ ffs_fsync(ap)
struct vnode *vp = ap->a_vp;
struct buf *bp;
struct buf *nbp;
int s, error, passes, skipmeta;
int s, error, wait, passes, skipmeta;
daddr_t lbn;
wait = (ap->a_waitfor == MNT_WAIT);
if (vn_isdisk(vp)) {
lbn = INT_MAX;
if (vp->v_specmountpoint != NULL &&
@ -143,7 +144,7 @@ ffs_fsync(ap)
*/
passes = NIADDR + 1;
skipmeta = 0;
if (ap->a_waitfor == MNT_WAIT)
if (wait)
skipmeta = 1;
s = splbio();
loop:
@ -153,33 +154,43 @@ ffs_fsync(ap)
for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
nbp = TAILQ_NEXT(bp, b_vnbufs);
/*
* First time through on a synchronous call,
* or if it's already scheduled, skip to the next
* buffer
* Reasons to skip this buffer: it has already been considered
* on this pass, this pass is the first time through on a
* synchronous flush request and the buffer being considered
* is metadata, the buffer has dependencies that will cause
* it to be redirtied and it has not already been deferred,
* or it is already being written.
*/
if ((bp->b_flags & B_SCANNED) ||
((skipmeta == 1) && (bp->b_lblkno < 0)) ||
BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT))
if ((bp->b_flags & B_SCANNED) != 0)
continue;
bp->b_flags |= B_SCANNED;
if ((skipmeta == 1 && bp->b_lblkno < 0))
continue;
if (!wait && LIST_FIRST(&bp->b_dep) != NULL &&
(bp->b_flags & B_DEFERRED) == 0 &&
bioops.io_countdeps && (*bioops.io_countdeps)(bp, 0)) {
bp->b_flags |= B_DEFERRED;
continue;
}
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT))
continue;
if ((bp->b_flags & B_DELWRI) == 0)
panic("ffs_fsync: not dirty");
if (vp != bp->b_vp)
panic("ffs_fsync: vp != vp->b_vp");
/*
* If data is outstanding to another vnode, or we were
* asked to wait for everything, or it's not a file or BDEV,
* start the IO on this buffer immediatly.
* If this is a synchronous flush request, or it is not a
* file or device, start the write on this buffer immediately.
*/
bp->b_flags |= B_SCANNED;
if (((bp->b_vp != vp) || (ap->a_waitfor == MNT_WAIT)) ||
((vp->v_type != VREG) && (vp->v_type != VBLK))) {
if (wait || (vp->v_type != VREG && vp->v_type != VBLK)) {
/*
* On our final pass through, do all I/O synchronously
* so that we can find out if our flush is failing
* because of write errors.
*/
if (passes > 0 || (ap->a_waitfor != MNT_WAIT)) {
if ((bp->b_flags & B_CLUSTEROK) &&
ap->a_waitfor != MNT_WAIT) {
if (passes > 0 || !wait) {
if ((bp->b_flags & B_CLUSTEROK) && !wait) {
BUF_UNLOCK(bp);
(void) vfs_bio_awrite(bp);
} else {
@ -224,7 +235,7 @@ ffs_fsync(ap)
goto loop;
}
if (ap->a_waitfor == MNT_WAIT) {
if (wait) {
while (vp->v_numoutput) {
vp->v_flag |= VBWAIT;
(void) tsleep((caddr_t)&vp->v_numoutput,
@ -260,5 +271,5 @@ ffs_fsync(ap)
}
}
splx(s);
return (UFS_UPDATE(vp, ap->a_waitfor == MNT_WAIT));
return (UFS_UPDATE(vp, wait));
}

View File

@ -102,6 +102,6 @@ void softdep_setup_remove __P((struct buf *,struct inode *, struct inode *,
int));
void softdep_setup_directory_change __P((struct buf *, struct inode *,
struct inode *, long, int));
void softdep_increase_linkcnt __P((struct inode *));
void softdep_change_linkcnt __P((struct inode *));
#endif /* !_UFS_UFS_EXTERN_H_ */

View File

@ -899,17 +899,19 @@ ufs_dirremove(dvp, ip, flags, isrmdir)
ep->d_reclen += dp->i_reclen;
}
out:
if (ip) {
ip->i_effnlink--;
ip->i_flag |= IN_CHANGE;
}
if (DOINGSOFTDEP(dvp)) {
if (ip)
if (ip) {
ip->i_effnlink--;
softdep_change_linkcnt(ip);
softdep_setup_remove(bp, dp, ip, isrmdir);
}
bdwrite(bp);
} else {
if (ip)
if (ip) {
ip->i_effnlink--;
ip->i_nlink--;
ip->i_flag |= IN_CHANGE;
}
if (flags & DOWHITEOUT)
error = VOP_BWRITE(bp->b_vp, bp);
else if (DOINGASYNC(dvp) && dp->i_count != 0) {
@ -946,12 +948,13 @@ ufs_dirrewrite(dp, oip, newinum, newtype, isrmdir)
if (!OFSFMT(vdp))
ep->d_type = newtype;
oip->i_effnlink--;
oip->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(vdp)) {
softdep_change_linkcnt(oip);
softdep_setup_directory_change(bp, dp, oip, newinum, isrmdir);
bdwrite(bp);
} else {
oip->i_nlink--;
oip->i_flag |= IN_CHANGE;
if (DOINGASYNC(vdp)) {
bdwrite(bp);
error = 0;

View File

@ -754,7 +754,7 @@ ufs_link(ap)
ip->i_nlink++;
ip->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(vp))
softdep_increase_linkcnt(ip);
softdep_change_linkcnt(ip);
error = UFS_UPDATE(vp, !(DOINGSOFTDEP(vp) | DOINGASYNC(vp)));
if (!error) {
ufs_makedirentry(ip, cnp, &newdir);
@ -765,6 +765,8 @@ ufs_link(ap)
ip->i_effnlink--;
ip->i_nlink--;
ip->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(vp))
softdep_change_linkcnt(ip);
}
out1:
if (tdvp != vp)
@ -1014,7 +1016,7 @@ ufs_rename(ap)
ip->i_nlink++;
ip->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(fvp))
softdep_increase_linkcnt(ip);
softdep_change_linkcnt(ip);
if ((error = UFS_UPDATE(fvp, !(DOINGSOFTDEP(fvp) |
DOINGASYNC(fvp)))) != 0) {
VOP_UNLOCK(fvp, 0, p);
@ -1079,7 +1081,7 @@ ufs_rename(ap)
dp->i_nlink++;
dp->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(tdvp))
softdep_increase_linkcnt(dp);
softdep_change_linkcnt(dp);
error = UFS_UPDATE(tdvp, !(DOINGSOFTDEP(tdvp) |
DOINGASYNC(tdvp)));
if (error)
@ -1092,6 +1094,8 @@ ufs_rename(ap)
dp->i_effnlink--;
dp->i_nlink--;
dp->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(tdvp))
softdep_change_linkcnt(dp);
(void)UFS_UPDATE(tdvp, 1);
}
goto bad;
@ -1146,10 +1150,12 @@ ufs_rename(ap)
if (doingdirectory) {
if (!newparent) {
dp->i_effnlink--;
dp->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(tdvp))
softdep_change_linkcnt(dp);
}
xp->i_effnlink--;
xp->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(tvp))
softdep_change_linkcnt(xp);
}
VN_POLLEVENT(tdvp, POLLWRITE);
if (doingdirectory && !DOINGSOFTDEP(tvp)) {
@ -1164,9 +1170,12 @@ ufs_rename(ap)
* disk, so when running with that code we avoid doing
* them now.
*/
if (!newparent)
if (!newparent) {
dp->i_nlink--;
dp->i_flag |= IN_CHANGE;
}
xp->i_nlink--;
xp->i_flag |= IN_CHANGE;
ioflag = DOINGASYNC(tvp) ? 0 : IO_SYNC;
if ((error = UFS_TRUNCATE(tvp, (off_t)0, ioflag,
tcnp->cn_cred, tcnp->cn_proc)) != 0)
@ -1247,6 +1256,8 @@ ufs_rename(ap)
ip->i_nlink--;
ip->i_flag |= IN_CHANGE;
ip->i_flag &= ~IN_RENAME;
if (DOINGSOFTDEP(fvp))
softdep_change_linkcnt(ip);
vput(fvp);
} else
vrele(fvp);
@ -1359,7 +1370,7 @@ ufs_mkdir(ap)
ip->i_effnlink = 2;
ip->i_nlink = 2;
if (DOINGSOFTDEP(tvp))
softdep_increase_linkcnt(ip);
softdep_change_linkcnt(ip);
if (cnp->cn_flags & ISWHITEOUT)
ip->i_flags |= UF_OPAQUE;
@ -1372,7 +1383,7 @@ ufs_mkdir(ap)
dp->i_nlink++;
dp->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(dvp))
softdep_increase_linkcnt(dp);
softdep_change_linkcnt(dp);
error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(dvp) | DOINGASYNC(dvp)));
if (error)
goto bad;
@ -1440,6 +1451,8 @@ ufs_mkdir(ap)
dp->i_effnlink--;
dp->i_nlink--;
dp->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(dvp))
softdep_change_linkcnt(dp);
/*
* No need to do an explicit VOP_TRUNCATE here, vrele will
* do this for us because we set the link count to 0.
@ -1447,6 +1460,8 @@ ufs_mkdir(ap)
ip->i_effnlink = 0;
ip->i_nlink = 0;
ip->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(tvp))
softdep_change_linkcnt(ip);
vput(tvp);
}
out:
@ -1505,29 +1520,36 @@ ufs_rmdir(ap)
* inode. If we crash in between, the directory
* will be reattached to lost+found,
*/
dp->i_effnlink--;
ip->i_effnlink--;
if (DOINGSOFTDEP(vp)) {
softdep_change_linkcnt(dp);
softdep_change_linkcnt(ip);
}
error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1);
if (error)
if (error) {
dp->i_effnlink++;
ip->i_effnlink++;
if (DOINGSOFTDEP(vp)) {
softdep_change_linkcnt(dp);
softdep_change_linkcnt(ip);
}
goto out;
}
VN_POLLEVENT(dvp, POLLWRITE|POLLNLINK);
cache_purge(dvp);
/*
* Truncate inode. The only stuff left in the directory is "." and
* "..". The "." reference is inconsequential since we are quashing
* it. We have removed the "." reference and the reference in the
* parent directory, but there may be other hard links. So,
* ufs_dirremove will set the UF_IMMUTABLE flag to ensure that no
* new entries are made. The soft dependency code will arrange to
* do these operations after the parent directory entry has been
* deleted on disk, so when running with that code we avoid doing
* them now.
* it. The soft dependency code will arrange to do these operations
* after the parent directory entry has been deleted on disk, so
* when running with that code we avoid doing them now.
*/
dp->i_effnlink--;
dp->i_flag |= IN_CHANGE;
ip->i_effnlink--;
ip->i_flag |= IN_CHANGE;
if (!DOINGSOFTDEP(vp)) {
dp->i_nlink--;
dp->i_flag |= IN_CHANGE;
ip->i_nlink--;
ip->i_flag |= IN_CHANGE;
ioflag = DOINGASYNC(vp) ? 0 : IO_SYNC;
error = UFS_TRUNCATE(vp, (off_t)0, ioflag, cnp->cn_cred,
cnp->cn_proc);
@ -2119,7 +2141,7 @@ ufs_makeinode(mode, dvp, vpp, cnp)
ip->i_effnlink = 1;
ip->i_nlink = 1;
if (DOINGSOFTDEP(tvp))
softdep_increase_linkcnt(ip);
softdep_change_linkcnt(ip);
if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) &&
suser_xxx(cnp->cn_cred, 0, 0))
ip->i_mode &= ~ISGID;
@ -2148,6 +2170,8 @@ ufs_makeinode(mode, dvp, vpp, cnp)
ip->i_effnlink = 0;
ip->i_nlink = 0;
ip->i_flag |= IN_CHANGE;
if (DOINGSOFTDEP(tvp))
softdep_change_linkcnt(ip);
vput(tvp);
return (error);
}