Implement fully asynchronous partial truncation with softupdates journaling

to resolve errors that can cause corruption on recovery with the old
synchronous mechanism.

 - Append partial truncation freework structures to indirdeps while
   truncation is proceeding.  These prevent new block pointers from
   becoming valid until truncation completes and serialize truncations.
 - On completion of a partial truncate, the journal work waits for the
   zeroed pointers to hit the indirect blocks.
 - softdep_journal_freeblocks() handles last frag allocation and last
   block zeroing.
 - vtruncbuf/ffs_page_remove moved into softdep_*_freeblocks() so it
   is only implemented in one place.
 - Block allocation failure handling moved up one level so it does not
   proceed with buf locks held.  This permits us to do more extensive
   reclaims when filesystem space is exhausted.
 - softdep_sync_metadata() is broken into two parts, the first executes
   once at the start of ffs_syncvnode() and flushes truncations and
   inode dependencies.  The second is called on each locked buf.  This
   eliminates excessive looping and rollbacks.
 - Improve the mechanism in process_worklist_item() that handles
   acquiring vnode locks for handle_workitem_remove() so that it works
   more generally and does not loop excessively over the same worklist
   items on each call.
 - Don't corrupt directories by zeroing the tail in fsck.  This is only
   done for regular files.
 - Push a fsync complete record for files that need it so the checker
   knows a truncation in the journal is no longer valid.

Discussed with:	mckusick, kib (ffs_pages_remove and ffs_truncate parts)
Tested by:	pho
This commit is contained in:
Jeff Roberson 2011-06-10 22:48:35 +00:00
parent 1a62d8843d
commit 280e091a99
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=222958
13 changed files with 2675 additions and 1574 deletions

View File

@ -1604,7 +1604,7 @@ ino_trunc(ino_t ino, off_t size)
* uninitialized space later. * uninitialized space later.
*/ */
off = blkoff(fs, size); off = blkoff(fs, size);
if (off) { if (off && DIP(ip, di_mode) != IFDIR) {
uint8_t *buf; uint8_t *buf;
long clrsize; long clrsize;
@ -1775,13 +1775,18 @@ cg_trunc(struct suj_cg *sc)
struct suj_ino *sino; struct suj_ino *sino;
int i; int i;
for (i = 0; i < SUJ_HASHSIZE; i++) for (i = 0; i < SUJ_HASHSIZE; i++) {
LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) {
if (sino->si_trunc) { if (sino->si_trunc) {
ino_trunc(sino->si_ino, ino_trunc(sino->si_ino,
sino->si_trunc->jt_size); sino->si_trunc->jt_size);
sino->si_blkadj = 0;
sino->si_trunc = NULL; sino->si_trunc = NULL;
} }
if (sino->si_blkadj)
ino_adjblks(sino);
}
}
} }
/* /*
@ -1791,7 +1796,6 @@ cg_trunc(struct suj_cg *sc)
static void static void
cg_check_blk(struct suj_cg *sc) cg_check_blk(struct suj_cg *sc)
{ {
struct suj_ino *sino;
struct suj_blk *sblk; struct suj_blk *sblk;
int i; int i;
@ -1799,15 +1803,6 @@ cg_check_blk(struct suj_cg *sc)
for (i = 0; i < SUJ_HASHSIZE; i++) for (i = 0; i < SUJ_HASHSIZE; i++)
LIST_FOREACH(sblk, &sc->sc_blkhash[i], sb_next) LIST_FOREACH(sblk, &sc->sc_blkhash[i], sb_next)
blk_check(sblk); blk_check(sblk);
/*
* Now that we've freed blocks which are not referenced we
* make a second pass over all inodes to adjust their block
* counts.
*/
for (i = 0; i < SUJ_HASHSIZE; i++)
LIST_FOREACH(sino, &sc->sc_inohash[i], si_next)
if (sino->si_blkadj)
ino_adjblks(sino);
} }
/* /*
@ -1961,14 +1956,7 @@ ino_append(union jrec *rec)
"parent %d, diroff %jd\n", "parent %d, diroff %jd\n",
refrec->jr_op, refrec->jr_ino, refrec->jr_nlink, refrec->jr_op, refrec->jr_ino, refrec->jr_nlink,
refrec->jr_parent, refrec->jr_diroff); refrec->jr_parent, refrec->jr_diroff);
/*
* Lookup the ino and clear truncate if one is found. Partial
* truncates are always done synchronously so if we discover
* an operation that requires a lock the truncation has completed
* and can be discarded.
*/
sino = ino_lookup(((struct jrefrec *)rec)->jr_ino, 1); sino = ino_lookup(((struct jrefrec *)rec)->jr_ino, 1);
sino->si_trunc = NULL;
sino->si_hasrecs = 1; sino->si_hasrecs = 1;
srec = errmalloc(sizeof(*srec)); srec = errmalloc(sizeof(*srec));
srec->sr_rec = rec; srec->sr_rec = rec;
@ -2174,9 +2162,7 @@ blk_build(struct jblkrec *blkrec)
struct suj_rec *srec; struct suj_rec *srec;
struct suj_blk *sblk; struct suj_blk *sblk;
struct jblkrec *blkrn; struct jblkrec *blkrn;
struct suj_ino *sino;
ufs2_daddr_t blk; ufs2_daddr_t blk;
off_t foff;
int frag; int frag;
if (debug) if (debug)
@ -2185,17 +2171,6 @@ blk_build(struct jblkrec *blkrec)
blkrec->jb_op, blkrec->jb_blkno, blkrec->jb_frags, blkrec->jb_op, blkrec->jb_blkno, blkrec->jb_frags,
blkrec->jb_oldfrags, blkrec->jb_ino, blkrec->jb_lbn); blkrec->jb_oldfrags, blkrec->jb_ino, blkrec->jb_lbn);
/*
* Look up the inode and clear the truncate if any lbns after the
* truncate lbn are freed or allocated.
*/
sino = ino_lookup(blkrec->jb_ino, 0);
if (sino && sino->si_trunc) {
foff = lblktosize(fs, blkrec->jb_lbn);
foff += lfragtosize(fs, blkrec->jb_frags);
if (foff > sino->si_trunc->jt_size)
sino->si_trunc = NULL;
}
blk = blknum(fs, blkrec->jb_blkno); blk = blknum(fs, blkrec->jb_blkno);
frag = fragnum(fs, blkrec->jb_blkno); frag = fragnum(fs, blkrec->jb_blkno);
sblk = blk_lookup(blk, 1); sblk = blk_lookup(blk, 1);
@ -2242,10 +2217,15 @@ ino_build_trunc(struct jtrncrec *rec)
struct suj_ino *sino; struct suj_ino *sino;
if (debug) if (debug)
printf("ino_build_trunc: ino %d, size %jd\n", printf("ino_build_trunc: op %d ino %d, size %jd\n",
rec->jt_ino, rec->jt_size); rec->jt_op, rec->jt_ino, rec->jt_size);
sino = ino_lookup(rec->jt_ino, 1); sino = ino_lookup(rec->jt_ino, 1);
sino->si_trunc = rec; if (rec->jt_op == JOP_SYNC) {
sino->si_trunc = NULL;
return;
}
if (sino->si_trunc == NULL || sino->si_trunc->jt_size > rec->jt_size)
sino->si_trunc = rec;
} }
/* /*

View File

@ -302,6 +302,7 @@ struct vattr {
#define IO_EXT 0x0400 /* operate on external attributes */ #define IO_EXT 0x0400 /* operate on external attributes */
#define IO_NORMAL 0x0800 /* operate on regular data */ #define IO_NORMAL 0x0800 /* operate on regular data */
#define IO_NOMACCHECK 0x1000 /* MAC checks unnecessary */ #define IO_NOMACCHECK 0x1000 /* MAC checks unnecessary */
#define IO_BUFLOCKED 0x2000 /* ffs flag; indir buf is locked */
#define IO_SEQMAX 0x7F /* seq heuristic max value */ #define IO_SEQMAX 0x7F /* seq heuristic max value */
#define IO_SEQSHIFT 16 /* seq heuristic in upper 16 bits */ #define IO_SEQSHIFT 16 /* seq heuristic in upper 16 bits */

View File

@ -217,7 +217,7 @@ ffs_alloc(ip, lbn, bpref, size, flags, cred, bnp)
(void) chkdq(ip, -btodb(size), cred, FORCE); (void) chkdq(ip, -btodb(size), cred, FORCE);
UFS_LOCK(ump); UFS_LOCK(ump);
#endif #endif
if (reclaimed == 0) { if (reclaimed == 0 && (flags & IO_BUFLOCKED) == 0) {
reclaimed = 1; reclaimed = 1;
softdep_request_cleanup(fs, ITOV(ip), cred, FLUSH_BLOCKS_WAIT); softdep_request_cleanup(fs, ITOV(ip), cred, FLUSH_BLOCKS_WAIT);
goto retry; goto retry;
@ -418,7 +418,7 @@ ffs_realloccg(ip, lbprev, bprev, bpref, osize, nsize, flags, cred, bpp)
/* /*
* no space available * no space available
*/ */
if (reclaimed == 0) { if (reclaimed == 0 && (flags & IO_BUFLOCKED) == 0) {
reclaimed = 1; reclaimed = 1;
UFS_UNLOCK(ump); UFS_UNLOCK(ump);
if (bp) { if (bp) {

View File

@ -105,6 +105,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1]; ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1];
int unwindidx = -1; int unwindidx = -1;
int saved_inbdflush; int saved_inbdflush;
int reclaimed;
ip = VTOI(vp); ip = VTOI(vp);
dp = ip->i_din1; dp = ip->i_din1;
@ -112,6 +113,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
ump = ip->i_ump; ump = ip->i_ump;
lbn = lblkno(fs, startoffset); lbn = lblkno(fs, startoffset);
size = blkoff(fs, startoffset) + size; size = blkoff(fs, startoffset) + size;
reclaimed = 0;
if (size > fs->fs_bsize) if (size > fs->fs_bsize)
panic("ffs_balloc_ufs1: blk too big"); panic("ffs_balloc_ufs1: blk too big");
*bpp = NULL; *bpp = NULL;
@ -276,6 +278,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
/* /*
* Fetch through the indirect blocks, allocating as necessary. * Fetch through the indirect blocks, allocating as necessary.
*/ */
retry:
for (i = 1;;) { for (i = 1;;) {
error = bread(vp, error = bread(vp,
indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp); indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
@ -296,8 +299,15 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
if (pref == 0) if (pref == 0)
pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0); pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
flags, cred, &newb)) != 0) { flags | IO_BUFLOCKED, cred, &newb)) != 0) {
brelse(bp); brelse(bp);
if (++reclaimed == 1) {
UFS_LOCK(ump);
softdep_request_cleanup(fs, vp, cred,
FLUSH_BLOCKS_WAIT);
UFS_UNLOCK(ump);
goto retry;
}
goto fail; goto fail;
} }
nb = newb; nb = newb;
@ -349,10 +359,17 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
if (nb == 0) { if (nb == 0) {
UFS_LOCK(ump); UFS_LOCK(ump);
pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off, &bap[0]); pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off, &bap[0]);
error = ffs_alloc(ip, error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
lbn, pref, (int)fs->fs_bsize, flags, cred, &newb); flags | IO_BUFLOCKED, cred, &newb);
if (error) { if (error) {
brelse(bp); brelse(bp);
if (++reclaimed == 1) {
UFS_LOCK(ump);
softdep_request_cleanup(fs, vp, cred,
FLUSH_BLOCKS_WAIT);
UFS_UNLOCK(ump);
goto retry;
}
goto fail; goto fail;
} }
nb = newb; nb = newb;
@ -506,6 +523,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
int deallocated, osize, nsize, num, i, error; int deallocated, osize, nsize, num, i, error;
int unwindidx = -1; int unwindidx = -1;
int saved_inbdflush; int saved_inbdflush;
int reclaimed;
ip = VTOI(vp); ip = VTOI(vp);
dp = ip->i_din2; dp = ip->i_din2;
@ -513,6 +531,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
ump = ip->i_ump; ump = ip->i_ump;
lbn = lblkno(fs, startoffset); lbn = lblkno(fs, startoffset);
size = blkoff(fs, startoffset) + size; size = blkoff(fs, startoffset) + size;
reclaimed = 0;
if (size > fs->fs_bsize) if (size > fs->fs_bsize)
panic("ffs_balloc_ufs2: blk too big"); panic("ffs_balloc_ufs2: blk too big");
*bpp = NULL; *bpp = NULL;
@ -787,6 +806,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
/* /*
* Fetch through the indirect blocks, allocating as necessary. * Fetch through the indirect blocks, allocating as necessary.
*/ */
retry:
for (i = 1;;) { for (i = 1;;) {
error = bread(vp, error = bread(vp,
indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp); indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
@ -807,8 +827,15 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
if (pref == 0) if (pref == 0)
pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0); pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0);
if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
flags, cred, &newb)) != 0) { flags | IO_BUFLOCKED, cred, &newb)) != 0) {
brelse(bp); brelse(bp);
if (++reclaimed == 1) {
UFS_LOCK(ump);
softdep_request_cleanup(fs, vp, cred,
FLUSH_BLOCKS_WAIT);
UFS_UNLOCK(ump);
goto retry;
}
goto fail; goto fail;
} }
nb = newb; nb = newb;
@ -860,10 +887,17 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
if (nb == 0) { if (nb == 0) {
UFS_LOCK(ump); UFS_LOCK(ump);
pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off, &bap[0]); pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off, &bap[0]);
error = ffs_alloc(ip, error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
lbn, pref, (int)fs->fs_bsize, flags, cred, &newb); flags | IO_BUFLOCKED, cred, &newb);
if (error) { if (error) {
brelse(bp); brelse(bp);
if (++reclaimed == 1) {
UFS_LOCK(ump);
softdep_request_cleanup(fs, vp, cred,
FLUSH_BLOCKS_WAIT);
UFS_UNLOCK(ump);
goto retry;
}
goto fail; goto fail;
} }
nb = newb; nb = newb;

View File

@ -74,6 +74,7 @@ int ffs_isfreeblock(struct fs *, u_char *, ufs1_daddr_t);
void ffs_load_inode(struct buf *, struct inode *, struct fs *, ino_t); void ffs_load_inode(struct buf *, struct inode *, struct fs *, ino_t);
int ffs_mountroot(void); int ffs_mountroot(void);
void ffs_oldfscompat_write(struct fs *, struct ufsmount *); void ffs_oldfscompat_write(struct fs *, struct ufsmount *);
void ffs_pages_remove(struct vnode *vp, vm_pindex_t start, vm_pindex_t end);
int ffs_reallocblks(struct vop_reallocblks_args *); int ffs_reallocblks(struct vop_reallocblks_args *);
int ffs_realloccg(struct inode *, ufs2_daddr_t, ufs2_daddr_t, int ffs_realloccg(struct inode *, ufs2_daddr_t, ufs2_daddr_t,
ufs2_daddr_t, int, int, int, struct ucred *, struct buf **); ufs2_daddr_t, int, int, int, struct ucred *, struct buf **);
@ -107,7 +108,6 @@ extern struct vop_vector ffs_fifoops2;
int softdep_check_suspend(struct mount *, struct vnode *, int softdep_check_suspend(struct mount *, struct vnode *,
int, int, int, int); int, int, int, int);
int softdep_complete_trunc(struct vnode *, void *);
void softdep_get_depcounts(struct mount *, int *, int *); void softdep_get_depcounts(struct mount *, int *, int *);
void softdep_initialize(void); void softdep_initialize(void);
void softdep_uninitialize(void); void softdep_uninitialize(void);
@ -139,14 +139,17 @@ void softdep_setup_blkfree(struct mount *, struct buf *, ufs2_daddr_t, int,
void softdep_setup_inofree(struct mount *, struct buf *, ino_t, void softdep_setup_inofree(struct mount *, struct buf *, ino_t,
struct workhead *); struct workhead *);
void softdep_setup_sbupdate(struct ufsmount *, struct fs *, struct buf *); void softdep_setup_sbupdate(struct ufsmount *, struct fs *, struct buf *);
void *softdep_setup_trunc(struct vnode *vp, off_t length, int flags);
void softdep_fsync_mountdev(struct vnode *); void softdep_fsync_mountdev(struct vnode *);
int softdep_sync_metadata(struct vnode *); int softdep_sync_metadata(struct vnode *);
int softdep_sync_buf(struct vnode *, struct buf *, int);
int softdep_process_worklist(struct mount *, int); int softdep_process_worklist(struct mount *, int);
int softdep_fsync(struct vnode *); int softdep_fsync(struct vnode *);
int softdep_waitidle(struct mount *); int softdep_waitidle(struct mount *);
int softdep_prealloc(struct vnode *, int); int softdep_prealloc(struct vnode *, int);
int softdep_journal_lookup(struct mount *, struct vnode **); int softdep_journal_lookup(struct mount *, struct vnode **);
void softdep_journal_freeblocks(struct inode *, struct ucred *, off_t, int);
void softdep_journal_fsync(struct inode *);
/* /*
* Things to request flushing in softdep_request_cleanup() * Things to request flushing in softdep_request_cleanup()

View File

@ -120,7 +120,7 @@ ffs_update(vp, waitfor)
} }
} }
static void void
ffs_pages_remove(struct vnode *vp, vm_pindex_t start, vm_pindex_t end) ffs_pages_remove(struct vnode *vp, vm_pindex_t start, vm_pindex_t end)
{ {
vm_object_t object; vm_object_t object;
@ -151,12 +151,12 @@ ffs_truncate(vp, length, flags, cred, td)
ufs2_daddr_t bn, lbn, lastblock, lastiblock[NIADDR], indir_lbn[NIADDR]; ufs2_daddr_t bn, lbn, lastblock, lastiblock[NIADDR], indir_lbn[NIADDR];
ufs2_daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR]; ufs2_daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR];
ufs2_daddr_t count, blocksreleased = 0, datablocks; ufs2_daddr_t count, blocksreleased = 0, datablocks;
void *cookie;
struct bufobj *bo; struct bufobj *bo;
struct fs *fs; struct fs *fs;
struct buf *bp; struct buf *bp;
struct ufsmount *ump; struct ufsmount *ump;
int needextclean, softdepslowdown, extblocks; int softdeptrunc, journaltrunc;
int needextclean, extblocks;
int offset, size, level, nblocks; int offset, size, level, nblocks;
int i, error, allerror; int i, error, allerror;
off_t osize; off_t osize;
@ -165,7 +165,6 @@ ffs_truncate(vp, length, flags, cred, td)
fs = ip->i_fs; fs = ip->i_fs;
ump = ip->i_ump; ump = ip->i_ump;
bo = &vp->v_bufobj; bo = &vp->v_bufobj;
cookie = NULL;
ASSERT_VOP_LOCKED(vp, "ffs_truncate"); ASSERT_VOP_LOCKED(vp, "ffs_truncate");
@ -173,6 +172,11 @@ ffs_truncate(vp, length, flags, cred, td)
return (EINVAL); return (EINVAL);
if (length > fs->fs_maxfilesize) if (length > fs->fs_maxfilesize)
return (EFBIG); return (EFBIG);
#ifdef QUOTA
error = getinoquota(ip);
if (error)
return (error);
#endif
/* /*
* Historically clients did not have to specify which data * Historically clients did not have to specify which data
* they were truncating. So, if not specified, we assume * they were truncating. So, if not specified, we assume
@ -191,7 +195,10 @@ ffs_truncate(vp, length, flags, cred, td)
*/ */
allerror = 0; allerror = 0;
needextclean = 0; needextclean = 0;
softdepslowdown = DOINGSOFTDEP(vp) && softdep_slowdown(vp); softdeptrunc = 0;
journaltrunc = DOINGSUJ(vp);
if (journaltrunc == 0 && DOINGSOFTDEP(vp) && length == 0)
softdeptrunc = !softdep_slowdown(vp);
extblocks = 0; extblocks = 0;
datablocks = DIP(ip, i_blocks); datablocks = DIP(ip, i_blocks);
if (fs->fs_magic == FS_UFS2_MAGIC && ip->i_din2->di_extsize > 0) { if (fs->fs_magic == FS_UFS2_MAGIC && ip->i_din2->di_extsize > 0) {
@ -199,27 +206,23 @@ ffs_truncate(vp, length, flags, cred, td)
datablocks -= extblocks; datablocks -= extblocks;
} }
if ((flags & IO_EXT) && extblocks > 0) { if ((flags & IO_EXT) && extblocks > 0) {
if (DOINGSOFTDEP(vp) && softdepslowdown == 0 && length == 0) { if (length != 0)
if ((flags & IO_NORMAL) == 0) { panic("ffs_truncate: partial trunc of extdata");
softdep_setup_freeblocks(ip, length, IO_EXT); if (softdeptrunc || journaltrunc) {
return (0); if ((flags & IO_NORMAL) == 0)
} goto extclean;
needextclean = 1; needextclean = 1;
} else { } else {
if (length != 0)
panic("ffs_truncate: partial trunc of extdata");
if ((error = ffs_syncvnode(vp, MNT_WAIT)) != 0) if ((error = ffs_syncvnode(vp, MNT_WAIT)) != 0)
return (error); return (error);
if (DOINGSUJ(vp))
cookie = softdep_setup_trunc(vp, length, flags);
osize = ip->i_din2->di_extsize;
ip->i_din2->di_blocks -= extblocks;
#ifdef QUOTA #ifdef QUOTA
(void) chkdq(ip, -extblocks, NOCRED, 0); (void) chkdq(ip, -extblocks, NOCRED, 0);
#endif #endif
vinvalbuf(vp, V_ALT, 0, 0); vinvalbuf(vp, V_ALT, 0, 0);
ffs_pages_remove(vp, ffs_pages_remove(vp,
OFF_TO_IDX(lblktosize(fs, -extblocks)), 0); OFF_TO_IDX(lblktosize(fs, -extblocks)), 0);
osize = ip->i_din2->di_extsize;
ip->i_din2->di_blocks -= extblocks;
ip->i_din2->di_extsize = 0; ip->i_din2->di_extsize = 0;
for (i = 0; i < NXADDR; i++) { for (i = 0; i < NXADDR; i++) {
oldblks[i] = ip->i_din2->di_extb[i]; oldblks[i] = ip->i_din2->di_extb[i];
@ -227,7 +230,7 @@ ffs_truncate(vp, length, flags, cred, td)
} }
ip->i_flag |= IN_CHANGE; ip->i_flag |= IN_CHANGE;
if ((error = ffs_update(vp, 1))) if ((error = ffs_update(vp, 1)))
goto out; return (error);
for (i = 0; i < NXADDR; i++) { for (i = 0; i < NXADDR; i++) {
if (oldblks[i] == 0) if (oldblks[i] == 0)
continue; continue;
@ -236,10 +239,8 @@ ffs_truncate(vp, length, flags, cred, td)
} }
} }
} }
if ((flags & IO_NORMAL) == 0) { if ((flags & IO_NORMAL) == 0)
error = 0; return (0);
goto out;
}
if (vp->v_type == VLNK && if (vp->v_type == VLNK &&
(ip->i_size < vp->v_mount->mnt_maxsymlinklen || (ip->i_size < vp->v_mount->mnt_maxsymlinklen ||
datablocks == 0)) { datablocks == 0)) {
@ -252,24 +253,17 @@ ffs_truncate(vp, length, flags, cred, td)
DIP_SET(ip, i_size, 0); DIP_SET(ip, i_size, 0);
ip->i_flag |= IN_CHANGE | IN_UPDATE; ip->i_flag |= IN_CHANGE | IN_UPDATE;
if (needextclean) if (needextclean)
softdep_setup_freeblocks(ip, length, IO_EXT); goto extclean;
error = ffs_update(vp, 1); return ffs_update(vp, 1);
goto out;
} }
if (ip->i_size == length) { if (ip->i_size == length) {
ip->i_flag |= IN_CHANGE | IN_UPDATE; ip->i_flag |= IN_CHANGE | IN_UPDATE;
if (needextclean) if (needextclean)
softdep_setup_freeblocks(ip, length, IO_EXT); goto extclean;
error = ffs_update(vp, 0); return ffs_update(vp, 0);
goto out;
} }
if (fs->fs_ronly) if (fs->fs_ronly)
panic("ffs_truncate: read-only filesystem"); panic("ffs_truncate: read-only filesystem");
#ifdef QUOTA
error = getinoquota(ip);
if (error)
goto out;
#endif
if ((ip->i_flags & SF_SNAPSHOT) != 0) if ((ip->i_flags & SF_SNAPSHOT) != 0)
ffs_snapremove(vp); ffs_snapremove(vp);
vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0; vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0;
@ -285,7 +279,7 @@ ffs_truncate(vp, length, flags, cred, td)
error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp); error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp);
if (error) { if (error) {
vnode_pager_setsize(vp, osize); vnode_pager_setsize(vp, osize);
goto out; return (error);
} }
ip->i_size = length; ip->i_size = length;
DIP_SET(ip, i_size, length); DIP_SET(ip, i_size, length);
@ -296,11 +290,10 @@ ffs_truncate(vp, length, flags, cred, td)
else else
bawrite(bp); bawrite(bp);
ip->i_flag |= IN_CHANGE | IN_UPDATE; ip->i_flag |= IN_CHANGE | IN_UPDATE;
error = ffs_update(vp, 1); return ffs_update(vp, 1);
goto out;
} }
if (DOINGSOFTDEP(vp)) { if (DOINGSOFTDEP(vp)) {
if (length > 0 || softdepslowdown) { if (softdeptrunc == 0 && journaltrunc == 0) {
/* /*
* If a file is only partially truncated, then * If a file is only partially truncated, then
* we have to clean up the data structures * we have to clean up the data structures
@ -311,29 +304,20 @@ ffs_truncate(vp, length, flags, cred, td)
* so that it will have no data structures left. * so that it will have no data structures left.
*/ */
if ((error = ffs_syncvnode(vp, MNT_WAIT)) != 0) if ((error = ffs_syncvnode(vp, MNT_WAIT)) != 0)
goto out; return (error);
/*
* We have to journal the truncation before we change
* any blocks so we don't leave the file partially
* truncated.
*/
if (DOINGSUJ(vp) && cookie == NULL)
cookie = softdep_setup_trunc(vp, length, flags);
} else { } else {
#ifdef QUOTA flags = IO_NORMAL | (needextclean ? IO_EXT: 0);
(void) chkdq(ip, -datablocks, NOCRED, 0); if (journaltrunc)
#endif softdep_journal_freeblocks(ip, cred, length,
softdep_setup_freeblocks(ip, length, needextclean ? flags);
IO_EXT | IO_NORMAL : IO_NORMAL); else
softdep_setup_freeblocks(ip, length, flags);
ASSERT_VOP_LOCKED(vp, "ffs_truncate1"); ASSERT_VOP_LOCKED(vp, "ffs_truncate1");
vinvalbuf(vp, needextclean ? 0 : V_NORMAL, 0, 0); if (journaltrunc == 0) {
if (!needextclean) ip->i_flag |= IN_CHANGE | IN_UPDATE;
ffs_pages_remove(vp, 0, error = ffs_update(vp, 0);
OFF_TO_IDX(lblktosize(fs, -extblocks))); }
vnode_pager_setsize(vp, 0); return (error);
ip->i_flag |= IN_CHANGE | IN_UPDATE;
error = ffs_update(vp, 0);
goto out;
} }
} }
/* /*
@ -353,7 +337,7 @@ ffs_truncate(vp, length, flags, cred, td)
flags |= BA_CLRBUF; flags |= BA_CLRBUF;
error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp); error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp);
if (error) if (error)
goto out; return (error);
/* /*
* When we are doing soft updates and the UFS_BALLOC * When we are doing soft updates and the UFS_BALLOC
* above fills in a direct block hole with a full sized * above fills in a direct block hole with a full sized
@ -365,7 +349,7 @@ ffs_truncate(vp, length, flags, cred, td)
if (DOINGSOFTDEP(vp) && lbn < NDADDR && if (DOINGSOFTDEP(vp) && lbn < NDADDR &&
fragroundup(fs, blkoff(fs, length)) < fs->fs_bsize && fragroundup(fs, blkoff(fs, length)) < fs->fs_bsize &&
(error = ffs_syncvnode(vp, MNT_WAIT)) != 0) (error = ffs_syncvnode(vp, MNT_WAIT)) != 0)
goto out; return (error);
ip->i_size = length; ip->i_size = length;
DIP_SET(ip, i_size, length); DIP_SET(ip, i_size, length);
size = blksize(fs, ip, lbn); size = blksize(fs, ip, lbn);
@ -411,13 +395,7 @@ ffs_truncate(vp, length, flags, cred, td)
DIP_SET(ip, i_db[i], 0); DIP_SET(ip, i_db[i], 0);
} }
ip->i_flag |= IN_CHANGE | IN_UPDATE; ip->i_flag |= IN_CHANGE | IN_UPDATE;
/* allerror = ffs_update(vp, 1);
* When doing softupdate journaling we must preserve the size along
* with the old pointers until they are freed or we might not
* know how many fragments remain.
*/
if (!DOINGSUJ(vp))
allerror = ffs_update(vp, 1);
/* /*
* Having written the new inode to disk, save its new configuration * Having written the new inode to disk, save its new configuration
@ -541,14 +519,14 @@ ffs_truncate(vp, length, flags, cred, td)
#ifdef QUOTA #ifdef QUOTA
(void) chkdq(ip, -blocksreleased, NOCRED, 0); (void) chkdq(ip, -blocksreleased, NOCRED, 0);
#endif #endif
error = allerror; return (allerror);
out:
if (cookie) { extclean:
allerror = softdep_complete_trunc(vp, cookie); if (journaltrunc)
if (allerror != 0 && error == 0) softdep_journal_freeblocks(ip, cred, length, IO_EXT);
error = allerror; else
} softdep_setup_freeblocks(ip, length, IO_EXT);
return (error); return ffs_update(vp, MNT_WAIT);
} }
/* /*

File diff suppressed because it is too large Load Diff

View File

@ -2034,12 +2034,10 @@ ffs_geom_strategy(struct bufobj *bo, struct buf *bp)
static void static void
db_print_ffs(struct ufsmount *ump) db_print_ffs(struct ufsmount *ump)
{ {
db_printf("mp %p %s devvp %p fs %p su_wl %d su_wl_in %d su_deps %d " db_printf("mp %p %s devvp %p fs %p su_wl %d su_deps %d su_req %d\n",
"su_req %d\n",
ump->um_mountp, ump->um_mountp->mnt_stat.f_mntonname, ump->um_mountp, ump->um_mountp->mnt_stat.f_mntonname,
ump->um_devvp, ump->um_fs, ump->softdep_on_worklist, ump->um_devvp, ump->um_fs, ump->softdep_on_worklist,
ump->softdep_on_worklist_inprogress, ump->softdep_deps, ump->softdep_deps, ump->softdep_req);
ump->softdep_req);
} }
DB_SHOW_COMMAND(ffs, db_show_ffs) DB_SHOW_COMMAND(ffs, db_show_ffs)

View File

@ -212,26 +212,32 @@ ffs_fsync(struct vop_fsync_args *ap)
int int
ffs_syncvnode(struct vnode *vp, int waitfor) ffs_syncvnode(struct vnode *vp, int waitfor)
{ {
struct inode *ip = VTOI(vp); struct inode *ip;
struct bufobj *bo; struct bufobj *bo;
struct buf *bp; struct buf *bp;
struct buf *nbp; struct buf *nbp;
int s, error, wait, passes, skipmeta;
ufs_lbn_t lbn; ufs_lbn_t lbn;
int error, wait, passes;
wait = (waitfor == MNT_WAIT); ip = VTOI(vp);
lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1));
bo = &vp->v_bufobj;
ip->i_flag &= ~IN_NEEDSYNC; ip->i_flag &= ~IN_NEEDSYNC;
bo = &vp->v_bufobj;
/*
* When doing MNT_WAIT we must first flush all dependencies
* on the inode.
*/
if (DOINGSOFTDEP(vp) && waitfor == MNT_WAIT &&
(error = softdep_sync_metadata(vp)) != 0)
return (error);
/* /*
* Flush all dirty buffers associated with a vnode. * Flush all dirty buffers associated with a vnode.
*/ */
passes = NIADDR + 1; error = 0;
skipmeta = 0; passes = 0;
if (wait) wait = 0; /* Always do an async pass first. */
skipmeta = 1; lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1));
s = splbio();
BO_LOCK(bo); BO_LOCK(bo);
loop: loop:
TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs)
@ -239,70 +245,53 @@ ffs_syncvnode(struct vnode *vp, int waitfor)
TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
/* /*
* Reasons to skip this buffer: it has already been considered * Reasons to skip this buffer: it has already been considered
* on this pass, this pass is the first time through on a * on this pass, the buffer has dependencies that will cause
* synchronous flush request and the buffer being considered
* is metadata, the buffer has dependencies that will cause
* it to be redirtied and it has not already been deferred, * it to be redirtied and it has not already been deferred,
* or it is already being written. * or it is already being written.
*/ */
if ((bp->b_vflags & BV_SCANNED) != 0) if ((bp->b_vflags & BV_SCANNED) != 0)
continue; continue;
bp->b_vflags |= BV_SCANNED; bp->b_vflags |= BV_SCANNED;
if ((skipmeta == 1 && bp->b_lblkno < 0)) /* Flush indirects in order. */
if (waitfor == MNT_WAIT && bp->b_lblkno <= -NDADDR &&
lbn_level(bp->b_lblkno) >= passes)
continue; continue;
if (bp->b_lblkno > lbn)
panic("ffs_syncvnode: syncing truncated data.");
if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
continue; continue;
BO_UNLOCK(bo); BO_UNLOCK(bo);
if (!wait && !LIST_EMPTY(&bp->b_dep) &&
(bp->b_flags & B_DEFERRED) == 0 &&
buf_countdeps(bp, 0)) {
bp->b_flags |= B_DEFERRED;
BUF_UNLOCK(bp);
BO_LOCK(bo);
continue;
}
if ((bp->b_flags & B_DELWRI) == 0) if ((bp->b_flags & B_DELWRI) == 0)
panic("ffs_fsync: not dirty"); panic("ffs_fsync: not dirty");
/* /*
* If this is a synchronous flush request, or it is not a * Check for dependencies and potentially complete them.
* file or device, start the write on this buffer immediately.
*/ */
if (wait || (vp->v_type != VREG && vp->v_type != VBLK)) { if (!LIST_EMPTY(&bp->b_dep) &&
(error = softdep_sync_buf(vp, bp,
/* wait ? MNT_WAIT : MNT_NOWAIT)) != 0) {
* On our final pass through, do all I/O synchronously /* I/O error. */
* so that we can find out if our flush is failing if (error != EBUSY) {
* because of write errors. BUF_UNLOCK(bp);
*/ return (error);
if (passes > 0 || !wait) {
if ((bp->b_flags & B_CLUSTEROK) && !wait) {
(void) vfs_bio_awrite(bp);
} else {
bremfree(bp);
splx(s);
(void) bawrite(bp);
s = splbio();
}
} else {
bremfree(bp);
splx(s);
if ((error = bwrite(bp)) != 0)
return (error);
s = splbio();
} }
} else if ((vp->v_type == VREG) && (bp->b_lblkno >= lbn)) { /* If we deferred once, don't defer again. */
/* if ((bp->b_flags & B_DEFERRED) == 0) {
* If the buffer is for data that has been truncated bp->b_flags |= B_DEFERRED;
* off the file, then throw it away. BUF_UNLOCK(bp);
*/ goto next;
}
}
if (wait) {
bremfree(bp); bremfree(bp);
bp->b_flags |= B_INVAL | B_NOCACHE; if ((error = bwrite(bp)) != 0)
splx(s); return (error);
brelse(bp); } else if ((bp->b_flags & B_CLUSTEROK)) {
s = splbio(); (void) vfs_bio_awrite(bp);
} else } else {
vfs_bio_awrite(bp); bremfree(bp);
(void) bawrite(bp);
}
next:
/* /*
* Since we may have slept during the I/O, we need * Since we may have slept during the I/O, we need
* to start from a known point. * to start from a known point.
@ -310,51 +299,44 @@ ffs_syncvnode(struct vnode *vp, int waitfor)
BO_LOCK(bo); BO_LOCK(bo);
nbp = TAILQ_FIRST(&bo->bo_dirty.bv_hd); nbp = TAILQ_FIRST(&bo->bo_dirty.bv_hd);
} }
/* if (waitfor != MNT_WAIT) {
* If we were asked to do this synchronously, then go back for
* another pass, this time doing the metadata.
*/
if (skipmeta) {
skipmeta = 0;
goto loop;
}
if (wait) {
bufobj_wwait(bo, 3, 0);
BO_UNLOCK(bo); BO_UNLOCK(bo);
return (ffs_update(vp, waitfor));
/* }
* Ensure that any filesystem metatdata associated /* Drain IO to see if we're done. */
* with the vnode has been written. bufobj_wwait(bo, 0, 0);
*/ /*
splx(s); * Block devices associated with filesystems may have new I/O
if ((error = softdep_sync_metadata(vp)) != 0) * requests posted for them even if the vnode is locked, so no
return (error); * amount of trying will get them clean. We make several passes
s = splbio(); * as a best effort.
*
BO_LOCK(bo); * Regular files may need multiple passes to flush all dependency
if (bo->bo_dirty.bv_cnt > 0) { * work as it is possible that we must write once per indirect
/* * level, once for the leaf, and once for the inode and each of
* Block devices associated with filesystems may * these will be done with one sync and one async pass.
* have new I/O requests posted for them even if */
* the vnode is locked, so no amount of trying will if (bo->bo_dirty.bv_cnt > 0) {
* get them clean. Thus we give block devices a /* Write the inode after sync passes to flush deps. */
* good effort, then just give up. For all other file if (wait && DOINGSOFTDEP(vp)) {
* types, go around and try again until it is clean. BO_UNLOCK(bo);
*/ ffs_update(vp, MNT_WAIT);
if (passes > 0) { BO_LOCK(bo);
passes -= 1;
goto loop;
}
#ifdef INVARIANTS
if (!vn_isdisk(vp, NULL))
vprint("ffs_fsync: dirty", vp);
#endif
} }
/* switch between sync/async. */
wait = !wait;
if (wait == 1 || ++passes < NIADDR + 2)
goto loop;
#ifdef INVARIANTS
if (!vn_isdisk(vp, NULL))
vprint("ffs_fsync: dirty", vp);
#endif
} }
BO_UNLOCK(bo); BO_UNLOCK(bo);
splx(s); error = ffs_update(vp, MNT_WAIT);
return (ffs_update(vp, wait)); if (DOINGSUJ(vp))
softdep_journal_fsync(VTOI(vp));
return (error);
} }
static int static int

View File

@ -664,6 +664,7 @@ lbn_offset(struct fs *fs, int level)
#define JOP_FREEBLK 4 /* Free a block or a tree of blocks. */ #define JOP_FREEBLK 4 /* Free a block or a tree of blocks. */
#define JOP_MVREF 5 /* Move a reference from one off to another. */ #define JOP_MVREF 5 /* Move a reference from one off to another. */
#define JOP_TRUNC 6 /* Partial truncation record. */ #define JOP_TRUNC 6 /* Partial truncation record. */
#define JOP_SYNC 7 /* fsync() complete record. */
#define JREC_SIZE 32 /* Record and segment header size. */ #define JREC_SIZE 32 /* Record and segment header size. */
@ -729,7 +730,7 @@ struct jblkrec {
/* /*
* Truncation record. Records a partial truncation so that it may be * Truncation record. Records a partial truncation so that it may be
* completed later. * completed at check time. Also used for sync records.
*/ */
struct jtrncrec { struct jtrncrec {
uint32_t jt_op; uint32_t jt_op;

View File

@ -127,7 +127,7 @@
#define DIRCHG 0x000080 /* diradd, dirrem only */ #define DIRCHG 0x000080 /* diradd, dirrem only */
#define GOINGAWAY 0x000100 /* indirdep, jremref only */ #define GOINGAWAY 0x000100 /* indirdep, jremref only */
#define IOSTARTED 0x000200 /* inodedep, pagedep, bmsafemap only */ #define IOSTARTED 0x000200 /* inodedep, pagedep, bmsafemap only */
#define UNUSED400 0x000400 /* currently available. */ #define DELAYEDFREE 0x000400 /* allocindirect free delayed. */
#define NEWBLOCK 0x000800 /* pagedep, jaddref only */ #define NEWBLOCK 0x000800 /* pagedep, jaddref only */
#define INPROGRESS 0x001000 /* dirrem, freeblks, freefrag, freefile only */ #define INPROGRESS 0x001000 /* dirrem, freeblks, freefrag, freefile only */
#define UFS1FMT 0x002000 /* indirdep only */ #define UFS1FMT 0x002000 /* indirdep only */
@ -195,8 +195,9 @@ struct worklist {
#define WK_JFREEBLK(wk) ((struct jfreeblk *)(wk)) #define WK_JFREEBLK(wk) ((struct jfreeblk *)(wk))
#define WK_FREEDEP(wk) ((struct freedep *)(wk)) #define WK_FREEDEP(wk) ((struct freedep *)(wk))
#define WK_JFREEFRAG(wk) ((struct jfreefrag *)(wk)) #define WK_JFREEFRAG(wk) ((struct jfreefrag *)(wk))
#define WK_SBDEP(wk) ((struct sbdep *)wk) #define WK_SBDEP(wk) ((struct sbdep *)(wk))
#define WK_JTRUNC(wk) ((struct jtrunc *)(wk)) #define WK_JTRUNC(wk) ((struct jtrunc *)(wk))
#define WK_JFSYNC(wk) ((struct jfsync *)(wk))
/* /*
* Various types of lists * Various types of lists
@ -213,10 +214,12 @@ LIST_HEAD(jaddrefhd, jaddref);
LIST_HEAD(jremrefhd, jremref); LIST_HEAD(jremrefhd, jremref);
LIST_HEAD(jmvrefhd, jmvref); LIST_HEAD(jmvrefhd, jmvref);
LIST_HEAD(jnewblkhd, jnewblk); LIST_HEAD(jnewblkhd, jnewblk);
LIST_HEAD(jfreeblkhd, jfreeblk); LIST_HEAD(jblkdephd, jblkdep);
LIST_HEAD(freeworkhd, freework); LIST_HEAD(freeworkhd, freework);
TAILQ_HEAD(freeworklst, freework);
TAILQ_HEAD(jseglst, jseg); TAILQ_HEAD(jseglst, jseg);
TAILQ_HEAD(inoreflst, inoref); TAILQ_HEAD(inoreflst, inoref);
TAILQ_HEAD(freeblklst, freeblks);
/* /*
* The "pagedep" structure tracks the various dependencies related to * The "pagedep" structure tracks the various dependencies related to
@ -321,6 +324,7 @@ struct inodedep {
struct allocdirectlst id_newinoupdt; /* updates when inode written */ struct allocdirectlst id_newinoupdt; /* updates when inode written */
struct allocdirectlst id_extupdt; /* extdata updates pre-inode write */ struct allocdirectlst id_extupdt; /* extdata updates pre-inode write */
struct allocdirectlst id_newextupdt; /* extdata updates at ino write */ struct allocdirectlst id_newextupdt; /* extdata updates at ino write */
struct freeblklst id_freeblklst; /* List of partial truncates. */
union { union {
struct ufs1_dinode *idu_savedino1; /* saved ufs1_dinode contents */ struct ufs1_dinode *idu_savedino1; /* saved ufs1_dinode contents */
struct ufs2_dinode *idu_savedino2; /* saved ufs2_dinode contents */ struct ufs2_dinode *idu_savedino2; /* saved ufs2_dinode contents */
@ -342,8 +346,9 @@ struct inodedep {
struct bmsafemap { struct bmsafemap {
struct worklist sm_list; /* cylgrp buffer */ struct worklist sm_list; /* cylgrp buffer */
# define sm_state sm_list.wk_state # define sm_state sm_list.wk_state
int sm_cg;
LIST_ENTRY(bmsafemap) sm_hash; /* Hash links. */ LIST_ENTRY(bmsafemap) sm_hash; /* Hash links. */
LIST_ENTRY(bmsafemap) sm_next; /* Mount list. */
int sm_cg;
struct buf *sm_buf; /* associated buffer */ struct buf *sm_buf; /* associated buffer */
struct allocdirecthd sm_allocdirecthd; /* allocdirect deps */ struct allocdirecthd sm_allocdirecthd; /* allocdirect deps */
struct allocdirecthd sm_allocdirectwr; /* writing allocdirect deps */ struct allocdirecthd sm_allocdirectwr; /* writing allocdirect deps */
@ -355,6 +360,8 @@ struct bmsafemap {
struct newblkhd sm_newblkwr; /* writing newblk deps */ struct newblkhd sm_newblkwr; /* writing newblk deps */
struct jaddrefhd sm_jaddrefhd; /* Pending inode allocations. */ struct jaddrefhd sm_jaddrefhd; /* Pending inode allocations. */
struct jnewblkhd sm_jnewblkhd; /* Pending block allocations. */ struct jnewblkhd sm_jnewblkhd; /* Pending block allocations. */
struct workhead sm_freehd; /* Freedep deps. */
struct workhead sm_freewr; /* Written freedeps. */
}; };
/* /*
@ -442,14 +449,15 @@ struct indirdep {
struct worklist ir_list; /* buffer holding indirect block */ struct worklist ir_list; /* buffer holding indirect block */
# define ir_state ir_list.wk_state /* indirect block pointer state */ # define ir_state ir_list.wk_state /* indirect block pointer state */
LIST_ENTRY(indirdep) ir_next; /* alloc{direct,indir} list */ LIST_ENTRY(indirdep) ir_next; /* alloc{direct,indir} list */
TAILQ_HEAD(, freework) ir_trunc; /* List of truncations. */
caddr_t ir_saveddata; /* buffer cache contents */ caddr_t ir_saveddata; /* buffer cache contents */
struct buf *ir_savebp; /* buffer holding safe copy */ struct buf *ir_savebp; /* buffer holding safe copy */
struct buf *ir_bp; /* buffer holding live copy */
struct allocindirhd ir_completehd; /* waiting for indirdep complete */ struct allocindirhd ir_completehd; /* waiting for indirdep complete */
struct allocindirhd ir_writehd; /* Waiting for the pointer write. */ struct allocindirhd ir_writehd; /* Waiting for the pointer write. */
struct allocindirhd ir_donehd; /* done waiting to update safecopy */ struct allocindirhd ir_donehd; /* done waiting to update safecopy */
struct allocindirhd ir_deplisthd; /* allocindir deps for this block */ struct allocindirhd ir_deplisthd; /* allocindir deps for this block */
struct jnewblkhd ir_jnewblkhd; /* Canceled block allocations. */ struct freeblks *ir_freeblks; /* Freeblks that frees this indir. */
struct workhead ir_jwork; /* Journal work pending. */
}; };
/* /*
@ -471,6 +479,7 @@ struct allocindir {
LIST_ENTRY(allocindir) ai_next; /* indirdep's list of allocindir's */ LIST_ENTRY(allocindir) ai_next; /* indirdep's list of allocindir's */
struct indirdep *ai_indirdep; /* address of associated indirdep */ struct indirdep *ai_indirdep; /* address of associated indirdep */
ufs2_daddr_t ai_oldblkno; /* old value of block pointer */ ufs2_daddr_t ai_oldblkno; /* old value of block pointer */
ufs_lbn_t ai_lbn; /* Logical block number. */
int ai_offset; /* Pointer offset in parent. */ int ai_offset; /* Pointer offset in parent. */
}; };
#define ai_newblkno ai_block.nb_newblkno #define ai_newblkno ai_block.nb_newblkno
@ -516,14 +525,22 @@ struct freefrag {
struct freeblks { struct freeblks {
struct worklist fb_list; /* id_inowait or delayed worklist */ struct worklist fb_list; /* id_inowait or delayed worklist */
# define fb_state fb_list.wk_state /* inode and dirty block state */ # define fb_state fb_list.wk_state /* inode and dirty block state */
struct jfreeblkhd fb_jfreeblkhd; /* Journal entries pending */ TAILQ_ENTRY(freeblks) fb_next; /* List of inode truncates. */
struct jblkdephd fb_jblkdephd; /* Journal entries pending */
struct workhead fb_freeworkhd; /* Work items pending */ struct workhead fb_freeworkhd; /* Work items pending */
struct workhead fb_jwork; /* Journal work pending */ struct workhead fb_jwork; /* Journal work pending */
ino_t fb_previousinum; /* inode of previous owner of blocks */
uid_t fb_uid; /* uid of previous owner of blocks */
struct vnode *fb_devvp; /* filesystem device vnode */ struct vnode *fb_devvp; /* filesystem device vnode */
ufs2_daddr_t fb_chkcnt; /* used to check cnt of blks released */ #ifdef QUOTA
struct dquot *fb_quota[MAXQUOTAS]; /* quotas to be adjusted */
#endif
uint64_t fb_modrev; /* Inode revision at start of trunc. */
off_t fb_len; /* Length we're truncating to. */
ufs2_daddr_t fb_chkcnt; /* Expected blks released. */
ufs2_daddr_t fb_freecnt; /* Actual blocks released. */
ino_t fb_inum; /* inode owner of blocks */
uid_t fb_uid; /* uid of previous owner of blocks */
int fb_ref; /* Children outstanding. */ int fb_ref; /* Children outstanding. */
int fb_cgwait; /* cg writes outstanding. */
}; };
/* /*
@ -538,16 +555,18 @@ struct freeblks {
struct freework { struct freework {
struct worklist fw_list; /* Delayed worklist. */ struct worklist fw_list; /* Delayed worklist. */
# define fw_state fw_list.wk_state # define fw_state fw_list.wk_state
LIST_ENTRY(freework) fw_next; /* For seg journal list. */ LIST_ENTRY(freework) fw_segs; /* Seg list. */
struct jnewblk *fw_jnewblk; /* Journal entry to cancel. */ TAILQ_ENTRY(freework) fw_next; /* Hash/Trunc list. */
struct jnewblk *fw_jnewblk; /* Journal entry to cancel. */
struct freeblks *fw_freeblks; /* Root of operation. */ struct freeblks *fw_freeblks; /* Root of operation. */
struct freework *fw_parent; /* Parent indirect. */ struct freework *fw_parent; /* Parent indirect. */
struct indirdep *fw_indir; /* indirect block. */
ufs2_daddr_t fw_blkno; /* Our block #. */ ufs2_daddr_t fw_blkno; /* Our block #. */
ufs_lbn_t fw_lbn; /* Original lbn before free. */ ufs_lbn_t fw_lbn; /* Original lbn before free. */
int fw_frags; /* Number of frags. */ uint16_t fw_frags; /* Number of frags. */
int fw_ref; /* Number of children out. */ uint16_t fw_ref; /* Number of children out. */
int fw_off; /* Current working position. */ uint16_t fw_off; /* Current working position. */
struct workhead fw_jwork; /* Journal work pending. */ uint16_t fw_start; /* Start of partial truncate. */
}; };
/* /*
@ -674,6 +693,7 @@ struct dirrem {
LIST_ENTRY(dirrem) dm_inonext; /* inodedep's list of dirrem's */ LIST_ENTRY(dirrem) dm_inonext; /* inodedep's list of dirrem's */
struct jremrefhd dm_jremrefhd; /* Pending remove reference deps. */ struct jremrefhd dm_jremrefhd; /* Pending remove reference deps. */
ino_t dm_oldinum; /* inum of the removed dir entry */ ino_t dm_oldinum; /* inum of the removed dir entry */
doff_t dm_offset; /* offset of removed dir entry in blk */
union { union {
struct pagedep *dmu_pagedep; /* pagedep dependency for remove */ struct pagedep *dmu_pagedep; /* pagedep dependency for remove */
ino_t dmu_dirinum; /* parent inode number (for rmdir) */ ino_t dmu_dirinum; /* parent inode number (for rmdir) */
@ -707,7 +727,7 @@ struct dirrem {
*/ */
struct newdirblk { struct newdirblk {
struct worklist db_list; /* id_inowait or pg_newdirblk */ struct worklist db_list; /* id_inowait or pg_newdirblk */
# define db_state db_list.wk_state /* unused */ # define db_state db_list.wk_state
struct pagedep *db_pagedep; /* associated pagedep */ struct pagedep *db_pagedep; /* associated pagedep */
struct workhead db_mkdir; struct workhead db_mkdir;
}; };
@ -807,29 +827,36 @@ struct jnewblk {
# define jn_state jn_list.wk_state # define jn_state jn_list.wk_state
struct jsegdep *jn_jsegdep; /* Will track our journal record. */ struct jsegdep *jn_jsegdep; /* Will track our journal record. */
LIST_ENTRY(jnewblk) jn_deps; /* Jnewblks on sm_jnewblkhd. */ LIST_ENTRY(jnewblk) jn_deps; /* Jnewblks on sm_jnewblkhd. */
LIST_ENTRY(jnewblk) jn_indirdeps; /* Jnewblks on ir_jnewblkhd. */
struct worklist *jn_dep; /* Dependency to ref completed seg. */ struct worklist *jn_dep; /* Dependency to ref completed seg. */
ino_t jn_ino; /* Ino to which allocated. */
ufs_lbn_t jn_lbn; /* Lbn to which allocated. */ ufs_lbn_t jn_lbn; /* Lbn to which allocated. */
ufs2_daddr_t jn_blkno; /* Blkno allocated */ ufs2_daddr_t jn_blkno; /* Blkno allocated */
ino_t jn_ino; /* Ino to which allocated. */
int jn_oldfrags; /* Previous fragments when extended. */ int jn_oldfrags; /* Previous fragments when extended. */
int jn_frags; /* Number of fragments. */ int jn_frags; /* Number of fragments. */
}; };
/*
* A "jblkdep" structure tracks jfreeblk and jtrunc records attached to a
* freeblks structure.
*/
struct jblkdep {
struct worklist jb_list; /* For softdep journal pending. */
struct jsegdep *jb_jsegdep; /* Reference to the jseg. */
struct freeblks *jb_freeblks; /* Back pointer to freeblks. */
LIST_ENTRY(jblkdep) jb_deps; /* Dep list on freeblks. */
};
/* /*
* A "jfreeblk" structure tracks the journal write for freeing a block * A "jfreeblk" structure tracks the journal write for freeing a block
* or tree of blocks. The block pointer must not be cleared in the inode * or tree of blocks. The block pointer must not be cleared in the inode
* or indirect prior to the jfreeblk being written to the journal. * or indirect prior to the jfreeblk being written to the journal.
*/ */
struct jfreeblk { struct jfreeblk {
struct worklist jf_list; /* Linked to softdep_journal_pending. */ struct jblkdep jf_dep; /* freeblks linkage. */
# define jf_state jf_list.wk_state
struct jsegdep *jf_jsegdep; /* Will track our journal record. */
struct freeblks *jf_freeblks; /* Back pointer to freeblks. */
LIST_ENTRY(jfreeblk) jf_deps; /* Jfreeblk on fb_jfreeblkhd. */
ino_t jf_ino; /* Ino from which blocks freed. */
ufs_lbn_t jf_lbn; /* Lbn from which blocks freed. */ ufs_lbn_t jf_lbn; /* Lbn from which blocks freed. */
ufs2_daddr_t jf_blkno; /* Blkno being freed. */ ufs2_daddr_t jf_blkno; /* Blkno being freed. */
ino_t jf_ino; /* Ino from which blocks freed. */
int jf_frags; /* Number of frags being freed. */ int jf_frags; /* Number of frags being freed. */
}; };
@ -843,24 +870,31 @@ struct jfreefrag {
# define fr_state fr_list.wk_state # define fr_state fr_list.wk_state
struct jsegdep *fr_jsegdep; /* Will track our journal record. */ struct jsegdep *fr_jsegdep; /* Will track our journal record. */
struct freefrag *fr_freefrag; /* Back pointer to freefrag. */ struct freefrag *fr_freefrag; /* Back pointer to freefrag. */
ino_t fr_ino; /* Ino from which frag freed. */
ufs_lbn_t fr_lbn; /* Lbn from which frag freed. */ ufs_lbn_t fr_lbn; /* Lbn from which frag freed. */
ufs2_daddr_t fr_blkno; /* Blkno being freed. */ ufs2_daddr_t fr_blkno; /* Blkno being freed. */
ino_t fr_ino; /* Ino from which frag freed. */
int fr_frags; /* Size of frag being freed. */ int fr_frags; /* Size of frag being freed. */
}; };
/* /*
* A "jtrunc" journals the intent to truncate an inode to a non-zero * A "jtrunc" journals the intent to truncate an inode's data or extent area.
* value. This is done synchronously prior to the synchronous partial
* truncation process. The jsegdep is not released until the truncation
* is complete and the truncated inode is fsync'd.
*/ */
struct jtrunc { struct jtrunc {
struct worklist jt_list; /* Linked to softdep_journal_pending. */ struct jblkdep jt_dep; /* freeblks linkage. */
struct jsegdep *jt_jsegdep; /* Will track our journal record. */ off_t jt_size; /* Final file size. */
ino_t jt_ino; /* Ino being truncated. */ int jt_extsize; /* Final extent size. */
off_t jt_size; /* Final file size. */ ino_t jt_ino; /* Ino being truncated. */
int jt_extsize; /* Final extent size. */ };
/*
* A "jfsync" journals the completion of an fsync which invalidates earlier
* jtrunc records in the journal.
*/
struct jfsync {
struct worklist jfs_list; /* For softdep journal pending. */
off_t jfs_size; /* Sync file size. */
int jfs_extsize; /* Sync extent size. */
ino_t jfs_ino; /* ino being synced. */
}; };
/* /*

View File

@ -127,6 +127,8 @@ struct inode {
#define IN_EA_LOCKED 0x0200 #define IN_EA_LOCKED 0x0200
#define IN_EA_LOCKWAIT 0x0400 #define IN_EA_LOCKWAIT 0x0400
#define IN_TRUNCATED 0x0800 /* Journaled truncation pending. */
#define i_devvp i_ump->um_devvp #define i_devvp i_ump->um_devvp
#define i_umbufobj i_ump->um_bo #define i_umbufobj i_ump->um_bo
#define i_dirhash i_un.dirhash #define i_dirhash i_un.dirhash

View File

@ -61,6 +61,7 @@ struct jblocks;
struct inodedep; struct inodedep;
TAILQ_HEAD(inodedeplst, inodedep); TAILQ_HEAD(inodedeplst, inodedep);
LIST_HEAD(bmsafemaphd, bmsafemap);
/* This structure describes the UFS specific mount structure data. */ /* This structure describes the UFS specific mount structure data. */
struct ufsmount { struct ufsmount {
@ -82,10 +83,10 @@ struct ufsmount {
struct workhead softdep_journal_pending; /* journal work queue */ struct workhead softdep_journal_pending; /* journal work queue */
struct worklist *softdep_journal_tail; /* Tail pointer for above */ struct worklist *softdep_journal_tail; /* Tail pointer for above */
struct jblocks *softdep_jblocks; /* Journal block information */ struct jblocks *softdep_jblocks; /* Journal block information */
struct inodedeplst softdep_unlinked; /* Unlinked inodes */ struct inodedeplst softdep_unlinked; /* Unlinked inodes */
struct bmsafemaphd softdep_dirtycg; /* Dirty CGs */
int softdep_on_journal; /* Items on the journal list */ int softdep_on_journal; /* Items on the journal list */
int softdep_on_worklist; /* Items on the worklist */ int softdep_on_worklist; /* Items on the worklist */
int softdep_on_worklist_inprogress; /* Busy items on worklist */
int softdep_deps; /* Total dependency count */ int softdep_deps; /* Total dependency count */
int softdep_accdeps; /* accumulated dep count */ int softdep_accdeps; /* accumulated dep count */
int softdep_req; /* Wakeup when deps hits 0. */ int softdep_req; /* Wakeup when deps hits 0. */