From 045528c230abc49a8b1b1393bc45366a302fa5bf Mon Sep 17 00:00:00 2001
From: jeff
Date: Mon, 20 Jun 2011 03:25:09 +0000
Subject: [PATCH] - Fix directory count rollbacks by passing the mode to the
 journal dep earlier.

 - Add rollback/forward code for frag and cluster accounting.
 - Handle the FREEDEP case in softdep_sync_buf().  (submitted by pho)
---
 sys/ufs/ffs/ffs_alloc.c   |   2 +-
 sys/ufs/ffs/ffs_extern.h  |   2 +-
 sys/ufs/ffs/ffs_softdep.c | 207 +++++++++++++++++++++++++++++---------
 3 files changed, 164 insertions(+), 47 deletions(-)

diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c
index 7d7866c95b78..6d27ace13c7e 100644
--- a/sys/ufs/ffs/ffs_alloc.c
+++ b/sys/ufs/ffs/ffs_alloc.c
@@ -1829,7 +1829,7 @@ ffs_nodealloccg(ip, cg, ipref, mode, unused)
 	}
 	UFS_UNLOCK(ump);
 	if (DOINGSOFTDEP(ITOV(ip)))
-		softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref);
+		softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref, mode);
 	bdwrite(bp);
 	if (ibp != NULL)
 		bawrite(ibp);
diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h
index 1fd01444cdee..70bcf1df4f79 100644
--- a/sys/ufs/ffs/ffs_extern.h
+++ b/sys/ufs/ffs/ffs_extern.h
@@ -130,7 +130,7 @@ void	softdep_freefile(struct vnode *, ino_t, int);
 int	softdep_request_cleanup(struct fs *, struct vnode *,
 	    struct ucred *, int);
 void	softdep_setup_freeblocks(struct inode *, off_t, int);
-void	softdep_setup_inomapdep(struct buf *, struct inode *, ino_t);
+void	softdep_setup_inomapdep(struct buf *, struct inode *, ino_t, int);
 void	softdep_setup_blkmapdep(struct buf *, struct mount *, ufs2_daddr_t,
 	    int, int);
 void	softdep_setup_allocdirect(struct inode *, ufs_lbn_t, ufs2_daddr_t,
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
index 3734a5d55d55..a10104d9b837 100644
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -142,10 +142,11 @@ softdep_setup_sbupdate(ump, fs, bp)
 }
 
 void
-softdep_setup_inomapdep(bp, ip, newinum)
+softdep_setup_inomapdep(bp, ip, newinum, mode)
 	struct buf *bp;
 	struct inode *ip;
 	ino_t newinum;
+	int mode;
 {
 
 	panic("softdep_setup_inomapdep called");
@@ -789,6 +790,8 @@ static	void diradd_inode_written(struct diradd *, struct inodedep *);
 static	int handle_written_indirdep(struct indirdep *, struct buf *,
 	    struct buf**);
 static	int handle_written_inodeblock(struct inodedep *, struct buf *);
+static	int jnewblk_rollforward(struct jnewblk *, struct fs *, struct cg *,
+	    uint8_t *);
 static	int handle_written_bmsafemap(struct bmsafemap *, struct buf *);
 static	void handle_written_jaddref(struct jaddref *);
 static	void handle_written_jremref(struct jremref *);
@@ -820,6 +823,8 @@ static	void handle_allocindir_partdone(struct allocindir *);
 static	void initiate_write_filepage(struct pagedep *, struct buf *);
 static	void initiate_write_indirdep(struct indirdep*, struct buf *);
 static	void handle_written_mkdir(struct mkdir *, int);
+static	int jnewblk_rollback(struct jnewblk *, struct fs *, struct cg *,
+	    uint8_t *);
 static	void initiate_write_bmsafemap(struct bmsafemap *, struct buf *);
 static	void initiate_write_inodeblock_ufs1(struct inodedep *, struct buf *);
 static	void initiate_write_inodeblock_ufs2(struct inodedep *, struct buf *);
@@ -935,6 +940,7 @@ static	void wake_worklist(struct worklist *);
 static	void wait_worklist(struct worklist *, char *);
 static	void remove_from_worklist(struct worklist *);
 static	void softdep_flush(void);
+static	void softdep_flushjournal(struct mount *);
 static	int softdep_speedup(void);
 static	void worklist_speedup(void);
 static	int journal_mount(struct mount *, struct fs *, struct ucred *);
@@ -3046,6 +3052,25 @@ jfsync_write(jfsync, jseg, data)
 	rec->jt_extsize = jfsync->jfs_extsize;
 }
 
+static void
+softdep_flushjournal(mp)
+	struct mount *mp;
+{
+	struct jblocks *jblocks;
+	struct ufsmount *ump;
+
+	if ((mp->mnt_kern_flag & MNTK_SUJ) == 0)
+		return;
+	ump = VFSTOUFS(mp);
+	jblocks = ump->softdep_jblocks;
+	ACQUIRE_LOCK(&lk);
+	while (ump->softdep_on_journal) {
+		jblocks->jb_needseg = 1;
+		softdep_process_journal(mp, NULL, MNT_WAIT);
+	}
+	FREE_LOCK(&lk);
+}
+
 /*
  * Flush some journal records to disk.
  */
@@ -4310,7 +4335,6 @@ softdep_setup_create(dp, ip)
 		    inoreflst);
 		KASSERT(jaddref != NULL && jaddref->ja_parent == dp->i_number,
 		    ("softdep_setup_create: No addref structure present."));
-		jaddref->ja_mode = ip->i_mode;
 	}
 	softdep_prelink(dvp, NULL);
 	FREE_LOCK(&lk);
@@ -4417,7 +4441,6 @@ softdep_setup_mkdir(dp, ip)
 		KASSERT(jaddref->ja_parent == dp->i_number,
 		    ("softdep_setup_mkdir: bad parent %d",
 		    jaddref->ja_parent));
-		jaddref->ja_mode = ip->i_mode;
 		TAILQ_INSERT_BEFORE(&jaddref->ja_ref, &dotaddref->ja_ref,
 		    if_deps);
 	}
@@ -4637,10 +4660,11 @@ softdep_revert_rmdir(dp, ip)
  * Called just after updating the cylinder group block to allocate an inode.
  */
 void
-softdep_setup_inomapdep(bp, ip, newinum)
+softdep_setup_inomapdep(bp, ip, newinum, mode)
 	struct buf *bp;		/* buffer for cylgroup block with inode map */
 	struct inode *ip;	/* inode related to allocation */
 	ino_t newinum;		/* new inode number being allocated */
+	int mode;
 {
 	struct inodedep *inodedep;
 	struct bmsafemap *bmsafemap;
@@ -4657,7 +4681,7 @@ softdep_setup_inomapdep(bp, ip, newinum)
 	 * can be dependent on it.
 	 */
 	if (mp->mnt_kern_flag & MNTK_SUJ) {
-		jaddref = newjaddref(ip, newinum, 0, 0, 0);
+		jaddref = newjaddref(ip, newinum, 0, 0, mode);
 		jaddref->ja_state |= NEWBLOCK;
 	}
 
@@ -5014,14 +5038,12 @@ jnewblk_merge(new, old, wkhd)
 	if (jnewblk->jn_blkno != njnewblk->jn_blkno)
 		panic("jnewblk_merge: Merging disparate blocks.");
 	/*
-	 * The record may be rolled back in the cg update bits
-	 * appropriately. NEWBLOCK here alerts the cg rollback code
-	 * that the frag bits have changed.
+	 * The record may be rolled back in the cg.
 	 */
 	if (jnewblk->jn_state & UNDONE) {
-		njnewblk->jn_state |= UNDONE | NEWBLOCK;
-		njnewblk->jn_state &= ~ATTACHED;
 		jnewblk->jn_state &= ~UNDONE;
+		njnewblk->jn_state |= UNDONE;
+		njnewblk->jn_state &= ~ATTACHED;
 	}
 	/*
 	 * We modify the newer addref and free the older so that if neither
@@ -10233,6 +10255,70 @@ softdep_setup_blkfree(mp, bp, blkno, frags, wkhd)
 	FREE_LOCK(&lk);
 }
 
+/*
+ * Revert a block allocation when the journal record that describes it
+ * is not yet written.
+ */
+static int
+jnewblk_rollback(jnewblk, fs, cgp, blksfree)
+	struct jnewblk *jnewblk;
+	struct fs *fs;
+	struct cg *cgp;
+	uint8_t *blksfree;
+{
+	ufs1_daddr_t fragno;
+	long cgbno, bbase;
+	int frags, blk;
+	int i;
+
+	frags = 0;
+	cgbno = dtogd(fs, jnewblk->jn_blkno);
+	/*
+	 * We have to test which frags need to be rolled back.  We may
+	 * be operating on a stale copy when doing background writes.
+	 */
+	for (i = jnewblk->jn_oldfrags; i < jnewblk->jn_frags; i++)
+		if (isclr(blksfree, cgbno + i))
+			frags++;
+	if (frags == 0)
+		return (0);
+	/*
+	 * This is mostly ffs_blkfree() sans some validation and
+	 * superblock updates.
+	 */
+	if (frags == fs->fs_frag) {
+		fragno = fragstoblks(fs, cgbno);
+		ffs_setblock(fs, blksfree, fragno);
+		ffs_clusteracct(fs, cgp, fragno, 1);
+		cgp->cg_cs.cs_nbfree++;
+	} else {
+		cgbno += jnewblk->jn_oldfrags;
+		bbase = cgbno - fragnum(fs, cgbno);
+		/* Decrement the old frags.  */
+		blk = blkmap(fs, blksfree, bbase);
+		ffs_fragacct(fs, blk, cgp->cg_frsum, -1);
+		/* Deallocate the fragment */
+		for (i = 0; i < frags; i++)
+			setbit(blksfree, cgbno + i);
+		cgp->cg_cs.cs_nffree += frags;
+		/* Add back in counts associated with the new frags */
+		blk = blkmap(fs, blksfree, bbase);
+		ffs_fragacct(fs, blk, cgp->cg_frsum, 1);
+		/* If a complete block has been reassembled, account for it. */
+		fragno = fragstoblks(fs, bbase);
+		if (ffs_isblock(fs, blksfree, fragno)) {
+			cgp->cg_cs.cs_nffree -= fs->fs_frag;
+			ffs_clusteracct(fs, cgp, fragno, 1);
+			cgp->cg_cs.cs_nbfree++;
+		}
+	}
+	stat_jnewblk++;
+	jnewblk->jn_state &= ~ATTACHED;
+	jnewblk->jn_state |= UNDONE;
+
+	return (frags);
+}
+
 static void
 initiate_write_bmsafemap(bmsafemap, bp)
 	struct bmsafemap *bmsafemap;
@@ -10244,10 +10330,7 @@ initiate_write_bmsafemap(bmsafemap, bp)
 	uint8_t *blksfree;
 	struct cg *cgp;
 	struct fs *fs;
-	int cleared;
 	ino_t ino;
-	long bno;
-	int i;
 
 	if (bmsafemap->sm_state & IOSTARTED)
 		panic("initiate_write_bmsafemap: Already started\n");
@@ -10286,25 +10369,9 @@ initiate_write_bmsafemap(bmsafemap, bp)
 		fs = VFSTOUFS(bmsafemap->sm_list.wk_mp)->um_fs;
 		blksfree = cg_blksfree(cgp);
 		LIST_FOREACH(jnewblk, &bmsafemap->sm_jnewblkhd, jn_deps) {
-			bno = dtogd(fs, jnewblk->jn_blkno);
-			cleared = 0;
-			for (i = jnewblk->jn_oldfrags; i < jnewblk->jn_frags;
-			    i++) {
-				if (isclr(blksfree, bno + i)) {
-					cleared = 1;
-					setbit(blksfree, bno + i);
-				}
-			}
-			/*
-			 * We may not clear the block if it's a background
-			 * copy.  In that case there is no reason to detach
-			 * it.
-			 */
-			if (cleared) {
-				stat_jnewblk++;
-				jnewblk->jn_state &= ~ATTACHED;
-				jnewblk->jn_state |= UNDONE;
-			} else if ((bp->b_xflags & BX_BKGRDMARKER) == 0)
+			if (jnewblk_rollback(jnewblk, fs, cgp, blksfree))
+				continue;
+			if ((bp->b_xflags & BX_BKGRDMARKER) == 0)
 				panic("initiate_write_bmsafemap: block %jd "
 				    "marked free", jnewblk->jn_blkno);
 		}
@@ -10578,6 +10645,10 @@ handle_jwork(wkhd)
 		case D_FREEDEP:
 			free_freedep(WK_FREEDEP(wk));
 			continue;
+		case D_FREEFRAG:
+			rele_jseg(WK_JSEG(WK_FREEFRAG(wk)->ff_jdep));
+			WORKITEM_FREE(wk, D_FREEFRAG);
+			continue;
 		case D_FREEWORK:
 			handle_written_freework(WK_FREEWORK(wk));
 			continue;
@@ -11049,6 +11120,58 @@ bmsafemap_rollbacks(bmsafemap)
 	    !LIST_EMPTY(&bmsafemap->sm_jnewblkhd));
 }
 
+/*
+ * Re-apply an allocation when a cg write is complete.
+ */
+static int
+jnewblk_rollforward(jnewblk, fs, cgp, blksfree)
+	struct jnewblk *jnewblk;
+	struct fs *fs;
+	struct cg *cgp;
+	uint8_t *blksfree;
+{
+	ufs1_daddr_t fragno;
+	ufs2_daddr_t blkno;
+	long cgbno, bbase;
+	int frags, blk;
+	int i;
+
+	frags = 0;
+	cgbno = dtogd(fs, jnewblk->jn_blkno);
+	for (i = jnewblk->jn_oldfrags; i < jnewblk->jn_frags; i++) {
+		if (isclr(blksfree, cgbno + i))
+			panic("jnewblk_rollforward: re-allocated fragment");
+		frags++;
+	}
+	if (frags == fs->fs_frag) {
+		blkno = fragstoblks(fs, cgbno);
+		ffs_clrblock(fs, blksfree, (long)blkno);
+		ffs_clusteracct(fs, cgp, blkno, -1);
+		cgp->cg_cs.cs_nbfree--;
+	} else {
+		bbase = cgbno - fragnum(fs, cgbno);
+		cgbno += jnewblk->jn_oldfrags;
+		/* If a complete block had been reassembled, account for it. */
+		fragno = fragstoblks(fs, bbase);
+		if (ffs_isblock(fs, blksfree, fragno)) {
+			cgp->cg_cs.cs_nffree += fs->fs_frag;
+			ffs_clusteracct(fs, cgp, fragno, -1);
+			cgp->cg_cs.cs_nbfree--;
+		}
+		/* Decrement the old frags.  */
+		blk = blkmap(fs, blksfree, bbase);
+		ffs_fragacct(fs, blk, cgp->cg_frsum, -1);
+		/* Allocate the fragment */
+		for (i = 0; i < frags; i++)
+			clrbit(blksfree, cgbno + i);
+		cgp->cg_cs.cs_nffree -= frags;
+		/* Add back in counts associated with the new frags */
+		blk = blkmap(fs, blksfree, bbase);
+		ffs_fragacct(fs, blk, cgp->cg_frsum, 1);
+	}
+	return (frags);
+}
+
 /*
  * Complete a write to a bmsafemap structure.  Roll forward any bitmap
  * changes if it's not a background write.  Set all written dependencies
@@ -11069,9 +11192,7 @@ handle_written_bmsafemap(bmsafemap, bp)
 	struct cg *cgp;
 	struct fs *fs;
 	ino_t ino;
-	long bno;
 	int chgs;
-	int i;
 
 	if ((bmsafemap->sm_state & IOSTARTED) == 0)
 		panic("initiate_write_bmsafemap: Not started\n");
@@ -11121,18 +11242,9 @@
 		    jntmp) {
 			if ((jnewblk->jn_state & UNDONE) == 0)
 				continue;
-			bno = dtogd(fs, jnewblk->jn_blkno);
-			for (i = jnewblk->jn_oldfrags; i < jnewblk->jn_frags;
-			    i++) {
-				if (bp->b_xflags & BX_BKGRDMARKER)
-					break;
-				if ((jnewblk->jn_state & NEWBLOCK) == 0 &&
-				    isclr(blksfree, bno + i))
-					panic("handle_written_bmsafemap: "
-					    "re-allocated fragment");
-				clrbit(blksfree, bno + i);
+			if ((bp->b_xflags & BX_BKGRDMARKER) == 0 &&
+			    jnewblk_rollforward(jnewblk, fs, cgp, blksfree))
 				chgs = 1;
-			}
 			jnewblk->jn_state &= ~(UNDONE | NEWBLOCK);
 			jnewblk->jn_state |= ATTACHED;
 			free_jnewblk(jnewblk);
@@ -11826,6 +11938,11 @@ softdep_sync_metadata(struct vnode *vp)
 	 * truncations are started, and inode references are journaled.
 	 */
 	ACQUIRE_LOCK(&lk);
+	/*
+	 * Write all journal records to prevent rollbacks on devvp.
+	 */
+	if (vp->v_type == VCHR)
+		softdep_flushjournal(vp->v_mount);
 	error = flush_inodedep_deps(vp, vp->v_mount, VTOI(vp)->i_number);
 	/*
 	 * Ensure that all truncates are written so we won't find deps on
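-- 
The jnewblk_rollback()/jnewblk_rollforward() pair above is easiest to read as
two inverse operations on a cylinder group's free-fragment bitmap and its
counters: if the journal record describing an allocation has not reached disk
when the bitmap buffer is written, the allocation is undone in that buffer
(frags marked free again, the free counts credited) and redone once the write
completes.  The standalone sketch below models just that invariant with a toy
byte-per-frag bitmap; every name in it is hypothetical, and it deliberately
omits the kernel's bit macros, cluster summaries (ffs_clusteracct), and
locking.  It is an illustration, not part of the patch.

/* toyroll.c: build with `cc -o toyroll toyroll.c` and run. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NFRAGS	64			/* frags tracked by the toy "cg" */

struct toycg {
	uint8_t	free[NFRAGS];		/* 1 = free, like a blksfree bit */
	int	nffree;			/* free fragment count */
};

/* Undo an allocation of [start, start + n): mark the frags free again. */
static int
toy_rollback(struct toycg *cg, int start, int n)
{
	int i, frags;

	frags = 0;
	for (i = start; i < start + n; i++)
		if (cg->free[i] == 0) {
			cg->free[i] = 1;
			frags++;
		}
	cg->nffree += frags;
	return (frags);
}

/* Redo the allocation once the bitmap write has completed. */
static int
toy_rollforward(struct toycg *cg, int start, int n)
{
	int i;

	for (i = start; i < start + n; i++) {
		assert(cg->free[i] == 1);	/* the kernel panics here */
		cg->free[i] = 0;
	}
	cg->nffree -= n;
	return (n);
}

int
main(void)
{
	struct toycg cg;

	memset(cg.free, 1, sizeof(cg.free));
	cg.nffree = NFRAGS;

	toy_rollforward(&cg, 8, 3);		/* allocate 3 frags */
	printf("after alloc:       %d free\n", cg.nffree);	/* 61 */
	toy_rollback(&cg, 8, 3);		/* journal record unwritten */
	printf("after rollback:    %d free\n", cg.nffree);	/* 64 */
	toy_rollforward(&cg, 8, 3);		/* cg write completed */
	printf("after rollforward: %d free\n", cg.nffree);	/* 61 */
	return (0);
}

After a rollback followed by a rollforward the bitmap and counters match the
post-allocation state exactly, which is the property the kernel functions
must preserve.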
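softdep_flushjournal(), added above, forces pending journal records out
before metadata on the device vnode is synced, so a subsequent cylinder-group
write need not be rolled back.  Its loop shape, set jb_needseg and call
softdep_process_journal() with MNT_WAIT until softdep_on_journal drains, is
sketched below with hypothetical toy types (no softdep lock, no MNTK_SUJ
check); again an illustration only.

/* toyflush.c: build with `cc -o toyflush toyflush.c` and run. */
#include <stdio.h>

struct toyjblocks {
	int	jb_needseg;		/* force out a partial segment */
};

struct toyump {
	struct toyjblocks *softdep_jblocks;
	int	softdep_on_journal;	/* records awaiting a segment write */
};

/* Stand-in for softdep_process_journal(mp, NULL, MNT_WAIT). */
static void
toy_process_journal(struct toyump *ump)
{
	/* A waited-for segment write retires pending records. */
	if (ump->softdep_on_journal > 0)
		ump->softdep_on_journal--;
	ump->softdep_jblocks->jb_needseg = 0;
}

/* Loop until every pending record has reached the journal. */
static void
toy_flushjournal(struct toyump *ump)
{
	while (ump->softdep_on_journal) {
		ump->softdep_jblocks->jb_needseg = 1;
		toy_process_journal(ump);
	}
}

int
main(void)
{
	struct toyjblocks jb = { 0 };
	struct toyump ump = { &jb, 3 };

	toy_flushjournal(&ump);
	printf("pending records: %d\n", ump.softdep_on_journal);	/* 0 */
	return (0);
}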