Update comments in soft updates code to more fully describe
the addition of journalling. Only functional change is to tighten a KASSERT. Reviewed by: Jeff Roberson
This commit is contained in:
parent
cb1d2fe2cd
commit
c0b2efce9e
sys/ufs/ffs
@ -2378,7 +2378,8 @@ remove_from_journal(wk)
|
||||
/*
|
||||
* We emulate a TAILQ to save space in most structures which do not
|
||||
* require TAILQ semantics. Here we must update the tail position
|
||||
* when removing the tail which is not the final entry.
|
||||
* when removing the tail which is not the final entry. This works
|
||||
* only if the worklist linkage is at the beginning of the structure.
|
||||
*/
|
||||
if (ump->softdep_journal_tail == wk)
|
||||
ump->softdep_journal_tail =
|
||||
@ -2906,9 +2907,9 @@ complete_jseg(jseg)
|
||||
waiting = wk->wk_state & IOWAITING;
|
||||
wk->wk_state &= ~(IOSTARTED | IOWAITING);
|
||||
wk->wk_state |= COMPLETE;
|
||||
KASSERT(i < jseg->js_cnt,
|
||||
KASSERT(i++ < jseg->js_cnt,
|
||||
("handle_written_jseg: overflow %d >= %d",
|
||||
i, jseg->js_cnt));
|
||||
i - 1, jseg->js_cnt));
|
||||
switch (wk->wk_type) {
|
||||
case D_JADDREF:
|
||||
handle_written_jaddref(WK_JADDREF(wk));
|
||||
@ -7492,7 +7493,7 @@ handle_written_sbdep(sbdep, bp)
|
||||
if (inodedep_lookup(mp, fs->fs_sujfree, 0, &inodedep) == 0)
|
||||
panic("handle_written_sbdep: lost inodedep");
|
||||
/*
|
||||
* Now that we have a record of this indode in stable store allow it
|
||||
* Now that we have a record of this inode in stable store allow it
|
||||
* to be written to free up pending work. Inodes may see a lot of
|
||||
* write activity after they are unlinked which we must not hold up.
|
||||
*/
|
||||
@ -7509,8 +7510,7 @@ handle_written_sbdep(sbdep, bp)
|
||||
}
|
||||
|
||||
/*
|
||||
* Mark an inodedep has unlinked and insert it into the in-memory unlinked
|
||||
* list.
|
||||
* Mark an inodedep as unlinked and insert it into the in-memory unlinked list.
|
||||
*/
|
||||
static void
|
||||
unlinked_inodedep(mp, inodedep)
|
||||
@ -7576,7 +7576,7 @@ clear_unlinked_inodedep(inodedep)
|
||||
* link before us, whether it be the superblock or an inode.
|
||||
* Unfortunately the list may change while we're waiting
|
||||
* on the buf lock for either resource so we must loop until
|
||||
* we lock. the right one. If both the superblock and an
|
||||
* we lock the right one. If both the superblock and an
|
||||
* inode point to this inode we must clear the inode first
|
||||
* followed by the superblock.
|
||||
*/
|
||||
@ -9094,7 +9094,7 @@ handle_jwork(wkhd)
|
||||
/*
|
||||
* Handle the bufwait list on an inode when it is safe to release items
|
||||
* held there. This normally happens after an inode block is written but
|
||||
* may be delayed and handle later if there are pending journal items that
|
||||
* may be delayed and handled later if there are pending journal items that
|
||||
* are not yet safe to be released.
|
||||
*/
|
||||
static struct freefile *
|
||||
|
@ -661,7 +661,7 @@ lbn_level(ufs_lbn_t lbn)
|
||||
|
||||
/*
|
||||
* Size of the segment record header. There is at most one for each disk
|
||||
* block n the journal. The segment header is followed by an array of
|
||||
* block in the journal. The segment header is followed by an array of
|
||||
* records. fsck depends on the first element in each record being 'op'
|
||||
* and the second being 'ino'. Segments may span multiple disk blocks but
|
||||
* the header is present on each.
|
||||
|
@ -107,6 +107,15 @@
|
||||
*
|
||||
* The ONWORKLIST flag shows whether the structure is currently linked
|
||||
* onto a worklist.
|
||||
*
|
||||
* The UNLINK* flags track the progress of updating the on-disk linked
|
||||
* list of active but unlinked inodes. When an inode is first unlinked
|
||||
* it is marked as UNLINKED. When its on-disk di_freelink has been
|
||||
* written its UNLINKNEXT flag is set. When its predecessor in the
|
||||
* list has its di_freelink pointing at it, its UNLINKPREV flag is set.
|
||||
* When the on-disk list can reach it from the superblock, its
|
||||
* UNLINKONLIST flag is set. Once all of these flags are set, it
|
||||
* is safe to let its last name be removed.
|
||||
*/
|
||||
#define ATTACHED 0x000001
|
||||
#define UNDONE 0x000002
|
||||
@ -353,20 +362,22 @@ struct bmsafemap {
|
||||
* or fragment is allocated from a cylinder group. Its state is set to
|
||||
* DEPCOMPLETE when its cylinder group map is written. It is converted to
|
||||
* an allocdirect or allocindir allocation once the allocator calls the
|
||||
* appropriate setup function.
|
||||
* appropriate setup function. It will initially be linked onto a bmsafemap
|
||||
* list. Once converted it can be linked onto the lists described for
|
||||
* allocdirect or allocindir below.
|
||||
*/
|
||||
struct newblk {
|
||||
struct worklist nb_list;
|
||||
struct worklist nb_list; /* See comment above. */
|
||||
# define nb_state nb_list.wk_state
|
||||
LIST_ENTRY(newblk) nb_hash; /* hashed lookup */
|
||||
LIST_ENTRY(newblk) nb_deps; /* bmsafemap's list of newblks */
|
||||
LIST_ENTRY(newblk) nb_hash; /* Hashed lookup. */
|
||||
LIST_ENTRY(newblk) nb_deps; /* Bmsafemap's list of newblks. */
|
||||
struct jnewblk *nb_jnewblk; /* New block journal entry. */
|
||||
struct bmsafemap *nb_bmsafemap;/* cylgrp dep (if pending) */
|
||||
struct freefrag *nb_freefrag; /* fragment to be freed (if any) */
|
||||
struct bmsafemap *nb_bmsafemap;/* Cylgrp dep (if pending). */
|
||||
struct freefrag *nb_freefrag; /* Fragment to be freed (if any). */
|
||||
struct indirdephd nb_indirdeps; /* Children indirect blocks. */
|
||||
struct workhead nb_newdirblk; /* dir block to notify when written */
|
||||
struct workhead nb_newdirblk; /* Dir block to notify when written. */
|
||||
struct workhead nb_jwork; /* Journal work pending. */
|
||||
ufs2_daddr_t nb_newblkno; /* new value of block pointer */
|
||||
ufs2_daddr_t nb_newblkno; /* New value of block pointer. */
|
||||
};
|
||||
|
||||
/*
|
||||
@ -517,16 +528,16 @@ struct freeblks {
|
||||
/*
|
||||
* A "freework" structure handles the release of a tree of blocks or a single
|
||||
* block. Each indirect block in a tree is allocated its own freework
|
||||
* structure so that the indrect block may be freed only when all of its
|
||||
* structure so that the indirect block may be freed only when all of its
|
||||
* children are freed. In this way we enforce the rule that an allocated
|
||||
* block must have a valid path to a root that is journaled. Each child
|
||||
* block acquires a reference and when the ref hits zero the parent ref
|
||||
* is decremented. If there is no parent the freeblks ref is decremented.
|
||||
*/
|
||||
struct freework {
|
||||
struct worklist fw_list;
|
||||
struct worklist fw_list; /* Delayed worklist. */
|
||||
# define fw_state fw_list.wk_state
|
||||
LIST_ENTRY(freework) fw_next; /* Queue for freeblksk. */
|
||||
LIST_ENTRY(freework) fw_next; /* Queue for freeblk list. */
|
||||
struct freeblks *fw_freeblks; /* Root of operation. */
|
||||
struct freework *fw_parent; /* Parent indirect. */
|
||||
ufs2_daddr_t fw_blkno; /* Our block #. */
|
||||
@ -545,7 +556,7 @@ struct freework {
|
||||
* to be freed as well.
|
||||
*/
|
||||
struct freedep {
|
||||
struct worklist fd_list;
|
||||
struct worklist fd_list; /* Delayed worklist. */
|
||||
struct freework *fd_freework; /* Parent freework. */
|
||||
};
|
||||
|
||||
@ -705,10 +716,10 @@ struct newdirblk {
|
||||
* so they may easily be queued in-order on the inodedep.
|
||||
*/
|
||||
struct inoref {
|
||||
struct worklist if_list;
|
||||
struct worklist if_list; /* Journal pending or jseg entries. */
|
||||
# define if_state if_list.wk_state
|
||||
TAILQ_ENTRY(inoref) if_deps; /* Links for inodedep. */
|
||||
struct jsegdep *if_jsegdep;
|
||||
struct jsegdep *if_jsegdep; /* Will track our journal record. */
|
||||
off_t if_diroff; /* Directory offset. */
|
||||
ino_t if_ino; /* Inode number. */
|
||||
ino_t if_parent; /* Parent inode number. */
|
||||
@ -731,8 +742,8 @@ struct inoref {
|
||||
* ultimately released when the file is freed or the link is dropped again.
|
||||
*/
|
||||
struct jaddref {
|
||||
struct inoref ja_ref;
|
||||
# define ja_list ja_ref.if_list /* Journal pending or jseg entries. */
|
||||
struct inoref ja_ref; /* see inoref above. */
|
||||
# define ja_list ja_ref.if_list /* Jrnl pending, id_inowait, dm_jwork.*/
|
||||
# define ja_state ja_ref.if_list.wk_state
|
||||
LIST_ENTRY(jaddref) ja_bmdeps; /* Links for bmsafemap. */
|
||||
union {
|
||||
@ -754,21 +765,28 @@ struct jaddref {
|
||||
* may proceed as normal.
|
||||
*/
|
||||
struct jremref {
|
||||
struct inoref jr_ref;
|
||||
# define jr_list jr_ref.if_list /* Journal pending or jseg entries. */
|
||||
struct inoref jr_ref; /* see inoref above. */
|
||||
# define jr_list jr_ref.if_list /* Linked to softdep_journal_pending. */
|
||||
# define jr_state jr_ref.if_list.wk_state
|
||||
LIST_ENTRY(jremref) jr_deps; /* Links for pagdep. */
|
||||
LIST_ENTRY(jremref) jr_deps; /* Links for dirrem. */
|
||||
struct dirrem *jr_dirrem; /* Back pointer to dirrem. */
|
||||
};
|
||||
|
||||
/*
|
||||
* A "jmvref" structure tracks name relocations within the same
|
||||
* directory block that occur as a result of directory compaction.
|
||||
* It prevents the updated directory entry from being written to disk
|
||||
* until the journal entry is written. Once the journal has been
|
||||
* written the compacted directory may be written to disk.
|
||||
*/
|
||||
struct jmvref {
|
||||
struct worklist jm_list;
|
||||
LIST_ENTRY(jmvref) jm_deps;
|
||||
struct pagedep *jm_pagedep;
|
||||
ino_t jm_parent;
|
||||
ino_t jm_ino;
|
||||
off_t jm_oldoff;
|
||||
off_t jm_newoff;
|
||||
struct worklist jm_list; /* Linked to softdep_journal_pending. */
|
||||
LIST_ENTRY(jmvref) jm_deps; /* Jmvref on pagedep. */
|
||||
struct pagedep *jm_pagedep; /* Back pointer to pagedep. */
|
||||
ino_t jm_parent; /* Containing directory inode number. */
|
||||
ino_t jm_ino; /* Inode number of our entry. */
|
||||
off_t jm_oldoff; /* Our old offset in directory. */
|
||||
off_t jm_newoff; /* Our new offset in directory. */
|
||||
};
|
||||
|
||||
/*
|
||||
@ -780,36 +798,37 @@ struct jmvref {
|
||||
* write the jnewblk structure is maintained to prevent the bitmaps from
|
||||
* reaching the disk. Ultimately the jnewblk structure will be passed
|
||||
* to the free routine as the in memory cg is modified back to the free
|
||||
* state at which time it can be released.
|
||||
* state at which time it can be released. It may be held on any of the
|
||||
* fx_jwork, fw_jwork, fb_jwork, ff_jwork, nb_jwork, or ir_jwork lists.
|
||||
*/
|
||||
struct jnewblk {
|
||||
struct worklist jn_list;
|
||||
struct worklist jn_list; /* See lists above. */
|
||||
# define jn_state jn_list.wk_state
|
||||
struct jsegdep *jn_jsegdep;
|
||||
LIST_ENTRY(jnewblk) jn_deps; /* All jnewblks on bmsafemap */
|
||||
struct newblk *jn_newblk;
|
||||
ino_t jn_ino;
|
||||
ufs_lbn_t jn_lbn;
|
||||
ufs2_daddr_t jn_blkno;
|
||||
int jn_oldfrags;
|
||||
int jn_frags;
|
||||
struct jsegdep *jn_jsegdep; /* Will track our journal record. */
|
||||
LIST_ENTRY(jnewblk) jn_deps; /* Jnewblks on sm_jnewblkhd. */
|
||||
struct newblk *jn_newblk; /* Back pointer to newblk. */
|
||||
ino_t jn_ino; /* Ino to which allocated. */
|
||||
ufs_lbn_t jn_lbn; /* Lbn to which allocated. */
|
||||
ufs2_daddr_t jn_blkno; /* Blkno allocated */
|
||||
int jn_oldfrags; /* Previous fragments when extended. */
|
||||
int jn_frags; /* Number of fragments. */
|
||||
};
|
||||
|
||||
/*
|
||||
* A "jfreeblk" structure tracks the journal write for freeing a block
|
||||
* or tree of blocks. The block pointer must not be cleared in the inode
|
||||
* or indirect prior to the jfreeblk being written.
|
||||
* or indirect prior to the jfreeblk being written to the journal.
|
||||
*/
|
||||
struct jfreeblk {
|
||||
struct worklist jf_list;
|
||||
struct worklist jf_list; /* Linked to softdep_journal_pending. */
|
||||
# define jf_state jf_list.wk_state
|
||||
struct jsegdep *jf_jsegdep;
|
||||
struct freeblks *jf_freeblks;
|
||||
LIST_ENTRY(jfreeblk) jf_deps;
|
||||
ino_t jf_ino;
|
||||
ufs_lbn_t jf_lbn;
|
||||
ufs2_daddr_t jf_blkno;
|
||||
int jf_frags;
|
||||
struct jsegdep *jf_jsegdep; /* Will track our journal record. */
|
||||
struct freeblks *jf_freeblks; /* Back pointer to freeblks. */
|
||||
LIST_ENTRY(jfreeblk) jf_deps; /* Jfreeblk on fb_jfreeblkhd. */
|
||||
ino_t jf_ino; /* Ino from which blocks freed. */
|
||||
ufs_lbn_t jf_lbn; /* Lbn from which blocks freed. */
|
||||
ufs2_daddr_t jf_blkno; /* Blkno being freed. */
|
||||
int jf_frags; /* Number of frags being freed. */
|
||||
};
|
||||
|
||||
/*
|
||||
@ -818,14 +837,14 @@ struct jfreeblk {
|
||||
* freeblks operation.
|
||||
*/
|
||||
struct jfreefrag {
|
||||
struct worklist fr_list;
|
||||
struct worklist fr_list; /* Linked to softdep_journal_pending. */
|
||||
# define fr_state fr_list.wk_state
|
||||
struct jsegdep *fr_jsegdep;
|
||||
struct freefrag *fr_freefrag;
|
||||
ino_t fr_ino;
|
||||
ufs_lbn_t fr_lbn;
|
||||
ufs2_daddr_t fr_blkno;
|
||||
int fr_frags;
|
||||
struct jsegdep *fr_jsegdep; /* Will track our journal record. */
|
||||
struct freefrag *fr_freefrag; /* Back pointer to freefrag. */
|
||||
ino_t fr_ino; /* Ino from which frag freed. */
|
||||
ufs_lbn_t fr_lbn; /* Lbn from which frag freed. */
|
||||
ufs2_daddr_t fr_blkno; /* Blkno being freed. */
|
||||
int fr_frags; /* Size of frag being freed. */
|
||||
};
|
||||
|
||||
/*
|
||||
@ -835,42 +854,45 @@ struct jfreefrag {
|
||||
* is complete and the truncated inode is fsync'd.
|
||||
*/
|
||||
struct jtrunc {
|
||||
struct worklist jt_list;
|
||||
struct jsegdep *jt_jsegdep;
|
||||
ino_t jt_ino;
|
||||
off_t jt_size;
|
||||
int jt_extsize;
|
||||
struct worklist jt_list; /* Linked to softdep_journal_pending. */
|
||||
struct jsegdep *jt_jsegdep; /* Will track our journal record. */
|
||||
ino_t jt_ino; /* Ino being truncated. */
|
||||
off_t jt_size; /* Final file size. */
|
||||
int jt_extsize; /* Final extent size. */
|
||||
};
|
||||
|
||||
/*
|
||||
* A "jsegdep" structure tracks a single reference to a written journal
|
||||
* segment so the journal space can be reclaimed when all dependencies
|
||||
* have been written.
|
||||
* have been written. It can hang off of id_inowait, dm_jwork, da_jwork,
|
||||
* nb_jwork, ff_jwork, or fb_jwork lists.
|
||||
*/
|
||||
struct jsegdep {
|
||||
struct worklist jd_list;
|
||||
struct worklist jd_list; /* See above for lists. */
|
||||
# define jd_state jd_list.wk_state
|
||||
struct jseg *jd_seg;
|
||||
struct jseg *jd_seg; /* Our journal record. */
|
||||
};
|
||||
|
||||
/*
|
||||
* A "jseg" structure contains all of the journal records written in a
|
||||
* single disk write. jaddref and jremref structures are linked into
|
||||
* single disk write. The jaddref and jremref structures are linked into
|
||||
* js_entries so they may be completed when the write completes. The
|
||||
* js_deps array contains as many entries as there are ref counts to
|
||||
* reduce the number of allocations required per journal write to one.
|
||||
* js_entries also includes the write dependency structures: jmvref,
|
||||
* jnewblk, jfreeblk, jfreefrag, and jtrunc. The js_refs field counts
|
||||
* the number of entries on the js_entries list. Thus there is a single
|
||||
* jseg entry to describe each journal write.
|
||||
*/
|
||||
struct jseg {
|
||||
struct worklist js_list; /* b_deps link for journal */
|
||||
# define js_state js_list.wk_state
|
||||
struct workhead js_entries; /* Entries awaiting write */
|
||||
TAILQ_ENTRY(jseg) js_next;
|
||||
TAILQ_ENTRY(jseg) js_next; /* List of all unfinished segments. */
|
||||
struct jblocks *js_jblocks; /* Back pointer to block/seg list */
|
||||
struct buf *js_buf; /* Buffer while unwritten */
|
||||
uint64_t js_seq;
|
||||
int js_size; /* Allocated size in bytes */
|
||||
int js_cnt; /* Total items allocated */
|
||||
int js_refs; /* Count of items pending completion */
|
||||
uint64_t js_seq; /* Journal record sequence number. */
|
||||
int js_size; /* Size of journal record in bytes. */
|
||||
int js_cnt; /* Total items allocated. */
|
||||
int js_refs; /* Count of js_entries items. */
|
||||
};
|
||||
|
||||
/*
|
||||
@ -878,10 +900,11 @@ struct jseg {
|
||||
* superblock writes. This makes sure the superblock is always pointing at
|
||||
* the first possible unlinked inode for the suj recovery process. If a
|
||||
* block write completes and we discover a new head is available the buf
|
||||
* is dirtied and the dep is kept.
|
||||
* is dirtied and the dep is kept. See the description of the UNLINK*
|
||||
* flags above for more details.
|
||||
*/
|
||||
struct sbdep {
|
||||
struct worklist sb_list; /* b_dep linkage */
|
||||
struct fs *sb_fs; /* Filesystem pointer within buf. */
|
||||
struct ufsmount *sb_ump;
|
||||
struct ufsmount *sb_ump; /* Our mount structure */
|
||||
};
|
||||
|
Loading…
x
Reference in New Issue
Block a user