Speed up fsck by caching the cylinder group maps in pass1 so
that they do not need to be read again in pass5. As this nearly
doubles the memory requirement for fsck, the cache is thrown away
if other memory needs in fsck would otherwise fail. Thus, the
memory footprint of fsck remains unchanged in memory constrained
environments.

This work was inspired by a paper presented at Usenix's FAST '13:
www.usenix.org/conference/fast13/ffsck-fast-file-system-checker

Details of this implementation appear in the April 2013 issue of ;login:
www.usenix.org/publications/login/april-2013-volume-38-number-2.
A copy of the April 2013 ;login: paper can also be downloaded
from: www.mckusick.com/publications/faster_fsck.pdf.

Reviewed by: kib
Tested by:   Peter Holm
MFC after:   4 weeks
This commit is contained in:
mckusick 2013-03-22 21:50:43 +00:00
parent 45f62f67d3
commit 93fa1464f2
7 changed files with 154 additions and 56 deletions

View File

@ -198,7 +198,6 @@ struct timespec totalreadtime[BT_NUMBUFTYPES];
struct timespec startprog;
struct bufarea sblk; /* file system superblock */
struct bufarea cgblk; /* cylinder group blocks */
struct bufarea *pdirbp; /* current directory contents */
struct bufarea *pbp; /* current inode block */
@ -216,9 +215,7 @@ struct bufarea *pbp; /* current inode block */
} while (0)
#define sbdirty() dirty(&sblk)
#define cgdirty() dirty(&cgblk)
#define sblock (*sblk.b_un.b_fs)
#define cgrp (*cgblk.b_un.b_cg)
enum fixstate {DONTKNOW, NOFIX, FIX, IGNORE};
ino_t cursnapshot;
@ -361,6 +358,37 @@ struct ufs2_dinode ufs2_zino;
#define EEXIT 8 /* Standard error exit. */
int flushentry(void);
/*
 * Wrapper for malloc() that flushes the cylinder group cache to try
 * to get space.
 *
 * Each time malloc() fails, flushentry() is asked to release one cached
 * cylinder group and the allocation is retried. The loop ends as soon as
 * the allocation succeeds or flushentry() reports (by returning 0) that
 * it released nothing.
 *
 * The size is a size_t so that callers passing sizeof expressions or
 * other size_t values are not narrowed to int before reaching malloc().
 *
 * Returns the allocated memory, or NULL if the request still cannot be
 * satisfied; callers must check for NULL.
 */
static inline void *
Malloc(size_t size)
{
	void *retval;

	while ((retval = malloc(size)) == NULL) {
		/* Nothing was released, so retrying cannot help. */
		if (flushentry() == 0)
			break;
	}
	return (retval);
}
/*
 * Wrapper for calloc() that flushes the cylinder group cache to try
 * to get space.
 *
 * Each time calloc() fails, flushentry() is asked to release one cached
 * cylinder group and the allocation is retried. The loop ends as soon as
 * the allocation succeeds or flushentry() reports (by returning 0) that
 * it released nothing.
 *
 * Both arguments are size_t, matching calloc(), so that large element
 * counts or sizes are not narrowed to int on the way in.
 *
 * Returns zero-filled memory for cnt objects of size bytes each, or NULL
 * if the request still cannot be satisfied; callers must check for NULL.
 */
static inline void *
Calloc(size_t cnt, size_t size)
{
	void *retval;

	while ((retval = calloc(cnt, size)) == NULL) {
		/* Nothing was released, so retrying cannot help. */
		if (flushentry() == 0)
			break;
	}
	return (retval);
}
struct fstab;
@ -378,7 +406,7 @@ void cacheino(union dinode *dp, ino_t inumber);
void catch(int);
void catchquit(int);
int changeino(ino_t dir, const char *name, ino_t newnum);
int check_cgmagic(int cg, struct cg *cgp);
int check_cgmagic(int cg, struct bufarea *cgbp);
int chkrange(ufs2_daddr_t blk, int cnt);
void ckfini(int markclean);
int ckinode(union dinode *dp, struct inodesc *);
@ -398,6 +426,7 @@ void freeino(ino_t ino);
void freeinodebuf(void);
int ftypeok(union dinode *dp);
void getblk(struct bufarea *bp, ufs2_daddr_t blk, long size);
struct bufarea *cgget(int cg);
struct bufarea *getdatablk(ufs2_daddr_t blkno, long size, int type);
struct inoinfo *getinoinfo(ino_t inumber);
union dinode *getnextinode(ino_t inumber, int rebuildcg);

View File

@ -70,6 +70,7 @@ static struct timespec startpass, finishpass;
struct timeval slowio_starttime;
int slowio_delay_usec = 10000; /* Initial IO delay for background fsck */
int slowio_pollcnt;
static struct bufarea cgblk; /* backup buffer for cylinder group blocks */
static TAILQ_HEAD(buflist, bufarea) bufhead; /* head of buffer cache list */
static int numbufs; /* size of buffer cache */
static char *buftype[BT_NUMBUFTYPES] = BT_NAMES;
@ -163,7 +164,7 @@ bufinit(void)
char *bufp;
pbp = pdirbp = (struct bufarea *)0;
bufp = malloc((unsigned int)sblock.fs_bsize);
bufp = Malloc((unsigned int)sblock.fs_bsize);
if (bufp == 0)
errx(EEXIT, "cannot allocate buffer pool");
cgblk.b_un.b_buf = bufp;
@ -173,8 +174,8 @@ bufinit(void)
if (bufcnt < MINBUFS)
bufcnt = MINBUFS;
for (i = 0; i < bufcnt; i++) {
bp = (struct bufarea *)malloc(sizeof(struct bufarea));
bufp = malloc((unsigned int)sblock.fs_bsize);
bp = (struct bufarea *)Malloc(sizeof(struct bufarea));
bufp = Malloc((unsigned int)sblock.fs_bsize);
if (bp == NULL || bufp == NULL) {
if (i >= MINBUFS)
break;
@ -192,6 +193,57 @@ bufinit(void)
}
}
/*
* Manage cylinder group buffers.
*/
static struct bufarea *cgbufs; /* header for cylinder group cache */
static int flushtries; /* number of tries to reclaim memory */
/*
 * Return a buffer holding cylinder group cg.
 *
 * The table of per-group buffer headers is allocated on first use; the
 * program exits if that allocation fails. If the group already has a
 * cached buffer, it is returned as is. Otherwise, provided no cache
 * flush has been attempted yet (flushtries == 0), a private buffer is
 * allocated for the group and the group is read into it. When no private
 * buffer is obtained, the group is read into the shared cgblk buffer and
 * that buffer is returned instead.
 */
struct bufarea *
cgget(int cg)
{
	struct bufarea *entry;
	struct cg *mem = NULL;

	if (cgbufs == NULL &&
	    (cgbufs = Calloc(sblock.fs_ncg, sizeof(struct bufarea))) == NULL)
		errx(EEXIT, "cannot allocate cylinder group buffers");

	entry = cgbufs + cg;
	if (entry->b_un.b_cg == NULL) {
		/* Plain malloc(): do not evict cache entries to grow the cache. */
		if (flushtries == 0)
			mem = malloc((unsigned int)sblock.fs_cgsize);
		if (mem != NULL) {
			entry->b_un.b_cg = mem;
			initbarea(entry, BT_CYLGRP);
		} else {
			/* No private buffer available; use the shared one. */
			entry = &cgblk;
		}
		getblk(entry, cgtod(&sblock, cg), sblock.fs_cgsize);
	}
	return (entry);
}
/*
* Attempt to flush a cylinder group cache entry.
* Return whether the flush was successful.
*/
int
flushentry(void)
{
struct bufarea *cgbp;
cgbp = &cgbufs[flushtries++];
if (cgbp->b_un.b_cg == NULL)
return (0);
flush(fswritefd, cgbp);
free(cgbp->b_un.b_buf);
cgbp->b_un.b_buf = NULL;
return (1);
}
/*
* Manage a cache of directory blocks.
*/
@ -363,6 +415,13 @@ ckfini(int markclean)
}
if (numbufs != cnt)
errx(EEXIT, "panic: lost %d buffers", numbufs - cnt);
for (cnt = 0; cnt < sblock.fs_ncg; cnt++) {
if (cgbufs[cnt].b_un.b_cg == NULL)
continue;
flush(fswritefd, &cgbufs[cnt]);
free(cgbufs[cnt].b_un.b_cg);
}
free(cgbufs);
pbp = pdirbp = (struct bufarea *)0;
if (cursnapshot == 0 && sblock.fs_clean != markclean) {
if ((sblock.fs_clean = markclean) != 0) {
@ -448,8 +507,8 @@ static void printIOstats(void)
clock_gettime(CLOCK_REALTIME_PRECISE, &finishpass);
timespecsub(&finishpass, &startpass);
msec = finishpass.tv_sec * 1000 + finishpass.tv_nsec / 1000000;
printf("Running time: %lld msec\n", msec);
printf("Running time: %d.%03ld msec\n",
finishpass.tv_sec, finishpass.tv_nsec / 1000000);
printf("buffer reads by type:\n");
for (totalmsec = 0, i = 0; i < BT_NUMBUFTYPES; i++)
totalmsec += readtime[i].tv_sec * 1000 +
@ -460,9 +519,10 @@ static void printIOstats(void)
if (readcnt[i] == 0)
continue;
msec = readtime[i].tv_sec * 1000 + readtime[i].tv_nsec / 1000000;
printf("%21s:%8ld %2ld.%ld%% %8lld msec %2lld.%lld%%\n",
printf("%21s:%8ld %2ld.%ld%% %4d.%03ld sec %2jd.%jd%%\n",
buftype[i], readcnt[i], readcnt[i] * 100 / diskreads,
(readcnt[i] * 1000 / diskreads) % 10, msec,
(readcnt[i] * 1000 / diskreads) % 10,
readtime[i].tv_sec, readtime[i].tv_nsec / 1000000,
msec * 100 / totalmsec, (msec * 1000 / totalmsec) % 10);
}
printf("\n");
@ -562,8 +622,9 @@ blerase(int fd, ufs2_daddr_t blk, long size)
* test fails, offer an option to rebuild the whole cylinder group.
*/
int
check_cgmagic(int cg, struct cg *cgp)
check_cgmagic(int cg, struct bufarea *cgbp)
{
struct cg *cgp = cgbp->b_un.b_cg;
/*
* Extended cylinder group checks.
@ -623,7 +684,7 @@ check_cgmagic(int cg, struct cg *cgp)
cgp->cg_nextfreeoff = cgp->cg_clusteroff +
howmany(fragstoblks(&sblock, sblock.fs_fpg), CHAR_BIT);
}
cgdirty();
dirty(cgbp);
return (0);
}
@ -634,7 +695,8 @@ ufs2_daddr_t
allocblk(long frags)
{
int i, j, k, cg, baseblk;
struct cg *cgp = &cgrp;
struct bufarea *cgbp;
struct cg *cgp;
if (frags <= 0 || frags > sblock.fs_frag)
return (0);
@ -650,8 +712,9 @@ allocblk(long frags)
continue;
}
cg = dtog(&sblock, i + j);
getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize);
if (!check_cgmagic(cg, cgp))
cgbp = cgget(cg);
cgp = cgbp->b_un.b_cg;
if (!check_cgmagic(cg, cgbp))
return (0);
baseblk = dtogd(&sblock, i + j);
for (k = 0; k < frags; k++) {
@ -663,7 +726,7 @@ allocblk(long frags)
cgp->cg_cs.cs_nbfree--;
else
cgp->cg_cs.cs_nffree -= frags;
cgdirty();
dirty(cgbp);
return (i + j);
}
}

View File

@ -423,7 +423,7 @@ setinodebuf(ino_t inum)
partialsize = inobufsize;
}
initbarea(&inobuf, BT_INODES);
if ((inobuf.b_un.b_buf = malloc((unsigned)inobufsize)) == NULL)
if ((inobuf.b_un.b_buf = Malloc((unsigned)inobufsize)) == NULL)
errx(EEXIT, "cannot allocate space for inode buffer");
}
@ -454,7 +454,7 @@ cacheino(union dinode *dp, ino_t inumber)
else
blks = howmany(DIP(dp, di_size), sblock.fs_bsize);
inp = (struct inoinfo *)
malloc(sizeof(*inp) + (blks - 1) * sizeof(ufs2_daddr_t));
Malloc(sizeof(*inp) + (blks - 1) * sizeof(ufs2_daddr_t));
if (inp == NULL)
errx(EEXIT, "cannot increase directory list");
inpp = &inphead[inumber % dirhash];
@ -657,7 +657,8 @@ allocino(ino_t request, int type)
{
ino_t ino;
union dinode *dp;
struct cg *cgp = &cgrp;
struct bufarea *cgbp;
struct cg *cgp;
int cg;
if (request == 0)
@ -670,8 +671,9 @@ allocino(ino_t request, int type)
if (ino == maxino)
return (0);
cg = ino_to_cg(&sblock, ino);
getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize);
if (!check_cgmagic(cg, cgp))
cgbp = cgget(cg);
cgp = cgbp->b_un.b_cg;
if (!check_cgmagic(cg, cgbp))
return (0);
setbit(cg_inosused(cgp), ino % sblock.fs_ipg);
cgp->cg_cs.cs_nifree--;
@ -687,7 +689,7 @@ allocino(ino_t request, int type)
default:
return (0);
}
cgdirty();
dirty(cgbp);
dp = ginode(ino);
DIP_SET(dp, di_db[0], allocblk((long)1));
if (DIP(dp, di_db[0]) == 0) {

View File

@ -61,6 +61,8 @@ pass1(void)
{
struct inostat *info;
struct inodesc idesc;
struct bufarea *cgbp;
struct cg *cgp;
ino_t inumber, inosused, mininos;
ufs2_daddr_t i, cgd;
u_int8_t *cp;
@ -92,12 +94,13 @@ pass1(void)
for (c = 0; c < sblock.fs_ncg; c++) {
inumber = c * sblock.fs_ipg;
setinodebuf(inumber);
getblk(&cgblk, cgtod(&sblock, c), sblock.fs_cgsize);
cgbp = cgget(c);
cgp = cgbp->b_un.b_cg;
rebuildcg = 0;
if (!check_cgmagic(c, &cgrp))
if (!check_cgmagic(c, cgbp))
rebuildcg = 1;
if (!rebuildcg && sblock.fs_magic == FS_UFS2_MAGIC) {
inosused = cgrp.cg_initediblk;
inosused = cgp->cg_initediblk;
if (inosused > sblock.fs_ipg) {
pfatal(
"Too many initialized inodes (%ju > %d) in cylinder group %d\nReset to %d\n",
@ -127,7 +130,7 @@ pass1(void)
* read only those inodes in from disk.
*/
if ((preen || inoopt) && usedsoftdep && !rebuildcg) {
cp = &cg_inosused(&cgrp)[(inosused - 1) / CHAR_BIT];
cp = &cg_inosused(cgp)[(inosused - 1) / CHAR_BIT];
for ( ; inosused > 0; inosused -= CHAR_BIT, cp--) {
if (*cp == 0)
continue;
@ -149,7 +152,7 @@ pass1(void)
inostathead[c].il_stat = 0;
continue;
}
info = calloc((unsigned)inosused, sizeof(struct inostat));
info = Calloc((unsigned)inosused, sizeof(struct inostat));
if (info == NULL)
errx(EEXIT, "cannot alloc %u bytes for inoinfo",
(unsigned)(sizeof(struct inostat) * inosused));
@ -169,7 +172,7 @@ pass1(void)
* valid number for this cylinder group.
*/
if (checkinode(inumber, &idesc, rebuildcg) == 0 &&
i > cgrp.cg_initediblk)
i > cgp->cg_initediblk)
break;
}
/*
@ -181,16 +184,16 @@ pass1(void)
mininos = roundup(inosused + INOPB(&sblock), INOPB(&sblock));
if (inoopt && !preen && !rebuildcg &&
sblock.fs_magic == FS_UFS2_MAGIC &&
cgrp.cg_initediblk > 2 * INOPB(&sblock) &&
mininos < cgrp.cg_initediblk) {
i = cgrp.cg_initediblk;
cgp->cg_initediblk > 2 * INOPB(&sblock) &&
mininos < cgp->cg_initediblk) {
i = cgp->cg_initediblk;
if (mininos < 2 * INOPB(&sblock))
cgrp.cg_initediblk = 2 * INOPB(&sblock);
cgp->cg_initediblk = 2 * INOPB(&sblock);
else
cgrp.cg_initediblk = mininos;
cgp->cg_initediblk = mininos;
pwarn("CYLINDER GROUP %d: RESET FROM %ju TO %d %s\n",
c, i, cgrp.cg_initediblk, "VALID INODES");
cgdirty();
c, i, cgp->cg_initediblk, "VALID INODES");
dirty(cgbp);
}
if (inosused < sblock.fs_ipg)
continue;
@ -199,11 +202,11 @@ pass1(void)
inosused = 0;
else
inosused = lastino - (c * sblock.fs_ipg);
if (rebuildcg && inosused > cgrp.cg_initediblk &&
if (rebuildcg && inosused > cgp->cg_initediblk &&
sblock.fs_magic == FS_UFS2_MAGIC) {
cgrp.cg_initediblk = roundup(inosused, INOPB(&sblock));
cgp->cg_initediblk = roundup(inosused, INOPB(&sblock));
pwarn("CYLINDER GROUP %d: FOUND %d VALID INODES\n", c,
cgrp.cg_initediblk);
cgp->cg_initediblk);
}
/*
* If we were not able to determine in advance which inodes
@ -219,7 +222,7 @@ pass1(void)
inostathead[c].il_stat = 0;
continue;
}
info = calloc((unsigned)inosused, sizeof(struct inostat));
info = Calloc((unsigned)inosused, sizeof(struct inostat));
if (info == NULL)
errx(EEXIT, "cannot alloc %u bytes for inoinfo",
(unsigned)(sizeof(struct inostat) * inosused));
@ -482,7 +485,7 @@ pass1check(struct inodesc *idesc)
}
return (STOP);
}
new = (struct dups *)malloc(sizeof(struct dups));
new = (struct dups *)Malloc(sizeof(struct dups));
if (new == NULL) {
pfatal("DUP TABLE OVERFLOW.");
if (reply("CONTINUE") == 0) {

View File

@ -59,14 +59,14 @@ pass5(void)
int c, i, j, blk, frags, basesize, mapsize;
int inomapsize, blkmapsize;
struct fs *fs = &sblock;
struct cg *cg = &cgrp;
ufs2_daddr_t d, dbase, dmax, start;
int rewritecg = 0;
struct csum *cs;
struct csum_total cstotal;
struct inodesc idesc[3];
char buf[MAXBSIZE];
struct cg *newcg = (struct cg *)buf;
struct cg *cg, *newcg = (struct cg *)buf;
struct bufarea *cgbp;
inoinfo(WINO)->ino_state = USTATE;
memset(newcg, 0, (size_t)fs->fs_cgsize);
@ -162,7 +162,8 @@ pass5(void)
c * 100 / sblock.fs_ncg);
got_sigalarm = 0;
}
getblk(&cgblk, cgtod(fs, c), fs->fs_cgsize);
cgbp = cgget(c);
cg = cgbp->b_un.b_cg;
if (!cg_chkmagic(cg))
pfatal("CG %d: BAD MAGIC NUMBER\n", c);
newcg->cg_time = cg->cg_time;
@ -324,14 +325,14 @@ pass5(void)
}
if (rewritecg) {
memmove(cg, newcg, (size_t)fs->fs_cgsize);
cgdirty();
dirty(cgbp);
continue;
}
if (cursnapshot == 0 &&
memcmp(newcg, cg, basesize) != 0 &&
dofix(&idesc[2], "SUMMARY INFORMATION BAD")) {
memmove(cg, newcg, (size_t)basesize);
cgdirty();
dirty(cgbp);
}
if (bkgrdflag != 0 || usedsoftdep || debug)
update_maps(cg, newcg, bkgrdflag);
@ -340,7 +341,7 @@ pass5(void)
dofix(&idesc[1], "BLK(S) MISSING IN BIT MAPS")) {
memmove(cg_inosused(cg), cg_inosused(newcg),
(size_t)mapsize);
cgdirty();
dirty(cgbp);
}
}
if (cursnapshot == 0 &&

View File

@ -240,7 +240,7 @@ setup(char *dev)
* read in the summary info.
*/
asked = 0;
sblock.fs_csp = calloc(1, sblock.fs_cssize);
sblock.fs_csp = Calloc(1, sblock.fs_cssize);
if (sblock.fs_csp == NULL) {
printf("cannot alloc %u bytes for cg summary info\n",
(unsigned)sblock.fs_cssize);
@ -265,13 +265,13 @@ setup(char *dev)
* allocate and initialize the necessary maps
*/
bmapsize = roundup(howmany(maxfsblock, CHAR_BIT), sizeof(short));
blockmap = calloc((unsigned)bmapsize, sizeof (char));
blockmap = Calloc((unsigned)bmapsize, sizeof (char));
if (blockmap == NULL) {
printf("cannot alloc %u bytes for blockmap\n",
(unsigned)bmapsize);
goto badsb;
}
inostathead = calloc((unsigned)(sblock.fs_ncg),
inostathead = Calloc((unsigned)(sblock.fs_ncg),
sizeof(struct inostatlist));
if (inostathead == NULL) {
printf("cannot alloc %u bytes for inostathead\n",
@ -282,9 +282,9 @@ setup(char *dev)
dirhash = numdirs;
inplast = 0;
listmax = numdirs + 10;
inpsort = (struct inoinfo **)calloc((unsigned)listmax,
inpsort = (struct inoinfo **)Calloc((unsigned)listmax,
sizeof(struct inoinfo *));
inphead = (struct inoinfo **)calloc((unsigned)numdirs,
inphead = (struct inoinfo **)Calloc((unsigned)numdirs,
sizeof(struct inoinfo *));
if (inpsort == NULL || inphead == NULL) {
printf("cannot alloc %ju bytes for inphead\n",
@ -444,8 +444,8 @@ sblock_init(void)
lfdir = 0;
initbarea(&sblk, BT_SUPERBLK);
initbarea(&asblk, BT_SUPERBLK);
sblk.b_un.b_buf = malloc(SBLOCKSIZE);
asblk.b_un.b_buf = malloc(SBLOCKSIZE);
sblk.b_un.b_buf = Malloc(SBLOCKSIZE);
asblk.b_un.b_buf = Malloc(SBLOCKSIZE);
if (sblk.b_un.b_buf == NULL || asblk.b_un.b_buf == NULL)
errx(EEXIT, "cannot allocate space for superblock");
if ((lp = getdisklabel(NULL, fsreadfd)))

View File

@ -161,7 +161,7 @@ errmalloc(size_t n)
{
void *a;
a = malloc(n);
a = Malloc(n);
if (a == NULL)
err(EX_OSERR, "malloc(%zu)", n);
return (a);
@ -194,7 +194,7 @@ opendisk(const char *devnam)
{
if (disk != NULL)
return;
disk = malloc(sizeof(*disk));
disk = Malloc(sizeof(*disk));
if (disk == NULL)
err(EX_OSERR, "malloc(%zu)", sizeof(*disk));
if (ufs_disk_fillout(disk, devnam) == -1) {