From f55ff3f3efac63d2f4e5125ccdb454cc52f3eff1 Mon Sep 17 00:00:00 2001 From: Ian Dowse Date: Mon, 15 Jan 2001 18:30:40 +0000 Subject: [PATCH] The ffs superblock includes a 128-byte region for use by temporary in-core pointers to summary information. An array in this region (fs_csp) could overflow on filesystems with a very large number of cylinder groups (~16000 on i386 with 8k blocks). When this happens, other fields in the superblock get corrupted, and fsck refuses to check the filesystem. Solve this problem by replacing the fs_csp array in 'struct fs' with a single pointer, and add padding to keep the length of the 128-byte region fixed. Update the kernel and userland utilities to use just this single pointer. With this change, the kernel no longer makes use of the superblock fields 'fs_csshift' and 'fs_csmask'. Add a comment to newfs/mkfs.c to indicate that these fields must be calculated for compatibility with older kernels. Reviewed by: mckusick --- sbin/dumpfs/dumpfs.c | 4 ++-- sbin/fsck_ffs/fsutil.c | 2 +- sbin/fsck_ffs/setup.c | 7 ++++--- sbin/fsck_ifs/fsutil.c | 2 +- sbin/fsck_ifs/setup.c | 7 ++++--- sbin/newfs/mkfs.c | 5 +++++ sys/ufs/ffs/ffs_snapshot.c | 5 ++++- sys/ufs/ffs/ffs_vfsops.c | 31 ++++++++++++++++--------------- sys/ufs/ffs/fs.h | 34 ++++++++++++++++------------------ 9 files changed, 53 insertions(+), 44 deletions(-) diff --git a/sbin/dumpfs/dumpfs.c b/sbin/dumpfs/dumpfs.c index 8e0416e74dea..b5d1df012e6d 100644 --- a/sbin/dumpfs/dumpfs.c +++ b/sbin/dumpfs/dumpfs.c @@ -205,15 +205,15 @@ dumpfs(name) } } printf("\ncs[].cs_(nbfree,ndir,nifree,nffree):\n\t"); + afs.fs_csp = calloc(1, afs.fs_cssize); for (i = 0, j = 0; i < afs.fs_cssize; i += afs.fs_bsize, j++) { size = afs.fs_cssize - i < afs.fs_bsize ? afs.fs_cssize - i : afs.fs_bsize; - afs.fs_csp[j] = calloc(1, size); if (lseek(fd, (off_t)(fsbtodb(&afs, (afs.fs_csaddr + j * afs.fs_frag))) * (off_t)dev_bsize, SEEK_SET) == (off_t)-1) goto err; - if (read(fd, afs.fs_csp[j], size) != size) + if (read(fd, (char *)afs.fs_csp + i, size) != size) goto err; } for (i = 0; i < afs.fs_ncg; i++) { diff --git a/sbin/fsck_ffs/fsutil.c b/sbin/fsck_ffs/fsutil.c index 2cdb25253b4b..2217f3c8a85b 100644 --- a/sbin/fsck_ffs/fsutil.c +++ b/sbin/fsck_ffs/fsutil.c @@ -249,7 +249,7 @@ flush(fd, bp) if (bp != &sblk) return; for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, j++) { - bwrite(fswritefd, (char *)sblock.fs_csp[j], + bwrite(fswritefd, (char *)sblock.fs_csp + i, fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag), sblock.fs_cssize - i < sblock.fs_bsize ? sblock.fs_cssize - i : sblock.fs_bsize); diff --git a/sbin/fsck_ffs/setup.c b/sbin/fsck_ffs/setup.c index 6c4098c11bc7..0c2e8ac0552e 100644 --- a/sbin/fsck_ffs/setup.c +++ b/sbin/fsck_ffs/setup.c @@ -258,11 +258,11 @@ setup(dev) * read in the summary info. */ asked = 0; + sblock.fs_csp = calloc(1, sblock.fs_cssize); for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, j++) { size = sblock.fs_cssize - i < sblock.fs_bsize ? sblock.fs_cssize - i : sblock.fs_bsize; - sblock.fs_csp[j] = (struct csum *)calloc(1, (unsigned)size); - if (bread(fsreadfd, (char *)sblock.fs_csp[j], + if (bread(fsreadfd, (char *)sblock.fs_csp + i, fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag), size) != 0 && !asked) { pfatal("BAD SUMMARY INFORMATION"); @@ -380,7 +380,8 @@ readsb(listerr) altsblock.fs_optim = sblock.fs_optim; altsblock.fs_rotdelay = sblock.fs_rotdelay; altsblock.fs_maxbpg = sblock.fs_maxbpg; - memmove(altsblock.fs_csp, sblock.fs_csp, sizeof sblock.fs_csp); + memmove(altsblock.fs_ocsp, sblock.fs_ocsp, sizeof sblock.fs_ocsp); + altsblock.fs_csp = sblock.fs_csp; altsblock.fs_maxcluster = sblock.fs_maxcluster; memmove(altsblock.fs_fsmnt, sblock.fs_fsmnt, sizeof sblock.fs_fsmnt); memmove(altsblock.fs_snapinum, sblock.fs_snapinum, diff --git a/sbin/fsck_ifs/fsutil.c b/sbin/fsck_ifs/fsutil.c index 2cdb25253b4b..2217f3c8a85b 100644 --- a/sbin/fsck_ifs/fsutil.c +++ b/sbin/fsck_ifs/fsutil.c @@ -249,7 +249,7 @@ flush(fd, bp) if (bp != &sblk) return; for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, j++) { - bwrite(fswritefd, (char *)sblock.fs_csp[j], + bwrite(fswritefd, (char *)sblock.fs_csp + i, fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag), sblock.fs_cssize - i < sblock.fs_bsize ? sblock.fs_cssize - i : sblock.fs_bsize); diff --git a/sbin/fsck_ifs/setup.c b/sbin/fsck_ifs/setup.c index b9c3148c8ba5..52fb8490e2fc 100644 --- a/sbin/fsck_ifs/setup.c +++ b/sbin/fsck_ifs/setup.c @@ -258,11 +258,11 @@ setup(dev) * read in the summary info. */ asked = 0; + sblock.fs_csp = calloc(1, sblock.fs_cssize); for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, j++) { size = sblock.fs_cssize - i < sblock.fs_bsize ? sblock.fs_cssize - i : sblock.fs_bsize; - sblock.fs_csp[j] = (struct csum *)calloc(1, (unsigned)size); - if (bread(fsreadfd, (char *)sblock.fs_csp[j], + if (bread(fsreadfd, (char *)sblock.fs_csp + i, fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag), size) != 0 && !asked) { pfatal("BAD SUMMARY INFORMATION"); @@ -382,7 +382,8 @@ readsb(listerr) altsblock.fs_optim = sblock.fs_optim; altsblock.fs_rotdelay = sblock.fs_rotdelay; altsblock.fs_maxbpg = sblock.fs_maxbpg; - memmove(altsblock.fs_csp, sblock.fs_csp, sizeof sblock.fs_csp); + memmove(altsblock.fs_ocsp, sblock.fs_ocsp, sizeof sblock.fs_ocsp); + altsblock.fs_csp = sblock.fs_csp; altsblock.fs_maxcluster = sblock.fs_maxcluster; memmove(altsblock.fs_fsmnt, sblock.fs_fsmnt, sizeof sblock.fs_fsmnt); memmove(altsblock.fs_snapinum, sblock.fs_snapinum, diff --git a/sbin/newfs/mkfs.c b/sbin/newfs/mkfs.c index 7e5ea14dedde..4cf854b1e38e 100644 --- a/sbin/newfs/mkfs.c +++ b/sbin/newfs/mkfs.c @@ -633,6 +633,11 @@ mkfs(pp, fsys, fi, fo) sblock.fs_csaddr = cgdmin(&sblock, 0); sblock.fs_cssize = fragroundup(&sblock, sblock.fs_ncg * sizeof(struct csum)); + /* + * The superblock fields 'fs_csmask' and 'fs_csshift' are no + * longer used. However, we still initialise them so that the + * filesystem remains compatible with old kernels. + */ i = sblock.fs_bsize / sizeof(struct csum); sblock.fs_csmask = ~(i - 1); for (sblock.fs_csshift = 0; i > 1; i >>= 1) diff --git a/sys/ufs/ffs/ffs_snapshot.c b/sys/ufs/ffs/ffs_snapshot.c index 6097de54a93b..75c31384d4ce 100644 --- a/sys/ufs/ffs/ffs_snapshot.c +++ b/sys/ufs/ffs/ffs_snapshot.c @@ -83,6 +83,7 @@ ffs_snapshot(mp, snapfile) int error, cg, snaploc, indiroff, numblks; int i, size, base, len, loc, inoblkcnt; int blksperindir, flag = mp->mnt_flag; + void *space; struct fs *copy_fs, *fs = VFSTOUFS(mp)->um_fs; struct proc *p = CURPROC; struct inode *devip, *ip, *xp; @@ -346,6 +347,7 @@ ffs_snapshot(mp, snapfile) blkno = fragstoblks(fs, fs->fs_csaddr); len = howmany(fs->fs_cssize, fs->fs_bsize) - 1; size = fs->fs_bsize; + space = fs->fs_csp; for (loc = 0; loc <= len; loc++) { error = VOP_BALLOC(vp, lblktosize(fs, (off_t)(blkno + loc)), fs->fs_bsize, KERNCRED, 0, &nbp); @@ -355,7 +357,8 @@ ffs_snapshot(mp, snapfile) readblock(nbp, blkno + loc); size = fs->fs_cssize - loc * fs->fs_bsize; } - bcopy(fs->fs_csp[loc], nbp->b_data, size); + bcopy(space, nbp->b_data, size); + space = (char *)space + size; nbp->b_flags |= B_VALIDSUSPWRT; bawrite(nbp); } diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c index b10339464e6f..c05beef25be9 100644 --- a/sys/ufs/ffs/ffs_vfsops.c +++ b/sys/ufs/ffs/ffs_vfsops.c @@ -365,7 +365,7 @@ ffs_reload(mp, cred, p) { register struct vnode *vp, *nvp, *devvp; struct inode *ip; - struct csum *space; + void *space; struct buf *bp; struct fs *fs, *newfs; struct partinfo dpart; @@ -419,7 +419,7 @@ ffs_reload(mp, cred, p) * new superblock. These should really be in the ufsmount. XXX * Note that important parameters (eg fs_ncg) are unchanged. */ - bcopy(&fs->fs_csp[0], &newfs->fs_csp[0], sizeof(fs->fs_csp)); + newfs->fs_csp = fs->fs_csp; newfs->fs_maxcluster = fs->fs_maxcluster; bcopy(newfs, fs, (u_int)fs->fs_sbsize); if (fs->fs_sbsize < SBSIZE) @@ -432,7 +432,7 @@ ffs_reload(mp, cred, p) * Step 3: re-read summary information from disk. */ blks = howmany(fs->fs_cssize, fs->fs_fsize); - space = fs->fs_csp[0]; + space = fs->fs_csp; for (i = 0; i < blks; i += fs->fs_frag) { size = fs->fs_bsize; if (i + fs->fs_frag > blks) @@ -441,7 +441,8 @@ ffs_reload(mp, cred, p) NOCRED, &bp); if (error) return (error); - bcopy(bp->b_data, fs->fs_csp[fragstoblks(fs, i)], (u_int)size); + bcopy(bp->b_data, space, (u_int)size); + space = (char *)space + size; brelse(bp); } /* @@ -513,7 +514,7 @@ ffs_mountfs(devvp, mp, p, malloctype) register struct fs *fs; dev_t dev; struct partinfo dpart; - caddr_t base, space; + void *space; int error, i, blks, size, ronly; int32_t *lp; struct ucred *cred; @@ -623,24 +624,24 @@ ffs_mountfs(devvp, mp, p, malloctype) blks = howmany(size, fs->fs_fsize); if (fs->fs_contigsumsize > 0) size += fs->fs_ncg * sizeof(int32_t); - base = space = malloc((u_long)size, M_UFSMNT, M_WAITOK); + space = malloc((u_long)size, M_UFSMNT, M_WAITOK); + fs->fs_csp = space; for (i = 0; i < blks; i += fs->fs_frag) { size = fs->fs_bsize; if (i + fs->fs_frag > blks) size = (blks - i) * fs->fs_fsize; if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size, cred, &bp)) != 0) { - free(base, M_UFSMNT); + free(fs->fs_csp, M_UFSMNT); goto out; } bcopy(bp->b_data, space, (u_int)size); - fs->fs_csp[fragstoblks(fs, i)] = (struct csum *)space; - space += size; + space = (char *)space + size; brelse(bp); bp = NULL; } if (fs->fs_contigsumsize > 0) { - fs->fs_maxcluster = lp = (int32_t *)space; + fs->fs_maxcluster = lp = space; for (i = 0; i < fs->fs_ncg; i++) *lp++ = fs->fs_contigsumsize; } @@ -691,7 +692,7 @@ ffs_mountfs(devvp, mp, p, malloctype) if (ronly == 0) { if ((fs->fs_flags & FS_DOSOFTDEP) && (error = softdep_mount(devvp, mp, fs, cred)) != 0) { - free(base, M_UFSMNT); + free(fs->fs_csp, M_UFSMNT); goto out; } if (fs->fs_snapinum[0] != 0) @@ -808,7 +809,7 @@ ffs_unmount(mp, mntflags, p) vrele(ump->um_devvp); - free(fs->fs_csp[0], M_UFSMNT); + free(fs->fs_csp, M_UFSMNT); free(fs, M_UFSMNT); free(ump, M_UFSMNT); mp->mnt_data = (qaddr_t)0; @@ -1263,14 +1264,14 @@ ffs_sbupdate(mp, waitfor) register struct fs *dfs, *fs = mp->um_fs; register struct buf *bp; int blks; - caddr_t space; + void *space; int i, size, error, allerror = 0; /* * First write back the summary information. */ blks = howmany(fs->fs_cssize, fs->fs_fsize); - space = (caddr_t)fs->fs_csp[0]; + space = fs->fs_csp; for (i = 0; i < blks; i += fs->fs_frag) { size = fs->fs_bsize; if (i + fs->fs_frag > blks) @@ -1278,7 +1279,7 @@ ffs_sbupdate(mp, waitfor) bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i), size, 0, 0); bcopy(space, bp->b_data, (u_int)size); - space += size; + space = (char *)space + size; if (waitfor != MNT_WAIT) bawrite(bp); else if ((error = bwrite(bp)) != 0) diff --git a/sys/ufs/ffs/fs.h b/sys/ufs/ffs/fs.h index 5d76aeecc34b..515c065ed2f0 100644 --- a/sys/ufs/ffs/fs.h +++ b/sys/ufs/ffs/fs.h @@ -106,14 +106,17 @@ #define MAXMNTLEN 512 /* - * The limit on the amount of summary information per file system - * is defined by MAXCSBUFS. It is currently parameterized for a - * size of 128 bytes (2 million cylinder groups on machines with - * 32-bit pointers, and 1 million on 64-bit machines). One pointer - * is taken away to point to an array of cluster sizes that is - * computed as cylinder groups are inspected. + * There is a 128-byte region in the superblock reserved for in-core + * pointers to summary information. Originally this included an array + * of pointers to blocks of struct csum; now there are just two + * pointers and the remaining space is padded with fs_ocsp[]. + * + * NOCSPTRS determines the size of this padding. One pointer (fs_csp) + * is taken away to point to a contiguous array of struct csum for + * all cylinder groups; a second (fs_maxcluster) points to an array + * of cluster sizes that is computed as cylinder groups are inspected. */ -#define MAXCSBUFS ((128 / sizeof(void *)) - 1) +#define NOCSPTRS ((128 / sizeof(void *)) - 2) /* * A summary of contiguous blocks of various sizes is maintained @@ -167,9 +170,6 @@ * from first cylinder group data blocks. These blocks have to be * read in from fs_csaddr (size fs_cssize) in addition to the * super block. - * - * N.B. sizeof(struct csum) must be a power of two in order for - * the ``fs_cs'' macro to work (see below). */ struct csum { int32_t cs_ndir; /* number of directories */ @@ -213,8 +213,8 @@ struct fs { int32_t fs_fragshift; /* block to frag shift */ int32_t fs_fsbtodb; /* fsbtodb and dbtofsb shift constant */ int32_t fs_sbsize; /* actual size of super block */ - int32_t fs_csmask; /* csum block offset */ - int32_t fs_csshift; /* csum block number */ + int32_t fs_csmask; /* csum block offset (now unused) */ + int32_t fs_csshift; /* csum block number (now unused) */ int32_t fs_nindir; /* value of NINDIR */ int32_t fs_inopb; /* value of INOPB */ int32_t fs_nspf; /* value of NSPF */ @@ -250,8 +250,9 @@ struct fs { u_char fs_fsmnt[MAXMNTLEN]; /* name mounted on */ /* these fields retain the current block allocation info */ int32_t fs_cgrotor; /* last cg searched */ - struct csum *fs_csp[MAXCSBUFS];/* list of fs_cs info buffers */ - int32_t *fs_maxcluster; /* max cluster in each cyl group */ + void *fs_ocsp[NOCSPTRS]; /* padding; was list of fs_cs buffers */ + struct csum *fs_csp; /* cg summary info buffer for fs_cs */ + int32_t *fs_maxcluster; /* max cluster in each cyl group */ int32_t fs_cpc; /* cyl per cycle in postbl */ int16_t fs_opostbl[16][8]; /* old rotation block list head */ int32_t fs_snapinum[FSMAXSNAP];/* list of snapshot inode numbers */ @@ -328,11 +329,8 @@ struct fs { /* * Convert cylinder group to base address of its global summary info. - * - * N.B. This macro assumes that sizeof(struct csum) is a power of two. */ -#define fs_cs(fs, indx) \ - fs_csp[(indx) >> (fs)->fs_csshift][(indx) & ~(fs)->fs_csmask] +#define fs_cs(fs, indx) fs_csp[indx] /* * Cylinder group block for a file system.