The ffs superblock includes a 128-byte region for use by temporary
in-core pointers to summary information. An array in this region
(fs_csp) could overflow on filesystems with a very large number of
cylinder groups (~16000 on i386 with 8k blocks). When this happens,
other fields in the superblock get corrupted, and fsck refuses to
check the filesystem.

Solve this problem by replacing the fs_csp array in 'struct fs'
with a single pointer, and add padding to keep the length of the
128-byte region fixed. Update the kernel and userland utilities
to use just this single pointer.

With this change, the kernel no longer makes use of the superblock
fields 'fs_csshift' and 'fs_csmask'. Add a comment to newfs/mkfs.c
to indicate that these fields must be calculated for compatibility
with older kernels.

Reviewed by: mckusick
This commit is contained in:
parent
8fbaa337ac
commit
f55ff3f3ef
@ -205,15 +205,15 @@ dumpfs(name)
|
||||
}
|
||||
}
|
||||
printf("\ncs[].cs_(nbfree,ndir,nifree,nffree):\n\t");
|
||||
afs.fs_csp = calloc(1, afs.fs_cssize);
|
||||
for (i = 0, j = 0; i < afs.fs_cssize; i += afs.fs_bsize, j++) {
|
||||
size = afs.fs_cssize - i < afs.fs_bsize ?
|
||||
afs.fs_cssize - i : afs.fs_bsize;
|
||||
afs.fs_csp[j] = calloc(1, size);
|
||||
if (lseek(fd,
|
||||
(off_t)(fsbtodb(&afs, (afs.fs_csaddr + j * afs.fs_frag))) *
|
||||
(off_t)dev_bsize, SEEK_SET) == (off_t)-1)
|
||||
goto err;
|
||||
if (read(fd, afs.fs_csp[j], size) != size)
|
||||
if (read(fd, (char *)afs.fs_csp + i, size) != size)
|
||||
goto err;
|
||||
}
|
||||
for (i = 0; i < afs.fs_ncg; i++) {
|
||||
|
@ -249,7 +249,7 @@ flush(fd, bp)
|
||||
if (bp != &sblk)
|
||||
return;
|
||||
for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, j++) {
|
||||
bwrite(fswritefd, (char *)sblock.fs_csp[j],
|
||||
bwrite(fswritefd, (char *)sblock.fs_csp + i,
|
||||
fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag),
|
||||
sblock.fs_cssize - i < sblock.fs_bsize ?
|
||||
sblock.fs_cssize - i : sblock.fs_bsize);
|
||||
|
@ -258,11 +258,11 @@ setup(dev)
|
||||
* read in the summary info.
|
||||
*/
|
||||
asked = 0;
|
||||
sblock.fs_csp = calloc(1, sblock.fs_cssize);
|
||||
for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, j++) {
|
||||
size = sblock.fs_cssize - i < sblock.fs_bsize ?
|
||||
sblock.fs_cssize - i : sblock.fs_bsize;
|
||||
sblock.fs_csp[j] = (struct csum *)calloc(1, (unsigned)size);
|
||||
if (bread(fsreadfd, (char *)sblock.fs_csp[j],
|
||||
if (bread(fsreadfd, (char *)sblock.fs_csp + i,
|
||||
fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag),
|
||||
size) != 0 && !asked) {
|
||||
pfatal("BAD SUMMARY INFORMATION");
|
||||
@ -380,7 +380,8 @@ readsb(listerr)
|
||||
altsblock.fs_optim = sblock.fs_optim;
|
||||
altsblock.fs_rotdelay = sblock.fs_rotdelay;
|
||||
altsblock.fs_maxbpg = sblock.fs_maxbpg;
|
||||
memmove(altsblock.fs_csp, sblock.fs_csp, sizeof sblock.fs_csp);
|
||||
memmove(altsblock.fs_ocsp, sblock.fs_ocsp, sizeof sblock.fs_ocsp);
|
||||
altsblock.fs_csp = sblock.fs_csp;
|
||||
altsblock.fs_maxcluster = sblock.fs_maxcluster;
|
||||
memmove(altsblock.fs_fsmnt, sblock.fs_fsmnt, sizeof sblock.fs_fsmnt);
|
||||
memmove(altsblock.fs_snapinum, sblock.fs_snapinum,
|
||||
|
@ -249,7 +249,7 @@ flush(fd, bp)
|
||||
if (bp != &sblk)
|
||||
return;
|
||||
for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, j++) {
|
||||
bwrite(fswritefd, (char *)sblock.fs_csp[j],
|
||||
bwrite(fswritefd, (char *)sblock.fs_csp + i,
|
||||
fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag),
|
||||
sblock.fs_cssize - i < sblock.fs_bsize ?
|
||||
sblock.fs_cssize - i : sblock.fs_bsize);
|
||||
|
@ -258,11 +258,11 @@ setup(dev)
|
||||
* read in the summary info.
|
||||
*/
|
||||
asked = 0;
|
||||
sblock.fs_csp = calloc(1, sblock.fs_cssize);
|
||||
for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, j++) {
|
||||
size = sblock.fs_cssize - i < sblock.fs_bsize ?
|
||||
sblock.fs_cssize - i : sblock.fs_bsize;
|
||||
sblock.fs_csp[j] = (struct csum *)calloc(1, (unsigned)size);
|
||||
if (bread(fsreadfd, (char *)sblock.fs_csp[j],
|
||||
if (bread(fsreadfd, (char *)sblock.fs_csp + i,
|
||||
fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag),
|
||||
size) != 0 && !asked) {
|
||||
pfatal("BAD SUMMARY INFORMATION");
|
||||
@ -382,7 +382,8 @@ readsb(listerr)
|
||||
altsblock.fs_optim = sblock.fs_optim;
|
||||
altsblock.fs_rotdelay = sblock.fs_rotdelay;
|
||||
altsblock.fs_maxbpg = sblock.fs_maxbpg;
|
||||
memmove(altsblock.fs_csp, sblock.fs_csp, sizeof sblock.fs_csp);
|
||||
memmove(altsblock.fs_ocsp, sblock.fs_ocsp, sizeof sblock.fs_ocsp);
|
||||
altsblock.fs_csp = sblock.fs_csp;
|
||||
altsblock.fs_maxcluster = sblock.fs_maxcluster;
|
||||
memmove(altsblock.fs_fsmnt, sblock.fs_fsmnt, sizeof sblock.fs_fsmnt);
|
||||
memmove(altsblock.fs_snapinum, sblock.fs_snapinum,
|
||||
|
@ -633,6 +633,11 @@ next:
|
||||
sblock.fs_csaddr = cgdmin(&sblock, 0);
|
||||
sblock.fs_cssize =
|
||||
fragroundup(&sblock, sblock.fs_ncg * sizeof(struct csum));
|
||||
/*
|
||||
* The superblock fields 'fs_csmask' and 'fs_csshift' are no
|
||||
* longer used. However, we still initialise them so that the
|
||||
* filesystem remains compatible with old kernels.
|
||||
*/
|
||||
i = sblock.fs_bsize / sizeof(struct csum);
|
||||
sblock.fs_csmask = ~(i - 1);
|
||||
for (sblock.fs_csshift = 0; i > 1; i >>= 1)
|
||||
|
@ -83,6 +83,7 @@ ffs_snapshot(mp, snapfile)
|
||||
int error, cg, snaploc, indiroff, numblks;
|
||||
int i, size, base, len, loc, inoblkcnt;
|
||||
int blksperindir, flag = mp->mnt_flag;
|
||||
void *space;
|
||||
struct fs *copy_fs, *fs = VFSTOUFS(mp)->um_fs;
|
||||
struct proc *p = CURPROC;
|
||||
struct inode *devip, *ip, *xp;
|
||||
@ -346,6 +347,7 @@ restart:
|
||||
blkno = fragstoblks(fs, fs->fs_csaddr);
|
||||
len = howmany(fs->fs_cssize, fs->fs_bsize) - 1;
|
||||
size = fs->fs_bsize;
|
||||
space = fs->fs_csp;
|
||||
for (loc = 0; loc <= len; loc++) {
|
||||
error = VOP_BALLOC(vp, lblktosize(fs, (off_t)(blkno + loc)),
|
||||
fs->fs_bsize, KERNCRED, 0, &nbp);
|
||||
@ -355,7 +357,8 @@ restart:
|
||||
readblock(nbp, blkno + loc);
|
||||
size = fs->fs_cssize - loc * fs->fs_bsize;
|
||||
}
|
||||
bcopy(fs->fs_csp[loc], nbp->b_data, size);
|
||||
bcopy(space, nbp->b_data, size);
|
||||
space = (char *)space + size;
|
||||
nbp->b_flags |= B_VALIDSUSPWRT;
|
||||
bawrite(nbp);
|
||||
}
|
||||
|
@ -365,7 +365,7 @@ ffs_reload(mp, cred, p)
|
||||
{
|
||||
register struct vnode *vp, *nvp, *devvp;
|
||||
struct inode *ip;
|
||||
struct csum *space;
|
||||
void *space;
|
||||
struct buf *bp;
|
||||
struct fs *fs, *newfs;
|
||||
struct partinfo dpart;
|
||||
@ -419,7 +419,7 @@ ffs_reload(mp, cred, p)
|
||||
* new superblock. These should really be in the ufsmount. XXX
|
||||
* Note that important parameters (eg fs_ncg) are unchanged.
|
||||
*/
|
||||
bcopy(&fs->fs_csp[0], &newfs->fs_csp[0], sizeof(fs->fs_csp));
|
||||
newfs->fs_csp = fs->fs_csp;
|
||||
newfs->fs_maxcluster = fs->fs_maxcluster;
|
||||
bcopy(newfs, fs, (u_int)fs->fs_sbsize);
|
||||
if (fs->fs_sbsize < SBSIZE)
|
||||
@ -432,7 +432,7 @@ ffs_reload(mp, cred, p)
|
||||
* Step 3: re-read summary information from disk.
|
||||
*/
|
||||
blks = howmany(fs->fs_cssize, fs->fs_fsize);
|
||||
space = fs->fs_csp[0];
|
||||
space = fs->fs_csp;
|
||||
for (i = 0; i < blks; i += fs->fs_frag) {
|
||||
size = fs->fs_bsize;
|
||||
if (i + fs->fs_frag > blks)
|
||||
@ -441,7 +441,8 @@ ffs_reload(mp, cred, p)
|
||||
NOCRED, &bp);
|
||||
if (error)
|
||||
return (error);
|
||||
bcopy(bp->b_data, fs->fs_csp[fragstoblks(fs, i)], (u_int)size);
|
||||
bcopy(bp->b_data, space, (u_int)size);
|
||||
space = (char *)space + size;
|
||||
brelse(bp);
|
||||
}
|
||||
/*
|
||||
@ -513,7 +514,7 @@ ffs_mountfs(devvp, mp, p, malloctype)
|
||||
register struct fs *fs;
|
||||
dev_t dev;
|
||||
struct partinfo dpart;
|
||||
caddr_t base, space;
|
||||
void *space;
|
||||
int error, i, blks, size, ronly;
|
||||
int32_t *lp;
|
||||
struct ucred *cred;
|
||||
@ -623,24 +624,24 @@ ffs_mountfs(devvp, mp, p, malloctype)
|
||||
blks = howmany(size, fs->fs_fsize);
|
||||
if (fs->fs_contigsumsize > 0)
|
||||
size += fs->fs_ncg * sizeof(int32_t);
|
||||
base = space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
|
||||
space = malloc((u_long)size, M_UFSMNT, M_WAITOK);
|
||||
fs->fs_csp = space;
|
||||
for (i = 0; i < blks; i += fs->fs_frag) {
|
||||
size = fs->fs_bsize;
|
||||
if (i + fs->fs_frag > blks)
|
||||
size = (blks - i) * fs->fs_fsize;
|
||||
if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size,
|
||||
cred, &bp)) != 0) {
|
||||
free(base, M_UFSMNT);
|
||||
free(fs->fs_csp, M_UFSMNT);
|
||||
goto out;
|
||||
}
|
||||
bcopy(bp->b_data, space, (u_int)size);
|
||||
fs->fs_csp[fragstoblks(fs, i)] = (struct csum *)space;
|
||||
space += size;
|
||||
space = (char *)space + size;
|
||||
brelse(bp);
|
||||
bp = NULL;
|
||||
}
|
||||
if (fs->fs_contigsumsize > 0) {
|
||||
fs->fs_maxcluster = lp = (int32_t *)space;
|
||||
fs->fs_maxcluster = lp = space;
|
||||
for (i = 0; i < fs->fs_ncg; i++)
|
||||
*lp++ = fs->fs_contigsumsize;
|
||||
}
|
||||
@ -691,7 +692,7 @@ ffs_mountfs(devvp, mp, p, malloctype)
|
||||
if (ronly == 0) {
|
||||
if ((fs->fs_flags & FS_DOSOFTDEP) &&
|
||||
(error = softdep_mount(devvp, mp, fs, cred)) != 0) {
|
||||
free(base, M_UFSMNT);
|
||||
free(fs->fs_csp, M_UFSMNT);
|
||||
goto out;
|
||||
}
|
||||
if (fs->fs_snapinum[0] != 0)
|
||||
@ -808,7 +809,7 @@ ffs_unmount(mp, mntflags, p)
|
||||
|
||||
vrele(ump->um_devvp);
|
||||
|
||||
free(fs->fs_csp[0], M_UFSMNT);
|
||||
free(fs->fs_csp, M_UFSMNT);
|
||||
free(fs, M_UFSMNT);
|
||||
free(ump, M_UFSMNT);
|
||||
mp->mnt_data = (qaddr_t)0;
|
||||
@ -1263,14 +1264,14 @@ ffs_sbupdate(mp, waitfor)
|
||||
register struct fs *dfs, *fs = mp->um_fs;
|
||||
register struct buf *bp;
|
||||
int blks;
|
||||
caddr_t space;
|
||||
void *space;
|
||||
int i, size, error, allerror = 0;
|
||||
|
||||
/*
|
||||
* First write back the summary information.
|
||||
*/
|
||||
blks = howmany(fs->fs_cssize, fs->fs_fsize);
|
||||
space = (caddr_t)fs->fs_csp[0];
|
||||
space = fs->fs_csp;
|
||||
for (i = 0; i < blks; i += fs->fs_frag) {
|
||||
size = fs->fs_bsize;
|
||||
if (i + fs->fs_frag > blks)
|
||||
@ -1278,7 +1279,7 @@ ffs_sbupdate(mp, waitfor)
|
||||
bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
|
||||
size, 0, 0);
|
||||
bcopy(space, bp->b_data, (u_int)size);
|
||||
space += size;
|
||||
space = (char *)space + size;
|
||||
if (waitfor != MNT_WAIT)
|
||||
bawrite(bp);
|
||||
else if ((error = bwrite(bp)) != 0)
|
||||
|
@ -106,14 +106,17 @@
|
||||
#define MAXMNTLEN 512
|
||||
|
||||
/*
|
||||
* The limit on the amount of summary information per file system
|
||||
* is defined by MAXCSBUFS. It is currently parameterized for a
|
||||
* size of 128 bytes (2 million cylinder groups on machines with
|
||||
* 32-bit pointers, and 1 million on 64-bit machines). One pointer
|
||||
* is taken away to point to an array of cluster sizes that is
|
||||
* computed as cylinder groups are inspected.
|
||||
* There is a 128-byte region in the superblock reserved for in-core
|
||||
* pointers to summary information. Originally this included an array
|
||||
* of pointers to blocks of struct csum; now there are just two
|
||||
* pointers and the remaining space is padded with fs_ocsp[].
|
||||
*
|
||||
* NOCSPTRS determines the size of this padding. One pointer (fs_csp)
|
||||
* is taken away to point to a contiguous array of struct csum for
|
||||
* all cylinder groups; a second (fs_maxcluster) points to an array
|
||||
* of cluster sizes that is computed as cylinder groups are inspected.
|
||||
*/
|
||||
#define MAXCSBUFS ((128 / sizeof(void *)) - 1)
|
||||
#define NOCSPTRS ((128 / sizeof(void *)) - 2)
|
||||
|
||||
/*
|
||||
* A summary of contiguous blocks of various sizes is maintained
|
||||
@ -167,9 +170,6 @@
|
||||
* from first cylinder group data blocks. These blocks have to be
|
||||
* read in from fs_csaddr (size fs_cssize) in addition to the
|
||||
* super block.
|
||||
*
|
||||
* N.B. sizeof(struct csum) must be a power of two in order for
|
||||
* the ``fs_cs'' macro to work (see below).
|
||||
*/
|
||||
struct csum {
|
||||
int32_t cs_ndir; /* number of directories */
|
||||
@ -213,8 +213,8 @@ struct fs {
|
||||
int32_t fs_fragshift; /* block to frag shift */
|
||||
int32_t fs_fsbtodb; /* fsbtodb and dbtofsb shift constant */
|
||||
int32_t fs_sbsize; /* actual size of super block */
|
||||
int32_t fs_csmask; /* csum block offset */
|
||||
int32_t fs_csshift; /* csum block number */
|
||||
int32_t fs_csmask; /* csum block offset (now unused) */
|
||||
int32_t fs_csshift; /* csum block number (now unused) */
|
||||
int32_t fs_nindir; /* value of NINDIR */
|
||||
int32_t fs_inopb; /* value of INOPB */
|
||||
int32_t fs_nspf; /* value of NSPF */
|
||||
@ -250,8 +250,9 @@ struct fs {
|
||||
u_char fs_fsmnt[MAXMNTLEN]; /* name mounted on */
|
||||
/* these fields retain the current block allocation info */
|
||||
int32_t fs_cgrotor; /* last cg searched */
|
||||
struct csum *fs_csp[MAXCSBUFS];/* list of fs_cs info buffers */
|
||||
int32_t *fs_maxcluster; /* max cluster in each cyl group */
|
||||
void *fs_ocsp[NOCSPTRS]; /* padding; was list of fs_cs buffers */
|
||||
struct csum *fs_csp; /* cg summary info buffer for fs_cs */
|
||||
int32_t *fs_maxcluster; /* max cluster in each cyl group */
|
||||
int32_t fs_cpc; /* cyl per cycle in postbl */
|
||||
int16_t fs_opostbl[16][8]; /* old rotation block list head */
|
||||
int32_t fs_snapinum[FSMAXSNAP];/* list of snapshot inode numbers */
|
||||
@ -328,11 +329,8 @@ struct fs {
|
||||
|
||||
/*
|
||||
* Convert cylinder group to base address of its global summary info.
|
||||
*
|
||||
* N.B. This macro assumes that sizeof(struct csum) is a power of two.
|
||||
*/
|
||||
#define fs_cs(fs, indx) \
|
||||
fs_csp[(indx) >> (fs)->fs_csshift][(indx) & ~(fs)->fs_csmask]
|
||||
#define fs_cs(fs, indx) fs_csp[indx]
|
||||
|
||||
/*
|
||||
* Cylinder group block for a file system.
|
||||
|
Loading…
x
Reference in New Issue
Block a user