Continuing efforts to provide hardening of FFS. This change adds a

check hash to the filesystem inodes. Access attempts to files
associated with an inode with an invalid check hash will fail with
EINVAL (Invalid argument). Access is reestablished after an fsck
is run to find and validate the inodes with invalid check-hashes.
This check avoids a class of filesystem panics related to corrupted
inodes. The hash is done using crc32c.

Note this check-hash is for the inode itself and not any of its
indirect blocks. Check-hash validation may be extended to also
cover indirect block pointers, but that will be a separate (and
more costly) feature.

Check hashes are added only to UFS2 and not to UFS1 as UFS1 is
primarily used in embedded systems with small memories and low-powered
processors which need as light-weight a filesystem as possible.

Reviewed by:  kib
Tested by:    Peter Holm
Sponsored by: Netflix
This commit is contained in:
Kirk McKusick 2018-12-11 22:14:37 +00:00
parent d3cc40300e
commit 8f829a5cf0
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=341836
14 changed files with 107 additions and 8 deletions

View File

@ -90,7 +90,10 @@ gotit: switch (disk->d_ufs) {
disk->d_dp.dp2 = &((struct ufs2_dinode *)inoblock)[inum - min];
if (dp != NULL)
*dp = disk->d_dp;
return (0);
if (ffs_verify_dinode_ckhash(fs, disk->d_dp.dp2) == 0)
return (0);
ERROR(disk, "check-hash failed for inode read from disk");
return (-1);
default:
break;
}
@ -108,6 +111,8 @@ putinode(struct uufsd *disk)
ERROR(disk, "No inode block allocated");
return (-1);
}
if (disk->d_ufs == 2)
ffs_update_dinode_ckhash(fs, disk->d_dp.dp2);
if (bwrite(disk, fsbtodb(fs, ino_to_fsba(&disk->d_fs, disk->d_inomin)),
disk->d_inoblock, disk->d_fs.fs_bsize) <= 0)
return (-1);

View File

@ -116,6 +116,8 @@ int ffs_sbget(void *, struct fs **, off_t, char *,
int (*)(void *, off_t, void **, int));
int ffs_sbput(void *, struct fs *, off_t,
int (*)(void *, off_t, void *, int));
void ffs_update_dinode_ckhash(struct fs *, struct ufs2_dinode *);
int ffs_verify_dinode_ckhash(struct fs *, struct ufs2_dinode *);
/*
* Request standard superblock location in ffs_sbget

View File

@ -286,6 +286,7 @@ union dinode *
ginode(ino_t inumber)
{
ufs2_daddr_t iblk;
union dinode *dp;
if (inumber < UFS_ROOTINO || inumber > maxino)
errx(EEXIT, "bad inode number %ju to ginode",
@ -301,7 +302,17 @@ ginode(ino_t inumber)
if (sblock.fs_magic == FS_UFS1_MAGIC)
return ((union dinode *)
&pbp->b_un.b_dinode1[inumber % INOPB(&sblock)]);
return ((union dinode *)&pbp->b_un.b_dinode2[inumber % INOPB(&sblock)]);
dp = (union dinode *)&pbp->b_un.b_dinode2[inumber % INOPB(&sblock)];
if (ffs_verify_dinode_ckhash(&sblock, (struct ufs2_dinode *)dp) != 0) {
pwarn("INODE CHECK-HASH FAILED");
prtinode(inumber, dp);
if (preen || reply("FIX") != 0) {
if (preen)
printf(" (FIXED)\n");
inodirty(dp);
}
}
return (dp);
}
/*
@ -347,6 +358,21 @@ getnextinode(ino_t inumber, int rebuildcg)
nextinop += sizeof(struct ufs1_dinode);
else
nextinop += sizeof(struct ufs2_dinode);
if ((ckhashadd & CK_INODE) != 0) {
ffs_update_dinode_ckhash(&sblock, (struct ufs2_dinode *)dp);
dirty(&inobuf);
}
if (ffs_verify_dinode_ckhash(&sblock, (struct ufs2_dinode *)dp) != 0) {
pwarn("INODE CHECK-HASH FAILED");
prtinode(inumber, dp);
if (preen || reply("FIX") != 0) {
if (preen)
printf(" (FIXED)\n");
ffs_update_dinode_ckhash(&sblock,
(struct ufs2_dinode *)dp);
dirty(&inobuf);
}
}
if (rebuildcg && (char *)dp == inobuf.b_un.b_buf) {
/*
* Try to determine if we have reached the end of the
@ -522,6 +548,8 @@ void
inodirty(union dinode *dp)
{
if (sblock.fs_magic == FS_UFS2_MAGIC)
ffs_update_dinode_ckhash(&sblock, (struct ufs2_dinode *)dp);
dirty(pbp);
}

View File

@ -468,13 +468,13 @@ checkfilesys(char *filesys)
ckhashadd |= CK_SUPERBLOCK;
sblock.fs_metackhash |= CK_SUPERBLOCK;
}
#ifdef notyet
if ((sblock.fs_metackhash & CK_INODE) == 0 &&
getosreldate() >= P_OSREL_CK_INODE &&
reply("ADD INODE CHECK-HASH PROTECTION") != 0) {
ckhashadd |= CK_INODE;
sblock.fs_metackhash |= CK_INODE;
}
#ifdef notyet
if ((sblock.fs_metackhash & CK_INDIR) == 0 &&
getosreldate() >= P_OSREL_CK_INDIR &&
reply("ADD INDIRECT BLOCK CHECK-HASH PROTECTION") != 0) {

View File

@ -202,6 +202,7 @@ fsirand(char *device)
dp1++;
} else {
dp2->di_gen = arc4random();
ffs_update_dinode_ckhash(sblock, dp2);
dp2++;
}
}

View File

@ -499,6 +499,8 @@ mkfs(struct partition *pp, char *fsys)
sblock.fs_metackhash |= CK_CYLGRP;
if (getosreldate() >= P_OSREL_CK_SUPERBLOCK)
sblock.fs_metackhash |= CK_SUPERBLOCK;
if (getosreldate() >= P_OSREL_CK_INODE)
sblock.fs_metackhash |= CK_INODE;
}
/*

View File

@ -60,7 +60,7 @@
* in the range 5 to 9.
*/
#undef __FreeBSD_version
#define __FreeBSD_version 1300004 /* Master, propagated to newvers */
#define __FreeBSD_version 1300005 /* Master, propagated to newvers */
/*
* __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
@ -89,6 +89,7 @@
#define P_OSREL_CK_CYLGRP 1200046
#define P_OSREL_VMTOTAL64 1200054
#define P_OSREL_CK_SUPERBLOCK 1300000
#define P_OSREL_CK_INODE 1300005
#define P_OSREL_MAJOR(x) ((x) / 100000)
#endif

View File

@ -108,6 +108,8 @@ void ffs_sync_snap(struct mount *, int);
int ffs_syncvnode(struct vnode *vp, int waitfor, int flags);
int ffs_truncate(struct vnode *, off_t, int, struct ucred *);
int ffs_update(struct vnode *, int);
void ffs_update_dinode_ckhash(struct fs *, struct ufs2_dinode *);
int ffs_verify_dinode_ckhash(struct fs *, struct ufs2_dinode *);
int ffs_valloc(struct vnode *, int, struct ucred *, struct vnode **);
int ffs_vfree(struct vnode *, ino_t, int);
vfs_vget_t ffs_vget;

View File

@ -154,6 +154,7 @@ ffs_update(vp, waitfor)
*/
random_harvest_queue(&(ip->i_din1), sizeof(ip->i_din1), RANDOM_FS_ATIME);
} else {
ffs_update_dinode_ckhash(fs, ip->i_din2);
*((struct ufs2_dinode *)bp->b_data +
ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2;
/*

View File

@ -1340,6 +1340,8 @@ expunge_ufs2(snapvp, cancelip, fs, acctfunc, expungetype, clearmode)
bzero(&dip->di_db[0], (UFS_NDADDR + UFS_NIADDR) * sizeof(ufs2_daddr_t));
if (clearmode || cancelip->i_effnlink == 0)
dip->di_mode = 0;
else
ffs_update_dinode_ckhash(fs, dip);
bdwrite(bp);
/*
* Now go through and expunge all the blocks in the file

View File

@ -6702,6 +6702,7 @@ softdep_journal_freeblocks(ip, cred, length, flags)
*((struct ufs1_dinode *)bp->b_data +
ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1;
} else {
ffs_update_dinode_ckhash(fs, ip->i_din2);
*((struct ufs2_dinode *)bp->b_data +
ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2;
}
@ -6957,6 +6958,7 @@ softdep_setup_freeblocks(ip, length, flags)
dp2 = ((struct ufs2_dinode *)bp->b_data +
ino_to_fsbo(fs, ip->i_number));
ip->i_din2->di_freelink = dp2->di_freelink;
ffs_update_dinode_ckhash(fs, ip->i_din2);
*dp2 = *ip->i_din2;
}
/*
@ -9752,6 +9754,7 @@ clear_unlinked_inodedep(inodedep)
dip = (struct ufs2_dinode *)bp->b_data +
ino_to_fsbo(fs, pino);
dip->di_freelink = nino;
ffs_update_dinode_ckhash(fs, dip);
}
/*
* If the bwrite fails we have no recourse to recover. The
@ -10392,6 +10395,7 @@ initiate_write_inodeblock_ufs2(inodedep, bp)
inon = TAILQ_NEXT(inodedep, id_unlinked);
dp->di_freelink = inon ? inon->id_ino : 0;
ffs_update_dinode_ckhash(fs, dp);
}
/*
* If the bitmap is not yet written, then the allocated
@ -10553,6 +10557,7 @@ initiate_write_inodeblock_ufs2(inodedep, bp)
#endif /* INVARIANTS */
dp->di_ib[i] = 0;
}
ffs_update_dinode_ckhash(fs, dp);
return;
}
/*
@ -10581,6 +10586,7 @@ initiate_write_inodeblock_ufs2(inodedep, bp)
*/
for (; adp; adp = TAILQ_NEXT(adp, ad_next))
dp->di_ib[adp->ad_offset - UFS_NDADDR] = 0;
ffs_update_dinode_ckhash(fs, dp);
}
/*
@ -11619,8 +11625,11 @@ handle_written_inodeblock(inodedep, bp, flags)
* marked dirty so that its will eventually get written back in
* its correct form.
*/
if (hadchanges)
if (hadchanges) {
if (fstype == UFS2)
ffs_update_dinode_ckhash(inodedep->id_fs, dp2);
bdirty(bp);
}
bufwait:
/*
* If the write did not succeed, we have done all the roll-forward

View File

@ -119,6 +119,7 @@ ffs_load_inode(struct buf *bp, struct inode *ip, struct fs *fs, ino_t ino)
{
struct ufs1_dinode *dip1;
struct ufs2_dinode *dip2;
int error;
if (I_IS_UFS1(ip)) {
dip1 = ip->i_din1;
@ -142,10 +143,54 @@ ffs_load_inode(struct buf *bp, struct inode *ip, struct fs *fs, ino_t ino)
ip->i_gen = dip2->di_gen;
ip->i_uid = dip2->di_uid;
ip->i_gid = dip2->di_gid;
return (0);
if ((error = ffs_verify_dinode_ckhash(fs, dip2)) != 0)
printf("Inode %jd: check-hash failed\n", (intmax_t)ino);
return (error);
}
#endif /* _KERNEL */
/*
* Verify an inode check-hash.
*/
int
ffs_verify_dinode_ckhash(struct fs *fs, struct ufs2_dinode *dip)
{
uint32_t save_ckhash;
/*
* Return success if unallocated or we are not doing inode check-hash.
*/
if (dip->di_mode == 0 || (fs->fs_metackhash & CK_INODE) == 0)
return (0);
/*
* Exclude di_ckhash from the crc32 calculation, e.g., always use
* a check-hash value of zero when calculating the check-hash.
*/
save_ckhash = dip->di_ckhash;
dip->di_ckhash = 0;
if (save_ckhash != calculate_crc32c(~0L, (void *)dip, sizeof(*dip)))
return (EINVAL);
dip->di_ckhash = save_ckhash;
return (0);
}
/*
* Update an inode check-hash.
*/
void
ffs_update_dinode_ckhash(struct fs *fs, struct ufs2_dinode *dip)
{
if (dip->di_mode == 0 || (fs->fs_metackhash & CK_INODE) == 0)
return;
/*
* Exclude old di_ckhash from the crc32 calculation, e.g., always use
* a check-hash value of zero when calculating the new check-hash.
*/
dip->di_ckhash = 0;
dip->di_ckhash = calculate_crc32c(~0L, (void *)dip, sizeof(*dip));
}
/*
* These are the low-level functions that actually read and write
* the superblock and its associated data.

View File

@ -817,7 +817,7 @@ ffs_mountfs(devvp, mp, td)
if ((error = ffs_sbget(devvp, &fs, loc, M_UFSMNT, ffs_use_bread)) != 0)
goto out;
/* none of these types of check-hashes are maintained by this kernel */
fs->fs_metackhash &= ~(CK_INODE | CK_INDIR | CK_DIR);
fs->fs_metackhash &= ~(CK_INDIR | CK_DIR);
/* no support for any undefined flags */
fs->fs_flags &= FS_SUPPORTED;
fs->fs_flags &= ~FS_UNCLEAN;

View File

@ -149,7 +149,8 @@ struct ufs2_dinode {
ufs2_daddr_t di_ib[UFS_NIADDR]; /* 208: Indirect disk blocks. */
u_int64_t di_modrev; /* 232: i_modrev for NFSv4 */
uint32_t di_freelink; /* 240: SUJ: Next unlinked inode. */
uint32_t di_spare[3]; /* 244: Reserved; currently unused */
uint32_t di_ckhash; /* 244: if CK_INODE, its check-hash */
uint32_t di_spare[2]; /* 248: Reserved; currently unused */
};
/*