After a crash, a file that extends into indirect blocks may end up

shorter than its size resulting in a hole as its final block (which
is a violation of the invariants of the UFS filesystem).

Soft updates will always ensure that the file size is correct when
writing inodes to disk for files that contain only direct block
pointers. However, soft updates does not roll back sizes for files
with indirect blocks that it has set to unallocated because their
contents have not yet been written to disk. Hence, the file can
appear to have a hole at its end because the block pointer has been
rolled back to zero when its inode was written to disk. Thus,
fsck_ffs calculates the last allocated block in the file. For files
that extend into indirect blocks, fsck_ffs checks for a size past
the last allocated block of the file and if that is found, shortens
the file to reference the last allocated block thus avoiding having
it reference a hole at its end.

Submitted by: Chuck Silvers <chs@netflix.com>
Tested by:    Chuck Silvers <chs@netflix.com>
MFC after:    1 week
Sponsored by: Netflix
This commit is contained in:
Kirk McKusick 2019-02-25 21:58:19 +00:00
parent 2528b7e2cb
commit ac4b20a0a7
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=344552
7 changed files with 100 additions and 1 deletions

View File

@ -232,6 +232,7 @@ struct inodesc {
ufs_lbn_t id_lbn; /* logical block number of current block */
ufs2_daddr_t id_blkno; /* current block number being examined */
int id_numfrags; /* number of frags contained in block */
ufs_lbn_t id_lballoc; /* pass1: last LBN that is allocated */
off_t id_filesize; /* for DATA nodes, the size of the directory */
ufs2_daddr_t id_entryno;/* for DATA nodes, current entry number */
int id_loc; /* for DATA nodes, current location in dir */
@ -291,6 +292,7 @@ extern long countdirs; /* number of directories we actually found */
#define MIBSIZE 3 /* size of fsck sysctl MIBs */
extern int adjrefcnt[MIBSIZE]; /* MIB command to adjust inode reference cnt */
extern int adjblkcnt[MIBSIZE]; /* MIB command to adjust inode block count */
extern int setsize[MIBSIZE]; /* MIB command to set inode size */
extern int adjndir[MIBSIZE]; /* MIB command to adjust number of directories */
extern int adjnbfree[MIBSIZE]; /* MIB command to adjust number of free blocks */
extern int adjnifree[MIBSIZE]; /* MIB command to adjust number of free inodes */

View File

@ -63,6 +63,7 @@ unsigned long numdirs, listmax;
long countdirs; /* number of directories we actually found */
int adjrefcnt[MIBSIZE]; /* MIB command to adjust inode reference cnt */
int adjblkcnt[MIBSIZE]; /* MIB command to adjust inode block count */
int setsize[MIBSIZE]; /* MIB command to set inode size */
int adjndir[MIBSIZE]; /* MIB command to adjust number of directories */
int adjnbfree[MIBSIZE]; /* MIB command to adjust number of free blocks */
int adjnifree[MIBSIZE]; /* MIB command to adjust number of free inodes */
@ -131,6 +132,7 @@ fsckinit(void)
countdirs = 0;
bzero(adjrefcnt, sizeof(int) * MIBSIZE);
bzero(adjblkcnt, sizeof(int) * MIBSIZE);
bzero(setsize, sizeof(int) * MIBSIZE);
bzero(adjndir, sizeof(int) * MIBSIZE);
bzero(adjnbfree, sizeof(int) * MIBSIZE);
bzero(adjnifree, sizeof(int) * MIBSIZE);

View File

@ -247,6 +247,7 @@ checkinode(ino_t inumber, struct inodesc *idesc, int rebuildcg)
off_t kernmaxfilesize;
ufs2_daddr_t ndb;
mode_t mode;
uintmax_t fixsize;
int j, ret, offset;
if ((dp = getnextinode(inumber, rebuildcg)) == NULL)
@ -377,6 +378,7 @@ checkinode(ino_t inumber, struct inodesc *idesc, int rebuildcg)
idesc->id_type = SNAP;
else
idesc->id_type = ADDR;
idesc->id_lballoc = -1;
(void)ckinode(dp, idesc);
if (sblock.fs_magic == FS_UFS2_MAGIC && dp->dp2.di_extsize > 0) {
idesc->id_type = ADDR;
@ -422,6 +424,46 @@ checkinode(ino_t inumber, struct inodesc *idesc, int rebuildcg)
rwerror("ADJUST INODE BLOCK COUNT", cmd.value);
}
}
/*
* Soft updates will always ensure that the file size is correct
* for files that contain only direct block pointers. However
* soft updates does not roll back sizes for files with indirect
* blocks that it has set to unallocated because their contents
* have not yet been written to disk. Hence, the file can appear
* to have a hole at its end because the block pointer has been
* rolled back to zero. Thus, id_lballoc tracks the last allocated
* block in the file. Here, for files that extend into indirect
* blocks, we check for a size past the last allocated block of
* the file and if that is found, shorten the file to reference
* the last allocated block to avoid having it reference a hole
* at its end.
*/
if (DIP(dp, di_size) > UFS_NDADDR * sblock.fs_bsize &&
idesc->id_lballoc != lblkno(&sblock, DIP(dp, di_size) - 1)) {
fixsize = lblktosize(&sblock, idesc->id_lballoc + 1);
pwarn("INODE %lu: FILE SIZE %ju BEYOND END OF ALLOCATED FILE, "
"SIZE SHOULD BE %ju", (u_long)inumber,
(uintmax_t)DIP(dp, di_size), fixsize);
if (preen)
printf(" (ADJUSTED)\n");
else if (reply("ADJUST") == 0)
return (1);
if (bkgrdflag == 0) {
dp = ginode(inumber);
DIP_SET(dp, di_size, fixsize);
inodirty(dp);
} else {
cmd.value = idesc->id_number;
cmd.size = fixsize;
if (debug)
printf("setsize ino %ju size set to %ju\n",
(uintmax_t)cmd.value, (uintmax_t)cmd.size);
if (sysctl(setsize, MIBSIZE, 0, 0,
&cmd, sizeof cmd) == -1)
rwerror("SET INODE SIZE", cmd.value);
}
}
return (1);
unknown:
pfatal("UNKNOWN FILE TYPE I=%lu", (u_long)inumber);
@ -523,5 +565,7 @@ pass1check(struct inodesc *idesc)
*/
idesc->id_entryno++;
}
if (idesc->id_lballoc == -1 || idesc->id_lballoc < idesc->id_lbn)
idesc->id_lballoc = idesc->id_lbn;
return (res);
}

View File

@ -140,6 +140,7 @@ setup(char *dev)
size = MIBSIZE;
if (sysctlnametomib("vfs.ffs.adjrefcnt", adjrefcnt, &size) < 0||
sysctlnametomib("vfs.ffs.adjblkcnt", adjblkcnt, &size) < 0||
sysctlnametomib("vfs.ffs.setsize", setsize, &size) < 0 ||
sysctlnametomib("vfs.ffs.freefiles", freefiles, &size) < 0||
sysctlnametomib("vfs.ffs.freedirs", freedirs, &size) < 0 ||
sysctlnametomib("vfs.ffs.freeblks", freeblks, &size) < 0) {

View File

@ -157,6 +157,7 @@ CMDFUNC(chctime); /* Change ctime */
CMDFUNC(chatime); /* Change atime */
CMDFUNC(chinum); /* Change inode # of dirent */
CMDFUNC(chname); /* Change dirname of dirent */
CMDFUNC(chsize); /* Change size */
struct cmdtable cmds[] = {
{ "help", "Print out help", 1, 1, FL_RO, helpfn },
@ -186,6 +187,7 @@ struct cmdtable cmds[] = {
{ "chgrp", "Change group of current inode to GROUP", 2, 2, FL_WR, chgroup },
{ "chflags", "Change flags of current inode to FLAGS", 2, 2, FL_WR, chaflags },
{ "chgen", "Change generation number of current inode to GEN", 2, 2, FL_WR, chgen },
{ "chsize", "Change size of current inode to SIZE", 2, 2, FL_WR, chsize },
{ "btime", "Change btime of current inode to BTIME", 2, 2, FL_WR, chbtime },
{ "mtime", "Change mtime of current inode to MTIME", 2, 2, FL_WR, chmtime },
{ "ctime", "Change ctime of current inode to CTIME", 2, 2, FL_WR, chctime },
@ -1017,6 +1019,31 @@ CMDFUNCSTART(chgen)
return rval;
}
/*
 * fsdb "chsize" command: set the size of the current inode to SIZE.
 *
 * argv[1] is parsed with strtoll() using base 0, so decimal, octal
 * (leading "0") and hexadecimal (leading "0x") spellings are accepted.
 * The argument is rejected if it is empty, has trailing characters that
 * are not part of the number, or is negative.
 *
 * On success the new size is stored in the di_size field of curinode,
 * the inode is passed to inodirty(), and printactive(0) is called.
 *
 * Returns 1 on every path, including the error paths.
 */
CMDFUNCSTART(chsize)
{
	int rval = 1;
	off_t size;
	char *cp;

	/* NOTE(review): presumably checks that an inode is selected - confirm. */
	if (!checkactive())
		return 1;

	size = strtoll(argv[1], &cp, 0);
	/* No digits consumed, or junk left after the number. */
	if (cp == argv[1] || *cp != '\0') {
		warnx("bad size `%s'", argv[1]);
		return 1;
	}

	if (size < 0) {
		/* warnx() appends the newline itself; do not add another. */
		warnx("size set to negative (%jd)", (intmax_t)size);
		return 1;
	}

	DIP_SET(curinode, di_size, size);
	inodirty(curinode);
	printactive(0);
	return rval;
}
CMDFUNCSTART(linkcount)
{
int rval = 1;

View File

@ -3037,6 +3037,8 @@ ffs_fserr(fs, inum, cp)
* the count to zero will cause the inode to be freed.
* adjblkcnt(inode, amt) - adjust the number of blocks used by the
* inode by the specified amount.
* setsize(inode, size) - set the size of the inode to the
* specified size.
* adjndir, adjbfree, adjifree, adjffree, adjnumclusters(amt) -
* adjust the superblock summary.
* freedirs(inode, count) - directory inodes [inode..inode + count - 1]
@ -3078,6 +3080,9 @@ SYSCTL_PROC(_vfs_ffs, FFS_ADJ_REFCNT, adjrefcnt, CTLFLAG_WR|CTLTYPE_STRUCT,
static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_BLKCNT, adjblkcnt, CTLFLAG_WR,
sysctl_ffs_fsck, "Adjust Inode Used Blocks Count");
static SYSCTL_NODE(_vfs_ffs, FFS_SET_SIZE, setsize, CTLFLAG_WR,
sysctl_ffs_fsck, "Set the inode size");
static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NDIR, adjndir, CTLFLAG_WR,
sysctl_ffs_fsck, "Adjust number of directories");
@ -3230,6 +3235,23 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
vput(vp);
break;
case FFS_SET_SIZE:
#ifdef DEBUG
if (fsckcmds) {
printf("%s: set inode %jd size to %jd\n",
mp->mnt_stat.f_mntonname, (intmax_t)cmd.value,
(intmax_t)cmd.size);
}
#endif /* DEBUG */
if ((error = ffs_vget(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &vp)))
break;
ip = VTOI(vp);
DIP_SET(ip, i_size, cmd.size);
ip->i_flag |= IN_CHANGE | IN_MODIFIED;
error = ffs_update(vp, 1);
vput(vp);
break;
case FFS_DIR_FREE:
filetype = IFDIR;
/* fall through */

View File

@ -221,7 +221,8 @@
#define FFS_UNLINK 14 /* remove a name in the filesystem */
#define FFS_SET_INODE 15 /* update an on-disk inode */
#define FFS_SET_BUFOUTPUT 16 /* set buffered writing on descriptor */
#define FFS_MAXID 16 /* number of valid ffs ids */
#define FFS_SET_SIZE 17 /* set inode size */
#define FFS_MAXID 17 /* number of valid ffs ids */
/*
* Command structure passed in to the filesystem to adjust filesystem values.