freebsd-dev/sbin/fsck_ffs/main.c

823 lines
22 KiB
C
Raw Normal View History

/*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 1980, 1986, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if 0
#ifndef lint
static const char copyright[] =
"@(#) Copyright (c) 1980, 1986, 1993\n\
The Regents of the University of California. All rights reserved.\n";
#endif /* not lint */
#ifndef lint
static char sccsid[] = "@(#)main.c 8.6 (Berkeley) 5/14/95";
#endif /* not lint */
#endif
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
2018-02-08 23:14:24 +00:00
#define IN_RTLD /* So we pickup the P_OSREL defines */
#include <sys/param.h>
#include <sys/file.h>
#include <sys/mount.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/uio.h>
This commit adds basic support for the UFS2 filesystem. The UFS2 filesystem expands the inode to 256 bytes to make space for 64-bit block pointers. It also adds a file-creation time field, an ability to use jumbo blocks per inode to allow extent like pointer density, and space for extended attributes (up to twice the filesystem block size worth of attributes, e.g., on a 16K filesystem, there is space for 32K of attributes). UFS2 fully supports and runs existing UFS1 filesystems. New filesystems built using newfs can be built in either UFS1 or UFS2 format using the -O option. In this commit UFS1 is the default format, so if you want to build UFS2 format filesystems, you must specify -O 2. This default will be changed to UFS2 when UFS2 proves itself to be stable. In this commit the boot code for reading UFS2 filesystems is not compiled (see /sys/boot/common/ufsread.c) as there is insufficient space in the boot block. Once the size of the boot block is increased, this code can be defined. Things to note: the definition of SBSIZE has changed to SBLOCKSIZE. The header file <ufs/ufs/dinode.h> must be included before <ufs/ffs/fs.h> so as to get the definitions of ufs2_daddr_t and ufs_lbn_t. Still TODO: Verify that the first level bootstraps work for all the architectures. Convert the utility ffsinfo to understand UFS2 and test growfs. Add support for the extended attribute storage. Update soft updates to ensure integrity of extended attribute storage. Switch the current extended attribute interfaces to use the extended attribute storage. Add the extent like functionality (framework is there, but is currently never used). Sponsored by: DARPA & NAI Labs. Reviewed by: Poul-Henning Kamp <phk@freebsd.org>
2002-06-21 06:18:05 +00:00
#include <sys/disklabel.h>
#include <ufs/ufs/dinode.h>
#include <ufs/ffs/fs.h>
#include <err.h>
#include <errno.h>
#include <fstab.h>
#include <grp.h>
#include <inttypes.h>
#include <libufs.h>
#include <mntopts.h>
#include <paths.h>
#include <stdint.h>
#include <string.h>
#include <time.h>
#include "fsck.h"
Rewrite the disk I/O management system in fsck_ffs(8). Other than making fsck_ffs(8) run faster, there should be no functional change. The original fsck_ffs(8) had its own disk I/O management system. When gjournal(8) was added to FreeBSD 7, code was added to fsck_ffs(8) to do the necessary gjournal rollback. Rather than use the existing fsck_ffs(8) disk I/O system, it wrote its own from scratch. Similarly when journalled soft updates were added in FreeBSD 9, code was added to fsck_ffs(8) to do the necessary journal rollback. And once again, rather than using either of the existing fsck_ffs(8) disk I/O systems, it wrote its own from scratch. Lastly the fsdb(8) utility uses the fsck_ffs(8) disk I/O management system. In preparation for making the changes necessary to enable snapshots to be taken when using journalled soft updates, it was necessary to have a single disk I/O system used by all the various subsystems in fsck_ffs(8). This commit merges the functionality required by all the different subsystems into a single disk I/O system that supports all of their needs. In so doing it picks up optimizations from each of them with the results that each of the subsystems does fewer reads and writes than it did with its own customized I/O system. It also greatly simplifies making changes to fsck_ffs(8) since everything goes through a single place. For example the ginode() function fetches an inode from the disk. When inode check hashes were added, they previously had to be checked in the code implementing inode fetch in each of the three different disk I/O systems. Now they need only be checked in ginode(). Tested by: Peter Holm Sponsored by: Netflix
2021-01-07 01:37:08 +00:00
static int restarts;
static void usage(void) __dead2;
static intmax_t argtoimax(int flag, const char *req, const char *str, int base);
static int checkfilesys(char *filesys);
static int setup_bkgrdchk(struct statfs *mntp, int sbrdfailed, char **filesys);
static int chkdoreload(struct statfs *mntp);
static struct statfs *getmntpt(const char *);
int
main(int argc, char *argv[])
{
int ch;
struct rlimit rlimit;
struct itimerval itimerval;
int fsret;
int ret = 0;
sync();
skipclean = 1;
inoopt = 0;
This update eliminates a kernel stack disclosure bug in UFS/FFS directory entries that is caused by uninitialized directory entry padding written to the disk. It can be viewed by any user with read access to that directory. Up to 3 bytes of kernel stack are disclosed per file entry, depending on the the amount of padding the kernel needs to pad out the entry to a 32 bit boundry. The offset in the kernel stack that is disclosed is a function of the filename size. Furthermore, if the user can create files in a directory, this 3 byte window can be expanded 3 bytes at a time to a 254 byte window with 75% of the data in that window exposed. The additional exposure is done by removing the entry, creating a new entry with a 4-byte longer name, extracting 3 more bytes by reading the directory, and repeating until a 252 byte name is created. This exploit works in part because the area of the kernel stack that is being disclosed is in an area that typically doesn't change that often (perhaps a few times a second on a lightly loaded system), and these file creates and unlinks themselves don't overwrite the area of kernel stack being disclosed. It appears that this bug originated with the creation of the Fast File System in 4.1b-BSD (Circa 1982, more than 36 years ago!), and is likely present in every Unix or Unix-like system that uses UFS/FFS. Amazingly, nobody noticed until now. This update also adds the -z flag to fsck_ffs to have it scrub the leaked information in the name padding of existing directories. It only needs to be run once on each UFS/FFS filesystem after a patched kernel is installed and running. Submitted by: David G. Lawrence <dg@dglawrence.com> Reviewed by: kib MFC after: 1 week
2019-05-03 21:54:14 +00:00
while ((ch = getopt(argc, argv, "b:Bc:CdEfFm:npRrSyZz")) != -1) {
switch (ch) {
case 'b':
skipclean = 0;
bflag = argtoimax('b', "number", optarg, 10);
printf("Alternate super block location: %jd\n", bflag);
break;
case 'B':
bkgrdflag = 1;
break;
case 'c':
skipclean = 0;
cvtlevel = argtoimax('c', "conversion level", optarg,
10);
This commit adds basic support for the UFS2 filesystem. The UFS2 filesystem expands the inode to 256 bytes to make space for 64-bit block pointers. It also adds a file-creation time field, an ability to use jumbo blocks per inode to allow extent like pointer density, and space for extended attributes (up to twice the filesystem block size worth of attributes, e.g., on a 16K filesystem, there is space for 32K of attributes). UFS2 fully supports and runs existing UFS1 filesystems. New filesystems built using newfs can be built in either UFS1 or UFS2 format using the -O option. In this commit UFS1 is the default format, so if you want to build UFS2 format filesystems, you must specify -O 2. This default will be changed to UFS2 when UFS2 proves itself to be stable. In this commit the boot code for reading UFS2 filesystems is not compiled (see /sys/boot/common/ufsread.c) as there is insufficient space in the boot block. Once the size of the boot block is increased, this code can be defined. Things to note: the definition of SBSIZE has changed to SBLOCKSIZE. The header file <ufs/ufs/dinode.h> must be included before <ufs/ffs/fs.h> so as to get the definitions of ufs2_daddr_t and ufs_lbn_t. Still TODO: Verify that the first level bootstraps work for all the architectures. Convert the utility ffsinfo to understand UFS2 and test growfs. Add support for the extended attribute storage. Update soft updates to ensure integrity of extended attribute storage. Switch the current extended attribute interfaces to use the extended attribute storage. Add the extent like functionality (framework is there, but is currently never used). Sponsored by: DARPA & NAI Labs. Reviewed by: Poul-Henning Kamp <phk@freebsd.org>
2002-06-21 06:18:05 +00:00
if (cvtlevel < 3)
errx(EEXIT, "cannot do level %d conversion",
cvtlevel);
break;
1995-05-30 06:12:45 +00:00
case 'd':
debug++;
break;
case 'E':
Eflag++;
break;
case 'f':
skipclean = 0;
break;
case 'F':
bkgrdcheck = 1;
break;
case 'm':
lfmode = argtoimax('m', "mode", optarg, 8);
if (lfmode &~ 07777)
errx(EEXIT, "bad mode to -m: %o", lfmode);
printf("** lost+found creation mode %o\n", lfmode);
break;
case 'n':
nflag++;
yflag = 0;
break;
case 'p':
preen++;
/*FALLTHROUGH*/
case 'C':
ckclean++;
break;
case 'R':
wantrestart = 1;
break;
case 'r':
inoopt++;
break;
case 'S':
surrender = 1;
break;
case 'y':
yflag++;
nflag = 0;
break;
case 'Z':
Zflag++;
break;
This update eliminates a kernel stack disclosure bug in UFS/FFS directory entries that is caused by uninitialized directory entry padding written to the disk. It can be viewed by any user with read access to that directory. Up to 3 bytes of kernel stack are disclosed per file entry, depending on the the amount of padding the kernel needs to pad out the entry to a 32 bit boundry. The offset in the kernel stack that is disclosed is a function of the filename size. Furthermore, if the user can create files in a directory, this 3 byte window can be expanded 3 bytes at a time to a 254 byte window with 75% of the data in that window exposed. The additional exposure is done by removing the entry, creating a new entry with a 4-byte longer name, extracting 3 more bytes by reading the directory, and repeating until a 252 byte name is created. This exploit works in part because the area of the kernel stack that is being disclosed is in an area that typically doesn't change that often (perhaps a few times a second on a lightly loaded system), and these file creates and unlinks themselves don't overwrite the area of kernel stack being disclosed. It appears that this bug originated with the creation of the Fast File System in 4.1b-BSD (Circa 1982, more than 36 years ago!), and is likely present in every Unix or Unix-like system that uses UFS/FFS. Amazingly, nobody noticed until now. This update also adds the -z flag to fsck_ffs to have it scrub the leaked information in the name padding of existing directories. It only needs to be run once on each UFS/FFS filesystem after a patched kernel is installed and running. Submitted by: David G. Lawrence <dg@dglawrence.com> Reviewed by: kib MFC after: 1 week
2019-05-03 21:54:14 +00:00
case 'z':
zflag++;
break;
default:
usage();
}
}
argc -= optind;
argv += optind;
if (!argc)
usage();
if (bkgrdflag && cvtlevel > 0) {
pfatal("CANNOT CONVERT A SNAPSHOT\n");
exit(EEXIT);
}
if (signal(SIGINT, SIG_IGN) != SIG_IGN)
(void)signal(SIGINT, catch);
if (ckclean)
(void)signal(SIGQUIT, catchquit);
signal(SIGINFO, infohandler);
if (bkgrdflag) {
signal(SIGALRM, alarmhandler);
itimerval.it_interval.tv_sec = 5;
itimerval.it_interval.tv_usec = 0;
itimerval.it_value.tv_sec = 5;
itimerval.it_value.tv_usec = 0;
setitimer(ITIMER_REAL, &itimerval, NULL);
}
/*
* Push up our allowed memory limit so we can cope
* with huge file systems.
*/
if (getrlimit(RLIMIT_DATA, &rlimit) == 0) {
rlimit.rlim_cur = rlimit.rlim_max;
(void)setrlimit(RLIMIT_DATA, &rlimit);
}
while (argc > 0) {
if ((fsret = checkfilesys(*argv)) == ERESTART)
continue;
ret |= fsret;
argc--;
argv++;
}
if (returntosingle)
ret = 2;
exit(ret);
}
static intmax_t
argtoimax(int flag, const char *req, const char *str, int base)
{
char *cp;
intmax_t ret;
ret = strtoimax(str, &cp, base);
if (cp == str || *cp)
errx(EEXIT, "-%c flag requires a %s", flag, req);
return (ret);
}
/*
* Check the specified file system.
*/
/* ARGSUSED */
static int
checkfilesys(char *filesys)
{
This commit adds basic support for the UFS2 filesystem. The UFS2 filesystem expands the inode to 256 bytes to make space for 64-bit block pointers. It also adds a file-creation time field, an ability to use jumbo blocks per inode to allow extent like pointer density, and space for extended attributes (up to twice the filesystem block size worth of attributes, e.g., on a 16K filesystem, there is space for 32K of attributes). UFS2 fully supports and runs existing UFS1 filesystems. New filesystems built using newfs can be built in either UFS1 or UFS2 format using the -O option. In this commit UFS1 is the default format, so if you want to build UFS2 format filesystems, you must specify -O 2. This default will be changed to UFS2 when UFS2 proves itself to be stable. In this commit the boot code for reading UFS2 filesystems is not compiled (see /sys/boot/common/ufsread.c) as there is insufficient space in the boot block. Once the size of the boot block is increased, this code can be defined. Things to note: the definition of SBSIZE has changed to SBLOCKSIZE. The header file <ufs/ufs/dinode.h> must be included before <ufs/ffs/fs.h> so as to get the definitions of ufs2_daddr_t and ufs_lbn_t. Still TODO: Verify that the first level bootstraps work for all the architectures. Convert the utility ffsinfo to understand UFS2 and test growfs. Add support for the extended attribute storage. Update soft updates to ensure integrity of extended attribute storage. Switch the current extended attribute interfaces to use the extended attribute storage. Add the extent like functionality (framework is there, but is currently never used). Sponsored by: DARPA & NAI Labs. Reviewed by: Poul-Henning Kamp <phk@freebsd.org>
2002-06-21 06:18:05 +00:00
ufs2_daddr_t n_ffree, n_bfree;
struct dups *dp;
struct statfs *mntp;
intmax_t blks, files;
size_t size;
int sbreadfailed, ofsmodified;
fsutilinit();
fsckinit();
cdevname = filesys;
if (debug && ckclean)
pwarn("starting\n");
/*
* Make best effort to get the disk name. Check first to see
* if it is listed among the mounted file systems. Failing that
* check to see if it is listed in /etc/fstab.
*/
mntp = getmntpt(filesys);
if (mntp != NULL)
filesys = mntp->f_mntfromname;
else
filesys = blockcheck(filesys);
/*
* If -F flag specified, check to see whether a background check
* is possible and needed. If possible and needed, exit with
* status zero. Otherwise exit with status non-zero. A non-zero
* exit status will cause a foreground check to be run.
*/
sblock_init();
sbreadfailed = 0;
Move the ability to search for alternate UFS superblocks from fsck_ffs(8) into ffs_sbsearch() to allow use by other parts of the system. Historically only fsck_ffs(8), the UFS filesystem checker, had code to track down and use alternate UFS superblocks. Since fsdb(8) used much of the fsck_ffs(8) implementation it had some ability to track down alternate superblocks. This change extracts the code to track down alternate superblocks from fsck_ffs(8) and puts it into a new function ffs_sbsearch() in sys/ufs/ffs/ffs_subr.c. Like ffs_sbget() and ffs_sbput() also found in ffs_subr.c, these functions can be used directly by the kernel subsystems. Additionally they are exported to the UFS library, libufs(8) so that they can be used by user-level programs. The new functions added to libufs(8) are sbfind(3) that is an alternative to sbread(3) and sbsearch(3) that is an alternative to sbget(3). See their manual pages for further details. The utilities that have been changed to search for superblocks are dumpfs(8), fsdb(8), ffsinfo(8), and fsck_ffs(8). Also, the prtblknos(8) tool found in tools/diag/prtblknos searches for superblocks. The UFS specific mount code uses the superblock search interface when mounting the root filesystem and when the administrator doing a mount(8) command specifies the force flag (-f). The standalone UFS boot code (found in stand/libsa/ufs.c) uses the superblock search code in the hope of being able to get the system up and running so that fsck_ffs(8) can be used to get the filesystem cleaned up. The following utilities have not been changed to search for superblocks: clri(8), tunefs(8), snapinfo(8), fstyp(8), quot(8), dump(8), fsirand(8), growfs(8), quotacheck(8), gjournal(8), and glabel(8). When these utilities fail, they do report the cause of the failure. The one exception is the tasting code used to try and figure what a given disk contains. The tasting code will remain silent so as not to put out a slew of messages as it trying to taste every new mass storage device that shows up. Reviewed by: kib Reviewed by: Warner Losh Tested by: Peter Holm Differential Revision: https://reviews.freebsd.org/D36053 Sponsored by: The FreeBSD Foundation
2022-08-13 19:41:53 +00:00
if (openfilesys(filesys) == 0 || readsb() == 0)
sbreadfailed = 1;
if (bkgrdcheck) {
if (sbreadfailed)
exit(3); /* Cannot read superblock */
/* Earlier background failed or journaled */
if (sblock.fs_flags & (FS_NEEDSFSCK | FS_SUJ))
exit(4);
if ((sblock.fs_flags & FS_DOSOFTDEP) == 0)
exit(5); /* Not running soft updates */
size = MIBSIZE;
if (sysctlnametomib("vfs.ffs.adjrefcnt", adjrefcnt, &size) < 0)
exit(6); /* Lacks kernel support */
if ((mntp == NULL && sblock.fs_clean == 1) ||
(mntp != NULL && (sblock.fs_flags & FS_UNCLEAN) == 0))
exit(7); /* Filesystem clean, report it now */
exit(0);
}
if (ckclean && skipclean) {
/*
* If file system is gjournaled, check it here.
*/
if (sbreadfailed)
exit(3); /* Cannot read superblock */
if (bkgrdflag == 0 &&
(nflag || (fswritefd = open(filesys, O_WRONLY)) < 0)) {
Rewrite the disk I/O management system in fsck_ffs(8). Other than making fsck_ffs(8) run faster, there should be no functional change. The original fsck_ffs(8) had its own disk I/O management system. When gjournal(8) was added to FreeBSD 7, code was added to fsck_ffs(8) to do the necessary gjournal rollback. Rather than use the existing fsck_ffs(8) disk I/O system, it wrote its own from scratch. Similarly when journalled soft updates were added in FreeBSD 9, code was added to fsck_ffs(8) to do the necessary journal rollback. And once again, rather than using either of the existing fsck_ffs(8) disk I/O systems, it wrote its own from scratch. Lastly the fsdb(8) utility uses the fsck_ffs(8) disk I/O management system. In preparation for making the changes necessary to enable snapshots to be taken when using journalled soft updates, it was necessary to have a single disk I/O system used by all the various subsystems in fsck_ffs(8). This commit merges the functionality required by all the different subsystems into a single disk I/O system that supports all of their needs. In so doing it picks up optimizations from each of them with the results that each of the subsystems does fewer reads and writes than it did with its own customized I/O system. It also greatly simplifies making changes to fsck_ffs(8) since everything goes through a single place. For example the ginode() function fetches an inode from the disk. When inode check hashes were added, they previously had to be checked in the code implementing inode fetch in each of the three different disk I/O systems. Now they need only be checked in ginode(). Tested by: Peter Holm Sponsored by: Netflix
2021-01-07 01:37:08 +00:00
fswritefd = -1;
if (preen)
pfatal("NO WRITE ACCESS");
printf(" (NO WRITE)");
}
if ((sblock.fs_flags & FS_GJOURNAL) != 0) {
if (sblock.fs_clean == 1) {
pwarn("FILE SYSTEM CLEAN; SKIPPING CHECKS\n");
exit(0);
}
if ((sblock.fs_flags &
(FS_UNCLEAN | FS_NEEDSFSCK)) == 0) {
bufinit();
gjournal_check(filesys);
if (chkdoreload(mntp) == 0)
exit(0);
exit(4);
} else {
pfatal("FULL FSCK NEEDED, CANNOT RUN FAST "
"FSCK\n");
}
}
close(fswritefd);
fswritefd = -1;
}
if (bkgrdflag) {
switch (setup_bkgrdchk(mntp, sbreadfailed, &filesys)) {
case -1: /* filesystem clean */
goto clean;
case 0: /* cannot do background, give up */
exit(EEXIT);
case 1: /* doing background check, preen rules apply */
preen = 1;
break;
}
}
switch (setup(filesys)) {
case 0:
if (preen)
pfatal("CAN'T CHECK FILE SYSTEM.");
return (EEXIT);
case -1:
clean:
pwarn("clean, %ld free ", (long)(sblock.fs_cstotal.cs_nffree +
sblock.fs_frag * sblock.fs_cstotal.cs_nbfree));
printf("(%jd frags, %jd blocks, %.1f%% fragmentation)\n",
(intmax_t)sblock.fs_cstotal.cs_nffree,
(intmax_t)sblock.fs_cstotal.cs_nbfree,
sblock.fs_cstotal.cs_nffree * 100.0 / sblock.fs_dsize);
return (0);
}
/*
* Determine if we can and should do journal recovery.
*/
if ((sblock.fs_flags & FS_SUJ) == FS_SUJ) {
if ((sblock.fs_flags & FS_NEEDSFSCK) != FS_NEEDSFSCK && skipclean) {
Rewrite the disk I/O management system in fsck_ffs(8). Other than making fsck_ffs(8) run faster, there should be no functional change. The original fsck_ffs(8) had its own disk I/O management system. When gjournal(8) was added to FreeBSD 7, code was added to fsck_ffs(8) to do the necessary gjournal rollback. Rather than use the existing fsck_ffs(8) disk I/O system, it wrote its own from scratch. Similarly when journalled soft updates were added in FreeBSD 9, code was added to fsck_ffs(8) to do the necessary journal rollback. And once again, rather than using either of the existing fsck_ffs(8) disk I/O systems, it wrote its own from scratch. Lastly the fsdb(8) utility uses the fsck_ffs(8) disk I/O management system. In preparation for making the changes necessary to enable snapshots to be taken when using journalled soft updates, it was necessary to have a single disk I/O system used by all the various subsystems in fsck_ffs(8). This commit merges the functionality required by all the different subsystems into a single disk I/O system that supports all of their needs. In so doing it picks up optimizations from each of them with the results that each of the subsystems does fewer reads and writes than it did with its own customized I/O system. It also greatly simplifies making changes to fsck_ffs(8) since everything goes through a single place. For example the ginode() function fetches an inode from the disk. When inode check hashes were added, they previously had to be checked in the code implementing inode fetch in each of the three different disk I/O systems. Now they need only be checked in ginode(). Tested by: Peter Holm Sponsored by: Netflix
2021-01-07 01:37:08 +00:00
sujrecovery = 1;
if (suj_check(filesys) == 0) {
printf("\n***** FILE SYSTEM MARKED CLEAN *****\n");
if (chkdoreload(mntp) == 0)
exit(0);
exit(4);
}
Rewrite the disk I/O management system in fsck_ffs(8). Other than making fsck_ffs(8) run faster, there should be no functional change. The original fsck_ffs(8) had its own disk I/O management system. When gjournal(8) was added to FreeBSD 7, code was added to fsck_ffs(8) to do the necessary gjournal rollback. Rather than use the existing fsck_ffs(8) disk I/O system, it wrote its own from scratch. Similarly when journalled soft updates were added in FreeBSD 9, code was added to fsck_ffs(8) to do the necessary journal rollback. And once again, rather than using either of the existing fsck_ffs(8) disk I/O systems, it wrote its own from scratch. Lastly the fsdb(8) utility uses the fsck_ffs(8) disk I/O management system. In preparation for making the changes necessary to enable snapshots to be taken when using journalled soft updates, it was necessary to have a single disk I/O system used by all the various subsystems in fsck_ffs(8). This commit merges the functionality required by all the different subsystems into a single disk I/O system that supports all of their needs. In so doing it picks up optimizations from each of them with the results that each of the subsystems does fewer reads and writes than it did with its own customized I/O system. It also greatly simplifies making changes to fsck_ffs(8) since everything goes through a single place. For example the ginode() function fetches an inode from the disk. When inode check hashes were added, they previously had to be checked in the code implementing inode fetch in each of the three different disk I/O systems. Now they need only be checked in ginode(). Tested by: Peter Holm Sponsored by: Netflix
2021-01-07 01:37:08 +00:00
sujrecovery = 0;
printf("** Skipping journal, falling through to full fsck\n\n");
}
if (fswritefd != -1) {
/*
* Write the superblock so we don't try to recover the
* journal on another pass. If this is the only change
* to the filesystem, we do not want it to be called
* out as modified.
*/
sblock.fs_mtime = time(NULL);
sbdirty();
ofsmodified = fsmodified;
flush(fswritefd, &sblk);
fsmodified = ofsmodified;
}
}
2018-02-08 23:14:24 +00:00
/*
* If the filesystem was run on an old kernel that did not
* support check hashes, clear the check-hash flags so that
* we do not try to verify them.
*/
if ((sblock.fs_flags & FS_METACKHASH) == 0)
sblock.fs_metackhash = 0;
/*
* If we are running on a kernel that can provide check hashes
* that are not yet enabled for the filesystem and we are
* running manually without the -y flag, offer to add any
* supported check hashes that are not already enabled.
*/
ckhashadd = 0;
if (preen == 0 && yflag == 0 && sblock.fs_magic != FS_UFS1_MAGIC &&
fswritefd != -1 && getosreldate() >= P_OSREL_CK_CYLGRP) {
if ((sblock.fs_metackhash & CK_CYLGRP) == 0 &&
reply("ADD CYLINDER GROUP CHECK-HASH PROTECTION") != 0) {
2018-02-08 23:14:24 +00:00
ckhashadd |= CK_CYLGRP;
sblock.fs_metackhash |= CK_CYLGRP;
}
2018-02-08 23:14:24 +00:00
if ((sblock.fs_metackhash & CK_SUPERBLOCK) == 0 &&
getosreldate() >= P_OSREL_CK_SUPERBLOCK &&
reply("ADD SUPERBLOCK CHECK-HASH PROTECTION") != 0) {
ckhashadd |= CK_SUPERBLOCK;
sblock.fs_metackhash |= CK_SUPERBLOCK;
}
2018-02-08 23:14:24 +00:00
if ((sblock.fs_metackhash & CK_INODE) == 0 &&
getosreldate() >= P_OSREL_CK_INODE &&
reply("ADD INODE CHECK-HASH PROTECTION") != 0) {
2018-02-08 23:14:24 +00:00
ckhashadd |= CK_INODE;
sblock.fs_metackhash |= CK_INODE;
}
#ifdef notyet
2018-02-08 23:14:24 +00:00
if ((sblock.fs_metackhash & CK_INDIR) == 0 &&
getosreldate() >= P_OSREL_CK_INDIR &&
reply("ADD INDIRECT BLOCK CHECK-HASH PROTECTION") != 0) {
2018-02-08 23:14:24 +00:00
ckhashadd |= CK_INDIR;
sblock.fs_metackhash |= CK_INDIR;
}
2018-02-08 23:14:24 +00:00
if ((sblock.fs_metackhash & CK_DIR) == 0 &&
getosreldate() >= P_OSREL_CK_DIR &&
reply("ADD DIRECTORY CHECK-HASH PROTECTION") != 0) {
2018-02-08 23:14:24 +00:00
ckhashadd |= CK_DIR;
sblock.fs_metackhash |= CK_DIR;
}
2018-02-08 23:14:24 +00:00
#endif /* notyet */
if (ckhashadd != 0) {
2018-02-08 23:14:24 +00:00
sblock.fs_flags |= FS_METACKHASH;
sbdirty();
}
2018-02-08 23:14:24 +00:00
}
/*
* Cleared if any questions answered no. Used to decide if
* the superblock should be marked clean.
*/
resolved = 1;
/*
* 1: scan inodes tallying blocks used
*/
if (preen == 0) {
printf("** Last Mounted on %s\n", sblock.fs_fsmnt);
if (mntp != NULL && mntp->f_flags & MNT_ROOTFS)
printf("** Root file system\n");
printf("** Phase 1 - Check Blocks and Sizes\n");
}
clock_gettime(CLOCK_REALTIME_PRECISE, &startprog);
pass1();
IOstats("Pass1");
/*
* 1b: locate first references to duplicates, if any
*/
if (duplist) {
if (preen || usedsoftdep)
pfatal("INTERNAL ERROR: DUPS WITH %s%s%s",
preen ? "-p" : "",
(preen && usedsoftdep) ? " AND " : "",
usedsoftdep ? "SOFTUPDATES" : "");
printf("** Phase 1b - Rescan For More DUPS\n");
pass1b();
IOstats("Pass1b");
}
/*
* 2: traverse directories from root to mark all connected directories
*/
if (preen == 0)
printf("** Phase 2 - Check Pathnames\n");
pass2();
IOstats("Pass2");
/*
* 3: scan inodes looking for disconnected directories
*/
if (preen == 0)
printf("** Phase 3 - Check Connectivity\n");
pass3();
IOstats("Pass3");
/*
* 4: scan inodes looking for disconnected files; check reference counts
*/
if (preen == 0)
printf("** Phase 4 - Check Reference Counts\n");
pass4();
IOstats("Pass4");
/*
* 5: check and repair resource counts in cylinder groups
*/
if (preen == 0)
printf("** Phase 5 - Check Cyl groups\n");
pass5();
IOstats("Pass5");
/*
* print out summary statistics
*/
n_ffree = sblock.fs_cstotal.cs_nffree;
n_bfree = sblock.fs_cstotal.cs_nbfree;
files = maxino - UFS_ROOTINO - sblock.fs_cstotal.cs_nifree - n_files;
blks = n_blks +
sblock.fs_ncg * (cgdmin(&sblock, 0) - cgsblock(&sblock, 0));
blks += cgsblock(&sblock, 0) - cgbase(&sblock, 0);
blks += howmany(sblock.fs_cssize, sblock.fs_fsize);
blks = maxfsblock - (n_ffree + sblock.fs_frag * n_bfree) - blks;
if (bkgrdflag && (files > 0 || blks > 0)) {
countdirs = sblock.fs_cstotal.cs_ndir - countdirs;
pwarn("Reclaimed: %ld directories, %jd files, %jd fragments\n",
countdirs, files - countdirs, blks);
}
pwarn("%ld files, %jd used, %ju free ",
(long)n_files, (intmax_t)n_blks,
(uintmax_t)n_ffree + sblock.fs_frag * n_bfree);
printf("(%ju frags, %ju blocks, %.1f%% fragmentation)\n",
(uintmax_t)n_ffree, (uintmax_t)n_bfree,
n_ffree * 100.0 / sblock.fs_dsize);
if (debug) {
if (files < 0)
printf("%jd inodes missing\n", -files);
if (blks < 0)
printf("%jd blocks missing\n", -blks);
if (duplist != NULL) {
printf("The following duplicate blocks remain:");
for (dp = duplist; dp; dp = dp->next)
printf(" %jd,", (intmax_t)dp->dup);
printf("\n");
}
}
duplist = (struct dups *)0;
muldup = (struct dups *)0;
inocleanup();
if (fsmodified) {
sblock.fs_time = time(NULL);
sbdirty();
}
Rewrite the disk I/O management system in fsck_ffs(8). Other than making fsck_ffs(8) run faster, there should be no functional change. The original fsck_ffs(8) had its own disk I/O management system. When gjournal(8) was added to FreeBSD 7, code was added to fsck_ffs(8) to do the necessary gjournal rollback. Rather than use the existing fsck_ffs(8) disk I/O system, it wrote its own from scratch. Similarly when journalled soft updates were added in FreeBSD 9, code was added to fsck_ffs(8) to do the necessary journal rollback. And once again, rather than using either of the existing fsck_ffs(8) disk I/O systems, it wrote its own from scratch. Lastly the fsdb(8) utility uses the fsck_ffs(8) disk I/O management system. In preparation for making the changes necessary to enable snapshots to be taken when using journalled soft updates, it was necessary to have a single disk I/O system used by all the various subsystems in fsck_ffs(8). This commit merges the functionality required by all the different subsystems into a single disk I/O system that supports all of their needs. In so doing it picks up optimizations from each of them with the results that each of the subsystems does fewer reads and writes than it did with its own customized I/O system. It also greatly simplifies making changes to fsck_ffs(8) since everything goes through a single place. For example the ginode() function fetches an inode from the disk. When inode check hashes were added, they previously had to be checked in the code implementing inode fetch in each of the three different disk I/O systems. Now they need only be checked in ginode(). Tested by: Peter Holm Sponsored by: Netflix
2021-01-07 01:37:08 +00:00
if (cvtlevel && (sblk.b_flags & B_DIRTY) != 0) {
1995-05-30 06:12:45 +00:00
/*
* Write out the duplicate super blocks
*/
if (sbput(fswritefd, &sblock, sblock.fs_ncg) == 0)
fsmodified = 1;
}
if (rerun)
resolved = 0;
/*
* Check to see if the file system is mounted read-write.
*/
if (bkgrdflag == 0 && mntp != NULL && (mntp->f_flags & MNT_RDONLY) == 0)
resolved = 0;
ckfini(resolved);
if (fsmodified && !preen)
printf("\n***** FILE SYSTEM WAS MODIFIED *****\n");
if (rerun) {
if (wantrestart && (restarts++ < 10) &&
(preen || reply("RESTART")))
return (ERESTART);
printf("\n***** PLEASE RERUN FSCK *****\n");
}
if (chkdoreload(mntp) != 0) {
if (!fsmodified)
return (0);
if (!preen)
printf("\n***** REBOOT NOW *****\n");
sync();
return (4);
}
return (rerun ? ERERUN : 0);
}
/*
* If we are to do a background check:
* Get the mount point information of the file system
* If already clean, return -1
* Check that kernel supports background fsck
* Find or create the snapshot directory
* Create the snapshot file
* Open snapshot
* If anything fails print reason and return 0 which exits
*/
static int
setup_bkgrdchk(struct statfs *mntp, int sbreadfailed, char **filesys)
{
struct stat snapdir;
struct group *grp;
struct iovec *iov;
char errmsg[255];
int iovlen;
size_t size;
/* Get the mount point information of the file system */
if (mntp == NULL) {
pwarn("NOT MOUNTED, CANNOT RUN IN BACKGROUND\n");
return (0);
}
if ((mntp->f_flags & MNT_RDONLY) != 0) {
pwarn("MOUNTED READ-ONLY, CANNOT RUN IN BACKGROUND\n");
return (0);
}
if ((mntp->f_flags & MNT_SOFTDEP) == 0) {
pwarn("NOT USING SOFT UPDATES, CANNOT RUN IN BACKGROUND\n");
return (0);
}
if (sbreadfailed) {
pwarn("SUPERBLOCK READ FAILED, CANNOT RUN IN BACKGROUND\n");
return (0);
}
if ((sblock.fs_flags & FS_NEEDSFSCK) != 0) {
pwarn("FULL FSCK NEEDED, CANNOT RUN IN BACKGROUND\n");
return (0);
}
if ((sblock.fs_flags & FS_SUJ) != 0) {
pwarn("JOURNALED FILESYSTEM, CANNOT RUN IN BACKGROUND\n");
return (0);
}
if (skipclean && ckclean &&
(sblock.fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK)) == 0) {
/*
* file system is clean;
* skip snapshot and report it clean
*/
pwarn("FILE SYSTEM CLEAN; SKIPPING CHECKS\n");
return (-1);
}
/* Check that kernel supports background fsck */
size = MIBSIZE;
if (sysctlnametomib("vfs.ffs.adjrefcnt", adjrefcnt, &size) < 0||
sysctlnametomib("vfs.ffs.adjblkcnt", adjblkcnt, &size) < 0||
sysctlnametomib("vfs.ffs.setsize", setsize, &size) < 0 ||
sysctlnametomib("vfs.ffs.freefiles", freefiles, &size) < 0||
sysctlnametomib("vfs.ffs.freedirs", freedirs, &size) < 0 ||
sysctlnametomib("vfs.ffs.freeblks", freeblks, &size) < 0) {
pwarn("KERNEL LACKS BACKGROUND FSCK SUPPORT\n");
return (0);
}
/*
* When kernel lacks runtime bgfsck superblock summary
* adjustment functionality, it does not mean we can not
* continue, as old kernels will recompute the summary at
* mount time. However, it will be an unexpected softupdates
* inconsistency if it turns out that the summary is still
* incorrect. Set a flag so subsequent operation can know this.
*/
bkgrdsumadj = 1;
if (sysctlnametomib("vfs.ffs.adjndir", adjndir, &size) < 0 ||
sysctlnametomib("vfs.ffs.adjnbfree", adjnbfree, &size) < 0 ||
sysctlnametomib("vfs.ffs.adjnifree", adjnifree, &size) < 0 ||
sysctlnametomib("vfs.ffs.adjnffree", adjnffree, &size) < 0 ||
sysctlnametomib("vfs.ffs.adjnumclusters", adjnumclusters,
&size) < 0) {
bkgrdsumadj = 0;
pwarn("KERNEL LACKS RUNTIME SUPERBLOCK SUMMARY ADJUSTMENT "
"SUPPORT\n");
}
/* Find or create the snapshot directory */
snprintf(snapname, sizeof snapname, "%s/.snap",
mntp->f_mntonname);
if (stat(snapname, &snapdir) < 0) {
if (errno != ENOENT) {
pwarn("CANNOT FIND SNAPSHOT DIRECTORY %s: %s, CANNOT "
"RUN IN BACKGROUND\n", snapname, strerror(errno));
return (0);
}
if ((grp = getgrnam("operator")) == NULL ||
mkdir(snapname, 0770) < 0 ||
chown(snapname, -1, grp->gr_gid) < 0 ||
chmod(snapname, 0770) < 0) {
pwarn("CANNOT CREATE SNAPSHOT DIRECTORY %s: %s, "
"CANNOT RUN IN BACKGROUND\n", snapname,
strerror(errno));
return (0);
}
} else if (!S_ISDIR(snapdir.st_mode)) {
pwarn("%s IS NOT A DIRECTORY, CANNOT RUN IN BACKGROUND\n",
snapname);
return (0);
}
/* Create the snapshot file */
iov = NULL;
iovlen = 0;
errmsg[0] = '\0';
snprintf(snapname, sizeof snapname, "%s/.snap/fsck_snapshot",
mntp->f_mntonname);
build_iovec(&iov, &iovlen, "fstype", "ffs", 4);
build_iovec(&iov, &iovlen, "from", snapname, (size_t)-1);
build_iovec(&iov, &iovlen, "fspath", mntp->f_mntonname, (size_t)-1);
build_iovec(&iov, &iovlen, "errmsg", errmsg, sizeof(errmsg));
build_iovec(&iov, &iovlen, "update", NULL, 0);
build_iovec(&iov, &iovlen, "snapshot", NULL, 0);
/* Create snapshot, removing old snapshot if it exists */
while (nmount(iov, iovlen, mntp->f_flags) < 0) {
if (errno == EEXIST && unlink(snapname) == 0)
continue;
pwarn("CANNOT CREATE SNAPSHOT %s: %s %s\n", snapname,
strerror(errno), errmsg);
return (0);
}
/* Open snapshot */
if (openfilesys(snapname) == 0) {
unlink(snapname);
pwarn("CANNOT OPEN SNAPSHOT %s: %s, CANNOT RUN IN "
"BACKGROUND\n", snapname, strerror(errno));
return (0);
}
free(sblock.fs_csp);
free(sblock.fs_si);
havesb = 0;
*filesys = snapname;
cmd.version = FFS_CMD_VERSION;
cmd.handle = fsreadfd;
return (1);
}
static int
chkdoreload(struct statfs *mntp)
{
struct iovec *iov;
int iovlen;
char errmsg[255];
if (mntp == NULL)
return (0);
iov = NULL;
iovlen = 0;
errmsg[0] = '\0';
/*
* We modified a mounted file system. Do a mount update on
* it unless it is read-write, so we can continue using it
* as safely as possible.
*/
if (mntp->f_flags & MNT_RDONLY) {
build_iovec(&iov, &iovlen, "fstype", "ffs", 4);
build_iovec(&iov, &iovlen, "from", mntp->f_mntfromname,
(size_t)-1);
build_iovec(&iov, &iovlen, "fspath", mntp->f_mntonname,
(size_t)-1);
build_iovec(&iov, &iovlen, "errmsg", errmsg,
sizeof(errmsg));
build_iovec(&iov, &iovlen, "update", NULL, 0);
build_iovec(&iov, &iovlen, "reload", NULL, 0);
/*
* XX: We need the following line until we clean up
* nmount parsing of root mounts and NFS root mounts.
*/
build_iovec(&iov, &iovlen, "ro", NULL, 0);
2008-12-24 03:07:19 +00:00
if (nmount(iov, iovlen, mntp->f_flags) == 0) {
return (0);
}
pwarn("mount reload of '%s' failed: %s %s\n\n",
mntp->f_mntonname, strerror(errno), errmsg);
return (1);
}
return (0);
}
/*
* Get the mount point information for name.
*/
static struct statfs *
getmntpt(const char *name)
{
struct stat devstat, mntdevstat;
char device[sizeof(_PATH_DEV) - 1 + MNAMELEN];
char *ddevname;
struct statfs *mntbuf, *statfsp;
int i, mntsize, isdev;
if (stat(name, &devstat) != 0)
return (NULL);
if (S_ISCHR(devstat.st_mode) || S_ISBLK(devstat.st_mode))
isdev = 1;
else
isdev = 0;
mntsize = getmntinfo(&mntbuf, MNT_NOWAIT);
for (i = 0; i < mntsize; i++) {
statfsp = &mntbuf[i];
ddevname = statfsp->f_mntfromname;
if (*ddevname != '/') {
if (strlen(_PATH_DEV) + strlen(ddevname) + 1 >
sizeof(statfsp->f_mntfromname))
continue;
strcpy(device, _PATH_DEV);
strcat(device, ddevname);
strcpy(statfsp->f_mntfromname, device);
}
if (isdev == 0) {
if (strcmp(name, statfsp->f_mntonname))
continue;
return (statfsp);
}
if (stat(ddevname, &mntdevstat) == 0 &&
mntdevstat.st_rdev == devstat.st_rdev)
return (statfsp);
}
statfsp = NULL;
return (statfsp);
}
static void
usage(void)
{
(void) fprintf(stderr,
"usage: %s [-BCdEFfnpRrSyZ] [-b block] [-c level] [-m mode] filesystem ...\n",
getprogname());
exit(1);
}
void
infohandler(int sig __unused)
{
got_siginfo = 1;
}
void
alarmhandler(int sig __unused)
{
got_sigalarm = 1;
}